Skip to content

Commit c8f18a7

Browse files
committedJul 26, 2018
Initial commit
0 parents  commit c8f18a7

19 files changed

+86503
-0
lines changed
 

‎.Rhistory

+256
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
# Directory containing the file with phenotypes
2+
dir_gwas <- "T:/dep_coupland/grp_hancock/johan/GWAS/"
3+
# Name of the file to import
4+
file_name <- "test_export_df.txt"
5+
# Name of the phenotype indicated in the dataframe (see Lists of phenotypes below)
6+
phenotype <- "HX"
7+
# Name chosen for the output file
8+
name_phenotype <- "HX"
9+
# List of packages required for this analysis
10+
pkg <- c("ggplot2")
11+
# Check if packages are not installed and assign the
12+
# names of the packages not installed to the variable new.pkg
13+
new.pkg <- pkg[!(pkg %in% installed.packages())]
14+
# If there are any packages in the list that aren't installed,
15+
# install them
16+
if (length(new.pkg)) {
17+
install.packages(new.pkg, repos = "http://cran.rstudio.com")
18+
}
19+
#Library to plot Manhattan plots
20+
library(ggplot2)
21+
# fun_nucleus.R is located in same directory as this script
22+
source("fun_nucleus.R")
23+
file_path <- paste(dir_gwas, file_name, sep="")
24+
df_gwas <- read.table(file_path, header=TRUE, sep="\t")
25+
View(df_gwas)
26+
View(df_gwas)
27+
names(df_gwas)
28+
names(df_gwas)
29+
# Not that df_gwas$phenotype cannot be used as $ does not take variables
30+
variable <- df_gwas[,phenotype]
31+
name_file <- paste(name_phenotype,".tsv", sep="")
32+
path_file <- paste(dir_gwas, name_file, sep="")
33+
write.table(variable, row.names = FALSE, col.names = FALSE, quote = FALSE, file = path_file, sep = "\t")
34+
# Directory containing the file with phenotypes
35+
dir_gwas <- "T:/dep_coupland/grp_hancock/johan/GWAS/"
36+
# Name of the file to import. The file is generated by process_chromatinJ_output.Rmd
37+
file_name <- "test_export_df.txt"
38+
# Name of the phenotype indicated in the dataframe (see Lists of phenotypes below)
39+
phenotype <- "Area"
40+
# Name chosen for the output file
41+
name_phenotype <- "Area"
42+
# List of packages required for this analysis
43+
pkg <- c("ggplot2")
44+
# Check if packages are not installed and assign the
45+
# names of the packages not installed to the variable new.pkg
46+
new.pkg <- pkg[!(pkg %in% installed.packages())]
47+
# If there are any packages in the list that aren't installed,
48+
# install them
49+
if (length(new.pkg)) {
50+
install.packages(new.pkg, repos = "http://cran.rstudio.com")
51+
}
52+
#Library to plot Manhattan plots
53+
library(ggplot2)
54+
# fun_nucleus.R is located in same directory as this script
55+
source("fun_nucleus.R")
56+
file_path <- paste(dir_gwas, file_name, sep="")
57+
df_gwas <- read.table(file_path, header=TRUE, sep="\t")
58+
names(df_gwas)
59+
# Not that df_gwas$phenotype cannot be used as $ does not take variables
60+
variable <- df_gwas[,phenotype]
61+
name_file <- paste(name_phenotype,".tsv", sep="")
62+
path_file <- paste(dir_gwas, name_file, sep="")
63+
write.table(variable, row.names = FALSE, col.names = FALSE, quote = FALSE, file = path_file, sep = "\t")
64+
#normalize data
65+
variable_scaled <- scale(variable)
66+
# Add automatically the suffix "_scaled" when the variable is scaled
67+
name_phenotype_scaled <- paste(name_phenotype, "scaled", sep="_")
68+
name_file_scaled <- paste(name_phenotype_scaled,".tsv", sep="")
69+
path_file <- paste(dir_gwas, name_file_scaled, sep="")
70+
write.table(variable_scaled, row.names = FALSE, col.names = FALSE, quote = FALSE, file = path_file, sep = "\t")
71+
ggplot.boxplot(df_gwas, "Accession", variable, name_phenotype)+labs(x="Accession",y=name_phenotype)
72+
#Test normality of residual distribution
73+
hist(variable)
74+
#Test normality of residual distribution
75+
qqnorm(variable)
76+
qqline(variable)
77+
#normalize data
78+
variable_scaled <- scale(variable)
79+
# Add automatically the suffix "_scaled" when the variable is scaled
80+
name_phenotype_scaled <- paste(name_phenotype, "scaled", sep="_")
81+
name_file_scaled <- paste(name_phenotype_scaled,".tsv", sep="")
82+
path_file <- paste(dir_gwas, name_file_scaled, sep="")
83+
write.table(variable_scaled, row.names = FALSE, col.names = FALSE, quote = FALSE, file = path_file, sep = "\t")
84+
name_phenotype <- "Area"
85+
#normalize data
86+
variable_scaled <- scale(variable)
87+
# Add automatically the suffix "_scaled" when the variable is scaled
88+
name_phenotype_scaled <- paste(name_phenotype, "scaled", sep="_")
89+
name_file_scaled <- paste(name_phenotype_scaled,".tsv", sep="")
90+
path_file <- paste(dir_gwas, name_file_scaled, sep="")
91+
write.table(variable_scaled, row.names = FALSE, col.names = FALSE, quote = FALSE, file = path_file, sep = "\t")
92+
# Not that df_gwas$phenotype cannot be used as $ does not take variables
93+
variable <- df_gwas[,phenotype]
94+
name_file <- paste(name_phenotype,".tsv", sep="")
95+
path_file <- paste(dir_gwas, name_file, sep="")
96+
write.table(variable, row.names = FALSE, col.names = FALSE, quote = FALSE, file = path_file, sep = "\t")
97+
a <- c("something", "to", "paste")
98+
a <- c("Something", "to", "paste")
99+
paste(a, sep="_")
100+
paste(a, sep="_")
101+
paste(a, collapse="_")
102+
#normalize data
103+
variable_scaled <- scale(variable)
104+
# Add automatically the suffix "_scaled" when the variable is scaled
105+
name_phenotype_scaled <- paste(name_phenotype, "scaled", collapse="_")
106+
name_file_scaled <- paste(name_phenotype_scaled,".tsv", sep="")
107+
path_file <- paste(dir_gwas, name_file_scaled, sep="")
108+
write.table(variable_scaled, row.names = FALSE, col.names = FALSE, quote = FALSE, file = path_file, sep = "\t")
109+
paste(a, collapse="_")
110+
a <- "Scaled"
111+
b <- "Area"
112+
paste(a,b,sep="_")
113+
name_file_scaled <- paste(name_phenotype_scaled,".tsv", sep="")
114+
name_file_scaled
115+
name_phenotype_scaled <- paste(name_phenotype, "scaled", sep="_")
116+
name_phenotype_scaled
117+
variable_scaled <- scale(variable)
118+
name_phenotype_scaled <- paste(name_phenotype, "scaled", sep="_")
119+
name_phenotype_scaled
120+
#normalize data
121+
variable_scaled <- scale(variable)
122+
# Add automatically the suffix "_scaled" when the variable is scaled
123+
name_phenotype_scaled <- paste(name_phenotype, "scaled", sep="_")
124+
name_file_scaled <- paste(name_phenotype_scaled,".tsv", sep="")
125+
path_file <- paste(dir_gwas, name_file_scaled, sep="")
126+
write.table(variable_scaled, row.names = FALSE, col.names = FALSE, quote = FALSE, file = path_file, sep = "\t")
127+
name_file_scaled
128+
path_file <- paste(dir_gwas, name_file_scaled, sep="")
129+
path_file
130+
variable_scaled
131+
path_file
132+
# Directory containing the file with phenotypes
133+
dir_gwas <- "T:/dep_coupland/grp_hancock/johan/GWAS/"
134+
# Name of the file to import. The file is generated by process_chromatinJ_output.Rmd
135+
file_name <- "test_export_df.txt"
136+
# Name of the phenotype indicated in the dataframe (see Lists of phenotypes below)
137+
phenotype <- "Area"
138+
# Name chosen for the output file
139+
name_phenotype <- "Area"
140+
# List of packages required for this analysis
141+
pkg <- c("ggplot2")
142+
# Check if packages are not installed and assign the
143+
# names of the packages not installed to the variable new.pkg
144+
new.pkg <- pkg[!(pkg %in% installed.packages())]
145+
# If there are any packages in the list that aren't installed,
146+
# install them
147+
if (length(new.pkg)) {
148+
install.packages(new.pkg, repos = "http://cran.rstudio.com")
149+
}
150+
#Library to plot Manhattan plots
151+
library(ggplot2)
152+
# fun_nucleus.R is located in same directory as this script
153+
source("fun_nucleus.R")
154+
file_path <- paste(dir_gwas, file_name, sep="")
155+
df_gwas <- read.table(file_path, header=TRUE, sep="\t")
156+
names(df_gwas)
157+
# Not that df_gwas$phenotype cannot be used as $ does not take variables
158+
variable <- df_gwas[,phenotype]
159+
name_file <- paste(name_phenotype,".tsv", sep="")
160+
path_file <- paste(dir_gwas, name_file, sep="")
161+
write.table(variable, row.names = FALSE, col.names = FALSE, quote = FALSE, file = path_file, sep = "\t")
162+
#normalize data
163+
variable_scaled <- scale(variable)
164+
# Add automatically the suffix "_scaled" when the variable is scaled
165+
name_phenotype_scaled <- paste(name_phenotype, "scaled", sep="_")
166+
name_file_scaled <- paste(name_phenotype_scaled,".tsv", sep="")
167+
path_file <- paste(dir_gwas, name_file_scaled, sep="")
168+
write.table(variable_scaled, row.names = FALSE, col.names = FALSE, quote = FALSE, file = path_file, sep = "\t")
169+
ggplot.boxplot(df_gwas, "Accession", variable, name_phenotype)+labs(x="Accession",y=name_phenotype)
170+
#Test normality of residual distribution
171+
hist(variable)
172+
#Test normality of residual distribution
173+
qqnorm(variable)
174+
qqline(variable)
175+
#normalize data
176+
variable_scaled <- scale(variable)
177+
# Add automatically the suffix "_scaled" when the variable is scaled
178+
name_phenotype_scaled <- paste(name_phenotype, "scaled", sep="_")
179+
name_file_scaled <- paste(name_phenotype_scaled,".tsv", sep="")
180+
path_file <- paste(dir_gwas, name_file_scaled, sep="")
181+
write.table(variable_scaled, row.names = FALSE, col.names = FALSE, quote = FALSE, file = path_file, sep = "\t")
182+
# Directory containing the file with phenotypes
183+
dir_gwas <- "T:/dep_coupland/grp_hancock/johan/GWAS/"
184+
# Name of the file to import. The file is generated by process_chromatinJ_output.Rmd
185+
file_name <- "test_export_df.txt"
186+
# Name of the phenotype indicated in the dataframe (see Lists of phenotypes below)
187+
phenotype <- "Area"
188+
# Name chosen for the output file
189+
name_phenotype <- "Area_nucleus"
190+
# List of packages required for this analysis
191+
pkg <- c("ggplot2")
192+
# Check if packages are not installed and assign the
193+
# names of the packages not installed to the variable new.pkg
194+
new.pkg <- pkg[!(pkg %in% installed.packages())]
195+
# If there are any packages in the list that aren't installed,
196+
# install them
197+
if (length(new.pkg)) {
198+
install.packages(new.pkg, repos = "http://cran.rstudio.com")
199+
}
200+
#Library to plot Manhattan plots
201+
library(ggplot2)
202+
# fun_nucleus.R is located in same directory as this script
203+
source("fun_nucleus.R")
204+
file_path <- paste(dir_gwas, file_name, sep="")
205+
df_gwas <- read.table(file_path, header=TRUE, sep="\t")
206+
names(df_gwas)
207+
# Not that df_gwas$phenotype cannot be used as $ does not take variables
208+
variable <- df_gwas[,phenotype]
209+
name_file <- paste(name_phenotype,".tsv", sep="")
210+
path_file <- paste(dir_gwas, name_file, sep="")
211+
write.table(variable, row.names = FALSE, col.names = FALSE, quote = FALSE, file = path_file, sep = "\t")
212+
#normalize data
213+
variable_scaled <- scale(variable)
214+
# Add automatically the suffix "_scaled" when the variable is scaled
215+
name_phenotype_scaled <- paste(name_phenotype, "scaled", sep="_")
216+
name_file_scaled <- paste(name_phenotype_scaled,".tsv", sep="")
217+
path_file <- paste(dir_gwas, name_file_scaled, sep="")
218+
write.table(variable_scaled, row.names = FALSE, col.names = FALSE, quote = FALSE, file = path_file, sep = "\t")
219+
ggplot.boxplot(df_gwas, "Accession", variable, name_phenotype)+labs(x="Accession",y=name_phenotype)
220+
#Test normality of residual distribution
221+
hist(variable)
222+
#Test normality of residual distribution
223+
qqnorm(variable)
224+
qqline(variable)
225+
knitr::opts_chunk$set(message=TRUE, warning=FALSE)
226+
# List of packages required for this analysis
227+
pkg <- c("qqman", "ggplot2")
228+
# Check if packages are not installed and assign the
229+
# names of the packages not installed to the variable new.pkg
230+
new.pkg <- pkg[!(pkg %in% installed.packages())]
231+
# If there are any packages in the list that aren't installed,
232+
# install them
233+
if (length(new.pkg)) {
234+
install.packages(new.pkg, repos = "http://cran.rstudio.com")
235+
}
236+
#Library to plot Manhattan plots
237+
library(qqman)
238+
library(ggplot2)
239+
dir_file <- "T:/dep_coupland/grp_hancock/johan/GWAS/output/"
240+
file.name <- "Area_nucleus.assoc.clean.txt"
241+
path.file <- paste(dir_file, file.name, sep="")
242+
gwas.results <- read.delim(path.file, sep="\t")
243+
qq(gwas.results$P, main=file.name)
244+
plot(-log(gwas.results$P)~gwas.results$CHR, main=file.name)
245+
hist(-log(gwas.results$P), main=file.name)
246+
as.data.frame(table(gwas.results$CHR))
247+
# Get positions of the chromosome with SNPs having a -log(P) > 5
248+
threshold <- (10^-5)
249+
gwas_significant <- subset(gwas.results, P < threshold)
250+
# Get a vector of the SNPs with significant value
251+
SNP_significant <- as.vector(gwas_significant$SNP)
252+
manhattan(gwas.results, highlight=SNP_significant, main=file.name)
253+
#Check if dataframe is not empty (no SNPs above threshold value
254+
if(dim(gwas_significant)[[1]] != 0){
255+
gwas_significant
256+
}

‎assoc2qqman.py

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#!/usr/bin/python
2+
3+
import sys
4+
import os
5+
import getopt
6+
import re
7+
8+
'''
9+
AUTHOR: Johan Zicola
10+
11+
USAGE: assoc2qqman.py <prefix.assoc.txt>
12+
Read the output of gemma 'prefix.assoc.txt' derived
13+
from the association analysis command and performs
14+
readjustment of the file to be compatible for Read
15+
R analysis with the package 'qqman'.
16+
'''
17+
18+
19+
def main():
20+
21+
# Open the file
22+
input_file = sys.argv[1]
23+
input_file = open(input_file, "r")
24+
lines_file = input_file.read().splitlines()
25+
26+
27+
for line in lines_file:
28+
if line[:3] == "chr": #First line should be header
29+
header = "SNP\tCHR\tBP\tP\tzscore"
30+
print header
31+
else:
32+
line = line.strip().split("\t") # Get rid of EOL and Create a list based on \t separation
33+
SNP = line[2] # third column
34+
chr_name = line[1].split(":")
35+
#Keep only lines matching chromosomes (exclude Mt, Pt, ...)
36+
if chr_name[0][0:3] == "Chr":
37+
CHR = chr_name[0].replace("Chr", "")
38+
BP = line[2]
39+
P = line[8]
40+
zscore = line[7]
41+
new_line = SNP, CHR, BP, P, zscore
42+
print "\t".join(new_line)
43+
input_file.close()
44+
45+
if __name__ == "__main__":
46+
sys.exit(main())
47+

0 commit comments

Comments
 (0)
Please sign in to comment.