-
Notifications
You must be signed in to change notification settings - Fork 0
/
snp_filter.R
31 lines (21 loc) · 988 Bytes
/
snp_filter.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#read in file from Stacks output file
sumstats <- read.delim('batch_1.sumstats.tsv', skip=1,stringsAsFactors = FALSE, sep='\t')
seqs <- read.delim('batch_1.catalog.tags.tsv', skip=1,stringsAsFactors = FALSE, header=FALSE,sep='\t')
head(sumstats)
seqs[1,]
length(seqs[10])
#limiting snps to those that have one snp per read
countSNPs <- table(sumstats$Locus.ID)
singleSNPs <- which(countSNPs == 1)
runningSNPs1 <- sumstats[singleSNPs,]
#limiting snps to those that fall in the middle of the read
#assuming reads are 130, limiting by 40 nucleotides on each side
midseq <- which(runningSNPs1$Col %in% 40:90)
runningSNPs2 <- runningSNPs1[midseq,]
#limiting snps to equal allele freqs
allelefreq <- which(runningSNPs2$P <= 0.70)
runningSNPs3 <- runningSNPs2[allelefreq,]
#limiting SNPs to min number of inds it was detected in
minN <- which(runningSNPs3$N >= 10)
runningSNPs4 <- runningSNPs3[minN,]
write.table(runningSNPs4,'./snpStats.tsv', sep='\t',quote=FALSE, row.names = FALSE)