- Outgroup
DQ060054 S. frontalis Adams and Rosel 2006 DQ060057 S. frontalis Adams and Rosel 2006 - Sources
- Vollmer and Rosel 2017
./vollmer_2017.online_resource_7.tsv
./vollmer_2017.online_resource_16.tsv
Population Color NW inner #98d4ed NW outer #bebfbf NW oceanic #e0e47e E inner #f5c89b E outer #e67383 E oceanic #c7a3c7 NE oceanic #99c796 - Vollmer et al 2021
./vollmer_2021.SI-table-3.tsv
./vollmer_2021.SI-table-4.tsv
Population Color Entire green pop (n=541) #669e40 Entire blue pop (n=42) #2f5597
- Vollmer and Rosel 2017
./vollmer_2017.online_resource_7.tsv
./vollmer_2017.online_resource_16.tsv
- Download sequences from NCBI to ./vollmer.samples.fa
ROOT=$(git rev-parse --show-toplevel) cat <(grep -v '^#' vollmer_2017.online_resource_7.tsv | cut -f 4 ) \ <(grep -v '^#' vollmer_2021.SI-table-4.tsv | cut -f 7) | sort -u | ~/.local/edirect/efetch -db nuccore -format fasta > $DIR/vollmer.samples.fa
Combine datasets and trim names (includes the two outliers). Align with mafft
ROOT=$(git rev-parse --show-toplevel)
DIR=$ROOT/4-mtCR/
cat $DIR/vollmer.samples.fa \
$ROOT/3-assemble/mtCR.fa \
$DIR/outgroup.fa |
sed '/>/s/ .*//' |
$ROOT/apps/mafft-7.520/mafft.bat --auto --thread 4 \
/dev/stdin > $DIR/1-mafft.fasta
library(adegenet)
library(haplotypes)
library(LEA)
library(tidyverse)
library(ape)
library(tidytree)
library(ggtree)
library(Biostrings)
library(hues)
library(hierfstat)
library(poppr)
library(ascii)
op <- options(asciiType = "org")
library(aplot)
options(width = 70)
sessionInfo()
colors <- list("2017"=c("NW.inner"="#98d4ed",
"E.inner" = "#f5c89b",
"NW.outer" = "#bebfbf",
"E.outer" = "#e67383",
"NW.Oceanic"= "#e0e47e",
"E.oceanic" = "#c7a3c7",
"NE.oceanic"= "#99c796"),
"2021"=c("green" = "#669e40",
"blue" = "#2f5597"))
meta.counts <- mapply(left_join,
list("2017"="vollmer_2017.online_resource_16.tsv",
"2021"="vollmer_2021.SI-table-3.tsv") %>%
lapply(read.delim, comment.char = '#') %>%
lapply(gather, -Haplotype, key="Group", value='count') %>%
lapply(select, Haplotype, Group, count),
list("2017"="vollmer_2017.online_resource_7.tsv",
"2021"="vollmer_2021.SI-table-4.tsv") %>%
lapply(read.delim,sep="\t", skip=8) %>%
lapply(select, Haplotype, "ID"=starts_with("GenBank")),
SIMPLIFY = F) %>%
bind_rows(.id="Set") %>%
unique %>%
mutate(ID=paste0(ID, ".1")) %>%
filter(!grepl("Green.pop.subset", Group ) & count > 0) %>%
mutate(Group = sub("Entire.(green|blue).pop.*", "\\1", Group))
fasta.with_outgroup <-
read.dna("1-mafft.fasta.gz", format='fasta', as.matrix=T)
fasta.with_outgroup
image.DNAbin(fasta.with_outgroup)
Removing gaps at the start and end of the alignments
gap.counts = colSums(as.character(fasta.with_outgroup) == "-")
trim.range = range(which(gap.counts==0))
fasta.with_outgroup.trimmed =
fasta.with_outgroup[,seq(trim.range[1], trim.range[2])]
fasta.with_outgroup.trimmed
Fix ambiguous bases
fasta.with_outgroup.trimmed.fixed <-
solveAmbiguousBases(fasta.with_outgroup.trimmed)
fasta.with_outgroup.trimmed.fixed
Write trimmed fasta file
write.FASTA(fasta.with_outgroup.trimmed, "1-mafft.trimmed.fasta")
write.FASTA(fasta.with_outgroup.trimmed.fixed,
"1-mafft.trimmed.fixed.fasta")
haplotypes.with_outgroup <-
fasta.with_outgroup.trimmed.fixed %>%
as.dna %>%
haplotype(indel='missing')
haplotypes.with_outgroup@freq %>%
table %>%
as.data.frame %>%
setNames(c("Size", "Count")) %>%
mutate(Size = as.character(Size)) %>%
rbind(., c("Total", sum(.$Count)))
Rename haplotypes mtCR-new, mtCR-mix, and mtCR-pub for haplotypes containing only new sequences, mix of published and new, and only published.
haplotypes.new_names <-
haplotypes.with_outgroup@haplist %>%
lapply(as.data.frame) %>%
bind_rows(.id='name') %>%
setNames(c("Haplotype", "Sample")) %>%
group_by(Haplotype) %>%
mutate(New = as.numeric(any(grepl('^SER', Sample))),
Pub = as.numeric(any(!grepl('^SER', Sample))) * 2,
Out = any((Sample %in% c("DQ060054.1", "DQ060057.1")))) %>%
mutate(NewName = as.character(factor(New+Pub, 1:3,
labels = c("mtCR.new",
"mtCR.pub",
"mtCR.mix")))) %>%
mutate(NewName = ifelse(Out, "mtCR.out", NewName),
Size=n()) %>%
select(Haplotype, NewName, Size) %>%
unique() %>%
rowid_to_column("tmp") %>%
arrange(desc(Size)) %>%
group_by(NewName) %>%
mutate(NewName = sprintf("%s-%d", NewName, row_number())) %>%
arrange(tmp) %>%
select(-tmp)
haplotypes.new_names
names(haplotypes.with_outgroup@hapind) <- haplotypes.new_names$NewName
names(haplotypes.with_outgroup@haplist) <- haplotypes.new_names$NewName
rownames(haplotypes.with_outgroup@sequence) <-
haplotypes.new_names$NewName
Write fasta
haplotypes.with_outgroup@sequence %>%
as.DNAbin %>%
write.FASTA("2-haplotypes.with_outgroup.fixed.fasta")
haplotypes.with_outgroup@sequence[
!grepl("mtCR.out", names(haplotypes.with_outgroup@haplist)),
] %>%
as.DNAbin %>%
write.FASTA("2-haplotypes.without_outgroup.fixed.fasta")
Number and Size range of each type of haplotype
data.frame(Haplotype=names(haplotypes.with_outgroup@hapind),
Size = haplotypes.with_outgroup@freq) %>%
mutate(Haplotype = sub('-.*', '', Haplotype)) %>%
group_by(Haplotype) %>%
summarise(Count = n(),
Range = sprintf("[%d - %d]",
min(Size),
max(Size)),
.groups='drop') %>%
ascii(digit=0, include.rownames=F)
Haplotype | Count | Range |
---|---|---|
mtCR.mix | 12 | [2 - 198] |
mtCR.new | 8 | [1 - 3] |
mtCR.out | 2 | [1 - 1] |
mtCR.pub | 53 | [1 - 4] |
Published vs New haplotype overlap
haplotypes.with_outgroup@haplist %>%
lapply(as.data.frame) %>%
bind_rows(.id='name') %>%
setNames(c("Haplotype", "Sample")) %>%
group_by(Haplotype) %>%
summarise(New = sum(grepl('^SER', Sample)),
Pub = sum(!grepl('^SER', Sample)),
Total = n()) %>%
mutate(Haplotype = ifelse(Pub==0,
sprintf("*%s*", Haplotype),
Haplotype)) %>%
arrange(desc(Total)) %>%
ascii(digit=0, include.rownames=F)
Haplotype | New | Pub | Total |
---|---|---|---|
mtCR.mix-1 | 190 | 8 | 198 |
mtCR.mix-2 | 65 | 5 | 70 |
mtCR.mix-3 | 46 | 8 | 54 |
mtCR.mix-4 | 40 | 10 | 50 |
mtCR.mix-5 | 22 | 1 | 23 |
mtCR.mix-6 | 11 | 1 | 12 |
mtCR.mix-7 | 8 | 1 | 9 |
mtCR.mix-8 | 4 | 1 | 5 |
mtCR.mix-9 | 3 | 1 | 4 |
mtCR.pub-1 | 0 | 4 | 4 |
mtCR.pub-2 | 0 | 4 | 4 |
mtCR.mix-10 | 1 | 2 | 3 |
mtCR.new-1 | 3 | 0 | 3 |
mtCR.pub-3 | 0 | 3 | 3 |
mtCR.pub-4 | 0 | 3 | 3 |
mtCR.mix-11 | 1 | 1 | 2 |
mtCR.mix-12 | 1 | 1 | 2 |
mtCR.new-2 | 2 | 0 | 2 |
mtCR.pub-5 | 0 | 2 | 2 |
mtCR.pub-6 | 0 | 2 | 2 |
mtCR.pub-7 | 0 | 2 | 2 |
mtCR.pub-8 | 0 | 2 | 2 |
mtCR.new-3 | 1 | 0 | 1 |
mtCR.new-4 | 1 | 0 | 1 |
mtCR.new-5 | 1 | 0 | 1 |
mtCR.new-6 | 1 | 0 | 1 |
mtCR.new-7 | 1 | 0 | 1 |
mtCR.new-8 | 1 | 0 | 1 |
mtCR.out-1 | 0 | 1 | 1 |
mtCR.out-2 | 0 | 1 | 1 |
mtCR.pub-10 | 0 | 1 | 1 |
mtCR.pub-11 | 0 | 1 | 1 |
mtCR.pub-12 | 0 | 1 | 1 |
mtCR.pub-13 | 0 | 1 | 1 |
mtCR.pub-14 | 0 | 1 | 1 |
mtCR.pub-15 | 0 | 1 | 1 |
mtCR.pub-16 | 0 | 1 | 1 |
mtCR.pub-17 | 0 | 1 | 1 |
mtCR.pub-18 | 0 | 1 | 1 |
mtCR.pub-19 | 0 | 1 | 1 |
mtCR.pub-20 | 0 | 1 | 1 |
mtCR.pub-21 | 0 | 1 | 1 |
mtCR.pub-22 | 0 | 1 | 1 |
mtCR.pub-23 | 0 | 1 | 1 |
mtCR.pub-24 | 0 | 1 | 1 |
mtCR.pub-25 | 0 | 1 | 1 |
mtCR.pub-26 | 0 | 1 | 1 |
mtCR.pub-27 | 0 | 1 | 1 |
mtCR.pub-28 | 0 | 1 | 1 |
mtCR.pub-29 | 0 | 1 | 1 |
mtCR.pub-30 | 0 | 1 | 1 |
mtCR.pub-31 | 0 | 1 | 1 |
mtCR.pub-32 | 0 | 1 | 1 |
mtCR.pub-33 | 0 | 1 | 1 |
mtCR.pub-34 | 0 | 1 | 1 |
mtCR.pub-35 | 0 | 1 | 1 |
mtCR.pub-36 | 0 | 1 | 1 |
mtCR.pub-37 | 0 | 1 | 1 |
mtCR.pub-38 | 0 | 1 | 1 |
mtCR.pub-39 | 0 | 1 | 1 |
mtCR.pub-40 | 0 | 1 | 1 |
mtCR.pub-41 | 0 | 1 | 1 |
mtCR.pub-42 | 0 | 1 | 1 |
mtCR.pub-43 | 0 | 1 | 1 |
mtCR.pub-44 | 0 | 1 | 1 |
mtCR.pub-45 | 0 | 1 | 1 |
mtCR.pub-46 | 0 | 1 | 1 |
mtCR.pub-47 | 0 | 1 | 1 |
mtCR.pub-48 | 0 | 1 | 1 |
mtCR.pub-49 | 0 | 1 | 1 |
mtCR.pub-50 | 0 | 1 | 1 |
mtCR.pub-51 | 0 | 1 | 1 |
mtCR.pub-52 | 0 | 1 | 1 |
mtCR.pub-9 | 0 | 1 | 1 |
Sequence differences between mtCR.new haplotypes and the most similar haplotype sequences
haplotypes.with_outgroup.dist <-
haplotypes.with_outgroup@sequence %>%
as.DNAbin %>%
dist.dna(model = 'N', as.matrix = T) %>%
as.data.frame %>%
rownames_to_column("Hap1") %>%
gather(-Hap1, key="Hap2", value="Dist")
haplotypes.of.interest <- haplotypes.with_outgroup.dist %>%
mutate(tmp=Hap1, Hap1=Hap2, Hap2=tmp) %>%
select(-tmp) %>%
rbind(haplotypes.with_outgroup.dist) %>%
filter(grepl("mtCR.new", Hap1)) %>%
filter(Dist <= 1) %>%
unique
haplotypes.with_outgroup@sequence %>%
t %>%
as.data.frame %>%
rowid_to_column("Pos") %>%
gather(-Pos, key="Hap2", value="base") %>%
right_join(haplotypes.of.interest,
relationship = 'many-to-many') %>%
group_by(Hap1, Pos ) %>%
filter(length(unique(base)) > 1) %>%
ungroup %>%
ggplot(aes(Pos, Hap2, color=base)) +
geom_text(aes(label=base)) +
facet_grid(rows=vars(Hap1), space = 'free', scales = 'free',
switch='y') +
theme_bw() +
theme(strip.text.y.left = element_text(angle = 0),
strip.placement = 'outside',
legend.position = 'none',
axis.title = element_blank())
Plot pie chart
haplotypes.with_outgroup@freq %>%
as.data.frame(nm='Size') %>%
group_by(Size) %>%
count %>%
ggplot(aes(x=1, n, fill=factor(Size))) +
geom_col() +
coord_polar(theta = 'y', direction = -1) +
scale_fill_iwanthue() +
theme_minimal() +
theme(axis.title = element_blank(),
axis.text.y = element_blank())
- convert to genlight
snps.without_outgroup <- fasta2genlight( "2-haplotypes.without_outgroup.fixed.fasta", quiet = T, snpOnly=T) snps.without_outgroup
- Convert
genlight
togeno
format (a ladartR
) and runsnmf
geno <- as.matrix(snps.without_outgroup) geno[is.na(geno)] <- 9 ## Remove constant snps #geno <- geno[,apply(geno, 2, function(x) length(unique(x)) > 1)] outfile <- "4-structure/haplotypes-fixed" write.table( geno, paste(outfile, ".lfmm", sep = ""), col.names = FALSE, row.names = FALSE, sep = " " ) ## write geno write.table( t(geno), paste(outfile, ".geno", sep = ""), col.names = FALSE, row.names = FALSE, sep = "" ) ## project <- snmf(paste0(outfile, ".geno"), ## K = 1:15, ## entropy = T, ## repetitions = 10, ## project = "new", ## ploidy=1, ## CPU = 4) project <- load.snmfProject(paste0(outfile, ".snmfProject"))
- PCA scree plot
pca.scree.plot <- pca(paste0(outfile, ".lfmm"), scale=T, K=15) %>% tracy.widom %>% ggplot(aes(N, percentage)) + geom_line() + geom_point() + scale_y_continuous(labels=scales::label_percent()) + labs(title = "PCA Scree Plot", x = "Principal Components", y = "Percentage of Variance") + theme_minimal() pca.scree.plot
K | run | crossEntropy |
---|---|---|
1 | 8 | 0.20777 |
2 | 8 | 0.13652 |
3 | 8 | 0.08029 |
4 | 8 | 0.08253 |
5 | 8 | 0.07747 |
6 | 8 | 0.08881 |
7 | 10 | 0.13266 |
8 | 10 | 0.14189 |
9 | 5 | 0.17276 |
10 | 10 | 0.16388 |
11 | 2 | 0.17915 |
12 | 8 | 0.23712 |
13 | 7 | 0.20993 |
14 | 2 | 0.18649 |
15 | 8 | 0.18041 |
- Assign memberships for best run of each K
admix.coefs <- mapply(Q, K=best.runs$K, run=best.runs$run, MoreArgs = list(project)) %>% lapply(as.data.frame) %>% lapply(rowid_to_column, "ID") %>% bind_rows(.id="K") %>% gather(-K, -ID, key="pop", value="q") %>% filter(!is.na(q)) %>% mutate(pop = as.numeric(sub("V", "", pop)), K = as.numeric(K)) %>% group_by(K, ID) %>% reframe(K, ID, pop, q, best.pop=pop[which.max(q)], best.q = max(q))
- Structure membership graph
member.data <- filter(admix.coefs, K==4) member.data.order <- spread(member.data, pop, q) %>% select(-ID, -K, -best.pop, -best.q) %>% as.matrix %>% dist %>% hclust(method="ave") %>% as.dendrogram %>% order.dendrogram haplotypes.names <- row.names(geno) pop.labels <- sprintf("mtCR.%s", c("sound", "inner", "outer", "ocean")) member.data <- mutate(member.data, ID = haplotypes.names[ID], pop = factor(pop, c(1, 2, 3, 4), pop.labels), best.pop = factor(best.pop, c(1, 2, 3, 4), pop.labels)) %>% mutate(ID = factor(ID, haplotypes.names[member.data.order] )) plot.structure <- ggplot(member.data, aes(ID, q, fill=pop)) + geom_col(width=1) + facet_grid(cols=vars(best.pop), scales="free", space="free", switch='x') + scale_fill_iwanthue() + scale_x_discrete(position = "top", expand = c(0,0)) + scale_y_continuous(labels=scales::label_percent(), expand = c(0,0))+ theme_bw()+ theme( axis.text.x = element_blank(), axis.title.x = element_blank(), legend.position = 'bottom') plot.structure
anno <- haplotypes.with_outgroup@haplist %>%
lapply(as.data.frame, nm='ID') %>%
bind_rows(.id="name") %>%
left_join(select(meta.counts, ID, Set, Group, count) %>%
rbind(data.frame(ID = grep("SER",
row.names(fasta.with_outgroup.trimmed),
value=T),
Set="New",
Group="New",
count = 1)) , by="ID") %>%
group_by(name, Set, Group) %>%
summarise(count = sum(count)) %>%
rename("name"="ID") %>%
left_join(unique(select(member.data, ID, best.pop))) %>%
mutate(Set = ifelse(is.na(Set), "2021", Set),
Group = if_else(is.na(Group), "blue", Group)) %>%
mutate(
ID = factor(ID, haplotypes.names[member.data.order] ),
Group = factor(Group, c('NW.inner', 'E.inner', 'NW.outer',
'E.outer', 'NW.Oceanic', 'NE.oceanic',
'E.oceanic', 'green', 'blue', "New"))) %>%
filter(!is.na(ID))
meta.counts %>%
select(ID, Group, count) %>%
spread(key=Group, value = count) %>%
write.table("published-counts.txt")
write.table(anno, "haplotype.published-groups.txt",
sep="\t", row.names=F)
anno <- read.delim("haplotype.published-groups.txt")
heatmap.plot <- ggplot(anno, aes(ID,Group)) +
geom_tile(aes(fill=Group), na.rm=T) +
geom_text(aes(label=count), na.rm=T) +
facet_grid(rows=vars(Set), cols=vars(best.pop),
scales="free", space="free") +
scale_fill_manual( values=c(colors[['2021']],
colors[['2017']],
New="grey")) +
scale_x_discrete(expand = c(0,0)) +
theme_bw() +
theme(axis.title = element_blank(),
axis.text.x= element_text(angle = 90, hjust = 1, vjust = 0.5),
legend.position = 'none')
heatmap.plot
anno %>%
group_by(Set, Group, best.pop) %>%
summarise(count=sum(count)) %>%
ggplot(aes(Group, count)) +
geom_col(aes(fill=Group)) +
geom_label(aes(label=count), y=10, fill='white') +
facet_grid(cols=vars(Set), rows=vars(best.pop),
scales="free", space="free") +
scale_fill_manual( values=c(colors[['2021']],
colors[['2017']],
New="grey")) +
scale_x_discrete(expand = c(0,0)) +
scale_y_continuous(expand = c(0,0), limits=c()) +
coord_cartesian(y=c(0,75)) +
theme_bw() +
theme(axis.title = element_blank(),
axis.text.x= element_text(angle = 25, hjust = 1,
vjust = 1),
legend.position = 'none')
library(tidyverse)
read.delim("haplotype.published-groups.txt") %>%
replace_na(list(count=1)) %>%
mutate(Group = factor(Group, c('NW.inner', 'E.inner', 'NW.outer',
'E.outer', 'NW.Oceanic', 'NE.oceanic',
'E.oceanic', 'green', 'blue', "New")),
best.pop = factor(best.pop, c("mtCR.sound","mtCR.inner",
"mtCR.outer","mtCR.ocean"))) %>%
group_by(Set, Group, best.pop) %>%
summarise(samples=sum(count), haplotypes=n()) %>%
pivot_wider(id_cols=c("Set", "Group"),
values_from = c("samples", "haplotypes"),
names_from = c("best.pop"),
names_vary = 'slowest',
values_fill = 0) %>%
split(., .$Set) %>%
lapply(as.data.frame)
## haplotypes.with_outgroup.fasta <-
## read.FASTA("2-haplotypes.with_outgroup.fixed.fasta")
## tree.data <- haplotypes.with_outgroup.fasta %>%
## dist.dna(pairwise.deletion = T) %>%
## nj
## tree.data$node.label <-
## boot.phylo(tree.data,
## as.matrix(haplotypes.with_outgroup.fasta),
## FUN=function(xx) nj(dist.gene(xx,
## pairwise.deletion = T)),
## B=1000)
## tree.data <- root(tree.data,
## outgroup=c("mtCR.out-1", "mtCR.out-2"))
plot <- anno %>%
mutate(Group = factor(Group, c('NW.inner',
'E.inner',
'NW.outer',
'E.outer',
'NW.Oceanic',
'NE.oceanic',
'E.oceanic',
'green',
'blue',
"New")),
best.pop = factor(best.pop, c("mtCR.sound",
"mtCR.inner",
"mtCR.outer",
"mtCR.ocean"))) %>%
ggplot(aes(Group, ID)) +
geom_tile(aes(fill=Group), na.rm=T) +
geom_text(aes(label=count), na.rm=T) +
facet_grid(cols=vars(Set), scales="free", space="free") +
scale_fill_manual( values=c(colors[['2021']],
colors[['2017']],
New="grey")) +
ggtitle("(C) Haplotype Members") +
theme_bw() +
theme(axis.title = element_blank(),
axis.text.y= element_text(),
axis.text.x= element_text(angle = 25, hjust = 1),
legend.position = 'none')
plot.structure <- right_join(member.data,
data.frame(ID=tree.data$tip.label)) %>%
ggplot(aes(q, ID, fill=(pop))) +
geom_col(width=1) +
scale_fill_iwanthue(breaks=pop.labels, name="") +
# scale_y_discrete(expand = c(0,0)) +
scale_x_continuous(labels=scales::label_percent(),
breaks = c(0.25, 0.5, 0.75), expand = c(0,0))+
ggtitle("(B) Population Structure") +
theme_bw()+
theme( axis.text.y = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
legend.position = c(0.48,1.015),
legend.direction = "horizontal",
legend.background = element_blank())
tree <- ggtree(tree.data) +
geom_nodepoint(aes(color=as.numeric(label)), na.rm=T,
size=3) +
geom_tiplab(align=T, as_ylab=T) +
#geom_text2(aes(label=label, subset=!isTip),
# vjust=0.5, hjust=-0.1, size=3) +
#geom_nodelab(aes(label=node), geom="label", bg="white") +
#geom_tiplab(aes(label=node), bg="white") +
ggtitle("(A) Neighboor-Joining Tree") +
scale_x_continuous(breaks=seq(0.005, 0.05, 0.01)) +
scale_color_viridis_c(name="Bootstrap Values") +
guides(color=guide_colorbar(title.position="top")) +
theme_tree2() +
theme(legend.position = c(0.25,0.95),
legend.direction = "horizontal")
tree <- flip(tree, 116, 104)
tree <- flip(tree, 106, 88)
tree <- flip(tree, 117, 132)
tree <- flip(tree, 87, 97)
tree <- flip(tree, 84, 89)
tree <- flip(tree, 82, 90)
#
main.plot <- plot.structure %>%
insert_left(tree, width=0.80)%>%
insert_right(plot, width=0.90)
options("aplot_guides" = "keep")
main.plot
library(pegas)
pegas.haps <- haplotypes::as.DNAbin(as.dna(haplotypes.with_outgroup))
pegas.haps <- pegas.haps[c(-73, -74),]
class(pegas.haps) <- c("haplotype", "DNAbin")
attr(pegas.haps, "index") <- haplotypes.with_outgroup@hapind
attr(pegas.haps, "from") <- "haps"
pegas.haps.split <-
select(haplotype.membership,
"Haplotype"=ID,
"Population"=best.pop) %>%
distinct %>%
split(., .$Population) %>%
lapply(pull, "Haplotype") %>%
lapply(function(x) {
haps <- pegas.haps[x,]
class(haps) <- c("haplotype", "DNAbin")
attr(haps, "index") <- haplotypes.with_outgroup@hapind[x]
attr(haps, "from") <- "haps"
haps
})
pegas.haps.split[['all']] <- pegas.haps
lapply(pegas.haps.split, pegas::hap.div, variance = T) %>%
lapply(setNames, c('Hap. Diversity', 'var')) %>%
bind_rows(.id='Population') %>%
mutate(Population = fct_relevel(factor(Population),
c("all", pop.labels))) %>%
arrange(Population)
Population | Hap. Diversity | var |
---|---|---|
all | 0.80832661831856 | 0.00020120894594653 |
mtCR.sound | 0.75622692375315 | 0.000267128534545299 |
mtCR.inner | 0.933823529411765 | 0.00149002681255929 |
mtCR.outer | 0.96969696969697 | 0.000440854139372 |
mtCR.ocean | 0.977941176470588 | 0.000742592387824971 |
- Nucleotide Diversity (pi)
lapply(pegas.haps.split, pegas::nuc.div, variance = T) %>% lapply(setNames, c('pi', 'var')) %>% bind_rows(.id='Population') %>% mutate(Population = fct_relevel(factor(Population), c("all", pop.labels))) %>% arrange(Population) %>% ascii(digits=c(0, 4, 3), format=c('s', 'f', 'e'), include.rownames = F)
Population pi var all 0.0113 3.939e-05 mtCR.sound 0.0037 6.681e-06 mtCR.inner 0.0071 2.026e-05 mtCR.outer 0.0081 2.430e-05 mtCR.ocean 0.0087 2.818e-05
- Population differentiation (Fst)
pop <- select(haplotype.membership, best.pop, ID) %>% unique() %>% pull(best.pop, name=ID) %>% na.omit() genind <- haplotypes.with_outgroup %>% as.dna %>% haplotypes::as.DNAbin(.) %>% .[names(pop),] %>% DNAbin2genind(., pop=pop) strata(genind) <- data.frame(pop) library(hierfstat) all <- wc(genind) pair <- pairwise.WCfst(genind) %>% as.data.frame %>% rownames_to_column("PopA") %>% gather(-PopA, key="PopB", value="Fst") %>% filter(PopA < PopB) %>% mutate(class = cut(Fst, breaks=c(0,0.05,0.15,0.25,1), labels=c("little", "moderate", "great", "very great"))) pair[which.min(pair$Fst),] pair[which.max(pair$Fst),] rbind(c('all', 'all', all$FST, NA, use.names = F), pair)
Vollmer 2017
gomx.pop <- filter(meta.counts, Set == '2017') %>% filter(!grepl("not submitted", ID)) %>% select(ID, Group,count) %>% arrange(count) %>% group_by(ID) %>% summarise(Group=Group[1]) %>% pull(Group, name=ID) gomx.genind <- fasta.with_outgroup.trimmed.fixed[names(gomx.pop),] %>% DNAbin2genind(., pop=gomx.pop) strata(gomx.genind) <- data.frame(gomx.pop) all <- wc(gomx.genind) pair <- pairwise.WCfst(gomx.genind) %>% as.data.frame %>% rownames_to_column("PopA") %>% gather(-PopA, key="PopB", value="Fst") %>% filter(PopA < PopB) %>% mutate(class = cut(Fst, breaks=c(0,0.05,0.15,0.25,1), labels=c("little", "moderate", "great", "very great"))) %>% arrange(Fst) rbind(c('all', 'all', all$FST, NA, use.names = F), pair)
- PhiST
- Global
ddp <- as.genclone(genind) phistp <- poppr.amova(ddp, ~pop, filter=T, threshold=0.1) phistp
- Global
- Per Population
set.seed(45243) ## pop[pop == '2'] <- paste0( pop[pop == '2'], ## letters[sample.int(4, size = sum(pop==2), replace = T)]) pop <- factor(pop) table(pop) ddp <- as.genclone(genind) phist.subset <- function(pop1, pop2) { pops <- popsub(ddp, sublist = c(pop1, pop2), drop=T) gc <- as.genclone(pops) phistsp <- poppr.amova(gc, ~pop, filter=T, threshold=0.1) return(unlist(phistsp$statphi)) } phist.table <- expand_grid(pop1=levels(pop), pop2=levels(pop)) %>% rowwise() %>% mutate(phist = phist.subset(pop1, pop2)) %>% ungroup
phist.table %>%
filter(pop1 != pop2) %>%
group_by(pop1, pop2) %>%
summarise( phist.all = paste(sprintf("%0.4f", phist),
collapse = " "),
phist=mean(phist), .groups="drop") %>%
arrange(phist.all) %>%
ascii(digits=4)
pop1 | pop2 | phist.all | phist | |
---|---|---|---|---|
1 | mtCR.inner | mtCR.outer | 0.4239 | 0.4239 |
2 | mtCR.outer | mtCR.inner | 0.4239 | 0.4239 |
3 | mtCR.inner | mtCR.sound | 0.4402 | 0.4402 |
4 | mtCR.sound | mtCR.inner | 0.4402 | 0.4402 |
5 | mtCR.ocean | mtCR.outer | 0.5878 | 0.5878 |
6 | mtCR.outer | mtCR.ocean | 0.5878 | 0.5878 |
7 | mtCR.outer | mtCR.sound | 0.6434 | 0.6434 |
8 | mtCR.sound | mtCR.outer | 0.6434 | 0.6434 |
9 | mtCR.inner | mtCR.ocean | 0.6534 | 0.6534 |
10 | mtCR.ocean | mtCR.inner | 0.6534 | 0.6534 |
11 | mtCR.ocean | mtCR.sound | 0.7442 | 0.7442 |
12 | mtCR.sound | mtCR.ocean | 0.7442 | 0.7442 |
- Private alleles
private.alleles <- private_alleles(genind) %>%
t %>%
as.data.frame %>%
rownames_to_column("POS")
ascii(private.alleles, digits=0)
colSums(private.alleles > 0)
POS | mtCR.sound | mtCR.inner | mtCR.outer | mtCR.ocean | |
---|---|---|---|---|---|
1 | 23.g | 0 | 0 | 0 | 1 |
2 | 26.g | 1 | 0 | 0 | 0 |
3 | 32.c | 0 | 0 | 0 | 15 |
4 | 33.t | 0 | 1 | 0 | 0 |
5 | 64.a | 0 | 2 | 0 | 0 |
6 | 73.c | 2 | 0 | 0 | 0 |
7 | 101.g | 0 | 1 | 0 | 0 |
8 | 104.t | 1 | 0 | 0 | 0 |
9 | 106.t | 1 | 0 | 0 | 0 |
10 | 110.c | 0 | 0 | 0 | 15 |
11 | 116.t | 0 | 1 | 0 | 0 |
12 | 117.c | 0 | 0 | 0 | 1 |
13 | 123.c | 1 | 0 | 0 | 0 |
14 | 213.g | 0 | 1 | 0 | 0 |
15 | 216.c | 0 | 1 | 0 | 0 |
16 | 235.c | 0 | 0 | 2 | 0 |
17 | 249.a | 0 | 0 | 1 | 0 |
18 | 252.t | 0 | 0 | 0 | 2 |
19 | 265.c | 0 | 0 | 0 | 1 |
20 | 273.g | 2 | 0 | 0 | 0 |
21 | 274.c | 0 | 0 | 2 | 0 |
22 | 278.c | 1 | 0 | 0 | 0 |
23 | 283.g | 1 | 0 | 0 | 0 |
24 | 291.g | 0 | 1 | 0 | 0 |
25 | 294.c | 1 | 0 | 0 | 0 |
26 | 295.t | 0 | 0 | 1 | 0 |
27 | 306.c | 0 | 0 | 1 | 0 |
POS mtCR.sound mtCR.inner mtCR.outer mtCR.ocean 27 9 7 5 6