Mito Control Region

Vollmer mtCR sequences

Outgroup

DQ060054 S. frontalis Adams and Rosel 2006

DQ060057 S. frontalis Adams and Rosel 2006

Sources

Vollmer and Rosel 2017 ./vollmer_2017.online_resource_7.tsv ./vollmer_2017.online_resource_16.tsv

Population	Color
NW inner	#98d4ed
NW outer	#bebfbf
NW oceanic	#e0e47e
E inner	#f5c89b
E outer	#e67383
E oceanic	#c7a3c7
NE oceanic	#99c796

Vollmer et al 2021 ./vollmer_2021.SI-table-3.tsv ./vollmer_2021.SI-table-4.tsv

Population Color

Entire green pop (n=541) #669e40

Entire blue pop (n=42) #2f5597

Download sequences from NCBI to ./vollmer.samples.fa

ROOT=$(git rev-parse --show-toplevel)
cat <(grep -v '^#' vollmer_2017.online_resource_7.tsv | cut -f 4 ) \
    <(grep -v '^#' vollmer_2021.SI-table-4.tsv | cut -f 7) |
    sort -u |
    ~/.local/edirect/efetch -db nuccore -format fasta > $DIR/vollmer.samples.fa

With Outgroup

Align

Combine datasets and trim names (includes the two outliers). Align with mafft

ROOT=$(git rev-parse --show-toplevel)
DIR=$ROOT/4-mtCR/

cat $DIR/vollmer.samples.fa \
    $ROOT/3-assemble/mtCR.fa \
    $DIR/outgroup.fa |
    sed '/>/s/ .*//' |
    $ROOT/apps/mafft-7.520/mafft.bat --auto --thread 4  \
        /dev/stdin > $DIR/1-mafft.fasta

Load libraries

library(adegenet)
library(haplotypes)
library(LEA)
library(tidyverse)
library(ape)
library(tidytree)
library(ggtree)
library(Biostrings)
library(hues)
library(hierfstat)
library(poppr)
library(ascii)
op <- options(asciiType = "org")

library(aplot)
options(width = 70)

sessionInfo()

Load metadata and color

colors <- list("2017"=c("NW.inner"="#98d4ed",
                        "E.inner"  =  "#f5c89b",
                        "NW.outer" =  "#bebfbf",
                        "E.outer"  =  "#e67383",
                        "NW.Oceanic"= "#e0e47e",
                        "E.oceanic" = "#c7a3c7",
                        "NE.oceanic"= "#99c796"),
               "2021"=c("green" =  "#669e40",
                        "blue"  =  "#2f5597"))

meta.counts <- mapply(left_join,
       list("2017"="vollmer_2017.online_resource_16.tsv",
       "2021"="vollmer_2021.SI-table-3.tsv") %>%
       lapply(read.delim, comment.char = '#') %>%
       lapply(gather, -Haplotype, key="Group", value='count') %>%
       lapply(select, Haplotype, Group, count),
       list("2017"="vollmer_2017.online_resource_7.tsv",
            "2021"="vollmer_2021.SI-table-4.tsv") %>%
       lapply(read.delim,sep="\t", skip=8) %>%
       lapply(select, Haplotype, "ID"=starts_with("GenBank")),
       SIMPLIFY = F) %>%
  bind_rows(.id="Set") %>%
  unique %>%
  mutate(ID=paste0(ID, ".1")) %>%
  filter(!grepl("Green.pop.subset", Group ) & count > 0) %>%
  mutate(Group = sub("Entire.(green|blue).pop.*", "\\1", Group))

Load and trim alignment (w/ outgroup)

fasta.with_outgroup <-
  read.dna("1-mafft.fasta.gz", format='fasta', as.matrix=T)
fasta.with_outgroup

image.DNAbin(fasta.with_outgroup)

Removing gaps at the start and end of the alignments

gap.counts = colSums(as.character(fasta.with_outgroup) == "-")
trim.range = range(which(gap.counts==0))

fasta.with_outgroup.trimmed =
  fasta.with_outgroup[,seq(trim.range[1], trim.range[2])]

fasta.with_outgroup.trimmed

Fix ambiguous bases

fasta.with_outgroup.trimmed.fixed <-
  solveAmbiguousBases(fasta.with_outgroup.trimmed)
fasta.with_outgroup.trimmed.fixed

Write trimmed fasta file

write.FASTA(fasta.with_outgroup.trimmed, "1-mafft.trimmed.fasta")
write.FASTA(fasta.with_outgroup.trimmed.fixed,
            "1-mafft.trimmed.fixed.fasta")

Haplotypes

haplotypes.with_outgroup <-
  fasta.with_outgroup.trimmed.fixed %>%
  as.dna %>%
  haplotype(indel='missing')

haplotypes.with_outgroup@freq %>%
  table %>%
  as.data.frame %>%
  setNames(c("Size", "Count")) %>%
  mutate(Size = as.character(Size)) %>%
  rbind(., c("Total", sum(.$Count)))

Rename haplotypes mtCR-new, mtCR-mix, and mtCR-pub for haplotypes containing only new sequences, mix of published and new, and only published.

haplotypes.new_names <-
  haplotypes.with_outgroup@haplist %>%
  lapply(as.data.frame) %>%
  bind_rows(.id='name') %>%
  setNames(c("Haplotype", "Sample")) %>%
  group_by(Haplotype) %>%
  mutate(New = as.numeric(any(grepl('^SER', Sample))),
         Pub = as.numeric(any(!grepl('^SER', Sample))) * 2,
         Out = any((Sample %in% c("DQ060054.1", "DQ060057.1")))) %>%
  mutate(NewName = as.character(factor(New+Pub, 1:3,
                                       labels = c("mtCR.new",
                                                  "mtCR.pub",
                                                  "mtCR.mix")))) %>%
  mutate(NewName = ifelse(Out, "mtCR.out", NewName),
         Size=n()) %>%
  select(Haplotype, NewName, Size) %>%
  unique() %>%
  rowid_to_column("tmp") %>%
  arrange(desc(Size)) %>%
  group_by(NewName) %>%
  mutate(NewName = sprintf("%s-%d", NewName, row_number())) %>%
  arrange(tmp) %>%
  select(-tmp)
haplotypes.new_names

names(haplotypes.with_outgroup@hapind) <- haplotypes.new_names$NewName
names(haplotypes.with_outgroup@haplist) <- haplotypes.new_names$NewName
rownames(haplotypes.with_outgroup@sequence) <-
  haplotypes.new_names$NewName

Write fasta

haplotypes.with_outgroup@sequence %>%
  as.DNAbin %>%
  write.FASTA("2-haplotypes.with_outgroup.fixed.fasta")

haplotypes.with_outgroup@sequence[
  !grepl("mtCR.out", names(haplotypes.with_outgroup@haplist)),
  ] %>%
  as.DNAbin %>%
  write.FASTA("2-haplotypes.without_outgroup.fixed.fasta")

Number and Size range of each type of haplotype

data.frame(Haplotype=names(haplotypes.with_outgroup@hapind),
           Size = haplotypes.with_outgroup@freq) %>%
  mutate(Haplotype = sub('-.*', '', Haplotype)) %>%
  group_by(Haplotype) %>%
  summarise(Count = n(),
            Range = sprintf("[%d - %d]",
                            min(Size),
                            max(Size)),
            .groups='drop') %>%
  ascii(digit=0, include.rownames=F)

Haplotype	Count	Range
mtCR.mix	12	[2 - 198]
mtCR.new	8	[1 - 3]
mtCR.out	2	[1 - 1]
mtCR.pub	53	[1 - 4]

Published vs New haplotype overlap

haplotypes.with_outgroup@haplist %>%
  lapply(as.data.frame) %>%
  bind_rows(.id='name') %>%
  setNames(c("Haplotype", "Sample")) %>%
  group_by(Haplotype) %>%
  summarise(New = sum(grepl('^SER', Sample)),
            Pub = sum(!grepl('^SER', Sample)),
            Total = n()) %>%
  mutate(Haplotype = ifelse(Pub==0,
                            sprintf("*%s*", Haplotype),
                            Haplotype)) %>%
  arrange(desc(Total)) %>%
  ascii(digit=0, include.rownames=F)

Haplotype	New	Pub	Total
mtCR.mix-1	190	8	198
mtCR.mix-2	65	5	70
mtCR.mix-3	46	8	54
mtCR.mix-4	40	10	50
mtCR.mix-5	22	1	23
mtCR.mix-6	11	1	12
mtCR.mix-7	8	1	9
mtCR.mix-8	4	1	5
mtCR.mix-9	3	1	4
mtCR.pub-1	0	4	4
mtCR.pub-2	0	4	4
mtCR.mix-10	1	2	3
mtCR.new-1	3	0	3
mtCR.pub-3	0	3	3
mtCR.pub-4	0	3	3
mtCR.mix-11	1	1	2
mtCR.mix-12	1	1	2
mtCR.new-2	2	0	2
mtCR.pub-5	0	2	2
mtCR.pub-6	0	2	2
mtCR.pub-7	0	2	2
mtCR.pub-8	0	2	2
mtCR.new-3	1	0	1
mtCR.new-4	1	0	1
mtCR.new-5	1	0	1
mtCR.new-6	1	0	1
mtCR.new-7	1	0	1
mtCR.new-8	1	0	1
mtCR.out-1	0	1	1
mtCR.out-2	0	1	1
mtCR.pub-10	0	1	1
mtCR.pub-11	0	1	1
mtCR.pub-12	0	1	1
mtCR.pub-13	0	1	1
mtCR.pub-14	0	1	1
mtCR.pub-15	0	1	1
mtCR.pub-16	0	1	1
mtCR.pub-17	0	1	1
mtCR.pub-18	0	1	1
mtCR.pub-19	0	1	1
mtCR.pub-20	0	1	1
mtCR.pub-21	0	1	1
mtCR.pub-22	0	1	1
mtCR.pub-23	0	1	1
mtCR.pub-24	0	1	1
mtCR.pub-25	0	1	1
mtCR.pub-26	0	1	1
mtCR.pub-27	0	1	1
mtCR.pub-28	0	1	1
mtCR.pub-29	0	1	1
mtCR.pub-30	0	1	1
mtCR.pub-31	0	1	1
mtCR.pub-32	0	1	1
mtCR.pub-33	0	1	1
mtCR.pub-34	0	1	1
mtCR.pub-35	0	1	1
mtCR.pub-36	0	1	1
mtCR.pub-37	0	1	1
mtCR.pub-38	0	1	1
mtCR.pub-39	0	1	1
mtCR.pub-40	0	1	1
mtCR.pub-41	0	1	1
mtCR.pub-42	0	1	1
mtCR.pub-43	0	1	1
mtCR.pub-44	0	1	1
mtCR.pub-45	0	1	1
mtCR.pub-46	0	1	1
mtCR.pub-47	0	1	1
mtCR.pub-48	0	1	1
mtCR.pub-49	0	1	1
mtCR.pub-50	0	1	1
mtCR.pub-51	0	1	1
mtCR.pub-52	0	1	1
mtCR.pub-9	0	1	1

Sequence differences between mtCR.new haplotypes and the most similar haplotype sequences

haplotypes.with_outgroup.dist <-
  haplotypes.with_outgroup@sequence %>%
  as.DNAbin %>%
  dist.dna(model = 'N', as.matrix = T) %>%
  as.data.frame %>%
  rownames_to_column("Hap1") %>%
  gather(-Hap1, key="Hap2", value="Dist")

haplotypes.of.interest <- haplotypes.with_outgroup.dist %>%
  mutate(tmp=Hap1, Hap1=Hap2, Hap2=tmp) %>%
  select(-tmp) %>%
  rbind(haplotypes.with_outgroup.dist) %>%
  filter(grepl("mtCR.new", Hap1)) %>%
  filter(Dist <=  1) %>%
  unique

haplotypes.with_outgroup@sequence %>%
  t %>%
  as.data.frame %>%
  rowid_to_column("Pos") %>%
  gather(-Pos, key="Hap2", value="base") %>%
  right_join(haplotypes.of.interest,
             relationship = 'many-to-many') %>%
  group_by(Hap1, Pos ) %>%
  filter(length(unique(base)) > 1) %>%
  ungroup %>%
  ggplot(aes(Pos, Hap2, color=base)) +
  geom_text(aes(label=base)) +
  facet_grid(rows=vars(Hap1), space = 'free', scales = 'free',
             switch='y') +
  theme_bw() +
  theme(strip.text.y.left  = element_text(angle = 0),
        strip.placement = 'outside',
        legend.position = 'none',
        axis.title = element_blank())

Plot pie chart

haplotypes.with_outgroup@freq %>%
  as.data.frame(nm='Size') %>%
  group_by(Size) %>%
  count %>%
  ggplot(aes(x=1, n, fill=factor(Size))) +
  geom_col() +
  coord_polar(theta = 'y', direction = -1) +
  scale_fill_iwanthue() +
  theme_minimal() +
  theme(axis.title = element_blank(),
        axis.text.y = element_blank())

Structure

convert to genlight

snps.without_outgroup <- fasta2genlight(
  "2-haplotypes.without_outgroup.fixed.fasta",
  quiet = T,
  snpOnly=T)
snps.without_outgroup

Convert genlight to geno format (a la dartR) and run snmf

geno <- as.matrix(snps.without_outgroup)
geno[is.na(geno)] <- 9

## Remove constant snps
#geno <- geno[,apply(geno, 2, function(x) length(unique(x)) > 1)]

outfile <- "4-structure/haplotypes-fixed"
write.table(
  geno,
  paste(outfile, ".lfmm", sep = ""),
  col.names = FALSE,
  row.names = FALSE,
  sep = " "
)
## write geno
write.table(
  t(geno),
  paste(outfile, ".geno", sep = ""),
  col.names = FALSE,
  row.names = FALSE,
  sep = ""
)

## project <- snmf(paste0(outfile, ".geno"),
##                 K = 1:15,
##                 entropy = T,
##                 repetitions = 10,
##                 project = "new",
##                 ploidy=1,
##                 CPU = 4)

project <- load.snmfProject(paste0(outfile, ".snmfProject"))

PCA scree plot

pca.scree.plot <- pca(paste0(outfile, ".lfmm"), scale=T, K=15) %>%
  tracy.widom %>%
  ggplot(aes(N, percentage)) +
  geom_line() +
  geom_point() +
  scale_y_continuous(labels=scales::label_percent()) +
  labs(title = "PCA Scree Plot",
       x = "Principal Components",
       y = "Percentage of Variance") +
  theme_minimal()
pca.scree.plot

K	run	crossEntropy
1	8	0.20777
2	8	0.13652
3	8	0.08029
4	8	0.08253
5	8	0.07747
6	8	0.08881
7	10	0.13266
8	10	0.14189
9	5	0.17276
10	10	0.16388
11	2	0.17915
12	8	0.23712
13	7	0.20993
14	2	0.18649
15	8	0.18041

Assign memberships for best run of each K

admix.coefs <- mapply(Q, K=best.runs$K, run=best.runs$run,
                      MoreArgs = list(project)) %>%
  lapply(as.data.frame) %>%
  lapply(rowid_to_column, "ID") %>%
  bind_rows(.id="K") %>%
  gather(-K, -ID, key="pop", value="q") %>%
  filter(!is.na(q)) %>%
  mutate(pop = as.numeric(sub("V", "", pop)),
         K = as.numeric(K)) %>%
  group_by(K, ID) %>%
  reframe(K, ID, pop, q,
          best.pop=pop[which.max(q)],
          best.q = max(q))

Structure membership graph

member.data <- filter(admix.coefs, K==4)
member.data.order <- spread(member.data, pop, q) %>%
                       select(-ID, -K, -best.pop, -best.q) %>%
                       as.matrix %>%
                       dist %>%
                       hclust(method="ave") %>%
                       as.dendrogram %>%
                       order.dendrogram
haplotypes.names <- row.names(geno)
pop.labels <- sprintf("mtCR.%s",
                    c("sound", "inner", "outer", "ocean"))
member.data <-  mutate(member.data,
                     ID = haplotypes.names[ID],
                     pop = factor(pop, c(1, 2, 3, 4),
                                  pop.labels),
                     best.pop = factor(best.pop, c(1, 2, 3, 4),
                                       pop.labels)) %>%
mutate(ID = factor(ID, haplotypes.names[member.data.order] ))
plot.structure <- ggplot(member.data, aes(ID, q, fill=pop)) +
geom_col(width=1) +
facet_grid(cols=vars(best.pop), scales="free", space="free",
           switch='x') +
scale_fill_iwanthue() +
scale_x_discrete(position = "top", expand = c(0,0)) +
scale_y_continuous(labels=scales::label_percent(), expand = c(0,0))+
theme_bw()+
theme( axis.text.x = element_blank(),
      axis.title.x = element_blank(),
  legend.position = 'bottom')
plot.structure

anno <- haplotypes.with_outgroup@haplist %>%
  lapply(as.data.frame, nm='ID') %>%
  bind_rows(.id="name") %>%
  left_join(select(meta.counts, ID, Set, Group, count) %>%
  rbind(data.frame(ID = grep("SER",
                     row.names(fasta.with_outgroup.trimmed),
                     value=T),
           Set="New",
           Group="New",
           count = 1)) , by="ID") %>%
  group_by(name, Set, Group) %>%
  summarise(count = sum(count)) %>%
  rename("name"="ID") %>%
  left_join(unique(select(member.data, ID, best.pop))) %>%
  mutate(Set = ifelse(is.na(Set), "2021", Set),
         Group = if_else(is.na(Group), "blue", Group)) %>%
  mutate(
    ID = factor(ID, haplotypes.names[member.data.order] ),
    Group = factor(Group, c('NW.inner', 'E.inner', 'NW.outer',
                            'E.outer', 'NW.Oceanic', 'NE.oceanic',
                            'E.oceanic', 'green', 'blue', "New"))) %>%
  filter(!is.na(ID))

meta.counts %>%
  select(ID, Group, count) %>%
  spread(key=Group, value = count) %>%
  write.table("published-counts.txt")

write.table(anno, "haplotype.published-groups.txt",
            sep="\t", row.names=F)

anno <- read.delim("haplotype.published-groups.txt")

heatmap.plot <- ggplot(anno, aes(ID,Group)) +
  geom_tile(aes(fill=Group), na.rm=T) +
  geom_text(aes(label=count), na.rm=T) +
  facet_grid(rows=vars(Set), cols=vars(best.pop),
             scales="free", space="free") +
  scale_fill_manual( values=c(colors[['2021']],
                              colors[['2017']],
                              New="grey")) +
  scale_x_discrete(expand = c(0,0)) +
  theme_bw() +
  theme(axis.title = element_blank(),
        axis.text.x= element_text(angle = 90, hjust = 1, vjust = 0.5),
        legend.position = 'none')
heatmap.plot

anno %>%
  group_by(Set, Group, best.pop) %>%
  summarise(count=sum(count)) %>%
  ggplot(aes(Group, count)) +
  geom_col(aes(fill=Group)) +
  geom_label(aes(label=count), y=10, fill='white') +
  facet_grid(cols=vars(Set), rows=vars(best.pop),
             scales="free", space="free") +
  scale_fill_manual( values=c(colors[['2021']],
                              colors[['2017']],
                              New="grey")) +
  scale_x_discrete(expand = c(0,0)) +
  scale_y_continuous(expand = c(0,0), limits=c()) +
  coord_cartesian(y=c(0,75)) +
  theme_bw() +
  theme(axis.title = element_blank(),
        axis.text.x= element_text(angle = 25, hjust = 1,
                                  vjust = 1),
        legend.position = 'none')

library(tidyverse)

read.delim("haplotype.published-groups.txt") %>%
  replace_na(list(count=1)) %>%
  mutate(Group = factor(Group, c('NW.inner', 'E.inner', 'NW.outer',
                                 'E.outer', 'NW.Oceanic', 'NE.oceanic',
                                 'E.oceanic', 'green', 'blue', "New")),
         best.pop = factor(best.pop, c("mtCR.sound","mtCR.inner",
                                       "mtCR.outer","mtCR.ocean"))) %>%
  group_by(Set, Group, best.pop) %>%
  summarise(samples=sum(count), haplotypes=n()) %>%
  pivot_wider(id_cols=c("Set", "Group"),
              values_from = c("samples", "haplotypes"),
              names_from = c("best.pop"),
              names_vary = 'slowest',
              values_fill = 0) %>%
  split(., .$Set) %>%
  lapply(as.data.frame)

Phylogenetics tree

## haplotypes.with_outgroup.fasta <-
##   read.FASTA("2-haplotypes.with_outgroup.fixed.fasta")

## tree.data <- haplotypes.with_outgroup.fasta %>%
##   dist.dna(pairwise.deletion = T) %>%
##   nj

## tree.data$node.label <-
##   boot.phylo(tree.data,
##              as.matrix(haplotypes.with_outgroup.fasta),
##              FUN=function(xx) nj(dist.gene(xx,
##                                            pairwise.deletion = T)),
##              B=1000)
## tree.data <- root(tree.data,
##                   outgroup=c("mtCR.out-1", "mtCR.out-2"))

plot <- anno %>%
  mutate(Group = factor(Group, c('NW.inner',
                                 'E.inner',
                                 'NW.outer',
                                 'E.outer',
                                 'NW.Oceanic',
                                 'NE.oceanic',
                                 'E.oceanic',
                                 'green',
                                 'blue',
                                 "New")),
         best.pop = factor(best.pop, c("mtCR.sound",
                                       "mtCR.inner",
                                       "mtCR.outer",
                                       "mtCR.ocean"))) %>%
  ggplot(aes(Group, ID)) +
  geom_tile(aes(fill=Group), na.rm=T) +
  geom_text(aes(label=count), na.rm=T) +
  facet_grid(cols=vars(Set), scales="free", space="free") +
  scale_fill_manual( values=c(colors[['2021']],
                              colors[['2017']],
                              New="grey")) +
  ggtitle("(C) Haplotype Members") +
  theme_bw() +
  theme(axis.title = element_blank(),
        axis.text.y= element_text(),
        axis.text.x= element_text(angle = 25, hjust = 1),
        legend.position = 'none')

plot.structure <- right_join(member.data,
                             data.frame(ID=tree.data$tip.label)) %>%
  ggplot(aes(q, ID, fill=(pop))) +
  geom_col(width=1) +
  scale_fill_iwanthue(breaks=pop.labels, name="") +
#  scale_y_discrete(expand = c(0,0)) +
  scale_x_continuous(labels=scales::label_percent(),
                     breaks = c(0.25, 0.5, 0.75), expand = c(0,0))+
  ggtitle("(B) Population Structure") +
  theme_bw()+
  theme( axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
    legend.position = c(0.48,1.015),
    legend.direction = "horizontal",
    legend.background = element_blank())

tree <- ggtree(tree.data) +
  geom_nodepoint(aes(color=as.numeric(label)), na.rm=T,
                 size=3) +
  geom_tiplab(align=T, as_ylab=T) +
  #geom_text2(aes(label=label, subset=!isTip),
  #           vjust=0.5, hjust=-0.1, size=3) +
  #geom_nodelab(aes(label=node), geom="label", bg="white") +
  #geom_tiplab(aes(label=node), bg="white") +
  ggtitle("(A) Neighboor-Joining Tree") +
  scale_x_continuous(breaks=seq(0.005, 0.05, 0.01)) +
  scale_color_viridis_c(name="Bootstrap Values") +
  guides(color=guide_colorbar(title.position="top")) +
  theme_tree2() +
  theme(legend.position = c(0.25,0.95),
        legend.direction = "horizontal")
tree <- flip(tree, 116, 104)
tree <- flip(tree, 106, 88)
tree <- flip(tree, 117, 132)
tree <- flip(tree, 87, 97)
tree <- flip(tree, 84, 89)
tree <- flip(tree, 82, 90)
#
main.plot <- plot.structure  %>%
  insert_left(tree, width=0.80)%>%
  insert_right(plot, width=0.90)
options("aplot_guides" = "keep")
main.plot

library(pegas)

pegas.haps <- haplotypes::as.DNAbin(as.dna(haplotypes.with_outgroup))
pegas.haps <- pegas.haps[c(-73, -74),]
class(pegas.haps) <- c("haplotype", "DNAbin")
attr(pegas.haps, "index") <- haplotypes.with_outgroup@hapind
attr(pegas.haps, "from") <- "haps"

pegas.haps.split <-
  select(haplotype.membership,
                            "Haplotype"=ID,
                            "Population"=best.pop) %>%
  distinct %>%
  split(., .$Population) %>%
  lapply(pull, "Haplotype") %>%
  lapply(function(x) {
    haps <- pegas.haps[x,]
    class(haps) <- c("haplotype", "DNAbin")
    attr(haps, "index") <- haplotypes.with_outgroup@hapind[x]
    attr(haps, "from") <- "haps"
    haps
  })
pegas.haps.split[['all']] <- pegas.haps


lapply(pegas.haps.split, pegas::hap.div, variance = T) %>%
  lapply(setNames, c('Hap. Diversity', 'var')) %>%
  bind_rows(.id='Population') %>%
  mutate(Population = fct_relevel(factor(Population),
                                  c("all", pop.labels))) %>%
  arrange(Population)

Population	Hap. Diversity	var
all	0.80832661831856	0.00020120894594653
mtCR.sound	0.75622692375315	0.000267128534545299
mtCR.inner	0.933823529411765	0.00149002681255929
mtCR.outer	0.96969696969697	0.000440854139372
mtCR.ocean	0.977941176470588	0.000742592387824971

Nucleotide Diversity (pi)

lapply(pegas.haps.split, pegas::nuc.div, variance = T) %>%
  lapply(setNames, c('pi', 'var')) %>%
  bind_rows(.id='Population') %>%
  mutate(Population = fct_relevel(factor(Population),
                                  c("all", pop.labels))) %>%
  arrange(Population) %>%
  ascii(digits=c(0, 4, 3),
        format=c('s', 'f', 'e'),
        include.rownames = F)

Population	pi	var
all	0.0113	3.939e-05
mtCR.sound	0.0037	6.681e-06
mtCR.inner	0.0071	2.026e-05
mtCR.outer	0.0081	2.430e-05
mtCR.ocean	0.0087	2.818e-05

Population differentiation (Fst)

pop <- select(haplotype.membership, best.pop, ID) %>%
  unique() %>%
  pull(best.pop, name=ID) %>%
  na.omit()

genind <- haplotypes.with_outgroup %>%
  as.dna %>%
  haplotypes::as.DNAbin(.) %>%
  .[names(pop),] %>%
  DNAbin2genind(., pop=pop)
strata(genind) <- data.frame(pop)

library(hierfstat)
all <- wc(genind)
pair <- pairwise.WCfst(genind) %>%
  as.data.frame %>%
  rownames_to_column("PopA") %>%
  gather(-PopA, key="PopB", value="Fst") %>%
  filter(PopA < PopB) %>%
  mutate(class = cut(Fst, breaks=c(0,0.05,0.15,0.25,1),
                     labels=c("little", "moderate", "great",
                              "very great")))

pair[which.min(pair$Fst),]
pair[which.max(pair$Fst),]

rbind(c('all', 'all', all$FST, NA, use.names = F), pair)

Vollmer 2017

gomx.pop <- filter(meta.counts, Set == '2017') %>%
  filter(!grepl("not submitted", ID)) %>%
  select(ID, Group,count) %>%
  arrange(count) %>%
  group_by(ID) %>%
  summarise(Group=Group[1]) %>%
  pull(Group, name=ID)

gomx.genind <-
  fasta.with_outgroup.trimmed.fixed[names(gomx.pop),] %>%
  DNAbin2genind(., pop=gomx.pop)
strata(gomx.genind) <- data.frame(gomx.pop)

all <- wc(gomx.genind)
pair <- pairwise.WCfst(gomx.genind) %>%
  as.data.frame %>%
  rownames_to_column("PopA") %>%
  gather(-PopA, key="PopB", value="Fst") %>%
  filter(PopA < PopB) %>%
  mutate(class = cut(Fst, breaks=c(0,0.05,0.15,0.25,1),
                     labels=c("little", "moderate", "great",
                              "very great"))) %>%
  arrange(Fst)

rbind(c('all', 'all', all$FST, NA, use.names = F), pair)

PhiST

Global

ddp <- as.genclone(genind)
phistp <- poppr.amova(ddp, ~pop, filter=T, threshold=0.1)
phistp

Per Population

set.seed(45243)
## pop[pop == '2'] <- paste0( pop[pop == '2'],
##          letters[sample.int(4, size = sum(pop==2), replace = T)])
pop <- factor(pop)
table(pop)

ddp <- as.genclone(genind)

phist.subset <- function(pop1, pop2) {
  pops <- popsub(ddp, sublist = c(pop1, pop2), drop=T)
  gc <- as.genclone(pops)
  phistsp <- poppr.amova(gc, ~pop, filter=T, threshold=0.1)
  return(unlist(phistsp$statphi))
}

phist.table <- expand_grid(pop1=levels(pop),
              pop2=levels(pop)) %>%
  rowwise() %>%
  mutate(phist = phist.subset(pop1, pop2)) %>%
  ungroup

phist.table %>%
  filter(pop1 != pop2) %>%
  group_by(pop1, pop2) %>%
  summarise( phist.all = paste(sprintf("%0.4f", phist),
                               collapse = " "),
            phist=mean(phist), .groups="drop") %>%
  arrange(phist.all) %>%
  ascii(digits=4)

	pop1	pop2	phist.all	phist
1	mtCR.inner	mtCR.outer	0.4239	0.4239
2	mtCR.outer	mtCR.inner	0.4239	0.4239
3	mtCR.inner	mtCR.sound	0.4402	0.4402
4	mtCR.sound	mtCR.inner	0.4402	0.4402
5	mtCR.ocean	mtCR.outer	0.5878	0.5878
6	mtCR.outer	mtCR.ocean	0.5878	0.5878
7	mtCR.outer	mtCR.sound	0.6434	0.6434
8	mtCR.sound	mtCR.outer	0.6434	0.6434
9	mtCR.inner	mtCR.ocean	0.6534	0.6534
10	mtCR.ocean	mtCR.inner	0.6534	0.6534
11	mtCR.ocean	mtCR.sound	0.7442	0.7442
12	mtCR.sound	mtCR.ocean	0.7442	0.7442

Private alleles

private.alleles <- private_alleles(genind) %>%
  t %>%
  as.data.frame %>%
  rownames_to_column("POS")
ascii(private.alleles, digits=0)

colSums(private.alleles > 0)

	POS	mtCR.sound	mtCR.inner	mtCR.outer	mtCR.ocean
1	23.g	0	0	0	1
2	26.g	1	0	0	0
3	32.c	0	0	0	15
4	33.t	0	1	0	0
5	64.a	0	2	0	0
6	73.c	2	0	0	0
7	101.g	0	1	0	0
8	104.t	1	0	0	0
9	106.t	1	0	0	0
10	110.c	0	0	0	15
11	116.t	0	1	0	0
12	117.c	0	0	0	1
13	123.c	1	0	0	0
14	213.g	0	1	0	0
15	216.c	0	1	0	0
16	235.c	0	0	2	0
17	249.a	0	0	1	0
18	252.t	0	0	0	2
19	265.c	0	0	0	1
20	273.g	2	0	0	0
21	274.c	0	0	2	0
22	278.c	1	0	0	0
23	283.g	1	0	0	0
24	291.g	0	1	0	0
25	294.c	1	0	0	0
26	295.t	0	0	1	0
27	306.c	0	0	1	0

POS mtCR.sound mtCR.inner mtCR.outer mtCR.ocean 27 9 7 5 6

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

readme.org

readme.org

Mito Control Region

Vollmer mtCR sequences

With Outgroup

Align

Load libraries

Load metadata and color

Load and trim alignment (w/ outgroup)

Haplotypes

Structure

Phylogenetics tree

DQ060054	S. frontalis	Adams and Rosel 2006
DQ060057	S. frontalis	Adams and Rosel 2006

Population	Color
Entire green pop (n=541)	#669e40
Entire blue pop (n=42)	#2f5597

Files

readme.org

Latest commit

History

readme.org

File metadata and controls

Mito Control Region

Vollmer mtCR sequences

With Outgroup

Align

Load libraries

Load metadata and color

Load and trim alignment (w/ outgroup)

Haplotypes

Structure

Phylogenetics tree