Skip to content

Latest commit

 

History

History
1313 lines (1177 loc) · 45 KB

readme.org

File metadata and controls

1313 lines (1177 loc) · 45 KB

MS Sound Stranded Dolphins

Reference Sequences
Description, location, and download info for reference sequences used
Primer Design
Methods used to design universal primers for marine dolphins (Delphinidae)
Samples
Description of newly sequenced samples, including amplicon coverage summary
Consensus and SNP Calling
Methods used to create consensus sequences and SNP vcfs for each sample. Includes species validation and heterplasmy search.
mtCR analysis
Methods for comparing the mtCR region
Whole mitogenome analysis
Methods for comparing full mitogenomes.

Figures

Figure 1

library(tidyverse)
library(hues)
library(sf)
library(ggspatial)
library(stars)

map.colors <- c(
  "0" =rgb(193/255, 224/255, 250/255), ## Ocean water
  "11"=rgb(193/255, 224/255, 250/255), ## Open water
  "12"=rgb(229/255, 231/255, 232/255), ## Perennial ice / snow
  "21"="#ebe5cf",                      ## Developed, open space
  "22"="#ebe5cf",                      ## Developed, low intensity
  "23"="#ebe5cf",                      ## Developed, medium intensity
  "24"="#ebe5cf",                      ## Developed. high intensity
  "31"=rgb(179/255, 175/255, 164/255), ## Barren land
  "41"="#e4e8da",                      ## Deciduous forest
  "42"="#e4e8da",                      ## Evergreen forest
  "43"="#e4e8da",                      ## Mixed forest
  "51"=rgb(176/255, 151/255,  61/255), ## Dwarf scrub  (Alaska only)
  "52"="#f4f3ee",   ## Shrub / scrub
  "71"="#f4f3ee",   ## Grassland / herbaceous
  "72"=rgb(209/255, 209/255, 130/255), ## Sedge / herbaceous  (Alaska)
  "74"=rgb(130/255, 186/255, 158/255), ## Moss (Alaska only)
  "81"="#f4f3ee",   ## Pasture hay
  "82"="#f4f3ee",   ## Cultivated crops
  "90"="#e4e8da",   ## Woody wetlands
  "101"=rgb(240/255, 235/255, 211/255),  ## Non-U.S. land
  "102"=rgb(240/255, 235/255, 211/255),  ## Non-U.S. land
  "103"=rgb(240/255, 235/255, 211/255)   ## Non-U.S. land
)

map.data <-
  data.table::fread("2-samples/bigtable.tsv", skip=2)  %>%
  separate(`Latitude/Longitude`, into=c('lat', 'lon', NA),
           sep="[NW]") %>%
  mutate(lat = as.numeric(lat),
         lon=as.numeric(lon) * -1) %>%
  dplyr::select(Sample="Sample Name", lat, lon)

map.data.prj <- st_as_sf(map.data,
                         coords = c('lon', 'lat'),
                         crs=4326) %>%
  st_transform(5070) %>%
  st_coordinates(.$geometry) %>%
  cbind(map.data)
map.data.prj


map.data.bbox <- st_as_sf(map.data,
                          coords = c('lon', 'lat'),
                          crs=4326) %>%
  st_transform(5070) %>%
  st_bbox()
map.data.bbox <- map.data.bbox -
  c(10000, 70000, 0, 0) + c(0,0,30000,25000)
map.data.bbox
                                        # Download National Map Data
## Crop National map to region of interest
TNM <- read_stars(
  "5-full-mito/ldco48i0100a.tif_nt00971/ldco48i0100a.tif"
) %>% st_crop(map.data.bbox)
##download.file("http://www2.census.gov/geo/tiger/GENZ2015/shp/cb_2015_us_state_500k.zip", destfile = "states.zip")
##unzip("states.zip")
state <- read_sf("5-full-mito/cb_2015_us_state_500k.shp") %>%
  st_transform(5070) %>%
  st_crop(map.data.bbox)

data <- rbind(
  c("Sound",  "Mobile\nBay",        -88.003, 30.421, "italic"),
  c("Sound",  "Mississippi Sound",  -88.898, 30.317, "italic"),
  c("Sound",  "Chandeleur\nSound",  -89.043, 29.908, "italic"),
  c("Sound",  "Breton\nSound",      -89.302, 29.586, "italic"),
  c("City",   "Biloxi",             -88.889, 30.407, "plain"),
  c("City",   "Mobile",             -87.994, 30.681, "plain"),
  c("City",   "Pensacola",          -87.217, 30.421, "plain"),
  c("Island", "Cat\nIsland",         -89.085, 30.220, "plain"),
  c("Island", "Ship\nIsland",        -88.910, 30.223, "plain"),
  c("Island", "Horn\nIsland",        -88.662, 30.244, "plain"),
  c("Island", "Petit Bois\nIsland",  -88.432, 30.207, "plain"),
  c("Island", "Dauphin\nIsland",     -88.122, 30.248, "plain"),
  c("Island", "Chandeleur\nIsland", -88.826, 29.843, "plain")
) %>%
  as.data.frame %>%
setNames(c("Type", "Name", "lon", "lat", "fontface")) %>%
st_as_sf(coords = c('lon', 'lat'), crs=4326) %>%
st_transform(5070) %>%
cbind(st_coordinates(.$geometry)) %>%
mutate(Y = ifelse(Type == "Island" & Name != "Chandeleur\nIsland",
                  Y - 5000, Y))
data
map.data.bbox
ggplot() +
  geom_stars(data=TNM, downsample = 1) +
  scale_fill_manual(values=map.colors, na.value = "#f3f2ed") +
  ## STATES
  annotate(geom = "text", x = 595000, y = 780000,
           label = "Louisiana",
           family="ETBembo",
           color = "grey22",
           size = 12,
           angle=90,
           alpha=0.25) +
  annotate(geom = "text", x = 660000, y = 880000,
           label = "Mississippi",
           family="ETBembo",
           color = "grey22",
           size = 12,
           alpha=0.25) +
  annotate(geom = "text", x = 795000, y = 880000,
           label = "Alabama",
           family="ETBembo",
           color = "grey22",
           size = 12,
           alpha=0.25) +
  annotate(geom = "text", x = 840000, y = 880000,
           label = "FL",
           family="ETBembo",
           color = "grey22",
           size = 12,
           alpha=0.25) +
  geom_text(aes(X, Y, label=Name, size=Type, color=Type,
                fontface=fontface),
            data=data, family="ETBembo", lineheight=0.75) +
  scale_color_manual(values=c("Sound"="#0a71b3",
                              "Island"="#0a71b3",
                              "City"="black")) +
  scale_size_manual(values=c("Sound"=6, "Island"=4, "City"=4)) +
  ## LAKE
  annotate(geom = "text", x = 613667, y = 794627,
           label = "Lake\nBorgne",
           fontface = "italic",
           family="ETBembo",
           color = "#0a71b3",
           size = 4) +
  ## RIVER
  annotate(geom = "text", x = 611535, y = 820630,
           label = "Pearl River",
           family="ETBembo",
           color = "#0a71b3",
           size = 4,
           angle=-65,
           vjust =0,
           hjust =0.4 ) +
  geom_sf(data = state, fill=NA, linetype='dotted', linewidth=0.2) +
  geom_point(aes(x=X, y=Y), data=map.data.prj, size=0.2, alpha=1) +
  coord_sf(expand = F) +
  theme(panel.grid.major = element_line(color = gray(.25),
                                        linetype = "dashed",
                                        linewidth = 0.1),
        legend.position = 'none',
        axis.title=element_blank()) +
  ## LABEL
  annotate(geom = "text", x = 775000, y = 775000,
           label = "North Central\nGulf of Mexico",
           fontface = "italic",
           family="ETBembo",
           color = "#0a71b3",
           size = 13) +
  annotation_scale(location = "br", width_hint = 0.25) +
  annotation_north_arrow(location = "br", which_north = "true",
                         pad_x = unit(0.75, "in"),
                         pad_y = unit(0.5, "in"),
                         style = north_arrow_fancy_orienteering)

fig-1.sample-area-map.tiff

Figure 2

library(pegas)
library(tidyverse)
library(hues)
library(ggtree)
library(aplot)


haplotypes.with_outgroup.fasta <-
   read.FASTA("4-mtCR/2-haplotypes.with_outgroup.fixed.fasta")
anno <- read.delim("4-mtCR/haplotype.published-groups.txt")
membership <- read.delim("4-mtCR/4-struct-membership-K4.txt")

tree.data <- haplotypes.with_outgroup.fasta %>%
  dist.dna(pairwise.deletion = T) %>%
  nj

tree.data$node.label <-
  boot.phylo(tree.data,
             as.matrix(haplotypes.with_outgroup.fasta),
             FUN=function(xx) nj(dist.gene(xx,
                                           pairwise.deletion = T)),
             B=1000)
tree.data <- root(tree.data,
                  outgroup=c("mtCR.out-1", "mtCR.out-2"))

colors <- list("2017"=c("NW.inner"="#98d4ed",
                        "E.inner"  =  "#f5c89b",
                        "NW.outer" =  "#bebfbf",
                        "E.outer"  =  "#e67383",
                        "NW.Oceanic"= "#e0e47e",
                        "E.oceanic" = "#c7a3c7",
                        "NE.oceanic"= "#99c796"),
               "2021"=c("green" =  "#669e40",
                        "blue"  =  "#2f5597"))
pop.labels <- c("mtCR.sound",
                "mtCR.inner",
                "mtCR.outer",
                "mtCR.ocean")

plot <- anno %>%
  mutate(Group = factor(Group, c('New',
                                 'NW.inner',
                                 'E.inner',
                                 'NW.outer',
                                 'E.outer',
                                 'NW.Oceanic',
                                 'NE.oceanic',
                                 'E.oceanic',
                                 'green',
                                 'blue')),
         best.pop = factor(best.pop, pop.labels),
         Set = factor(Set, c("New", "2017", "2021"))) %>%
  ggplot(aes(Group, ID)) +
  geom_tile(aes(fill=Group), na.rm=T) +
  geom_text(aes(label=count), na.rm=T) +
  facet_grid(cols=vars(Set), scales="free", space="free") +
  scale_fill_manual( values=c(colors[['2021']],
                              colors[['2017']],
                              New="grey")) +
  ggtitle("(C) Haplotype Members") +
  theme_bw() +
  theme(axis.title = element_blank(),
        axis.text.y= element_blank(),
        axis.text.x= element_text(angle = 25, hjust = 1),
        legend.position = 'none')

plot.structure <- right_join(membership,
                             data.frame(ID=tree.data$tip.label)) %>%
  ggplot(aes(q, ID, fill=pop)) +
  geom_col(width=1) +
  scale_fill_iwanthue(breaks=pop.labels, name="") +
#  scale_y_discrete(expand = c(0,0)) +
  scale_x_continuous(labels=scales::label_percent(),
                     breaks = c(0.25, 0.5, 0.75), expand = c(0,0))+
  ggtitle("(B) Population Structure") +
  theme_bw()+
  theme( axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
    legend.position = c(0.48,1.015),
    legend.direction = "horizontal",
    legend.background = element_blank())

tree <- ggtree(tree.data) +
  geom_nodepoint(aes(color=as.numeric(label)), na.rm=T,
                 size=3) +
  geom_tiplab(align=T, as_ylab=T) +
  #geom_text2(aes(label=label, subset=!isTip),
  #           vjust=0.5, hjust=-0.1, size=3) +
  #geom_nodelab(aes(label=node), geom="label", bg="white") +
  #geom_tiplab(aes(label=node), bg="white") +
  ggtitle("(A) Neighboor-Joining Tree") +
  scale_x_continuous(breaks=seq(0.005, 0.05, 0.01)) +
  scale_color_viridis_c(name="Bootstrap Values") +
  guides(color=guide_colorbar(title.position="top")) +
  theme_tree2() +
  theme(legend.position = c(0.25,0.95),
        legend.direction = "horizontal")
tree <- flip(tree, 116, 104)
tree <- flip(tree, 106, 88)
tree <- flip(tree, 117, 132)
tree <- flip(tree, 87, 97)
tree <- flip(tree, 84, 89)
tree <- flip(tree, 82, 90)
#

main.plot <- plot.structure  %>%
  insert_left(tree, width=0.80)%>%
  insert_right(plot, width=0.90)
options("aplot_guides" = "keep")
main.plot

fig-2.mtcr-analysis.tiff

Figure 3

library(pegas)
library(tidyverse)
library(grid)

haplotypes.sequence <-
  read.FASTA("5-full-mito/2-haplotypes.with_outgroup.fasta")

haplotypes.full.dist <- haplotypes.sequence %>%
  dist.dna(model = "N", pairwise.deletion = T)

haplotypes.full.dist.order <-
  hclust(haplotypes.full.dist)[c("labels", "order")] %>%
  bind_cols %>%
  mutate(new = labels[order]) %>%
  pull(new)%>%
  rev()

membership <-
  read.delim("5-full-mito/3-structure-haplotype.admix.coef.txt") %>%
  filter(K==4) %>%
  mutate(ID = factor(ID, haplotypes.full.dist.order),
         pop = factor(pop))


haplotypes.full.dist.data <-
  haplotypes.full.dist %>%
  as.matrix %>%
  as.data.frame %>%
  rownames_to_column("Hap1") %>%
  gather(-Hap1, key="Hap2", value="dist") %>%
  filter(!Hap1 %in% c("outgroup-001")) %>%
  filter(!Hap2 %in% c("outgroup-001")) %>%
  mutate(Hap1 = factor(Hap1, haplotypes.full.dist.order),
         Hap2 = factor(Hap2, haplotypes.full.dist.order))


plot.structure.hap <-
  ggplot(membership, aes(ID, q, fill=pop)) +
  geom_col(width=1) +
  scale_fill_iwanthue() +
  scale_x_discrete(expand = c(0,0)) +
  scale_y_continuous(labels=scales::label_percent(),
                     expand = c(0,0))+
  guides(fill=guide_legend(title="Population", nrow=1))+
  theme_bw()+
  theme(#axis.text.x = element_text(angle = 90, vjust=0.5),
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    legend.position = 'top',
    ## legend.title.position = "top",
    ## legend.title = element_text(hjust=0.5)
    )

plot.distance.hap <-
  filter(haplotypes.full.dist.data,
         as.numeric(Hap1) >= as.numeric(Hap2)) %>%
  ggplot(aes(Hap1, Hap2, fill=dist)) +
  scale_x_discrete(name="Haplotype", position = 'top') +
  scale_y_discrete(name="Haplotype", position = 'right') +
  scale_fill_gradient(limit=c(0,20), low="grey30", high="white",
                      na.value="white") +
  geom_tile() +
  coord_equal() +
  guides(fill=guide_colorbar(title="Distance")) +
  theme_minimal() +
  theme(axis.text = element_blank(),
        axis.title = element_blank(),
        panel.grid = element_blank(),
#        legend.title.position = "top",
        legend.position = 'top')


plot.structure.hap.grob = ggplotGrob(plot.structure.hap)
plot.structure.hap.grobs = split(plot.structure.hap.grob[["grobs"]],
                                 plot.structure.hap.grob[["layout"]]$name)
plot.distance.hap.grob = ggplotGrob(plot.distance.hap)
plot.distance.hap.grobs = split(plot.distance.hap.grob[["grobs"]],
                                 plot.distance.hap.grob[["layout"]]$name)

top.vp <- viewport(
  layout=grid.layout(5, 3,
                     widths=unit(c(5, 5, 2),
                                 c("lines", "null", "lines")),
                     heights=unit(c(4, 1, 2, 2, 4),
                                  c("lines", "null", "mm", "null", "lines")),
                     respect = T))
legend.vp <- viewport(layout.pos.row = 1,
                         layout.pos.col = 2,
                         just=c("center","bottom"),
                         name="legend")
structure.vp <- viewport(layout.pos.row = 2,
                         layout.pos.col = 2,
                         just=c("center","top"),
                         name="structure")
structure.axis.vp <- viewport(layout.pos.row = 2,
                              layout.pos.col = 1,
                              just=c("center","top"),
                              name="structure.axis")
distance.vp <- viewport(name="distance",
                         layout.pos.row = 4,
                         layout.pos.col = 2,
                        just=c("center","top"))
distance.axis.vp <- viewport(layout.pos.row = 4,
                              layout.pos.col = 1,
                              just=c("center","top"),
                              name="distance.axis")
splot <- vpTree(top.vp, vpList(legend.vp,
                               structure.vp,
                               structure.axis.vp,
                               distance.vp,
                               distance.axis.vp))
grid.show.layout(splot$parent$layout, vp=splot$parent)

grid.newpage()
pushViewport(splot)
seekViewport("structure")
grid.draw(plot.structure.hap.grobs[['panel']][[1]])
seekViewport("structure.axis")
grid.draw(plot.structure.hap.grobs[['axis-l']][[1]])
grid.text("Population Structure", rot=90)
grid.text("A)", just='right', y=1, x=0.5, gp=gpar(fontsize=16))
seekViewport("distance")
#
aspect.ratio =
  as.numeric(convertY(unit(1, 'inch'), unitTo = 'native'))/
  as.numeric(convertX(unit(1, 'inch'), unitTo = 'native'))
#
pushViewport(viewport(angle=-45,
                         width=1/sqrt(2),
                         height=aspect.ratio/sqrt(2),
                         #height=unit(1/sqrt(2), "snpc"),
                      y=1))
grid.draw(plot.distance.hap.grobs[['panel']][[1]])
popViewport()
seekViewport("distance.axis")
grid.text("Sequence Similarity", rot=90, just='right', y=0.95)
grid.text("B)", just='right', y=1, x=0.5, gp=gpar(fontsize=16))
#
seekViewport("legend")
pushViewport(viewport(layout = grid.layout(1, 2, widths = c(2,1))))
pushViewport(viewport(layout.pos.col=1, just = c("right","bottom")))
grid.draw(plot.structure.hap.grobs[['guide-box-top']][[1]])
popViewport()
pushViewport(viewport(layout.pos.col=2, just = c("left","bottom")))
grid.draw(plot.distance.hap.grobs[['guide-box-top']][[1]])
popViewport()
popViewport()

fig-3.whole-mito-structure.tiff

Figure 4

library(tidyverse)
library(hues)

library(ggforce)
library(sf)
library(ggspatial)
library(stars)

map.colors <- c(
 "0" =rgb(193/255, 224/255, 250/255), ## Ocean water
 "11"=rgb(193/255, 224/255, 250/255), ## Open water
 "12"=rgb(229/255, 231/255, 232/255), ## Perennial ice / snow
 "21"="#ebe5cf",                      ## Developed, open space
 "22"="#ebe5cf",                      ## Developed, low intensity
 "23"="#ebe5cf",                      ## Developed, medium intensity
 "24"="#ebe5cf",                      ## Developed. high intensity
 "31"=rgb(179/255, 175/255, 164/255), ## Barren land
 "41"="#e4e8da",                      ## Deciduous forest
 "42"="#e4e8da",                      ## Evergreen forest
 "43"="#e4e8da",                      ## Mixed forest
 "51"=rgb(176/255, 151/255,  61/255), ## Dwarf scrub  (Alaska only)
 "52"="#f4f3ee",   ## Shrub / scrub
 "71"="#f4f3ee",   ## Grassland / herbaceous
 "72"=rgb(209/255, 209/255, 130/255), ## Sedge / herbaceous  (Alaska)
 "74"=rgb(130/255, 186/255, 158/255), ## Moss (Alaska only)
 "81"="#f4f3ee",   ## Pasture hay
 "82"="#f4f3ee",   ## Cultivated crops
 "90"="#e4e8da",   ## Woody wetlands
 "101"=rgb(240/255, 235/255, 211/255),  ## Non-U.S. land
 "102"=rgb(240/255, 235/255, 211/255),  ## Non-U.S. land
 "103"=rgb(240/255, 235/255, 211/255),   ## Non-U.S. land
setNames(iwanthue(4), sprintf("mitogroup-%d", 1:4)))


sequence.membership.hap <-
  read.delim("5-full-mito/3-structure-membership.K4.txt")
map.data <-
  data.table::fread("2-samples/bigtable.tsv", skip=2)  %>%
  separate(`Latitude/Longitude`, into=c('lat', 'lon', NA),
           sep="[NW]") %>%
  mutate(lat = as.numeric(lat),
         lon=as.numeric(lon) * -1) %>%
  dplyr::select(Sample="Sample Name", lat, lon) %>%
  right_join(sequence.membership.hap) %>%
  mutate(Population = factor(Population),
         Haplotype = fct_lump_min(Haplotype, min=1,
                                  other_level="Singleton"))
# Transform GPS to Albers_NAD83_L48
map.data.prj <- st_as_sf(map.data,
                         coords = c('lon', 'lat'),
                         crs=4326) %>%
  st_transform(5070) %>%
  st_coordinates(.$geometry) %>%
  cbind(map.data)

map.data.bbox <- st_as_sf(map.data,
                          coords = c('lon', 'lat'),
                          crs=4326) %>%
  st_transform(5070) %>%
  st_bbox()
map.data.bbox <- map.data.bbox -
  c(1000, 25000, 0, 0) + c(0,0,30000,1000)
map.data.bbox

# Download National Map Data
## Crop National map to region of interest
TNM <- file.path("5-full-mito",
                            "ldco48i0100a.tif_nt00971",
                            "ldco48i0100a.tif") %>%
  read_stars() %>%
  st_crop(map.data.bbox)
#plot(TNM)

##download.file("http://www2.census.gov/geo/tiger/GENZ2015/shp/cb_2015_us_state_500k.zip", destfile = "states.zip")
##unzip("states.zip")
state <- read_sf("5-full-mito/cb_2015_us_state_500k.shp") %>%
  st_transform(5070) %>%
  st_crop(map.data.bbox)



## Locations based on TNM
## Pensacola - -87.217, 30.421
## Lake Borgne -  -89.625, 30.042
## Pearl River - -89.629 30.277
## Biloxi, MS. - -88.889 60.407
## Scale and compass
## data.frame(
## name=c("Pensacola", "Lake Borgne", "Pearl River", "Biloxi, MS.", "label"),
## lon = c(-87.217, -89.625 ,-89.629 ,-88.889, -88.0247),
## lat = c(30.421, 30.042, 30.277, 30.407, 30.109)
## ) %>% st_as_sf(coords = c('lon', 'lat'), crs=4326) %>%
##   st_transform(5070)
## Projected CRS: NAD83 / Conus Albers
##          name                  geometry
## 1   Pensacola POINT (841011.5 855006.9)
## 2 Lake Borgne POINT (613666.9 794626.7)
## 3 Pearl River POINT (611534.9 820629.7)
## 4 Biloxi, MS. POINT (681360.5 840064.4)
## 5       label POINT (766765.4 813664.9)



map.data.prj.split[[1]][chull(map.data.prj.split[[1]]),]

map.data.prj

map.data.prj.split <- split(map.data.prj, map.data.prj$Population)
plots <- mapply(function(d,name,color){
  ggplot(d) +
    geom_stars(data=TNM, downsample = 1) +
    scale_fill_manual(values=map.colors, na.value = "#f3f2ed") +
    ## STATES
    annotate(geom = "text", x = 660000, y = 855000,
             label = "Mississippi",
             family="ETBembo",
             color = "grey22",
             size = 12,
             alpha=0.25) +
    annotate(geom = "text", x = 780000, y = 855000,
             label = "Alabama",
             family="ETBembo",
             color = "grey22",
             size = 12,
             alpha=0.25) +
    ## CITES
    annotate(geom = "text", x = 681361, y = 840064,
             label = "Biloxi",
             family="ETBembo",
             color = "grey22",
             size = 4,
             hjust = 1) +
    annotate(geom = "text", x = 841012, y = 855007,
             label = "Pensacola, FL",
             family="ETBembo",
             color = "grey22",
             size = 4,
             hjust = 1) +
    ## LAKE
    annotate(geom = "text", x = 613667, y = 794627,
             label = "Lake\nBorgne",
             fontface = "italic",
             family="ETBembo",
             color = "#0a71b3",
             size = 4) +
    ## RIVER
    annotate(geom = "text", x = 611535, y = 820630,
             label = "Pearl River",
             family="ETBembo",
             color = "#0a71b3",
             size = 4,
             angle=-65,
             vjust =0,
             hjust =0.4 ) +
    geom_sf(data = state, fill=NA, linetype='dotted', linewidth=0.2) +
    geom_mark_hull(data=d, aes(x=X, y=Y), alpha=0.2, fill=color, concavity = 3,
                   expand = unit(2,'mm'), radius = unit(1,'mm'), color='grey')  +
    geom_point(aes(x=X, y=Y, color=Population), size=1, color=color) +
    coord_sf(expand = F) +
    ggtitle(name) +
    theme(panel.grid.major = element_line(color = gray(.25),
                                          linetype = "dashed",
                                          linewidth = 0.1),
          legend.position = 'none',
          axis.title=element_blank())
}, map.data.prj.split,
   names(map.data.prj.split),
   iwanthue(4),
   SIMPLIFY=F)
plots[[1]] = plots[[1]] +
    ## LABEL
  annotate(geom = "text", x = 810000, y = 790000,
           label = "North Central\nGulf of Mexico",
           fontface = "italic",
           family="ETBembo",
           color = "#0a71b3",
           size = 10)
plots[[4]] = plots[[4]] +
  annotation_scale(location = "br", width_hint = 0.25) +
  annotation_north_arrow(location = "br", which_north = "true",
                         pad_x = unit(0.75, "in"),
                         pad_y = unit(0.5, "in"),
                         style = north_arrow_fancy_orienteering)
cowplot::plot_grid(plotlist = plots, ncol = 1)


## ggplot(map.data.prj) +
##     geom_stars(data=TNM, downsample = 1) +
##     scale_fill_manual(values=map.colors, na.value = "#f3f2ed") +
##     ## STATES
##     annotate(geom = "text", x = 660000, y = 855000,
##              label = "Mississippi",
##              family="ETBembo",
##              color = "grey22",
##              size = 12,
##              alpha=0.25) +
##     annotate(geom = "text", x = 780000, y = 855000,
##              label = "Alabama",
##              family="ETBembo",
##              color = "grey22",
##              size = 12,
##              alpha=0.25) +
##     ## CITES
##     annotate(geom = "text", x = 681361, y = 840064,
##              label = "Biloxi",
##              family="ETBembo",
##              color = "grey22",
##              size = 4,
##              hjust = 1) +
##     annotate(geom = "text", x = 841012, y = 855007,
##              label = "Pensacola, FL",
##              family="ETBembo",
##              color = "grey22",
##              size = 4,
##              hjust = 1) +
##     ## LAKE
##     annotate(geom = "text", x = 613667, y = 794627,
##              label = "Lake\nBorgne",
##              fontface = "italic",
##              family="ETBembo",
##              color = "#0a71b3",
##              size = 4) +
##     ## RIVER
##     annotate(geom = "text", x = 611535, y = 820630,
##              label = "Pearl River",
##              family="ETBembo",
##              color = "#0a71b3",
##              size = 4,
##              angle=-65,
##              vjust =0,
##              hjust =0.4 ) +
##     geom_sf(data = state, fill=NA, linetype='dotted', linewidth=0.2) +
##     geom_mark_hull(aes(x=X, y=Y, fill=Population), alpha=0.2, concavity = 3,
##                    expand = unit(2,'mm'), radius = unit(1,'mm'))  +
##     geom_point(aes(x=X, y=Y, color=Population), size=1) +
##     coord_sf(expand = F) +
##   scale_color_iwanthue()+
##     ggtitle("") +
##     theme(panel.grid.major = element_line(color = gray(.25),
##                                           linetype = "dashed",
##                                           linewidth = 0.1),
##           legend.position = 'none',
##           axis.title=element_blank()) +
##     ## LABEL
##   annotate(geom = "text", x = 810000, y = 790000,
##            label = "North Central\nGulf of Mexico",
##            fontface = "italic",
##            family="ETBembo",
##            color = "#0a71b3",
##            size = 10) +
##   annotation_scale(location = "br", width_hint = 0.25) +
##   annotation_north_arrow(location = "br", which_north = "true",
##                          pad_x = unit(0.75, "in"),
##                          pad_y = unit(0.5, "in"),
##                          style = north_arrow_fancy_orienteering)

fig-4.haplotype-map.tiff

Supp. Fig. 1

library(tidyverse)
library(scales)
library(ggtext)

colnames(locs) <- c('Pair', 'used', 'Start', 'End')

locs <- locs %>%
  arrange(, Pair) %>%
  mutate(pos = (Start+End) / 2,
         Pair = factor(Pair, levels = paste0('P', 1:10))) %>%
  mutate(level = as.numeric(Pair)) %>%
  mutate(ymin=level,
         ymax=level + as.numeric(used)*0.25 + 0.5)

locs$Pair <- select(locs, Pair, used) %>%
  unique %>%
  pull(used, name=Pair) %>%
  ifelse(.,
         sprintf("**%s**", names(.)),
         names(.)) %>%
  setNames(names(.), .) %>%
  fct_recode(locs$Pair, !!!.)

#colorbrewer pastel for primers not used
#colorbrewer set1 for primers used
cols <- c(
  "P1" = "#b3e2cddd",
  "P2" = "#fdcdacdd",
  "P3" = "#cbd5e8dd",
  "P5" = "#f4cae4dd",
  "P8" = "#e6f5c9dd",
  "P9" = "#fff2aedd",
  "**P4**" = "#e41a1cff",
  "**P6**" = "#377eb8ff",
  "**P7**" = "#4daf4aff",
  "**P10**"= "#984ea3ff"
)

label <- function(x){
  l <- ifelse(x ==1 , "16.3Kbp",
              label_number(scale_cut=cut_si('bp'))(x))
  str(l)
  return(l)
}

ggplot(locs,
       aes(xmax=Start, xmin=End,
           ymax=ymax, ymin=level,
           fill=Pair )) +
  annotate(geom='rect', xmin=1, xmax=Inf,
           ymin=-Inf, ymax = 0 , fill='white') +
   geom_rect() +
  scale_y_continuous(limits = c(-15, 11), expand = c(0,0)) +
  scale_fill_manual(values=cols)+
  scale_alpha_manual(values=c(0.28, 1)) +
  scale_x_continuous(
    breaks=c(1, seq(2000, 14000, 2000)),
    labels=label) +
  coord_polar(clip='off') +
  ggtitle("Tested Primers") +
  labs(caption="Tested primer pair locations on the mitochondrial genome. Highlighted pairs (P4, P6, P7, and P10)\nwere used to amplify the mtDNA in all samples.") +
  theme_minimal() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        legend.text=element_markdown(),
        plot.caption = element_text(hjust=0))

supp-fig-1.primer-locations.png

Supp. Fig. 2

library(tidyverse)
library(ggrepel)

data <- read.table("2-samples/2-coverage.tbl") %>%
  gather(key="contig", value="avgcov", P4, P6, P7, P10) %>%
  mutate(contig = fct_shift(factor(contig)))

pos = position_jitter(width = 0.3, seed = 2)

ggplot(data, aes(contig, avgcov )) +
geom_boxplot(outlier.alpha = 0) +
geom_jitter(color="grey50", shape=20, position=pos, size=0.5) +
scale_y_log10() +
theme_minimal() +
xlab("Amplicon") +
ylab("Median coverage") +
facet_grid(rows=vars(Type), scales='free_y') +
  labs(title="Median Coverage",
       caption="Median base coverage across each amplicon for each sample.") +
theme(plot.caption = element_text(hjust  = 0))

supp-fig-2.amp-coverage.png

Supp. Fig 3

library(pegas)
library(tidyverse)
library(hues)

read.dna("4-mtCR/1-mafft.fasta.gz", format='fasta', as.matrix=T) %>%
  as.character %>%
  as.data.frame %>%
  rownames_to_column("Samples") %>%
  gather(-Samples, key='Position', value="Base") %>%
  mutate(Position = as.numeric(sub("V", "", Position)),
         Base = str_to_upper(Base)) %>%
  filter(Base != "-") %>%
  ggplot(aes(Position, Samples, fill=Base))+
  geom_raster() +
  scale_x_continuous(expand = c(0,0)) +
  scale_fill_brewer(type='qual', palette="Accent",
                    breaks=c("A","C", "G", "T")) +
  labs(title = "mtCR Alignment",
       caption = "Alignment of all mtCR sequences (both published and newly sequenced). Samples are shown in rows, alignment position is shown in columns. \nThe missing data (white) at both ends were trimmed.") +
  theme_minimal() +
  theme(panel.grid = element_blank(),
        axis.text.y = element_blank(),
        plot.caption = element_text(hjust = 0),
        legend.position = "top")

supp-fig-3.alignment.png

Supp. Fig 4

haplotypes.with_outgroup <-
  read.dna("4-mtCR/2-haplotypes.with_outgroup.fixed.fasta",
           format='fasta', as.matrix=T)

haplotypes.with_outgroup.dist <-
  haplotypes.with_outgroup %>%
  dist.dna(model = 'N', as.matrix = T) %>%
  as.data.frame %>%
  rownames_to_column("Hap1") %>%
  gather(-Hap1, key="Hap2", value="Dist")

haplotypes.of.interest <- haplotypes.with_outgroup.dist %>%
  mutate(tmp=Hap1, Hap1=Hap2, Hap2=tmp) %>%
  select(-tmp) %>%
  rbind(haplotypes.with_outgroup.dist) %>%
  filter(grepl("mtCR.new", Hap1)) %>%
  filter(Dist <=  1) %>%
  unique

haplotypes.with_outgroup %>%
  as.character %>%
  t %>%
  as.data.frame %>%
  rowid_to_column("Pos") %>%
  gather(-Pos, key="Hap2", value="base") %>%
  right_join(haplotypes.of.interest,
             relationship = 'many-to-many') %>%
  group_by(Hap1, Pos ) %>%
  filter(length(unique(base)) > 1) %>%
  ungroup %>%
  ggplot(aes(Pos, Hap2, color=base)) +
  geom_text(aes(label=base)) +
  facet_grid(rows=vars(Hap1), space = 'free', scales = 'free',
             switch='y') +
  scale_color_brewer(type='qual', palette='Set1') +
  labs(title = "New haplotype differences",
  caption=paste0(c("Nucleotide differences between the mtCR.new",
                   "haplotypes and the most similar sequences. All",
                   "sequences with a\nsingle mismatched base",
                   "to the haplotype of interest are displayed.",
                   "All identical bases are removed for clarity."),
                 collapse = " ")) +
  theme_bw() +
  theme(strip.text.y.left  = element_text(angle = 0),
        plot.caption = element_text(hjust = 0),
        strip.placement = 'outside',
        legend.position = 'none',
        axis.title = element_blank())

supp-fig-4.unique-haplotypes.png

Supp. Fig 5

library(LEA)

project.snmf <- file.path("4-mtCR", "4-structure",
                          "haplotypes-fixed.snmfProject") %>%
  load.snmfProject


project.pca <- file.path("4-mtCR", "haplotypes-fixed.pcaProject") %>%
  load.pcaProject()

pca.scree.plot <- tracy.widom(project.pca) %>%
  ggplot(aes(N, percentage)) +
  geom_line() +
  geom_point() +
  scale_y_continuous(labels=scales::label_percent()) +
  labs(title = "PCA Scree Plot",
       x = "Principal Components",
       y = "Percentage of Variance") +
  theme_minimal()

summary.info = summary(project.snmf)$crossEntropy

plot.entropy <- as.data.frame(t(summary.info)) %>%
  tibble::rownames_to_column("K") %>%
  mutate(K = as.numeric(substring(K, 4))) %>%
  ggplot(aes(K, min)) +
#  geom_ribbon(aes(ymin=min, ymax=max), alpha=0.2, color='grey70') +
#  geom_hline(aes(yintercept = min(summary.info[2,]), color = "red")) +
  geom_line(aes(x = K, y = min, group=1)) +
  geom_point() +
  labs(title = "Cross-entropy versus K",
       x = "Number of ancestral populations (K)",
       y = "Minimum Cross-entropy") +
  theme_minimal() +
  theme(legend.position = "none")


cowplot::plot_grid(pca.scree.plot,
                     plot.entropy,
                     NA, NA, rel_heights = c(10,1),
                     labels = c("A", "B")) +
  cowplot::draw_text(paste0("PCA Scree plot (A) and Minimum ",
                            "cross-entropy per K (B) for the mtCR ",
                            "structure analysis", collapse = " "),
                     y=0.05, size=12)

supp-fig-5.select-K.png

Supp Fig 6

library(ggsankey)

mtCR.hap.membership <-
  read.delim("4-mtCR/4-struct-membership.hap-fixed.txt")
full.hap.membership <-
  read.delim("5-full-mito/3-structure-membership.K4.txt")

hap.compare.data <-
  inner_join(full.hap.membership, mtCR.hap.membership,
           by=c("Sample"="Acc")) %>%
  select("Hap.mtCR"="ID", "Hap.full"="Haplotype") %>%
  group_by(Hap.full) %>%
  mutate(full.n=n()) %>%
  group_by(Hap.mtCR) %>%
  mutate(mtcr.n=n()) %>%
  ungroup() %>%
  arrange(desc(mtcr.n), Hap.mtCR, desc(full.n)) %>%
  mutate(Hap.full = fct_inorder(Hap.full),
         Hap.mtCR = fct_inorder(Hap.mtCR))

make_long(hap.compare.data, Hap.mtCR, Hap.full) %>%
  mutate(node = factor(node,
                       levels(fct_c(hap.compare.data$Hap.mtCR,
                                    hap.compare.data$Hap.full))),
         next_node=factor(next_node,
                          levels(fct_c(hap.compare.data$Hap.mtCR,
                                hap.compare.data$Hap.full))),
         x = factor(x, c("Hap.mtCR", "Hap.full"),
                    c("mtCR\nHaplotypes",
                      "Whole Mitogenome\nHaplotypes"))) %>%
  group_by(node) %>%
  mutate(label=ifelse(n() >= 3, as.character(node), NA)) %>%
  ggplot(aes(x = x, next_x = next_x, node = node,
             next_node = next_node, fill = fct_shuffle(node),
             label = label)) +
  geom_sankey(flow.alpha = .6,
              node.color = "gray30") +
  geom_sankey_label(size = 3, color = "white", fill = "gray40") +
  scale_fill_viridis_d(drop = FALSE) +
  theme_sankey(base_size = 18) +
  labs(x = NULL,
       title ="mtCR vs Whole Mitogenome Haplotype Change",
       caption = paste("Sankey plot showing the relationship bewteen",
                       "the haplotypes from mtCR to whole",
                       "\nmitogenome. Haplotypes with five or more", "members are labeled.")) +
  theme(legend.position = "none",
        plot.title = element_text(hjust = 0),
        plot.caption = element_text(hjust = 0))

supp-fig-6.mtCR-vs-whole-sankey.png

Supp Fig 7

library(LEA)

project.snmf <- file.path("5-full-mito",
                          "3-structure-haplotype.snmfProject") %>%
  load.snmfProject
project.pca <- file.path("5-full-mito",
                         "3-structure-haplotype.pcaProject") %>%
  load.pcaProject()

pca.scree.plot <- tracy.widom(project.pca) %>%
  ggplot(aes(N, percentage)) +
  geom_line() +
  geom_point() +
  scale_y_continuous(labels=scales::label_percent()) +
  labs(title = "PCA Scree Plot",
       x = "Principal Components",
       y = "Percentage of Variance") +
  theme_minimal()

summary.info = summary(project.snmf)$crossEntropy

plot.entropy <- as.data.frame(t(summary.info)) %>%
  tibble::rownames_to_column("K") %>%
  mutate(K = as.numeric(substring(K, 4))) %>%
  ggplot(aes(K, min)) +
#  geom_ribbon(aes(ymin=min, ymax=max), alpha=0.2, color='grey70') +
#  geom_hline(aes(yintercept = min(summary.info[2,]), color = "red")) +
  geom_line(aes(x = K, y = min, group=1)) +
  geom_point() +
  labs(title = "Cross-entropy versus K",
       x = "Number of ancestral populations (K)",
       y = "Minimum Cross-entropy") +
  theme_minimal() +
  theme(legend.position = "none")


cowplot::plot_grid(pca.scree.plot,
                     plot.entropy,
                     NA, NA, rel_heights = c(10,1),
                     labels = c("A", "B")) +
  cowplot::draw_text(paste("PCA Scree plot (A) and Minimum",
                           "cross-entropy per K (B) for the whole",
                           "mitogenome structure analysis"),
                     y=0.05, size=12)

supp-fig-7.select-K-full.png

Supp Fig 8

library(ggtree)
tree.data <- read.tree("5-full-mito/haplotypes-nj-tree.nwk")

full.hap.membership <-
  read.delim("5-full-mito/3-structure-membership.K4.txt")

anno <- data.frame(ID=tree.data$tip.label) %>%
  left_join(full.hap.membership, by=c("ID"="Haplotype")) %>%
  select(-Sample) %>%
  mutate(clade=factor(Population)) %>%
  distinct()
anno

layout <- ggtree(tree.data) %<+% anno
p <- layout +
  geom_tippoint(aes(color=clade), size = 1) +
  ##geom_text(aes(label=node)) +
  ## geom_label_repel(aes(label=ifelse(label %in% c("SER19-00888",
  ##                                                "SER11-0141"),
  ##                             label, NA),
  ##                      segment.linetype=2), na.rm = T) +
  ## geom_treescale(y=14) +
  scale_color_iwanthue(na.value="black",
                       breaks=sprintf("mitogroup-%d", 1:4),
                       name="Population") +
  theme_tree2() +
  labs(title="Whole Mitogenome Neighbor-Joining Tree",
       caption="Neighbor-joining tree for the whole mitogenome. Outgroup is the *S. frontalis* sequence NC_060612.1.
<br>Color shows the assigned group of each sample.") +
  theme(legend.position=c(0.05,0.6),
        legend.background = element_blank(),
        axis.text = element_text(),
        plot.caption = element_markdown(hjust=0, size=12))
#p <- rotate_tree(p, -45)
p <- flip(p, 188, 227)
p <- flip(p, 214, 210)
p

supp-fig-8.haplotype-nj-tree.png

mtCR Minimum Spanning Network

library(pegas)
library(tidyverse)
library(poppr)
library(hues)
library(igraph)

haplotypes.without_outgroup.fasta <-
   read.FASTA("4-mtCR/2-haplotypes.without_outgroup.fixed.fasta")

pop.data <-
  read.delim("4-mtCR/haplotype.published-groups.txt") %>%
  select(ID, best.pop) %>%
  mutate(best.pop = factor(best.pop, c("mtCR.sound",
                                       "mtCR.inner",
                                       "mtCR.outer",
                                       "mtCR.ocean"))) %>%
  distinct %>%
  column_to_rownames('ID')

sizes <-
  read.delim("4-mtCR/haplotype.sizes.txt") %>%
  mutate(Haplotype = gsub('[*]', '', Haplotype)) %>%
  filter(!grepl("mtCR.out-", Haplotype)) %>%
  column_to_rownames('Haplotype')

genind <-haplotypes.without_outgroup.fasta %>%
  DNAbin2genind(pop=pop.data[names(haplotypes.without_outgroup.fasta),
                             "best.pop"])
strata(genind) <- sizes

adist <- diss.dist(genind)
amsn <- poppr.msn(genind, adist)

pal <- setNames(
  c("#97C160", "#B65A49", "#737F86", "#914CB1"),
  levels(pop.data$best.pop))

vertex.size <-
  sizes[names(haplotypes.without_outgroup.fasta), "New"]

V(amsn$graph)$size <- log(vertex.size+1) + 1.5
V(amsn$graph)$label <- if_else(vertex.size < 5, NA, vertex.size)
V(amsn$graph)$frame.color <-
               setNames(pop.data$best.pop, rownames(pop.data)) %>%
               .[names(haplotypes.without_outgroup.fasta)] %>%
               pal[.]
V(amsn$graph)$color <-
              if_else(vertex.size > 0, V(amsn$graph)$frame.color, "white")

E(amsn$graph)$width <- 2
E(amsn$graph)$label <- if_else(E(amsn$graph)$weight == 1,
                               NA, E(amsn$graph)$weight)

plot.igraph(amsn$graph, layout = igraph::layout.kamada.kawai)

pushViewport(viewport(x=0.80, y=0.85, width=unit(6, "lines")))
legend <-
  as.data.frame(pal, nm="color") %>%
  rownames_to_column("pop") %>%
  mutate(pop = factor(pop, rev(c("mtCR.sound",
                             "mtCR.inner",
                             "mtCR.outer",
                             "mtCR.ocean")))) %>%
  ggplot(aes(1, pop, fill=color)) +
  geom_tile() +
  scale_fill_identity()+
  scale_y_discrete(position = 'right') +
  coord_equal() +
  ggtitle("Population") +
  theme_void() +
  theme(axis.text.y.right = element_text())
grid.draw(ggplotGrob(legend))
popViewport()

grid.text("mtCR Minimium Spanning Network",
          hjust=0, vjust=0, x=0.1, y=0.95,
          gp=gpar(fontsize=20))

supp-fig-9.mtCR-msn.png

whole mitogenome Minimum Spanning Network

library(pegas)
library(tidyverse)
library(poppr)
library(hues)
library(igraph)

haplotypes.without_outgroup.fasta <-
   read.FASTA("5-full-mito/2-haplotypes.with_outgroup.fasta")[-1]

pop.data <-
  read.delim("5-full-mito/3-structure-membership.K4.txt") %>%
  mutate(Population = factor(Population)) %>%
  group_by(Haplotype, Population) %>%
  count %>%
  mutate(color = factor(Population,
                        sprintf("mitogroup-%d", 1:4),
                        iwanthue(4))) %>%
  column_to_rownames('Haplotype')

genind <-haplotypes.without_outgroup.fasta %>%
  DNAbin2genind(pop=pop.data[names(haplotypes.without_outgroup.fasta),
                             "Population"])
strata(genind) <- pop.data

adist <- diss.dist(genind)
amsn <- poppr.msn(genind, adist)

G <- amsn$graph
#
vertex.size <- pop.data[V(G)$name, "n"]
V(G)$label <- if_else(vertex.size < 5, NA, vertex.size)
V(G)$size <- log(vertex.size+1) +1.75
V(G)$color <- as.character(pop.data[V(G)$name, "color"])
V(G)$label.color <- "white"
#
E(G)$width <- 2
E(G)$label <- if_else(E(G)$weight == 1, NA, E(G)$weight)

set.seed(42)
layout <- layout.reingold.tilford(G, circular = T)
layout <- layout_with_gem(G, coords=layout)
plot.igraph(G, layout=layout)

pushViewport(viewport(x=0.85, y=0.85, width=unit(6, "lines")))
legend <-
  select(pop.data, "Population", "color") %>%
  distinct() %>%
  mutate(Population = fct_rev(factor(Population))) %>%
  ggplot(aes(1, Population, fill=color)) +
  geom_tile() +
  scale_fill_identity()+
  scale_y_discrete(position = 'right') +
  coord_equal() +
  ggtitle("Population") +
  theme_void() +
  theme(axis.text.y.right = element_text())
grid.draw(ggplotGrob(legend))
popViewport()

grid.text("Whole Mitogenome Minimium Spanning Network",
          hjust=0, vjust=0, x=0.1, y=0.95,
          gp=gpar(fontsize=20))

supp-fig-10.mito-msn.png