MS Sound Stranded Dolphins
Reference Sequences Description, location, and download info for reference
sequences used
Primer Design Methods used to design universal primers for marine dolphins
(Delphinidae )
Samples Description of newly sequenced samples, including amplicon
coverage summary
Consensus and SNP Calling Methods used to create consensus sequences and
SNP vcfs for each sample. Includes species validation and heterplasmy search.
mtCR analysis Methods for comparing the mtCR region
Whole mitogenome analysis Methods for comparing full mitogenomes.
library(tidyverse )
library(hues )
library(sf )
library(ggspatial )
library(stars )
map.colors <- c(
" 0" = rgb(193 / 255 , 224 / 255 , 250 / 255 ), # # Ocean water
" 11" = rgb(193 / 255 , 224 / 255 , 250 / 255 ), # # Open water
" 12" = rgb(229 / 255 , 231 / 255 , 232 / 255 ), # # Perennial ice / snow
" 21" = " #ebe5cf" , # # Developed, open space
" 22" = " #ebe5cf" , # # Developed, low intensity
" 23" = " #ebe5cf" , # # Developed, medium intensity
" 24" = " #ebe5cf" , # # Developed. high intensity
" 31" = rgb(179 / 255 , 175 / 255 , 164 / 255 ), # # Barren land
" 41" = " #e4e8da" , # # Deciduous forest
" 42" = " #e4e8da" , # # Evergreen forest
" 43" = " #e4e8da" , # # Mixed forest
" 51" = rgb(176 / 255 , 151 / 255 , 61 / 255 ), # # Dwarf scrub (Alaska only)
" 52" = " #f4f3ee" , # # Shrub / scrub
" 71" = " #f4f3ee" , # # Grassland / herbaceous
" 72" = rgb(209 / 255 , 209 / 255 , 130 / 255 ), # # Sedge / herbaceous (Alaska)
" 74" = rgb(130 / 255 , 186 / 255 , 158 / 255 ), # # Moss (Alaska only)
" 81" = " #f4f3ee" , # # Pasture hay
" 82" = " #f4f3ee" , # # Cultivated crops
" 90" = " #e4e8da" , # # Woody wetlands
" 101" = rgb(240 / 255 , 235 / 255 , 211 / 255 ), # # Non-U.S. land
" 102" = rgb(240 / 255 , 235 / 255 , 211 / 255 ), # # Non-U.S. land
" 103" = rgb(240 / 255 , 235 / 255 , 211 / 255 ) # # Non-U.S. land
)
map.data <-
data.table :: fread(" 2-samples/bigtable.tsv" , skip = 2 ) %> %
separate(`Latitude/Longitude` , into = c(' lat' , ' lon' , NA ),
sep = " [NW]" ) %> %
mutate(lat = as.numeric(lat ),
lon = as.numeric(lon ) * - 1 ) %> %
dplyr :: select(Sample = " Sample Name" , lat , lon )
map.data.prj <- st_as_sf(map.data ,
coords = c(' lon' , ' lat' ),
crs = 4326 ) %> %
st_transform(5070 ) %> %
st_coordinates(. $ geometry ) %> %
cbind(map.data )
map.data.prj
map.data.bbox <- st_as_sf(map.data ,
coords = c(' lon' , ' lat' ),
crs = 4326 ) %> %
st_transform(5070 ) %> %
st_bbox()
map.data.bbox <- map.data.bbox -
c(10000 , 70000 , 0 , 0 ) + c(0 ,0 ,30000 ,25000 )
map.data.bbox
# Download National Map Data
# # Crop National map to region of interest
TNM <- read_stars(
" 5-full-mito/ldco48i0100a.tif_nt00971/ldco48i0100a.tif"
) %> % st_crop(map.data.bbox )
# #download.file("http://www2.census.gov/geo/tiger/GENZ2015/shp/cb_2015_us_state_500k.zip", destfile = "states.zip")
# #unzip("states.zip")
state <- read_sf(" 5-full-mito/cb_2015_us_state_500k.shp" ) %> %
st_transform(5070 ) %> %
st_crop(map.data.bbox )
data <- rbind(
c(" Sound" , " Mobile\n Bay" , - 88.003 , 30.421 , " italic" ),
c(" Sound" , " Mississippi Sound" , - 88.898 , 30.317 , " italic" ),
c(" Sound" , " Chandeleur\n Sound" , - 89.043 , 29.908 , " italic" ),
c(" Sound" , " Breton\n Sound" , - 89.302 , 29.586 , " italic" ),
c(" City" , " Biloxi" , - 88.889 , 30.407 , " plain" ),
c(" City" , " Mobile" , - 87.994 , 30.681 , " plain" ),
c(" City" , " Pensacola" , - 87.217 , 30.421 , " plain" ),
c(" Island" , " Cat\n Island" , - 89.085 , 30.220 , " plain" ),
c(" Island" , " Ship\n Island" , - 88.910 , 30.223 , " plain" ),
c(" Island" , " Horn\n Island" , - 88.662 , 30.244 , " plain" ),
c(" Island" , " Petit Bois\n Island" , - 88.432 , 30.207 , " plain" ),
c(" Island" , " Dauphin\n Island" , - 88.122 , 30.248 , " plain" ),
c(" Island" , " Chandeleur\n Island" , - 88.826 , 29.843 , " plain" )
) %> %
as.data.frame %> %
setNames(c(" Type" , " Name" , " lon" , " lat" , " fontface" )) %> %
st_as_sf(coords = c(' lon' , ' lat' ), crs = 4326 ) %> %
st_transform(5070 ) %> %
cbind(st_coordinates(. $ geometry )) %> %
mutate(Y = ifelse(Type == " Island" & Name != " Chandeleur\n Island" ,
Y - 5000 , Y ))
data
map.data.bbox
ggplot() +
geom_stars(data = TNM , downsample = 1 ) +
scale_fill_manual(values = map.colors , na.value = " #f3f2ed" ) +
# # STATES
annotate(geom = " text" , x = 595000 , y = 780000 ,
label = " Louisiana" ,
family = " ETBembo" ,
color = " grey22" ,
size = 12 ,
angle = 90 ,
alpha = 0.25 ) +
annotate(geom = " text" , x = 660000 , y = 880000 ,
label = " Mississippi" ,
family = " ETBembo" ,
color = " grey22" ,
size = 12 ,
alpha = 0.25 ) +
annotate(geom = " text" , x = 795000 , y = 880000 ,
label = " Alabama" ,
family = " ETBembo" ,
color = " grey22" ,
size = 12 ,
alpha = 0.25 ) +
annotate(geom = " text" , x = 840000 , y = 880000 ,
label = " FL" ,
family = " ETBembo" ,
color = " grey22" ,
size = 12 ,
alpha = 0.25 ) +
geom_text(aes(X , Y , label = Name , size = Type , color = Type ,
fontface = fontface ),
data = data , family = " ETBembo" , lineheight = 0.75 ) +
scale_color_manual(values = c(" Sound" = " #0a71b3" ,
" Island" = " #0a71b3" ,
" City" = " black" )) +
scale_size_manual(values = c(" Sound" = 6 , " Island" = 4 , " City" = 4 )) +
# # LAKE
annotate(geom = " text" , x = 613667 , y = 794627 ,
label = " Lake\n Borgne" ,
fontface = " italic" ,
family = " ETBembo" ,
color = " #0a71b3" ,
size = 4 ) +
# # RIVER
annotate(geom = " text" , x = 611535 , y = 820630 ,
label = " Pearl River" ,
family = " ETBembo" ,
color = " #0a71b3" ,
size = 4 ,
angle = - 65 ,
vjust = 0 ,
hjust = 0.4 ) +
geom_sf(data = state , fill = NA , linetype = ' dotted' , linewidth = 0.2 ) +
geom_point(aes(x = X , y = Y ), data = map.data.prj , size = 0.2 , alpha = 1 ) +
coord_sf(expand = F ) +
theme(panel.grid.major = element_line(color = gray(.25 ),
linetype = " dashed" ,
linewidth = 0.1 ),
legend.position = ' none' ,
axis.title = element_blank()) +
# # LABEL
annotate(geom = " text" , x = 775000 , y = 775000 ,
label = " North Central\n Gulf of Mexico" ,
fontface = " italic" ,
family = " ETBembo" ,
color = " #0a71b3" ,
size = 13 ) +
annotation_scale(location = " br" , width_hint = 0.25 ) +
annotation_north_arrow(location = " br" , which_north = " true" ,
pad_x = unit(0.75 , " in" ),
pad_y = unit(0.5 , " in" ),
style = north_arrow_fancy_orienteering )
library(pegas )
library(tidyverse )
library(hues )
library(ggtree )
library(aplot )
haplotypes.with_outgroup.fasta <-
read.FASTA(" 4-mtCR/2-haplotypes.with_outgroup.fixed.fasta" )
anno <- read.delim(" 4-mtCR/haplotype.published-groups.txt" )
membership <- read.delim(" 4-mtCR/4-struct-membership-K4.txt" )
tree.data <- haplotypes.with_outgroup.fasta %> %
dist.dna(pairwise.deletion = T ) %> %
nj
tree.data $ node.label <-
boot.phylo(tree.data ,
as.matrix(haplotypes.with_outgroup.fasta ),
FUN = function (xx ) nj(dist.gene(xx ,
pairwise.deletion = T )),
B = 1000 )
tree.data <- root(tree.data ,
outgroup = c(" mtCR.out-1" , " mtCR.out-2" ))
colors <- list (" 2017" = c(" NW.inner" = " #98d4ed" ,
" E.inner" = " #f5c89b" ,
" NW.outer" = " #bebfbf" ,
" E.outer" = " #e67383" ,
" NW.Oceanic" = " #e0e47e" ,
" E.oceanic" = " #c7a3c7" ,
" NE.oceanic" = " #99c796" ),
" 2021" = c(" green" = " #669e40" ,
" blue" = " #2f5597" ))
pop.labels <- c(" mtCR.sound" ,
" mtCR.inner" ,
" mtCR.outer" ,
" mtCR.ocean" )
plot <- anno %> %
mutate(Group = factor (Group , c(' New' ,
' NW.inner' ,
' E.inner' ,
' NW.outer' ,
' E.outer' ,
' NW.Oceanic' ,
' NE.oceanic' ,
' E.oceanic' ,
' green' ,
' blue' )),
best.pop = factor (best.pop , pop.labels ),
Set = factor (Set , c(" New" , " 2017" , " 2021" ))) %> %
ggplot(aes(Group , ID )) +
geom_tile(aes(fill = Group ), na.rm = T ) +
geom_text(aes(label = count ), na.rm = T ) +
facet_grid(cols = vars(Set ), scales = " free" , space = " free" ) +
scale_fill_manual( values = c(colors [[' 2021' ]],
colors [[' 2017' ]],
New = " grey" )) +
ggtitle(" (C) Haplotype Members" ) +
theme_bw() +
theme(axis.title = element_blank(),
axis.text.y = element_blank(),
axis.text.x = element_text(angle = 25 , hjust = 1 ),
legend.position = ' none' )
plot.structure <- right_join(membership ,
data.frame (ID = tree.data $ tip.label )) %> %
ggplot(aes(q , ID , fill = pop )) +
geom_col(width = 1 ) +
scale_fill_iwanthue(breaks = pop.labels , name = " " ) +
# scale_y_discrete(expand = c(0,0)) +
scale_x_continuous(labels = scales :: label_percent(),
breaks = c(0.25 , 0.5 , 0.75 ), expand = c(0 ,0 ))+
ggtitle(" (B) Population Structure" ) +
theme_bw()+
theme( axis.text.y = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
legend.position = c(0.48 ,1.015 ),
legend.direction = " horizontal" ,
legend.background = element_blank())
tree <- ggtree(tree.data ) +
geom_nodepoint(aes(color = as.numeric(label )), na.rm = T ,
size = 3 ) +
geom_tiplab(align = T , as_ylab = T ) +
# geom_text2(aes(label=label, subset=!isTip),
# vjust=0.5, hjust=-0.1, size=3) +
# geom_nodelab(aes(label=node), geom="label", bg="white") +
# geom_tiplab(aes(label=node), bg="white") +
ggtitle(" (A) Neighboor-Joining Tree" ) +
scale_x_continuous(breaks = seq(0.005 , 0.05 , 0.01 )) +
scale_color_viridis_c(name = " Bootstrap Values" ) +
guides(color = guide_colorbar(title.position = " top" )) +
theme_tree2() +
theme(legend.position = c(0.25 ,0.95 ),
legend.direction = " horizontal" )
tree <- flip(tree , 116 , 104 )
tree <- flip(tree , 106 , 88 )
tree <- flip(tree , 117 , 132 )
tree <- flip(tree , 87 , 97 )
tree <- flip(tree , 84 , 89 )
tree <- flip(tree , 82 , 90 )
#
main.plot <- plot.structure %> %
insert_left(tree , width = 0.80 )%> %
insert_right(plot , width = 0.90 )
options(" aplot_guides" = " keep" )
main.plot
library(pegas )
library(tidyverse )
library(grid )
haplotypes.sequence <-
read.FASTA(" 5-full-mito/2-haplotypes.with_outgroup.fasta" )
haplotypes.full.dist <- haplotypes.sequence %> %
dist.dna(model = " N" , pairwise.deletion = T )
haplotypes.full.dist.order <-
hclust(haplotypes.full.dist )[c(" labels" , " order" )] %> %
bind_cols %> %
mutate(new = labels [order ]) %> %
pull(new )%> %
rev()
membership <-
read.delim(" 5-full-mito/3-structure-haplotype.admix.coef.txt" ) %> %
filter(K == 4 ) %> %
mutate(ID = factor (ID , haplotypes.full.dist.order ),
pop = factor (pop ))
haplotypes.full.dist.data <-
haplotypes.full.dist %> %
as.matrix %> %
as.data.frame %> %
rownames_to_column(" Hap1" ) %> %
gather(- Hap1 , key = " Hap2" , value = " dist" ) %> %
filter(! Hap1 %in% c(" outgroup-001" )) %> %
filter(! Hap2 %in% c(" outgroup-001" )) %> %
mutate(Hap1 = factor (Hap1 , haplotypes.full.dist.order ),
Hap2 = factor (Hap2 , haplotypes.full.dist.order ))
plot.structure.hap <-
ggplot(membership , aes(ID , q , fill = pop )) +
geom_col(width = 1 ) +
scale_fill_iwanthue() +
scale_x_discrete(expand = c(0 ,0 )) +
scale_y_continuous(labels = scales :: label_percent(),
expand = c(0 ,0 ))+
guides(fill = guide_legend(title = " Population" , nrow = 1 ))+
theme_bw()+
theme(# axis.text.x = element_text(angle = 90, vjust=0.5),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
legend.position = ' top' ,
# # legend.title.position = "top",
# # legend.title = element_text(hjust=0.5)
)
plot.distance.hap <-
filter(haplotypes.full.dist.data ,
as.numeric(Hap1 ) > = as.numeric(Hap2 )) %> %
ggplot(aes(Hap1 , Hap2 , fill = dist )) +
scale_x_discrete(name = " Haplotype" , position = ' top' ) +
scale_y_discrete(name = " Haplotype" , position = ' right' ) +
scale_fill_gradient(limit = c(0 ,20 ), low = " grey30" , high = " white" ,
na.value = " white" ) +
geom_tile() +
coord_equal() +
guides(fill = guide_colorbar(title = " Distance" )) +
theme_minimal() +
theme(axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank(),
# legend.title.position = "top",
legend.position = ' top' )
plot.structure.hap.grob = ggplotGrob(plot.structure.hap )
plot.structure.hap.grobs = split(plot.structure.hap.grob [[" grobs" ]],
plot.structure.hap.grob [[" layout" ]]$ name )
plot.distance.hap.grob = ggplotGrob(plot.distance.hap )
plot.distance.hap.grobs = split(plot.distance.hap.grob [[" grobs" ]],
plot.distance.hap.grob [[" layout" ]]$ name )
top.vp <- viewport(
layout = grid.layout(5 , 3 ,
widths = unit(c(5 , 5 , 2 ),
c(" lines" , " null" , " lines" )),
heights = unit(c(4 , 1 , 2 , 2 , 4 ),
c(" lines" , " null" , " mm" , " null" , " lines" )),
respect = T ))
legend.vp <- viewport(layout.pos.row = 1 ,
layout.pos.col = 2 ,
just = c(" center" ," bottom" ),
name = " legend" )
structure.vp <- viewport(layout.pos.row = 2 ,
layout.pos.col = 2 ,
just = c(" center" ," top" ),
name = " structure" )
structure.axis.vp <- viewport(layout.pos.row = 2 ,
layout.pos.col = 1 ,
just = c(" center" ," top" ),
name = " structure.axis" )
distance.vp <- viewport(name = " distance" ,
layout.pos.row = 4 ,
layout.pos.col = 2 ,
just = c(" center" ," top" ))
distance.axis.vp <- viewport(layout.pos.row = 4 ,
layout.pos.col = 1 ,
just = c(" center" ," top" ),
name = " distance.axis" )
splot <- vpTree(top.vp , vpList(legend.vp ,
structure.vp ,
structure.axis.vp ,
distance.vp ,
distance.axis.vp ))
grid.show.layout(splot $ parent $ layout , vp = splot $ parent )
grid.newpage()
pushViewport(splot )
seekViewport(" structure" )
grid.draw(plot.structure.hap.grobs [[' panel' ]][[1 ]])
seekViewport(" structure.axis" )
grid.draw(plot.structure.hap.grobs [[' axis-l' ]][[1 ]])
grid.text(" Population Structure" , rot = 90 )
grid.text(" A)" , just = ' right' , y = 1 , x = 0.5 , gp = gpar(fontsize = 16 ))
seekViewport(" distance" )
#
aspect.ratio =
as.numeric(convertY(unit(1 , ' inch' ), unitTo = ' native' ))/
as.numeric(convertX(unit(1 , ' inch' ), unitTo = ' native' ))
#
pushViewport(viewport(angle = - 45 ,
width = 1 / sqrt(2 ),
height = aspect.ratio / sqrt(2 ),
# height=unit(1/sqrt(2), "snpc"),
y = 1 ))
grid.draw(plot.distance.hap.grobs [[' panel' ]][[1 ]])
popViewport()
seekViewport(" distance.axis" )
grid.text(" Sequence Similarity" , rot = 90 , just = ' right' , y = 0.95 )
grid.text(" B)" , just = ' right' , y = 1 , x = 0.5 , gp = gpar(fontsize = 16 ))
#
seekViewport(" legend" )
pushViewport(viewport(layout = grid.layout(1 , 2 , widths = c(2 ,1 ))))
pushViewport(viewport(layout.pos.col = 1 , just = c(" right" ," bottom" )))
grid.draw(plot.structure.hap.grobs [[' guide-box-top' ]][[1 ]])
popViewport()
pushViewport(viewport(layout.pos.col = 2 , just = c(" left" ," bottom" )))
grid.draw(plot.distance.hap.grobs [[' guide-box-top' ]][[1 ]])
popViewport()
popViewport()
library(tidyverse )
library(hues )
library(ggforce )
library(sf )
library(ggspatial )
library(stars )
map.colors <- c(
" 0" = rgb(193 / 255 , 224 / 255 , 250 / 255 ), # # Ocean water
" 11" = rgb(193 / 255 , 224 / 255 , 250 / 255 ), # # Open water
" 12" = rgb(229 / 255 , 231 / 255 , 232 / 255 ), # # Perennial ice / snow
" 21" = " #ebe5cf" , # # Developed, open space
" 22" = " #ebe5cf" , # # Developed, low intensity
" 23" = " #ebe5cf" , # # Developed, medium intensity
" 24" = " #ebe5cf" , # # Developed. high intensity
" 31" = rgb(179 / 255 , 175 / 255 , 164 / 255 ), # # Barren land
" 41" = " #e4e8da" , # # Deciduous forest
" 42" = " #e4e8da" , # # Evergreen forest
" 43" = " #e4e8da" , # # Mixed forest
" 51" = rgb(176 / 255 , 151 / 255 , 61 / 255 ), # # Dwarf scrub (Alaska only)
" 52" = " #f4f3ee" , # # Shrub / scrub
" 71" = " #f4f3ee" , # # Grassland / herbaceous
" 72" = rgb(209 / 255 , 209 / 255 , 130 / 255 ), # # Sedge / herbaceous (Alaska)
" 74" = rgb(130 / 255 , 186 / 255 , 158 / 255 ), # # Moss (Alaska only)
" 81" = " #f4f3ee" , # # Pasture hay
" 82" = " #f4f3ee" , # # Cultivated crops
" 90" = " #e4e8da" , # # Woody wetlands
" 101" = rgb(240 / 255 , 235 / 255 , 211 / 255 ), # # Non-U.S. land
" 102" = rgb(240 / 255 , 235 / 255 , 211 / 255 ), # # Non-U.S. land
" 103" = rgb(240 / 255 , 235 / 255 , 211 / 255 ), # # Non-U.S. land
setNames(iwanthue(4 ), sprintf(" mitogroup-%d" , 1 : 4 )))
sequence.membership.hap <-
read.delim(" 5-full-mito/3-structure-membership.K4.txt" )
map.data <-
data.table :: fread(" 2-samples/bigtable.tsv" , skip = 2 ) %> %
separate(`Latitude/Longitude` , into = c(' lat' , ' lon' , NA ),
sep = " [NW]" ) %> %
mutate(lat = as.numeric(lat ),
lon = as.numeric(lon ) * - 1 ) %> %
dplyr :: select(Sample = " Sample Name" , lat , lon ) %> %
right_join(sequence.membership.hap ) %> %
mutate(Population = factor (Population ),
Haplotype = fct_lump_min(Haplotype , min = 1 ,
other_level = " Singleton" ))
# Transform GPS to Albers_NAD83_L48
map.data.prj <- st_as_sf(map.data ,
coords = c(' lon' , ' lat' ),
crs = 4326 ) %> %
st_transform(5070 ) %> %
st_coordinates(. $ geometry ) %> %
cbind(map.data )
map.data.bbox <- st_as_sf(map.data ,
coords = c(' lon' , ' lat' ),
crs = 4326 ) %> %
st_transform(5070 ) %> %
st_bbox()
map.data.bbox <- map.data.bbox -
c(1000 , 25000 , 0 , 0 ) + c(0 ,0 ,30000 ,1000 )
map.data.bbox
# Download National Map Data
# # Crop National map to region of interest
TNM <- file.path(" 5-full-mito" ,
" ldco48i0100a.tif_nt00971" ,
" ldco48i0100a.tif" ) %> %
read_stars() %> %
st_crop(map.data.bbox )
# plot(TNM)
# #download.file("http://www2.census.gov/geo/tiger/GENZ2015/shp/cb_2015_us_state_500k.zip", destfile = "states.zip")
# #unzip("states.zip")
state <- read_sf(" 5-full-mito/cb_2015_us_state_500k.shp" ) %> %
st_transform(5070 ) %> %
st_crop(map.data.bbox )
# # Locations based on TNM
# # Pensacola - -87.217, 30.421
# # Lake Borgne - -89.625, 30.042
# # Pearl River - -89.629 30.277
# # Biloxi, MS. - -88.889 60.407
# # Scale and compass
# # data.frame(
# # name=c("Pensacola", "Lake Borgne", "Pearl River", "Biloxi, MS.", "label"),
# # lon = c(-87.217, -89.625 ,-89.629 ,-88.889, -88.0247),
# # lat = c(30.421, 30.042, 30.277, 30.407, 30.109)
# # ) %>% st_as_sf(coords = c('lon', 'lat'), crs=4326) %>%
# # st_transform(5070)
# # Projected CRS: NAD83 / Conus Albers
# # name geometry
# # 1 Pensacola POINT (841011.5 855006.9)
# # 2 Lake Borgne POINT (613666.9 794626.7)
# # 3 Pearl River POINT (611534.9 820629.7)
# # 4 Biloxi, MS. POINT (681360.5 840064.4)
# # 5 label POINT (766765.4 813664.9)
map.data.prj.split [[1 ]][chull(map.data.prj.split [[1 ]]),]
map.data.prj
map.data.prj.split <- split(map.data.prj , map.data.prj $ Population )
plots <- mapply(function (d ,name ,color ){
ggplot(d ) +
geom_stars(data = TNM , downsample = 1 ) +
scale_fill_manual(values = map.colors , na.value = " #f3f2ed" ) +
# # STATES
annotate(geom = " text" , x = 660000 , y = 855000 ,
label = " Mississippi" ,
family = " ETBembo" ,
color = " grey22" ,
size = 12 ,
alpha = 0.25 ) +
annotate(geom = " text" , x = 780000 , y = 855000 ,
label = " Alabama" ,
family = " ETBembo" ,
color = " grey22" ,
size = 12 ,
alpha = 0.25 ) +
# # CITES
annotate(geom = " text" , x = 681361 , y = 840064 ,
label = " Biloxi" ,
family = " ETBembo" ,
color = " grey22" ,
size = 4 ,
hjust = 1 ) +
annotate(geom = " text" , x = 841012 , y = 855007 ,
label = " Pensacola, FL" ,
family = " ETBembo" ,
color = " grey22" ,
size = 4 ,
hjust = 1 ) +
# # LAKE
annotate(geom = " text" , x = 613667 , y = 794627 ,
label = " Lake\n Borgne" ,
fontface = " italic" ,
family = " ETBembo" ,
color = " #0a71b3" ,
size = 4 ) +
# # RIVER
annotate(geom = " text" , x = 611535 , y = 820630 ,
label = " Pearl River" ,
family = " ETBembo" ,
color = " #0a71b3" ,
size = 4 ,
angle = - 65 ,
vjust = 0 ,
hjust = 0.4 ) +
geom_sf(data = state , fill = NA , linetype = ' dotted' , linewidth = 0.2 ) +
geom_mark_hull(data = d , aes(x = X , y = Y ), alpha = 0.2 , fill = color , concavity = 3 ,
expand = unit(2 ,' mm' ), radius = unit(1 ,' mm' ), color = ' grey' ) +
geom_point(aes(x = X , y = Y , color = Population ), size = 1 , color = color ) +
coord_sf(expand = F ) +
ggtitle(name ) +
theme(panel.grid.major = element_line(color = gray(.25 ),
linetype = " dashed" ,
linewidth = 0.1 ),
legend.position = ' none' ,
axis.title = element_blank())
}, map.data.prj.split ,
names(map.data.prj.split ),
iwanthue(4 ),
SIMPLIFY = F )
plots [[1 ]] = plots [[1 ]] +
# # LABEL
annotate(geom = " text" , x = 810000 , y = 790000 ,
label = " North Central\n Gulf of Mexico" ,
fontface = " italic" ,
family = " ETBembo" ,
color = " #0a71b3" ,
size = 10 )
plots [[4 ]] = plots [[4 ]] +
annotation_scale(location = " br" , width_hint = 0.25 ) +
annotation_north_arrow(location = " br" , which_north = " true" ,
pad_x = unit(0.75 , " in" ),
pad_y = unit(0.5 , " in" ),
style = north_arrow_fancy_orienteering )
cowplot :: plot_grid(plotlist = plots , ncol = 1 )
# # ggplot(map.data.prj) +
# # geom_stars(data=TNM, downsample = 1) +
# # scale_fill_manual(values=map.colors, na.value = "#f3f2ed") +
# # ## STATES
# # annotate(geom = "text", x = 660000, y = 855000,
# # label = "Mississippi",
# # family="ETBembo",
# # color = "grey22",
# # size = 12,
# # alpha=0.25) +
# # annotate(geom = "text", x = 780000, y = 855000,
# # label = "Alabama",
# # family="ETBembo",
# # color = "grey22",
# # size = 12,
# # alpha=0.25) +
# # ## CITES
# # annotate(geom = "text", x = 681361, y = 840064,
# # label = "Biloxi",
# # family="ETBembo",
# # color = "grey22",
# # size = 4,
# # hjust = 1) +
# # annotate(geom = "text", x = 841012, y = 855007,
# # label = "Pensacola, FL",
# # family="ETBembo",
# # color = "grey22",
# # size = 4,
# # hjust = 1) +
# # ## LAKE
# # annotate(geom = "text", x = 613667, y = 794627,
# # label = "Lake\nBorgne",
# # fontface = "italic",
# # family="ETBembo",
# # color = "#0a71b3",
# # size = 4) +
# # ## RIVER
# # annotate(geom = "text", x = 611535, y = 820630,
# # label = "Pearl River",
# # family="ETBembo",
# # color = "#0a71b3",
# # size = 4,
# # angle=-65,
# # vjust =0,
# # hjust =0.4 ) +
# # geom_sf(data = state, fill=NA, linetype='dotted', linewidth=0.2) +
# # geom_mark_hull(aes(x=X, y=Y, fill=Population), alpha=0.2, concavity = 3,
# # expand = unit(2,'mm'), radius = unit(1,'mm')) +
# # geom_point(aes(x=X, y=Y, color=Population), size=1) +
# # coord_sf(expand = F) +
# # scale_color_iwanthue()+
# # ggtitle("") +
# # theme(panel.grid.major = element_line(color = gray(.25),
# # linetype = "dashed",
# # linewidth = 0.1),
# # legend.position = 'none',
# # axis.title=element_blank()) +
# # ## LABEL
# # annotate(geom = "text", x = 810000, y = 790000,
# # label = "North Central\nGulf of Mexico",
# # fontface = "italic",
# # family="ETBembo",
# # color = "#0a71b3",
# # size = 10) +
# # annotation_scale(location = "br", width_hint = 0.25) +
# # annotation_north_arrow(location = "br", which_north = "true",
# # pad_x = unit(0.75, "in"),
# # pad_y = unit(0.5, "in"),
# # style = north_arrow_fancy_orienteering)
library(tidyverse )
library(scales )
library(ggtext )
colnames(locs ) <- c(' Pair' , ' used' , ' Start' , ' End' )
locs <- locs %> %
arrange(, Pair ) %> %
mutate(pos = (Start + End ) / 2 ,
Pair = factor (Pair , levels = paste0(' P' , 1 : 10 ))) %> %
mutate(level = as.numeric(Pair )) %> %
mutate(ymin = level ,
ymax = level + as.numeric(used )* 0.25 + 0.5 )
locs $ Pair <- select(locs , Pair , used ) %> %
unique %> %
pull(used , name = Pair ) %> %
ifelse(. ,
sprintf(" **%s**" , names(. )),
names(. )) %> %
setNames(names(. ), . ) %> %
fct_recode(locs $ Pair , !!! . )
# colorbrewer pastel for primers not used
# colorbrewer set1 for primers used
cols <- c(
" P1" = " #b3e2cddd" ,
" P2" = " #fdcdacdd" ,
" P3" = " #cbd5e8dd" ,
" P5" = " #f4cae4dd" ,
" P8" = " #e6f5c9dd" ,
" P9" = " #fff2aedd" ,
" **P4**" = " #e41a1cff" ,
" **P6**" = " #377eb8ff" ,
" **P7**" = " #4daf4aff" ,
" **P10**" = " #984ea3ff"
)
label <- function (x ){
l <- ifelse(x == 1 , " 16.3Kbp" ,
label_number(scale_cut = cut_si(' bp' ))(x ))
str(l )
return (l )
}
ggplot(locs ,
aes(xmax = Start , xmin = End ,
ymax = ymax , ymin = level ,
fill = Pair )) +
annotate(geom = ' rect' , xmin = 1 , xmax = Inf ,
ymin = - Inf , ymax = 0 , fill = ' white' ) +
geom_rect() +
scale_y_continuous(limits = c(- 15 , 11 ), expand = c(0 ,0 )) +
scale_fill_manual(values = cols )+
scale_alpha_manual(values = c(0.28 , 1 )) +
scale_x_continuous(
breaks = c(1 , seq(2000 , 14000 , 2000 )),
labels = label ) +
coord_polar(clip = ' off' ) +
ggtitle(" Tested Primers" ) +
labs(caption = " Tested primer pair locations on the mitochondrial genome. Highlighted pairs (P4, P6, P7, and P10)\n were used to amplify the mtDNA in all samples." ) +
theme_minimal() +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.text = element_markdown(),
plot.caption = element_text(hjust = 0 ))
library(tidyverse )
library(ggrepel )
data <- read.table(" 2-samples/2-coverage.tbl" ) %> %
gather(key = " contig" , value = " avgcov" , P4 , P6 , P7 , P10 ) %> %
mutate(contig = fct_shift(factor (contig )))
pos = position_jitter(width = 0.3 , seed = 2 )
ggplot(data , aes(contig , avgcov )) +
geom_boxplot(outlier.alpha = 0 ) +
geom_jitter(color = " grey50" , shape = 20 , position = pos , size = 0.5 ) +
scale_y_log10() +
theme_minimal() +
xlab(" Amplicon" ) +
ylab(" Median coverage" ) +
facet_grid(rows = vars(Type ), scales = ' free_y' ) +
labs(title = " Median Coverage" ,
caption = " Median base coverage across each amplicon for each sample." ) +
theme(plot.caption = element_text(hjust = 0 ))
library(pegas )
library(tidyverse )
library(hues )
read.dna(" 4-mtCR/1-mafft.fasta.gz" , format = ' fasta' , as.matrix = T ) %> %
as.character %> %
as.data.frame %> %
rownames_to_column(" Samples" ) %> %
gather(- Samples , key = ' Position' , value = " Base" ) %> %
mutate(Position = as.numeric(sub(" V" , " " , Position )),
Base = str_to_upper(Base )) %> %
filter(Base != " -" ) %> %
ggplot(aes(Position , Samples , fill = Base ))+
geom_raster() +
scale_x_continuous(expand = c(0 ,0 )) +
scale_fill_brewer(type = ' qual' , palette = " Accent" ,
breaks = c(" A" ," C" , " G" , " T" )) +
labs(title = " mtCR Alignment" ,
caption = " Alignment of all mtCR sequences (both published and newly sequenced). Samples are shown in rows, alignment position is shown in columns. \n The missing data (white) at both ends were trimmed." ) +
theme_minimal() +
theme(panel.grid = element_blank(),
axis.text.y = element_blank(),
plot.caption = element_text(hjust = 0 ),
legend.position = " top" )
haplotypes.with_outgroup <-
read.dna(" 4-mtCR/2-haplotypes.with_outgroup.fixed.fasta" ,
format = ' fasta' , as.matrix = T )
haplotypes.with_outgroup.dist <-
haplotypes.with_outgroup %> %
dist.dna(model = ' N' , as.matrix = T ) %> %
as.data.frame %> %
rownames_to_column(" Hap1" ) %> %
gather(- Hap1 , key = " Hap2" , value = " Dist" )
haplotypes.of.interest <- haplotypes.with_outgroup.dist %> %
mutate(tmp = Hap1 , Hap1 = Hap2 , Hap2 = tmp ) %> %
select(- tmp ) %> %
rbind(haplotypes.with_outgroup.dist ) %> %
filter(grepl(" mtCR.new" , Hap1 )) %> %
filter(Dist < = 1 ) %> %
unique
haplotypes.with_outgroup %> %
as.character %> %
t %> %
as.data.frame %> %
rowid_to_column(" Pos" ) %> %
gather(- Pos , key = " Hap2" , value = " base" ) %> %
right_join(haplotypes.of.interest ,
relationship = ' many-to-many' ) %> %
group_by(Hap1 , Pos ) %> %
filter(length(unique(base )) > 1 ) %> %
ungroup %> %
ggplot(aes(Pos , Hap2 , color = base )) +
geom_text(aes(label = base )) +
facet_grid(rows = vars(Hap1 ), space = ' free' , scales = ' free' ,
switch = ' y' ) +
scale_color_brewer(type = ' qual' , palette = ' Set1' ) +
labs(title = " New haplotype differences" ,
caption = paste0(c(" Nucleotide differences between the mtCR.new" ,
" haplotypes and the most similar sequences. All" ,
" sequences with a\n single mismatched base" ,
" to the haplotype of interest are displayed." ,
" All identical bases are removed for clarity." ),
collapse = " " )) +
theme_bw() +
theme(strip.text.y.left = element_text(angle = 0 ),
plot.caption = element_text(hjust = 0 ),
strip.placement = ' outside' ,
legend.position = ' none' ,
axis.title = element_blank())
library(LEA )
project.snmf <- file.path(" 4-mtCR" , " 4-structure" ,
" haplotypes-fixed.snmfProject" ) %> %
load.snmfProject
project.pca <- file.path(" 4-mtCR" , " haplotypes-fixed.pcaProject" ) %> %
load.pcaProject()
pca.scree.plot <- tracy.widom(project.pca ) %> %
ggplot(aes(N , percentage )) +
geom_line() +
geom_point() +
scale_y_continuous(labels = scales :: label_percent()) +
labs(title = " PCA Scree Plot" ,
x = " Principal Components" ,
y = " Percentage of Variance" ) +
theme_minimal()
summary.info = summary(project.snmf )$ crossEntropy
plot.entropy <- as.data.frame(t(summary.info )) %> %
tibble :: rownames_to_column(" K" ) %> %
mutate(K = as.numeric(substring(K , 4 ))) %> %
ggplot(aes(K , min )) +
# geom_ribbon(aes(ymin=min, ymax=max), alpha=0.2, color='grey70') +
# geom_hline(aes(yintercept = min(summary.info[2,]), color = "red")) +
geom_line(aes(x = K , y = min , group = 1 )) +
geom_point() +
labs(title = " Cross-entropy versus K" ,
x = " Number of ancestral populations (K)" ,
y = " Minimum Cross-entropy" ) +
theme_minimal() +
theme(legend.position = " none" )
cowplot :: plot_grid(pca.scree.plot ,
plot.entropy ,
NA , NA , rel_heights = c(10 ,1 ),
labels = c(" A" , " B" )) +
cowplot :: draw_text(paste0(" PCA Scree plot (A) and Minimum " ,
" cross-entropy per K (B) for the mtCR " ,
" structure analysis" , collapse = " " ),
y = 0.05 , size = 12 )
library(ggsankey )
mtCR.hap.membership <-
read.delim(" 4-mtCR/4-struct-membership.hap-fixed.txt" )
full.hap.membership <-
read.delim(" 5-full-mito/3-structure-membership.K4.txt" )
hap.compare.data <-
inner_join(full.hap.membership , mtCR.hap.membership ,
by = c(" Sample" = " Acc" )) %> %
select(" Hap.mtCR" = " ID" , " Hap.full" = " Haplotype" ) %> %
group_by(Hap.full ) %> %
mutate(full.n = n()) %> %
group_by(Hap.mtCR ) %> %
mutate(mtcr.n = n()) %> %
ungroup() %> %
arrange(desc(mtcr.n ), Hap.mtCR , desc(full.n )) %> %
mutate(Hap.full = fct_inorder(Hap.full ),
Hap.mtCR = fct_inorder(Hap.mtCR ))
make_long(hap.compare.data , Hap.mtCR , Hap.full ) %> %
mutate(node = factor (node ,
levels(fct_c(hap.compare.data $ Hap.mtCR ,
hap.compare.data $ Hap.full ))),
next_node = factor (next_node ,
levels(fct_c(hap.compare.data $ Hap.mtCR ,
hap.compare.data $ Hap.full ))),
x = factor (x , c(" Hap.mtCR" , " Hap.full" ),
c(" mtCR\n Haplotypes" ,
" Whole Mitogenome\n Haplotypes" ))) %> %
group_by(node ) %> %
mutate(label = ifelse(n() > = 3 , as.character(node ), NA )) %> %
ggplot(aes(x = x , next_x = next_x , node = node ,
next_node = next_node , fill = fct_shuffle(node ),
label = label )) +
geom_sankey(flow.alpha = .6 ,
node.color = " gray30" ) +
geom_sankey_label(size = 3 , color = " white" , fill = " gray40" ) +
scale_fill_viridis_d(drop = FALSE ) +
theme_sankey(base_size = 18 ) +
labs(x = NULL ,
title = " mtCR vs Whole Mitogenome Haplotype Change" ,
caption = paste(" Sankey plot showing the relationship bewteen" ,
" the haplotypes from mtCR to whole" ,
" \n mitogenome. Haplotypes with five or more" , " members are labeled." )) +
theme(legend.position = " none" ,
plot.title = element_text(hjust = 0 ),
plot.caption = element_text(hjust = 0 ))
library(LEA )
project.snmf <- file.path(" 5-full-mito" ,
" 3-structure-haplotype.snmfProject" ) %> %
load.snmfProject
project.pca <- file.path(" 5-full-mito" ,
" 3-structure-haplotype.pcaProject" ) %> %
load.pcaProject()
pca.scree.plot <- tracy.widom(project.pca ) %> %
ggplot(aes(N , percentage )) +
geom_line() +
geom_point() +
scale_y_continuous(labels = scales :: label_percent()) +
labs(title = " PCA Scree Plot" ,
x = " Principal Components" ,
y = " Percentage of Variance" ) +
theme_minimal()
summary.info = summary(project.snmf )$ crossEntropy
plot.entropy <- as.data.frame(t(summary.info )) %> %
tibble :: rownames_to_column(" K" ) %> %
mutate(K = as.numeric(substring(K , 4 ))) %> %
ggplot(aes(K , min )) +
# geom_ribbon(aes(ymin=min, ymax=max), alpha=0.2, color='grey70') +
# geom_hline(aes(yintercept = min(summary.info[2,]), color = "red")) +
geom_line(aes(x = K , y = min , group = 1 )) +
geom_point() +
labs(title = " Cross-entropy versus K" ,
x = " Number of ancestral populations (K)" ,
y = " Minimum Cross-entropy" ) +
theme_minimal() +
theme(legend.position = " none" )
cowplot :: plot_grid(pca.scree.plot ,
plot.entropy ,
NA , NA , rel_heights = c(10 ,1 ),
labels = c(" A" , " B" )) +
cowplot :: draw_text(paste(" PCA Scree plot (A) and Minimum" ,
" cross-entropy per K (B) for the whole" ,
" mitogenome structure analysis" ),
y = 0.05 , size = 12 )
library(ggtree )
tree.data <- read.tree(" 5-full-mito/haplotypes-nj-tree.nwk" )
full.hap.membership <-
read.delim(" 5-full-mito/3-structure-membership.K4.txt" )
anno <- data.frame (ID = tree.data $ tip.label ) %> %
left_join(full.hap.membership , by = c(" ID" = " Haplotype" )) %> %
select(- Sample ) %> %
mutate(clade = factor (Population )) %> %
distinct()
anno
layout <- ggtree(tree.data ) %< + % anno
p <- layout +
geom_tippoint(aes(color = clade ), size = 1 ) +
# #geom_text(aes(label=node)) +
# # geom_label_repel(aes(label=ifelse(label %in% c("SER19-00888",
# # "SER11-0141"),
# # label, NA),
# # segment.linetype=2), na.rm = T) +
# # geom_treescale(y=14) +
scale_color_iwanthue(na.value = " black" ,
breaks = sprintf(" mitogroup-%d" , 1 : 4 ),
name = " Population" ) +
theme_tree2() +
labs(title = " Whole Mitogenome Neighbor-Joining Tree" ,
caption = " Neighbor-joining tree for the whole mitogenome. Outgroup is the *S. frontalis* sequence NC_060612.1.
<br>Color shows the assigned group of each sample." ) +
theme(legend.position = c(0.05 ,0.6 ),
legend.background = element_blank(),
axis.text = element_text(),
plot.caption = element_markdown(hjust = 0 , size = 12 ))
# p <- rotate_tree(p, -45)
p <- flip(p , 188 , 227 )
p <- flip(p , 214 , 210 )
p
mtCR Minimum Spanning Network
library(pegas )
library(tidyverse )
library(poppr )
library(hues )
library(igraph )
haplotypes.without_outgroup.fasta <-
read.FASTA(" 4-mtCR/2-haplotypes.without_outgroup.fixed.fasta" )
pop.data <-
read.delim(" 4-mtCR/haplotype.published-groups.txt" ) %> %
select(ID , best.pop ) %> %
mutate(best.pop = factor (best.pop , c(" mtCR.sound" ,
" mtCR.inner" ,
" mtCR.outer" ,
" mtCR.ocean" ))) %> %
distinct %> %
column_to_rownames(' ID' )
sizes <-
read.delim(" 4-mtCR/haplotype.sizes.txt" ) %> %
mutate(Haplotype = gsub(' [*]' , ' ' , Haplotype )) %> %
filter(! grepl(" mtCR.out-" , Haplotype )) %> %
column_to_rownames(' Haplotype' )
genind <- haplotypes.without_outgroup.fasta %> %
DNAbin2genind(pop = pop.data [names(haplotypes.without_outgroup.fasta ),
" best.pop" ])
strata(genind ) <- sizes
adist <- diss.dist(genind )
amsn <- poppr.msn(genind , adist )
pal <- setNames(
c(" #97C160" , " #B65A49" , " #737F86" , " #914CB1" ),
levels(pop.data $ best.pop ))
vertex.size <-
sizes [names(haplotypes.without_outgroup.fasta ), " New" ]
V(amsn $ graph )$ size <- log(vertex.size + 1 ) + 1.5
V(amsn $ graph )$ label <- if_else(vertex.size < 5 , NA , vertex.size )
V(amsn $ graph )$ frame.color <-
setNames(pop.data $ best.pop , rownames(pop.data )) %> %
. [names(haplotypes.without_outgroup.fasta )] %> %
pal [. ]
V(amsn $ graph )$ color <-
if_else(vertex.size > 0 , V(amsn $ graph )$ frame.color , " white" )
E(amsn $ graph )$ width <- 2
E(amsn $ graph )$ label <- if_else(E(amsn $ graph )$ weight == 1 ,
NA , E(amsn $ graph )$ weight )
plot.igraph(amsn $ graph , layout = igraph :: layout.kamada.kawai )
pushViewport(viewport(x = 0.80 , y = 0.85 , width = unit(6 , " lines" )))
legend <-
as.data.frame(pal , nm = " color" ) %> %
rownames_to_column(" pop" ) %> %
mutate(pop = factor (pop , rev(c(" mtCR.sound" ,
" mtCR.inner" ,
" mtCR.outer" ,
" mtCR.ocean" )))) %> %
ggplot(aes(1 , pop , fill = color )) +
geom_tile() +
scale_fill_identity()+
scale_y_discrete(position = ' right' ) +
coord_equal() +
ggtitle(" Population" ) +
theme_void() +
theme(axis.text.y.right = element_text())
grid.draw(ggplotGrob(legend ))
popViewport()
grid.text(" mtCR Minimium Spanning Network" ,
hjust = 0 , vjust = 0 , x = 0.1 , y = 0.95 ,
gp = gpar(fontsize = 20 ))
whole mitogenome Minimum Spanning Network
library(pegas )
library(tidyverse )
library(poppr )
library(hues )
library(igraph )
haplotypes.without_outgroup.fasta <-
read.FASTA(" 5-full-mito/2-haplotypes.with_outgroup.fasta" )[- 1 ]
pop.data <-
read.delim(" 5-full-mito/3-structure-membership.K4.txt" ) %> %
mutate(Population = factor (Population )) %> %
group_by(Haplotype , Population ) %> %
count %> %
mutate(color = factor (Population ,
sprintf(" mitogroup-%d" , 1 : 4 ),
iwanthue(4 ))) %> %
column_to_rownames(' Haplotype' )
genind <- haplotypes.without_outgroup.fasta %> %
DNAbin2genind(pop = pop.data [names(haplotypes.without_outgroup.fasta ),
" Population" ])
strata(genind ) <- pop.data
adist <- diss.dist(genind )
amsn <- poppr.msn(genind , adist )
G <- amsn $ graph
#
vertex.size <- pop.data [V(G )$ name , " n" ]
V(G )$ label <- if_else(vertex.size < 5 , NA , vertex.size )
V(G )$ size <- log(vertex.size + 1 ) + 1.75
V(G )$ color <- as.character(pop.data [V(G )$ name , " color" ])
V(G )$ label.color <- " white"
#
E(G )$ width <- 2
E(G )$ label <- if_else(E(G )$ weight == 1 , NA , E(G )$ weight )
set.seed(42 )
layout <- layout.reingold.tilford(G , circular = T )
layout <- layout_with_gem(G , coords = layout )
plot.igraph(G , layout = layout )
pushViewport(viewport(x = 0.85 , y = 0.85 , width = unit(6 , " lines" )))
legend <-
select(pop.data , " Population" , " color" ) %> %
distinct() %> %
mutate(Population = fct_rev(factor (Population ))) %> %
ggplot(aes(1 , Population , fill = color )) +
geom_tile() +
scale_fill_identity()+
scale_y_discrete(position = ' right' ) +
coord_equal() +
ggtitle(" Population" ) +
theme_void() +
theme(axis.text.y.right = element_text())
grid.draw(ggplotGrob(legend ))
popViewport()
grid.text(" Whole Mitogenome Minimium Spanning Network" ,
hjust = 0 , vjust = 0 , x = 0.1 , y = 0.95 ,
gp = gpar(fontsize = 20 ))