Aim

Draw map of distribution of the two Malina species from metabarcodes, cultures and GenBank sequences

Init

Load the necessary libraries

library("RMySQL")

library("stringr") # To do replacement
library("dplyr")  # To manipulate dataframes
library("tidyr")  # To manipulate dataframes

library("ggplot2")
library("rworldmap")

library("dvutils")

Load options for knitr

  knitr::opts_chunk$set(fig.width=8, 
                        fig.height=6, 
                        eval=TRUE, 
                        cache=TRUE,
                        echo=TRUE,
                        prompt=FALSE,
                        tidy=TRUE,
                        comment=NA,
                        message=FALSE,
                        warning=FALSE)
  knitr::opts_knit$set(width=90)
  options(max.print="500")  

Data sets used

dataset_id dataset_code dataset_name marker region
5 MALINA_Monier_2014 MALINA - Monier 2014 18S_V4 Arctic Ocean
6 Stecher Sea Ice
8 Arctic_Comeau_2011 ACME - Comeau 2011 18S_V4 Arctic Ocean
9 Nansen_Metfies_2016 Nansen Basin - Metfies 2016 18S_V4 Arctic Ocean
10 Southern_Ocean_Wolf_2014 Southern Ocean - Wolf 2014 18S_V4 Southern Ocean
11 Fieldes_Bay_Luo_2016 Fieldes Bay - Luo 2016 18S_V4 Southern Ocean
13 Fram_strait_Kilias_2013 Fram Strait Kilias - 2013 18S_V4 Arctic Ocean
14 OSD_LGC_2014 OSD LGC 2014 18S_V4 Ocean
15 Tara
dataset_used <- c(5:6, 8:11, 13:15)

Get the data

Metabarcode data

# Connect to metabarcodes database
db_metabarcodes <- dbConnect(MySQL(), user = "root", password = "root", dbname = "metabarcodes", 
    host = "localhost")

# Upload everyone in memory
otus <- dbGetQuery(db_metabarcodes, "select * from otus")
samples <- dbGetQuery(db_metabarcodes, "select * from samples")
read_abundance <- dbGetQuery(db_metabarcodes, "select * from read_abundance")
# test <- dbGetQuery(db_metabarcodes, 'select * from rcc2288')

# Select only taxon if interest
otus_species <- otus %>% filter(str_detect(species, "RCC2288"))

# Joins the tables otu and read abundance
otu_species <- inner_join(read_abundance, otus_species)

# Summarize per species (not necessary in this case but need to be done if
# several otu for a given species)
otu_species <- otu_species %>% group_by(dataset_id, sample_code, species) %>% 
    summarize(reads_total_species = sum(read_number))

# Join with sample table to get the coordindates
samples <- filter(samples, dataset_id %in% dataset_used)
metabarcodes_species <- left_join(samples, otu_species)

Add the genbank and cultures sequences

# Add genbank sequences
genbank_species <- readxl::read_excel("RCC2288 in metabarcode data sets.xlsx", 
    sheet = "GenBank sequences")

# Add cultures
cultures <- readxl::read_excel("RCC2288 in metabarcode data sets.xlsx", sheet = "RCC")

Maps

Get the maps

# Get the world map
worldMap <- getMap()
world.points <- fortify(worldMap)
world.points$region <- world.points$id
world.df <- world.points[, c("long", "lat", "group", "region")]

# Define constants for the map
size_points <- 2.5
size_cross <- 1
color_ice <- "lightslateblue"
color_water <- "navy"
color_cultures <- "green"
color_not_present <- "red"
color_continents <- "grey80"

Compute contribution

df_map <- dplyr::filter(metabarcodes_species, !is.na(reads_total_species))
df_map$read_relative_pct <- df_map$reads_total_species/df_map$reads_total * 
    100

Draw maps

Mercator projection

species_map <- ggplot() + geom_polygon(data = world.df, aes(x = long, y = lat, 
    group = group, fill = ""), colour = "white", size = 0.1) + scale_fill_manual(values = color_continents, 
    guide = FALSE) + scale_x_continuous(breaks = (-4:4) * 45) + scale_y_continuous(breaks = (-2:2) * 
    30) + xlab("Longitude") + ylab("Latitude") + coord_fixed(1.3) + theme_bw()
# species_map <- species_map + coord_map () # Mercator projection
# species_map <- species_map + coord_map('gilbert') # Nice for the poles

# Add points where not detected
df_map <- dplyr::filter(metabarcodes_species, is.na(reads_total_species))
species_map <- species_map + geom_point(data = df_map, aes(x = longitude, y = latitude), 
    color = color_not_present, size = size_cross, shape = 3)

# Add the ice metabarcodes
df_map <- dplyr::filter(metabarcodes_species, !is.na(reads_total_species) & 
    substrate == "ice")
species_map <- species_map + geom_point(data = df_map, aes(x = longitude, y = latitude), 
    color = color_ice, size = size_points)

# Add the water metabarcodes
df_map <- dplyr::filter(metabarcodes_species, !is.na(reads_total_species) & 
    substrate == "water")
species_map <- species_map + geom_point(data = df_map, aes(x = longitude, y = latitude), 
    color = color_water, size = size_points)

# Add the water sequence
df_map <- dplyr::filter(genbank_species, substrate == "water")
species_map <- species_map + geom_point(data = df_map, aes(x = longitude, y = latitude), 
    color = color_water, size = size_points, shape = 15)

# Add the ice sequence
df_map <- dplyr::filter(genbank_species, substrate == "ice")
species_map <- species_map + geom_point(data = df_map, aes(x = longitude, y = latitude), 
    color = color_ice, size = size_points, shape = 15)

# Add the culture
df_map <- dplyr::filter(cultures)
species_map <- species_map + geom_point(data = df_map, aes(x = longitude, y = latitude), 
    color = color_cultures, size = size_points, shape = 17)

# Disply and save

species_map

ggsave(plot = species_map, filename = "Fig RCC2288 map world 2.0.pdf", width = 15, 
    height = 15, scale = 1.25, units = "cm", useDingbats = FALSE)

Polar projection

species_map_pole <- species_map + coord_map("ortho", orientation = c(75, -20, 
    0))

species_map_pole

ggsave(plot = species_map_pole, filename = paste("Fig RCC2288 map pole 2.0.pdf", 
    sep = ""), width = 10, height = 10, scale = 1.25, units = "cm", useDingbats = FALSE)