OSD Mamiellophyceae - Sci Rep 2019

1 Aim

  • Analysis of OSD Mamiellophyceae data for Sci Report paper 2019

1.1 Notes

2 Initialize.

This file defines all the necessary libraries and variables

4 Create OSD station map with leaflet

5 Summarize at genus level

Number of different genera : 16
# A tibble: 16 x 4
# Groups:   order [?]
   order                        genus                          n_ASVs n_seq
   <chr>                        <chr>                           <int> <dbl>
 1 Dolichomastigales            Crustomastigaceae-AB               30   332
 2 Dolichomastigales            Crustomastigaceae-C                 5    22
 3 Dolichomastigales            Crustomastigaceae_unclassified     30   204
 4 Dolichomastigales            Crustomastix                        1    12
 5 Dolichomastigales            Dolichomastigaceae-A                9    53
 6 Dolichomastigales            Dolichomastigaceae-B               80   577
 7 Dolichomastigales            Dolichomastigales unclassified     60   528
 8 Dolichomastigales            Dolichomastix                      23   102
 9 Mamiellales                  Bathycoccus                      1034 24391
10 Mamiellales                  Mamiella                           95  1455
11 Mamiellales                  Mamiellophyceae_unclassified       24   266
12 Mamiellales                  Mantoniella                       786 11921
13 Mamiellales                  Micromonas                       4285 88991
14 Mamiellales                  Ostreococcus                     4223 89199
15 Mamiellales                  RCC391                             12   321
16 Mamiellophyceae_unclassified Mamiellophyceae_unclassified       18   110
Fraction of reads of top ASV : 68.268156935977%
# A tibble: 4 x 4
# Groups:   order [?]
  order       genus        n_ASVs n_seq
  <chr>       <chr>         <int> <dbl>
1 Mamiellales Bathycoccus       1 16750
2 Mamiellales Mantoniella       2  8570
3 Mamiellales Micromonas       10 62988
4 Mamiellales Ostreococcus     10 60847

6 Summarize at species and ASV level

There are two types of species assignement

  • species_wang is the the species assigned by the Wang classifier to all ASV
  • species_ASV is the species assigned to the major ASV using careful phylogenetic analysis

6.1 Compute different dataframes

6.1.2 Function to define summary for specific groups of otus

6.2 Graphics at species levels

6.2.1 Define function for Histograms and treemaps

plot_species_summary <- function(species_summary, file_suffix = "1") {
    
    theme_chlorophyta_map <- theme(legend.position = c(0.1, 0.2), legend.background = element_rect(fill = "transparent"), 
        legend.text = element_text(size = 9), legend.title = element_text(size = 9), 
        plot.tag.position = "topright", plot.tag = element_text(size = 24, face = "bold"), 
        plot.title = element_text(size = 16))
    # Number of sequences per species
    g <- ggplot(species_summary, aes(x = reorder(species, n_seq_species), y = n_seq_species)) + 
        geom_col() + coord_flip() + xlab("Species") + ylab("Total of sequences - all samples")
    print(g)
    
    g_treemap <- ggplot(species_summary, aes(area = n_seq_species, fill = genus, 
        label = species, subgroup = genus)) + ggtitle("OSD - number of sequences") + 
        scale_fill_discrete(name = "Genus") + geom_treemap() + geom_treemap_subgroup_border() + 
        geom_treemap_text(place = "topleft", reflow = F, padding.x = grid::unit(3, 
            "mm"), padding.y = grid::unit(3, "mm"), min.size = 7) + scale_fill_viridis_d()
    print(g_treemap)
    
    ggsave(plot = g_treemap, filename = str_c("pdf/Treemap_sequences_", file_suffix, 
        ".pdf"), width = 10, height = 7, scale = 2, units = "cm", useDingbats = FALSE)
    
    ggsave(plot = g_treemap, filename = str_c("png/Treemap_sequences_", file_suffix, 
        ".png"), dpi = "print", scale = 2)
    
    treemap_dv(species_summary, c("genus", "species"), "n_seq_species", "OSD - number of sequences")
    
    
    # Mean pct of sequences per species
    
    g <- ggplot(species_summary, aes(area = mean_pct_seq_species, fill = genus, 
        label = species, subgroup = genus)) + ggtitle("OSD - mean of contribution", 
        subtitle = "") + geom_treemap() + geom_treemap_subgroup_border() + geom_treemap_text(colour = "white", 
        place = "topleft", reflow = T) + scale_fill_viridis_d()
    print(g)
    
    treemap_dv(species_summary, c("genus", "species"), "mean_pct_seq_species", 
        "OSD - mean of contribution")
    
    # Graph
    
    graph_samples_present <- ggplot(species_summary, aes(x = reorder(species, 
        pct_samples_present), y = pct_samples_present)) + geom_col() + geom_text(aes(label = n_samples_present), 
        hjust = -0.25) + coord_flip() + xlab("Species") + ylab("% of samples where species detected") + 
        ylim(0, 100) + theme_dviz_grid() + theme_chlorophyta_map + labs(title = "", 
        tag = "A")
    
    
    print(graph_samples_present)
    
    graph_samples_more_1pct <- ggplot(species_summary, aes(x = reorder(species, 
        pct_samples_present), y = pct_samples_more_1pct)) + geom_col() + geom_text(aes(label = n_samples_more_1pct), 
        hjust = -0.25) + coord_flip() + xlab("Species") + ylab("% of samples where species represents \n more than 1% of Mamiellophyceae") + 
        ylim(0, 100) + theme_dviz_grid() + theme_chlorophyta_map + labs(title = "", 
        tag = "B")
    
    print(graph_samples_more_1pct)
    
    grid_graphs <- gridExtra::grid.arrange(grobs = list(graph_samples_present, 
        graph_samples_more_1pct), ncol = 1, nrow = 2, clip = FALSE, padding = unit(0, 
        "line"))
    
    ggsave(plot = grid_graphs, filename = str_c("pdf/Presence_", file_suffix, 
        ".pdf"), width = 15, height = 20, scale = 1.8, units = "cm", useDingbats = FALSE)
    
    ggsave(plot = grid_graphs, filename = str_c("png/Presence_", file_suffix, 
        ".png"), dpi = "print", scale = 1.8)
    
    # Relation between samples present and samples more than 1 pct
    g <- ggplot(species_summary, aes(x = pct_samples_more_1pct, y = pct_samples_present, 
        label = species)) + geom_point() + geom_text(size = 2, check_overlap = TRUE, 
        vjust = 1) + xlim(0, 100)
    print(g)
    
    # Relation between sequence per species and samples present
    g <- ggplot(species_summary, aes(x = mean_pct_seq_species, y = pct_samples_present, 
        label = species)) + geom_point() + geom_text(size = 2, check_overlap = TRUE, 
        vjust = 1)
    print(g)
}

6.2.3 For selected OTUs (major ASV) at samples with more than 100 reads

8 World Maps for each species

# Define constants for the map
size_factor = 1
size_points <- 2.5
size_cross <- 1
color_ice <- "lightslateblue"
color_water <- "red"
color_cultures <- "green"
color_not_present <- "blue"
color_morpho <- "brown"

species_samples_metadata <- species_stats_2[["metadata"]]
species_summary <- species_stats_2[["summary"]]
readr::write_tsv(species_samples_metadata, "Mamiello species station.tsv")
readr::write_tsv(species_summary, "Mamiello species summary.tsv")

for (one_genus in c("Bathycoccus", "Ostreococcus", "Micromonas", "Mantoniella")) {
    
    species_selected <- species_summary$species[species_summary$genus == one_genus]
    map_array_main_fig <- list()  # to store the maps to do tiled graph
    map_array_main_fig_eu <- list()  # to store the EU maps to do tiled graph
    
    for (one_species in species_selected) {
        
        one_species_summary <- species_summary %>% filter(species == one_species)
        one_species_italic <- lookup_species_italic$species_italic[lookup_species_italic$species == 
            one_species]
        
        if (one_species_summary$max_pct_seq_species > 20) {
            species_limits = c(0, 80)
            species_breaks = c(10, 20, 40, 80)
        } else {
            species_limits = c(0, 20)
            species_breaks = c(2, 5, 10, 20)
        }
        
        species.absent <- species_samples_metadata %>% filter(n_seq_sample >= 
            100 & pct_seq < 1 & species == one_species)
        
        species.present <- species_samples_metadata %>% filter(n_seq_sample >= 
            100 & pct_seq >= 1 & species == one_species)
        
        one_species_map <- map_world(color_borders = "grey85") + theme_light(base_size = 14) + 
            geom_point(data = species.absent, aes(x = Longitude, y = Latitude), 
                color = color_not_present, size = size_cross, shape = 3) + geom_point(data = species.present, 
            aes(x = Longitude, y = Latitude, size = pct_seq, color = pct_seq, 
                alpha = pct_seq)) + theme(legend.position = c(0.15, 0.25), legend.background = element_rect(fill = "transparent"), 
            legend.text = element_text(size = 12), legend.title = element_text(size = 12)) + 
            scale_size(name = "% of Mamiellophyceae", range = c(0, 8), limits = species_limits, 
                breaks = species_breaks) + scale_alpha_continuous(name = "% of Mamiellophyceae", 
            range = c(0.5, 0.9), limits = species_limits, breaks = species_breaks) + 
            viridis::scale_color_viridis(option = "magma", name = "% of Mamiellophyceae", 
                limits = species_limits, breaks = species_breaks) + guides(colour = guide_legend()) + 
            theme(plot.title = element_text(margin = margin(t = 10, b = 5), 
                size = 18), panel.grid.minor = element_blank()) + ggtitle(parse(text = str_c(one_species_italic, 
            "~-~", one_species_summary$n_seq_species, "~reads~-~", one_species_summary$n_samples_more_1pct, 
            "~samples")))
        
        # range gives maximum and minimum size of symbols, limits the extent of the
        # scale replace guide = 'legend' by guide=FALSE to remove the legend....
        # NOT USED : guides(color = guide_legend(override.aes = list(size=5))) +
        
        one_species_map_eu <- one_species_map + coord_fixed(1.3, xlim = c(-40, 
            40), ylim = c(30, 70)) + scale_x_continuous(breaks = (-4:4) * 10) + 
            scale_y_continuous(breaks = (3:7) * 10)
        
        print(one_species_map)
        print(one_species_map_eu)
        
        map_array_main_fig[[one_species]] <- one_species_map
        map_array_main_fig_eu[[one_species]] <- one_species_map_eu
    }
    
    grid_map_main_fig <- gridExtra::grid.arrange(grobs = map_array_main_fig, 
        ncol = 2, nrow = 5, clip = FALSE, padding = unit(0, "line"))
    grid_map_main_fig_eu <- gridExtra::grid.arrange(grobs = map_array_main_fig_eu, 
        ncol = 2, nrow = 5, clip = FALSE, padding = unit(0, "line"))
    
    # Save pdf
    ggsave(plot = grid_map_main_fig, filename = str_c("pdf/Map_", one_genus, 
        ".pdf"), width = 20, height = 35, scale = 2, units = "cm", useDingbats = FALSE)
    ggsave(plot = grid_map_main_fig_eu, filename = str_c("pdf/Map_", one_genus, 
        "_EU.pdf"), width = 20, height = 35, scale = 2, units = "cm", useDingbats = FALSE)
    
    # Save png
    ggsave(plot = grid_map_main_fig, filename = str_c("png/Map_", one_genus, 
        ".png"), dpi = "print", scale = 1.8)
    
    ggsave(plot = grid_map_main_fig_eu, filename = str_c("png/Map_", one_genus, 
        "_EU.png"), dpi = "print", scale = 1.8)
    
}

9 Heatmaps

9.2 Use ComplexHeatmap

See Web site

library(ComplexHeatmap)

# Palette de couleurs

reds = colorRampPalette(c("grey95", "orange", "red3"))
couleurs = reds(10)

pdf(file = "pdf/Heatmap horizontal no clustering.pdf", width = 15, height = 6)
# png(file='png/Heatmap_horizontal.png', width =1500, height = 600)
Heatmap(as.matrix(species_heatmap_data), col = couleurs, clustering_distance_columns = function(m) vegan::vegdist(m), 
    cluster_rows = FALSE, cluster_columns = TRUE, show_column_dend = TRUE, show_row_dend = FALSE, 
    column_dend_height = unit(30, "mm"), column_title = "Sample", row_title = "", 
    column_title_side = "top", row_title_side = "left", row_title_gp = gpar(fontsize = 12, 
        fontface = "plain"), column_title_gp = gpar(fontsize = 12, fontface = "plain"), 
    column_names_gp = gpar(fontsize = 8, fontface = "plain"), column_names_side = "top", 
    column_dend_side = "bottom", name = "%", heatmap_legend_param = list(title_gp = gpar(fontface = "plain")))
while (!is.null(dev.list())) dev.off()


# pdf(file='pdf/Heatmap vertical.pdf', width =6, height = 12)
# png(file='png/Heatmap vertical.png', width =600, height = 1200)
Heatmap(as.matrix(species_heatmap_data_vertical), col = couleurs, split = 7, 
    combined_name_fun = NULL, km_title = "", clustering_distance_rows = function(m) vegan::vegdist(m), 
    clustering_distance_columns = function(m) vegan::vegdist(m), cluster_rows = TRUE, 
    cluster_columns = TRUE, show_column_dend = FALSE, show_row_dend = TRUE, 
    row_dend_width = unit(30, "mm"), column_title = "", row_title = "Sample", 
    column_title_side = "bottom", row_title_side = "right", row_title_gp = gpar(fontsize = 0, 
        fontface = "plain"), column_title_gp = gpar(fontsize = 15, fontface = "plain"), 
    row_names_gp = gpar(fontsize = 8, fontface = "plain"), name = "%", heatmap_legend_param = list(title_gp = gpar(fontface = "plain")))
while (!is.null(dev.list())) dev.off()

Daniel Vaulot

15 02 2019