NZ 2017 - Analysis step 8
Do some plots

Load the variables common to the different scripts and the necessary libraries

  source('00-NZ_2017_init.R', echo=FALSE)

1 Steps

  • Load the phyloseq file
  • Look at the different tables
  • Do so simple plots

2 Phyloseq file

Load and visualize the data

phyloseq <- readRDS(file = "phyloseq_nz_2017.rds")

# Taxonomic ranks
rank_names(phyloseq)
[1] "supergroup" "division"   "class"      "order"      "family"    
[6] "genus"      "species"    "kingdom"   
# Sample variables
sample_variables(phyloseq)
 [1] "line_id"            "sample_code_seq"    "sample_code_mothur"
 [4] "cruise"             "area"               "date"              
 [7] "sample_type"        "site"               "temp"              
[10] "station"            "CTD"                "depth"             
[13] "depth_level"        "amount_fitered_L"   "latitude"          
[16] "longitude"          "salp_size"         
# Sample names
sample_names(phyloseq)
  [1] "BT10_B3a"     "BT10_C1"      "BT36_1a"      "BT36_3a"     
  [5] "BT36_5a"      "BT7_C1"       "CS10_10m"     "CS10_30m"    
  [9] "CS10_50m"     "CS10_70m"     "CS10_90m"     "CS14_10m"    
 [13] "CS14_30m"     "CS14_55m"     "CS14_65m"     "CS14_90m"    
 [17] "CS19_10m"     "CS19_30m"     "CS19_50m"     "CS19_65m"    
 [21] "CS19_80m"     "CS25_10m"     "CS25_30m"     "CS25_55m"    
 [25] "CS25_80m"     "CS25_90m"     "CS2_DNA1"     "CS2_DNA1_bis"
 [29] "CS7_10m"      "CS7_30m"      "CS7_50m"      "CS7_70m"     
 [33] "CS7_80m"      "CS8_10m"      "CS8_30m"      "CS8_50m"     
 [37] "CS8_50m_bis"  "CS8_70m"      "CS8_70m_bis"  "CS8_90m"     
 [41] "CS8_90m_bis"  "CTD07_10"     "CTD07_100"    "CTD07_25"    
 [45] "CTD07_75"     "CTD08_10"     "CTD08_100"    "CTD08_50"    
 [49] "CTD08_75"     "CTD09_10"     "CTD09_100"    "CTD09_50"    
 [53] "CTD09_75"     "CTD10_10"     "CTD10_50"     "CTD10_75"    
 [57] "CTD11_10"     "CTD11_50"     "CTD11_75"     "CTD12_10"    
 [61] "CTD12_50"     "CTD12_75"     "CTD20_100"    "CTD20_5"     
 [65] "CTD20_50"     "CTD20_75"     "CTD31_100"    "CTD31_5"     
 [69] "CTD31_50"     "CTD31_75"     "CTD40_100"    "CTD40_5"     
 [73] "CTD40_50"     "CTD40_75"     "CTD43_100"    "CTD43_5"     
 [77] "CTD43_50"     "CTD43_75"     "CTD53_10"     "CTD53_100"   
 [81] "CTD53_50"     "CTD53_75"     "CTD56_100"    "CTD56_5"     
 [85] "CTD56_50"     "CTD56_75"     "CTD61_10"     "CTD61_35"    
 [89] "CTD61_60"     "MS1002"       "MS1022"       "MS1030"      
 [93] "MS1038"       "MS1042"       "MS1046"       "MS1050"      
 [97] "MS1054"       "MS1062"       "MS1111"       "MS1115"      
[101] "MS1119"       "MS1123"       "MS1143"       "MS1147"      
[105] "MS1155"       "MS1159"       "MS1163"       "MS1167"      
[109] "MS1171"       "MS1175"       "MS1183"       "MS1232"      
[113] "MS1248"       "MS1256"       "MS1264"       "MS1268"      
[117] "MS1276"       "MS1280"       "MS1284"       "MS1292"      
[121] "MS1339"       "MS1343"       "MS1347"       "MS1354"      
[125] "MS1362"       "MS1366"       "MS1370"       "MS1374"      
[129] "MS1378"       "MS1382"       "MS1386"       "MS1390"      
[133] "MS1409"       "MS213"        "MS219"        "MS225"       
[137] "MS228"        "MS231"        "MS234"        "MS237"       
[141] "MS240"        "MS246"        "MS279"        "MS282"       
[145] "MS285"        "MS300"        "MS306"        "MS327"       
[149] "MS330"        "MS333"        "MS336"        "MS339"       
[153] "MS342"        "MS348"        "MS401"        "MS418"       
[157] "MS420"        "MS422"        "MS425"        "MS427"       
[161] "MS429"        "MS432"        "MS436"        "MS469"       
[165] "MS472"        "MS475"        "MS496"        "MS503"       
[169] "MS507"        "MS512"        "MS514"        "MS517"       
[173] "MS519"        "MS521"        "MS524"        "MS528"       
[177] "MS531"        "MS543"        "MS549"        "MS555"       
[181] "MS558"        "MS561"        "MS564"        "MS567"       
[185] "MS570"        "MS573"        "MS607"        "MS610"       
[189] "MS613"        "MS631"        "MS643"        "MS649"       
[193] "MS655"        "MS658"        "MS661"        "MS664"       
[197] "MS667"        "MS670"        "MS673"        "MS687"       
[201] "MS699"        "MS705"        "MS711"        "MS714"       
[205] "MS717"        "MS720"        "MS723"        "MS729"       
[209] "MS732"        "MS735"        "MS738"        "MS759"       
[213] "MS772"        "MS778"        "MS784"        "MS787"       
[217] "MS790"        "MS793"        "MS796"        "MS799"       
[221] "MS805"        "N1"           "N127_T0"      "N13"         
[225] "N144"         "N147_T0"      "N163"         "N167_T0"     
[229] "N171_T0"      "N175"         "N34"          "N61"         
[233] "N71"          "N_110"        "N_136"        "N_154"       
[237] "N_84"         "N_99"         "STRVX_CTRL"   "UW154"       
[241] "UW163"        "UW99"         "UW_1"         "UW_110"      
[245] "UW_13"        "UW_175"       "UW_34"        "UW_61"       
[249] "UW_71_70"     "UW_99_U44"   

3 Do some data transformations

  • Remove samples with less than reads_min
reads_min <- 3000
phyloseq <- prune_samples(sample_sums(phyloseq) > reads_min, phyloseq)
phyloseq
phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 4501 taxa and 249 samples ]
sample_data() Sample Data:       [ 249 samples by 17 sample variables ]
tax_table()   Taxonomy Table:    [ 4501 taxa by 8 taxonomic ranks ]
  • Normalize by median
total = median(sample_sums(phyloseq))
str_c("Median number of reads: ", total)
[1] "Median number of reads: 15364"
standf = function(x, t = total) round(t * (x/sum(x)))
phyloseq = transform_sample_counts(phyloseq, standf)

4 Define functions to transform a phyloseq table

  • Function to extract photosynthetic taxa
ps_auto <- function(ps) {
    
    ps <- subset_taxa(ps, (division %in% c("Chlorophyta", "Dinophyta", "Cryptophyta", 
        "Haptophyta", "Ochrophyta", "Cercozoa")) & !(class %in% c("Syndiniales", 
        "Sarcomonadea")))
}
  • Function to extract abundant taxa (> fraction)
ps_abund <- function(ps, fraction = 0.1) {
    
    ps <- filter_taxa(ps, function(x) sum(x > total * fraction) > 0, TRUE)
}

5 Abundant taxa

  • Only take OTUs that represent at least 10% of reads in any given sample
phyloseq_abund <- ps_abund(phyloseq, 0.1)
phyloseq_abund
phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 55 taxa and 249 samples ]
sample_data() Sample Data:       [ 249 samples by 17 sample variables ]
tax_table()   Taxonomy Table:    [ 55 taxa by 8 taxonomic ranks ]
# Compute Number of reads of each taxo and merge to data frame
df.phyloseq_abund_taxa_sums <- data.frame(taxa_sums(phyloseq_abund))

df.phyloseq_abund <- data.frame(tax_table(phyloseq_abund)) %>% rownames_to_column(var = "OTUNumber") %>% 
    bind_cols(df.phyloseq_abund_taxa_sums) %>% rename(reads = taxa_sums.phyloseq_abund.)

knitr::kable(df.phyloseq_abund, caption = "Most abundant otus - taxonomy")
Most abundant otus - taxonomy
OTUNumber supergroup division class order family genus species kingdom reads
Otu0001 Alveolata Dinoflagellata Syndiniales Dino-Group-I Dino-Group-I-Clade-1 Dino-Group-I-Clade-1_X Dino-Group-I-Clade-1_X_sp. Eukaryota 464586
Otu0002 Alveolata Dinoflagellata Dinophyceae Gymnodiniales Gymnodiniaceae Gyrodinium Gyrodinium_fusiforme Eukaryota 307160
Otu0003 Opisthokonta Metazoa Arthropoda Crustacea Maxillopoda Oithona Oithona_similis Eukaryota 100669
Otu0004 Archaeplastida Chlorophyta Mamiellophyceae Mamiellales Bathycoccaceae Ostreococcus Ostreococcus_lucimarinus Eukaryota 84514
Otu0005 Archaeplastida Chlorophyta Prasino-Clade-VII Prasino-Clade-VII_X Prasino-Clade-VII-B Prasino-Clade-VII-B-1 Prasino-Clade-VII-B-1_sp. Eukaryota 73784
Otu0006 Alveolata Dinoflagellata Dinophyceae Gymnodiniales Gymnodiniaceae Gymnodinium Gymnodinium_sp. Eukaryota 78018
Otu0007 Archaeplastida Chlorophyta Mamiellophyceae Mamiellales Bathycoccaceae Bathycoccus Bathycoccus_prasinos Eukaryota 71370
Otu0008 Opisthokonta Metazoa Cnidaria Cnidaria_X Hydrozoa Hydrozoa_X Hydrozoa_X_sp. Eukaryota 75139
Otu0009 Alveolata Dinoflagellata Dinophyceae Gymnodiniales Gymnodiniaceae Gyrodinium Gyrodinium_helveticum Eukaryota 68513
Otu0010 Alveolata Dinoflagellata Syndiniales Dino-Group-I Dino-Group-I-Clade-1 Dino-Group-I-Clade-1_X Dino-Group-I-Clade-1_X_sp. Eukaryota 53164
Otu0011 Opisthokonta Metazoa Arthropoda Crustacea Maxillopoda Paracalanus Paracalanus_parvus Eukaryota 34265
Otu0012 Stramenopiles Ochrophyta Pelagophyceae Pelagomonadales Pelagomonadaceae Pelagomonas Pelagomonas_calceolata Eukaryota 46368
Otu0013 Hacrobia Haptophyta Prymnesiophyceae Phaeocystales Phaeocystaceae Phaeocystis Phaeocystis_antarctica Eukaryota 47304
Otu0015 Alveolata Dinoflagellata Dinophyceae Dinophyceae_X Tovelliaceae Woloszynskia Woloszynskia_halophila Eukaryota 30962
Otu0016 Alveolata Dinoflagellata Syndiniales Dino-Group-II Dino-Group-II-Clade-7 Dino-Group-II-Clade-7_X Dino-Group-II-Clade-7_X_sp. Eukaryota 49716
Otu0017 Alveolata Dinoflagellata Syndiniales Dino-Group-II Dino-Group-II-Clade-10-and-11 Dino-Group-II-Clade-10-and-11_X Dino-Group-II-Clade-10-and-11_X_sp. Eukaryota 37451
Otu0018 Opisthokonta Metazoa Arthropoda Crustacea Maxillopoda Clausocalanus Clausocalanus_furcatus Eukaryota 37884
Otu0020 Opisthokonta Metazoa Arthropoda Crustacea Maxillopoda Maxillopoda_X Maxillopoda_X_sp. Eukaryota 27112
Otu0022 Opisthokonta Metazoa Cnidaria Cnidaria_X Hydrozoa Aegina Aegina_citrea Eukaryota 16402
Otu0023 Opisthokonta Metazoa Arthropoda Crustacea Maxillopoda Clausocalanus Clausocalanus_furcatus Eukaryota 25919
Otu0025 Stramenopiles Ochrophyta Bacillariophyta Bacillariophyta_X Raphid-pennate Fragilariopsis Fragilariopsis_sublineata Eukaryota 20890
Otu0027 Opisthokonta Metazoa Arthropoda Crustacea Maxillopoda Metridia Metridia_sp. Eukaryota 17887
Otu0028 Rhizaria Radiolaria Polycystinea Collodaria Collodaria_X Collodaria_XX Collodaria_XX_sp. Eukaryota 25402
Otu0029 Archaeplastida Chlorophyta Prasino-Clade-V Pseudoscourfieldiales Pycnococcaceae Pycnococcus Pycnococcus_provasolii Eukaryota 12613
Otu0031 Stramenopiles Stramenopiles_X MAST MAST-3 MAST-3I MAST-3I_X MAST-3I_X_sp. Eukaryota 15148
Otu0032 Opisthokonta Metazoa Arthropoda Crustacea Maxillopoda Neocalanus Neocalanus_cristatus Eukaryota 17172
Otu0034 Stramenopiles Ochrophyta Bacillariophyta Bacillariophyta_X Polar-centric-Mediophyceae Minidiscus Minidiscus_trioculatus Eukaryota 17523
Otu0035 Archaeplastida Chlorophyta Mamiellophyceae Mamiellales Mamiellaceae Micromonas Micromonas_Clade-B.E.3 Eukaryota 15258
Otu0037 Archaeplastida Chlorophyta Prasino-Clade-VII Prasino-Clade-VII_X Prasino-Clade-VII-A Prasino-Clade-VII-A-3 Prasino-Clade-VII-A-3_sp. Eukaryota 13240
Otu0038 Opisthokonta Metazoa Arthropoda Crustacea Maxillopoda Neocalanus Neocalanus_cristatus Eukaryota 10845
Otu0039 Hacrobia Haptophyta Prymnesiophyceae Isochrysidales Noelaerhabdaceae Emiliania Emiliania_huxleyi Eukaryota 14151
Otu0042 Alveolata Dinoflagellata Dinophyceae Dinophyceae_X Dinophyceae_XX Dinophyceae_XXX Dinophyceae_XXX_sp. Eukaryota 5193
Otu0043 Alveolata Dinoflagellata Dinophyceae Gymnodiniales Warnowiaceae Warnowia Warnowia_sp. Eukaryota 14173
Otu0044 Alveolata Dinoflagellata Syndiniales Dino-Group-II Dino-Group-II-Clade-6 Dino-Group-II-Clade-6_X Dino-Group-II-Clade-6_X_sp. Eukaryota 13169
Otu0045 Opisthokonta Fungi Basidiomycota Pucciniomycotina Microbotryomycetes Rhodosporidium Rhodosporidium_diobovatum Eukaryota 17701
Otu0046 Alveolata Dinoflagellata Dinophyceae Gymnodiniales Gymnodiniaceae Gyrodinium Gyrodinium_fusiforme Eukaryota 11301
Otu0047 Opisthokonta Metazoa Arthropoda Crustacea Maxillopoda Delibus Delibus_nudus Eukaryota 9652
Otu0049 Alveolata Dinoflagellata Dinophyceae Dinophyceae_X Dinophyceae_XX Dinophyceae_XXX Dinophyceae_XXX_sp. Eukaryota 8156
Otu0053 Alveolata Dinoflagellata Syndiniales Dino-Group-II Dino-Group-II-Clade-10-and-11 Dino-Group-II-Clade-10-and-11_X Dino-Group-II-Clade-10-and-11_X_sp. Eukaryota 10903
Otu0054 Alveolata Dinoflagellata Syndiniales Dino-Group-I Dino-Group-I-Clade-1 Dino-Group-I-Clade-1_X Dino-Group-I-Clade-1_X_sp. Eukaryota 11156
Otu0055 Alveolata Dinoflagellata Syndiniales Dino-Group-I Dino-Group-I-Clade-7 Dino-Group-I-Clade-7_X Dino-Group-I-Clade-7_X_sp. Eukaryota 12379
Otu0056 Alveolata Dinoflagellata Syndiniales Dino-Group-II Dino-Group-II-Clade-6 Dino-Group-II-Clade-6_X Dino-Group-II-Clade-6_X_sp. Eukaryota 12889
Otu0060 Opisthokonta Metazoa Arthropoda Crustacea Maxillopoda Calocalanus Calocalanus_sp. Eukaryota 7460
Otu0062 Opisthokonta Metazoa Arthropoda Crustacea Maxillopoda Calocalanus Calocalanus_minutus Eukaryota 6846
Otu0064 Opisthokonta Metazoa Ctenophora Ctenophora_X Ctenophora_XX Hormiphora Hormiphora_sp. Eukaryota 7480
Otu0072 Alveolata Dinoflagellata Syndiniales Dino-Group-II Dino-Group-II-Clade-7 Dino-Group-II-Clade-7_X Dino-Group-II-Clade-7_X_sp. Eukaryota 10112
Otu0089 Opisthokonta Metazoa Arthropoda Crustacea Maxillopoda Eucalanus Eucalanus_spinifer Eukaryota 4787
Otu0095 Rhizaria Radiolaria RAD-B RAD-B_X RAD-B-Group-IV RAD-B-Group-IV_X RAD-B-Group-IV_X_sp. Eukaryota 3857
Otu0127 Opisthokonta Metazoa Cnidaria Cnidaria_X Hydrozoa Aegina Aegina_citrea Eukaryota 4041
Otu0146 Opisthokonta Metazoa Urochordata Urochordata_X Ascidiacea Ihlea Ihlea_racovitzai Eukaryota 2821
Otu0147 Alveolata Dinoflagellata Syndiniales Dino-Group-II Dino-Group-II_X Dino-Group-II_XX Dino-Group-II_XX_sp. Eukaryota 5984
Otu0171 Alveolata Dinoflagellata Syndiniales Dino-Group-II Dino-Group-II-Clade-12 Dino-Group-II-Clade-12_X Dino-Group-II-Clade-12_X_sp. Eukaryota 3595
Otu0180 Opisthokonta Metazoa Arthropoda Crustacea Maxillopoda Gaetanus Gaetanus_variabilis Eukaryota 4906
Otu0203 Opisthokonta Metazoa Urochordata Urochordata_X Appendicularia Appendicularia_X Appendicularia_X_sp. Eukaryota 2594
Otu0291 Opisthokonta Metazoa Cnidaria Cnidaria_X Hydrozoa Forskalia Forskalia_edwardsi Eukaryota 2257
  • Plot the abundace of each taxo
ggplot(df.phyloseq_abund) + geom_col(aes(x = fct_reorder(str_c(OTUNumber, " - ", 
    species), reads), y = reads, fill = class)) + coord_flip()

6 Do some test plots

6.1 Salp samples - Abundant (Auto and hetero)

phyloseq_salps <- subset_samples(phyloseq_abund, sample_type == "salp gut")

plot_heatmap(phyloseq_salps, method = "NMDS", distance = "bray", taxa.label = "genus", 
    taxa.order = "division", sample.order = "sample_code_mothur", sample.label = "sample_code_mothur", 
    low = "beige", high = "red", na.value = "beige", trans = log_trans(10))

6.2 Surface sample - autotrophic - abudant

phyloseq_select <- subset_samples(phyloseq, depth_level == "surface")

phyloseq_select <- ps_auto(phyloseq_select)
phyloseq_select <- ps_abund(phyloseq_select, 0.05)

plot_heatmap(phyloseq_select, method = "NMDS", distance = "bray", taxa.label = "species", 
    taxa.order = "class", low = "beige", high = "red", na.value = "beige", trans = NULL)

6.3 Chatham rise ordered by longitude - Auto and abundant

phyloseq_select <- subset_samples(phyloseq, (cruise == "TAN1516") & (sample_type == 
    "water") & !(str_detect(sample_code_mothur, "(T0|N_84|N_136|N144|U44)")))

phyloseq_select <- ps_auto(phyloseq_select)
phyloseq_select <- ps_abund(phyloseq_select, 0.05)

hmap <- plot_heatmap(phyloseq_select, method = "NMDS", distance = "bray", taxa.label = "species", 
    taxa.order = "division", sample.order = c("longitude"), sample.label = "sample_code_mothur", 
    low = "beige", high = "red", na.value = "beige", trans = NULL)

hmap

ggsave(plot = hmap, filename = "pdf/Heatmap_TAN1516.pdf", useDingbats = FALSE)

Do a map of these stations

mothur_file_xls <- paste0("NZ_2017_mothur.xlsx")
samples_metadata <- read_excel(mothur_file_xls, sheet = "samples")

stations <- samples_metadata %>% dplyr::filter((cruise == "TAN1516") & (sample_type == 
    "water") & !(str_detect(sample_code_mothur, "(T0|N_84|N_136|N144|U44|UW)")))

# Map drawing

map <- map_world() + geom_point(data = stations, aes(x = longitude, y = latitude), 
    fill = "blue", size = 3, shape = 21) + geom_text(data = stations, aes(x = longitude, 
    y = latitude, label = sample_code_mothur), nudge_x = 0.2, nudge_y = 0.3, 
    label_size = 8) + coord_fixed(1.3, xlim = c(165, 185), ylim = c(-36, -48)) + 
    scale_x_continuous(breaks = (33:37) * 5) + scale_y_continuous(breaks = (18:24) * 
    -2)
map

# Map Google

map_back <- ggmap::get_map(location = c(lon = 175, lat = -42), zoom = 6, scale = 2, 
    color = "color", maptype = "satellite")
map_gg <- ggmap::ggmap(map_back) + geom_point(data = stations, aes(x = longitude, 
    y = latitude), fill = "white", size = 3, shape = 21) + geom_text(data = stations, 
    aes(x = longitude, y = latitude, label = sample_code_mothur), nudge_x = 0.2, 
    nudge_y = 0.3, color = "white") + xlab("Longtitude") + ylab("Latitude")

map_gg

ggsave(plot = map_gg, filename = "pdf/Map_TAN1516.pdf", useDingbats = FALSE)

Daniel Vaulot

12 08 2018