NZ 2017 - Analysis step 8
Do some plots
Load the variables common to the different scripts and the necessary libraries
source('00-NZ_2017_init.R', echo=FALSE)1 Steps
- Load the phyloseq file
- Look at the different tables
- Do so simple plots
2 Phyloseq file
Load and visualize the data
phyloseq <- readRDS(file = "phyloseq_nz_2017.rds")
# Taxonomic ranks
rank_names(phyloseq)[1] "supergroup" "division" "class" "order" "family"
[6] "genus" "species" "kingdom"
# Sample variables
sample_variables(phyloseq) [1] "line_id" "sample_code_seq" "sample_code_mothur"
[4] "cruise" "area" "date"
[7] "sample_type" "site" "temp"
[10] "station" "CTD" "depth"
[13] "depth_level" "amount_fitered_L" "latitude"
[16] "longitude" "salp_size"
# Sample names
sample_names(phyloseq) [1] "BT10_B3a" "BT10_C1" "BT36_1a" "BT36_3a"
[5] "BT36_5a" "BT7_C1" "CS10_10m" "CS10_30m"
[9] "CS10_50m" "CS10_70m" "CS10_90m" "CS14_10m"
[13] "CS14_30m" "CS14_55m" "CS14_65m" "CS14_90m"
[17] "CS19_10m" "CS19_30m" "CS19_50m" "CS19_65m"
[21] "CS19_80m" "CS25_10m" "CS25_30m" "CS25_55m"
[25] "CS25_80m" "CS25_90m" "CS2_DNA1" "CS2_DNA1_bis"
[29] "CS7_10m" "CS7_30m" "CS7_50m" "CS7_70m"
[33] "CS7_80m" "CS8_10m" "CS8_30m" "CS8_50m"
[37] "CS8_50m_bis" "CS8_70m" "CS8_70m_bis" "CS8_90m"
[41] "CS8_90m_bis" "CTD07_10" "CTD07_100" "CTD07_25"
[45] "CTD07_75" "CTD08_10" "CTD08_100" "CTD08_50"
[49] "CTD08_75" "CTD09_10" "CTD09_100" "CTD09_50"
[53] "CTD09_75" "CTD10_10" "CTD10_50" "CTD10_75"
[57] "CTD11_10" "CTD11_50" "CTD11_75" "CTD12_10"
[61] "CTD12_50" "CTD12_75" "CTD20_100" "CTD20_5"
[65] "CTD20_50" "CTD20_75" "CTD31_100" "CTD31_5"
[69] "CTD31_50" "CTD31_75" "CTD40_100" "CTD40_5"
[73] "CTD40_50" "CTD40_75" "CTD43_100" "CTD43_5"
[77] "CTD43_50" "CTD43_75" "CTD53_10" "CTD53_100"
[81] "CTD53_50" "CTD53_75" "CTD56_100" "CTD56_5"
[85] "CTD56_50" "CTD56_75" "CTD61_10" "CTD61_35"
[89] "CTD61_60" "MS1002" "MS1022" "MS1030"
[93] "MS1038" "MS1042" "MS1046" "MS1050"
[97] "MS1054" "MS1062" "MS1111" "MS1115"
[101] "MS1119" "MS1123" "MS1143" "MS1147"
[105] "MS1155" "MS1159" "MS1163" "MS1167"
[109] "MS1171" "MS1175" "MS1183" "MS1232"
[113] "MS1248" "MS1256" "MS1264" "MS1268"
[117] "MS1276" "MS1280" "MS1284" "MS1292"
[121] "MS1339" "MS1343" "MS1347" "MS1354"
[125] "MS1362" "MS1366" "MS1370" "MS1374"
[129] "MS1378" "MS1382" "MS1386" "MS1390"
[133] "MS1409" "MS213" "MS219" "MS225"
[137] "MS228" "MS231" "MS234" "MS237"
[141] "MS240" "MS246" "MS279" "MS282"
[145] "MS285" "MS300" "MS306" "MS327"
[149] "MS330" "MS333" "MS336" "MS339"
[153] "MS342" "MS348" "MS401" "MS418"
[157] "MS420" "MS422" "MS425" "MS427"
[161] "MS429" "MS432" "MS436" "MS469"
[165] "MS472" "MS475" "MS496" "MS503"
[169] "MS507" "MS512" "MS514" "MS517"
[173] "MS519" "MS521" "MS524" "MS528"
[177] "MS531" "MS543" "MS549" "MS555"
[181] "MS558" "MS561" "MS564" "MS567"
[185] "MS570" "MS573" "MS607" "MS610"
[189] "MS613" "MS631" "MS643" "MS649"
[193] "MS655" "MS658" "MS661" "MS664"
[197] "MS667" "MS670" "MS673" "MS687"
[201] "MS699" "MS705" "MS711" "MS714"
[205] "MS717" "MS720" "MS723" "MS729"
[209] "MS732" "MS735" "MS738" "MS759"
[213] "MS772" "MS778" "MS784" "MS787"
[217] "MS790" "MS793" "MS796" "MS799"
[221] "MS805" "N1" "N127_T0" "N13"
[225] "N144" "N147_T0" "N163" "N167_T0"
[229] "N171_T0" "N175" "N34" "N61"
[233] "N71" "N_110" "N_136" "N_154"
[237] "N_84" "N_99" "STRVX_CTRL" "UW154"
[241] "UW163" "UW99" "UW_1" "UW_110"
[245] "UW_13" "UW_175" "UW_34" "UW_61"
[249] "UW_71_70" "UW_99_U44"
3 Do some data transformations
- Remove samples with less than reads_min
reads_min <- 3000
phyloseq <- prune_samples(sample_sums(phyloseq) > reads_min, phyloseq)
phyloseqphyloseq-class experiment-level object
otu_table() OTU Table: [ 4501 taxa and 249 samples ]
sample_data() Sample Data: [ 249 samples by 17 sample variables ]
tax_table() Taxonomy Table: [ 4501 taxa by 8 taxonomic ranks ]
- Normalize by median
total = median(sample_sums(phyloseq))
str_c("Median number of reads: ", total)[1] "Median number of reads: 15364"
standf = function(x, t = total) round(t * (x/sum(x)))
phyloseq = transform_sample_counts(phyloseq, standf)4 Define functions to transform a phyloseq table
- Function to extract photosynthetic taxa
ps_auto <- function(ps) {
ps <- subset_taxa(ps, (division %in% c("Chlorophyta", "Dinophyta", "Cryptophyta",
"Haptophyta", "Ochrophyta", "Cercozoa")) & !(class %in% c("Syndiniales",
"Sarcomonadea")))
}- Function to extract abundant taxa (> fraction)
ps_abund <- function(ps, fraction = 0.1) {
ps <- filter_taxa(ps, function(x) sum(x > total * fraction) > 0, TRUE)
}5 Abundant taxa
- Only take OTUs that represent at least 10% of reads in any given sample
phyloseq_abund <- ps_abund(phyloseq, 0.1)
phyloseq_abundphyloseq-class experiment-level object
otu_table() OTU Table: [ 55 taxa and 249 samples ]
sample_data() Sample Data: [ 249 samples by 17 sample variables ]
tax_table() Taxonomy Table: [ 55 taxa by 8 taxonomic ranks ]
# Compute Number of reads of each taxo and merge to data frame
df.phyloseq_abund_taxa_sums <- data.frame(taxa_sums(phyloseq_abund))
df.phyloseq_abund <- data.frame(tax_table(phyloseq_abund)) %>% rownames_to_column(var = "OTUNumber") %>%
bind_cols(df.phyloseq_abund_taxa_sums) %>% rename(reads = taxa_sums.phyloseq_abund.)
knitr::kable(df.phyloseq_abund, caption = "Most abundant otus - taxonomy")| OTUNumber | supergroup | division | class | order | family | genus | species | kingdom | reads |
|---|---|---|---|---|---|---|---|---|---|
| Otu0001 | Alveolata | Dinoflagellata | Syndiniales | Dino-Group-I | Dino-Group-I-Clade-1 | Dino-Group-I-Clade-1_X | Dino-Group-I-Clade-1_X_sp. | Eukaryota | 464586 |
| Otu0002 | Alveolata | Dinoflagellata | Dinophyceae | Gymnodiniales | Gymnodiniaceae | Gyrodinium | Gyrodinium_fusiforme | Eukaryota | 307160 |
| Otu0003 | Opisthokonta | Metazoa | Arthropoda | Crustacea | Maxillopoda | Oithona | Oithona_similis | Eukaryota | 100669 |
| Otu0004 | Archaeplastida | Chlorophyta | Mamiellophyceae | Mamiellales | Bathycoccaceae | Ostreococcus | Ostreococcus_lucimarinus | Eukaryota | 84514 |
| Otu0005 | Archaeplastida | Chlorophyta | Prasino-Clade-VII | Prasino-Clade-VII_X | Prasino-Clade-VII-B | Prasino-Clade-VII-B-1 | Prasino-Clade-VII-B-1_sp. | Eukaryota | 73784 |
| Otu0006 | Alveolata | Dinoflagellata | Dinophyceae | Gymnodiniales | Gymnodiniaceae | Gymnodinium | Gymnodinium_sp. | Eukaryota | 78018 |
| Otu0007 | Archaeplastida | Chlorophyta | Mamiellophyceae | Mamiellales | Bathycoccaceae | Bathycoccus | Bathycoccus_prasinos | Eukaryota | 71370 |
| Otu0008 | Opisthokonta | Metazoa | Cnidaria | Cnidaria_X | Hydrozoa | Hydrozoa_X | Hydrozoa_X_sp. | Eukaryota | 75139 |
| Otu0009 | Alveolata | Dinoflagellata | Dinophyceae | Gymnodiniales | Gymnodiniaceae | Gyrodinium | Gyrodinium_helveticum | Eukaryota | 68513 |
| Otu0010 | Alveolata | Dinoflagellata | Syndiniales | Dino-Group-I | Dino-Group-I-Clade-1 | Dino-Group-I-Clade-1_X | Dino-Group-I-Clade-1_X_sp. | Eukaryota | 53164 |
| Otu0011 | Opisthokonta | Metazoa | Arthropoda | Crustacea | Maxillopoda | Paracalanus | Paracalanus_parvus | Eukaryota | 34265 |
| Otu0012 | Stramenopiles | Ochrophyta | Pelagophyceae | Pelagomonadales | Pelagomonadaceae | Pelagomonas | Pelagomonas_calceolata | Eukaryota | 46368 |
| Otu0013 | Hacrobia | Haptophyta | Prymnesiophyceae | Phaeocystales | Phaeocystaceae | Phaeocystis | Phaeocystis_antarctica | Eukaryota | 47304 |
| Otu0015 | Alveolata | Dinoflagellata | Dinophyceae | Dinophyceae_X | Tovelliaceae | Woloszynskia | Woloszynskia_halophila | Eukaryota | 30962 |
| Otu0016 | Alveolata | Dinoflagellata | Syndiniales | Dino-Group-II | Dino-Group-II-Clade-7 | Dino-Group-II-Clade-7_X | Dino-Group-II-Clade-7_X_sp. | Eukaryota | 49716 |
| Otu0017 | Alveolata | Dinoflagellata | Syndiniales | Dino-Group-II | Dino-Group-II-Clade-10-and-11 | Dino-Group-II-Clade-10-and-11_X | Dino-Group-II-Clade-10-and-11_X_sp. | Eukaryota | 37451 |
| Otu0018 | Opisthokonta | Metazoa | Arthropoda | Crustacea | Maxillopoda | Clausocalanus | Clausocalanus_furcatus | Eukaryota | 37884 |
| Otu0020 | Opisthokonta | Metazoa | Arthropoda | Crustacea | Maxillopoda | Maxillopoda_X | Maxillopoda_X_sp. | Eukaryota | 27112 |
| Otu0022 | Opisthokonta | Metazoa | Cnidaria | Cnidaria_X | Hydrozoa | Aegina | Aegina_citrea | Eukaryota | 16402 |
| Otu0023 | Opisthokonta | Metazoa | Arthropoda | Crustacea | Maxillopoda | Clausocalanus | Clausocalanus_furcatus | Eukaryota | 25919 |
| Otu0025 | Stramenopiles | Ochrophyta | Bacillariophyta | Bacillariophyta_X | Raphid-pennate | Fragilariopsis | Fragilariopsis_sublineata | Eukaryota | 20890 |
| Otu0027 | Opisthokonta | Metazoa | Arthropoda | Crustacea | Maxillopoda | Metridia | Metridia_sp. | Eukaryota | 17887 |
| Otu0028 | Rhizaria | Radiolaria | Polycystinea | Collodaria | Collodaria_X | Collodaria_XX | Collodaria_XX_sp. | Eukaryota | 25402 |
| Otu0029 | Archaeplastida | Chlorophyta | Prasino-Clade-V | Pseudoscourfieldiales | Pycnococcaceae | Pycnococcus | Pycnococcus_provasolii | Eukaryota | 12613 |
| Otu0031 | Stramenopiles | Stramenopiles_X | MAST | MAST-3 | MAST-3I | MAST-3I_X | MAST-3I_X_sp. | Eukaryota | 15148 |
| Otu0032 | Opisthokonta | Metazoa | Arthropoda | Crustacea | Maxillopoda | Neocalanus | Neocalanus_cristatus | Eukaryota | 17172 |
| Otu0034 | Stramenopiles | Ochrophyta | Bacillariophyta | Bacillariophyta_X | Polar-centric-Mediophyceae | Minidiscus | Minidiscus_trioculatus | Eukaryota | 17523 |
| Otu0035 | Archaeplastida | Chlorophyta | Mamiellophyceae | Mamiellales | Mamiellaceae | Micromonas | Micromonas_Clade-B.E.3 | Eukaryota | 15258 |
| Otu0037 | Archaeplastida | Chlorophyta | Prasino-Clade-VII | Prasino-Clade-VII_X | Prasino-Clade-VII-A | Prasino-Clade-VII-A-3 | Prasino-Clade-VII-A-3_sp. | Eukaryota | 13240 |
| Otu0038 | Opisthokonta | Metazoa | Arthropoda | Crustacea | Maxillopoda | Neocalanus | Neocalanus_cristatus | Eukaryota | 10845 |
| Otu0039 | Hacrobia | Haptophyta | Prymnesiophyceae | Isochrysidales | Noelaerhabdaceae | Emiliania | Emiliania_huxleyi | Eukaryota | 14151 |
| Otu0042 | Alveolata | Dinoflagellata | Dinophyceae | Dinophyceae_X | Dinophyceae_XX | Dinophyceae_XXX | Dinophyceae_XXX_sp. | Eukaryota | 5193 |
| Otu0043 | Alveolata | Dinoflagellata | Dinophyceae | Gymnodiniales | Warnowiaceae | Warnowia | Warnowia_sp. | Eukaryota | 14173 |
| Otu0044 | Alveolata | Dinoflagellata | Syndiniales | Dino-Group-II | Dino-Group-II-Clade-6 | Dino-Group-II-Clade-6_X | Dino-Group-II-Clade-6_X_sp. | Eukaryota | 13169 |
| Otu0045 | Opisthokonta | Fungi | Basidiomycota | Pucciniomycotina | Microbotryomycetes | Rhodosporidium | Rhodosporidium_diobovatum | Eukaryota | 17701 |
| Otu0046 | Alveolata | Dinoflagellata | Dinophyceae | Gymnodiniales | Gymnodiniaceae | Gyrodinium | Gyrodinium_fusiforme | Eukaryota | 11301 |
| Otu0047 | Opisthokonta | Metazoa | Arthropoda | Crustacea | Maxillopoda | Delibus | Delibus_nudus | Eukaryota | 9652 |
| Otu0049 | Alveolata | Dinoflagellata | Dinophyceae | Dinophyceae_X | Dinophyceae_XX | Dinophyceae_XXX | Dinophyceae_XXX_sp. | Eukaryota | 8156 |
| Otu0053 | Alveolata | Dinoflagellata | Syndiniales | Dino-Group-II | Dino-Group-II-Clade-10-and-11 | Dino-Group-II-Clade-10-and-11_X | Dino-Group-II-Clade-10-and-11_X_sp. | Eukaryota | 10903 |
| Otu0054 | Alveolata | Dinoflagellata | Syndiniales | Dino-Group-I | Dino-Group-I-Clade-1 | Dino-Group-I-Clade-1_X | Dino-Group-I-Clade-1_X_sp. | Eukaryota | 11156 |
| Otu0055 | Alveolata | Dinoflagellata | Syndiniales | Dino-Group-I | Dino-Group-I-Clade-7 | Dino-Group-I-Clade-7_X | Dino-Group-I-Clade-7_X_sp. | Eukaryota | 12379 |
| Otu0056 | Alveolata | Dinoflagellata | Syndiniales | Dino-Group-II | Dino-Group-II-Clade-6 | Dino-Group-II-Clade-6_X | Dino-Group-II-Clade-6_X_sp. | Eukaryota | 12889 |
| Otu0060 | Opisthokonta | Metazoa | Arthropoda | Crustacea | Maxillopoda | Calocalanus | Calocalanus_sp. | Eukaryota | 7460 |
| Otu0062 | Opisthokonta | Metazoa | Arthropoda | Crustacea | Maxillopoda | Calocalanus | Calocalanus_minutus | Eukaryota | 6846 |
| Otu0064 | Opisthokonta | Metazoa | Ctenophora | Ctenophora_X | Ctenophora_XX | Hormiphora | Hormiphora_sp. | Eukaryota | 7480 |
| Otu0072 | Alveolata | Dinoflagellata | Syndiniales | Dino-Group-II | Dino-Group-II-Clade-7 | Dino-Group-II-Clade-7_X | Dino-Group-II-Clade-7_X_sp. | Eukaryota | 10112 |
| Otu0089 | Opisthokonta | Metazoa | Arthropoda | Crustacea | Maxillopoda | Eucalanus | Eucalanus_spinifer | Eukaryota | 4787 |
| Otu0095 | Rhizaria | Radiolaria | RAD-B | RAD-B_X | RAD-B-Group-IV | RAD-B-Group-IV_X | RAD-B-Group-IV_X_sp. | Eukaryota | 3857 |
| Otu0127 | Opisthokonta | Metazoa | Cnidaria | Cnidaria_X | Hydrozoa | Aegina | Aegina_citrea | Eukaryota | 4041 |
| Otu0146 | Opisthokonta | Metazoa | Urochordata | Urochordata_X | Ascidiacea | Ihlea | Ihlea_racovitzai | Eukaryota | 2821 |
| Otu0147 | Alveolata | Dinoflagellata | Syndiniales | Dino-Group-II | Dino-Group-II_X | Dino-Group-II_XX | Dino-Group-II_XX_sp. | Eukaryota | 5984 |
| Otu0171 | Alveolata | Dinoflagellata | Syndiniales | Dino-Group-II | Dino-Group-II-Clade-12 | Dino-Group-II-Clade-12_X | Dino-Group-II-Clade-12_X_sp. | Eukaryota | 3595 |
| Otu0180 | Opisthokonta | Metazoa | Arthropoda | Crustacea | Maxillopoda | Gaetanus | Gaetanus_variabilis | Eukaryota | 4906 |
| Otu0203 | Opisthokonta | Metazoa | Urochordata | Urochordata_X | Appendicularia | Appendicularia_X | Appendicularia_X_sp. | Eukaryota | 2594 |
| Otu0291 | Opisthokonta | Metazoa | Cnidaria | Cnidaria_X | Hydrozoa | Forskalia | Forskalia_edwardsi | Eukaryota | 2257 |
- Plot the abundace of each taxo
ggplot(df.phyloseq_abund) + geom_col(aes(x = fct_reorder(str_c(OTUNumber, " - ",
species), reads), y = reads, fill = class)) + coord_flip()6 Do some test plots
6.1 Salp samples - Abundant (Auto and hetero)
phyloseq_salps <- subset_samples(phyloseq_abund, sample_type == "salp gut")
plot_heatmap(phyloseq_salps, method = "NMDS", distance = "bray", taxa.label = "genus",
taxa.order = "division", sample.order = "sample_code_mothur", sample.label = "sample_code_mothur",
low = "beige", high = "red", na.value = "beige", trans = log_trans(10))6.2 Surface sample - autotrophic - abudant
phyloseq_select <- subset_samples(phyloseq, depth_level == "surface")
phyloseq_select <- ps_auto(phyloseq_select)
phyloseq_select <- ps_abund(phyloseq_select, 0.05)
plot_heatmap(phyloseq_select, method = "NMDS", distance = "bray", taxa.label = "species",
taxa.order = "class", low = "beige", high = "red", na.value = "beige", trans = NULL)6.3 Chatham rise ordered by longitude - Auto and abundant
phyloseq_select <- subset_samples(phyloseq, (cruise == "TAN1516") & (sample_type ==
"water") & !(str_detect(sample_code_mothur, "(T0|N_84|N_136|N144|U44)")))
phyloseq_select <- ps_auto(phyloseq_select)
phyloseq_select <- ps_abund(phyloseq_select, 0.05)
hmap <- plot_heatmap(phyloseq_select, method = "NMDS", distance = "bray", taxa.label = "species",
taxa.order = "division", sample.order = c("longitude"), sample.label = "sample_code_mothur",
low = "beige", high = "red", na.value = "beige", trans = NULL)
hmapggsave(plot = hmap, filename = "pdf/Heatmap_TAN1516.pdf", useDingbats = FALSE)Do a map of these stations
mothur_file_xls <- paste0("NZ_2017_mothur.xlsx")
samples_metadata <- read_excel(mothur_file_xls, sheet = "samples")
stations <- samples_metadata %>% dplyr::filter((cruise == "TAN1516") & (sample_type ==
"water") & !(str_detect(sample_code_mothur, "(T0|N_84|N_136|N144|U44|UW)")))
# Map drawing
map <- map_world() + geom_point(data = stations, aes(x = longitude, y = latitude),
fill = "blue", size = 3, shape = 21) + geom_text(data = stations, aes(x = longitude,
y = latitude, label = sample_code_mothur), nudge_x = 0.2, nudge_y = 0.3,
label_size = 8) + coord_fixed(1.3, xlim = c(165, 185), ylim = c(-36, -48)) +
scale_x_continuous(breaks = (33:37) * 5) + scale_y_continuous(breaks = (18:24) *
-2)
map# Map Google
map_back <- ggmap::get_map(location = c(lon = 175, lat = -42), zoom = 6, scale = 2,
color = "color", maptype = "satellite")
map_gg <- ggmap::ggmap(map_back) + geom_point(data = stations, aes(x = longitude,
y = latitude), fill = "white", size = 3, shape = 21) + geom_text(data = stations,
aes(x = longitude, y = latitude, label = sample_code_mothur), nudge_x = 0.2,
nudge_y = 0.3, color = "white") + xlab("Longtitude") + ylab("Latitude")
map_ggggsave(plot = map_gg, filename = "pdf/Map_TAN1516.pdf", useDingbats = FALSE)