RCC Database - Export custom lists
1 Aim
Export specific custom RCC lists
2 Initialize.
The sourced file, RCC_init.R, defines all the necessary libraries and variables.
# Run the shared initialization script. The objects used further down
# (cultures, sequences, dna, map_world) are presumably created there -- confirm.
source("RCC_init.R")
# 3 Export part of the database
3.1 Moore Foundation genetic project
Micromonas, Ostreococcus, Bathycoccus, Aureococcus, Pelagomonas
# Genera targeted by the Moore Foundation genetic project.
genus_list <- c(
  "Micromonas",
  "Ostreococcus",
  "Bathycoccus",
  "Aureococcus",
  "Pelagomonas"
)

# Strains of the target genera with transformed == 0 and not_pure == 0,
# sorted taxonomically and reduced to the columns needed for the export.
cultures_genus <- cultures %>%
  filter(
    genus %in% genus_list,
    transformed == 0,
    not_pure == 0
  ) %>%
  arrange(division, class, genus, species, clade, rcc_id) %>%
  select(
    division, class, genus, species, clade,
    rcc_id, strain_name, strain_name_synonyms,
    clonal, cryopreserved, active_transfer_stopped,
    sampling_ocean, sampling_regional_sea, sampling_station,
    sampling_depth, sampling_date,
    Latitude, Longitude
  )
# write_tsv(cultures_genus,
#   'C:/Users/vaulot/Google Drive/Projects/2015 Moore Foundation Banyuls/strains_moore.txt',
#   na='')
# 3.2 Cultures isolated from Roscoff
# Number of strains for every combination of sampling station, sampling site
# and regional sea found in `cultures`.
# summarise() only peels off the last grouping variable, so the result would
# otherwise stay grouped by station and site; ungroup() returns a plain table.
list_sampling_stations <- cultures %>%
  group_by(sampling_station, sampling_site, sampling_regional_sea) %>%
  summarise(n_strains = n()) %>%
  ungroup()
# Strains whose sampling station matches Roscoff, SOMLIT or Santec, or whose
# sampling site matches Penz or Morlaix, sorted taxonomically.
# A row is dropped when neither pattern matches, including when the matching
# field is NA and the other one does not match.
cultures_Roscoff <- cultures %>%
  filter(
    str_detect(sampling_station, "Roscoff|SOMLIT|Santec") |
      str_detect(sampling_site, "Penz|Morlaix")
  ) %>%
  arrange(domain, division, class, genus, species, clade, rcc_id) %>%
  select(
    domain, division, class, genus, species, clade,
    rcc_id, strain_name, strain_name_synonyms,
    sampling_site, sampling_station, sampling_depth, sampling_date,
    Latitude, Longitude
  )
# dv_save(cultures_Roscoff, 'C:/Users/vaulot/Desktop/strains_Roscoff.txt')
# Summarize by species
# Number of Roscoff strains per taxon (domain down to clade).
# ungroup() removes the grouping that summarise() would otherwise leave on the
# result (all grouping variables except the last one).
cultures_Roscoff_summary <- cultures_Roscoff %>%
  group_by(domain, division, class, genus, species, clade) %>%
  summarise(n_strains = n()) %>%
  ungroup()
# write_tsv(cultures_Roscoff_summary,
#   'C:/Users/vaulot/Desktop/strains_Roscoff_summary.txt', na='')
# 3.3 Pelagophyceae
3.3.1 Export tables
# All strains of class Pelagophyceae, sorted taxonomically, with the taxonomy,
# strain identification and sampling columns kept for the export.
cultures_Pelago <- cultures %>%
  filter(class == "Pelagophyceae") %>%
  arrange(class, order, family, genus, species, rcc_id) %>%
  select(
    class, order, family, genus, species,
    rcc_id, strain_name, strain_name_synonyms,
    sampling_ocean, sampling_regional_sea,
    sampling_depth, sampling_date,
    Latitude, Longitude
  )
# Sequences whose gene name contains "18S", with the accession and sequence
# columns renamed and the sequence length added.
sequences_18S <- sequences %>%
  filter(str_detect(gene_name, "18S")) %>%
  select(
    rcc_id,
    accession_18S = genbank_accession,
    sequence_18S = sequence
  ) %>%
  mutate(sequence_18S_length = str_length(sequence_18S))

# One row per strain: sorting by decreasing length first makes distinct()
# keep the longest 18S sequence of each rcc_id.
sequences_18S_longer <- sequences_18S %>%
  arrange(rcc_id, desc(sequence_18S_length)) %>%
  distinct(rcc_id, .keep_all = TRUE)
# Sequences whose gene name contains "ITS", with the accession and sequence
# columns renamed.
# NOTE(review): unlike the 18S table, this one is not reduced to one row per
# rcc_id -- confirm that several ITS rows per strain are intended.
sequences_ITS <- sequences %>%
  filter(str_detect(gene_name, "ITS")) %>%
  select(
    rcc_id,
    accession_ITS = genbank_accession,
    sequence_ITS = sequence
  )

# Number of rows of `dna` for each strain.
dna_summary <- dna %>%
  group_by(rcc_id) %>%
  summarise(n_dna_samples = n())
# Attach the longest 18S sequence, the ITS sequence(s) and the DNA sample count
# to each Pelagophyceae strain. rcc_id is the only column these tables share
# with cultures_Pelago, so the key is spelled out instead of being guessed by
# a natural join (same result, no "Joining, by" message).
# NOTE(review): a strain with several ITS rows yields several output rows.
cultures_Pelago <- cultures_Pelago %>%
  left_join(sequences_18S_longer, by = "rcc_id") %>%
  left_join(sequences_ITS, by = "rcc_id") %>%
  left_join(dna_summary, by = "rcc_id")

# One row per DNA record of a Pelagophyceae strain; strains without a
# dna_date after the join are dropped.
# NOTE(review): the columns of `dna` are not visible here, so this join is left
# as a natural join on all shared columns -- confirm the intended key.
dna_Pelago <- cultures_Pelago %>%
  select(rcc_id, species, strain_name) %>%
  left_join(dna) %>%
  filter(!is.na(dna_date))
# Write the strain table and the DNA table as tab-separated files, with
# missing values written as empty fields.
# NOTE(review): the output paths are absolute and machine-specific.
write_tsv(cultures_Pelago, "C:/Users/vaulot/Desktop/RCC_Pelago.txt", na = "")
write_tsv(dna_Pelago, "C:/Users/vaulot/Desktop/RCC_Pelago_DNA.txt", na = "")
# 3.3.2 Export fasta
# Strains that have an 18S accession, with a fasta header of the form
# accession|RCC<rcc_id>|strain_name|species.
# NOTE(review): str_c() returns NA when any piece is NA, so a strain lacking a
# strain_name or species would get an NA header -- confirm this cannot happen.
cultures_Pelago_18S <- cultures_Pelago %>%
  filter(!is.na(accession_18S)) %>%
  mutate(
    sequence_name = str_c(
      accession_18S, str_c("RCC", rcc_id), strain_name, species,
      sep = "|"
    )
  )

# Store the sequences in a DNAStringSet, name each one with its header and
# write the set as an uncompressed fasta file.
seq_out <- DNAStringSet(cultures_Pelago_18S$sequence_18S)
names(seq_out) <- cultures_Pelago_18S$sequence_name
writeXStringSet(seq_out, "C:/Users/vaulot/Desktop/RCC_Pelago_18S.fasta", compress = FALSE)
# 3.3.3 Graphs
Number of strains per species
# Number of rows of cultures_Pelago per order and species; ungroup() removes
# the grouping by order that summarise() would otherwise leave on the result.
df <- cultures_Pelago %>%
  group_by(order, species) %>%
  summarise(n_strains = n()) %>%
  ungroup()

# Horizontal bar chart: one bar per species, ordered by count, coloured by
# order, with the count printed next to each bar.
ggplot(df, aes(x = reorder(species, n_strains), y = n_strains, fill = order)) +
  geom_bar(stat = "identity") +
  xlab("species") +
  ylab("Number of strains") +
  geom_text(aes(label = n_strains), vjust = 0.2, hjust = -0.2) +
  coord_flip()
# Map of new strains
# World base map (from map_world()) with one filled circle per row of
# cultures_Pelago, coloured by order.
# NOTE(review): only three fill colours are supplied -- presumably there are at
# most three orders in cultures_Pelago; confirm.
map <- map_world() +
  geom_point(
    data = cultures_Pelago,
    mapping = aes(x = Longitude, y = Latitude, fill = order),
    size = 2,
    shape = 21
  ) +
  scale_fill_manual(values = c("blue", "red", "yellow"))
map