RCC Database - Compute statistics
RCC Database - Compute statistics
1 Aim
Compute statistics on the RCC data, in particular for the annual report.
2 Initialize
# Run the initialisation script. The objects used further down (`cultures`,
# `order_details`, `map_world()`) are not defined in this file, so they
# presumably come from it -- TODO confirm.
source("RCC_init.R")
3 Summaries for annual reports
Keep only the cultures entered during the last year (2017).
# Subset of `cultures` whose `year_entered` is 2017.
cultures_one_year <- filter(cultures, year_entered == 2017)
3.1 By taxonomic group
# Number of strains per (domain, division), for the whole collection and for
# the one-year subset. summarise() only drops the last grouping variable, so
# ungroup() is needed to avoid handing a table still grouped by `domain` to
# any later step.
cultures_division <- cultures %>%
  group_by(domain, division) %>%
  summarise(n_strains = n()) %>%
  ungroup()
cultures_division_one_year <- cultures_one_year %>%
  group_by(domain, division) %>%
  summarise(n_strains = n()) %>%
  ungroup()
# cultures_division <- cultures_division %>% filter(n_strains > 50)

# One horizontal bar chart per table: divisions ordered by strain count,
# bars filled by domain, and the count written next to each bar.
# The loop variables are named so that they do not shadow graphics::plot()
# and stats::df().
for (division_counts in list(cultures_division, cultures_division_one_year)) {
  division_plot <- ggplot(
    division_counts,
    aes(x = reorder(division, n_strains), y = n_strains, fill = domain)
  ) +
    geom_bar(stat = "identity") +
    xlab("Division") +
    ylab("Number of strains") +
    geom_text(aes(label = n_strains), vjust = 0.2, hjust = -0.2) +
    coord_flip()
  # Explicit print(): a ggplot object is not auto-printed inside a for loop.
  print(division_plot)
}
3.2 Number of strains per year
# Number of strains per `year_entered`, drawn as a bar chart with the count
# printed above each bar and the year labels rotated by 45 degrees.
cultures_year <- cultures %>%
  group_by(year_entered) %>%
  summarise(n_strains = n())
ggplot(cultures_year, aes(x = year_entered, y = n_strains)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = n_strains), vjust = -0.5, hjust = 0.5) +
  labs(x = "Year", y = "Number of strains") +
  scale_x_continuous(breaks = 1998:2018) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
3.3 Order per year
# Number of rows of `order_details` per (year, order_type). summarise() only
# drops the last grouping variable, so ungroup() removes the remaining
# grouping by `year` from the result.
order_year <- order_details %>%
  group_by(year, order_type) %>%
  summarise(n_orders = n()) %>%
  ungroup()

# Fixed fill colour for each order type.
order_colors <- c(
  deposit = "yellow",
  collaborator = "blue",
  public = "red",
  education = "green",
  private = "black"
)

# Stacked bar chart: one bar per year, split by order type.
ggplot(order_year, aes(x = year, y = n_orders, fill = order_type)) +
  geom_bar(stat = "identity", width = 0.7) +
  xlab("Year") +
  ylab("Number of strains sent") +
  scale_x_continuous(breaks = 1998:2018) +
  scale_fill_manual("order type", values = order_colors) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
3.4 Map of new strains - Current year
# Base map from map_world() with one blue-filled circle per row of the
# one-year subset, placed at its Longitude / Latitude.
map <- map_world() +
  geom_point(
    data = cultures_one_year,
    aes(x = Longitude, y = Latitude),
    fill = "blue",
    size = 2,
    shape = 21
  )
map
4 Specific exports
4.1 Number of strains isolated at a given station
# Number of cultures per distinct (Longitude, Latitude) pair, ignoring rows
# where either coordinate is missing. summarise() only drops the last
# grouping variable, so ungroup() is needed to return a plain table rather
# than one still grouped by `Longitude`.
cultures_per_station <- cultures %>%
  filter(!is.na(Longitude), !is.na(Latitude)) %>%
  group_by(Longitude, Latitude) %>%
  summarise(n_cult = n()) %>%
  ungroup()
# dv_save(cultures_per_station,'cultures_per_station.txt')
4.2 Strains isolated by plankton group 2012-2017
This was done for the 5-year HCERES committee
4.2.1 Number of strains per isolator
# Cultures sampled after 2011.
# NOTE(review): the variable name says 2012-2017, but the filter has no upper
# bound, so any culture sampled after 2017 is also kept -- confirm intent.
cultures_2012_2017 <- filter(cultures, year_sampled > 2011)

# Values of `isolation_by` to keep (the "_pk" isolators).
isolators_pk <- c(
  "L. Guillou",
  "P. Gourvil",
  "A. Lopes",
  "F. Lepelletier",
  "D. Vaulot",
  "I. Probert",
  "K. Crenn",
  "F. Le Gall",
  "R. Edern",
  "E. Foulon",
  "R. Edern, D. Marie",
  "M. Tragin",
  "D. Marie",
  "E. Foulon, S. Masquelier",
  "A. Dia"
)
cultures_2012_2017_pk <- filter(
  cultures_2012_2017,
  isolation_by %in% isolators_pk
)

# Strain count per isolator, largest first.
cultures_isolators <- cultures_2012_2017_pk %>%
  group_by(isolation_by) %>%
  summarise(n_strains = n()) %>%
  arrange(desc(n_strains))
4.2.2 Do a map of the cultures and samples
Read file with location of samples
# Sample positions are read from an Excel workbook at an absolute,
# machine-specific Windows path.
sites_pk <- read_excel("C:/Daniel/Cultures/RCC database/R - RCC database/Sample positions_ECOMAP_All.xls")
Do a map with both cultures and samples
# Base map from map_world() with two point layers: sample sites as red-filled
# squares (shape 22), then the selected cultures as blue-filled circles
# (shape 21) drawn on top.
map <- map_world() +
  geom_point(
    data = sites_pk,
    aes(x = longitude, y = latitude),
    fill = "red",
    size = 2,
    shape = 22
  ) +
  geom_point(
    data = cultures_2012_2017_pk,
    aes(x = Longitude, y = Latitude),
    fill = "blue",
    size = 2,
    shape = 21
  )
map