RCC Database - Compute statistics

1 Aim

Compute statistics on the RCC data, in particular for the annual report.

2 Initialize

  # Run the shared initialisation script. The rest of this document uses
  # `cultures`, `order_details`, `map_world()` and dplyr / ggplot2 / readxl
  # functions without defining or loading them, so they are presumably set up
  # by this script -- TODO confirm.
  source('RCC_init.R')

3 Summaries for annual reports

Keep only the strains entered during the last year (2017).

# Strains whose `year_entered` is 2017; this subset feeds the one-year
# summaries and the map of new strains.
cultures_one_year <- filter(cultures, year_entered == 2017)

3.1 By taxonomic group

# Count strains per (domain, division): once for the whole collection and once
# for the strains of the selected year only.
# `summarise()` removes only the last grouping level, so without `ungroup()`
# both tables would stay grouped by `domain` and any later dplyr verb would
# silently operate per domain.
cultures_division <- cultures %>%
    group_by(domain, division) %>%
    summarise(n_strains = n()) %>%
    ungroup()

cultures_division_one_year <- cultures_one_year %>%
    group_by(domain, division) %>%
    summarise(n_strains = n()) %>%
    ungroup()

# Optional: keep only divisions with more than 50 strains.
# cultures_division <- cultures_division %>% filter(n_strains > 50)

# Horizontal bar chart of the number of strains per division, coloured by
# domain: first for the whole collection, then for the selected year.
for (division_counts in list(cultures_division, cultures_division_one_year)) {
    division_plot <- ggplot(
        division_counts,
        aes(x = reorder(division, n_strains), y = n_strains, fill = domain)
    ) +
        geom_col() +
        geom_text(aes(label = n_strains), vjust = 0.2, hjust = -0.2) +
        xlab("Division") +
        ylab("Number of strains") +
        coord_flip()
    # Nothing is auto-printed inside a loop, so print explicitly.
    print(division_plot)
}

3.2 Number of strains per year

# Number of strains per value of `year_entered`.
cultures_year <- summarise(group_by(cultures, year_entered), n_strains = n())

# Bar chart with the count written above each bar and slanted year labels.
ggplot(cultures_year, aes(x = year_entered, y = n_strains)) +
    geom_col() +
    geom_text(aes(label = n_strains), vjust = -0.5, hjust = 0.5) +
    scale_x_continuous(breaks = 1998:2018) +
    xlab("Year") +
    ylab("Number of strains") +
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))

3.3 Orders per year

# Count the rows of `order_details` per year and order type.
# `summarise()` removes only the last grouping level, so `ungroup()` is needed
# to avoid leaving the table grouped by `year`.
order_year <- order_details %>%
    group_by(year, order_type) %>%
    summarise(n_orders = n()) %>%
    ungroup()

# Fixed fill colour for each order type.
order_colors <- c(
    deposit = "yellow",
    collaborator = "blue",
    public = "red",
    education = "green",
    private = "black"
)

# Stacked bar chart: one bar per year, split by order type.
ggplot(order_year, aes(x = year, y = n_orders, fill = order_type)) +
    geom_col(width = 0.7) +
    xlab("Year") +
    ylab("Number of strains sent") +
    scale_x_continuous(breaks = 1998:2018) +
    scale_fill_manual(name = "order type", values = order_colors) +
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))

3.4 Map of new strains - Current year

# Base map returned by `map_world()` with one blue-filled circle (shape 21)
# per row of `cultures_one_year`, placed at its Longitude / Latitude.
map <- map_world() +
    geom_point(
        data = cultures_one_year,
        aes(x = Longitude, y = Latitude),
        fill = "blue", size = 2, shape = 21
    )
map

4 Specific exports

4.1 Number of strains isolated at a given station

# Number of cultures per distinct (Longitude, Latitude) pair; rows missing
# either coordinate are dropped first.
# `ungroup()` returns a plain table: `summarise()` alone would leave it grouped
# by `Longitude`, which is an unwanted surprise in a table meant for export.
cultures_per_station <- cultures %>%
    filter(!is.na(Longitude), !is.na(Latitude)) %>%
    group_by(Longitude, Latitude) %>%
    summarise(n_cult = n()) %>%
    ungroup()
# dv_save(cultures_per_station,'cultures_per_station.txt')

4.2 Strains isolated by plankton group 2012-2017

This was done for the 5-year HCERES committee

4.2.1 Number of strains per isolator

# Strains sampled from 2012 to 2017 inclusive. Both bounds are explicit so
# that strains sampled after 2017 are excluded, as the table name promises.
cultures_2012_2017 <- cultures %>%
    filter(year_sampled >= 2012, year_sampled <= 2017)

# Values of `isolation_by` to keep (some entries name two people).
isolators_pk <- c(
    "L. Guillou",
    "P. Gourvil",
    "A. Lopes",
    "F. Lepelletier",
    "D. Vaulot",
    "I. Probert",
    "K. Crenn",
    "F. Le Gall",
    "R. Edern",
    "E. Foulon",
    "R. Edern, D. Marie",
    "M. Tragin",
    "D. Marie",
    "E. Foulon, S. Masquelier",
    "A. Dia"
)

# Restrict the 2012-2017 strains to those isolators.
cultures_2012_2017_pk <- filter(cultures_2012_2017, isolation_by %in% isolators_pk)

# Number of strains per isolator, largest first.
cultures_isolators <- cultures_2012_2017_pk %>%
    group_by(isolation_by) %>%
    summarise(n_strains = n()) %>%
    arrange(desc(n_strains))

4.2.2 Do a map of the cultures and samples

Read the file with the locations of the samples.

# Sample positions, read from an Excel workbook.
# NOTE(review): absolute path on a local drive -- this only runs on a machine
# where the file exists at exactly this location.
sites_pk <- read_excel("C:/Daniel/Cultures/RCC database/R - RCC database/Sample positions_ECOMAP_All.xls")

Do a map with both cultures and samples

# Base map from `map_world()` with two point layers: the sample sites as
# red-filled squares (shape 22), then the selected 2012-2017 strains as
# blue-filled circles (shape 21) drawn on top.
map <- map_world() +
    geom_point(
        data = sites_pk,
        aes(x = longitude, y = latitude),
        fill = "red", size = 2, shape = 22
    ) +
    geom_point(
        data = cultures_2012_2017_pk,
        aes(x = Longitude, y = Latitude),
        fill = "blue", size = 2, shape = 21
    )
map

Daniel Vaulot

25 07 2018