Chapter 4 Catalogue richness

4.1 Load MG data

load("data/data_mg.Rdata")

4.2 Functional ordination

gift_pcoa <- 
  gifts_elements %>% 
  as.data.frame() %>%
  vegdist(method="euclidean") %>%
  pcoa()

gift_pcoa_rel_eigen <- gift_pcoa$values$Relative_eig[1:10]

# Get genome positions
gift_pcoa_vectors <- 
  gift_pcoa$vectors %>%  # extract vectors
  as.data.frame() %>% 
  select(Axis.1,Axis.2)  # keep the first 2 axes

gift_pcoa_eigenvalues <- gift_pcoa$values$Eigenvalues[c(1,2)]

gift_pcoa_gifts <- 
  cov(gifts_elements, scale(gift_pcoa_vectors)) %*%
  diag((gift_pcoa_eigenvalues/(nrow(gifts_elements)-1))^(-0.5)) %>%
  as.data.frame() %>% 
  rename(Axis.1 = 1, Axis.2 = 2) %>% 
  rownames_to_column(var = "label") %>% 
  mutate(func = substr(label,1,3)) %>% 
  group_by(func) %>% 
  summarise(Axis.1 = mean(Axis.1),
            Axis.2 = mean(Axis.2)) %>% 
  rename(label = func) %>% 
  filter(!label %in% c("S01","S02","S03"))

The resulting plot corresponds to Figure 1a.

scale <- 20 # scale for vector loadings

gift_pcoa_vectors %>% 
  rownames_to_column(var = "genome") %>% 
  left_join(genome_stats, by = "genome") %>%
  left_join(genome_taxonomy, by = "genome") %>%
  ggplot() +
      # genome positions
      scale_color_manual(values = order_colors)+
      geom_point(aes(x = Axis.1, y = Axis.2, color = order, size = mag_length), 
                 alpha = 0.9, shape = 16) +
      # scale_color_manual(values=phylum_colors) +
      scale_size_continuous(range = c(0.05, 4)) +
      # loading positions
      geom_segment(data = gift_pcoa_gifts, 
                   aes(x = 0, y = 0, xend = Axis.1 * scale, yend = Axis.2 * scale),
                    arrow = arrow(length = unit(0.2, "cm"), 
                    type = "open", 
                    angle = 25),
                    linewidth = 0.2, 
                    color = "black") +
     # primary and secondary scale adjustments
     scale_x_continuous(name = paste0("PCoA1 (",round(gift_pcoa_rel_eigen[1]*100, digits = 2), " %)"),
                        sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA1")) +
     scale_y_continuous(name = paste0("PCoA2 (",round(gift_pcoa_rel_eigen[2]*100, digits = 2), " %)"), 
                        sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA2")) +
     geom_label_repel(data = gift_pcoa_gifts,
                      aes(label = label, x = Axis.1 * scale, y = Axis.2 * scale),
                      segment.color = 'transparent',
                      size = 3) +
    theme_minimal() + 
    theme(legend.position = "bottom",
          axis.title.x = element_text(size = 12),
          axis.title.y = element_text(size = 12),
          axis.text.x = element_text(size = 10),
          axis.text.y = element_text(size = 10),
          text = element_text(size = 8))

4.2.1 Plot genome length and average MCI gradients

# Preparing data for ploting gradients
overall_mci <-
  gifts_elements %>%
  as.data.frame() %>% 
  rownames_to_column(var = "genome") %>%
  rowwise() %>%
  mutate(overall = mean(c_across(B0101:S0301))) %>%
  select(genome, overall)

gift_pcoa_vectors_with_metadata <- 
  gift_pcoa_vectors %>% 
  rownames_to_column(var = "genome") %>% 
  left_join(overall_mci, by = "genome") %>%
  left_join(genome_stats %>%
              select(genome, mag_length), by = "genome") %>% 
  left_join(gifts_functions %>% 
              as.data.frame %>% 
              rownames_to_column(var = "genome") %>%
              select(genome, B04, B07))
  
scale_factor <- max(gift_pcoa_vectors_with_metadata$mag_length, na.rm = TRUE) /
                max(gift_pcoa_vectors_with_metadata$overall, na.rm = TRUE)


# Axis 1 gradients
gift_pcoa_vectors_with_metadata %>% 
  ggplot(aes(x = Axis.1)) +
    geom_smooth(aes(y = mag_length), color = "darkblue") +
    geom_smooth(aes(y = overall * scale_factor), color = "darkgreen") +
    scale_y_continuous(name = "MAG length",
                       sec.axis = sec_axis(~ . / scale_factor, name = "Overall MCI")) +
  theme_minimal() +
    theme(
      axis.title.y.left = element_text(color = "darkblue"),
      axis.title.y.right = element_text(color = "darkgreen"),
      axis.title.x = element_text(size = 12),
      axis.title.y = element_text(size = 12),
      axis.text.x = element_text(size = 10),
      axis.text.y = element_text(size = 10),
      text = element_text(size = 8),
      legend.position = "none")

4.2.2 Plot SCFA and vitamin biosynthesis gradients

# Axis 2 gradients
scale_factor <- max(gift_pcoa_vectors_with_metadata$B04, na.rm = TRUE) /
                max(gift_pcoa_vectors_with_metadata$B07, na.rm = TRUE)

gift_pcoa_vectors_with_metadata %>% 
  ggplot(aes(x = Axis.2)) +
    geom_smooth(aes(y = B04), color = "#ee3237") +
    geom_smooth(aes(y = B07 * scale_factor), color = "#a61f5e") +
    scale_y_continuous(name = "SCFA biosynthesis",
                       sec.axis = sec_axis(~ . / scale_factor, name = "Vitamin biosynthesis")) +
  theme_minimal() +
    theme(
      axis.title.y.left = element_text(color = "#ee3237"),
      axis.title.y.right = element_text(color = "#a61f5e"),
      axis.title.x = element_text(size = 12),
      axis.title.y = element_text(size = 12),
      axis.text.x = element_text(size = 10),
      axis.text.y = element_text(size = 10),
      text = element_text(size = 8),
      legend.position = "none")

4.3 Compare MCI and genome length between taxonomic groups

The resulting plot corresponds to Supplementary Figure S1

main_phylums <- c("Bacillota_A", "Bacillota", "Bacteroidota",
                  "Cyanobacteriota","Pseudomonadota", "Actinomycetota",
                  "Verrucomicrobiota", "Campylobacterota")

selected_taxonomy_colors <- 
  taxonomy_colors %>% 
  filter(phylum %in% main_phylums)

selected_orders <- 
  selected_taxonomy_colors %>% 
  select(order, color_order) %>% 
  filter(order != "Gastranaerophilales") %>% 
  filter(order != "Actinomycetales") %>% 
  filter(order != "Coriobacteriales")

selected_groups <- 
  selected_taxonomy_colors %>% 
  select(phylum, color_phylum) %>% 
  distinct(phylum, color_phylum) %>%
  filter(color_phylum != '#c28c5c') %>% 
  rename(order = 'phylum') %>% 
  rename(color_order = 'color_phylum') %>% 
  bind_rows(., selected_orders) %>% 
  mutate(order = factor(order, levels = c("Campylobacterota", 
                                          "Verrucomicrobiota",
                                          "Actinomycetota",
                                          "Enterobacterales",     
                                          "RF32",                 
                                          "Burkholderiales",      
                                          "Rs-D84",              
                                          "Pseudomonadota",  
                                          "Gastranaerophilales",
                                          "Cyanobacteriota", 
                                          "Bacteroidales",        
                                          "Flavobacteriales",
                                          "Bacteroidota", #
                                          "RF39",                 
                                          "Lactobacillales",      
                                          "ML615J-28",           
                                          "Erysipelotrichales",   
                                          "Acholeplasmatales",    
                                          "Bacillales",           
                                          "RFN20",
                                          "Bacillota",            
                                          "Oscillospirales",      
                                          "TANB77",               
                                          "Christensenellales",   
                                          "Lachnospirales",      
                                          "Clostridiales",        
                                          "Monoglobales_A",       
                                          "Peptostreptococcales", 
                                          "Monoglobales",        
                                          "UBA1212",
                                          "Bacillota_A"))) %>% 
  arrange(order)
genome_taxonomy <- 
  genome_taxonomy %>% 
  mutate(avg_mci = rowMeans(gifts_elements)) %>% 
  mutate(length = genome_stats$mag_length)

table_one <- 
  genome_taxonomy %>% 
  select(genome, order, avg_mci)
  
table_two <- 
  genome_taxonomy %>% 
  select(genome, phylum, avg_mci) %>%
  rename(order = 'phylum') %>% 
  bind_rows(., table_one) %>% 
  rename(group = 'order')

border_groups <- c("Bacillota_A", "Bacillota", "Bacteroidota",
                   "Cyanobacteriota", "Pseudomonadota", "Actinomycetota",
                   "Verrucomicrobiota", "Campylobacterota")

4.3.1 Average MCI by taxonomic phylum and order

table_two %>% 
  filter(group %in% selected_groups$order) %>% 
  mutate(group = factor(group, levels = selected_groups$order)) %>% 
  ggplot() +
  geom_jitter(aes(x = group, y = avg_mci), 
              color = 'grey', alpha = 0.5, width = 0.1) +
  geom_violin(aes(x = group, y = avg_mci, fill = group),
              color = NA,
              size = 0.01) +
  theme_minimal() +
  xlab("Taxonomic group") +
  ylab("Genome average metabolic capacity") +
  scale_fill_manual(values = selected_taxonomy_colors$color_order) +
  theme(legend.position = 'none') +
  coord_flip()

4.3.2 Genome length by taxonomic phylum and order

table_three <- 
  genome_taxonomy %>% 
  select(genome, order, length)

table_four <- 
  genome_taxonomy %>% 
  select(genome, phylum, length) %>%
  rename(order = 'phylum') %>% 
  bind_rows(., table_three) %>% 
  rename(group = 'order')

table_four %>% 
  filter(group %in% selected_groups$order) %>% 
  mutate(group = factor(group, levels = selected_groups$order)) %>% 
  ggplot() +
  geom_jitter(aes(x = group, y = length), color = 'grey', alpha = 0.5, width = 0.1) +
  geom_violin(aes(x = group, y = length, fill = group),
              color = NA,
              size = 0.01) +
  theme_minimal() +
  xlab("Taxonomic group") +
  ylab("Mag length") +
  scale_fill_manual(values = selected_taxonomy_colors$color_order) +
  theme(legend.position = 'none') +
  coord_flip()

rm(list = ls())