Chapter 4 Catalogue richness
4.2 Functional ordination
gift_pcoa <-
gifts_elements %>%
as.data.frame() %>%
vegdist(method="euclidean") %>%
pcoa()
gift_pcoa_rel_eigen <- gift_pcoa$values$Relative_eig[1:10]
# Get genome positions
gift_pcoa_vectors <-
gift_pcoa$vectors %>% # extract vectors
as.data.frame() %>%
select(Axis.1,Axis.2) # keep the first 2 axes
gift_pcoa_eigenvalues <- gift_pcoa$values$Eigenvalues[c(1,2)]
gift_pcoa_gifts <-
cov(gifts_elements, scale(gift_pcoa_vectors)) %*%
diag((gift_pcoa_eigenvalues/(nrow(gifts_elements)-1))^(-0.5)) %>%
as.data.frame() %>%
rename(Axis.1 = 1, Axis.2 = 2) %>%
rownames_to_column(var = "label") %>%
mutate(func = substr(label,1,3)) %>%
group_by(func) %>%
summarise(Axis.1 = mean(Axis.1),
Axis.2 = mean(Axis.2)) %>%
rename(label = func) %>%
filter(!label %in% c("S01","S02","S03"))The resulting plot corresponds to Figure 1a.
scale <- 20 # scale for vector loadings
gift_pcoa_vectors %>%
rownames_to_column(var = "genome") %>%
left_join(genome_stats, by = "genome") %>%
left_join(genome_taxonomy, by = "genome") %>%
ggplot() +
# genome positions
scale_color_manual(values = order_colors)+
geom_point(aes(x = Axis.1, y = Axis.2, color = order, size = mag_length),
alpha = 0.9, shape = 16) +
# scale_color_manual(values=phylum_colors) +
scale_size_continuous(range = c(0.05, 4)) +
# loading positions
geom_segment(data = gift_pcoa_gifts,
aes(x = 0, y = 0, xend = Axis.1 * scale, yend = Axis.2 * scale),
arrow = arrow(length = unit(0.2, "cm"),
type = "open",
angle = 25),
linewidth = 0.2,
color = "black") +
# primary and secondary scale adjustments
scale_x_continuous(name = paste0("PCoA1 (",round(gift_pcoa_rel_eigen[1]*100, digits = 2), " %)"),
sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA1")) +
scale_y_continuous(name = paste0("PCoA2 (",round(gift_pcoa_rel_eigen[2]*100, digits = 2), " %)"),
sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA2")) +
geom_label_repel(data = gift_pcoa_gifts,
aes(label = label, x = Axis.1 * scale, y = Axis.2 * scale),
segment.color = 'transparent',
size = 3) +
theme_minimal() +
theme(legend.position = "bottom",
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10),
text = element_text(size = 8))4.2.1 Plot genome length and average MCI gradients
# Preparing data for ploting gradients
overall_mci <-
gifts_elements %>%
as.data.frame() %>%
rownames_to_column(var = "genome") %>%
rowwise() %>%
mutate(overall = mean(c_across(B0101:S0301))) %>%
select(genome, overall)
gift_pcoa_vectors_with_metadata <-
gift_pcoa_vectors %>%
rownames_to_column(var = "genome") %>%
left_join(overall_mci, by = "genome") %>%
left_join(genome_stats %>%
select(genome, mag_length), by = "genome") %>%
left_join(gifts_functions %>%
as.data.frame %>%
rownames_to_column(var = "genome") %>%
select(genome, B04, B07))
scale_factor <- max(gift_pcoa_vectors_with_metadata$mag_length, na.rm = TRUE) /
max(gift_pcoa_vectors_with_metadata$overall, na.rm = TRUE)
# Axis 1 gradients
gift_pcoa_vectors_with_metadata %>%
ggplot(aes(x = Axis.1)) +
geom_smooth(aes(y = mag_length), color = "darkblue") +
geom_smooth(aes(y = overall * scale_factor), color = "darkgreen") +
scale_y_continuous(name = "MAG length",
sec.axis = sec_axis(~ . / scale_factor, name = "Overall MCI")) +
theme_minimal() +
theme(
axis.title.y.left = element_text(color = "darkblue"),
axis.title.y.right = element_text(color = "darkgreen"),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10),
text = element_text(size = 8),
legend.position = "none")4.2.2 Plot SCFA and vitamin biosynthesis gradients
# Axis 2 gradients
scale_factor <- max(gift_pcoa_vectors_with_metadata$B04, na.rm = TRUE) /
max(gift_pcoa_vectors_with_metadata$B07, na.rm = TRUE)
gift_pcoa_vectors_with_metadata %>%
ggplot(aes(x = Axis.2)) +
geom_smooth(aes(y = B04), color = "#ee3237") +
geom_smooth(aes(y = B07 * scale_factor), color = "#a61f5e") +
scale_y_continuous(name = "SCFA biosynthesis",
sec.axis = sec_axis(~ . / scale_factor, name = "Vitamin biosynthesis")) +
theme_minimal() +
theme(
axis.title.y.left = element_text(color = "#ee3237"),
axis.title.y.right = element_text(color = "#a61f5e"),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10),
text = element_text(size = 8),
legend.position = "none")4.3 Compare MCI and genome length between taxonomic groups
The resulting plot corresponds to Supplementary Figure S1
main_phylums <- c("Bacillota_A", "Bacillota", "Bacteroidota",
"Cyanobacteriota","Pseudomonadota", "Actinomycetota",
"Verrucomicrobiota", "Campylobacterota")
selected_taxonomy_colors <-
taxonomy_colors %>%
filter(phylum %in% main_phylums)
selected_orders <-
selected_taxonomy_colors %>%
select(order, color_order) %>%
filter(order != "Gastranaerophilales") %>%
filter(order != "Actinomycetales") %>%
filter(order != "Coriobacteriales")
selected_groups <-
selected_taxonomy_colors %>%
select(phylum, color_phylum) %>%
distinct(phylum, color_phylum) %>%
filter(color_phylum != '#c28c5c') %>%
rename(order = 'phylum') %>%
rename(color_order = 'color_phylum') %>%
bind_rows(., selected_orders) %>%
mutate(order = factor(order, levels = c("Campylobacterota",
"Verrucomicrobiota",
"Actinomycetota",
"Enterobacterales",
"RF32",
"Burkholderiales",
"Rs-D84",
"Pseudomonadota",
"Gastranaerophilales",
"Cyanobacteriota",
"Bacteroidales",
"Flavobacteriales",
"Bacteroidota", #
"RF39",
"Lactobacillales",
"ML615J-28",
"Erysipelotrichales",
"Acholeplasmatales",
"Bacillales",
"RFN20",
"Bacillota",
"Oscillospirales",
"TANB77",
"Christensenellales",
"Lachnospirales",
"Clostridiales",
"Monoglobales_A",
"Peptostreptococcales",
"Monoglobales",
"UBA1212",
"Bacillota_A"))) %>%
arrange(order)genome_taxonomy <-
genome_taxonomy %>%
mutate(avg_mci = rowMeans(gifts_elements)) %>%
mutate(length = genome_stats$mag_length)
table_one <-
genome_taxonomy %>%
select(genome, order, avg_mci)
table_two <-
genome_taxonomy %>%
select(genome, phylum, avg_mci) %>%
rename(order = 'phylum') %>%
bind_rows(., table_one) %>%
rename(group = 'order')
border_groups <- c("Bacillota_A", "Bacillota", "Bacteroidota",
"Cyanobacteriota", "Pseudomonadota", "Actinomycetota",
"Verrucomicrobiota", "Campylobacterota")4.3.1 Average MCI by taxonomic phylum and order
table_two %>%
filter(group %in% selected_groups$order) %>%
mutate(group = factor(group, levels = selected_groups$order)) %>%
ggplot() +
geom_jitter(aes(x = group, y = avg_mci),
color = 'grey', alpha = 0.5, width = 0.1) +
geom_violin(aes(x = group, y = avg_mci, fill = group),
color = NA,
size = 0.01) +
theme_minimal() +
xlab("Taxonomic group") +
ylab("Genome average metabolic capacity") +
scale_fill_manual(values = selected_taxonomy_colors$color_order) +
theme(legend.position = 'none') +
coord_flip()4.3.2 Genome length by taxonomic phylum and order
table_three <-
genome_taxonomy %>%
select(genome, order, length)
table_four <-
genome_taxonomy %>%
select(genome, phylum, length) %>%
rename(order = 'phylum') %>%
bind_rows(., table_three) %>%
rename(group = 'order')
table_four %>%
filter(group %in% selected_groups$order) %>%
mutate(group = factor(group, levels = selected_groups$order)) %>%
ggplot() +
geom_jitter(aes(x = group, y = length), color = 'grey', alpha = 0.5, width = 0.1) +
geom_violin(aes(x = group, y = length, fill = group),
color = NA,
size = 0.01) +
theme_minimal() +
xlab("Taxonomic group") +
ylab("Mag length") +
scale_fill_manual(values = selected_taxonomy_colors$color_order) +
theme(legend.position = 'none') +
coord_flip()