Chapter 5 Compositional analysis
5.0.1 Taxonomy barplot per individual
genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
filter(!is.na(count)) %>%
ggplot(aes(y=count,x=sample, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
labs(x = "Relative abundance", y ="Samples") +
facet_nested(. ~ species.y + sample_type , scales="free", space="free") + #facet per day and treatment
scale_y_continuous(expand = c(0.001, 0.001)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
legend.position = "none",
strip.background.x=element_rect(color = NA, fill= "#f4f4f4"))
5.1 Taxonomy boxplot
5.1.1 Family
family_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
group_by(sample,family) %>%
summarise(relabun=sum(count))
family_summary %>%
filter(!is.na(relabun)) %>%
group_by(family) %>%
summarise(mean=mean(relabun),sd=sd(relabun)) %>%
mutate(family= sub("^f__", "", family)) %>%
arrange(-mean) %>%
tt()
family | mean | sd |
---|---|---|
Bacteroidaceae | 1.349934e-01 | 0.2355762671 |
Peptostreptococcaceae | 1.265954e-01 | 0.2003047797 |
Fusobacteriaceae | 1.179239e-01 | 0.1458738058 |
Enterobacteriaceae | 1.088605e-01 | 0.1905777531 |
Helicobacteraceae | 9.413014e-02 | 0.1767378275 |
Clostridiaceae | 6.945329e-02 | 0.1243229057 |
Campylobacteraceae | 5.591634e-02 | 0.1131315105 |
Catellicoccaceae | 5.305885e-02 | 0.1422465924 |
Mycoplasmoidaceae | 4.050434e-02 | 0.1714127764 |
Lactobacillaceae | 3.758704e-02 | 0.1600968373 |
Cellulosilyticaceae | 3.556923e-02 | 0.0582955697 |
Porphyromonadaceae | 2.197603e-02 | 0.0666469294 |
Peptoniphilaceae | 2.060237e-02 | 0.0515934647 |
CAG-274 | 1.210850e-02 | 0.0395977824 |
Burkholderiaceae_A | 9.686136e-03 | 0.0376160652 |
Chlamydiaceae | 8.117520e-03 | 0.0382848789 |
Desulfovibrionaceae | 5.580109e-03 | 0.0359452882 |
Selenomonadaceae | 5.101894e-03 | 0.0359629704 |
Erysipelotrichaceae | 5.006995e-03 | 0.0204090413 |
3.729786e-03 | 0.0335719378 | |
Veillonellaceae | 3.626526e-03 | 0.0106389862 |
Filifactoraceae | 3.247374e-03 | 0.0115032184 |
Lachnospiraceae | 2.942034e-03 | 0.0081314504 |
Anaerovoracaceae | 2.483841e-03 | 0.0089452121 |
Tissierellaceae | 2.444076e-03 | 0.0224003238 |
Ruminococcaceae | 2.382604e-03 | 0.0064807153 |
Planococcaceae | 2.135437e-03 | 0.0186267365 |
Peptococcaceae | 2.056186e-03 | 0.0072240288 |
Tepidimicrobiaceae | 1.618136e-03 | 0.0146032741 |
Mucispirillaceae | 1.598802e-03 | 0.0047897732 |
Streptococcaceae | 1.415677e-03 | 0.0052594300 |
UBA932 | 1.394879e-03 | 0.0052242104 |
Acidaminococcaceae | 9.736027e-04 | 0.0034979478 |
Mycobacteriaceae | 7.391909e-04 | 0.0049450852 |
Tissierellaceae_A | 6.640589e-04 | 0.0060862001 |
Vagococcaceae | 6.388941e-04 | 0.0035752333 |
Butyricicoccaceae | 5.795755e-04 | 0.0021993548 |
CAG-508 | 4.136741e-04 | 0.0018999987 |
Coprobacillaceae | 3.539934e-04 | 0.0020253897 |
Moraxellaceae | 3.475403e-04 | 0.0021694914 |
Atopobiaceae | 3.008484e-04 | 0.0017712414 |
Anaerotignaceae | 2.804028e-04 | 0.0011764975 |
Tannerellaceae | 2.467874e-04 | 0.0010948157 |
Oscillospiraceae | 2.313110e-04 | 0.0011111966 |
UBA3375 | 1.792426e-04 | 0.0007368232 |
Turicibacteraceae | 1.342658e-04 | 0.0007561806 |
Wohlfahrtiimonadaceae | 6.919587e-05 | 0.0006341906 |
family_arrange <- family_summary %>%
filter(!is.na(relabun)) %>%
group_by(family) %>%
summarise(mean=sum(relabun)) %>%
arrange(-mean) %>%
select(family) %>%
mutate(family= sub("^f__", "", family)) %>%
pull()
family_summary %>%
left_join(genome_metadata %>% select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
left_join(sample_metadata,by=join_by(sample==sample)) %>%
mutate(family= sub("^f__", "", family)) %>%
filter(family %in% family_arrange[1:20]) %>%
mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=family, group=family, color=phylum, fill=phylum)) +
scale_color_manual(values=phylum_colors[-8]) +
scale_fill_manual(values=phylum_colors[-8]) +
#geom_boxplot(alpha=0.2) +
geom_jitter(alpha=0.5) +
facet_nested(. ~ species + sample_type)+
theme_minimal() +
theme(legend.position = "none")
5.1.2 Genus
genus_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
group_by(sample,genus) %>%
summarise(relabun=sum(count)) %>%
filter(genus != "g__")
genus_summary %>%
filter(!is.na(relabun)) %>%
group_by(genus) %>%
summarise(mean=mean(relabun),sd=sd(relabun)) %>%
arrange(-mean) %>%
tt()
genus | mean | sd |
---|---|---|
g__Fusobacterium_A | 1.179239e-01 | 0.1458738058 |
g__Escherichia | 1.045296e-01 | 0.1860683657 |
g__GCA-900066495 | 8.862392e-02 | 0.1958817247 |
g__Campylobacter_D | 5.591634e-02 | 0.1131315105 |
g__Catellicoccus | 5.305885e-02 | 0.1422465924 |
g__Prevotella | 5.084046e-02 | 0.1755423947 |
g__Helicobacter_G | 5.055896e-02 | 0.1218576010 |
g__Zhenhengia | 3.556923e-02 | 0.0582955697 |
g__Helicobacter_D | 3.215986e-02 | 0.1029459307 |
g__Peptostreptococcus | 3.118737e-02 | 0.0694638235 |
g__Sarcina | 2.609951e-02 | 0.0559480274 |
g__Ureaplasma | 2.448075e-02 | 0.1206061525 |
g__Alloprevotella | 2.361889e-02 | 0.0611609038 |
g__Ligilactobacillus | 2.249411e-02 | 0.1154584801 |
g__Porphyromonas | 2.197603e-02 | 0.0666469294 |
g__Anaerosphaera | 1.983949e-02 | 0.0514068407 |
g__Clostridium | 1.655643e-02 | 0.0310109524 |
g__Phocaeicola | 1.526923e-02 | 0.0865751567 |
g__Clostridium_H | 1.258596e-02 | 0.0430584409 |
g__Tyzzerella | 1.210850e-02 | 0.0395977824 |
g__Mycoplasma_L | 1.195384e-02 | 0.0932743545 |
g__F6-6636 | 1.192650e-02 | 0.0705082437 |
g__Bacteroides | 1.100378e-02 | 0.0516588292 |
g__Sutterella | 9.371858e-03 | 0.0374702630 |
g__Bacteroides_E | 9.083135e-03 | 0.0665959028 |
g__Clostridium_G | 8.315260e-03 | 0.0757795952 |
g__Chlamydiifrater | 8.117520e-03 | 0.0382848789 |
g__Mailhella | 5.580109e-03 | 0.0359452882 |
g__Bulleidia | 4.803233e-03 | 0.0204153655 |
g__RGIG1987 | 4.069749e-03 | 0.0304552880 |
g__Plesiomonas | 3.884902e-03 | 0.0175479313 |
g__Hathewaya | 2.601320e-03 | 0.0104447961 |
g__Clostridium_J | 2.442256e-03 | 0.0048099737 |
g__Savagea | 2.135437e-03 | 0.0186267365 |
g__Lactobacillus | 2.090952e-03 | 0.0113853596 |
g__Cryptobacteroides | 1.394879e-03 | 0.0052242104 |
g__Lactococcus | 1.311435e-03 | 0.0052564538 |
g__Limosilactobacillus | 1.075479e-03 | 0.0080564212 |
g__Anaerotruncus | 9.660959e-04 | 0.0025849276 |
g__UBA9414 | 9.201366e-04 | 0.0044801473 |
g__Paraclostridium | 7.946527e-04 | 0.0054791406 |
g__Gemmiger | 7.589084e-04 | 0.0046336342 |
g__Mycobacterium | 7.391909e-04 | 0.0049450852 |
g__Terrisporobacter | 7.354962e-04 | 0.0030472950 |
g__Enterocloster | 6.914772e-04 | 0.0044543676 |
g__Vagococcus_C | 5.830821e-04 | 0.0035663571 |
g__Butyricicoccus | 5.795755e-04 | 0.0021993548 |
g__Edwardsiella | 4.459381e-04 | 0.0028735031 |
g__Clostridium_X | 4.187222e-04 | 0.0012889576 |
g__CAG-793 | 4.136741e-04 | 0.0018999987 |
g__Ruthenibacterium | 3.689964e-04 | 0.0023769823 |
g__Thomasclavelia | 3.539934e-04 | 0.0020253897 |
g__Parasutterella | 3.142773e-04 | 0.0016953939 |
g__Anaerotignum | 2.804028e-04 | 0.0011764975 |
g__UBA1367 | 2.741167e-04 | 0.0017661111 |
g__Clostridium_AP | 2.494888e-04 | 0.0016072985 |
g__Parabacteroides | 2.467874e-04 | 0.0010948157 |
g__Anaerofilum | 2.460626e-04 | 0.0007805127 |
g__Blautia | 2.235156e-04 | 0.0014398071 |
g__Dwaynesavagella | 2.175211e-04 | 0.0013155954 |
g__Clostridium_F | 2.163132e-04 | 0.0014772755 |
g__Faecalicoccus | 2.037620e-04 | 0.0013125687 |
g__UBA3375 | 1.792426e-04 | 0.0007368232 |
g__Turicibacter | 1.342658e-04 | 0.0007561806 |
g__Flavonifractor | 1.240599e-04 | 0.0007991467 |
g__Sellimonas | 1.191311e-04 | 0.0005809912 |
g__Streptococcus | 1.042418e-04 | 0.0005289080 |
g__Lawsonibacter | 7.202973e-05 | 0.0003356782 |
g__Vagococcus | 5.581201e-05 | 0.0003595280 |
g__RGIG3102 | 4.254089e-05 | 0.0002548549 |
g__Pseudoflavonifractor_A | 3.522144e-05 | 0.0001849164 |
g__RGIG4373 | 2.673169e-05 | 0.0001816031 |
genus_arrange <- genus_summary %>%
group_by(genus) %>%
summarise(mean=sum(relabun)) %>%
filter(genus != "g__")%>%
arrange(-mean) %>%
select(genus) %>%
mutate(genus= sub("^g__", "", genus)) %>%
pull()
genus_summary %>%
left_join(genome_metadata %>% select(genus,phylum) %>% unique(),by=join_by(genus==genus)) %>%
left_join(sample_metadata,by=join_by(sample==sample)) %>%
mutate(genus= sub("^g__", "", genus)) %>%
filter(genus %in% genus_arrange[1:20]) %>%
mutate(genus=factor(genus,levels=rev(genus_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=genus, group=genus, color=phylum)) +
scale_color_manual(values=phylum_colors) +
#geom_boxplot() +
geom_jitter(alpha=0.5) +
facet_nested(. ~ species + sample_type)+
theme_minimal()
5.2 Alpha diversity
# Calculate Hill numbers
richness <- genome_counts_filt %>%
column_to_rownames(var = "genome") %>%
dplyr::select(where(~ !all(. == 0))) %>%
hilldiv(., q = 0) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(richness = 1) %>%
rownames_to_column(var = "sample")
neutral <- genome_counts_filt %>%
column_to_rownames(var = "genome") %>%
dplyr::select(where(~ !all(. == 0))) %>%
hilldiv(., q = 1) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(neutral = 1) %>%
rownames_to_column(var = "sample")
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var = "genome") %>%
dplyr::select(where(~ !all(. == 0))) %>%
hilldiv(., q = 1, tree = genome_tree) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(phylogenetic = 1) %>%
rownames_to_column(var = "sample")
# Aggregate basal GIFT into elements
dist <- genome_gifts %>%
to.elements(., GIFT_db) %>%
traits2dist(., method = "gower")
functional <- genome_counts_filt %>%
filter(genome %in% labels(dist)[[1]]) %>%
column_to_rownames(var = "genome") %>%
dplyr::select(where(~ !all(. == 0))) %>%
hilldiv(., q = 1, dist = dist) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(functional = 1) %>%
rownames_to_column(var = "sample") %>%
mutate(functional = if_else(is.nan(functional), 1, functional))
# Merge all metrics
alpha_div <- richness %>%
full_join(neutral, by = join_by(sample == sample)) %>%
full_join(phylogenetic, by = join_by(sample == sample)) %>%
full_join(functional, by = join_by(sample == sample))
alpha_div %>%
pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
left_join(., sample_metadata, by = join_by(sample == sample)) %>%
mutate(metric=factor(metric,levels=c("richness","neutral","phylogenetic","functional"))) %>%
unite("group", c(species,sample_type), remove = FALSE) %>%
ggplot(aes(y = value, x = group, group=group, color=group, fill=group)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(alpha=0.5) +
scale_color_manual(name="Group",
breaks=c("Cathartes aura_Colon contents","Coragyps atratus_Colon contents","Cathartes aura_Stomach contents","Coragyps atratus_Stomach contents"),
labels=c("Cathartes aura (colon)","Coragyps atratus (colon)","Cathartes aura (stomach)","Coragyps atratus (stomach)"),
values=c("#e5bd5b", "#6b7398","#e2815a", "#876b96")) +
scale_fill_manual(name="Group",
breaks=c("Cathartes aura_Colon contents","Coragyps atratus_Colon contents","Cathartes aura_Stomach contents","Coragyps atratus_Stomach contents"),
labels=c("Cathartes aura (colon)","Coragyps atratus (colon)","Cathartes aura (stomach)","Coragyps atratus (stomach)"),
values=c("#e5bd5b50", "#6b739850","#e2815a50", "#876b9650")) +
facet_wrap(. ~ metric, scales = "free", ncol=4) +
coord_cartesian(xlim = c(1, NA)) +
theme_classic() +
theme(
strip.background = element_blank(),
panel.grid.minor.x = element_line(size = .1, color = "grey"),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank())
5.3 Beta diversity
beta_q0n <- genome_counts_filt %>%
column_to_rownames(., "genome") %>%
hillpair(., q = 0)
beta_q1n <- genome_counts_filt %>%
column_to_rownames(., "genome") %>%
hillpair(., q = 1)
beta_q1p <- genome_counts_filt %>%
column_to_rownames(., "genome") %>%
hillpair(., q = 1, tree = genome_tree)
beta_q1f <- genome_counts_filt %>%
filter(genome %in% labels(dist)[[1]]) %>%
column_to_rownames(., "genome") %>%
hillpair(., q = 1, dist = dist)