Chapter 5 Compositional analysis
5.1 Taxonomy
5.1.1 Abundance barplot per individual
genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
filter(!is.na(count)) %>%
ggplot(aes(y=count,x=sample, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
labs(x = "Samples", y ="Relative Abundance") +
facet_nested(. ~ species.y + sample_type , scales="free", space="free") + #facet per day and treatment
scale_y_continuous(expand = c(0.001, 0.001)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
#legend.position = "none",
strip.background.x=element_rect(color = NA, fill= "#f4f4f4"))
### Abundance barplot per individual by host
genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
filter(!is.na(count)) %>%
ggplot(aes(y=count,x=sample, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
labs(x = "Relative abundance", y ="Samples") +
facet_nested(. ~ species.y, scales="free", space="free") + #facet per day and treatment
scale_y_continuous(expand = c(0.001, 0.001)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
#legend.position = "none",
strip.background.x=element_rect(color = NA, fill= "#f4f4f4"))
5.1.2 Abundance barplot per individual by sample type
genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS normalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
filter(!is.na(count)) %>%
ggplot(aes(y=count,x=sample, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
labs(x = "Relative abundance", y ="Samples") +
facet_nested(. ~ sample_type , scales="free", space="free") + #facet per day and treatment
scale_y_continuous(expand = c(0.001, 0.001)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
#legend.position = "none",
strip.background.x=element_rect(color = NA, fill= "#f4f4f4"))
5.1.3 Dominant Phyla
#Dominant phyla
bin_count = genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS normalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
filter(!is.na(count))
phyla_count = tapply(bin_count$count, bin_count$phylum, sum)
phyla_rel = phyla_count / sum(phyla_count)
head(sort(phyla_rel, decreasing = TRUE), 3)
p__Bacillota_A p__Bacteroidota p__Pseudomonadota
0.3031068 0.1736148 0.1371443
[1] 45
[1] 153
[1] 0.6710526
5.1.4 Unique MAGs
#Host Species Comparison
caura_count = bin_count[which(match(bin_count$species.y, "Cathartes aura")==1),]
caura_count = caura_count[-which(match(caura_count$count, 0)==1),]
# Number of MAGs identified in Cathartes aura
length(unique(caura_count$genome))
[1] 208
catratus_count = bin_count[which(match(bin_count$species.y, "Coragyps atratus")==1),]
catratus_count = catratus_count[-which(match(catratus_count$count, 0)==1),]
# Number of MAGs identified in Coragyps atratus
length(unique(catratus_count$genome))
[1] 129
# Number of MAGs unique to Cathartes aura
length(setdiff(unique(caura_count$genome), unique(catratus_count$genome)))
[1] 99
# Number of MAGs unique to Coragyps atratus
length(setdiff(unique(catratus_count$genome), unique(caura_count$genome)))
[1] 20
#Sample Type Comparison
stomach_count = bin_count[which(match(bin_count$sample_type, "Stomach contents")==1),]
stomach_count = stomach_count[-which(match(stomach_count$count, 0)==1),]
# Number of MAGs identified in Stomach Contents
length(unique(stomach_count$genome))
[1] 156
colon_count = bin_count[which(match(bin_count$sample_type, "Colon contents")==1),]
colon_count = colon_count[-which(match(colon_count$count, 0)==1),]
# Number of MAGs identified in Colon Contents
length(unique(colon_count$genome))
[1] 164
# Number of MAGs unique to Stomach Contents
length(setdiff(unique(stomach_count$genome), unique(colon_count$genome)))
[1] 64
# Number of MAGs unique to Colon Contents
length(setdiff(unique(colon_count$genome), unique(stomach_count$genome)))
[1] 72
5.2 Summary Family
family_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
group_by(sample,family) %>%
summarise(relabun=sum(count))
family_summary %>%
filter(!is.na(relabun)) %>%
group_by(family) %>%
summarise(mean=mean(relabun),sd=sd(relabun)) %>%
mutate(family= sub("^f__", "", family)) %>%
arrange(-mean) %>%
tt()
family | mean | sd |
---|---|---|
Bacteroidaceae | 1.490709e-01 | 0.2443103149 |
Fusobacteriaceae | 1.282902e-01 | 0.1443705609 |
Enterobacteriaceae | 1.251916e-01 | 0.2011343329 |
Peptostreptococcaceae | 1.251124e-01 | 0.1786407751 |
Helicobacteraceae | 8.076195e-02 | 0.1625938079 |
Clostridiaceae | 7.892501e-02 | 0.1313347535 |
Campylobacteraceae | 5.212880e-02 | 0.1075223078 |
Cellulosilyticaceae | 4.080261e-02 | 0.0611654255 |
Catellicoccaceae | 3.989325e-02 | 0.1121311440 |
Mycoplasmoidaceae | 3.328857e-02 | 0.1462071056 |
Porphyromonadaceae | 2.260172e-02 | 0.0670564521 |
Peptoniphilaceae | 2.157820e-02 | 0.0512850668 |
CAG-274 | 1.182361e-02 | 0.0379807447 |
Burkholderiaceae_A | 1.145965e-02 | 0.0407079102 |
Lactobacillaceae | 1.142778e-02 | 0.0536167065 |
Desulfovibrionaceae | 6.601819e-03 | 0.0390535137 |
Selenomonadaceae | 6.036043e-03 | 0.0390871712 |
Erysipelotrichaceae | 5.923769e-03 | 0.0220992310 |
Chlamydiaceae | 5.783864e-03 | 0.0270599475 |
4.412705e-03 | 0.0365148240 | |
Veillonellaceae | 4.290538e-03 | 0.0114594499 |
Filifactoraceae | 3.841964e-03 | 0.0124330797 |
Lachnospiraceae | 3.480717e-03 | 0.0087463260 |
Anaerovoracaceae | 2.938628e-03 | 0.0096706536 |
Tissierellaceae | 2.891583e-03 | 0.0243649074 |
Ruminococcaceae | 2.818856e-03 | 0.0069679490 |
Planococcaceae | 2.526432e-03 | 0.0202580338 |
Tepidimicrobiaceae | 1.914414e-03 | 0.0158834796 |
Mucispirillaceae | 1.891541e-03 | 0.0051614857 |
Peptococcaceae | 1.686681e-03 | 0.0048979981 |
Streptococcaceae | 1.674885e-03 | 0.0056884455 |
UBA932 | 1.650279e-03 | 0.0056509694 |
Acidaminococcaceae | 1.151868e-03 | 0.0037814945 |
Mycobacteriaceae | 8.745357e-04 | 0.0053735691 |
Tissierellaceae_A | 7.856471e-04 | 0.0066199803 |
Vagococcaceae | 7.558747e-04 | 0.0038815540 |
Butyricicoccaceae | 6.856950e-04 | 0.0023794299 |
CAG-508 | 4.894172e-04 | 0.0020598114 |
Coprobacillaceae | 4.188091e-04 | 0.0021992062 |
Moraxellaceae | 4.111745e-04 | 0.0023567466 |
Atopobiaceae | 3.559333e-04 | 0.0019235515 |
Anaerotignaceae | 3.317441e-04 | 0.0012743338 |
Tannerellaceae | 2.919738e-04 | 0.0011865247 |
Oscillospiraceae | 2.736638e-04 | 0.0012051199 |
UBA3375 | 2.120617e-04 | 0.0007979193 |
Turicibacteraceae | 1.588497e-04 | 0.0008210002 |
Wohlfahrtiimonadaceae | 8.186553e-05 | 0.0006898112 |
5.2.1 Jitterplot Family
family_arrange <- family_summary %>%
filter(!is.na(relabun)) %>%
group_by(family) %>%
summarise(mean=sum(relabun)) %>%
arrange(-mean) %>%
select(family) %>%
mutate(family= sub("^f__", "", family)) %>%
pull()
family_summary %>%
left_join(genome_metadata %>% select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
left_join(sample_metadata,by=join_by(sample==sample)) %>%
mutate(family= sub("^f__", "", family)) %>%
filter(family %in% family_arrange[1:20]) %>%
mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=family, group=family, color=phylum, fill=phylum)) +
scale_color_manual(values=phylum_colors[-8]) +
scale_fill_manual(values=phylum_colors[-8]) +
#geom_boxplot(alpha=0.2) +
geom_jitter(alpha=0.5) +
facet_nested(. ~ species + sample_type)+
theme_minimal() +
theme(legend.position = "none")
5.3 Summary Genus
genus_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
group_by(sample,genus) %>%
summarise(relabun=sum(count)) %>%
filter(genus != "g__")
genus_summary %>%
filter(!is.na(relabun)) %>%
group_by(genus) %>%
summarise(mean=mean(relabun),sd=sd(relabun)) %>%
arrange(-mean) %>%
tt()
genus | mean | sd |
---|---|---|
g__Fusobacterium_A | 1.282902e-01 | 0.1443705609 |
g__Escherichia | 1.200678e-01 | 0.1964860711 |
g__GCA-900066495 | 8.205501e-02 | 0.1712499164 |
g__Prevotella | 5.650019e-02 | 0.1892847437 |
g__Campylobacter_D | 5.212880e-02 | 0.1075223078 |
g__Helicobacter_G | 4.134742e-02 | 0.1090008497 |
g__Zhenhengia | 4.080261e-02 | 0.0611654255 |
g__Catellicoccus | 3.989325e-02 | 0.1121311440 |
g__Peptostreptococcus | 3.503106e-02 | 0.0734362406 |
g__Helicobacter_D | 3.027088e-02 | 0.0974853188 |
g__Sarcina | 2.763321e-02 | 0.0566237706 |
g__Alloprevotella | 2.368344e-02 | 0.0609490193 |
g__Porphyromonas | 2.260172e-02 | 0.0670564521 |
g__Anaerosphaera | 2.067564e-02 | 0.0510786126 |
g__Clostridium | 1.958789e-02 | 0.0328640923 |
g__Phocaeicola | 1.806500e-02 | 0.0940000374 |
g__Clostridium_H | 1.489043e-02 | 0.0465139151 |
g__Ureaplasma | 1.433108e-02 | 0.0587432097 |
g__Mycoplasma_L | 1.414257e-02 | 0.1014122406 |
g__Bacteroides | 1.301856e-02 | 0.0560145875 |
g__Tyzzerella | 1.182361e-02 | 0.0379807447 |
g__Sutterella | 1.108783e-02 | 0.0405643816 |
g__Bacteroides_E | 1.074624e-02 | 0.0723915208 |
g__Clostridium_G | 9.837772e-03 | 0.0824246806 |
g__Mailhella | 6.601819e-03 | 0.0390535137 |
g__F6-6636 | 5.985085e-03 | 0.0495264820 |
g__Chlamydiifrater | 5.783864e-03 | 0.0270599475 |
g__Bulleidia | 5.682699e-03 | 0.0221160879 |
g__RGIG1987 | 4.814914e-03 | 0.0331080045 |
g__Plesiomonas | 4.596222e-03 | 0.0190210615 |
g__Hathewaya | 3.077618e-03 | 0.0113078394 |
g__Clostridium_J | 2.889429e-03 | 0.0051109656 |
g__Savagea | 2.526432e-03 | 0.0202580338 |
g__Lactobacillus | 2.473803e-03 | 0.0123587727 |
g__Ligilactobacillus | 1.696498e-03 | 0.0080645270 |
g__Cryptobacteroides | 1.650279e-03 | 0.0056509694 |
g__Lactococcus | 1.551557e-03 | 0.0056906734 |
g__Limosilactobacillus | 1.272398e-03 | 0.0087581814 |
g__Anaerotruncus | 1.142987e-03 | 0.0027780737 |
g__UBA9414 | 1.088612e-03 | 0.0048593519 |
g__Paraclostridium | 9.401525e-04 | 0.0059546252 |
g__Gemmiger | 8.978634e-04 | 0.0050330320 |
g__Mycobacterium | 8.745357e-04 | 0.0053735691 |
g__Terrisporobacter | 8.701646e-04 | 0.0033002565 |
g__Enterocloster | 8.180857e-04 | 0.0048395418 |
g__Vagococcus_C | 6.898436e-04 | 0.0038737945 |
g__Butyricicoccus | 6.856950e-04 | 0.0023794299 |
g__Edwardsiella | 5.275888e-04 | 0.0031219822 |
g__Clostridium_X | 4.953897e-04 | 0.0013897614 |
g__CAG-793 | 4.894172e-04 | 0.0020598114 |
g__Ruthenibacterium | 4.365591e-04 | 0.0025825225 |
g__Thomasclavelia | 4.188091e-04 | 0.0021992062 |
g__Parasutterella | 3.718211e-04 | 0.0018402360 |
g__Anaerotignum | 3.317441e-04 | 0.0012743338 |
g__UBA1367 | 3.243070e-04 | 0.0019188301 |
g__Clostridium_AP | 2.951699e-04 | 0.0017462840 |
g__Parabacteroides | 2.919738e-04 | 0.0011865247 |
g__Anaerofilum | 2.911164e-04 | 0.0008420415 |
g__Blautia | 2.644410e-04 | 0.0015643087 |
g__Dwaynesavagella | 2.573489e-04 | 0.0014289254 |
g__Clostridium_F | 2.559199e-04 | 0.0016054140 |
g__Faecalicoccus | 2.410706e-04 | 0.0014260679 |
g__UBA3375 | 2.120617e-04 | 0.0007979193 |
g__Turicibacter | 1.588497e-04 | 0.0008210002 |
g__Flavonifractor | 1.467751e-04 | 0.0008682496 |
g__Sellimonas | 1.409439e-04 | 0.0006301750 |
g__Streptococcus | 1.233284e-04 | 0.0005738541 |
g__Lawsonibacter | 8.521827e-05 | 0.0003639590 |
g__Vagococcus | 6.603111e-05 | 0.0003906168 |
g__RGIG3102 | 5.033007e-05 | 0.0002767955 |
g__Pseudoflavonifractor_A | 4.167044e-05 | 0.0002006784 |
g__RGIG4373 | 3.162623e-05 | 0.0001973511 |
5.3.1 Jitterplot Genus
genus_arrange <- genus_summary %>%
group_by(genus) %>%
summarise(mean=sum(relabun)) %>%
filter(genus != "g__")%>%
arrange(-mean) %>%
select(genus) %>%
mutate(genus= sub("^g__", "", genus)) %>%
pull()
genus_summary %>%
left_join(genome_metadata %>% select(genus,phylum) %>% unique(),by=join_by(genus==genus)) %>%
left_join(sample_metadata,by=join_by(sample==sample)) %>%
mutate(genus= sub("^g__", "", genus)) %>%
filter(genus %in% genus_arrange[1:20]) %>%
mutate(genus=factor(genus,levels=rev(genus_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=genus, group=genus, color=phylum)) +
scale_color_manual(values=phylum_colors) +
#geom_boxplot() +
geom_jitter(alpha=0.5) +
facet_nested(. ~ species + sample_type)+
theme_minimal()
5.4 Alpha diversity
5.4.1 Calculate Hill Numbers
# Calculate Hill numbers
richness <- genome_counts_filt %>%
column_to_rownames(var = "genome") %>%
dplyr::select(where(~ !all(. == 0))) %>%
hilldiv(., q = 0) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(richness = 1) %>%
rownames_to_column(var = "sample")
neutral <- genome_counts_filt %>%
column_to_rownames(var = "genome") %>%
dplyr::select(where(~ !all(. == 0))) %>%
hilldiv(., q = 1) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(neutral = 1) %>%
rownames_to_column(var = "sample")
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var = "genome") %>%
dplyr::select(where(~ !all(. == 0))) %>%
hilldiv(., q = 1, tree = genome_tree) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(phylogenetic = 1) %>%
rownames_to_column(var = "sample")
# Aggregate basal GIFT into elements
dist <- genome_gifts %>%
to.elements(., GIFT_db2) %>%
traits2dist(., method = "gower")
functional <- genome_counts_filt %>%
filter(genome %in% labels(dist)[[1]]) %>%
column_to_rownames(var = "genome") %>%
dplyr::select(where(~ !all(. == 0))) %>%
hilldiv(., q = 1, dist = dist) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(functional = 1) %>%
rownames_to_column(var = "sample") %>%
mutate(functional = if_else(is.nan(functional), 1, functional))
# Merge all metrics
alpha_div <- richness %>%
full_join(neutral, by = join_by(sample == sample)) %>%
full_join(phylogenetic, by = join_by(sample == sample)) %>%
full_join(functional, by = join_by(sample == sample))
5.4.2 Plot Alpha Diversity
alpha_plot = alpha_div %>%
pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
left_join(., sample_metadata, by = join_by(sample == sample)) %>%
mutate(metric=factor(metric,levels=c("richness","neutral","phylogenetic","functional"))) %>%
unite("group", c(species,sample_type), remove = FALSE) %>%
ggplot(aes(y = value, x = group, group=group, color=group, fill=group)) +
geom_boxplot(outlier.shape = NA) +
stat_summary(fun.y = mean, color = "black", geom = "point", shape = 20, fill = "black")+
geom_jitter(alpha=0.5) +
scale_color_manual(name="Group",
breaks=c("Cathartes aura_Colon contents","Coragyps atratus_Colon contents","Cathartes aura_Stomach contents","Coragyps atratus_Stomach contents"),
labels=c("Cathartes aura (colon)","Coragyps atratus (colon)","Cathartes aura (stomach)","Coragyps atratus (stomach)"),
values=c(species_colors, "#FF6A6A", "#8B8989" )) +
scale_fill_manual(name="Group",
breaks=c("Cathartes aura_Colon contents","Coragyps atratus_Colon contents","Cathartes aura_Stomach contents","Coragyps atratus_Stomach contents"),
labels=c("Cathartes aura (colon)","Coragyps atratus (colon)","Cathartes aura (stomach)","Coragyps atratus (stomach)"),
values=c(species_colors, "#FF6A6A", "#8B8989" )) +
facet_wrap(. ~ metric, scales = "free", ncol=4) +
coord_cartesian(xlim = c(1, NA)) +
theme_classic() +
theme(
strip.background = element_blank(),
panel.grid.minor.x = element_line(size = .1, color = "grey"),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank())
alpha_long <- alpha_div %>%
pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
mutate(
metric = factor(metric, levels = c("richness", "neutral", "phylogenetic", "functional")),
group = paste(species, sample_type, sep = "_"))
5.5 Beta diversity
#Calculate Hill Numbers
beta_q0n <- genome_counts_filt %>%
column_to_rownames(., "genome") %>%
filter(rowSums(. != 0, na.rm = TRUE) > 0) %>%
select_if(~!all(. == 0)) %>%
hillpair(., q = 0)
beta_q1n <- genome_counts_filt %>%
column_to_rownames(., "genome") %>%
filter(rowSums(. != 0, na.rm = TRUE) > 0) %>%
select_if(~!all(. == 0)) %>%
hillpair(., q = 1)
beta_q1p <- genome_counts_filt %>%
column_to_rownames(., "genome") %>%
hillpair(., q = 1, tree = genome_tree)
beta_q1f <- genome_counts_filt %>%
filter(genome %in% labels(dist)[[1]]) %>%
column_to_rownames(., "genome") %>%
filter(rowSums(. != 0, na.rm = TRUE) > 0) %>%
select_if(~!all(. == 0)) %>%
hillpair(., q = 1, dist = dist)
5.6 Richness diversity plot (Species)
group_richness = sample_metadata[na.omit(match(sample_metadata$sample, names(beta_q0n$S))),]
beta_q0n$S %>%
vegan::metaMDS(., trymax = 500, k = 2, trace=0) %>%
vegan::scores() %>%
as_tibble(., rownames = "sample") %>%
dplyr::left_join(sample_metadata, by = join_by(sample == sample)) %>%
group_by(species #, sample_type
) %>%
mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(aes(x = NMDS1, y = NMDS2, color = species, #shape = as.factor(sample_type)
)) +
geom_point(size = 4) +
scale_color_manual(values = species_colors) +
geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9) +
theme(
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
panel.background = element_blank(),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right", legend.box = "vertical"
) +
#labs(shape="Sample Type") +
labs(color="Species")
#+geom_text_repel(aes(label = sample), size=3)
#Richness
betadisper(beta_q0n$S, group$species) %>% permutest(., pairwise = TRUE)
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 1 0.00597 0.0059740 1.0104 999 0.333
Residuals 69 0.40795 0.0059123
Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
Cathartes aura Coragyps atratus
Cathartes aura 0.34
Coragyps atratus 0.31831
adonis2(beta_q0n$S ~ species,
data = group_richness %>% arrange(match(sample,labels(beta_q0n$S))),
permutations = 999) %>%
broom::tidy() %>%
tt()
term | df | SumOfSqs | R2 | statistic | p.value |
---|---|---|---|---|---|
species | 1 | 0.4870221 | 0.01722339 | 1.209241 | 0.206 |
Residual | 69 | 27.7897652 | 0.98277661 | NA | NA |
Total | 70 | 28.2767873 | 1.00000000 | NA | NA |
5.6.1 Richness diversity plot (Sample Type)
group_richness = sample_metadata[na.omit(match(sample_metadata$sample, names(beta_q0n$S))),]
beta_q0n$S %>%
vegan::metaMDS(., trymax = 500, k = 2, trace=0) %>%
vegan::scores() %>%
as_tibble(., rownames = "sample") %>%
dplyr::left_join(sample_metadata, by = join_by(sample == sample)) %>%
group_by(sample_type #, species
) %>%
mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(aes(x = NMDS1, y = NMDS2, color = sample_type, #shape = as.factor(species)
)) +
geom_point(size = 4) +
scale_color_manual(values = sampletype_colors) +
geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9) +
theme(
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
panel.background = element_blank(),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right", legend.box = "vertical"
) +
#labs(shape="Species") +
labs(color="Sample Type")
#+geom_text_repel(aes(label = sample), size=3)
#Richness
betadisper(beta_q0n$S, group_richness$sample_type) %>% permutest(., pairwise = TRUE)
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 1 0.00480 0.0048029 0.8822 999 0.348
Residuals 69 0.37563 0.0054440
Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
Colon contents Stomach contents
Colon contents 0.351
Stomach contents 0.35087
adonis2(beta_q0n$S ~ sample_type,
data = group_richness %>% arrange(match(sample,labels(beta_q0n$S))),
permutations = 999) %>%
broom::tidy() %>%
tt()
term | df | SumOfSqs | R2 | statistic | p.value |
---|---|---|---|---|---|
sample_type | 1 | 0.6489657 | 0.02295048 | 1.620781 | 0.051 |
Residual | 69 | 27.6278216 | 0.97704952 | NA | NA |
Total | 70 | 28.2767873 | 1.00000000 | NA | NA |
5.6.2 Richness diversity plot (Sex)
group_richness = sample_metadata[na.omit(match(sample_metadata$sample, names(beta_q0n$S))),]
beta_q0n$S %>%
vegan::metaMDS(., trymax = 500, k = 2, trace=0) %>%
vegan::scores() %>%
as_tibble(., rownames = "sample") %>%
dplyr::left_join(sample_metadata, by = join_by(sample == sample)) %>%
group_by(sex) %>%
mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(aes(x = NMDS1, y = NMDS2, color = sex #, fill = sex, shape = as.factor(state)
)) +
geom_point(size = 4) +
scale_color_manual(values = sex_colors) +
geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9) +
theme(
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
panel.background = element_blank(),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right", legend.box = "vertical"
) +
labs(color="Sex")
#Calculate dispersion and significance
betadisper(beta_q0n$S, group_richness$sex) %>% permutest(., pairwise = TRUE)
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 1 0.00510 0.0050977 0.9912 999 0.333
Residuals 69 0.35487 0.0051430
Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
Female Male
Female 0.335
Male 0.32293
adonis2(beta_q0n$S ~ sex,
data = group_richness %>% arrange(match(sample,labels(beta_q0n$S))),
permutations = 999) %>%
broom::tidy() %>%
tt()
term | df | SumOfSqs | R2 | statistic | p.value |
---|---|---|---|---|---|
sex | 1 | 0.5361512 | 0.01896083 | 1.333583 | 0.128 |
Residual | 69 | 27.7406361 | 0.98103917 | NA | NA |
Total | 70 | 28.2767873 | 1.00000000 | NA | NA |
5.6.3 Richness diversity plot (State)
group_richness = sample_metadata[na.omit(match(sample_metadata$sample, names(beta_q0n$S))),]
beta_q0n$S %>%
vegan::metaMDS(., trymax = 500, k = 2, trace=0) %>%
vegan::scores() %>%
as_tibble(., rownames = "sample") %>%
dplyr::left_join(sample_metadata, by = join_by(sample == sample)) %>%
group_by(state) %>%
mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(aes(x = NMDS1, y = NMDS2, color = state # shape = as.factor(sex)
)) +
geom_point(size = 4) +
scale_color_manual(values = state_colors) +
geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9) +
theme(
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
panel.background = element_blank(),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right", legend.box = "vertical"
) +
labs(color="State")
#Calculate dispersion and significance
betadisper(beta_q0n$S, group_richness$state) %>% permutest(., pairwise = TRUE)
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 1 0.00435 0.0043537 0.8142 999 0.363
Residuals 69 0.36896 0.0053472
Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
Florida Texas
Florida 0.364
Texas 0.37002
#betadisper(beta_q0n$C, sample_metadata_non0$state) %>% permutest(., pairwise = TRUE)
adonis2(beta_q0n$S ~ state,
data = group_richness %>% arrange(match(sample,labels(beta_q0n$S))),
permutations = 999) %>%
broom::tidy() %>%
tt()
term | df | SumOfSqs | R2 | statistic | p.value |
---|---|---|---|---|---|
state | 1 | 0.6060866 | 0.02143407 | 1.511345 | 0.049 |
Residual | 69 | 27.6707008 | 0.97856593 | NA | NA |
Total | 70 | 28.2767873 | 1.00000000 | NA | NA |
5.6.4 Neutral Diversity (Species)
group_neutral = sample_metadata[na.omit(match(sample_metadata$sample, names(beta_q1n$S))),]
beta_q1n$S %>%
vegan::metaMDS(., trymax = 500, k = 2, trace=0) %>%
vegan::scores() %>%
as_tibble(., rownames = "sample") %>%
dplyr::left_join(sample_metadata, by = join_by(sample == sample)) %>%
group_by(species) %>%
mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(aes(x = NMDS1, y = NMDS2, color = species#, fill = sample_type, shape = as.factor(sample_type)
)) +
geom_point(size = 4) +
scale_color_manual(values = species_colors)+
geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9) +
theme(
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
panel.background = element_blank(),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right", legend.box = "vertical"
)
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 1 0.00306 0.0030636 0.2567 999 0.601
Residuals 69 0.82341 0.0119335
Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
Cathartes aura Coragyps atratus
Cathartes aura 0.603
Coragyps atratus 0.614
adonis2(beta_q1n$S ~ species,
data = group_neutral %>% arrange(match(sample,labels(beta_q1n$S))),
permutations = 999) %>%
broom::tidy() %>%
tt()
term | df | SumOfSqs | R2 | statistic | p.value |
---|---|---|---|---|---|
species | 1 | 0.3996775 | 0.01464862 | 1.025781 | 0.418 |
Residual | 69 | 26.8846353 | 0.98535138 | NA | NA |
Total | 70 | 27.2843129 | 1.00000000 | NA | NA |
5.6.5 Neutral Diversity (Sample Type)
group_neutral = sample_metadata[na.omit(match(sample_metadata$sample, names(beta_q1n$S))),]
beta_q1n$S %>%
vegan::metaMDS(., trymax = 500, k = 2, trace=0) %>%
vegan::scores() %>%
as_tibble(., rownames = "sample") %>%
dplyr::left_join(sample_metadata, by = join_by(sample == sample)) %>%
group_by(sample_type) %>%
mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(aes(x = NMDS1, y = NMDS2, color = sample_type#, fill = sample_type, shape = as.factor(sample_type)
)) +
geom_point(size = 4) +
scale_color_manual(values = sampletype_colors)+
geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9) +
theme(
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
panel.background = element_blank(),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right", legend.box = "vertical"
)
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 1 0.01612 0.016118 1.2912 999 0.25
Residuals 69 0.86132 0.012483
Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
Colon contents Stomach contents
Colon contents 0.256
Stomach contents 0.25976
adonis2(beta_q1n$S ~ sample_type,
data = group_neutral %>% arrange(match(sample,labels(beta_q1n$S))),
permutations = 999) %>%
broom::tidy() %>%
tt()
term | df | SumOfSqs | R2 | statistic | p.value |
---|---|---|---|---|---|
sample_type | 1 | 0.5736848 | 0.02102618 | 1.481966 | 0.106 |
Residual | 69 | 26.7106281 | 0.97897382 | NA | NA |
Total | 70 | 27.2843129 | 1.00000000 | NA | NA |
5.6.6 Neutral Diversity (Sex)
group_neutral = sample_metadata[na.omit(match(sample_metadata$sample, names(beta_q1n$S))),]
beta_q1n$S %>%
vegan::metaMDS(., trymax = 500, k = 2, trace=0) %>%
vegan::scores() %>%
as_tibble(., rownames = "sample") %>%
dplyr::left_join(sample_metadata, by = join_by(sample == sample)) %>%
group_by(sex) %>%
mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(aes(x = NMDS1, y = NMDS2, color = sex#, shape = as.factor(state)
)) +
geom_point(size = 4) +
scale_color_manual(values = sex_colors)+
geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9) +
theme(
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
panel.background = element_blank(),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right", legend.box = "vertical"
) +
labs(color="Sex")
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 1 0.01439 0.014390 0.897 999 0.346
Residuals 69 1.10689 0.016042
Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
Female Male
Female 0.351
Male 0.34689
adonis2(beta_q1n$S ~ sex,
data = group_neutral %>% arrange(match(sample,labels(beta_q1n$S))),
permutations = 999) %>%
broom::tidy() %>%
tt()
term | df | SumOfSqs | R2 | statistic | p.value |
---|---|---|---|---|---|
sex | 1 | 0.37472 | 0.0137339 | 0.9608351 | 0.463 |
Residual | 69 | 26.90959 | 0.9862661 | NA | NA |
Total | 70 | 27.28431 | 1.0000000 | NA | NA |
5.6.7 Neutral Diversity (State)
group_neutral = sample_metadata[na.omit(match(sample_metadata$sample, names(beta_q1n$S))),]
beta_q1n$S %>%
vegan::metaMDS(., trymax = 500, k = 2, trace=0) %>%
vegan::scores() %>%
as_tibble(., rownames = "sample") %>%
dplyr::left_join(sample_metadata, by = join_by(sample == sample)) %>%
group_by(state) %>%
mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(aes(x = NMDS1, y = NMDS2, color = state#, shape = as.factor(state)
)) +
geom_point(size = 4) +
scale_color_manual(values = state_colors)+
geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9) +
theme(
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
panel.background = element_blank(),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right", legend.box = "vertical"
) +
labs(color="State")
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 1 0.01995 0.019954 1.2517 999 0.277
Residuals 69 1.09998 0.015942
Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
Florida Texas
Florida 0.27
Texas 0.26711
adonis2(beta_q1n$S ~ state,
data = group_neutral %>% arrange(match(sample,labels(beta_q1n$S))),
permutations = 999) %>%
broom::tidy() %>%
tt()
term | df | SumOfSqs | R2 | statistic | p.value |
---|---|---|---|---|---|
state | 1 | 0.4270251 | 0.01565094 | 1.097085 | 0.335 |
Residual | 69 | 26.8572877 | 0.98434906 | NA | NA |
Total | 70 | 27.2843129 | 1.00000000 | NA | NA |
5.7 Functional Differences
5.7.1 Calculate GIFT Scores
# Aggregate bundle-level GIFTs into the compound level
GIFTs_elements <- to.elements(genome_gifts, GIFT_db2)
GIFTs_elements_filtered <- GIFTs_elements[rownames(GIFTs_elements) %in% genome_counts$genome, ]
GIFTs_elements_filtered <- as.data.frame(GIFTs_elements_filtered) %>%
select_if(~ !is.numeric(.) || sum(.) != 0)
elements <- GIFTs_elements_filtered %>%
as.data.frame()
# Aggregate element-level GIFTs into the function level
GIFTs_functions <- to.functions(GIFTs_elements_filtered, GIFT_db2)
functions <- GIFTs_functions %>%
as.data.frame()
# Aggregate function-level GIFTs into overall Biosynthesis, Degradation and Structural GIFTs
GIFTs_domains <- to.domains(GIFTs_functions, GIFT_db2)
domains <- GIFTs_domains %>%
as.data.frame()
# Get community-weighed average GIFTs per sample
GIFTs_elements_community <- to.community(GIFTs_elements_filtered, genome_counts_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db2)
GIFTs_functions_community <- to.community(GIFTs_functions, genome_counts_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db2)
GIFTs_domains_community <- to.community(GIFTs_domains, genome_counts_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db2)
5.7.2 Functional Level
GIFTs_functions_community_no0 <- GIFTs_functions_community[rowSums(GIFTs_functions_community[])>0,]
GIFTs_functions_community_no0 %>%
as.data.frame() %>%
rownames_to_column(var="sample") %>%
pivot_longer(!sample,names_to="trait",values_to="gift") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
ggplot(aes(x=trait,y=sample,fill=gift)) +
geom_tile(colour="white", linewidth=0.2)+
scale_fill_gradientn(colours=rev(c("#2066a8", "#ededed")))+
facet_grid(sample_type ~ ., scales="free",space="free")
5.7.3 Element Level
#Filter out samples with only 0 GIFT scores
GIFTs_elements_community_no0 <- GIFTs_elements_community[rowSums(GIFTs_elements_community[])>0,]
GIFTs_elements_community_no0 %>%
as.data.frame() %>%
rownames_to_column(var="sample") %>%
pivot_longer(!sample,names_to="trait",values_to="gift") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
as.data.frame(GIFTs_elements_community_no0) %>%
mutate(functionid = substr(trait, 1, 3)) %>%
mutate(trait = case_when(
trait %in% GIFT_db2$Code_element ~ GIFT_db2$Element[match(trait, GIFT_db2$Code_element)],
TRUE ~ trait
)) %>%
mutate(functionid = case_when(
functionid %in% GIFT_db2$Code_function ~ GIFT_db2$Function[match(functionid, GIFT_db2$Code_function)],
TRUE ~ functionid
)) %>%
mutate(trait=factor(trait,levels=unique(GIFT_db2$Element))) %>%
mutate(functionid=factor(functionid,levels=unique(GIFT_db2$Function))) %>%
ggplot(aes(x=sample,y=trait,fill=gift)) +
geom_tile(colour="white", linewidth=0.2)+
scale_fill_gradientn(colours=rev(c("#2066a8", "#ededed"))) +
#facet_nested(~ species + sample_type, scales="free", space="free") +
facet_grid(functionid ~ sample_type, scales="free",space="free") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
strip.text.y = element_text(angle = 0), axis.text.y = element_text(angle=45, size=5)) +
labs(y="Functional Attribute",x="Samples",fill="GIFT")
5.7.4 Element Level Individual functions
#Change column in GIFT_db2 to focus on specific genetic function
GIFT_interest = GIFT_db2[which(GIFT_db2$Function == "Amino acid derivative biosynthesis"),]
GIFTs_elements_community_no0 %>%
as.data.frame() %>%
rownames_to_column(var="sample") %>%
pivot_longer(!sample,names_to="trait",values_to="gift") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
as.data.frame(GIFTs_elements_community_no0) %>%
mutate(functionid = substr(trait, 1, 3)) %>%
mutate(trait = case_when(
trait %in% GIFT_interest$Code_element ~ GIFT_interest$Element[match(trait, GIFT_interest$Code_element)],
TRUE ~ trait
)) %>%
mutate(functionid = case_when(
functionid %in% GIFT_interest$Code_function ~ GIFT_interest$Function[match(functionid, GIFT_interest$Code_function)],
TRUE ~ functionid
)) %>%
mutate(trait=factor(trait,levels=unique(GIFT_interest$Element))) %>%
mutate(functionid=factor(functionid,levels=unique(GIFT_interest$Function))) %>%
ggplot(aes(x=sample,y=trait,fill=gift)) +
geom_tile(colour="white", linewidth=0.2)+
scale_fill_gradientn(colours=rev(c("#2066a8", "#ededed"))) +
#facet_nested(~ species + sample_type, scales="free", space="free") +
facet_grid(~ sample_type, scales="free",space="free") +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1),
strip.text.y = element_text(angle = 90), axis.text.y = element_text(angle=45, size=5)) +
labs(y="Functional Attribute",x="Samples",fill="GIFT")
5.7.5 Function Matrix
dist_func = vegdist(GIFTs_elements_community_no0, method ="bray" )
adonis2(dist_func ~ group_neutral$species,
data = as.data.frame(GIFTs_elements_community_no0) %>% arrange(match(group_neutral$sample,labels(beta_q1n$S))),
permutations = 999) %>%
broom::tidy() %>%
tt()
Warning in tidy.anova(.): The following column names in ANOVA output were not recognized or transformed: SumOfSqs,
R2
term | df | SumOfSqs | R2 | statistic | p.value |
---|---|---|---|---|---|
group_neutral$species | 1 | 0.02520837 | 0.009904455 | 0.6902439 | 0.859 |
Residual | 69 | 2.51994604 | 0.990095545 | NA | NA |
Total | 70 | 2.54515441 | 1.000000000 | NA | NA |
adonis2(dist_func ~ group_neutral$sample_type,
data = as.data.frame(GIFTs_elements_community_no0) %>% arrange(match(group_neutral$sample,labels(beta_q1n$S))),
permutations = 999) %>%
broom::tidy() %>%
tt()
Warning in tidy.anova(.): The following column names in ANOVA output were not recognized or transformed: SumOfSqs,
R2
term | df | SumOfSqs | R2 | statistic | p.value |
---|---|---|---|---|---|
group_neutral$sample_type | 1 | 0.03909879 | 0.01536205 | 1.076519 | 0.346 |
Residual | 69 | 2.50605562 | 0.98463795 | NA | NA |
Total | 70 | 2.54515441 | 1.00000000 | NA | NA |
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 1 0.00682 0.0068181 1.101 999 0.307
Residuals 69 0.42730 0.0061928
Run 0 stress 0.1844193
Run 1 stress 0.1992137
Run 2 stress 0.193558
Run 3 stress 0.1991246
Run 4 stress 0.1834961
... New best solution
... Procrustes: rmse 0.08663094 max resid 0.2839648
Run 5 stress 0.196854
Run 6 stress 0.1810674
... New best solution
... Procrustes: rmse 0.06440697 max resid 0.3445628
Run 7 stress 0.1768873
... New best solution
... Procrustes: rmse 0.06494787 max resid 0.3210299
Run 8 stress 0.1862977
Run 9 stress 0.1809679
Run 10 stress 0.1792945
Run 11 stress 0.1889051
Run 12 stress 0.1853142
Run 13 stress 0.1857258
Run 14 stress 0.1800068
Run 15 stress 0.1767055
... New best solution
... Procrustes: rmse 0.08175255 max resid 0.475352
Run 16 stress 0.2017196
Run 17 stress 0.1756847
... New best solution
... Procrustes: rmse 0.08431076 max resid 0.4426863
Run 18 stress 0.1994475
Run 19 stress 0.1816233
Run 20 stress 0.1827079
Run 21 stress 0.2021623
Run 22 stress 0.2114578
Run 23 stress 0.1891313
Run 24 stress 0.2083115
Run 25 stress 0.1861193
Run 26 stress 0.2028519
Run 27 stress 0.1977956
Run 28 stress 0.1748759
... New best solution
... Procrustes: rmse 0.09247303 max resid 0.3676355
Run 29 stress 0.1816649
Run 30 stress 0.201681
Run 31 stress 0.1940182
Run 32 stress 0.1855793
Run 33 stress 0.19913
Run 34 stress 0.2078823
Run 35 stress 0.1940457
Run 36 stress 0.1852037
Run 37 stress 0.1926344
Run 38 stress 0.1848537
Run 39 stress 0.1918392
Run 40 stress 0.1964868
Run 41 stress 0.2010841
Run 42 stress 0.1754699
Run 43 stress 0.1786253
Run 44 stress 0.1782674
Run 45 stress 0.1943384
Run 46 stress 0.2035826
Run 47 stress 0.1855595
Run 48 stress 0.213982
Run 49 stress 0.174449
... New best solution
... Procrustes: rmse 0.06777011 max resid 0.413151
Run 50 stress 0.2131544
Run 51 stress 0.2024524
Run 52 stress 0.1817632
Run 53 stress 0.1965729
Run 54 stress 0.1766407
Run 55 stress 0.1805406
Run 56 stress 0.1754543
Run 57 stress 0.1785108
Run 58 stress 0.1775952
Run 59 stress 0.1875204
Run 60 stress 0.1775863
Run 61 stress 0.1808415
Run 62 stress 0.1934209
Run 63 stress 0.1972299
Run 64 stress 0.1898226
Run 65 stress 0.1768942
Run 66 stress 0.2035381
Run 67 stress 0.2044475
Run 68 stress 0.1910243
Run 69 stress 0.1915126
Run 70 stress 0.1786784
Run 71 stress 0.2035656
Run 72 stress 0.1931804
Run 73 stress 0.1887419
Run 74 stress 0.1759115
Run 75 stress 0.2157983
Run 76 stress 0.1857274
Run 77 stress 0.1731775
... New best solution
... Procrustes: rmse 0.07835955 max resid 0.481939
Run 78 stress 0.1794544
Run 79 stress 0.2024807
Run 80 stress 0.1904765
Run 81 stress 0.1779329
Run 82 stress 0.1749454
Run 83 stress 0.1998384
Run 84 stress 0.1880411
Run 85 stress 0.1912452
Run 86 stress 0.1811838
Run 87 stress 0.202975
Run 88 stress 0.1795674
Run 89 stress 0.185256
Run 90 stress 0.2005972
Run 91 stress 0.1933866
Run 92 stress 0.1762495
Run 93 stress 0.180012
Run 94 stress 0.1851051
Run 95 stress 0.1854797
Run 96 stress 0.184186
Run 97 stress 0.2022917
Run 98 stress 0.1787965
Run 99 stress 0.186392
Run 100 stress 0.1766509
*** Best solution was not repeated -- monoMDS stopping criteria:
35: no. of iterations >= maxit
65: stress ratio > sratmax
nmds_scores <- as.data.frame(nmds_result$points)
#GIFT ordination by sample type
nmds_scores$Group <- group_neutral$sample_type
x_cen = mean(nmds_scores$MDS1, na.rm = TRUE)
y_cen = mean(nmds_scores$MDS2, na.rm = TRUE)
ggplot(nmds_scores, aes(x = MDS1, y = MDS2, color = Group)) +
geom_point(size = 4) +
scale_color_manual(values = sampletype_colors) +
geom_segment(aes(x = x_cen, y = y_cen, xend = MDS1, yend = MDS2), alpha = 0.9) +
theme(
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
panel.background = element_blank(),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right", legend.box = "vertical"
)
#GIFT ordination by species
nmds_scores$Group <- group_neutral$species
x_cen = mean(nmds_scores$MDS1, na.rm = TRUE)
y_cen = mean(nmds_scores$MDS2, na.rm = TRUE)
ggplot(nmds_scores, aes(x = MDS1, y = MDS2, color = Group)) +
geom_point(size = 4) +
scale_color_manual(values = species_colors) +
geom_segment(aes(x = x_cen, y = y_cen, xend = MDS1, yend = MDS2), alpha = 0.9) +
theme(
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
panel.background = element_blank(),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right", legend.box = "vertical"
)
### Differential Abundance of Bacteria by Sample Type
phylo_metadata = sample_metadata_non0
phylo_metadata = data.frame(phylo_metadata)
rownames(phylo_metadata) = phylo_metadata[,1]
phylo_metadata = phylo_metadata[,-1]
phylo_metadata = sample_data(phylo_metadata)
phylo_mag = otu_table(genome_counts_non0, taxa_are_rows=TRUE)
mag_table = phyloseq(phylo_mag, phylo_metadata)
res <- ancombc2(
data = mag_table,
fix_formula = "sample_type",
p_adj_method = "holm",
lib_cut = 20,
group = "sample_type",
struc_zero = TRUE,
neg_lb = TRUE,
alpha = 0.05,
global = FALSE,
verbose = TRUE
)
Obtaining initial estimates ...
Estimating sample-specific biases ...
Sensitivity analysis for pseudo-count addition to 0s: ...
ANCOM-BC2 primary results ...
# View results
lfc_res <- as.data.frame(res$res$taxon)
colnames(lfc_res)[1] = "Function"
rownames(lfc_res) = lfc_res$Function
lfc_res$Function = NULL
lfc_res = mutate(lfc_res, lfc = res$res$'lfc_sample_typeStomach contents')
lfc_res = mutate(lfc_res, p_val = res$res$'p_sample_typeStomach contents')
# See significant taxa
sig_taxa_sample = filter(lfc_res, p_val < 0.05)
print(sig_taxa_sample)
lfc p_val
EHA01634_bin.46 1.1151583 0.0318630762
EHA01666_bin.16 -1.1699768 0.0019150418
EHA02611_bin.33 0.9825746 0.0086812155
EHA02611_bin.37 -0.9516886 0.0191501006
EHA02637_bin.51 -0.8556469 0.0096416401
EHA02637_bin.7 -1.3466827 0.0003748247
EHA03085_bin.25 -0.6651150 0.0402859338
EHA03138_bin.47 -0.7490996 0.0183654450
EHA03161_bin.23 -0.9584138 0.0068342412
EHA03189_bin.19 -1.2871884 0.0026345092
EHA03189_bin.1 -0.7994314 0.0309347176
EHA03206_bin.25 -1.3999139 0.0002905914
sig_taxa_sample$genome = rownames(sig_taxa_sample)
sig_taxa_sample = merge(sig_taxa_sample, genome_metadata, by = "genome")
sig_taxa_sample$signif_label <- cut(sig_taxa_sample$p_val,
breaks = c(-Inf, 0.001, 0.01, 0.05, Inf),
labels = c("***", "**", "*", ""))
# Bar plot of significant taxa (ordered by log fold change)
ggplot(sig_taxa_sample, aes(x = reorder(phylum, lfc), y = lfc, fill = lfc > 0)) +
geom_bar(stat = "identity", position = "dodge") +
geom_text(aes(label = signif_label),
hjust = ifelse(sig_taxa_sample$lfc > 0, -0.2, 1.2),
color = "black",
size = 4) +
coord_flip() +
scale_fill_manual(values = sampletype_colors,
labels = c("Colon Contents", "Stomach Contents")) +
theme_minimal() +
labs(
x = "Family",
y = "Log Fold Change",
title = "Enrichment of Significant Taxa",
fill = "More abundant in"
) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
### Differential Abundance of Bacteria by Species
phylo_metadata = sample_metadata_non0
phylo_metadata = data.frame(phylo_metadata)
rownames(phylo_metadata) = phylo_metadata[,1]
phylo_metadata = phylo_metadata[,-1]
phylo_metadata = sample_data(phylo_metadata)
phylo_mag = otu_table(genome_counts_non0, taxa_are_rows=TRUE)
mag_table = phyloseq(phylo_mag, phylo_metadata)
res <- ancombc2(
data = mag_table,
fix_formula = "species",
p_adj_method = "holm",
lib_cut = 20,
group = "species",
struc_zero = TRUE,
neg_lb = TRUE,
alpha = 0.05,
global = FALSE,
verbose = TRUE
)
Obtaining initial estimates ...
Estimating sample-specific biases ...
Sensitivity analysis for pseudo-count addition to 0s: ...
ANCOM-BC2 primary results ...
# View results
lfc_res <- as.data.frame(res$res$taxon)
colnames(lfc_res)[1] = "Function"
rownames(lfc_res) = lfc_res$Function
lfc_res$Function = NULL
lfc_res = mutate(lfc_res, lfc = res$res$'lfc_speciesCoragyps atratus')
lfc_res = mutate(lfc_res, p_val = res$res$'p_speciesCoragyps atratus')
# See significant taxa
sig_taxa_species = filter(lfc_res, p_val < 0.05)
print(sig_taxa_species)
lfc p_val
EHA01589_bin.17 -0.8118426 3.737945e-03
EHA01589_bin.2 0.8957119 2.371683e-04
EHA01595_bin.5 -0.6359475 5.971373e-03
EHA01666_bin.16 0.6721596 2.242389e-03
EHA01709_bin.62 0.6927908 1.828504e-03
EHA02611_bin.16 0.5630132 9.184508e-03
EHA02611_bin.37 1.1601796 6.500070e-05
EHA02611_bin.39 0.9397022 2.874165e-04
EHA02637_bin.37 -0.7543196 2.960604e-03
EHA02637_bin.51 -0.7246455 1.899483e-03
EHA03085_bin.25 -0.8075419 1.153715e-03
EHA03085_bin.3 -0.7036940 2.802183e-03
EHA03128_bin.14 0.8331161 2.548205e-03
EHA03128_bin.35 0.3622284 4.548755e-02
EHA03138_bin.21 -1.5344794 5.085004e-05
EHA03138_bin.47 -0.5864219 4.052138e-03
EHA03138_bin.4 -0.7960476 3.495705e-03
EHA03161_bin.87 -0.4585107 3.907148e-02
EHA03161_bin.94 0.5946715 8.011410e-03
EHA03189_bin.40 -0.5775843 1.235070e-02
EHA03189_bin.46 1.0915245 5.726228e-05
EHA03189_bin.9 1.1688334 3.470488e-04
EHA03206_bin.1 -1.1621375 5.821359e-03
EHA03206_bin.23 0.6652361 4.316121e-03
sig_taxa_species$genome = rownames(sig_taxa_species)
sig_taxa_species = merge(sig_taxa_species, genome_metadata, by = "genome")
sig_taxa_species$signif_label <- cut(sig_taxa_species$p_val,
breaks = c(-Inf, 0.001, 0.01, 0.05, Inf),
labels = c("***", "**", "*", ""))
# Bar plot of significant taxa (ordered by log fold change)
ggplot(sig_taxa_species, aes(x = reorder(phylum, lfc), y = lfc, fill = lfc > 0)) +
geom_bar(stat = "identity", position = "dodge") +
geom_text(aes(label = signif_label),
hjust = ifelse(sig_taxa_species$lfc > 0, -0.2, 1.2),
color = "black",
size = 4) +
coord_flip() +
scale_fill_manual(values = species_colors,
labels = c("Cathartes aura", "Coragyps atratus")) +
theme_minimal() +
labs(
x = "Family",
y = "Log Fold Change",
title = "Enrichment of Significant Taxa",
fill = "More abundant in"
) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
5.7.6 Wilcoxon comparison
element_gift <- GIFTs_elements_community_no0 %>%
as.data.frame() %>%
rownames_to_column(., "sample") %>%
#Change column number to column in sample_metadata to change comparison sample_type, species, etc
left_join(sample_metadata[c(1,5)], by="sample")
comparison = colnames(element_gift)[length(colnames(element_gift))]
uniqueGIFT_db<- unique(GIFT_db2) %>% unite("Function",Function:Element, sep= "_", remove=FALSE)
#Wilcoxan test
sig_wilcox_sample = element_gift %>%
pivot_longer(-c(sample, comparison), names_to = "trait", values_to = "value") %>%
group_by(trait) %>%
summarise(
p_value = wilcox.test(value ~ get(comparison))$p.value,
mean_1 = mean(value[get(comparison) == unique(element_gift[,comparison])[1]], na.rm = TRUE),
mean_2 = mean(value[get(comparison) == unique(element_gift[,comparison])[2]], na.rm = TRUE),
mean_difference = mean_1 - mean_2, # Magnitude of difference
direction = case_when(
mean_difference > 0 ~ paste("Higher in", unique(element_gift[,comparison])[1]),
mean_difference < 0 ~ paste("Higher in", unique(element_gift[,comparison])[2]),
TRUE ~ "No Difference"
)
) %>%
mutate(trait_name = trait) %>%
mutate(trait_name = case_when(
trait %in% GIFT_db2$Code_element ~ GIFT_db2$Element[match(trait_name, GIFT_db2$Code_element)],
TRUE ~ trait_name
)) %>%
mutate(
p_adjust = p.adjust(p_value, method = "BH"),
p_label = case_when(
p_adjust < 0.001 ~ "***",
p_adjust < 0.01 ~ "**",
p_adjust < 0.05 ~ "*",
TRUE ~ ""
),
trait_label = paste0(trait_name, " ", p_label) # Append p-values to trait name
) %>%
filter(p_adjust < 0.01) #Keep only significant results
#Match significant functions to table
wilcox_elements_community_no0_sig = GIFTs_elements_community_no0[,which(match(colnames(GIFTs_elements_community_no0), sig_wilcox_sample$trait)>0)]
#Create table of gift scores with significant differences between comparisons
heatmap_data <- wilcox_elements_community_no0_sig %>%
as.data.frame() %>%
rownames_to_column(var="sample") %>%
pivot_longer(!sample, names_to="trait", values_to="gift") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
mutate(functionid = substr(trait, 1, 3)) %>%
mutate(trait = case_when(
trait %in% GIFT_db2$Code_element ~ GIFT_db2$Element[match(trait, GIFT_db2$Code_element)],
TRUE ~ trait
)) %>%
mutate(functionid = case_when(
functionid %in% GIFT_db2$Code_function ~ GIFT_db2$Function[match(functionid, GIFT_db2$Code_function)],
TRUE ~ functionid
)) %>%
left_join(sig_wilcox_sample %>% select(trait, trait_label, mean_difference, direction, p_adjust), by="trait") %>%
mutate(trait_label = factor(trait_label, levels = unique(trait_label))) %>%
mutate(functionid = factor(functionid, levels = unique(GIFT_db2$Function)))
ggplot(heatmap_data, aes(x=sample, y=paste0(trait, " ", sig_wilcox_sample$p_label), fill=gift)) +
geom_tile(colour="white", linewidth=0.2) +
scale_fill_gradientn(colours=rev(c("#2066a8", "#ededed"))) +
facet_nested(functionid ~ get(comparison) + species, scales="free", space="free") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
strip.text.y = element_text(angle = 0), axis.text.y = element_text(angle=45, size=10)) +
labs(y="Functional Attribute (with p-values)", x="Samples", fill="GIFT")
### Volcano Plot
sig_wilcox_sample$negLogP <- -log10(sig_wilcox_sample$p_adjust)
ggplot(sig_wilcox_sample, aes(x = mean_difference, y = negLogP)) +
geom_point(aes(), alpha = 0.7) + # Highlight significant points
geom_text_repel(aes(label = trait_name), size = 3, max.overlaps = 10) + # Avoid too many overlapping labels
theme_minimal() +
labs(x = "GIFT Difference",
y = "-Log10(P-value)") +
theme(plot.title = element_text(hjust = 0.5)) # Center title