Chapter 8 Differential abundance analysis
8.2 Create Phyloseq object
# Reorder levels: set "unsel" as reference
sample_metadata$fox_behaviour <- factor(
sample_metadata$fox_behaviour,
levels = c("unsel", "aggr", "tame") #here we set unsel as reference level
)
#Phyloseq object
count_phy <- genome_counts_filt %>%
column_to_rownames(var="genome")%>%
otu_table(., taxa_are_rows=T)
sample_info_tab_phy <- sample_metadata%>%
column_to_rownames(var="sample")%>%
sample_data()
TAX <- genome_metadata%>%
column_to_rownames(var="genome")%>%
select(1:7)%>%
as.matrix()%>%
tax_table()
tree <- phy_tree(genome_tree)
physeq_all = phyloseq(count_phy, TAX, sample_info_tab_phy, tree)8.3 Run ANCOM-BC2 (fox behaviour)
Analysis of Composition of Microbes with Bias Correction
set.seed(1234) #set seed for reproducibility
ancom_mag_gut = ancombc2(data = physeq_all,
assay_name = "counts",
tax_level = NULL,
fix_formula = "fox_behaviour + gut_location",
p_adj_method = "holm",
pseudo_sens = TRUE,
prv_cut =0,
lib_cut = 0,
s0_perc = 0.05,
group = "fox_behaviour",
struc_zero = TRUE,
neg_lb = FALSE,
alpha = 0.05,
n_cl = 2,
verbose = TRUE,
global = FALSE,
pairwise = TRUE, #to get all pairwise comparisons
dunnet = FALSE,
trend = FALSE,
iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
em_control = list(tol = 1e-5, max_iter = 100),
lme_control = lme4::lmerControl(),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
trend_control = NULL)8.4 Chcking the results
# Function to get significant ANCOM-BC2 results
get_sig_ancom <- function(ancom_res, comp_prefix, alpha = 0.05, physeq_obj = physeq_all) {
# Identify relevant columns
lfc_col <- paste0("lfc_", comp_prefix)
q_col <- paste0("q_", comp_prefix)
# Check if columns exist
if(!all(c(lfc_col, q_col) %in% colnames(ancom_res))) {
stop("Specified comparison columns not found in ANCOM-BC2 result.")
}
# Filter significant taxa
sig_res <- ancom_res %>%
dplyr::select(taxon, all_of(lfc_col), all_of(q_col)) %>%
filter(.data[[q_col]] < alpha) %>%
arrange(.data[[q_col]])
# Merge taxonomy
taxonomy <- data.frame(physeq_obj@tax_table) %>%
rownames_to_column("taxon") %>%
mutate_at(vars(order, phylum, family, genus), ~ str_replace(., "[dpcofgs]__", ""))
sig_res <- sig_res %>%
left_join(taxonomy, by = "taxon")
# Return result (can be empty if none significant)
return(sig_res)
}
# Aggressive vs Unselected
sig_aggr_vs_unsel <- get_sig_ancom(ancom_mag_gut$res, "fox_behaviouraggr")
# Tame vs Unselected
sig_tame_vs_unsel <- get_sig_ancom(ancom_mag_gut$res, "fox_behaviourtame")There are no significant results for the differential abundance analysis between the fox behaviours.
8.5 Phylum comparisons
8.5.1 Aggressive vs Unselected
taxonomy <- data.frame(physeq_all@tax_table) %>%
rownames_to_column(., "taxon")%>%
mutate_at(vars(order, phylum, family, genus), ~ str_replace(., "[dpcofgs]__", ""))
ancom_mag_gut_table <- ancom_mag_gut$res %>%
dplyr::select(taxon, lfc_fox_behaviouraggr, q_fox_behaviouraggr) %>%
filter( q_fox_behaviouraggr < 0.05) %>%
dplyr::arrange( q_fox_behaviouraggr) %>%
merge(., taxonomy, by="taxon") %>%
rename(genome=taxon) %>%
mutate_at(vars(phylum, genus), ~ str_replace(., "[dpcofgs]__", ""))%>%
dplyr::arrange(lfc_fox_behaviouraggr)
colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
mutate_at(vars(phylum), ~ str_replace(., "[dpcofgs]__", "")) %>%
right_join(taxonomy, by=join_by(phylum == phylum)) %>%
dplyr::select(phylum, colors) %>%
mutate(colors = str_c(colors, "80")) %>% #add 80% alpha
unique() %>%
dplyr::arrange(phylum)
tax_table <- as.data.frame(unique(taxonomy$phylum)) # use all phyla from full taxonomy
colnames(tax_table)[1] <- "phylum"
tax_color <- merge(tax_table, colors_alphabetic, by="phylum") %>%
arrange(phylum) %>%
dplyr::select(colors) %>%
pull()ancom_mag_gut_table %>%
mutate(genome=factor(genome,levels=ancom_mag_gut_table$genome)) %>%
ggplot(aes(x=lfc_fox_behaviouraggr, y=forcats::fct_reorder(genome,lfc_fox_behaviouraggr), fill=phylum)) + #forcats::fct_rev()
geom_col() +
scale_fill_manual(values=tax_color) +
geom_vline(xintercept=0) +
# coord_flip()+
theme(panel.background = element_blank(),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 8),
axis.title = element_text(size = 14, face = "bold"),
legend.text = element_text(size = 12),
legend.title = element_text(size = 14, face = "bold"),
legend.position = "right", legend.box = "vertical")+
labs(title = "Aggressive vs Unselected")+
xlab("log2FoldChange (aggr vs unsel)") +
ylab("Species")+
guides(fill=guide_legend(title="Phylum"))8.5.2 Tame vs Unselected
ancom_mag_gut_table_tame_unsel <- ancom_mag_gut$res %>%
dplyr::select(taxon, lfc_fox_behaviourtame, q_fox_behaviourtame) %>%
filter(q_fox_behaviourtame < 0.05) %>%
arrange(q_fox_behaviourtame) %>%
merge(taxonomy, by="taxon") %>%
rename(genome=taxon) %>%
arrange(lfc_fox_behaviourtame)
ggplot(ancom_mag_gut_table_tame_unsel,
aes(x=lfc_fox_behaviourtame,
y=fct_reorder(genome, lfc_fox_behaviourtame),
fill=phylum)) +
geom_col() +
geom_vline(xintercept=0) +
scale_fill_manual(values=tax_color) +
labs(x="log2 Fold Change (tame vs unsel)", y="Species",
title = "Tame vs Unselected")+
theme_bw()8.6 Genus
8.7 Run ANCOM-BC2 (gut location)
Analysis of Composition of Microbes with Bias Correction
set.seed(1234) #set seed for reproducibility
ancom_mag_gut_location = ancombc2(data = physeq_all,
assay_name = "counts",
tax_level = NULL,
fix_formula = "gut_location+ fox_behaviour",
p_adj_method = "holm",
pseudo_sens = TRUE,
prv_cut =0,
lib_cut = 0,
s0_perc = 0.05,
group = "gut_location",
struc_zero = TRUE,
neg_lb = FALSE,
alpha = 0.05,
n_cl = 2,
verbose = TRUE,
global = FALSE,
pairwise = TRUE, #to get all pairwise comparisons
dunnet = FALSE,
trend = FALSE,
iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
em_control = list(tol = 1e-5, max_iter = 100),
lme_control = lme4::lmerControl(),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
trend_control = NULL)# Pairwise gut location comparisons
sig_F <- get_sig_ancom(ancom_mag_gut_location$res, "gut_locationF_colon")
sig_F taxon lfc_gut_locationF_colon q_gut_locationF_colon
1 K_bin_25 -2.418794 0.02113882
classification domain
1 d__Bacteria;p__Bacillota_I;c__Bacilli_A;o__Mycoplasmatales;f__Mycoplasmoidaceae;g__Mycoplasmoides;s__ Bacteria
phylum class order family genus
1 Bacillota_I Bacilli_A Mycoplasmatales Mycoplasmoidaceae Mycoplasmoides
taxon lfc_gut_locationK_ileum q_gut_locationK_ileum
1 D_bin_40485 -3.953451 0.0000284191
2 D_bin_60660 -2.780142 0.0022525939
3 K_bin_25 -1.833990 0.0347910912
classification
1 d__Bacteria;p__Bacillota;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactobacillus;s__Lactobacillus acidophilus
2 d__Bacteria;p__Deferribacterota;c__Deferribacteres;o__Deferribacterales;f__Mucispirillaceae;g__Mucispirillum;s__Mucispirillum sp937890655
3 d__Bacteria;p__Bacillota_I;c__Bacilli_A;o__Mycoplasmatales;f__Mycoplasmoidaceae;g__Mycoplasmoides;s__
domain phylum class order family genus
1 Bacteria Bacillota Bacilli Lactobacillales Lactobacillaceae Lactobacillus
2 Bacteria Deferribacterota Deferribacteres Deferribacterales Mucispirillaceae Mucispirillum
3 Bacteria Bacillota_I Bacilli_A Mycoplasmatales Mycoplasmoidaceae Mycoplasmoides
taxon lfc_gut_locationM_colon q_gut_locationM_colon
1 K_bin_25 -3.523191 0.0005377379
2 D_bin_40485 -2.071362 0.0129908185
classification
1 d__Bacteria;p__Bacillota_I;c__Bacilli_A;o__Mycoplasmatales;f__Mycoplasmoidaceae;g__Mycoplasmoides;s__
2 d__Bacteria;p__Bacillota;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactobacillus;s__Lactobacillus acidophilus
domain phylum class order family genus
1 Bacteria Bacillota_I Bacilli_A Mycoplasmatales Mycoplasmoidaceae Mycoplasmoides
2 Bacteria Bacillota Bacilli Lactobacillales Lactobacillaceae Lactobacillus
ancom_mag_gut_table_fcolon_dileum <- ancom_mag_gut_location$res %>%
dplyr::select(taxon, lfc_gut_locationF_colon, p_gut_locationF_colon, q_gut_locationF_colon) %>%
filter(q_gut_locationF_colon < 0.05) %>%
arrange(q_gut_locationF_colon) %>%
merge(taxonomy, by="taxon") %>%
rename(genome=taxon) %>%
arrange(lfc_gut_locationF_colon)
ggplot(ancom_mag_gut_table_fcolon_dileum,
aes(x=lfc_gut_locationF_colon,
y=fct_reorder(genome, lfc_gut_locationF_colon),
fill=genus)) +
geom_col() +
geom_vline(xintercept=0) +
scale_fill_manual(values=tax_color) +
labs(x="log2 Fold Change (D_ileum vs F_colon)", y="Species",
title = "D_ileum vs F_colon")+
theme_bw()ancom_mag_gut_table_kileum_dileum <- ancom_mag_gut_location$res %>%
dplyr::select(taxon, lfc_gut_locationK_ileum, p_gut_locationK_ileum, q_gut_locationK_ileum) %>%
filter(q_gut_locationK_ileum < 0.05) %>%
arrange(q_gut_locationK_ileum) %>%
merge(taxonomy, by="taxon") %>%
rename(genome=taxon) %>%
arrange(lfc_gut_locationK_ileum)
ggplot(ancom_mag_gut_table_kileum_dileum,
aes(x=lfc_gut_locationK_ileum,
y=fct_reorder(genome, lfc_gut_locationK_ileum),
fill=genus)) +
geom_col() +
geom_vline(xintercept=0) +
scale_fill_manual(values=tax_color) +
labs(x="log2 Fold Change (D_ileum vs K_ileum)", y="Species",
title = "D_ileum vs K_ileum")+
theme_bw()8.8 Volcano plots
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
arrange(match(genome, genome_tree$tip.label)) %>%
select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
pull(colors, name=phylum)
#pdf("figures/different_species_StrucZero_new_violin.pdf",width=12, height=6)
ancom_behaviour_res <- ancom_mag_gut$res %>%
dplyr::select(genome=taxon, lfc_fox_behaviouraggr, p_fox_behaviouraggr) %>%
left_join(genome_metadata, by = "genome") %>%
mutate(phylum = ifelse(p_fox_behaviouraggr < 0.05, phylum, NA))
ggplot(ancom_behaviour_res , aes(x = lfc_fox_behaviouraggr, y = -log(p_fox_behaviouraggr), color = phylum)) +
geom_point(size=3, show.legend = FALSE) +
#xlim(c(-10,4)) +
scale_color_manual(values = phylum_colors) +
labs(color = "Significant phylum", x = "Log-fold difference (aggr vs unsel)", y = "-log(p-value)") +
theme_classic()ancom_mag_gut_location_plot <- ancom_mag_gut_location$res %>%
dplyr::select(genome=taxon, lfc_gut_locationF_colon, q_gut_locationF_colon) %>%
left_join(genome_metadata, by = "genome") %>%
mutate(phylum = ifelse(q_gut_locationF_colon < 0.05, phylum, NA))
ggplot(ancom_mag_gut_location_plot, aes(x = lfc_gut_locationF_colon, y = -log(q_gut_locationF_colon), color = genus)) +
geom_point(size=3) +
#xlim(c(-10,4)) +
labs(color = "Significant genus", x = "Log-fold difference (D_ileum vs F_colon)", y = "-log(p-value)") +
theme_classic()library(forcats)
res_pair <- ancom_mag_gut_location$res_pair
res_long <- res_pair %>%
select(taxon, starts_with("lfc_"), starts_with("p_"), starts_with("q_")) %>%
pivot_longer(
cols = -taxon,
names_to = c("type", "comparison"),
names_pattern = "^(lfc|p|q)_(.*)$"
) %>%
pivot_wider(names_from = type, values_from = value)
#D_ileum was set as reference, so if only one group name (e.g. "gut_locationM_colon")
#it means D_ileum vs M_colon
res_long <- res_long %>%
mutate(
comparison = gsub("^gut_location", "", comparison),
comparison = case_when(
grepl("_gut_location", comparison) ~ gsub("_gut_location", " vs ", comparison),
TRUE ~ paste0(comparison, " vs D_ileum")
)
)
#Filter to significant results (q<0.05)
res_sig <- res_long %>%
filter(q < 0.05)
res_sig <- res_sig %>%
left_join(taxonomy, by = "taxon")
ggplot(res_sig,
aes(x = lfc,
y = fct_reorder(taxon, lfc),
fill = genus)) +
geom_col() +
geom_vline(xintercept = 0, linetype = "dashed") +
scale_fill_manual(values = tax_color) +
labs(
x = "log2 Fold Change",
y = "Taxon",
title = "Significant Pairwise Comparisons (q < 0.05)"
) +
facet_wrap(~comparison, scales = "free_y") +
theme_bw() +
theme(
strip.text = element_text(face = "bold", size = 6),
legend.position = "right"
)res_heat <- res_long %>%
left_join(taxonomy, by = "taxon") %>%
mutate(significant = q < 0.05)
#Keep only taxa that are significant in at least one comparison
sig_taxa <- res_heat %>%
group_by(taxon) %>%
filter(any(significant)) %>%
ungroup()
ggplot(sig_taxa, aes(x = comparison, y = fct_reorder(genus, lfc), fill = lfc)) +
geom_tile(color = "white") +
geom_text(
aes(label = ifelse(significant, "*", "")),
color = "black",
size = 5
) +
scale_fill_viridis_c(
option = "C",
direction = 1,
name = "log2FC"
) +
labs(
x = "Pairwise Comparison",
y = "Taxon",
title = "Differential Abundance Across Gut Locations (ANCOM-BC2)"
) +
theme_minimal(base_size = 12) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid = element_blank()
)