Chapter 8 Differential abundance analysis

8.1 Load the data

load("data/data.Rdata")

genome_counts_filt <- genome_counts_filt %>%
  select(one_of(c("genome",sample_metadata$sample))) %>%
  filter(rowSums(. != 0, na.rm = TRUE) > 0) %>%
  select_if(~!all(. == 0))

sample_metadata <- sample_metadata %>%
  filter(sample %in% colnames(genome_counts_filt))

8.2 Create Phyloseq object

# Reorder levels: set "unsel" as reference
sample_metadata$fox_behaviour <- factor(
  sample_metadata$fox_behaviour,
  levels = c("unsel", "aggr", "tame")  #here we set unsel as reference level
)

#Phyloseq object
count_phy <- genome_counts_filt %>%
  column_to_rownames(var="genome")%>%
  otu_table(., taxa_are_rows=T)

sample_info_tab_phy <- sample_metadata%>%
  column_to_rownames(var="sample")%>%
  sample_data()

TAX <- genome_metadata%>%
  column_to_rownames(var="genome")%>%
  select(1:7)%>%
  as.matrix()%>%
  tax_table()
tree <- phy_tree(genome_tree)

physeq_all = phyloseq(count_phy, TAX, sample_info_tab_phy, tree)

8.3 Run ANCOM-BC2 (fox behaviour)

Analysis of Composition of Microbes with Bias Correction

set.seed(1234) #set seed for reproducibility
ancom_mag_gut = ancombc2(data = physeq_all, 
                  assay_name = "counts", 
                  tax_level = NULL,
                  fix_formula = "fox_behaviour + gut_location",
                  p_adj_method = "holm", 
                  pseudo_sens = TRUE,
                  prv_cut =0, 
                  lib_cut = 0, 
                  s0_perc = 0.05,
                  group = "fox_behaviour", 
                  struc_zero = TRUE, 
                  neg_lb = FALSE,
                  alpha = 0.05, 
                  n_cl = 2, 
                  verbose = TRUE,
                  global = FALSE, 
                  pairwise = TRUE,  #to get all pairwise comparisons
                  dunnet = FALSE, 
                  trend = FALSE,
                  iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
                  em_control = list(tol = 1e-5, max_iter = 100),
                  lme_control = lme4::lmerControl(),
                  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100), 
                  trend_control = NULL)

8.4 Chcking the results

# Function to get significant ANCOM-BC2 results
get_sig_ancom <- function(ancom_res, comp_prefix, alpha = 0.05, physeq_obj = physeq_all) {
  
  # Identify relevant columns
  lfc_col <- paste0("lfc_", comp_prefix)
  q_col   <- paste0("q_", comp_prefix)
  
  # Check if columns exist
  if(!all(c(lfc_col, q_col) %in% colnames(ancom_res))) {
    stop("Specified comparison columns not found in ANCOM-BC2 result.")
  }
  
  # Filter significant taxa
  sig_res <- ancom_res %>%
    dplyr::select(taxon, all_of(lfc_col), all_of(q_col)) %>%
    filter(.data[[q_col]] < alpha) %>%
    arrange(.data[[q_col]])
  
  # Merge taxonomy
  taxonomy <- data.frame(physeq_obj@tax_table) %>%
    rownames_to_column("taxon") %>%
    mutate_at(vars(order, phylum, family, genus), ~ str_replace(., "[dpcofgs]__", ""))
  
  sig_res <- sig_res %>%
    left_join(taxonomy, by = "taxon")
  
  # Return result (can be empty if none significant)
  return(sig_res)
}

# Aggressive vs Unselected
sig_aggr_vs_unsel <- get_sig_ancom(ancom_mag_gut$res, "fox_behaviouraggr")

# Tame vs Unselected
sig_tame_vs_unsel <- get_sig_ancom(ancom_mag_gut$res, "fox_behaviourtame")

There are no significant results for the differential abundance analysis between the fox behaviours.

8.5 Phylum comparisons

8.5.1 Aggressive vs Unselected

taxonomy <- data.frame(physeq_all@tax_table) %>%
  rownames_to_column(., "taxon")%>%
  mutate_at(vars(order, phylum, family, genus), ~ str_replace(., "[dpcofgs]__", ""))

ancom_mag_gut_table <- ancom_mag_gut$res %>%
  dplyr::select(taxon, lfc_fox_behaviouraggr, q_fox_behaviouraggr) %>%
  filter( q_fox_behaviouraggr < 0.05) %>%
  dplyr::arrange( q_fox_behaviouraggr) %>%
  merge(., taxonomy, by="taxon") %>%
  rename(genome=taxon) %>% 
  mutate_at(vars(phylum, genus), ~ str_replace(., "[dpcofgs]__", ""))%>%
  dplyr::arrange(lfc_fox_behaviouraggr)

colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  mutate_at(vars(phylum), ~ str_replace(., "[dpcofgs]__", ""))  %>%
  right_join(taxonomy, by=join_by(phylum == phylum)) %>%
  dplyr::select(phylum, colors) %>%
  mutate(colors = str_c(colors, "80"))  %>% #add 80% alpha
    unique() %>%
    dplyr::arrange(phylum)

tax_table <- as.data.frame(unique(taxonomy$phylum))  # use all phyla from full taxonomy
colnames(tax_table)[1] <- "phylum"

tax_color <- merge(tax_table, colors_alphabetic, by="phylum") %>%
  arrange(phylum) %>%
  dplyr::select(colors) %>%
  pull()
ancom_mag_gut_table %>%
      mutate(genome=factor(genome,levels=ancom_mag_gut_table$genome)) %>%
ggplot(aes(x=lfc_fox_behaviouraggr, y=forcats::fct_reorder(genome,lfc_fox_behaviouraggr), fill=phylum)) + #forcats::fct_rev()
  geom_col() + 
  scale_fill_manual(values=tax_color) + 
  geom_vline(xintercept=0) + 
#  coord_flip()+
  theme(panel.background = element_blank(),
        axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
        axis.text.x = element_text(size = 12),
        axis.text.y = element_text(size = 8),
        axis.title = element_text(size = 14, face = "bold"),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 14, face = "bold"),
        legend.position = "right", legend.box = "vertical")+
  labs(title = "Aggressive vs Unselected")+
  xlab("log2FoldChange (aggr vs unsel)") + 
  ylab("Species")+
  guides(fill=guide_legend(title="Phylum"))

8.5.2 Tame vs Unselected

ancom_mag_gut_table_tame_unsel <- ancom_mag_gut$res %>%
  dplyr::select(taxon, lfc_fox_behaviourtame, q_fox_behaviourtame) %>%
  filter(q_fox_behaviourtame < 0.05) %>%
  arrange(q_fox_behaviourtame) %>%
  merge(taxonomy, by="taxon") %>%
  rename(genome=taxon) %>%
  arrange(lfc_fox_behaviourtame)

ggplot(ancom_mag_gut_table_tame_unsel,
       aes(x=lfc_fox_behaviourtame,
           y=fct_reorder(genome, lfc_fox_behaviourtame),
           fill=phylum)) +
  geom_col() +
  geom_vline(xintercept=0) +
  scale_fill_manual(values=tax_color) +
  labs(x="log2 Fold Change (tame vs unsel)", y="Species",
       title = "Tame vs Unselected")+
  theme_bw()

ancom_mag_results <- ancom_mag_gut$res 

8.6 Genus

8.6.1 Aggressive vs Unselected

ggplot(ancom_mag_gut_table,
       aes(x=lfc_fox_behaviouraggr,
           y=fct_reorder(genome, lfc_fox_behaviouraggr),
           fill=genus)) +
  geom_col() +
  geom_vline(xintercept=0) +
  labs(x="log2 Fold Change (aggr vs unsel)", y="Species",
       title = "Aggressive vs Unselected")+
  theme_bw()

8.6.2 Tame vs Unselected

ggplot(ancom_mag_gut_table_tame_unsel,
       aes(x=lfc_fox_behaviourtame,
           y=fct_reorder(genome, lfc_fox_behaviourtame),
           fill=genus)) +
  geom_col() +
  geom_vline(xintercept=0) +
  labs(x="log2 Fold Change (tame vs unsel)", y="Species",
       title = "Tame vs Unselected")+
  theme_bw()

8.7 Run ANCOM-BC2 (gut location)

Analysis of Composition of Microbes with Bias Correction

set.seed(1234) #set seed for reproducibility
ancom_mag_gut_location = ancombc2(data = physeq_all, 
                  assay_name = "counts", 
                  tax_level = NULL,
                  fix_formula = "gut_location+ fox_behaviour",
                  p_adj_method = "holm", 
                  pseudo_sens = TRUE,
                  prv_cut =0, 
                  lib_cut = 0, 
                  s0_perc = 0.05,
                  group = "gut_location", 
                  struc_zero = TRUE, 
                  neg_lb = FALSE,
                  alpha = 0.05, 
                  n_cl = 2, 
                  verbose = TRUE,
                  global = FALSE, 
                  pairwise = TRUE,  #to get all pairwise comparisons
                  dunnet = FALSE, 
                  trend = FALSE,
                  iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
                  em_control = list(tol = 1e-5, max_iter = 100),
                  lme_control = lme4::lmerControl(),
                  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100), 
                  trend_control = NULL)
# Pairwise gut location comparisons
sig_F <- get_sig_ancom(ancom_mag_gut_location$res, "gut_locationF_colon")
sig_F
     taxon lfc_gut_locationF_colon q_gut_locationF_colon
1 K_bin_25               -2.418794            0.02113882
                                                                                         classification   domain
1 d__Bacteria;p__Bacillota_I;c__Bacilli_A;o__Mycoplasmatales;f__Mycoplasmoidaceae;g__Mycoplasmoides;s__ Bacteria
       phylum     class           order            family          genus
1 Bacillota_I Bacilli_A Mycoplasmatales Mycoplasmoidaceae Mycoplasmoides
sig_K <- get_sig_ancom(ancom_mag_gut_location$res, "gut_locationK_ileum")
sig_K 
        taxon lfc_gut_locationK_ileum q_gut_locationK_ileum
1 D_bin_40485               -3.953451          0.0000284191
2 D_bin_60660               -2.780142          0.0022525939
3    K_bin_25               -1.833990          0.0347910912
                                                                                                                             classification
1                  d__Bacteria;p__Bacillota;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactobacillus;s__Lactobacillus acidophilus
2 d__Bacteria;p__Deferribacterota;c__Deferribacteres;o__Deferribacterales;f__Mucispirillaceae;g__Mucispirillum;s__Mucispirillum sp937890655
3                                     d__Bacteria;p__Bacillota_I;c__Bacilli_A;o__Mycoplasmatales;f__Mycoplasmoidaceae;g__Mycoplasmoides;s__
    domain           phylum           class             order            family          genus
1 Bacteria        Bacillota         Bacilli   Lactobacillales  Lactobacillaceae  Lactobacillus
2 Bacteria Deferribacterota Deferribacteres Deferribacterales  Mucispirillaceae  Mucispirillum
3 Bacteria      Bacillota_I       Bacilli_A   Mycoplasmatales Mycoplasmoidaceae Mycoplasmoides
sig_M <- get_sig_ancom(ancom_mag_gut_location$res, "gut_locationM_colon")
sig_M
        taxon lfc_gut_locationM_colon q_gut_locationM_colon
1    K_bin_25               -3.523191          0.0005377379
2 D_bin_40485               -2.071362          0.0129908185
                                                                                                            classification
1                    d__Bacteria;p__Bacillota_I;c__Bacilli_A;o__Mycoplasmatales;f__Mycoplasmoidaceae;g__Mycoplasmoides;s__
2 d__Bacteria;p__Bacillota;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactobacillus;s__Lactobacillus acidophilus
    domain      phylum     class           order            family          genus
1 Bacteria Bacillota_I Bacilli_A Mycoplasmatales Mycoplasmoidaceae Mycoplasmoides
2 Bacteria   Bacillota   Bacilli Lactobacillales  Lactobacillaceae  Lactobacillus
ancom_mag_gut_table_fcolon_dileum <- ancom_mag_gut_location$res %>%
  dplyr::select(taxon, lfc_gut_locationF_colon, p_gut_locationF_colon, q_gut_locationF_colon) %>%
  filter(q_gut_locationF_colon < 0.05) %>%
  arrange(q_gut_locationF_colon) %>%
  merge(taxonomy, by="taxon") %>%
  rename(genome=taxon) %>%
  arrange(lfc_gut_locationF_colon)

ggplot(ancom_mag_gut_table_fcolon_dileum,
       aes(x=lfc_gut_locationF_colon,
           y=fct_reorder(genome, lfc_gut_locationF_colon),
           fill=genus)) +
  geom_col() +
  geom_vline(xintercept=0) +
  scale_fill_manual(values=tax_color) +
  labs(x="log2 Fold Change (D_ileum vs F_colon)", y="Species",
       title = "D_ileum vs F_colon")+
  theme_bw()

ancom_mag_gut_table_kileum_dileum <- ancom_mag_gut_location$res %>%
  dplyr::select(taxon, lfc_gut_locationK_ileum, p_gut_locationK_ileum, q_gut_locationK_ileum) %>%
  filter(q_gut_locationK_ileum < 0.05) %>%
  arrange(q_gut_locationK_ileum) %>%
  merge(taxonomy, by="taxon") %>%
  rename(genome=taxon) %>%
  arrange(lfc_gut_locationK_ileum)

ggplot(ancom_mag_gut_table_kileum_dileum,
       aes(x=lfc_gut_locationK_ileum,
           y=fct_reorder(genome, lfc_gut_locationK_ileum),
           fill=genus)) +
  geom_col() +
  geom_vline(xintercept=0) +
  scale_fill_manual(values=tax_color) +
  labs(x="log2 Fold Change (D_ileum vs K_ileum)", y="Species",
       title = "D_ileum vs K_ileum")+
  theme_bw()

8.8 Volcano plots

phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
    right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    arrange(match(genome, genome_tree$tip.label)) %>%
    select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    pull(colors, name=phylum)

#pdf("figures/different_species_StrucZero_new_violin.pdf",width=12, height=6)
ancom_behaviour_res <- ancom_mag_gut$res %>%
  dplyr::select(genome=taxon, lfc_fox_behaviouraggr, p_fox_behaviouraggr) %>%
  left_join(genome_metadata, by = "genome") %>%
  mutate(phylum = ifelse(p_fox_behaviouraggr < 0.05, phylum, NA))


ggplot(ancom_behaviour_res , aes(x = lfc_fox_behaviouraggr, y = -log(p_fox_behaviouraggr), color = phylum)) +
  geom_point(size=3, show.legend = FALSE) +
  #xlim(c(-10,4)) +
  scale_color_manual(values = phylum_colors) +
  labs(color = "Significant phylum", x = "Log-fold difference (aggr vs unsel)", y = "-log(p-value)") +
  theme_classic()

#dev.off()
ancom_mag_gut_location_plot <- ancom_mag_gut_location$res %>%
  dplyr::select(genome=taxon, lfc_gut_locationF_colon, q_gut_locationF_colon) %>%
  left_join(genome_metadata, by = "genome") %>%
  mutate(phylum = ifelse(q_gut_locationF_colon < 0.05, phylum, NA)) 

ggplot(ancom_mag_gut_location_plot, aes(x = lfc_gut_locationF_colon, y = -log(q_gut_locationF_colon), color = genus)) +
  geom_point(size=3) +
  #xlim(c(-10,4)) +
  labs(color = "Significant genus", x = "Log-fold difference (D_ileum vs F_colon)", y = "-log(p-value)") +
  theme_classic()

library(forcats)

res_pair <- ancom_mag_gut_location$res_pair

res_long <- res_pair %>%
  select(taxon, starts_with("lfc_"), starts_with("p_"), starts_with("q_")) %>%
  pivot_longer(
    cols = -taxon,
    names_to = c("type", "comparison"),
    names_pattern = "^(lfc|p|q)_(.*)$"
  ) %>%
  pivot_wider(names_from = type, values_from = value)

#D_ileum was set as reference, so if only one group name (e.g. "gut_locationM_colon")
#it means D_ileum vs M_colon
res_long <- res_long %>%
  mutate(
    comparison = gsub("^gut_location", "", comparison),
    comparison = case_when(
      grepl("_gut_location", comparison) ~ gsub("_gut_location", " vs ", comparison),
      TRUE ~ paste0(comparison, " vs D_ileum")
    )
  )

#Filter to significant results (q<0.05)
res_sig <- res_long %>%
  filter(q < 0.05)

res_sig <- res_sig %>%
  left_join(taxonomy, by = "taxon")

ggplot(res_sig,
       aes(x = lfc,
           y = fct_reorder(taxon, lfc),
           fill = genus)) +
  geom_col() +
  geom_vline(xintercept = 0, linetype = "dashed") +
  scale_fill_manual(values = tax_color) +
  labs(
    x = "log2 Fold Change",
    y = "Taxon",
    title = "Significant Pairwise Comparisons (q < 0.05)"
  ) +
  facet_wrap(~comparison, scales = "free_y") +
  theme_bw() +
  theme(
    strip.text = element_text(face = "bold", size = 6),
    legend.position = "right"
  )

res_heat <- res_long %>%
  left_join(taxonomy, by = "taxon") %>%
  mutate(significant = q < 0.05)

#Keep only taxa that are significant in at least one comparison
sig_taxa <- res_heat %>%
  group_by(taxon) %>%
  filter(any(significant)) %>%
  ungroup()


ggplot(sig_taxa, aes(x = comparison, y = fct_reorder(genus, lfc), fill = lfc)) +
  geom_tile(color = "white") +
  geom_text(
    aes(label = ifelse(significant, "*", "")),
    color = "black",
    size = 5
  ) +
  scale_fill_viridis_c(
    option = "C",      
    direction = 1,      
    name = "log2FC"
  ) +
  labs(
    x = "Pairwise Comparison",
    y = "Taxon",
    title = "Differential Abundance Across Gut Locations (ANCOM-BC2)"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.grid = element_blank()
  )