Chapter 6 Differential abundance

load("resources/data.Rdata")

6.1 Create phyloseq object

# phyloseq object considering structual zeros
phylo_samples <- sample_metadata %>%
  column_to_rownames("sample") %>%
  sample_data() # convert to phyloseq sample_data object

phylo_genome <- genome_counts_filt %>%
  column_to_rownames("genome") %>%
  mutate_all(~ replace(., . == 0, 0.00001)) %>% # add pseudo counts to avoid structural zero issues (note this approach can be improved!) %>%
  mutate_all(~./sum(.)) %>% #apply TSS nornalisation
  otu_table(., taxa_are_rows = TRUE)

phylo_taxonomy <- genome_metadata %>%
  filter(genome %in% rownames(phylo_genome)) %>% # remove structural zeros
  mutate(genome2 = genome) %>% # create a pseudo genome name column
  column_to_rownames("genome2") %>%
  dplyr::select(domain, phylum, class, order, family, genus, species, genome) %>% # add an additional taxonomic level to ensure genome-level analysis (as no all genomes have species-level taxonomic assignments. Otherwise, ANCOMBC2 aggregates analyses per species)
  as.matrix() %>%
  tax_table() # convert to phyloseq tax_table object

phyloseq <- phyloseq(phylo_genome, phylo_taxonomy, phylo_samples)

6.2 Run ANCOMBC2

6.2.1 MAG level

differential_abundance <- ancombc2(
  data = phyloseq,
  assay_name = "counts",
  tax_level = NULL, # change to agglomerate analysis to a higher taxonomic range
  fix_formula = "type", # fixed variable(s)
  rand_formula = "(1|individual)",
  p_adj_method = "holm",
  pseudo_sens = TRUE,
  prv_cut = 0,
  lib_cut = 0,
  s0_perc = 0.05,
  group = NULL,
  struc_zero = FALSE,
  neg_lb = FALSE,
  alpha = 0.05,
  n_cl = 2,
  verbose = TRUE,
  global = FALSE,
  pairwise = FALSE,
  dunnet = FALSE,
  trend = FALSE,
  iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
  em_control = list(tol = 1e-5, max_iter = 100),
  lme_control = lme4::lmerControl(),
  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
  trend_control = NULL
)

# Save differential abundance to data object
save(sample_metadata, 
     genome_metadata, 
     read_counts, 
     genome_counts, 
     genome_counts_filt, 
     genome_tree,
     genome_gifts, 
     phylum_colors,
     data_stats,
     differential_abundance,
     file = "resources/data.Rdata")
tax <- data.frame(phyloseq@tax_table) %>%
  rownames_to_column(., "taxon")

differential_abundance_table <- differential_abundance$res %>%
  dplyr::select(genome=taxon, lfc_typefeces, p_typefeces) %>%
  filter(p_typefeces < 0.05) %>%
  dplyr::arrange(p_typefeces) %>%
  merge(., tax, by = "genome")

differential_abundance_table %>%
  select(genome,order,phylum, lfc_typefeces, p_typefeces) %>%
  tt()
tinytable_hj59du549s5cjieffdic
genome order phylum lfc_typefeces p_typefeces
all:bin_000024 o__Selenomonadales p__Bacillota_C 4.678225 3.556000e-02
all:bin_000076 o__Bacteroidales p__Bacteroidota 5.375968 1.743752e-02
feces:bin_000019 o__Lachnospirales p__Bacillota_A 4.960703 1.407321e-02
feces:bin_000049 o__Bacteroidales p__Bacteroidota 5.240488 2.419249e-02
feces:bin_000066 o__Bacteroidales p__Bacteroidota 5.427702 1.063533e-02
feces:bin_000107 o__Bacteroidales p__Bacteroidota 5.034951 3.680645e-02
feces:bin_000114 o__Bacteroidales p__Bacteroidota 7.759443 4.169144e-05
Sg1:bin_000001 o__Enterobacterales p__Pseudomonadota -6.086702 3.377969e-02
Sg1:bin_000002 o__Bacteroidales p__Bacteroidota -4.855074 3.106069e-02
Sg1:bin_000003 o__Bacteroidales p__Bacteroidota 5.850958 6.260054e-03
Sg1:bin_000010 o__Verrucomicrobiales p__Verrucomicrobiota -4.870493 2.994023e-02
Sg1:bin_000015 o__Gastranaerophilales p__Cyanobacteriota -4.920420 2.656571e-02
Sg1:bin_000021 o__Bacteroidales p__Bacteroidota -4.922566 2.642784e-02
Sg10:bin_000001 o__Enterobacterales p__Pseudomonadota -7.012111 4.578780e-02
Sg10:bin_000009 o__Oscillospirales p__Bacillota_A -4.937513 3.899201e-02
Sg2:bin_000006 o__Bacteroidales p__Bacteroidota 5.779402 2.162388e-02
Sg3:bin_000001 o__Campylobacterales p__Campylobacterota -6.911601 2.834473e-02
Sg3:bin_000008 o__Verrucomicrobiales p__Verrucomicrobiota -4.897028 2.924217e-02
Sg4:bin_000004 o__Desulfovibrionales p__Desulfobacterota 6.891804 2.331371e-04
Sg6:bin_000002 o__Lachnospirales p__Bacillota_A 5.011067 3.919461e-02
Sg6:bin_000003 o__Lachnospirales p__Bacillota_A 5.537628 9.267341e-03
Sg9:bin_000004 o__Bacteroidales p__Bacteroidota 7.134700 3.313875e-03
Sg9:bin_000007 o__Bacteroidales p__Bacteroidota 6.976235 4.242228e-03
ggplot(differential_abundance_table, aes(x = forcats::fct_rev(genome), y = lfc_typefeces, color = phylum)) +
  geom_point(size = 3) +
  scale_color_manual(values = phylum_colors[-c(3,4)]) +
  geom_hline(yintercept = 0) +
  coord_flip() +
  facet_grid(phylum ~ ., scales = "free", space = "free") +
  theme(
    panel.background = element_blank(),
    axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
    axis.title.x = element_blank(),
    strip.background = element_blank(), 
    strip.text = element_blank()
  ) +
  xlab("Genome") +
  ylab("log2FoldChange") +
  guides(col = guide_legend("Phylum"))

differential_abundance$res %>%
  na.omit() %>%
  dplyr::select(genome=taxon, lfc_typefeces, p_typefeces) %>%
  left_join(genome_metadata, by = join_by(genome == genome)) %>%
  mutate(phylum = ifelse(p_typefeces < 0.05, phylum, NA)) %>%
  ggplot(., aes(x = lfc_typefeces, y = -log(p_typefeces), color = phylum)) +
  geom_point() +
  #xlim(c(-10,4)) +
  scale_color_manual(values = phylum_colors[-c(3,4)]) +
  labs(color = "Significance", x = "Log-fold difference between sample types", y = "p-value") +
  theme_classic()

6.2.2 Genus level

differential_abundance_genus <- ancombc2(
  data = phyloseq,
  assay_name = "counts",
  tax_level = "genus", # change to agglomerate analysis to a higher taxonomic range
  fix_formula = "type", # fixed variable(s)
  rand_formula = "(1|individual)",
  p_adj_method = "holm",
  pseudo_sens = TRUE,
  prv_cut = 0,
  lib_cut = 0,
  s0_perc = 0.05,
  group = NULL,
  struc_zero = FALSE,
  neg_lb = FALSE,
  alpha = 0.05,
  n_cl = 2,
  verbose = TRUE,
  global = FALSE,
  pairwise = FALSE,
  dunnet = FALSE,
  trend = FALSE,
  iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
  em_control = list(tol = 1e-5, max_iter = 100),
  lme_control = lme4::lmerControl(),
  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
  trend_control = NULL
)

# Save differential abundance to data object
save(sample_metadata, 
     genome_metadata, 
     read_counts, 
     genome_counts, 
     genome_counts_filt, 
     genome_tree,
     genome_gifts, 
     phylum_colors,
     data_stats,
     differential_abundance,
     differential_abundance_genus,
     file = "resources/data.Rdata")
tax <- data.frame(phyloseq@tax_table) %>%
  rownames_to_column(., "taxon")

differential_abundance_genus$res %>%
  filter(nchar(taxon) > 5) %>%
  dplyr::select(taxon=taxon, lfc_typefeces, p_typefeces) %>%
  filter(p_typefeces < 0.05) %>%
  dplyr::arrange(lfc_typefeces) %>%
  left_join(tax %>% select(-c(taxon,genome,species)) %>% unique(), by = join_by(taxon==genus)) %>%
  tt()
tinytable_8r23yhp5xqe4k8m9bbmf
taxon lfc_typefeces p_typefeces domain phylum class order family
g__Hafnia -7.012111 3.268748e-02 d__Bacteria p__Pseudomonadota c__Gammaproteobacteria o__Enterobacterales f__Enterobacteriaceae
g__Salmonella -6.086702 1.837262e-02 d__Bacteria p__Pseudomonadota c__Gammaproteobacteria o__Enterobacterales f__Enterobacteriaceae
g__Onthovicinus -4.937513 1.551107e-02 d__Bacteria p__Bacillota_A c__Clostridia o__Oscillospirales f__Acutalibacteraceae
g__Limenecus -4.920420 7.128674e-03 d__Bacteria p__Cyanobacteriota c__Vampirovibrionia o__Gastranaerophilales f__Gastranaerophilaceae
g__Anaerotruncus -4.811549 3.874874e-02 d__Bacteria p__Bacillota_A c__Clostridia o__Oscillospirales f__Ruminococcaceae
g__Lawsonibacter 3.482235 4.253668e-02 d__Bacteria p__Bacillota_A c__Clostridia o__Oscillospirales f__Oscillospiraceae
g__Hungatella 4.344792 2.289275e-02 d__Bacteria p__Bacillota_A c__Clostridia o__Lachnospirales f__Lachnospiraceae
g__Akkermansia 5.197450 1.585097e-06 d__Bacteria p__Verrucomicrobiota c__Verrucomicrobiae o__Verrucomicrobiales f__Akkermansiaceae
g__Clostridium_Q 5.261332 1.892609e-03 d__Bacteria p__Bacillota_A c__Clostridia o__Lachnospirales f__Lachnospiraceae
g__Odoribacter 5.646793 1.364548e-06 d__Bacteria p__Bacteroidota c__Bacteroidia o__Bacteroidales f__Marinifilaceae
g__Acetatifactor 5.709236 3.545806e-06 d__Bacteria p__Bacillota_A c__Clostridia o__Lachnospirales f__Lachnospiraceae
g__Enterocloster 5.802455 1.348610e-04 d__Bacteria p__Bacillota_A c__Clostridia o__Lachnospirales f__Lachnospiraceae
g__OM05-12 5.883081 4.942230e-07 d__Bacteria p__Bacteroidota c__Bacteroidia o__Bacteroidales f__Bacteroidaceae
g__Eisenbergiella 6.066911 6.761798e-07 d__Bacteria p__Bacillota_A c__Clostridia o__Lachnospirales f__Lachnospiraceae
g__Alistipes 6.156010 2.493252e-05 d__Bacteria p__Bacteroidota c__Bacteroidia o__Bacteroidales f__Rikenellaceae
g__Bilophila 6.350192 1.319505e-06 d__Bacteria p__Desulfobacterota c__Desulfovibrionia o__Desulfovibrionales f__Desulfovibrionaceae
g__Phocaeicola 6.384248 8.842193e-04 d__Bacteria p__Bacteroidota c__Bacteroidia o__Bacteroidales f__Bacteroidaceae
g__Parabacteroides 6.393685 3.758112e-07 d__Bacteria p__Bacteroidota c__Bacteroidia o__Bacteroidales f__Tannerellaceae
g__Bacteroides 6.522811 5.553197e-04 d__Bacteria p__Bacteroidota c__Bacteroidia o__Bacteroidales f__Bacteroidaceae
g__Ventrimonas 6.824415 3.245688e-07 d__Bacteria p__Bacillota_A c__Clostridia o__Lachnospirales f__Lachnospiraceae

6.2.3 Phylum level

differential_abundance_phylum <- ancombc2(
  data = phyloseq,
  assay_name = "counts",
  tax_level = "phylum", # change to agglomerate analysis to a higher taxonomic range
  fix_formula = "type", # fixed variable(s)
  rand_formula = "(1|individual)",
  p_adj_method = "holm",
  pseudo_sens = TRUE,
  prv_cut = 0,
  lib_cut = 0,
  s0_perc = 0.05,
  group = NULL,
  struc_zero = FALSE,
  neg_lb = FALSE,
  alpha = 0.05,
  n_cl = 2,
  verbose = TRUE,
  global = FALSE,
  pairwise = FALSE,
  dunnet = FALSE,
  trend = FALSE,
  iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
  em_control = list(tol = 1e-5, max_iter = 100),
  lme_control = lme4::lmerControl(),
  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
  trend_control = NULL
)

# Save differential abundance to data object
save(sample_metadata, 
     genome_metadata, 
     read_counts, 
     genome_counts, 
     genome_counts_filt, 
     genome_tree,
     genome_gifts, 
     phylum_colors,
     data_stats,
     differential_abundance,
     differential_abundance_genus,
     differential_abundance_phylum,
     file = "resources/data.Rdata")
differential_abundance_phylum_table <- differential_abundance_phylum$res %>%
  dplyr::select(taxon=taxon, lfc_typefeces, p_typefeces) %>%
  #filter(p_typefeces < 0.05) %>%
  dplyr::arrange(lfc_typefeces)


phylum_colors2 <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
    left_join(differential_abundance_phylum_table, by=join_by(phylum == taxon)) %>%
    arrange(match(phylum, differential_abundance_phylum_table$taxon)) %>%
    select(phylum, colors) %>% 
    unique() %>%
    select(colors) %>%
    pull()

differential_abundance_phylum_table %>%
  filter(p_typefeces < 0.05) %>%
  dplyr::arrange(lfc_typefeces) %>%
  tt()
tinytable_ltw0xbe08noo7mk0ellf
taxon lfc_typefeces p_typefeces
p__Campylobacterota -6.911601 1.700822e-02
p__Pseudomonadota -6.284413 2.017873e-02
p__Cyanobacteriota -4.920420 7.244201e-03
p__Bacillota_C 4.678225 1.119498e-02
p__Verrucomicrobiota 5.197450 1.827898e-06
p__Bacillota_A 5.388425 1.184674e-03
p__Bacteroidota 5.765525 7.008059e-04
p__Desulfobacterota 6.168047 1.160127e-06
differential_abundance_phylum_table %>%
      mutate(taxon=factor(taxon,levels=differential_abundance_phylum_table$taxon)) %>%
      ggplot(aes(x = lfc_typefeces, y = forcats::fct_rev(taxon), fill = taxon)) +
        geom_col(size = 2) +
        scale_fill_manual(values = phylum_colors2) +
        geom_hline(yintercept = 0) +
        geom_vline(xintercept = 4, linetype="dashed", color = "grey", size=1) +
        geom_vline(xintercept = -4, linetype="dashed", color = "grey", size=1) +
        theme(
          panel.background = element_blank(),
          axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
          axis.title.x = element_blank(),
          strip.background = element_blank(), 
          strip.text = element_blank()
        ) +
        labs(x="log2FoldChange", y="Phylum")