Chapter 6 Differential abundance

load("resources/amplicon/data_standard_filt.Rdata")
load("resources/metagenomics/data_filtered.Rdata")

6.1 Phylum level

6.1.1 Metagenomics

phylo_samples <- sample_metadata %>% 
  column_to_rownames("sample") %>%
  sample_data()

phylo_genome <- genome_counts_filt %>% 
  select(one_of(c("genome", rownames(phylo_samples)))) %>%
  column_to_rownames("genome") %>%
  otu_table(., taxa_are_rows = TRUE)

phylo_taxonomy <- genome_metadata %>% 
  filter(genome %in% rownames(phylo_genome)) %>% 
  column_to_rownames("genome") %>% 
  dplyr::select(domain,phylum,class,order,family,genus,species) %>% 
  as.matrix() %>% 
  tax_table()

physeq_genome_phylum_metagen <- phyloseq(phylo_genome, phylo_samples, phylo_taxonomy)
load("resources/ancom_rand_output_phylum_metagen.Rdata")
ancom_rand_output_phylum_metagen = ancombc2(
  data = physeq_genome_phylum_metagen,
  assay_name = "counts",
  tax_level = "phylum",
  fix_formula = "Species",
  p_adj_method = "holm",
  pseudo_sens = TRUE,
  prv_cut = 0,
  lib_cut = 0,
  s0_perc = 0.05,
  group = "Species",
  struc_zero = TRUE,
  neg_lb = FALSE,
  alpha = 0.05,
  n_cl = 2,
  verbose = TRUE,
  global = TRUE,
  pairwise = TRUE,
  dunnet = FALSE,
  trend = FALSE,
  iter_control = list(
    tol = 1e-5,
    max_iter = 20,
    verbose = FALSE
  ),
  em_control = list(tol = 1e-5, max_iter = 100),
  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
  trend_control = NULL
)

save(ancom_rand_output_phylum_metagen, 
     file = "resources/ancom_rand_output_phylum_metagen.Rdata")

6.1.1.1 Structural zeros

Total number of structural zeros

# Extract structural zeros
structural_zeros <- ancom_rand_output_phylum_metagen$zero_ind

# Convert to a data frame for better readability
structural_zeros_df <- as.data.frame(structural_zeros) %>% 
  column_to_rownames(., "taxon")

structural_zero_taxa <- rownames(structural_zeros_df)[apply(structural_zeros_df, 1, any)]

# Identify in which groups each taxon is a structural zero
structural_zero_groups <- structural_zeros_df[structural_zero_taxa, , drop = FALSE] %>% 
  rename("Eb"=1) %>% 
  rename("Ha"=2) %>% 
  rename("Pk"=3) 

# Total number of structural zeros
nrow(structural_zero_groups)
[1] 10

Structural zeros in Cb

structural_zero_groups %>% 
  filter(Eb==TRUE) %>% 
  nrow()
[1] 3
structural_zero_groups %>% 
  filter(Eb==TRUE) %>% 
  rownames_to_column(., "phylum") %>% 
  select(phylum) %>% 
  pull()
[1] "Spirochaetota"   "Actinomycetota"  "Cyanobacteriota"

Structural zeros in Ha

structural_zero_groups %>% 
  filter(Ha==TRUE) %>% 
  rownames_to_column(., "phylum")%>% 
  select(phylum) %>% 
  pull()
[1] "Desulfobacterota" "Campylobacterota" "Elusimicrobiota"  "Fusobacteriota"   "Deferribacterota" "Planctomycetota"  "Cyanobacteriota"  "Synergistota"    

Structural zeros in Pk

structural_zero_groups %>% 
  filter(Pk==TRUE) %>% 
  rownames_to_column(., "phylum") %>% 
  select(phylum) %>% 
  pull()
[1] "Elusimicrobiota"  "Deferribacterota" "Planctomycetota"  "Spirochaetota"    "Actinomycetota"  

6.1.1.2 Enriched phyla

res_pair_phylum <- ancom_rand_output_phylum_metagen$res_pair

lfc_cols <- grep("^lfc_", colnames(res_pair_phylum), value = TRUE)
q_cols   <- gsub("^lfc_", "q_", lfc_cols)

plot_data_phylum <- lfc_cols %>%
  setNames(nm = .) %>%
  purrr::imap_dfr(function(lfc_col, name) {
    q_col <- gsub("^lfc_", "q_", lfc_col)
    
    res_pair_phylum %>%
      select(taxon, !!lfc_col, !!q_col) %>%
      rename(
        lfc = !!lfc_col,
        q   = !!q_col
      ) %>%
      mutate(contrast_key = name)
  }) %>%
  filter(!is.na(lfc), !is.na(q), q < 0.05) %>%
  mutate(
    contrast = case_when(
      contrast_key == "lfc_SpeciesHa" ~ "H. ariel vs C. bottae",
      contrast_key == "lfc_SpeciesPk" ~ "P. kuhlii vs C. bottae",
      contrast_key == "lfc_SpeciesPk_SpeciesHa" ~ "P. kuhlii vs H. ariel",
      TRUE ~ contrast_key
    ),
    direction = ifelse(lfc > 0, paste("Enriched in", sub(" vs.*", "", contrast)),
                             paste("Enriched in", sub(".*vs ", "", contrast)))
  )
plot_data_phylum %>% select(-contrast_key) %>% tt()
taxon lfc q contrast direction
Bacteroidota -2.469488 0.01595184 H. ariel vs C. bottae Enriched in C. bottae
Bacteroidota 2.599867 0.01595184 P. kuhlii vs H. ariel Enriched in P. kuhlii

6.1.2 Metabarcoding standard

load("resources/amplicon/data_standard_filt.Rdata")

phylo_samples <- sample_metadata_amplicon %>% 
  column_to_rownames("sample") %>%
  sample_data()

phylo_genome <- genome_counts_filt %>% 
  column_to_rownames(., "genome")%>%
  rename_with(~ paste0(., "_ampli"))%>%
  rownames_to_column(., "genome") %>% 
  select(one_of(c("genome", rownames(phylo_samples)))) %>%
  column_to_rownames("genome") %>%
  otu_table(., taxa_are_rows = TRUE)

phylo_taxonomy <- genome_metadata_ampli %>% 
  filter(genome %in% rownames(phylo_genome)) %>% 
  column_to_rownames("genome") %>% 
  dplyr::select(domain,phylum,class,order,family,genus,species) %>% 
  as.matrix() %>% 
  tax_table()

physeq_genome_metab <- phyloseq(phylo_genome, phylo_samples, phylo_taxonomy)

load("resources/ancom_rand_output_phylum_metab.Rdata")
ancom_rand_output_phylum_metab = ancombc2(
  data = physeq_genome_metab,
  assay_name = "counts",
  tax_level = "phylum",
  fix_formula = "Species",
  p_adj_method = "holm",
  pseudo_sens = TRUE,
  prv_cut = 0,
  lib_cut = 0,
  s0_perc = 0.05,
  group = "Species",
  struc_zero = TRUE,
  neg_lb = FALSE,
  alpha = 0.05,
  n_cl = 2,
  verbose = TRUE,
  global = TRUE,
  pairwise = TRUE,
  dunnet = FALSE,
  trend = FALSE,
  iter_control = list(
    tol = 1e-5,
    max_iter = 20,
    verbose = FALSE
  ),
  em_control = list(tol = 1e-5, max_iter = 100),
  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
  trend_control = NULL
)

save(ancom_rand_output_phylum_metab, 
     file = "resources/ancom_rand_output_phylum_metab.Rdata")

6.1.2.1 Structural zeros

Total number of structural zeros

# Extract structural zeros
structural_zeros <- ancom_rand_output_phylum_metab$zero_ind

# Convert to a data frame for better readability
structural_zeros_df <- as.data.frame(structural_zeros) %>% 
  column_to_rownames(., "taxon")

structural_zero_taxa <- rownames(structural_zeros_df)[apply(structural_zeros_df, 1, any)]

# Identify in which groups each taxon is a structural zero
structural_zero_groups <- structural_zeros_df[structural_zero_taxa, , drop = FALSE] %>% 
  rename("Eb"=1) %>% 
  rename("Ha"=2) %>% 
  rename("Pk"=3) 

# Total number of structural zeros
nrow(structural_zero_groups)
[1] 12

6.1.2.2 Enriched phyla

res_pair_phylum <- ancom_rand_output_phylum_metab$res_pair
lfc_cols <- grep("^lfc_", colnames(res_pair_phylum), value = TRUE)
q_cols   <- gsub("^lfc_", "q_", lfc_cols)

plot_data_phylum <- lfc_cols %>%
  setNames(nm = .) %>%
  purrr::imap_dfr(function(lfc_col, name) {
    q_col <- gsub("^lfc_", "q_", lfc_col)
    
    res_pair_phylum %>%
      select(taxon, !!lfc_col, !!q_col) %>%
      rename(
        lfc = !!lfc_col,
        q   = !!q_col
      ) %>%
      mutate(contrast_key = name)
  }) %>%
  filter(!is.na(lfc), !is.na(q), q < 0.05) %>%
  mutate(
    contrast = case_when(
      contrast_key == "lfc_SpeciesHa" ~ "H. ariel vs C. bottae",
      contrast_key == "lfc_SpeciesPk" ~ "P. kuhlii vs C. bottae",
      contrast_key == "lfc_SpeciesPk_SpeciesHa" ~ "P. kuhlii vs H. ariel",
      TRUE ~ contrast_key
    ),
    direction = ifelse(lfc > 0, paste("Enriched in", sub(" vs.*", "", contrast)),
                             paste("Enriched in", sub(".*vs ", "", contrast)))
  )
plot_data_phylum %>% select(-contrast_key) %>% 
  tt()
taxon lfc q contrast direction
Cyanobacteriota -1.606842 0.04418075 H. ariel vs C. bottae Enriched in C. bottae
Synergistota -2.144107 0.01274277 H. ariel vs C. bottae Enriched in C. bottae
Actinomycetota 1.955042 0.02923364 H. ariel vs C. bottae Enriched in H. ariel
Planctomycetota -2.071759 0.04200179 H. ariel vs C. bottae Enriched in C. bottae
Patescibacteria -2.113164 0.03296700 H. ariel vs C. bottae Enriched in C. bottae
Patescibacteria -2.040924 0.03348554 P. kuhlii vs C. bottae Enriched in C. bottae
Planctomycetota 2.068273 0.03746770 P. kuhlii vs H. ariel Enriched in P. kuhlii

6.1.3 Comparison between metagenomics and standard metabarcoding

load("resources/amplicon/data_standard_filt.Rdata")

sample_metadata_amplicon <- sample_metadata[c(1,9,10,13)] %>%
  mutate(sample = paste0(sample, "_ampli")) %>%
  mutate(method="Metabarcoding")

genome_metadata_ampli <- genome_metadata %>%
  mutate(
    class  = if_else(is.na(class), paste0(phylum, "_Unclassified"), class),
    order  = if_else(is.na(order),
                     if_else(str_detect(class, "Unclassified"), class, paste0(class, "_Unclassified")),
                     order),
    family = if_else(is.na(family),
                     if_else(str_detect(order, "Unclassified"), order, paste0(order, "_Unclassified")),
                     family),
    genus  = if_else(is.na(genus),
                     if_else(str_detect(family, "Unclassified"), family, paste0(family, "_Unclassified")),
                     genus)
  )

genome_tax_ampli <- genome_counts_filt %>% 
  column_to_rownames(., "genome")%>%
  rename_with(~ paste0(., "_ampli")) %>%
  rownames_to_column(., "genome")%>%
  left_join(genome_metadata_ampli, by = "genome")

family_level_agg_ampli <- genome_tax_ampli %>%
  select(-genome) %>%
  group_by(phylum, class, family) %>%
  summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>% 
    as.data.frame() %>% 
  select(-c(phylum,class))

phylum_level_agg_ampli <- genome_tax_ampli %>%
  select(-genome) %>%
  group_by(phylum) %>%
  summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>% 
    as.data.frame()
load("resources/metagenomics/data_filtered.Rdata")

genome_metadata <- genome_metadata %>%
  mutate(
    class  = if_else(str_trim(class) == "", paste0(phylum, "_Unclassified"), class),
    order  = if_else(str_trim(order) == "",
                     if_else(str_detect(class, "Unclassified"), class, paste0(class, "_Unclassified")),
                     order),
    family = if_else(str_trim(family) == "",
                     if_else(str_detect(order, "Unclassified"), order, paste0(order, "_Unclassified")),
                     family),
    genus  = if_else(str_trim(genus) == "",
                     if_else(str_detect(family, "Unclassified"), family, paste0(family, "_Unclassified")),
                     genus)
  )

genome_tax <- genome_counts_filt %>% 
  left_join(genome_metadata[c(1,3,4,6)], by = "genome")

# Aggregate counts at the Family level
family_level_agg <- genome_tax %>%
  select(-genome) %>%
  group_by(phylum, class, family) %>%
  summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>% 
  as.data.frame() %>% 
  select(-c(phylum,class))

# Aggregate counts at the Phylum level
phylum_level_agg <- genome_tax %>%
  select(-genome) %>%
  group_by(phylum) %>%
  summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>% 
  as.data.frame() 
merged_table <- full_join(family_level_agg_ampli,family_level_agg, 
          by = c("family"))%>%
  mutate(across(where(is.numeric), ~replace_na(., 0)))

merged_table_phylum <- full_join(phylum_level_agg_ampli,phylum_level_agg, 
          by = c("phylum"))%>%
  mutate(across(where(is.numeric), ~replace_na(., 0)))

sample_shot <- sample_metadata %>% 
  select(-c(Habitat,Longitude, Latitude, Region))%>%
  mutate(method="Metagenomics")

all_samples <- rbind(sample_shot, sample_metadata_amplicon)
phylo_samples <- all_samples %>%
  column_to_rownames("sample") %>%
  sample_data()
phylo_genome <- merged_table_phylum %>%
  select(one_of(c("phylum", rownames(phylo_samples)))) %>%
  column_to_rownames("phylum") %>%
  otu_table(., taxa_are_rows = TRUE)

physeq_genome_filtered <- phyloseq(phylo_genome, phylo_samples)
load("resources/ancom_rand_output_phylum_appro.Rdata")
ancom_rand_output_phylum_appro = ancombc2(
  data = physeq_genome_filtered,
  assay_name = "counts",
  tax_level = NULL,
  fix_formula = "method",
  rand_formula = "(1|Species)",
  p_adj_method = "holm",
  pseudo_sens = TRUE,
  prv_cut = 0,
  lib_cut = 0,
  s0_perc = 0.05,
  group = "method",
  struc_zero = TRUE,
  neg_lb = FALSE,
  alpha = 0.05,
  n_cl = 2,
  verbose = TRUE,
  global = TRUE,
  pairwise = TRUE,
  dunnet = TRUE,
  trend = FALSE,
  iter_control = list(
    tol = 1e-5,
    max_iter = 20,
    verbose = FALSE
  ),
  em_control = list(tol = 1e-5, max_iter = 100),
  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
  lme_control = lme4::lmerControl(),
  trend_control = NULL
)

save(ancom_rand_output_phylum_appro, 
     file = "resources/ancom_rand_output_phylum_appro.Rdata")

6.1.3.1 Enriched phyla

ancom_rand_output_phylum_appro$res %>% filter(q_methodMetagenomics<0.05)
           taxon lfc_(Intercept) lfc_methodMetagenomics se_(Intercept) se_methodMetagenomics W_(Intercept) W_methodMetagenomics p_(Intercept)
1 Pseudomonadota        2.279875               -1.95287      0.4412564              0.601319      5.166781            -3.247644   0.001679214
  p_methodMetagenomics q_(Intercept) q_methodMetagenomics diff_(Intercept) diff_methodMetagenomics passed_ss_(Intercept) passed_ss_methodMetagenomics
1          0.001601129    0.02080063           0.02081467             TRUE                    TRUE                  TRUE                         TRUE