Chapter 6 Differential abundance
load("resources/amplicon/data_standard_filt.Rdata")
load("resources/metagenomics/data_filtered.Rdata")6.1 Phylum level
6.1.1 Metagenomics
phylo_samples <- sample_metadata %>%
column_to_rownames("sample") %>%
sample_data()
phylo_genome <- genome_counts_filt %>%
select(one_of(c("genome", rownames(phylo_samples)))) %>%
column_to_rownames("genome") %>%
otu_table(., taxa_are_rows = TRUE)
phylo_taxonomy <- genome_metadata %>%
filter(genome %in% rownames(phylo_genome)) %>%
column_to_rownames("genome") %>%
dplyr::select(domain,phylum,class,order,family,genus,species) %>%
as.matrix() %>%
tax_table()
physeq_genome_phylum_metagen <- phyloseq(phylo_genome, phylo_samples, phylo_taxonomy)
load("resources/ancom_rand_output_phylum_metagen.Rdata")ancom_rand_output_phylum_metagen = ancombc2(
data = physeq_genome_phylum_metagen,
assay_name = "counts",
tax_level = "phylum",
fix_formula = "Species",
p_adj_method = "holm",
pseudo_sens = TRUE,
prv_cut = 0,
lib_cut = 0,
s0_perc = 0.05,
group = "Species",
struc_zero = TRUE,
neg_lb = FALSE,
alpha = 0.05,
n_cl = 2,
verbose = TRUE,
global = TRUE,
pairwise = TRUE,
dunnet = FALSE,
trend = FALSE,
iter_control = list(
tol = 1e-5,
max_iter = 20,
verbose = FALSE
),
em_control = list(tol = 1e-5, max_iter = 100),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
trend_control = NULL
)
save(ancom_rand_output_phylum_metagen,
file = "resources/ancom_rand_output_phylum_metagen.Rdata")6.1.1.1 Structural zeros
Total number of structural zeros
# Extract structural zeros
structural_zeros <- ancom_rand_output_phylum_metagen$zero_ind
# Convert to a data frame for better readability
structural_zeros_df <- as.data.frame(structural_zeros) %>%
column_to_rownames(., "taxon")
structural_zero_taxa <- rownames(structural_zeros_df)[apply(structural_zeros_df, 1, any)]
# Identify in which groups each taxon is a structural zero
structural_zero_groups <- structural_zeros_df[structural_zero_taxa, , drop = FALSE] %>%
rename("Eb"=1) %>%
rename("Ha"=2) %>%
rename("Pk"=3)
# Total number of structural zeros
nrow(structural_zero_groups)[1] 10
Structural zeros in Cb
[1] 3
structural_zero_groups %>%
filter(Eb==TRUE) %>%
rownames_to_column(., "phylum") %>%
select(phylum) %>%
pull()[1] "Spirochaetota" "Actinomycetota" "Cyanobacteriota"
Structural zeros in Ha
structural_zero_groups %>%
filter(Ha==TRUE) %>%
rownames_to_column(., "phylum")%>%
select(phylum) %>%
pull()[1] "Desulfobacterota" "Campylobacterota" "Elusimicrobiota" "Fusobacteriota" "Deferribacterota" "Planctomycetota" "Cyanobacteriota" "Synergistota"
Structural zeros in Pk
structural_zero_groups %>%
filter(Pk==TRUE) %>%
rownames_to_column(., "phylum") %>%
select(phylum) %>%
pull()[1] "Elusimicrobiota" "Deferribacterota" "Planctomycetota" "Spirochaetota" "Actinomycetota"
6.1.1.2 Enriched phyla
res_pair_phylum <- ancom_rand_output_phylum_metagen$res_pair
lfc_cols <- grep("^lfc_", colnames(res_pair_phylum), value = TRUE)
q_cols <- gsub("^lfc_", "q_", lfc_cols)
plot_data_phylum <- lfc_cols %>%
setNames(nm = .) %>%
purrr::imap_dfr(function(lfc_col, name) {
q_col <- gsub("^lfc_", "q_", lfc_col)
res_pair_phylum %>%
select(taxon, !!lfc_col, !!q_col) %>%
rename(
lfc = !!lfc_col,
q = !!q_col
) %>%
mutate(contrast_key = name)
}) %>%
filter(!is.na(lfc), !is.na(q), q < 0.05) %>%
mutate(
contrast = case_when(
contrast_key == "lfc_SpeciesHa" ~ "H. ariel vs C. bottae",
contrast_key == "lfc_SpeciesPk" ~ "P. kuhlii vs C. bottae",
contrast_key == "lfc_SpeciesPk_SpeciesHa" ~ "P. kuhlii vs H. ariel",
TRUE ~ contrast_key
),
direction = ifelse(lfc > 0, paste("Enriched in", sub(" vs.*", "", contrast)),
paste("Enriched in", sub(".*vs ", "", contrast)))
)
plot_data_phylum %>% select(-contrast_key) %>% tt()| taxon | lfc | q | contrast | direction |
|---|---|---|---|---|
| Bacteroidota | -2.469488 | 0.01595184 | H. ariel vs C. bottae | Enriched in C. bottae |
| Bacteroidota | 2.599867 | 0.01595184 | P. kuhlii vs H. ariel | Enriched in P. kuhlii |
6.1.2 Metabarcoding standard
load("resources/amplicon/data_standard_filt.Rdata")
phylo_samples <- sample_metadata_amplicon %>%
column_to_rownames("sample") %>%
sample_data()
phylo_genome <- genome_counts_filt %>%
column_to_rownames(., "genome")%>%
rename_with(~ paste0(., "_ampli"))%>%
rownames_to_column(., "genome") %>%
select(one_of(c("genome", rownames(phylo_samples)))) %>%
column_to_rownames("genome") %>%
otu_table(., taxa_are_rows = TRUE)
phylo_taxonomy <- genome_metadata_ampli %>%
filter(genome %in% rownames(phylo_genome)) %>%
column_to_rownames("genome") %>%
dplyr::select(domain,phylum,class,order,family,genus,species) %>%
as.matrix() %>%
tax_table()
physeq_genome_metab <- phyloseq(phylo_genome, phylo_samples, phylo_taxonomy)
load("resources/ancom_rand_output_phylum_metab.Rdata")ancom_rand_output_phylum_metab = ancombc2(
data = physeq_genome_metab,
assay_name = "counts",
tax_level = "phylum",
fix_formula = "Species",
p_adj_method = "holm",
pseudo_sens = TRUE,
prv_cut = 0,
lib_cut = 0,
s0_perc = 0.05,
group = "Species",
struc_zero = TRUE,
neg_lb = FALSE,
alpha = 0.05,
n_cl = 2,
verbose = TRUE,
global = TRUE,
pairwise = TRUE,
dunnet = FALSE,
trend = FALSE,
iter_control = list(
tol = 1e-5,
max_iter = 20,
verbose = FALSE
),
em_control = list(tol = 1e-5, max_iter = 100),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
trend_control = NULL
)
save(ancom_rand_output_phylum_metab,
file = "resources/ancom_rand_output_phylum_metab.Rdata")6.1.2.1 Structural zeros
Total number of structural zeros
# Extract structural zeros
structural_zeros <- ancom_rand_output_phylum_metab$zero_ind
# Convert to a data frame for better readability
structural_zeros_df <- as.data.frame(structural_zeros) %>%
column_to_rownames(., "taxon")
structural_zero_taxa <- rownames(structural_zeros_df)[apply(structural_zeros_df, 1, any)]
# Identify in which groups each taxon is a structural zero
structural_zero_groups <- structural_zeros_df[structural_zero_taxa, , drop = FALSE] %>%
rename("Eb"=1) %>%
rename("Ha"=2) %>%
rename("Pk"=3)
# Total number of structural zeros
nrow(structural_zero_groups)[1] 12
6.1.2.2 Enriched phyla
res_pair_phylum <- ancom_rand_output_phylum_metab$res_pair
lfc_cols <- grep("^lfc_", colnames(res_pair_phylum), value = TRUE)
q_cols <- gsub("^lfc_", "q_", lfc_cols)
plot_data_phylum <- lfc_cols %>%
setNames(nm = .) %>%
purrr::imap_dfr(function(lfc_col, name) {
q_col <- gsub("^lfc_", "q_", lfc_col)
res_pair_phylum %>%
select(taxon, !!lfc_col, !!q_col) %>%
rename(
lfc = !!lfc_col,
q = !!q_col
) %>%
mutate(contrast_key = name)
}) %>%
filter(!is.na(lfc), !is.na(q), q < 0.05) %>%
mutate(
contrast = case_when(
contrast_key == "lfc_SpeciesHa" ~ "H. ariel vs C. bottae",
contrast_key == "lfc_SpeciesPk" ~ "P. kuhlii vs C. bottae",
contrast_key == "lfc_SpeciesPk_SpeciesHa" ~ "P. kuhlii vs H. ariel",
TRUE ~ contrast_key
),
direction = ifelse(lfc > 0, paste("Enriched in", sub(" vs.*", "", contrast)),
paste("Enriched in", sub(".*vs ", "", contrast)))
)
plot_data_phylum %>% select(-contrast_key) %>%
tt()| taxon | lfc | q | contrast | direction |
|---|---|---|---|---|
| Cyanobacteriota | -1.606842 | 0.04418075 | H. ariel vs C. bottae | Enriched in C. bottae |
| Synergistota | -2.144107 | 0.01274277 | H. ariel vs C. bottae | Enriched in C. bottae |
| Actinomycetota | 1.955042 | 0.02923364 | H. ariel vs C. bottae | Enriched in H. ariel |
| Planctomycetota | -2.071759 | 0.04200179 | H. ariel vs C. bottae | Enriched in C. bottae |
| Patescibacteria | -2.113164 | 0.03296700 | H. ariel vs C. bottae | Enriched in C. bottae |
| Patescibacteria | -2.040924 | 0.03348554 | P. kuhlii vs C. bottae | Enriched in C. bottae |
| Planctomycetota | 2.068273 | 0.03746770 | P. kuhlii vs H. ariel | Enriched in P. kuhlii |
6.1.3 Comparison between metagenomics and standard metabarcoding
load("resources/amplicon/data_standard_filt.Rdata")
sample_metadata_amplicon <- sample_metadata[c(1,9,10,13)] %>%
mutate(sample = paste0(sample, "_ampli")) %>%
mutate(method="Metabarcoding")
genome_metadata_ampli <- genome_metadata %>%
mutate(
class = if_else(is.na(class), paste0(phylum, "_Unclassified"), class),
order = if_else(is.na(order),
if_else(str_detect(class, "Unclassified"), class, paste0(class, "_Unclassified")),
order),
family = if_else(is.na(family),
if_else(str_detect(order, "Unclassified"), order, paste0(order, "_Unclassified")),
family),
genus = if_else(is.na(genus),
if_else(str_detect(family, "Unclassified"), family, paste0(family, "_Unclassified")),
genus)
)
genome_tax_ampli <- genome_counts_filt %>%
column_to_rownames(., "genome")%>%
rename_with(~ paste0(., "_ampli")) %>%
rownames_to_column(., "genome")%>%
left_join(genome_metadata_ampli, by = "genome")
family_level_agg_ampli <- genome_tax_ampli %>%
select(-genome) %>%
group_by(phylum, class, family) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame() %>%
select(-c(phylum,class))
phylum_level_agg_ampli <- genome_tax_ampli %>%
select(-genome) %>%
group_by(phylum) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame()load("resources/metagenomics/data_filtered.Rdata")
genome_metadata <- genome_metadata %>%
mutate(
class = if_else(str_trim(class) == "", paste0(phylum, "_Unclassified"), class),
order = if_else(str_trim(order) == "",
if_else(str_detect(class, "Unclassified"), class, paste0(class, "_Unclassified")),
order),
family = if_else(str_trim(family) == "",
if_else(str_detect(order, "Unclassified"), order, paste0(order, "_Unclassified")),
family),
genus = if_else(str_trim(genus) == "",
if_else(str_detect(family, "Unclassified"), family, paste0(family, "_Unclassified")),
genus)
)
genome_tax <- genome_counts_filt %>%
left_join(genome_metadata[c(1,3,4,6)], by = "genome")
# Aggregate counts at the Family level
family_level_agg <- genome_tax %>%
select(-genome) %>%
group_by(phylum, class, family) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame() %>%
select(-c(phylum,class))
# Aggregate counts at the Phylum level
phylum_level_agg <- genome_tax %>%
select(-genome) %>%
group_by(phylum) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame() merged_table <- full_join(family_level_agg_ampli,family_level_agg,
by = c("family"))%>%
mutate(across(where(is.numeric), ~replace_na(., 0)))
merged_table_phylum <- full_join(phylum_level_agg_ampli,phylum_level_agg,
by = c("phylum"))%>%
mutate(across(where(is.numeric), ~replace_na(., 0)))
sample_shot <- sample_metadata %>%
select(-c(Habitat,Longitude, Latitude, Region))%>%
mutate(method="Metagenomics")
all_samples <- rbind(sample_shot, sample_metadata_amplicon)phylo_samples <- all_samples %>%
column_to_rownames("sample") %>%
sample_data()
phylo_genome <- merged_table_phylum %>%
select(one_of(c("phylum", rownames(phylo_samples)))) %>%
column_to_rownames("phylum") %>%
otu_table(., taxa_are_rows = TRUE)
physeq_genome_filtered <- phyloseq(phylo_genome, phylo_samples)
load("resources/ancom_rand_output_phylum_appro.Rdata")ancom_rand_output_phylum_appro = ancombc2(
data = physeq_genome_filtered,
assay_name = "counts",
tax_level = NULL,
fix_formula = "method",
rand_formula = "(1|Species)",
p_adj_method = "holm",
pseudo_sens = TRUE,
prv_cut = 0,
lib_cut = 0,
s0_perc = 0.05,
group = "method",
struc_zero = TRUE,
neg_lb = FALSE,
alpha = 0.05,
n_cl = 2,
verbose = TRUE,
global = TRUE,
pairwise = TRUE,
dunnet = TRUE,
trend = FALSE,
iter_control = list(
tol = 1e-5,
max_iter = 20,
verbose = FALSE
),
em_control = list(tol = 1e-5, max_iter = 100),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
lme_control = lme4::lmerControl(),
trend_control = NULL
)
save(ancom_rand_output_phylum_appro,
file = "resources/ancom_rand_output_phylum_appro.Rdata")6.1.3.1 Enriched phyla
taxon lfc_(Intercept) lfc_methodMetagenomics se_(Intercept) se_methodMetagenomics W_(Intercept) W_methodMetagenomics p_(Intercept)
1 Pseudomonadota 2.279875 -1.95287 0.4412564 0.601319 5.166781 -3.247644 0.001679214
p_methodMetagenomics q_(Intercept) q_methodMetagenomics diff_(Intercept) diff_methodMetagenomics passed_ss_(Intercept) passed_ss_methodMetagenomics
1 0.001601129 0.02080063 0.02081467 TRUE TRUE TRUE TRUE