Chapter 9 Differential abundance analysis
set.seed(1234)
load("data/data_filtered.Rdata")
count_phy <- genome_counts_filt %>%
column_to_rownames(var = "genome") %>%
otu_table(., taxa_are_rows = T)
sample_info_tab_phy <- sample_metadata %>%
column_to_rownames(var = "sample") %>%
sample_data()
TAX <- genome_metadata %>%
column_to_rownames(var = "genome") %>%
select(1:7) %>%
as.matrix() %>%
tax_table()
tree <- phy_tree(genome_tree)
physeq_all = phyloseq(count_phy, TAX, sample_info_tab_phy, tree)ancom_rand_output_mag = ancombc2(data = physeq_all,
assay_name = "counts",
tax_level = NULL,
fix_formula = "Bd_status",
p_adj_method = "BH",
pseudo_sens = TRUE,
prv_cut =0,
lib_cut = 0,
s0_perc = 0.05,
group = "Bd_status",
struc_zero = TRUE,
neg_lb = FALSE,
alpha = 0.05,
n_cl = 2,
verbose = TRUE,
global = FALSE,
pairwise = FALSE,
dunnet = FALSE,
trend = FALSE,
iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
em_control = list(tol = 1e-5, max_iter = 100),
mdfdr_control = list(fwer_ctrl_method = "BH", B = 100),
trend_control = NULL)9.1 Structural zeros
Total number of structural zeros
# Extract structural zeros
structural_zeros <- ancom_rand_output_mag$zero_ind
# Convert to a data frame for better readability
structural_zeros_df <- as.data.frame(structural_zeros) %>%
column_to_rownames(., "taxon")
structural_zero_taxa <- rownames(structural_zeros_df)[apply(structural_zeros_df, 1, any)]
structural_zero_groups <- structural_zeros_df[structural_zero_taxa, , drop = FALSE] %>%
rename("Bd_negative"=1) %>%
rename("Bd_positive"=2)
nrow(structural_zero_groups)[1] 102
Structural zeros in Bd_negative
[1] 44
structural_zero_groups %>%
filter(Bd_negative==TRUE) %>%
rownames_to_column(., "genome") %>%
left_join(., genome_metadata, by="genome") %>%
count(phylum, name = "sample_count") %>%
arrange(desc(sample_count)) phylum sample_count
1 p__Bacteroidota 18
2 p__Pseudomonadota 16
3 p__Patescibacteria 3
4 p__Verrucomicrobiota 3
5 p__Acidobacteriota 1
6 p__Actinomycetota 1
7 p__Bacillota_A 1
8 p__Nitrospirota 1
structural_zero_groups %>%
filter(Bd_negative==TRUE) %>%
rownames_to_column(., "genome") %>%
left_join(., genome_metadata, by="genome") %>%
select(family, genus, species) %>%
tt()| family | genus | species |
|---|---|---|
| f__Chthoniobacteraceae | g__CAIURC01 | s__ |
| f__Akkermansiaceae | g__Luteolibacter | s__ |
| f__Opitutaceae | g__JADKHC01 | s__ |
| f__Cyclobacteriaceae | g__ELB16-189 | s__ |
| f__Spirosomaceae | g__Spirosoma | s__ |
| f__Marinilabiliaceae | g__JAGPHP01 | s__ |
| f__Marinilabiliaceae | g__JAGPHP01 | s__ |
| f__Marinilabiliaceae | g__JAGPHP01 | s__ |
| f__Marinilabiliaceae | g__JAGPHP01 | s__ |
| f__Marinilabiliaceae | g__JAGPHP01 | s__ |
| f__Prolixibacteraceae | g__UBA6024 | s__ |
| f__JAAUTT01 | g__ | s__ |
| f__JAAUTT01 | g__ | s__ |
| f__Paludibacteraceae | g__Paludibacter | s__ |
| f__Paludibacteraceae | g__Paludibacter | s__ |
| f__Crocinitomicaceae | g__Fluviicola | s__ |
| f__Flavobacteriaceae | g__Flavobacterium | s__ |
| f__Flavobacteriaceae | g__Flavobacterium | s__ |
| f__B-17BO | g__ | s__ |
| f__Sphingobacteriaceae | g__ | s__ |
| f__Saprospiraceae | g__ | s__ |
| f__Holophagaceae | g__Holophaga | s__ |
| f__Beijerinckiaceae | g__CAIYZJ01 | s__ |
| f__Beijerinckiaceae | g__CAIYZJ01 | s__ |
| f__Beijerinckiaceae | g__CAIYZJ01 | s__ |
| f__Aestuariivirgaceae | g__CABJBCQ01 | s__ |
| f__Aestuariivirgaceae | g__CABJBCQ01 | s__ |
| f__Sphingomonadaceae | g__Sphingomonas_O | s__ |
| f__Sphingomonadaceae | g__Sphingomonas_O | s__ |
| f__Sphingomonadaceae | g__Sphingomonas_O | s__ |
| f__Sphingomonadaceae | g__Sphingomonas_O | s__ |
| f__Chromatiaceae | g__Thiodictyon | s__ |
| f__Moraxellaceae | g__Aquirhabdus | s__ |
| f__Rhodocyclaceae | g__ | s__ |
| f__Rhodocyclaceae | g__Azonexus | s__ |
| f__Burkholderiaceae_B | g__Rhodoferax_C | s__ |
| f__Burkholderiaceae_B | g__Rhodoferax_C | s__ |
| f__Burkholderiaceae_B | g__Variovorax | s__ |
| f__UBA9217 | g__JALNZF01 | s__ |
| f__UBA6164 | g__ | s__ |
| f__UBA2023 | g__JABFSX01 | s__ |
| f__Absconditicoccaceae | g__ | s__ |
| f__Demequinaceae | g__Demequina | s__ |
| f__Caloramatoraceae | g__ | s__ |
Structural zeros in Bd_positive
[1] 58
structural_zero_groups %>%
filter(Bd_positive==TRUE) %>%
rownames_to_column(., "genome") %>%
left_join(., genome_metadata, by="genome") %>%
count(phylum, name = "sample_count") %>%
arrange(desc(sample_count)) phylum sample_count
1 p__Pseudomonadota 31
2 p__Bacteroidota 14
3 p__Patescibacteria 5
4 p__Actinomycetota 2
5 p__Bacillota 1
6 p__Bdellovibrionota 1
7 p__Desulfobacterota_F 1
8 p__Gemmatimonadota 1
9 p__Halobacteriota 1
10 p__Verrucomicrobiota 1
structural_zero_groups %>%
filter(Bd_positive==TRUE) %>%
rownames_to_column(., "genome") %>%
left_join(., genome_metadata, by="genome") %>%
select(family, genus, species) %>%
tt()| family | genus | species |
|---|---|---|
| f__Methanoregulaceae | g__Methanoregula | s__ |
| f__Akkermansiaceae | g__Luteolibacter | s__ |
| f__Spirosomaceae | g__Spirosoma | s__ |
| f__Spirosomaceae | g__Spirosoma | s__ |
| f__Spirosomaceae | g__JALRIJ01 | s__ |
| f__Spirosomaceae | g__JALRIJ01 | s__ |
| f__NBLH01 | g__NBLH01 | s__ |
| f__Prolixibacteraceae | g__UBA6024 | s__ |
| f__VadinHA17 | g__SR-FBR-E99 | s__ |
| f__UBA7332 | g__UBA7332 | s__ |
| f__Crocinitomicaceae | g__Fluviicola | s__ |
| f__Crocinitomicaceae | g__Fluviicola | s__ |
| f__Flavobacteriaceae | g__Flavobacterium | s__ |
| f__Flavobacteriaceae | g__Flavobacterium | s__ |
| f__Chitinophagaceae | g__Ferruginibacter | s__ |
| f__Ignavibacteriaceae | g__Fen-1301 | s__ |
| f__Gemmatimonadaceae | g__UBA4720 | s__ |
| f__Rhodobacteraceae | g__Stagnihabitans | s__ |
| f__Rhodobacteraceae | g__Gemmobacter_B | s__ |
| f__Sphingomonadaceae | g__Novosphingobium | s__ |
| f__Sphingomonadaceae | g__Novosphingobium | s__ |
| f__Methylomonadaceae | g__CAIQWF01 | s__ |
| f__Methylomonadaceae | g__Methylomonas | s__ |
| f__Thiotrichaceae | g__Thiothrix | s__ |
| f__Diplorickettsiaceae | g__Rickettsiella_B | s__ |
| f__Rhodocyclaceae | g__Azonexus | s__ |
| f__Rhodocyclaceae | g__Azonexus | s__ |
| f__Rhodocyclaceae | g__Azonexus | s__ |
| f__Rhodocyclaceae | g__Azonexus | s__ |
| f__Rhodocyclaceae | g__Rhodocyclus | s__ |
| f__Rhodocyclaceae | g__CAJBIL01 | s__ |
| f__Burkholderiaceae | g__Polynucleobacter | s__ |
| f__Burkholderiaceae | g__JACPVY01 | s__ |
| f__Burkholderiaceae | g__Telluria | s__ |
| f__Burkholderiaceae | g__CAILRJ01 | s__ |
| f__Burkholderiaceae_B | g__Polaromonas | s__ |
| f__Burkholderiaceae_B | g__JADJBS01 | s__ |
| f__Burkholderiaceae_B | g__JADJBS01 | s__ |
| f__Burkholderiaceae_B | g__JADJBS01 | s__ |
| f__Burkholderiaceae_B | g__JADJBS01 | s__ |
| f__Burkholderiaceae_B | g__JADJDQ01 | s__ |
| f__Burkholderiaceae_B | g__Leptothrix | s__ |
| f__Burkholderiaceae_B | g__Ideonella | s__ |
| f__Burkholderiaceae_B | g__Ideonella | s__ |
| f__Burkholderiaceae_B | g__Ideonella | s__ |
| f__Usitatibacteraceae | g__JAEUMZ01 | s__ |
| f__Methylophilaceae | g__ | s__ |
| f__UBA11063 | g__Aquella | s__ |
| f__Pseudopelobacteraceae | g__JACRCG01 | s__ |
| f__Silvanigrellaceae | g__Silvanigrella | s__Silvanigrella cienkowskii |
| f__SC72 | g__UBA5232 | s__ |
| f__GWC2-37-73 | g__UBA12068 | s__ |
| f__CAIMLA01 | g__ | s__ |
| f__UBA2023 | g__ | s__ |
| f__UBA2023 | g__JANJWP01 | s__ |
| f__Microbacteriaceae | g__Aurantimicrobium | s__ |
| f__S36-B12 | g__UBA11398 | s__ |
| f__Erysipelotrichaceae | g__UBA2212 | s__ |
9.2 Enriched MAGs
taxonomy <- data.frame(physeq_all@tax_table) %>%
rownames_to_column(., "taxon") %>%
mutate_at(vars(phylum, order, family, genus, species), ~ str_replace(., "[dpcofgs]__", ""))
ancombc_rand_table_mag_status <- ancom_rand_output_mag$res %>%
dplyr::select(taxon, lfc_Bd_statusBd_positive, q_Bd_statusBd_positive) %>%
filter(q_Bd_statusBd_positive < 0.05) %>%
dplyr::arrange(q_Bd_statusBd_positive) %>%
left_join(taxonomy, by = join_by(taxon == taxon)) %>%
mutate_at(vars(phylum, species), ~ str_replace(., "[dpcofgs]__", ""))%>%
dplyr::arrange(lfc_Bd_statusBd_positive)
colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
mutate_at(vars(phylum), ~ str_replace(., "[dpcofgs]__", "")) %>%
right_join(taxonomy, by=join_by(phylum == phylum)) %>%
dplyr::select(phylum, colors) %>%
mutate(colors = str_c(colors, "80")) %>%
unique() %>%
dplyr::arrange(phylum)
tax_table <- as.data.frame(unique(ancombc_rand_table_mag_status$phylum))
colnames(tax_table)[1] <- "phylum"
tax_color <- merge(tax_table, colors_alphabetic, by="phylum")%>%
dplyr::arrange(phylum) %>%
dplyr::select(colors) %>%
pull()ancombc_rand_table_mag_status %>%
mutate(genome=factor(taxon,levels=ancombc_rand_table_mag_status$taxon)) %>%
ggplot(., aes(x=lfc_Bd_statusBd_positive, y=forcats::fct_reorder(genome,lfc_Bd_statusBd_positive), fill=phylum)) +
geom_col() +
scale_fill_manual(values=tax_color) +
geom_hline(yintercept=0) +
theme(panel.background = element_blank(),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
axis.text.x = element_text(size = 12),
axis.text.y = element_blank(),
axis.title = element_text(size = 14, face = "bold"),
legend.text = element_text(size = 12),
legend.title = element_text(size = 14, face = "bold"),
legend.position = "right", legend.box = "vertical")+
labs(
title = "Enriched in negatives (− LFC) vs Enriched in positives (+ LFC)",
x = "log2FoldChange",
y = "Genome",
fill = "Phylum"
) lfc_Bd_statusBd_positive family genus species
1 -1.536306 Burkholderiaceae_B Sphaerotilus
2 1.700607 Dermabacteraceae JAFFTF01
3 2.177951 Erysipelotrichaceae UBA2212
4 2.369690 Flavobacteriaceae Flavobacterium
5 2.372847 Burkholderiaceae_B Acidovorax
6 2.388475 Burkholderiaceae_B Rhodoferax_C
7 2.435418 Burkholderiaceae_B Rhodoferax