Chapter 8 Differential abundance analysis
8.1 Captive vs Wild
8.1.1 Structural zeros
pre_samples <- sample_metadata %>%
filter(diet == "Pre_grass") %>%
dplyr::select(sample) %>%
pull()
wild_samples <- sample_metadata %>%
filter(diet=="Wild") %>%
dplyr::select(sample) %>% pull()
structural_zeros <- genome_counts %>%
rowwise() %>%
mutate(all_zeros_pre = all(c_across(all_of(pre_samples)) == 0)) %>%
mutate(all_zeros_wild = all(c_across(all_of(wild_samples)) == 0)) %>%
mutate(average_pre = mean(c_across(all_of(pre_samples)), na.rm = TRUE)) %>%
mutate(average_wild = mean(c_across(all_of(wild_samples)), na.rm = TRUE)) %>%
filter(all_zeros_pre == TRUE || all_zeros_wild==TRUE) %>%
mutate(present = case_when(
all_zeros_pre & !all_zeros_wild ~ "wild",
!all_zeros_pre & all_zeros_wild ~ "pregrass",
!all_zeros_pre & !all_zeros_wild ~ "None",
TRUE ~ NA_character_
)) %>%
mutate(average = ifelse(present == "pregrass", average_pre, average_wild)) %>%
dplyr::select(genome, present, average) %>%
left_join(genome_metadata, by=join_by(genome==genome)) %>%
arrange(present,-average)
structural_zeros %>%
filter(present %in% c("pregrass","wild")) %>%
tt()| genome | present | average | domain | phylum | class | order | family | genus | species | completeness | contamination | length |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| EHA03329_bin.40 | pregrass | 1.231370e+01 | d__Bacteria | p__Pseudomonadota | c__Alphaproteobacteria | o__RF32 | f__CAG-239 | g__ | s__ | 100.00 | 0.19 | 1401808 |
| EHA03293_bin.11 | pregrass | 2.245807e+00 | d__Bacteria | p__Pseudomonadota | c__Gammaproteobacteria | o__Cardiobacteriales | f__Wohlfahrtiimonadaceae | g__Ignatzschineria | s__ | 94.52 | 0.68 | 2302391 |
| EHA03293_bin.24 | pregrass | 1.663890e+00 | d__Bacteria | p__Pseudomonadota | c__Gammaproteobacteria | o__Burkholderiales | f__Burkholderiaceae_C | g__Paenalcaligenes | s__Paenalcaligenes intestinipullorum | 99.99 | 0.04 | 2015030 |
| EHA03333_bin.4 | pregrass | 1.418469e+00 | d__Bacteria | p__Pseudomonadota | c__Gammaproteobacteria | o__Burkholderiales | f__Burkholderiaceae_C | g__Oligella | s__ | 94.36 | 1.56 | 2054098 |
| EHA03306_bin.54 | pregrass | 1.265276e+00 | d__Archaea | p__Methanobacteriota | c__Methanobacteria | o__Methanobacteriales | f__Methanobacteriaceae | g__Methanosphaera | s__Methanosphaera cuniculi | 99.85 | 0.07 | 1705853 |
| EHA03333_bin.7 | pregrass | 8.408085e-01 | d__Bacteria | p__Pseudomonadota | c__Gammaproteobacteria | o__Cardiobacteriales | f__Wohlfahrtiimonadaceae | g__Ignatzschineria | s__ | 61.96 | 1.36 | 1933065 |
| EHA03294_bin.17 | pregrass | 7.546218e-01 | d__Bacteria | p__Bacillota_A | c__Clostridia | o__Christensenellales | f__UBA1242 | g__Caccovivens | s__ | 88.30 | 1.19 | 865947 |
| EHA03333_bin.59 | pregrass | 6.273076e-01 | d__Bacteria | p__Pseudomonadota | c__Gammaproteobacteria | o__Cardiobacteriales | f__Wohlfahrtiimonadaceae | g__Wohlfahrtiimonas | s__ | 62.59 | 1.11 | 1358803 |
| EHA04588_bin.94 | wild | 1.927820e-05 | d__Bacteria | p__Pseudomonadota | c__Gammaproteobacteria | o__Cardiobacteriales | f__Cardiobacteriaceae | g__ | s__ | 75.45 | 0.62 | 1945202 |
8.1.2 Enrichment analysis: Ancombc2
phylo_samples <- sample_metadata %>%
column_to_rownames("sample") %>%
sample_data()
phylo_genome <- genome_counts_filt %>%
select(one_of(c("genome", rownames(phylo_samples)))) %>%
filter(!genome %in% structural_zeros$genome) %>%
column_to_rownames("genome") %>%
mutate_all( ~ replace(., . == 0, 0.00001)) %>%
otu_table(., taxa_are_rows = TRUE)
phylo_taxonomy <- genome_metadata %>%
filter(genome %in% rownames(phylo_genome)) %>%
mutate(genome2 = genome) %>%
column_to_rownames("genome2") %>%
dplyr::select(domain, phylum, class, order, family, genus, species, genome) %>%
as.matrix() %>%
tax_table()
physeq_genome_filtered <- phyloseq(phylo_genome, phylo_taxonomy, phylo_samples)
physeq_sample <- subset_samples(physeq_genome_filtered, diet %in% c("Pre_grass","Wild"))
physeq_sample <- prune_taxa(taxa_sums(physeq_sample)>0, physeq_sample)8.1.2.1 MAG level
ancom_rand_output_mag_pre_wild = ancombc2(
data = physeq_sample,
assay_name = "counts",
tax_level = NULL,
fix_formula = "diet",
p_adj_method = "holm",
pseudo_sens = TRUE,
prv_cut = 0,
lib_cut = 0,
s0_perc = 0.05,
group = NULL,
struc_zero = FALSE,
neg_lb = FALSE,
alpha = 0.05,
n_cl = 2,
verbose = TRUE,
global = FALSE,
pairwise = FALSE,
dunnet = FALSE,
trend = FALSE,
iter_control = list(
tol = 1e-5,
max_iter = 20,
verbose = FALSE
),
em_control = list(tol = 1e-5, max_iter = 100),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
trend_control = NULL
)taxonomy <- data.frame(physeq_sample@tax_table) %>%
rownames_to_column(., "taxon") %>%
mutate_at(vars(phylum, order, family, genus, species), ~ str_replace(., "[dpcofgs]__", ""))
ancombc_rand_table_mag <- ancom_rand_output_mag_pre_wild$res %>%
dplyr::select(taxon, lfc_dietWild, p_dietWild) %>%
filter(p_dietWild < 0.05) %>%
dplyr::arrange(p_dietWild) %>%
left_join(taxonomy, by = join_by(taxon == taxon)) %>%
mutate_at(vars(phylum, species), ~ str_replace(., "[dpcofgs]__", ""))%>%
dplyr::arrange(lfc_dietWild)
colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
mutate_at(vars(phylum), ~ str_replace(., "[dpcofgs]__", "")) %>%
right_join(taxonomy, by=join_by(phylum == phylum)) %>%
dplyr::select(phylum, colors) %>%
mutate(colors = str_c(colors, "80")) %>% #add 80% alpha
unique() %>%
dplyr::arrange(phylum)
tax_table <- as.data.frame(unique(ancombc_rand_table_mag$phylum))
colnames(tax_table)[1] <- "phylum"
tax_color <- merge(tax_table, colors_alphabetic, by="phylum")%>%
dplyr::arrange(phylum) %>%
dplyr::select(colors) %>%
pull()ancombc_rand_table_mag %>%
mutate(genome=factor(genome,levels=ancombc_rand_table_mag$genome)) %>%
ggplot(., aes(x=lfc_dietWild, y=forcats::fct_reorder(genome,lfc_dietWild), fill=phylum)) +
geom_col() +
scale_fill_manual(values=tax_color) +
geom_hline(yintercept=0) +
theme(panel.background = element_blank(),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 6),
axis.title = element_text(size = 14, face = "bold"),
legend.text = element_text(size = 12),
legend.title = element_text(size = 14, face = "bold"),
legend.position = "right", legend.box = "vertical")+
xlab("log2FoldChange") +
ylab("Genome")+
guides(fill=guide_legend(title="Phylum"))ancom_rand_output_mag_pre_wild$res %>%
na.omit() %>%
dplyr::select(genome=taxon, lfc_dietWild, p_dietWild) %>%
left_join(genome_metadata, by = join_by(genome == genome)) %>%
mutate(phylum = ifelse(p_dietWild < 0.05, phylum, NA)) %>%
ggplot(., aes(x = lfc_dietWild, y = -log(p_dietWild), color = phylum)) +
geom_point(size=3, show.legend = FALSE) +
scale_color_manual(values = phylum_colors) +
labs(color = "Significant phylum", x = "Log-fold difference between pre and post grass", y = "p-value") +
theme_classic()8.1.2.2 Genus level
ancom_rand_output_mag_pre_wild_genus = ancombc2(
data = physeq_sample,
assay_name = "counts",
tax_level = "genus",
fix_formula = "diet",
p_adj_method = "holm",
pseudo_sens = TRUE,
prv_cut = 0,
lib_cut = 0,
s0_perc = 0.05,
group = NULL,
struc_zero = FALSE,
neg_lb = FALSE,
alpha = 0.05,
n_cl = 2,
verbose = TRUE,
global = FALSE,
pairwise = FALSE,
dunnet = FALSE,
trend = FALSE,
iter_control = list(
tol = 1e-5,
max_iter = 20,
verbose = FALSE
),
em_control = list(tol = 1e-5, max_iter = 100),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
trend_control = NULL
)tax <- data.frame(physeq_genome_filtered@tax_table) %>%
rownames_to_column(., "taxon") %>%
mutate_at(vars(phylum, genus), ~ str_replace(., "[dpcofgs]__", ""))%>%
select(phylum, genus)%>%
unique()
ancombc_rand_table_genus <- ancom_rand_output_mag_pre_wild_genus$res %>%
mutate_at(vars(taxon), ~ str_replace(., "[dpcofgs]__", ""))%>%
dplyr::select(taxon, lfc_dietWild, p_dietWild) %>%
filter(p_dietWild < 0.05) %>%
dplyr::arrange(lfc_dietWild) %>%
left_join(tax, by=join_by(taxon == genus)) %>%
dplyr::arrange(lfc_dietWild)
colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
mutate_at(vars(phylum), ~ str_replace(., "[dpcofgs]__", "")) %>%
right_join(tax, by=join_by(phylum == phylum)) %>%
dplyr::select(phylum, colors) %>%
mutate(colors = str_c(colors, "80")) %>% #add 80% alpha
unique() %>%
dplyr::arrange(phylum)
tax_table <- as.data.frame(unique(ancombc_rand_table_genus$phylum))
colnames(tax_table)[1] <- "phylum"
tax_color <- merge(tax_table, colors_alphabetic, by="phylum")%>%
dplyr::arrange(phylum) %>%
dplyr::select(colors) %>%
pull()genus_matrix <- ancombc_rand_table_genus%>%
mutate(taxon=factor(taxon,levels=ancombc_rand_table_genus$taxon)) %>%
filter(!is.na(phylum))
genus_matrix <- genus_matrix %>%
arrange(desc(-lfc_dietWild)) %>%
slice_head(n = 15) %>%
bind_rows(genus_matrix %>%
arrange(-lfc_dietWild) %>%
slice_head(n = 15))
tax_table <- as.data.frame(unique(genus_matrix$phylum))
colnames(tax_table)[1] <- "phylum"
tax_color <- merge(tax_table, colors_alphabetic, by = "phylum") %>%
dplyr::arrange(phylum) %>%
dplyr::select(colors) %>%
pull()
genus_matrix %>%
ggplot(., aes(x=lfc_dietWild, y=forcats::fct_rev(taxon), fill=phylum)) +
geom_col() +
scale_fill_manual(values=tax_color) +
geom_hline(yintercept=0) +
theme(panel.background = element_blank(),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 8),
axis.title = element_text(size = 14, face = "bold"),
legend.text = element_text(size = 12),
legend.title = element_text(size = 14, face = "bold"),
legend.position = "right", legend.box = "vertical")+
xlab("log2FoldChange") +
ylab("Genus")+
guides(fill=guide_legend(title="Phylum"))ancom_rand_output_mag_pre_wild_genus$res %>%
na.omit() %>%
dplyr::select(genome = taxon, lfc_dietWild, p_dietWild) %>%
left_join(genome_metadata, by = join_by(genome == genus)) %>%
mutate(phylum = ifelse(p_dietWild < 0.05, phylum, NA)) %>%
ggplot(., aes(x = lfc_dietWild, y = -log(p_dietWild), color = phylum)) +
geom_point(size = 3, show.legend = FALSE) +
scale_color_manual(values = phylum_colors) +
labs(color = "Significant phylum", x = "Log-fold difference between sample types", y = "p-value") +
theme_classic() +
geom_text(aes(5.5, 5), label = "Enriched\nin wild hares", color = "#666666") +
geom_text(aes(-5, 5), label = "Enriched\nin captive hares", color = "#666666") +
labs(color = "Phylum", y = "Log-fold", x = "-log p-value") +
theme_classic()8.1.2.3 Phylum level
ancom_rand_output_phyl_capwild = ancombc2(
data = physeq_sample,
assay_name = "counts",
tax_level = "phylum",
fix_formula = "diet",
p_adj_method = "holm",
pseudo_sens = TRUE,
prv_cut = 0,
lib_cut = 0,
s0_perc = 0.05,
group = NULL,
struc_zero = FALSE,
neg_lb = FALSE,
alpha = 0.05,
n_cl = 2,
verbose = TRUE,
global = FALSE,
pairwise = FALSE,
dunnet = FALSE,
trend = FALSE,
iter_control = list(
tol = 1e-5,
max_iter = 20,
verbose = FALSE
),
em_control = list(tol = 1e-5, max_iter = 100),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
trend_control = NULL
)ancom_rand_output_phyl_capwild$res %>%
dplyr::select(taxon, lfc_dietWild, p_dietWild) %>%
filter(p_dietWild < 0.05) %>%
dplyr::arrange(p_dietWild) taxon lfc_dietWild p_dietWild
1 p__Spirochaetota 8.7284796 4.776516e-07
2 p__Bacillota_B 2.7666740 7.923227e-07
3 p__Synergistota 3.3563476 8.143247e-06
4 p__Bacillota_C 3.0796571 3.586172e-05
5 p__Desulfobacterota 2.3245267 5.530492e-05
6 p__Verrucomicrobiota 2.1635054 5.026095e-03
7 p__Bacteroidota 1.1976032 1.149023e-02
8 p__Bacillota_A 0.9179763 2.146291e-02
tax <- data.frame(physeq_sample@tax_table) %>%
rownames_to_column(., "taxon") %>%
mutate_at(vars(phylum, genus), ~ str_replace(., "[dpcofgs]__", ""))%>%
select(phylum)%>%
unique()
ancombc_rand_table_phylum <- ancom_rand_output_phyl_capwild$res %>%
mutate_at(vars(taxon), ~ str_replace(., "[dpcofgs]__", ""))%>%
dplyr::select(taxon, lfc_dietWild, p_dietWild) %>%
filter(p_dietWild < 0.05) %>%
dplyr::arrange(lfc_dietWild)
ancombc_rand_table_phylum taxon lfc_dietWild p_dietWild
1 Bacillota_A 0.9179763 2.146291e-02
2 Bacteroidota 1.1976032 1.149023e-02
3 Verrucomicrobiota 2.1635054 5.026095e-03
4 Desulfobacterota 2.3245267 5.530492e-05
5 Bacillota_B 2.7666740 7.923227e-07
6 Bacillota_C 3.0796571 3.586172e-05
7 Synergistota 3.3563476 8.143247e-06
8 Spirochaetota 8.7284796 4.776516e-07
8.1.2.4 Relative abundances of Spirochaetota
spiro_mags <- genome_metadata %>%
filter(phylum=="p__Spirochaetota")
genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.))%>%
filter(genome %in% spiro_mags$genome) %>%
pivot_longer(!genome, names_to = "sample", values_to = "abundance") %>%
left_join(sample_metadata, by=join_by(sample==sample)) %>%
filter(diet!="Post_grass") %>%
ggplot(aes(y = abundance, x = diet, group=diet, color=diet, fill=diet)) +
geom_jitter(width = 0.2, show.legend = FALSE) +
geom_boxplot(width = 0.5, alpha=0.5,outlier.shape = NA, show.legend = FALSE) +
scale_color_manual(labels=c("Pre_grass" = "Captive pre-grass", "Wild" = "Wild"),
values=c("#F4B02E", "#74C8AE"))+
scale_fill_manual(labels=c("Pre_grass" = "Captive pre-grass", "Wild" = "Wild"),
values=c("#F4B02E", "#74C8AE"))+
coord_cartesian(xlim = c(1, NA)) +
theme_classic() +
theme(
strip.background = element_blank(),
panel.grid.minor.x = element_line(size = .1, color = "grey"),
axis.text = element_text(size=10),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0))
)+
labs(y = "Relative abundance of Spirochaetota")8.2 Captive: before and after grass
8.2.1 Structural zeros
pre_samples <- sample_metadata %>%
filter(diet == "Pre_grass") %>%
dplyr::select(sample) %>%
pull()
post_samples <- sample_metadata %>%
filter(diet == "Post_grass") %>%
dplyr::select(sample) %>%
pull()
structural_zeros <- genome_counts_filt %>%
rowwise() %>%
mutate(all_zeros_pre = all(c_across(all_of(pre_samples)) == 0)) %>%
mutate(all_zeros_post = all(c_across(all_of(post_samples)) == 0)) %>%
mutate(average_pre = mean(c_across(all_of(pre_samples)), na.rm = TRUE)) %>%
mutate(average_post = mean(c_across(all_of(post_samples)), na.rm = TRUE)) %>%
filter(all_zeros_pre == TRUE || all_zeros_post==TRUE) %>%
mutate(present = case_when(
all_zeros_pre & !all_zeros_post ~ "post",
!all_zeros_pre & all_zeros_post ~ "pre",
!all_zeros_pre & !all_zeros_post ~ "None",
TRUE ~ NA_character_
)) %>%
mutate(average = ifelse(present == "pre", average_pre, average_post)) %>%
dplyr::select(genome, present, average) %>%
left_join(genome_metadata, by=join_by(genome==genome)) %>%
arrange(present,-average)8.2.2 Enrichment analysis: Ancombc2
phylo_samples <- sample_metadata %>%
column_to_rownames("sample") %>%
sample_data()
phylo_genome <- genome_counts_filt %>%
filter(!genome %in% structural_zeros$genome) %>%
column_to_rownames("genome") %>%
mutate_all( ~ replace(., . == 0, 0.00001)) %>%
otu_table(., taxa_are_rows = TRUE)
phylo_taxonomy <- genome_metadata %>%
filter(genome %in% rownames(phylo_genome)) %>%
mutate(genome2 = genome) %>%
column_to_rownames("genome2") %>%
dplyr::select(domain, phylum, class, order, family, genus, species, genome) %>%
as.matrix() %>%
tax_table()
physeq_genome_filtered <- phyloseq(phylo_genome, phylo_taxonomy, phylo_samples)
physeq_sample <- subset_samples(physeq_genome_filtered, diet %in% c("Pre_grass","Post_grass"))
physeq_sample <- prune_taxa(taxa_sums(physeq_sample)>0, physeq_sample)8.2.2.1 Relative abundances of archaea
meta_mags <- genome_metadata %>%
filter(phylum=="p__Methanobacteriota")
genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.))%>%
filter(genome %in% meta_mags$genome) %>%
pivot_longer(!genome, names_to = "sample", values_to = "abundance") %>%
left_join(sample_metadata, by=join_by(sample==sample)) %>%
filter(diet!="Wild") %>%
group_by(diet) %>%
summarize(mean_Archaea=mean(abundance), sd_Archaea=sd(abundance),
max_Archaea=max(abundance), min_Archaea=min(abundance),
zero_count = sum(abundance == 0, na.rm = TRUE))# A tibble: 2 × 6
diet mean_Archaea sd_Archaea max_Archaea min_Archaea zero_count
<chr> <dbl> <dbl> <dbl> <dbl> <int>
1 Post_grass 0.00154 0.00292 0.00768 0 1
2 Pre_grass 0.000904 0.00228 0.00744 0 4
genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.))%>%
filter(genome %in% meta_mags$genome) %>%
pivot_longer(!genome, names_to = "sample", values_to = "abundance") %>%
left_join(sample_metadata, by=join_by(sample==sample)) %>%
filter(diet!="Wild") %>%
ggplot(aes(y = abundance, x = diet, group=diet, color=diet, fill=diet)) +
geom_jitter(width = 0.2, show.legend = FALSE) +
geom_boxplot(width = 0.5, alpha=0.5,outlier.shape = NA, show.legend = FALSE) +
scale_color_manual(values=diet_colors)+
scale_fill_manual(values=diet_colors) +
scale_x_discrete(labels=c("Post_grass" = "Grass", "Pre_grass" = "No grass")) +
coord_cartesian(xlim = c(1, NA)) +
theme_classic() +
theme(
strip.background = element_blank(),
panel.grid.minor.x = element_line(size = .1, color = "grey"),
axis.text = element_text(size=10),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0))
)+
labs(x = "Diet", y = "Methanobacteriota")8.2.2.2 MAG level
set.seed(1234)
ancom_rand_output_mag_pre_post = ancombc2(
data = physeq_sample,
assay_name = "counts",
tax_level = NULL,
fix_formula = "diet",
rand_formula = "(1|individual)",
p_adj_method = "holm",
pseudo_sens = TRUE,
prv_cut = 0,
lib_cut = 0,
s0_perc = 0.05,
group = NULL,
struc_zero = FALSE,
neg_lb = FALSE,
alpha = 0.05,
n_cl = 2,
verbose = TRUE,
global = FALSE,
pairwise = FALSE,
dunnet = FALSE,
trend = FALSE,
iter_control = list(
tol = 1e-5,
max_iter = 20,
verbose = FALSE
),
em_control = list(tol = 1e-5, max_iter = 100),
lme_control = lme4::lmerControl(),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
trend_control = NULL
)taxonomy <- data.frame(physeq_sample@tax_table) %>%
rownames_to_column(., "taxon") %>%
mutate_at(vars(phylum, order, family, genus, species), ~ str_replace(., "[dpcofgs]__", ""))
ancombc_rand_table_mag <- ancom_rand_output_mag_pre_post$res %>%
dplyr::select(taxon, lfc_dietPre_grass, p_dietPre_grass) %>%
filter(p_dietPre_grass < 0.05) %>%
dplyr::arrange(p_dietPre_grass) %>%
merge(., taxonomy, by="taxon") %>%
mutate_at(vars(phylum, species), ~ str_replace(., "[dpcofgs]__", ""))%>%
dplyr::arrange(lfc_dietPre_grass)
colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
mutate_at(vars(phylum), ~ str_replace(., "[dpcofgs]__", "")) %>%
right_join(taxonomy, by=join_by(phylum == phylum)) %>%
dplyr::select(phylum, colors) %>%
mutate(colors = str_c(colors, "80")) %>% #add 80% alpha
unique() %>%
dplyr::arrange(phylum)
tax_table <- as.data.frame(unique(ancombc_rand_table_mag$phylum))
colnames(tax_table)[1] <- "phylum"
tax_color <- merge(tax_table, colors_alphabetic, by="phylum")%>%
dplyr::arrange(phylum) %>%
dplyr::select(colors) %>%
pull()ancombc_rand_table_mag%>%
mutate(genome=factor(genome,levels=ancombc_rand_table_mag$genome)) %>%
ggplot(., aes(x=lfc_dietPre_grass, y=forcats::fct_reorder(genome,lfc_dietPre_grass), fill=phylum)) +
geom_col() +
scale_fill_manual(values=tax_color) +
geom_hline(yintercept=0) +
theme(panel.background = element_blank(),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 8),
axis.title = element_text(size = 12, face = "bold"),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12, face = "bold"),
legend.position = "right", legend.box = "vertical")+
xlab("log2FoldChange") +
ylab("Genome")+
guides(fill=guide_legend(title="Phylum"))ancom_rand_output_mag_pre_post$res %>%
na.omit() %>%
dplyr::select(genome=taxon, lfc_dietPre_grass, p_dietPre_grass) %>%
left_join(genome_metadata, by = join_by(genome == genome)) %>%
mutate(phylum = ifelse(p_dietPre_grass < 0.05, phylum, NA)) %>%
ggplot(., aes(x = lfc_dietPre_grass, y = -log(p_dietPre_grass), color = phylum)) +
geom_point(size=3, show.legend = FALSE) +
scale_color_manual(values = phylum_colors) +
labs(color = "Significant phylum", x = "Log-fold difference between pre and post grass", y = "p-value") +
theme_classic()