Chapter 8 Differential abundance analysis

load("data/data_host_filtered.Rdata")
load("data/ancombc_filtered_wild_captive.Rdata")
load("data/ancombc_filtered_captive_diet.Rdata")

8.1 Captive vs Wild

8.1.1 Structural zeros

pre_samples <- sample_metadata %>% 
  filter(diet == "Pre_grass") %>%
  dplyr::select(sample) %>%
  pull()

wild_samples <- sample_metadata %>% 
  filter(diet=="Wild") %>%
  dplyr::select(sample) %>% pull()

structural_zeros <- genome_counts %>% 
   rowwise() %>% 
   mutate(all_zeros_pre = all(c_across(all_of(pre_samples)) == 0)) %>% 
   mutate(all_zeros_wild = all(c_across(all_of(wild_samples)) == 0)) %>% 
   mutate(average_pre = mean(c_across(all_of(pre_samples)), na.rm = TRUE)) %>% 
   mutate(average_wild = mean(c_across(all_of(wild_samples)), na.rm = TRUE)) %>% 
   filter(all_zeros_pre == TRUE || all_zeros_wild==TRUE)  %>% 
   mutate(present = case_when(
      all_zeros_pre & !all_zeros_wild ~ "wild",
      !all_zeros_pre & all_zeros_wild ~ "pregrass",
      !all_zeros_pre & !all_zeros_wild ~ "None",
      TRUE ~ NA_character_
    )) %>%
   mutate(average = ifelse(present == "pregrass", average_pre, average_wild)) %>%
   dplyr::select(genome, present, average) %>%
   left_join(genome_metadata, by=join_by(genome==genome)) %>%
   arrange(present,-average)

structural_zeros %>% 
  filter(present %in% c("pregrass","wild")) %>% 
  tt()
genome present average domain phylum class order family genus species completeness contamination length
EHA03329_bin.40 pregrass 1.231370e+01 d__Bacteria p__Pseudomonadota c__Alphaproteobacteria o__RF32 f__CAG-239 g__ s__ 100.00 0.19 1401808
EHA03293_bin.11 pregrass 2.245807e+00 d__Bacteria p__Pseudomonadota c__Gammaproteobacteria o__Cardiobacteriales f__Wohlfahrtiimonadaceae g__Ignatzschineria s__ 94.52 0.68 2302391
EHA03293_bin.24 pregrass 1.663890e+00 d__Bacteria p__Pseudomonadota c__Gammaproteobacteria o__Burkholderiales f__Burkholderiaceae_C g__Paenalcaligenes s__Paenalcaligenes intestinipullorum 99.99 0.04 2015030
EHA03333_bin.4 pregrass 1.418469e+00 d__Bacteria p__Pseudomonadota c__Gammaproteobacteria o__Burkholderiales f__Burkholderiaceae_C g__Oligella s__ 94.36 1.56 2054098
EHA03306_bin.54 pregrass 1.265276e+00 d__Archaea p__Methanobacteriota c__Methanobacteria o__Methanobacteriales f__Methanobacteriaceae g__Methanosphaera s__Methanosphaera cuniculi 99.85 0.07 1705853
EHA03333_bin.7 pregrass 8.408085e-01 d__Bacteria p__Pseudomonadota c__Gammaproteobacteria o__Cardiobacteriales f__Wohlfahrtiimonadaceae g__Ignatzschineria s__ 61.96 1.36 1933065
EHA03294_bin.17 pregrass 7.546218e-01 d__Bacteria p__Bacillota_A c__Clostridia o__Christensenellales f__UBA1242 g__Caccovivens s__ 88.30 1.19 865947
EHA03333_bin.59 pregrass 6.273076e-01 d__Bacteria p__Pseudomonadota c__Gammaproteobacteria o__Cardiobacteriales f__Wohlfahrtiimonadaceae g__Wohlfahrtiimonas s__ 62.59 1.11 1358803
EHA04588_bin.94 wild 1.927820e-05 d__Bacteria p__Pseudomonadota c__Gammaproteobacteria o__Cardiobacteriales f__Cardiobacteriaceae g__ s__ 75.45 0.62 1945202

8.1.2 Enrichment analysis: Ancombc2

phylo_samples <- sample_metadata %>%
  column_to_rownames("sample") %>%
  sample_data()
phylo_genome <- genome_counts_filt %>%
  select(one_of(c("genome", rownames(phylo_samples)))) %>%
  filter(!genome %in% structural_zeros$genome) %>% 
  column_to_rownames("genome") %>%
  mutate_all( ~ replace(., . == 0, 0.00001)) %>% 
  otu_table(., taxa_are_rows = TRUE)
phylo_taxonomy <- genome_metadata %>%
  filter(genome %in% rownames(phylo_genome)) %>% 
  mutate(genome2 = genome) %>% 
  column_to_rownames("genome2") %>%
  dplyr::select(domain, phylum, class, order, family, genus, species, genome) %>% 
  as.matrix() %>%
  tax_table() 

physeq_genome_filtered <- phyloseq(phylo_genome, phylo_taxonomy, phylo_samples)

physeq_sample <- subset_samples(physeq_genome_filtered, diet %in% c("Pre_grass","Wild"))
physeq_sample <- prune_taxa(taxa_sums(physeq_sample)>0, physeq_sample)

8.1.2.1 MAG level

ancom_rand_output_mag_pre_wild = ancombc2(
  data = physeq_sample,
  assay_name = "counts",
  tax_level = NULL,
  fix_formula = "diet",
  p_adj_method = "holm",
  pseudo_sens = TRUE,
  prv_cut = 0,
  lib_cut = 0,
  s0_perc = 0.05,
  group = NULL,
  struc_zero = FALSE,
  neg_lb = FALSE,
  alpha = 0.05,
  n_cl = 2,
  verbose = TRUE,
  global = FALSE,
  pairwise = FALSE,
  dunnet = FALSE,
  trend = FALSE,
  iter_control = list(
    tol = 1e-5,
    max_iter = 20,
    verbose = FALSE
  ),
  em_control = list(tol = 1e-5, max_iter = 100),
  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
  trend_control = NULL
)
taxonomy <- data.frame(physeq_sample@tax_table) %>%
  rownames_to_column(., "taxon") %>%
  mutate_at(vars(phylum, order, family, genus, species), ~ str_replace(., "[dpcofgs]__", ""))

ancombc_rand_table_mag <- ancom_rand_output_mag_pre_wild$res %>%
  dplyr::select(taxon, lfc_dietWild, p_dietWild) %>%
  filter(p_dietWild < 0.05) %>%
  dplyr::arrange(p_dietWild) %>%
  left_join(taxonomy, by = join_by(taxon == taxon)) %>%
  mutate_at(vars(phylum, species), ~ str_replace(., "[dpcofgs]__", ""))%>%
  dplyr::arrange(lfc_dietWild)
  
colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
    mutate_at(vars(phylum), ~ str_replace(., "[dpcofgs]__", ""))  %>%
  right_join(taxonomy, by=join_by(phylum == phylum)) %>%
  dplyr::select(phylum, colors) %>%
  mutate(colors = str_c(colors, "80"))  %>% #add 80% alpha
    unique() %>%
    dplyr::arrange(phylum)

tax_table <- as.data.frame(unique(ancombc_rand_table_mag$phylum))
  
colnames(tax_table)[1] <- "phylum"
tax_color <- merge(tax_table, colors_alphabetic, by="phylum")%>%
    dplyr::arrange(phylum) %>%
    dplyr::select(colors) %>%
    pull()
ancombc_rand_table_mag %>%
  mutate(genome=factor(genome,levels=ancombc_rand_table_mag$genome)) %>%
  ggplot(., aes(x=lfc_dietWild, y=forcats::fct_reorder(genome,lfc_dietWild), fill=phylum)) + 
  geom_col() + 
  scale_fill_manual(values=tax_color) + 
  geom_hline(yintercept=0) + 
  theme(panel.background = element_blank(),
        axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
        axis.text.x = element_text(size = 12),
        axis.text.y = element_text(size = 6),
        axis.title = element_text(size = 14, face = "bold"),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 14, face = "bold"),
        legend.position = "right", legend.box = "vertical")+
  xlab("log2FoldChange") + 
  ylab("Genome")+
  guides(fill=guide_legend(title="Phylum"))

ancom_rand_output_mag_pre_wild$res %>%
  na.omit() %>%
  dplyr::select(genome=taxon, lfc_dietWild, p_dietWild) %>%
  left_join(genome_metadata, by = join_by(genome == genome)) %>%
  mutate(phylum = ifelse(p_dietWild < 0.05, phylum, NA)) %>%
  ggplot(., aes(x = lfc_dietWild, y = -log(p_dietWild), color = phylum)) +
  geom_point(size=3, show.legend = FALSE) +
  scale_color_manual(values = phylum_colors) +
  labs(color = "Significant phylum", x = "Log-fold difference between pre and post grass", y = "p-value") +
  theme_classic()

8.1.2.2 Genus level

ancom_rand_output_mag_pre_wild_genus = ancombc2(
  data = physeq_sample,
  assay_name = "counts",
  tax_level = "genus",
  fix_formula = "diet",
  p_adj_method = "holm",
  pseudo_sens = TRUE,
  prv_cut = 0,
  lib_cut = 0,
  s0_perc = 0.05,
  group = NULL,
  struc_zero = FALSE,
  neg_lb = FALSE,
  alpha = 0.05,
  n_cl = 2,
  verbose = TRUE,
  global = FALSE,
  pairwise = FALSE,
  dunnet = FALSE,
  trend = FALSE,
  iter_control = list(
    tol = 1e-5,
    max_iter = 20,
    verbose = FALSE
  ),
  em_control = list(tol = 1e-5, max_iter = 100),
  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
  trend_control = NULL
)
tax <- data.frame(physeq_genome_filtered@tax_table) %>%
  rownames_to_column(., "taxon") %>%
  mutate_at(vars(phylum, genus), ~ str_replace(., "[dpcofgs]__", ""))%>%
  select(phylum, genus)%>%
    unique()

ancombc_rand_table_genus <- ancom_rand_output_mag_pre_wild_genus$res %>%
  mutate_at(vars(taxon), ~ str_replace(., "[dpcofgs]__", ""))%>%
  dplyr::select(taxon, lfc_dietWild, p_dietWild) %>%
  filter(p_dietWild < 0.05) %>%
  dplyr::arrange(lfc_dietWild)  %>%
  left_join(tax, by=join_by(taxon == genus)) %>%
  dplyr::arrange(lfc_dietWild)
  
colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  mutate_at(vars(phylum), ~ str_replace(., "[dpcofgs]__", ""))  %>%
  right_join(tax, by=join_by(phylum == phylum)) %>%
  dplyr::select(phylum, colors) %>%
  mutate(colors = str_c(colors, "80"))  %>% #add 80% alpha
    unique() %>%
    dplyr::arrange(phylum)

tax_table <- as.data.frame(unique(ancombc_rand_table_genus$phylum))
  
colnames(tax_table)[1] <- "phylum"
tax_color <- merge(tax_table, colors_alphabetic, by="phylum")%>%
    dplyr::arrange(phylum) %>%
    dplyr::select(colors) %>%
    pull()
genus_matrix <- ancombc_rand_table_genus%>%
      mutate(taxon=factor(taxon,levels=ancombc_rand_table_genus$taxon)) %>%
      filter(!is.na(phylum))

genus_matrix <- genus_matrix %>%
  arrange(desc(-lfc_dietWild)) %>%
  slice_head(n = 15) %>%
  bind_rows(genus_matrix %>%
              arrange(-lfc_dietWild) %>%
              slice_head(n = 15)) 

tax_table <- as.data.frame(unique(genus_matrix$phylum))
  
colnames(tax_table)[1] <- "phylum"
tax_color <- merge(tax_table, colors_alphabetic, by = "phylum") %>%
  dplyr::arrange(phylum) %>%
  dplyr::select(colors) %>%
  pull()

genus_matrix %>%      
  ggplot(., aes(x=lfc_dietWild, y=forcats::fct_rev(taxon), fill=phylum)) + 
  geom_col() + 
  scale_fill_manual(values=tax_color) + 
  geom_hline(yintercept=0) + 
  theme(panel.background = element_blank(),
        axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
        axis.text.x = element_text(size = 12),
        axis.text.y = element_text(size = 8),
        axis.title = element_text(size = 14, face = "bold"),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 14, face = "bold"),
        legend.position = "right", legend.box = "vertical")+
  xlab("log2FoldChange") + 
  ylab("Genus")+
  guides(fill=guide_legend(title="Phylum"))

ancom_rand_output_mag_pre_wild_genus$res %>%
  na.omit() %>%
  dplyr::select(genome = taxon, lfc_dietWild, p_dietWild) %>%
  left_join(genome_metadata, by = join_by(genome == genus)) %>%
  mutate(phylum = ifelse(p_dietWild < 0.05, phylum, NA)) %>%
  ggplot(., aes(x = lfc_dietWild, y = -log(p_dietWild), color = phylum)) +
  geom_point(size = 3, show.legend = FALSE) +
  scale_color_manual(values = phylum_colors) +
  labs(color = "Significant phylum", x = "Log-fold difference between sample types", y = "p-value") +
  theme_classic() +
  geom_text(aes(5.5, 5), label = "Enriched\nin wild hares", color = "#666666") +
  geom_text(aes(-5, 5), label = "Enriched\nin captive hares", color = "#666666") +
  labs(color = "Phylum", y = "Log-fold", x = "-log p-value") +
  theme_classic()

8.1.2.3 Phylum level

ancom_rand_output_phyl_capwild = ancombc2(
  data = physeq_sample,
  assay_name = "counts",
  tax_level = "phylum",
  fix_formula = "diet",
  p_adj_method = "holm",
  pseudo_sens = TRUE,
  prv_cut = 0,
  lib_cut = 0,
  s0_perc = 0.05,
  group = NULL,
  struc_zero = FALSE,
  neg_lb = FALSE,
  alpha = 0.05,
  n_cl = 2,
  verbose = TRUE,
  global = FALSE,
  pairwise = FALSE,
  dunnet = FALSE,
  trend = FALSE,
  iter_control = list(
    tol = 1e-5,
    max_iter = 20,
    verbose = FALSE
  ),
  em_control = list(tol = 1e-5, max_iter = 100),
  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
  trend_control = NULL
)
ancom_rand_output_phyl_capwild$res %>%
  dplyr::select(taxon, lfc_dietWild, p_dietWild) %>%
  filter(p_dietWild < 0.05) %>%
  dplyr::arrange(p_dietWild) 
                 taxon lfc_dietWild   p_dietWild
1     p__Spirochaetota    8.7284796 4.776516e-07
2       p__Bacillota_B    2.7666740 7.923227e-07
3      p__Synergistota    3.3563476 8.143247e-06
4       p__Bacillota_C    3.0796571 3.586172e-05
5  p__Desulfobacterota    2.3245267 5.530492e-05
6 p__Verrucomicrobiota    2.1635054 5.026095e-03
7      p__Bacteroidota    1.1976032 1.149023e-02
8       p__Bacillota_A    0.9179763 2.146291e-02
tax <- data.frame(physeq_sample@tax_table) %>%
  rownames_to_column(., "taxon") %>%
  mutate_at(vars(phylum, genus), ~ str_replace(., "[dpcofgs]__", ""))%>%
  select(phylum)%>%
    unique()

ancombc_rand_table_phylum <- ancom_rand_output_phyl_capwild$res %>%
  mutate_at(vars(taxon), ~ str_replace(., "[dpcofgs]__", ""))%>%
  dplyr::select(taxon, lfc_dietWild, p_dietWild) %>%
  filter(p_dietWild < 0.05) %>%
  dplyr::arrange(lfc_dietWild)

ancombc_rand_table_phylum
              taxon lfc_dietWild   p_dietWild
1       Bacillota_A    0.9179763 2.146291e-02
2      Bacteroidota    1.1976032 1.149023e-02
3 Verrucomicrobiota    2.1635054 5.026095e-03
4  Desulfobacterota    2.3245267 5.530492e-05
5       Bacillota_B    2.7666740 7.923227e-07
6       Bacillota_C    3.0796571 3.586172e-05
7      Synergistota    3.3563476 8.143247e-06
8     Spirochaetota    8.7284796 4.776516e-07
save(ancom_rand_output_mag_pre_wild,
     ancom_rand_output_mag_pre_wild_genus,
     ancom_rand_output_phyl_capwild,
     file="data/ancombc_filtered_wild_captive.Rdata")

8.1.2.4 Relative abundances of Spirochaetota

spiro_mags <- genome_metadata %>% 
  filter(phylum=="p__Spirochaetota")

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.))%>%
  filter(genome %in% spiro_mags$genome) %>% 
  pivot_longer(!genome, names_to = "sample", values_to = "abundance") %>% 
  left_join(sample_metadata, by=join_by(sample==sample)) %>% 
  filter(diet!="Post_grass") %>% 
  ggplot(aes(y = abundance, x = diet, group=diet, color=diet, fill=diet)) +
  geom_jitter(width = 0.2, show.legend = FALSE) +
  geom_boxplot(width = 0.5, alpha=0.5,outlier.shape = NA, show.legend = FALSE) +
  scale_color_manual(labels=c("Pre_grass" = "Captive pre-grass", "Wild" = "Wild"),
          values=c("#F4B02E", "#74C8AE"))+ 
  scale_fill_manual(labels=c("Pre_grass" = "Captive pre-grass", "Wild" = "Wild"),
          values=c("#F4B02E", "#74C8AE"))+ 
  coord_cartesian(xlim = c(1, NA)) +
  theme_classic() +
  theme(
    strip.background = element_blank(),
    panel.grid.minor.x = element_line(size = .1, color = "grey"),
    axis.text = element_text(size=10),
    axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
    axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0))
    )+
  labs(y = "Relative abundance of Spirochaetota")

8.2 Captive: before and after grass

8.2.1 Structural zeros

pre_samples <- sample_metadata %>% 
  filter(diet == "Pre_grass") %>%
  dplyr::select(sample) %>%
  pull()

post_samples <- sample_metadata %>% 
  filter(diet == "Post_grass") %>%
  dplyr::select(sample) %>% 
  pull()

structural_zeros <- genome_counts_filt %>% 
   rowwise() %>% 
   mutate(all_zeros_pre = all(c_across(all_of(pre_samples)) == 0)) %>% 
   mutate(all_zeros_post = all(c_across(all_of(post_samples)) == 0)) %>% 
   mutate(average_pre = mean(c_across(all_of(pre_samples)), na.rm = TRUE)) %>% 
   mutate(average_post = mean(c_across(all_of(post_samples)), na.rm = TRUE)) %>% 
   filter(all_zeros_pre == TRUE || all_zeros_post==TRUE)  %>%
   mutate(present = case_when(
      all_zeros_pre & !all_zeros_post ~ "post",
      !all_zeros_pre & all_zeros_post ~ "pre",
      !all_zeros_pre & !all_zeros_post ~ "None",
      TRUE ~ NA_character_
    )) %>%
   mutate(average = ifelse(present == "pre", average_pre, average_post)) %>%
   dplyr::select(genome, present, average) %>%
   left_join(genome_metadata, by=join_by(genome==genome)) %>%
   arrange(present,-average)

8.2.2 Enrichment analysis: Ancombc2

phylo_samples <- sample_metadata %>%
  column_to_rownames("sample") %>%
  sample_data() 
phylo_genome <- genome_counts_filt %>%
  filter(!genome %in% structural_zeros$genome) %>%
  column_to_rownames("genome") %>%
  mutate_all( ~ replace(., . == 0, 0.00001)) %>%
  otu_table(., taxa_are_rows = TRUE)
phylo_taxonomy <- genome_metadata %>%
  filter(genome %in% rownames(phylo_genome)) %>% 
  mutate(genome2 = genome) %>%
  column_to_rownames("genome2") %>%
  dplyr::select(domain, phylum, class, order, family, genus, species, genome) %>% 
  as.matrix() %>%
  tax_table() 

physeq_genome_filtered <- phyloseq(phylo_genome, phylo_taxonomy, phylo_samples)

physeq_sample <- subset_samples(physeq_genome_filtered, diet %in% c("Pre_grass","Post_grass"))
physeq_sample <- prune_taxa(taxa_sums(physeq_sample)>0, physeq_sample)

8.2.2.1 Relative abundances of archaea

meta_mags <- genome_metadata %>% 
  filter(phylum=="p__Methanobacteriota")

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.))%>%
  filter(genome %in% meta_mags$genome) %>% 
  pivot_longer(!genome, names_to = "sample", values_to = "abundance") %>% 
  left_join(sample_metadata, by=join_by(sample==sample)) %>% 
  filter(diet!="Wild") %>%
  group_by(diet) %>% 
  summarize(mean_Archaea=mean(abundance), sd_Archaea=sd(abundance), 
            max_Archaea=max(abundance), min_Archaea=min(abundance),
            zero_count = sum(abundance == 0, na.rm = TRUE))
# A tibble: 2 × 6
  diet       mean_Archaea sd_Archaea max_Archaea min_Archaea zero_count
  <chr>             <dbl>      <dbl>       <dbl>       <dbl>      <int>
1 Post_grass     0.00154     0.00292     0.00768           0          1
2 Pre_grass      0.000904    0.00228     0.00744           0          4
genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.))%>%
  filter(genome %in% meta_mags$genome) %>% 
  pivot_longer(!genome, names_to = "sample", values_to = "abundance") %>% 
  left_join(sample_metadata, by=join_by(sample==sample)) %>% 
  filter(diet!="Wild") %>% 
  ggplot(aes(y = abundance, x = diet, group=diet, color=diet, fill=diet)) +
  geom_jitter(width = 0.2, show.legend = FALSE) +
  geom_boxplot(width = 0.5, alpha=0.5,outlier.shape = NA, show.legend = FALSE) +
  scale_color_manual(values=diet_colors)+
  scale_fill_manual(values=diet_colors) +
  scale_x_discrete(labels=c("Post_grass" = "Grass", "Pre_grass" = "No grass")) +
  coord_cartesian(xlim = c(1, NA)) +
  theme_classic() +
  theme(
    strip.background = element_blank(),
    panel.grid.minor.x = element_line(size = .1, color = "grey"),
    axis.text = element_text(size=10),
    axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
    axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0))
    )+
  labs(x = "Diet", y = "Methanobacteriota")

8.2.2.2 MAG level

set.seed(1234)
ancom_rand_output_mag_pre_post = ancombc2(
  data = physeq_sample,
  assay_name = "counts",
  tax_level = NULL,
  fix_formula = "diet",
  rand_formula = "(1|individual)",
  p_adj_method = "holm",
  pseudo_sens = TRUE,
  prv_cut = 0,
  lib_cut = 0,
  s0_perc = 0.05,
  group = NULL,
  struc_zero = FALSE,
  neg_lb = FALSE,
  alpha = 0.05,
  n_cl = 2,
  verbose = TRUE,
  global = FALSE,
  pairwise = FALSE,
  dunnet = FALSE,
  trend = FALSE,
  iter_control = list(
    tol = 1e-5,
    max_iter = 20,
    verbose = FALSE
  ),
  em_control = list(tol = 1e-5, max_iter = 100),
  lme_control = lme4::lmerControl(),
  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
  trend_control = NULL
)
taxonomy <- data.frame(physeq_sample@tax_table) %>%
  rownames_to_column(., "taxon") %>%
  mutate_at(vars(phylum, order, family, genus, species), ~ str_replace(., "[dpcofgs]__", ""))

ancombc_rand_table_mag <- ancom_rand_output_mag_pre_post$res %>%
  dplyr::select(taxon, lfc_dietPre_grass, p_dietPre_grass) %>%
  filter(p_dietPre_grass < 0.05) %>%
  dplyr::arrange(p_dietPre_grass) %>%
  merge(., taxonomy, by="taxon") %>%
  mutate_at(vars(phylum, species), ~ str_replace(., "[dpcofgs]__", ""))%>%
  dplyr::arrange(lfc_dietPre_grass)
  
colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  mutate_at(vars(phylum), ~ str_replace(., "[dpcofgs]__", ""))  %>%
  right_join(taxonomy, by=join_by(phylum == phylum)) %>%
  dplyr::select(phylum, colors) %>%
  mutate(colors = str_c(colors, "80"))  %>% #add 80% alpha
    unique() %>%
    dplyr::arrange(phylum)

tax_table <- as.data.frame(unique(ancombc_rand_table_mag$phylum))
  
colnames(tax_table)[1] <- "phylum"
tax_color <- merge(tax_table, colors_alphabetic, by="phylum")%>%
    dplyr::arrange(phylum) %>%
    dplyr::select(colors) %>%
    pull()
ancombc_rand_table_mag%>%
  mutate(genome=factor(genome,levels=ancombc_rand_table_mag$genome)) %>%
  ggplot(., aes(x=lfc_dietPre_grass, y=forcats::fct_reorder(genome,lfc_dietPre_grass), fill=phylum)) + 
  geom_col() + 
  scale_fill_manual(values=tax_color) + 
  geom_hline(yintercept=0) + 
  theme(panel.background = element_blank(),
        axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
        axis.text.x = element_text(size = 10),
        axis.text.y = element_text(size = 8),
        axis.title = element_text(size = 12, face = "bold"),
        legend.text = element_text(size = 10),
        legend.title = element_text(size = 12, face = "bold"),
        legend.position = "right", legend.box = "vertical")+
  xlab("log2FoldChange") + 
  ylab("Genome")+
  guides(fill=guide_legend(title="Phylum"))

ancom_rand_output_mag_pre_post$res %>%
  na.omit() %>%
  dplyr::select(genome=taxon, lfc_dietPre_grass, p_dietPre_grass) %>%
  left_join(genome_metadata, by = join_by(genome == genome)) %>%
  mutate(phylum = ifelse(p_dietPre_grass < 0.05, phylum, NA)) %>%
  ggplot(., aes(x = lfc_dietPre_grass, y = -log(p_dietPre_grass), color = phylum)) +
  geom_point(size=3, show.legend = FALSE) +
  scale_color_manual(values = phylum_colors) +
  labs(color = "Significant phylum", x = "Log-fold difference between pre and post grass", y = "p-value") +
  theme_classic()