Chapter 9 Functional differences

load("data/data_host_filtered.Rdata")
genome_counts_filt <- genome_counts_filt[genome_counts_filt$genome %in% rownames(genome_gifts_raw),]
genome_counts_filt <- genome_counts_filt %>%
  column_to_rownames(., "genome") %>%
  filter(rowSums(. != 0, na.rm = TRUE) > 0) %>%
  select_if(~!all(. == 0))%>%
  rownames_to_column(., "genome")

genome_gifts <- genome_gifts_raw[rownames(genome_gifts_raw) %in% genome_counts_filt$genome,]
genome_gifts <- genome_gifts[, colSums(genome_gifts != 0) > 0]

#Aggregate bundle-level GIFTs into the compound level
GIFTs_elements <- to.elements(genome_gifts,GIFT_db)
GIFTs_elements_filtered <- GIFTs_elements[rownames(GIFTs_elements) %in% genome_counts_filt$genome,]
GIFTs_elements_filtered <- as.data.frame(GIFTs_elements_filtered) %>% 
  select_if(~ !is.numeric(.) || sum(.) != 0)

#Aggregate element-level GIFTs into the function level
GIFTs_functions <- to.functions(GIFTs_elements_filtered,GIFT_db)

#Aggregate function-level GIFTs into overall Biosynthesis, Degradation and Structural GIFTs
GIFTs_domains <- to.domains(GIFTs_functions,GIFT_db)

#Get community-weighed average GIFTs per sample
genome_counts_row <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% 
  column_to_rownames(., "genome") 

GIFTs_elements_community <- to.community(GIFTs_elements_filtered,genome_counts_row,GIFT_db)
GIFTs_functions_community <- to.community(GIFTs_functions,genome_counts_row,GIFT_db)
GIFTs_domains_community <- to.community(GIFTs_domains,genome_counts_row,GIFT_db)
GIFTs_elements_filtered %>% 
  rownames_to_column("genome") %>%
  left_join(genome_metadata[,1:8], by = join_by(genome == genome)) %>% group_by(phylum)  %>%
  summarize(across(where(is.numeric), mean, na.rm = TRUE))%>%
    pivot_longer(!phylum,names_to="trait",values_to="gift") %>%
    mutate(functionid = substr(trait, 1, 3)) %>%
    mutate(trait = case_when(
      trait %in% GIFT_db$Code_element ~ GIFT_db$Element[match(trait, GIFT_db$Code_element)],
      TRUE ~ trait
    )) %>%
    mutate(functionid = case_when(
      functionid %in% GIFT_db$Code_function ~ GIFT_db$Function[match(functionid, GIFT_db$Code_function)],
      TRUE ~ functionid
    )) %>%
    mutate(trait=factor(trait,levels=unique(GIFT_db$Element))) %>%
    mutate(functionid=factor(functionid,levels=unique(GIFT_db$Function))) %>%
    ggplot(aes(x=phylum,y=trait,fill=gift)) +
        geom_tile(colour="white", linewidth=0.2)+
        scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
        facet_grid(functionid ~ ., scales="free",space="free") +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
              axis.text.y = element_text(size=6),
              strip.text.y = element_text(angle = 0)
              ) +
        labs(y="Traits",x="Samples",fill="GIFT")

GIFTs_elements_filtered %>% 
  rownames_to_column("genome") %>%
  filter(genome %in% struct_mag)  %>% 
    pivot_longer(!genome,names_to="trait",values_to="gift") %>% 
    left_join(struct_mag_meta, by = join_by(genome == genome))%>%
    mutate(functionid = substr(trait, 1, 3)) %>%
    mutate(trait = case_when(
      trait %in% GIFT_db$Code_element ~ GIFT_db$Element[match(trait, GIFT_db$Code_element)],
      TRUE ~ trait
    )) %>%
    mutate(functionid = case_when(
      functionid %in% GIFT_db$Code_function ~ GIFT_db$Function[match(functionid, GIFT_db$Code_function)],
      TRUE ~ functionid
    )) %>%
    mutate(trait=factor(trait,levels=unique(GIFT_db$Element))) %>%
    mutate(functionid=factor(functionid,levels=unique(GIFT_db$Function))) %>%
    ggplot(aes(x=genome,y=trait,fill=gift)) +
        geom_tile(colour="white", linewidth=0.2)+
        scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
        facet_grid(functionid ~ present, scales="free",space="free") +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
              axis.text.y = element_text(size=6),
              strip.text.y = element_text(angle = 0)
              ) +
        labs(y="Traits",x="Samples",fill="GIFT")

9.1 Phylum MCI

GIFTs_elements_filtered  %>% 
  rownames_to_column("genome")%>% 
  left_join(genome_metadata[,1:8], by = join_by(genome == genome)) %>% group_by(phylum) %>%  
  summarize(across(where(is.numeric), mean, na.rm = TRUE)) %>%
  mutate(
    row_mean = rowMeans(across(where(is.numeric)), na.rm = TRUE),  
    row_sd = apply(across(where(is.numeric)), 1, sd, na.rm = TRUE),
    row_median = apply(across(where(is.numeric)), 1, median, na.rm = TRUE),
    row_max = apply(across(where(is.numeric)), 1, max, na.rm = TRUE),
    row_min = apply(across(where(is.numeric)), 1, min, na.rm = TRUE)
  ) %>% 
  select(phylum, row_mean, row_sd, row_median, row_max, row_min) %>% 
  arrange(-row_mean)
# A tibble: 15 × 6
   phylum               row_mean row_sd row_median row_max row_min
   <chr>                   <dbl>  <dbl>      <dbl>   <dbl>   <dbl>
 1 p__Bacteroidota        0.271  0.256      0.213    0.826       0
 2 p__Bacillota_C         0.270  0.343      0.0833   0.944       0
 3 p__Bacillota_B         0.270  0.360      0        1           0
 4 p__Pseudomonadota      0.250  0.236      0.151    0.74        0
 5 p__Verrucomicrobiota   0.245  0.260      0.161    0.786       0
 6 p__Desulfobacterota    0.234  0.291      0.06     0.909       0
 7 p__Spirochaetota       0.229  0.302      0.035    1           0
 8 p__Bacillota_A         0.224  0.218      0.189    0.760       0
 9 p__Cyanobacteriota     0.214  0.301      0.0357   1           0
10 p__Methanobacteriota   0.190  0.327      0        1           0
11 p__Actinomycetota      0.179  0.198      0.12     0.714       0
12 p__Synergistota        0.157  0.214      0.0373   0.8         0
13 p__Bacillota           0.0692 0.110      0.0361   0.653       0
14 p__Campylobacterota    0.0677 0.174      0        1           0
15 p__Patescibacteria     0.0336 0.0949     0        0.667       0
GIFTs_elements_filtered %>% 
  rownames_to_column("genome") %>%
  left_join(genome_metadata[,1:8], by = join_by(genome == genome)) %>% group_by(phylum)  %>%
  summarize(across(where(is.numeric), mean, na.rm = TRUE))%>%
    pivot_longer(!phylum,names_to="trait",values_to="gift") %>%
    mutate(functionid = substr(trait, 1, 3)) %>%
    mutate(trait = case_when(
      trait %in% GIFT_db$Code_element ~ GIFT_db$Element[match(trait, GIFT_db$Code_element)],
      TRUE ~ trait
    )) %>%
    mutate(functionid = case_when(
      functionid %in% GIFT_db$Code_function ~ GIFT_db$Function[match(functionid, GIFT_db$Code_function)],
      TRUE ~ functionid
    )) %>%
    mutate(trait=factor(trait,levels=unique(GIFT_db$Element))) %>%
    mutate(functionid=factor(functionid,levels=unique(GIFT_db$Function))) %>% 
  group_by(phylum, functionid) %>% 
  summarize(median=median(gift))%>%
  pivot_wider(names_from = functionid, values_from = median)
# A tibble: 15 × 21
# Groups:   phylum [15]
   phylum             Nucleic acid biosynt…¹ Amino acid biosynthe…² Amino acid derivativ…³ `SCFA biosynthesis` Organic anion biosyn…⁴ `Vitamin biosynthesis` Aromatic compound bi…⁵ Metallophore biosynt…⁶ Antibiotic biosynthe…⁷
   <chr>                               <dbl>                  <dbl>                  <dbl>               <dbl>                  <dbl>                  <dbl>                  <dbl>                  <dbl>                  <dbl>
 1 p__Actinomycetota                   0.568                 0.431                 0                     0.39                  0.286                  0.101                  0.357                 0                      0.005  
 2 p__Bacillota                        0.213                 0.0525                0.0343                0.373                 0.0588                 0.0328                 0.0586                0                      0.00775
 3 p__Bacillota_A                      0.577                 0.502                 0.110                 0.496                 0.260                  0.179                  0.577                 0.00864                0.0354 
 4 p__Bacillota_B                      0.762                 0.818                 0.2                   0.67                  0.525                  0.29                   0.9                   0                      0      
 5 p__Bacillota_C                      0.883                 0.657                 0.428                 0.407                 0.466                  0.363                  0.778                 0                      0.0311 
 6 p__Bacteroidota                     0.669                 0.457                 0.0913                0.452                 0.576                  0.484                  0.568                 0                      0.0133 
 7 p__Campylobactero…                  0.315                 0                     0                     0.33                  0.27                   0                      0                     0                      0      
 8 p__Cyanobacteriota                  0.696                 0.326                 0.171                 0.226                 0.221                  0.123                  0.857                 0                      0.0714 
 9 p__Desulfobactero…                  0.780                 0.612                 0.182                 0.584                 0.485                  0.305                  0.636                 0                      0      
10 p__Methanobacteri…                  0.94                  0.67                  0.1                   0.29                  0.68                   0.155                  0                     0                      0      
11 p__Patescibacteria                  0.146                 0                     0.00833               0.165                 0.005                  0                      0                     0                      0      
12 p__Pseudomonadota                   0.604                 0.569                 0.186                 0.477                 0.561                  0.344                  0.647                 0.00714                0.0141 
13 p__Spirochaetota                    0.812                 0.438                 0                     0.33                  0.5                    0.14                   0                     0                      0      
14 p__Synergistota                     0.45                  0.37                  0.0333                0.381                 0.135                  0.159                  0.511                 0                      0.0223 
15 p__Verrucomicrobi…                  0.589                 0.424                 0.0714                0.435                 0.492                  0.243                  0.381                 0                      0      
# ℹ abbreviated names: ¹​`Nucleic acid biosynthesis`, ²​`Amino acid biosynthesis`, ³​`Amino acid derivative biosynthesis`, ⁴​`Organic anion biosynthesis`, ⁵​`Aromatic compound biosynthesis`, ⁶​`Metallophore biosynthesis`,
#   ⁷​`Antibiotic biosynthesis`
# ℹ 11 more variables: `Lipid degradation` <dbl>, `Polysaccharide degradation` <dbl>, `Sugar degradation` <dbl>, `Amino acid degradation` <dbl>, `Nitrogen compound degradation` <dbl>, `Alcohol degradation` <dbl>,
#   `Xenobiotic degradation` <dbl>, `Antibiotic degradation` <dbl>, `Cellular structure` <dbl>, Appendages <dbl>, Spore <dbl>

9.2 Groups MCI

GIFTs_functions_community %>%
  rowMeans() %>%
  as_tibble(., rownames = "sample") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  group_by(diet) %>%
  summarise(MCI = mean(value), sd = sd(value)) %>%
  tt()
diet MCI sd
Post_grass 0.2396430 0.01304278
Pre_grass 0.2493529 0.01524032
Wild 0.2541861 0.02059163

Shapiro test

MCI <- GIFTs_functions_community %>%
  rowMeans() %>%
  as_tibble(., rownames = "sample") %>%
  left_join(sample_metadata, by = join_by(sample == sample))

shapiro.test(MCI$value)

    Shapiro-Wilk normality test

data:  MCI$value
W = 0.98397, p-value = 0.8693
res.aov <- aov(value ~ diet, data=MCI)
summary(res.aov)
            Df   Sum Sq   Mean Sq F value Pr(>F)
diet         2 0.001317 0.0006583    2.39  0.107
Residuals   33 0.009090 0.0002755               

9.3 Community level plots

GIFTs_elements_community %>%
    as.data.frame() %>%
    rownames_to_column(var="sample")%>%
    pivot_longer(!sample,names_to="trait",values_to="gift") %>%
    left_join(sample_metadata, by = join_by(sample == sample)) %>%
    mutate(functionid = substr(trait, 1, 3)) %>%
    mutate(trait = case_when(
      trait %in% GIFT_db$Code_element ~ GIFT_db$Element[match(trait, GIFT_db$Code_element)],
      TRUE ~ trait
    )) %>%
    mutate(functionid = case_when(
      functionid %in% GIFT_db$Code_function ~ GIFT_db$Function[match(functionid, GIFT_db$Code_function)],
      TRUE ~ functionid
    )) %>%
    mutate(trait=factor(trait,levels=unique(GIFT_db$Element))) %>%
    mutate(functionid=factor(functionid,levels=unique(GIFT_db$Function))) %>%
    ggplot(aes(x=sample,y=trait,fill=gift)) +
        geom_tile(colour="white", linewidth=0.2)+
        scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
        facet_grid(functionid ~ diet, scales="free",space="free") +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
              axis.text.y = element_text(size=6),
              strip.text.y = element_text(angle = 0)
              ) +
  labs(y="Traits",x="Samples",fill="GIFT")

unique_funct_db<- GIFT_db[c(3,4,5)] %>% 
  distinct(Code_function, .keep_all = TRUE)

GIFTs_functions_community %>%
   t() %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "Code_function")  %>%
  left_join(.,unique_funct_db[c(1,3)],by = join_by(Code_function == Code_function))  %>%
  select(-Code_function) %>%
  column_to_rownames(., "Function")%>%
   t()  %>%
    as.data.frame() %>%
    rownames_to_column(var="sample") %>%
    pivot_longer(!sample,names_to="trait",values_to="gift") %>%
    left_join(sample_metadata, by = join_by(sample == sample))%>%
 # filter(!diet =="Post_grass") %>%
    ggplot(aes(x=trait,y=sample,fill=gift)) +
        geom_tile(colour="white", linewidth=0.2)+
        scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
        facet_grid(diet ~ ., scales="free",space="free")+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size = 10),
        axis.text.y = element_text(size=8),
        strip.background = element_blank(),
        strip.text = element_text(size = 12, color="black",face="bold"),
        axis.title = element_text(size = 12, face = "bold"),
        axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
        axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)),
        panel.background= element_blank()
              ) +
        labs(x="Function", y="Sample",fill="GIFT")

9.4 Wilcoxon comparison

9.4.1 Captive vs Wild

9.4.1.1 Community elements differences:

element_gift <- GIFTs_elements_community %>% 
  as.data.frame() %>% 
  rownames_to_column(., "sample") %>% 
  left_join(sample_metadata[c(1,21)], by="sample") %>% 
  filter(diet!="Post_grass")

uniqueGIFT_db<- unique(GIFT_db[c(2,4,5,6)]) %>% unite("Function",Function:Element, sep= "_", remove=FALSE)

significant_elements <- element_gift %>%
    pivot_longer(-c(sample,diet), names_to = "trait", values_to = "value") %>%
    group_by(trait) %>%
    summarise(p_value = wilcox.test(value ~ diet)$p.value) %>%
    mutate(p_adjust=p.adjust(p_value, method="BH")) %>%
    filter(p_adjust < 0.05)%>%
  rownames_to_column(., "Elements")  %>% 
  select(-Elements)

element_gift_t <- element_gift  %>% 
  t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "trait")

element_gift_filt <- subset(element_gift_t, trait %in% significant_elements$trait) %>% 
  t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric)  %>%
  rownames_to_column(., "sample")%>% 
  left_join(., sample_metadata[c(1,21)], by = join_by(sample == sample))

difference_table <- element_gift_filt %>%
  select(-sample) %>%
  group_by(diet) %>%
  summarise(across(everything(), mean)) %>%
  t() %>%
  row_to_names(row_number = 1) %>%
  as.data.frame() %>%
  mutate_if(is.character, as.numeric) %>%
  rownames_to_column(., "Elements") %>%
  left_join(.,uniqueGIFT_db[c(1,3,4)],by = join_by(Elements == Code_element)) %>% 
  arrange(Function) %>% 
  mutate(Difference=Pre_grass-Wild)%>% 
  mutate(group_color = ifelse(Difference <0, "Wild","Captive")) 
means_gift <- element_gift_filt %>% 
  select(-diet) %>% 
  pivot_longer(!sample, names_to = "elements", values_to = "abundance") %>% 
  left_join(sample_metadata, by=join_by(sample==sample)) %>% 
  group_by(diet, elements) %>%
  summarise(mean=mean(abundance))

log_fold <- means_gift %>%
  group_by(elements) %>%
  summarise(
    logfc_captive_wild = log2(mean[diet == "Pre_grass"] / mean[diet == "Wild"])
    )
uniqueGIFT <- unique(GIFT_db[c(2,3,4,5,6)])

code_function2 <- difference_table %>%
  left_join(uniqueGIFT[c(1:3)], by=join_by(Elements==Code_element))

unique_codes<-unique(code_function2$Code_function)

gift_colors <- read_tsv("data/gift_colors.tsv") %>% 
  filter(Code_function %in% unique_codes)%>% 
  mutate(legend=str_c(Code_function," - ",Function))
code_function2 %>%
  left_join(significant_elements, by=join_by(Elements==trait)) %>%
  left_join(log_fold, by=join_by(Elements==elements)) %>% 
  left_join(gift_colors, by=join_by(Code_function==Code_function)) %>% 
  ggplot(., aes(x = logfc_captive_wild, y = -log(p_adjust), color=legend, size=abs(Difference))) +
  geom_jitter(width = 0.2, height = 0.2)+
  geom_vline(xintercept=0) +
  scale_color_manual(values = gift_colors$Color)+
  theme_classic()+
  labs(size="Mean difference (abs)", color="Functional trait")+
  labs(x = "Log-fold change", y="-Log adjusted p-value") +
  geom_text_repel(aes(label = Element), min.segment.length = 0.4, size=2.5, max.overlaps = Inf)

difference_table %>%
  ggplot(aes(x=forcats::fct_reorder(Function,Difference), y=Difference, fill=group_color)) + 
  geom_col() +
#  geom_point(size=4) + 
  scale_fill_manual(values=c("#e5bd5b", "#6b7398")) + 
  geom_hline(yintercept=0) + 
  coord_flip()+
  theme(axis.text = element_text(size = 10),
        axis.title = element_text(size = 12),
        legend.position = "right", 
        legend.title = element_blank(),
        panel.background = element_blank(),
          panel.grid.major = element_line(size = 0.15, linetype = 'solid',
                                colour = "grey"))+
  xlab("Function") + 
  ylab("Mean difference")
sugar_function <- GIFT_db %>% 
  filter(Code_function=="D03") %>% 
  select(Code_element) %>% 
  pull()

GIFTs_elements_community %>% 
  as.data.frame() %>% 
  select(any_of(sugar_function)) %>% 
  rownames_to_column(., "sample") %>% 
  left_join(sample_metadata[c(1,21)], by=join_by(sample==sample)) %>% 
  select(-sample) %>% 
  pivot_longer(!diet,names_to="trait",values_to="gift") %>% 
  left_join(uniqueGIFT_db, by=join_by(trait==Code_element)) %>% 
  ggplot(aes(x=diet, y=gift, color=diet)) +
  geom_jitter(width = 0.2, show.legend = FALSE) +
  geom_boxplot(width = 0.5, alpha=0.5,outlier.shape = NA, show.legend = FALSE) +
  scale_color_manual(values=diet_colors)+
  scale_fill_manual(values=diet_colors) +
  scale_x_discrete(labels=c("Wild" = "Wild", "Pre_grass" = "Captive-born")) +
  facet_grid(.~Element,  scales="free_x")+
  coord_cartesian(xlim = c(1, NA)) +
  theme_classic() +
  theme(
    strip.background = element_blank(),
    panel.grid.minor.x = element_line(size = .1, color = "grey"),
    axis.text = element_text(size=10, angle = 45, hjust = 1),
    axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
    axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0))
    )

poly_function <- GIFT_db %>% 
  filter(Code_function=="D02") %>% 
  select(Code_element) %>% 
  pull()

GIFTs_elements_community %>% 
  as.data.frame() %>% 
  select(any_of(poly_function)) %>% 
  rownames_to_column(., "sample") %>% 
  left_join(sample_metadata[c(1,21)], by=join_by(sample==sample)) %>% 
  select(-sample) %>% 
  pivot_longer(!diet,names_to="trait",values_to="gift") %>% 
  left_join(uniqueGIFT_db, by=join_by(trait==Code_element)) %>% 
  ggplot(aes(x=diet, y=gift, color=diet)) +
  geom_jitter(width = 0.2, show.legend = FALSE) +
  geom_boxplot(width = 0.5, alpha=0.5,outlier.shape = NA, show.legend = FALSE) +
  scale_color_manual(values=diet_colors)+
  scale_fill_manual(values=diet_colors) +
  scale_x_discrete(labels=c("Wild" = "Wild", "Pre_grass" = "Captive-born")) +
facet_grid(.~Element,  scales="free_x")+
  coord_cartesian(xlim = c(1, NA)) +
  theme_classic() +
  theme(
    strip.background = element_blank(),
    panel.grid.minor.x = element_line(size = .1, color = "grey"),
    axis.text = element_text(size=10, angle = 45, hjust = 1),
    axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
    axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0))
    )