Chapter 9 Functional differences
genome_counts_filt <- genome_counts_filt[genome_counts_filt$genome %in% rownames(genome_gifts_raw),]
genome_counts_filt <- genome_counts_filt %>%
column_to_rownames(., "genome") %>%
filter(rowSums(. != 0, na.rm = TRUE) > 0) %>%
select_if(~!all(. == 0))%>%
rownames_to_column(., "genome")
genome_gifts <- genome_gifts_raw[rownames(genome_gifts_raw) %in% genome_counts_filt$genome,]
genome_gifts <- genome_gifts[, colSums(genome_gifts != 0) > 0]
#Aggregate bundle-level GIFTs into the compound level
GIFTs_elements <- to.elements(genome_gifts,GIFT_db)
GIFTs_elements_filtered <- GIFTs_elements[rownames(GIFTs_elements) %in% genome_counts_filt$genome,]
GIFTs_elements_filtered <- as.data.frame(GIFTs_elements_filtered) %>%
select_if(~ !is.numeric(.) || sum(.) != 0)
#Aggregate element-level GIFTs into the function level
GIFTs_functions <- to.functions(GIFTs_elements_filtered,GIFT_db)
#Aggregate function-level GIFTs into overall Biosynthesis, Degradation and Structural GIFTs
GIFTs_domains <- to.domains(GIFTs_functions,GIFT_db)
#Get community-weighed average GIFTs per sample
genome_counts_row <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>%
column_to_rownames(., "genome")
GIFTs_elements_community <- to.community(GIFTs_elements_filtered,genome_counts_row,GIFT_db)
GIFTs_functions_community <- to.community(GIFTs_functions,genome_counts_row,GIFT_db)
GIFTs_domains_community <- to.community(GIFTs_domains,genome_counts_row,GIFT_db)GIFTs_elements_filtered %>%
rownames_to_column("genome") %>%
left_join(genome_metadata[,1:8], by = join_by(genome == genome)) %>% group_by(phylum) %>%
summarize(across(where(is.numeric), mean, na.rm = TRUE))%>%
pivot_longer(!phylum,names_to="trait",values_to="gift") %>%
mutate(functionid = substr(trait, 1, 3)) %>%
mutate(trait = case_when(
trait %in% GIFT_db$Code_element ~ GIFT_db$Element[match(trait, GIFT_db$Code_element)],
TRUE ~ trait
)) %>%
mutate(functionid = case_when(
functionid %in% GIFT_db$Code_function ~ GIFT_db$Function[match(functionid, GIFT_db$Code_function)],
TRUE ~ functionid
)) %>%
mutate(trait=factor(trait,levels=unique(GIFT_db$Element))) %>%
mutate(functionid=factor(functionid,levels=unique(GIFT_db$Function))) %>%
ggplot(aes(x=phylum,y=trait,fill=gift)) +
geom_tile(colour="white", linewidth=0.2)+
scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
facet_grid(functionid ~ ., scales="free",space="free") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.text.y = element_text(size=6),
strip.text.y = element_text(angle = 0)
) +
labs(y="Traits",x="Samples",fill="GIFT")GIFTs_elements_filtered %>%
rownames_to_column("genome") %>%
filter(genome %in% struct_mag) %>%
pivot_longer(!genome,names_to="trait",values_to="gift") %>%
left_join(struct_mag_meta, by = join_by(genome == genome))%>%
mutate(functionid = substr(trait, 1, 3)) %>%
mutate(trait = case_when(
trait %in% GIFT_db$Code_element ~ GIFT_db$Element[match(trait, GIFT_db$Code_element)],
TRUE ~ trait
)) %>%
mutate(functionid = case_when(
functionid %in% GIFT_db$Code_function ~ GIFT_db$Function[match(functionid, GIFT_db$Code_function)],
TRUE ~ functionid
)) %>%
mutate(trait=factor(trait,levels=unique(GIFT_db$Element))) %>%
mutate(functionid=factor(functionid,levels=unique(GIFT_db$Function))) %>%
ggplot(aes(x=genome,y=trait,fill=gift)) +
geom_tile(colour="white", linewidth=0.2)+
scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
facet_grid(functionid ~ present, scales="free",space="free") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.text.y = element_text(size=6),
strip.text.y = element_text(angle = 0)
) +
labs(y="Traits",x="Samples",fill="GIFT")9.1 Phylum MCI
GIFTs_elements_filtered %>%
rownames_to_column("genome")%>%
left_join(genome_metadata[,1:8], by = join_by(genome == genome)) %>% group_by(phylum) %>%
summarize(across(where(is.numeric), mean, na.rm = TRUE)) %>%
mutate(
row_mean = rowMeans(across(where(is.numeric)), na.rm = TRUE),
row_sd = apply(across(where(is.numeric)), 1, sd, na.rm = TRUE),
row_median = apply(across(where(is.numeric)), 1, median, na.rm = TRUE),
row_max = apply(across(where(is.numeric)), 1, max, na.rm = TRUE),
row_min = apply(across(where(is.numeric)), 1, min, na.rm = TRUE)
) %>%
select(phylum, row_mean, row_sd, row_median, row_max, row_min) %>%
arrange(-row_mean)# A tibble: 15 × 6
phylum row_mean row_sd row_median row_max row_min
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 p__Bacteroidota 0.271 0.256 0.213 0.826 0
2 p__Bacillota_C 0.270 0.343 0.0833 0.944 0
3 p__Bacillota_B 0.270 0.360 0 1 0
4 p__Pseudomonadota 0.250 0.236 0.151 0.74 0
5 p__Verrucomicrobiota 0.245 0.260 0.161 0.786 0
6 p__Desulfobacterota 0.234 0.291 0.06 0.909 0
7 p__Spirochaetota 0.229 0.302 0.035 1 0
8 p__Bacillota_A 0.224 0.218 0.189 0.760 0
9 p__Cyanobacteriota 0.214 0.301 0.0357 1 0
10 p__Methanobacteriota 0.190 0.327 0 1 0
11 p__Actinomycetota 0.179 0.198 0.12 0.714 0
12 p__Synergistota 0.157 0.214 0.0373 0.8 0
13 p__Bacillota 0.0692 0.110 0.0361 0.653 0
14 p__Campylobacterota 0.0677 0.174 0 1 0
15 p__Patescibacteria 0.0336 0.0949 0 0.667 0
GIFTs_elements_filtered %>%
rownames_to_column("genome") %>%
left_join(genome_metadata[,1:8], by = join_by(genome == genome)) %>% group_by(phylum) %>%
summarize(across(where(is.numeric), mean, na.rm = TRUE))%>%
pivot_longer(!phylum,names_to="trait",values_to="gift") %>%
mutate(functionid = substr(trait, 1, 3)) %>%
mutate(trait = case_when(
trait %in% GIFT_db$Code_element ~ GIFT_db$Element[match(trait, GIFT_db$Code_element)],
TRUE ~ trait
)) %>%
mutate(functionid = case_when(
functionid %in% GIFT_db$Code_function ~ GIFT_db$Function[match(functionid, GIFT_db$Code_function)],
TRUE ~ functionid
)) %>%
mutate(trait=factor(trait,levels=unique(GIFT_db$Element))) %>%
mutate(functionid=factor(functionid,levels=unique(GIFT_db$Function))) %>%
group_by(phylum, functionid) %>%
summarize(median=median(gift))%>%
pivot_wider(names_from = functionid, values_from = median)# A tibble: 15 × 21
# Groups: phylum [15]
phylum Nucleic acid biosynt…¹ Amino acid biosynthe…² Amino acid derivativ…³ `SCFA biosynthesis` Organic anion biosyn…⁴ `Vitamin biosynthesis` Aromatic compound bi…⁵ Metallophore biosynt…⁶ Antibiotic biosynthe…⁷
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 p__Actinomycetota 0.568 0.431 0 0.39 0.286 0.101 0.357 0 0.005
2 p__Bacillota 0.213 0.0525 0.0343 0.373 0.0588 0.0328 0.0586 0 0.00775
3 p__Bacillota_A 0.577 0.502 0.110 0.496 0.260 0.179 0.577 0.00864 0.0354
4 p__Bacillota_B 0.762 0.818 0.2 0.67 0.525 0.29 0.9 0 0
5 p__Bacillota_C 0.883 0.657 0.428 0.407 0.466 0.363 0.778 0 0.0311
6 p__Bacteroidota 0.669 0.457 0.0913 0.452 0.576 0.484 0.568 0 0.0133
7 p__Campylobactero… 0.315 0 0 0.33 0.27 0 0 0 0
8 p__Cyanobacteriota 0.696 0.326 0.171 0.226 0.221 0.123 0.857 0 0.0714
9 p__Desulfobactero… 0.780 0.612 0.182 0.584 0.485 0.305 0.636 0 0
10 p__Methanobacteri… 0.94 0.67 0.1 0.29 0.68 0.155 0 0 0
11 p__Patescibacteria 0.146 0 0.00833 0.165 0.005 0 0 0 0
12 p__Pseudomonadota 0.604 0.569 0.186 0.477 0.561 0.344 0.647 0.00714 0.0141
13 p__Spirochaetota 0.812 0.438 0 0.33 0.5 0.14 0 0 0
14 p__Synergistota 0.45 0.37 0.0333 0.381 0.135 0.159 0.511 0 0.0223
15 p__Verrucomicrobi… 0.589 0.424 0.0714 0.435 0.492 0.243 0.381 0 0
# ℹ abbreviated names: ¹`Nucleic acid biosynthesis`, ²`Amino acid biosynthesis`, ³`Amino acid derivative biosynthesis`, ⁴`Organic anion biosynthesis`, ⁵`Aromatic compound biosynthesis`, ⁶`Metallophore biosynthesis`,
# ⁷`Antibiotic biosynthesis`
# ℹ 11 more variables: `Lipid degradation` <dbl>, `Polysaccharide degradation` <dbl>, `Sugar degradation` <dbl>, `Amino acid degradation` <dbl>, `Nitrogen compound degradation` <dbl>, `Alcohol degradation` <dbl>,
# `Xenobiotic degradation` <dbl>, `Antibiotic degradation` <dbl>, `Cellular structure` <dbl>, Appendages <dbl>, Spore <dbl>
9.2 Groups MCI
GIFTs_functions_community %>%
rowMeans() %>%
as_tibble(., rownames = "sample") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
group_by(diet) %>%
summarise(MCI = mean(value), sd = sd(value)) %>%
tt()| diet | MCI | sd |
|---|---|---|
| Post_grass | 0.2396430 | 0.01304278 |
| Pre_grass | 0.2493529 | 0.01524032 |
| Wild | 0.2541861 | 0.02059163 |
Shapiro test
MCI <- GIFTs_functions_community %>%
rowMeans() %>%
as_tibble(., rownames = "sample") %>%
left_join(sample_metadata, by = join_by(sample == sample))
shapiro.test(MCI$value)
Shapiro-Wilk normality test
data: MCI$value
W = 0.98397, p-value = 0.8693
Df Sum Sq Mean Sq F value Pr(>F)
diet 2 0.001317 0.0006583 2.39 0.107
Residuals 33 0.009090 0.0002755
9.3 Community level plots
GIFTs_elements_community %>%
as.data.frame() %>%
rownames_to_column(var="sample")%>%
pivot_longer(!sample,names_to="trait",values_to="gift") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
mutate(functionid = substr(trait, 1, 3)) %>%
mutate(trait = case_when(
trait %in% GIFT_db$Code_element ~ GIFT_db$Element[match(trait, GIFT_db$Code_element)],
TRUE ~ trait
)) %>%
mutate(functionid = case_when(
functionid %in% GIFT_db$Code_function ~ GIFT_db$Function[match(functionid, GIFT_db$Code_function)],
TRUE ~ functionid
)) %>%
mutate(trait=factor(trait,levels=unique(GIFT_db$Element))) %>%
mutate(functionid=factor(functionid,levels=unique(GIFT_db$Function))) %>%
ggplot(aes(x=sample,y=trait,fill=gift)) +
geom_tile(colour="white", linewidth=0.2)+
scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
facet_grid(functionid ~ diet, scales="free",space="free") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
axis.text.y = element_text(size=6),
strip.text.y = element_text(angle = 0)
) +
labs(y="Traits",x="Samples",fill="GIFT")unique_funct_db<- GIFT_db[c(3,4,5)] %>%
distinct(Code_function, .keep_all = TRUE)
GIFTs_functions_community %>%
t() %>%
as.data.frame() %>%
mutate_if(is.character, as.numeric) %>%
rownames_to_column(., "Code_function") %>%
left_join(.,unique_funct_db[c(1,3)],by = join_by(Code_function == Code_function)) %>%
select(-Code_function) %>%
column_to_rownames(., "Function")%>%
t() %>%
as.data.frame() %>%
rownames_to_column(var="sample") %>%
pivot_longer(!sample,names_to="trait",values_to="gift") %>%
left_join(sample_metadata, by = join_by(sample == sample))%>%
# filter(!diet =="Post_grass") %>%
ggplot(aes(x=trait,y=sample,fill=gift)) +
geom_tile(colour="white", linewidth=0.2)+
scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
facet_grid(diet ~ ., scales="free",space="free")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size = 10),
axis.text.y = element_text(size=8),
strip.background = element_blank(),
strip.text = element_text(size = 12, color="black",face="bold"),
axis.title = element_text(size = 12, face = "bold"),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)),
panel.background= element_blank()
) +
labs(x="Function", y="Sample",fill="GIFT")9.4 Wilcoxon comparison
9.4.1 Captive vs Wild
9.4.1.1 Community elements differences:
element_gift <- GIFTs_elements_community %>%
as.data.frame() %>%
rownames_to_column(., "sample") %>%
left_join(sample_metadata[c(1,21)], by="sample") %>%
filter(diet!="Post_grass")
uniqueGIFT_db<- unique(GIFT_db[c(2,4,5,6)]) %>% unite("Function",Function:Element, sep= "_", remove=FALSE)
significant_elements <- element_gift %>%
pivot_longer(-c(sample,diet), names_to = "trait", values_to = "value") %>%
group_by(trait) %>%
summarise(p_value = wilcox.test(value ~ diet)$p.value) %>%
mutate(p_adjust=p.adjust(p_value, method="BH")) %>%
filter(p_adjust < 0.05)%>%
rownames_to_column(., "Elements") %>%
select(-Elements)
element_gift_t <- element_gift %>%
t() %>%
row_to_names(row_number = 1) %>%
as.data.frame() %>%
mutate_if(is.character, as.numeric) %>%
rownames_to_column(., "trait")
element_gift_filt <- subset(element_gift_t, trait %in% significant_elements$trait) %>%
t() %>%
row_to_names(row_number = 1) %>%
as.data.frame() %>%
mutate_if(is.character, as.numeric) %>%
rownames_to_column(., "sample")%>%
left_join(., sample_metadata[c(1,21)], by = join_by(sample == sample))
difference_table <- element_gift_filt %>%
select(-sample) %>%
group_by(diet) %>%
summarise(across(everything(), mean)) %>%
t() %>%
row_to_names(row_number = 1) %>%
as.data.frame() %>%
mutate_if(is.character, as.numeric) %>%
rownames_to_column(., "Elements") %>%
left_join(.,uniqueGIFT_db[c(1,3,4)],by = join_by(Elements == Code_element)) %>%
arrange(Function) %>%
mutate(Difference=Pre_grass-Wild)%>%
mutate(group_color = ifelse(Difference <0, "Wild","Captive")) means_gift <- element_gift_filt %>%
select(-diet) %>%
pivot_longer(!sample, names_to = "elements", values_to = "abundance") %>%
left_join(sample_metadata, by=join_by(sample==sample)) %>%
group_by(diet, elements) %>%
summarise(mean=mean(abundance))
log_fold <- means_gift %>%
group_by(elements) %>%
summarise(
logfc_captive_wild = log2(mean[diet == "Pre_grass"] / mean[diet == "Wild"])
)uniqueGIFT <- unique(GIFT_db[c(2,3,4,5,6)])
code_function2 <- difference_table %>%
left_join(uniqueGIFT[c(1:3)], by=join_by(Elements==Code_element))
unique_codes<-unique(code_function2$Code_function)
gift_colors <- read_tsv("data/gift_colors.tsv") %>%
filter(Code_function %in% unique_codes)%>%
mutate(legend=str_c(Code_function," - ",Function))code_function2 %>%
left_join(significant_elements, by=join_by(Elements==trait)) %>%
left_join(log_fold, by=join_by(Elements==elements)) %>%
left_join(gift_colors, by=join_by(Code_function==Code_function)) %>%
ggplot(., aes(x = logfc_captive_wild, y = -log(p_adjust), color=legend, size=abs(Difference))) +
geom_jitter(width = 0.2, height = 0.2)+
geom_vline(xintercept=0) +
scale_color_manual(values = gift_colors$Color)+
theme_classic()+
labs(size="Mean difference (abs)", color="Functional trait")+
labs(x = "Log-fold change", y="-Log adjusted p-value") +
geom_text_repel(aes(label = Element), min.segment.length = 0.4, size=2.5, max.overlaps = Inf)difference_table %>%
ggplot(aes(x=forcats::fct_reorder(Function,Difference), y=Difference, fill=group_color)) +
geom_col() +
# geom_point(size=4) +
scale_fill_manual(values=c("#e5bd5b", "#6b7398")) +
geom_hline(yintercept=0) +
coord_flip()+
theme(axis.text = element_text(size = 10),
axis.title = element_text(size = 12),
legend.position = "right",
legend.title = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_line(size = 0.15, linetype = 'solid',
colour = "grey"))+
xlab("Function") +
ylab("Mean difference")sugar_function <- GIFT_db %>%
filter(Code_function=="D03") %>%
select(Code_element) %>%
pull()
GIFTs_elements_community %>%
as.data.frame() %>%
select(any_of(sugar_function)) %>%
rownames_to_column(., "sample") %>%
left_join(sample_metadata[c(1,21)], by=join_by(sample==sample)) %>%
select(-sample) %>%
pivot_longer(!diet,names_to="trait",values_to="gift") %>%
left_join(uniqueGIFT_db, by=join_by(trait==Code_element)) %>%
ggplot(aes(x=diet, y=gift, color=diet)) +
geom_jitter(width = 0.2, show.legend = FALSE) +
geom_boxplot(width = 0.5, alpha=0.5,outlier.shape = NA, show.legend = FALSE) +
scale_color_manual(values=diet_colors)+
scale_fill_manual(values=diet_colors) +
scale_x_discrete(labels=c("Wild" = "Wild", "Pre_grass" = "Captive-born")) +
facet_grid(.~Element, scales="free_x")+
coord_cartesian(xlim = c(1, NA)) +
theme_classic() +
theme(
strip.background = element_blank(),
panel.grid.minor.x = element_line(size = .1, color = "grey"),
axis.text = element_text(size=10, angle = 45, hjust = 1),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0))
)poly_function <- GIFT_db %>%
filter(Code_function=="D02") %>%
select(Code_element) %>%
pull()
GIFTs_elements_community %>%
as.data.frame() %>%
select(any_of(poly_function)) %>%
rownames_to_column(., "sample") %>%
left_join(sample_metadata[c(1,21)], by=join_by(sample==sample)) %>%
select(-sample) %>%
pivot_longer(!diet,names_to="trait",values_to="gift") %>%
left_join(uniqueGIFT_db, by=join_by(trait==Code_element)) %>%
ggplot(aes(x=diet, y=gift, color=diet)) +
geom_jitter(width = 0.2, show.legend = FALSE) +
geom_boxplot(width = 0.5, alpha=0.5,outlier.shape = NA, show.legend = FALSE) +
scale_color_manual(values=diet_colors)+
scale_fill_manual(values=diet_colors) +
scale_x_discrete(labels=c("Wild" = "Wild", "Pre_grass" = "Captive-born")) +
facet_grid(.~Element, scales="free_x")+
coord_cartesian(xlim = c(1, NA)) +
theme_classic() +
theme(
strip.background = element_blank(),
panel.grid.minor.x = element_line(size = .1, color = "grey"),
axis.text = element_text(size=10, angle = 45, hjust = 1),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0))
)