Chapter 15 Comparing activity of positively and negatively associated bacteria
15.1 Load data
load("data/data_mt.Rdata")
load("data/data_mg.Rdata")
trends_bw <-
read_tsv("data/hmsc_bw_trend.tsv") %>%
mutate(trend_07 = case_when(bw_support > 0.85 ~ 'positive',
bw_support < 0.15 ~ 'negative',
TRUE ~ 'non-significant')) %>%
mutate(trend_09 = case_when(bw_support > 0.95 ~ 'positive',
bw_support < 0.05 ~ 'negative',
TRUE ~ 'non-significant'))
gift_colors <- read_tsv("data/gift_colors.tsv") %>%
mutate(legend = str_c(Code_function, " - ", Function))15.2 Expression profiles of positively and negatively associated bacteria
The resulting plot corresponds to Figure 3c in the manuscript.
15.2.1 Element-based expression table of day 35 individuals
mag_elements <- lapply(expr_counts, function(x) to.elements(x, GIFT_db2))
# Convert correct tibble for applying CLR conversion
mag_elements_merged <-
mag_elements %>%
lapply(function(x) t(x)) %>%
lapply(function(x) as.data.frame(x)) %>%
Map(cbind, ., MAG = names(.)) %>%
lapply(function(x) rownames_to_column(x, "Element")) %>%
do.call(rbind, .) %>%
mutate(Function = substr(Element, start = 1, stop = 3))%>%
as.data.frame() %>%
relocate(MAG, .before = Element) %>%
relocate(Function, .after = Element)
# Select individuals from day 35
metadata_day_35 <-
chicken_metadata_mt %>%
filter(sampling_time == "35")
mag_elements_day_35 <-
mag_elements_merged %>%
select(MAG, Element, Function, metadata_day_35$animal_code)
metadata_mt <-
metadata_day_35 %>%
arrange(match(metadata_day_35$animal_code, colnames(mag_elements_day_35[,-c(1,3)])))
mean(metadata_mt$animal_code == colnames(mag_elements_day_35[,-c(1:3)]))[1] 1
15.2.2 Normalised expression table
normalisation_factor <-
expr_counts %>% # Normalisation is performed with dist_expr because it contains the transcripts for all the functions, not only the functions considered important for the host in distillR.
lapply(function(x) rowSums(x)) %>%
reduce(cbind) %>%
rowSums()
normalisation_factor <- normalisation_factor[match(metadata_mt$animal_code,names(normalisation_factor))]
mean(names(normalisation_factor) == metadata_mt$animal_code)[1] 1
15.2.3 Filter MAGs associated with chicken body weight
positive_mags <-
trends_bw %>%
filter(trend_09 == "positive") %>%
select(genome) %>%
unlist() %>%
as.character()
negative_mags <-
trends_bw %>%
filter(trend_09 == "negative") %>%
select(genome) %>%
unlist() %>%
as.character()
n_neg_sp <- 50 # Filter 50 negative MAGs for the figure
negative_mags_top50 <-
trends_bw %>%
filter(trend_09 == "negative") %>%
arrange(parameter) %>%
slice(1:n_neg_sp) %>%
select(genome) %>%
unlist() %>%
as.character()
mag_subset_elements <-
norm_elements_day_35 %>%
filter(MAG %in% c(positive_mags, negative_mags))
mag_subset_element_top50s <-
norm_elements_day_35 %>%
filter(MAG %in% c(positive_mags, negative_mags_top50))15.2.4 Tidy data
# Filter relative abundance table
mag_weighted_day35 <-
mag_weighted %>%
t() %>%
as.data.frame() %>%
rownames_to_column(var = 'animal_code') %>%
filter(animal_code %in% colnames(mag_subset_elements[, -c(1:3)])) %>%
column_to_rownames(var = "animal_code")
total_day35 <-
t(decostand(mag_weighted_day35, "total")) %>%
as.data.frame() %>%
rownames_to_column(var = "genome") %>%
filter(genome %in% c(positive_mags, negative_mags)) %>%
column_to_rownames(var = "genome") %>%
mutate(mean_abundance = rowMeans(.)) %>%
rownames_to_column(var = "genome") %>%
select(genome, mean_abundance)
mag_subset_elements_2 <-
mag_subset_element_top50s %>%
rename(genome = 'MAG') %>%
left_join(total_day35, by = "genome") %>%
mutate(across(CA01.15:CB24.14, ~ .x / mean_abundance)) %>%
select(-mean_abundance) %>%
mutate(bw_association = if_else(genome %in% positive_mags, "positive", "negative")) %>%
mutate(avg_expr_million = rowMeans(across(CA01.15:CB24.14)) * 1000000) %>%
filter(!grepl("S", Function)) %>%
group_by(Element) %>%
filter(sum(avg_expr_million) > 0) %>%
ungroup() %>%
group_by(genome) %>%
filter(sum(avg_expr_million) > 0) %>%
ungroup()
mag_subset_elements_3 <-
mag_subset_elements %>%
rename(genome = 'MAG') %>%
left_join(total_day35, by = "genome") %>%
mutate(across(CA01.15:CB24.14, ~ .x / mean_abundance)) %>%
select(-mean_abundance) %>%
mutate(bw_association = if_else(genome %in% positive_mags, "positive", "negative")) %>%
mutate(avg_expr_million = rowMeans(across(CA01.15:CB24.14)) * 1000000) %>%
filter(!grepl("S", Function)) %>%
group_by(Element) %>%
filter(sum(avg_expr_million) > 0) %>%
ungroup() %>%
group_by(genome) %>%
filter(sum(avg_expr_million) > 0) %>%
ungroup()15.2.5 Plot
mag_subset_elements_2 %>%
rename(Code_function = "Function") %>%
left_join(gift_colors, by = "Code_function") %>%
ggplot(aes(x = genome, y = Element, fill = bw_association, group = legend, alpha = log(avg_expr_million, 2))) +
geom_tile(color = "#ffffff") +
scale_y_discrete(guide = guide_axis(check.overlap = TRUE)) +
scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
scale_fill_manual(values = c("red3",
"blue3"),
na.value = "#f4f4f4") +
facet_grid(legend ~ bw_association, scales = "free", space = "free") +
theme_void(base_size = 8) +
theme(axis.text.x = element_text(size = 5, angle = 90),
axis.text.y = element_text(size = 5),
strip.text.y = element_blank(),
legend.position = "none")15.3 Taxonomy of reduced MAGs
reduced_mags1 <-
mag_subset_elements_3 %>%
filter(Element == "B0101") %>%
filter(avg_expr_million == 0) %>%
select(genome) %>%
unlist() %>%
as.character()
reduced_mags2 <-
mag_subset_elements_3 %>%
filter(Element == "B0102") %>%
filter(avg_expr_million == 0) %>%
select(genome) %>%
unlist() %>%
as.character()
reduced_mags3 <-
mag_subset_elements_3 %>%
filter(Element == "B0103") %>%
filter(avg_expr_million == 0) %>%
select(genome) %>%
unlist() %>%
as.character()
reduced_mags <- intersect(intersect(reduced_mags1, reduced_mags2), reduced_mags3)
reduced_mags_df <-
mag_subset_elements_3 %>%
filter(genome %in% reduced_mags) %>%
group_by(Element) %>%
filter(sum(avg_expr_million) > 0) %>%
ungroup() %>%
group_by(genome) %>%
filter(sum(avg_expr_million) > 0) %>%
ungroup()%>%
pivot_wider(id_cols = c(genome, bw_association), names_from = Element, values_from = avg_expr_million)All reduced MAGs with positive association belong to RF39
positives <-
reduced_mags_df %>%
filter(bw_association == "positive") %>%
select(genome) %>%
unlist() %>%
as.character()
genome_taxonomy[genome_taxonomy$genome %in% positives,] %>%
select(order) %>%
table()order
RF39
10
family
UBA660
10
Reduced MAGs with negative association belong to various orders, mainly to Christensenellales
negatives <-
reduced_mags_df %>%
filter(bw_association == "negative") %>%
select(genome) %>%
unlist() %>%
as.character()
genome_taxonomy[genome_taxonomy$genome %in% negatives,] %>%
select(order) %>%
table()order
Bacteroidales Christensenellales Coriobacteriales Lachnospirales Oscillospirales Saccharimonadales
1 12 1 1 1 1
UBA1381
1
family
Acutalibacteraceae Atopobiaceae Borkfalkiaceae Lachnospiraceae Nanosyncoccaceae Rikenellaceae
1 1 1 1 1 1
UBA1242 UBA1381
11 1
15.4 Genomic characteristics of genome reduced MAGs
The resulting plot corresponds to Figure 4a.
gifts <-
gifts_elements %>%
as.data.frame() %>%
select(where(~ sum(.) > 0))
mci_df <-
gifts %>%
filter(rownames(.) %in% reduced_mags_df$genome) %>%
mutate(avg_mci = rowMeans(.)) %>%
rownames_to_column(var = "genome") %>%
left_join(reduced_mags_df, by = "genome") %>%
left_join(genome_taxonomy, by = "genome") %>%
filter(family=="UBA1242"| order=="RF39") %>%
select(genome, avg_mci, bw_association)
length_df <-
genome_stats %>%
filter(genome %in% reduced_mags_df$genome) %>%
left_join(mci_df, by = "genome") %>%
left_join(genome_taxonomy, by = "genome") %>%
filter(family=="UBA1242"|order=="RF39") %>%
select(genome, mag_length, bw_association)
mean_func <- function(data, indices) {
return(mean(data[indices]))
}
boot_mci_df <- data.frame(matrix(nrow = 2, ncol = 4))
colnames(boot_mci_df) <-c ("bw_association","mean","ci_025","ci_975")
boot_mci_df$bw_association <- c("positive","negative")
bw_pos_redMAG_mci_boot <- boot(mci_df$avg_mci[mci_df$bw_association == "positive"], statistic = mean_func, R = 10000)
bw_pos_redMAG_mci_boot_ci <- boot.ci(bw_pos_redMAG_mci_boot, type = "bca")
bw_neg_redMAG_mci_boot<-boot(mci_df$avg_mci[mci_df$bw_association == "negative"], statistic = mean_func, R = 10000)
bw_neg_redMAG_mci_boot_ci<-boot.ci(bw_neg_redMAG_mci_boot, type = "bca")
boot_mci_df[1,"mean"] <- bw_pos_redMAG_mci_boot$t0
boot_mci_df[1,"ci_025"] <- bw_pos_redMAG_mci_boot_ci$bca[,4]
boot_mci_df[1,"ci_975"] <- bw_pos_redMAG_mci_boot_ci$bca[,5]
boot_mci_df[2,"mean"] <- bw_neg_redMAG_mci_boot$t0
boot_mci_df[2,"ci_025"] <- bw_neg_redMAG_mci_boot_ci$bca[,4]
boot_mci_df[2,"ci_975"] <- bw_neg_redMAG_mci_boot_ci$bca[,5]mci_df %>%
ggplot()+
geom_jitter(aes(x = bw_association, y = avg_mci, color = bw_association),
alpha = 0.5, width = 0.25) +
geom_boxplot(aes(x = bw_association, y = avg_mci, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data = boot_mci_df,
aes(ymin = ci_025, ymax = ci_975, x = bw_association, color = bw_association),
linewidth = 4, size = 1, position = position_nudge(x = 0.35)) +
scale_color_manual(values = c("red3","blue3")) +
scale_fill_manual(values = c("red3","blue3")) +
ylab("Avg. MCI")+
xlab("Body-weight association")+
theme_minimal()+
theme(legend.position = 'none',
axis.title.x = element_text(size = 12),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10),
axis.title.y = element_text(size = 12)) 15.4.1 Genome length of genome reduced MAGss
The resulting plot corresponds to Figure 4b.
boot_length_df <- data.frame(matrix(nrow = 2, ncol = 4))
colnames(boot_length_df) <- c("bw_association", "mean", "ci_025", "ci_975")
boot_length_df$bw_association <- c("positive", "negative")
bw_pos_red_mag_length_boot <- boot(length_df$mag_length[length_df$bw_association == "positive"], statistic = mean_func, R = 10000)
bw_pos_red_mag_length_boot_ci <- boot.ci(bw_pos_red_mag_length_boot, type = "bca")
bw_neg_red_mag_length_boot <- boot(length_df$mag_length[length_df$bw_association == "negative"], statistic = mean_func, R = 10000)
bw_neg_red_mag_length_boot_ci <- boot.ci(bw_neg_red_mag_length_boot, type = "bca")
boot_length_df[1, "mean"] <- bw_pos_red_mag_length_boot$t0
boot_length_df[1, "ci_025"] <- bw_pos_red_mag_length_boot_ci$bca[,4]
boot_length_df[1, "ci_975"] <- bw_pos_red_mag_length_boot_ci$bca[,5]
boot_length_df[2, "mean"] <- bw_neg_red_mag_length_boot$t0
boot_length_df[2, "ci_025"] <- bw_neg_red_mag_length_boot_ci$bca[,4]
boot_length_df[2, "ci_975"] <- bw_neg_red_mag_length_boot_ci$bca[,5]length_df %>%
ggplot() +
geom_jitter(aes(x = bw_association, y = mag_length, color = bw_association),
alpha = 0.5, width = 0.25) +
geom_boxplot(aes(x = bw_association, y = mag_length, fill = bw_association),
alpha = 0.1, outlier.shape = NA, width = 0.5) +
geom_linerange(data = boot_length_df,
aes(ymin = ci_025, ymax = ci_975, x = bw_association, color = bw_association),
linewidth = 4, size = 1,position = position_nudge(x = 0.35)) +
scale_color_manual(values = c("red3","blue3")) +
scale_fill_manual(values = c("red3","blue3")) +
ylab("MAG length") +
xlab("Body-weight association") +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 12),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10),
axis.title.y = element_text(size = 12)) 15.4.2 Expression profiles of genome reduced MAGs
The resulting plot corresponds to Figure 4c.
comp_reduced_mags <-
reduced_mags_df %>%
left_join(genome_taxonomy, by = 'genome') %>%
filter(family=="UBA1242"|order=="RF39") %>%
select(-domain:-species)
reduced_mags_dist <- vegdist(sqrt(comp_reduced_mags[, -c(1:3)]), method = "bray")
pcoa_reduced_mags <- cmdscale(reduced_mags_dist, eig = TRUE, k = 3, add = TRUE)
pcoa_reduced_mags_df <-
pcoa_reduced_mags$points %>%
as.data.frame() %>%
mutate(genome = comp_reduced_mags$genome) %>%
mutate(bw_association = comp_reduced_mags$bw_association) %>%
rename(PCOA1 = 'V1', PCOA2 = 'V2', PCOA3 = 'V3')
envfit_gifts <- envfit(pcoa_reduced_mags_df[, 1:2], sqrt(comp_reduced_mags[, -c(1,2)]), permutations = 999)
envfit_gifts_df <-
as.data.frame(scores(envfit_gifts, display = "vectors")) %>%
rownames_to_column(.,var = "gift_id") %>%
mutate(pval = envfit_gifts$vectors$pvals) %>%
filter(pval < 0.05) %>%
select(gift_id, PCOA1, PCOA2)ggplot() +
geom_point(data = pcoa_reduced_mags_df,
aes(x = PCOA1, y = PCOA2, color = bw_association),
size = 2,
shape = 16,
alpha = 0.8) +
scale_color_manual(values = c("red3","blue3")) +
geom_segment(data = envfit_gifts_df,
aes(x = 0, xend = PCOA1*0.5, y = 0 , yend = PCOA2*0.5),
arrow = arrow(length = unit(0.25, "cm")),
color="gray20",
linewidth = 0.2) +
geom_label_repel(data = envfit_gifts_df, aes(x = PCOA1*0.5, y = PCOA2*0.5, label = gift_id), size = 1, max.overlaps = 10) +
theme_minimal() +
theme(legend.position = 'none',
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10))15.4.3 Activity heatmap of genome reduced bacteria
mag_subset_elements_3 %>%
rename(Code_element = "Element") %>%
rename(Code_function = "Function") %>%
left_join(GIFT_db2 %>% select (Code_element, Code_function, Element, Function)) %>%
left_join(gift_colors, by = "Code_function") %>%
left_join(genome_taxonomy, by = 'genome') %>%
filter(family == "UBA1242" | family == "UBA660") %>%
select(-domain:-species) %>%
group_by(Element) %>%
filter(sum(avg_expr_million) > 0) %>%
ungroup() %>%
group_by(genome) %>%
filter(sum(avg_expr_million) > 0) %>%
ungroup() %>%
ggplot(aes(x = genome,
y = Code_element,
fill = bw_association,
group = legend,
alpha = log(avg_expr_million, 2))) +
geom_tile(color = "#ffffff") +
scale_y_discrete(guide = guide_axis(check.overlap = TRUE)) +
scale_x_discrete(guide = guide_axis(check.overlap = TRUE)) +
scale_fill_manual(values = c("red3",
"blue3"),
na.value = "#f4f4f4") +
facet_grid(legend ~ bw_association, scales = "free", space = "free") +
theme_void(base_size = 8) +
theme(axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1),
axis.text.y = element_text(size = 5, vjust = 0, hjust = 1),
strip.text.y = element_blank(),
legend.position = "none")