Chapter 5 Community composition
5.1 Taxonomy overview
5.1.1 Stacked barplot
tax_barplot <- genome_counts_filt %>%
mutate_at(vars(-genome), ~./sum(.)) %>% #apply TSS normalization
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
filter(count > 0) #filter 0 countstax_barplot_filt <- genome_counts_filt %>%
dplyr::select(-all_of(samples_to_remove))%>%
mutate_at(vars(-genome), ~./sum(.)) %>% #apply TSS normalization
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
filter(count > 0) #filter 0 countsEvaluating the community composition at each of the gut locations.
ggplot(tax_barplot, aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
facet_grid(. ~ gut_location, scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 6),
axis.ticks.x =element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_text(size=8),
axis.title.y = element_text(size=12),
panel.background = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
panel.border = element_rect(colour = "black", fill = NA),
strip.background = element_rect(fill = "white", color = "black"),
strip.text = element_text(size = 12, lineheight = 0.6)) +
labs(fill="Phylum",y = "Relative abundance",x="Samples")ggplot(tax_barplot_filt, aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
facet_grid(. ~ gut_location, scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 6),
axis.ticks.x =element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_text(size=8),
axis.title.y = element_text(size=12),
panel.background = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
panel.border = element_rect(colour = "black", fill = NA),
strip.background = element_rect(fill = "white", color = "black"),
strip.text = element_text(size = 12, lineheight = 0.6)) +
labs(fill="Phylum",y = "Relative abundance",x="Samples")Evaluating community composition between the fox behaviours within the different gut locations:
ggplot(tax_barplot, aes(x=sample,y=count, fill=phylum, group=phylum)) +
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
facet_nested(. ~ gut_location + fox_behaviour, scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_blank(),
axis.ticks.x =element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_text(size=8),
axis.title.y = element_text(size=12),
panel.background = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
panel.border = element_rect(colour = "black", fill = NA),
strip.background = element_rect(fill = "white", color = "black"),
strip.text = element_text(size = 12, lineheight = 0.6)) +
labs(fill="Phylum",y = "Relative abundance",x="Samples")ggplot(tax_barplot, aes(x=sample,y=count, fill=phylum, group=phylum)) +
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
facet_nested(. ~ fox_behaviour + gut_location, scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_blank(),
axis.ticks.x =element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_text(size=8),
axis.title.y = element_text(size=12),
panel.background = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
panel.border = element_rect(colour = "black", fill = NA),
strip.background = element_rect(fill = "white", color = "black"),
strip.text = element_text(size = 12, lineheight = 0.6)) +
labs(fill="Phylum",y = "Relative abundance",x="Samples")ggplot(tax_barplot, aes(x=sample,y=count, fill=phylum, group=phylum)) +
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
facet_nested(. ~ fox_behaviour + sample_type, scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_blank(),
axis.ticks.x =element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_text(size=8),
axis.title.y = element_text(size=12),
panel.background = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
panel.border = element_rect(colour = "black", fill = NA),
strip.background = element_rect(fill = "white", color = "black"),
strip.text = element_text(size = 12, lineheight = 0.6)) +
labs(fill="Phylum",y = "Relative abundance",x="Samples")ggplot(tax_barplot, aes(x=sample,y=count, fill=phylum, group=phylum)) +
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
facet_nested(. ~ sample_type+ fox_behaviour, scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_blank(),
axis.ticks.x =element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_text(size=8),
axis.title.y = element_text(size=12),
panel.background = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
panel.border = element_rect(colour = "black", fill = NA),
strip.background = element_rect(fill = "white", color = "black"),
strip.text = element_text(size = 12, lineheight = 0.6)) +
labs(fill="Phylum",y = "Relative abundance",x="Samples")
### Only with F_colon
ggplot(tax_barplot_fcolon, aes(x=sample,y=count, fill=phylum, group=phylum)) +
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
facet_nested(. ~ fox_behaviour, scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_blank(),
axis.ticks.x =element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_text(size=8),
axis.title.y = element_text(size=12),
panel.background = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
panel.border = element_rect(colour = "black", fill = NA),
strip.background = element_rect(fill = "white", color = "black"),
strip.text = element_text(size = 12, lineheight = 0.6)) +
labs(fill="Phylum",y = "Relative abundance",x="Samples")5.1.2 Phylum relative abundances
phylum_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS normalization
pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
left_join(genome_metadata, by = join_by(genome == genome)) %>%
group_by(sample,phylum,fox_behaviour) %>%
summarise(relabun=sum(count))
phylum_summary %>%
group_by(phylum) %>%
summarise(total_mean=mean(relabun*100, na.rm=T),
total_sd=sd(relabun*100, na.rm=T),
tame_mean=mean(relabun[fox_behaviour =="tame"]*100, na.rm=T),
tame_sd=sd(relabun[fox_behaviour =="tame"]*100, na.rm=T),
aggressive_mean=mean(relabun[fox_behaviour =="aggr"]*100, na.rm=T),
aggressive_sd=sd(relabun[fox_behaviour=="aggr"]*100, na.rm=T),
unsel_mean=mean(relabun[fox_behaviour =="unsel"]*100, na.rm=T),
unsel_sd=sd(relabun[fox_behaviour =="unsel"]*100, na.rm=T)) %>%
mutate(total=str_c(round(total_mean,2),"±",round(total_sd,2)),
tame=str_c(round(tame_mean,2),"±",round(tame_sd,2)),
aggresive=str_c(round(aggressive_mean,2),"±",round(aggressive_sd,2)),
unselected =str_c(round(unsel_mean,2),"±",round(unsel_sd,2))) %>%
arrange(-total_mean) %>%
dplyr::select(phylum,total,tame,aggresive, unselected) %>%
tt()| phylum | total | tame | aggresive | unselected |
|---|---|---|---|---|
| Bacillota_A | 53.84±36.19 | 58.59±35.17 | 50.48±39.83 | 49.55±33.08 |
| Bacteroidota | 15.25±19.97 | 9.6±16.51 | 16.34±20.1 | 26.91±24.37 |
| Bacillota | 12.28±22.53 | 15.11±24.53 | 12.65±24.9 | 4.34±5.37 |
| Fusobacteriota | 4.95±6.83 | 4.36±7.66 | 5.36±6.89 | 5.47±4.84 |
| Pseudomonadota | 4.22±13.86 | 2.75±5.01 | 6.73±21.62 | 2.25±2.29 |
| Bacillota_I | 2.76±3.46 | 1.45±2.48 | 2.68±2.84 | 6.21±4.68 |
| Campylobacterota | 2.69±4.84 | 3.2±4.91 | 2.27±5.02 | 2.35±4.79 |
| Desulfobacterota | 1.45±7.98 | 3.15±12.05 | 0.05±0.11 | 0.33±0.33 |
| Spirochaetota | 0.91±2.5 | 0.35±1.12 | 1.94±3.64 | 0±0 |
| Deferribacterota | 0.73±1.38 | 0.66±1.11 | 0.52±1.29 | 1.38±2.07 |
| Bacillota_C | 0.67±0.9 | 0.51±0.87 | 0.69±0.88 | 1.01±1.02 |
| Actinomycetota | 0.21±0.37 | 0.18±0.31 | 0.26±0.48 | 0.15±0.25 |
| Bacillota_B | 0.04±0.09 | 0.05±0.1 | 0.03±0.06 | 0.04±0.11 |
Distribution of relative abundance across samples:
phylum_arrange <- phylum_summary %>%
group_by(phylum) %>%
summarise(mean=mean(relabun)) %>%
arrange(-mean) %>%
dplyr::select(phylum) %>%
pull()
phylum_summary %>%
filter(phylum %in% phylum_arrange) %>%
mutate(phylum=factor(phylum,levels=rev(phylum_arrange))) %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=phylum, group=phylum, color=phylum, fill=phylum)) +
scale_color_manual(values=phylum_colors[rev(phylum_arrange)]) +
scale_fill_manual(values=phylum_colors[-8]) +
geom_boxplot(alpha=0.2)+
geom_jitter(alpha=0.5) +
theme_minimal() +
theme(legend.position="none") +
labs(y="Phylum",x="Relative abundance")## Calculate the phylum order based on mean relative abundance
phylum_arrange <- phylum_summary %>%
group_by(phylum) %>%
summarise(mean=mean(relabun)) %>%
arrange(-mean) %>%
dplyr::select(phylum) %>%
pull()
phylum_summary %>%
filter(phylum %in% phylum_arrange) %>%
mutate(phylum=factor(phylum,levels=rev(phylum_arrange))) %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=phylum, group=phylum, color=phylum, fill=phylum)) +
scale_color_manual(values=phylum_colors[rev(phylum_arrange)]) +
scale_fill_manual(values=phylum_colors[-8]) +
geom_boxplot(alpha=0.2)+
geom_jitter(alpha=0.5) +
facet_nested(. ~ gut_location)+
theme_minimal() +
theme(legend.position="none",
strip.text.x = element_text(size = 14, color="black",face="bold"),
axis.text.x = element_text(vjust = 0.5, size = 10),
axis.text.y = element_text(size = 12),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
axis.title = element_text(size = 14, face = "bold"),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0))
) +
labs(y="Phylum",x="Relative abundance")phylum_summary %>%
filter(phylum %in% phylum_arrange) %>%
mutate(phylum=factor(phylum,levels=rev(phylum_arrange))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=phylum, group=phylum, color=phylum, fill=phylum)) +
scale_color_manual(values=phylum_colors[rev(phylum_arrange)]) +
scale_fill_manual(values=phylum_colors[-8]) +
geom_boxplot(alpha=0.2)+
geom_jitter(alpha=0.5) +
facet_grid(.~fox_behaviour)+
theme_minimal() +
theme(legend.position="none",
strip.text.x = element_text(size = 14, color="black",face="bold"),
axis.text.x = element_text(vjust = 0.5, size = 10),
axis.text.y = element_text(size = 12),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
axis.title = element_text(size = 14, face = "bold"),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0))
) +
labs(y="Phylum",x="Relative abundance")5.1.3 Family
`summarise()` has grouped output by 'sample', 'family'. You can override using the `.groups` argument.
| family | total | tame | aggr | unsel |
|---|---|---|---|---|
| Peptostreptococcaceae | 45.03±42.9 | 48.09±43.82 | 44.45±44.89 | 38.7±40.68 |
| Bacteroidaceae | 14.24±18.56 | 9±15.28 | 14.97±18.23 | 25.71±23.3 |
| Lactobacillaceae | 9.31±16.8 | 13.65±22.13 | 7.06±12.19 | 3.54±4.51 |
| Fusobacteriaceae | 4.95±6.83 | 4.36±7.66 | 5.36±6.89 | 5.47±4.84 |
| Lachnospiraceae | 4.29±6.04 | 3.43±6.01 | 4.38±6.09 | 6.22±6.34 |
| Clostridiaceae | 3.1±10.53 | 6.24±15.54 | 0.36±0.53 | 1.45±2.28 |
| Streptococcaceae | 2.9±8.69 | 1.46±2.66 | 5.44±13.42 | 0.78±0.84 |
| Enterobacteriaceae | 2.04±5.98 | 1.97±4.82 | 2.57±8.17 | 1.02±1.62 |
| Helicobacteraceae | 1.93±3.97 | 1.76±3.39 | 1.94±4.4 | 2.32±4.79 |
| Mycoplasmoidaceae | 1.78±3.5 | 0.9±2.51 | 1.58±2.76 | 4.43±5.72 |
| CAMWXW01 | 1.36±7.99 | 3.12±12.06 | 0.02±0.08 | 0±0 |
| Pasteurellaceae | 1.32±8.53 | 0±0 | 3.37±13.61 | 0±0 |
| Brachyspiraceae | 0.91±2.5 | 0.35±1.12 | 1.94±3.64 | 0±0 |
| Campylobacteraceae | 0.76±1.77 | 1.45±2.39 | 0.33±0.97 | 0.02±0.07 |
| Mucispirillaceae | 0.73±1.38 | 0.66±1.11 | 0.52±1.29 | 1.38±2.07 |
| Acidaminococcaceae | 0.67±0.9 | 0.51±0.87 | 0.69±0.88 | 1.01±1.02 |
| Oscillospiraceae | 0.62±0.9 | 0.31±0.53 | 0.56±0.74 | 1.52±1.37 |
| Burkholderiaceae | 0.6±0.84 | 0.57±0.89 | 0.46±0.71 | 1±0.93 |
| Muribaculaceae | 0.58±1.07 | 0.23±0.44 | 0.99±1.55 | 0.52±0.46 |
| Erysipelotrichaceae | 0.53±0.75 | 0.37±0.66 | 0.58±0.77 | 0.8±0.9 |
| Ruminococcaceae | 0.41±0.67 | 0.41±0.84 | 0.39±0.58 | 0.43±0.44 |
| UBA932 | 0.23±0.41 | 0.19±0.49 | 0.22±0.35 | 0.34±0.37 |
| Coprobacillaceae | 0.23±0.46 | 0.12±0.24 | 0.32±0.65 | 0.28±0.38 |
| Succinivibrionaceae | 0.18±0.37 | 0.14±0.37 | 0.27±0.44 | 0.04±0.1 |
| Anaeroplasmataceae | 0.16±0.34 | 0.07±0.23 | 0.2±0.44 | 0.29±0.32 |
| Marinifilaceae | 0.14±0.3 | 0.16±0.37 | 0.1±0.17 | 0.22±0.32 |
| CAG-465 | 0.13±0.38 | 0.03±0.07 | 0.07±0.21 | 0.53±0.77 |
| CAG-508 | 0.12±0.27 | 0.03±0.07 | 0.12±0.28 | 0.32±0.43 |
| Coriobacteriaceae | 0.09±0.28 | 0.05±0.15 | 0.17±0.41 | 0.01±0.03 |
| CAG-239 | 0.09±0.19 | 0.07±0.21 | 0.06±0.15 | 0.19±0.2 |
| Bifidobacteriaceae | 0.09±0.23 | 0.09±0.25 | 0.09±0.23 | 0.08±0.2 |
| Desulfovibrionaceae | 0.08±0.19 | 0.03±0.08 | 0.03±0.09 | 0.33±0.33 |
| UBA660 | 0.07±0.26 | 0±0 | 0±0 | 0.41±0.53 |
| Tannerellaceae | 0.06±0.13 | 0.02±0.07 | 0.06±0.15 | 0.12±0.19 |
| Anaerotignaceae | 0.05±0.14 | 0.02±0.06 | 0.1±0.21 | 0.04±0.07 |
| Aristaeellaceae | 0.05±0.14 | 0.01±0.03 | 0.02±0.04 | 0.23±0.26 |
| Enterococcaceae | 0.04±0.13 | 0.01±0.03 | 0.08±0.19 | 0.03±0.06 |
| Peptococcaceae | 0.04±0.09 | 0.05±0.1 | 0.03±0.06 | 0.04±0.11 |
| Eggerthellaceae | 0.03±0.08 | 0.04±0.1 | 0.01±0.02 | 0.06±0.11 |
| Butyricicoccaceae | 0.03±0.08 | 0.01±0.07 | 0.02±0.04 | 0.09±0.13 |
| Turicibacteraceae | 0.03±0.11 | 0±0 | 0.07±0.17 | 0±0 |
| Anaerovoracaceae | 0.01±0.04 | 0.01±0.04 | 0.01±0.02 | 0.03±0.05 |
| CAG-826 | 0±0.01 | 0±0.01 | 0±0.01 | 0±0 |
family_summary %>%
left_join(genome_metadata %>% dplyr::select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
left_join(sample_metadata,by=join_by(sample==sample)) %>%
filter(family %in% family_arrange[1:20]) %>%
mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=family, group=family, color=phylum, fill=phylum)) +
scale_color_manual(values=phylum_colors[-8]) +
scale_fill_manual(values=phylum_colors[-8]) +
geom_boxplot(alpha=0.2)+
geom_jitter(alpha=0.5) +
facet_grid(.~gut_location)+
theme_minimal() +
theme(legend.position="none",
strip.text.x = element_text(size = 14, color="black",face="bold"),
axis.text.x = element_text(vjust = 0.5, size = 6),
axis.text.y = element_text(size = 12),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
axis.title = element_text(size = 14, face = "bold"),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)))+
labs(y="Family", x="Relative abundance", color="Phylum")family_summary %>%
left_join(genome_metadata %>% dplyr::select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
left_join(sample_metadata,by=join_by(sample==sample)) %>%
filter(family %in% family_arrange[1:20]) %>%
mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=family, group=family, color=phylum, fill=phylum)) +
scale_color_manual(values=phylum_colors[-8]) +
scale_fill_manual(values=phylum_colors[-8]) +
geom_boxplot(alpha=0.2)+
geom_jitter(alpha=0.5) +
facet_grid(.~fox_behaviour.x)+
theme_minimal() +
theme(legend.position="none",
strip.text.x = element_text(size = 14, color="black",face="bold"),
axis.text.x = element_text(vjust = 0.5, size = 6),
axis.text.y = element_text(size = 12),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
axis.title = element_text(size = 14, face = "bold"),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)))+
labs(y="Family", x="Relative abundance", color="Phylum")5.1.4 Genus
`summarise()` has grouped output by 'sample', 'genus'. You can override using the `.groups` argument.
| genus | total | tame | aggr | unsel |
|---|---|---|---|---|
| Paraclostridium | 44.77±42.86 | 47.88±44.04 | 44.02±44.54 | 38.7±40.68 |
| Bacteroides | 5.72±8 | 2.83±4.62 | 5.8±6.72 | 12.77±12.77 |
| Limosilactobacillus | 5.1±9.73 | 5.91±10.31 | 5.41±11.11 | 2.4±3.06 |
| Phocaeicola | 3.83±5.4 | 2.45±4.54 | 5.12±6.63 | 4.39±3.83 |
| Lactobacillus | 3.54±8.66 | 6.99±12.38 | 0.94±1.41 | 0.73±1.1 |
| Fusobacterium_A | 3.22±4.67 | 3.04±5.44 | 3.36±4.56 | 3.36±3.04 |
| Sarcina | 2.71±9.12 | 5.38±13.47 | 0.33±0.49 | 1.42±2.26 |
| Lactococcus | 2.34±8.57 | 1.09±2.22 | 4.51±13.44 | 0.59±0.52 |
| Paraprevotella | 2.23±3.46 | 2.3±4.06 | 1.33±2.3 | 4.11±3.63 |
| Alloprevotella | 2.07±2.93 | 1.14±2.01 | 2.45±3.51 | 3.53±2.99 |
| Mycoplasmoides | 1.78±3.5 | 0.9±2.51 | 1.58±2.76 | 4.43±5.72 |
| Escherichia | 1.74±5.83 | 1.4±4.34 | 2.44±8.19 | 0.99±1.63 |
| Fusobacterium_B | 1.67±2.21 | 1.32±2.25 | 1.85±2.32 | 2.11±2 |
| CAMWXW01 | 1.36±7.99 | 3.12±12.06 | 0.02±0.08 | 0±0 |
| Canicola | 1.32±8.53 | 0±0 | 3.37±13.61 | 0±0 |
| Blautia | 1.2±1.74 | 1.09±1.91 | 1.25±1.8 | 1.37±1.27 |
| Blautia_A | 1.01±1.57 | 0.66±1.17 | 1.18±1.87 | 1.5±1.71 |
| Brachyspira | 0.91±2.5 | 0.35±1.12 | 1.94±3.64 | 0±0 |
| Helicobacter_A | 0.9±2.2 | 0.64±1.34 | 0.59±1.02 | 2.22±4.63 |
| Campylobacter_D | 0.76±1.77 | 1.45±2.39 | 0.33±0.97 | 0.02±0.07 |
| Helicobacter_C | 0.75±2.24 | 0.96±2.1 | 0.86±2.84 | 0±0 |
| Mucispirillum | 0.73±1.38 | 0.66±1.11 | 0.52±1.29 | 1.38±2.07 |
| Phascolarctobacterium_A | 0.67±0.9 | 0.51±0.87 | 0.69±0.88 | 1.01±1.02 |
| Ligilactobacillus | 0.64±1.09 | 0.69±1.08 | 0.69±1.26 | 0.41±0.72 |
| Faecousia | 0.6±0.88 | 0.3±0.53 | 0.55±0.73 | 1.48±1.35 |
| Sutterella | 0.6±0.84 | 0.57±0.89 | 0.46±0.71 | 1±0.93 |
| Limisoma | 0.58±1.07 | 0.23±0.44 | 0.99±1.55 | 0.52±0.46 |
| Faecalimonas | 0.57±0.89 | 0.44±0.88 | 0.48±0.73 | 1.07±1.15 |
| Streptococcus | 0.56±1.09 | 0.36±0.65 | 0.93±1.54 | 0.19±0.42 |
| Clostridium | 0.39±1.43 | 0.86±2.11 | 0.03±0.05 | 0.03±0.07 |
| Faecalibacterium | 0.33±0.58 | 0.35±0.72 | 0.33±0.52 | 0.31±0.28 |
| Plesiomonas | 0.31±0.97 | 0.57±1.35 | 0.13±0.53 | 0.03±0.06 |
| Helicobacter_B | 0.28±1.27 | 0.15±0.44 | 0.49±1.99 | 0.11±0.18 |
| Faecalimicrobium | 0.26±1.02 | 0.2±0.66 | 0.44±1.48 | 0±0 |
| Otoolea | 0.24±0.36 | 0.25±0.43 | 0.16±0.25 | 0.42±0.37 |
| Cryptobacteroides | 0.23±0.41 | 0.19±0.49 | 0.22±0.35 | 0.34±0.37 |
| Oliverpabstia | 0.21±0.42 | 0.19±0.4 | 0.28±0.5 | 0.11±0.24 |
| Allobaculum | 0.2±0.32 | 0.12±0.23 | 0.3±0.42 | 0.15±0.16 |
| Prevotella | 0.19±0.49 | 0.22±0.6 | 0.15±0.45 | 0.22±0.34 |
| Anaerobiospirillum | 0.17±0.37 | 0.14±0.37 | 0.25±0.43 | 0.04±0.1 |
| Holdemanella | 0.16±0.33 | 0.2±0.45 | 0.12±0.24 | 0.17±0.18 |
| Brotaphodocola | 0.16±0.28 | 0.08±0.22 | 0.12±0.2 | 0.44±0.4 |
| CALUXS01 | 0.16±0.34 | 0.07±0.23 | 0.2±0.44 | 0.29±0.32 |
| OM05-12 | 0.15±0.39 | 0.04±0.17 | 0.06±0.13 | 0.61±0.74 |
| Odoribacter | 0.14±0.3 | 0.16±0.37 | 0.1±0.17 | 0.22±0.32 |
| UBA9414 | 0.14±0.24 | 0.16±0.29 | 0.07±0.13 | 0.23±0.27 |
| 0.13±0.38 | 0.03±0.07 | 0.07±0.21 | 0.53±0.77 | |
| Faecalibacillus | 0.13±0.42 | 0.04±0.09 | 0.27±0.64 | 0.04±0.08 |
| JAHHTG01 | 0.12±0.38 | 0±0 | 0.13±0.34 | 0.39±0.73 |
| JAGZHZ01 | 0.11±0.23 | 0.11±0.25 | 0.1±0.19 | 0.15±0.26 |
| HGM12587 | 0.1±0.23 | 0.02±0.07 | 0.08±0.19 | 0.36±0.37 |
| Gallintestinimicrobium | 0.09±0.14 | 0.08±0.14 | 0.11±0.16 | 0.08±0.09 |
| Collinsella | 0.09±0.28 | 0.05±0.15 | 0.17±0.41 | 0.01±0.03 |
| UMGS1370 | 0.09±0.16 | 0.08±0.17 | 0.11±0.16 | 0.06±0.14 |
| 51-20 | 0.09±0.19 | 0.07±0.21 | 0.06±0.15 | 0.19±0.2 |
| Bifidobacterium | 0.09±0.23 | 0.09±0.25 | 0.09±0.23 | 0.08±0.2 |
| Taurinivorans | 0.08±0.19 | 0.03±0.08 | 0.03±0.09 | 0.33±0.33 |
| Frisingicoccus | 0.07±0.22 | 0.08±0.2 | 0.1±0.28 | 0±0 |
| UBA3789 | 0.07±0.26 | 0±0 | 0±0 | 0.41±0.53 |
| Marvinbryantia | 0.07±0.17 | 0.03±0.08 | 0.07±0.15 | 0.17±0.32 |
| CAG-269 | 0.07±0.16 | 0±0.02 | 0.1±0.2 | 0.16±0.22 |
| Schaedlerella | 0.06±0.12 | 0.04±0.1 | 0.06±0.14 | 0.09±0.14 |
| Cetobacterium_A | 0.06±0.39 | 0±0 | 0.15±0.62 | 0±0 |
| Parabacteroides | 0.06±0.13 | 0.02±0.07 | 0.06±0.15 | 0.12±0.19 |
| Ventricola | 0.05±0.14 | 0.01±0.03 | 0.02±0.04 | 0.23±0.26 |
| Phocaeicola_A | 0.05±0.12 | 0.02±0.1 | 0.06±0.13 | 0.09±0.13 |
| CALVGN01 | 0.05±0.08 | 0.05±0.1 | 0.03±0.05 | 0.09±0.11 |
| Enterococcus | 0.04±0.13 | 0.01±0.03 | 0.08±0.19 | 0.03±0.06 |
| CAJMNU01 | 0.04±0.11 | 0.02±0.06 | 0.07±0.16 | 0.03±0.05 |
| UMGS1590 | 0.04±0.09 | 0.05±0.1 | 0.03±0.06 | 0.04±0.11 |
| CALDMQ01 | 0.04±0.1 | 0.06±0.14 | 0.02±0.04 | 0.03±0.06 |
| Negativibacillus | 0.04±0.09 | 0.02±0.06 | 0.02±0.05 | 0.1±0.16 |
| Fimiplasma | 0.04±0.15 | 0±0 | 0±0 | 0.21±0.32 |
| Avimicrobium | 0.04±0.08 | 0.04±0.1 | 0.03±0.06 | 0.02±0.04 |
| Anaerotignum | 0.03±0.08 | 0.02±0.05 | 0.06±0.11 | 0.03±0.06 |
| Slackia | 0.03±0.08 | 0.04±0.1 | 0.01±0.02 | 0.06±0.11 |
| Avilachnospira | 0.03±0.06 | 0.03±0.07 | 0.03±0.06 | 0.04±0.06 |
| Butyricicoccus | 0.03±0.08 | 0.01±0.07 | 0.02±0.04 | 0.09±0.13 |
| Latilactobacillus | 0.03±0.1 | 0.05±0.15 | 0.02±0.04 | 0±0 |
| Turicibacter | 0.03±0.11 | 0±0 | 0.07±0.17 | 0±0 |
| Laedolimicola | 0.03±0.07 | 0.01±0.03 | 0.04±0.1 | 0.04±0.05 |
| UMGS1663 | 0.03±0.1 | 0±0 | 0±0 | 0.15±0.22 |
| Choladousia | 0.03±0.07 | 0.01±0.04 | 0.05±0.11 | 0.01±0.03 |
| Merdicola | 0.02±0.07 | 0.03±0.07 | 0.03±0.08 | 0.01±0.01 |
| Beduini | 0.02±0.06 | 0.02±0.06 | 0.03±0.07 | 0.01±0.02 |
| Metalachnospira | 0.02±0.07 | 0.01±0.02 | 0.04±0.1 | 0.01±0.02 |
| Alitiscatomonas | 0.02±0.04 | 0.01±0.03 | 0.02±0.04 | 0.04±0.07 |
| Dorea_B | 0.02±0.04 | 0.03±0.06 | 0±0.01 | 0.01±0.01 |
| Ruminococcus_B | 0.01±0.05 | 0.02±0.08 | 0.01±0.02 | 0.01±0.03 |
| Evtepia | 0.01±0.05 | 0±0.02 | 0.01±0.03 | 0.04±0.1 |
| Gallibacter | 0.01±0.04 | 0.01±0.04 | 0.01±0.02 | 0.03±0.05 |
| Succinivibrio | 0.01±0.03 | 0±0 | 0.02±0.05 | 0±0 |
| Onthovivens | 0±0.01 | 0±0.01 | 0±0.01 | 0±0 |
genus_summary %>%
left_join(genome_metadata %>% dplyr::select(genus,phylum) %>% unique(),by="genus") %>%
left_join(sample_metadata,by=join_by(sample==sample)) %>%
filter(genus %in% genus_arrange[1:20]) %>%
mutate(genus=factor(genus,levels=rev(genus_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=genus, group=genus, color=phylum, fill=phylum)) +
scale_color_manual(values=phylum_colors[-8]) +
scale_fill_manual(values=phylum_colors[-8]) +
geom_boxplot(alpha=0.2)+
geom_jitter(alpha=0.5) +
facet_grid(.~ gut_location)+
theme_minimal() +
theme(legend.position="none",
strip.text.x = element_text(size = 14, color="black",face="bold"),
axis.text.x = element_text(vjust = 0.5, size = 6),
axis.text.y = element_text(size = 12),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
axis.title = element_text(size = 14, face = "bold"),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)))+
labs(y="Family", x="Relative abundance", color="Phylum")genus_summary %>%
left_join(genome_metadata %>% dplyr::select(genus,phylum) %>% unique(),by="genus") %>%
filter(genus %in% genus_arrange[1:20]) %>%
mutate(genus=factor(genus,levels=rev(genus_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=genus, group=genus, color=phylum, fill=phylum)) +
scale_color_manual(values=phylum_colors[-8]) +
scale_fill_manual(values=phylum_colors[-8]) +
geom_boxplot(alpha=0.2)+
geom_jitter(alpha=0.5) +
facet_grid(.~fox_behaviour)+
theme_minimal() +
theme(legend.position="none",
strip.text.x = element_text(size = 14, color="black",face="bold"),
axis.text.x = element_text(vjust = 0.5, size = 6),
axis.text.y = element_text(size = 12),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
axis.title = element_text(size = 14, face = "bold"),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)))+
labs(y="Family", x="Relative abundance", color="Phylum")5.2 Core microbiota
library(UpSetR)
genome_counts_rel <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>%
column_to_rownames(., "genome")
#Presence/absence
genome_counts_rel_pa <- (genome_counts_rel > 0) * 1
#Add gut_location info
df <- as.data.frame(t(genome_counts_rel_pa)) %>%
rownames_to_column("sample") %>%
left_join(sample_metadata, by="sample")
#Summarize by location
table_upset_analysis <- df %>%
group_by(gut_location) %>%
summarise(across(-sample, ~ as.integer(any(. > 0)))) %>%
column_to_rownames("gut_location") %>%
t() %>%
as.data.frame()
location_colors <- c(
F_colon = "#d6604d",
M_colon = "#542788",
D_ileum = "#fdb863",
K_ileum = "#bb99d8"
)
#UpSet plot
location_upset_plot <- upset(table_upset_analysis,
keep.order = TRUE,
sets = rev(c("F_colon", "M_colon", "D_ileum", "K_ileum")),
sets.bar.color = rev(location_colors),
mb.ratio = c(0.55, 0.45),
text.scale = c(2.5, 2.5, 2.5, 2.5) ,
order.by = "freq")
location_upset_plot# Convert to relative abundance
genome_counts_rel <- genome_counts_filt %>%
mutate_at(vars(-genome), ~ . / sum(.)) %>%
column_to_rownames("genome")
# Presence/absence
genome_counts_rel_pa <- (genome_counts_rel > 0) * 1
# Add behaviour info
df <- as.data.frame(t(genome_counts_rel_pa)) %>%
rownames_to_column("sample") %>%
left_join(sample_metadata, by = "sample")
# Summarize by behaviour
table_upset_analysis <- df %>%
group_by(fox_behaviour) %>%
summarise(across(-sample, ~ as.integer(any(. > 0)))) %>%
column_to_rownames("fox_behaviour") %>%
t() %>%
as.data.frame()
# Behaviour colors
behaviour_colors <- c(
tame = "#1f77b4",
aggr = "#d62728",
unsel = "#2ca02c"
)
# UpSet plot
behaviour_upset_plot <- upset(table_upset_analysis,
keep.order = TRUE,
sets = rev(names(behaviour_colors)),
sets.bar.color = rev(unname(behaviour_colors)),
mb.ratio = c(0.55, 0.45),
text.scale = c(2, 2, 2, 2.5) ,
order.by = "freq")
behaviour_upset_plot5.3 Wilcoxon Test
H0 (null): The distribution of genome prevalence (presence) is the same across the two sample_type groups (gut tissue or gut content).
mag_prevalence_location <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>%
pivot_longer(!genome,names_to="sample",values_to="abundance") %>%
left_join(sample_metadata,by="sample") %>%
mutate(presence=ifelse(abundance>0,1,0)) %>%
dplyr::select(genome,sample,presence,gut_location, sample_type) %>%
group_by(genome,gut_location, sample_type) %>%
summarise(presence=ifelse(sum(presence)>0,1,0)) %>%
group_by(genome,sample_type) %>%
summarise(presence=sum(presence))
mag_prevalence_location %>%
group_by(sample_type) %>%
summarise(mean=mean(presence),sd=sd(presence)) %>%
tt()| sample_type | mean | sd |
|---|---|---|
| Gut content | 1.8601399 | 0.3480610 |
| Gut tissue | 0.7552448 | 0.4924326 |
# A tibble: 1 × 4
statistic p.value method alternative
<dbl> <dbl> <chr> <chr>
1 19123 5.12e-44 Wilcoxon rank sum test with continuity correction two.sided
mag_prevalence_location %>%
ggplot(aes(x=sample_type,y=presence, color=sample_type, fill=sample_type)) +
geom_boxplot(alpha = 0.5) +
geom_point() +
theme_minimal()
Comparing the prevalence/presence of the MAGs between aggressive and tame foxes
mag_prevalence_behaviour <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #divide each column (except genome) by its sum to get relative abundance
pivot_longer(!genome,names_to="sample",values_to="abundance") %>%
left_join(sample_metadata,by="sample") %>%
filter(!grepl("unsel", fox_behaviour))%>% #to remove the unselected foxes
mutate(presence=ifelse(abundance>0,1,0)) %>% #converts abundance to presence/absence
dplyr::select(genome,sample,presence,gut_location, fox_behaviour) %>%
group_by(genome,gut_location, fox_behaviour) %>%
summarise(presence=ifelse(sum(presence)>0,1,0)) %>% #counts presence per combination of genome, gut location and fox behaviour
group_by(genome,fox_behaviour) %>%
summarise(presence=sum(presence)) #sum the presences for each fox behaviour
mag_prevalence_behaviour %>%
group_by(fox_behaviour) %>%
summarise(mean=mean(presence),sd=sd(presence)) %>%
tt()| fox_behaviour | mean | sd |
|---|---|---|
| aggr | 2.321678 | 0.8101591 |
| tame | 1.566434 | 0.7922107 |
# A tibble: 1 × 4
statistic p.value method alternative
<dbl> <dbl> <chr> <chr>
1 15248 2.69e-14 Wilcoxon rank sum test with continuity correction two.sided
behaviour_colors <- c( "#d62728", "#1f77b4","#2ca02c")
mag_prevalence_behaviour %>%
ggplot(aes(x=fox_behaviour,y=presence, color=fox_behaviour, fill=fox_behaviour)) +
geom_boxplot() +
geom_point() +
scale_color_manual(values=behaviour_colors) +
scale_fill_manual(values=str_c(behaviour_colors,"50")) +
theme_minimal()5.4 Kruskal-Wallis
As we have >2 groups in gut location and fox behaviour, we will carry out Kruskal-Wallis tests to evaluate if there are significant differences between the groups in terms of MAG presence count (how many MAGs are present in each group). If the p-value < 0.05, we will do pairwise Wilcoxon tests, to evaluate which of the groups differ in this regard.
Gut location:
mag_prevalence_gutlocation <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>%
pivot_longer(!genome,names_to="sample",values_to="abundance") %>%
left_join(sample_metadata,by="sample") %>%
mutate(presence=ifelse(abundance>0,1,0)) %>%
dplyr::select(genome,sample,presence,gut_location, fox_behaviour) %>%
group_by(genome,gut_location) %>%
summarise(presence=ifelse(sum(presence)>0,1,0)) `summarise()` has grouped output by 'genome'. You can override using the `.groups` argument.
# Kruskal–Wallis test (nonparametric ANOVA)
kruskal.test(presence ~ gut_location, data = mag_prevalence_gutlocation)
Kruskal-Wallis rank sum test
data: presence by gut_location
Kruskal-Wallis chi-squared = 352.88, df = 3, p-value < 2.2e-16
pairwise.wilcox.test(mag_prevalence_gutlocation$presence,
mag_prevalence_gutlocation$gut_location,
p.adjust.method = "fdr")
Pairwise comparisons using Wilcoxon rank sum test with continuity correction
data: mag_prevalence_gutlocation$presence and mag_prevalence_gutlocation$gut_location
D_ileum F_colon K_ileum
F_colon 4.4e-06 - -
K_ileum < 2e-16 < 2e-16 -
M_colon 0.0056 3.0e-11 < 2e-16
P value adjustment method: fdr
mag_prevalence_foxbehaviour <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>%
pivot_longer(!genome,names_to="sample",values_to="abundance") %>%
left_join(sample_metadata,by="sample") %>%
mutate(presence=ifelse(abundance>0,1,0)) %>%
dplyr::select(genome,sample,presence,gut_location, fox_behaviour) %>%
group_by(genome,fox_behaviour) %>%
summarise(presence=ifelse(sum(presence)>0,1,0)) `summarise()` has grouped output by 'genome'. You can override using the `.groups` argument.
#Kruskal–Wallis test (nonparametric ANOVA)
kruskal.test(presence ~ fox_behaviour, data = mag_prevalence_foxbehaviour)
Kruskal-Wallis rank sum test
data: presence by fox_behaviour
Kruskal-Wallis chi-squared = 10.772, df = 2, p-value = 0.004581
pairwise.wilcox.test(mag_prevalence_foxbehaviour$presence,
mag_prevalence_foxbehaviour$fox_behaviour,
p.adjust.method = "fdr")
Pairwise comparisons using Wilcoxon rank sum test with continuity correction
data: mag_prevalence_foxbehaviour$presence and mag_prevalence_foxbehaviour$fox_behaviour
aggr tame
tame 0.0961 -
unsel 0.0034 0.1236
P value adjustment method: fdr