Chapter 5 Community composition
5.1 Taxonomy overview
5.1.1 Stacked barplot
tax_barplot <- genome_counts_filt %>%
mutate_at(vars(-genome), ~./sum(.)) %>% #apply TSS normalization
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
filter(count > 0) #filter 0 counts
tax_barplot_filt <- genome_counts_filt %>%
dplyr::select(-all_of(samples_to_remove))%>%
mutate_at(vars(-genome), ~./sum(.)) %>% #apply TSS normalization
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
filter(count > 0) #filter 0 counts
Evaluating the community composition at each of the gut locations.
ggplot(tax_barplot, aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
facet_grid(. ~ gut_location, scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 6),
axis.ticks.x =element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_text(size=8),
axis.title.y = element_text(size=12),
panel.background = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
panel.border = element_rect(colour = "black", fill = NA),
strip.background = element_rect(fill = "white", color = "black"),
strip.text = element_text(size = 12, lineheight = 0.6)) +
labs(fill="Phylum",y = "Relative abundance",x="Samples")
ggplot(tax_barplot_filt, aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
facet_grid(. ~ gut_location, scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 6),
axis.ticks.x =element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_text(size=8),
axis.title.y = element_text(size=12),
panel.background = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
panel.border = element_rect(colour = "black", fill = NA),
strip.background = element_rect(fill = "white", color = "black"),
strip.text = element_text(size = 12, lineheight = 0.6)) +
labs(fill="Phylum",y = "Relative abundance",x="Samples")
Evaluating community composition between the fox behaviours within the different gut locations:
ggplot(tax_barplot, aes(x=sample,y=count, fill=phylum, group=phylum)) +
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
facet_nested(. ~ gut_location + fox_behaviour, scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_blank(),
axis.ticks.x =element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_text(size=8),
axis.title.y = element_text(size=12),
panel.background = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
panel.border = element_rect(colour = "black", fill = NA),
strip.background = element_rect(fill = "white", color = "black"),
strip.text = element_text(size = 12, lineheight = 0.6)) +
labs(fill="Phylum",y = "Relative abundance",x="Samples")
ggplot(tax_barplot, aes(x=sample,y=count, fill=phylum, group=phylum)) +
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
facet_nested(. ~ fox_behaviour + gut_location, scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_blank(),
axis.ticks.x =element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_text(size=8),
axis.title.y = element_text(size=12),
panel.background = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
panel.border = element_rect(colour = "black", fill = NA),
strip.background = element_rect(fill = "white", color = "black"),
strip.text = element_text(size = 12, lineheight = 0.6)) +
labs(fill="Phylum",y = "Relative abundance",x="Samples")
ggplot(tax_barplot, aes(x=sample,y=count, fill=phylum, group=phylum)) +
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
facet_nested(. ~ fox_behaviour + sample_type, scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_blank(),
axis.ticks.x =element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_text(size=8),
axis.title.y = element_text(size=12),
panel.background = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
panel.border = element_rect(colour = "black", fill = NA),
strip.background = element_rect(fill = "white", color = "black"),
strip.text = element_text(size = 12, lineheight = 0.6)) +
labs(fill="Phylum",y = "Relative abundance",x="Samples")
ggplot(tax_barplot, aes(x=sample,y=count, fill=phylum, group=phylum)) +
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
facet_nested(. ~ sample_type+ fox_behaviour, scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_blank(),
axis.ticks.x =element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_text(size=8),
axis.title.y = element_text(size=12),
panel.background = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
panel.border = element_rect(colour = "black", fill = NA),
strip.background = element_rect(fill = "white", color = "black"),
strip.text = element_text(size = 12, lineheight = 0.6)) +
labs(fill="Phylum",y = "Relative abundance",x="Samples")
### Only with F_colon
ggplot(tax_barplot_fcolon, aes(x=sample,y=count, fill=phylum, group=phylum)) +
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
facet_nested(. ~ fox_behaviour, scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_blank(),
axis.ticks.x =element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_text(size=8),
axis.title.y = element_text(size=12),
panel.background = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
panel.border = element_rect(colour = "black", fill = NA),
strip.background = element_rect(fill = "white", color = "black"),
strip.text = element_text(size = 12, lineheight = 0.6)) +
labs(fill="Phylum",y = "Relative abundance",x="Samples")
5.1.2 Phylum relative abundances
phylum_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS normalization
pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
left_join(genome_metadata, by = join_by(genome == genome)) %>%
group_by(sample,phylum,fox_behaviour) %>%
summarise(relabun=sum(count))
phylum_summary %>%
group_by(phylum) %>%
summarise(total_mean=mean(relabun*100, na.rm=T),
total_sd=sd(relabun*100, na.rm=T),
tame_mean=mean(relabun[fox_behaviour =="tame"]*100, na.rm=T),
tame_sd=sd(relabun[fox_behaviour =="tame"]*100, na.rm=T),
aggressive_mean=mean(relabun[fox_behaviour =="aggr"]*100, na.rm=T),
aggressive_sd=sd(relabun[fox_behaviour=="aggr"]*100, na.rm=T),
unsel_mean=mean(relabun[fox_behaviour =="unsel"]*100, na.rm=T),
unsel_sd=sd(relabun[fox_behaviour =="unsel"]*100, na.rm=T)) %>%
mutate(total=str_c(round(total_mean,2),"±",round(total_sd,2)),
tame=str_c(round(tame_mean,2),"±",round(tame_sd,2)),
aggresive=str_c(round(aggressive_mean,2),"±",round(aggressive_sd,2)),
unselected =str_c(round(unsel_mean,2),"±",round(unsel_sd,2))) %>%
arrange(-total_mean) %>%
dplyr::select(phylum,total,tame,aggresive, unselected) %>%
tt()
phylum | total | tame | aggresive | unselected |
---|---|---|---|---|
Bacillota_A | 53.84±36.19 | 58.59±35.17 | 50.48±39.83 | 49.55±33.08 |
Bacteroidota | 15.25±19.97 | 9.6±16.51 | 16.34±20.1 | 26.91±24.37 |
Bacillota | 12.28±22.53 | 15.11±24.53 | 12.65±24.9 | 4.34±5.37 |
Fusobacteriota | 4.95±6.83 | 4.36±7.66 | 5.36±6.89 | 5.47±4.84 |
Pseudomonadota | 4.22±13.86 | 2.75±5.01 | 6.73±21.62 | 2.25±2.29 |
Bacillota_I | 2.76±3.46 | 1.45±2.48 | 2.68±2.84 | 6.21±4.68 |
Campylobacterota | 2.69±4.84 | 3.2±4.91 | 2.27±5.02 | 2.35±4.79 |
Desulfobacterota | 1.45±7.98 | 3.15±12.05 | 0.05±0.11 | 0.33±0.33 |
Spirochaetota | 0.91±2.5 | 0.35±1.12 | 1.94±3.64 | 0±0 |
Deferribacterota | 0.73±1.38 | 0.66±1.11 | 0.52±1.29 | 1.38±2.07 |
Bacillota_C | 0.67±0.9 | 0.51±0.87 | 0.69±0.88 | 1.01±1.02 |
Actinomycetota | 0.21±0.37 | 0.18±0.31 | 0.26±0.48 | 0.15±0.25 |
Bacillota_B | 0.04±0.09 | 0.05±0.1 | 0.03±0.06 | 0.04±0.11 |
Distribution of relative abundance across samples:
phylum_arrange <- phylum_summary %>%
group_by(phylum) %>%
summarise(mean=mean(relabun)) %>%
arrange(-mean) %>%
dplyr::select(phylum) %>%
pull()
phylum_summary %>%
filter(phylum %in% phylum_arrange) %>%
mutate(phylum=factor(phylum,levels=rev(phylum_arrange))) %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=phylum, group=phylum, color=phylum, fill=phylum)) +
scale_color_manual(values=phylum_colors[rev(phylum_arrange)]) +
scale_fill_manual(values=phylum_colors[-8]) +
geom_boxplot(alpha=0.2)+
geom_jitter(alpha=0.5) +
theme_minimal() +
theme(legend.position="none") +
labs(y="Phylum",x="Relative abundance")
## Calculate the phylum order based on mean relative abundance
phylum_arrange <- phylum_summary %>%
group_by(phylum) %>%
summarise(mean=mean(relabun)) %>%
arrange(-mean) %>%
dplyr::select(phylum) %>%
pull()
phylum_summary %>%
filter(phylum %in% phylum_arrange) %>%
mutate(phylum=factor(phylum,levels=rev(phylum_arrange))) %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=phylum, group=phylum, color=phylum, fill=phylum)) +
scale_color_manual(values=phylum_colors[rev(phylum_arrange)]) +
scale_fill_manual(values=phylum_colors[-8]) +
geom_boxplot(alpha=0.2)+
geom_jitter(alpha=0.5) +
facet_nested(. ~ gut_location)+
theme_minimal() +
theme(legend.position="none",
strip.text.x = element_text(size = 14, color="black",face="bold"),
axis.text.x = element_text(vjust = 0.5, size = 10),
axis.text.y = element_text(size = 12),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
axis.title = element_text(size = 14, face = "bold"),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0))
) +
labs(y="Phylum",x="Relative abundance")
phylum_summary %>%
filter(phylum %in% phylum_arrange) %>%
mutate(phylum=factor(phylum,levels=rev(phylum_arrange))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=phylum, group=phylum, color=phylum, fill=phylum)) +
scale_color_manual(values=phylum_colors[rev(phylum_arrange)]) +
scale_fill_manual(values=phylum_colors[-8]) +
geom_boxplot(alpha=0.2)+
geom_jitter(alpha=0.5) +
facet_grid(.~fox_behaviour)+
theme_minimal() +
theme(legend.position="none",
strip.text.x = element_text(size = 14, color="black",face="bold"),
axis.text.x = element_text(vjust = 0.5, size = 10),
axis.text.y = element_text(size = 12),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
axis.title = element_text(size = 14, face = "bold"),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0))
) +
labs(y="Phylum",x="Relative abundance")
5.1.3 Family
`summarise()` has grouped output by 'sample', 'family'. You can override using the `.groups` argument.
family | total | tame | aggr | unsel |
---|---|---|---|---|
Peptostreptococcaceae | 45.03±42.9 | 48.09±43.82 | 44.45±44.89 | 38.7±40.68 |
Bacteroidaceae | 14.24±18.56 | 9±15.28 | 14.97±18.23 | 25.71±23.3 |
Lactobacillaceae | 9.31±16.8 | 13.65±22.13 | 7.06±12.19 | 3.54±4.51 |
Fusobacteriaceae | 4.95±6.83 | 4.36±7.66 | 5.36±6.89 | 5.47±4.84 |
Lachnospiraceae | 4.29±6.04 | 3.43±6.01 | 4.38±6.09 | 6.22±6.34 |
Clostridiaceae | 3.1±10.53 | 6.24±15.54 | 0.36±0.53 | 1.45±2.28 |
Streptococcaceae | 2.9±8.69 | 1.46±2.66 | 5.44±13.42 | 0.78±0.84 |
Enterobacteriaceae | 2.04±5.98 | 1.97±4.82 | 2.57±8.17 | 1.02±1.62 |
Helicobacteraceae | 1.93±3.97 | 1.76±3.39 | 1.94±4.4 | 2.32±4.79 |
Mycoplasmoidaceae | 1.78±3.5 | 0.9±2.51 | 1.58±2.76 | 4.43±5.72 |
CAMWXW01 | 1.36±7.99 | 3.12±12.06 | 0.02±0.08 | 0±0 |
Pasteurellaceae | 1.32±8.53 | 0±0 | 3.37±13.61 | 0±0 |
Brachyspiraceae | 0.91±2.5 | 0.35±1.12 | 1.94±3.64 | 0±0 |
Campylobacteraceae | 0.76±1.77 | 1.45±2.39 | 0.33±0.97 | 0.02±0.07 |
Mucispirillaceae | 0.73±1.38 | 0.66±1.11 | 0.52±1.29 | 1.38±2.07 |
Acidaminococcaceae | 0.67±0.9 | 0.51±0.87 | 0.69±0.88 | 1.01±1.02 |
Oscillospiraceae | 0.62±0.9 | 0.31±0.53 | 0.56±0.74 | 1.52±1.37 |
Burkholderiaceae | 0.6±0.84 | 0.57±0.89 | 0.46±0.71 | 1±0.93 |
Muribaculaceae | 0.58±1.07 | 0.23±0.44 | 0.99±1.55 | 0.52±0.46 |
Erysipelotrichaceae | 0.53±0.75 | 0.37±0.66 | 0.58±0.77 | 0.8±0.9 |
Ruminococcaceae | 0.41±0.67 | 0.41±0.84 | 0.39±0.58 | 0.43±0.44 |
UBA932 | 0.23±0.41 | 0.19±0.49 | 0.22±0.35 | 0.34±0.37 |
Coprobacillaceae | 0.23±0.46 | 0.12±0.24 | 0.32±0.65 | 0.28±0.38 |
Succinivibrionaceae | 0.18±0.37 | 0.14±0.37 | 0.27±0.44 | 0.04±0.1 |
Anaeroplasmataceae | 0.16±0.34 | 0.07±0.23 | 0.2±0.44 | 0.29±0.32 |
Marinifilaceae | 0.14±0.3 | 0.16±0.37 | 0.1±0.17 | 0.22±0.32 |
CAG-465 | 0.13±0.38 | 0.03±0.07 | 0.07±0.21 | 0.53±0.77 |
CAG-508 | 0.12±0.27 | 0.03±0.07 | 0.12±0.28 | 0.32±0.43 |
Coriobacteriaceae | 0.09±0.28 | 0.05±0.15 | 0.17±0.41 | 0.01±0.03 |
CAG-239 | 0.09±0.19 | 0.07±0.21 | 0.06±0.15 | 0.19±0.2 |
Bifidobacteriaceae | 0.09±0.23 | 0.09±0.25 | 0.09±0.23 | 0.08±0.2 |
Desulfovibrionaceae | 0.08±0.19 | 0.03±0.08 | 0.03±0.09 | 0.33±0.33 |
UBA660 | 0.07±0.26 | 0±0 | 0±0 | 0.41±0.53 |
Tannerellaceae | 0.06±0.13 | 0.02±0.07 | 0.06±0.15 | 0.12±0.19 |
Anaerotignaceae | 0.05±0.14 | 0.02±0.06 | 0.1±0.21 | 0.04±0.07 |
Aristaeellaceae | 0.05±0.14 | 0.01±0.03 | 0.02±0.04 | 0.23±0.26 |
Enterococcaceae | 0.04±0.13 | 0.01±0.03 | 0.08±0.19 | 0.03±0.06 |
Peptococcaceae | 0.04±0.09 | 0.05±0.1 | 0.03±0.06 | 0.04±0.11 |
Eggerthellaceae | 0.03±0.08 | 0.04±0.1 | 0.01±0.02 | 0.06±0.11 |
Butyricicoccaceae | 0.03±0.08 | 0.01±0.07 | 0.02±0.04 | 0.09±0.13 |
Turicibacteraceae | 0.03±0.11 | 0±0 | 0.07±0.17 | 0±0 |
Anaerovoracaceae | 0.01±0.04 | 0.01±0.04 | 0.01±0.02 | 0.03±0.05 |
CAG-826 | 0±0.01 | 0±0.01 | 0±0.01 | 0±0 |
family_summary %>%
left_join(genome_metadata %>% dplyr::select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
left_join(sample_metadata,by=join_by(sample==sample)) %>%
filter(family %in% family_arrange[1:20]) %>%
mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=family, group=family, color=phylum, fill=phylum)) +
scale_color_manual(values=phylum_colors[-8]) +
scale_fill_manual(values=phylum_colors[-8]) +
geom_boxplot(alpha=0.2)+
geom_jitter(alpha=0.5) +
facet_grid(.~gut_location)+
theme_minimal() +
theme(legend.position="none",
strip.text.x = element_text(size = 14, color="black",face="bold"),
axis.text.x = element_text(vjust = 0.5, size = 6),
axis.text.y = element_text(size = 12),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
axis.title = element_text(size = 14, face = "bold"),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)))+
labs(y="Family", x="Relative abundance", color="Phylum")
family_summary %>%
left_join(genome_metadata %>% dplyr::select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
left_join(sample_metadata,by=join_by(sample==sample)) %>%
filter(family %in% family_arrange[1:20]) %>%
mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=family, group=family, color=phylum, fill=phylum)) +
scale_color_manual(values=phylum_colors[-8]) +
scale_fill_manual(values=phylum_colors[-8]) +
geom_boxplot(alpha=0.2)+
geom_jitter(alpha=0.5) +
facet_grid(.~fox_behaviour.x)+
theme_minimal() +
theme(legend.position="none",
strip.text.x = element_text(size = 14, color="black",face="bold"),
axis.text.x = element_text(vjust = 0.5, size = 6),
axis.text.y = element_text(size = 12),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
axis.title = element_text(size = 14, face = "bold"),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)))+
labs(y="Family", x="Relative abundance", color="Phylum")
5.1.4 Genus
`summarise()` has grouped output by 'sample', 'genus'. You can override using the `.groups` argument.
genus | total | tame | aggr | unsel |
---|---|---|---|---|
Paraclostridium | 44.77±42.86 | 47.88±44.04 | 44.02±44.54 | 38.7±40.68 |
Bacteroides | 5.72±8 | 2.83±4.62 | 5.8±6.72 | 12.77±12.77 |
Limosilactobacillus | 5.1±9.73 | 5.91±10.31 | 5.41±11.11 | 2.4±3.06 |
Phocaeicola | 3.83±5.4 | 2.45±4.54 | 5.12±6.63 | 4.39±3.83 |
Lactobacillus | 3.54±8.66 | 6.99±12.38 | 0.94±1.41 | 0.73±1.1 |
Fusobacterium_A | 3.22±4.67 | 3.04±5.44 | 3.36±4.56 | 3.36±3.04 |
Sarcina | 2.71±9.12 | 5.38±13.47 | 0.33±0.49 | 1.42±2.26 |
Lactococcus | 2.34±8.57 | 1.09±2.22 | 4.51±13.44 | 0.59±0.52 |
Paraprevotella | 2.23±3.46 | 2.3±4.06 | 1.33±2.3 | 4.11±3.63 |
Alloprevotella | 2.07±2.93 | 1.14±2.01 | 2.45±3.51 | 3.53±2.99 |
Mycoplasmoides | 1.78±3.5 | 0.9±2.51 | 1.58±2.76 | 4.43±5.72 |
Escherichia | 1.74±5.83 | 1.4±4.34 | 2.44±8.19 | 0.99±1.63 |
Fusobacterium_B | 1.67±2.21 | 1.32±2.25 | 1.85±2.32 | 2.11±2 |
CAMWXW01 | 1.36±7.99 | 3.12±12.06 | 0.02±0.08 | 0±0 |
Canicola | 1.32±8.53 | 0±0 | 3.37±13.61 | 0±0 |
Blautia | 1.2±1.74 | 1.09±1.91 | 1.25±1.8 | 1.37±1.27 |
Blautia_A | 1.01±1.57 | 0.66±1.17 | 1.18±1.87 | 1.5±1.71 |
Brachyspira | 0.91±2.5 | 0.35±1.12 | 1.94±3.64 | 0±0 |
Helicobacter_A | 0.9±2.2 | 0.64±1.34 | 0.59±1.02 | 2.22±4.63 |
Campylobacter_D | 0.76±1.77 | 1.45±2.39 | 0.33±0.97 | 0.02±0.07 |
Helicobacter_C | 0.75±2.24 | 0.96±2.1 | 0.86±2.84 | 0±0 |
Mucispirillum | 0.73±1.38 | 0.66±1.11 | 0.52±1.29 | 1.38±2.07 |
Phascolarctobacterium_A | 0.67±0.9 | 0.51±0.87 | 0.69±0.88 | 1.01±1.02 |
Ligilactobacillus | 0.64±1.09 | 0.69±1.08 | 0.69±1.26 | 0.41±0.72 |
Faecousia | 0.6±0.88 | 0.3±0.53 | 0.55±0.73 | 1.48±1.35 |
Sutterella | 0.6±0.84 | 0.57±0.89 | 0.46±0.71 | 1±0.93 |
Limisoma | 0.58±1.07 | 0.23±0.44 | 0.99±1.55 | 0.52±0.46 |
Faecalimonas | 0.57±0.89 | 0.44±0.88 | 0.48±0.73 | 1.07±1.15 |
Streptococcus | 0.56±1.09 | 0.36±0.65 | 0.93±1.54 | 0.19±0.42 |
Clostridium | 0.39±1.43 | 0.86±2.11 | 0.03±0.05 | 0.03±0.07 |
Faecalibacterium | 0.33±0.58 | 0.35±0.72 | 0.33±0.52 | 0.31±0.28 |
Plesiomonas | 0.31±0.97 | 0.57±1.35 | 0.13±0.53 | 0.03±0.06 |
Helicobacter_B | 0.28±1.27 | 0.15±0.44 | 0.49±1.99 | 0.11±0.18 |
Faecalimicrobium | 0.26±1.02 | 0.2±0.66 | 0.44±1.48 | 0±0 |
Otoolea | 0.24±0.36 | 0.25±0.43 | 0.16±0.25 | 0.42±0.37 |
Cryptobacteroides | 0.23±0.41 | 0.19±0.49 | 0.22±0.35 | 0.34±0.37 |
Oliverpabstia | 0.21±0.42 | 0.19±0.4 | 0.28±0.5 | 0.11±0.24 |
Allobaculum | 0.2±0.32 | 0.12±0.23 | 0.3±0.42 | 0.15±0.16 |
Prevotella | 0.19±0.49 | 0.22±0.6 | 0.15±0.45 | 0.22±0.34 |
Anaerobiospirillum | 0.17±0.37 | 0.14±0.37 | 0.25±0.43 | 0.04±0.1 |
Holdemanella | 0.16±0.33 | 0.2±0.45 | 0.12±0.24 | 0.17±0.18 |
Brotaphodocola | 0.16±0.28 | 0.08±0.22 | 0.12±0.2 | 0.44±0.4 |
CALUXS01 | 0.16±0.34 | 0.07±0.23 | 0.2±0.44 | 0.29±0.32 |
OM05-12 | 0.15±0.39 | 0.04±0.17 | 0.06±0.13 | 0.61±0.74 |
Odoribacter | 0.14±0.3 | 0.16±0.37 | 0.1±0.17 | 0.22±0.32 |
UBA9414 | 0.14±0.24 | 0.16±0.29 | 0.07±0.13 | 0.23±0.27 |
0.13±0.38 | 0.03±0.07 | 0.07±0.21 | 0.53±0.77 | |
Faecalibacillus | 0.13±0.42 | 0.04±0.09 | 0.27±0.64 | 0.04±0.08 |
JAHHTG01 | 0.12±0.38 | 0±0 | 0.13±0.34 | 0.39±0.73 |
JAGZHZ01 | 0.11±0.23 | 0.11±0.25 | 0.1±0.19 | 0.15±0.26 |
HGM12587 | 0.1±0.23 | 0.02±0.07 | 0.08±0.19 | 0.36±0.37 |
Gallintestinimicrobium | 0.09±0.14 | 0.08±0.14 | 0.11±0.16 | 0.08±0.09 |
Collinsella | 0.09±0.28 | 0.05±0.15 | 0.17±0.41 | 0.01±0.03 |
UMGS1370 | 0.09±0.16 | 0.08±0.17 | 0.11±0.16 | 0.06±0.14 |
51-20 | 0.09±0.19 | 0.07±0.21 | 0.06±0.15 | 0.19±0.2 |
Bifidobacterium | 0.09±0.23 | 0.09±0.25 | 0.09±0.23 | 0.08±0.2 |
Taurinivorans | 0.08±0.19 | 0.03±0.08 | 0.03±0.09 | 0.33±0.33 |
Frisingicoccus | 0.07±0.22 | 0.08±0.2 | 0.1±0.28 | 0±0 |
UBA3789 | 0.07±0.26 | 0±0 | 0±0 | 0.41±0.53 |
Marvinbryantia | 0.07±0.17 | 0.03±0.08 | 0.07±0.15 | 0.17±0.32 |
CAG-269 | 0.07±0.16 | 0±0.02 | 0.1±0.2 | 0.16±0.22 |
Schaedlerella | 0.06±0.12 | 0.04±0.1 | 0.06±0.14 | 0.09±0.14 |
Cetobacterium_A | 0.06±0.39 | 0±0 | 0.15±0.62 | 0±0 |
Parabacteroides | 0.06±0.13 | 0.02±0.07 | 0.06±0.15 | 0.12±0.19 |
Ventricola | 0.05±0.14 | 0.01±0.03 | 0.02±0.04 | 0.23±0.26 |
Phocaeicola_A | 0.05±0.12 | 0.02±0.1 | 0.06±0.13 | 0.09±0.13 |
CALVGN01 | 0.05±0.08 | 0.05±0.1 | 0.03±0.05 | 0.09±0.11 |
Enterococcus | 0.04±0.13 | 0.01±0.03 | 0.08±0.19 | 0.03±0.06 |
CAJMNU01 | 0.04±0.11 | 0.02±0.06 | 0.07±0.16 | 0.03±0.05 |
UMGS1590 | 0.04±0.09 | 0.05±0.1 | 0.03±0.06 | 0.04±0.11 |
CALDMQ01 | 0.04±0.1 | 0.06±0.14 | 0.02±0.04 | 0.03±0.06 |
Negativibacillus | 0.04±0.09 | 0.02±0.06 | 0.02±0.05 | 0.1±0.16 |
Fimiplasma | 0.04±0.15 | 0±0 | 0±0 | 0.21±0.32 |
Avimicrobium | 0.04±0.08 | 0.04±0.1 | 0.03±0.06 | 0.02±0.04 |
Anaerotignum | 0.03±0.08 | 0.02±0.05 | 0.06±0.11 | 0.03±0.06 |
Slackia | 0.03±0.08 | 0.04±0.1 | 0.01±0.02 | 0.06±0.11 |
Avilachnospira | 0.03±0.06 | 0.03±0.07 | 0.03±0.06 | 0.04±0.06 |
Butyricicoccus | 0.03±0.08 | 0.01±0.07 | 0.02±0.04 | 0.09±0.13 |
Latilactobacillus | 0.03±0.1 | 0.05±0.15 | 0.02±0.04 | 0±0 |
Turicibacter | 0.03±0.11 | 0±0 | 0.07±0.17 | 0±0 |
Laedolimicola | 0.03±0.07 | 0.01±0.03 | 0.04±0.1 | 0.04±0.05 |
UMGS1663 | 0.03±0.1 | 0±0 | 0±0 | 0.15±0.22 |
Choladousia | 0.03±0.07 | 0.01±0.04 | 0.05±0.11 | 0.01±0.03 |
Merdicola | 0.02±0.07 | 0.03±0.07 | 0.03±0.08 | 0.01±0.01 |
Beduini | 0.02±0.06 | 0.02±0.06 | 0.03±0.07 | 0.01±0.02 |
Metalachnospira | 0.02±0.07 | 0.01±0.02 | 0.04±0.1 | 0.01±0.02 |
Alitiscatomonas | 0.02±0.04 | 0.01±0.03 | 0.02±0.04 | 0.04±0.07 |
Dorea_B | 0.02±0.04 | 0.03±0.06 | 0±0.01 | 0.01±0.01 |
Ruminococcus_B | 0.01±0.05 | 0.02±0.08 | 0.01±0.02 | 0.01±0.03 |
Evtepia | 0.01±0.05 | 0±0.02 | 0.01±0.03 | 0.04±0.1 |
Gallibacter | 0.01±0.04 | 0.01±0.04 | 0.01±0.02 | 0.03±0.05 |
Succinivibrio | 0.01±0.03 | 0±0 | 0.02±0.05 | 0±0 |
Onthovivens | 0±0.01 | 0±0.01 | 0±0.01 | 0±0 |
genus_summary %>%
left_join(genome_metadata %>% dplyr::select(genus,phylum) %>% unique(),by="genus") %>%
left_join(sample_metadata,by=join_by(sample==sample)) %>%
filter(genus %in% genus_arrange[1:20]) %>%
mutate(genus=factor(genus,levels=rev(genus_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=genus, group=genus, color=phylum, fill=phylum)) +
scale_color_manual(values=phylum_colors[-8]) +
scale_fill_manual(values=phylum_colors[-8]) +
geom_boxplot(alpha=0.2)+
geom_jitter(alpha=0.5) +
facet_grid(.~ gut_location)+
theme_minimal() +
theme(legend.position="none",
strip.text.x = element_text(size = 14, color="black",face="bold"),
axis.text.x = element_text(vjust = 0.5, size = 6),
axis.text.y = element_text(size = 12),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
axis.title = element_text(size = 14, face = "bold"),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)))+
labs(y="Family", x="Relative abundance", color="Phylum")
genus_summary %>%
left_join(genome_metadata %>% dplyr::select(genus,phylum) %>% unique(),by="genus") %>%
filter(genus %in% genus_arrange[1:20]) %>%
mutate(genus=factor(genus,levels=rev(genus_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=genus, group=genus, color=phylum, fill=phylum)) +
scale_color_manual(values=phylum_colors[-8]) +
scale_fill_manual(values=phylum_colors[-8]) +
geom_boxplot(alpha=0.2)+
geom_jitter(alpha=0.5) +
facet_grid(.~fox_behaviour)+
theme_minimal() +
theme(legend.position="none",
strip.text.x = element_text(size = 14, color="black",face="bold"),
axis.text.x = element_text(vjust = 0.5, size = 6),
axis.text.y = element_text(size = 12),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
axis.title = element_text(size = 14, face = "bold"),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)))+
labs(y="Family", x="Relative abundance", color="Phylum")
5.2 Core microbiota
library(UpSetR)
genome_counts_rel <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>%
column_to_rownames(., "genome")
#Presence/absence
genome_counts_rel_pa <- (genome_counts_rel > 0) * 1
#Add gut_location info
df <- as.data.frame(t(genome_counts_rel_pa)) %>%
rownames_to_column("sample") %>%
left_join(sample_metadata, by="sample")
#Summarize by location
table_upset_analysis <- df %>%
group_by(gut_location) %>%
summarise(across(-sample, ~ as.integer(any(. > 0)))) %>%
column_to_rownames("gut_location") %>%
t() %>%
as.data.frame()
location_colors <- c(
F_colon = "#d6604d",
M_colon = "#542788",
D_ileum = "#fdb863",
K_ileum = "#bb99d8"
)
#UpSet plot
upset(table_upset_analysis,
keep.order = TRUE,
sets = rev(c("F_colon", "M_colon", "D_ileum", "K_ileum")),
sets.bar.color = rev(location_colors),
mb.ratio = c(0.55, 0.45),
order.by = "freq")
# Convert to relative abundance
genome_counts_rel <- genome_counts_filt %>%
mutate_at(vars(-genome), ~ . / sum(.)) %>%
column_to_rownames("genome")
# Presence/absence
genome_counts_rel_pa <- (genome_counts_rel > 0) * 1
# Add behaviour info
df <- as.data.frame(t(genome_counts_rel_pa)) %>%
rownames_to_column("sample") %>%
left_join(sample_metadata, by = "sample")
# Summarize by behaviour
table_upset_analysis <- df %>%
group_by(fox_behaviour) %>%
summarise(across(-sample, ~ as.integer(any(. > 0)))) %>%
column_to_rownames("fox_behaviour") %>%
t() %>%
as.data.frame()
# Behaviour colors
behaviour_colors <- c(
tame = "#1f77b4",
aggr = "#d62728",
unsel = "#2ca02c"
)
# UpSet plot
upset(table_upset_analysis,
keep.order = TRUE,
sets = rev(names(behaviour_colors)),
sets.bar.color = rev(unname(behaviour_colors)),
mb.ratio = c(0.55, 0.45),
order.by = "freq")
5.3 Wilcoxon Test
H0 (null): The distribution of genome prevalence (presence) is the same across the two sample_type groups (gut tissue or gut content).
mag_prevalence_location <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>%
pivot_longer(!genome,names_to="sample",values_to="abundance") %>%
left_join(sample_metadata,by="sample") %>%
mutate(presence=ifelse(abundance>0,1,0)) %>%
dplyr::select(genome,sample,presence,gut_location, sample_type) %>%
group_by(genome,gut_location, sample_type) %>%
summarise(presence=ifelse(sum(presence)>0,1,0)) %>%
group_by(genome,sample_type) %>%
summarise(presence=sum(presence))
mag_prevalence_location %>%
group_by(sample_type) %>%
summarise(mean=mean(presence),sd=sd(presence)) %>%
tt()
sample_type | mean | sd |
---|---|---|
Gut content | 1.8601399 | 0.3480610 |
Gut tissue | 0.7552448 | 0.4924326 |
# A tibble: 1 × 4
statistic p.value method alternative
<dbl> <dbl> <chr> <chr>
1 19123 5.12e-44 Wilcoxon rank sum test with continuity correction two.sided
mag_prevalence_location %>%
ggplot(aes(x=sample_type,y=presence, color=sample_type, fill=sample_type)) +
geom_boxplot(alpha = 0.5) +
geom_jitter() +
theme_minimal()
Comparing the prevalence/presence of the MAGs between aggressive and tame foxes
mag_prevalence_location <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>%
pivot_longer(!genome,names_to="sample",values_to="abundance") %>%
left_join(sample_metadata,by="sample") %>%
filter(!grepl("unsel", fox_behaviour))%>% #to remove the unselected foxes
mutate(presence=ifelse(abundance>0,1,0)) %>%
dplyr::select(genome,sample,presence,gut_location, fox_behaviour) %>%
group_by(genome,gut_location, fox_behaviour) %>%
summarise(presence=ifelse(sum(presence)>0,1,0)) %>%
group_by(genome,fox_behaviour) %>%
summarise(presence=sum(presence))
mag_prevalence_location %>%
group_by(fox_behaviour) %>%
summarise(mean=mean(presence),sd=sd(presence)) %>%
tt()
fox_behaviour | mean | sd |
---|---|---|
aggr | 2.321678 | 0.8101591 |
tame | 1.566434 | 0.7922107 |
# A tibble: 1 × 4
statistic p.value method alternative
<dbl> <dbl> <chr> <chr>
1 15248 2.69e-14 Wilcoxon rank sum test with continuity correction two.sided
behaviour_colors <- c( "#d62728", "#1f77b4","#2ca02c")
mag_prevalence_location %>%
ggplot(aes(x=fox_behaviour,y=presence, color=fox_behaviour, fill=fox_behaviour)) +
geom_boxplot() +
geom_jitter() +
scale_color_manual(values=behaviour_colors) +
scale_fill_manual(values=str_c(behaviour_colors,"50")) +
theme_minimal()
5.4 Kruskal-Wallis
mag_prevalence_location_behaviour <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>%
pivot_longer(!genome,names_to="sample",values_to="abundance") %>%
left_join(sample_metadata,by="sample") %>%
mutate(presence=ifelse(abundance>0,1,0)) %>%
dplyr::select(genome,sample,presence,gut_location, fox_behaviour) %>%
group_by(genome,gut_location, fox_behaviour) %>%
summarise(presence=ifelse(sum(presence)>0,1,0))
`summarise()` has grouped output by 'genome', 'gut_location'. You can override using the `.groups` argument.
# Kruskal–Wallis test (nonparametric ANOVA)
kruskal.test(presence ~ gut_location, data = mag_prevalence_location_behaviour)
Kruskal-Wallis rank sum test
data: presence by gut_location
Kruskal-Wallis chi-squared = 692.97, df = 3, p-value < 2.2e-16
pairwise.wilcox.test(mag_prevalence_location_behaviour$presence,
mag_prevalence_location_behaviour$gut_location,
p.adjust.method = "fdr")
Pairwise comparisons using Wilcoxon rank sum test with continuity correction
data: mag_prevalence_location_behaviour$presence and mag_prevalence_location_behaviour$gut_location
D_ileum F_colon K_ileum
F_colon <2e-16 - -
K_ileum <2e-16 <2e-16 -
M_colon 0.68 <2e-16 <2e-16
P value adjustment method: fdr
5.5 Generalized linear model
# Fit GLMM: logistic regression with random effect for gut_location
glmm_model <- glmer(
presence ~ fox_behaviour + (1 | gut_location),
data = mag_prevalence_location_behaviour,
family = binomial # logistic regression because presence is 0/1
)
summary(glmm_model)
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
Family: binomial ( logit )
Formula: presence ~ fox_behaviour + (1 | gut_location)
Data: mag_prevalence_location_behaviour
AIC BIC logLik -2*log(L) df.resid
1478.5 1500.3 -735.3 1470.5 1712
Scaled residuals:
Min 1Q Median 3Q Max
-4.9566 -0.7138 -0.0983 0.7018 10.1736
Random effects:
Groups Name Variance Std.Dev.
gut_location (Intercept) 5.604 2.367
Number of obs: 1716, groups: gut_location, 4
Fixed effects:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.2997 1.1914 0.252 0.801
fox_behaviourtame -1.3226 0.1627 -8.130 4.28e-16 ***
fox_behaviourunsel -0.8087 0.1601 -5.053 4.35e-07 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) fx_bhvrt
fox_behvrtm -0.068
fox_bhvrnsl -0.068 0.536