Chapter 5 Community composition
5.1 Taxonomy overview
5.1.1 Stacked barplot
# Merge data frames based on sample
# transplants_metadata <- sample_metadata %>%
# mutate(Tube_code = str_remove_all(Tube_code, "_a"))
# transplants_metadata$newID <- paste(transplants_metadata$Tube_code,
# "_",
# transplants_metadata$individual)
merged_data <- genome_counts_filt %>%
mutate_at(vars(-genome), ~ . / sum(.)) %>% #apply TSS normalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(., sample_metadata, by = join_by(sample == Tube_code)) %>% #append sample metadata
filter(count > 0) %>% #filter 0 counts
filter(individual!="NA")
ggplot(merged_data, aes(
x = sample,
y = count,
fill = phylum,
group = phylum
)) + #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat = "identity",
colour = "white",
linewidth = 0.1) + #plot stacked bars with white borders
scale_fill_manual(values = phylum_colors) +
facet_nested(. ~ factor (time_point, level=c("Wild", "Acclimation", "Antibiotics", "Inoculum1", "Inoculum2", "FMT1", "FMT2")) + type , scales = "free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
labs(fill = "Phylum", y = "Relative abundance", x = "Sample") +
theme(
axis.text.x = element_text(
angle = 45,
hjust = 1,
size = 0
),
strip.text.x = element_text(
size = 14,
colour = "black",
face = "bold"
),
strip.background = element_rect(fill ="lightgrey"),
axis.title = element_text(size = 18, face = "bold"),
panel.background = element_blank(),
legend.title = element_text(size = 20, face = "bold"),
legend.text = element_text(size = 16)
)5.1.1.1 Wild samples
merged_data %>%
filter(time_point=="Wild") %>%
ggplot(aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
facet_nested(. ~ species.y, scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
labs(fill="Phylum",y = "Relative abundance",x="Sample")+
theme(
axis.text.x = element_text(
angle = 45,
hjust = 1,
size = 0
),
strip.text.x = element_text(
size = 14,
colour = "black",
face = "bold"
),
axis.title = element_text(size = 18, face = "bold"),
panel.background = element_blank(),
strip.background = element_rect(fill ="lightgrey"),
legend.title = element_text(size = 20, face = "bold"),
legend.text = element_text(size = 16)
)5.1.2 Phylum relative abundances
phylum_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS normalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
left_join(sample_metadata, by = join_by(sample == Tube_code)) %>%
left_join(genome_metadata, by = join_by(genome == genome)) %>%
group_by(sample,phylum) %>%
summarise(relabun=sum(count))5.1.2.1 Cold and wet population
phylum_summary %>%
left_join(sample_metadata, by = join_by(sample == Tube_code)) %>%
group_by(phylum, Population) %>%
filter(Population=="Cold_wet" & time_point=="Wild") %>%
summarise(total_mean=mean(relabun*100, na.rm=T),
total_sd=sd(relabun*100, na.rm=T)) %>%
filter(total_mean!=0) %>%
mutate(total=str_c(round(total_mean,2),"±",round(total_sd,2))) %>%
arrange(-total_mean) %>%
dplyr::select(phylum,total) %>%
tt()| phylum | total |
|---|---|
| p__Bacteroidota | 41.93±15.4 |
| p__Bacillota_A | 33.96±16.7 |
| p__Bacillota | 5.73±4.73 |
| p__Campylobacterota | 5.39±4.64 |
| p__Verrucomicrobiota | 4.36±3.06 |
| p__Pseudomonadota | 3.75±3.76 |
| p__Desulfobacterota | 2.31±1.6 |
| p__Bacillota_C | 0.85±0.82 |
| p__Cyanobacteriota | 0.64±0.83 |
| p__Bacillota_B | 0.5±0.77 |
| p__Fusobacteriota | 0.37±1.28 |
| p__Spirochaetota | 0.12±0.35 |
| p__Actinomycetota | 0.09±0.22 |
5.1.2.2 Warm and dry population
phylum_summary %>%
left_join(sample_metadata, by = join_by(sample == Tube_code)) %>%
group_by(phylum, Population) %>%
filter(Population=="Hot_dry" & time_point=="Wild") %>%
summarise(total_mean=mean(relabun*100, na.rm=T),
total_sd=sd(relabun*100, na.rm=T)) %>%
filter(total_mean!=0) %>%
mutate(total=str_c(round(total_mean,2),"±",round(total_sd,2))) %>%
arrange(-total_mean) %>%
dplyr::select(phylum,total) %>%
tt()| phylum | total |
|---|---|
| p__Bacillota_A | 33.9±16.17 |
| p__Bacteroidota | 27.63±17.64 |
| p__Campylobacterota | 13.53±20.98 |
| p__Pseudomonadota | 10.92±9.94 |
| p__Bacillota | 6.09±7.66 |
| p__Spirochaetota | 2.66±2.78 |
| p__Desulfobacterota | 1.69±1.85 |
| p__Fusobacteriota | 1.34±1.73 |
| p__Bacillota_B | 0.76±0.73 |
| p__Bacillota_C | 0.73±0.5 |
| p__Cyanobacteriota | 0.52±0.65 |
| p__Chlamydiota | 0.12±0.18 |
| p__Verrucomicrobiota | 0.06±0.1 |
| p__Elusimicrobiota | 0.05±0.14 |
5.2 Taxonomy boxplot
5.2.1 Family
family_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS normalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(sample_metadata, by = join_by(sample == Tube_code)) %>% #append sample metadata
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
group_by(sample,family) %>%
summarise(relabun=sum(count))family_arrange <- family_summary %>%
group_by(family) %>%
summarise(mean=sum(relabun)) %>%
arrange(-mean) %>%
select(family) %>%
pull()
family_summary %>%
left_join(genome_metadata %>% select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
left_join(sample_metadata,by=join_by(sample==Tube_code)) %>%
filter(family %in% family_arrange[1:20]) %>%
mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=family, group=family, color=phylum)) +
scale_color_manual(values=phylum_colors[-8]) +
geom_jitter(alpha=0.5) +
facet_grid(.~type)+
theme_minimal() +
labs(y="Family", x="Relative abundance", color="Phylum")5.2.2 Genus
genus_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(sample_metadata, by = join_by(sample == Tube_code)) %>% #append sample metadata
left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
group_by(sample,genus) %>%
summarise(relabun=sum(count)) %>%
filter(genus != "g__")genus_arrange <- genus_summary %>%
group_by(genus) %>%
summarise(mean=sum(relabun)) %>%
filter(genus != "g__")%>%
arrange(-mean) %>%
select(genus) %>%
mutate(genus= sub("^g__", "", genus)) %>%
pull()
genus_summary %>%
left_join(genome_metadata %>% select(genus,phylum) %>% unique(),by=join_by(genus==genus)) %>%
left_join(sample_metadata,by=join_by(sample==Tube_code)) %>%
mutate(genus= sub("^g__", "", genus)) %>%
filter(genus %in% genus_arrange[1:20]) %>%
mutate(genus=factor(genus,levels=rev(genus_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=genus, group=genus, color=phylum)) +
scale_color_manual(values=phylum_colors[-c(3,4,6,8)]) +
geom_jitter(alpha=0.5) +
facet_grid(.~type)+
theme_minimal() +
labs(y="Genus", x="Relative abundance", color="Phylum")