Chapter 4 Community composition
4.1 Taxonomy overview
4.1.1 Stacked barplot
genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
filter(count > 0) %>% #filter 0 counts
ggplot(., aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
facet_grid(. ~ location, scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_blank(),
axis.ticks.x =element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_text(size=8),
axis.title.y = element_text(size=12),
panel.background = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
panel.border = element_rect(colour = "black", fill = NA),
strip.background = element_rect(fill = "white", color = "black"),
strip.text = element_text(size = 12, lineheight = 0.6)) +
labs(fill="Phylum",y = "Relative abundance",x="Samples")
genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
filter(count > 0) %>% #filter 0 counts
ggplot(., aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
facet_nested(. ~ location + origin, scales="free") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_blank(),
axis.ticks.x =element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_text(size=8),
axis.title.y = element_text(size=12),
panel.background = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
panel.border = element_rect(colour = "black", fill = NA),
strip.background = element_rect(fill = "white", color = "black"),
strip.text = element_text(size = 12, lineheight = 0.6)) +
labs(fill="Phylum",y = "Relative abundance",x="Samples")
genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
filter(count > 0) %>% #filter 0 counts
ggplot(., aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
scale_fill_manual(values=phylum_colors) +
facet_nested(. ~ origin + location, scales="free_x") + #facet per day and treatment
guides(fill = guide_legend(ncol = 1)) +
theme(axis.text.x = element_blank(),
axis.ticks.x =element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_text(size=8),
axis.title.y = element_text(size=12),
panel.background = element_blank(),
axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
panel.border = element_rect(colour = "black", fill = NA),
strip.background = element_rect(fill = "white", color = "black"),
strip.text = element_text(size = 10, lineheight = 0.6)) +
labs(fill="Phylum",y = "Relative abundance",x="Samples")
4.1.2 Phylum relative abundances
phylum_summary <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
left_join(genome_metadata, by = join_by(genome == genome)) %>%
group_by(sample,phylum,origin) %>%
summarise(relabun=sum(count))
phylum_summary %>%
group_by(phylum) %>%
summarise(total_mean=mean(relabun*100, na.rm=T),
total_sd=sd(relabun*100, na.rm=T),
domestic_mean=mean(relabun[origin=="Domestic"]*100, na.rm=T),
domestic_sd=sd(relabun[origin=="Domestic"]*100, na.rm=T),
feral_mean=mean(relabun[origin=="Feral"]*100, na.rm=T),
feral_sd=sd(relabun[origin=="Feral"]*100, na.rm=T)) %>%
mutate(total=str_c(round(total_mean,2),"±",round(total_sd,2)),
domestic=str_c(round(domestic_mean,2),"±",round(domestic_sd,2)),
feral=str_c(round(feral_mean,2),"±",round(feral_sd,2))) %>%
arrange(-total_mean) %>%
select(phylum,total,domestic,feral) %>%
tt()
phylum | total | domestic | feral |
---|---|---|---|
Bacteroidota | 29.82±17.98 | 31.9±17.35 | 27.24±18.63 |
Bacillota_A | 20.34±13.17 | 17.97±12.46 | 23.29±13.57 |
Actinomycetota | 18.39±16.05 | 18.27±15.93 | 18.55±16.41 |
Campylobacterota | 9.54±15.11 | 11.99±18.12 | 6.51±9.6 |
Bacillota_C | 7.2±8.54 | 6.87±8.66 | 7.61±8.46 |
Bacillota | 6.65±9.59 | 5.34±9.02 | 8.28±10.13 |
Pseudomonadota | 6.45±9.5 | 6.56±9.19 | 6.31±9.98 |
Fusobacteriota | 1.48±3.1 | 1±1.9 | 2.07±4.08 |
Desulfobacterota | 0.09±0.33 | 0.06±0.2 | 0.14±0.45 |
Cyanobacteria | 0.03±0.24 | 0.05±0.32 | 0.01±0.05 |
phylum_arrange <- phylum_summary %>%
group_by(phylum) %>%
summarise(mean=mean(relabun)) %>%
arrange(-mean) %>%
dplyr::select(phylum) %>%
pull()
phylum_summary %>%
filter(phylum %in% phylum_arrange) %>%
mutate(phylum=factor(phylum,levels=rev(phylum_arrange))) %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=phylum, group=phylum, color=phylum, fill=phylum)) +
scale_color_manual(values=phylum_colors[rev(phylum_arrange)]) +
scale_fill_manual(values=phylum_colors[-8]) +
geom_boxplot(alpha=0.2)+
geom_jitter(alpha=0.5) +
facet_nested(. ~ location)+
theme_minimal() +
theme(legend.position="none",
strip.text.x = element_text(size = 14, color="black",face="bold"),
axis.text.x = element_text(vjust = 0.5, size = 10),
axis.text.y = element_text(size = 12),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
axis.title = element_text(size = 14, face = "bold"),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0))
) +
labs(y="Phylum",x="Relative abundance")
phylum_summary %>%
filter(phylum %in% phylum_arrange) %>%
mutate(phylum=factor(phylum,levels=rev(phylum_arrange))) %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=phylum, group=phylum, color=phylum, fill=phylum)) +
scale_color_manual(values=phylum_colors[rev(phylum_arrange)]) +
scale_fill_manual(values=phylum_colors[-8]) +
geom_boxplot(alpha=0.2)+
geom_jitter(alpha=0.5) +
theme_minimal() +
theme(legend.position="none") +
labs(y="Phylum",x="Relative abundance")
4.1.3 Family
`summarise()` has grouped output by 'sample', 'family'. You can override using the `.groups` argument.
family | total | domestic | feral |
---|---|---|---|
Bacteroidaceae | 27.09±17.68 | 29.17±16.79 | 24.52±18.62 |
Lachnospiraceae | 12.12±10.54 | 10.12±10.13 | 14.62±10.63 |
Coriobacteriaceae | 9.56±9.52 | 9.92±10.64 | 9.12±8.03 |
Helicobacteraceae | 6.39±12.29 | 8.2±15.13 | 4.13±6.91 |
Bifidobacteriaceae | 4.37±7.2 | 4.19±6.03 | 4.6±8.51 |
Megasphaeraceae | 3.45±6.35 | 3.29±6.05 | 3.65±6.77 |
Ruminococcaceae | 3.33±3.48 | 3.29±3.81 | 3.37±3.07 |
Campylobacteraceae | 3.16±5.5 | 3.78±5.89 | 2.38±4.93 |
Actinomycetaceae | 2.62±7.94 | 2.65±8.06 | 2.58±7.88 |
Lactobacillaceae | 2.6±7.36 | 2.56±7.98 | 2.65±6.61 |
Succinivibrionaceae | 2.3±4.02 | 2.55±4.06 | 1.98±3.99 |
Enterobacteriaceae | 2.27±8.36 | 1.91±7.35 | 2.71±9.54 |
Erysipelotrichaceae | 1.7±3.28 | 1.3±2.79 | 2.2±3.78 |
Porphyromonadaceae | 1.67±6.7 | 1.73±5.87 | 1.61±7.69 |
Atopobiaceae | 1.55±4.11 | 1.03±2.72 | 2.2±5.32 |
Selenomonadaceae | 1.52±2.63 | 1.52±2.78 | 1.52±2.47 |
Fusobacteriaceae | 1.48±3.1 | 1±1.9 | 2.07±4.08 |
Dialisteraceae | 1.46±2.13 | 1.54±2.18 | 1.36±2.1 |
Burkholderiaceae | 1.46±1.54 | 1.67±1.69 | 1.18±1.29 |
Peptoniphilaceae | 1.21±4.39 | 1.62±4.35 | 0.7±4.44 |
Streptococcaceae | 1.19±4.98 | 0.43±1.79 | 2.13±7.12 |
Oscillospiraceae | 1.09±1.51 | 0.99±1.22 | 1.2±1.82 |
Clostridiaceae | 0.9±3.26 | 0.57±1.29 | 1.3±4.67 |
Acidaminococcaceae | 0.76±1.4 | 0.51±0.83 | 1.07±1.85 |
Erysipelatoclostridiaceae | 0.74±1.2 | 0.6±0.89 | 0.92±1.49 |
Peptostreptococcaceae | 0.65±0.97 | 0.41±0.64 | 0.93±1.22 |
Anaerovoracaceae | 0.47±1.18 | 0.49±1.41 | 0.45±0.83 |
Pasteurellaceae | 0.4±2.49 | 0.38±2.28 | 0.44±2.76 |
Marinifilaceae | 0.36±1.06 | 0.27±1.06 | 0.48±1.05 |
Rikenellaceae | 0.36±1.49 | 0.42±1.77 | 0.28±1.05 |
Acutalibacteraceae | 0.29±0.58 | 0.22±0.47 | 0.38±0.68 |
Mycobacteriaceae | 0.27±1.72 | 0.47±2.3 | 0.02±0.08 |
Enterococcaceae | 0.25±0.8 | 0.24±0.89 | 0.27±0.69 |
Tannerellaceae | 0.22±0.5 | 0.23±0.51 | 0.21±0.5 |
Butyricicoccaceae | 0.18±0.39 | 0.18±0.36 | 0.19±0.43 |
UBA660 | 0.13±0.76 | 0.18±0.94 | 0.07±0.44 |
Desulfovibrionaceae | 0.09±0.33 | 0.06±0.2 | 0.14±0.45 |
Muribaculaceae | 0.08±0.35 | 0.05±0.34 | 0.12±0.36 |
Anaerotignaceae | 0.06±0.23 | 0.02±0.07 | 0.1±0.33 |
CAG-508 | 0.04±0.14 | 0.05±0.17 | 0.02±0.09 |
CAG-826 | 0.03±0.17 | 0.03±0.14 | 0.03±0.2 |
Gastranaerophilaceae | 0.03±0.24 | 0.05±0.32 | 0.01±0.05 |
Barnesiellaceae | 0.03±0.11 | 0.03±0.11 | 0.03±0.11 |
CAG-239 | 0.02±0.22 | 0.05±0.29 | 0±0 |
UMGS124 | 0.02±0.13 | 0.01±0.04 | 0.03±0.19 |
UBA1381 | 0.01±0.07 | 0±0 | 0.02±0.11 |
family_summary %>%
left_join(genome_metadata %>% dplyr::select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
left_join(sample_metadata,by=join_by(sample==sample)) %>%
filter(family %in% family_arrange[1:20]) %>%
mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=family, group=family, color=phylum, fill=phylum)) +
scale_color_manual(values=phylum_colors[-8]) +
scale_fill_manual(values=phylum_colors[-8]) +
geom_boxplot(alpha=0.2)+
geom_jitter(alpha=0.5) +
facet_grid(.~location)+
theme_minimal() +
theme(legend.position="none",
strip.text.x = element_text(size = 14, color="black",face="bold"),
axis.text.x = element_text(vjust = 0.5, size = 6),
axis.text.y = element_text(size = 12),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
axis.title = element_text(size = 14, face = "bold"),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)))+
labs(y="Family", x="Relative abundance", color="Phylum")
family_summary %>%
left_join(genome_metadata %>% dplyr::select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
left_join(sample_metadata,by=join_by(sample==sample)) %>%
filter(family %in% family_arrange[1:20]) %>%
mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=family, group=family, color=phylum, fill=phylum)) +
scale_color_manual(values=phylum_colors[-8]) +
scale_fill_manual(values=phylum_colors[-8]) +
geom_boxplot(alpha=0.2)+
geom_jitter(alpha=0.5) +
facet_grid(.~location)+
theme_minimal() +
theme(legend.position="none",
strip.text.x = element_text(size = 14, color="black",face="bold"),
axis.text.x = element_text(vjust = 0.5, size = 6),
axis.text.y = element_text(size = 12),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
axis.title = element_text(size = 14, face = "bold"),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)))+
labs(y="Family", x="Relative abundance", color="Phylum")
4.1.4 Genus
`summarise()` has grouped output by 'sample', 'genus'. You can override using the `.groups` argument.
genus | total | domestic | feral |
---|---|---|---|
Prevotella | 16.47±15.19 | 18.73±15.43 | 13.66±14.58 |
Collinsella | 9.56±9.52 | 9.92±10.64 | 9.12±8.03 |
Phocaeicola | 5.36±6.66 | 5.03±5.72 | 5.76±7.72 |
Helicobacter_B | 5.21±11.59 | 6.48±14.28 | 3.64±6.77 |
Bifidobacterium | 4.37±7.2 | 4.19±6.03 | 4.6±8.51 |
Bacteroides | 3.76±4.85 | 4±5.32 | 3.46±4.24 |
Megasphaera | 3.43±6.33 | 3.28±6.04 | 3.62±6.75 |
Campylobacter_D | 3.16±5.5 | 3.78±5.89 | 2.38±4.93 |
Roseburia | 2.6±5.97 | 2.21±5.31 | 3.09±6.74 |
Blautia_A | 2.46±2.74 | 2.08±2.44 | 2.93±3.04 |
Anaerobiospirillum | 2.21±4.02 | 2.44±4.06 | 1.94±4 |
Escherichia | 1.85±7.36 | 1.2±4.96 | 2.66±9.55 |
Negativibacillus | 1.84±2.22 | 1.68±2.14 | 2.04±2.32 |
Blautia | 1.71±2.37 | 1.38±2.27 | 2.11±2.47 |
Porphyromonas_A | 1.67±6.7 | 1.73±5.87 | 1.61±7.69 |
Pauljensenia | 1.57±6.05 | 0.76±3.94 | 2.58±7.88 |
Prevotellamassilia | 1.49±2.9 | 1.38±2.5 | 1.63±3.35 |
UBA7748 | 1.41±4.03 | 0.94±2.67 | 1.99±5.23 |
Megamonas | 1.4±2.61 | 1.36±2.74 | 1.46±2.47 |
Dialister | 1.35±2 | 1.47±2.07 | 1.21±1.93 |
Sutterella | 1.31±1.45 | 1.53±1.59 | 1.05±1.22 |
Fusobacterium_B | 1.27±2.87 | 0.94±1.9 | 1.69±3.73 |
Ligilactobacillus | 1.24±3.91 | 1.08±3.47 | 1.44±4.43 |
Streptococcus | 1.14±4.85 | 0.43±1.78 | 2.01±6.94 |
Lactobacillus | 1.13±4.48 | 1.41±5.16 | 0.78±3.47 |
Helicobacter_A | 1.12±3.23 | 1.69±3.93 | 0.43±1.88 |
Trueperella | 1.05±5.29 | 1.89±7.02 | 0±0 |
Ruminococcus_B | 0.92±1.84 | 0.8±1.51 | 1.07±2.19 |
Clostridium_Q | 0.91±1.77 | 0.69±1.17 | 1.19±2.29 |
Holdemanella | 0.9±1.88 | 0.73±1.99 | 1.11±1.74 |
Clostridium_P | 0.88±3.25 | 0.56±1.3 | 1.27±4.65 |
Bulleidia | 0.79±2 | 0.56±1.71 | 1.08±2.31 |
Faecalibacterium | 0.73±1.55 | 0.75±1.79 | 0.71±1.23 |
Catenibacterium | 0.73±1.18 | 0.59±0.88 | 0.9±1.46 |
Gemmiger | 0.68±1.21 | 0.76±1.22 | 0.58±1.2 |
Peptacetobacter | 0.65±0.97 | 0.41±0.64 | 0.93±1.22 |
Peptoniphilus_A | 0.62±2.68 | 0.81±2.86 | 0.39±2.45 |
NA | 0.57±1.99 | 0.76±1.98 | 0.34±1.99 |
Faecalimonas | 0.56±1.04 | 0.36±0.89 | 0.8±1.16 |
Lawsonibacter | 0.49±0.73 | 0.52±0.84 | 0.46±0.58 |
Acidaminococcus | 0.46±1.26 | 0.29±0.58 | 0.67±1.77 |
Plesiomonas | 0.42±2.05 | 0.72±2.73 | 0.05±0.19 |
Histophilus | 0.4±2.49 | 0.38±2.28 | 0.44±2.76 |
Eisenbergiella | 0.36±0.61 | 0.35±0.62 | 0.38±0.61 |
Alistipes | 0.36±1.49 | 0.42±1.77 | 0.28±1.05 |
Odoribacter | 0.34±1.06 | 0.24±1.06 | 0.46±1.05 |
Phascolarctobacterium_A | 0.29±0.74 | 0.21±0.59 | 0.39±0.88 |
CAG-81 | 0.29±0.48 | 0.24±0.42 | 0.35±0.54 |
Catenibacillus | 0.28±0.46 | 0.19±0.36 | 0.38±0.55 |
Sellimonas | 0.26±0.53 | 0.26±0.52 | 0.25±0.54 |
CAG-317 | 0.25±0.45 | 0.2±0.34 | 0.31±0.55 |
Ruminococcus_A | 0.25±0.34 | 0.2±0.29 | 0.31±0.39 |
UMGS905 | 0.23±0.51 | 0.16±0.4 | 0.31±0.61 |
Parabacteroides | 0.22±0.5 | 0.23±0.51 | 0.21±0.5 |
Mediterraneibacter | 0.22±0.67 | 0.24±0.62 | 0.19±0.73 |
Fusobacterium_A | 0.2±0.78 | 0.06±0.24 | 0.38±1.13 |
Dysosmobacter | 0.2±0.34 | 0.23±0.37 | 0.17±0.3 |
Limosilactobacillus | 0.19±0.77 | 0.07±0.36 | 0.34±1.08 |
S5-A14a | 0.18±1.04 | 0.31±1.39 | 0.02±0.09 |
Schaedlerella | 0.18±0.39 | 0.19±0.5 | 0.16±0.2 |
Butyricicoccus | 0.17±0.39 | 0.16±0.35 | 0.19±0.43 |
Flavonifractor | 0.16±0.33 | 0.16±0.37 | 0.16±0.28 |
Corynebacterium | 0.16±1.31 | 0.29±1.76 | 0±0 |
Enterocloster | 0.14±0.21 | 0.11±0.2 | 0.18±0.23 |
Parolsenella | 0.14±0.34 | 0.09±0.14 | 0.2±0.47 |
CAG-110 | 0.14±0.81 | 0.02±0.1 | 0.28±1.19 |
CAG-521 | 0.13±0.62 | 0.15±0.74 | 0.12±0.44 |
VUNA01 | 0.12±0.49 | 0.06±0.17 | 0.19±0.7 |
Mitsuokella | 0.12±0.51 | 0.16±0.62 | 0.06±0.32 |
CAG-877 | 0.12±0.66 | 0.15±0.8 | 0.07±0.44 |
Lawsonella | 0.11±0.57 | 0.18±0.75 | 0.02±0.08 |
Allisonella | 0.11±0.24 | 0.07±0.17 | 0.15±0.29 |
Enterococcus_E | 0.1±0.6 | 0.15±0.8 | 0.04±0.12 |
Enterococcus_B | 0.09±0.41 | 0.04±0.23 | 0.16±0.56 |
Lachnospira | 0.09±0.29 | 0.11±0.36 | 0.05±0.15 |
UMGS1472 | 0.09±0.15 | 0.08±0.15 | 0.1±0.15 |
CAG-279 | 0.08±0.35 | 0.05±0.34 | 0.12±0.36 |
Desulfovibrio | 0.08±0.32 | 0.05±0.17 | 0.13±0.43 |
UMGS1370 | 0.08±0.21 | 0.06±0.16 | 0.1±0.26 |
UBA9502 | 0.07±0.13 | 0.06±0.1 | 0.1±0.16 |
Dorea_B | 0.07±0.2 | 0.07±0.23 | 0.08±0.17 |
Robinsoniella | 0.07±0.17 | 0.07±0.19 | 0.08±0.15 |
Eubacterium_M | 0.06±0.35 | 0.05±0.32 | 0.08±0.38 |
Enterococcus | 0.06±0.38 | 0.05±0.37 | 0.07±0.41 |
Fusicatenibacter | 0.06±0.27 | 0.06±0.23 | 0.06±0.32 |
Emergencia | 0.06±0.12 | 0.04±0.1 | 0.07±0.15 |
Anaerotignum | 0.06±0.23 | 0.02±0.07 | 0.1±0.33 |
Lactococcus | 0.05±0.34 | 0±0.01 | 0.12±0.51 |
Evtepia | 0.05±0.14 | 0.05±0.11 | 0.06±0.16 |
Succinivibrio | 0.05±0.22 | 0.07±0.25 | 0.04±0.16 |
Bariatricus | 0.05±0.18 | 0.03±0.13 | 0.08±0.22 |
Anaerobutyricum | 0.04±0.09 | 0.03±0.07 | 0.06±0.1 |
CAG-354 | 0.04±0.14 | 0.05±0.17 | 0.02±0.09 |
Dorea_A | 0.04±0.27 | 0±0.04 | 0.08±0.4 |
Peptoniphilus_C | 0.04±0.36 | 0.07±0.49 | 0±0 |
UMGS1872 | 0.04±0.24 | 0.01±0.04 | 0.08±0.36 |
Hungatella_A | 0.04±0.17 | 0.04±0.21 | 0.03±0.12 |
Mobilibacterium | 0.04±0.16 | 0.01±0.04 | 0.07±0.24 |
Latilactobacillus | 0.04±0.34 | 0±0 | 0.08±0.5 |
Eubacterium_H | 0.03±0.23 | 0±0 | 0.07±0.35 |
UBA4855 | 0.03±0.17 | 0.03±0.14 | 0.03±0.2 |
Helicobacter_C | 0.03±0.21 | 0.02±0.1 | 0.05±0.29 |
Clostridium_A | 0.03±0.23 | 0±0 | 0.07±0.35 |
Zag111 | 0.03±0.24 | 0.05±0.32 | 0.01±0.05 |
Barnesiella | 0.03±0.11 | 0.03±0.11 | 0.03±0.11 |
Butyricimonas | 0.03±0.11 | 0.03±0.12 | 0.03±0.1 |
Anaerobiospirillum_A | 0.03±0.14 | 0.04±0.19 | 0.01±0.04 |
Fournierella | 0.03±0.12 | 0.03±0.15 | 0.02±0.06 |
Phocea | 0.03±0.09 | 0.03±0.12 | 0.02±0.04 |
Ruminococcus_C | 0.03±0.13 | 0.05±0.18 | 0±0 |
CAG-495 | 0.02±0.22 | 0.05±0.29 | 0±0 |
Ruminococcus_E | 0.02±0.21 | 0.04±0.28 | 0±0.01 |
Caecibacter | 0.02±0.1 | 0.01±0.05 | 0.03±0.14 |
Helicobacter_D | 0.02±0.14 | 0.02±0.17 | 0.02±0.08 |
Erysipelatoclostridium | 0.02±0.08 | 0.01±0.07 | 0.03±0.09 |
Clostridium | 0.02±0.1 | 0.01±0.04 | 0.03±0.15 |
Paraprevotella | 0.02±0.15 | 0.03±0.2 | 0±0 |
CAG-145 | 0.02±0.07 | 0.02±0.07 | 0.02±0.06 |
Eubacterium_G | 0.02±0.11 | 0.03±0.14 | 0±0 |
Anaerostipes | 0.02±0.07 | 0±0.02 | 0.03±0.11 |
CAG-988 | 0.01±0.1 | 0.03±0.14 | 0±0 |
Absicoccus | 0.01±0.05 | 0.02±0.06 | 0.01±0.04 |
Phascolarctobacterium | 0.01±0.11 | 0.02±0.15 | 0±0.03 |
Marseille-P4683 | 0.01±0.06 | 0.02±0.07 | 0±0 |
AM07-15 | 0.01±0.05 | 0.02±0.07 | 0±0.02 |
Mailhella | 0.01±0.04 | 0.01±0.04 | 0.01±0.05 |
Parasutterella | 0.01±0.08 | 0±0 | 0.02±0.12 |
Levilactobacillus | 0.01±0.08 | 0±0 | 0.02±0.12 |
CAG-41 | 0.01±0.07 | 0±0 | 0.02±0.11 |
genus_summary %>%
left_join(genome_metadata %>% dplyr::select(genus,phylum) %>% unique(),by="genus") %>%
left_join(sample_metadata,by=join_by(sample==sample)) %>%
filter(genus %in% genus_arrange[1:20]) %>%
mutate(genus=factor(genus,levels=rev(genus_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=genus, group=genus, color=phylum, fill=phylum)) +
scale_color_manual(values=phylum_colors[-8]) +
scale_fill_manual(values=phylum_colors[-8]) +
geom_boxplot(alpha=0.2)+
geom_jitter(alpha=0.5) +
facet_grid(.~location)+
theme_minimal() +
theme(legend.position="none",
strip.text.x = element_text(size = 14, color="black",face="bold"),
axis.text.x = element_text(vjust = 0.5, size = 6),
axis.text.y = element_text(size = 12),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
axis.title = element_text(size = 14, face = "bold"),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)))+
labs(y="Family", x="Relative abundance", color="Phylum")
genus_summary %>%
left_join(genome_metadata %>% dplyr::select(genus,phylum) %>% unique(),by="genus") %>%
filter(genus %in% genus_arrange[1:20]) %>%
mutate(genus=factor(genus,levels=rev(genus_arrange[1:20]))) %>%
filter(relabun > 0) %>%
ggplot(aes(x=relabun, y=genus, group=genus, color=phylum, fill=phylum)) +
scale_color_manual(values=phylum_colors[-8]) +
scale_fill_manual(values=phylum_colors[-8]) +
geom_boxplot(alpha=0.2)+
geom_jitter(alpha=0.5) +
facet_grid(.~origin)+
theme_minimal() +
theme(legend.position="none",
strip.text.x = element_text(size = 14, color="black",face="bold"),
axis.text.x = element_text(vjust = 0.5, size = 6),
axis.text.y = element_text(size = 12),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
axis.title = element_text(size = 14, face = "bold"),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)))+
labs(y="Family", x="Relative abundance", color="Phylum")
4.2 Core microbiota
genome_counts_rel <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>%
column_to_rownames(., "genome")
genome_counts_rel_pa=1*(genome_counts_rel>0)
#MAGrel_pa[1:6,1:6]
table_upset_analysis_cont=t(aggregate(t(genome_counts_rel_pa),by=list(sample_metadata$location),FUN=sum)[,-1])
colnames(table_upset_analysis_cont)=levels(as.factor(sample_metadata$location))
table_upset_analysis=(table_upset_analysis_cont>0)*1
table_upset_analysis=data.frame(table_upset_analysis)
table_upset_analysis=apply(table_upset_analysis,2,as.integer)
rownames(table_upset_analysis) <- rownames(genome_counts_rel_pa)
#pdf("figures/MAG_intersection.pdf",width=8,height=6, onefile=F)
upset(as.data.frame(table_upset_analysis),
keep.order = T,
sets = rev(c("Aruba","Brazil","CaboVerde","Spain","Denmark","Malaysia")),
sets.bar.color= rev(location_colors),
mb.ratio = c(0.55, 0.45), order.by = "freq")
mag_prevalence_location <- genome_counts_filt %>%
mutate_at(vars(-genome),~./sum(.)) %>%
pivot_longer(!genome,names_to="sample",values_to="abundance") %>%
left_join(sample_metadata,by="sample") %>%
mutate(presence=ifelse(abundance>0,1,0)) %>%
select(genome,sample,presence,location,origin) %>%
group_by(genome,location,origin) %>%
summarise(presence=ifelse(sum(presence)>0,1,0)) %>%
group_by(genome,origin) %>%
summarise(presence=sum(presence))
mag_prevalence_location %>%
group_by(origin) %>%
summarise(mean=mean(presence),sd=sd(presence)) %>%
tt()
origin | mean | sd |
---|---|---|
Domestic | 3.122271 | 1.933745 |
Feral | 3.187773 | 2.033627 |
# A tibble: 1 × 4
statistic p.value method alternative
<dbl> <dbl> <chr> <chr>
1 25706. 0.713 Wilcoxon rank sum test with continuity correction two.sided