Chapter 8 Metabarcoding selected filtering based on alpha diversity (t3_p10)
load("resources/metagenomics/data_filtered.Rdata")
load("resources/amplicon/selected_filtering.Rdata")
species_color <- c("#e5bd5b", "#6b7398", "#76b183")Number of MAGs
165
Number of Archaea phyla
genome_metadata_amplicon %>%
filter(domain == "Archaea")%>%
dplyr::select(phylum) %>%
unique() %>%
pull() %>%
length()%>%
cat()0
Number of Bacteria phyla
genome_metadata_amplicon %>%
filter(domain == "Bacteria")%>%
dplyr::select(phylum) %>%
unique() %>%
pull() %>%
length()%>%
cat()9
8.1 Taxonomy
genome_counts_filt_amplicon %>%
mutate_at(vars(-genome), ~ . / sum(.)) %>%
pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
left_join(., genome_metadata_amplicon, by = join_by(genome == genome)) %>%
left_join(., sample_metadata_amplicon, by = join_by(sample == sample)) %>%
filter(count > 0) %>% #filter 0 counts
ggplot(., aes(
x = sample,
y = count,
fill = phylum,
group = phylum
)) +
geom_bar(stat = "identity",
colour = "white",
linewidth = 0.1) +
scale_fill_manual(values = phylum_colors) +
facet_nested( ~ factor(
Species,
labels = c("Eb" = "Cnephaeus", "Ha" = "Hypsugo", "Pk" = "Pipistrellus")
), scales = "free") +
guides(fill = guide_legend(ncol = 1)) +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.text = element_text(size = 10),
strip.background = element_rect(fill = "white"),
strip.text = element_text(
size = 12,
lineheight = 0.6,
face = "bold"
),
axis.line = element_line(
linewidth = 0.5,
linetype = "solid",
colour = "black"
)
) +
labs(fill = "Phylum", y = "Relative abundance", x = "Samples")+
guides(fill = guide_legend(ncol = 1))Grouping low-abundance bacteria
p1 <- genome_counts_filt %>%
mutate_at(vars(-genome), ~ . / sum(.)) %>%
pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
left_join(genome_metadata, by = join_by(genome == genome)) %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
filter(count > 0) %>%
group_by(sample, phylum) %>%
mutate(total_abundance = sum(count)) %>%
ungroup() %>%
mutate(phylum = if_else(total_abundance < 0.01, "Other", phylum)) %>%
group_by(sample, phylum, Species) %>%
summarise(count = sum(count), .groups = "drop") %>%
ggplot(aes(
x = sample,
y = count,
fill = phylum,
group = phylum
)) +
geom_bar(stat = "identity",
colour = "white",
linewidth = 0.1) +
scale_fill_manual(values = c(phylum_colors, "Other" = "grey50"),
drop = FALSE)+
facet_nested(~ factor(
Species,
labels = c("Eb" = "Cnephaeus", "Ha" = "Hypsugo", "Pk" = "Pipistrellus")
), scales = "free") +
guides(fill = guide_legend(ncol = 1)) +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.text = element_text(size = 10),
strip.background = element_rect(fill = "white"),
strip.text = element_text(
size = 12,
lineheight = 0.6,
face = "bold"
),
axis.line = element_line(
linewidth = 0.5,
linetype = "solid",
colour = "black"
)
) +
labs(fill = "Phylum", y = "Relative abundance", x = "Samples")+
guides(fill = guide_legend(ncol = 1))
#ggsave("community_plot_grouped_selected.pdf", plot = p1, width = 12, height = 6)
p1
Phylum relative abundances
phylum_summary <- genome_counts_filt_amplicon %>%
mutate_at(vars(-genome),~./sum(.)) %>%
pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
left_join(sample_metadata_amplicon, by = join_by(sample == sample)) %>%
left_join(genome_metadata_amplicon, by = join_by(genome == genome)) %>%
group_by(sample,phylum,Species) %>%
summarise(relabun=sum(count))phylum_summary %>%
group_by(phylum) %>%
summarise(
Total_mean = mean(relabun * 100, na.rm = T),
Total_sd = sd(relabun * 100, na.rm = T),
Eb_mean = mean(relabun[Species == "Eb"] * 100, na.rm = T),
Eb_sd = sd(relabun[Species == "Eb"] * 100, na.rm = T),
Ha_mean = mean(relabun[Species == "Ha"] * 100, na.rm = T),
Ha_sd = sd(relabun[Species == "Ha"] * 100, na.rm = T),
Pk_mean = mean(relabun[Species == "Pk"] * 100, na.rm = T),
Pk_sd = sd(relabun[Species == "Pk"] * 100, na.rm = T)
) %>%
mutate(
Total = str_c(round(Total_mean, 3), "±", round(Total_sd, 3)),
Cnephaeus = str_c(round(Eb_mean, 3), "±", round(Eb_sd, 3)),
Hypsugo = str_c(round(Ha_mean, 3), "±", round(Ha_sd, 3)),
Pipistrellus = str_c(round(Pk_mean, 3), "±", round(Pk_sd, 3))
) %>%
arrange(-Eb_mean) %>%
dplyr::select(phylum,
Total,
Cnephaeus,
Hypsugo,
Pipistrellus) %>%
tt()| phylum | Total | Cnephaeus | Hypsugo | Pipistrellus |
|---|---|---|---|---|
| Pseudomonadota | 59.638±31.639 | 58.193±32.766 | 69.011±27.096 | 50.012±34.371 |
| Bacillota | 31.961±27.296 | 23.271±24.626 | 28.927±25.913 | 40.465±29.393 |
| Fusobacteriota | 2.21±5.446 | 5.588±7.617 | 0.056±0.239 | 2.631±6.155 |
| Desulfobacterota | 2.321±6.038 | 4.09±5.942 | 0.078±0.28 | 3.788±8.506 |
| Bacteroidota | 1.085±4.589 | 3.755±9.296 | 0.644±2.044 | 0.006±0.019 |
| Patescibacteria | 0.549±3.201 | 2.523±6.759 | 0±0 | 0±0 |
| Rs-K70 termite group | 1.586±4.999 | 1.3±3.51 | 0.823±3.474 | 2.606±6.923 |
| Synergistota | 0.325±0.914 | 0.666±1.408 | 0.007±0.033 | 0.478±1.007 |
| Campylobacterota | 0.326±1.007 | 0.613±1.18 | 0.454±1.297 | 0.014±0.059 |
bats = c("Eb", "Pk", "Ha")
total_asvs <- data.frame(
Bat = character(),
ASVs = numeric(),
Phylum = numeric(),
Family = numeric(),
Genus = numeric()
)
preabs_table <- genome_counts_filt_amplicon %>%
mutate(across(-genome, ~ . / sum(.))) %>%
column_to_rownames("genome") %>%
mutate(across(everything(), ~ as.integer(. > 0))) %>%
t() %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
left_join(sample_metadata_amplicon[c("sample", "Species")], by = "sample") %>%
group_by(Species) %>%
summarize(across(-sample, sum), .groups = "drop") %>%
column_to_rownames("Species") %>%
t() %>%
as.data.frame() %>%
rownames_to_column("genome") %>%
left_join(genome_metadata_amplicon, by = join_by("genome" == "genome")) %>%
filter(domain=="Bacteria")
phylum <- preabs_table %>%
distinct(phylum)
family <- preabs_table %>%
distinct(phylum, class, order, family)
genus <- preabs_table %>%
distinct(phylum, class, order, family, genus)
total_asvs <- rbind(
total_asvs,
data.frame(
Bat = "Total",
ASVs = nrow(preabs_table),
Phylum = nrow(phylum),
Family = nrow(family),
Genus = nrow(genus)
)
)
for (bat in bats) {
number <- preabs_table %>%
select({{bat}}) %>%
filter(. >= 1)
phylum <- preabs_table %>%
select({{bat}}, phylum) %>%
filter(!!sym(bat) >= 1) %>%
distinct(phylum)
family <- preabs_table %>%
select({{bat}}, phylum, class, order, family) %>%
filter(!!sym(bat) >= 1) %>%
distinct(phylum, class, order, family)
genus <- preabs_table %>%
select({{bat}}, phylum, class, order, family, genus) %>%
filter(!!sym(bat) >= 1) %>%
distinct(phylum, class, order, family, genus)
total_asvs <- rbind(
total_asvs,
data.frame(
Bat = bat,
ASVs = nrow(number),
Phylum = nrow(phylum),
Family = nrow(family),
Genus = nrow(genus)
)
)
}bats = c("Eb", "Pk", "Ha")
no_annotation <- data.frame(Bat = character(),
No_genus = numeric(),
No_species = numeric())
preabs_table <- genome_counts_filt_amplicon %>%
mutate(across(-genome, ~ . / sum(.))) %>%
column_to_rownames("genome") %>%
mutate(across(everything(), ~ as.integer(. > 0))) %>%
t() %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
left_join(sample_metadata_amplicon[c("sample", "Species")], by = "sample") %>%
group_by(Species) %>%
summarize(across(-sample, sum), .groups = "drop") %>%
column_to_rownames("Species") %>%
t() %>%
as.data.frame() %>%
rownames_to_column("genome") %>%
left_join(genome_metadata_amplicon, by = join_by("genome" == "genome")) %>%
filter(domain=="Bacteria")
genus <- preabs_table %>%
filter(is.na(genus))
species <- preabs_table %>%
filter(is.na(species))
no_annotation <- rbind(no_annotation,
data.frame(
Bat = "Total",
No_genus = nrow(genus),
No_species = nrow(species)
))
for (bat in bats) {
number <- preabs_table %>%
select({{bat}}) %>%
filter(. >= 1)
genus <- preabs_table %>%
select({{bat}}, phylum, class, order, family, genus) %>%
filter(!!sym(bat) >= 1) %>%
filter(is.na(genus))
species <- preabs_table %>%
filter(!!sym(bat) >= 1) %>%
filter(is.na(species))
no_annotation <- rbind(no_annotation,
data.frame(
Bat = bat,
No_genus = nrow(genus),
No_species = nrow(species)
))
}Total percentage of ASVs without genus-level annotation
nongenera <- genome_metadata_amplicon %>%
filter(domain=="Bacteria") %>%
filter(is.na(genus)) %>%
summarize(ASV_nogenera = n()) %>%
pull()
nasvs <- total_asvs %>%
filter(Bat == "Total") %>%
select(ASVs) %>%
pull()
perct <- nongenera * 100 / nasvs
cat(perct)22.42424
Percentage of ASVs without genus-level annotation by phylum
total_asv_phylum <- genome_metadata_amplicon %>%
filter(domain=="Bacteria") %>%
group_by(phylum) %>%
summarize(Total_ASVs = n())
genome_metadata_amplicon %>%
filter(is.na(genus)) %>%
group_by(phylum) %>%
summarize(ASVs_nogenus = n()) %>%
left_join(total_asv_phylum, by = join_by(phylum == phylum)) %>%
mutate(Percentage_nogenus = 100 * ASVs_nogenus / Total_ASVs) %>%
tt()| phylum | ASVs_nogenus | Total_ASVs | Percentage_nogenus |
|---|---|---|---|
| Bacillota | 12 | 57 | 21.052632 |
| Bacteroidota | 1 | 12 | 8.333333 |
| Patescibacteria | 4 | 6 | 66.666667 |
| Pseudomonadota | 16 | 71 | 22.535211 |
| Rs-K70 termite group | 4 | 4 | 100.000000 |
Number of bacterial species
genome_metadata_amplicon %>%
filter(domain == "Bacteria")%>%
dplyr::select(species) %>%
unique() %>%
drop_na() %>%
pull() %>%
length() %>%
cat()10
Total percentage of ASVs without species-level annotation
nonspecies <- genome_metadata_amplicon %>%
filter(domain=="Bacteria") %>%
filter(is.na(species)) %>%
summarize(ASV_nospecies = n()) %>%
pull()
perct <- nonspecies * 100 / nasvs
cat(perct)90.90909
ASVs without species-level annotation
total_mag_phylum <- genome_metadata_amplicon %>%
filter(domain=="Bacteria") %>%
group_by(phylum) %>%
summarize(ASVs_total = n())
genome_metadata_amplicon %>%
filter(domain=="Bacteria") %>%
filter(is.na(species)) %>%
group_by(phylum) %>%
summarize(ASVs_nospecies = n()) %>%
left_join(total_mag_phylum, by = join_by(phylum == phylum)) %>%
mutate(Species_annotated = ASVs_total - ASVs_nospecies) %>%
mutate(Percentage_nospecies = 100 * ASVs_nospecies / ASVs_total) %>%
mutate(Percentage_species = 100 - 100 * ASVs_nospecies / ASVs_total) %>%
tt()| phylum | ASVs_nospecies | ASVs_total | Species_annotated | Percentage_nospecies | Percentage_species |
|---|---|---|---|---|---|
| Bacillota | 53 | 57 | 4 | 92.98246 | 7.017544 |
| Bacteroidota | 10 | 12 | 2 | 83.33333 | 16.666667 |
| Campylobacterota | 2 | 2 | 0 | 100.00000 | 0.000000 |
| Desulfobacterota | 8 | 8 | 0 | 100.00000 | 0.000000 |
| Fusobacteriota | 3 | 4 | 1 | 75.00000 | 25.000000 |
| Patescibacteria | 6 | 6 | 0 | 100.00000 | 0.000000 |
| Pseudomonadota | 63 | 71 | 8 | 88.73239 | 11.267606 |
| Rs-K70 termite group | 4 | 4 | 0 | 100.00000 | 0.000000 |
| Synergistota | 1 | 1 | 0 | 100.00000 | 0.000000 |
8.1.1 Summary table
bats = c("Eb", "Pk", "Ha")
single_sp <- data.frame(Bat = character(), Single_species = numeric())
bacteria <- genome_metadata_amplicon %>%
filter(domain=="Bacteria")
table_upset_analysis <- genome_counts_filt_amplicon %>%
mutate(across(-genome, ~ . / sum(.))) %>%
filter(genome %in% bacteria$genome) %>%
column_to_rownames("genome") %>%
mutate(across(everything(), ~ as.integer(. > 0))) %>%
t() %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
left_join(sample_metadata_amplicon[c("sample", "Species")], by = "sample") %>%
group_by(Species) %>%
summarize(across(-sample, sum), .groups = "drop") %>%
column_to_rownames("Species") %>%
mutate(across(everything(), ~ as.integer(. > 0))) %>%
t() %>%
as.data.frame() %>%
rownames_to_column("sample")
unique_all <- table_upset_analysis %>%
filter(rowSums(across(Eb:Pk)) == 1)
single_sp <- rbind(single_sp, data.frame(Bat = "Total",
Single_species = nrow(unique_all)))
for (bat in bats) {
unique <- table_upset_analysis %>%
filter(rowSums(across(Eb:Pk)) == 1) %>%
column_to_rownames(., "sample") %>%
select(bat) %>%
filter(. > 0) %>%
nrow()
single_sp <- rbind(single_sp, data.frame(Bat = bat, Single_species = unique))
}single_ind <- data.frame(Bat = character(), Single_individual = numeric())
freq_table <- genome_counts_filt_amplicon %>%
mutate(across(-genome, ~ . / sum(.))) %>%
filter(genome %in% bacteria$genome) %>%
column_to_rownames("genome") %>%
mutate(across(everything(), ~ as.integer(. > 0))) %>%
t() %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
left_join(sample_metadata_amplicon[c("sample", "Species")], by = "sample") %>%
group_by(Species) %>%
summarize(across(-sample, sum), .groups = "drop") %>%
column_to_rownames("Species") %>%
t() %>%
as.data.frame() %>%
rownames_to_column("asv")
singleton_filt <- freq_table %>%
rowwise() %>%
mutate(row_sum = sum(c_across(-asv))) %>%
filter(row_sum == 1) %>%
column_to_rownames(var = "asv") %>%
filter(row_sum == 1)
single_ind <- rbind(single_ind, data.frame(
Bat = "Total",
Single_individual = nrow(singleton_filt)
))
for (bat in bats) {
singleton_filt <- freq_table %>%
rowwise() %>%
mutate(row_sum = sum(c_across(-asv))) %>%
filter(row_sum == 1) %>%
column_to_rownames(var = "asv") %>%
select(bat) %>%
filter(. == 1)
single_ind <- rbind(single_ind, data.frame(
Bat = bat,
Single_individual = nrow(singleton_filt)
))
}summary_table_ampli <- total_asvs %>%
left_join(., no_annotation, by = "Bat") %>%
left_join(., single_ind, by = "Bat") %>%
left_join(., single_sp, by = "Bat") %>%
mutate(Bat = recode(Bat,
Eb = "Cnephaeus bottae",
Ha = "Hypsugo ariel",
Pk = "Pipistrellus kuhlii"))
summary_table_ampli %>%
tt()| Bat | ASVs | Phylum | Family | Genus | No_genus | No_species | Single_individual | Single_species |
|---|---|---|---|---|---|---|---|---|
| Total | 165 | 9 | 51 | 71 | 37 | 150 | 31 | 57 |
| Cnephaeus bottae | 140 | 9 | 46 | 61 | 30 | 127 | 31 | 46 |
| Pipistrellus kuhlii | 88 | 8 | 35 | 46 | 15 | 82 | 0 | 0 |
| Hypsugo ariel | 99 | 8 | 38 | 51 | 18 | 90 | 0 | 11 |
total_asv <- nrow(genome_metadata_amplicon)
summary_table_ampli %>%
select(-Phylum,-Family, -Genus) %>%
rowwise() %>%
mutate(ASV_perc=round(ASVs*100/total_asv, 2))%>%
mutate(No_genus_perc=round(No_genus*100/ASVs, 2))%>%
mutate(No_species_perc=round(No_species*100/ASVs, 2)) %>%
mutate(Single_individual_perc=round(Single_individual*100/ASVs, 2))%>%
mutate(Single_species_perc=round(Single_species*100/ASVs, 2)) %>%
mutate(Single_individual_per_unique=round(Single_individual*100/Single_species, 2)) %>%
select(1,7:12) %>% tt()| Bat | ASV_perc | No_genus_perc | No_species_perc | Single_individual_perc | Single_species_perc | Single_individual_per_unique |
|---|---|---|---|---|---|---|
| Total | 100.00 | 22.42 | 90.91 | 18.79 | 34.55 | 54.39 |
| Cnephaeus bottae | 84.85 | 21.43 | 90.71 | 22.14 | 32.86 | 67.39 |
| Pipistrellus kuhlii | 53.33 | 17.05 | 93.18 | 0.00 | 0.00 | NaN |
| Hypsugo ariel | 60.00 | 18.18 | 90.91 | 0.00 | 11.11 | 0.00 |
Shared phyla
meta_phylum <- genome_metadata %>%
distinct(domain,phylum)
genome_metadata_amplicon %>%
distinct(domain,phylum) %>%
filter(phylum %in% meta_phylum$phylum) domain phylum
1 Bacteria Pseudomonadota
2 Bacteria Fusobacteriota
3 Bacteria Bacillota
4 Bacteria Desulfobacterota
5 Bacteria Synergistota
6 Bacteria Bacteroidota
7 Bacteria Campylobacterota
Only in metagenomic data
ampli_phylum <- genome_metadata_amplicon %>%
distinct(domain,phylum)
genome_metadata %>%
distinct(domain,phylum) %>%
filter(!phylum %in% genome_metadata_amplicon$phylum)# A tibble: 6 × 2
domain phylum
<chr> <chr>
1 Bacteria Elusimicrobiota
2 Bacteria Deferribacterota
3 Bacteria Planctomycetota
4 Bacteria Spirochaetota
5 Bacteria Actinomycetota
6 Bacteria Cyanobacteriota
Only in metabarcoding selected data
domain phylum
1 Bacteria Rs-K70 termite group
2 Bacteria Patescibacteria
8.1.2 Differences at family level
Number of families in metagenomic data
56
Number of families in metabarcoding selected data
49
Shared families
meta_family <- genome_metadata %>%
distinct(domain,phylum, family)
genome_metadata_amplicon %>%
distinct(domain,phylum, family) %>%
filter(family %in% meta_family$family) domain phylum family
1 Bacteria Pseudomonadota Diplorickettsiaceae
2 Bacteria Pseudomonadota Vibrionaceae
3 Bacteria Pseudomonadota Enterobacteriaceae
4 Bacteria Fusobacteriota Leptotrichiaceae
5 Bacteria Bacillota Enterococcaceae
6 Bacteria Pseudomonadota Morganellaceae
7 Bacteria Pseudomonadota Pasteurellaceae
8 Bacteria Pseudomonadota Aeromonadaceae
9 Bacteria Bacillota Mycoplasmataceae
10 Bacteria Bacillota Streptococcaceae
11 Bacteria Pseudomonadota Rickettsiaceae
12 Bacteria Desulfobacterota Desulfovibrionaceae
13 Bacteria Bacillota Clostridiaceae
14 Bacteria Bacillota Lachnospiraceae
15 Bacteria Synergistota Synergistaceae
16 Bacteria Pseudomonadota Burkholderiaceae
17 Bacteria Bacillota Ruminococcaceae
18 Bacteria Fusobacteriota Fusobacteriaceae
19 Bacteria Pseudomonadota Halomonadaceae
20 Bacteria Bacteroidota Weeksellaceae
21 Bacteria Pseudomonadota Rhodocyclaceae
22 Bacteria Pseudomonadota Neisseriaceae
23 Bacteria Bacillota Erysipelotrichaceae
24 Bacteria Campylobacterota Helicobacteraceae
25 Bacteria Bacillota Christensenellaceae
26 Bacteria Bacteroidota Bacteroidaceae
27 Bacteria Campylobacterota Campylobacteraceae
28 Bacteria Bacteroidota Dysgonomonadaceae
29 Bacteria Pseudomonadota Acetobacteraceae
Only in metagenomic data
ampli_family <- genome_metadata_amplicon %>%
distinct(domain,phylum,family)
genome_metadata %>%
distinct(domain,phylum,family) %>%
filter(!family %in% ampli_family$family)# A tibble: 27 × 3
domain phylum family
<chr> <chr> <chr>
1 Bacteria Pseudomonadota ""
2 Bacteria Elusimicrobiota "Elusimicrobiaceae"
3 Bacteria Bacillota "Metamycoplasmataceae"
4 Bacteria Bacillota "Acutalibacteraceae"
5 Bacteria Pseudomonadota "CAG-239"
6 Bacteria Bacillota ""
7 Bacteria Desulfobacterota "Adiutricaceae"
8 Bacteria Bacteroidota "Tannerellaceae"
9 Bacteria Deferribacterota "Mucispirillaceae"
10 Bacteria Planctomycetota "SZUA-567"
# ℹ 17 more rows
Only in metabarcoding selected data
genome_metadata_amplicon %>%
distinct(domain,phylum,family) %>%
filter(!family %in% meta_family$family) domain phylum family
1 Bacteria Pseudomonadota Yersiniaceae
2 Bacteria Bacillota Bacillaceae
3 Bacteria Rs-K70 termite group <NA>
4 Bacteria Bacillota Aerococcaceae
5 Bacteria Pseudomonadota Pseudomonadaceae
6 Bacteria Pseudomonadota <NA>
7 Bacteria Bacillota Acidaminococcaceae
8 Bacteria Bacillota Lactobacillaceae
9 Bacteria Pseudomonadota Aquaspirillaceae
10 Bacteria Pseudomonadota Hafniaceae
11 Bacteria Bacillota <NA>
12 Bacteria Pseudomonadota Comamonadaceae
13 Bacteria Patescibacteria <NA>
14 Bacteria Bacillota Peptostreptococcaceae
15 Bacteria Bacillota Eubacteriaceae
16 Bacteria Patescibacteria Saccharimonadaceae
17 Bacteria Bacillota [Clostridium] methylpentosum group
18 Bacteria Bacillota Erysipelatoclostridiaceae
19 Bacteria Bacillota Vagococcaceae
20 Bacteria Bacillota Anaerovoracaceae
8.1.3 Differences in top ASVs and MAGs
Top 20 MAGs
genome_counts_filt %>%
mutate(across(-genome, ~ . / sum(.))) %>%
mutate(total = rowSums(across(-genome))) %>%
arrange(desc(total)) %>%
slice_head(n = 20) %>%
select(genome, total) %>%
left_join(genome_metadata[1:8]) genome total domain phylum class order family genus species
1 HA_bin.16 10.1429255 Bacteria Pseudomonadota Gammaproteobacteria Diplorickettsiales Diplorickettsiaceae Aquirickettsiella
2 PK_bin.10 3.4725989 Bacteria Bacillota Bacilli Mycoplasmatales Mycoplasmoidaceae Malacoplasma
3 EH_bin.20 3.0940365 Bacteria Pseudomonadota Gammaproteobacteria Enterobacterales Vibrionaceae Vibrio cholerae
4 HA_bin.8 2.4545179 Bacteria Pseudomonadota Alphaproteobacteria Rickettsiales Rickettsiaceae Rickettsia
5 PK_bin.26 1.9706785 Bacteria Pseudomonadota Gammaproteobacteria Enterobacterales Enterobacteriaceae Serratia ureilytica
6 HA_bin.7 1.9293752 Bacteria Pseudomonadota Gammaproteobacteria Chromatiales Chromatiaceae
7 PK_bin.12 1.2830141 Bacteria Pseudomonadota Alphaproteobacteria Rickettsiales Rickettsiaceae Rickettsia tillamookensis
8 PK_bin.46 1.1521884 Bacteria Bacteroidota Bacteroidia Flavobacteriales Weeksellaceae Apibacter raozihei
9 PK_bin.53 0.9584216 Bacteria Pseudomonadota Alphaproteobacteria Rickettsiales Rickettsiaceae Rickettsia bellii
10 PK_bin.52 0.9404985 Bacteria Pseudomonadota Gammaproteobacteria Enterobacterales Enterobacteriaceae Klebsiella pneumoniae
11 PK_bin.64 0.9119555 Bacteria Pseudomonadota Gammaproteobacteria Enterobacterales Morganellaceae Morganella morganii
12 EH_bin.5 0.8747411 Bacteria Bacillota Bacilli Mycoplasmatales Mycoplasmataceae Spiroplasma phoeniceum
13 HA_bin.19 0.8484409 Bacteria Bacillota Bacilli Mycoplasmatales Mycoplasmataceae Edwardiiplasma
14 PK_bin.5 0.8435610 Bacteria Pseudomonadota Gammaproteobacteria Enterobacterales Pasteurellaceae Pasteurella oralis
15 PK_bin.72 0.8277429 Bacteria Bacillota Bacilli Lactobacillales Enterococcaceae Enterococcus faecalis
16 PK_bin.68 0.7829460 Bacteria Pseudomonadota Gammaproteobacteria Enterobacterales Enterobacteriaceae Proteus cibarius
17 HA_bin.3 0.7433329 Bacteria Pseudomonadota Gammaproteobacteria Burkholderiales Burkholderiaceae
18 HA_bin.2 0.7297240 Bacteria Pseudomonadota Alphaproteobacteria Acetobacterales Acetobacteraceae
19 PK_bin.22 0.7287690 Bacteria Bacillota Bacilli Mycoplasmatales Metamycoplasmataceae UBA710
20 EH_bin.1 0.6409658 Bacteria Pseudomonadota Gammaproteobacteria Enterobacterales Aeromonadaceae Aeromonas veronii
Top 20 ASVs
genome_counts_filt_amplicon %>%
mutate(across(-genome, ~ . / sum(.))) %>%
mutate(total = rowSums(across(-genome))) %>%
arrange(desc(total)) %>%
slice_head(n = 20) %>%
select(genome, total) %>%
left_join(genome_metadata_amplicon) genome total domain phylum class order family genus species
1 ASV_4 2.3764486 Bacteria Pseudomonadota Gammaproteobacteria Enterobacterales Enterobacteriaceae Enterobacter <NA>
2 ASV_11 2.2760262 Bacteria Bacillota Bacilli Bacillales Bacillaceae Bacillus <NA>
3 ASV_2 2.2452471 Bacteria Pseudomonadota Gammaproteobacteria Enterobacterales Yersiniaceae Serratia <NA>
4 ASV_3 2.1898226 Bacteria Pseudomonadota Gammaproteobacteria Enterobacterales Vibrionaceae Vibrio <NA>
5 ASV_6 2.0174879 Bacteria Bacillota Bacilli Lactobacillales Enterococcaceae Enterococcus <NA>
6 ASV_12 1.7969253 Bacteria Bacillota Bacilli Bacillales Bacillaceae Bacillus <NA>
7 ASV_1 1.7070753 Bacteria Pseudomonadota Gammaproteobacteria Diplorickettsiales Diplorickettsiaceae Rickettsiella <NA>
8 ASV_14 1.6969153 Bacteria Pseudomonadota Gammaproteobacteria Enterobacterales Yersiniaceae Serratia <NA>
9 ASV_10 1.5038717 Bacteria Pseudomonadota Gammaproteobacteria Enterobacterales Aeromonadaceae Aeromonas <NA>
10 ASV_7 1.3482075 Bacteria Pseudomonadota Gammaproteobacteria Enterobacterales Morganellaceae Morganella morganii
11 ASV_8 1.3124299 Bacteria Pseudomonadota Gammaproteobacteria Enterobacterales Pasteurellaceae Vespertiliibacter <NA>
12 ASV_40 1.0214370 Bacteria Pseudomonadota Gammaproteobacteria Enterobacterales Enterobacteriaceae <NA> <NA>
13 ASV_24 0.9463566 Bacteria Pseudomonadota Alphaproteobacteria Rickettsiales Rickettsiaceae Rickettsia <NA>
14 ASV_5 0.8238483 Bacteria Fusobacteriota Fusobacteriia Fusobacteriales Leptotrichiaceae Sebaldella termitidis
15 ASV_15 0.7146375 Bacteria Bacillota Bacilli Lactobacillales Streptococcaceae Lactococcus <NA>
16 ASV_17 0.6561496 Bacteria Pseudomonadota Gammaproteobacteria Enterobacterales Pasteurellaceae <NA> <NA>
17 ASV_90 0.6544987 Bacteria Bacillota Clostridia Clostridiales Clostridiaceae Candidatus Arthromitus <NA>
18 ASV_13 0.6387632 Bacteria Bacillota Bacilli Mycoplasmatales Mycoplasmataceae Mycoplasma <NA>
19 ASV_20 0.5916116 Bacteria Pseudomonadota Gammaproteobacteria Enterobacterales Enterobacteriaceae Enterobacter <NA>
20 ASV_50 0.5839039 Bacteria Bacillota Bacilli Lactobacillales Enterococcaceae Enterococcus <NA>
Phylum level
topMAG <- genome_counts_filt %>%
mutate(across(-genome, ~ . / sum(.))) %>%
left_join(genome_metadata[c(1, 3, 6, 7)], by="genome") %>%
select(-genome) %>%
group_by(phylum) %>%
summarize(across(where(is.numeric), sum), .groups = "drop") %>%
mutate(total_mag = rowSums(across(where(is.numeric)))) %>%
select(total_mag, phylum) %>%
arrange(desc(total_mag))%>%
slice_head(n = 10)topASV <- genome_counts_filt_amplicon %>%
mutate(across(-genome, ~ . / sum(.))) %>%
left_join(genome_metadata_amplicon, by="genome") %>%
select(-genome) %>%
group_by(phylum) %>%
summarize(across(where(is.numeric), sum), .groups = "drop") %>%
mutate(total_asv = rowSums(across(where(is.numeric)))) %>%
select(total_asv, phylum) %>%
arrange(desc(total_asv)) %>%
slice_head(n = 10)
table <- full_join(topMAG,topASV, by="phylum") %>%
mutate(ASV = replace_na(total_asv, 0)) %>%
mutate(MAG = replace_na(total_mag, 0)) %>%
select(-total_asv, -total_mag) %>%
pivot_longer(cols = c(MAG, ASV),
names_to = "source",
values_to = "abundance")ggplot(table, aes(x = source, y = phylum, size = abundance, color = phylum)) +
geom_point(alpha = 0.7) +
scale_size_continuous(
name = "Abundance",
range = c(0.01, 30), # 0 ensures no bubble at abundance = 0
breaks = seq(0, 35, by = 5), # Controls legend labels (your 0–5–10–...–30)
limits = c(0.01, NA) # keep full range of values
) +
scale_color_manual(values = phylum_colors) +
coord_flip()+
theme_minimal()+
theme(axis.text.x = element_text(angle = 45,hjust = 1, size=12),
axis.text.y = element_text(size = 12),
axis.title = element_text(size = 14, face = "bold"),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12))+
labs(x = "Source", y = "Phylum",
title = "Metabarcoding with selected threshold")+
guides(color = "none", size = guide_legend(
override.aes = list(size = c(2, 4, 6, 8, 10, 12)) # smaller bubbles in legend
)
)full_join(topMAG,topASV, by="phylum") %>%
mutate(ASV = replace_na(total_asv, 0)) %>%
mutate(MAG = replace_na(total_mag, 0)) %>%
ggplot(., aes(x = MAG, y = ASV, color = phylum, label = phylum)) +
geom_point(size = 3, alpha = 0.8) +
geom_text_repel(max.overlaps = 20) +
geom_smooth(method = "lm", se = FALSE, color = "grey50") +
labs(
x = "MAG abundance",
y = "ASV abundance",
color = "Phylum",
title = "Comparison of MAG and ASV abundances by phylum"
) +
theme_minimal()Family level
topFMAG <- genome_counts_filt %>%
mutate(across(-genome, ~ . / sum(.))) %>%
left_join(genome_metadata[c(1:7)], by="genome") %>%
mutate(
class = if_else(str_trim(class) == "", paste0(phylum, "_Unclassified"), class),
order = if_else(str_trim(order) == "",
if_else(str_detect(class, "Unclassified"), class, paste0(class, "_Unclassified")),
order),
family = if_else(str_trim(family) == "",
if_else(str_detect(order, "Unclassified"), order, paste0(order, "_Unclassified")),
family),
genus = if_else(str_trim(genus) == "",
if_else(str_detect(family, "Unclassified"), family, paste0(family, "_Unclassified")),
genus)
)%>%
select(-genome) %>%
group_by(phylum, family) %>%
summarize(across(where(is.numeric), sum), .groups = "drop") %>%
mutate(total_mag = rowSums(across(where(is.numeric)))) %>%
select(total_mag, phylum, family) %>%
arrange(desc(total_mag)) %>%
slice_head(n = 20)full_join(topFMAG,topFASV, by="family") %>%
mutate(ASV = replace_na(total_asv, 0)) %>%
mutate(MAG = replace_na(total_mag, 0)) %>%
mutate(phylum_mag = coalesce(phylum.x, phylum.y))%>%
ggplot(., aes(x = MAG, y = ASV, color = phylum_mag, label = family)) +
geom_point(size = 3, alpha = 0.8) +
geom_text_repel(max.overlaps = 10) +
geom_smooth(method = "lm", se = FALSE, color = "grey50") +
scale_color_manual(values = phylum_colors) +
labs(
x = "MAG abundance",
y = "ASV abundance",
color = "Phylum",
title = "Metabarcoding with selected filtering"
) +
theme_minimal()8.2 Alpha Diversity
8.2.1 ASV level
richness <- genome_counts_filt_amplicon %>%
column_to_rownames(var = "genome") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 0) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(richness = 1) %>%
rownames_to_column(var = "sample")
neutral <- genome_counts_filt_amplicon %>%
column_to_rownames(var = "genome") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 1) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(neutral = 1) %>%
rownames_to_column(var = "sample")
metabarcoding_selected_alpha_div <- richness %>%
full_join(neutral, by = join_by(sample == sample)) %>%
mutate(method="Metabarcoding_selected")Correlation between diversity and estimate microbial fraction
fraction_ampli <- microbial_fraction %>%
select(sample, estimated_microbial_fraction) %>%
right_join(metabarcoding_selected_alpha_div, by="sample") %>%
left_join(sample_metadata_amplicon, by="sample")
cor.test(fraction_ampli$estimated_microbial_fraction, fraction_ampli$richness, method = "spearman")
Spearman's rank correlation rho
data: fraction_ampli$estimated_microbial_fraction and fraction_ampli$richness
S = 10963, p-value = 0.4813
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.1116885
Spearman's rank correlation rho
data: fraction_ampli$estimated_microbial_fraction and fraction_ampli$neutral
S = 11606, p-value = 0.7071
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.05955757
p1_ampli <- ggplot(fraction_ampli, aes(x = estimated_microbial_fraction, y = richness)) +
geom_point(size = 3, color = "#2E86AB") +
geom_smooth(method = "lm", se = TRUE, color = "black") +
theme_classic() +
labs(x = "Read Fraction (%)", y = "Richness", title= "Correlation between estimated microbial fraction and diversity")
p2_ampli <- ggplot(fraction_ampli, aes(x = estimated_microbial_fraction, y = neutral)) +
geom_point(size = 3, color = "#F39C12") +
geom_smooth(method = "lm", se = TRUE, color = "black") +
theme_classic() +
labs(x = "Read Fraction (%)", y = "Neutral Alpha Diversity")
p1_ampli + p2_ampli richness %>%
full_join(neutral, by = join_by(sample == sample)) %>%
pivot_longer(-sample, names_to = "alpha", values_to = "value") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
group_by(alpha) %>%
summarise(
total_mean = mean(value, na.rm = T),
total_sd = sd(value, na.rm = T),
Eb_mean = mean(value[Species == "Eb"], na.rm = T),
Eb_sd = sd(value[Species == "Eb"], na.rm = T),
Ha_mean = mean(value[Species == "Ha"], na.rm = T),
Ha_sd = sd(value[Species == "Ha"], na.rm = T),
Pk_mean = mean(value[Species == "Pk"], na.rm = T),
Pk_sd = sd(value[Species == "Pk"], na.rm = T)
) %>%
mutate(
Total = str_c(round(total_mean, 2), "±", round(total_sd, 2)),
Cnephaeus = str_c(round(Eb_mean, 2), "±", round(Eb_sd, 2)),
Hypsugo = str_c(round(Ha_mean, 2), "±", round(Ha_sd, 2)),
Pipistrellus = str_c(round(Pk_mean, 2), "±", round(Pk_sd, 2))
) %>%
arrange(-Eb_mean) %>%
dplyr::select(alpha, Total, Cnephaeus, Pipistrellus, Hypsugo) %>%
tt()| alpha | Total | Cnephaeus | Pipistrellus | Hypsugo |
|---|---|---|---|---|
| richness | 20.26±10.3 | 30.9±10.56 | 16.71±9.73 | 17.84±6.64 |
| neutral | 7.98±5.18 | 11.9±5.65 | 6.57±4.65 | 7.17±4.57 |
8.2.2 Different taxonomic level
sample_metadata_amplicon <- sample_metadata_amplicon[c(1,9,10,13)] %>%
mutate(sample = paste0(sample, "_ampli_selec")) %>%
mutate(method="Metabarcoding_selected")
genome_metadata_amplicon <- genome_metadata_amplicon %>%
mutate(
class = if_else(is.na(class), paste0(phylum, "_Unclassified"), class),
order = if_else(is.na(order),
if_else(str_detect(class, "Unclassified"), class, paste0(class, "_Unclassified")),
order),
family = if_else(is.na(family),
if_else(str_detect(order, "Unclassified"), order, paste0(order, "_Unclassified")),
family),
genus = if_else(is.na(genus),
if_else(str_detect(family, "Unclassified"), family, paste0(family, "_Unclassified")),
genus)
)
genome_tax_ampli <- genome_counts_filt_amplicon %>%
column_to_rownames(., "genome")%>%
rename_with(~ paste0(., "_ampli_selec")) %>%
rownames_to_column(., "genome")%>%
left_join(genome_metadata_amplicon, by = "genome")
family_level_agg_ampli <- genome_tax_ampli %>%
select(-genome) %>%
group_by(phylum, class, family) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame() %>%
select(-c(phylum,class))
phylum_level_agg_ampli <- genome_tax_ampli %>%
select(-genome) %>%
group_by(phylum) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame()8.2.2.1 Family level
richness_f_ampli <- family_level_agg_ampli %>%
column_to_rownames(var = "family") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 0) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(richness = 1) %>%
rownames_to_column(var = "sample")
neutral_f_ampli <- family_level_agg_ampli %>%
column_to_rownames(var = "family") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 1) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(neutral = 1) %>%
rownames_to_column(var = "sample")
alpha_div_f_ampli <- richness_f_ampli %>%
full_join(neutral_f_ampli, by = join_by(sample == sample))
alpha_div_f_ampli_data <- alpha_div_f_ampli %>%
left_join(sample_metadata_amplicon, by = join_by(sample == sample))Diversity values
alpha_div_f_ampli %>%
pivot_longer(-sample, names_to = "alpha", values_to = "value") %>%
left_join(sample_metadata_amplicon, by = join_by(sample == sample)) %>%
group_by(alpha) %>%
summarise(
total_mean = mean(value, na.rm = T),
total_sd = sd(value, na.rm = T),
Eb_mean = mean(value[Species == "Eb"], na.rm = T),
Eb_sd = sd(value[Species == "Eb"], na.rm = T),
Ha_mean = mean(value[Species == "Ha"], na.rm = T),
Ha_sd = sd(value[Species == "Ha"], na.rm = T),
Pk_mean = mean(value[Species == "Pk"], na.rm = T),
Pk_sd = sd(value[Species == "Pk"], na.rm = T)
) %>%
mutate(
Total = str_c(round(total_mean, 2), "±", round(total_sd, 2)),
Cnephaeus = str_c(round(Eb_mean, 2), "±", round(Eb_sd, 2)),
Hypsugo = str_c(round(Ha_mean, 2), "±", round(Ha_sd, 2)),
Pipistrellus = str_c(round(Pk_mean, 2), "±", round(Pk_sd, 2))
) %>%
arrange(-Eb_mean) %>%
dplyr::select(alpha, Total, Cnephaeus, Pipistrellus, Hypsugo) %>%
tt()| alpha | Total | Cnephaeus | Pipistrellus | Hypsugo |
|---|---|---|---|---|
| richness | 11.5±4.66 | 14.9±5.7 | 9.94±4.18 | 11.11±3.68 |
| neutral | 4.51±2.75 | 5.41±3.54 | 3.56±2.26 | 4.89±2.56 |
Richness
Model_richness <- MASS::glm.nb(richness ~ Species, data = alpha_div_f_ampli_data,trace=TRUE)
summary(Model_richness)
Call:
MASS::glm.nb(formula = richness ~ Species, data = alpha_div_f_ampli_data,
trace = TRUE, init.theta = 21.87845286, link = log)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 2.7014 0.1062 25.432 < 2e-16 ***
SpeciesHa -0.2939 0.1357 -2.165 0.03036 *
SpeciesPk -0.4047 0.1410 -2.870 0.00411 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for Negative Binomial(21.8785) family taken to be 1)
Null deviance: 55.830 on 45 degrees of freedom
Residual deviance: 47.315 on 43 degrees of freedom
AIC: 268.09
Number of Fisher Scoring iterations: 1
Theta: 21.9
Std. Err.: 13.4
2 x log-likelihood: -260.089
Analysis of Deviance Table
Model: Negative Binomial(21.8785), link: log
Response: richness
Terms added sequentially (first to last)
Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL 45 55.830
Species 2 8.5153 43 47.315 0.01416 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
$emmeans
Species emmean SE df asymp.LCL asymp.UCL
Eb 2.70 0.1060 Inf 2.49 2.91
Ha 2.41 0.0845 Inf 2.24 2.57
Pk 2.30 0.0928 Inf 2.11 2.48
Results are given on the log (not the response) scale.
Confidence level used: 0.95
$contrasts
contrast estimate SE df z.ratio p.value
Eb - Ha 0.294 0.136 Inf 2.165 0.0772
Eb - Pk 0.405 0.141 Inf 2.870 0.0115
Ha - Pk 0.111 0.126 Inf 0.882 0.6515
Results are given on the log (not the response) scale.
P value adjustment: tukey method for comparing a family of 3 estimates
Neutral
Model_neutral <- lm(formula = neutral ~ Species, data = alpha_div_f_ampli_data)
anova(Model_neutral)Analysis of Variance Table
Response: neutral
Df Sum Sq Mean Sq F value Pr(>F)
Species 2 26.168 13.0838 1.7967 0.1781
Residuals 43 313.136 7.2822
$emmeans
Species emmean SE df lower.CL upper.CL
Eb 5.41 0.853 43 3.69 7.13
Ha 4.89 0.619 43 3.64 6.14
Pk 3.56 0.654 43 2.24 4.88
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
Eb - Ha 0.52 1.050 43 0.493 0.8749
Eb - Pk 1.85 1.080 43 1.720 0.2096
Ha - Pk 1.33 0.901 43 1.476 0.3125
P value adjustment: tukey method for comparing a family of 3 estimates
8.2.2.2 Phylum level
richness_p_ampli <- phylum_level_agg_ampli %>%
column_to_rownames(var = "phylum") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 0) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(richness = 1) %>%
rownames_to_column(var = "sample")
neutral_p_ampli <- phylum_level_agg_ampli %>%
column_to_rownames(var = "phylum") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 1) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(neutral = 1) %>%
rownames_to_column(var = "sample")
alpha_div_phylum_ampli <- richness_p_ampli %>%
full_join(neutral_p_ampli, by = join_by(sample == sample))
alpha_div_phylum_ampli_data <- alpha_div_phylum_ampli %>%
left_join(sample_metadata_amplicon, by = join_by(sample == sample))Diversity values
alpha_div_phylum_ampli %>%
pivot_longer(-sample, names_to = "alpha", values_to = "value") %>%
left_join(sample_metadata_amplicon, by = join_by(sample == sample)) %>%
group_by(alpha) %>%
summarise(
total_mean = mean(value, na.rm = T),
total_sd = sd(value, na.rm = T),
Eb_mean = mean(value[Species == "Eb"], na.rm = T),
Eb_sd = sd(value[Species == "Eb"], na.rm = T),
Ha_mean = mean(value[Species == "Ha"], na.rm = T),
Ha_sd = sd(value[Species == "Ha"], na.rm = T),
Pk_mean = mean(value[Species == "Pk"], na.rm = T),
Pk_sd = sd(value[Species == "Pk"], na.rm = T)
) %>%
mutate(
Total = str_c(round(total_mean, 2), "±", round(total_sd, 2)),
Cnephaeus = str_c(round(Eb_mean, 2), "±", round(Eb_sd, 2)),
Hypsugo = str_c(round(Ha_mean, 2), "±", round(Ha_sd, 2)),
Pipistrellus = str_c(round(Pk_mean, 2), "±", round(Pk_sd, 2))
) %>%
arrange(-Eb_mean) %>%
dplyr::select(alpha, Total, Cnephaeus, Pipistrellus, Hypsugo) %>%
tt()| alpha | Total | Cnephaeus | Pipistrellus | Hypsugo |
|---|---|---|---|---|
| richness | 3.67±1.96 | 5.7±2.06 | 3.53±1.77 | 2.74±1.19 |
| neutral | 2.02±0.88 | 2.47±1.02 | 2.11±1.02 | 1.71±0.53 |
Richness
Model_richness <- MASS::glm.nb(richness ~ Species, data = alpha_div_phylum_ampli_data,trace=TRUE)
summary(Model_richness)
Call:
MASS::glm.nb(formula = richness ~ Species, data = alpha_div_phylum_ampli_data,
trace = TRUE, init.theta = 86595.3377, link = log)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.7405 0.1325 13.140 < 2e-16 ***
SpeciesHa -0.7337 0.1918 -3.826 0.00013 ***
SpeciesPk -0.4793 0.1850 -2.591 0.00956 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for Negative Binomial(86595.34) family taken to be 1)
Null deviance: 44.053 on 45 degrees of freedom
Residual deviance: 29.422 on 43 degrees of freedom
AIC: 178.43
Number of Fisher Scoring iterations: 1
Theta: 86595
Std. Err.: 1870619
Warning while fitting theta: iteration limit reached
2 x log-likelihood: -170.427
Analysis of Deviance Table
Model: Negative Binomial(86595.34), link: log
Response: richness
Terms added sequentially (first to last)
Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL 45 44.053
Species 2 14.631 43 29.422 0.0006652 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
$emmeans
Species emmean SE df asymp.LCL asymp.UCL
Eb 1.74 0.132 Inf 1.481 2.00
Ha 1.01 0.139 Inf 0.735 1.28
Pk 1.26 0.129 Inf 1.008 1.51
Results are given on the log (not the response) scale.
Confidence level used: 0.95
$contrasts
contrast estimate SE df z.ratio p.value
Eb - Ha 0.734 0.192 Inf 3.826 0.0004
Eb - Pk 0.479 0.185 Inf 2.591 0.0259
Ha - Pk -0.254 0.189 Inf -1.342 0.3717
Results are given on the log (not the response) scale.
P value adjustment: tukey method for comparing a family of 3 estimates
Neutral
Model_neutral <- lm(formula = neutral ~ Species, data = alpha_div_phylum_ampli_data)
anova(Model_neutral)Analysis of Variance Table
Response: neutral
Df Sum Sq Mean Sq F value Pr(>F)
Species 2 3.9363 1.96813 2.723 0.07701 .
Residuals 43 31.0794 0.72278
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
8.2.3 Comparison (metabarcoding selected vs metagenomics)
Merge the table
merged_table_family_methods <- full_join(family_level_agg_ampli,merged_table,
by = c("family"))%>%
mutate(across(where(is.numeric), ~replace_na(., 0)))
merged_table_phylum_methods <- full_join(phylum_level_agg_ampli,merged_table_phylum,
by = c("phylum"))%>%
mutate(across(where(is.numeric), ~replace_na(., 0)))
all_samples <- rbind(all_samples, sample_metadata_amplicon)8.2.3.1 Family level
richness <- merged_table_family_methods %>%
column_to_rownames(var = "family") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 0) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(richness = 1) %>%
rownames_to_column(var = "sample")
neutral <- merged_table_family_methods %>%
column_to_rownames(var = "family") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 1) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(neutral = 1) %>%
rownames_to_column(var = "sample")
alpha_div_family <- richness %>%
full_join(neutral, by = join_by(sample == sample))
alpha_div_family_data <- alpha_div_family %>%
left_join(all_samples, by = join_by(sample == sample))
alpha_div_family_data_filt <- alpha_div_family_data %>%
filter(method!="Metabarcoding_standard") %>%
mutate(sampleid = str_remove(sample, "_ampli_selec"))Richness
Model_richness_random <- glmer.nb(richness ~ Species*method+(1|sampleid), data = alpha_div_family_data_filt)
summary(Model_richness_random)Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
Family: Negative Binomial(6.8503) ( log )
Formula: richness ~ Species * method + (1 | sampleid)
Data: alpha_div_family_data_filt
AIC BIC logLik deviance df.resid
572.2 592.8 -278.1 556.2 89
Scaled residuals:
Min 1Q Median 3Q Max
-1.78863 -0.76873 -0.07903 0.52297 2.29293
Random effects:
Groups Name Variance Std.Dev.
sampleid (Intercept) 0.06038 0.2457
Number of obs: 97, groups: sampleid, 51
Fixed effects:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 2.6758 0.1666 16.064 < 2e-16 ***
SpeciesHa -0.2857 0.2083 -1.372 0.170136
SpeciesPk -0.4268 0.2144 -1.991 0.046527 *
methodMetagenomics -0.2589 0.2149 -1.205 0.228257
SpeciesHa:methodMetagenomics -1.0098 0.2880 -3.506 0.000455 ***
SpeciesPk:methodMetagenomics -0.1481 0.2732 -0.542 0.587682
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) SpecsH SpcsPk mthdMt SpcH:M
SpeciesHa -0.798
SpeciesPk -0.772 0.619
mthdMtgnmcs -0.598 0.483 0.477
SpcsH:mthdM 0.447 -0.569 -0.354 -0.743
SpcsPk:mthM 0.472 -0.380 -0.633 -0.783 0.582
Analysis of Variance Table
npar Sum Sq Mean Sq F value
Species 2 13.598 6.799 6.7989
method 1 34.674 34.674 34.6737
Species:method 2 15.691 7.845 7.8454
$emmeans
Species emmean SE df asymp.LCL asymp.UCL
Eb 2.55 0.134 Inf 2.28 2.81
Ha 1.76 0.112 Inf 1.54 1.98
Pk 2.05 0.104 Inf 1.84 2.25
Results are averaged over the levels of: method
Results are given on the log (not the response) scale.
Confidence level used: 0.95
$contrasts
contrast estimate SE df z.ratio p.value
Eb - Ha 0.791 0.173 Inf 4.567 <.0001
Eb - Pk 0.501 0.166 Inf 3.019 0.0072
Ha - Pk -0.290 0.151 Inf -1.920 0.1329
Results are averaged over the levels of: method
Results are given on the log (not the response) scale.
P value adjustment: tukey method for comparing a family of 3 estimates
$emmeans
Species = Eb:
method emmean SE df asymp.LCL asymp.UCL
Metabarcoding_selected 2.68 0.167 Inf 2.349 3.00
Metagenomics 2.42 0.176 Inf 2.071 2.76
Species = Ha:
method emmean SE df asymp.LCL asymp.UCL
Metabarcoding_selected 2.39 0.126 Inf 2.144 2.64
Metagenomics 1.12 0.167 Inf 0.794 1.45
Species = Pk:
method emmean SE df asymp.LCL asymp.UCL
Metabarcoding_selected 2.25 0.136 Inf 1.982 2.52
Metagenomics 1.84 0.132 Inf 1.584 2.10
Results are given on the log (not the response) scale.
Confidence level used: 0.95
$contrasts
Species = Eb:
contrast estimate SE df z.ratio p.value
Metabarcoding_selected - Metagenomics 0.259 0.215 Inf 1.205 0.2283
Species = Ha:
contrast estimate SE df z.ratio p.value
Metabarcoding_selected - Metagenomics 1.269 0.193 Inf 6.585 <.0001
Species = Pk:
contrast estimate SE df z.ratio p.value
Metabarcoding_selected - Metagenomics 0.407 0.170 Inf 2.394 0.0167
Results are given on the log (not the response) scale.
Neutral
Model_neutral <- lme(fixed = neutral ~ Species*method, data = alpha_div_family_data_filt,
random = ~ 1 | sampleid)
anova(Model_neutral) numDF denDF F-value p-value
(Intercept) 1 48 134.80939 <.0001
Species 2 48 1.86572 0.1658
method 1 43 9.46491 0.0036
Species:method 2 43 6.99370 0.0023
Linear mixed-effects model fit by REML
Data: alpha_div_family_data_filt
AIC BIC logLik
457.7754 477.8623 -220.8877
Random effects:
Formula: ~1 | sampleid
(Intercept) Residual
StdDev: 1.841795 1.923724
Fixed effects: neutral ~ Species * method
Value Std.Error DF t-value p-value
(Intercept) 5.407062 0.8421949 48 6.420203 0.0000
SpeciesHa -0.520108 1.0404826 48 -0.499872 0.6194
SpeciesPk -2.117345 1.0511109 48 -2.014388 0.0496
methodMetagenomics -0.776414 0.8603155 43 -0.902476 0.3718
SpeciesHa:methodMetagenomics -2.173151 1.0628695 43 -2.044608 0.0470
SpeciesPk:methodMetagenomics 1.129551 1.0722441 43 1.053446 0.2980
Correlation:
(Intr) SpecsH SpcsPk mthdMt SpcH:M
SpeciesHa -0.809
SpeciesPk -0.801 0.649
methodMetagenomics -0.511 0.413 0.409
SpeciesHa:methodMetagenomics 0.413 -0.511 -0.331 -0.809
SpeciesPk:methodMetagenomics 0.410 -0.332 -0.542 -0.802 0.649
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-1.8323946 -0.5540072 -0.1316342 0.4198305 2.3132824
Number of Observations: 97
Number of Groups: 51
$emmeans
Species = Eb:
method emmean SE df lower.CL upper.CL
Metabarcoding_selected 5.41 0.842 50 3.715 7.10
Metagenomics 4.63 0.842 43 2.932 6.33
Species = Ha:
method emmean SE df lower.CL upper.CL
Metabarcoding_selected 4.89 0.611 48 3.658 6.12
Metagenomics 1.94 0.611 43 0.705 3.17
Species = Pk:
method emmean SE df lower.CL upper.CL
Metabarcoding_selected 3.29 0.629 48 2.025 4.55
Metagenomics 3.64 0.568 43 2.498 4.79
Degrees-of-freedom method: containment
Confidence level used: 0.95
$contrasts
Species = Eb:
contrast estimate SE df t.ratio p.value
Metabarcoding_selected - Metagenomics 0.776 0.860 43 0.902 0.3718
Species = Ha:
contrast estimate SE df t.ratio p.value
Metabarcoding_selected - Metagenomics 2.950 0.624 43 4.726 <.0001
Species = Pk:
contrast estimate SE df t.ratio p.value
Metabarcoding_selected - Metagenomics -0.353 0.640 43 -0.552 0.5839
Degrees-of-freedom method: containment
8.2.3.2 Phylum level
richness_phylum <- merged_table_phylum_methods %>%
column_to_rownames(var = "phylum") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 0) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(richness = 1) %>%
rownames_to_column(var = "sample")
neutral_phylum <- merged_table_phylum_methods %>%
column_to_rownames(var = "phylum") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 1) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(neutral = 1) %>%
rownames_to_column(var = "sample")
alpha_div_phylum <- richness_phylum %>%
full_join(neutral_phylum, by = join_by(sample == sample))
alpha_div_phylum_data <- alpha_div_phylum %>%
left_join(all_samples, by = join_by(sample == sample))
alpha_div_phylum_data_filt <- alpha_div_phylum_data %>%
filter(method!="Metabarcoding_standard") %>%
mutate(sampleid = str_remove(sample, "_ampli_selec"))Richness
Model_richness_phylum_random <- glmer.nb(richness ~ Species*method+(1|sampleid), data = alpha_div_phylum_data_filt)
summary(Model_richness_phylum_random)Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
Family: Negative Binomial(261450.9) ( log )
Formula: richness ~ Species * method + (1 | sampleid)
Data: alpha_div_phylum_data_filt
AIC BIC logLik deviance df.resid
361.7 382.3 -172.9 345.7 89
Scaled residuals:
Min 1Q Median 3Q Max
-1.6044 -0.4792 -0.1555 0.4965 2.3145
Random effects:
Groups Name Variance Std.Dev.
sampleid (Intercept) 0.028 0.1673
Number of obs: 97, groups: sampleid, 51
Fixed effects:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.72561 0.14216 12.138 < 2e-16 ***
SpeciesHa -0.73182 0.19878 -3.682 0.000232 ***
SpeciesPk -0.48278 0.19572 -2.467 0.013636 *
methodMetagenomics -0.11121 0.18190 -0.611 0.540934
SpeciesHa:methodMetagenomics -0.70454 0.28934 -2.435 0.014892 *
SpeciesPk:methodMetagenomics -0.01749 0.25356 -0.069 0.944998
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) SpecsH SpcsPk mthdMt SpcH:M
SpeciesHa -0.697
SpeciesPk -0.714 0.505
mthdMtgnmcs -0.614 0.418 0.444
SpcsH:mthdM 0.355 -0.540 -0.257 -0.574
SpcsPk:mthM 0.437 -0.299 -0.660 -0.714 0.410
Analysis of Variance Table
npar Sum Sq Mean Sq F value
Species 2 32.291 16.1456 16.1456
method 1 5.347 5.3471 5.3471
Species:method 2 5.993 2.9963 2.9963
$emmeans
Species emmean SE df asymp.LCL asymp.UCL
Eb 1.670 0.1120 Inf 1.450 1.890
Ha 0.586 0.1310 Inf 0.329 0.843
Pk 1.178 0.0981 Inf 0.986 1.371
Results are averaged over the levels of: method
Results are given on the log (not the response) scale.
Confidence level used: 0.95
$contrasts
contrast estimate SE df z.ratio p.value
Eb - Ha 1.084 0.171 Inf 6.322 <.0001
Eb - Pk 0.492 0.147 Inf 3.344 0.0024
Ha - Pk -0.593 0.162 Inf -3.655 0.0008
Results are averaged over the levels of: method
Results are given on the log (not the response) scale.
P value adjustment: tukey method for comparing a family of 3 estimates
$emmeans
Species = Eb:
method emmean SE df asymp.LCL asymp.UCL
Metabarcoding_selected 1.726 0.142 Inf 1.447 2.004
Metagenomics 1.614 0.147 Inf 1.327 1.902
Species = Ha:
method emmean SE df asymp.LCL asymp.UCL
Metabarcoding_selected 0.994 0.143 Inf 0.714 1.273
Metagenomics 0.178 0.206 Inf -0.225 0.581
Species = Pk:
method emmean SE df asymp.LCL asymp.UCL
Metabarcoding_selected 1.243 0.137 Inf 0.974 1.512
Metagenomics 1.114 0.127 Inf 0.865 1.364
Results are given on the log (not the response) scale.
Confidence level used: 0.95
$contrasts
Species = Eb:
contrast estimate SE df z.ratio p.value
Metabarcoding_selected - Metagenomics 0.111 0.182 Inf 0.611 0.5409
Species = Ha:
contrast estimate SE df z.ratio p.value
Metabarcoding_selected - Metagenomics 0.816 0.237 Inf 3.436 0.0006
Species = Pk:
contrast estimate SE df z.ratio p.value
Metabarcoding_selected - Metagenomics 0.129 0.178 Inf 0.725 0.4685
Results are given on the log (not the response) scale.
Neutral
Model_neutral_phylum <- lme(fixed = neutral ~ Species*method, data = alpha_div_phylum_data_filt,
random = ~ 1 | sampleid)
anova(Model_neutral_phylum) numDF denDF F-value p-value
(Intercept) 1 48 246.72883 <.0001
Species 2 48 5.59538 0.0065
method 1 43 5.05229 0.0298
Species:method 2 43 5.08912 0.0104
Linear mixed-effects model fit by REML
Data: alpha_div_phylum_data_filt
AIC BIC logLik
247.5264 267.6133 -115.7632
Random effects:
Formula: ~1 | sampleid
(Intercept) Residual
StdDev: 0.7626942 0.5121722
Fixed effects: neutral ~ Species * method
Value Std.Error DF t-value p-value
(Intercept) 2.4656565 0.2905207 48 8.487025 0.0000
SpeciesHa -0.7553578 0.3589214 48 -2.104522 0.0406
SpeciesPk -0.4882620 0.3587352 48 -1.361065 0.1798
methodMetagenomics -0.0946811 0.2290504 43 -0.413364 0.6814
SpeciesHa:methodMetagenomics -0.5415858 0.2829783 43 -1.913877 0.0623
SpeciesPk:methodMetagenomics 0.2035922 0.2867670 43 0.709957 0.4816
Correlation:
(Intr) SpecsH SpcsPk mthdMt SpcH:M
SpeciesHa -0.809
SpeciesPk -0.810 0.656
methodMetagenomics -0.394 0.319 0.319
SpeciesHa:methodMetagenomics 0.319 -0.394 -0.258 -0.809
SpeciesPk:methodMetagenomics 0.315 -0.255 -0.428 -0.799 0.647
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-1.59354361 -0.56553943 -0.08515739 0.38014230 2.19616114
Number of Observations: 97
Number of Groups: 51
$emmeans
Species emmean SE df lower.CL upper.CL
Eb 2.42 0.267 43 1.88 2.96
Ha 1.39 0.194 43 1.00 1.78
Pk 2.03 0.184 43 1.66 2.40
Results are averaged over the levels of: method
Degrees-of-freedom method: containment
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
Eb - Ha 1.026 0.330 43 3.111 0.0091
Eb - Pk 0.386 0.324 43 1.192 0.4645
Ha - Pk -0.640 0.267 43 -2.394 0.0540
Results are averaged over the levels of: method
Degrees-of-freedom method: containment
P value adjustment: tukey method for comparing a family of 3 estimates
$emmeans
Species = Eb:
method emmean SE df lower.CL upper.CL
Metabarcoding_selected 2.47 0.291 50 1.882 3.05
Metagenomics 2.37 0.291 43 1.785 2.96
Species = Ha:
method emmean SE df lower.CL upper.CL
Metabarcoding_selected 1.71 0.211 48 1.287 2.13
Metagenomics 1.07 0.211 43 0.649 1.50
Species = Pk:
method emmean SE df lower.CL upper.CL
Metabarcoding_selected 1.98 0.210 48 1.554 2.40
Metagenomics 2.09 0.196 43 1.691 2.48
Degrees-of-freedom method: containment
Confidence level used: 0.95
$contrasts
Species = Eb:
contrast estimate SE df t.ratio p.value
Metabarcoding_selected - Metagenomics 0.0947 0.229 43 0.413 0.6814
Species = Ha:
contrast estimate SE df t.ratio p.value
Metabarcoding_selected - Metagenomics 0.6363 0.166 43 3.829 0.0004
Species = Pk:
contrast estimate SE df t.ratio p.value
Metabarcoding_selected - Metagenomics -0.1089 0.173 43 -0.631 0.5312
Degrees-of-freedom method: containment
8.2.4 Alpha diversity figure (three methods)
alpha_div_family_data %>%
select(-sample,-Life_stage, -Sex) %>%
pivot_longer(
cols = -c(method, Species),
names_to = "metric",
values_to = "value"
) %>%
mutate(
metric = factor(
metric,
levels = c("richness", "neutral"),
labels = c("Richness", "Neutral")
),
Species = factor(
Species,
levels = c("Eb", "Ha", "Pk"),
labels = c("C. bottae", "H. ariel", "P. kuhlii")
),
method = factor(
method,
levels = c("Metabarcoding_standard", "Metabarcoding_selected", "Metagenomics"),
labels = c("Metabarcoding Standard", "Metabarcoding Selected", "Metagenomics")
)) %>%
ggplot(aes(y = value, x = method, color = Species, fill = Species)) +
geom_boxplot(
width = 0.4,
alpha = 0.5,
outlier.shape = NA,
# show.legend = FALSE,
position = position_dodge(width = 0.75)
) +
geom_jitter(
position = position_jitterdodge(jitter.width = 0.1, dodge.width = 0.75),
size = 1,
alpha = 0.8,
show.legend = FALSE
) +
facet_wrap(~metric, scales = "free_y") +
scale_color_manual(values=species_color, labels = c(expression(italic("C. bottae")),
expression(italic("H. ariel")),
expression(italic("P. kuhlii"))))+
scale_fill_manual(values=species_color, labels = c(expression(italic("C. bottae")),
expression(italic("H. ariel")),
expression(italic("P. kuhlii")))) +
theme_classic() +
theme(
strip.background = element_blank(),
panel.grid.minor.x = element_line(size = 0.1, color = "grey"),
axis.text.y = element_text(size = 12),
axis.text.x = element_text(size = 12),
axis.title.x = element_blank(),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0), size = 14),
strip.text = element_text(size = 14, lineheight = 0.6, face = "bold")
) +
labs(
# x = "Methodological approach",
y = "Alpha diversity at family level",
fill = "Species"
)alpha_div_phylum_data %>%
select(-sample,-Life_stage, -Sex) %>%
pivot_longer(
cols = -c(method, Species),
names_to = "metric",
values_to = "value"
) %>%
mutate(
metric = factor(
metric,
levels = c("richness", "neutral"),
labels = c("Richness", "Neutral")
),
Species = factor(
Species,
levels = c("Eb", "Ha", "Pk"),
labels = c("C. bottae", "H. ariel", "P. kuhlii")
),
method = factor(
method,
levels = c("Metabarcoding_standard", "Metabarcoding_selected", "Metagenomics"),
labels = c("Metabarcoding Standard", "Metabarcoding Selected", "Metagenomics")
)) %>%
ggplot(aes(y = value, x = method, color = Species, fill = Species)) +
geom_boxplot(
width = 0.4,
alpha = 0.5,
outlier.shape = NA,
# show.legend = FALSE,
position = position_dodge(width = 0.75)
) +
geom_jitter(
position = position_jitterdodge(jitter.width = 0.1, dodge.width = 0.75),
size = 1,
alpha = 0.8,
show.legend = FALSE
) +
facet_wrap(~metric, scales = "free_y") +
scale_color_manual(values=species_color, labels = c(expression(italic("C. bottae")),
expression(italic("H. ariel")),
expression(italic("P. kuhlii"))))+
scale_fill_manual(values=species_color, labels = c(expression(italic("C. bottae")),
expression(italic("H. ariel")),
expression(italic("P. kuhlii")))) +
theme_classic() +
theme(
strip.background = element_blank(),
panel.grid.minor.x = element_line(size = 0.1, color = "grey"),
axis.text.y = element_text(size = 12),
axis.text.x = element_text(size = 12),
axis.title.x = element_blank(),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0), size = 14),
strip.text = element_text(size = 14, lineheight = 0.6, face = "bold")
) +
labs(
# x = "Methodological approach",
y = "Alpha diversity at phylum level",
fill = "Species"
)8.3 Beta diversity
8.3.1 At different taxonomic level
beta_q0n <- family_level_agg_ampli %>%
column_to_rownames(., "family") %>%
hillpair(., q = 0)
beta_q1n <- family_level_agg_ampli %>%
column_to_rownames(., "family") %>%
hillpair(., q = 1)
beta_q0n_phylum <- phylum_level_agg_ampli %>%
column_to_rownames(., "phylum") %>%
hillpair(., q = 0)
beta_q1n_phylum <- phylum_level_agg_ampli %>%
column_to_rownames(., "phylum") %>%
hillpair(., q = 1)8.3.1.1 Family
Richness
adonis2(
beta_q0n$S ~ Species,
data = sample_metadata_amplicon %>%
filter(sample %in% labels(beta_q0n$S)) %>%
arrange(match(sample, labels(beta_q0n$S))),
permutations = 999
) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Model | 2 | 1.936832 | 0.1062653 | 2.556355 | 0.001 |
| Residual | 43 | 16.289560 | 0.8937347 | NA | NA |
| Total | 45 | 18.226392 | 1.0000000 | NA | NA |
pairwise.adonis(
beta_q0n$S,
sample_metadata_amplicon %>%
filter(sample %in% labels(beta_q0n$S)) %>%
arrange(match(sample, labels(beta_q0n$S))) %>%
pull(Species),
perm = 999) %>%
tt()| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| Eb vs Ha | 1 | 1.1892164 | 3.212674 | 0.10633532 | 0.001 | 0.003 | * |
| Eb vs Pk | 1 | 0.7483175 | 1.901197 | 0.07067333 | 0.042 | 0.126 | |
| Ha vs Pk | 1 | 0.9556233 | 2.549407 | 0.06975234 | 0.003 | 0.009 | * |
Neutral
adonis2(
beta_q1n$S ~ Species,
data = sample_metadata_amplicon %>%
filter(sample %in% labels(beta_q1n$S)) %>%
arrange(match(sample, labels(beta_q1n$S))),
permutations = 999
) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Model | 2 | 1.881489 | 0.1062486 | 2.555907 | 0.001 |
| Residual | 43 | 15.826869 | 0.8937514 | NA | NA |
| Total | 45 | 17.708358 | 1.0000000 | NA | NA |
pairwise.adonis(
beta_q1n$S,
sample_metadata_amplicon %>%
filter(sample %in% labels(beta_q1n$S)) %>%
arrange(match(sample, labels(beta_q1n$S))) %>%
pull(Species),
perm = 999) %>%
tt()| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| Eb vs Ha | 1 | 1.3532821 | 3.840951 | 0.12454063 | 0.001 | 0.003 | * |
| Eb vs Pk | 1 | 0.7636118 | 2.006946 | 0.07431220 | 0.049 | 0.147 | |
| Ha vs Pk | 1 | 0.7412715 | 1.995708 | 0.05544294 | 0.049 | 0.147 |
8.3.1.2 Phylum
Richness
adonis2(
beta_q0n_phylum$S ~ Species,
by = "terms",
data = sample_metadata_amplicon %>%
filter(sample %in% labels(beta_q0n_phylum$S)) %>%
arrange(match(sample, labels(beta_q0n_phylum$S))),
permutations = 999
) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Species | 2 | 0.7646919 | 0.1095595 | 2.645354 | 0.018 |
| Residual | 43 | 6.2150010 | 0.8904405 | NA | NA |
| Total | 45 | 6.9796930 | 1.0000000 | NA | NA |
pairwise.adonis(
beta_q0n_phylum$S,
sample_metadata_amplicon %>%
filter(sample %in% labels(beta_q0n_phylum$S)) %>%
arrange(match(sample, labels(beta_q0n_phylum$S))) %>%
pull(Species),
perm = 999) %>%
tt()| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| Eb vs Ha | 1 | 0.4237598 | 3.707872 | 0.12074662 | 0.017 | 0.051 | |
| Eb vs Pk | 1 | 0.2966148 | 1.614777 | 0.06067220 | 0.182 | 0.546 | |
| Ha vs Pk | 1 | 0.4132832 | 2.956948 | 0.08001061 | 0.047 | 0.141 |
Neutral
adonis2(
beta_q1n_phylum$S ~ Species,
data = sample_metadata_amplicon %>%
filter(sample %in% labels(beta_q1n_phylum$S)) %>%
arrange(match(sample, labels(beta_q1n_phylum$S))),
permutations = 999
) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Model | 2 | 0.4130198 | 0.07992329 | 1.867617 | 0.164 |
| Residual | 43 | 4.7546835 | 0.92007671 | NA | NA |
| Total | 45 | 5.1677033 | 1.00000000 | NA | NA |
8.3.2 Comparison between metabarcoding selected and metagenomics
beta_q0n <- merged_table_family_methods %>%
select(-H19, -H20) %>%
column_to_rownames(., "family") %>%
hillpair(., q = 0)
beta_q1n <- merged_table_family_methods %>%
select(-H19) %>%
column_to_rownames(., "family") %>%
hillpair(., q = 1)
beta_q0n_phylum <- merged_table_phylum_methods %>%
select(-H19, -H20) %>%
column_to_rownames(., "phylum") %>%
hillpair(., q = 0)
beta_q1n_phylum <- merged_table_phylum_methods %>%
select(-H19) %>%
column_to_rownames(., "phylum") %>%
hillpair(., q = 1)8.3.2.1 Family level
Richness
m <- as.matrix(beta_q0n$S)
samples <- rownames(m)
to_remove <- grepl("_ampli$", samples) & !grepl("_ampli_Select$", samples)
m_filtered <- m[!to_remove, !to_remove]
beta_q0n_S_filtered <- as.dist(m_filtered)
samples_sel <- all_samples %>%
mutate(sampleid = str_remove(sample, "_ampli_selec"))
samples_q0 <- samples_sel %>% filter(sample %in% labels(beta_q0n_S_filtered)) %>% arrange(match(sample, labels(beta_q0n_S_filtered)))
betadisper(beta_q0n_S_filtered, samples_q0 %>% pull(Species)) %>% permutest(.)
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 2 0.0253 0.0126481 3.6104 999 0.034 *
Residuals 92 0.3223 0.0035032
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(beta_q0n_S_filtered ~ Species*method, data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Model | 5 | 6.444711 | 0.162343 | 3.449748 | 0.001 |
| Residual | 89 | 33.253404 | 0.837657 | NA | NA |
| Total | 94 | 39.698115 | 1.000000 | NA | NA |
adonis2(beta_q0n_S_filtered ~ Species+method, by = "margin", data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Species | 2 | 2.661606 | 0.06704615 | 3.506968 | 0.001 |
| method | 1 | 2.517837 | 0.06342460 | 6.635072 | 0.001 |
| Residual | 91 | 34.532127 | 0.86986819 | NA | NA |
| Total | 94 | 39.698115 | 1.00000000 | NA | NA |
adonis2(beta_q0n_S_filtered ~ Species*method, by = "margin", data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Species:method | 2 | 1.278723 | 0.03221118 | 1.711199 | 0.005 |
| Residual | 89 | 33.253404 | 0.83765701 | NA | NA |
| Total | 94 | 39.698115 | 1.00000000 | NA | NA |
Neutral
m <- as.matrix(beta_q1n$S)
samples <- rownames(m)
to_remove <- grepl("_ampli$", samples) & !grepl("_ampli_Select$", samples)
m_filtered <- m[!to_remove, !to_remove]
beta_q1n_S_filtered <- as.dist(m_filtered)
samples_q1 <- samples_sel %>% filter(sample %in% labels(beta_q1n_S_filtered)) %>% arrange(match(sample, labels(beta_q1n_S_filtered)))
betadisper(beta_q1n_S_filtered, samples_q1 %>% pull(Species)) %>% permutest(.)
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 2 0.10823 0.054114 5.9591 999 0.002 **
Residuals 93 0.84453 0.009081
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(beta_q1n_S_filtered ~ Species*method, data = samples_q1, permutations = 999, strata = samples_q1$sampleid) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Model | 5 | 5.612085 | 0.141896 | 2.976479 | 0.001 |
| Residual | 90 | 33.938601 | 0.858104 | NA | NA |
| Total | 95 | 39.550686 | 1.000000 | NA | NA |
adonis2(beta_q1n_S_filtered ~ Species+method, by = "margin", data = samples_q1, permutations = 999, strata = samples_q1$sampleid) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Species | 2 | 2.859173 | 0.07229137 | 3.796975 | 0.001 |
| method | 1 | 2.091473 | 0.05288083 | 5.554942 | 0.001 |
| Residual | 92 | 34.638614 | 0.87580312 | NA | NA |
| Total | 95 | 39.550686 | 1.00000000 | NA | NA |
adonis2(beta_q1n_S_filtered ~ Species*method, by = "margin", data = samples_q1, permutations = 999, strata = samples_q1$sampleid) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Species:method | 2 | 0.7000129 | 0.01769913 | 0.9281638 | 0.07 |
| Residual | 90 | 33.9386014 | 0.85810399 | NA | NA |
| Total | 95 | 39.5506860 | 1.00000000 | NA | NA |
8.3.2.2 Phylum level
Richness
m <- as.matrix(beta_q0n_phylum$S)
samples <- rownames(m)
to_remove <- grepl("_ampli$", samples) & !grepl("_ampli_Select$", samples)
m_filtered <- m[!to_remove, !to_remove]
beta_q0n_phylum_S_filtered <- as.dist(m_filtered)
samples_q0 <- samples_sel %>% filter(sample %in% labels(beta_q0n_phylum_S_filtered)) %>% arrange(match(sample, labels(beta_q0n_phylum_S_filtered)))
betadisper(beta_q0n_phylum_S_filtered, samples_q0 %>% pull(Species)) %>% permutest(.)
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 2 0.2333 0.116640 1.6317 999 0.194
Residuals 92 6.5764 0.071482
adonis2(beta_q0n_phylum_S_filtered ~ Species*method, data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Model | 5 | 4.848799 | 0.2864865 | 7.146968 | 0.001 |
| Residual | 89 | 12.076256 | 0.7135135 | NA | NA |
| Total | 94 | 16.925056 | 1.0000000 | NA | NA |
adonis2(beta_q0n_phylum_S_filtered ~ Species+method, by = "margin", data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Species | 2 | 2.917014 | 0.17234885 | 10.154105 | 0.001 |
| method | 1 | 1.079379 | 0.06377401 | 7.514619 | 0.001 |
| Residual | 91 | 13.070983 | 0.77228599 | NA | NA |
| Total | 94 | 16.925056 | 1.00000000 | NA | NA |
adonis2(beta_q0n_phylum_S_filtered ~ Species*method, by = "margin", data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Species:method | 2 | 0.9947267 | 0.05877244 | 3.665485 | 0.003 |
| Residual | 89 | 12.0762564 | 0.71351355 | NA | NA |
| Total | 94 | 16.9250555 | 1.00000000 | NA | NA |
Neutral
m <- as.matrix(beta_q1n_phylum$S)
samples <- rownames(m)
to_remove <- grepl("_ampli$", samples) & !grepl("_ampli_Select$", samples)
m_filtered <- m[!to_remove, !to_remove]
beta_q1n_phylum_S_filtered <- as.dist(m_filtered)
samples_q1 <- samples_sel %>% filter(sample %in% labels(beta_q1n_phylum_S_filtered)) %>% arrange(match(sample, labels(beta_q1n_phylum_S_filtered)))
betadisper(beta_q1n_phylum_S_filtered, samples_q1 %>% pull(Species)) %>% permutest(.)
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 2 0.4308 0.215395 2.9832 999 0.062 .
Residuals 93 6.7149 0.072203
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(beta_q1n_phylum_S_filtered ~ Species*method, data = samples_q1, permutations = 999, strata = samples_q1$sampleid) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Model | 5 | 1.978556 | 0.1572087 | 3.357602 | 0.035 |
| Residual | 90 | 10.606975 | 0.8427913 | NA | NA |
| Total | 95 | 12.585531 | 1.0000000 | NA | NA |
adonis2(beta_q1n_phylum_S_filtered ~ Species+method, by = "margin", data = samples_q1, permutations = 999, strata = samples_q1$sampleid) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Species | 2 | 1.3839347 | 0.1099624 | 5.965511 | 0.014 |
| method | 1 | 0.6214811 | 0.0493806 | 5.357843 | 0.007 |
| Residual | 92 | 10.6715068 | 0.8479187 | NA | NA |
| Total | 95 | 12.5855306 | 1.0000000 | NA | NA |
adonis2(beta_q1n_phylum_S_filtered ~ Species*method, by = "margin", data = samples_q1, permutations = 999, strata = samples_q1$sampleid) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Species:method | 2 | 0.06453176 | 0.005127457 | 0.2737754 | 0.719 |
| Residual | 90 | 10.60697513 | 0.842791253 | NA | NA |
| Total | 95 | 12.58553064 | 1.000000000 | NA | NA |
8.3.3 Three methodological approaches together
8.3.3.1 Family
Richness
beta_q0n$S %>%
vegan::metaMDS(.,
trymax = 500,
k = 2,
trace = 0) %>%
vegan::scores() %>%
as_tibble(., rownames = "sample") %>%
dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
group_by(method, Species) %>%
mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
ungroup() %>%
mutate(
Species = factor(
Species,
levels = c("Eb", "Ha", "Pk"),
labels = c("C. bottae", "H. ariel", "P. kuhlii")),
method = factor(
method,
levels = c("Metabarcoding_standard", "Metabarcoding_selected", "Metagenomics"),
labels = c("Metabarcoding Standard", "Metabarcoding Selected", "Metagenomics")
)
) %>%
ggplot(aes(x = NMDS1, y = NMDS2, color = Species, shape=method)) +
geom_point(size = 4) +
geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
scale_color_manual(values = species_color,
labels = c(
expression(italic("C. bottae")),
expression(italic("H. ariel")),
expression(italic("P. kuhlii"))
))+
theme(
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
panel.background = element_blank(),
axis.line = element_line(
size = 0.5,
linetype = "solid",
colour = "black"
),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical"
) +
labs(
shape = "Methodological approach"
)#+labs(color = "method") +geom_text_repel(aes(label = sample), size=3)beta_q0n$S %>%
vegan::metaMDS(.,
trymax = 500,
k = 2,
trace = 0) %>%
vegan::scores() %>%
as_tibble(., rownames = "sample") %>%
dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
group_by(method) %>%
mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
ungroup() %>%
mutate(method = factor(method,
levels = c("Metabarcoding_standard", "Metabarcoding_selected", "Metagenomics"),
labels = c("Metabarcoding Standard", "Metabarcoding Selected", "Metagenomics")
)
) %>%
ggplot(aes(x = NMDS1, y = NMDS2, color = method)) +
geom_point(size = 4) +
geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
scale_color_manual(values = methods_colors)+
theme(
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
panel.background = element_blank(),
axis.line = element_line(
size = 0.5,
linetype = "solid",
colour = "black"
),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical"
)Neutral
beta_q1n$S %>%
vegan::metaMDS(.,
trymax = 500,
k = 2,
trace = 0) %>%
vegan::scores() %>%
as_tibble(., rownames = "sample") %>%
dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
group_by(method, Species) %>%
mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
ungroup() %>%
mutate(
Species = factor(
Species,
levels = c("Eb", "Ha", "Pk"),
labels = c("C. bottae", "H. ariel", "P. kuhlii"))
) %>%
ggplot(aes(x = NMDS1, y = NMDS2, color = Species, shape=method)) +
geom_point(size = 3) +
geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
scale_color_manual(values = species_color,
labels = c(
expression(italic("C. bottae")),
expression(italic("H. ariel")),
expression(italic("P. kuhlii"))
))+
theme(
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
panel.background = element_blank(),
axis.line = element_line(
size = 0.5,
linetype = "solid",
colour = "black"
),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical"
) +
labs(shape = "Methodological approach")beta_q1n$S %>%
vegan::metaMDS(.,
trymax = 500,
k = 2,
trace = 0) %>%
vegan::scores() %>%
as_tibble(., rownames = "sample") %>%
dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
filter(sample!="P36") %>%
group_by(method) %>%
mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
ungroup() %>%
mutate(method = factor(method,
levels = c("Metabarcoding_standard", "Metabarcoding_selected", "Metagenomics"),
labels = c("Metabarcoding Standard", "Metabarcoding Selected", "Metagenomics")
)
) %>%
ggplot(aes(x = NMDS1, y = NMDS2, color = method)) +
geom_point(size = 3) +
geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
scale_color_manual(values = methods_colors)+
theme(
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
panel.background = element_blank(),
axis.line = element_line(
size = 0.5,
linetype = "solid",
colour = "black"
),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical"
) +
labs(color = "Methodological approach",shape="Species")8.3.3.2 Phylum level
Richness
beta_q0n_phylum$S %>%
vegan::metaMDS(.,
trymax = 500,
k = 2,
trace = 0) %>%
vegan::scores() %>%
as_tibble(., rownames = "sample") %>%
dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
group_by(method, Species) %>%
mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
ungroup() %>%
mutate(
Species = factor(
Species,
levels = c("Eb", "Ha", "Pk"),
labels = c("C. bottae", "H. ariel", "P. kuhlii"))
) %>%
ggplot(aes(x = NMDS1, y = NMDS2, color = Species, shape=method)) +
geom_point(size = 4) +
geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
scale_color_manual(values = species_color,
labels = c(
expression(italic("C. bottae")),
expression(italic("H. ariel")),
expression(italic("P. kuhlii"))
))+
theme(
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
panel.background = element_blank(),
axis.line = element_line(
size = 0.5,
linetype = "solid",
colour = "black"
),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical"
) +
labs(
shape = "Methodological approach"
)Neutral
beta_q1n_phylum$S %>%
vegan::metaMDS(.,
trymax = 500,
k = 2,
trace = 0) %>%
vegan::scores() %>%
as_tibble(., rownames = "sample") %>%
dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
group_by(method) %>%
mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(aes(x = NMDS1, y = NMDS2, color = method, shape= Species)) +
geom_point(size = 3) +
geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
scale_color_manual(values = methods_colors,
labels = c(
expression(italic("Metabarcoding Standard")),
expression(italic("Metabarcoding Selected")),
expression(italic("Metagenomics"))
))+
theme(
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
panel.background = element_blank(),
axis.line = element_line(
size = 0.5,
linetype = "solid",
colour = "black"
),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical"
) +
labs(color = "Methodological approach",shape="Species")8.4 Differential abundance
8.4.1 Metabarcoding selected
phylo_samples <- sample_metadata_amplicon %>%
column_to_rownames("sample") %>%
sample_data()
phylo_genome <- genome_counts_filt_amplicon %>%
select(one_of(c("genome", rownames(phylo_samples)))) %>%
column_to_rownames("genome") %>%
otu_table(., taxa_are_rows = TRUE)
phylo_taxonomy <- genome_metadata_amplicon %>%
filter(genome %in% rownames(phylo_genome)) %>%
column_to_rownames("genome") %>%
dplyr::select(domain,phylum,class,order,family,genus,species) %>%
as.matrix() %>%
tax_table()
physeq_genome_metab_sel <- phyloseq(phylo_genome, phylo_samples, phylo_taxonomy)ancom_rand_output_phylum_metab_sel = ancombc2(
data = physeq_genome_metab_sel,
assay_name = "counts",
tax_level = "phylum",
fix_formula = "Species",
p_adj_method = "holm",
pseudo_sens = TRUE,
prv_cut = 0,
lib_cut = 0,
s0_perc = 0.05,
group = "Species",
struc_zero = TRUE,
neg_lb = FALSE,
alpha = 0.05,
n_cl = 2,
verbose = TRUE,
global = TRUE,
pairwise = TRUE,
dunnet = FALSE,
trend = FALSE,
iter_control = list(
tol = 1e-5,
max_iter = 20,
verbose = FALSE
),
em_control = list(tol = 1e-5, max_iter = 100),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
trend_control = NULL
)8.4.1.1 Structural zeros
Total number of structural zeros
# Extract structural zeros
structural_zeros <- ancom_rand_output_phylum_metab_sel$zero_ind
# Convert to a data frame for better readability
structural_zeros_df <- as.data.frame(structural_zeros) %>%
column_to_rownames(., "taxon")
structural_zero_taxa <- rownames(structural_zeros_df)[apply(structural_zeros_df, 1, any)]
# Identify in which groups each taxon is a structural zero
structural_zero_groups <- structural_zeros_df[structural_zero_taxa, , drop = FALSE] %>%
rename("Eb"=1) %>%
rename("Ha"=2) %>%
rename("Pk"=3)
# Total number of structural zeros
nrow(structural_zero_groups)[1] 1
8.4.1.2 Enriched data
res_pair_phylum <- ancom_rand_output_phylum_metab_sel$res_pair
# Get log-fold change and q-value columns
lfc_cols <- grep("^lfc_", colnames(res_pair_phylum), value = TRUE)
q_cols <- gsub("^lfc_", "q_", lfc_cols)
# Build long table by pairing lfc and q for each contrast
plot_data_phylum <- lfc_cols %>%
setNames(nm = .) %>%
purrr::imap_dfr(function(lfc_col, name) {
q_col <- gsub("^lfc_", "q_", lfc_col)
res_pair_phylum %>%
select(taxon, !!lfc_col, !!q_col) %>%
rename(
lfc = !!lfc_col,
q = !!q_col
) %>%
mutate(contrast_key = name)
}) %>%
filter(!is.na(lfc), !is.na(q), q < 0.05) %>%
mutate(
# Manually re-label contrasts for clarity
contrast = case_when(
contrast_key == "lfc_SpeciesHa" ~ "H. ariel vs C. bottae",
contrast_key == "lfc_SpeciesPk" ~ "P. kuhlii vs C. bottae",
contrast_key == "lfc_SpeciesPk_SpeciesHa" ~ "P. kuhlii vs H. ariel",
TRUE ~ contrast_key
),
direction = ifelse(lfc > 0, paste("Enriched in", sub(" vs.*", "", contrast)),
paste("Enriched in", sub(".*vs ", "", contrast)))
)
plot_data_phylum %>% select(-contrast_key) taxon lfc q contrast direction
1 Fusobacteriota -2.762137 0.01005929 H. ariel vs C. bottae Enriched in C. bottae
2 Synergistota -2.590353 0.01350310 H. ariel vs C. bottae Enriched in C. bottae
3 Bacteroidota -3.073607 0.02532932 P. kuhlii vs C. bottae Enriched in C. bottae
4 Fusobacteriota 2.319031 0.03647719 P. kuhlii vs H. ariel Enriched in P. kuhlii
5 Desulfobacterota 2.460761 0.02502321 P. kuhlii vs H. ariel Enriched in P. kuhlii
8.4.2 Differences with metagenomics at phylum level across species
load("resources/amplicon/selected_filtering.Rdata")
sample_metadata_amplicon <- sample_metadata_amplicon[c(1,9,10,13)] %>%
mutate(sample = paste0(sample, "_ampli")) %>%
mutate(method="Metabarcoding")
genome_metadata_ampli <- genome_metadata_amplicon %>%
mutate(
class = if_else(is.na(class), paste0(phylum, "_Unclassified"), class),
order = if_else(is.na(order),
if_else(str_detect(class, "Unclassified"), class, paste0(class, "_Unclassified")),
order),
family = if_else(is.na(family),
if_else(str_detect(order, "Unclassified"), order, paste0(order, "_Unclassified")),
family),
genus = if_else(is.na(genus),
if_else(str_detect(family, "Unclassified"), family, paste0(family, "_Unclassified")),
genus)
)
genome_tax_ampli <- genome_counts_filt_amplicon %>%
column_to_rownames(., "genome")%>%
rename_with(~ paste0(., "_ampli")) %>%
rownames_to_column(., "genome")%>%
left_join(genome_metadata_ampli, by = "genome")
family_level_agg_ampli <- genome_tax_ampli %>%
select(-genome) %>%
group_by(phylum, class, family) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame() %>%
select(-c(phylum,class))
phylum_level_agg_ampli <- genome_tax_ampli %>%
select(-genome) %>%
group_by(phylum) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame()load("resources/metagenomics/data_filtered.Rdata")
genome_metadata <- genome_metadata %>%
mutate(
class = if_else(str_trim(class) == "", paste0(phylum, "_Unclassified"), class),
order = if_else(str_trim(order) == "",
if_else(str_detect(class, "Unclassified"), class, paste0(class, "_Unclassified")),
order),
family = if_else(str_trim(family) == "",
if_else(str_detect(order, "Unclassified"), order, paste0(order, "_Unclassified")),
family),
genus = if_else(str_trim(genus) == "",
if_else(str_detect(family, "Unclassified"), family, paste0(family, "_Unclassified")),
genus)
)
genome_tax <- genome_counts_filt %>%
left_join(genome_metadata[c(1,3,4,6)], by = "genome")
# Aggregate counts at the Family level
family_level_agg <- genome_tax %>%
select(-genome) %>%
group_by(phylum, class, family) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame() %>%
select(-c(phylum,class))
# Aggregate counts at the Phylum level
phylum_level_agg <- genome_tax %>%
select(-genome) %>%
group_by(phylum) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame() merged_table <- full_join(family_level_agg_ampli,family_level_agg,
by = c("family"))%>%
mutate(across(where(is.numeric), ~replace_na(., 0)))
merged_table_phylum <- full_join(phylum_level_agg_ampli,phylum_level_agg,
by = c("phylum"))%>%
mutate(across(where(is.numeric), ~replace_na(., 0)))
sample_shot <- sample_metadata %>%
select(-c(Habitat,Longitude, Latitude, Region))%>%
mutate(method="Metagenomics")
all_samples <- rbind(sample_shot, sample_metadata_amplicon)phylo_genome <- merged_table_phylum %>%
column_to_rownames("phylum") %>%
otu_table(., taxa_are_rows = TRUE)
phylo_samples <- all_samples%>%
mutate(individual=sample)%>%
column_to_rownames("sample") %>%
sample_data()
merged_physeq_phylum <- phyloseq(phylo_genome, phylo_samples)ancom_rand_output_struc_approach_rand = ancombc2(data = merged_physeq_phylum,
assay_name = "counts",
tax_level = NULL,
fix_formula = "method",
# rand_formula = "(1|Species)",
p_adj_method = "holm",
pseudo_sens = TRUE,
prv_cut =0,
lib_cut = 0,
s0_perc = 0.05,
group = "method",
struc_zero = TRUE,
neg_lb = FALSE,
alpha = 0.05,
n_cl = 2,
verbose = TRUE,
global = FALSE,
pairwise = FALSE,
dunnet = FALSE,
trend = FALSE,
iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
em_control = list(tol = 1e-5, max_iter = 100),
# lme_control = lme4::lmerControl(),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
trend_control = NULL)Enriched phyla
| taxon | lfc_(Intercept) | lfc_methodMetagenomics | se_(Intercept) | se_methodMetagenomics | W_(Intercept) | W_methodMetagenomics | p_(Intercept) | p_methodMetagenomics | q_(Intercept) | q_methodMetagenomics | diff_(Intercept) | diff_methodMetagenomics | passed_ss_(Intercept) | passed_ss_methodMetagenomics |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Bacillota | 0.73797871 | -1.2936035 | NA | 0.2991155 | NA | -4.324763 | 1 | 4.828797e-05 | 1 | 0.0003380158 | FALSE | TRUE | FALSE | TRUE |
| Bacteroidota | 0.05402764 | 0.9844918 | NA | 0.2306612 | NA | 4.268129 | 1 | 2.670729e-04 | 1 | 0.0016024374 | FALSE | TRUE | FALSE | TRUE |
| Campylobacterota | -0.26420533 | 0.9675136 | NA | 0.2062888 | NA | 4.690092 | 1 | 2.903416e-04 | 1 | 0.0016024374 | FALSE | TRUE | TRUE | TRUE |
| Fusobacteriota | 0.22421631 | -0.6083450 | NA | 0.2296486 | NA | -2.649026 | 1 | 1.354560e-02 | 1 | 0.0406367972 | FALSE | TRUE | TRUE | FALSE |
| Pseudomonadota | 1.45908988 | -1.2852381 | NA | 0.3475817 | NA | -3.697658 | 1 | 3.697809e-04 | 1 | 0.0016024374 | FALSE | TRUE | FALSE | TRUE |
8.4.3 Differences with metagenomics at phylum level within each species
8.4.3.1 Cnephaeus
physeq_eb <- subset_samples(merged_physeq_phylum, Species == "Eb")
physeq_eb <- prune_taxa(taxa_sums(physeq_eb)>0, physeq_eb)ancom_rand_output_struc_eb = ancombc2(data = physeq_eb,
assay_name = "counts",
tax_level = NULL,
fix_formula = "method",
p_adj_method = "holm",
pseudo_sens = TRUE,
prv_cut =0,
lib_cut = 0,
s0_perc = 0.05,
group = "method",
struc_zero = TRUE,
neg_lb = FALSE,
alpha = 0.05,
n_cl = 2,
verbose = TRUE,
global = FALSE,
pairwise = FALSE,
dunnet = FALSE,
trend = FALSE,
iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
em_control = list(tol = 1e-5, max_iter = 100),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
trend_control = NULL)Total structural zeros
structural_zeros <- ancom_rand_output_struc_eb$zero_ind
structural_zeros_df <- as.data.frame(structural_zeros) %>%
column_to_rownames(., "taxon")
structural_zero_taxa <- rownames(structural_zeros_df)[apply(structural_zeros_df, 1, any)]
structural_zero_groups <- structural_zeros_df[structural_zero_taxa, , drop = FALSE] %>%
rename("amplicon"=1) %>%
rename("metagenomics"=2)
nrow(structural_zero_groups)[1] 5
Structural zeros in amplicon
[1] 3
structural_zero_groups %>%
filter(amplicon==TRUE) %>%
rownames_to_column(., "phylum") %>%
left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>%
select(phylum) %>%
pull()[1] "Deferribacterota" "Elusimicrobiota" "Planctomycetota"
Structural zeros in metagenomics
structural_zero_groups %>%
filter(metagenomics==TRUE) %>%
rownames_to_column(., "phylum") %>%
left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>%
select(phylum) %>%
pull()[1] "Patescibacteria" "Rs-K70 termite group"
Enriched phyla
taxon lfc_(Intercept) lfc_methodMetagenomics se_(Intercept) se_methodMetagenomics W_(Intercept) W_methodMetagenomics p_(Intercept)
1 Bacillota 0.48669986 -0.5156488 0.4972809 0.8086622 0.9787223 -0.6376566 0.343239515
2 Bacteroidota 0.04860922 0.6073777 0.4793397 0.6679996 0.1014087 0.9092485 0.921449332
3 Campylobacterota -0.09388111 1.3159905 0.4705364 0.6107480 -0.1995193 2.1547195 0.846291508
4 Desulfobacterota 0.08059598 0.9210460 0.5170375 0.7117388 0.1558803 1.2940786 0.878521903
5 Fusobacteriota 0.37809823 -0.5307444 0.5014983 0.6423967 0.7539372 -0.8261941 0.466719515
6 Pseudomonadota 0.68565597 -0.1137261 0.6077870 0.8395206 1.1281188 -0.1354656 0.274082558
7 Synergistota 1.64754965 -1.3691382 0.2345994 0.4345869 7.0228222 -3.1504357 0.005930757
p_methodMetagenomics q_(Intercept) q_methodMetagenomics diff_(Intercept) diff_methodMetagenomics passed_ss_(Intercept) passed_ss_methodMetagenomics
1 0.53331077 1.0000000 1.0000000 FALSE FALSE FALSE TRUE
2 0.38692024 1.0000000 1.0000000 FALSE FALSE TRUE TRUE
3 0.05957734 1.0000000 0.3587369 FALSE FALSE TRUE TRUE
4 0.21815599 1.0000000 1.0000000 FALSE FALSE TRUE TRUE
5 0.42625118 1.0000000 1.0000000 FALSE FALSE TRUE TRUE
6 0.89374720 1.0000000 1.0000000 FALSE FALSE TRUE TRUE
7 0.05124813 0.0415153 0.3587369 TRUE FALSE FALSE FALSE
8.4.3.2 Pipistrellus
physeq_pk <- subset_samples(merged_physeq_phylum, Species == "Pk")
physeq_pk <- prune_taxa(taxa_sums(physeq_pk)>0, physeq_pk)ancom_rand_output_struc_pk = ancombc2(data = physeq_pk,
assay_name = "counts",
tax_level = NULL,
fix_formula = "method",
p_adj_method = "holm",
pseudo_sens = TRUE,
prv_cut =0,
lib_cut = 0,
s0_perc = 0.05,
group = "method",
struc_zero = TRUE,
neg_lb = FALSE,
alpha = 0.05,
n_cl = 2,
verbose = TRUE,
global = FALSE,
pairwise = FALSE,
dunnet = FALSE,
trend = FALSE,
iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
em_control = list(tol = 1e-5, max_iter = 100),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
trend_control = NULL)Total structural zeros
structural_zeros <- ancom_rand_output_struc_pk$zero_ind
structural_zeros_df <- as.data.frame(structural_zeros) %>%
column_to_rownames(., "taxon")
structural_zero_taxa <- rownames(structural_zeros_df)[apply(structural_zeros_df, 1, any)]
structural_zero_groups <- structural_zeros_df[structural_zero_taxa, , drop = FALSE] %>%
rename("amplicon"=1) %>%
rename("metagenomics"=2)
nrow(structural_zero_groups)[1] 2
Structural zeros in amplicon
[1] 1
structural_zero_groups %>%
filter(amplicon==TRUE) %>%
rownames_to_column(., "phylum") %>%
left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>%
select(phylum) %>%
pull()[1] "Cyanobacteriota"
Structural zeros in metagenomics
structural_zero_groups %>%
filter(metagenomics==TRUE) %>%
rownames_to_column(., "phylum") %>%
left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>%
select(phylum) %>%
pull()[1] "Rs-K70 termite group"
Enriched phyla
| taxon | lfc_(Intercept) | lfc_methodMetagenomics | se_(Intercept) | se_methodMetagenomics | W_(Intercept) | W_methodMetagenomics | p_(Intercept) | p_methodMetagenomics | q_(Intercept) | q_methodMetagenomics | diff_(Intercept) | diff_methodMetagenomics | passed_ss_(Intercept) | passed_ss_methodMetagenomics |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Bacillota | 1.788892 | -1.387561 | 0.28713994 | 0.4803160 | 6.230036 | -2.88885 | 4.336663e-07 | 6.683597e-03 | 2.601998e-06 | 4.010158e-02 | TRUE | TRUE | TRUE | TRUE |
| Bacteroidota | -2.074828 | 4.144439 | 0.02690928 | 0.3105124 | -77.104531 | 13.34709 | 5.253686e-14 | 3.095963e-07 | 3.677580e-13 | 2.167174e-06 | TRUE | TRUE | TRUE | TRUE |
8.4.3.3 Hypsugo
physeq_ha <- subset_samples(merged_physeq_phylum, Species == "Ha")
physeq_ha <- prune_taxa(taxa_sums(physeq_ha)>0, physeq_ha)ancom_rand_output_struc_ha = ancombc2(data = physeq_ha,
assay_name = "counts",
tax_level = NULL,
fix_formula = "method",
p_adj_method = "holm",
pseudo_sens = TRUE,
prv_cut =0,
lib_cut = 0,
s0_perc = 0.05,
group = "method",
struc_zero = TRUE,
neg_lb = FALSE,
alpha = 0.05,
n_cl = 2,
verbose = TRUE,
global = FALSE,
pairwise = FALSE,
dunnet = FALSE,
trend = FALSE,
iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
em_control = list(tol = 1e-5, max_iter = 100),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
trend_control = NULL)Total structural zeros
structural_zeros <- ancom_rand_output_struc_ha$zero_ind
structural_zeros_df <- as.data.frame(structural_zeros) %>%
column_to_rownames(., "taxon")
structural_zero_taxa <- rownames(structural_zeros_df)[apply(structural_zeros_df, 1, any)]
structural_zero_groups <- structural_zeros_df[structural_zero_taxa, , drop = FALSE] %>%
rename("amplicon"=1) %>%
rename("metagenomics"=2)
nrow(structural_zero_groups)[1] 7
Structural zeros in amplicon
[1] 2
structural_zero_groups %>%
filter(amplicon==TRUE) %>%
rownames_to_column(., "phylum") %>%
left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>%
select(phylum) %>%
pull()[1] "Actinomycetota" "Spirochaetota"
Structural zeros in metagenomics
structural_zero_groups %>%
filter(metagenomics==TRUE) %>%
rownames_to_column(., "phylum") %>%
left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>%
select(phylum) %>%
pull()[1] "Campylobacterota" "Desulfobacterota" "Fusobacteriota" "Rs-K70 termite group" "Synergistota"
Enriched phyla
| taxon | lfc_(Intercept) | lfc_methodMetagenomics | se_(Intercept) | se_methodMetagenomics | W_(Intercept) | W_methodMetagenomics | p_(Intercept) | p_methodMetagenomics | q_(Intercept) | q_methodMetagenomics | diff_(Intercept) | diff_methodMetagenomics | passed_ss_(Intercept) | passed_ss_methodMetagenomics |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Pseudomonadota | 1.23417 | -1.979773 | NA | 0.3427585 | NA | -5.775998 | 1 | 1.523765e-06 | 1 | 4.571296e-06 | FALSE | TRUE | FALSE | TRUE |