Chapter 9 Metabarcoding selected filtering based on beta diversity (t4_p0)
load("resources/metagenomics/data_filtered.Rdata")
load("resources/amplicon/selected_filtering_beta.Rdata")
species_color <- c("#e5bd5b", "#6b7398", "#76b183")Number of MAGs
79
Number of Archaea phyla
genome_metadata_amplicon %>%
filter(domain == "Archaea")%>%
dplyr::select(phylum) %>%
unique() %>%
pull() %>%
length()%>%
cat()1
Number of Bacteria phyla
genome_metadata_amplicon %>%
filter(domain == "Bacteria")%>%
dplyr::select(phylum) %>%
unique() %>%
pull() %>%
length()%>%
cat()10
9.1 Taxonomy
genome_counts_filt_amplicon %>%
mutate_at(vars(-genome), ~ . / sum(.)) %>%
pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
left_join(., genome_metadata_amplicon, by = join_by(genome == genome)) %>%
left_join(., sample_metadata_amplicon, by = join_by(sample == sample)) %>%
filter(count > 0) %>% #filter 0 counts
ggplot(., aes(
x = sample,
y = count,
fill = phylum,
group = phylum
)) +
geom_bar(stat = "identity",
colour = "white",
linewidth = 0.1) +
scale_fill_manual(values = phylum_colors) +
facet_nested( ~ factor(
Species,
labels = c("Eb" = "Cnephaeus", "Ha" = "Hypsugo", "Pk" = "Pipistrellus")
), scales = "free") +
guides(fill = guide_legend(ncol = 1)) +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.text = element_text(size = 10),
strip.background = element_rect(fill = "white"),
strip.text = element_text(
size = 12,
lineheight = 0.6,
face = "bold"
),
axis.line = element_line(
linewidth = 0.5,
linetype = "solid",
colour = "black"
)
) +
labs(fill = "Phylum", y = "Relative abundance", x = "Samples")+
guides(fill = guide_legend(ncol = 1))Grouping low-abundance bacteria
p1 <- genome_counts_filt %>%
mutate_at(vars(-genome), ~ . / sum(.)) %>%
pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
left_join(genome_metadata, by = join_by(genome == genome)) %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
filter(count > 0) %>%
group_by(sample, phylum) %>%
mutate(total_abundance = sum(count)) %>%
ungroup() %>%
mutate(phylum = if_else(total_abundance < 0.01, "Other", phylum)) %>%
group_by(sample, phylum, Species) %>%
summarise(count = sum(count), .groups = "drop") %>%
ggplot(aes(
x = sample,
y = count,
fill = phylum,
group = phylum
)) +
geom_bar(stat = "identity",
colour = "white",
linewidth = 0.1) +
scale_fill_manual(values = c(phylum_colors, "Other" = "grey50"),
drop = FALSE)+
facet_nested(~ factor(
Species,
labels = c("Eb" = "Cnephaeus", "Ha" = "Hypsugo", "Pk" = "Pipistrellus")
), scales = "free") +
guides(fill = guide_legend(ncol = 1)) +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.text = element_text(size = 10),
strip.background = element_rect(fill = "white"),
strip.text = element_text(
size = 12,
lineheight = 0.6,
face = "bold"
),
axis.line = element_line(
linewidth = 0.5,
linetype = "solid",
colour = "black"
)
) +
labs(fill = "Phylum", y = "Relative abundance", x = "Samples")+
guides(fill = guide_legend(ncol = 1))
#ggsave("community_plot_grouped_selected.pdf", plot = p1, width = 12, height = 6)
p1
Phylum relative abundances
phylum_summary <- genome_counts_filt_amplicon %>%
mutate_at(vars(-genome),~./sum(.)) %>%
pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
left_join(sample_metadata_amplicon, by = join_by(sample == sample)) %>%
left_join(genome_metadata_amplicon, by = join_by(genome == genome)) %>%
group_by(sample,phylum,Species) %>%
summarise(relabun=sum(count))phylum_summary %>%
group_by(phylum) %>%
summarise(
Total_mean = mean(relabun * 100, na.rm = T),
Total_sd = sd(relabun * 100, na.rm = T),
Eb_mean = mean(relabun[Species == "Eb"] * 100, na.rm = T),
Eb_sd = sd(relabun[Species == "Eb"] * 100, na.rm = T),
Ha_mean = mean(relabun[Species == "Ha"] * 100, na.rm = T),
Ha_sd = sd(relabun[Species == "Ha"] * 100, na.rm = T),
Pk_mean = mean(relabun[Species == "Pk"] * 100, na.rm = T),
Pk_sd = sd(relabun[Species == "Pk"] * 100, na.rm = T)
) %>%
mutate(
Total = str_c(round(Total_mean, 3), "±", round(Total_sd, 3)),
Cnephaeus = str_c(round(Eb_mean, 3), "±", round(Eb_sd, 3)),
Hypsugo = str_c(round(Ha_mean, 3), "±", round(Ha_sd, 3)),
Pipistrellus = str_c(round(Pk_mean, 3), "±", round(Pk_sd, 3))
) %>%
arrange(-Total_mean) %>%
dplyr::select(phylum,
Total,
Cnephaeus,
Hypsugo,
Pipistrellus) %>%
tt()| phylum | Total | Cnephaeus | Hypsugo | Pipistrellus |
|---|---|---|---|---|
| Pseudomonadota | 65.046±30.493 | 69.57±38.576 | 72.4±22.037 | 56.639±32.048 |
| Bacillota | 23.641±25.626 | 22.74±31.991 | 21.12±17.56 | 26.227±29.111 |
| Bacteroidota | 3.421±9.565 | 0.219±0.514 | 5.097±13.327 | 3.429±7.62 |
| Fusobacteriota | 2.601±8.882 | 6.14±13.202 | 0.015±0.062 | 3.227±10.035 |
| Halobacterota | 1.193±8.517 | 0±0 | 0±0 | 2.765±12.968 |
| Campylobacterota | 0.983±6.993 | 0±0 | 0±0 | 2.279±10.646 |
| Desulfobacterota | 0.886±3.639 | 0.875±2.742 | 0.009±0.041 | 1.649±5.196 |
| Rs-K70 termite group | 0.823±3.169 | 0.457±1.446 | 0.496±2.16 | 1.271±4.318 |
| Cyanobacteriota | 0.613±4.33 | 0±0 | 0.005±0.023 | 1.417±6.592 |
| Verrucomicrobiota | 0.473±3.38 | 0±0 | 0±0 | 1.097±5.146 |
| Actinomycetota | 0.32±2.284 | 0±0 | 0.859±3.742 | 0±0 |
bats = c("Eb", "Pk", "Ha")
total_asvs <- data.frame(
Bat = character(),
ASVs = numeric(),
Phylum = numeric(),
Family = numeric(),
Genus = numeric()
)
preabs_table <- genome_counts_filt_amplicon %>%
mutate(across(-genome, ~ . / sum(.))) %>%
column_to_rownames("genome") %>%
mutate(across(everything(), ~ as.integer(. > 0))) %>%
t() %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
left_join(sample_metadata_amplicon[c("sample", "Species")], by = "sample") %>%
group_by(Species) %>%
summarize(across(-sample, sum), .groups = "drop") %>%
column_to_rownames("Species") %>%
t() %>%
as.data.frame() %>%
rownames_to_column("genome") %>%
left_join(genome_metadata_amplicon, by = join_by("genome" == "genome"))
# %>%
# filter(domain=="Bacteria")
phylum <- preabs_table %>%
distinct(phylum)
family <- preabs_table %>%
distinct(phylum, class, order, family)
genus <- preabs_table %>%
distinct(phylum, class, order, family, genus)
total_asvs <- rbind(
total_asvs,
data.frame(
Bat = "Total",
ASVs = nrow(preabs_table),
Phylum = nrow(phylum),
Family = nrow(family),
Genus = nrow(genus)
)
)
for (bat in bats) {
number <- preabs_table %>%
select({{bat}}) %>%
filter(. >= 1)
phylum <- preabs_table %>%
select({{bat}}, phylum) %>%
filter(!!sym(bat) >= 1) %>%
distinct(phylum)
family <- preabs_table %>%
select({{bat}}, phylum, class, order, family) %>%
filter(!!sym(bat) >= 1) %>%
distinct(phylum, class, order, family)
genus <- preabs_table %>%
select({{bat}}, phylum, class, order, family, genus) %>%
filter(!!sym(bat) >= 1) %>%
distinct(phylum, class, order, family, genus)
total_asvs <- rbind(
total_asvs,
data.frame(
Bat = bat,
ASVs = nrow(number),
Phylum = nrow(phylum),
Family = nrow(family),
Genus = nrow(genus)
)
)
}bats = c("Eb", "Pk", "Ha")
no_annotation <- data.frame(Bat = character(),
No_genus = numeric(),
No_species = numeric())
preabs_table <- genome_counts_filt_amplicon %>%
mutate(across(-genome, ~ . / sum(.))) %>%
column_to_rownames("genome") %>%
mutate(across(everything(), ~ as.integer(. > 0))) %>%
t() %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
left_join(sample_metadata_amplicon[c("sample", "Species")], by = "sample") %>%
group_by(Species) %>%
summarize(across(-sample, sum), .groups = "drop") %>%
column_to_rownames("Species") %>%
t() %>%
as.data.frame() %>%
rownames_to_column("genome") %>%
left_join(genome_metadata_amplicon, by = join_by("genome" == "genome"))
# %>%
# filter(domain=="Bacteria")
genus <- preabs_table %>%
filter(is.na(genus))
species <- preabs_table %>%
filter(is.na(species))
no_annotation <- rbind(no_annotation,
data.frame(
Bat = "Total",
No_genus = nrow(genus),
No_species = nrow(species)
))
for (bat in bats) {
number <- preabs_table %>%
select({{bat}}) %>%
filter(. >= 1)
genus <- preabs_table %>%
select({{bat}}, phylum, class, order, family, genus) %>%
filter(!!sym(bat) >= 1) %>%
filter(is.na(genus))
species <- preabs_table %>%
filter(!!sym(bat) >= 1) %>%
filter(is.na(species))
no_annotation <- rbind(no_annotation,
data.frame(
Bat = bat,
No_genus = nrow(genus),
No_species = nrow(species)
))
}Total percentage of ASVs without genus-level annotation
nongenera <- genome_metadata_amplicon %>%
filter(domain=="Bacteria") %>%
filter(is.na(genus)) %>%
summarize(ASV_nogenera = n()) %>%
pull()
nasvs <- total_asvs %>%
filter(Bat == "Total") %>%
select(ASVs) %>%
pull()
perct <- nongenera * 100 / nasvs
cat(perct)15.18987
Percentage of ASVs without genus-level annotation by phylum
total_asv_phylum <- genome_metadata_amplicon %>%
filter(domain=="Bacteria") %>%
group_by(phylum) %>%
summarize(Total_ASVs = n())
genome_metadata_amplicon %>%
filter(is.na(genus)) %>%
group_by(phylum) %>%
summarize(ASVs_nogenus = n()) %>%
left_join(total_asv_phylum, by = join_by(phylum == phylum)) %>%
mutate(Percentage_nogenus = 100 * ASVs_nogenus / Total_ASVs) %>%
tt()| phylum | ASVs_nogenus | Total_ASVs | Percentage_nogenus |
|---|---|---|---|
| Pseudomonadota | 11 | 50 | 22 |
| Rs-K70 termite group | 1 | 1 | 100 |
Number of bacterial species
genome_metadata_amplicon %>%
filter(domain == "Bacteria")%>%
dplyr::select(species) %>%
unique() %>%
drop_na() %>%
pull() %>%
length() %>%
cat()10
Total percentage of ASVs without species-level annotation
nonspecies <- genome_metadata_amplicon %>%
filter(domain=="Bacteria") %>%
filter(is.na(species)) %>%
summarize(ASV_nospecies = n()) %>%
pull()
perct <- nonspecies * 100 / nasvs
cat(perct)83.5443
ASVs without species-level annotation
total_mag_phylum <- genome_metadata_amplicon %>%
filter(domain=="Bacteria") %>%
group_by(phylum) %>%
summarize(ASVs_total = n())
genome_metadata_amplicon %>%
filter(domain=="Bacteria") %>%
filter(is.na(species)) %>%
group_by(phylum) %>%
summarize(ASVs_nospecies = n()) %>%
left_join(total_mag_phylum, by = join_by(phylum == phylum)) %>%
mutate(Species_annotated = ASVs_total - ASVs_nospecies) %>%
mutate(Percentage_nospecies = 100 * ASVs_nospecies / ASVs_total) %>%
mutate(Percentage_species = 100 - 100 * ASVs_nospecies / ASVs_total) %>%
tt()| phylum | ASVs_nospecies | ASVs_total | Species_annotated | Percentage_nospecies | Percentage_species |
|---|---|---|---|---|---|
| Actinomycetota | 1 | 1 | 0 | 100.00000 | 0.000000 |
| Bacillota | 14 | 15 | 1 | 93.33333 | 6.666667 |
| Bacteroidota | 5 | 6 | 1 | 83.33333 | 16.666667 |
| Campylobacterota | 1 | 1 | 0 | 100.00000 | 0.000000 |
| Cyanobacteriota | 1 | 1 | 0 | 100.00000 | 0.000000 |
| Desulfobacterota | 1 | 1 | 0 | 100.00000 | 0.000000 |
| Pseudomonadota | 41 | 50 | 9 | 82.00000 | 18.000000 |
| Rs-K70 termite group | 1 | 1 | 0 | 100.00000 | 0.000000 |
| Verrucomicrobiota | 1 | 1 | 0 | 100.00000 | 0.000000 |
9.1.1 Summary table
bats = c("Eb", "Pk", "Ha")
single_sp <- data.frame(Bat = character(), Single_species = numeric())
# bacteria <- genome_metadata_amplicon %>%
# filter(domain=="Bacteria")
table_upset_analysis <- genome_counts_filt_amplicon %>%
mutate(across(-genome, ~ . / sum(.))) %>%
# filter(genome %in% bacteria$genome) %>%
column_to_rownames("genome") %>%
mutate(across(everything(), ~ as.integer(. > 0))) %>%
t() %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
left_join(sample_metadata_amplicon[c("sample", "Species")], by = "sample") %>%
group_by(Species) %>%
summarize(across(-sample, sum), .groups = "drop") %>%
column_to_rownames("Species") %>%
mutate(across(everything(), ~ as.integer(. > 0))) %>%
t() %>%
as.data.frame() %>%
rownames_to_column("sample")
unique_all <- table_upset_analysis %>%
filter(rowSums(across(Eb:Pk)) == 1)
single_sp <- rbind(single_sp, data.frame(Bat = "Total",
Single_species = nrow(unique_all)))
for (bat in bats) {
unique <- table_upset_analysis %>%
filter(rowSums(across(Eb:Pk)) == 1) %>%
column_to_rownames(., "sample") %>%
select(bat) %>%
filter(. > 0) %>%
nrow()
single_sp <- rbind(single_sp, data.frame(Bat = bat, Single_species = unique))
}single_ind <- data.frame(Bat = character(), Single_individual = numeric())
freq_table <- genome_counts_filt_amplicon %>%
mutate(across(-genome, ~ . / sum(.))) %>%
# filter(genome %in% bacteria$genome) %>%
column_to_rownames("genome") %>%
mutate(across(everything(), ~ as.integer(. > 0))) %>%
t() %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
left_join(sample_metadata_amplicon[c("sample", "Species")], by = "sample") %>%
group_by(Species) %>%
summarize(across(-sample, sum), .groups = "drop") %>%
column_to_rownames("Species") %>%
t() %>%
as.data.frame() %>%
rownames_to_column("asv")
singleton_filt <- freq_table %>%
rowwise() %>%
mutate(row_sum = sum(c_across(-asv))) %>%
filter(row_sum == 1) %>%
column_to_rownames(var = "asv") %>%
filter(row_sum == 1)
single_ind <- rbind(single_ind, data.frame(
Bat = "Total",
Single_individual = nrow(singleton_filt)
))
for (bat in bats) {
singleton_filt <- freq_table %>%
rowwise() %>%
mutate(row_sum = sum(c_across(-asv))) %>%
filter(row_sum == 1) %>%
column_to_rownames(var = "asv") %>%
select(bat) %>%
filter(. == 1)
single_ind <- rbind(single_ind, data.frame(
Bat = bat,
Single_individual = nrow(singleton_filt)
))
}summary_table_ampli <- total_asvs %>%
left_join(., no_annotation, by = "Bat") %>%
left_join(., single_ind, by = "Bat") %>%
left_join(., single_sp, by = "Bat") %>%
mutate(Bat = recode(Bat,
Eb = "Cnephaeus bottae",
Ha = "Hypsugo ariel",
Pk = "Pipistrellus kuhlii"))
summary_table_ampli %>%
tt()| Bat | ASVs | Phylum | Family | Genus | No_genus | No_species | Single_individual | Single_species |
|---|---|---|---|---|---|---|---|---|
| Total | 79 | 11 | 36 | 50 | 12 | 67 | 13 | 25 |
| Cnephaeus bottae | 43 | 6 | 22 | 29 | 5 | 37 | 1 | 2 |
| Pipistrellus kuhlii | 62 | 10 | 30 | 40 | 9 | 56 | 5 | 10 |
| Hypsugo ariel | 62 | 8 | 31 | 41 | 10 | 55 | 7 | 13 |
total_asv <- nrow(genome_metadata_amplicon)
summary_table_ampli %>%
select(-Phylum,-Family, -Genus) %>%
rowwise() %>%
mutate(ASV_perc=round(ASVs*100/total_asv, 2))%>%
mutate(No_genus_perc=round(No_genus*100/ASVs, 2))%>%
mutate(No_species_perc=round(No_species*100/ASVs, 2)) %>%
mutate(Single_individual_perc=round(Single_individual*100/ASVs, 2))%>%
mutate(Single_species_perc=round(Single_species*100/ASVs, 2)) %>%
mutate(Single_individual_per_unique=round(Single_individual*100/Single_species, 2)) %>%
select(1,7:12) %>% tt()| Bat | ASV_perc | No_genus_perc | No_species_perc | Single_individual_perc | Single_species_perc | Single_individual_per_unique |
|---|---|---|---|---|---|---|
| Total | 100.00 | 15.19 | 84.81 | 16.46 | 31.65 | 52.00 |
| Cnephaeus bottae | 54.43 | 11.63 | 86.05 | 2.33 | 4.65 | 50.00 |
| Pipistrellus kuhlii | 78.48 | 14.52 | 90.32 | 8.06 | 16.13 | 50.00 |
| Hypsugo ariel | 78.48 | 16.13 | 88.71 | 11.29 | 20.97 | 53.85 |
Shared phyla
meta_phylum <- genome_metadata %>%
distinct(domain,phylum)
genome_metadata_amplicon %>%
distinct(domain,phylum) %>%
filter(phylum %in% meta_phylum$phylum) domain phylum
1 Bacteria Pseudomonadota
2 Bacteria Fusobacteriota
3 Bacteria Bacillota
4 Bacteria Desulfobacterota
5 Bacteria Bacteroidota
6 Bacteria Cyanobacteriota
7 Bacteria Campylobacterota
8 Bacteria Actinomycetota
Only in metagenomic data
ampli_phylum <- genome_metadata_amplicon %>%
distinct(domain,phylum)
genome_metadata %>%
distinct(domain,phylum) %>%
filter(!phylum %in% genome_metadata_amplicon$phylum)# A tibble: 5 × 2
domain phylum
<chr> <chr>
1 Bacteria Elusimicrobiota
2 Bacteria Deferribacterota
3 Bacteria Planctomycetota
4 Bacteria Spirochaetota
5 Bacteria Synergistota
Only in metabarcoding selected data
domain phylum
1 Bacteria Rs-K70 termite group
2 Archaea Halobacterota
3 Bacteria Verrucomicrobiota
9.1.2 Differences at family level
Number of families in metagenomic data
56
Number of families in metabarcoding selected data
36
Shared families
meta_family <- genome_metadata %>%
distinct(domain,phylum, family)
genome_metadata_amplicon %>%
distinct(domain,phylum, family) %>%
filter(family %in% meta_family$family) domain phylum family
1 Bacteria Pseudomonadota Diplorickettsiaceae
2 Bacteria Pseudomonadota Vibrionaceae
3 Bacteria Pseudomonadota Enterobacteriaceae
4 Bacteria Fusobacteriota Leptotrichiaceae
5 Bacteria Bacillota Enterococcaceae
6 Bacteria Pseudomonadota Morganellaceae
7 Bacteria Pseudomonadota Pasteurellaceae
8 Bacteria Pseudomonadota Aeromonadaceae
9 Bacteria Bacillota Mycoplasmataceae
10 Bacteria Bacillota Streptococcaceae
11 Bacteria Pseudomonadota Rickettsiaceae
12 Bacteria Pseudomonadota Rhizobiaceae
13 Bacteria Desulfobacterota Desulfovibrionaceae
14 Bacteria Bacteroidota Dysgonomonadaceae
15 Bacteria Bacillota Lachnospiraceae
16 Bacteria Pseudomonadota Burkholderiaceae
17 Bacteria Campylobacterota Helicobacteraceae
18 Bacteria Actinomycetota Micrococcaceae
19 Bacteria Bacillota Clostridiaceae
20 Bacteria Bacteroidota Weeksellaceae
21 Bacteria Pseudomonadota Acetobacteraceae
22 Bacteria Pseudomonadota Halomonadaceae
23 Bacteria Pseudomonadota Neisseriaceae
Only in metagenomic data
ampli_family <- genome_metadata_amplicon %>%
distinct(domain,phylum,family)
genome_metadata %>%
distinct(domain,phylum,family) %>%
filter(!family %in% ampli_family$family)# A tibble: 33 × 3
domain phylum family
<chr> <chr> <chr>
1 Bacteria Pseudomonadota ""
2 Bacteria Bacteroidota "Bacteroidaceae"
3 Bacteria Campylobacterota "Campylobacteraceae"
4 Bacteria Pseudomonadota "Rhodocyclaceae"
5 Bacteria Bacillota "Erysipelotrichaceae"
6 Bacteria Elusimicrobiota "Elusimicrobiaceae"
7 Bacteria Bacillota "Metamycoplasmataceae"
8 Bacteria Bacillota "Acutalibacteraceae"
9 Bacteria Pseudomonadota "CAG-239"
10 Bacteria Bacillota ""
# ℹ 23 more rows
Only in metabarcoding selected data
genome_metadata_amplicon %>%
distinct(domain,phylum,family) %>%
filter(!family %in% meta_family$family) domain phylum family
1 Bacteria Pseudomonadota Yersiniaceae
2 Bacteria Bacillota Bacillaceae
3 Bacteria Rs-K70 termite group <NA>
4 Bacteria Bacillota Entomoplasmataceae
5 Bacteria Cyanobacteriota Microcystaceae
6 Bacteria Pseudomonadota Pseudomonadaceae
7 Bacteria Pseudomonadota <NA>
8 Archaea Halobacterota Methanosarcinaceae
9 Bacteria Pseudomonadota Oxalobacteraceae
10 Bacteria Pseudomonadota Erwiniaceae
11 Bacteria Bacillota Lactobacillaceae
12 Bacteria Verrucomicrobiota Simkaniaceae
13 Bacteria Pseudomonadota Orbaceae
9.2 Alpha Diversity
9.2.1 ASV level
richness <- genome_counts_filt_amplicon %>%
column_to_rownames(var = "genome") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 0) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(richness = 1) %>%
rownames_to_column(var = "sample")
neutral <- genome_counts_filt_amplicon %>%
column_to_rownames(var = "genome") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 1) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(neutral = 1) %>%
rownames_to_column(var = "sample")
metabarcoding_selected_alpha_div <- richness %>%
full_join(neutral, by = join_by(sample == sample)) %>%
mutate(method="Metabarcoding_selected_beta")Correlation between diversity and estimate microbial fraction
fraction_ampli <- microbial_fraction %>%
select(sample, estimated_microbial_fraction) %>%
right_join(metabarcoding_selected_alpha_div, by="sample") %>%
left_join(sample_metadata_amplicon, by="sample")
cor.test(fraction_ampli$estimated_microbial_fraction, fraction_ampli$richness, method = "spearman")
Spearman's rank correlation rho
data: fraction_ampli$estimated_microbial_fraction and fraction_ampli$richness
S = 20749, p-value = 0.05981
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
-0.2796438
Spearman's rank correlation rho
data: fraction_ampli$estimated_microbial_fraction and fraction_ampli$neutral
S = 22184, p-value = 0.01226
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
-0.3681159
p1_ampli <- ggplot(fraction_ampli, aes(x = estimated_microbial_fraction, y = richness)) +
geom_point(size = 3, color = "#2E86AB") +
geom_smooth(method = "lm", se = TRUE, color = "black") +
theme_classic() +
labs(x = "Read Fraction (%)", y = "Richness", title= "Correlation between estimated microbial fraction and diversity")
p2_ampli <- ggplot(fraction_ampli, aes(x = estimated_microbial_fraction, y = neutral)) +
geom_point(size = 3, color = "#F39C12") +
geom_smooth(method = "lm", se = TRUE, color = "black") +
theme_classic() +
labs(x = "Read Fraction (%)", y = "Neutral Alpha Diversity")
p1_ampli + p2_ampli richness %>%
full_join(neutral, by = join_by(sample == sample)) %>%
pivot_longer(-sample, names_to = "alpha", values_to = "value") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
group_by(alpha) %>%
summarise(
total_mean = mean(value, na.rm = T),
total_sd = sd(value, na.rm = T),
Eb_mean = mean(value[Species == "Eb"], na.rm = T),
Eb_sd = sd(value[Species == "Eb"], na.rm = T),
Ha_mean = mean(value[Species == "Ha"], na.rm = T),
Ha_sd = sd(value[Species == "Ha"], na.rm = T),
Pk_mean = mean(value[Species == "Pk"], na.rm = T),
Pk_sd = sd(value[Species == "Pk"], na.rm = T)
) %>%
mutate(
Total = str_c(round(total_mean, 2), "±", round(total_sd, 2)),
Cnephaeus = str_c(round(Eb_mean, 2), "±", round(Eb_sd, 2)),
Hypsugo = str_c(round(Ha_mean, 2), "±", round(Ha_sd, 2)),
Pipistrellus = str_c(round(Pk_mean, 2), "±", round(Pk_sd, 2))
) %>%
arrange(-Eb_mean) %>%
dplyr::select(alpha, Total, Cnephaeus, Pipistrellus, Hypsugo) %>%
tt()| alpha | Total | Cnephaeus | Pipistrellus | Hypsugo |
|---|---|---|---|---|
| richness | 10.61±4.95 | 10.1±5.49 | 9.55±5.29 | 12.11±4.07 |
| neutral | 4.24±2.16 | 3.51±1.96 | 3.89±2.15 | 5.03±2.14 |
9.2.2 Different taxonomic level
sample_metadata_amplicon <- sample_metadata_amplicon[c(1,9,10,13)] %>%
mutate(sample = paste0(sample, "_ampli_selec")) %>%
mutate(method="Metabarcoding_selected_beta")
genome_metadata_amplicon <- genome_metadata_amplicon %>%
mutate(
class = if_else(is.na(class), paste0(phylum, "_Unclassified"), class),
order = if_else(is.na(order),
if_else(str_detect(class, "Unclassified"), class, paste0(class, "_Unclassified")),
order),
family = if_else(is.na(family),
if_else(str_detect(order, "Unclassified"), order, paste0(order, "_Unclassified")),
family),
genus = if_else(is.na(genus),
if_else(str_detect(family, "Unclassified"), family, paste0(family, "_Unclassified")),
genus)
)
genome_tax_ampli <- genome_counts_filt_amplicon %>%
column_to_rownames(., "genome")%>%
rename_with(~ paste0(., "_ampli_selec")) %>%
rownames_to_column(., "genome")%>%
left_join(genome_metadata_amplicon, by = "genome")
family_level_agg_ampli <- genome_tax_ampli %>%
select(-genome) %>%
group_by(phylum, class, family) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame() %>%
select(-c(phylum,class))
phylum_level_agg_ampli <- genome_tax_ampli %>%
select(-genome) %>%
group_by(phylum) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame()9.2.2.1 Family level
richness_f_ampli <- family_level_agg_ampli %>%
column_to_rownames(var = "family") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 0) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(richness = 1) %>%
rownames_to_column(var = "sample")
neutral_f_ampli <- family_level_agg_ampli %>%
column_to_rownames(var = "family") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 1) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(neutral = 1) %>%
rownames_to_column(var = "sample")
alpha_div_f_ampli <- richness_f_ampli %>%
full_join(neutral_f_ampli, by = join_by(sample == sample))
alpha_div_f_ampli_data <- alpha_div_f_ampli %>%
left_join(sample_metadata_amplicon, by = join_by(sample == sample))Diversity values
alpha_div_f_ampli %>%
pivot_longer(-sample, names_to = "alpha", values_to = "value") %>%
left_join(sample_metadata_amplicon, by = join_by(sample == sample)) %>%
group_by(alpha) %>%
summarise(
total_mean = mean(value, na.rm = T),
total_sd = sd(value, na.rm = T),
Eb_mean = mean(value[Species == "Eb"], na.rm = T),
Eb_sd = sd(value[Species == "Eb"], na.rm = T),
Ha_mean = mean(value[Species == "Ha"], na.rm = T),
Ha_sd = sd(value[Species == "Ha"], na.rm = T),
Pk_mean = mean(value[Species == "Pk"], na.rm = T),
Pk_sd = sd(value[Species == "Pk"], na.rm = T)
) %>%
mutate(
Total = str_c(round(total_mean, 2), "±", round(total_sd, 2)),
Cnephaeus = str_c(round(Eb_mean, 2), "±", round(Eb_sd, 2)),
Hypsugo = str_c(round(Ha_mean, 2), "±", round(Ha_sd, 2)),
Pipistrellus = str_c(round(Pk_mean, 2), "±", round(Pk_sd, 2))
) %>%
arrange(-Eb_mean) %>%
dplyr::select(alpha, Total, Cnephaeus, Pipistrellus, Hypsugo) %>%
tt()| alpha | Total | Cnephaeus | Pipistrellus | Hypsugo |
|---|---|---|---|---|
| richness | 7.22±3.25 | 5.4±2.76 | 6.95±3.59 | 8.47±2.63 |
| neutral | 3.16±1.5 | 2.25±1.01 | 2.85±1.5 | 4±1.34 |
Richness
Model_richness <- MASS::glm.nb(richness ~ Species, data = alpha_div_f_ampli_data,trace=TRUE)
summary(Model_richness)
Call:
MASS::glm.nb(formula = richness ~ Species, data = alpha_div_f_ampli_data,
trace = TRUE, init.theta = 30.49122173, link = log)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.6864 0.1476 11.422 < 2e-16 ***
SpeciesHa 0.4506 0.1724 2.613 0.00898 **
SpeciesPk 0.2530 0.1727 1.465 0.14293
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for Negative Binomial(30.4912) family taken to be 1)
Null deviance: 57.647 on 50 degrees of freedom
Residual deviance: 50.201 on 48 degrees of freedom
AIC: 259.6
Number of Fisher Scoring iterations: 1
Theta: 30.5
Std. Err.: 30.8
2 x log-likelihood: -251.604
Analysis of Deviance Table
Model: Negative Binomial(30.4912), link: log
Response: richness
Terms added sequentially (first to last)
Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL 50 57.647
Species 2 7.4463 48 50.201 0.02416 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
$emmeans
Species emmean SE df asymp.LCL asymp.UCL
Eb 1.69 0.1480 Inf 1.40 1.98
Ha 2.14 0.0891 Inf 1.96 2.31
Pk 1.94 0.0896 Inf 1.76 2.11
Results are given on the log (not the response) scale.
Confidence level used: 0.95
$contrasts
contrast estimate SE df z.ratio p.value
Eb - Ha -0.451 0.172 Inf -2.613 0.0244
Eb - Pk -0.253 0.173 Inf -1.465 0.3079
Ha - Pk 0.198 0.126 Inf 1.564 0.2615
Results are given on the log (not the response) scale.
P value adjustment: tukey method for comparing a family of 3 estimates
Neutral
Model_neutral <- lm(formula = neutral ~ Species, data = alpha_div_f_ampli_data)
anova(Model_neutral)Analysis of Variance Table
Response: neutral
Df Sum Sq Mean Sq F value Pr(>F)
Species 2 23.811 11.9053 6.4164 0.003392 **
Residuals 48 89.061 1.8554
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
$emmeans
Species emmean SE df lower.CL upper.CL
Eb 2.25 0.431 48 1.38 3.12
Ha 4.00 0.312 48 3.37 4.63
Pk 2.85 0.290 48 2.26 3.43
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
Eb - Ha -1.748 0.532 48 -3.285 0.0053
Eb - Pk -0.595 0.520 48 -1.146 0.4908
Ha - Pk 1.153 0.427 48 2.702 0.0253
P value adjustment: tukey method for comparing a family of 3 estimates
9.2.2.2 Phylum level
richness_p_ampli <- phylum_level_agg_ampli %>%
column_to_rownames(var = "phylum") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 0) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(richness = 1) %>%
rownames_to_column(var = "sample")
neutral_p_ampli <- phylum_level_agg_ampli %>%
column_to_rownames(var = "phylum") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 1) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(neutral = 1) %>%
rownames_to_column(var = "sample")
alpha_div_phylum_ampli <- richness_p_ampli %>%
full_join(neutral_p_ampli, by = join_by(sample == sample))
alpha_div_phylum_ampli_data <- alpha_div_phylum_ampli %>%
left_join(sample_metadata_amplicon, by = join_by(sample == sample))Diversity values
alpha_div_phylum_ampli %>%
pivot_longer(-sample, names_to = "alpha", values_to = "value") %>%
left_join(sample_metadata_amplicon, by = join_by(sample == sample)) %>%
group_by(alpha) %>%
summarise(
total_mean = mean(value, na.rm = T),
total_sd = sd(value, na.rm = T),
Eb_mean = mean(value[Species == "Eb"], na.rm = T),
Eb_sd = sd(value[Species == "Eb"], na.rm = T),
Ha_mean = mean(value[Species == "Ha"], na.rm = T),
Ha_sd = sd(value[Species == "Ha"], na.rm = T),
Pk_mean = mean(value[Species == "Pk"], na.rm = T),
Pk_sd = sd(value[Species == "Pk"], na.rm = T)
) %>%
mutate(
Total = str_c(round(total_mean, 2), "±", round(total_sd, 2)),
Cnephaeus = str_c(round(Eb_mean, 2), "±", round(Eb_sd, 2)),
Hypsugo = str_c(round(Ha_mean, 2), "±", round(Ha_sd, 2)),
Pipistrellus = str_c(round(Pk_mean, 2), "±", round(Pk_sd, 2))
) %>%
arrange(-Eb_mean) %>%
dplyr::select(alpha, Total, Cnephaeus, Pipistrellus, Hypsugo) %>%
tt()| alpha | Total | Cnephaeus | Pipistrellus | Hypsugo |
|---|---|---|---|---|
| richness | 2.96±1.43 | 2.6±1.43 | 3.5±1.63 | 2.53±0.96 |
| neutral | 1.85±0.75 | 1.55±0.63 | 2.03±0.87 | 1.79±0.61 |
Richness
Model_richness <- MASS::glm.nb(richness ~ Species, data = alpha_div_phylum_ampli_data,trace=TRUE)
summary(Model_richness)
Call:
MASS::glm.nb(formula = richness ~ Species, data = alpha_div_phylum_ampli_data,
trace = TRUE, init.theta = 85648.54994, link = log)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.95551 0.19612 4.872 1.1e-06 ***
SpeciesHa -0.02875 0.24351 -0.118 0.906
SpeciesPk 0.29725 0.22683 1.310 0.190
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for Negative Binomial(85648.55) family taken to be 1)
Null deviance: 30.456 on 50 degrees of freedom
Residual deviance: 26.682 on 48 degrees of freedom
AIC: 182.32
Number of Fisher Scoring iterations: 1
Theta: 85649
Std. Err.: 1756092
Warning while fitting theta: iteration limit reached
2 x log-likelihood: -174.321
Analysis of Deviance Table
Model: Negative Binomial(85648.55), link: log
Response: richness
Terms added sequentially (first to last)
Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL 50 30.456
Species 2 3.7739 48 26.682 0.1515
$emmeans
Species emmean SE df asymp.LCL asymp.UCL
Eb 0.956 0.196 Inf 0.571 1.34
Ha 0.927 0.144 Inf 0.644 1.21
Pk 1.253 0.114 Inf 1.029 1.48
Results are given on the log (not the response) scale.
Confidence level used: 0.95
$contrasts
contrast estimate SE df z.ratio p.value
Eb - Ha 0.0287 0.244 Inf 0.118 0.9923
Eb - Pk -0.2973 0.227 Inf -1.310 0.3893
Ha - Pk -0.3260 0.184 Inf -1.773 0.1788
Results are given on the log (not the response) scale.
P value adjustment: tukey method for comparing a family of 3 estimates
Neutral
Model_neutral <- lm(formula = neutral ~ Species, data = alpha_div_phylum_ampli_data)
anova(Model_neutral)Analysis of Variance Table
Response: neutral
Df Sum Sq Mean Sq F value Pr(>F)
Species 2 1.6822 0.84110 1.5367 0.2255
Residuals 48 26.2715 0.54732
9.2.3 Comparison (metabarcoding selected vs metagenomics)
merged_table_family_methods <- full_join(family_level_agg_ampli,merged_table,
by = c("family"))%>%
mutate(across(where(is.numeric), ~replace_na(., 0)))
merged_table_phylum_methods <- full_join(phylum_level_agg_ampli,merged_table_phylum,
by = c("phylum"))%>%
mutate(across(where(is.numeric), ~replace_na(., 0)))
all_samples <- rbind(all_samples, sample_metadata_amplicon)9.2.3.1 Family level
richness <- merged_table_family_methods %>%
column_to_rownames(var = "family") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 0) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(richness = 1) %>%
rownames_to_column(var = "sample")
neutral <- merged_table_family_methods %>%
column_to_rownames(var = "family") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 1) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(neutral = 1) %>%
rownames_to_column(var = "sample")
alpha_div_family <- richness %>%
full_join(neutral, by = join_by(sample == sample))
alpha_div_family_data <- alpha_div_family %>%
left_join(all_samples, by = join_by(sample == sample))
alpha_div_family_data_filt <- alpha_div_family_data %>%
filter(method!="Metabarcoding_standard") %>%
mutate(sampleid = str_remove(sample, "_ampli_selec"))Richness
Model_richness_random <- glmer.nb(richness ~ Species*method+(1|sampleid), data = alpha_div_family_data_filt)
summary(Model_richness_random)Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
Family: Negative Binomial(4.9991) ( log )
Formula: richness ~ Species * method + (1 | sampleid)
Data: alpha_div_family_data_filt
AIC BIC logLik deviance df.resid
564.3 585.3 -274.2 548.3 94
Scaled residuals:
Min 1Q Median 3Q Max
-1.6910 -0.8206 -0.1384 0.4144 2.5247
Random effects:
Groups Name Variance Std.Dev.
sampleid (Intercept) 0.02193 0.1481
Number of obs: 102, groups: sampleid, 51
Fixed effects:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.6787 0.2025 8.290 < 2e-16 ***
SpeciesHa 0.4511 0.2425 1.860 0.06288 .
SpeciesPk 0.2460 0.2402 1.024 0.30564
methodMetagenomics 0.7909 0.2625 3.013 0.00259 **
SpeciesHa:methodMetagenomics -1.7843 0.3352 -5.324 1.02e-07 ***
SpeciesPk:methodMetagenomics -0.8350 0.3158 -2.644 0.00819 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) SpecsH SpcsPk mthdMt SpcH:M
SpeciesHa -0.832
SpeciesPk -0.838 0.702
mthdMtgnmcs -0.721 0.608 0.620
SpcsH:mthdM 0.566 -0.683 -0.484 -0.779
SpcsPk:mthM 0.602 -0.506 -0.720 -0.824 0.644
Analysis of Variance Table
npar Sum Sq Mean Sq F value
Species 2 4.8318 2.4159 2.4159
method 1 2.1157 2.1157 2.1157
Species:method 2 29.5939 14.7970 14.7970
$emmeans
Species emmean SE df asymp.LCL asymp.UCL
Eb 2.07 0.1410 Inf 1.80 2.35
Ha 1.63 0.1110 Inf 1.41 1.85
Pk 1.90 0.0992 Inf 1.71 2.10
Results are averaged over the levels of: method
Results are given on the log (not the response) scale.
Confidence level used: 0.95
$contrasts
contrast estimate SE df z.ratio p.value
Eb - Ha 0.441 0.177 Inf 2.489 0.0342
Eb - Pk 0.171 0.167 Inf 1.025 0.5611
Ha - Pk -0.270 0.146 Inf -1.847 0.1544
Results are averaged over the levels of: method
Results are given on the log (not the response) scale.
P value adjustment: tukey method for comparing a family of 3 estimates
$emmeans
Species = Eb:
method emmean SE df asymp.LCL asymp.UCL
Metabarcoding_selected_beta 1.68 0.203 Inf 1.282 2.08
Metagenomics 2.47 0.182 Inf 2.112 2.83
Species = Ha:
method emmean SE df asymp.LCL asymp.UCL
Metabarcoding_selected_beta 2.13 0.134 Inf 1.866 2.39
Metagenomics 1.14 0.170 Inf 0.803 1.47
Species = Pk:
method emmean SE df asymp.LCL asymp.UCL
Metabarcoding_selected_beta 1.92 0.131 Inf 1.668 2.18
Metagenomics 1.88 0.136 Inf 1.614 2.15
Results are given on the log (not the response) scale.
Confidence level used: 0.95
$contrasts
Species = Eb:
contrast estimate SE df z.ratio p.value
Metabarcoding_selected_beta - Metagenomics -0.7909 0.263 Inf -3.013 0.0026
Species = Ha:
contrast estimate SE df z.ratio p.value
Metabarcoding_selected_beta - Metagenomics 0.9934 0.210 Inf 4.727 <.0001
Species = Pk:
contrast estimate SE df z.ratio p.value
Metabarcoding_selected_beta - Metagenomics 0.0441 0.179 Inf 0.247 0.8053
Results are given on the log (not the response) scale.
Neutral
Model_neutral <- lme(fixed = neutral ~ Species*method, data = alpha_div_family_data_filt,
random = ~ 1 | sampleid)
anova(Model_neutral) numDF denDF F-value p-value
(Intercept) 1 48 187.32322 <.0001
Species 2 48 0.29489 0.7460
method 1 48 0.01436 0.9051
Species:method 2 48 11.38991 0.0001
Linear mixed-effects model fit by REML
Data: alpha_div_family_data_filt
AIC BIC logLik
444.5844 465.0991 -214.2922
Random effects:
Formula: ~1 | sampleid
(Intercept) Residual
StdDev: 1.0451 1.822168
Fixed effects: neutral ~ Species * method
Value Std.Error DF t-value p-value
(Intercept) 2.249724 0.6642687 48 3.386768 0.0014
SpeciesHa 1.748328 0.8206651 48 2.130379 0.0383
SpeciesPk 0.595448 0.8011382 48 0.743252 0.4610
methodMetagenomics 2.380924 0.8148981 48 2.921744 0.0053
SpeciesHa:methodMetagenomics -4.441587 1.0067590 48 -4.411768 0.0001
SpeciesPk:methodMetagenomics -1.583242 0.9828041 48 -1.610943 0.1137
Correlation:
(Intr) SpecsH SpcsPk mthdMt SpcH:M
SpeciesHa -0.809
SpeciesPk -0.829 0.671
methodMetagenomics -0.613 0.496 0.509
SpeciesHa:methodMetagenomics 0.496 -0.613 -0.412 -0.809
SpeciesPk:methodMetagenomics 0.509 -0.412 -0.613 -0.829 0.671
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-1.5079495 -0.5165373 -0.1550650 0.3499670 4.2213829
Number of Observations: 102
Number of Groups: 51
$emmeans
Species = Eb:
method emmean SE df lower.CL upper.CL
Metabarcoding_selected_beta 2.25 0.664 50 0.916 3.58
Metagenomics 4.63 0.664 48 3.295 5.97
Species = Ha:
method emmean SE df lower.CL upper.CL
Metabarcoding_selected_beta 4.00 0.482 48 3.029 4.97
Metagenomics 1.94 0.482 48 0.968 2.91
Species = Pk:
method emmean SE df lower.CL upper.CL
Metabarcoding_selected_beta 2.85 0.448 48 1.945 3.75
Metagenomics 3.64 0.448 48 2.742 4.54
Degrees-of-freedom method: containment
Confidence level used: 0.95
$contrasts
Species = Eb:
contrast estimate SE df t.ratio p.value
Metabarcoding_selected_beta - Metagenomics -2.381 0.815 48 -2.922 0.0053
Species = Ha:
contrast estimate SE df t.ratio p.value
Metabarcoding_selected_beta - Metagenomics 2.061 0.591 48 3.486 0.0011
Species = Pk:
contrast estimate SE df t.ratio p.value
Metabarcoding_selected_beta - Metagenomics -0.798 0.549 48 -1.452 0.1530
Degrees-of-freedom method: containment
9.2.3.2 Phylum level
richness_phylum <- merged_table_phylum_methods %>%
column_to_rownames(var = "phylum") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 0) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(richness = 1) %>%
rownames_to_column(var = "sample")
neutral_phylum <- merged_table_phylum_methods %>%
column_to_rownames(var = "phylum") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 1) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(neutral = 1) %>%
rownames_to_column(var = "sample")
alpha_div_phylum <- richness_phylum %>%
full_join(neutral_phylum, by = join_by(sample == sample))
alpha_div_phylum_data <- alpha_div_phylum %>%
left_join(all_samples, by = join_by(sample == sample))
alpha_div_phylum_data_filt <- alpha_div_phylum_data %>%
filter(method!="Metabarcoding_standard") %>%
mutate(sampleid = str_remove(sample, "_ampli_selec"))Richness
Model_richness_random_phy <- glmer.nb(richness ~ Species*method+(1|sampleid), data = alpha_div_phylum_data_filt)
summary(Model_richness_random_phy)Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
Family: Negative Binomial(225004.8) ( log )
Formula: richness ~ Species * method + (1 | sampleid)
Data: alpha_div_phylum_data_filt
AIC BIC logLik deviance df.resid
366.2 387.2 -175.1 350.2 94
Scaled residuals:
Min 1Q Median 3Q Max
-1.7092 -0.3944 -0.1879 0.3041 2.0653
Random effects:
Groups Name Variance Std.Dev.
sampleid (Intercept) 0.01722 0.1312
Number of obs: 102, groups: sampleid, 51
Fixed effects:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.9465 0.2027 4.671 3.00e-06 ***
SpeciesHa -0.0280 0.2564 -0.109 0.91303
SpeciesPk 0.2975 0.2306 1.290 0.19703
methodMetagenomics 0.6737 0.2387 2.822 0.00477 **
SpeciesHa:methodMetagenomics -1.4094 0.3610 -3.905 9.44e-05 ***
SpeciesPk:methodMetagenomics -0.7980 0.2847 -2.803 0.00507 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) SpecsH SpcsPk mthdMt SpcH:M
SpeciesHa -0.809
SpeciesPk -0.860 0.701
mthdMtgnmcs -0.792 0.649 0.683
SpcsH:mthdM 0.553 -0.694 -0.478 -0.695
SpcsPk:mthM 0.647 -0.530 -0.749 -0.818 0.568
Analysis of Variance Table
npar Sum Sq Mean Sq F value
Species 2 18.3783 9.1891 9.1891
method 1 0.2686 0.2686 0.2686
Species:method 2 16.4025 8.2012 8.2012
$emmeans
Species emmean SE df asymp.LCL asymp.UCL
Eb 1.283 0.1300 Inf 1.028 1.539
Ha 0.551 0.1320 Inf 0.292 0.809
Pk 1.182 0.0893 Inf 1.007 1.357
Results are averaged over the levels of: method
Results are given on the log (not the response) scale.
Confidence level used: 0.95
$contrasts
contrast estimate SE df z.ratio p.value
Eb - Ha 0.733 0.185 Inf 3.968 0.0002
Eb - Pk 0.102 0.156 Inf 0.651 0.7917
Ha - Pk -0.631 0.158 Inf -4.007 0.0002
Results are averaged over the levels of: method
Results are given on the log (not the response) scale.
P value adjustment: tukey method for comparing a family of 3 estimates
$emmeans
Species = Eb:
method emmean SE df asymp.LCL asymp.UCL
Metabarcoding_selected_beta 0.947 0.203 Inf 0.549 1.344
Metagenomics 1.620 0.146 Inf 1.333 1.907
Species = Ha:
method emmean SE df asymp.LCL asymp.UCL
Metabarcoding_selected_beta 0.919 0.151 Inf 0.623 1.214
Metagenomics 0.183 0.214 Inf -0.236 0.602
Species = Pk:
method emmean SE df asymp.LCL asymp.UCL
Metabarcoding_selected_beta 1.244 0.118 Inf 1.014 1.474
Metagenomics 1.120 0.125 Inf 0.875 1.364
Results are given on the log (not the response) scale.
Confidence level used: 0.95
$contrasts
Species = Eb:
contrast estimate SE df z.ratio p.value
Metabarcoding_selected_beta - Metagenomics -0.674 0.239 Inf -2.822 0.0048
Species = Ha:
contrast estimate SE df z.ratio p.value
Metabarcoding_selected_beta - Metagenomics 0.736 0.260 Inf 2.831 0.0046
Species = Pk:
contrast estimate SE df z.ratio p.value
Metabarcoding_selected_beta - Metagenomics 0.124 0.164 Inf 0.758 0.4485
Results are given on the log (not the response) scale.
Neutral
Model_neutral_phylum <- lme(fixed = neutral ~ Species*method, data = alpha_div_phylum_data_filt,
random = ~ 1 | sampleid)
anova(Model_neutral_phylum) numDF denDF F-value p-value
(Intercept) 1 48 288.78119 <.0001
Species 2 48 3.69441 0.0322
method 1 48 0.47980 0.4918
Species:method 2 48 11.46045 0.0001
Linear mixed-effects model fit by REML
Data: alpha_div_phylum_data_filt
AIC BIC logLik
262.4756 282.9904 -123.2378
Random effects:
Formula: ~1 | sampleid
(Intercept) Residual
StdDev: 0.6304502 0.5975657
Fixed effects: neutral ~ Species * method
Value Std.Error DF t-value p-value
(Intercept) 1.5469903 0.2746911 48 5.631744 0.0000
SpeciesHa 0.2470693 0.3393648 48 0.728034 0.4701
SpeciesPk 0.4820295 0.3312900 48 1.455008 0.1522
methodMetagenomics 0.8239851 0.2672395 48 3.083321 0.0034
SpeciesHa:methodMetagenomics -1.5440129 0.3301588 48 -4.676577 0.0000
SpeciesPk:methodMetagenomics -0.7666993 0.3223030 48 -2.378816 0.0214
Correlation:
(Intr) SpecsH SpcsPk mthdMt SpcH:M
SpeciesHa -0.809
SpeciesPk -0.829 0.671
methodMetagenomics -0.486 0.394 0.403
SpeciesHa:methodMetagenomics 0.394 -0.486 -0.326 -0.809
SpeciesPk:methodMetagenomics 0.403 -0.326 -0.486 -0.829 0.671
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-2.0965139 -0.4701600 -0.0475519 0.3133924 3.1657939
Number of Observations: 102
Number of Groups: 51
$emmeans
Species emmean SE df lower.CL upper.CL
Eb 1.96 0.240 48 1.48 2.44
Ha 1.43 0.174 48 1.08 1.78
Pk 2.06 0.162 48 1.73 2.38
Results are averaged over the levels of: method
Degrees-of-freedom method: containment
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
Eb - Ha 0.5249 0.297 48 1.770 0.1904
Eb - Pk -0.0987 0.289 48 -0.341 0.9380
Ha - Pk -0.6236 0.238 48 -2.624 0.0307
Results are averaged over the levels of: method
Degrees-of-freedom method: containment
P value adjustment: tukey method for comparing a family of 3 estimates
$emmeans
Species = Eb:
method emmean SE df lower.CL upper.CL
Metabarcoding_selected_beta 1.55 0.275 50 0.995 2.10
Metagenomics 2.37 0.275 48 1.819 2.92
Species = Ha:
method emmean SE df lower.CL upper.CL
Metabarcoding_selected_beta 1.79 0.199 48 1.393 2.19
Metagenomics 1.07 0.199 48 0.673 1.47
Species = Pk:
method emmean SE df lower.CL upper.CL
Metabarcoding_selected_beta 2.03 0.185 48 1.657 2.40
Metagenomics 2.09 0.185 48 1.714 2.46
Degrees-of-freedom method: containment
Confidence level used: 0.95
$contrasts
Species = Eb:
contrast estimate SE df t.ratio p.value
Metabarcoding_selected_beta - Metagenomics -0.8240 0.267 48 -3.083 0.0034
Species = Ha:
contrast estimate SE df t.ratio p.value
Metabarcoding_selected_beta - Metagenomics 0.7200 0.194 48 3.714 0.0005
Species = Pk:
contrast estimate SE df t.ratio p.value
Metabarcoding_selected_beta - Metagenomics -0.0573 0.180 48 -0.318 0.7519
Degrees-of-freedom method: containment
9.2.4 Alpha diversity figure (three methods)
alpha_div_family_data %>%
select(-sample, -Life_stage, -Sex) %>%
pivot_longer(
cols = -c(method, Species),
names_to = "metric",
values_to = "value"
) %>%
mutate(
metric = factor(
metric,
levels = c("richness", "neutral"),
labels = c("Richness", "Neutral")
),
Species = factor(
Species,
levels = c("Eb", "Ha", "Pk"),
labels = c("C. bottae", "H. ariel", "P. kuhlii")
),
method = factor(
method,
levels = c("Metabarcoding_standard", "Metabarcoding_selected_beta", "Metagenomics"),
labels = c("Metabarcoding Standard", "Metabarcoding Selected Beta", "Metagenomics")
)) %>%
ggplot(aes(y = value, x = method, color = Species, fill = Species)) +
geom_boxplot(
width = 0.4,
alpha = 0.5,
outlier.shape = NA,
# show.legend = FALSE,
position = position_dodge(width = 0.75)
) +
geom_jitter(
position = position_jitterdodge(jitter.width = 0.1, dodge.width = 0.75),
size = 1,
alpha = 0.8,
show.legend = FALSE
) +
facet_wrap(~metric, scales = "free_y") +
scale_color_manual(values=species_color, labels = c(expression(italic("C. bottae")),
expression(italic("H. ariel")),
expression(italic("P. kuhlii"))))+
scale_fill_manual(values=species_color, labels = c(expression(italic("C. bottae")),
expression(italic("H. ariel")),
expression(italic("P. kuhlii")))) +
theme_classic() +
theme(
strip.background = element_blank(),
panel.grid.minor.x = element_line(size = 0.1, color = "grey"),
axis.text.y = element_text(size = 12),
axis.text.x = element_text(size = 12),
axis.title.x = element_blank(),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0), size = 14),
strip.text = element_text(size = 14, lineheight = 0.6, face = "bold")
) +
labs(
# x = "Methodological approach",
y = "Alpha diversity at family level",
fill = "Species"
)alpha_div_phylum_data %>%
select(-sample, -Life_stage, -Sex) %>%
pivot_longer(
cols = -c(method, Species),
names_to = "metric",
values_to = "value"
) %>%
mutate(
metric = factor(
metric,
levels = c("richness", "neutral"),
labels = c("Richness", "Neutral")
),
Species = factor(
Species,
levels = c("Eb", "Ha", "Pk"),
labels = c("C. bottae", "H. ariel", "P. kuhlii")
),
method = factor(
method,
levels = c("Metabarcoding_standard", "Metabarcoding_selected_beta", "Metagenomics"),
labels = c("Metabarcoding Standard", "Metabarcoding Selected Beta", "Metagenomics")
)) %>%
ggplot(aes(y = value, x = method, color = Species, fill = Species)) +
geom_boxplot(
width = 0.4,
alpha = 0.5,
outlier.shape = NA,
# show.legend = FALSE,
position = position_dodge(width = 0.75)
) +
geom_jitter(
position = position_jitterdodge(jitter.width = 0.1, dodge.width = 0.75),
size = 1,
alpha = 0.8,
show.legend = FALSE
) +
facet_wrap(~metric, scales = "free_y") +
scale_color_manual(values=species_color, labels = c(expression(italic("C. bottae")),
expression(italic("H. ariel")),
expression(italic("P. kuhlii"))))+
scale_fill_manual(values=species_color, labels = c(expression(italic("C. bottae")),
expression(italic("H. ariel")),
expression(italic("P. kuhlii")))) +
theme_classic() +
theme(
strip.background = element_blank(),
panel.grid.minor.x = element_line(size = 0.1, color = "grey"),
axis.text.y = element_text(size = 12),
axis.text.x = element_text(size = 12),
axis.title.x = element_blank(),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0), size = 14),
strip.text = element_text(size = 14, lineheight = 0.6, face = "bold")
) +
labs(
# x = "Methodological approach",
y = "Alpha diversity at phylum level",
fill = "Species"
)9.3 Beta diversity
9.3.1 At different taxonomic level
beta_q0n <- family_level_agg_ampli %>%
column_to_rownames(., "family") %>%
hillpair(., q = 0)
beta_q1n <- family_level_agg_ampli %>%
column_to_rownames(., "family") %>%
hillpair(., q = 1)
beta_q0n_phylum <- phylum_level_agg_ampli %>%
column_to_rownames(., "phylum") %>%
hillpair(., q = 0)
beta_q1n_phylum <- phylum_level_agg_ampli %>%
column_to_rownames(., "phylum") %>%
hillpair(., q = 1)save(beta_q0n,
beta_q1n,
beta_q0n_phylum,
beta_q1n_phylum,
file = "resources/beta_tax_sele_beta.Rdata")9.3.1.1 Family
Richness
adonis2(beta_q0n$S ~ Species, data = sample_metadata_amplicon %>% filter(sample %in% labels(beta_q0n$S)) %>% arrange(match(sample, labels(beta_q0n$S))),
permutations = 999) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Model | 2 | 1.398786 | 0.0618231 | 1.581529 | 0.022 |
| Residual | 48 | 21.226829 | 0.9381769 | NA | NA |
| Total | 50 | 22.625615 | 1.0000000 | NA | NA |
pairwise.adonis(beta_q0n$S, sample_metadata_amplicon %>% filter(sample %in% labels(beta_q0n$S)) %>% arrange(match(sample, labels(beta_q0n$S))) %>% pull(Species),
perm = 999) %>%
tt()| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| Eb vs Ha | 1 | 1.0198664 | 2.385481 | 0.08117888 | 0.003 | 0.009 | * |
| Eb vs Pk | 1 | 0.6962595 | 1.574272 | 0.04985932 | 0.063 | 0.189 | |
| Ha vs Pk | 1 | 0.4751619 | 1.050402 | 0.02622701 | 0.386 | 1.000 |
Neutral
adonis2(beta_q1n$S ~ Species, data = sample_metadata_amplicon %>% filter(sample %in% labels(beta_q1n$S)) %>% arrange(match(sample, labels(beta_q1n$S))), permutations = 999) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Model | 2 | 1.350228 | 0.06036723 | 1.541893 | 0.04 |
| Residual | 48 | 21.016669 | 0.93963277 | NA | NA |
| Total | 50 | 22.366896 | 1.00000000 | NA | NA |
pairwise.adonis(beta_q1n$S, sample_metadata_amplicon %>% filter(sample %in% labels(beta_q1n$S)) %>% arrange(match(sample, labels(beta_q1n$S))) %>% pull(Species), perm = 999) %>%
tt()| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| Eb vs Ha | 1 | 1.0674286 | 2.5261102 | 0.08555513 | 0.002 | 0.006 | * |
| Eb vs Pk | 1 | 0.6882815 | 1.5814927 | 0.05007657 | 0.088 | 0.264 | |
| Ha vs Pk | 1 | 0.3873457 | 0.8598879 | 0.02157276 | 0.587 | 1.000 |
9.3.1.2 Phylum
Richness
adonis2(beta_q0n_phylum$S ~ Species, data = sample_metadata_amplicon %>% filter(sample %in% labels(beta_q0n_phylum$S)) %>% arrange(match(sample, labels(beta_q0n_phylum$S))), permutations = 999) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Model | 2 | 0.2400242 | 0.02933301 | 0.7252664 | 0.643 |
| Residual | 48 | 7.9427100 | 0.97066699 | NA | NA |
| Total | 50 | 8.1827342 | 1.00000000 | NA | NA |
Neutral
adonis2(beta_q1n_phylum$S ~ Species,data = sample_metadata_amplicon %>%
filter(sample %in% labels(beta_q1n_phylum$S)) %>%
arrange(match(sample, labels(beta_q1n_phylum$S))),
permutations = 999
) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Model | 2 | 0.1725489 | 0.02585067 | 0.6368798 | 0.642 |
| Residual | 48 | 6.5022830 | 0.97414933 | NA | NA |
| Total | 50 | 6.6748318 | 1.00000000 | NA | NA |
9.3.2 Comparison between metabarcoding selected and metagenomics
beta_q0n <- merged_table_family_methods %>%
select(-H19, -H20) %>%
column_to_rownames(., "family") %>%
hillpair(., q = 0)
beta_q1n <- merged_table_family_methods %>%
select(-H19) %>%
column_to_rownames(., "family") %>%
hillpair(., q = 1)
beta_q0n_phylum <- merged_table_phylum_methods %>%
select(-H19, -H20) %>%
column_to_rownames(., "phylum") %>%
hillpair(., q = 0)
beta_q1n_phylum <- merged_table_phylum_methods %>%
select(-H19) %>%
column_to_rownames(., "phylum") %>%
hillpair(., q = 1)9.3.2.1 Family level
Richness
m <- as.matrix(beta_q0n$S)
samples <- rownames(m)
to_remove <- grepl("_ampli$", samples) & !grepl("_ampli_Select$", samples)
m_filtered <- m[!to_remove, !to_remove]
beta_q0n_S_filtered <- as.dist(m_filtered)
samples_sel <- all_samples %>%
mutate(sampleid = str_remove(sample, "_ampli_selec"))
samples_q0 <- samples_sel %>% filter(sample %in% labels(beta_q0n_S_filtered)) %>% arrange(match(sample, labels(beta_q0n_S_filtered)))
betadisper(beta_q0n_S_filtered, samples_q0 %>% pull(Species)) %>% permutest(.)
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 2 0.02817 0.0140850 4.4954 999 0.016 *
Residuals 97 0.30392 0.0031332
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(beta_q0n_S_filtered ~ Species*method, data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Model | 5 | 5.197299 | 0.1197866 | 2.558458 | 0.001 |
| Residual | 94 | 38.190674 | 0.8802134 | NA | NA |
| Total | 99 | 43.387973 | 1.0000000 | NA | NA |
adonis2(beta_q0n_S_filtered ~ Species+method, by = "margin", data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Species | 2 | 1.898649 | 0.04375979 | 2.295924 | 0.002 |
| method | 1 | 1.772572 | 0.04085400 | 4.286935 | 0.001 |
| Residual | 96 | 39.694308 | 0.91486892 | NA | NA |
| Total | 99 | 43.387973 | 1.00000000 | NA | NA |
adonis2(beta_q0n_S_filtered ~ Species*method, by = "margin", data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Species:method | 2 | 1.503634 | 0.03465554 | 1.850472 | 0.003 |
| Residual | 94 | 38.190674 | 0.88021338 | NA | NA |
| Total | 99 | 43.387973 | 1.00000000 | NA | NA |
Neutral
m <- as.matrix(beta_q1n$S)
samples <- rownames(m)
to_remove <- grepl("_ampli$", samples) & !grepl("_ampli_Select$", samples)
m_filtered <- m[!to_remove, !to_remove]
beta_q1n_S_filtered <- as.dist(m_filtered)
samples_q1 <- samples_sel %>% filter(sample %in% labels(beta_q1n_S_filtered)) %>% arrange(match(sample, labels(beta_q1n_S_filtered)))
betadisper(beta_q1n_S_filtered, samples_q1 %>% pull(Species)) %>% permutest(.)
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 2 0.15474 0.077371 8.3185 999 0.002 **
Residuals 98 0.91151 0.009301
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(beta_q1n_S_filtered ~ Species*method, data = samples_q1, permutations = 999, strata = samples_q1$sampleid) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Model | 5 | 4.290399 | 0.09881431 | 2.083335 | 0.001 |
| Residual | 95 | 39.128401 | 0.90118569 | NA | NA |
| Total | 100 | 43.418800 | 1.00000000 | NA | NA |
adonis2(beta_q1n_S_filtered ~ Species+method, by = "margin", data = samples_q1, permutations = 999, strata = samples_q1$sampleid) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Species | 2 | 2.220116 | 0.05113259 | 2.696190 | 0.001 |
| method | 1 | 1.251519 | 0.02882436 | 3.039782 | 0.001 |
| Residual | 97 | 39.936210 | 0.91979075 | NA | NA |
| Total | 100 | 43.418800 | 1.00000000 | NA | NA |
adonis2(beta_q1n_S_filtered ~ Species*method, by = "margin", data = samples_q1, permutations = 999, strata = samples_q1$sampleid) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Species:method | 2 | 0.8078095 | 0.01860506 | 0.980642 | 0.019 |
| Residual | 95 | 39.1284008 | 0.90118569 | NA | NA |
| Total | 100 | 43.4187997 | 1.00000000 | NA | NA |
9.3.2.2 Phylum level
Richness
m <- as.matrix(beta_q0n_phylum$S)
samples <- rownames(m)
to_remove <- grepl("_ampli$", samples) & !grepl("_ampli_Select$", samples)
m_filtered <- m[!to_remove, !to_remove]
beta_q0n_phylum_S_filtered <- as.dist(m_filtered)
samples_q0 <- samples_sel %>% filter(sample %in% labels(beta_q0n_phylum_S_filtered)) %>% arrange(match(sample, labels(beta_q0n_phylum_S_filtered)))
betadisper(beta_q0n_phylum_S_filtered, samples_q0 %>% pull(Species)) %>% permutest(.)
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 2 1.2056 0.60280 9.5619 999 0.001 ***
Residuals 97 6.1150 0.06304
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(beta_q0n_phylum_S_filtered ~ Species*method, data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Model | 5 | 3.896413 | 0.2201316 | 5.306632 | 0.001 |
| Residual | 94 | 13.803965 | 0.7798684 | NA | NA |
| Total | 99 | 17.700378 | 1.0000000 | NA | NA |
adonis2(beta_q0n_phylum_S_filtered ~ Species+method, by = "margin", data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Species | 2 | 1.9255454 | 0.10878555 | 6.054582 | 0.261 |
| method | 1 | 0.4710232 | 0.02661091 | 2.962120 | 0.018 |
| Residual | 96 | 15.2654930 | 0.86243880 | NA | NA |
| Total | 99 | 17.7003783 | 1.00000000 | NA | NA |
adonis2(beta_q0n_phylum_S_filtered ~ Species*method, by = "margin", data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Species:method | 2 | 1.461528 | 0.08257041 | 4.976236 | 0.001 |
| Residual | 94 | 13.803965 | 0.77986838 | NA | NA |
| Total | 99 | 17.700378 | 1.00000000 | NA | NA |
Neutral
m <- as.matrix(beta_q1n_phylum$S)
samples <- rownames(m)
to_remove <- grepl("_ampli$", samples) & !grepl("_ampli_Select$", samples)
m_filtered <- m[!to_remove, !to_remove]
beta_q1n_phylum_S_filtered <- as.dist(m_filtered)
samples_q1 <- samples_sel %>% filter(sample %in% labels(beta_q1n_phylum_S_filtered)) %>% arrange(match(sample, labels(beta_q1n_phylum_S_filtered)))
betadisper(beta_q1n_phylum_S_filtered, samples_q1 %>% pull(Species)) %>% permutest(.)
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 2 0.7182 0.35912 4.7351 999 0.013 *
Residuals 98 7.4326 0.07584
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(beta_q1n_phylum_S_filtered ~ Species*method, data = samples_q1, permutations = 999, strata = samples_q1$sampleid) %>%
broom::tidy() %>%
tt()| term | df | SumOfSqs | R2 | statistic | p.value |
|---|---|---|---|---|---|
| Model | 5 | 1.308865 | 0.09579321 | 2.012893 | 0.141 |
| Residual | 95 | 12.354575 | 0.90420679 | NA | NA |
| Total | 100 | 13.663439 | 1.00000000 | NA | NA |
9.3.3 Three methodological approaches together
9.3.3.1 Family
Richness
beta_q0n$S %>%
vegan::metaMDS(.,
trymax = 500,
k = 2,
trace = 0) %>%
vegan::scores() %>%
as_tibble(., rownames = "sample") %>%
dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
group_by(method, Species) %>%
mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
ungroup() %>%
mutate(
Species = factor(
Species,
levels = c("Eb", "Ha", "Pk"),
labels = c("C. bottae", "H. ariel", "P. kuhlii")),
method = factor(
method,
levels = c("Metabarcoding_standard", "Metabarcoding_selected_beta", "Metagenomics"),
labels = c("Metabarcoding Standard", "Metabarcoding Selected Beta", "Metagenomics")
)
) %>%
ggplot(aes(x = NMDS1, y = NMDS2, color = Species, shape=method)) +
geom_point(size = 4) +
geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
scale_color_manual(values = species_color,
labels = c(
expression(italic("C. bottae")),
expression(italic("H. ariel")),
expression(italic("P. kuhlii"))
))+
theme(
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
panel.background = element_blank(),
axis.line = element_line(
size = 0.5,
linetype = "solid",
colour = "black"
),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical"
) +
labs(
shape = "Methodological approach"
)#+labs(color = "method") +geom_text_repel(aes(label = sample), size=3)beta_q0n$S %>%
vegan::metaMDS(.,
trymax = 500,
k = 2,
trace = 0) %>%
vegan::scores() %>%
as_tibble(., rownames = "sample") %>%
dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
group_by(method) %>%
mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
ungroup() %>%
mutate(method = factor(method,
levels = c("Metabarcoding_standard", "Metabarcoding_selected_beta", "Metagenomics"),
labels = c("Metabarcoding Standard", "Metabarcoding Selected Beta", "Metagenomics")
)
) %>%
ggplot(aes(x = NMDS1, y = NMDS2, color = method)) +
geom_point(size = 4) +
geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
scale_color_manual(values = methods_colors)+
theme(
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
panel.background = element_blank(),
axis.line = element_line(
size = 0.5,
linetype = "solid",
colour = "black"
),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical"
)Neutral
beta_q1n$S %>%
vegan::metaMDS(.,
trymax = 500,
k = 2,
trace = 0) %>%
vegan::scores() %>%
as_tibble(., rownames = "sample") %>%
dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
group_by(method, Species) %>%
mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
ungroup() %>%
mutate(
Species = factor(
Species,
levels = c("Eb", "Ha", "Pk"),
labels = c("C. bottae", "H. ariel", "P. kuhlii"))
) %>%
ggplot(aes(x = NMDS1, y = NMDS2, color = Species, shape=method)) +
geom_point(size = 3) +
geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
scale_color_manual(values = species_color,
labels = c(
expression(italic("C. bottae")),
expression(italic("H. ariel")),
expression(italic("P. kuhlii"))
))+
theme(
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
panel.background = element_blank(),
axis.line = element_line(
size = 0.5,
linetype = "solid",
colour = "black"
),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical"
) +
labs(shape = "Methodological approach")beta_q1n$S %>%
vegan::metaMDS(.,
trymax = 500,
k = 2,
trace = 0) %>%
vegan::scores() %>%
as_tibble(., rownames = "sample") %>%
dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
filter(sample!="P36") %>%
group_by(method) %>%
mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
ungroup() %>%
mutate(method = factor(method,
levels = c("Metabarcoding_standard", "Metabarcoding_selected_beta", "Metagenomics"),
labels = c("Metabarcoding Standard", "Metabarcoding Selected Beta", "Metagenomics")
)
) %>%
ggplot(aes(x = NMDS1, y = NMDS2, color = method)) +
geom_point(size = 3) +
geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
scale_color_manual(values = methods_colors)+
theme(
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
panel.background = element_blank(),
axis.line = element_line(
size = 0.5,
linetype = "solid",
colour = "black"
),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical"
) +
labs(color = "Methodological approach",shape="Species")9.3.3.2 Phylum level
Richness
beta_q0n_phylum$S %>%
vegan::metaMDS(.,
trymax = 500,
k = 2,
trace = 0) %>%
vegan::scores() %>%
as_tibble(., rownames = "sample") %>%
dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
group_by(method, Species) %>%
mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
ungroup() %>%
mutate(
Species = factor(
Species,
levels = c("Eb", "Ha", "Pk"),
labels = c("C. bottae", "H. ariel", "P. kuhlii"))
) %>%
ggplot(aes(x = NMDS1, y = NMDS2, color = Species, shape=method)) +
geom_point(size = 4) +
geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
scale_color_manual(values = species_color,
labels = c(
expression(italic("C. bottae")),
expression(italic("H. ariel")),
expression(italic("P. kuhlii"))
))+
theme(
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
panel.background = element_blank(),
axis.line = element_line(
size = 0.5,
linetype = "solid",
colour = "black"
),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical"
) +
labs(
shape = "Methodological approach"
)Neutral
beta_q1n_phylum$S %>%
vegan::metaMDS(.,
trymax = 500,
k = 2,
trace = 0) %>%
vegan::scores() %>%
as_tibble(., rownames = "sample") %>%
dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
group_by(method) %>%
mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(aes(x = NMDS1, y = NMDS2, color = method, shape= Species)) +
geom_point(size = 3) +
geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
scale_color_manual(values = methods_colors,
labels = c(
expression(italic("Metabarcoding Standard")),
expression(italic("Metabarcoding Selected")),
expression(italic("Metagenomics"))
))+
theme(
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
panel.background = element_blank(),
axis.line = element_line(
size = 0.5,
linetype = "solid",
colour = "black"
),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical"
) +
labs(color = "Methodological approach",shape="Species")9.4 Differential abundance
9.4.1 Metabarcoding selected
phylo_samples <- sample_metadata_amplicon %>%
column_to_rownames("sample") %>%
sample_data()
phylo_genome <- genome_counts_filt_amplicon %>%
select(one_of(c("genome", rownames(phylo_samples)))) %>%
column_to_rownames("genome") %>%
otu_table(., taxa_are_rows = TRUE)
phylo_taxonomy <- genome_metadata_amplicon %>%
filter(genome %in% rownames(phylo_genome)) %>%
column_to_rownames("genome") %>%
dplyr::select(domain,phylum,class,order,family,genus,species) %>%
as.matrix() %>%
tax_table()
physeq_genome_metab_sel <- phyloseq(phylo_genome, phylo_samples, phylo_taxonomy)ancom_rand_output_phylum_metab_sel_beta = ancombc2(
data = physeq_genome_metab_sel,
assay_name = "counts",
tax_level = "phylum",
fix_formula = "Species",
p_adj_method = "holm",
pseudo_sens = TRUE,
prv_cut = 0,
lib_cut = 0,
s0_perc = 0.05,
group = "Species",
struc_zero = TRUE,
neg_lb = FALSE,
alpha = 0.05,
n_cl = 2,
verbose = TRUE,
global = TRUE,
pairwise = TRUE,
dunnet = FALSE,
trend = FALSE,
iter_control = list(
tol = 1e-5,
max_iter = 20,
verbose = FALSE
),
em_control = list(tol = 1e-5, max_iter = 100),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
trend_control = NULL
)9.4.1.1 Structural zeros
Total number of structural zeros
# Extract structural zeros
structural_zeros <- ancom_rand_output_phylum_metab_sel_beta$zero_ind
# Convert to a data frame for better readability
structural_zeros_df <- as.data.frame(structural_zeros) %>%
column_to_rownames(., "taxon")
structural_zero_taxa <- rownames(structural_zeros_df)[apply(structural_zeros_df, 1, any)]
# Identify in which groups each taxon is a structural zero
structural_zero_groups <- structural_zeros_df[structural_zero_taxa, , drop = FALSE] %>%
rename("Eb"=1) %>%
rename("Ha"=2) %>%
rename("Pk"=3)
# Total number of structural zeros
nrow(structural_zero_groups)[1] 5
9.4.1.2 Enriched data
res_pair_phylum <- ancom_rand_output_phylum_metab_sel_beta$res_pair
# Get log-fold change and q-value columns
lfc_cols <- grep("^lfc_", colnames(res_pair_phylum), value = TRUE)
q_cols <- gsub("^lfc_", "q_", lfc_cols)
# Build long table by pairing lfc and q for each contrast
plot_data_phylum <- lfc_cols %>%
setNames(nm = .) %>%
purrr::imap_dfr(function(lfc_col, name) {
q_col <- gsub("^lfc_", "q_", lfc_col)
res_pair_phylum %>%
select(taxon, !!lfc_col, !!q_col) %>%
rename(
lfc = !!lfc_col,
q = !!q_col
) %>%
mutate(contrast_key = name)
}) %>%
filter(!is.na(lfc), !is.na(q), q < 0.05) %>%
mutate(
# Manually re-label contrasts for clarity
contrast = case_when(
contrast_key == "lfc_SpeciesHa" ~ "H. ariel vs C. bottae",
contrast_key == "lfc_SpeciesPk" ~ "P. kuhlii vs C. bottae",
contrast_key == "lfc_SpeciesPk_SpeciesHa" ~ "P. kuhlii vs H. ariel",
TRUE ~ contrast_key
),
direction = ifelse(lfc > 0, paste("Enriched in", sub(" vs.*", "", contrast)),
paste("Enriched in", sub(".*vs ", "", contrast)))
)
plot_data_phylum %>% select(-contrast_key) taxon lfc q contrast direction
1 Fusobacteriota -2.999425 0.0069606310 H. ariel vs C. bottae Enriched in C. bottae
2 Bacteroidota 3.662399 0.0006033366 H. ariel vs C. bottae Enriched in H. ariel
3 Bacteroidota 2.205370 0.0161298287 P. kuhlii vs C. bottae Enriched in P. kuhlii
4 Fusobacteriota 2.375825 0.0287267228 P. kuhlii vs H. ariel Enriched in P. kuhlii
9.4.2 Differences with metagenomics at phylum level across species
load("resources/amplicon/selected_filtering_beta.Rdata")
sample_metadata_amplicon <- sample_metadata_amplicon[c(1,9,10,13)] %>%
mutate(sample = paste0(sample, "_ampli")) %>%
mutate(method="Metabarcoding")
genome_metadata_ampli <- genome_metadata_amplicon %>%
mutate(
class = if_else(is.na(class), paste0(phylum, "_Unclassified"), class),
order = if_else(is.na(order),
if_else(str_detect(class, "Unclassified"), class, paste0(class, "_Unclassified")),
order),
family = if_else(is.na(family),
if_else(str_detect(order, "Unclassified"), order, paste0(order, "_Unclassified")),
family),
genus = if_else(is.na(genus),
if_else(str_detect(family, "Unclassified"), family, paste0(family, "_Unclassified")),
genus)
)
genome_tax_ampli <- genome_counts_filt_amplicon %>%
column_to_rownames(., "genome")%>%
rename_with(~ paste0(., "_ampli")) %>%
rownames_to_column(., "genome")%>%
left_join(genome_metadata_ampli, by = "genome")
family_level_agg_ampli <- genome_tax_ampli %>%
select(-genome) %>%
group_by(phylum, class, family) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame() %>%
select(-c(phylum,class))
phylum_level_agg_ampli <- genome_tax_ampli %>%
select(-genome) %>%
group_by(phylum) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame()load("resources/metagenomics/data_filtered.Rdata")
genome_metadata <- genome_metadata %>%
mutate(
class = if_else(str_trim(class) == "", paste0(phylum, "_Unclassified"), class),
order = if_else(str_trim(order) == "",
if_else(str_detect(class, "Unclassified"), class, paste0(class, "_Unclassified")),
order),
family = if_else(str_trim(family) == "",
if_else(str_detect(order, "Unclassified"), order, paste0(order, "_Unclassified")),
family),
genus = if_else(str_trim(genus) == "",
if_else(str_detect(family, "Unclassified"), family, paste0(family, "_Unclassified")),
genus)
)
genome_tax <- genome_counts_filt %>%
left_join(genome_metadata[c(1,3,4,6)], by = "genome")
# Aggregate counts at the Family level
family_level_agg <- genome_tax %>%
select(-genome) %>%
group_by(phylum, class, family) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame() %>%
select(-c(phylum,class))
# Aggregate counts at the Phylum level
phylum_level_agg <- genome_tax %>%
select(-genome) %>%
group_by(phylum) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame() merged_table <- full_join(family_level_agg_ampli,family_level_agg,
by = c("family"))%>%
mutate(across(where(is.numeric), ~replace_na(., 0)))
merged_table_phylum <- full_join(phylum_level_agg_ampli,phylum_level_agg,
by = c("phylum"))%>%
mutate(across(where(is.numeric), ~replace_na(., 0)))
sample_shot <- sample_metadata %>%
select(-c(Habitat,Longitude, Latitude, Region))%>%
mutate(method="Metagenomics")
all_samples <- rbind(sample_shot, sample_metadata_amplicon)
samples_sel <- all_samples %>%
mutate(sampleid = str_remove(sample, "_ampli"))
phylo_genome <- merged_table_phylum %>%
column_to_rownames("phylum") %>%
otu_table(., taxa_are_rows = TRUE)
phylo_samples <- samples_sel %>%
mutate(individual=sample) %>%
column_to_rownames("sample") %>%
sample_data()
merged_physeq_phylum <- phyloseq(phylo_genome, phylo_samples)ancom_rand_output_struc_approach_rand_beta = ancombc2(data = merged_physeq_phylum,
assay_name = "counts",
tax_level = NULL,
fix_formula = "method",
# rand_formula = "(1|sampleid)",
p_adj_method = "holm",
pseudo_sens = TRUE,
prv_cut =0,
lib_cut = 0,
s0_perc = 0.05,
group = "method",
struc_zero = TRUE,
neg_lb = FALSE,
alpha = 0.05,
n_cl = 2,
verbose = TRUE,
global = FALSE,
pairwise = FALSE,
dunnet = FALSE,
trend = FALSE,
iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
em_control = list(tol = 1e-5, max_iter = 100),
# lme_control = lme4::lmerControl(),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
trend_control = NULL)Enriched phyla
| taxon | lfc_(Intercept) | lfc_methodMetagenomics | se_(Intercept) | se_methodMetagenomics | W_(Intercept) | W_methodMetagenomics | p_(Intercept) | p_methodMetagenomics | q_(Intercept) | q_methodMetagenomics | diff_(Intercept) | diff_methodMetagenomics | passed_ss_(Intercept) | passed_ss_methodMetagenomics |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Bacillota | 0.4631885 | -0.9504081 | NaN | 0.2917864 | NaN | -3.257205 | 1 | 1.699690e-03 | 1 | 8.498452e-03 | FALSE | TRUE | FALSE | TRUE |
| Cyanobacteriota | -1.0681707 | 1.6064923 | NaN | 0.1443157 | NaN | 11.131795 | 1 | 1.020098e-04 | 1 | 7.140686e-04 | FALSE | TRUE | FALSE | TRUE |
| Desulfobacterota | -0.2977490 | 0.7242459 | NaN | 0.1908770 | NaN | 3.794306 | 1 | 1.328135e-03 | 1 | 7.968809e-03 | FALSE | TRUE | FALSE | TRUE |
| Pseudomonadota | 1.5499865 | -1.9428009 | NaN | 0.3049521 | NaN | -6.370840 | 1 | 6.494332e-09 | 1 | 5.195466e-08 | FALSE | TRUE | FALSE | TRUE |
9.4.3 Differences with metagenomics at phylum level within each species
9.4.3.1 Cnephaeus
physeq_eb <- subset_samples(merged_physeq_phylum, Species == "Eb")
physeq_eb <- prune_taxa(taxa_sums(physeq_eb)>0, physeq_eb)ancom_rand_output_struc_eb = ancombc2(data = physeq_eb,
assay_name = "counts",
tax_level = NULL,
fix_formula = "method",
p_adj_method = "holm",
pseudo_sens = TRUE,
prv_cut =0,
lib_cut = 0,
s0_perc = 0.05,
group = "method",
struc_zero = TRUE,
neg_lb = FALSE,
alpha = 0.05,
n_cl = 2,
verbose = TRUE,
global = FALSE,
pairwise = FALSE,
dunnet = FALSE,
trend = FALSE,
iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
em_control = list(tol = 1e-5, max_iter = 100),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
trend_control = NULL)Total structural zeros
structural_zeros <- ancom_rand_output_struc_eb$zero_ind
structural_zeros_df <- as.data.frame(structural_zeros) %>%
column_to_rownames(., "taxon")
structural_zero_taxa <- rownames(structural_zeros_df)[apply(structural_zeros_df, 1, any)]
structural_zero_groups <- structural_zeros_df[structural_zero_taxa, , drop = FALSE] %>%
rename("amplicon"=1) %>%
rename("metagenomics"=2)
nrow(structural_zero_groups)[1] 6
Structural zeros in amplicon
[1] 5
structural_zero_groups %>%
filter(amplicon==TRUE) %>%
rownames_to_column(., "phylum") %>%
left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>%
select(phylum) %>%
pull()[1] "Campylobacterota" "Deferribacterota" "Elusimicrobiota" "Planctomycetota" "Synergistota"
Structural zeros in metagenomics
structural_zero_groups %>%
filter(metagenomics==TRUE) %>%
rownames_to_column(., "phylum") %>%
left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>%
select(phylum) %>%
pull()[1] "Rs-K70 termite group"
Enriched phyla
| taxon | lfc_(Intercept) | lfc_methodMetagenomics | se_(Intercept) | se_methodMetagenomics | W_(Intercept) | W_methodMetagenomics | p_(Intercept) | p_methodMetagenomics | q_(Intercept) | q_methodMetagenomics | diff_(Intercept) | diff_methodMetagenomics | passed_ss_(Intercept) | passed_ss_methodMetagenomics |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Bacteroidota | -1.426982 | 2.069855 | 0.1952675 | 0.5456508 | -7.307833 | 3.793369 | 3.349849e-04 | 0.009037660 | 0.0013399395 | 0.03615064 | TRUE | TRUE | FALSE | TRUE |
| Desulfobacterota | -1.587317 | 2.498657 | 0.1923669 | 0.5875411 | -8.251509 | 4.252735 | 7.476295e-05 | 0.003780339 | 0.0003738147 | 0.01890169 | TRUE | TRUE | TRUE | TRUE |
9.4.3.2 Pipistrellus
physeq_pk <- subset_samples(merged_physeq_phylum, Species == "Pk")
physeq_pk <- prune_taxa(taxa_sums(physeq_pk)>0, physeq_pk)ancom_rand_output_struc_pk = ancombc2(data = physeq_pk,
assay_name = "counts",
tax_level = NULL,
fix_formula = "method",
p_adj_method = "holm",
pseudo_sens = TRUE,
prv_cut =0,
lib_cut = 0,
s0_perc = 0.05,
group = "method",
struc_zero = TRUE,
neg_lb = FALSE,
alpha = 0.05,
n_cl = 2,
verbose = TRUE,
global = FALSE,
pairwise = FALSE,
dunnet = FALSE,
trend = FALSE,
iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
em_control = list(tol = 1e-5, max_iter = 100),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
trend_control = NULL)Total structural zeros
structural_zeros <- ancom_rand_output_struc_pk$zero_ind
structural_zeros_df <- as.data.frame(structural_zeros) %>%
column_to_rownames(., "taxon")
structural_zero_taxa <- rownames(structural_zeros_df)[apply(structural_zeros_df, 1, any)]
structural_zero_groups <- structural_zeros_df[structural_zero_taxa, , drop = FALSE] %>%
rename("amplicon"=1) %>%
rename("metagenomics"=2)
nrow(structural_zero_groups)[1] 4
Structural zeros in amplicon
[1] 1
structural_zero_groups %>%
filter(amplicon==TRUE) %>%
rownames_to_column(., "phylum") %>%
left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>%
select(phylum) %>%
pull()[1] "Synergistota"
Structural zeros in metagenomics
structural_zero_groups %>%
filter(metagenomics==TRUE) %>%
rownames_to_column(., "phylum") %>%
left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>%
select(phylum) %>%
pull()[1] "Halobacterota" "Rs-K70 termite group" "Verrucomicrobiota"
Enriched phyla
| taxon | lfc_(Intercept) | lfc_methodMetagenomics | se_(Intercept) | se_methodMetagenomics | W_(Intercept) | W_methodMetagenomics | p_(Intercept) | p_methodMetagenomics | q_(Intercept) | q_methodMetagenomics | diff_(Intercept) | diff_methodMetagenomics | passed_ss_(Intercept) | passed_ss_methodMetagenomics |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Bacillota | 1.354895 | -1.500774 | 0.3216512 | 0.4736496 | 4.212310 | -3.168533 | 1.553444e-04 | 0.0030692348 | 0.0009320667 | 0.018415409 | TRUE | TRUE | FALSE | FALSE |
| Pseudomonadota | 1.673483 | -2.163873 | 0.3477039 | 0.5625639 | 4.812954 | -3.846448 | 2.144767e-05 | 0.0004209894 | 0.0001501337 | 0.002946926 | TRUE | TRUE | TRUE | TRUE |
9.4.3.3 Hypsugo
physeq_ha <- subset_samples(merged_physeq_phylum, Species == "Ha")
physeq_ha <- prune_taxa(taxa_sums(physeq_ha)>0, physeq_ha)ancom_rand_output_struc_ha = ancombc2(data = physeq_ha,
assay_name = "counts",
tax_level = NULL,
fix_formula = "method",
p_adj_method = "holm",
pseudo_sens = TRUE,
prv_cut =0,
lib_cut = 0,
s0_perc = 0.05,
group = "method",
struc_zero = TRUE,
neg_lb = FALSE,
alpha = 0.05,
n_cl = 2,
verbose = TRUE,
global = FALSE,
pairwise = FALSE,
dunnet = FALSE,
trend = FALSE,
iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
em_control = list(tol = 1e-5, max_iter = 100),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
trend_control = NULL)Total structural zeros
structural_zeros <- ancom_rand_output_struc_ha$zero_ind
structural_zeros_df <- as.data.frame(structural_zeros) %>%
column_to_rownames(., "taxon")
structural_zero_taxa <- rownames(structural_zeros_df)[apply(structural_zeros_df, 1, any)]
structural_zero_groups <- structural_zeros_df[structural_zero_taxa, , drop = FALSE] %>%
rename("amplicon"=1) %>%
rename("metagenomics"=2)
nrow(structural_zero_groups)[1] 5
Structural zeros in amplicon
[1] 1
structural_zero_groups %>%
filter(amplicon==TRUE) %>%
rownames_to_column(., "phylum") %>%
left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>%
select(phylum) %>%
pull()[1] "Spirochaetota"
Structural zeros in metagenomics
structural_zero_groups %>%
filter(metagenomics==TRUE) %>%
rownames_to_column(., "phylum") %>%
left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>%
select(phylum) %>%
pull()[1] "Cyanobacteriota" "Desulfobacterota" "Fusobacteriota" "Rs-K70 termite group"
Enriched phyla
| taxon | lfc_(Intercept) | lfc_methodMetagenomics | se_(Intercept) | se_methodMetagenomics | W_(Intercept) | W_methodMetagenomics | p_(Intercept) | p_methodMetagenomics | q_(Intercept) | q_methodMetagenomics | diff_(Intercept) | diff_methodMetagenomics | passed_ss_(Intercept) | passed_ss_methodMetagenomics |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Bacillota | 0.06866148 | 2.172955 | NaN | 0.3836565 | NaN | 5.663804 | 1 | 1.845838e-05 | 1 | 7.383351e-05 | FALSE | TRUE | FALSE | TRUE |