Chapter 8 Metabarcoding selected filtering based on alpha diversity (t3_p10)

load("resources/metagenomics/data_filtered.Rdata")
load("resources/amplicon/selected_filtering.Rdata")
species_color <- c("#e5bd5b", "#6b7398", "#76b183")

Number of MAGs

genome_metadata_amplicon %>% 
  nrow()%>% 
  cat()
165

Number of Archaea phyla

genome_metadata_amplicon %>% 
  filter(domain == "Archaea")%>%
  dplyr::select(phylum) %>%
  unique() %>%
  pull() %>%
  length()%>% 
  cat()
0

Number of Bacteria phyla

genome_metadata_amplicon %>% 
  filter(domain == "Bacteria")%>%
  dplyr::select(phylum) %>%
  unique() %>%
  pull() %>%
  length()%>% 
  cat()
9

8.1 Taxonomy

genome_counts_filt_amplicon %>%
  mutate_at(vars(-genome),  ~ . / sum(.)) %>% 
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% 
  left_join(., genome_metadata_amplicon, by = join_by(genome == genome)) %>% 
  left_join(., sample_metadata_amplicon, by = join_by(sample == sample)) %>% 
  filter(count > 0) %>% #filter 0 counts
  ggplot(., aes(
    x = sample,
    y = count,
    fill = phylum,
    group = phylum
  )) + 
  geom_bar(stat = "identity",
           colour = "white",
           linewidth = 0.1) + 
  scale_fill_manual(values = phylum_colors) +
  facet_nested( ~ factor(
    Species,
    labels = c("Eb" = "Cnephaeus", "Ha" = "Hypsugo", "Pk" = "Pipistrellus")
  ), scales = "free") + 
  guides(fill = guide_legend(ncol = 1)) +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.x = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.text = element_text(size = 10),
    strip.background = element_rect(fill = "white"),
    strip.text = element_text(
      size = 12,
      lineheight = 0.6,
      face = "bold"
    ),
    axis.line = element_line(
      linewidth = 0.5,
      linetype = "solid",
      colour = "black"
    )
  ) +
  labs(fill = "Phylum", y = "Relative abundance", x = "Samples")+
  guides(fill = guide_legend(ncol = 1))

Grouping low-abundance bacteria

p1 <- genome_counts_filt %>%
  mutate_at(vars(-genome), ~ . / sum(.)) %>% 
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% 
  left_join(genome_metadata, by = join_by(genome == genome)) %>% 
  left_join(sample_metadata, by = join_by(sample == sample)) %>% 
  filter(count > 0) %>%
group_by(sample, phylum) %>%
  mutate(total_abundance = sum(count)) %>%
  ungroup() %>%
  mutate(phylum = if_else(total_abundance < 0.01, "Other", phylum)) %>%
  group_by(sample, phylum, Species) %>%
  summarise(count = sum(count), .groups = "drop") %>%
   ggplot(aes(
    x = sample,
    y = count,
    fill = phylum,
    group = phylum
  )) + 
  geom_bar(stat = "identity",
           colour = "white",
           linewidth = 0.1) + 
  scale_fill_manual(values = c(phylum_colors, "Other" = "grey50"),
                    drop = FALSE)+
  facet_nested(~ factor(
    Species,
    labels = c("Eb" = "Cnephaeus", "Ha" = "Hypsugo", "Pk" = "Pipistrellus")
  ), scales = "free") + 
  guides(fill = guide_legend(ncol = 1)) +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.x = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.text = element_text(size = 10),
    strip.background = element_rect(fill = "white"),
    strip.text = element_text(
      size = 12,
      lineheight = 0.6,
      face = "bold"
    ),
    axis.line = element_line(
      linewidth = 0.5,
      linetype = "solid",
      colour = "black"
    )
  ) +
  labs(fill = "Phylum", y = "Relative abundance", x = "Samples")+
  guides(fill = guide_legend(ncol = 1))

#ggsave("community_plot_grouped_selected.pdf", plot = p1, width = 12, height = 6)
p1

Phylum relative abundances

phylum_summary <- genome_counts_filt_amplicon %>%
  mutate_at(vars(-genome),~./sum(.)) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata_amplicon, by = join_by(sample == sample)) %>%
  left_join(genome_metadata_amplicon, by = join_by(genome == genome)) %>%
  group_by(sample,phylum,Species) %>%
  summarise(relabun=sum(count))
phylum_summary %>%
  group_by(phylum) %>%
  summarise(
    Total_mean = mean(relabun * 100, na.rm = T),
    Total_sd = sd(relabun * 100, na.rm = T),
    Eb_mean = mean(relabun[Species == "Eb"] * 100, na.rm = T),
    Eb_sd = sd(relabun[Species == "Eb"] * 100, na.rm = T),
    Ha_mean = mean(relabun[Species == "Ha"] * 100, na.rm = T),
    Ha_sd = sd(relabun[Species == "Ha"] * 100, na.rm = T),
    Pk_mean = mean(relabun[Species == "Pk"] * 100, na.rm = T),
    Pk_sd = sd(relabun[Species == "Pk"] * 100, na.rm = T)
  ) %>%
  mutate(
    Total = str_c(round(Total_mean, 3), "±", round(Total_sd, 3)),
    Cnephaeus = str_c(round(Eb_mean, 3), "±", round(Eb_sd, 3)),
    Hypsugo = str_c(round(Ha_mean, 3), "±", round(Ha_sd, 3)),
    Pipistrellus = str_c(round(Pk_mean, 3), "±", round(Pk_sd, 3))
  ) %>%
  arrange(-Eb_mean) %>%
  dplyr::select(phylum,
                Total,
                Cnephaeus,
                Hypsugo,
                Pipistrellus) %>%
  tt()
phylum Total Cnephaeus Hypsugo Pipistrellus
Pseudomonadota 59.638±31.639 58.193±32.766 69.011±27.096 50.012±34.371
Bacillota 31.961±27.296 23.271±24.626 28.927±25.913 40.465±29.393
Fusobacteriota 2.21±5.446 5.588±7.617 0.056±0.239 2.631±6.155
Desulfobacterota 2.321±6.038 4.09±5.942 0.078±0.28 3.788±8.506
Bacteroidota 1.085±4.589 3.755±9.296 0.644±2.044 0.006±0.019
Patescibacteria 0.549±3.201 2.523±6.759 0±0 0±0
Rs-K70 termite group 1.586±4.999 1.3±3.51 0.823±3.474 2.606±6.923
Synergistota 0.325±0.914 0.666±1.408 0.007±0.033 0.478±1.007
Campylobacterota 0.326±1.007 0.613±1.18 0.454±1.297 0.014±0.059
bats = c("Eb", "Pk", "Ha")

total_asvs <- data.frame(
  Bat = character(),
  ASVs = numeric(),
  Phylum = numeric(),
  Family = numeric(),
  Genus = numeric()
)

preabs_table <- genome_counts_filt_amplicon %>%
  mutate(across(-genome, ~ . / sum(.))) %>%
  column_to_rownames("genome") %>%
  mutate(across(everything(), ~ as.integer(. > 0))) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("sample") %>%
  left_join(sample_metadata_amplicon[c("sample", "Species")], by = "sample") %>%
  group_by(Species) %>%
  summarize(across(-sample, sum), .groups = "drop") %>%
  column_to_rownames("Species") %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("genome")  %>%
  left_join(genome_metadata_amplicon, by = join_by("genome" == "genome")) %>% 
  filter(domain=="Bacteria")

phylum <- preabs_table %>%
  distinct(phylum)

family <- preabs_table %>%
  distinct(phylum, class, order, family)

genus <- preabs_table %>%
  distinct(phylum, class, order, family, genus)

total_asvs <- rbind(
  total_asvs,
  data.frame(
    Bat = "Total",
    ASVs = nrow(preabs_table),
    Phylum = nrow(phylum),
    Family = nrow(family),
    Genus = nrow(genus)
  )
)

for (bat in bats) {
  number <- preabs_table %>%
    select({{bat}}) %>%
    filter(. >= 1)
  
  phylum <- preabs_table %>%
    select({{bat}}, phylum) %>%
    filter(!!sym(bat) >= 1) %>%
    distinct(phylum)
  
  family <- preabs_table %>%
    select({{bat}}, phylum, class, order, family) %>%
    filter(!!sym(bat) >= 1) %>%
    distinct(phylum, class, order, family)
  
  genus <- preabs_table %>%
    select({{bat}}, phylum, class, order, family, genus) %>%
    filter(!!sym(bat) >= 1) %>%
    distinct(phylum, class, order, family, genus)
  
  total_asvs <- rbind(
    total_asvs,
    data.frame(
      Bat = bat,
      ASVs = nrow(number),
      Phylum = nrow(phylum),
      Family = nrow(family),
      Genus = nrow(genus)
    )
  )
}
bats = c("Eb", "Pk", "Ha")

no_annotation <- data.frame(Bat = character(),
                            No_genus = numeric(),
                            No_species = numeric())

preabs_table <- genome_counts_filt_amplicon %>%
  mutate(across(-genome, ~ . / sum(.))) %>%
  column_to_rownames("genome") %>%
  mutate(across(everything(), ~ as.integer(. > 0))) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("sample") %>%
  left_join(sample_metadata_amplicon[c("sample", "Species")], by = "sample") %>%
  group_by(Species) %>%
  summarize(across(-sample, sum), .groups = "drop") %>%
  column_to_rownames("Species") %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("genome")  %>%
  left_join(genome_metadata_amplicon, by = join_by("genome" == "genome")) %>% 
  filter(domain=="Bacteria")

genus <- preabs_table  %>%
  filter(is.na(genus))

species <- preabs_table  %>%
  filter(is.na(species))

no_annotation <- rbind(no_annotation,
                       data.frame(
                         Bat = "Total",
                         No_genus = nrow(genus),
                         No_species = nrow(species)
                       ))

for (bat in bats) {
  number <- preabs_table %>%
    select({{bat}}) %>%
    filter(. >= 1)
  
  genus <- preabs_table %>%
    select({{bat}}, phylum, class, order, family, genus) %>%
    filter(!!sym(bat) >= 1) %>%
  filter(is.na(genus))
  
  species <- preabs_table %>%
    filter(!!sym(bat) >= 1) %>%
  filter(is.na(species))
  
  no_annotation <- rbind(no_annotation,
                         data.frame(
                           Bat = bat,
                           No_genus = nrow(genus),
                           No_species = nrow(species)
                         ))
}

Total percentage of ASVs without genus-level annotation

nongenera <- genome_metadata_amplicon %>%
  filter(domain=="Bacteria") %>%
  filter(is.na(genus)) %>% 
  summarize(ASV_nogenera = n()) %>%
  pull()
nasvs <- total_asvs %>%
  filter(Bat == "Total") %>%
  select(ASVs) %>%
  pull()
perct <- nongenera * 100 / nasvs
cat(perct)
22.42424

Percentage of ASVs without genus-level annotation by phylum

total_asv_phylum <- genome_metadata_amplicon %>%
  filter(domain=="Bacteria") %>%
  group_by(phylum) %>%
  summarize(Total_ASVs = n())
genome_metadata_amplicon %>%
  filter(is.na(genus)) %>% 
  group_by(phylum) %>%
  summarize(ASVs_nogenus = n()) %>%
  left_join(total_asv_phylum, by = join_by(phylum == phylum)) %>%
  mutate(Percentage_nogenus = 100 * ASVs_nogenus / Total_ASVs) %>%
  tt()
phylum ASVs_nogenus Total_ASVs Percentage_nogenus
Bacillota 12 57 21.052632
Bacteroidota 1 12 8.333333
Patescibacteria 4 6 66.666667
Pseudomonadota 16 71 22.535211
Rs-K70 termite group 4 4 100.000000

Number of bacterial species

genome_metadata_amplicon %>% 
  filter(domain == "Bacteria")%>%
  dplyr::select(species) %>%
  unique() %>%
  drop_na() %>% 
  pull() %>%
  length() %>% 
  cat()
10

Total percentage of ASVs without species-level annotation

nonspecies <- genome_metadata_amplicon %>%
  filter(domain=="Bacteria") %>%
  filter(is.na(species)) %>%
  summarize(ASV_nospecies = n()) %>%
  pull()
perct <- nonspecies * 100 / nasvs
cat(perct)
90.90909

ASVs without species-level annotation

total_mag_phylum <- genome_metadata_amplicon %>%
  filter(domain=="Bacteria") %>%
  group_by(phylum) %>%
  summarize(ASVs_total = n())

genome_metadata_amplicon %>%
  filter(domain=="Bacteria") %>%
  filter(is.na(species)) %>%
  group_by(phylum) %>%
  summarize(ASVs_nospecies = n()) %>%
  left_join(total_mag_phylum, by = join_by(phylum == phylum)) %>%
  mutate(Species_annotated = ASVs_total - ASVs_nospecies) %>%
  mutate(Percentage_nospecies = 100 * ASVs_nospecies / ASVs_total) %>%
  mutate(Percentage_species = 100 - 100 * ASVs_nospecies / ASVs_total) %>%
  tt()
phylum ASVs_nospecies ASVs_total Species_annotated Percentage_nospecies Percentage_species
Bacillota 53 57 4 92.98246 7.017544
Bacteroidota 10 12 2 83.33333 16.666667
Campylobacterota 2 2 0 100.00000 0.000000
Desulfobacterota 8 8 0 100.00000 0.000000
Fusobacteriota 3 4 1 75.00000 25.000000
Patescibacteria 6 6 0 100.00000 0.000000
Pseudomonadota 63 71 8 88.73239 11.267606
Rs-K70 termite group 4 4 0 100.00000 0.000000
Synergistota 1 1 0 100.00000 0.000000

8.1.1 Summary table

bats = c("Eb", "Pk", "Ha")

single_sp <- data.frame(Bat = character(), Single_species = numeric())

bacteria <- genome_metadata_amplicon %>% 
  filter(domain=="Bacteria")

table_upset_analysis <- genome_counts_filt_amplicon %>%
  mutate(across(-genome, ~ . / sum(.))) %>%
  filter(genome %in% bacteria$genome) %>% 
  column_to_rownames("genome") %>%
  mutate(across(everything(), ~ as.integer(. > 0))) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("sample") %>%
  left_join(sample_metadata_amplicon[c("sample", "Species")], by = "sample") %>%
  group_by(Species) %>%
  summarize(across(-sample, sum), .groups = "drop") %>%
  column_to_rownames("Species") %>%
  mutate(across(everything(), ~ as.integer(. > 0))) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("sample")

unique_all <- table_upset_analysis %>%
  filter(rowSums(across(Eb:Pk)) == 1)

single_sp <- rbind(single_sp, data.frame(Bat = "Total",
                                         Single_species = nrow(unique_all)))
  
for (bat in bats) {
  unique <- table_upset_analysis %>%
    filter(rowSums(across(Eb:Pk)) == 1) %>%
    column_to_rownames(., "sample") %>% 
    select(bat) %>%
    filter(. > 0) %>%
    nrow()
  single_sp <- rbind(single_sp, data.frame(Bat = bat, Single_species = unique))
}
single_ind <- data.frame(Bat = character(), Single_individual = numeric())

freq_table <- genome_counts_filt_amplicon %>%
  mutate(across(-genome, ~ . / sum(.))) %>%
  filter(genome %in% bacteria$genome) %>%
  column_to_rownames("genome") %>%
  mutate(across(everything(), ~ as.integer(. > 0))) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("sample") %>%
  left_join(sample_metadata_amplicon[c("sample", "Species")], by = "sample") %>%
  group_by(Species) %>%
  summarize(across(-sample, sum), .groups = "drop") %>%
  column_to_rownames("Species") %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("asv")

singleton_filt <- freq_table %>%
  rowwise() %>%
  mutate(row_sum = sum(c_across(-asv))) %>%
  filter(row_sum == 1) %>%
  column_to_rownames(var = "asv")  %>%
  filter(row_sum == 1)

single_ind <- rbind(single_ind, data.frame(
  Bat = "Total",
  Single_individual = nrow(singleton_filt)
))
     
for (bat in bats) {
  singleton_filt <- freq_table %>%
    rowwise() %>%
    mutate(row_sum = sum(c_across(-asv))) %>%
    filter(row_sum == 1) %>%
    column_to_rownames(var = "asv")  %>%
    select(bat) %>%
    filter(. == 1)
  
  single_ind <- rbind(single_ind, data.frame(
    Bat = bat,
    Single_individual = nrow(singleton_filt) 
  ))
}
summary_table_ampli <- total_asvs %>%
  left_join(., no_annotation, by = "Bat") %>%
  left_join(., single_ind, by = "Bat") %>%
  left_join(., single_sp, by = "Bat") %>%
  mutate(Bat = recode(Bat, 
                      Eb = "Cnephaeus bottae",
                      Ha = "Hypsugo ariel",
                      Pk = "Pipistrellus kuhlii"))
summary_table_ampli %>% 
  tt()
Bat ASVs Phylum Family Genus No_genus No_species Single_individual Single_species
Total 165 9 51 71 37 150 31 57
Cnephaeus bottae 140 9 46 61 30 127 31 46
Pipistrellus kuhlii 88 8 35 46 15 82 0 0
Hypsugo ariel 99 8 38 51 18 90 0 11
total_asv <- nrow(genome_metadata_amplicon)
summary_table_ampli %>% 
  select(-Phylum,-Family, -Genus) %>% 
  rowwise() %>% 
  mutate(ASV_perc=round(ASVs*100/total_asv, 2))%>% 
  mutate(No_genus_perc=round(No_genus*100/ASVs, 2))%>% 
  mutate(No_species_perc=round(No_species*100/ASVs, 2)) %>% 
  mutate(Single_individual_perc=round(Single_individual*100/ASVs, 2))%>% 
  mutate(Single_species_perc=round(Single_species*100/ASVs, 2)) %>% 
  mutate(Single_individual_per_unique=round(Single_individual*100/Single_species, 2)) %>% 
  select(1,7:12) %>% tt()
Bat ASV_perc No_genus_perc No_species_perc Single_individual_perc Single_species_perc Single_individual_per_unique
Total 100.00 22.42 90.91 18.79 34.55 54.39
Cnephaeus bottae 84.85 21.43 90.71 22.14 32.86 67.39
Pipistrellus kuhlii 53.33 17.05 93.18 0.00 0.00 NaN
Hypsugo ariel 60.00 18.18 90.91 0.00 11.11 0.00

Shared phyla

meta_phylum <- genome_metadata %>% 
  distinct(domain,phylum)

genome_metadata_amplicon %>% 
  distinct(domain,phylum) %>% 
  filter(phylum %in% meta_phylum$phylum)
    domain           phylum
1 Bacteria   Pseudomonadota
2 Bacteria   Fusobacteriota
3 Bacteria        Bacillota
4 Bacteria Desulfobacterota
5 Bacteria     Synergistota
6 Bacteria     Bacteroidota
7 Bacteria Campylobacterota

Only in metagenomic data

ampli_phylum <- genome_metadata_amplicon %>% 
  distinct(domain,phylum)

genome_metadata %>% 
  distinct(domain,phylum) %>% 
  filter(!phylum %in% genome_metadata_amplicon$phylum)
# A tibble: 6 × 2
  domain   phylum          
  <chr>    <chr>           
1 Bacteria Elusimicrobiota 
2 Bacteria Deferribacterota
3 Bacteria Planctomycetota 
4 Bacteria Spirochaetota   
5 Bacteria Actinomycetota  
6 Bacteria Cyanobacteriota 

Only in metabarcoding selected data

genome_metadata_amplicon %>% 
  distinct(domain,phylum) %>% 
  filter(!phylum %in% meta_phylum$phylum)
    domain               phylum
1 Bacteria Rs-K70 termite group
2 Bacteria      Patescibacteria

8.1.2 Differences at family level

Number of families in metagenomic data

load("resources/metagenomics/data_filtered.Rdata")
cat(genome_metadata %>%
      distinct(domain, phylum, family) %>%
      count() %>%
      pull())
56

Number of families in metabarcoding selected data

cat(genome_metadata_amplicon %>%
      distinct(domain, phylum, family) %>%
      count() %>%
      pull())
49

Shared families

meta_family <- genome_metadata %>% 
  distinct(domain,phylum, family)

genome_metadata_amplicon %>% 
  distinct(domain,phylum, family) %>% 
  filter(family %in% meta_family$family)
     domain           phylum              family
1  Bacteria   Pseudomonadota Diplorickettsiaceae
2  Bacteria   Pseudomonadota        Vibrionaceae
3  Bacteria   Pseudomonadota  Enterobacteriaceae
4  Bacteria   Fusobacteriota    Leptotrichiaceae
5  Bacteria        Bacillota     Enterococcaceae
6  Bacteria   Pseudomonadota      Morganellaceae
7  Bacteria   Pseudomonadota     Pasteurellaceae
8  Bacteria   Pseudomonadota      Aeromonadaceae
9  Bacteria        Bacillota    Mycoplasmataceae
10 Bacteria        Bacillota    Streptococcaceae
11 Bacteria   Pseudomonadota      Rickettsiaceae
12 Bacteria Desulfobacterota Desulfovibrionaceae
13 Bacteria        Bacillota      Clostridiaceae
14 Bacteria        Bacillota     Lachnospiraceae
15 Bacteria     Synergistota      Synergistaceae
16 Bacteria   Pseudomonadota    Burkholderiaceae
17 Bacteria        Bacillota     Ruminococcaceae
18 Bacteria   Fusobacteriota    Fusobacteriaceae
19 Bacteria   Pseudomonadota      Halomonadaceae
20 Bacteria     Bacteroidota       Weeksellaceae
21 Bacteria   Pseudomonadota      Rhodocyclaceae
22 Bacteria   Pseudomonadota       Neisseriaceae
23 Bacteria        Bacillota Erysipelotrichaceae
24 Bacteria Campylobacterota   Helicobacteraceae
25 Bacteria        Bacillota Christensenellaceae
26 Bacteria     Bacteroidota      Bacteroidaceae
27 Bacteria Campylobacterota  Campylobacteraceae
28 Bacteria     Bacteroidota   Dysgonomonadaceae
29 Bacteria   Pseudomonadota    Acetobacteraceae

Only in metagenomic data

ampli_family <- genome_metadata_amplicon %>% 
  distinct(domain,phylum,family)

genome_metadata %>% 
  distinct(domain,phylum,family) %>% 
  filter(!family %in% ampli_family$family)
# A tibble: 27 × 3
   domain   phylum           family                
   <chr>    <chr>            <chr>                 
 1 Bacteria Pseudomonadota   ""                    
 2 Bacteria Elusimicrobiota  "Elusimicrobiaceae"   
 3 Bacteria Bacillota        "Metamycoplasmataceae"
 4 Bacteria Bacillota        "Acutalibacteraceae"  
 5 Bacteria Pseudomonadota   "CAG-239"             
 6 Bacteria Bacillota        ""                    
 7 Bacteria Desulfobacterota "Adiutricaceae"       
 8 Bacteria Bacteroidota     "Tannerellaceae"      
 9 Bacteria Deferribacterota "Mucispirillaceae"    
10 Bacteria Planctomycetota  "SZUA-567"            
# ℹ 17 more rows

Only in metabarcoding selected data

genome_metadata_amplicon %>% 
  distinct(domain,phylum,family) %>% 
  filter(!family %in% meta_family$family)
     domain               phylum                             family
1  Bacteria       Pseudomonadota                       Yersiniaceae
2  Bacteria            Bacillota                        Bacillaceae
3  Bacteria Rs-K70 termite group                               <NA>
4  Bacteria            Bacillota                      Aerococcaceae
5  Bacteria       Pseudomonadota                   Pseudomonadaceae
6  Bacteria       Pseudomonadota                               <NA>
7  Bacteria            Bacillota                 Acidaminococcaceae
8  Bacteria            Bacillota                   Lactobacillaceae
9  Bacteria       Pseudomonadota                   Aquaspirillaceae
10 Bacteria       Pseudomonadota                         Hafniaceae
11 Bacteria            Bacillota                               <NA>
12 Bacteria       Pseudomonadota                     Comamonadaceae
13 Bacteria      Patescibacteria                               <NA>
14 Bacteria            Bacillota              Peptostreptococcaceae
15 Bacteria            Bacillota                     Eubacteriaceae
16 Bacteria      Patescibacteria                 Saccharimonadaceae
17 Bacteria            Bacillota [Clostridium] methylpentosum group
18 Bacteria            Bacillota          Erysipelatoclostridiaceae
19 Bacteria            Bacillota                      Vagococcaceae
20 Bacteria            Bacillota                   Anaerovoracaceae

8.1.3 Differences in top ASVs and MAGs

Top 20 MAGs

genome_counts_filt %>%
  mutate(across(-genome, ~ . / sum(.))) %>%
  mutate(total = rowSums(across(-genome))) %>%
  arrange(desc(total)) %>%
  slice_head(n = 20) %>% 
  select(genome, total) %>% 
  left_join(genome_metadata[1:8])
      genome      total   domain         phylum               class              order               family             genus        species
1  HA_bin.16 10.1429255 Bacteria Pseudomonadota Gammaproteobacteria Diplorickettsiales  Diplorickettsiaceae Aquirickettsiella               
2  PK_bin.10  3.4725989 Bacteria      Bacillota             Bacilli    Mycoplasmatales    Mycoplasmoidaceae      Malacoplasma               
3  EH_bin.20  3.0940365 Bacteria Pseudomonadota Gammaproteobacteria   Enterobacterales         Vibrionaceae            Vibrio       cholerae
4   HA_bin.8  2.4545179 Bacteria Pseudomonadota Alphaproteobacteria      Rickettsiales       Rickettsiaceae        Rickettsia               
5  PK_bin.26  1.9706785 Bacteria Pseudomonadota Gammaproteobacteria   Enterobacterales   Enterobacteriaceae          Serratia     ureilytica
6   HA_bin.7  1.9293752 Bacteria Pseudomonadota Gammaproteobacteria       Chromatiales        Chromatiaceae                                 
7  PK_bin.12  1.2830141 Bacteria Pseudomonadota Alphaproteobacteria      Rickettsiales       Rickettsiaceae        Rickettsia tillamookensis
8  PK_bin.46  1.1521884 Bacteria   Bacteroidota         Bacteroidia   Flavobacteriales        Weeksellaceae         Apibacter       raozihei
9  PK_bin.53  0.9584216 Bacteria Pseudomonadota Alphaproteobacteria      Rickettsiales       Rickettsiaceae        Rickettsia         bellii
10 PK_bin.52  0.9404985 Bacteria Pseudomonadota Gammaproteobacteria   Enterobacterales   Enterobacteriaceae        Klebsiella     pneumoniae
11 PK_bin.64  0.9119555 Bacteria Pseudomonadota Gammaproteobacteria   Enterobacterales       Morganellaceae        Morganella       morganii
12  EH_bin.5  0.8747411 Bacteria      Bacillota             Bacilli    Mycoplasmatales     Mycoplasmataceae       Spiroplasma     phoeniceum
13 HA_bin.19  0.8484409 Bacteria      Bacillota             Bacilli    Mycoplasmatales     Mycoplasmataceae    Edwardiiplasma               
14  PK_bin.5  0.8435610 Bacteria Pseudomonadota Gammaproteobacteria   Enterobacterales      Pasteurellaceae       Pasteurella         oralis
15 PK_bin.72  0.8277429 Bacteria      Bacillota             Bacilli    Lactobacillales      Enterococcaceae      Enterococcus       faecalis
16 PK_bin.68  0.7829460 Bacteria Pseudomonadota Gammaproteobacteria   Enterobacterales   Enterobacteriaceae           Proteus       cibarius
17  HA_bin.3  0.7433329 Bacteria Pseudomonadota Gammaproteobacteria    Burkholderiales     Burkholderiaceae                                 
18  HA_bin.2  0.7297240 Bacteria Pseudomonadota Alphaproteobacteria    Acetobacterales     Acetobacteraceae                                 
19 PK_bin.22  0.7287690 Bacteria      Bacillota             Bacilli    Mycoplasmatales Metamycoplasmataceae            UBA710               
20  EH_bin.1  0.6409658 Bacteria Pseudomonadota Gammaproteobacteria   Enterobacterales       Aeromonadaceae         Aeromonas        veronii

Top 20 ASVs

genome_counts_filt_amplicon %>%
  mutate(across(-genome, ~ . / sum(.))) %>%
  mutate(total = rowSums(across(-genome))) %>%
  arrange(desc(total)) %>%
  slice_head(n = 20) %>% 
  select(genome, total) %>% 
  left_join(genome_metadata_amplicon)
   genome     total   domain         phylum               class              order              family                  genus    species
1   ASV_4 2.3764486 Bacteria Pseudomonadota Gammaproteobacteria   Enterobacterales  Enterobacteriaceae           Enterobacter       <NA>
2  ASV_11 2.2760262 Bacteria      Bacillota             Bacilli         Bacillales         Bacillaceae               Bacillus       <NA>
3   ASV_2 2.2452471 Bacteria Pseudomonadota Gammaproteobacteria   Enterobacterales        Yersiniaceae               Serratia       <NA>
4   ASV_3 2.1898226 Bacteria Pseudomonadota Gammaproteobacteria   Enterobacterales        Vibrionaceae                 Vibrio       <NA>
5   ASV_6 2.0174879 Bacteria      Bacillota             Bacilli    Lactobacillales     Enterococcaceae           Enterococcus       <NA>
6  ASV_12 1.7969253 Bacteria      Bacillota             Bacilli         Bacillales         Bacillaceae               Bacillus       <NA>
7   ASV_1 1.7070753 Bacteria Pseudomonadota Gammaproteobacteria Diplorickettsiales Diplorickettsiaceae          Rickettsiella       <NA>
8  ASV_14 1.6969153 Bacteria Pseudomonadota Gammaproteobacteria   Enterobacterales        Yersiniaceae               Serratia       <NA>
9  ASV_10 1.5038717 Bacteria Pseudomonadota Gammaproteobacteria   Enterobacterales      Aeromonadaceae              Aeromonas       <NA>
10  ASV_7 1.3482075 Bacteria Pseudomonadota Gammaproteobacteria   Enterobacterales      Morganellaceae             Morganella   morganii
11  ASV_8 1.3124299 Bacteria Pseudomonadota Gammaproteobacteria   Enterobacterales     Pasteurellaceae      Vespertiliibacter       <NA>
12 ASV_40 1.0214370 Bacteria Pseudomonadota Gammaproteobacteria   Enterobacterales  Enterobacteriaceae                   <NA>       <NA>
13 ASV_24 0.9463566 Bacteria Pseudomonadota Alphaproteobacteria      Rickettsiales      Rickettsiaceae             Rickettsia       <NA>
14  ASV_5 0.8238483 Bacteria Fusobacteriota       Fusobacteriia    Fusobacteriales    Leptotrichiaceae             Sebaldella termitidis
15 ASV_15 0.7146375 Bacteria      Bacillota             Bacilli    Lactobacillales    Streptococcaceae            Lactococcus       <NA>
16 ASV_17 0.6561496 Bacteria Pseudomonadota Gammaproteobacteria   Enterobacterales     Pasteurellaceae                   <NA>       <NA>
17 ASV_90 0.6544987 Bacteria      Bacillota          Clostridia      Clostridiales      Clostridiaceae Candidatus Arthromitus       <NA>
18 ASV_13 0.6387632 Bacteria      Bacillota             Bacilli    Mycoplasmatales    Mycoplasmataceae             Mycoplasma       <NA>
19 ASV_20 0.5916116 Bacteria Pseudomonadota Gammaproteobacteria   Enterobacterales  Enterobacteriaceae           Enterobacter       <NA>
20 ASV_50 0.5839039 Bacteria      Bacillota             Bacilli    Lactobacillales     Enterococcaceae           Enterococcus       <NA>

Phylum level

topMAG <- genome_counts_filt %>%
  mutate(across(-genome, ~ . / sum(.))) %>%
  left_join(genome_metadata[c(1, 3, 6, 7)], by="genome") %>% 
  select(-genome) %>% 
  group_by(phylum) %>% 
  summarize(across(where(is.numeric), sum), .groups = "drop") %>% 
  mutate(total_mag = rowSums(across(where(is.numeric)))) %>% 
  select(total_mag, phylum) %>% 
  arrange(desc(total_mag))%>%
  slice_head(n = 10)
topASV <- genome_counts_filt_amplicon %>%
  mutate(across(-genome, ~ . / sum(.))) %>%
  left_join(genome_metadata_amplicon, by="genome") %>% 
  select(-genome) %>% 
  group_by(phylum) %>% 
  summarize(across(where(is.numeric), sum), .groups = "drop") %>% 
  mutate(total_asv = rowSums(across(where(is.numeric)))) %>% 
  select(total_asv, phylum) %>% 
  arrange(desc(total_asv)) %>%
  slice_head(n = 10)

table <- full_join(topMAG,topASV, by="phylum") %>% 
    mutate(ASV = replace_na(total_asv, 0)) %>% 
    mutate(MAG = replace_na(total_mag, 0)) %>% 
  select(-total_asv, -total_mag) %>% 
  pivot_longer(cols = c(MAG, ASV),
               names_to = "source",
               values_to = "abundance")
ggplot(table, aes(x = source, y = phylum, size = abundance, color = phylum)) +
  geom_point(alpha = 0.7) +
  scale_size_continuous(
    name = "Abundance",
    range = c(0.01, 30),   # 0 ensures no bubble at abundance = 0
    breaks = seq(0, 35, by = 5),   # Controls legend labels (your 0–5–10–...–30)
    limits = c(0.01, NA)   # keep full range of values
  )  +
    scale_color_manual(values = phylum_colors) +
    coord_flip()+
  theme_minimal()+
  theme(axis.text.x = element_text(angle = 45,hjust = 1, size=12),
        axis.text.y = element_text(size = 12),
        axis.title = element_text(size = 14, face = "bold"),
        panel.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
        legend.text = element_text(size = 10),
        legend.title = element_text(size = 12))+
  labs(x = "Source", y = "Phylum",
    title = "Metabarcoding with selected threshold")+
  guides(color = "none", size = guide_legend(
      override.aes = list(size = c(2, 4, 6, 8, 10, 12)) # smaller bubbles in legend
    )
  )

full_join(topMAG,topASV, by="phylum") %>% 
    mutate(ASV = replace_na(total_asv, 0)) %>% 
    mutate(MAG = replace_na(total_mag, 0)) %>% 
  ggplot(., aes(x = MAG, y = ASV, color = phylum, label = phylum)) +
  geom_point(size = 3, alpha = 0.8) +
  geom_text_repel(max.overlaps = 20) +
  geom_smooth(method = "lm", se = FALSE, color = "grey50") +
  labs(
    x = "MAG abundance",
    y = "ASV abundance",
    color = "Phylum",
    title = "Comparison of MAG and ASV abundances by phylum"
  ) +
  theme_minimal()

Family level

topFMAG <- genome_counts_filt %>%
  mutate(across(-genome, ~ . / sum(.))) %>%
  left_join(genome_metadata[c(1:7)], by="genome") %>%
  mutate(
    class  = if_else(str_trim(class) == "", paste0(phylum, "_Unclassified"), class),
    order  = if_else(str_trim(order) == "",
                     if_else(str_detect(class, "Unclassified"), class, paste0(class, "_Unclassified")),
                     order),
    family = if_else(str_trim(family) == "",
                     if_else(str_detect(order, "Unclassified"), order, paste0(order, "_Unclassified")),
                     family),
    genus  = if_else(str_trim(genus) == "",
                     if_else(str_detect(family, "Unclassified"), family, paste0(family, "_Unclassified")),
                     genus)
  )%>% 
  select(-genome) %>% 
  group_by(phylum, family) %>% 
  summarize(across(where(is.numeric), sum), .groups = "drop") %>% 
  mutate(total_mag = rowSums(across(where(is.numeric)))) %>% 
  select(total_mag, phylum, family) %>% 
  arrange(desc(total_mag)) %>%
  slice_head(n = 20)
full_join(topFMAG,topFASV, by="family") %>% 
    mutate(ASV = replace_na(total_asv, 0)) %>% 
    mutate(MAG = replace_na(total_mag, 0)) %>% 
    mutate(phylum_mag = coalesce(phylum.x, phylum.y))%>%
  ggplot(., aes(x = MAG, y = ASV, color = phylum_mag, label = family)) +
  geom_point(size = 3, alpha = 0.8) +
  geom_text_repel(max.overlaps = 10) +
  geom_smooth(method = "lm", se = FALSE, color = "grey50") +
      scale_color_manual(values = phylum_colors) +
  labs(
    x = "MAG abundance",
    y = "ASV abundance",
    color = "Phylum",
    title = "Metabarcoding with selected filtering"
  ) +
  theme_minimal()

8.2 Alpha Diversity

8.2.1 ASV level

richness <- genome_counts_filt_amplicon %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where( ~ !all(. == 0))) %>%
  hilldiv(., q = 0) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(richness = 1) %>%
  rownames_to_column(var = "sample")

neutral <- genome_counts_filt_amplicon %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where( ~ !all(. == 0))) %>%
  hilldiv(., q = 1) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(neutral = 1) %>%
  rownames_to_column(var = "sample")

metabarcoding_selected_alpha_div <- richness %>%
  full_join(neutral, by = join_by(sample == sample)) %>% 
  mutate(method="Metabarcoding_selected")

Correlation between diversity and estimate microbial fraction

fraction_ampli <- microbial_fraction %>% 
  select(sample, estimated_microbial_fraction) %>% 
  right_join(metabarcoding_selected_alpha_div, by="sample") %>% 
  left_join(sample_metadata_amplicon, by="sample") 

cor.test(fraction_ampli$estimated_microbial_fraction, fraction_ampli$richness, method = "spearman")

    Spearman's rank correlation rho

data:  fraction_ampli$estimated_microbial_fraction and fraction_ampli$richness
S = 10963, p-value = 0.4813
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.1116885 
cor.test(fraction_ampli$estimated_microbial_fraction, fraction_ampli$neutral, method = "spearman")

    Spearman's rank correlation rho

data:  fraction_ampli$estimated_microbial_fraction and fraction_ampli$neutral
S = 11606, p-value = 0.7071
alternative hypothesis: true rho is not equal to 0
sample estimates:
       rho 
0.05955757 
p1_ampli <- ggplot(fraction_ampli, aes(x = estimated_microbial_fraction, y = richness)) +
  geom_point(size = 3, color = "#2E86AB") +
  geom_smooth(method = "lm", se = TRUE, color = "black") +
  theme_classic() +
  labs(x = "Read Fraction (%)", y = "Richness", title= "Correlation between estimated microbial fraction and diversity")

p2_ampli <- ggplot(fraction_ampli, aes(x = estimated_microbial_fraction, y = neutral)) +
  geom_point(size = 3, color = "#F39C12") +
  geom_smooth(method = "lm", se = TRUE, color = "black") +
  theme_classic() +
  labs(x = "Read Fraction (%)", y = "Neutral Alpha Diversity")

p1_ampli + p2_ampli 

richness %>%
  full_join(neutral, by = join_by(sample == sample))  %>%
  pivot_longer(-sample, names_to = "alpha", values_to = "value") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  group_by(alpha) %>%
  summarise(
    total_mean = mean(value, na.rm = T),
    total_sd = sd(value, na.rm = T),
    Eb_mean = mean(value[Species == "Eb"], na.rm = T),
    Eb_sd = sd(value[Species == "Eb"], na.rm = T),
    Ha_mean = mean(value[Species == "Ha"], na.rm = T),
    Ha_sd = sd(value[Species == "Ha"], na.rm = T),
    Pk_mean = mean(value[Species == "Pk"], na.rm = T),
    Pk_sd = sd(value[Species == "Pk"], na.rm = T)
  ) %>%
  mutate(
    Total = str_c(round(total_mean, 2), "±", round(total_sd, 2)),
    Cnephaeus = str_c(round(Eb_mean, 2), "±", round(Eb_sd, 2)),
    Hypsugo = str_c(round(Ha_mean, 2), "±", round(Ha_sd, 2)),
    Pipistrellus = str_c(round(Pk_mean, 2), "±", round(Pk_sd, 2))
  ) %>%
  arrange(-Eb_mean) %>%
  dplyr::select(alpha, Total, Cnephaeus, Pipistrellus, Hypsugo) %>%
  tt()
alpha Total Cnephaeus Pipistrellus Hypsugo
richness 20.26±10.3 30.9±10.56 16.71±9.73 17.84±6.64
neutral 7.98±5.18 11.9±5.65 6.57±4.65 7.17±4.57

8.2.2 Different taxonomic level

sample_metadata_amplicon <- sample_metadata_amplicon[c(1,9,10,13)] %>%
  mutate(sample = paste0(sample, "_ampli_selec")) %>%
  mutate(method="Metabarcoding_selected")

genome_metadata_amplicon <- genome_metadata_amplicon %>%
  mutate(
    class  = if_else(is.na(class), paste0(phylum, "_Unclassified"), class),
    order  = if_else(is.na(order),
                     if_else(str_detect(class, "Unclassified"), class, paste0(class, "_Unclassified")),
                     order),
    family = if_else(is.na(family),
                     if_else(str_detect(order, "Unclassified"), order, paste0(order, "_Unclassified")),
                     family),
    genus  = if_else(is.na(genus),
                     if_else(str_detect(family, "Unclassified"), family, paste0(family, "_Unclassified")),
                     genus)
  )

genome_tax_ampli <- genome_counts_filt_amplicon %>% 
  column_to_rownames(., "genome")%>%
  rename_with(~ paste0(., "_ampli_selec")) %>%
  rownames_to_column(., "genome")%>%
  left_join(genome_metadata_amplicon, by = "genome")

family_level_agg_ampli <- genome_tax_ampli %>%
  select(-genome) %>%
  group_by(phylum, class, family) %>%
  summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>% 
    as.data.frame() %>% 
  select(-c(phylum,class))

phylum_level_agg_ampli <- genome_tax_ampli %>%
  select(-genome) %>%
  group_by(phylum) %>%
  summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>% 
    as.data.frame()

8.2.2.1 Family level

richness_f_ampli <- family_level_agg_ampli %>%
  column_to_rownames(var = "family") %>%
  dplyr::select(where( ~ !all(. == 0))) %>%
  hilldiv(., q = 0) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(richness = 1) %>%
  rownames_to_column(var = "sample")

neutral_f_ampli <- family_level_agg_ampli %>%
  column_to_rownames(var = "family") %>%
  dplyr::select(where( ~ !all(. == 0))) %>%
  hilldiv(., q = 1) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(neutral = 1) %>%
  rownames_to_column(var = "sample")

alpha_div_f_ampli <- richness_f_ampli %>%
  full_join(neutral_f_ampli, by = join_by(sample == sample))

alpha_div_f_ampli_data <- alpha_div_f_ampli %>%
  left_join(sample_metadata_amplicon, by = join_by(sample == sample))

Diversity values

alpha_div_f_ampli %>%
  pivot_longer(-sample, names_to = "alpha", values_to = "value") %>%
  left_join(sample_metadata_amplicon, by = join_by(sample == sample)) %>%
  group_by(alpha) %>%
  summarise(
    total_mean = mean(value, na.rm = T),
    total_sd = sd(value, na.rm = T),
    Eb_mean = mean(value[Species == "Eb"], na.rm = T),
    Eb_sd = sd(value[Species == "Eb"], na.rm = T),
    Ha_mean = mean(value[Species == "Ha"], na.rm = T),
    Ha_sd = sd(value[Species == "Ha"], na.rm = T),
    Pk_mean = mean(value[Species == "Pk"], na.rm = T),
    Pk_sd = sd(value[Species == "Pk"], na.rm = T)
  ) %>%
  mutate(
    Total = str_c(round(total_mean, 2), "±", round(total_sd, 2)),
    Cnephaeus = str_c(round(Eb_mean, 2), "±", round(Eb_sd, 2)),
    Hypsugo = str_c(round(Ha_mean, 2), "±", round(Ha_sd, 2)),
    Pipistrellus = str_c(round(Pk_mean, 2), "±", round(Pk_sd, 2))
  ) %>%
  arrange(-Eb_mean) %>%
  dplyr::select(alpha, Total, Cnephaeus, Pipistrellus, Hypsugo) %>%
  tt()
alpha Total Cnephaeus Pipistrellus Hypsugo
richness 11.5±4.66 14.9±5.7 9.94±4.18 11.11±3.68
neutral 4.51±2.75 5.41±3.54 3.56±2.26 4.89±2.56

Richness

Model_richness <- MASS::glm.nb(richness ~ Species, data = alpha_div_f_ampli_data,trace=TRUE)
summary(Model_richness)

Call:
MASS::glm.nb(formula = richness ~ Species, data = alpha_div_f_ampli_data, 
    trace = TRUE, init.theta = 21.87845286, link = log)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)   2.7014     0.1062  25.432  < 2e-16 ***
SpeciesHa    -0.2939     0.1357  -2.165  0.03036 *  
SpeciesPk    -0.4047     0.1410  -2.870  0.00411 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(21.8785) family taken to be 1)

    Null deviance: 55.830  on 45  degrees of freedom
Residual deviance: 47.315  on 43  degrees of freedom
AIC: 268.09

Number of Fisher Scoring iterations: 1

              Theta:  21.9 
          Std. Err.:  13.4 

 2 x log-likelihood:  -260.089 
anova(Model_richness)
Analysis of Deviance Table

Model: Negative Binomial(21.8785), link: log

Response: richness

Terms added sequentially (first to last)

        Df Deviance Resid. Df Resid. Dev Pr(>Chi)  
NULL                       45     55.830           
Species  2   8.5153        43     47.315  0.01416 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans(Model_richness, pairwise ~ Species)
$emmeans
 Species emmean     SE  df asymp.LCL asymp.UCL
 Eb        2.70 0.1060 Inf      2.49      2.91
 Ha        2.41 0.0845 Inf      2.24      2.57
 Pk        2.30 0.0928 Inf      2.11      2.48

Results are given on the log (not the response) scale. 
Confidence level used: 0.95 

$contrasts
 contrast estimate    SE  df z.ratio p.value
 Eb - Ha     0.294 0.136 Inf   2.165  0.0772
 Eb - Pk     0.405 0.141 Inf   2.870  0.0115
 Ha - Pk     0.111 0.126 Inf   0.882  0.6515

Results are given on the log (not the response) scale. 
P value adjustment: tukey method for comparing a family of 3 estimates 

Neutral

Model_neutral <- lm(formula = neutral ~ Species, data = alpha_div_f_ampli_data)
anova(Model_neutral)
Analysis of Variance Table

Response: neutral
          Df  Sum Sq Mean Sq F value Pr(>F)
Species    2  26.168 13.0838  1.7967 0.1781
Residuals 43 313.136  7.2822               
emmeans(Model_neutral, pairwise ~ Species)
$emmeans
 Species emmean    SE df lower.CL upper.CL
 Eb        5.41 0.853 43     3.69     7.13
 Ha        4.89 0.619 43     3.64     6.14
 Pk        3.56 0.654 43     2.24     4.88

Confidence level used: 0.95 

$contrasts
 contrast estimate    SE df t.ratio p.value
 Eb - Ha      0.52 1.050 43   0.493  0.8749
 Eb - Pk      1.85 1.080 43   1.720  0.2096
 Ha - Pk      1.33 0.901 43   1.476  0.3125

P value adjustment: tukey method for comparing a family of 3 estimates 

8.2.2.2 Phylum level

richness_p_ampli <- phylum_level_agg_ampli %>%
  column_to_rownames(var = "phylum") %>%
  dplyr::select(where( ~ !all(. == 0))) %>%
  hilldiv(., q = 0) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(richness = 1) %>%
  rownames_to_column(var = "sample")

neutral_p_ampli <- phylum_level_agg_ampli %>%
  column_to_rownames(var = "phylum") %>%
  dplyr::select(where( ~ !all(. == 0))) %>%
  hilldiv(., q = 1) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(neutral = 1) %>%
  rownames_to_column(var = "sample")

alpha_div_phylum_ampli <- richness_p_ampli %>%
  full_join(neutral_p_ampli, by = join_by(sample == sample))

alpha_div_phylum_ampli_data <- alpha_div_phylum_ampli %>%
  left_join(sample_metadata_amplicon, by = join_by(sample == sample))

Diversity values

alpha_div_phylum_ampli %>%
  pivot_longer(-sample, names_to = "alpha", values_to = "value") %>%
  left_join(sample_metadata_amplicon, by = join_by(sample == sample)) %>%
  group_by(alpha) %>%
  summarise(
    total_mean = mean(value, na.rm = T),
    total_sd = sd(value, na.rm = T),
    Eb_mean = mean(value[Species == "Eb"], na.rm = T),
    Eb_sd = sd(value[Species == "Eb"], na.rm = T),
    Ha_mean = mean(value[Species == "Ha"], na.rm = T),
    Ha_sd = sd(value[Species == "Ha"], na.rm = T),
    Pk_mean = mean(value[Species == "Pk"], na.rm = T),
    Pk_sd = sd(value[Species == "Pk"], na.rm = T)
  ) %>%
  mutate(
    Total = str_c(round(total_mean, 2), "±", round(total_sd, 2)),
    Cnephaeus = str_c(round(Eb_mean, 2), "±", round(Eb_sd, 2)),
    Hypsugo = str_c(round(Ha_mean, 2), "±", round(Ha_sd, 2)),
    Pipistrellus = str_c(round(Pk_mean, 2), "±", round(Pk_sd, 2))
  ) %>%
  arrange(-Eb_mean) %>%
  dplyr::select(alpha, Total, Cnephaeus, Pipistrellus, Hypsugo) %>%
  tt()
alpha Total Cnephaeus Pipistrellus Hypsugo
richness 3.67±1.96 5.7±2.06 3.53±1.77 2.74±1.19
neutral 2.02±0.88 2.47±1.02 2.11±1.02 1.71±0.53

Richness

Model_richness <- MASS::glm.nb(richness ~ Species, data = alpha_div_phylum_ampli_data,trace=TRUE)
summary(Model_richness)

Call:
MASS::glm.nb(formula = richness ~ Species, data = alpha_div_phylum_ampli_data, 
    trace = TRUE, init.theta = 86595.3377, link = log)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)   1.7405     0.1325  13.140  < 2e-16 ***
SpeciesHa    -0.7337     0.1918  -3.826  0.00013 ***
SpeciesPk    -0.4793     0.1850  -2.591  0.00956 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(86595.34) family taken to be 1)

    Null deviance: 44.053  on 45  degrees of freedom
Residual deviance: 29.422  on 43  degrees of freedom
AIC: 178.43

Number of Fisher Scoring iterations: 1

              Theta:  86595 
          Std. Err.:  1870619 
Warning while fitting theta: iteration limit reached 

 2 x log-likelihood:  -170.427 
anova(Model_richness)
Analysis of Deviance Table

Model: Negative Binomial(86595.34), link: log

Response: richness

Terms added sequentially (first to last)

        Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
NULL                       45     44.053              
Species  2   14.631        43     29.422 0.0006652 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans(Model_richness, pairwise ~ Species)
$emmeans
 Species emmean    SE  df asymp.LCL asymp.UCL
 Eb        1.74 0.132 Inf     1.481      2.00
 Ha        1.01 0.139 Inf     0.735      1.28
 Pk        1.26 0.129 Inf     1.008      1.51

Results are given on the log (not the response) scale. 
Confidence level used: 0.95 

$contrasts
 contrast estimate    SE  df z.ratio p.value
 Eb - Ha     0.734 0.192 Inf   3.826  0.0004
 Eb - Pk     0.479 0.185 Inf   2.591  0.0259
 Ha - Pk    -0.254 0.189 Inf  -1.342  0.3717

Results are given on the log (not the response) scale. 
P value adjustment: tukey method for comparing a family of 3 estimates 

Neutral

Model_neutral <- lm(formula = neutral ~ Species, data = alpha_div_phylum_ampli_data)
anova(Model_neutral)
Analysis of Variance Table

Response: neutral
          Df  Sum Sq Mean Sq F value  Pr(>F)  
Species    2  3.9363 1.96813   2.723 0.07701 .
Residuals 43 31.0794 0.72278                  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

8.2.3 Comparison (metabarcoding selected vs metagenomics)

load("resources/alpha_together.Rdata")

Merge the table

merged_table_family_methods <- full_join(family_level_agg_ampli,merged_table, 
          by = c("family"))%>%
  mutate(across(where(is.numeric), ~replace_na(., 0)))

merged_table_phylum_methods <- full_join(phylum_level_agg_ampli,merged_table_phylum, 
          by = c("phylum"))%>%
  mutate(across(where(is.numeric), ~replace_na(., 0)))

all_samples <- rbind(all_samples, sample_metadata_amplicon)

8.2.3.1 Family level

richness <- merged_table_family_methods %>%
  column_to_rownames(var = "family") %>%
  dplyr::select(where( ~ !all(. == 0))) %>%
  hilldiv(., q = 0) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(richness = 1) %>%
  rownames_to_column(var = "sample")

neutral <- merged_table_family_methods %>%
  column_to_rownames(var = "family") %>%
  dplyr::select(where( ~ !all(. == 0))) %>%
  hilldiv(., q = 1) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(neutral = 1) %>%
  rownames_to_column(var = "sample")

alpha_div_family <- richness %>%
  full_join(neutral, by = join_by(sample == sample))

alpha_div_family_data <- alpha_div_family %>%
  left_join(all_samples, by = join_by(sample == sample))

alpha_div_family_data_filt <- alpha_div_family_data %>% 
  filter(method!="Metabarcoding_standard") %>% 
  mutate(sampleid = str_remove(sample, "_ampli_selec"))

Richness

Model_richness_random <- glmer.nb(richness ~ Species*method+(1|sampleid), data = alpha_div_family_data_filt)
summary(Model_richness_random)
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
 Family: Negative Binomial(6.8503)  ( log )
Formula: richness ~ Species * method + (1 | sampleid)
   Data: alpha_div_family_data_filt

     AIC      BIC   logLik deviance df.resid 
   572.2    592.8   -278.1    556.2       89 

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-1.78863 -0.76873 -0.07903  0.52297  2.29293 

Random effects:
 Groups   Name        Variance Std.Dev.
 sampleid (Intercept) 0.06038  0.2457  
Number of obs: 97, groups:  sampleid, 51

Fixed effects:
                             Estimate Std. Error z value Pr(>|z|)    
(Intercept)                    2.6758     0.1666  16.064  < 2e-16 ***
SpeciesHa                     -0.2857     0.2083  -1.372 0.170136    
SpeciesPk                     -0.4268     0.2144  -1.991 0.046527 *  
methodMetagenomics            -0.2589     0.2149  -1.205 0.228257    
SpeciesHa:methodMetagenomics  -1.0098     0.2880  -3.506 0.000455 ***
SpeciesPk:methodMetagenomics  -0.1481     0.2732  -0.542 0.587682    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) SpecsH SpcsPk mthdMt SpcH:M
SpeciesHa   -0.798                            
SpeciesPk   -0.772  0.619                     
mthdMtgnmcs -0.598  0.483  0.477              
SpcsH:mthdM  0.447 -0.569 -0.354 -0.743       
SpcsPk:mthM  0.472 -0.380 -0.633 -0.783  0.582
anova(Model_richness_random)
Analysis of Variance Table
               npar Sum Sq Mean Sq F value
Species           2 13.598   6.799  6.7989
method            1 34.674  34.674 34.6737
Species:method    2 15.691   7.845  7.8454
emmeans(Model_richness_random, pairwise ~ Species)
$emmeans
 Species emmean    SE  df asymp.LCL asymp.UCL
 Eb        2.55 0.134 Inf      2.28      2.81
 Ha        1.76 0.112 Inf      1.54      1.98
 Pk        2.05 0.104 Inf      1.84      2.25

Results are averaged over the levels of: method 
Results are given on the log (not the response) scale. 
Confidence level used: 0.95 

$contrasts
 contrast estimate    SE  df z.ratio p.value
 Eb - Ha     0.791 0.173 Inf   4.567  <.0001
 Eb - Pk     0.501 0.166 Inf   3.019  0.0072
 Ha - Pk    -0.290 0.151 Inf  -1.920  0.1329

Results are averaged over the levels of: method 
Results are given on the log (not the response) scale. 
P value adjustment: tukey method for comparing a family of 3 estimates 
emmeans(Model_richness_random, pairwise ~ method | Species)
$emmeans
Species = Eb:
 method                 emmean    SE  df asymp.LCL asymp.UCL
 Metabarcoding_selected   2.68 0.167 Inf     2.349      3.00
 Metagenomics             2.42 0.176 Inf     2.071      2.76

Species = Ha:
 method                 emmean    SE  df asymp.LCL asymp.UCL
 Metabarcoding_selected   2.39 0.126 Inf     2.144      2.64
 Metagenomics             1.12 0.167 Inf     0.794      1.45

Species = Pk:
 method                 emmean    SE  df asymp.LCL asymp.UCL
 Metabarcoding_selected   2.25 0.136 Inf     1.982      2.52
 Metagenomics             1.84 0.132 Inf     1.584      2.10

Results are given on the log (not the response) scale. 
Confidence level used: 0.95 

$contrasts
Species = Eb:
 contrast                              estimate    SE  df z.ratio p.value
 Metabarcoding_selected - Metagenomics    0.259 0.215 Inf   1.205  0.2283

Species = Ha:
 contrast                              estimate    SE  df z.ratio p.value
 Metabarcoding_selected - Metagenomics    1.269 0.193 Inf   6.585  <.0001

Species = Pk:
 contrast                              estimate    SE  df z.ratio p.value
 Metabarcoding_selected - Metagenomics    0.407 0.170 Inf   2.394  0.0167

Results are given on the log (not the response) scale. 

Neutral

Model_neutral <- lme(fixed = neutral ~ Species*method, data = alpha_div_family_data_filt,
               random = ~ 1 | sampleid)
anova(Model_neutral)
               numDF denDF   F-value p-value
(Intercept)        1    48 134.80939  <.0001
Species            2    48   1.86572  0.1658
method             1    43   9.46491  0.0036
Species:method     2    43   6.99370  0.0023
summary(Model_neutral)
Linear mixed-effects model fit by REML
  Data: alpha_div_family_data_filt 
       AIC      BIC    logLik
  457.7754 477.8623 -220.8877

Random effects:
 Formula: ~1 | sampleid
        (Intercept) Residual
StdDev:    1.841795 1.923724

Fixed effects:  neutral ~ Species * method 
                                 Value Std.Error DF   t-value p-value
(Intercept)                   5.407062 0.8421949 48  6.420203  0.0000
SpeciesHa                    -0.520108 1.0404826 48 -0.499872  0.6194
SpeciesPk                    -2.117345 1.0511109 48 -2.014388  0.0496
methodMetagenomics           -0.776414 0.8603155 43 -0.902476  0.3718
SpeciesHa:methodMetagenomics -2.173151 1.0628695 43 -2.044608  0.0470
SpeciesPk:methodMetagenomics  1.129551 1.0722441 43  1.053446  0.2980
 Correlation: 
                             (Intr) SpecsH SpcsPk mthdMt SpcH:M
SpeciesHa                    -0.809                            
SpeciesPk                    -0.801  0.649                     
methodMetagenomics           -0.511  0.413  0.409              
SpeciesHa:methodMetagenomics  0.413 -0.511 -0.331 -0.809       
SpeciesPk:methodMetagenomics  0.410 -0.332 -0.542 -0.802  0.649

Standardized Within-Group Residuals:
       Min         Q1        Med         Q3        Max 
-1.8323946 -0.5540072 -0.1316342  0.4198305  2.3132824 

Number of Observations: 97
Number of Groups: 51 
emmeans(Model_neutral, pairwise ~ method | Species)
$emmeans
Species = Eb:
 method                 emmean    SE df lower.CL upper.CL
 Metabarcoding_selected   5.41 0.842 50    3.715     7.10
 Metagenomics             4.63 0.842 43    2.932     6.33

Species = Ha:
 method                 emmean    SE df lower.CL upper.CL
 Metabarcoding_selected   4.89 0.611 48    3.658     6.12
 Metagenomics             1.94 0.611 43    0.705     3.17

Species = Pk:
 method                 emmean    SE df lower.CL upper.CL
 Metabarcoding_selected   3.29 0.629 48    2.025     4.55
 Metagenomics             3.64 0.568 43    2.498     4.79

Degrees-of-freedom method: containment 
Confidence level used: 0.95 

$contrasts
Species = Eb:
 contrast                              estimate    SE df t.ratio p.value
 Metabarcoding_selected - Metagenomics    0.776 0.860 43   0.902  0.3718

Species = Ha:
 contrast                              estimate    SE df t.ratio p.value
 Metabarcoding_selected - Metagenomics    2.950 0.624 43   4.726  <.0001

Species = Pk:
 contrast                              estimate    SE df t.ratio p.value
 Metabarcoding_selected - Metagenomics   -0.353 0.640 43  -0.552  0.5839

Degrees-of-freedom method: containment 

8.2.3.2 Phylum level

richness_phylum <- merged_table_phylum_methods %>%
  column_to_rownames(var = "phylum") %>%
  dplyr::select(where( ~ !all(. == 0))) %>%
  hilldiv(., q = 0) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(richness = 1) %>%
  rownames_to_column(var = "sample")

neutral_phylum <- merged_table_phylum_methods %>%
  column_to_rownames(var = "phylum") %>%
  dplyr::select(where( ~ !all(. == 0))) %>%
  hilldiv(., q = 1) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(neutral = 1) %>%
  rownames_to_column(var = "sample")

alpha_div_phylum <- richness_phylum %>%
  full_join(neutral_phylum, by = join_by(sample == sample))

alpha_div_phylum_data <- alpha_div_phylum %>%
  left_join(all_samples, by = join_by(sample == sample))

alpha_div_phylum_data_filt <- alpha_div_phylum_data %>% 
  filter(method!="Metabarcoding_standard") %>% 
  mutate(sampleid = str_remove(sample, "_ampli_selec"))

Richness

Model_richness_phylum_random <- glmer.nb(richness ~ Species*method+(1|sampleid), data = alpha_div_phylum_data_filt)
summary(Model_richness_phylum_random)
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
 Family: Negative Binomial(261450.9)  ( log )
Formula: richness ~ Species * method + (1 | sampleid)
   Data: alpha_div_phylum_data_filt

     AIC      BIC   logLik deviance df.resid 
   361.7    382.3   -172.9    345.7       89 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.6044 -0.4792 -0.1555  0.4965  2.3145 

Random effects:
 Groups   Name        Variance Std.Dev.
 sampleid (Intercept) 0.028    0.1673  
Number of obs: 97, groups:  sampleid, 51

Fixed effects:
                             Estimate Std. Error z value Pr(>|z|)    
(Intercept)                   1.72561    0.14216  12.138  < 2e-16 ***
SpeciesHa                    -0.73182    0.19878  -3.682 0.000232 ***
SpeciesPk                    -0.48278    0.19572  -2.467 0.013636 *  
methodMetagenomics           -0.11121    0.18190  -0.611 0.540934    
SpeciesHa:methodMetagenomics -0.70454    0.28934  -2.435 0.014892 *  
SpeciesPk:methodMetagenomics -0.01749    0.25356  -0.069 0.944998    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) SpecsH SpcsPk mthdMt SpcH:M
SpeciesHa   -0.697                            
SpeciesPk   -0.714  0.505                     
mthdMtgnmcs -0.614  0.418  0.444              
SpcsH:mthdM  0.355 -0.540 -0.257 -0.574       
SpcsPk:mthM  0.437 -0.299 -0.660 -0.714  0.410
anova(Model_richness_phylum_random)
Analysis of Variance Table
               npar Sum Sq Mean Sq F value
Species           2 32.291 16.1456 16.1456
method            1  5.347  5.3471  5.3471
Species:method    2  5.993  2.9963  2.9963
emmeans(Model_richness_phylum_random, pairwise ~ Species)
$emmeans
 Species emmean     SE  df asymp.LCL asymp.UCL
 Eb       1.670 0.1120 Inf     1.450     1.890
 Ha       0.586 0.1310 Inf     0.329     0.843
 Pk       1.178 0.0981 Inf     0.986     1.371

Results are averaged over the levels of: method 
Results are given on the log (not the response) scale. 
Confidence level used: 0.95 

$contrasts
 contrast estimate    SE  df z.ratio p.value
 Eb - Ha     1.084 0.171 Inf   6.322  <.0001
 Eb - Pk     0.492 0.147 Inf   3.344  0.0024
 Ha - Pk    -0.593 0.162 Inf  -3.655  0.0008

Results are averaged over the levels of: method 
Results are given on the log (not the response) scale. 
P value adjustment: tukey method for comparing a family of 3 estimates 
emmeans(Model_richness_phylum_random, pairwise ~ method | Species)
$emmeans
Species = Eb:
 method                 emmean    SE  df asymp.LCL asymp.UCL
 Metabarcoding_selected  1.726 0.142 Inf     1.447     2.004
 Metagenomics            1.614 0.147 Inf     1.327     1.902

Species = Ha:
 method                 emmean    SE  df asymp.LCL asymp.UCL
 Metabarcoding_selected  0.994 0.143 Inf     0.714     1.273
 Metagenomics            0.178 0.206 Inf    -0.225     0.581

Species = Pk:
 method                 emmean    SE  df asymp.LCL asymp.UCL
 Metabarcoding_selected  1.243 0.137 Inf     0.974     1.512
 Metagenomics            1.114 0.127 Inf     0.865     1.364

Results are given on the log (not the response) scale. 
Confidence level used: 0.95 

$contrasts
Species = Eb:
 contrast                              estimate    SE  df z.ratio p.value
 Metabarcoding_selected - Metagenomics    0.111 0.182 Inf   0.611  0.5409

Species = Ha:
 contrast                              estimate    SE  df z.ratio p.value
 Metabarcoding_selected - Metagenomics    0.816 0.237 Inf   3.436  0.0006

Species = Pk:
 contrast                              estimate    SE  df z.ratio p.value
 Metabarcoding_selected - Metagenomics    0.129 0.178 Inf   0.725  0.4685

Results are given on the log (not the response) scale. 

Neutral

Model_neutral_phylum <- lme(fixed = neutral ~ Species*method, data = alpha_div_phylum_data_filt,
               random = ~ 1 | sampleid)
anova(Model_neutral_phylum)
               numDF denDF   F-value p-value
(Intercept)        1    48 246.72883  <.0001
Species            2    48   5.59538  0.0065
method             1    43   5.05229  0.0298
Species:method     2    43   5.08912  0.0104
summary(Model_neutral_phylum)
Linear mixed-effects model fit by REML
  Data: alpha_div_phylum_data_filt 
       AIC      BIC    logLik
  247.5264 267.6133 -115.7632

Random effects:
 Formula: ~1 | sampleid
        (Intercept)  Residual
StdDev:   0.7626942 0.5121722

Fixed effects:  neutral ~ Species * method 
                                  Value Std.Error DF   t-value p-value
(Intercept)                   2.4656565 0.2905207 48  8.487025  0.0000
SpeciesHa                    -0.7553578 0.3589214 48 -2.104522  0.0406
SpeciesPk                    -0.4882620 0.3587352 48 -1.361065  0.1798
methodMetagenomics           -0.0946811 0.2290504 43 -0.413364  0.6814
SpeciesHa:methodMetagenomics -0.5415858 0.2829783 43 -1.913877  0.0623
SpeciesPk:methodMetagenomics  0.2035922 0.2867670 43  0.709957  0.4816
 Correlation: 
                             (Intr) SpecsH SpcsPk mthdMt SpcH:M
SpeciesHa                    -0.809                            
SpeciesPk                    -0.810  0.656                     
methodMetagenomics           -0.394  0.319  0.319              
SpeciesHa:methodMetagenomics  0.319 -0.394 -0.258 -0.809       
SpeciesPk:methodMetagenomics  0.315 -0.255 -0.428 -0.799  0.647

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-1.59354361 -0.56553943 -0.08515739  0.38014230  2.19616114 

Number of Observations: 97
Number of Groups: 51 
emmeans(Model_neutral_phylum, pairwise ~ Species)
$emmeans
 Species emmean    SE df lower.CL upper.CL
 Eb        2.42 0.267 43     1.88     2.96
 Ha        1.39 0.194 43     1.00     1.78
 Pk        2.03 0.184 43     1.66     2.40

Results are averaged over the levels of: method 
Degrees-of-freedom method: containment 
Confidence level used: 0.95 

$contrasts
 contrast estimate    SE df t.ratio p.value
 Eb - Ha     1.026 0.330 43   3.111  0.0091
 Eb - Pk     0.386 0.324 43   1.192  0.4645
 Ha - Pk    -0.640 0.267 43  -2.394  0.0540

Results are averaged over the levels of: method 
Degrees-of-freedom method: containment 
P value adjustment: tukey method for comparing a family of 3 estimates 
emmeans(Model_neutral_phylum, pairwise ~ method | Species)
$emmeans
Species = Eb:
 method                 emmean    SE df lower.CL upper.CL
 Metabarcoding_selected   2.47 0.291 50    1.882     3.05
 Metagenomics             2.37 0.291 43    1.785     2.96

Species = Ha:
 method                 emmean    SE df lower.CL upper.CL
 Metabarcoding_selected   1.71 0.211 48    1.287     2.13
 Metagenomics             1.07 0.211 43    0.649     1.50

Species = Pk:
 method                 emmean    SE df lower.CL upper.CL
 Metabarcoding_selected   1.98 0.210 48    1.554     2.40
 Metagenomics             2.09 0.196 43    1.691     2.48

Degrees-of-freedom method: containment 
Confidence level used: 0.95 

$contrasts
Species = Eb:
 contrast                              estimate    SE df t.ratio p.value
 Metabarcoding_selected - Metagenomics   0.0947 0.229 43   0.413  0.6814

Species = Ha:
 contrast                              estimate    SE df t.ratio p.value
 Metabarcoding_selected - Metagenomics   0.6363 0.166 43   3.829  0.0004

Species = Pk:
 contrast                              estimate    SE df t.ratio p.value
 Metabarcoding_selected - Metagenomics  -0.1089 0.173 43  -0.631  0.5312

Degrees-of-freedom method: containment 

8.2.4 Alpha diversity figure (three methods)

alpha_div_family_data %>%
  select(-sample,-Life_stage, -Sex) %>%
  pivot_longer(
    cols = -c(method, Species),
    names_to = "metric",
    values_to = "value"
  ) %>%
  mutate(
    metric = factor(
      metric,
      levels = c("richness", "neutral"),
      labels = c("Richness", "Neutral")
    ),
    Species = factor(
      Species,
      levels = c("Eb", "Ha", "Pk"),
      labels = c("C. bottae", "H. ariel", "P. kuhlii")
      ),
    method = factor(
      method,
      levels = c("Metabarcoding_standard", "Metabarcoding_selected", "Metagenomics"),
      labels = c("Metabarcoding Standard", "Metabarcoding Selected", "Metagenomics")
  )) %>%   
  ggplot(aes(y = value, x = method, color = Species, fill = Species)) +
  geom_boxplot(
    width = 0.4,
    alpha = 0.5,
    outlier.shape = NA,
 #   show.legend = FALSE,
    position = position_dodge(width = 0.75)
  ) +
  geom_jitter(
    position = position_jitterdodge(jitter.width = 0.1, dodge.width = 0.75),
    size = 1,
    alpha = 0.8,
    show.legend = FALSE
  ) +
  facet_wrap(~metric, scales = "free_y") +
  scale_color_manual(values=species_color,  labels = c(expression(italic("C. bottae")),
             expression(italic("H. ariel")),
             expression(italic("P. kuhlii"))))+
  scale_fill_manual(values=species_color,  labels = c(expression(italic("C. bottae")),
             expression(italic("H. ariel")),
             expression(italic("P. kuhlii")))) +
  theme_classic() +
  theme(
    strip.background = element_blank(),
    panel.grid.minor.x = element_line(size = 0.1, color = "grey"),
    axis.text.y = element_text(size = 12),
    axis.text.x = element_text(size = 12),
    axis.title.x = element_blank(),
    axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0), size = 14),
    strip.text = element_text(size = 14, lineheight = 0.6, face = "bold")
  ) +
  labs(
#    x = "Methodological approach",
    y = "Alpha diversity at family level",
    fill = "Species"
  )

alpha_div_phylum_data %>%
  select(-sample,-Life_stage, -Sex) %>%
  pivot_longer(
    cols = -c(method, Species),
    names_to = "metric",
    values_to = "value"
  ) %>%
  mutate(
    metric = factor(
      metric,
      levels = c("richness", "neutral"),
      labels = c("Richness", "Neutral")
    ),
    Species = factor(
      Species,
      levels = c("Eb", "Ha", "Pk"),
      labels = c("C. bottae", "H. ariel", "P. kuhlii")
      ),
    method = factor(
      method,
      levels = c("Metabarcoding_standard", "Metabarcoding_selected", "Metagenomics"),
      labels = c("Metabarcoding Standard", "Metabarcoding Selected", "Metagenomics")
  )) %>%   
  ggplot(aes(y = value, x = method, color = Species, fill = Species)) +
  geom_boxplot(
    width = 0.4,
    alpha = 0.5,
    outlier.shape = NA,
 #   show.legend = FALSE,
    position = position_dodge(width = 0.75)
  ) +
  geom_jitter(
    position = position_jitterdodge(jitter.width = 0.1, dodge.width = 0.75),
    size = 1,
    alpha = 0.8,
    show.legend = FALSE
  ) +
  facet_wrap(~metric, scales = "free_y") +
  scale_color_manual(values=species_color,  labels = c(expression(italic("C. bottae")),
             expression(italic("H. ariel")),
             expression(italic("P. kuhlii"))))+
  scale_fill_manual(values=species_color,  labels = c(expression(italic("C. bottae")),
             expression(italic("H. ariel")),
             expression(italic("P. kuhlii")))) +
  theme_classic() +
  theme(
    strip.background = element_blank(),
    panel.grid.minor.x = element_line(size = 0.1, color = "grey"),
    axis.text.y = element_text(size = 12),
    axis.text.x = element_text(size = 12),
    axis.title.x = element_blank(),
    axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0), size = 14),
    strip.text = element_text(size = 14, lineheight = 0.6, face = "bold")
  ) +
  labs(
#    x = "Methodological approach",
    y = "Alpha diversity at phylum level",
    fill = "Species"
  )

8.3 Beta diversity

8.3.1 At different taxonomic level

beta_q0n <- family_level_agg_ampli %>%
  column_to_rownames(., "family") %>% 
  hillpair(., q = 0)

beta_q1n <- family_level_agg_ampli %>%
  column_to_rownames(., "family") %>% 
  hillpair(., q = 1)

beta_q0n_phylum <- phylum_level_agg_ampli %>%
  column_to_rownames(., "phylum") %>% 
  hillpair(., q = 0)

beta_q1n_phylum <- phylum_level_agg_ampli %>%
  column_to_rownames(., "phylum") %>% 
  hillpair(., q = 1)
save(beta_q0n,
     beta_q1n,
     beta_q0n_phylum,
     beta_q1n_phylum,
     file = "resources/beta_tax_sele.Rdata")
load("resources/beta_tax_sele.Rdata")

8.3.1.1 Family

Richness

adonis2(
  beta_q0n$S ~ Species,
  data = sample_metadata_amplicon %>%
    filter(sample %in% labels(beta_q0n$S)) %>%
    arrange(match(sample, labels(beta_q0n$S))),
  permutations = 999
) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Model 2 1.936832 0.1062653 2.556355 0.001
Residual 43 16.289560 0.8937347 NA NA
Total 45 18.226392 1.0000000 NA NA
pairwise.adonis(
  beta_q0n$S,
  sample_metadata_amplicon %>%
    filter(sample %in% labels(beta_q0n$S)) %>%
    arrange(match(sample, labels(beta_q0n$S))) %>%
    pull(Species),
  perm = 999) %>%
  tt()
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
Eb vs Ha 1 1.1892164 3.212674 0.10633532 0.001 0.003 *
Eb vs Pk 1 0.7483175 1.901197 0.07067333 0.042 0.126
Ha vs Pk 1 0.9556233 2.549407 0.06975234 0.003 0.009 *

Neutral

adonis2(
  beta_q1n$S ~ Species,
  data = sample_metadata_amplicon %>%
    filter(sample %in% labels(beta_q1n$S)) %>%
    arrange(match(sample, labels(beta_q1n$S))),
  permutations = 999
) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Model 2 1.881489 0.1062486 2.555907 0.001
Residual 43 15.826869 0.8937514 NA NA
Total 45 17.708358 1.0000000 NA NA
pairwise.adonis(
  beta_q1n$S,
  sample_metadata_amplicon %>%
    filter(sample %in% labels(beta_q1n$S)) %>%
    arrange(match(sample, labels(beta_q1n$S))) %>%
    pull(Species),
  perm = 999) %>%
  tt()
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
Eb vs Ha 1 1.3532821 3.840951 0.12454063 0.001 0.003 *
Eb vs Pk 1 0.7636118 2.006946 0.07431220 0.049 0.147
Ha vs Pk 1 0.7412715 1.995708 0.05544294 0.049 0.147

8.3.1.2 Phylum

Richness

adonis2(
  beta_q0n_phylum$S ~ Species,
  by = "terms",
  data = sample_metadata_amplicon %>%
    filter(sample %in% labels(beta_q0n_phylum$S)) %>%
    arrange(match(sample, labels(beta_q0n_phylum$S))),
  permutations = 999
) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Species 2 0.7646919 0.1095595 2.645354 0.018
Residual 43 6.2150010 0.8904405 NA NA
Total 45 6.9796930 1.0000000 NA NA
pairwise.adonis(
  beta_q0n_phylum$S,
  sample_metadata_amplicon %>%
    filter(sample %in% labels(beta_q0n_phylum$S)) %>%
    arrange(match(sample, labels(beta_q0n_phylum$S))) %>%
    pull(Species),
  perm = 999) %>%
  tt()
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
Eb vs Ha 1 0.4237598 3.707872 0.12074662 0.017 0.051
Eb vs Pk 1 0.2966148 1.614777 0.06067220 0.182 0.546
Ha vs Pk 1 0.4132832 2.956948 0.08001061 0.047 0.141

Neutral

adonis2(
  beta_q1n_phylum$S ~ Species,
  data = sample_metadata_amplicon %>%
    filter(sample %in% labels(beta_q1n_phylum$S)) %>%
    arrange(match(sample, labels(beta_q1n_phylum$S))),
  permutations = 999
) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Model 2 0.4130198 0.07992329 1.867617 0.164
Residual 43 4.7546835 0.92007671 NA NA
Total 45 5.1677033 1.00000000 NA NA

8.3.2 Comparison between metabarcoding selected and metagenomics

beta_q0n <- merged_table_family_methods %>%
  select(-H19, -H20) %>% 
  column_to_rownames(., "family") %>% 
  hillpair(., q = 0)

beta_q1n <- merged_table_family_methods %>%
  select(-H19) %>% 
  column_to_rownames(., "family") %>% 
  hillpair(., q = 1)

beta_q0n_phylum <- merged_table_phylum_methods %>%
  select(-H19, -H20) %>% 
  column_to_rownames(., "phylum") %>% 
  hillpair(., q = 0)

beta_q1n_phylum <- merged_table_phylum_methods %>%
  select(-H19) %>% 
  column_to_rownames(., "phylum") %>% 
  hillpair(., q = 1)
save(beta_q0n,
     beta_q1n,
     beta_q0n_phylum,
     beta_q1n_phylum,
     file = "resources/beta_tax.Rdata")
load("resources/beta_tax.Rdata")
methods_colors <- c("#d15aa9","#d4b9ed", "#c7c5c6")

8.3.2.1 Family level

Richness

m <- as.matrix(beta_q0n$S)
samples <- rownames(m)
to_remove <- grepl("_ampli$", samples) & !grepl("_ampli_Select$", samples)
m_filtered <- m[!to_remove, !to_remove]
beta_q0n_S_filtered <- as.dist(m_filtered)

samples_sel <- all_samples %>%
  mutate(sampleid = str_remove(sample, "_ampli_selec"))

samples_q0 <- samples_sel %>% filter(sample %in% labels(beta_q0n_S_filtered)) %>% arrange(match(sample, labels(beta_q0n_S_filtered)))

betadisper(beta_q0n_S_filtered, samples_q0 %>% pull(Species)) %>% permutest(.)

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df Sum Sq   Mean Sq      F N.Perm Pr(>F)  
Groups     2 0.0253 0.0126481 3.6104    999  0.034 *
Residuals 92 0.3223 0.0035032                       
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(beta_q0n_S_filtered ~ Species*method, data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Model 5 6.444711 0.162343 3.449748 0.001
Residual 89 33.253404 0.837657 NA NA
Total 94 39.698115 1.000000 NA NA
adonis2(beta_q0n_S_filtered ~ Species+method, by = "margin", data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Species 2 2.661606 0.06704615 3.506968 0.001
method 1 2.517837 0.06342460 6.635072 0.001
Residual 91 34.532127 0.86986819 NA NA
Total 94 39.698115 1.00000000 NA NA
adonis2(beta_q0n_S_filtered ~ Species*method, by = "margin", data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Species:method 2 1.278723 0.03221118 1.711199 0.005
Residual 89 33.253404 0.83765701 NA NA
Total 94 39.698115 1.00000000 NA NA

Neutral

m <- as.matrix(beta_q1n$S)
samples <- rownames(m)
to_remove <- grepl("_ampli$", samples) & !grepl("_ampli_Select$", samples)
m_filtered <- m[!to_remove, !to_remove]
beta_q1n_S_filtered <- as.dist(m_filtered)

samples_q1 <- samples_sel %>% filter(sample %in% labels(beta_q1n_S_filtered)) %>% arrange(match(sample, labels(beta_q1n_S_filtered)))

betadisper(beta_q1n_S_filtered, samples_q1 %>% pull(Species)) %>% permutest(.)

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)   
Groups     2 0.10823 0.054114 5.9591    999  0.002 **
Residuals 93 0.84453 0.009081                        
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(beta_q1n_S_filtered ~ Species*method, data = samples_q1, permutations = 999, strata = samples_q1$sampleid) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Model 5 5.612085 0.141896 2.976479 0.001
Residual 90 33.938601 0.858104 NA NA
Total 95 39.550686 1.000000 NA NA
adonis2(beta_q1n_S_filtered ~ Species+method, by = "margin", data = samples_q1, permutations = 999, strata = samples_q1$sampleid) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Species 2 2.859173 0.07229137 3.796975 0.001
method 1 2.091473 0.05288083 5.554942 0.001
Residual 92 34.638614 0.87580312 NA NA
Total 95 39.550686 1.00000000 NA NA
adonis2(beta_q1n_S_filtered ~ Species*method, by = "margin", data = samples_q1, permutations = 999, strata = samples_q1$sampleid) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Species:method 2 0.7000129 0.01769913 0.9281638 0.07
Residual 90 33.9386014 0.85810399 NA NA
Total 95 39.5506860 1.00000000 NA NA

8.3.2.2 Phylum level

Richness

m <- as.matrix(beta_q0n_phylum$S)
samples <- rownames(m)
to_remove <- grepl("_ampli$", samples) & !grepl("_ampli_Select$", samples)
m_filtered <- m[!to_remove, !to_remove]
beta_q0n_phylum_S_filtered <- as.dist(m_filtered)

samples_q0 <- samples_sel %>% filter(sample %in% labels(beta_q0n_phylum_S_filtered)) %>% arrange(match(sample, labels(beta_q0n_phylum_S_filtered)))

betadisper(beta_q0n_phylum_S_filtered, samples_q0 %>% pull(Species)) %>% permutest(.)

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df Sum Sq  Mean Sq      F N.Perm Pr(>F)
Groups     2 0.2333 0.116640 1.6317    999  0.194
Residuals 92 6.5764 0.071482                     
adonis2(beta_q0n_phylum_S_filtered ~ Species*method, data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Model 5 4.848799 0.2864865 7.146968 0.001
Residual 89 12.076256 0.7135135 NA NA
Total 94 16.925056 1.0000000 NA NA
adonis2(beta_q0n_phylum_S_filtered ~ Species+method, by = "margin", data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Species 2 2.917014 0.17234885 10.154105 0.001
method 1 1.079379 0.06377401 7.514619 0.001
Residual 91 13.070983 0.77228599 NA NA
Total 94 16.925056 1.00000000 NA NA
adonis2(beta_q0n_phylum_S_filtered ~ Species*method, by = "margin", data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Species:method 2 0.9947267 0.05877244 3.665485 0.003
Residual 89 12.0762564 0.71351355 NA NA
Total 94 16.9250555 1.00000000 NA NA

Neutral

m <- as.matrix(beta_q1n_phylum$S)
samples <- rownames(m)
to_remove <- grepl("_ampli$", samples) & !grepl("_ampli_Select$", samples)
m_filtered <- m[!to_remove, !to_remove]
beta_q1n_phylum_S_filtered <- as.dist(m_filtered)

samples_q1 <- samples_sel %>% filter(sample %in% labels(beta_q1n_phylum_S_filtered)) %>% arrange(match(sample, labels(beta_q1n_phylum_S_filtered)))

betadisper(beta_q1n_phylum_S_filtered, samples_q1 %>% pull(Species)) %>% permutest(.)

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df Sum Sq  Mean Sq      F N.Perm Pr(>F)  
Groups     2 0.4308 0.215395 2.9832    999  0.062 .
Residuals 93 6.7149 0.072203                       
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(beta_q1n_phylum_S_filtered ~ Species*method, data = samples_q1, permutations = 999, strata = samples_q1$sampleid) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Model 5 1.978556 0.1572087 3.357602 0.035
Residual 90 10.606975 0.8427913 NA NA
Total 95 12.585531 1.0000000 NA NA
adonis2(beta_q1n_phylum_S_filtered ~ Species+method, by = "margin", data = samples_q1, permutations = 999, strata = samples_q1$sampleid) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Species 2 1.3839347 0.1099624 5.965511 0.014
method 1 0.6214811 0.0493806 5.357843 0.007
Residual 92 10.6715068 0.8479187 NA NA
Total 95 12.5855306 1.0000000 NA NA
adonis2(beta_q1n_phylum_S_filtered ~ Species*method, by = "margin", data = samples_q1, permutations = 999, strata = samples_q1$sampleid) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Species:method 2 0.06453176 0.005127457 0.2737754 0.719
Residual 90 10.60697513 0.842791253 NA NA
Total 95 12.58553064 1.000000000 NA NA

8.3.3 Three methodological approaches together

8.3.3.1 Family

Richness

beta_q0n$S %>%
  vegan::metaMDS(.,
                 trymax = 500,
                 k = 2,
                 trace = 0) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
  group_by(method, Species) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    Species = factor(
      Species,
      levels = c("Eb", "Ha", "Pk"),
      labels = c("C. bottae", "H. ariel", "P. kuhlii")),
      method = factor(
      method,
      levels = c("Metabarcoding_standard", "Metabarcoding_selected", "Metagenomics"),
      labels = c("Metabarcoding Standard", "Metabarcoding Selected", "Metagenomics")
      )
  ) %>% 
  ggplot(aes(x = NMDS1, y = NMDS2, color = Species, shape=method)) +
  geom_point(size = 4) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
  scale_color_manual(values = species_color,
                     labels = c(
                       expression(italic("C. bottae")),
                       expression(italic("H. ariel")),
                       expression(italic("P. kuhlii"))
                     ))+
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(
      size = 0.5,
      linetype = "solid",
      colour = "black"
    ),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right",
    legend.box = "vertical"
  ) +
  labs(
    shape = "Methodological approach"
  )#+labs(color = "method") +geom_text_repel(aes(label = sample), size=3)

beta_q0n$S %>%
  vegan::metaMDS(.,
                 trymax = 500,
                 k = 2,
                 trace = 0) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
  group_by(method) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(method = factor(method,
      levels = c("Metabarcoding_standard", "Metabarcoding_selected", "Metagenomics"),
      labels = c("Metabarcoding Standard", "Metabarcoding Selected", "Metagenomics")
      )
  ) %>% 
  ggplot(aes(x = NMDS1, y = NMDS2, color = method)) +
  geom_point(size = 4) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
  scale_color_manual(values = methods_colors)+
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(
      size = 0.5,
      linetype = "solid",
      colour = "black"
    ),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right",
    legend.box = "vertical"
  )

Neutral

beta_q1n$S %>%
  vegan::metaMDS(.,
                 trymax = 500,
                 k = 2,
                 trace = 0) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
  group_by(method, Species) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    Species = factor(
      Species,
      levels = c("Eb", "Ha", "Pk"),
      labels = c("C. bottae", "H. ariel", "P. kuhlii"))
  ) %>% 
  ggplot(aes(x = NMDS1, y = NMDS2, color = Species, shape=method)) +
  geom_point(size = 3) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
  scale_color_manual(values = species_color,
                     labels = c(
                       expression(italic("C. bottae")),
                       expression(italic("H. ariel")),
                       expression(italic("P. kuhlii"))
                     ))+
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(
      size = 0.5,
      linetype = "solid",
      colour = "black"
    ),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right",
    legend.box = "vertical"
  ) +
  labs(shape = "Methodological approach")

beta_q1n$S %>%
  vegan::metaMDS(.,
                 trymax = 500,
                 k = 2,
                 trace = 0) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
  filter(sample!="P36") %>% 
  group_by(method) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
    mutate(method = factor(method,
      levels = c("Metabarcoding_standard", "Metabarcoding_selected", "Metagenomics"),
      labels = c("Metabarcoding Standard", "Metabarcoding Selected", "Metagenomics")
      )
  ) %>% 
  ggplot(aes(x = NMDS1, y = NMDS2, color = method)) + 
  geom_point(size = 3) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
  scale_color_manual(values = methods_colors)+
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(
      size = 0.5,
      linetype = "solid",
      colour = "black"
    ),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right",
    legend.box = "vertical"
  ) +
  labs(color = "Methodological approach",shape="Species")

8.3.3.2 Phylum level

Richness

beta_q0n_phylum$S %>%
  vegan::metaMDS(.,
                 trymax = 500,
                 k = 2,
                 trace = 0) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
  group_by(method, Species) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    Species = factor(
      Species,
      levels = c("Eb", "Ha", "Pk"),
      labels = c("C. bottae", "H. ariel", "P. kuhlii"))
  ) %>% 
  ggplot(aes(x = NMDS1, y = NMDS2, color = Species, shape=method)) +
  geom_point(size = 4) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
  scale_color_manual(values = species_color,
                     labels = c(
                       expression(italic("C. bottae")),
                       expression(italic("H. ariel")),
                       expression(italic("P. kuhlii"))
                     ))+
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(
      size = 0.5,
      linetype = "solid",
      colour = "black"
    ),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right",
    legend.box = "vertical"
  ) +
  labs(
    shape = "Methodological approach"
  )

Neutral

beta_q1n_phylum$S %>%
  vegan::metaMDS(.,
                 trymax = 500,
                 k = 2,
                 trace = 0) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
  group_by(method) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = NMDS1, y = NMDS2, color = method, shape= Species)) +
  geom_point(size = 3) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
  scale_color_manual(values = methods_colors,
                     labels = c(
                       expression(italic("Metabarcoding Standard")),
                       expression(italic("Metabarcoding Selected")),
                       expression(italic("Metagenomics"))
                     ))+
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(
      size = 0.5,
      linetype = "solid",
      colour = "black"
    ),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right",
    legend.box = "vertical"
  ) +
  labs(color = "Methodological approach",shape="Species")

8.4 Differential abundance

8.4.1 Metabarcoding selected

load("resources/amplicon/selected_filtering.Rdata")
phylo_samples <- sample_metadata_amplicon %>% 
  column_to_rownames("sample") %>%
  sample_data()

phylo_genome <- genome_counts_filt_amplicon %>% 
  select(one_of(c("genome", rownames(phylo_samples)))) %>%
  column_to_rownames("genome") %>%
  otu_table(., taxa_are_rows = TRUE)

phylo_taxonomy <- genome_metadata_amplicon %>% 
  filter(genome %in% rownames(phylo_genome)) %>% 
  column_to_rownames("genome") %>% 
  dplyr::select(domain,phylum,class,order,family,genus,species) %>% 
  as.matrix() %>% 
  tax_table()

physeq_genome_metab_sel <- phyloseq(phylo_genome, phylo_samples, phylo_taxonomy)
ancom_rand_output_phylum_metab_sel = ancombc2(
  data = physeq_genome_metab_sel,
  assay_name = "counts",
  tax_level = "phylum",
  fix_formula = "Species",
  p_adj_method = "holm",
  pseudo_sens = TRUE,
  prv_cut = 0,
  lib_cut = 0,
  s0_perc = 0.05,
  group = "Species",
  struc_zero = TRUE,
  neg_lb = FALSE,
  alpha = 0.05,
  n_cl = 2,
  verbose = TRUE,
  global = TRUE,
  pairwise = TRUE,
  dunnet = FALSE,
  trend = FALSE,
  iter_control = list(
    tol = 1e-5,
    max_iter = 20,
    verbose = FALSE
  ),
  em_control = list(tol = 1e-5, max_iter = 100),
  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
  trend_control = NULL
)

8.4.1.1 Structural zeros

Total number of structural zeros

# Extract structural zeros
structural_zeros <- ancom_rand_output_phylum_metab_sel$zero_ind

# Convert to a data frame for better readability
structural_zeros_df <- as.data.frame(structural_zeros) %>% 
  column_to_rownames(., "taxon")

structural_zero_taxa <- rownames(structural_zeros_df)[apply(structural_zeros_df, 1, any)]

# Identify in which groups each taxon is a structural zero
structural_zero_groups <- structural_zeros_df[structural_zero_taxa, , drop = FALSE] %>% 
  rename("Eb"=1) %>% 
  rename("Ha"=2) %>% 
  rename("Pk"=3) 

# Total number of structural zeros
nrow(structural_zero_groups)
[1] 1

8.4.1.2 Enriched data

res_pair_phylum <- ancom_rand_output_phylum_metab_sel$res_pair
# Get log-fold change and q-value columns
lfc_cols <- grep("^lfc_", colnames(res_pair_phylum), value = TRUE)
q_cols   <- gsub("^lfc_", "q_", lfc_cols)

# Build long table by pairing lfc and q for each contrast
plot_data_phylum <- lfc_cols %>%
  setNames(nm = .) %>%
  purrr::imap_dfr(function(lfc_col, name) {
    q_col <- gsub("^lfc_", "q_", lfc_col)
    
    res_pair_phylum %>%
      select(taxon, !!lfc_col, !!q_col) %>%
      rename(
        lfc = !!lfc_col,
        q   = !!q_col
      ) %>%
      mutate(contrast_key = name)
  }) %>%
  filter(!is.na(lfc), !is.na(q), q < 0.05) %>%
  mutate(
    # Manually re-label contrasts for clarity
    contrast = case_when(
      contrast_key == "lfc_SpeciesHa" ~ "H. ariel vs C. bottae",
      contrast_key == "lfc_SpeciesPk" ~ "P. kuhlii vs C. bottae",
      contrast_key == "lfc_SpeciesPk_SpeciesHa" ~ "P. kuhlii vs H. ariel",
      TRUE ~ contrast_key
    ),
    direction = ifelse(lfc > 0, paste("Enriched in", sub(" vs.*", "", contrast)),
                             paste("Enriched in", sub(".*vs ", "", contrast)))
  )
plot_data_phylum %>% select(-contrast_key)
             taxon       lfc          q               contrast             direction
1   Fusobacteriota -2.762137 0.01005929  H. ariel vs C. bottae Enriched in C. bottae
2     Synergistota -2.590353 0.01350310  H. ariel vs C. bottae Enriched in C. bottae
3     Bacteroidota -3.073607 0.02532932 P. kuhlii vs C. bottae Enriched in C. bottae
4   Fusobacteriota  2.319031 0.03647719  P. kuhlii vs H. ariel Enriched in P. kuhlii
5 Desulfobacterota  2.460761 0.02502321  P. kuhlii vs H. ariel Enriched in P. kuhlii

8.4.2 Differences with metagenomics at phylum level across species

load("resources/amplicon/selected_filtering.Rdata")

sample_metadata_amplicon <- sample_metadata_amplicon[c(1,9,10,13)] %>%
  mutate(sample = paste0(sample, "_ampli")) %>%
  mutate(method="Metabarcoding")

genome_metadata_ampli <- genome_metadata_amplicon %>%
  mutate(
    class  = if_else(is.na(class), paste0(phylum, "_Unclassified"), class),
    order  = if_else(is.na(order),
                     if_else(str_detect(class, "Unclassified"), class, paste0(class, "_Unclassified")),
                     order),
    family = if_else(is.na(family),
                     if_else(str_detect(order, "Unclassified"), order, paste0(order, "_Unclassified")),
                     family),
    genus  = if_else(is.na(genus),
                     if_else(str_detect(family, "Unclassified"), family, paste0(family, "_Unclassified")),
                     genus)
  )

genome_tax_ampli <- genome_counts_filt_amplicon %>% 
  column_to_rownames(., "genome")%>%
  rename_with(~ paste0(., "_ampli")) %>%
  rownames_to_column(., "genome")%>%
  left_join(genome_metadata_ampli, by = "genome")

family_level_agg_ampli <- genome_tax_ampli %>%
  select(-genome) %>%
  group_by(phylum, class, family) %>%
  summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>% 
    as.data.frame() %>% 
  select(-c(phylum,class))

phylum_level_agg_ampli <- genome_tax_ampli %>%
  select(-genome) %>%
  group_by(phylum) %>%
  summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>% 
    as.data.frame()
load("resources/metagenomics/data_filtered.Rdata")

genome_metadata <- genome_metadata %>%
  mutate(
    class  = if_else(str_trim(class) == "", paste0(phylum, "_Unclassified"), class),
    order  = if_else(str_trim(order) == "",
                     if_else(str_detect(class, "Unclassified"), class, paste0(class, "_Unclassified")),
                     order),
    family = if_else(str_trim(family) == "",
                     if_else(str_detect(order, "Unclassified"), order, paste0(order, "_Unclassified")),
                     family),
    genus  = if_else(str_trim(genus) == "",
                     if_else(str_detect(family, "Unclassified"), family, paste0(family, "_Unclassified")),
                     genus)
  )

genome_tax <- genome_counts_filt %>% 
  left_join(genome_metadata[c(1,3,4,6)], by = "genome")

# Aggregate counts at the Family level
family_level_agg <- genome_tax %>%
  select(-genome) %>%
  group_by(phylum, class, family) %>%
  summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>% 
  as.data.frame() %>% 
  select(-c(phylum,class))

# Aggregate counts at the Phylum level
phylum_level_agg <- genome_tax %>%
  select(-genome) %>%
  group_by(phylum) %>%
  summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>% 
  as.data.frame() 
merged_table <- full_join(family_level_agg_ampli,family_level_agg, 
          by = c("family"))%>%
  mutate(across(where(is.numeric), ~replace_na(., 0)))

merged_table_phylum <- full_join(phylum_level_agg_ampli,phylum_level_agg, 
          by = c("phylum"))%>%
  mutate(across(where(is.numeric), ~replace_na(., 0)))


sample_shot <- sample_metadata %>% 
  select(-c(Habitat,Longitude, Latitude, Region))%>%
  mutate(method="Metagenomics")

all_samples <- rbind(sample_shot, sample_metadata_amplicon)
phylo_genome <- merged_table_phylum %>%
  column_to_rownames("phylum") %>% 
  otu_table(., taxa_are_rows = TRUE)

phylo_samples <- all_samples%>% 
  mutate(individual=sample)%>%
  column_to_rownames("sample") %>% 
  sample_data()

merged_physeq_phylum <- phyloseq(phylo_genome, phylo_samples)
ancom_rand_output_struc_approach_rand = ancombc2(data = merged_physeq_phylum, 
                  assay_name = "counts", 
                  tax_level = NULL,
                  fix_formula = "method",
    #              rand_formula = "(1|Species)",
                  p_adj_method = "holm", 
                  pseudo_sens = TRUE,
                  prv_cut =0, 
                  lib_cut = 0, 
                  s0_perc = 0.05,
                  group = "method", 
                  struc_zero = TRUE, 
                  neg_lb = FALSE,
                  alpha = 0.05, 
                  n_cl = 2, 
                  verbose = TRUE,
                  global = FALSE, 
                  pairwise = FALSE, 
                  dunnet = FALSE, 
                  trend = FALSE,
                  iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
                  em_control = list(tol = 1e-5, max_iter = 100),
        #          lme_control = lme4::lmerControl(),
                  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100), 
                  trend_control = NULL)

Enriched phyla

ancom_rand_output_struc_approach_rand$res%>%
  filter(q_methodMetagenomics < 0.05) %>%
  tt()
taxon lfc_(Intercept) lfc_methodMetagenomics se_(Intercept) se_methodMetagenomics W_(Intercept) W_methodMetagenomics p_(Intercept) p_methodMetagenomics q_(Intercept) q_methodMetagenomics diff_(Intercept) diff_methodMetagenomics passed_ss_(Intercept) passed_ss_methodMetagenomics
Bacillota 0.73797871 -1.2936035 NA 0.2991155 NA -4.324763 1 4.828797e-05 1 0.0003380158 FALSE TRUE FALSE TRUE
Bacteroidota 0.05402764 0.9844918 NA 0.2306612 NA 4.268129 1 2.670729e-04 1 0.0016024374 FALSE TRUE FALSE TRUE
Campylobacterota -0.26420533 0.9675136 NA 0.2062888 NA 4.690092 1 2.903416e-04 1 0.0016024374 FALSE TRUE TRUE TRUE
Fusobacteriota 0.22421631 -0.6083450 NA 0.2296486 NA -2.649026 1 1.354560e-02 1 0.0406367972 FALSE TRUE TRUE FALSE
Pseudomonadota 1.45908988 -1.2852381 NA 0.3475817 NA -3.697658 1 3.697809e-04 1 0.0016024374 FALSE TRUE FALSE TRUE

8.4.3 Differences with metagenomics at phylum level within each species

8.4.3.1 Cnephaeus

physeq_eb <- subset_samples(merged_physeq_phylum, Species == "Eb")
physeq_eb <- prune_taxa(taxa_sums(physeq_eb)>0, physeq_eb)
ancom_rand_output_struc_eb = ancombc2(data = physeq_eb, 
                  assay_name = "counts", 
                  tax_level = NULL,
                  fix_formula = "method",
                  p_adj_method = "holm", 
                  pseudo_sens = TRUE,
                  prv_cut =0, 
                  lib_cut = 0, 
                  s0_perc = 0.05,
                  group = "method", 
                  struc_zero = TRUE, 
                  neg_lb = FALSE,
                  alpha = 0.05, 
                  n_cl = 2, 
                  verbose = TRUE,
                  global = FALSE, 
                  pairwise = FALSE, 
                  dunnet = FALSE, 
                  trend = FALSE,
                  iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
                  em_control = list(tol = 1e-5, max_iter = 100),
                  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100), 
                  trend_control = NULL)

Total structural zeros

structural_zeros <- ancom_rand_output_struc_eb$zero_ind

structural_zeros_df <- as.data.frame(structural_zeros) %>% 
  column_to_rownames(., "taxon")

structural_zero_taxa <- rownames(structural_zeros_df)[apply(structural_zeros_df, 1, any)]

structural_zero_groups <- structural_zeros_df[structural_zero_taxa, , drop = FALSE] %>% 
  rename("amplicon"=1) %>% 
  rename("metagenomics"=2)

nrow(structural_zero_groups)
[1] 5

Structural zeros in amplicon

structural_zero_groups %>% 
  filter(amplicon==TRUE) %>% 
  nrow()
[1] 3
structural_zero_groups %>% 
  filter(amplicon==TRUE) %>% 
  rownames_to_column(., "phylum") %>% 
  left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>% 
  select(phylum) %>% 
  pull()
[1] "Deferribacterota" "Elusimicrobiota"  "Planctomycetota" 

Structural zeros in metagenomics

structural_zero_groups %>% 
  filter(metagenomics==TRUE) %>% 
  rownames_to_column(., "phylum") %>% 
  left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>% 
  select(phylum) %>% 
  pull()
[1] "Patescibacteria"      "Rs-K70 termite group"

Enriched phyla

ancom_rand_output_struc_eb$res 
             taxon lfc_(Intercept) lfc_methodMetagenomics se_(Intercept) se_methodMetagenomics W_(Intercept) W_methodMetagenomics p_(Intercept)
1        Bacillota      0.48669986             -0.5156488      0.4972809             0.8086622     0.9787223           -0.6376566   0.343239515
2     Bacteroidota      0.04860922              0.6073777      0.4793397             0.6679996     0.1014087            0.9092485   0.921449332
3 Campylobacterota     -0.09388111              1.3159905      0.4705364             0.6107480    -0.1995193            2.1547195   0.846291508
4 Desulfobacterota      0.08059598              0.9210460      0.5170375             0.7117388     0.1558803            1.2940786   0.878521903
5   Fusobacteriota      0.37809823             -0.5307444      0.5014983             0.6423967     0.7539372           -0.8261941   0.466719515
6   Pseudomonadota      0.68565597             -0.1137261      0.6077870             0.8395206     1.1281188           -0.1354656   0.274082558
7     Synergistota      1.64754965             -1.3691382      0.2345994             0.4345869     7.0228222           -3.1504357   0.005930757
  p_methodMetagenomics q_(Intercept) q_methodMetagenomics diff_(Intercept) diff_methodMetagenomics passed_ss_(Intercept) passed_ss_methodMetagenomics
1           0.53331077     1.0000000            1.0000000            FALSE                   FALSE                 FALSE                         TRUE
2           0.38692024     1.0000000            1.0000000            FALSE                   FALSE                  TRUE                         TRUE
3           0.05957734     1.0000000            0.3587369            FALSE                   FALSE                  TRUE                         TRUE
4           0.21815599     1.0000000            1.0000000            FALSE                   FALSE                  TRUE                         TRUE
5           0.42625118     1.0000000            1.0000000            FALSE                   FALSE                  TRUE                         TRUE
6           0.89374720     1.0000000            1.0000000            FALSE                   FALSE                  TRUE                         TRUE
7           0.05124813     0.0415153            0.3587369             TRUE                   FALSE                 FALSE                        FALSE

8.4.3.2 Pipistrellus

physeq_pk <- subset_samples(merged_physeq_phylum, Species == "Pk")
physeq_pk <- prune_taxa(taxa_sums(physeq_pk)>0, physeq_pk)
ancom_rand_output_struc_pk = ancombc2(data = physeq_pk, 
                  assay_name = "counts", 
                  tax_level = NULL,
                  fix_formula = "method",
                  p_adj_method = "holm", 
                  pseudo_sens = TRUE,
                  prv_cut =0, 
                  lib_cut = 0, 
                  s0_perc = 0.05,
                  group = "method", 
                  struc_zero = TRUE, 
                  neg_lb = FALSE,
                  alpha = 0.05, 
                  n_cl = 2, 
                  verbose = TRUE,
                  global = FALSE, 
                  pairwise = FALSE, 
                  dunnet = FALSE, 
                  trend = FALSE,
                  iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
                  em_control = list(tol = 1e-5, max_iter = 100),
                  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100), 
                  trend_control = NULL)

Total structural zeros

structural_zeros <- ancom_rand_output_struc_pk$zero_ind

structural_zeros_df <- as.data.frame(structural_zeros) %>% 
  column_to_rownames(., "taxon")

structural_zero_taxa <- rownames(structural_zeros_df)[apply(structural_zeros_df, 1, any)]

structural_zero_groups <- structural_zeros_df[structural_zero_taxa, , drop = FALSE] %>% 
  rename("amplicon"=1) %>% 
  rename("metagenomics"=2)

nrow(structural_zero_groups)
[1] 2

Structural zeros in amplicon

structural_zero_groups %>% 
  filter(amplicon==TRUE) %>% 
  nrow()
[1] 1
structural_zero_groups %>% 
  filter(amplicon==TRUE) %>% 
  rownames_to_column(., "phylum") %>% 
  left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>% 
  select(phylum) %>% 
  pull()
[1] "Cyanobacteriota"

Structural zeros in metagenomics

structural_zero_groups %>% 
  filter(metagenomics==TRUE) %>% 
  rownames_to_column(., "phylum") %>% 
  left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>% 
  select(phylum) %>% 
  pull()
[1] "Rs-K70 termite group"

Enriched phyla

ancom_rand_output_struc_pk$res %>% 
  filter(q_methodMetagenomics<0.05) %>%
  tt()
taxon lfc_(Intercept) lfc_methodMetagenomics se_(Intercept) se_methodMetagenomics W_(Intercept) W_methodMetagenomics p_(Intercept) p_methodMetagenomics q_(Intercept) q_methodMetagenomics diff_(Intercept) diff_methodMetagenomics passed_ss_(Intercept) passed_ss_methodMetagenomics
Bacillota 1.788892 -1.387561 0.28713994 0.4803160 6.230036 -2.88885 4.336663e-07 6.683597e-03 2.601998e-06 4.010158e-02 TRUE TRUE TRUE TRUE
Bacteroidota -2.074828 4.144439 0.02690928 0.3105124 -77.104531 13.34709 5.253686e-14 3.095963e-07 3.677580e-13 2.167174e-06 TRUE TRUE TRUE TRUE

8.4.3.3 Hypsugo

physeq_ha <- subset_samples(merged_physeq_phylum, Species == "Ha")
physeq_ha <- prune_taxa(taxa_sums(physeq_ha)>0, physeq_ha)
ancom_rand_output_struc_ha = ancombc2(data = physeq_ha, 
                  assay_name = "counts", 
                  tax_level = NULL,
                  fix_formula = "method",
                  p_adj_method = "holm", 
                  pseudo_sens = TRUE,
                  prv_cut =0, 
                  lib_cut = 0, 
                  s0_perc = 0.05,
                  group = "method", 
                  struc_zero = TRUE, 
                  neg_lb = FALSE,
                  alpha = 0.05, 
                  n_cl = 2, 
                  verbose = TRUE,
                  global = FALSE, 
                  pairwise = FALSE, 
                  dunnet = FALSE, 
                  trend = FALSE,
                  iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
                  em_control = list(tol = 1e-5, max_iter = 100),
                  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100), 
                  trend_control = NULL)

Total structural zeros

structural_zeros <- ancom_rand_output_struc_ha$zero_ind

structural_zeros_df <- as.data.frame(structural_zeros) %>% 
  column_to_rownames(., "taxon")

structural_zero_taxa <- rownames(structural_zeros_df)[apply(structural_zeros_df, 1, any)]

structural_zero_groups <- structural_zeros_df[structural_zero_taxa, , drop = FALSE] %>% 
  rename("amplicon"=1) %>% 
  rename("metagenomics"=2)

nrow(structural_zero_groups)
[1] 7

Structural zeros in amplicon

structural_zero_groups %>% 
  filter(amplicon==TRUE) %>% 
  nrow()
[1] 2
structural_zero_groups %>% 
  filter(amplicon==TRUE) %>% 
  rownames_to_column(., "phylum") %>% 
  left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>% 
  select(phylum) %>% 
  pull()
[1] "Actinomycetota" "Spirochaetota" 

Structural zeros in metagenomics

structural_zero_groups %>% 
  filter(metagenomics==TRUE) %>% 
  rownames_to_column(., "phylum") %>% 
  left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>% 
  select(phylum) %>% 
  pull()
[1] "Campylobacterota"     "Desulfobacterota"     "Fusobacteriota"       "Rs-K70 termite group" "Synergistota"        

Enriched phyla

ancom_rand_output_struc_ha$res %>% 
  filter(q_methodMetagenomics<0.05) %>%
  tt()
taxon lfc_(Intercept) lfc_methodMetagenomics se_(Intercept) se_methodMetagenomics W_(Intercept) W_methodMetagenomics p_(Intercept) p_methodMetagenomics q_(Intercept) q_methodMetagenomics diff_(Intercept) diff_methodMetagenomics passed_ss_(Intercept) passed_ss_methodMetagenomics
Pseudomonadota 1.23417 -1.979773 NA 0.3427585 NA -5.775998 1 1.523765e-06 1 4.571296e-06 FALSE TRUE FALSE TRUE