Chapter 9 Metabarcoding selected filtering based on beta diversity (t4_p0)

load("resources/metagenomics/data_filtered.Rdata")
load("resources/amplicon/selected_filtering_beta.Rdata")
species_color <- c("#e5bd5b", "#6b7398", "#76b183")

Number of MAGs

genome_metadata_amplicon %>% 
  nrow()%>% 
  cat()
79

Number of Archaea phyla

genome_metadata_amplicon %>% 
  filter(domain == "Archaea")%>%
  dplyr::select(phylum) %>%
  unique() %>%
  pull() %>%
  length()%>% 
  cat()
1

Number of Bacteria phyla

genome_metadata_amplicon %>% 
  filter(domain == "Bacteria")%>%
  dplyr::select(phylum) %>%
  unique() %>%
  pull() %>%
  length()%>% 
  cat()
10

9.1 Taxonomy

genome_counts_filt_amplicon %>%
  mutate_at(vars(-genome),  ~ . / sum(.)) %>% 
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% 
  left_join(., genome_metadata_amplicon, by = join_by(genome == genome)) %>% 
  left_join(., sample_metadata_amplicon, by = join_by(sample == sample)) %>% 
  filter(count > 0) %>% #filter 0 counts
  ggplot(., aes(
    x = sample,
    y = count,
    fill = phylum,
    group = phylum
  )) + 
  geom_bar(stat = "identity",
           colour = "white",
           linewidth = 0.1) + 
  scale_fill_manual(values = phylum_colors) +
  facet_nested( ~ factor(
    Species,
    labels = c("Eb" = "Cnephaeus", "Ha" = "Hypsugo", "Pk" = "Pipistrellus")
  ), scales = "free") + 
  guides(fill = guide_legend(ncol = 1)) +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.x = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.text = element_text(size = 10),
    strip.background = element_rect(fill = "white"),
    strip.text = element_text(
      size = 12,
      lineheight = 0.6,
      face = "bold"
    ),
    axis.line = element_line(
      linewidth = 0.5,
      linetype = "solid",
      colour = "black"
    )
  ) +
  labs(fill = "Phylum", y = "Relative abundance", x = "Samples")+
  guides(fill = guide_legend(ncol = 1))

Grouping low-abundance bacteria

p1 <- genome_counts_filt %>%
  mutate_at(vars(-genome), ~ . / sum(.)) %>% 
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% 
  left_join(genome_metadata, by = join_by(genome == genome)) %>% 
  left_join(sample_metadata, by = join_by(sample == sample)) %>% 
  filter(count > 0) %>%
group_by(sample, phylum) %>%
  mutate(total_abundance = sum(count)) %>%
  ungroup() %>%
  mutate(phylum = if_else(total_abundance < 0.01, "Other", phylum)) %>%
  group_by(sample, phylum, Species) %>%
  summarise(count = sum(count), .groups = "drop") %>%
   ggplot(aes(
    x = sample,
    y = count,
    fill = phylum,
    group = phylum
  )) + 
  geom_bar(stat = "identity",
           colour = "white",
           linewidth = 0.1) + 
  scale_fill_manual(values = c(phylum_colors, "Other" = "grey50"),
                    drop = FALSE)+
  facet_nested(~ factor(
    Species,
    labels = c("Eb" = "Cnephaeus", "Ha" = "Hypsugo", "Pk" = "Pipistrellus")
  ), scales = "free") + 
  guides(fill = guide_legend(ncol = 1)) +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.x = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.text = element_text(size = 10),
    strip.background = element_rect(fill = "white"),
    strip.text = element_text(
      size = 12,
      lineheight = 0.6,
      face = "bold"
    ),
    axis.line = element_line(
      linewidth = 0.5,
      linetype = "solid",
      colour = "black"
    )
  ) +
  labs(fill = "Phylum", y = "Relative abundance", x = "Samples")+
  guides(fill = guide_legend(ncol = 1))

#ggsave("community_plot_grouped_selected.pdf", plot = p1, width = 12, height = 6)
p1

Phylum relative abundances

phylum_summary <- genome_counts_filt_amplicon %>%
  mutate_at(vars(-genome),~./sum(.)) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata_amplicon, by = join_by(sample == sample)) %>%
  left_join(genome_metadata_amplicon, by = join_by(genome == genome)) %>%
  group_by(sample,phylum,Species) %>%
  summarise(relabun=sum(count))
phylum_summary %>%
  group_by(phylum) %>%
  summarise(
    Total_mean = mean(relabun * 100, na.rm = T),
    Total_sd = sd(relabun * 100, na.rm = T),
    Eb_mean = mean(relabun[Species == "Eb"] * 100, na.rm = T),
    Eb_sd = sd(relabun[Species == "Eb"] * 100, na.rm = T),
    Ha_mean = mean(relabun[Species == "Ha"] * 100, na.rm = T),
    Ha_sd = sd(relabun[Species == "Ha"] * 100, na.rm = T),
    Pk_mean = mean(relabun[Species == "Pk"] * 100, na.rm = T),
    Pk_sd = sd(relabun[Species == "Pk"] * 100, na.rm = T)
  ) %>%
  mutate(
    Total = str_c(round(Total_mean, 3), "±", round(Total_sd, 3)),
    Cnephaeus = str_c(round(Eb_mean, 3), "±", round(Eb_sd, 3)),
    Hypsugo = str_c(round(Ha_mean, 3), "±", round(Ha_sd, 3)),
    Pipistrellus = str_c(round(Pk_mean, 3), "±", round(Pk_sd, 3))
  ) %>%
  arrange(-Total_mean) %>%
  dplyr::select(phylum,
                Total,
                Cnephaeus,
                Hypsugo,
                Pipistrellus) %>%
  tt()
phylum Total Cnephaeus Hypsugo Pipistrellus
Pseudomonadota 65.046±30.493 69.57±38.576 72.4±22.037 56.639±32.048
Bacillota 23.641±25.626 22.74±31.991 21.12±17.56 26.227±29.111
Bacteroidota 3.421±9.565 0.219±0.514 5.097±13.327 3.429±7.62
Fusobacteriota 2.601±8.882 6.14±13.202 0.015±0.062 3.227±10.035
Halobacterota 1.193±8.517 0±0 0±0 2.765±12.968
Campylobacterota 0.983±6.993 0±0 0±0 2.279±10.646
Desulfobacterota 0.886±3.639 0.875±2.742 0.009±0.041 1.649±5.196
Rs-K70 termite group 0.823±3.169 0.457±1.446 0.496±2.16 1.271±4.318
Cyanobacteriota 0.613±4.33 0±0 0.005±0.023 1.417±6.592
Verrucomicrobiota 0.473±3.38 0±0 0±0 1.097±5.146
Actinomycetota 0.32±2.284 0±0 0.859±3.742 0±0
bats = c("Eb", "Pk", "Ha")

total_asvs <- data.frame(
  Bat = character(),
  ASVs = numeric(),
  Phylum = numeric(),
  Family = numeric(),
  Genus = numeric()
)

preabs_table <- genome_counts_filt_amplicon %>%
  mutate(across(-genome, ~ . / sum(.))) %>%
  column_to_rownames("genome") %>%
  mutate(across(everything(), ~ as.integer(. > 0))) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("sample") %>%
  left_join(sample_metadata_amplicon[c("sample", "Species")], by = "sample") %>%
  group_by(Species) %>%
  summarize(across(-sample, sum), .groups = "drop") %>%
  column_to_rownames("Species") %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("genome")  %>%
  left_join(genome_metadata_amplicon, by = join_by("genome" == "genome")) 
# %>% 
#   filter(domain=="Bacteria")

phylum <- preabs_table %>%
  distinct(phylum)

family <- preabs_table %>%
  distinct(phylum, class, order, family)

genus <- preabs_table %>%
  distinct(phylum, class, order, family, genus)

total_asvs <- rbind(
  total_asvs,
  data.frame(
    Bat = "Total",
    ASVs = nrow(preabs_table),
    Phylum = nrow(phylum),
    Family = nrow(family),
    Genus = nrow(genus)
  )
)

for (bat in bats) {
  number <- preabs_table %>%
    select({{bat}}) %>%
    filter(. >= 1)
  
  phylum <- preabs_table %>%
    select({{bat}}, phylum) %>%
    filter(!!sym(bat) >= 1) %>%
    distinct(phylum)
  
  family <- preabs_table %>%
    select({{bat}}, phylum, class, order, family) %>%
    filter(!!sym(bat) >= 1) %>%
    distinct(phylum, class, order, family)
  
  genus <- preabs_table %>%
    select({{bat}}, phylum, class, order, family, genus) %>%
    filter(!!sym(bat) >= 1) %>%
    distinct(phylum, class, order, family, genus)
  
  total_asvs <- rbind(
    total_asvs,
    data.frame(
      Bat = bat,
      ASVs = nrow(number),
      Phylum = nrow(phylum),
      Family = nrow(family),
      Genus = nrow(genus)
    )
  )
}
bats = c("Eb", "Pk", "Ha")

no_annotation <- data.frame(Bat = character(),
                            No_genus = numeric(),
                            No_species = numeric())

preabs_table <- genome_counts_filt_amplicon %>%
  mutate(across(-genome, ~ . / sum(.))) %>%
  column_to_rownames("genome") %>%
  mutate(across(everything(), ~ as.integer(. > 0))) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("sample") %>%
  left_join(sample_metadata_amplicon[c("sample", "Species")], by = "sample") %>%
  group_by(Species) %>%
  summarize(across(-sample, sum), .groups = "drop") %>%
  column_to_rownames("Species") %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("genome")  %>%
  left_join(genome_metadata_amplicon, by = join_by("genome" == "genome")) 
# %>% 
#   filter(domain=="Bacteria")

genus <- preabs_table  %>%
  filter(is.na(genus))

species <- preabs_table  %>%
  filter(is.na(species))

no_annotation <- rbind(no_annotation,
                       data.frame(
                         Bat = "Total",
                         No_genus = nrow(genus),
                         No_species = nrow(species)
                       ))

for (bat in bats) {
  number <- preabs_table %>%
    select({{bat}}) %>%
    filter(. >= 1)
  
  genus <- preabs_table %>%
    select({{bat}}, phylum, class, order, family, genus) %>%
    filter(!!sym(bat) >= 1) %>%
  filter(is.na(genus))
  
  species <- preabs_table %>%
    filter(!!sym(bat) >= 1) %>%
  filter(is.na(species))
  
  no_annotation <- rbind(no_annotation,
                         data.frame(
                           Bat = bat,
                           No_genus = nrow(genus),
                           No_species = nrow(species)
                         ))
}

Total percentage of ASVs without genus-level annotation

nongenera <- genome_metadata_amplicon %>%
  filter(domain=="Bacteria") %>%
  filter(is.na(genus)) %>% 
  summarize(ASV_nogenera = n()) %>%
  pull()
nasvs <- total_asvs %>%
  filter(Bat == "Total") %>%
  select(ASVs) %>%
  pull()
perct <- nongenera * 100 / nasvs
cat(perct)
15.18987

Percentage of ASVs without genus-level annotation by phylum

total_asv_phylum <- genome_metadata_amplicon %>%
  filter(domain=="Bacteria") %>%
  group_by(phylum) %>%
  summarize(Total_ASVs = n())
genome_metadata_amplicon %>%
  filter(is.na(genus)) %>% 
  group_by(phylum) %>%
  summarize(ASVs_nogenus = n()) %>%
  left_join(total_asv_phylum, by = join_by(phylum == phylum)) %>%
  mutate(Percentage_nogenus = 100 * ASVs_nogenus / Total_ASVs) %>%
  tt()
phylum ASVs_nogenus Total_ASVs Percentage_nogenus
Pseudomonadota 11 50 22
Rs-K70 termite group 1 1 100

Number of bacterial species

genome_metadata_amplicon %>% 
  filter(domain == "Bacteria")%>%
  dplyr::select(species) %>%
  unique() %>%
  drop_na() %>% 
  pull() %>%
  length() %>% 
  cat()
10

Total percentage of ASVs without species-level annotation

nonspecies <- genome_metadata_amplicon %>%
  filter(domain=="Bacteria") %>%
  filter(is.na(species)) %>%
  summarize(ASV_nospecies = n()) %>%
  pull()
perct <- nonspecies * 100 / nasvs
cat(perct)
83.5443

ASVs without species-level annotation

total_mag_phylum <- genome_metadata_amplicon %>%
  filter(domain=="Bacteria") %>%
  group_by(phylum) %>%
  summarize(ASVs_total = n())

genome_metadata_amplicon %>%
  filter(domain=="Bacteria") %>%
  filter(is.na(species)) %>%
  group_by(phylum) %>%
  summarize(ASVs_nospecies = n()) %>%
  left_join(total_mag_phylum, by = join_by(phylum == phylum)) %>%
  mutate(Species_annotated = ASVs_total - ASVs_nospecies) %>%
  mutate(Percentage_nospecies = 100 * ASVs_nospecies / ASVs_total) %>%
  mutate(Percentage_species = 100 - 100 * ASVs_nospecies / ASVs_total) %>%
  tt()
phylum ASVs_nospecies ASVs_total Species_annotated Percentage_nospecies Percentage_species
Actinomycetota 1 1 0 100.00000 0.000000
Bacillota 14 15 1 93.33333 6.666667
Bacteroidota 5 6 1 83.33333 16.666667
Campylobacterota 1 1 0 100.00000 0.000000
Cyanobacteriota 1 1 0 100.00000 0.000000
Desulfobacterota 1 1 0 100.00000 0.000000
Pseudomonadota 41 50 9 82.00000 18.000000
Rs-K70 termite group 1 1 0 100.00000 0.000000
Verrucomicrobiota 1 1 0 100.00000 0.000000

9.1.1 Summary table

bats = c("Eb", "Pk", "Ha")

single_sp <- data.frame(Bat = character(), Single_species = numeric())

# bacteria <- genome_metadata_amplicon %>% 
#   filter(domain=="Bacteria")

table_upset_analysis <- genome_counts_filt_amplicon %>%
  mutate(across(-genome, ~ . / sum(.))) %>%
#  filter(genome %in% bacteria$genome) %>% 
  column_to_rownames("genome") %>%
  mutate(across(everything(), ~ as.integer(. > 0))) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("sample") %>%
  left_join(sample_metadata_amplicon[c("sample", "Species")], by = "sample") %>%
  group_by(Species) %>%
  summarize(across(-sample, sum), .groups = "drop") %>%
  column_to_rownames("Species") %>%
  mutate(across(everything(), ~ as.integer(. > 0))) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("sample")

unique_all <- table_upset_analysis %>%
  filter(rowSums(across(Eb:Pk)) == 1)

single_sp <- rbind(single_sp, data.frame(Bat = "Total",
                                         Single_species = nrow(unique_all)))
  
for (bat in bats) {
  unique <- table_upset_analysis %>%
    filter(rowSums(across(Eb:Pk)) == 1) %>%
    column_to_rownames(., "sample") %>% 
    select(bat) %>%
    filter(. > 0) %>%
    nrow()
  single_sp <- rbind(single_sp, data.frame(Bat = bat, Single_species = unique))
}
single_ind <- data.frame(Bat = character(), Single_individual = numeric())

freq_table <- genome_counts_filt_amplicon %>%
  mutate(across(-genome, ~ . / sum(.))) %>%
#  filter(genome %in% bacteria$genome) %>%
  column_to_rownames("genome") %>%
  mutate(across(everything(), ~ as.integer(. > 0))) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("sample") %>%
  left_join(sample_metadata_amplicon[c("sample", "Species")], by = "sample") %>%
  group_by(Species) %>%
  summarize(across(-sample, sum), .groups = "drop") %>%
  column_to_rownames("Species") %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("asv")

singleton_filt <- freq_table %>%
  rowwise() %>%
  mutate(row_sum = sum(c_across(-asv))) %>%
  filter(row_sum == 1) %>%
  column_to_rownames(var = "asv")  %>%
  filter(row_sum == 1)

single_ind <- rbind(single_ind, data.frame(
  Bat = "Total",
  Single_individual = nrow(singleton_filt)
))
     
for (bat in bats) {
  singleton_filt <- freq_table %>%
    rowwise() %>%
    mutate(row_sum = sum(c_across(-asv))) %>%
    filter(row_sum == 1) %>%
    column_to_rownames(var = "asv")  %>%
    select(bat) %>%
    filter(. == 1)
  
  single_ind <- rbind(single_ind, data.frame(
    Bat = bat,
    Single_individual = nrow(singleton_filt) 
  ))
}
summary_table_ampli <- total_asvs %>%
  left_join(., no_annotation, by = "Bat") %>%
  left_join(., single_ind, by = "Bat") %>%
  left_join(., single_sp, by = "Bat") %>%
  mutate(Bat = recode(Bat, 
                      Eb = "Cnephaeus bottae",
                      Ha = "Hypsugo ariel",
                      Pk = "Pipistrellus kuhlii"))
summary_table_ampli %>% 
  tt()
Bat ASVs Phylum Family Genus No_genus No_species Single_individual Single_species
Total 79 11 36 50 12 67 13 25
Cnephaeus bottae 43 6 22 29 5 37 1 2
Pipistrellus kuhlii 62 10 30 40 9 56 5 10
Hypsugo ariel 62 8 31 41 10 55 7 13
total_asv <- nrow(genome_metadata_amplicon)
summary_table_ampli %>% 
  select(-Phylum,-Family, -Genus) %>% 
  rowwise() %>% 
  mutate(ASV_perc=round(ASVs*100/total_asv, 2))%>% 
  mutate(No_genus_perc=round(No_genus*100/ASVs, 2))%>% 
  mutate(No_species_perc=round(No_species*100/ASVs, 2)) %>% 
  mutate(Single_individual_perc=round(Single_individual*100/ASVs, 2))%>% 
  mutate(Single_species_perc=round(Single_species*100/ASVs, 2)) %>% 
  mutate(Single_individual_per_unique=round(Single_individual*100/Single_species, 2)) %>% 
  select(1,7:12) %>% tt()
Bat ASV_perc No_genus_perc No_species_perc Single_individual_perc Single_species_perc Single_individual_per_unique
Total 100.00 15.19 84.81 16.46 31.65 52.00
Cnephaeus bottae 54.43 11.63 86.05 2.33 4.65 50.00
Pipistrellus kuhlii 78.48 14.52 90.32 8.06 16.13 50.00
Hypsugo ariel 78.48 16.13 88.71 11.29 20.97 53.85

Shared phyla

meta_phylum <- genome_metadata %>% 
  distinct(domain,phylum)

genome_metadata_amplicon %>% 
  distinct(domain,phylum) %>% 
  filter(phylum %in% meta_phylum$phylum)
    domain           phylum
1 Bacteria   Pseudomonadota
2 Bacteria   Fusobacteriota
3 Bacteria        Bacillota
4 Bacteria Desulfobacterota
5 Bacteria     Bacteroidota
6 Bacteria  Cyanobacteriota
7 Bacteria Campylobacterota
8 Bacteria   Actinomycetota

Only in metagenomic data

ampli_phylum <- genome_metadata_amplicon %>% 
  distinct(domain,phylum)

genome_metadata %>% 
  distinct(domain,phylum) %>% 
  filter(!phylum %in% genome_metadata_amplicon$phylum)
# A tibble: 5 × 2
  domain   phylum          
  <chr>    <chr>           
1 Bacteria Elusimicrobiota 
2 Bacteria Deferribacterota
3 Bacteria Planctomycetota 
4 Bacteria Spirochaetota   
5 Bacteria Synergistota    

Only in metabarcoding selected data

genome_metadata_amplicon %>% 
  distinct(domain,phylum) %>% 
  filter(!phylum %in% meta_phylum$phylum)
    domain               phylum
1 Bacteria Rs-K70 termite group
2  Archaea        Halobacterota
3 Bacteria    Verrucomicrobiota

9.1.2 Differences at family level

Number of families in metagenomic data

load("resources/metagenomics/data_filtered.Rdata")
cat(genome_metadata %>%
      distinct(domain, phylum, family) %>%
      count() %>%
      pull())
56

Number of families in metabarcoding selected data

cat(genome_metadata_amplicon %>%
      distinct(domain, phylum, family) %>%
      count() %>%
      pull())
36

Shared families

meta_family <- genome_metadata %>% 
  distinct(domain,phylum, family)

genome_metadata_amplicon %>% 
  distinct(domain,phylum, family) %>% 
  filter(family %in% meta_family$family)
     domain           phylum              family
1  Bacteria   Pseudomonadota Diplorickettsiaceae
2  Bacteria   Pseudomonadota        Vibrionaceae
3  Bacteria   Pseudomonadota  Enterobacteriaceae
4  Bacteria   Fusobacteriota    Leptotrichiaceae
5  Bacteria        Bacillota     Enterococcaceae
6  Bacteria   Pseudomonadota      Morganellaceae
7  Bacteria   Pseudomonadota     Pasteurellaceae
8  Bacteria   Pseudomonadota      Aeromonadaceae
9  Bacteria        Bacillota    Mycoplasmataceae
10 Bacteria        Bacillota    Streptococcaceae
11 Bacteria   Pseudomonadota      Rickettsiaceae
12 Bacteria   Pseudomonadota        Rhizobiaceae
13 Bacteria Desulfobacterota Desulfovibrionaceae
14 Bacteria     Bacteroidota   Dysgonomonadaceae
15 Bacteria        Bacillota     Lachnospiraceae
16 Bacteria   Pseudomonadota    Burkholderiaceae
17 Bacteria Campylobacterota   Helicobacteraceae
18 Bacteria   Actinomycetota      Micrococcaceae
19 Bacteria        Bacillota      Clostridiaceae
20 Bacteria     Bacteroidota       Weeksellaceae
21 Bacteria   Pseudomonadota    Acetobacteraceae
22 Bacteria   Pseudomonadota      Halomonadaceae
23 Bacteria   Pseudomonadota       Neisseriaceae

Only in metagenomic data

ampli_family <- genome_metadata_amplicon %>% 
  distinct(domain,phylum,family)

genome_metadata %>% 
  distinct(domain,phylum,family) %>% 
  filter(!family %in% ampli_family$family)
# A tibble: 33 × 3
   domain   phylum           family                
   <chr>    <chr>            <chr>                 
 1 Bacteria Pseudomonadota   ""                    
 2 Bacteria Bacteroidota     "Bacteroidaceae"      
 3 Bacteria Campylobacterota "Campylobacteraceae"  
 4 Bacteria Pseudomonadota   "Rhodocyclaceae"      
 5 Bacteria Bacillota        "Erysipelotrichaceae" 
 6 Bacteria Elusimicrobiota  "Elusimicrobiaceae"   
 7 Bacteria Bacillota        "Metamycoplasmataceae"
 8 Bacteria Bacillota        "Acutalibacteraceae"  
 9 Bacteria Pseudomonadota   "CAG-239"             
10 Bacteria Bacillota        ""                    
# ℹ 23 more rows

Only in metabarcoding selected data

genome_metadata_amplicon %>% 
  distinct(domain,phylum,family) %>% 
  filter(!family %in% meta_family$family)
     domain               phylum             family
1  Bacteria       Pseudomonadota       Yersiniaceae
2  Bacteria            Bacillota        Bacillaceae
3  Bacteria Rs-K70 termite group               <NA>
4  Bacteria            Bacillota Entomoplasmataceae
5  Bacteria      Cyanobacteriota     Microcystaceae
6  Bacteria       Pseudomonadota   Pseudomonadaceae
7  Bacteria       Pseudomonadota               <NA>
8   Archaea        Halobacterota Methanosarcinaceae
9  Bacteria       Pseudomonadota   Oxalobacteraceae
10 Bacteria       Pseudomonadota        Erwiniaceae
11 Bacteria            Bacillota   Lactobacillaceae
12 Bacteria    Verrucomicrobiota       Simkaniaceae
13 Bacteria       Pseudomonadota           Orbaceae

9.2 Alpha Diversity

9.2.1 ASV level

richness <- genome_counts_filt_amplicon %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where( ~ !all(. == 0))) %>%
  hilldiv(., q = 0) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(richness = 1) %>%
  rownames_to_column(var = "sample")

neutral <- genome_counts_filt_amplicon %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where( ~ !all(. == 0))) %>%
  hilldiv(., q = 1) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(neutral = 1) %>%
  rownames_to_column(var = "sample")

metabarcoding_selected_alpha_div <- richness %>%
  full_join(neutral, by = join_by(sample == sample)) %>% 
  mutate(method="Metabarcoding_selected_beta")

Correlation between diversity and estimate microbial fraction

fraction_ampli <- microbial_fraction %>% 
  select(sample, estimated_microbial_fraction) %>% 
  right_join(metabarcoding_selected_alpha_div, by="sample") %>% 
  left_join(sample_metadata_amplicon, by="sample") 

cor.test(fraction_ampli$estimated_microbial_fraction, fraction_ampli$richness, method = "spearman")

    Spearman's rank correlation rho

data:  fraction_ampli$estimated_microbial_fraction and fraction_ampli$richness
S = 20749, p-value = 0.05981
alternative hypothesis: true rho is not equal to 0
sample estimates:
       rho 
-0.2796438 
cor.test(fraction_ampli$estimated_microbial_fraction, fraction_ampli$neutral, method = "spearman")

    Spearman's rank correlation rho

data:  fraction_ampli$estimated_microbial_fraction and fraction_ampli$neutral
S = 22184, p-value = 0.01226
alternative hypothesis: true rho is not equal to 0
sample estimates:
       rho 
-0.3681159 
p1_ampli <- ggplot(fraction_ampli, aes(x = estimated_microbial_fraction, y = richness)) +
  geom_point(size = 3, color = "#2E86AB") +
  geom_smooth(method = "lm", se = TRUE, color = "black") +
  theme_classic() +
  labs(x = "Read Fraction (%)", y = "Richness", title= "Correlation between estimated microbial fraction and diversity")

p2_ampli <- ggplot(fraction_ampli, aes(x = estimated_microbial_fraction, y = neutral)) +
  geom_point(size = 3, color = "#F39C12") +
  geom_smooth(method = "lm", se = TRUE, color = "black") +
  theme_classic() +
  labs(x = "Read Fraction (%)", y = "Neutral Alpha Diversity")

p1_ampli + p2_ampli 

richness %>%
  full_join(neutral, by = join_by(sample == sample))  %>%
  pivot_longer(-sample, names_to = "alpha", values_to = "value") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  group_by(alpha) %>%
  summarise(
    total_mean = mean(value, na.rm = T),
    total_sd = sd(value, na.rm = T),
    Eb_mean = mean(value[Species == "Eb"], na.rm = T),
    Eb_sd = sd(value[Species == "Eb"], na.rm = T),
    Ha_mean = mean(value[Species == "Ha"], na.rm = T),
    Ha_sd = sd(value[Species == "Ha"], na.rm = T),
    Pk_mean = mean(value[Species == "Pk"], na.rm = T),
    Pk_sd = sd(value[Species == "Pk"], na.rm = T)
  ) %>%
  mutate(
    Total = str_c(round(total_mean, 2), "±", round(total_sd, 2)),
    Cnephaeus = str_c(round(Eb_mean, 2), "±", round(Eb_sd, 2)),
    Hypsugo = str_c(round(Ha_mean, 2), "±", round(Ha_sd, 2)),
    Pipistrellus = str_c(round(Pk_mean, 2), "±", round(Pk_sd, 2))
  ) %>%
  arrange(-Eb_mean) %>%
  dplyr::select(alpha, Total, Cnephaeus, Pipistrellus, Hypsugo) %>%
  tt()
alpha Total Cnephaeus Pipistrellus Hypsugo
richness 10.61±4.95 10.1±5.49 9.55±5.29 12.11±4.07
neutral 4.24±2.16 3.51±1.96 3.89±2.15 5.03±2.14

9.2.2 Different taxonomic level

sample_metadata_amplicon <- sample_metadata_amplicon[c(1,9,10,13)] %>%
  mutate(sample = paste0(sample, "_ampli_selec")) %>%
  mutate(method="Metabarcoding_selected_beta")

genome_metadata_amplicon <- genome_metadata_amplicon %>%
  mutate(
    class  = if_else(is.na(class), paste0(phylum, "_Unclassified"), class),
    order  = if_else(is.na(order),
                     if_else(str_detect(class, "Unclassified"), class, paste0(class, "_Unclassified")),
                     order),
    family = if_else(is.na(family),
                     if_else(str_detect(order, "Unclassified"), order, paste0(order, "_Unclassified")),
                     family),
    genus  = if_else(is.na(genus),
                     if_else(str_detect(family, "Unclassified"), family, paste0(family, "_Unclassified")),
                     genus)
  )

genome_tax_ampli <- genome_counts_filt_amplicon %>% 
  column_to_rownames(., "genome")%>%
  rename_with(~ paste0(., "_ampli_selec")) %>%
  rownames_to_column(., "genome")%>%
  left_join(genome_metadata_amplicon, by = "genome")

family_level_agg_ampli <- genome_tax_ampli %>%
  select(-genome) %>%
  group_by(phylum, class, family) %>%
  summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>% 
    as.data.frame() %>% 
  select(-c(phylum,class))

phylum_level_agg_ampli <- genome_tax_ampli %>%
  select(-genome) %>%
  group_by(phylum) %>%
  summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>% 
    as.data.frame()

9.2.2.1 Family level

richness_f_ampli <- family_level_agg_ampli %>%
  column_to_rownames(var = "family") %>%
  dplyr::select(where( ~ !all(. == 0))) %>%
  hilldiv(., q = 0) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(richness = 1) %>%
  rownames_to_column(var = "sample")

neutral_f_ampli <- family_level_agg_ampli %>%
  column_to_rownames(var = "family") %>%
  dplyr::select(where( ~ !all(. == 0))) %>%
  hilldiv(., q = 1) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(neutral = 1) %>%
  rownames_to_column(var = "sample")

alpha_div_f_ampli <- richness_f_ampli %>%
  full_join(neutral_f_ampli, by = join_by(sample == sample))

alpha_div_f_ampli_data <- alpha_div_f_ampli %>%
  left_join(sample_metadata_amplicon, by = join_by(sample == sample))

Diversity values

alpha_div_f_ampli %>%
  pivot_longer(-sample, names_to = "alpha", values_to = "value") %>%
  left_join(sample_metadata_amplicon, by = join_by(sample == sample)) %>%
  group_by(alpha) %>%
  summarise(
    total_mean = mean(value, na.rm = T),
    total_sd = sd(value, na.rm = T),
    Eb_mean = mean(value[Species == "Eb"], na.rm = T),
    Eb_sd = sd(value[Species == "Eb"], na.rm = T),
    Ha_mean = mean(value[Species == "Ha"], na.rm = T),
    Ha_sd = sd(value[Species == "Ha"], na.rm = T),
    Pk_mean = mean(value[Species == "Pk"], na.rm = T),
    Pk_sd = sd(value[Species == "Pk"], na.rm = T)
  ) %>%
  mutate(
    Total = str_c(round(total_mean, 2), "±", round(total_sd, 2)),
    Cnephaeus = str_c(round(Eb_mean, 2), "±", round(Eb_sd, 2)),
    Hypsugo = str_c(round(Ha_mean, 2), "±", round(Ha_sd, 2)),
    Pipistrellus = str_c(round(Pk_mean, 2), "±", round(Pk_sd, 2))
  ) %>%
  arrange(-Eb_mean) %>%
  dplyr::select(alpha, Total, Cnephaeus, Pipistrellus, Hypsugo) %>%
  tt()
alpha Total Cnephaeus Pipistrellus Hypsugo
richness 7.22±3.25 5.4±2.76 6.95±3.59 8.47±2.63
neutral 3.16±1.5 2.25±1.01 2.85±1.5 4±1.34

Richness

Model_richness <- MASS::glm.nb(richness ~ Species, data = alpha_div_f_ampli_data,trace=TRUE)
summary(Model_richness)

Call:
MASS::glm.nb(formula = richness ~ Species, data = alpha_div_f_ampli_data, 
    trace = TRUE, init.theta = 30.49122173, link = log)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)   1.6864     0.1476  11.422  < 2e-16 ***
SpeciesHa     0.4506     0.1724   2.613  0.00898 ** 
SpeciesPk     0.2530     0.1727   1.465  0.14293    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(30.4912) family taken to be 1)

    Null deviance: 57.647  on 50  degrees of freedom
Residual deviance: 50.201  on 48  degrees of freedom
AIC: 259.6

Number of Fisher Scoring iterations: 1

              Theta:  30.5 
          Std. Err.:  30.8 

 2 x log-likelihood:  -251.604 
anova(Model_richness)
Analysis of Deviance Table

Model: Negative Binomial(30.4912), link: log

Response: richness

Terms added sequentially (first to last)

        Df Deviance Resid. Df Resid. Dev Pr(>Chi)  
NULL                       50     57.647           
Species  2   7.4463        48     50.201  0.02416 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans(Model_richness, pairwise ~ Species)
$emmeans
 Species emmean     SE  df asymp.LCL asymp.UCL
 Eb        1.69 0.1480 Inf      1.40      1.98
 Ha        2.14 0.0891 Inf      1.96      2.31
 Pk        1.94 0.0896 Inf      1.76      2.11

Results are given on the log (not the response) scale. 
Confidence level used: 0.95 

$contrasts
 contrast estimate    SE  df z.ratio p.value
 Eb - Ha    -0.451 0.172 Inf  -2.613  0.0244
 Eb - Pk    -0.253 0.173 Inf  -1.465  0.3079
 Ha - Pk     0.198 0.126 Inf   1.564  0.2615

Results are given on the log (not the response) scale. 
P value adjustment: tukey method for comparing a family of 3 estimates 

Neutral

Model_neutral <- lm(formula = neutral ~ Species, data = alpha_div_f_ampli_data)
anova(Model_neutral)
Analysis of Variance Table

Response: neutral
          Df Sum Sq Mean Sq F value   Pr(>F)   
Species    2 23.811 11.9053  6.4164 0.003392 **
Residuals 48 89.061  1.8554                    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans(Model_neutral, pairwise ~ Species)
$emmeans
 Species emmean    SE df lower.CL upper.CL
 Eb        2.25 0.431 48     1.38     3.12
 Ha        4.00 0.312 48     3.37     4.63
 Pk        2.85 0.290 48     2.26     3.43

Confidence level used: 0.95 

$contrasts
 contrast estimate    SE df t.ratio p.value
 Eb - Ha    -1.748 0.532 48  -3.285  0.0053
 Eb - Pk    -0.595 0.520 48  -1.146  0.4908
 Ha - Pk     1.153 0.427 48   2.702  0.0253

P value adjustment: tukey method for comparing a family of 3 estimates 

9.2.2.2 Phylum level

richness_p_ampli <- phylum_level_agg_ampli %>%
  column_to_rownames(var = "phylum") %>%
  dplyr::select(where( ~ !all(. == 0))) %>%
  hilldiv(., q = 0) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(richness = 1) %>%
  rownames_to_column(var = "sample")

neutral_p_ampli <- phylum_level_agg_ampli %>%
  column_to_rownames(var = "phylum") %>%
  dplyr::select(where( ~ !all(. == 0))) %>%
  hilldiv(., q = 1) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(neutral = 1) %>%
  rownames_to_column(var = "sample")

alpha_div_phylum_ampli <- richness_p_ampli %>%
  full_join(neutral_p_ampli, by = join_by(sample == sample))

alpha_div_phylum_ampli_data <- alpha_div_phylum_ampli %>%
  left_join(sample_metadata_amplicon, by = join_by(sample == sample))

Diversity values

alpha_div_phylum_ampli %>%
  pivot_longer(-sample, names_to = "alpha", values_to = "value") %>%
  left_join(sample_metadata_amplicon, by = join_by(sample == sample)) %>%
  group_by(alpha) %>%
  summarise(
    total_mean = mean(value, na.rm = T),
    total_sd = sd(value, na.rm = T),
    Eb_mean = mean(value[Species == "Eb"], na.rm = T),
    Eb_sd = sd(value[Species == "Eb"], na.rm = T),
    Ha_mean = mean(value[Species == "Ha"], na.rm = T),
    Ha_sd = sd(value[Species == "Ha"], na.rm = T),
    Pk_mean = mean(value[Species == "Pk"], na.rm = T),
    Pk_sd = sd(value[Species == "Pk"], na.rm = T)
  ) %>%
  mutate(
    Total = str_c(round(total_mean, 2), "±", round(total_sd, 2)),
    Cnephaeus = str_c(round(Eb_mean, 2), "±", round(Eb_sd, 2)),
    Hypsugo = str_c(round(Ha_mean, 2), "±", round(Ha_sd, 2)),
    Pipistrellus = str_c(round(Pk_mean, 2), "±", round(Pk_sd, 2))
  ) %>%
  arrange(-Eb_mean) %>%
  dplyr::select(alpha, Total, Cnephaeus, Pipistrellus, Hypsugo) %>%
  tt()
alpha Total Cnephaeus Pipistrellus Hypsugo
richness 2.96±1.43 2.6±1.43 3.5±1.63 2.53±0.96
neutral 1.85±0.75 1.55±0.63 2.03±0.87 1.79±0.61

Richness

Model_richness <- MASS::glm.nb(richness ~ Species, data = alpha_div_phylum_ampli_data,trace=TRUE)
summary(Model_richness)

Call:
MASS::glm.nb(formula = richness ~ Species, data = alpha_div_phylum_ampli_data, 
    trace = TRUE, init.theta = 85648.54994, link = log)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  0.95551    0.19612   4.872  1.1e-06 ***
SpeciesHa   -0.02875    0.24351  -0.118    0.906    
SpeciesPk    0.29725    0.22683   1.310    0.190    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(85648.55) family taken to be 1)

    Null deviance: 30.456  on 50  degrees of freedom
Residual deviance: 26.682  on 48  degrees of freedom
AIC: 182.32

Number of Fisher Scoring iterations: 1

              Theta:  85649 
          Std. Err.:  1756092 
Warning while fitting theta: iteration limit reached 

 2 x log-likelihood:  -174.321 
anova(Model_richness)
Analysis of Deviance Table

Model: Negative Binomial(85648.55), link: log

Response: richness

Terms added sequentially (first to last)

        Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL                       50     30.456         
Species  2   3.7739        48     26.682   0.1515
emmeans(Model_richness, pairwise ~ Species)
$emmeans
 Species emmean    SE  df asymp.LCL asymp.UCL
 Eb       0.956 0.196 Inf     0.571      1.34
 Ha       0.927 0.144 Inf     0.644      1.21
 Pk       1.253 0.114 Inf     1.029      1.48

Results are given on the log (not the response) scale. 
Confidence level used: 0.95 

$contrasts
 contrast estimate    SE  df z.ratio p.value
 Eb - Ha    0.0287 0.244 Inf   0.118  0.9923
 Eb - Pk   -0.2973 0.227 Inf  -1.310  0.3893
 Ha - Pk   -0.3260 0.184 Inf  -1.773  0.1788

Results are given on the log (not the response) scale. 
P value adjustment: tukey method for comparing a family of 3 estimates 

Neutral

Model_neutral <- lm(formula = neutral ~ Species, data = alpha_div_phylum_ampli_data)
anova(Model_neutral)
Analysis of Variance Table

Response: neutral
          Df  Sum Sq Mean Sq F value Pr(>F)
Species    2  1.6822 0.84110  1.5367 0.2255
Residuals 48 26.2715 0.54732               

9.2.3 Comparison (metabarcoding selected vs metagenomics)

load("resources/alpha_together.Rdata")
merged_table_family_methods <- full_join(family_level_agg_ampli,merged_table, 
          by = c("family"))%>%
  mutate(across(where(is.numeric), ~replace_na(., 0)))

merged_table_phylum_methods <- full_join(phylum_level_agg_ampli,merged_table_phylum, 
          by = c("phylum"))%>%
  mutate(across(where(is.numeric), ~replace_na(., 0)))

all_samples <- rbind(all_samples, sample_metadata_amplicon)

9.2.3.1 Family level

richness <- merged_table_family_methods %>%
  column_to_rownames(var = "family") %>%
  dplyr::select(where( ~ !all(. == 0))) %>%
  hilldiv(., q = 0) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(richness = 1) %>%
  rownames_to_column(var = "sample")

neutral <- merged_table_family_methods %>%
  column_to_rownames(var = "family") %>%
  dplyr::select(where( ~ !all(. == 0))) %>%
  hilldiv(., q = 1) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(neutral = 1) %>%
  rownames_to_column(var = "sample")

alpha_div_family <- richness %>%
  full_join(neutral, by = join_by(sample == sample))

alpha_div_family_data <- alpha_div_family %>%
  left_join(all_samples, by = join_by(sample == sample))

alpha_div_family_data_filt <- alpha_div_family_data %>% 
  filter(method!="Metabarcoding_standard") %>% 
  mutate(sampleid = str_remove(sample, "_ampli_selec"))

Richness

Model_richness_random <- glmer.nb(richness ~ Species*method+(1|sampleid), data = alpha_div_family_data_filt)
summary(Model_richness_random)
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
 Family: Negative Binomial(4.9991)  ( log )
Formula: richness ~ Species * method + (1 | sampleid)
   Data: alpha_div_family_data_filt

     AIC      BIC   logLik deviance df.resid 
   564.3    585.3   -274.2    548.3       94 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.6910 -0.8206 -0.1384  0.4144  2.5247 

Random effects:
 Groups   Name        Variance Std.Dev.
 sampleid (Intercept) 0.02193  0.1481  
Number of obs: 102, groups:  sampleid, 51

Fixed effects:
                             Estimate Std. Error z value Pr(>|z|)    
(Intercept)                    1.6787     0.2025   8.290  < 2e-16 ***
SpeciesHa                      0.4511     0.2425   1.860  0.06288 .  
SpeciesPk                      0.2460     0.2402   1.024  0.30564    
methodMetagenomics             0.7909     0.2625   3.013  0.00259 ** 
SpeciesHa:methodMetagenomics  -1.7843     0.3352  -5.324 1.02e-07 ***
SpeciesPk:methodMetagenomics  -0.8350     0.3158  -2.644  0.00819 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) SpecsH SpcsPk mthdMt SpcH:M
SpeciesHa   -0.832                            
SpeciesPk   -0.838  0.702                     
mthdMtgnmcs -0.721  0.608  0.620              
SpcsH:mthdM  0.566 -0.683 -0.484 -0.779       
SpcsPk:mthM  0.602 -0.506 -0.720 -0.824  0.644
anova(Model_richness_random)
Analysis of Variance Table
               npar  Sum Sq Mean Sq F value
Species           2  4.8318  2.4159  2.4159
method            1  2.1157  2.1157  2.1157
Species:method    2 29.5939 14.7970 14.7970
emmeans(Model_richness_random, pairwise ~ Species)
$emmeans
 Species emmean     SE  df asymp.LCL asymp.UCL
 Eb        2.07 0.1410 Inf      1.80      2.35
 Ha        1.63 0.1110 Inf      1.41      1.85
 Pk        1.90 0.0992 Inf      1.71      2.10

Results are averaged over the levels of: method 
Results are given on the log (not the response) scale. 
Confidence level used: 0.95 

$contrasts
 contrast estimate    SE  df z.ratio p.value
 Eb - Ha     0.441 0.177 Inf   2.489  0.0342
 Eb - Pk     0.171 0.167 Inf   1.025  0.5611
 Ha - Pk    -0.270 0.146 Inf  -1.847  0.1544

Results are averaged over the levels of: method 
Results are given on the log (not the response) scale. 
P value adjustment: tukey method for comparing a family of 3 estimates 
emmeans(Model_richness_random, pairwise ~ method | Species)
$emmeans
Species = Eb:
 method                      emmean    SE  df asymp.LCL asymp.UCL
 Metabarcoding_selected_beta   1.68 0.203 Inf     1.282      2.08
 Metagenomics                  2.47 0.182 Inf     2.112      2.83

Species = Ha:
 method                      emmean    SE  df asymp.LCL asymp.UCL
 Metabarcoding_selected_beta   2.13 0.134 Inf     1.866      2.39
 Metagenomics                  1.14 0.170 Inf     0.803      1.47

Species = Pk:
 method                      emmean    SE  df asymp.LCL asymp.UCL
 Metabarcoding_selected_beta   1.92 0.131 Inf     1.668      2.18
 Metagenomics                  1.88 0.136 Inf     1.614      2.15

Results are given on the log (not the response) scale. 
Confidence level used: 0.95 

$contrasts
Species = Eb:
 contrast                                   estimate    SE  df z.ratio p.value
 Metabarcoding_selected_beta - Metagenomics  -0.7909 0.263 Inf  -3.013  0.0026

Species = Ha:
 contrast                                   estimate    SE  df z.ratio p.value
 Metabarcoding_selected_beta - Metagenomics   0.9934 0.210 Inf   4.727  <.0001

Species = Pk:
 contrast                                   estimate    SE  df z.ratio p.value
 Metabarcoding_selected_beta - Metagenomics   0.0441 0.179 Inf   0.247  0.8053

Results are given on the log (not the response) scale. 

Neutral

Model_neutral <- lme(fixed = neutral ~ Species*method, data = alpha_div_family_data_filt,
               random = ~ 1 | sampleid)
anova(Model_neutral)
               numDF denDF   F-value p-value
(Intercept)        1    48 187.32322  <.0001
Species            2    48   0.29489  0.7460
method             1    48   0.01436  0.9051
Species:method     2    48  11.38991  0.0001
summary(Model_neutral)
Linear mixed-effects model fit by REML
  Data: alpha_div_family_data_filt 
       AIC      BIC    logLik
  444.5844 465.0991 -214.2922

Random effects:
 Formula: ~1 | sampleid
        (Intercept) Residual
StdDev:      1.0451 1.822168

Fixed effects:  neutral ~ Species * method 
                                 Value Std.Error DF   t-value p-value
(Intercept)                   2.249724 0.6642687 48  3.386768  0.0014
SpeciesHa                     1.748328 0.8206651 48  2.130379  0.0383
SpeciesPk                     0.595448 0.8011382 48  0.743252  0.4610
methodMetagenomics            2.380924 0.8148981 48  2.921744  0.0053
SpeciesHa:methodMetagenomics -4.441587 1.0067590 48 -4.411768  0.0001
SpeciesPk:methodMetagenomics -1.583242 0.9828041 48 -1.610943  0.1137
 Correlation: 
                             (Intr) SpecsH SpcsPk mthdMt SpcH:M
SpeciesHa                    -0.809                            
SpeciesPk                    -0.829  0.671                     
methodMetagenomics           -0.613  0.496  0.509              
SpeciesHa:methodMetagenomics  0.496 -0.613 -0.412 -0.809       
SpeciesPk:methodMetagenomics  0.509 -0.412 -0.613 -0.829  0.671

Standardized Within-Group Residuals:
       Min         Q1        Med         Q3        Max 
-1.5079495 -0.5165373 -0.1550650  0.3499670  4.2213829 

Number of Observations: 102
Number of Groups: 51 
emmeans(Model_neutral, pairwise ~ method | Species)
$emmeans
Species = Eb:
 method                      emmean    SE df lower.CL upper.CL
 Metabarcoding_selected_beta   2.25 0.664 50    0.916     3.58
 Metagenomics                  4.63 0.664 48    3.295     5.97

Species = Ha:
 method                      emmean    SE df lower.CL upper.CL
 Metabarcoding_selected_beta   4.00 0.482 48    3.029     4.97
 Metagenomics                  1.94 0.482 48    0.968     2.91

Species = Pk:
 method                      emmean    SE df lower.CL upper.CL
 Metabarcoding_selected_beta   2.85 0.448 48    1.945     3.75
 Metagenomics                  3.64 0.448 48    2.742     4.54

Degrees-of-freedom method: containment 
Confidence level used: 0.95 

$contrasts
Species = Eb:
 contrast                                   estimate    SE df t.ratio p.value
 Metabarcoding_selected_beta - Metagenomics   -2.381 0.815 48  -2.922  0.0053

Species = Ha:
 contrast                                   estimate    SE df t.ratio p.value
 Metabarcoding_selected_beta - Metagenomics    2.061 0.591 48   3.486  0.0011

Species = Pk:
 contrast                                   estimate    SE df t.ratio p.value
 Metabarcoding_selected_beta - Metagenomics   -0.798 0.549 48  -1.452  0.1530

Degrees-of-freedom method: containment 

9.2.3.2 Phylum level

richness_phylum <- merged_table_phylum_methods %>%
  column_to_rownames(var = "phylum") %>%
  dplyr::select(where( ~ !all(. == 0))) %>%
  hilldiv(., q = 0) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(richness = 1) %>%
  rownames_to_column(var = "sample")

neutral_phylum <- merged_table_phylum_methods %>%
  column_to_rownames(var = "phylum") %>%
  dplyr::select(where( ~ !all(. == 0))) %>%
  hilldiv(., q = 1) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(neutral = 1) %>%
  rownames_to_column(var = "sample")

alpha_div_phylum <- richness_phylum %>%
  full_join(neutral_phylum, by = join_by(sample == sample))
alpha_div_phylum_data <- alpha_div_phylum %>%
  left_join(all_samples, by = join_by(sample == sample))
alpha_div_phylum_data_filt <- alpha_div_phylum_data %>% 
  filter(method!="Metabarcoding_standard") %>% 
  mutate(sampleid = str_remove(sample, "_ampli_selec"))

Richness

Model_richness_random_phy <- glmer.nb(richness ~ Species*method+(1|sampleid), data = alpha_div_phylum_data_filt)
summary(Model_richness_random_phy)
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
 Family: Negative Binomial(225004.8)  ( log )
Formula: richness ~ Species * method + (1 | sampleid)
   Data: alpha_div_phylum_data_filt

     AIC      BIC   logLik deviance df.resid 
   366.2    387.2   -175.1    350.2       94 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.7092 -0.3944 -0.1879  0.3041  2.0653 

Random effects:
 Groups   Name        Variance Std.Dev.
 sampleid (Intercept) 0.01722  0.1312  
Number of obs: 102, groups:  sampleid, 51

Fixed effects:
                             Estimate Std. Error z value Pr(>|z|)    
(Intercept)                    0.9465     0.2027   4.671 3.00e-06 ***
SpeciesHa                     -0.0280     0.2564  -0.109  0.91303    
SpeciesPk                      0.2975     0.2306   1.290  0.19703    
methodMetagenomics             0.6737     0.2387   2.822  0.00477 ** 
SpeciesHa:methodMetagenomics  -1.4094     0.3610  -3.905 9.44e-05 ***
SpeciesPk:methodMetagenomics  -0.7980     0.2847  -2.803  0.00507 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) SpecsH SpcsPk mthdMt SpcH:M
SpeciesHa   -0.809                            
SpeciesPk   -0.860  0.701                     
mthdMtgnmcs -0.792  0.649  0.683              
SpcsH:mthdM  0.553 -0.694 -0.478 -0.695       
SpcsPk:mthM  0.647 -0.530 -0.749 -0.818  0.568
anova(Model_richness_random_phy)
Analysis of Variance Table
               npar  Sum Sq Mean Sq F value
Species           2 18.3783  9.1891  9.1891
method            1  0.2686  0.2686  0.2686
Species:method    2 16.4025  8.2012  8.2012
emmeans(Model_richness_random_phy, pairwise ~ Species)
$emmeans
 Species emmean     SE  df asymp.LCL asymp.UCL
 Eb       1.283 0.1300 Inf     1.028     1.539
 Ha       0.551 0.1320 Inf     0.292     0.809
 Pk       1.182 0.0893 Inf     1.007     1.357

Results are averaged over the levels of: method 
Results are given on the log (not the response) scale. 
Confidence level used: 0.95 

$contrasts
 contrast estimate    SE  df z.ratio p.value
 Eb - Ha     0.733 0.185 Inf   3.968  0.0002
 Eb - Pk     0.102 0.156 Inf   0.651  0.7917
 Ha - Pk    -0.631 0.158 Inf  -4.007  0.0002

Results are averaged over the levels of: method 
Results are given on the log (not the response) scale. 
P value adjustment: tukey method for comparing a family of 3 estimates 
emmeans(Model_richness_random_phy, pairwise ~ method | Species)
$emmeans
Species = Eb:
 method                      emmean    SE  df asymp.LCL asymp.UCL
 Metabarcoding_selected_beta  0.947 0.203 Inf     0.549     1.344
 Metagenomics                 1.620 0.146 Inf     1.333     1.907

Species = Ha:
 method                      emmean    SE  df asymp.LCL asymp.UCL
 Metabarcoding_selected_beta  0.919 0.151 Inf     0.623     1.214
 Metagenomics                 0.183 0.214 Inf    -0.236     0.602

Species = Pk:
 method                      emmean    SE  df asymp.LCL asymp.UCL
 Metabarcoding_selected_beta  1.244 0.118 Inf     1.014     1.474
 Metagenomics                 1.120 0.125 Inf     0.875     1.364

Results are given on the log (not the response) scale. 
Confidence level used: 0.95 

$contrasts
Species = Eb:
 contrast                                   estimate    SE  df z.ratio p.value
 Metabarcoding_selected_beta - Metagenomics   -0.674 0.239 Inf  -2.822  0.0048

Species = Ha:
 contrast                                   estimate    SE  df z.ratio p.value
 Metabarcoding_selected_beta - Metagenomics    0.736 0.260 Inf   2.831  0.0046

Species = Pk:
 contrast                                   estimate    SE  df z.ratio p.value
 Metabarcoding_selected_beta - Metagenomics    0.124 0.164 Inf   0.758  0.4485

Results are given on the log (not the response) scale. 

Neutral

Model_neutral_phylum <- lme(fixed = neutral ~ Species*method, data = alpha_div_phylum_data_filt,
               random = ~ 1 | sampleid)
anova(Model_neutral_phylum)
               numDF denDF   F-value p-value
(Intercept)        1    48 288.78119  <.0001
Species            2    48   3.69441  0.0322
method             1    48   0.47980  0.4918
Species:method     2    48  11.46045  0.0001
summary(Model_neutral_phylum)
Linear mixed-effects model fit by REML
  Data: alpha_div_phylum_data_filt 
       AIC      BIC    logLik
  262.4756 282.9904 -123.2378

Random effects:
 Formula: ~1 | sampleid
        (Intercept)  Residual
StdDev:   0.6304502 0.5975657

Fixed effects:  neutral ~ Species * method 
                                  Value Std.Error DF   t-value p-value
(Intercept)                   1.5469903 0.2746911 48  5.631744  0.0000
SpeciesHa                     0.2470693 0.3393648 48  0.728034  0.4701
SpeciesPk                     0.4820295 0.3312900 48  1.455008  0.1522
methodMetagenomics            0.8239851 0.2672395 48  3.083321  0.0034
SpeciesHa:methodMetagenomics -1.5440129 0.3301588 48 -4.676577  0.0000
SpeciesPk:methodMetagenomics -0.7666993 0.3223030 48 -2.378816  0.0214
 Correlation: 
                             (Intr) SpecsH SpcsPk mthdMt SpcH:M
SpeciesHa                    -0.809                            
SpeciesPk                    -0.829  0.671                     
methodMetagenomics           -0.486  0.394  0.403              
SpeciesHa:methodMetagenomics  0.394 -0.486 -0.326 -0.809       
SpeciesPk:methodMetagenomics  0.403 -0.326 -0.486 -0.829  0.671

Standardized Within-Group Residuals:
       Min         Q1        Med         Q3        Max 
-2.0965139 -0.4701600 -0.0475519  0.3133924  3.1657939 

Number of Observations: 102
Number of Groups: 51 
emmeans(Model_neutral_phylum, pairwise ~ Species)
$emmeans
 Species emmean    SE df lower.CL upper.CL
 Eb        1.96 0.240 48     1.48     2.44
 Ha        1.43 0.174 48     1.08     1.78
 Pk        2.06 0.162 48     1.73     2.38

Results are averaged over the levels of: method 
Degrees-of-freedom method: containment 
Confidence level used: 0.95 

$contrasts
 contrast estimate    SE df t.ratio p.value
 Eb - Ha    0.5249 0.297 48   1.770  0.1904
 Eb - Pk   -0.0987 0.289 48  -0.341  0.9380
 Ha - Pk   -0.6236 0.238 48  -2.624  0.0307

Results are averaged over the levels of: method 
Degrees-of-freedom method: containment 
P value adjustment: tukey method for comparing a family of 3 estimates 
emmeans(Model_neutral_phylum, pairwise ~ method | Species)
$emmeans
Species = Eb:
 method                      emmean    SE df lower.CL upper.CL
 Metabarcoding_selected_beta   1.55 0.275 50    0.995     2.10
 Metagenomics                  2.37 0.275 48    1.819     2.92

Species = Ha:
 method                      emmean    SE df lower.CL upper.CL
 Metabarcoding_selected_beta   1.79 0.199 48    1.393     2.19
 Metagenomics                  1.07 0.199 48    0.673     1.47

Species = Pk:
 method                      emmean    SE df lower.CL upper.CL
 Metabarcoding_selected_beta   2.03 0.185 48    1.657     2.40
 Metagenomics                  2.09 0.185 48    1.714     2.46

Degrees-of-freedom method: containment 
Confidence level used: 0.95 

$contrasts
Species = Eb:
 contrast                                   estimate    SE df t.ratio p.value
 Metabarcoding_selected_beta - Metagenomics  -0.8240 0.267 48  -3.083  0.0034

Species = Ha:
 contrast                                   estimate    SE df t.ratio p.value
 Metabarcoding_selected_beta - Metagenomics   0.7200 0.194 48   3.714  0.0005

Species = Pk:
 contrast                                   estimate    SE df t.ratio p.value
 Metabarcoding_selected_beta - Metagenomics  -0.0573 0.180 48  -0.318  0.7519

Degrees-of-freedom method: containment 

9.2.4 Alpha diversity figure (three methods)

alpha_div_family_data %>%
  select(-sample, -Life_stage, -Sex) %>%
  pivot_longer(
    cols = -c(method, Species),
    names_to = "metric",
    values_to = "value"
  ) %>%
  mutate(
    metric = factor(
      metric,
      levels = c("richness", "neutral"),
      labels = c("Richness", "Neutral")
    ),
    Species = factor(
      Species,
      levels = c("Eb", "Ha", "Pk"),
      labels = c("C. bottae", "H. ariel", "P. kuhlii")
      ),
    method = factor(
      method,
      levels = c("Metabarcoding_standard", "Metabarcoding_selected_beta", "Metagenomics"),
      labels = c("Metabarcoding Standard", "Metabarcoding Selected Beta", "Metagenomics")
  )) %>%   
  ggplot(aes(y = value, x = method, color = Species, fill = Species)) +
  geom_boxplot(
    width = 0.4,
    alpha = 0.5,
    outlier.shape = NA,
 #   show.legend = FALSE,
    position = position_dodge(width = 0.75)
  ) +
  geom_jitter(
    position = position_jitterdodge(jitter.width = 0.1, dodge.width = 0.75),
    size = 1,
    alpha = 0.8,
    show.legend = FALSE
  ) +
  facet_wrap(~metric, scales = "free_y") +
  scale_color_manual(values=species_color,  labels = c(expression(italic("C. bottae")),
             expression(italic("H. ariel")),
             expression(italic("P. kuhlii"))))+
  scale_fill_manual(values=species_color,  labels = c(expression(italic("C. bottae")),
             expression(italic("H. ariel")),
             expression(italic("P. kuhlii")))) +
  theme_classic() +
  theme(
    strip.background = element_blank(),
    panel.grid.minor.x = element_line(size = 0.1, color = "grey"),
    axis.text.y = element_text(size = 12),
    axis.text.x = element_text(size = 12),
    axis.title.x = element_blank(),
    axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0), size = 14),
    strip.text = element_text(size = 14, lineheight = 0.6, face = "bold")
  ) +
  labs(
#    x = "Methodological approach",
    y = "Alpha diversity at family level",
    fill = "Species"
  )

alpha_div_phylum_data %>%
  select(-sample, -Life_stage, -Sex) %>%
  pivot_longer(
    cols = -c(method, Species),
    names_to = "metric",
    values_to = "value"
  ) %>%
  mutate(
    metric = factor(
      metric,
      levels = c("richness", "neutral"),
      labels = c("Richness", "Neutral")
    ),
    Species = factor(
      Species,
      levels = c("Eb", "Ha", "Pk"),
      labels = c("C. bottae", "H. ariel", "P. kuhlii")
      ),
    method = factor(
      method,
      levels = c("Metabarcoding_standard", "Metabarcoding_selected_beta", "Metagenomics"),
      labels = c("Metabarcoding Standard", "Metabarcoding Selected Beta", "Metagenomics")
  )) %>%   
  ggplot(aes(y = value, x = method, color = Species, fill = Species)) +
  geom_boxplot(
    width = 0.4,
    alpha = 0.5,
    outlier.shape = NA,
 #   show.legend = FALSE,
    position = position_dodge(width = 0.75)
  ) +
  geom_jitter(
    position = position_jitterdodge(jitter.width = 0.1, dodge.width = 0.75),
    size = 1,
    alpha = 0.8,
    show.legend = FALSE
  ) +
  facet_wrap(~metric, scales = "free_y") +
  scale_color_manual(values=species_color,  labels = c(expression(italic("C. bottae")),
             expression(italic("H. ariel")),
             expression(italic("P. kuhlii"))))+
  scale_fill_manual(values=species_color,  labels = c(expression(italic("C. bottae")),
             expression(italic("H. ariel")),
             expression(italic("P. kuhlii")))) +
  theme_classic() +
  theme(
    strip.background = element_blank(),
    panel.grid.minor.x = element_line(size = 0.1, color = "grey"),
    axis.text.y = element_text(size = 12),
    axis.text.x = element_text(size = 12),
    axis.title.x = element_blank(),
    axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0), size = 14),
    strip.text = element_text(size = 14, lineheight = 0.6, face = "bold")
  ) +
  labs(
#    x = "Methodological approach",
    y = "Alpha diversity at phylum level",
    fill = "Species"
  )

9.3 Beta diversity

9.3.1 At different taxonomic level

beta_q0n <- family_level_agg_ampli %>%
  column_to_rownames(., "family") %>% 
  hillpair(., q = 0)

beta_q1n <- family_level_agg_ampli %>%
  column_to_rownames(., "family") %>% 
  hillpair(., q = 1)

beta_q0n_phylum <- phylum_level_agg_ampli %>%
  column_to_rownames(., "phylum") %>% 
  hillpair(., q = 0)

beta_q1n_phylum <- phylum_level_agg_ampli %>%
  column_to_rownames(., "phylum") %>% 
  hillpair(., q = 1)
save(beta_q0n,
     beta_q1n,
     beta_q0n_phylum,
     beta_q1n_phylum,
     file = "resources/beta_tax_sele_beta.Rdata")
load("resources/beta_tax_sele_beta.Rdata")

9.3.1.1 Family

Richness

adonis2(beta_q0n$S ~ Species, data = sample_metadata_amplicon %>% filter(sample %in% labels(beta_q0n$S)) %>% arrange(match(sample, labels(beta_q0n$S))),
  permutations = 999) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Model 2 1.398786 0.0618231 1.581529 0.022
Residual 48 21.226829 0.9381769 NA NA
Total 50 22.625615 1.0000000 NA NA
pairwise.adonis(beta_q0n$S, sample_metadata_amplicon %>% filter(sample %in% labels(beta_q0n$S)) %>% arrange(match(sample, labels(beta_q0n$S))) %>% pull(Species),
  perm = 999) %>%
  tt()
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
Eb vs Ha 1 1.0198664 2.385481 0.08117888 0.003 0.009 *
Eb vs Pk 1 0.6962595 1.574272 0.04985932 0.063 0.189
Ha vs Pk 1 0.4751619 1.050402 0.02622701 0.386 1.000

Neutral

adonis2(beta_q1n$S ~ Species,  data = sample_metadata_amplicon %>% filter(sample %in% labels(beta_q1n$S)) %>% arrange(match(sample, labels(beta_q1n$S))), permutations = 999) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Model 2 1.350228 0.06036723 1.541893 0.04
Residual 48 21.016669 0.93963277 NA NA
Total 50 22.366896 1.00000000 NA NA
pairwise.adonis(beta_q1n$S, sample_metadata_amplicon %>% filter(sample %in% labels(beta_q1n$S)) %>% arrange(match(sample, labels(beta_q1n$S))) %>% pull(Species), perm = 999) %>%
  tt()
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
Eb vs Ha 1 1.0674286 2.5261102 0.08555513 0.002 0.006 *
Eb vs Pk 1 0.6882815 1.5814927 0.05007657 0.088 0.264
Ha vs Pk 1 0.3873457 0.8598879 0.02157276 0.587 1.000

9.3.1.2 Phylum

Richness

adonis2(beta_q0n_phylum$S ~ Species, data = sample_metadata_amplicon %>% filter(sample %in% labels(beta_q0n_phylum$S)) %>% arrange(match(sample, labels(beta_q0n_phylum$S))), permutations = 999) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Model 2 0.2400242 0.02933301 0.7252664 0.643
Residual 48 7.9427100 0.97066699 NA NA
Total 50 8.1827342 1.00000000 NA NA

Neutral

adonis2(beta_q1n_phylum$S ~ Species,data = sample_metadata_amplicon %>%
    filter(sample %in% labels(beta_q1n_phylum$S)) %>%
    arrange(match(sample, labels(beta_q1n_phylum$S))),
  permutations = 999
) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Model 2 0.1725489 0.02585067 0.6368798 0.642
Residual 48 6.5022830 0.97414933 NA NA
Total 50 6.6748318 1.00000000 NA NA

9.3.2 Comparison between metabarcoding selected and metagenomics

beta_q0n <- merged_table_family_methods %>%
  select(-H19, -H20) %>% 
  column_to_rownames(., "family") %>% 
  hillpair(., q = 0)

beta_q1n <- merged_table_family_methods %>%
  select(-H19) %>% 
  column_to_rownames(., "family") %>% 
  hillpair(., q = 1)

beta_q0n_phylum <- merged_table_phylum_methods %>%
  select(-H19, -H20) %>% 
  column_to_rownames(., "phylum") %>% 
  hillpair(., q = 0)

beta_q1n_phylum <- merged_table_phylum_methods %>%
  select(-H19) %>% 
  column_to_rownames(., "phylum") %>% 
  hillpair(., q = 1)
save(beta_q0n,
     beta_q1n,
     beta_q0n_phylum,
     beta_q1n_phylum,
     file = "resources/beta_tax_beta.Rdata")
load("resources/beta_tax_beta.Rdata")
methods_colors <- c("#d15aa9","#d4b9ed", "#c7c5c6")

9.3.2.1 Family level

Richness

m <- as.matrix(beta_q0n$S)
samples <- rownames(m)
to_remove <- grepl("_ampli$", samples) & !grepl("_ampli_Select$", samples)
m_filtered <- m[!to_remove, !to_remove]
beta_q0n_S_filtered <- as.dist(m_filtered)

samples_sel <- all_samples %>%
  mutate(sampleid = str_remove(sample, "_ampli_selec"))

samples_q0 <- samples_sel %>% filter(sample %in% labels(beta_q0n_S_filtered)) %>% arrange(match(sample, labels(beta_q0n_S_filtered)))

betadisper(beta_q0n_S_filtered, samples_q0 %>% pull(Species)) %>% permutest(.)

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq   Mean Sq      F N.Perm Pr(>F)  
Groups     2 0.02817 0.0140850 4.4954    999  0.016 *
Residuals 97 0.30392 0.0031332                       
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(beta_q0n_S_filtered ~ Species*method, data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Model 5 5.197299 0.1197866 2.558458 0.001
Residual 94 38.190674 0.8802134 NA NA
Total 99 43.387973 1.0000000 NA NA
adonis2(beta_q0n_S_filtered ~ Species+method, by = "margin", data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Species 2 1.898649 0.04375979 2.295924 0.002
method 1 1.772572 0.04085400 4.286935 0.001
Residual 96 39.694308 0.91486892 NA NA
Total 99 43.387973 1.00000000 NA NA
adonis2(beta_q0n_S_filtered ~ Species*method, by = "margin", data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Species:method 2 1.503634 0.03465554 1.850472 0.003
Residual 94 38.190674 0.88021338 NA NA
Total 99 43.387973 1.00000000 NA NA

Neutral

m <- as.matrix(beta_q1n$S)
samples <- rownames(m)
to_remove <- grepl("_ampli$", samples) & !grepl("_ampli_Select$", samples)
m_filtered <- m[!to_remove, !to_remove]
beta_q1n_S_filtered <- as.dist(m_filtered)

samples_q1 <- samples_sel %>% filter(sample %in% labels(beta_q1n_S_filtered)) %>% arrange(match(sample, labels(beta_q1n_S_filtered)))

betadisper(beta_q1n_S_filtered, samples_q1 %>% pull(Species)) %>% permutest(.)

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)   
Groups     2 0.15474 0.077371 8.3185    999  0.002 **
Residuals 98 0.91151 0.009301                        
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(beta_q1n_S_filtered ~ Species*method, data = samples_q1, permutations = 999, strata = samples_q1$sampleid) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Model 5 4.290399 0.09881431 2.083335 0.001
Residual 95 39.128401 0.90118569 NA NA
Total 100 43.418800 1.00000000 NA NA
adonis2(beta_q1n_S_filtered ~ Species+method, by = "margin", data = samples_q1, permutations = 999, strata = samples_q1$sampleid) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Species 2 2.220116 0.05113259 2.696190 0.001
method 1 1.251519 0.02882436 3.039782 0.001
Residual 97 39.936210 0.91979075 NA NA
Total 100 43.418800 1.00000000 NA NA
adonis2(beta_q1n_S_filtered ~ Species*method, by = "margin", data = samples_q1, permutations = 999, strata = samples_q1$sampleid) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Species:method 2 0.8078095 0.01860506 0.980642 0.019
Residual 95 39.1284008 0.90118569 NA NA
Total 100 43.4187997 1.00000000 NA NA

9.3.2.2 Phylum level

Richness

m <- as.matrix(beta_q0n_phylum$S)
samples <- rownames(m)
to_remove <- grepl("_ampli$", samples) & !grepl("_ampli_Select$", samples)
m_filtered <- m[!to_remove, !to_remove]
beta_q0n_phylum_S_filtered <- as.dist(m_filtered)

samples_q0 <- samples_sel %>% filter(sample %in% labels(beta_q0n_phylum_S_filtered)) %>% arrange(match(sample, labels(beta_q0n_phylum_S_filtered)))

betadisper(beta_q0n_phylum_S_filtered, samples_q0 %>% pull(Species)) %>% permutest(.)

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df Sum Sq Mean Sq      F N.Perm Pr(>F)    
Groups     2 1.2056 0.60280 9.5619    999  0.001 ***
Residuals 97 6.1150 0.06304                         
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(beta_q0n_phylum_S_filtered ~ Species*method, data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Model 5 3.896413 0.2201316 5.306632 0.001
Residual 94 13.803965 0.7798684 NA NA
Total 99 17.700378 1.0000000 NA NA
adonis2(beta_q0n_phylum_S_filtered ~ Species+method, by = "margin", data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Species 2 1.9255454 0.10878555 6.054582 0.261
method 1 0.4710232 0.02661091 2.962120 0.018
Residual 96 15.2654930 0.86243880 NA NA
Total 99 17.7003783 1.00000000 NA NA
adonis2(beta_q0n_phylum_S_filtered ~ Species*method, by = "margin", data = samples_q0, permutations = 999, strata = samples_q0$sampleid) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Species:method 2 1.461528 0.08257041 4.976236 0.001
Residual 94 13.803965 0.77986838 NA NA
Total 99 17.700378 1.00000000 NA NA

Neutral

m <- as.matrix(beta_q1n_phylum$S)
samples <- rownames(m)
to_remove <- grepl("_ampli$", samples) & !grepl("_ampli_Select$", samples)
m_filtered <- m[!to_remove, !to_remove]
beta_q1n_phylum_S_filtered <- as.dist(m_filtered)

samples_q1 <- samples_sel %>% filter(sample %in% labels(beta_q1n_phylum_S_filtered)) %>% arrange(match(sample, labels(beta_q1n_phylum_S_filtered)))

betadisper(beta_q1n_phylum_S_filtered, samples_q1 %>% pull(Species)) %>% permutest(.)

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df Sum Sq Mean Sq      F N.Perm Pr(>F)  
Groups     2 0.7182 0.35912 4.7351    999  0.013 *
Residuals 98 7.4326 0.07584                       
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(beta_q1n_phylum_S_filtered ~ Species*method, data = samples_q1, permutations = 999, strata = samples_q1$sampleid) %>%
  broom::tidy() %>%
  tt()
term df SumOfSqs R2 statistic p.value
Model 5 1.308865 0.09579321 2.012893 0.141
Residual 95 12.354575 0.90420679 NA NA
Total 100 13.663439 1.00000000 NA NA

9.3.3 Three methodological approaches together

9.3.3.1 Family

Richness

beta_q0n$S %>%
  vegan::metaMDS(.,
                 trymax = 500,
                 k = 2,
                 trace = 0) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
  group_by(method, Species) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    Species = factor(
      Species,
      levels = c("Eb", "Ha", "Pk"),
      labels = c("C. bottae", "H. ariel", "P. kuhlii")),
      method = factor(
      method,
      levels = c("Metabarcoding_standard", "Metabarcoding_selected_beta", "Metagenomics"),
      labels = c("Metabarcoding Standard", "Metabarcoding Selected Beta", "Metagenomics")
      )
  ) %>% 
  ggplot(aes(x = NMDS1, y = NMDS2, color = Species, shape=method)) +
  geom_point(size = 4) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
  scale_color_manual(values = species_color,
                     labels = c(
                       expression(italic("C. bottae")),
                       expression(italic("H. ariel")),
                       expression(italic("P. kuhlii"))
                     ))+
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(
      size = 0.5,
      linetype = "solid",
      colour = "black"
    ),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right",
    legend.box = "vertical"
  ) +
  labs(
    shape = "Methodological approach"
  )#+labs(color = "method") +geom_text_repel(aes(label = sample), size=3)

beta_q0n$S %>%
  vegan::metaMDS(.,
                 trymax = 500,
                 k = 2,
                 trace = 0) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
  group_by(method) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(method = factor(method,
      levels = c("Metabarcoding_standard", "Metabarcoding_selected_beta", "Metagenomics"),
      labels = c("Metabarcoding Standard", "Metabarcoding Selected Beta", "Metagenomics")
      )
  ) %>% 
  ggplot(aes(x = NMDS1, y = NMDS2, color = method)) +
  geom_point(size = 4) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
  scale_color_manual(values = methods_colors)+
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(
      size = 0.5,
      linetype = "solid",
      colour = "black"
    ),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right",
    legend.box = "vertical"
  )

Neutral

beta_q1n$S %>%
  vegan::metaMDS(.,
                 trymax = 500,
                 k = 2,
                 trace = 0) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
  group_by(method, Species) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    Species = factor(
      Species,
      levels = c("Eb", "Ha", "Pk"),
      labels = c("C. bottae", "H. ariel", "P. kuhlii"))
  ) %>% 
  ggplot(aes(x = NMDS1, y = NMDS2, color = Species, shape=method)) +
  geom_point(size = 3) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
  scale_color_manual(values = species_color,
                     labels = c(
                       expression(italic("C. bottae")),
                       expression(italic("H. ariel")),
                       expression(italic("P. kuhlii"))
                     ))+
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(
      size = 0.5,
      linetype = "solid",
      colour = "black"
    ),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right",
    legend.box = "vertical"
  ) +
  labs(shape = "Methodological approach")

beta_q1n$S %>%
  vegan::metaMDS(.,
                 trymax = 500,
                 k = 2,
                 trace = 0) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
  filter(sample!="P36") %>% 
  group_by(method) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
    mutate(method = factor(method,
      levels = c("Metabarcoding_standard", "Metabarcoding_selected_beta", "Metagenomics"),
      labels = c("Metabarcoding Standard", "Metabarcoding Selected Beta", "Metagenomics")
      )
  ) %>% 
  ggplot(aes(x = NMDS1, y = NMDS2, color = method)) + 
  geom_point(size = 3) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
  scale_color_manual(values = methods_colors)+
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(
      size = 0.5,
      linetype = "solid",
      colour = "black"
    ),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right",
    legend.box = "vertical"
  ) +
  labs(color = "Methodological approach",shape="Species")

9.3.3.2 Phylum level

Richness

beta_q0n_phylum$S %>%
  vegan::metaMDS(.,
                 trymax = 500,
                 k = 2,
                 trace = 0) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
  group_by(method, Species) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    Species = factor(
      Species,
      levels = c("Eb", "Ha", "Pk"),
      labels = c("C. bottae", "H. ariel", "P. kuhlii"))
  ) %>% 
  ggplot(aes(x = NMDS1, y = NMDS2, color = Species, shape=method)) +
  geom_point(size = 4) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
  scale_color_manual(values = species_color,
                     labels = c(
                       expression(italic("C. bottae")),
                       expression(italic("H. ariel")),
                       expression(italic("P. kuhlii"))
                     ))+
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(
      size = 0.5,
      linetype = "solid",
      colour = "black"
    ),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right",
    legend.box = "vertical"
  ) +
  labs(
    shape = "Methodological approach"
  )

Neutral

beta_q1n_phylum$S %>%
  vegan::metaMDS(.,
                 trymax = 500,
                 k = 2,
                 trace = 0) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(all_samples, by = join_by(sample == sample)) %>%
  group_by(method) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = NMDS1, y = NMDS2, color = method, shape= Species)) +
  geom_point(size = 3) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2 ), alpha = 0.9, show.legend = FALSE) +
  scale_color_manual(values = methods_colors,
                     labels = c(
                       expression(italic("Metabarcoding Standard")),
                       expression(italic("Metabarcoding Selected")),
                       expression(italic("Metagenomics"))
                     ))+
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(
      size = 0.5,
      linetype = "solid",
      colour = "black"
    ),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right",
    legend.box = "vertical"
  ) +
  labs(color = "Methodological approach",shape="Species")

9.4 Differential abundance

9.4.1 Metabarcoding selected

load("resources/amplicon/selected_filtering_beta.Rdata")
phylo_samples <- sample_metadata_amplicon %>% 
  column_to_rownames("sample") %>%
  sample_data()

phylo_genome <- genome_counts_filt_amplicon %>% 
  select(one_of(c("genome", rownames(phylo_samples)))) %>%
  column_to_rownames("genome") %>%
  otu_table(., taxa_are_rows = TRUE)

phylo_taxonomy <- genome_metadata_amplicon %>% 
  filter(genome %in% rownames(phylo_genome)) %>% 
  column_to_rownames("genome") %>% 
  dplyr::select(domain,phylum,class,order,family,genus,species) %>% 
  as.matrix() %>% 
  tax_table()

physeq_genome_metab_sel <- phyloseq(phylo_genome, phylo_samples, phylo_taxonomy)
ancom_rand_output_phylum_metab_sel_beta = ancombc2(
  data = physeq_genome_metab_sel,
  assay_name = "counts",
  tax_level = "phylum",
  fix_formula = "Species",
  p_adj_method = "holm",
  pseudo_sens = TRUE,
  prv_cut = 0,
  lib_cut = 0,
  s0_perc = 0.05,
  group = "Species",
  struc_zero = TRUE,
  neg_lb = FALSE,
  alpha = 0.05,
  n_cl = 2,
  verbose = TRUE,
  global = TRUE,
  pairwise = TRUE,
  dunnet = FALSE,
  trend = FALSE,
  iter_control = list(
    tol = 1e-5,
    max_iter = 20,
    verbose = FALSE
  ),
  em_control = list(tol = 1e-5, max_iter = 100),
  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
  trend_control = NULL
)

9.4.1.1 Structural zeros

Total number of structural zeros

# Extract structural zeros
structural_zeros <- ancom_rand_output_phylum_metab_sel_beta$zero_ind

# Convert to a data frame for better readability
structural_zeros_df <- as.data.frame(structural_zeros) %>% 
  column_to_rownames(., "taxon")

structural_zero_taxa <- rownames(structural_zeros_df)[apply(structural_zeros_df, 1, any)]

# Identify in which groups each taxon is a structural zero
structural_zero_groups <- structural_zeros_df[structural_zero_taxa, , drop = FALSE] %>% 
  rename("Eb"=1) %>% 
  rename("Ha"=2) %>% 
  rename("Pk"=3) 

# Total number of structural zeros
nrow(structural_zero_groups)
[1] 5

9.4.1.2 Enriched data

res_pair_phylum <- ancom_rand_output_phylum_metab_sel_beta$res_pair
# Get log-fold change and q-value columns
lfc_cols <- grep("^lfc_", colnames(res_pair_phylum), value = TRUE)
q_cols   <- gsub("^lfc_", "q_", lfc_cols)

# Build long table by pairing lfc and q for each contrast
plot_data_phylum <- lfc_cols %>%
  setNames(nm = .) %>%
  purrr::imap_dfr(function(lfc_col, name) {
    q_col <- gsub("^lfc_", "q_", lfc_col)
    
    res_pair_phylum %>%
      select(taxon, !!lfc_col, !!q_col) %>%
      rename(
        lfc = !!lfc_col,
        q   = !!q_col
      ) %>%
      mutate(contrast_key = name)
  }) %>%
  filter(!is.na(lfc), !is.na(q), q < 0.05) %>%
  mutate(
    # Manually re-label contrasts for clarity
    contrast = case_when(
      contrast_key == "lfc_SpeciesHa" ~ "H. ariel vs C. bottae",
      contrast_key == "lfc_SpeciesPk" ~ "P. kuhlii vs C. bottae",
      contrast_key == "lfc_SpeciesPk_SpeciesHa" ~ "P. kuhlii vs H. ariel",
      TRUE ~ contrast_key
    ),
    direction = ifelse(lfc > 0, paste("Enriched in", sub(" vs.*", "", contrast)),
                             paste("Enriched in", sub(".*vs ", "", contrast)))
  )
plot_data_phylum %>% select(-contrast_key)
           taxon       lfc            q               contrast             direction
1 Fusobacteriota -2.999425 0.0069606310  H. ariel vs C. bottae Enriched in C. bottae
2   Bacteroidota  3.662399 0.0006033366  H. ariel vs C. bottae  Enriched in H. ariel
3   Bacteroidota  2.205370 0.0161298287 P. kuhlii vs C. bottae Enriched in P. kuhlii
4 Fusobacteriota  2.375825 0.0287267228  P. kuhlii vs H. ariel Enriched in P. kuhlii

9.4.2 Differences with metagenomics at phylum level across species

load("resources/amplicon/selected_filtering_beta.Rdata")

sample_metadata_amplicon <- sample_metadata_amplicon[c(1,9,10,13)] %>%
  mutate(sample = paste0(sample, "_ampli")) %>%
  mutate(method="Metabarcoding")

genome_metadata_ampli <- genome_metadata_amplicon %>%
  mutate(
    class  = if_else(is.na(class), paste0(phylum, "_Unclassified"), class),
    order  = if_else(is.na(order),
                     if_else(str_detect(class, "Unclassified"), class, paste0(class, "_Unclassified")),
                     order),
    family = if_else(is.na(family),
                     if_else(str_detect(order, "Unclassified"), order, paste0(order, "_Unclassified")),
                     family),
    genus  = if_else(is.na(genus),
                     if_else(str_detect(family, "Unclassified"), family, paste0(family, "_Unclassified")),
                     genus)
  )

genome_tax_ampli <- genome_counts_filt_amplicon %>% 
  column_to_rownames(., "genome")%>%
  rename_with(~ paste0(., "_ampli")) %>%
  rownames_to_column(., "genome")%>%
  left_join(genome_metadata_ampli, by = "genome")

family_level_agg_ampli <- genome_tax_ampli %>%
  select(-genome) %>%
  group_by(phylum, class, family) %>%
  summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>% 
    as.data.frame() %>% 
  select(-c(phylum,class))

phylum_level_agg_ampli <- genome_tax_ampli %>%
  select(-genome) %>%
  group_by(phylum) %>%
  summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>% 
    as.data.frame()
load("resources/metagenomics/data_filtered.Rdata")

genome_metadata <- genome_metadata %>%
  mutate(
    class  = if_else(str_trim(class) == "", paste0(phylum, "_Unclassified"), class),
    order  = if_else(str_trim(order) == "",
                     if_else(str_detect(class, "Unclassified"), class, paste0(class, "_Unclassified")),
                     order),
    family = if_else(str_trim(family) == "",
                     if_else(str_detect(order, "Unclassified"), order, paste0(order, "_Unclassified")),
                     family),
    genus  = if_else(str_trim(genus) == "",
                     if_else(str_detect(family, "Unclassified"), family, paste0(family, "_Unclassified")),
                     genus)
  )

genome_tax <- genome_counts_filt %>% 
  left_join(genome_metadata[c(1,3,4,6)], by = "genome")

# Aggregate counts at the Family level
family_level_agg <- genome_tax %>%
  select(-genome) %>%
  group_by(phylum, class, family) %>%
  summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>% 
  as.data.frame() %>% 
  select(-c(phylum,class))

# Aggregate counts at the Phylum level
phylum_level_agg <- genome_tax %>%
  select(-genome) %>%
  group_by(phylum) %>%
  summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>% 
  as.data.frame() 
merged_table <- full_join(family_level_agg_ampli,family_level_agg, 
          by = c("family"))%>%
  mutate(across(where(is.numeric), ~replace_na(., 0)))

merged_table_phylum <- full_join(phylum_level_agg_ampli,phylum_level_agg, 
          by = c("phylum"))%>%
  mutate(across(where(is.numeric), ~replace_na(., 0)))


sample_shot <- sample_metadata %>% 
  select(-c(Habitat,Longitude, Latitude, Region))%>%
  mutate(method="Metagenomics")

all_samples <- rbind(sample_shot, sample_metadata_amplicon)

samples_sel <- all_samples %>%
  mutate(sampleid = str_remove(sample, "_ampli"))

phylo_genome <- merged_table_phylum %>%
  column_to_rownames("phylum") %>% 
  otu_table(., taxa_are_rows = TRUE)

phylo_samples <- samples_sel %>% 
  mutate(individual=sample) %>%
  column_to_rownames("sample") %>% 
  sample_data()

merged_physeq_phylum <- phyloseq(phylo_genome, phylo_samples)
ancom_rand_output_struc_approach_rand_beta = ancombc2(data = merged_physeq_phylum, 
                  assay_name = "counts", 
                  tax_level = NULL,
                  fix_formula = "method",
 #                 rand_formula = "(1|sampleid)",
                  p_adj_method = "holm", 
                  pseudo_sens = TRUE,
                  prv_cut =0, 
                  lib_cut = 0, 
                  s0_perc = 0.05,
                  group = "method", 
                  struc_zero = TRUE, 
                  neg_lb = FALSE,
                  alpha = 0.05, 
                  n_cl = 2, 
                  verbose = TRUE,
                  global = FALSE, 
                  pairwise = FALSE, 
                  dunnet = FALSE, 
                  trend = FALSE,
                  iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
                  em_control = list(tol = 1e-5, max_iter = 100),
  #               lme_control = lme4::lmerControl(),
                  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100), 
                  trend_control = NULL)

Enriched phyla

ancom_rand_output_struc_approach_rand_beta$res %>%
  filter(q_methodMetagenomics < 0.05) %>%
  tt()
taxon lfc_(Intercept) lfc_methodMetagenomics se_(Intercept) se_methodMetagenomics W_(Intercept) W_methodMetagenomics p_(Intercept) p_methodMetagenomics q_(Intercept) q_methodMetagenomics diff_(Intercept) diff_methodMetagenomics passed_ss_(Intercept) passed_ss_methodMetagenomics
Bacillota 0.4631885 -0.9504081 NaN 0.2917864 NaN -3.257205 1 1.699690e-03 1 8.498452e-03 FALSE TRUE FALSE TRUE
Cyanobacteriota -1.0681707 1.6064923 NaN 0.1443157 NaN 11.131795 1 1.020098e-04 1 7.140686e-04 FALSE TRUE FALSE TRUE
Desulfobacterota -0.2977490 0.7242459 NaN 0.1908770 NaN 3.794306 1 1.328135e-03 1 7.968809e-03 FALSE TRUE FALSE TRUE
Pseudomonadota 1.5499865 -1.9428009 NaN 0.3049521 NaN -6.370840 1 6.494332e-09 1 5.195466e-08 FALSE TRUE FALSE TRUE

9.4.3 Differences with metagenomics at phylum level within each species

9.4.3.1 Cnephaeus

physeq_eb <- subset_samples(merged_physeq_phylum, Species == "Eb")
physeq_eb <- prune_taxa(taxa_sums(physeq_eb)>0, physeq_eb)
ancom_rand_output_struc_eb = ancombc2(data = physeq_eb, 
                  assay_name = "counts", 
                  tax_level = NULL,
                  fix_formula = "method",
                  p_adj_method = "holm", 
                  pseudo_sens = TRUE,
                  prv_cut =0, 
                  lib_cut = 0, 
                  s0_perc = 0.05,
                  group = "method", 
                  struc_zero = TRUE, 
                  neg_lb = FALSE,
                  alpha = 0.05, 
                  n_cl = 2, 
                  verbose = TRUE,
                  global = FALSE, 
                  pairwise = FALSE, 
                  dunnet = FALSE, 
                  trend = FALSE,
                  iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
                  em_control = list(tol = 1e-5, max_iter = 100),
                  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100), 
                  trend_control = NULL)

Total structural zeros

structural_zeros <- ancom_rand_output_struc_eb$zero_ind

structural_zeros_df <- as.data.frame(structural_zeros) %>% 
  column_to_rownames(., "taxon")

structural_zero_taxa <- rownames(structural_zeros_df)[apply(structural_zeros_df, 1, any)]

structural_zero_groups <- structural_zeros_df[structural_zero_taxa, , drop = FALSE] %>% 
  rename("amplicon"=1) %>% 
  rename("metagenomics"=2)

nrow(structural_zero_groups)
[1] 6

Structural zeros in amplicon

structural_zero_groups %>% 
  filter(amplicon==TRUE) %>% 
  nrow()
[1] 5
structural_zero_groups %>% 
  filter(amplicon==TRUE) %>% 
  rownames_to_column(., "phylum") %>% 
  left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>% 
  select(phylum) %>% 
  pull()
[1] "Campylobacterota" "Deferribacterota" "Elusimicrobiota"  "Planctomycetota"  "Synergistota"    

Structural zeros in metagenomics

structural_zero_groups %>% 
  filter(metagenomics==TRUE) %>% 
  rownames_to_column(., "phylum") %>% 
  left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>% 
  select(phylum) %>% 
  pull()
[1] "Rs-K70 termite group"

Enriched phyla

ancom_rand_output_struc_eb$res %>% 
  filter(q_methodMetagenomics<0.05) %>%
  tt()
taxon lfc_(Intercept) lfc_methodMetagenomics se_(Intercept) se_methodMetagenomics W_(Intercept) W_methodMetagenomics p_(Intercept) p_methodMetagenomics q_(Intercept) q_methodMetagenomics diff_(Intercept) diff_methodMetagenomics passed_ss_(Intercept) passed_ss_methodMetagenomics
Bacteroidota -1.426982 2.069855 0.1952675 0.5456508 -7.307833 3.793369 3.349849e-04 0.009037660 0.0013399395 0.03615064 TRUE TRUE FALSE TRUE
Desulfobacterota -1.587317 2.498657 0.1923669 0.5875411 -8.251509 4.252735 7.476295e-05 0.003780339 0.0003738147 0.01890169 TRUE TRUE TRUE TRUE

9.4.3.2 Pipistrellus

physeq_pk <- subset_samples(merged_physeq_phylum, Species == "Pk")
physeq_pk <- prune_taxa(taxa_sums(physeq_pk)>0, physeq_pk)
ancom_rand_output_struc_pk = ancombc2(data = physeq_pk, 
                  assay_name = "counts", 
                  tax_level = NULL,
                  fix_formula = "method",
                  p_adj_method = "holm", 
                  pseudo_sens = TRUE,
                  prv_cut =0, 
                  lib_cut = 0, 
                  s0_perc = 0.05,
                  group = "method", 
                  struc_zero = TRUE, 
                  neg_lb = FALSE,
                  alpha = 0.05, 
                  n_cl = 2, 
                  verbose = TRUE,
                  global = FALSE, 
                  pairwise = FALSE, 
                  dunnet = FALSE, 
                  trend = FALSE,
                  iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
                  em_control = list(tol = 1e-5, max_iter = 100),
                  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100), 
                  trend_control = NULL)

Total structural zeros

structural_zeros <- ancom_rand_output_struc_pk$zero_ind

structural_zeros_df <- as.data.frame(structural_zeros) %>% 
  column_to_rownames(., "taxon")

structural_zero_taxa <- rownames(structural_zeros_df)[apply(structural_zeros_df, 1, any)]

structural_zero_groups <- structural_zeros_df[structural_zero_taxa, , drop = FALSE] %>% 
  rename("amplicon"=1) %>% 
  rename("metagenomics"=2)

nrow(structural_zero_groups)
[1] 4

Structural zeros in amplicon

structural_zero_groups %>% 
  filter(amplicon==TRUE) %>% 
  nrow()
[1] 1
structural_zero_groups %>% 
  filter(amplicon==TRUE) %>% 
  rownames_to_column(., "phylum") %>% 
  left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>% 
  select(phylum) %>% 
  pull()
[1] "Synergistota"

Structural zeros in metagenomics

structural_zero_groups %>% 
  filter(metagenomics==TRUE) %>% 
  rownames_to_column(., "phylum") %>% 
  left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>% 
  select(phylum) %>% 
  pull()
[1] "Halobacterota"        "Rs-K70 termite group" "Verrucomicrobiota"   

Enriched phyla

ancom_rand_output_struc_pk$res %>% 
  filter(q_methodMetagenomics<0.05) %>%
  tt()
taxon lfc_(Intercept) lfc_methodMetagenomics se_(Intercept) se_methodMetagenomics W_(Intercept) W_methodMetagenomics p_(Intercept) p_methodMetagenomics q_(Intercept) q_methodMetagenomics diff_(Intercept) diff_methodMetagenomics passed_ss_(Intercept) passed_ss_methodMetagenomics
Bacillota 1.354895 -1.500774 0.3216512 0.4736496 4.212310 -3.168533 1.553444e-04 0.0030692348 0.0009320667 0.018415409 TRUE TRUE FALSE FALSE
Pseudomonadota 1.673483 -2.163873 0.3477039 0.5625639 4.812954 -3.846448 2.144767e-05 0.0004209894 0.0001501337 0.002946926 TRUE TRUE TRUE TRUE

9.4.3.3 Hypsugo

physeq_ha <- subset_samples(merged_physeq_phylum, Species == "Ha")
physeq_ha <- prune_taxa(taxa_sums(physeq_ha)>0, physeq_ha)
ancom_rand_output_struc_ha = ancombc2(data = physeq_ha, 
                  assay_name = "counts", 
                  tax_level = NULL,
                  fix_formula = "method",
                  p_adj_method = "holm", 
                  pseudo_sens = TRUE,
                  prv_cut =0, 
                  lib_cut = 0, 
                  s0_perc = 0.05,
                  group = "method", 
                  struc_zero = TRUE, 
                  neg_lb = FALSE,
                  alpha = 0.05, 
                  n_cl = 2, 
                  verbose = TRUE,
                  global = FALSE, 
                  pairwise = FALSE, 
                  dunnet = FALSE, 
                  trend = FALSE,
                  iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
                  em_control = list(tol = 1e-5, max_iter = 100),
                  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100), 
                  trend_control = NULL)

Total structural zeros

structural_zeros <- ancom_rand_output_struc_ha$zero_ind

structural_zeros_df <- as.data.frame(structural_zeros) %>% 
  column_to_rownames(., "taxon")

structural_zero_taxa <- rownames(structural_zeros_df)[apply(structural_zeros_df, 1, any)]

structural_zero_groups <- structural_zeros_df[structural_zero_taxa, , drop = FALSE] %>% 
  rename("amplicon"=1) %>% 
  rename("metagenomics"=2)

nrow(structural_zero_groups)
[1] 5

Structural zeros in amplicon

structural_zero_groups %>% 
  filter(amplicon==TRUE) %>% 
  nrow()
[1] 1
structural_zero_groups %>% 
  filter(amplicon==TRUE) %>% 
  rownames_to_column(., "phylum") %>% 
  left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>% 
  select(phylum) %>% 
  pull()
[1] "Spirochaetota"

Structural zeros in metagenomics

structural_zero_groups %>% 
  filter(metagenomics==TRUE) %>% 
  rownames_to_column(., "phylum") %>% 
  left_join(., data.frame(physeq_eb@tax_table) %>% rownames_to_column(., "phylum"), by="phylum") %>% 
  select(phylum) %>% 
  pull()
[1] "Cyanobacteriota"      "Desulfobacterota"     "Fusobacteriota"       "Rs-K70 termite group"

Enriched phyla

ancom_rand_output_struc_ha$res %>% 
  filter(q_methodMetagenomics<0.05) %>%
  tt()
taxon lfc_(Intercept) lfc_methodMetagenomics se_(Intercept) se_methodMetagenomics W_(Intercept) W_methodMetagenomics p_(Intercept) p_methodMetagenomics q_(Intercept) q_methodMetagenomics diff_(Intercept) diff_methodMetagenomics passed_ss_(Intercept) passed_ss_methodMetagenomics
Bacillota 0.06866148 2.172955 NaN 0.3836565 NaN 5.663804 1 1.845838e-05 1 7.383351e-05 FALSE TRUE FALSE TRUE