Chapter 5 Compositional analysis

load("data/data.Rdata")

5.1 Taxonomy

5.1.1 Abundance barplot per individual

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(!is.na(count)) %>%
  ggplot(aes(y=count,x=sample, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    labs(x = "Samples", y ="Relative Abundance") +
    facet_nested(. ~ species.y + sample_type ,  scales="free", space="free") + #facet per day and treatment
    scale_y_continuous(expand = c(0.001, 0.001)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
          #legend.position = "none",
          strip.background.x=element_rect(color = NA, fill= "#f4f4f4"))

### Abundance barplot per individual by host

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(!is.na(count)) %>%
  ggplot(aes(y=count,x=sample, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    labs(x = "Relative abundance", y ="Samples") +
    facet_nested(. ~ species.y,  scales="free", space="free") + #facet per day and treatment
    scale_y_continuous(expand = c(0.001, 0.001)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
          #legend.position = "none",
          strip.background.x=element_rect(color = NA, fill= "#f4f4f4"))

5.1.2 Abundance barplot per individual by sample type

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS normalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(!is.na(count)) %>%
  ggplot(aes(y=count,x=sample, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    labs(x = "Relative abundance", y ="Samples") +
    facet_nested(. ~ sample_type ,  scales="free", space="free") + #facet per day and treatment
    scale_y_continuous(expand = c(0.001, 0.001)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
          #legend.position = "none",
          strip.background.x=element_rect(color = NA, fill= "#f4f4f4"))

5.1.3 Dominant Phyla

#Dominant phyla
bin_count = genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS normalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(!is.na(count))
phyla_count = tapply(bin_count$count, bin_count$phylum, sum)
phyla_rel = phyla_count / sum(phyla_count)
head(sort(phyla_rel, decreasing = TRUE), 3)
   p__Bacillota_A   p__Bacteroidota p__Pseudomonadota 
        0.3031068         0.1736148         0.1371443 
sum(genome_metadata$genus=="g__")
[1] 45
sum(genome_metadata$species=="s__")
[1] 153
sum(genome_metadata$species=="s__")/length(genome_metadata$species)
[1] 0.6710526

5.1.4 Unique MAGs

#Host Species Comparison
caura_count = bin_count[which(match(bin_count$species.y, "Cathartes aura")==1),]
caura_count = caura_count[-which(match(caura_count$count, 0)==1),]
# Number of MAGs identified in Cathartes aura
length(unique(caura_count$genome))
[1] 208
catratus_count = bin_count[which(match(bin_count$species.y, "Coragyps atratus")==1),]
catratus_count = catratus_count[-which(match(catratus_count$count, 0)==1),]
# Number of MAGs identified in Coragyps atratus
length(unique(catratus_count$genome))
[1] 129
# Number of MAGs unique to Cathartes aura
length(setdiff(unique(caura_count$genome), unique(catratus_count$genome)))
[1] 99
# Number of MAGs unique to  Coragyps atratus
length(setdiff(unique(catratus_count$genome), unique(caura_count$genome)))
[1] 20
#Sample Type Comparison
stomach_count = bin_count[which(match(bin_count$sample_type, "Stomach contents")==1),]
stomach_count = stomach_count[-which(match(stomach_count$count, 0)==1),]
# Number of MAGs identified in Stomach Contents
length(unique(stomach_count$genome))
[1] 156
colon_count = bin_count[which(match(bin_count$sample_type, "Colon contents")==1),]
colon_count = colon_count[-which(match(colon_count$count, 0)==1),]
# Number of MAGs identified in Colon Contents
length(unique(colon_count$genome))
[1] 164
# Number of MAGs unique to Stomach Contents
length(setdiff(unique(stomach_count$genome), unique(colon_count$genome)))
[1] 64
# Number of MAGs unique to Colon Contents
length(setdiff(unique(colon_count$genome), unique(stomach_count$genome)))
[1] 72

5.2 Summary Family

family_summary <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  group_by(sample,family) %>%
  summarise(relabun=sum(count))

family_summary %>%
    filter(!is.na(relabun)) %>%
    group_by(family) %>%
    summarise(mean=mean(relabun),sd=sd(relabun)) %>%
    mutate(family= sub("^f__", "", family)) %>%
    arrange(-mean) %>%
    tt()
tinytable_jm09ovt0ldqeunh0zjvl
family mean sd
Bacteroidaceae 1.490709e-01 0.2443103149
Fusobacteriaceae 1.282902e-01 0.1443705609
Enterobacteriaceae 1.251916e-01 0.2011343329
Peptostreptococcaceae 1.251124e-01 0.1786407751
Helicobacteraceae 8.076195e-02 0.1625938079
Clostridiaceae 7.892501e-02 0.1313347535
Campylobacteraceae 5.212880e-02 0.1075223078
Cellulosilyticaceae 4.080261e-02 0.0611654255
Catellicoccaceae 3.989325e-02 0.1121311440
Mycoplasmoidaceae 3.328857e-02 0.1462071056
Porphyromonadaceae 2.260172e-02 0.0670564521
Peptoniphilaceae 2.157820e-02 0.0512850668
CAG-274 1.182361e-02 0.0379807447
Burkholderiaceae_A 1.145965e-02 0.0407079102
Lactobacillaceae 1.142778e-02 0.0536167065
Desulfovibrionaceae 6.601819e-03 0.0390535137
Selenomonadaceae 6.036043e-03 0.0390871712
Erysipelotrichaceae 5.923769e-03 0.0220992310
Chlamydiaceae 5.783864e-03 0.0270599475
4.412705e-03 0.0365148240
Veillonellaceae 4.290538e-03 0.0114594499
Filifactoraceae 3.841964e-03 0.0124330797
Lachnospiraceae 3.480717e-03 0.0087463260
Anaerovoracaceae 2.938628e-03 0.0096706536
Tissierellaceae 2.891583e-03 0.0243649074
Ruminococcaceae 2.818856e-03 0.0069679490
Planococcaceae 2.526432e-03 0.0202580338
Tepidimicrobiaceae 1.914414e-03 0.0158834796
Mucispirillaceae 1.891541e-03 0.0051614857
Peptococcaceae 1.686681e-03 0.0048979981
Streptococcaceae 1.674885e-03 0.0056884455
UBA932 1.650279e-03 0.0056509694
Acidaminococcaceae 1.151868e-03 0.0037814945
Mycobacteriaceae 8.745357e-04 0.0053735691
Tissierellaceae_A 7.856471e-04 0.0066199803
Vagococcaceae 7.558747e-04 0.0038815540
Butyricicoccaceae 6.856950e-04 0.0023794299
CAG-508 4.894172e-04 0.0020598114
Coprobacillaceae 4.188091e-04 0.0021992062
Moraxellaceae 4.111745e-04 0.0023567466
Atopobiaceae 3.559333e-04 0.0019235515
Anaerotignaceae 3.317441e-04 0.0012743338
Tannerellaceae 2.919738e-04 0.0011865247
Oscillospiraceae 2.736638e-04 0.0012051199
UBA3375 2.120617e-04 0.0007979193
Turicibacteraceae 1.588497e-04 0.0008210002
Wohlfahrtiimonadaceae 8.186553e-05 0.0006898112

5.2.1 Jitterplot Family

family_arrange <- family_summary %>%
    filter(!is.na(relabun)) %>%
    group_by(family) %>%
    summarise(mean=sum(relabun)) %>%
    arrange(-mean) %>%
    select(family) %>%
    mutate(family= sub("^f__", "", family)) %>%
    pull()

family_summary %>%
    left_join(genome_metadata %>% select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
    left_join(sample_metadata,by=join_by(sample==sample)) %>%
    mutate(family= sub("^f__", "", family)) %>%
    filter(family %in% family_arrange[1:20]) %>%
    mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
    filter(relabun > 0) %>%
    ggplot(aes(x=relabun, y=family, group=family, color=phylum, fill=phylum)) +
        scale_color_manual(values=phylum_colors[-8]) +
        scale_fill_manual(values=phylum_colors[-8]) +
        #geom_boxplot(alpha=0.2) +
        geom_jitter(alpha=0.5) + 
        facet_nested(. ~ species + sample_type)+
        theme_minimal() +
        theme(legend.position = "none")

5.3 Summary Genus

genus_summary <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  group_by(sample,genus) %>%
  summarise(relabun=sum(count)) %>%
  filter(genus != "g__")

genus_summary %>%
    filter(!is.na(relabun)) %>%
    group_by(genus) %>%
    summarise(mean=mean(relabun),sd=sd(relabun)) %>%
    arrange(-mean) %>%
    tt()
tinytable_t8khcqvzvo5jqf968w5r
genus mean sd
g__Fusobacterium_A 1.282902e-01 0.1443705609
g__Escherichia 1.200678e-01 0.1964860711
g__GCA-900066495 8.205501e-02 0.1712499164
g__Prevotella 5.650019e-02 0.1892847437
g__Campylobacter_D 5.212880e-02 0.1075223078
g__Helicobacter_G 4.134742e-02 0.1090008497
g__Zhenhengia 4.080261e-02 0.0611654255
g__Catellicoccus 3.989325e-02 0.1121311440
g__Peptostreptococcus 3.503106e-02 0.0734362406
g__Helicobacter_D 3.027088e-02 0.0974853188
g__Sarcina 2.763321e-02 0.0566237706
g__Alloprevotella 2.368344e-02 0.0609490193
g__Porphyromonas 2.260172e-02 0.0670564521
g__Anaerosphaera 2.067564e-02 0.0510786126
g__Clostridium 1.958789e-02 0.0328640923
g__Phocaeicola 1.806500e-02 0.0940000374
g__Clostridium_H 1.489043e-02 0.0465139151
g__Ureaplasma 1.433108e-02 0.0587432097
g__Mycoplasma_L 1.414257e-02 0.1014122406
g__Bacteroides 1.301856e-02 0.0560145875
g__Tyzzerella 1.182361e-02 0.0379807447
g__Sutterella 1.108783e-02 0.0405643816
g__Bacteroides_E 1.074624e-02 0.0723915208
g__Clostridium_G 9.837772e-03 0.0824246806
g__Mailhella 6.601819e-03 0.0390535137
g__F6-6636 5.985085e-03 0.0495264820
g__Chlamydiifrater 5.783864e-03 0.0270599475
g__Bulleidia 5.682699e-03 0.0221160879
g__RGIG1987 4.814914e-03 0.0331080045
g__Plesiomonas 4.596222e-03 0.0190210615
g__Hathewaya 3.077618e-03 0.0113078394
g__Clostridium_J 2.889429e-03 0.0051109656
g__Savagea 2.526432e-03 0.0202580338
g__Lactobacillus 2.473803e-03 0.0123587727
g__Ligilactobacillus 1.696498e-03 0.0080645270
g__Cryptobacteroides 1.650279e-03 0.0056509694
g__Lactococcus 1.551557e-03 0.0056906734
g__Limosilactobacillus 1.272398e-03 0.0087581814
g__Anaerotruncus 1.142987e-03 0.0027780737
g__UBA9414 1.088612e-03 0.0048593519
g__Paraclostridium 9.401525e-04 0.0059546252
g__Gemmiger 8.978634e-04 0.0050330320
g__Mycobacterium 8.745357e-04 0.0053735691
g__Terrisporobacter 8.701646e-04 0.0033002565
g__Enterocloster 8.180857e-04 0.0048395418
g__Vagococcus_C 6.898436e-04 0.0038737945
g__Butyricicoccus 6.856950e-04 0.0023794299
g__Edwardsiella 5.275888e-04 0.0031219822
g__Clostridium_X 4.953897e-04 0.0013897614
g__CAG-793 4.894172e-04 0.0020598114
g__Ruthenibacterium 4.365591e-04 0.0025825225
g__Thomasclavelia 4.188091e-04 0.0021992062
g__Parasutterella 3.718211e-04 0.0018402360
g__Anaerotignum 3.317441e-04 0.0012743338
g__UBA1367 3.243070e-04 0.0019188301
g__Clostridium_AP 2.951699e-04 0.0017462840
g__Parabacteroides 2.919738e-04 0.0011865247
g__Anaerofilum 2.911164e-04 0.0008420415
g__Blautia 2.644410e-04 0.0015643087
g__Dwaynesavagella 2.573489e-04 0.0014289254
g__Clostridium_F 2.559199e-04 0.0016054140
g__Faecalicoccus 2.410706e-04 0.0014260679
g__UBA3375 2.120617e-04 0.0007979193
g__Turicibacter 1.588497e-04 0.0008210002
g__Flavonifractor 1.467751e-04 0.0008682496
g__Sellimonas 1.409439e-04 0.0006301750
g__Streptococcus 1.233284e-04 0.0005738541
g__Lawsonibacter 8.521827e-05 0.0003639590
g__Vagococcus 6.603111e-05 0.0003906168
g__RGIG3102 5.033007e-05 0.0002767955
g__Pseudoflavonifractor_A 4.167044e-05 0.0002006784
g__RGIG4373 3.162623e-05 0.0001973511

5.3.1 Jitterplot Genus

genus_arrange <- genus_summary %>%
    group_by(genus) %>%
    summarise(mean=sum(relabun)) %>%
    filter(genus != "g__")%>%
    arrange(-mean) %>%
    select(genus) %>%
    mutate(genus= sub("^g__", "", genus)) %>%
    pull()

genus_summary %>%
    left_join(genome_metadata %>% select(genus,phylum) %>% unique(),by=join_by(genus==genus)) %>%
    left_join(sample_metadata,by=join_by(sample==sample)) %>%
    mutate(genus= sub("^g__", "", genus)) %>%
    filter(genus %in% genus_arrange[1:20]) %>%
    mutate(genus=factor(genus,levels=rev(genus_arrange[1:20]))) %>%
    filter(relabun > 0) %>%
    ggplot(aes(x=relabun, y=genus, group=genus, color=phylum)) +
        scale_color_manual(values=phylum_colors) +
        #geom_boxplot() +
        geom_jitter(alpha=0.5) + 
        facet_nested(. ~ species + sample_type)+
        theme_minimal()

5.4 Alpha diversity

5.4.1 Calculate Hill Numbers

# Calculate Hill numbers
richness <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 0) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(richness = 1) %>%
  rownames_to_column(var = "sample")

neutral <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 1) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(neutral = 1) %>%
  rownames_to_column(var = "sample")

phylogenetic <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 1, tree = genome_tree) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(phylogenetic = 1) %>%
  rownames_to_column(var = "sample")

# Aggregate basal GIFT into elements
dist <- genome_gifts %>%
  to.elements(., GIFT_db2) %>%
  traits2dist(., method = "gower")

functional <- genome_counts_filt %>%
  filter(genome %in% labels(dist)[[1]]) %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 1, dist = dist) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(functional = 1) %>%
  rownames_to_column(var = "sample") %>%
  mutate(functional = if_else(is.nan(functional), 1, functional))

# Merge all metrics
alpha_div <- richness %>%
  full_join(neutral, by = join_by(sample == sample)) %>%
  full_join(phylogenetic, by = join_by(sample == sample)) %>%
  full_join(functional, by = join_by(sample == sample))

5.4.2 Plot Alpha Diversity

alpha_plot = alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = join_by(sample == sample)) %>%
  mutate(metric=factor(metric,levels=c("richness","neutral","phylogenetic","functional"))) %>%
  unite("group", c(species,sample_type), remove = FALSE) %>%
      ggplot(aes(y = value, x = group, group=group, color=group, fill=group)) +
      geom_boxplot(outlier.shape = NA) +
  stat_summary(fun.y = mean, color = "black", geom = "point", shape = 20, fill = "black")+
      geom_jitter(alpha=0.5) +
      scale_color_manual(name="Group",
          breaks=c("Cathartes aura_Colon contents","Coragyps atratus_Colon contents","Cathartes aura_Stomach contents","Coragyps atratus_Stomach contents"),
          labels=c("Cathartes aura (colon)","Coragyps atratus (colon)","Cathartes aura (stomach)","Coragyps atratus (stomach)"),
          values=c(species_colors, "#FF6A6A", "#8B8989" )) +
      scale_fill_manual(name="Group",
          breaks=c("Cathartes aura_Colon contents","Coragyps atratus_Colon contents","Cathartes aura_Stomach contents","Coragyps atratus_Stomach contents"),
          labels=c("Cathartes aura (colon)","Coragyps atratus (colon)","Cathartes aura (stomach)","Coragyps atratus (stomach)"),
          values=c(species_colors, "#FF6A6A", "#8B8989" )) +
      facet_wrap(. ~ metric, scales = "free", ncol=4) +
      coord_cartesian(xlim = c(1, NA)) +
      theme_classic() +
      theme(
        strip.background = element_blank(),
        panel.grid.minor.x = element_line(size = .1, color = "grey"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_blank())

alpha_long <- alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  mutate(
    metric = factor(metric, levels = c("richness", "neutral", "phylogenetic", "functional")),
    group = paste(species, sample_type, sep = "_"))

5.5 Beta diversity

#Calculate Hill Numbers

beta_q0n <- genome_counts_filt %>%
  column_to_rownames(., "genome") %>%
  filter(rowSums(. != 0, na.rm = TRUE) > 0) %>%
  select_if(~!all(. == 0)) %>%
  hillpair(., q = 0)

beta_q1n <- genome_counts_filt %>%
  column_to_rownames(., "genome") %>%
  filter(rowSums(. != 0, na.rm = TRUE) > 0) %>%
  select_if(~!all(. == 0)) %>%
  hillpair(., q = 1)

beta_q1p <- genome_counts_filt %>%
  column_to_rownames(., "genome") %>%
  hillpair(., q = 1, tree = genome_tree)

beta_q1f <- genome_counts_filt %>%
  filter(genome %in% labels(dist)[[1]]) %>%
  column_to_rownames(., "genome") %>% 
  filter(rowSums(. != 0, na.rm = TRUE) > 0) %>%
  select_if(~!all(. == 0)) %>%
  hillpair(., q = 1, dist = dist)

5.6 Richness diversity plot (Species)

group_richness = sample_metadata[na.omit(match(sample_metadata$sample, names(beta_q0n$S))),]

beta_q0n$S %>%
  vegan::metaMDS(., trymax = 500, k = 2, trace=0) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(sample_metadata, by = join_by(sample == sample)) %>%
  group_by(species #, sample_type
           ) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = NMDS1, y = NMDS2, color = species,  #shape = as.factor(sample_type)
             )) +
  geom_point(size = 4) +
  scale_color_manual(values = species_colors) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9) +
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right", legend.box = "vertical"
    ) + 
    #labs(shape="Sample Type") +
    labs(color="Species")

#+geom_text_repel(aes(label = sample), size=3)

#Richness
betadisper(beta_q0n$S, group$species) %>% permutest(., pairwise = TRUE) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq   Mean Sq      F N.Perm Pr(>F)
Groups     1 0.00597 0.0059740 1.0104    999  0.333
Residuals 69 0.40795 0.0059123                     

Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
                 Cathartes aura Coragyps atratus
Cathartes aura                              0.34
Coragyps atratus        0.31831                 
adonis2(beta_q0n$S ~ species, 
        data = group_richness %>% arrange(match(sample,labels(beta_q0n$S))), 
        permutations = 999) %>%
        broom::tidy() %>%
        tt()
tinytable_nitew0t1vhd3mixs8maz
term df SumOfSqs R2 statistic p.value
species 1 0.4870221 0.01722339 1.209241 0.206
Residual 69 27.7897652 0.98277661 NA NA
Total 70 28.2767873 1.00000000 NA NA

5.6.1 Richness diversity plot (Sample Type)

group_richness = sample_metadata[na.omit(match(sample_metadata$sample, names(beta_q0n$S))),]

beta_q0n$S %>%
  vegan::metaMDS(., trymax = 500, k = 2, trace=0) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(sample_metadata, by = join_by(sample == sample)) %>%
  group_by(sample_type #, species
           ) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = NMDS1, y = NMDS2, color = sample_type,  #shape = as.factor(species)
             )) +
  geom_point(size = 4) +
  scale_color_manual(values = sampletype_colors) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9) +
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right", legend.box = "vertical"
    ) + 
    #labs(shape="Species") +
    labs(color="Sample Type")

#+geom_text_repel(aes(label = sample), size=3)

#Richness
betadisper(beta_q0n$S, group_richness$sample_type) %>% permutest(., pairwise = TRUE) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq   Mean Sq      F N.Perm Pr(>F)
Groups     1 0.00480 0.0048029 0.8822    999  0.348
Residuals 69 0.37563 0.0054440                     

Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
                 Colon contents Stomach contents
Colon contents                             0.351
Stomach contents        0.35087                 
adonis2(beta_q0n$S ~ sample_type, 
        data = group_richness %>% arrange(match(sample,labels(beta_q0n$S))), 
        permutations = 999) %>%
        broom::tidy() %>%
        tt()
tinytable_9zl8km2qlfecswq94uwh
term df SumOfSqs R2 statistic p.value
sample_type 1 0.6489657 0.02295048 1.620781 0.051
Residual 69 27.6278216 0.97704952 NA NA
Total 70 28.2767873 1.00000000 NA NA

5.6.2 Richness diversity plot (Sex)

group_richness = sample_metadata[na.omit(match(sample_metadata$sample, names(beta_q0n$S))),]

beta_q0n$S %>%
  vegan::metaMDS(., trymax = 500, k = 2, trace=0) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(sample_metadata, by = join_by(sample == sample)) %>%
  group_by(sex) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = NMDS1, y = NMDS2, color = sex #, fill = sex, shape = as.factor(state)
             )) +
  geom_point(size = 4) +
  scale_color_manual(values = sex_colors) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9) +
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right", legend.box = "vertical"
    ) +
    labs(color="Sex")

#Calculate dispersion and significance
betadisper(beta_q0n$S, group_richness$sex) %>% permutest(., pairwise = TRUE) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq   Mean Sq      F N.Perm Pr(>F)
Groups     1 0.00510 0.0050977 0.9912    999  0.333
Residuals 69 0.35487 0.0051430                     

Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
        Female  Male
Female         0.335
Male   0.32293      
adonis2(beta_q0n$S ~ sex, 
        data = group_richness %>% arrange(match(sample,labels(beta_q0n$S))), 
        permutations = 999) %>%
        broom::tidy() %>%
        tt()
tinytable_xa0pz5g16xzoze2ffb7x
term df SumOfSqs R2 statistic p.value
sex 1 0.5361512 0.01896083 1.333583 0.128
Residual 69 27.7406361 0.98103917 NA NA
Total 70 28.2767873 1.00000000 NA NA

5.6.3 Richness diversity plot (State)

group_richness = sample_metadata[na.omit(match(sample_metadata$sample, names(beta_q0n$S))),]

beta_q0n$S %>%
  vegan::metaMDS(., trymax = 500, k = 2, trace=0) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(sample_metadata, by = join_by(sample == sample)) %>%
  group_by(state) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = NMDS1, y = NMDS2, color = state # shape = as.factor(sex)
             )) +
  geom_point(size = 4) +
  scale_color_manual(values = state_colors) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9) +
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right", legend.box = "vertical"
    ) +
    labs(color="State")

#Calculate dispersion and significance
betadisper(beta_q0n$S, group_richness$state) %>% permutest(., pairwise = TRUE) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq   Mean Sq      F N.Perm Pr(>F)
Groups     1 0.00435 0.0043537 0.8142    999  0.363
Residuals 69 0.36896 0.0053472                     

Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
        Florida Texas
Florida         0.364
Texas   0.37002      
#betadisper(beta_q0n$C, sample_metadata_non0$state) %>% permutest(., pairwise = TRUE) 
adonis2(beta_q0n$S ~ state, 
        data = group_richness %>% arrange(match(sample,labels(beta_q0n$S))), 
        permutations = 999) %>%
        broom::tidy() %>%
        tt()
tinytable_37qybpnmxi94ti69q8k7
term df SumOfSqs R2 statistic p.value
state 1 0.6060866 0.02143407 1.511345 0.049
Residual 69 27.6707008 0.97856593 NA NA
Total 70 28.2767873 1.00000000 NA NA

5.6.4 Neutral Diversity (Species)

group_neutral = sample_metadata[na.omit(match(sample_metadata$sample, names(beta_q1n$S))),]

beta_q1n$S %>%
  vegan::metaMDS(., trymax = 500, k = 2, trace=0) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(sample_metadata, by = join_by(sample == sample)) %>%
  group_by(species) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = NMDS1, y = NMDS2, color = species#, fill = sample_type, shape = as.factor(sample_type)
             )) +
  geom_point(size = 4) +
  scale_color_manual(values = species_colors)+
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9) +
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right", legend.box = "vertical"
    )

betadisper(beta_q1n$S, group_neutral$species) %>% permutest(., pairwise = TRUE) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq   Mean Sq      F N.Perm Pr(>F)
Groups     1 0.00306 0.0030636 0.2567    999  0.601
Residuals 69 0.82341 0.0119335                     

Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
                 Cathartes aura Coragyps atratus
Cathartes aura                             0.603
Coragyps atratus          0.614                 
adonis2(beta_q1n$S ~ species, 
        data = group_neutral %>% arrange(match(sample,labels(beta_q1n$S))), 
        permutations = 999) %>%
        broom::tidy() %>%
        tt()
tinytable_g4151ycg80exyf61k0ee
term df SumOfSqs R2 statistic p.value
species 1 0.3996775 0.01464862 1.025781 0.418
Residual 69 26.8846353 0.98535138 NA NA
Total 70 27.2843129 1.00000000 NA NA

5.6.5 Neutral Diversity (Sample Type)

group_neutral = sample_metadata[na.omit(match(sample_metadata$sample, names(beta_q1n$S))),]

beta_q1n$S %>%
  vegan::metaMDS(., trymax = 500, k = 2, trace=0) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(sample_metadata, by = join_by(sample == sample)) %>%
  group_by(sample_type) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = NMDS1, y = NMDS2, color = sample_type#, fill = sample_type, shape = as.factor(sample_type)
             )) +
  geom_point(size = 4) +
  scale_color_manual(values = sampletype_colors)+
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9) +
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right", legend.box = "vertical"
    ) 

betadisper(beta_q1n$S, group_neutral$sample_type) %>% permutest(., pairwise = TRUE) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)
Groups     1 0.01612 0.016118 1.2912    999   0.25
Residuals 69 0.86132 0.012483                     

Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
                 Colon contents Stomach contents
Colon contents                             0.256
Stomach contents        0.25976                 
adonis2(beta_q1n$S ~ sample_type, 
        data = group_neutral %>% arrange(match(sample,labels(beta_q1n$S))), 
        permutations = 999) %>%
        broom::tidy() %>%
        tt()
tinytable_281cxqdmztye58geslkg
term df SumOfSqs R2 statistic p.value
sample_type 1 0.5736848 0.02102618 1.481966 0.106
Residual 69 26.7106281 0.97897382 NA NA
Total 70 27.2843129 1.00000000 NA NA

5.6.6 Neutral Diversity (Sex)

group_neutral = sample_metadata[na.omit(match(sample_metadata$sample, names(beta_q1n$S))),]

beta_q1n$S %>%
  vegan::metaMDS(., trymax = 500, k = 2, trace=0) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(sample_metadata, by = join_by(sample == sample)) %>%
  group_by(sex) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = NMDS1, y = NMDS2, color = sex#, shape = as.factor(state)
             )) +
  geom_point(size = 4) +
  scale_color_manual(values = sex_colors)+
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9) +
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right", legend.box = "vertical"
    ) +
    labs(color="Sex")

betadisper(beta_q1n$C, group_neutral$sex) %>% permutest(., pairwise = TRUE) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq  Mean Sq     F N.Perm Pr(>F)
Groups     1 0.01439 0.014390 0.897    999  0.346
Residuals 69 1.10689 0.016042                    

Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
        Female  Male
Female         0.351
Male   0.34689      
adonis2(beta_q1n$S ~ sex, 
        data = group_neutral %>% arrange(match(sample,labels(beta_q1n$S))), 
        permutations = 999) %>%
        broom::tidy() %>%
        tt()
tinytable_zc058n5bohp42cxwx05v
term df SumOfSqs R2 statistic p.value
sex 1 0.37472 0.0137339 0.9608351 0.463
Residual 69 26.90959 0.9862661 NA NA
Total 70 27.28431 1.0000000 NA NA

5.6.7 Neutral Diversity (State)

group_neutral = sample_metadata[na.omit(match(sample_metadata$sample, names(beta_q1n$S))),]

beta_q1n$S %>%
  vegan::metaMDS(., trymax = 500, k = 2, trace=0) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(sample_metadata, by = join_by(sample == sample)) %>%
  group_by(state) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = NMDS1, y = NMDS2, color = state#, shape = as.factor(state)
             )) +
  geom_point(size = 4) +
  scale_color_manual(values = state_colors)+
  geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9) +
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right", legend.box = "vertical"
    ) +
    labs(color="State")

betadisper(beta_q1n$C, group_neutral$state) %>% permutest(., pairwise = TRUE) 

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)
Groups     1 0.01995 0.019954 1.2517    999  0.277
Residuals 69 1.09998 0.015942                     

Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
        Florida Texas
Florida          0.27
Texas   0.26711      
adonis2(beta_q1n$S ~ state, 
        data = group_neutral %>% arrange(match(sample,labels(beta_q1n$S))), 
        permutations = 999) %>%
        broom::tidy() %>%
        tt()
tinytable_0r1ersru6zdjsais11yf
term df SumOfSqs R2 statistic p.value
state 1 0.4270251 0.01565094 1.097085 0.335
Residual 69 26.8572877 0.98434906 NA NA
Total 70 27.2843129 1.00000000 NA NA

5.7 Functional Differences

5.7.1 Calculate GIFT Scores

# Aggregate bundle-level GIFTs into the compound level
GIFTs_elements <- to.elements(genome_gifts, GIFT_db2)
GIFTs_elements_filtered <- GIFTs_elements[rownames(GIFTs_elements) %in% genome_counts$genome, ]
GIFTs_elements_filtered <- as.data.frame(GIFTs_elements_filtered) %>%
  select_if(~ !is.numeric(.) || sum(.) != 0)

elements <- GIFTs_elements_filtered %>%
  as.data.frame()

# Aggregate element-level GIFTs into the function level
GIFTs_functions <- to.functions(GIFTs_elements_filtered, GIFT_db2)
functions <- GIFTs_functions %>%
  as.data.frame()

# Aggregate function-level GIFTs into overall Biosynthesis, Degradation and Structural GIFTs
GIFTs_domains <- to.domains(GIFTs_functions, GIFT_db2)
domains <- GIFTs_domains %>%
  as.data.frame()

# Get community-weighed average GIFTs per sample
GIFTs_elements_community <- to.community(GIFTs_elements_filtered, genome_counts_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db2)
GIFTs_functions_community <- to.community(GIFTs_functions, genome_counts_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db2)
GIFTs_domains_community <- to.community(GIFTs_domains, genome_counts_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db2)

5.7.2 Functional Level

GIFTs_functions_community_no0 <- GIFTs_functions_community[rowSums(GIFTs_functions_community[])>0,]

GIFTs_functions_community_no0 %>%
    as.data.frame() %>%
    rownames_to_column(var="sample") %>%
    pivot_longer(!sample,names_to="trait",values_to="gift") %>%
    left_join(sample_metadata, by = join_by(sample == sample)) %>%
    ggplot(aes(x=trait,y=sample,fill=gift)) +
        geom_tile(colour="white", linewidth=0.2)+
        scale_fill_gradientn(colours=rev(c("#2066a8", "#ededed")))+
        facet_grid(sample_type ~ ., scales="free",space="free")

5.7.3 Element Level

#Filter out samples with only 0 GIFT scores
GIFTs_elements_community_no0 <- GIFTs_elements_community[rowSums(GIFTs_elements_community[])>0,]

GIFTs_elements_community_no0 %>%
    as.data.frame() %>%
    rownames_to_column(var="sample") %>%
    pivot_longer(!sample,names_to="trait",values_to="gift") %>%
    left_join(sample_metadata, by = join_by(sample == sample)) %>%
as.data.frame(GIFTs_elements_community_no0) %>%
    mutate(functionid = substr(trait, 1, 3)) %>%
    mutate(trait = case_when(
      trait %in% GIFT_db2$Code_element ~ GIFT_db2$Element[match(trait, GIFT_db2$Code_element)],
      TRUE ~ trait
    )) %>%
    mutate(functionid = case_when(
      functionid %in% GIFT_db2$Code_function ~ GIFT_db2$Function[match(functionid, GIFT_db2$Code_function)],
      TRUE ~ functionid
    )) %>%
    mutate(trait=factor(trait,levels=unique(GIFT_db2$Element))) %>%
    mutate(functionid=factor(functionid,levels=unique(GIFT_db2$Function))) %>%
    ggplot(aes(x=sample,y=trait,fill=gift)) +
        geom_tile(colour="white", linewidth=0.2)+
        scale_fill_gradientn(colours=rev(c("#2066a8", "#ededed"))) + 
        #facet_nested(~ species + sample_type,  scales="free", space="free") + 
        facet_grid(functionid ~ sample_type, scales="free",space="free") +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
              strip.text.y = element_text(angle = 0), axis.text.y = element_text(angle=45, size=5)) + 
        labs(y="Functional Attribute",x="Samples",fill="GIFT")

5.7.4 Element Level Individual functions

#Change column in GIFT_db2 to focus on specific genetic function
GIFT_interest = GIFT_db2[which(GIFT_db2$Function == "Amino acid derivative biosynthesis"),]

GIFTs_elements_community_no0 %>%
    as.data.frame() %>%
    rownames_to_column(var="sample") %>%
    pivot_longer(!sample,names_to="trait",values_to="gift") %>%
    left_join(sample_metadata, by = join_by(sample == sample)) %>%
as.data.frame(GIFTs_elements_community_no0) %>%
    mutate(functionid = substr(trait, 1, 3)) %>%
    mutate(trait = case_when(
      trait %in% GIFT_interest$Code_element ~ GIFT_interest$Element[match(trait, GIFT_interest$Code_element)],
      TRUE ~ trait
    )) %>%
    mutate(functionid = case_when(
      functionid %in% GIFT_interest$Code_function ~ GIFT_interest$Function[match(functionid, GIFT_interest$Code_function)],
      TRUE ~ functionid
    )) %>%
    mutate(trait=factor(trait,levels=unique(GIFT_interest$Element))) %>%
    mutate(functionid=factor(functionid,levels=unique(GIFT_interest$Function))) %>%
    ggplot(aes(x=sample,y=trait,fill=gift)) +
        geom_tile(colour="white", linewidth=0.2)+
        scale_fill_gradientn(colours=rev(c("#2066a8", "#ededed"))) + 
        #facet_nested(~ species + sample_type,  scales="free", space="free") + 
        facet_grid(~ sample_type, scales="free",space="free") +
        theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1),
              strip.text.y = element_text(angle = 90), axis.text.y = element_text(angle=45, size=5)) + 
        labs(y="Functional Attribute",x="Samples",fill="GIFT")

5.7.5 Function Matrix

dist_func = vegdist(GIFTs_elements_community_no0, method ="bray" )
adonis2(dist_func ~ group_neutral$species, 
        data = as.data.frame(GIFTs_elements_community_no0) %>% arrange(match(group_neutral$sample,labels(beta_q1n$S))), 
        permutations = 999) %>%
        broom::tidy() %>%
        tt()
Warning in tidy.anova(.): The following column names in ANOVA output were not recognized or transformed: SumOfSqs,
R2
tinytable_cfezf78oa9fd8p32h77q
term df SumOfSqs R2 statistic p.value
group_neutral$species 1 0.02520837 0.009904455 0.6902439 0.859
Residual 69 2.51994604 0.990095545 NA NA
Total 70 2.54515441 1.000000000 NA NA
adonis2(dist_func ~ group_neutral$sample_type, 
        data = as.data.frame(GIFTs_elements_community_no0) %>% arrange(match(group_neutral$sample,labels(beta_q1n$S))), 
        permutations = 999) %>%
        broom::tidy() %>%
        tt()
Warning in tidy.anova(.): The following column names in ANOVA output were not recognized or transformed: SumOfSqs,
R2
tinytable_1iite6h5416sbbw71jcg
term df SumOfSqs R2 statistic p.value
group_neutral$sample_type 1 0.03909879 0.01536205 1.076519 0.346
Residual 69 2.50605562 0.98463795 NA NA
Total 70 2.54515441 1.00000000 NA NA
betadisper(dist_func, group_neutral$sample_type) %>% permutest()

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq   Mean Sq     F N.Perm Pr(>F)
Groups     1 0.00682 0.0068181 1.101    999  0.307
Residuals 69 0.42730 0.0061928                    
set.seed(123)
nmds_result <- metaMDS(dist_func, k = 2, trymax = 100)
Run 0 stress 0.1844193 
Run 1 stress 0.1992137 
Run 2 stress 0.193558 
Run 3 stress 0.1991246 
Run 4 stress 0.1834961 
... New best solution
... Procrustes: rmse 0.08663094  max resid 0.2839648 
Run 5 stress 0.196854 
Run 6 stress 0.1810674 
... New best solution
... Procrustes: rmse 0.06440697  max resid 0.3445628 
Run 7 stress 0.1768873 
... New best solution
... Procrustes: rmse 0.06494787  max resid 0.3210299 
Run 8 stress 0.1862977 
Run 9 stress 0.1809679 
Run 10 stress 0.1792945 
Run 11 stress 0.1889051 
Run 12 stress 0.1853142 
Run 13 stress 0.1857258 
Run 14 stress 0.1800068 
Run 15 stress 0.1767055 
... New best solution
... Procrustes: rmse 0.08175255  max resid 0.475352 
Run 16 stress 0.2017196 
Run 17 stress 0.1756847 
... New best solution
... Procrustes: rmse 0.08431076  max resid 0.4426863 
Run 18 stress 0.1994475 
Run 19 stress 0.1816233 
Run 20 stress 0.1827079 
Run 21 stress 0.2021623 
Run 22 stress 0.2114578 
Run 23 stress 0.1891313 
Run 24 stress 0.2083115 
Run 25 stress 0.1861193 
Run 26 stress 0.2028519 
Run 27 stress 0.1977956 
Run 28 stress 0.1748759 
... New best solution
... Procrustes: rmse 0.09247303  max resid 0.3676355 
Run 29 stress 0.1816649 
Run 30 stress 0.201681 
Run 31 stress 0.1940182 
Run 32 stress 0.1855793 
Run 33 stress 0.19913 
Run 34 stress 0.2078823 
Run 35 stress 0.1940457 
Run 36 stress 0.1852037 
Run 37 stress 0.1926344 
Run 38 stress 0.1848537 
Run 39 stress 0.1918392 
Run 40 stress 0.1964868 
Run 41 stress 0.2010841 
Run 42 stress 0.1754699 
Run 43 stress 0.1786253 
Run 44 stress 0.1782674 
Run 45 stress 0.1943384 
Run 46 stress 0.2035826 
Run 47 stress 0.1855595 
Run 48 stress 0.213982 
Run 49 stress 0.174449 
... New best solution
... Procrustes: rmse 0.06777011  max resid 0.413151 
Run 50 stress 0.2131544 
Run 51 stress 0.2024524 
Run 52 stress 0.1817632 
Run 53 stress 0.1965729 
Run 54 stress 0.1766407 
Run 55 stress 0.1805406 
Run 56 stress 0.1754543 
Run 57 stress 0.1785108 
Run 58 stress 0.1775952 
Run 59 stress 0.1875204 
Run 60 stress 0.1775863 
Run 61 stress 0.1808415 
Run 62 stress 0.1934209 
Run 63 stress 0.1972299 
Run 64 stress 0.1898226 
Run 65 stress 0.1768942 
Run 66 stress 0.2035381 
Run 67 stress 0.2044475 
Run 68 stress 0.1910243 
Run 69 stress 0.1915126 
Run 70 stress 0.1786784 
Run 71 stress 0.2035656 
Run 72 stress 0.1931804 
Run 73 stress 0.1887419 
Run 74 stress 0.1759115 
Run 75 stress 0.2157983 
Run 76 stress 0.1857274 
Run 77 stress 0.1731775 
... New best solution
... Procrustes: rmse 0.07835955  max resid 0.481939 
Run 78 stress 0.1794544 
Run 79 stress 0.2024807 
Run 80 stress 0.1904765 
Run 81 stress 0.1779329 
Run 82 stress 0.1749454 
Run 83 stress 0.1998384 
Run 84 stress 0.1880411 
Run 85 stress 0.1912452 
Run 86 stress 0.1811838 
Run 87 stress 0.202975 
Run 88 stress 0.1795674 
Run 89 stress 0.185256 
Run 90 stress 0.2005972 
Run 91 stress 0.1933866 
Run 92 stress 0.1762495 
Run 93 stress 0.180012 
Run 94 stress 0.1851051 
Run 95 stress 0.1854797 
Run 96 stress 0.184186 
Run 97 stress 0.2022917 
Run 98 stress 0.1787965 
Run 99 stress 0.186392 
Run 100 stress 0.1766509 
*** Best solution was not repeated -- monoMDS stopping criteria:
    35: no. of iterations >= maxit
    65: stress ratio > sratmax
nmds_scores <- as.data.frame(nmds_result$points)

#GIFT ordination by sample type
nmds_scores$Group <- group_neutral$sample_type

x_cen = mean(nmds_scores$MDS1, na.rm = TRUE)
y_cen = mean(nmds_scores$MDS2, na.rm = TRUE)
ggplot(nmds_scores, aes(x = MDS1, y = MDS2, color = Group)) +
  geom_point(size = 4) +
  scale_color_manual(values = sampletype_colors) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = MDS1, yend = MDS2), alpha = 0.9) +
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right", legend.box = "vertical"
    )

#GIFT ordination by species
nmds_scores$Group <- group_neutral$species

x_cen = mean(nmds_scores$MDS1, na.rm = TRUE)
y_cen = mean(nmds_scores$MDS2, na.rm = TRUE)
ggplot(nmds_scores, aes(x = MDS1, y = MDS2, color = Group)) +
  geom_point(size = 4) +
  scale_color_manual(values = species_colors) +
  geom_segment(aes(x = x_cen, y = y_cen, xend = MDS1, yend = MDS2), alpha = 0.9) +
  theme(
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    panel.background = element_blank(),
    axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 12),
    legend.position = "right", legend.box = "vertical"
    )

### Differential Abundance of Bacteria by Sample Type

phylo_metadata = sample_metadata_non0
phylo_metadata = data.frame(phylo_metadata)
rownames(phylo_metadata) = phylo_metadata[,1]
phylo_metadata = phylo_metadata[,-1]
phylo_metadata = sample_data(phylo_metadata)

phylo_mag = otu_table(genome_counts_non0, taxa_are_rows=TRUE)

mag_table = phyloseq(phylo_mag, phylo_metadata)

res <- ancombc2(
  data = mag_table,
  fix_formula = "sample_type",
  p_adj_method = "holm",
  lib_cut = 20,
  group = "sample_type",
  struc_zero = TRUE,
  neg_lb = TRUE,
  alpha = 0.05,
  global = FALSE,
  verbose = TRUE
)
Obtaining initial estimates ...
Estimating sample-specific biases ...
Sensitivity analysis for pseudo-count addition to 0s: ...
ANCOM-BC2 primary results ...
# View results
lfc_res <- as.data.frame(res$res$taxon)
colnames(lfc_res)[1] = "Function"
rownames(lfc_res) = lfc_res$Function
lfc_res$Function = NULL

lfc_res = mutate(lfc_res, lfc = res$res$'lfc_sample_typeStomach contents')
lfc_res = mutate(lfc_res, p_val = res$res$'p_sample_typeStomach contents')

# See significant taxa
sig_taxa_sample = filter(lfc_res, p_val < 0.05)
print(sig_taxa_sample)
                       lfc        p_val
EHA01634_bin.46  1.1151583 0.0318630762
EHA01666_bin.16 -1.1699768 0.0019150418
EHA02611_bin.33  0.9825746 0.0086812155
EHA02611_bin.37 -0.9516886 0.0191501006
EHA02637_bin.51 -0.8556469 0.0096416401
EHA02637_bin.7  -1.3466827 0.0003748247
EHA03085_bin.25 -0.6651150 0.0402859338
EHA03138_bin.47 -0.7490996 0.0183654450
EHA03161_bin.23 -0.9584138 0.0068342412
EHA03189_bin.19 -1.2871884 0.0026345092
EHA03189_bin.1  -0.7994314 0.0309347176
EHA03206_bin.25 -1.3999139 0.0002905914
sig_taxa_sample$genome = rownames(sig_taxa_sample)

sig_taxa_sample = merge(sig_taxa_sample, genome_metadata, by = "genome")
sig_taxa_sample$signif_label <- cut(sig_taxa_sample$p_val,
                                     breaks = c(-Inf, 0.001, 0.01, 0.05, Inf),
                                     labels = c("***", "**", "*", ""))

# Bar plot of significant taxa (ordered by log fold change)
ggplot(sig_taxa_sample, aes(x = reorder(phylum, lfc), y = lfc, fill = lfc > 0)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(aes(label = signif_label), 
            hjust = ifelse(sig_taxa_sample$lfc > 0, -0.2, 1.2), 
            color = "black", 
            size = 4) +
  coord_flip() +
  scale_fill_manual(values = sampletype_colors,
                    labels = c("Colon Contents", "Stomach Contents")) +
  theme_minimal() +
  labs(
    x = "Family",
    y = "Log Fold Change",
    title = "Enrichment of Significant Taxa",
    fill = "More abundant in"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

### Differential Abundance of Bacteria by Species

phylo_metadata = sample_metadata_non0
phylo_metadata = data.frame(phylo_metadata)
rownames(phylo_metadata) = phylo_metadata[,1]
phylo_metadata = phylo_metadata[,-1]
phylo_metadata = sample_data(phylo_metadata)

phylo_mag = otu_table(genome_counts_non0, taxa_are_rows=TRUE)

mag_table = phyloseq(phylo_mag, phylo_metadata)

res <- ancombc2(
  data = mag_table,
  fix_formula = "species",
  p_adj_method = "holm",
  lib_cut = 20,
  group = "species",
  struc_zero = TRUE,
  neg_lb = TRUE,
  alpha = 0.05,
  global = FALSE,
  verbose = TRUE
)
Obtaining initial estimates ...
Estimating sample-specific biases ...
Sensitivity analysis for pseudo-count addition to 0s: ...
ANCOM-BC2 primary results ...
# View results
lfc_res <- as.data.frame(res$res$taxon)
colnames(lfc_res)[1] = "Function"
rownames(lfc_res) = lfc_res$Function
lfc_res$Function = NULL

lfc_res = mutate(lfc_res, lfc = res$res$'lfc_speciesCoragyps atratus')
lfc_res = mutate(lfc_res, p_val = res$res$'p_speciesCoragyps atratus')

# See significant taxa
sig_taxa_species = filter(lfc_res, p_val < 0.05)
print(sig_taxa_species)
                       lfc        p_val
EHA01589_bin.17 -0.8118426 3.737945e-03
EHA01589_bin.2   0.8957119 2.371683e-04
EHA01595_bin.5  -0.6359475 5.971373e-03
EHA01666_bin.16  0.6721596 2.242389e-03
EHA01709_bin.62  0.6927908 1.828504e-03
EHA02611_bin.16  0.5630132 9.184508e-03
EHA02611_bin.37  1.1601796 6.500070e-05
EHA02611_bin.39  0.9397022 2.874165e-04
EHA02637_bin.37 -0.7543196 2.960604e-03
EHA02637_bin.51 -0.7246455 1.899483e-03
EHA03085_bin.25 -0.8075419 1.153715e-03
EHA03085_bin.3  -0.7036940 2.802183e-03
EHA03128_bin.14  0.8331161 2.548205e-03
EHA03128_bin.35  0.3622284 4.548755e-02
EHA03138_bin.21 -1.5344794 5.085004e-05
EHA03138_bin.47 -0.5864219 4.052138e-03
EHA03138_bin.4  -0.7960476 3.495705e-03
EHA03161_bin.87 -0.4585107 3.907148e-02
EHA03161_bin.94  0.5946715 8.011410e-03
EHA03189_bin.40 -0.5775843 1.235070e-02
EHA03189_bin.46  1.0915245 5.726228e-05
EHA03189_bin.9   1.1688334 3.470488e-04
EHA03206_bin.1  -1.1621375 5.821359e-03
EHA03206_bin.23  0.6652361 4.316121e-03
sig_taxa_species$genome = rownames(sig_taxa_species)

sig_taxa_species = merge(sig_taxa_species, genome_metadata, by = "genome")
sig_taxa_species$signif_label <- cut(sig_taxa_species$p_val,
                                     breaks = c(-Inf, 0.001, 0.01, 0.05, Inf),
                                     labels = c("***", "**", "*", ""))

# Bar plot of significant taxa (ordered by log fold change)
ggplot(sig_taxa_species, aes(x = reorder(phylum, lfc), y = lfc, fill = lfc > 0)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(aes(label = signif_label), 
            hjust = ifelse(sig_taxa_species$lfc > 0, -0.2, 1.2), 
            color = "black", 
            size = 4) +
  coord_flip() +
  scale_fill_manual(values = species_colors,
                    labels = c("Cathartes aura", "Coragyps atratus")) +
  theme_minimal() +
  labs(
    x = "Family",
    y = "Log Fold Change",
    title = "Enrichment of Significant Taxa",
    fill = "More abundant in"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

5.7.6 Wilcoxon comparison

element_gift <- GIFTs_elements_community_no0 %>% 
  as.data.frame() %>% 
  rownames_to_column(., "sample") %>% 
  #Change column number to column in sample_metadata to change comparison sample_type, species, etc
  left_join(sample_metadata[c(1,5)], by="sample")
comparison = colnames(element_gift)[length(colnames(element_gift))]

uniqueGIFT_db<- unique(GIFT_db2) %>% unite("Function",Function:Element, sep= "_", remove=FALSE)

#Wilcoxan test
sig_wilcox_sample = element_gift %>%
  pivot_longer(-c(sample, comparison), names_to = "trait", values_to = "value") %>%
  group_by(trait) %>%
  summarise(
    p_value = wilcox.test(value ~ get(comparison))$p.value,
    mean_1 = mean(value[get(comparison) == unique(element_gift[,comparison])[1]], na.rm = TRUE),
    mean_2 = mean(value[get(comparison) == unique(element_gift[,comparison])[2]], na.rm = TRUE),
    mean_difference = mean_1 - mean_2,  # Magnitude of difference
    direction = case_when(
      mean_difference > 0 ~ paste("Higher in", unique(element_gift[,comparison])[1]),
      mean_difference < 0 ~ paste("Higher in", unique(element_gift[,comparison])[2]),
      TRUE ~ "No Difference"
    )
  ) %>% 
  mutate(trait_name = trait) %>%
  mutate(trait_name = case_when(
      trait %in% GIFT_db2$Code_element ~ GIFT_db2$Element[match(trait_name, GIFT_db2$Code_element)],
      TRUE ~ trait_name
    )) %>%
  mutate(
    p_adjust = p.adjust(p_value, method = "BH"),
    p_label = case_when(
      p_adjust < 0.001 ~ "***",
      p_adjust < 0.01  ~ "**",
      p_adjust < 0.05  ~ "*",
      TRUE ~ ""
    ),
    trait_label = paste0(trait_name, " ", p_label)  # Append p-values to trait name
  ) %>%
  filter(p_adjust < 0.01) #Keep only significant results
  

#Match significant functions to table
wilcox_elements_community_no0_sig = GIFTs_elements_community_no0[,which(match(colnames(GIFTs_elements_community_no0), sig_wilcox_sample$trait)>0)]

#Create table of gift scores with significant differences between comparisons
heatmap_data <- wilcox_elements_community_no0_sig %>%
    as.data.frame() %>%
    rownames_to_column(var="sample") %>%
    pivot_longer(!sample, names_to="trait", values_to="gift") %>%
    left_join(sample_metadata, by = join_by(sample == sample)) %>%
    mutate(functionid = substr(trait, 1, 3)) %>%
    mutate(trait = case_when(
      trait %in% GIFT_db2$Code_element ~ GIFT_db2$Element[match(trait, GIFT_db2$Code_element)],
      TRUE ~ trait
    )) %>%
    mutate(functionid = case_when(
      functionid %in% GIFT_db2$Code_function ~ GIFT_db2$Function[match(functionid, GIFT_db2$Code_function)],
      TRUE ~ functionid
    )) %>%
    left_join(sig_wilcox_sample %>% select(trait, trait_label, mean_difference, direction, p_adjust), by="trait") %>%
    mutate(trait_label = factor(trait_label, levels = unique(trait_label))) %>%
    mutate(functionid = factor(functionid, levels = unique(GIFT_db2$Function)))

ggplot(heatmap_data, aes(x=sample, y=paste0(trait, " ", sig_wilcox_sample$p_label), fill=gift)) +  
    geom_tile(colour="white", linewidth=0.2) +  
    scale_fill_gradientn(colours=rev(c("#2066a8", "#ededed"))) +  
    facet_nested(functionid ~ get(comparison) + species, scales="free", space="free") +  
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
          strip.text.y = element_text(angle = 0), axis.text.y = element_text(angle=45, size=10)) + 
    labs(y="Functional Attribute (with p-values)", x="Samples", fill="GIFT")

### Volcano Plot

sig_wilcox_sample$negLogP <- -log10(sig_wilcox_sample$p_adjust)

ggplot(sig_wilcox_sample, aes(x = mean_difference, y = negLogP)) +
  geom_point(aes(), alpha = 0.7) +  # Highlight significant points
  geom_text_repel(aes(label = trait_name), size = 3, max.overlaps = 10) +  # Avoid too many overlapping labels
  theme_minimal() +
  labs(x = "GIFT Difference",
       y = "-Log10(P-value)") +
  theme(plot.title = element_text(hjust = 0.5))  # Center title