Chapter 5 Compositional analysis

load("data/data.Rdata")

5.0.1 Taxonomy barplot per individual

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(!is.na(count)) %>%
  ggplot(aes(y=count,x=sample, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    labs(x = "Relative abundance", y ="Samples") +
    facet_nested(. ~ species.y + sample_type ,  scales="free", space="free") + #facet per day and treatment
    scale_y_continuous(expand = c(0.001, 0.001)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
          legend.position = "none",
          strip.background.x=element_rect(color = NA, fill= "#f4f4f4"))

5.1 Taxonomy boxplot

5.1.1 Family

family_summary <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  group_by(sample,family) %>%
  summarise(relabun=sum(count))

family_summary %>%
    filter(!is.na(relabun)) %>%
    group_by(family) %>%
    summarise(mean=mean(relabun),sd=sd(relabun)) %>%
    mutate(family= sub("^f__", "", family)) %>%
    arrange(-mean) %>%
    tt()
tinytable_jlgda8mtqi1yletmkdju
family mean sd
Bacteroidaceae 1.349934e-01 0.2355762671
Peptostreptococcaceae 1.265954e-01 0.2003047797
Fusobacteriaceae 1.179239e-01 0.1458738058
Enterobacteriaceae 1.088605e-01 0.1905777531
Helicobacteraceae 9.413014e-02 0.1767378275
Clostridiaceae 6.945329e-02 0.1243229057
Campylobacteraceae 5.591634e-02 0.1131315105
Catellicoccaceae 5.305885e-02 0.1422465924
Mycoplasmoidaceae 4.050434e-02 0.1714127764
Lactobacillaceae 3.758704e-02 0.1600968373
Cellulosilyticaceae 3.556923e-02 0.0582955697
Porphyromonadaceae 2.197603e-02 0.0666469294
Peptoniphilaceae 2.060237e-02 0.0515934647
CAG-274 1.210850e-02 0.0395977824
Burkholderiaceae_A 9.686136e-03 0.0376160652
Chlamydiaceae 8.117520e-03 0.0382848789
Desulfovibrionaceae 5.580109e-03 0.0359452882
Selenomonadaceae 5.101894e-03 0.0359629704
Erysipelotrichaceae 5.006995e-03 0.0204090413
3.729786e-03 0.0335719378
Veillonellaceae 3.626526e-03 0.0106389862
Filifactoraceae 3.247374e-03 0.0115032184
Lachnospiraceae 2.942034e-03 0.0081314504
Anaerovoracaceae 2.483841e-03 0.0089452121
Tissierellaceae 2.444076e-03 0.0224003238
Ruminococcaceae 2.382604e-03 0.0064807153
Planococcaceae 2.135437e-03 0.0186267365
Peptococcaceae 2.056186e-03 0.0072240288
Tepidimicrobiaceae 1.618136e-03 0.0146032741
Mucispirillaceae 1.598802e-03 0.0047897732
Streptococcaceae 1.415677e-03 0.0052594300
UBA932 1.394879e-03 0.0052242104
Acidaminococcaceae 9.736027e-04 0.0034979478
Mycobacteriaceae 7.391909e-04 0.0049450852
Tissierellaceae_A 6.640589e-04 0.0060862001
Vagococcaceae 6.388941e-04 0.0035752333
Butyricicoccaceae 5.795755e-04 0.0021993548
CAG-508 4.136741e-04 0.0018999987
Coprobacillaceae 3.539934e-04 0.0020253897
Moraxellaceae 3.475403e-04 0.0021694914
Atopobiaceae 3.008484e-04 0.0017712414
Anaerotignaceae 2.804028e-04 0.0011764975
Tannerellaceae 2.467874e-04 0.0010948157
Oscillospiraceae 2.313110e-04 0.0011111966
UBA3375 1.792426e-04 0.0007368232
Turicibacteraceae 1.342658e-04 0.0007561806
Wohlfahrtiimonadaceae 6.919587e-05 0.0006341906
family_arrange <- family_summary %>%
    filter(!is.na(relabun)) %>%
    group_by(family) %>%
    summarise(mean=sum(relabun)) %>%
    arrange(-mean) %>%
    select(family) %>%
    mutate(family= sub("^f__", "", family)) %>%
    pull()

family_summary %>%
    left_join(genome_metadata %>% select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
    left_join(sample_metadata,by=join_by(sample==sample)) %>%
    mutate(family= sub("^f__", "", family)) %>%
    filter(family %in% family_arrange[1:20]) %>%
    mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
    filter(relabun > 0) %>%
    ggplot(aes(x=relabun, y=family, group=family, color=phylum, fill=phylum)) +
        scale_color_manual(values=phylum_colors[-8]) +
        scale_fill_manual(values=phylum_colors[-8]) +
        #geom_boxplot(alpha=0.2) +
        geom_jitter(alpha=0.5) + 
        facet_nested(. ~ species + sample_type)+
        theme_minimal() +
        theme(legend.position = "none")

5.1.2 Genus

genus_summary <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  left_join(genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  group_by(sample,genus) %>%
  summarise(relabun=sum(count)) %>%
  filter(genus != "g__")

genus_summary %>%
    filter(!is.na(relabun)) %>%
    group_by(genus) %>%
    summarise(mean=mean(relabun),sd=sd(relabun)) %>%
    arrange(-mean) %>%
    tt()
tinytable_s1of2hrpcbds026t3hky
genus mean sd
g__Fusobacterium_A 1.179239e-01 0.1458738058
g__Escherichia 1.045296e-01 0.1860683657
g__GCA-900066495 8.862392e-02 0.1958817247
g__Campylobacter_D 5.591634e-02 0.1131315105
g__Catellicoccus 5.305885e-02 0.1422465924
g__Prevotella 5.084046e-02 0.1755423947
g__Helicobacter_G 5.055896e-02 0.1218576010
g__Zhenhengia 3.556923e-02 0.0582955697
g__Helicobacter_D 3.215986e-02 0.1029459307
g__Peptostreptococcus 3.118737e-02 0.0694638235
g__Sarcina 2.609951e-02 0.0559480274
g__Ureaplasma 2.448075e-02 0.1206061525
g__Alloprevotella 2.361889e-02 0.0611609038
g__Ligilactobacillus 2.249411e-02 0.1154584801
g__Porphyromonas 2.197603e-02 0.0666469294
g__Anaerosphaera 1.983949e-02 0.0514068407
g__Clostridium 1.655643e-02 0.0310109524
g__Phocaeicola 1.526923e-02 0.0865751567
g__Clostridium_H 1.258596e-02 0.0430584409
g__Tyzzerella 1.210850e-02 0.0395977824
g__Mycoplasma_L 1.195384e-02 0.0932743545
g__F6-6636 1.192650e-02 0.0705082437
g__Bacteroides 1.100378e-02 0.0516588292
g__Sutterella 9.371858e-03 0.0374702630
g__Bacteroides_E 9.083135e-03 0.0665959028
g__Clostridium_G 8.315260e-03 0.0757795952
g__Chlamydiifrater 8.117520e-03 0.0382848789
g__Mailhella 5.580109e-03 0.0359452882
g__Bulleidia 4.803233e-03 0.0204153655
g__RGIG1987 4.069749e-03 0.0304552880
g__Plesiomonas 3.884902e-03 0.0175479313
g__Hathewaya 2.601320e-03 0.0104447961
g__Clostridium_J 2.442256e-03 0.0048099737
g__Savagea 2.135437e-03 0.0186267365
g__Lactobacillus 2.090952e-03 0.0113853596
g__Cryptobacteroides 1.394879e-03 0.0052242104
g__Lactococcus 1.311435e-03 0.0052564538
g__Limosilactobacillus 1.075479e-03 0.0080564212
g__Anaerotruncus 9.660959e-04 0.0025849276
g__UBA9414 9.201366e-04 0.0044801473
g__Paraclostridium 7.946527e-04 0.0054791406
g__Gemmiger 7.589084e-04 0.0046336342
g__Mycobacterium 7.391909e-04 0.0049450852
g__Terrisporobacter 7.354962e-04 0.0030472950
g__Enterocloster 6.914772e-04 0.0044543676
g__Vagococcus_C 5.830821e-04 0.0035663571
g__Butyricicoccus 5.795755e-04 0.0021993548
g__Edwardsiella 4.459381e-04 0.0028735031
g__Clostridium_X 4.187222e-04 0.0012889576
g__CAG-793 4.136741e-04 0.0018999987
g__Ruthenibacterium 3.689964e-04 0.0023769823
g__Thomasclavelia 3.539934e-04 0.0020253897
g__Parasutterella 3.142773e-04 0.0016953939
g__Anaerotignum 2.804028e-04 0.0011764975
g__UBA1367 2.741167e-04 0.0017661111
g__Clostridium_AP 2.494888e-04 0.0016072985
g__Parabacteroides 2.467874e-04 0.0010948157
g__Anaerofilum 2.460626e-04 0.0007805127
g__Blautia 2.235156e-04 0.0014398071
g__Dwaynesavagella 2.175211e-04 0.0013155954
g__Clostridium_F 2.163132e-04 0.0014772755
g__Faecalicoccus 2.037620e-04 0.0013125687
g__UBA3375 1.792426e-04 0.0007368232
g__Turicibacter 1.342658e-04 0.0007561806
g__Flavonifractor 1.240599e-04 0.0007991467
g__Sellimonas 1.191311e-04 0.0005809912
g__Streptococcus 1.042418e-04 0.0005289080
g__Lawsonibacter 7.202973e-05 0.0003356782
g__Vagococcus 5.581201e-05 0.0003595280
g__RGIG3102 4.254089e-05 0.0002548549
g__Pseudoflavonifractor_A 3.522144e-05 0.0001849164
g__RGIG4373 2.673169e-05 0.0001816031
genus_arrange <- genus_summary %>%
    group_by(genus) %>%
    summarise(mean=sum(relabun)) %>%
    filter(genus != "g__")%>%
    arrange(-mean) %>%
    select(genus) %>%
    mutate(genus= sub("^g__", "", genus)) %>%
    pull()

genus_summary %>%
    left_join(genome_metadata %>% select(genus,phylum) %>% unique(),by=join_by(genus==genus)) %>%
    left_join(sample_metadata,by=join_by(sample==sample)) %>%
    mutate(genus= sub("^g__", "", genus)) %>%
    filter(genus %in% genus_arrange[1:20]) %>%
    mutate(genus=factor(genus,levels=rev(genus_arrange[1:20]))) %>%
    filter(relabun > 0) %>%
    ggplot(aes(x=relabun, y=genus, group=genus, color=phylum)) +
        scale_color_manual(values=phylum_colors) +
        #geom_boxplot() +
        geom_jitter(alpha=0.5) + 
        facet_nested(. ~ species + sample_type)+
        theme_minimal()

5.2 Alpha diversity

# Calculate Hill numbers
richness <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 0) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(richness = 1) %>%
  rownames_to_column(var = "sample")

neutral <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 1) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(neutral = 1) %>%
  rownames_to_column(var = "sample")

phylogenetic <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 1, tree = genome_tree) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(phylogenetic = 1) %>%
  rownames_to_column(var = "sample")

# Aggregate basal GIFT into elements
dist <- genome_gifts %>%
  to.elements(., GIFT_db) %>%
  traits2dist(., method = "gower")

functional <- genome_counts_filt %>%
  filter(genome %in% labels(dist)[[1]]) %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 1, dist = dist) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(functional = 1) %>%
  rownames_to_column(var = "sample") %>%
  mutate(functional = if_else(is.nan(functional), 1, functional))

# Merge all metrics
alpha_div <- richness %>%
  full_join(neutral, by = join_by(sample == sample)) %>%
  full_join(phylogenetic, by = join_by(sample == sample)) %>%
  full_join(functional, by = join_by(sample == sample))
alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = join_by(sample == sample)) %>%
  mutate(metric=factor(metric,levels=c("richness","neutral","phylogenetic","functional"))) %>%
  unite("group", c(species,sample_type), remove = FALSE) %>%
      ggplot(aes(y = value, x = group, group=group, color=group, fill=group)) +
      geom_boxplot(outlier.shape = NA) +
      geom_jitter(alpha=0.5) +
      scale_color_manual(name="Group",
          breaks=c("Cathartes aura_Colon contents","Coragyps atratus_Colon contents","Cathartes aura_Stomach contents","Coragyps atratus_Stomach contents"),
          labels=c("Cathartes aura (colon)","Coragyps atratus (colon)","Cathartes aura (stomach)","Coragyps atratus (stomach)"),
          values=c("#e5bd5b", "#6b7398","#e2815a", "#876b96")) +
      scale_fill_manual(name="Group",
          breaks=c("Cathartes aura_Colon contents","Coragyps atratus_Colon contents","Cathartes aura_Stomach contents","Coragyps atratus_Stomach contents"),
          labels=c("Cathartes aura (colon)","Coragyps atratus (colon)","Cathartes aura (stomach)","Coragyps atratus (stomach)"),
          values=c("#e5bd5b50", "#6b739850","#e2815a50", "#876b9650")) +
      facet_wrap(. ~ metric, scales = "free", ncol=4) +
      coord_cartesian(xlim = c(1, NA)) +
      theme_classic() +
      theme(
        strip.background = element_blank(),
        panel.grid.minor.x = element_line(size = .1, color = "grey"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_blank())

5.3 Beta diversity

beta_q0n <- genome_counts_filt %>%
  column_to_rownames(., "genome") %>%
  hillpair(., q = 0)

beta_q1n <- genome_counts_filt %>%
  column_to_rownames(., "genome") %>%
  hillpair(., q = 1)

beta_q1p <- genome_counts_filt %>%
  column_to_rownames(., "genome") %>%
  hillpair(., q = 1, tree = genome_tree)

beta_q1f <- genome_counts_filt %>%
  filter(genome %in% labels(dist)[[1]]) %>%
  column_to_rownames(., "genome") %>%
  hillpair(., q = 1, dist = dist)