Chapter 6 Taxonomic composition

# Load the saved workspace. The chunks below presumably take genome_counts_filt,
# genome_metadata, sample_metadata and phylum_colors from it -- TODO confirm.
load("data/metagenomics/data.Rdata")

6.0.1 Taxonomy barplot per individual

# Stacked barplot of the relative abundance of each phylum in every sample,
# with one facet per bat species.
genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation (divide each column by its total)
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% # one row per genome-sample pair
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  filter(!is.na(count)) %>% # drop missing values (is.na() is also TRUE for NaN, e.g. 0 / 0)
  ggplot(aes(x = sample, y = count, fill = phylum, group = phylum)) + # grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat = "identity", colour = "white", linewidth = 0.1) + # plot stacked bars with white borders
    scale_fill_manual(values = phylum_colors) +
    # x holds the samples and y the relative abundance; the x title is hidden by the theme below
    labs(x = "Samples", y = "Relative abundance") +
    facet_nested(. ~ bat_species, scales = "free", space = "free") + # one facet per bat species
    scale_y_continuous(expand = c(0.001, 0.001)) +
    theme(
      axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
      axis.title.x = element_blank(),
      panel.background = element_blank(),
      panel.border = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
      legend.position = "none",
      strip.background.x = element_rect(color = NA, fill = "#f4f4f4")
    )

6.1 Taxonomy boxplot

6.1.1 Family

# Relative abundance of each family in each sample: genome-level relative
# abundances are summed within every sample-family combination.
family_summary <- genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation (divide each column by its total)
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% # one row per genome-sample pair
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, family) %>%
  # .groups = "drop" returns an ungrouped table and silences the regrouping message
  summarise(relabun = sum(count), .groups = "drop")

# Table of the mean and standard deviation of relative abundance per family,
# sorted from the highest to the lowest mean.
family_summary %>%
  filter(!is.na(relabun)) %>%
  group_by(family) %>%
  summarise(
    mean = mean(relabun),
    sd = sd(relabun)
  ) %>%
  mutate(family = sub("^f__", "", family)) %>% # strip the "f__" rank prefix
  arrange(desc(mean)) %>%
  tt()
tinytable_dpop2esye0al3d6m3cvo
family mean sd
Diplorickettsiaceae 2.066430e-01 0.3630831186
Enterobacteriaceae 1.893277e-01 0.3066240395
Rickettsiaceae 8.145172e-02 0.2374167586
Vibrionaceae 7.972825e-02 0.2063738722
Mycoplasmoidaceae 4.314658e-02 0.1816972619
Chromatiaceae 3.786770e-02 0.1871226759
Desulfovibrionaceae 2.805650e-02 0.0759014047
Pasteurellaceae 2.746922e-02 0.1061643096
Weeksellaceae 2.724435e-02 0.1486394013
Burkholderiaceae 2.703566e-02 0.1383404820
Mycoplasmataceae 2.582675e-02 0.1187781701
Enterococcaceae 2.018684e-02 0.0706119540
Aeromonadaceae 2.015713e-02 0.0692033892
Dysgonomonadaceae 1.896333e-02 0.0497917479
Metamycoplasmataceae 1.831217e-02 0.1281864393
Helicobacteraceae 1.630153e-02 0.1107680426
Anaplasmataceae 1.554908e-02 0.0927522928
Tannerellaceae 1.549917e-02 0.0535737293
Acetobacteraceae 1.360257e-02 0.0967742539
Adiutricaceae 1.182852e-02 0.0349008385
Rhizobiaceae 9.108139e-03 0.0366543408
Leptotrichiaceae 7.074750e-03 0.0196300205
Bacteroidaceae 6.853804e-03 0.0364486712
Lachnospiraceae 6.496660e-03 0.0247911875
Synergistaceae 5.587881e-03 0.0174072370
UBA932 5.565920e-03 0.0216533335
Neisseriaceae 5.157037e-03 0.0268237187
Streptococcaceae 3.499662e-03 0.0202503299
Oscillospiraceae 2.692306e-03 0.0115704557
2.309250e-03 0.0058956882
Burkholderiaceae_B 2.235806e-03 0.0071724137
Gemellaceae 2.121398e-03 0.0106187686
Rhodocyclaceae 1.968326e-03 0.0113503074
Halomonadaceae 1.883561e-03 0.0134513161
Microcoleaceae 1.849207e-03 0.0092981482
Ruminococcaceae 1.549323e-03 0.0059246842
Erysipelotrichaceae 1.340191e-03 0.0046796931
Acutalibacteraceae 1.217931e-03 0.0040092177
Elusimicrobiaceae 1.081020e-03 0.0070276238
Christensenellaceae 9.622707e-04 0.0065229596
Micrococcaceae 8.277861e-04 0.0059115751
SZUA-567 7.768563e-04 0.0041153802
Mucispirillaceae 7.025020e-04 0.0035261340
Fusobacteriaceae 6.907455e-04 0.0029637575
WRBN01 3.657439e-04 0.0026119337
Beijerinckiaceae 3.384698e-04 0.0016917143
Anaerotignaceae 3.244078e-04 0.0012663030
Rikenellaceae 3.236972e-04 0.0017316093
Clostridiaceae 2.328503e-04 0.0016628836
Cyanobiaceae 1.713001e-04 0.0009579297
Endomicrobiaceae 1.568425e-04 0.0010127663
CAG-508 9.142321e-05 0.0006528923
Campylobacteraceae 8.972836e-05 0.0004001237
UBA660 7.911019e-05 0.0005125600
CAG-239 7.629402e-05 0.0005448483
# Family names (without the "f__" prefix) ordered by decreasing summed
# relative abundance; used to pick and order the families shown in the plot.
family_arrange <- family_summary %>%
  filter(!is.na(relabun)) %>%
  group_by(family) %>%
  summarise(total = sum(relabun)) %>%
  arrange(desc(total)) %>%
  pull(family) %>%
  sub("^f__", "", .)

# Boxplot with overlaid points of the relative abundance of the 20 top-ranked
# families (ranking taken from family_arrange), faceted by bat species and
# coloured by phylum.
family_summary %>%
  left_join(genome_metadata %>% select(family, phylum) %>% distinct(), by = join_by(family)) %>% # attach the phylum of each family
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  mutate(family = sub("^f__", "", family)) %>% # strip the "f__" rank prefix
  filter(family %in% family_arrange[1:20]) %>%
  mutate(family = factor(family, levels = rev(family_arrange[1:20]))) %>% # reversed levels put the top-ranked family at the top of the y axis
  filter(relabun > 0) %>% # keep only samples in which the family was detected
  ggplot(aes(x = relabun, y = family, group = family, color = phylum, fill = phylum)) +
    # NOTE(review): phylum_colors[-8] drops the 8th colour by position; the genus
    # plot uses the full vector -- confirm this exclusion is still intended.
    scale_color_manual(values = phylum_colors[-8]) +
    scale_fill_manual(values = phylum_colors[-8]) +
    # outliers are hidden here because geom_jitter() already draws every point;
    # otherwise outlying samples would be plotted twice
    geom_boxplot(alpha = 0.2, outlier.shape = NA) +
    geom_jitter(alpha = 0.5) +
    facet_nested(. ~ bat_species) +
    theme_minimal() +
    theme(legend.position = "none")

6.1.2 Genus

# Relative abundance of each genus in each sample: genome-level relative
# abundances are summed within every sample-genus combination.
genus_summary <- genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation (divide each column by its total)
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% # one row per genome-sample pair
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, genus) %>%
  # .groups = "drop" returns an ungrouped table and silences the regrouping message
  summarise(relabun = sum(count), .groups = "drop") %>%
  # remove genomes with an empty genus label; filter() also drops rows where genus is NA
  filter(genus != "g__")

# Table of the mean and standard deviation of relative abundance per genus,
# sorted from the highest to the lowest mean.
genus_summary %>%
  filter(!is.na(relabun)) %>%
  group_by(genus) %>%
  summarise(
    mean = mean(relabun),
    sd = sd(relabun)
  ) %>%
  arrange(desc(mean)) %>%
  tt()
tinytable_twr680kqi1xhyy7gfl7u
genus mean sd
g__Aquirickettsiella 2.066430e-01 0.3630831186
g__Rickettsia 8.145172e-02 0.2374167586
g__Vibrio 7.972825e-02 0.2063738722
g__Malacoplasma 4.314658e-02 0.1816972619
g__Serratia 3.781432e-02 0.1442735807
g__Klebsiella 3.527392e-02 0.1609844058
g__Proteus 3.502862e-02 0.1388093531
g__Apibacter 2.724435e-02 0.1486394013
g__Aeromonas 2.015713e-02 0.0692033892
g__Dysgonomonas 1.896333e-02 0.0497917479
g__UBA710 1.831217e-02 0.1281864393
g__Morganella 1.639039e-02 0.0700801355
g__Enterococcus 1.584941e-02 0.0498525505
g__NHYM01 1.552127e-02 0.1108440094
g__Edwardiiplasma 1.517989e-02 0.1073274591
g__Aggregatibacter 1.425805e-02 0.0847502171
g__Tannerella 1.333155e-02 0.0496532992
g__Pasteurella 1.321117e-02 0.0668769540
g__Providencia 1.317104e-02 0.0825454378
g__FLUQ01 1.244851e-02 0.0380568384
g__Mesenet 1.197438e-02 0.0855141593
g__Arsenophonus 1.191432e-02 0.0823521277
g__Adiutrix 1.182852e-02 0.0349008385
g__Spiroplasma 1.064685e-02 0.0532973801
g__Jejubacter 9.570393e-03 0.0478520370
g__Pseudocitrobacter 8.760620e-03 0.0574285139
g__Frigididesulfovibrio 7.889555e-03 0.0203162078
g__WRHT01 7.718438e-03 0.0221726222
g__Sebaldella 7.074750e-03 0.0196300205
g__Paraburkholderia 6.910657e-03 0.0459867188
g__Bacteroides 5.902028e-03 0.0311091136
g__Citrobacter_A 5.809507e-03 0.0216896385
g__Escherichia 5.512404e-03 0.0357559173
g__Neisseria 5.157037e-03 0.0268237187
g__Caballeronia 3.971317e-03 0.0283608787
g__Orbus 3.915117e-03 0.0279595306
g__Wolbachia 3.574704e-03 0.0121363242
g__Lactococcus 3.499662e-03 0.0202503299
g__CALYQQ01 3.073067e-03 0.0165504095
g__Enterococcus_B 2.505891e-03 0.0178956403
g__Enterobacillus 2.297797e-03 0.0121221737
g__Saezia 2.235806e-03 0.0071724137
g__DFXE01 2.167618e-03 0.0154798861
g__Trinickia 2.120753e-03 0.0106036233
g__WQUU01 1.905223e-03 0.0087650731
g__Zymobacter 1.883561e-03 0.0134513161
g__Planktothrix 1.849207e-03 0.0092981482
g__Enterococcus_D 1.831542e-03 0.0108315120
g__Breznakia 1.340191e-03 0.0046796931
g__Scatolibacter 1.217931e-03 0.0040092177
g__WRAV01 1.054847e-03 0.0056761271
g__QANA01 9.622707e-04 0.0065229596
g__UBA1794 9.517755e-04 0.0059502884
g__Acaricomes 8.277861e-04 0.0059115751
g__GCA-022846635 7.962132e-04 0.0044153137
g__UBA1174 7.876488e-04 0.0049458893
g__Helicobacter_C 7.802691e-04 0.0028054940
g__JAJBSZ01 7.768563e-04 0.0041153802
g__Fusobacterium_A 6.907455e-04 0.0029637575
g__Tokpelaia_A 5.169962e-04 0.0036920911
g__JAAYCI01 4.944755e-04 0.0019868365
g__JAHHUI01 3.657439e-04 0.0026119337
g__JAHZDZ01 3.244078e-04 0.0012663030
g__JAJQAW01 3.236972e-04 0.0017316093
g__Elusimicrobium 2.933708e-04 0.0020950863
g__Sarcina 2.328503e-04 0.0016628836
g__Vulcanococcus 1.713001e-04 0.0009579297
g__Lawsonibacter 1.704014e-04 0.0008167847
g__Endomicrobium 1.568425e-04 0.0010127663
g__CHH4-2 1.498779e-04 0.0010703421
g__Entomobacter 5.048026e-05 0.0001942887
g__Fusobacterium_C 0.000000e+00 0.0000000000
g__Helicobacter_G 0.000000e+00 0.0000000000
g__Photobacterium 0.000000e+00 0.0000000000
# Genus names (without the "g__" prefix) ordered by decreasing summed
# relative abundance; used to pick and order the genera shown in the plot.
genus_arrange <- genus_summary %>%
  group_by(genus) %>%
  summarise(total = sum(relabun)) %>%
  filter(genus != "g__") %>% # exclude the empty genus label
  arrange(desc(total)) %>%
  pull(genus) %>%
  sub("^g__", "", .)

# Jittered points of the relative abundance of the 20 top-ranked genera
# (ranking taken from genus_arrange), faceted by bat species and coloured by
# phylum.
genus_summary %>%
  left_join(
    genome_metadata %>% select(genus, phylum) %>% unique(),
    by = join_by(genus)
  ) %>% # attach the phylum of each genus
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  mutate(genus = sub("^g__", "", genus)) %>% # strip the "g__" rank prefix
  filter(genus %in% genus_arrange[1:20]) %>%
  mutate(genus = factor(genus, levels = rev(genus_arrange[1:20]))) %>% # reversed levels put the top-ranked genus at the top of the y axis
  filter(relabun > 0) %>% # keep only samples in which the genus was detected
  ggplot(aes(x = relabun, y = genus, group = genus, color = phylum)) +
    scale_color_manual(values = phylum_colors) +
    # the boxplot layer is intentionally left disabled:
    # geom_boxplot() +
    geom_jitter(alpha = 0.5) +
    facet_nested(. ~ bat_species) +
    theme_minimal()