Chapter 6 Community composition

load("data/data.Rdata")

6.1 Taxonomy overview

6.1.1 Stacked barplot

# Long-format table of per-sample relative abundances with genome and sample
# metadata attached. Only non-zero abundances are kept, Elevation is turned
# into a factor, and a sample-by-elevation interaction is added for plotting.
genome_counts_filt_met <- genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # one row per genome/sample pair
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
  filter(count > 0) %>%
  mutate(
    Elevation = as.factor(Elevation),
    # combined sample/elevation factor used as the x variable of the barplot
    interaction_var = interaction(sample, Elevation)
  )

# Stacked barplot of relative abundance per sample, filled by phylum, with
# facets nested by transect and elevation.
# NOTE(review): the x axis is the sample-by-elevation interaction, while its
# title reads "Elevation (m)" - confirm this is the intended label.
ggplot(
  genome_counts_filt_met,
  # grouping by phylum keeps the same sorting of taxonomic units in every bar
  aes(x = interaction_var, y = count, fill = phylum, group = phylum)
) +
  # bars use the values in `count` directly; thin white borders separate segments
  geom_col(colour = "white", linewidth = 0.1) +
  scale_fill_manual(values = phylum_colors) +
  labs(x = "Elevation (m)", y = "Relative abundance") +
  guides(fill = guide_legend(ncol = 3)) +
  theme(
    axis.line = element_line(colour = "black", linetype = "solid", linewidth = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # the legend is switched off here, so the legend settings only matter if
    # legend.position is changed
    legend.position = "none",
    legend.title = element_blank(),
    legend.text = element_text(size = 7)
  ) +
  facet_nested(. ~ Transect + Elevation, scales = "free")

6.1.1.1 Number of bacterial phyla

[1] 16

6.1.1.2 Bacterial phyla in Aisa transect

# Per-sample relative abundance of each phylum in the Aisa transect, keeping
# only sample/phylum pairs with a non-zero abundance.
phylum_summary_aisa <- genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  # sample metadata joined here is dropped again by summarise() below
  left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, phylum) %>%
  summarise(relabun = sum(count)) %>%
  # re-attach sample metadata so the result can be filtered by transect
  left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
  filter(Transect == "Aisa", relabun > 0)

# Number of bacterial phyla detected in Aisa
n_distinct(phylum_summary_aisa$phylum)
[1] 13
# Mean ± SD (in %) of the per-sample relative abundance of each phylum,
# sorted from most to least abundant.
# sd() of a single value is NA and str_c() propagates NA, so a phylum found in
# only one sample would be printed as NA.
phylum_summary_aisa %>%
  mutate(relabun_pct = relabun * 100) %>%
  group_by(phylum) %>%
  summarise(
    total_mean = mean(relabun_pct, na.rm = TRUE),
    total_sd = sd(relabun_pct, na.rm = TRUE)
  ) %>%
  mutate(total = str_c(round(total_mean, 2), "±", round(total_sd, 2))) %>%
  arrange(desc(total_mean)) %>%
  dplyr::select(phylum, total) %>%
  tt()
phylum total
p__Bacillota_A 42.18±15.99
p__Bacteroidota 39.09±17.57
p__Pseudomonadota 5.27±6.75
p__Bacillota 3.99±3.67
p__Verrucomicrobiota 3.53±3.26
p__Campylobacterota 3.29±3.44
p__Desulfobacterota 2.48±1.93
p__Fusobacteriota 1.35±1.33
p__Bacillota_C 1.29±1.53
p__Cyanobacteriota 0.82±0.49
p__Chlamydiota 0.7±0.63
p__Bacillota_B 0.53±0.41
p__Actinomycetota 0.18±0.09

6.1.1.3 Bacterial phyla in Aran transect

# Per-sample relative abundance of each phylum in the Aran transect, keeping
# only sample/phylum pairs with a non-zero abundance.
phylum_summary_aran <- genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  # sample metadata joined here is dropped again by summarise() below
  left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, phylum) %>%
  summarise(relabun = sum(count)) %>%
  # re-attach sample metadata so the result can be filtered by transect
  left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
  filter(Transect == "Aran", relabun > 0)

# Number of bacterial phyla detected in Aran
n_distinct(phylum_summary_aran$phylum)
[1] 14
# Mean ± SD (in %) of the per-sample relative abundance of each phylum,
# sorted from most to least abundant.
# sd() of a single value is NA and str_c() propagates NA, so a phylum found in
# only one sample is printed as NA (still ranked by its mean).
phylum_summary_aran %>%
  mutate(relabun_pct = relabun * 100) %>%
  group_by(phylum) %>%
  summarise(
    total_mean = mean(relabun_pct, na.rm = TRUE),
    total_sd = sd(relabun_pct, na.rm = TRUE)
  ) %>%
  mutate(total = str_c(round(total_mean, 2), "±", round(total_sd, 2))) %>%
  arrange(desc(total_mean)) %>%
  dplyr::select(phylum, total) %>%
  tt()
phylum total
p__Bacillota_A 41.7±16.74
p__Bacteroidota 36.85±14.71
p__Pseudomonadota 7.27±11.02
p__Campylobacterota 5.5±8.27
p__Bacillota 4.64±5.26
p__Fusobacteriota 4.01±8.68
p__Desulfobacterota 2.75±2.22
p__Verrucomicrobiota 2.38±3.15
p__Spirochaetota 1.22±1.19
p__Bacillota_C 1.14±1.18
p__Cyanobacteriota 0.72±0.54
p__Deferribacterota NA
p__Bacillota_B 0.44±0.45
p__Actinomycetota 0.3±0.26

6.1.1.4 Bacterial phyla in Sentein transect

# Per-sample relative abundance of each phylum in the Sentein transect,
# keeping only sample/phylum pairs with a non-zero abundance.
phylum_summary_sentein <- genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  # sample metadata joined here is dropped again by summarise() below
  left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, phylum) %>%
  summarise(relabun = sum(count)) %>%
  # re-attach sample metadata so the result can be filtered by transect
  left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
  filter(Transect == "Sentein", relabun > 0)

# Number of bacterial phyla detected in Sentein
n_distinct(phylum_summary_sentein$phylum)
[1] 15
# Mean ± SD (in %) of the per-sample relative abundance of each phylum,
# sorted from most to least abundant.
# sd() of a single value is NA and str_c() propagates NA, so a phylum found in
# only one sample is printed as NA (still ranked by its mean).
phylum_summary_sentein %>%
  mutate(relabun_pct = relabun * 100) %>%
  group_by(phylum) %>%
  summarise(
    total_mean = mean(relabun_pct, na.rm = TRUE),
    total_sd = sd(relabun_pct, na.rm = TRUE)
  ) %>%
  mutate(total = str_c(round(total_mean, 2), "±", round(total_sd, 2))) %>%
  arrange(desc(total_mean)) %>%
  dplyr::select(phylum, total) %>%
  tt()
phylum total
p__Bacteroidota 40.72±14.71
p__Bacillota_A 34.82±14.88
p__Bacillota 6.17±10.94
p__Pseudomonadota 5.19±5.3
p__Campylobacterota 4.8±5.42
p__Desulfobacterota 4.77±8.22
p__Spirochaetota 4.76±5.8
p__Chlamydiota 3.19±3.26
p__Fusobacteriota 2.17±5.62
p__Verrucomicrobiota 1.34±1.49
p__Cyanobacteriota 0.78±0.37
p__Actinomycetota 0.77±1.29
p__Bacillota_C 0.61±0.51
p__Bacillota_B 0.16±0.07
p__Deferribacterota NA

6.1.1.5 Bacterial phyla in Tourmalet transect

# Per-sample relative abundance of each phylum in the Tourmalet transect,
# keeping only sample/phylum pairs with a non-zero abundance.
phylum_summary_tourmalet <- genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  # sample metadata joined here is dropped again by summarise() below
  left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, phylum) %>%
  summarise(relabun = sum(count)) %>%
  # re-attach sample metadata so the result can be filtered by transect
  left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
  filter(Transect == "Tourmalet") %>%
  # Drop phyla that are absent from a sample, as done for the other transects.
  # Without this filter every phylum in the catalogue has a (zero) row in every
  # sample, so the phylum count below is always the full set and the means
  # include zeros.
  # NOTE(review): results rendered before this filter was added must be
  # regenerated.
  filter(relabun > 0)

# Number of bacterial phyla detected in Tourmalet
length(unique(phylum_summary_tourmalet$phylum))
[1] 16
# Mean ± SD (in %) of the per-sample relative abundance of each phylum,
# sorted from most to least abundant.
# sd() of a single value is NA and str_c() propagates NA, so a phylum with
# only one row would be printed as NA.
phylum_summary_tourmalet %>%
  mutate(relabun_pct = relabun * 100) %>%
  group_by(phylum) %>%
  summarise(
    total_mean = mean(relabun_pct, na.rm = TRUE),
    total_sd = sd(relabun_pct, na.rm = TRUE)
  ) %>%
  mutate(total = str_c(round(total_mean, 2), "±", round(total_sd, 2))) %>%
  arrange(desc(total_mean)) %>%
  dplyr::select(phylum, total) %>%
  tt()
phylum total
p__Bacillota_A 41.87±15.42
p__Bacteroidota 39.29±15.9
p__Bacillota 5.87±5.94
p__Pseudomonadota 5.3±5.15
p__Campylobacterota 2.32±2.71
p__Desulfobacterota 1.58±2.02
p__Verrucomicrobiota 0.94±1.13
p__Bacillota_C 0.83±0.53
p__Cyanobacteriota 0.53±0.82
p__Fusobacteriota 0.4±0.81
p__Actinomycetota 0.39±0.73
p__Spirochaetota 0.23±0.51
p__Deferribacterota 0.16±0.51
p__Bacillota_B 0.16±0.23
p__Chlamydiota 0.11±0.53
p__Synergistota 0.03±0.16

6.1.2 Phylum relative abundances

# Relative abundance of each phylum in each sample, across all transects.
# Zero abundances are kept: every sample/phylum pair has a row.
phylum_summary <- genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  # sample metadata does not survive summarise(); only sample, phylum and
  # relabun remain in the result
  left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, phylum) %>%
  summarise(relabun = sum(count))

# Mean ± SD (in %) of the per-sample relative abundance of each phylum over
# all samples, sorted from most to least abundant.
phylum_summary %>%
  group_by(phylum) %>%
  summarise(
    # spell out TRUE: `T` is an ordinary variable that can be reassigned
    total_mean = mean(relabun * 100, na.rm = TRUE),
    total_sd = sd(relabun * 100, na.rm = TRUE)
  ) %>%
  mutate(total = str_c(round(total_mean, 2), "±", round(total_sd, 2))) %>%
  arrange(-total_mean) %>%
  dplyr::select(phylum, total) %>%
  tt()
phylum total
p__Bacillota_A 40.6±15.93
p__Bacteroidota 38.3±15.88
p__Pseudomonadota 5.83±7.87
p__Bacillota 4.59±6.37
p__Campylobacterota 3.47±5.63
p__Desulfobacterota 2.68±4.05
p__Verrucomicrobiota 1.53±2.43
p__Bacillota_C 0.95±1.05
p__Fusobacteriota 0.91±3.7
p__Cyanobacteriota 0.4±0.6
p__Spirochaetota 0.22±0.97
p__Bacillota_B 0.19±0.32
p__Actinomycetota 0.17±0.52
p__Chlamydiota 0.1±0.61
p__Deferribacterota 0.05±0.27
p__Synergistota 0.01±0.08
# Phylum names ordered by decreasing mean relative abundance across samples.
phylum_arrange <- phylum_summary %>%
  group_by(phylum) %>%
  summarise(mean_relabun = mean(relabun)) %>%
  arrange(desc(mean_relabun)) %>%
  pull(phylum)

# Jitter plot of per-sample relative abundance for each phylum; the most
# abundant phylum is drawn at the top (factor levels are the reversed ranking).
phylum_summary %>%
  filter(phylum %in% phylum_arrange) %>%
  mutate(phylum = factor(phylum, levels = rev(phylum_arrange))) %>%
  ggplot(aes(x = relabun, y = phylum, group = phylum, color = phylum)) +
  geom_jitter(alpha = 0.5) +
  # colours are picked from phylum_colors by phylum name, in level order
  scale_color_manual(values = phylum_colors[rev(phylum_arrange)]) +
  theme_minimal()

6.2 Taxonomy boxplot

6.2.1 Family

# Relative abundance of each family in each sample (zeros are kept).
family_summary <- genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # one row per genome/sample pair
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  # sample metadata does not survive summarise(); only sample, family and
  # relabun remain in the result
  left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, family) %>%
  summarise(relabun = sum(count))

# Mean and SD of the per-sample relative abundance (as proportions) of each
# family, sorted from most to least abundant.
family_summary %>%
  group_by(family) %>%
  summarise(
    mean = mean(relabun),
    sd = sd(relabun)
  ) %>%
  arrange(desc(mean)) %>%
  tt()
family mean sd
f__Lachnospiraceae 2.798160e-01 0.1323151296
f__Bacteroidaceae 2.132576e-01 0.1045524501
f__Tannerellaceae 1.122881e-01 0.0756060991
f__ 6.556348e-02 0.0820773598
f__Helicobacteraceae 3.474580e-02 0.0562678198
f__Marinifilaceae 3.441228e-02 0.0255200299
f__UBA3700 2.777694e-02 0.0330290932
f__Desulfovibrionaceae 2.680077e-02 0.0404627923
f__Ruminococcaceae 2.435712e-02 0.0226880677
f__Rikenellaceae 1.900710e-02 0.0176572117
f__Erysipelotrichaceae 1.767814e-02 0.0269603016
f__Oscillospiraceae 1.696763e-02 0.0140960977
f__Coprobacillaceae 1.273032e-02 0.0336927310
f__Mycoplasmoidaceae 1.210259e-02 0.0270262454
f__Enterobacteriaceae 1.077111e-02 0.0651262303
f__Fusobacteriaceae 9.053249e-03 0.0370456782
f__Akkermansiaceae 8.677367e-03 0.0108836065
f__CAG-239 7.003283e-03 0.0098019806
f__LL51 6.656864e-03 0.0216352391
f__Anaerotignaceae 6.551732e-03 0.0074061779
f__UBA3830 5.185462e-03 0.0171469663
f__Gastranaerophilaceae 4.003188e-03 0.0059975426
f__Muribaculaceae 3.991492e-03 0.0409006227
f__Butyricicoccaceae 3.919222e-03 0.0050520985
f__CAG-274 3.051617e-03 0.0060828463
f__Acutalibacteraceae 2.723615e-03 0.0047471119
f__Anaerovoracaceae 2.683446e-03 0.0040557355
f__Pumilibacteraceae 2.544525e-03 0.0041701010
f__UBA1997 2.383785e-03 0.0080270055
f__CAG-508 2.324078e-03 0.0063109845
f__Brevinemataceae 2.191263e-03 0.0097068603
f__Peptococcaceae 1.937490e-03 0.0031961496
f__Rhodocyclaceae 1.936615e-03 0.0194735479
f__DTU072 1.792884e-03 0.0055918788
f__UBA660 1.742458e-03 0.0054664441
f__MGBC116941 1.714356e-03 0.0090980913
f__Massilibacillaceae 1.502892e-03 0.0024528334
f__Eggerthellaceae 1.377525e-03 0.0037174906
f__Enterococcaceae 8.454990e-04 0.0070848098
f__CALTSX01 5.198677e-04 0.0053270584
f__Chlamydiaceae 5.170637e-04 0.0030492819
f__Mucispirillaceae 4.593259e-04 0.0026630822
f__CALVMC01 4.496143e-04 0.0018583730
f__Clostridiaceae 4.212744e-04 0.0029132915
f__Acidaminococcaceae 4.004438e-04 0.0015777220
f__UBA1242 3.719350e-04 0.0014704441
f__RUG11792 3.717143e-04 0.0019425248
f__CAG-465 3.497207e-04 0.0015695529
f__Microbacteriaceae 3.199134e-04 0.0026999464
f__CAG-288 2.985206e-04 0.0017514912
f__Streptococcaceae 2.479473e-04 0.0018438849
f__Anaplasmataceae 2.435113e-04 0.0021508694
f__Hepatoplasmataceae 2.362221e-04 0.0024205565
f__Aeromonadaceae 2.327082e-04 0.0022486997
f__Peptostreptococcaceae 1.412306e-04 0.0014471826
f__Rhodobacteraceae 1.371859e-04 0.0014057373
f__Xanthomonadaceae 9.245878e-05 0.0009474206
f__Synergistaceae 7.840493e-05 0.0008034115
f__Lactobacillaceae 4.203166e-05 0.0004306963
f__Turicibacteraceae 0.000000e+00 0.0000000000
# Family names ordered by decreasing summed relative abundance across samples.
family_arrange <- family_summary %>%
  group_by(family) %>%
  summarise(total_relabun = sum(relabun)) %>%
  arrange(desc(total_relabun)) %>%
  pull(family)

# Jitter plot of per-sample relative abundance for the 20 top-ranked families,
# coloured by phylum; only non-zero abundances are drawn.
family_summary %>%
  # attach the phylum of each family
  # NOTE(review): if a family label (e.g. the unnamed "f__") occurs under more
  # than one phylum in genome_metadata, this join duplicates its rows - confirm.
  left_join(
    genome_metadata %>% select(family, phylum) %>% distinct(),
    by = join_by(family)
  ) %>%
  left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
  filter(family %in% family_arrange[1:20], relabun > 0) %>%
  # reversed levels place the top-ranked family at the top of the y axis
  mutate(family = factor(family, levels = rev(family_arrange[1:20]))) %>%
  ggplot(aes(x = relabun, y = family, group = family, color = phylum)) +
  geom_jitter(alpha = 0.5) +
  # NOTE(review): the 8th entry of phylum_colors is dropped by position;
  # presumably a phylum absent from these families - verify.
  scale_color_manual(values = phylum_colors[-8]) +
  labs(x = "Relative abundance", y = "Family", color = "Phylum") +
  theme_minimal()

6.2.2 Genus

# Relative abundance of each genus in each sample (zeros are kept); genomes
# whose genus label is the bare prefix "g__" are excluded.
genus_summary <- genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its own total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # one row per genome/sample pair
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  # sample metadata does not survive summarise(); only sample, genus and
  # relabun remain in the result
  left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, genus) %>%
  summarise(relabun = sum(count)) %>%
  filter(genus != "g__")

# Mean and SD of the per-sample relative abundance (as proportions) of each
# genus, sorted from most to least abundant.
genus_summary %>%
  group_by(genus) %>%
  summarise(
    mean = mean(relabun),
    sd = sd(relabun)
  ) %>%
  arrange(desc(mean)) %>%
  tt()
genus mean sd
g__Bacteroides 1.725925e-01 0.0994470682
g__Parabacteroides 1.103381e-01 0.0767310570
g__Roseburia 5.077999e-02 0.0743624266
g__Phocaeicola 3.874581e-02 0.0435882803
g__JAAYNV01 3.526845e-02 0.0735025649
g__Odoribacter 3.376557e-02 0.0254845145
g__Helicobacter_J 2.983823e-02 0.0400574428
g__CAG-95 1.659217e-02 0.0240471187
g__Alistipes 1.555908e-02 0.0156740595
g__Kineothrix 1.544351e-02 0.0584815409
g__MGBC136627 1.451743e-02 0.0230914933
g__Mycoplasmoides 1.139564e-02 0.0269054026
g__Hungatella_A 1.113992e-02 0.0670243947
g__Anaerotruncus 1.006956e-02 0.0112599653
g__Velocimicrobium 9.687390e-03 0.0159382742
g__Enterocloster 9.506772e-03 0.0098451213
g__Fusobacterium_A 9.053249e-03 0.0370456782
g__Acetatifactor 8.989245e-03 0.0139417426
g__Akkermansia 8.677367e-03 0.0108836065
g__Clostridium_Q 7.770136e-03 0.0138182274
g__Bilophila 7.347777e-03 0.0088824113
g__Lawsonia 7.130440e-03 0.0356135290
g__Intestinimonas 6.565049e-03 0.0061282758
g__Lacrimispora 6.397950e-03 0.0078443852
g__Lachnotalea 5.679000e-03 0.0086820363
g__Desulfovibrio 5.604601e-03 0.0085133819
g__MGBC140009 5.533083e-03 0.0134005650
g__Extibacter 5.301402e-03 0.0438086011
g__Coprobacillus 5.255707e-03 0.0160178991
g__Ventrimonas 5.038137e-03 0.0083320432
g__NHYM01 4.907573e-03 0.0427691471
g__Dielma 4.863951e-03 0.0065480876
g__Eisenbergiella 4.698317e-03 0.0069699563
g__CHH4-2 4.463440e-03 0.0044701587
g__RGIG4733 4.252340e-03 0.0103379868
g__Negativibacillus 4.038946e-03 0.0054742770
g__Thomasclavelia 3.858196e-03 0.0117540652
g__Hungatella 3.440712e-03 0.0046402483
g__C-19 3.334625e-03 0.0097460323
g__Citrobacter 3.199667e-03 0.0207814167
g__UMGS1251 2.883830e-03 0.0066485409
g__Oscillibacter 2.697945e-03 0.0038453712
g__CAZU01 2.687821e-03 0.0062923086
g__Breznakia 2.569391e-03 0.0076690515
g__Copromonas 2.559264e-03 0.0037548588
g__Mailhella 2.486840e-03 0.0034349591
g__Pseudoflavonifractor 2.349829e-03 0.0028250595
g__Intestinibacillus 2.290036e-03 0.0027696754
g__Escherichia 2.232632e-03 0.0160096591
g__Brevinema 2.191263e-03 0.0097068603
g__MGBC165282 2.146060e-03 0.0051869977
g__Rikenella 2.123615e-03 0.0033608364
g__Morganella 2.019952e-03 0.0206983454
g__Robinsoniella 2.000942e-03 0.0198167695
g__Parabacteroides_B 1.950084e-03 0.0064473560
g__Hafnia 1.939718e-03 0.0112340944
g__Fluviibacter 1.936615e-03 0.0194735479
g__JAIHAL01 1.909337e-03 0.0043937983
g__CAJLXD01 1.809579e-03 0.0041385093
g__Marseille-P3106 1.729807e-03 0.0025326958
g__UBA866 1.535703e-03 0.0026149730
g__MGBC116941 1.433278e-03 0.0090996222
g__Duncaniella 1.426611e-03 0.0146184139
g__Stoquefichus 1.418761e-03 0.0046100976
g__Limenecus 1.393591e-03 0.0029887106
g__JAAYQI01 1.271605e-03 0.0020016472
g__RGIG6463 1.268390e-03 0.0028982477
g__Lawsonibacter 1.177407e-03 0.0016547788
g__Scatousia 1.174334e-03 0.0033329854
g__MGBC101980 1.147280e-03 0.0043856608
g__Hespellia 1.102861e-03 0.0080033094
g__Clostridium_AQ 1.075206e-03 0.0036972777
g__Tidjanibacter 1.062223e-03 0.0029392145
g__Fournierella 1.021882e-03 0.0022586979
g__Eggerthella 9.946416e-04 0.0031801193
g__OM05-12 9.566920e-04 0.0022952664
g__CALXRO01 9.555497e-04 0.0060396607
g__CALURL01 9.088777e-04 0.0021653921
g__Harryflintia 8.906013e-04 0.0020436518
g__MGBC133411 8.872215e-04 0.0021982622
g__Scatacola_A 8.685187e-04 0.0028129788
g__JALFVM01 8.392923e-04 0.0020111922
g__Bacteroides_G 8.358400e-04 0.0024999742
g__Ventrisoma 8.334289e-04 0.0015664780
g__CAG-269 8.185801e-04 0.0029513181
g__IOR16 8.132853e-04 0.0024147185
g__CAG-873 7.794020e-04 0.0079864937
g__Buttiauxella 7.562352e-04 0.0062186311
g__14-2 7.242943e-04 0.0014229865
g__Ureaplasma 7.069544e-04 0.0022811697
g__Scatocola 6.903112e-04 0.0024144468
g__Dysosmobacter 6.880800e-04 0.0012817741
g__Muricomes 6.843281e-04 0.0023723812
g__Anaerovorax 6.779999e-04 0.0017521412
g__UBA7185 6.690992e-04 0.0018509746
g__Butyricimonas 6.467068e-04 0.0016133034
g__MGBC131033 6.443020e-04 0.0015998136
g__Evtepia 6.309865e-04 0.0010453519
g__CAJMNU01 6.281598e-04 0.0008908563
g__Beduini 5.914594e-04 0.0013148807
g__Muribaculum 5.550053e-04 0.0056871120
g__Scandinavium 5.392506e-04 0.0034848792
g__Lactonifactor 5.340135e-04 0.0013169750
g__CALTSX01 5.198677e-04 0.0053270584
g__CAG-485 5.128011e-04 0.0052546481
g__Merdicola 5.109908e-04 0.0018183944
g__Ventrenecus 4.954074e-04 0.0024443063
g__UMGS1202 4.885966e-04 0.0016971062
g__Copranaerobaculum 4.827706e-04 0.0029167211
g__NSJ-61 4.574747e-04 0.0014044413
g__Faecimonas 4.467204e-04 0.0017621834
g__RGIG8482 4.415175e-04 0.0020991286
g__Faecivivens 4.313538e-04 0.0008927244
g__RGIG9287 4.228193e-04 0.0021002237
g__Sarcina 4.212744e-04 0.0029132915
g__Blautia_A 4.144742e-04 0.0010034450
g__Scatenecus 4.103021e-04 0.0026534948
g__Phascolarctobacterium 4.004438e-04 0.0015777220
g__Raoultibacter 3.828831e-04 0.0011609168
g__Caccovivens 3.719350e-04 0.0014704441
g__CAJTFG01 3.690163e-04 0.0010223015
g__HGM11386 3.642876e-04 0.0016060882
g__CAG-465 3.497207e-04 0.0015695529
g__Amedibacillus 3.457091e-04 0.0023834921
g__Enterococcus_A 3.276668e-04 0.0023252026
g__UMGS2016 3.212305e-04 0.0012912770
g__Emergencia 3.205926e-04 0.0009702487
g__Holdemania 3.098282e-04 0.0010287045
g__Blautia 3.095400e-04 0.0011077317
g__Protoclostridium 3.067429e-04 0.0010837140
g__Fimivivens 3.043536e-04 0.0008104254
g__RGIG7389 2.985465e-04 0.0005848732
g__CAG-345 2.985206e-04 0.0017514912
g__UBA7173 2.963722e-04 0.0030369115
g__Bariatricus 2.928468e-04 0.0008379528
g__Agathobaculum 2.787287e-04 0.0016389968
g__CALXDZ01 2.660973e-04 0.0006132220
g__UBA940 2.621830e-04 0.0009075589
g__Microbacterium 2.563077e-04 0.0026263723
g__Aminipila 2.506050e-04 0.0007479203
g__Lactococcus 2.479473e-04 0.0018438849
g__Wolbachia 2.435113e-04 0.0021508694
g__Paramuribaculum 2.432436e-04 0.0024925056
g__Hepatoplasma 2.362221e-04 0.0024205565
g__Aeromonas 2.327082e-04 0.0022486997
g__WRHT01 2.186196e-04 0.0006955370
g__Zhenpiania 2.116559e-04 0.0012708329
g__UBA5026 2.112884e-04 0.0009778887
g__UMGS1663 1.999591e-04 0.0007286034
g__MGBC107952 1.752651e-04 0.0009887232
g__CALXEL01 1.725652e-04 0.0013728689
g__CAG-273 1.482564e-04 0.0007864691
g__Clostridioides 1.412306e-04 0.0014471826
g__Paracoccus 1.371859e-04 0.0014057373
g__JAAWBF01 1.286984e-04 0.0006144643
g__JAFLTL01 1.270703e-04 0.0013020827
g__Bacteroides_H 1.267213e-04 0.0012985068
g__RUG12867 9.254620e-05 0.0006129332
g__Stenotrophomonas 9.245878e-05 0.0009474206
g__Rahnella 8.365618e-05 0.0008059384
g__Lumbricidophila 6.360575e-05 0.0006517650
g__UBA3263 5.098641e-05 0.0005224553
g__Fructobacillus 4.203166e-05 0.0004306963
g__Clostridium 0.000000e+00 0.0000000000
g__Turicibacter 0.000000e+00 0.0000000000
# Genus names (without the "g__" prefix) ordered by decreasing summed relative
# abundance across samples.
genus_arrange <- genus_summary %>%
  group_by(genus) %>%
  summarise(total_relabun = sum(relabun)) %>%
  filter(genus != "g__") %>%
  arrange(desc(total_relabun)) %>%
  pull(genus) %>%
  sub("^g__", "", .)

# Jitter plot of per-sample relative abundance for the 20 top-ranked genera,
# coloured by phylum; only non-zero abundances are drawn.
genus_summary %>%
  # attach the phylum of each genus (joined before the "g__" prefix is removed)
  left_join(
    genome_metadata %>% select(genus, phylum) %>% distinct(),
    by = join_by(genus)
  ) %>%
  left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
  # strip the prefix so names match genus_arrange
  mutate(genus = sub("^g__", "", genus)) %>%
  filter(genus %in% genus_arrange[1:20], relabun > 0) %>%
  # reversed levels place the top-ranked genus at the top of the y axis
  mutate(genus = factor(genus, levels = rev(genus_arrange[1:20]))) %>%
  ggplot(aes(x = relabun, y = genus, group = genus, color = phylum)) +
  geom_jitter(alpha = 0.5) +
  scale_color_manual(values = phylum_colors) +
  labs(x = "Relative abundance", y = "Genus", color = "Phylum") +
  theme_minimal()