Chapter 4 Community composition

4.1 Taxonomy overview

4.1.1 Stacked barplot

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(count > 0) %>% #filter 0 counts
  ggplot(., aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    facet_grid(. ~ location,  scales="free") + #facet per day and treatment
    guides(fill = guide_legend(ncol = 1)) +
    theme(axis.text.x = element_blank(),
          axis.ticks.x =element_blank(),
          axis.title.x = element_blank(),
          axis.text.y = element_text(size=8),
          axis.title.y = element_text(size=12),
          panel.background = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
          panel.border = element_rect(colour = "black", fill = NA),
          strip.background = element_rect(fill = "white", color = "black"),
          strip.text = element_text(size = 12, lineheight = 0.6)) +
   labs(fill="Phylum",y = "Relative abundance",x="Samples")

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(count > 0) %>% #filter 0 counts
  ggplot(., aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    facet_nested(. ~ location + origin,  scales="free") + #facet per day and treatment
    guides(fill = guide_legend(ncol = 1)) +
    theme(axis.text.x = element_blank(),
          axis.ticks.x =element_blank(),
          axis.title.x = element_blank(),
          axis.text.y = element_text(size=8),
          axis.title.y = element_text(size=12),
          panel.background = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
          panel.border = element_rect(colour = "black", fill = NA),
          strip.background = element_rect(fill = "white", color = "black"),
          strip.text = element_text(size = 12, lineheight = 0.6)) +
   labs(fill="Phylum",y = "Relative abundance",x="Samples")

genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(count > 0) %>% #filter 0 counts
  ggplot(., aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    facet_nested(. ~ origin + location,  scales="free_x") + #facet per day and treatment
    guides(fill = guide_legend(ncol = 1)) +
    theme(axis.text.x = element_blank(),
          axis.ticks.x =element_blank(),
          axis.title.x = element_blank(),
          axis.text.y = element_text(size=8),
          axis.title.y = element_text(size=12),
          panel.background = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
          panel.border = element_rect(colour = "black", fill = NA),
          strip.background = element_rect(fill = "white", color = "black"),
          strip.text = element_text(size = 10, lineheight = 0.6)) +
   labs(fill="Phylum",y = "Relative abundance",x="Samples")

4.1.2 Phylum relative abundances

phylum_summary <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  left_join(genome_metadata, by = join_by(genome == genome)) %>%
  group_by(sample,phylum,origin) %>%
  summarise(relabun=sum(count))

phylum_summary %>%
    group_by(phylum) %>%
    summarise(total_mean=mean(relabun*100, na.rm=T),
              total_sd=sd(relabun*100, na.rm=T),
              domestic_mean=mean(relabun[origin=="Domestic"]*100, na.rm=T),
              domestic_sd=sd(relabun[origin=="Domestic"]*100, na.rm=T),
              feral_mean=mean(relabun[origin=="Feral"]*100, na.rm=T),
              feral_sd=sd(relabun[origin=="Feral"]*100, na.rm=T)) %>%
    mutate(total=str_c(round(total_mean,2),"±",round(total_sd,2)),
           domestic=str_c(round(domestic_mean,2),"±",round(domestic_sd,2)),
           feral=str_c(round(feral_mean,2),"±",round(feral_sd,2))) %>% 
    arrange(-total_mean) %>% 
    select(phylum,total,domestic,feral) %>% 
    tt()
phylum total domestic feral
Bacteroidota 29.82±17.98 31.9±17.35 27.24±18.63
Bacillota_A 20.34±13.17 17.97±12.46 23.29±13.57
Actinomycetota 18.39±16.05 18.27±15.93 18.55±16.41
Campylobacterota 9.54±15.11 11.99±18.12 6.51±9.6
Bacillota_C 7.2±8.54 6.87±8.66 7.61±8.46
Bacillota 6.65±9.59 5.34±9.02 8.28±10.13
Pseudomonadota 6.45±9.5 6.56±9.19 6.31±9.98
Fusobacteriota 1.48±3.1 1±1.9 2.07±4.08
Desulfobacterota 0.09±0.33 0.06±0.2 0.14±0.45
Cyanobacteria 0.03±0.24 0.05±0.32 0.01±0.05
phylum_arrange <- phylum_summary %>%
    group_by(phylum) %>%
    summarise(mean=mean(relabun)) %>%
    arrange(-mean) %>%
    dplyr::select(phylum) %>%
    pull()

phylum_summary %>%
  filter(phylum %in% phylum_arrange) %>%
  mutate(phylum=factor(phylum,levels=rev(phylum_arrange))) %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
    filter(relabun > 0) %>%
  ggplot(aes(x=relabun, y=phylum, group=phylum, color=phylum, fill=phylum)) +
  scale_color_manual(values=phylum_colors[rev(phylum_arrange)]) +
        scale_fill_manual(values=phylum_colors[-8]) +
        geom_boxplot(alpha=0.2)+
        geom_jitter(alpha=0.5) + 
        facet_nested(. ~ location)+ 
        theme_minimal() + 
        theme(legend.position="none",
              strip.text.x = element_text(size = 14, color="black",face="bold"),
              axis.text.x = element_text(vjust = 0.5, size = 10),
              axis.text.y = element_text(size = 12),
              axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
              axis.title = element_text(size = 14, face = "bold"),
              axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
              axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0))
              ) +
        labs(y="Phylum",x="Relative abundance")

phylum_summary %>%
  filter(phylum %in% phylum_arrange) %>%
  mutate(phylum=factor(phylum,levels=rev(phylum_arrange))) %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
    filter(relabun > 0) %>%
  ggplot(aes(x=relabun, y=phylum, group=phylum, color=phylum, fill=phylum)) +
  scale_color_manual(values=phylum_colors[rev(phylum_arrange)]) +
        scale_fill_manual(values=phylum_colors[-8]) +
        geom_boxplot(alpha=0.2)+
        geom_jitter(alpha=0.5) + 
        theme_minimal() + 
        theme(legend.position="none") +
        labs(y="Phylum",x="Relative abundance")

4.1.3 Family

`summarise()` has grouped output by 'sample', 'family'. You can override using the `.groups` argument.
family total domestic feral
Bacteroidaceae 27.09±17.68 29.17±16.79 24.52±18.62
Lachnospiraceae 12.12±10.54 10.12±10.13 14.62±10.63
Coriobacteriaceae 9.56±9.52 9.92±10.64 9.12±8.03
Helicobacteraceae 6.39±12.29 8.2±15.13 4.13±6.91
Bifidobacteriaceae 4.37±7.2 4.19±6.03 4.6±8.51
Megasphaeraceae 3.45±6.35 3.29±6.05 3.65±6.77
Ruminococcaceae 3.33±3.48 3.29±3.81 3.37±3.07
Campylobacteraceae 3.16±5.5 3.78±5.89 2.38±4.93
Actinomycetaceae 2.62±7.94 2.65±8.06 2.58±7.88
Lactobacillaceae 2.6±7.36 2.56±7.98 2.65±6.61
Succinivibrionaceae 2.3±4.02 2.55±4.06 1.98±3.99
Enterobacteriaceae 2.27±8.36 1.91±7.35 2.71±9.54
Erysipelotrichaceae 1.7±3.28 1.3±2.79 2.2±3.78
Porphyromonadaceae 1.67±6.7 1.73±5.87 1.61±7.69
Atopobiaceae 1.55±4.11 1.03±2.72 2.2±5.32
Selenomonadaceae 1.52±2.63 1.52±2.78 1.52±2.47
Fusobacteriaceae 1.48±3.1 1±1.9 2.07±4.08
Dialisteraceae 1.46±2.13 1.54±2.18 1.36±2.1
Burkholderiaceae 1.46±1.54 1.67±1.69 1.18±1.29
Peptoniphilaceae 1.21±4.39 1.62±4.35 0.7±4.44
Streptococcaceae 1.19±4.98 0.43±1.79 2.13±7.12
Oscillospiraceae 1.09±1.51 0.99±1.22 1.2±1.82
Clostridiaceae 0.9±3.26 0.57±1.29 1.3±4.67
Acidaminococcaceae 0.76±1.4 0.51±0.83 1.07±1.85
Erysipelatoclostridiaceae 0.74±1.2 0.6±0.89 0.92±1.49
Peptostreptococcaceae 0.65±0.97 0.41±0.64 0.93±1.22
Anaerovoracaceae 0.47±1.18 0.49±1.41 0.45±0.83
Pasteurellaceae 0.4±2.49 0.38±2.28 0.44±2.76
Marinifilaceae 0.36±1.06 0.27±1.06 0.48±1.05
Rikenellaceae 0.36±1.49 0.42±1.77 0.28±1.05
Acutalibacteraceae 0.29±0.58 0.22±0.47 0.38±0.68
Mycobacteriaceae 0.27±1.72 0.47±2.3 0.02±0.08
Enterococcaceae 0.25±0.8 0.24±0.89 0.27±0.69
Tannerellaceae 0.22±0.5 0.23±0.51 0.21±0.5
Butyricicoccaceae 0.18±0.39 0.18±0.36 0.19±0.43
UBA660 0.13±0.76 0.18±0.94 0.07±0.44
Desulfovibrionaceae 0.09±0.33 0.06±0.2 0.14±0.45
Muribaculaceae 0.08±0.35 0.05±0.34 0.12±0.36
Anaerotignaceae 0.06±0.23 0.02±0.07 0.1±0.33
CAG-508 0.04±0.14 0.05±0.17 0.02±0.09
CAG-826 0.03±0.17 0.03±0.14 0.03±0.2
Gastranaerophilaceae 0.03±0.24 0.05±0.32 0.01±0.05
Barnesiellaceae 0.03±0.11 0.03±0.11 0.03±0.11
CAG-239 0.02±0.22 0.05±0.29 0±0
UMGS124 0.02±0.13 0.01±0.04 0.03±0.19
UBA1381 0.01±0.07 0±0 0.02±0.11
family_summary %>%
  left_join(genome_metadata %>% dplyr::select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
  left_join(sample_metadata,by=join_by(sample==sample)) %>%
  filter(family %in% family_arrange[1:20]) %>%
  mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x=relabun, y=family, group=family, color=phylum, fill=phylum)) +
      scale_color_manual(values=phylum_colors[-8]) +
      scale_fill_manual(values=phylum_colors[-8]) +
      geom_boxplot(alpha=0.2)+
      geom_jitter(alpha=0.5) + 
      facet_grid(.~location)+
      theme_minimal() +
      theme(legend.position="none",
                  strip.text.x = element_text(size = 14, color="black",face="bold"),
                  axis.text.x = element_text(vjust = 0.5, size = 6),
                  axis.text.y = element_text(size = 12),
                  axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
                  axis.title = element_text(size = 14, face = "bold"),
                  axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
                  axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)))+
      labs(y="Family", x="Relative abundance", color="Phylum")

family_summary %>%
    left_join(genome_metadata %>% dplyr::select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
    left_join(sample_metadata,by=join_by(sample==sample)) %>%
    filter(family %in% family_arrange[1:20]) %>%
    mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
    filter(relabun > 0) %>%
    ggplot(aes(x=relabun, y=family, group=family, color=phylum, fill=phylum)) +
        scale_color_manual(values=phylum_colors[-8]) +
        scale_fill_manual(values=phylum_colors[-8]) +
        geom_boxplot(alpha=0.2)+
        geom_jitter(alpha=0.5) + 
        facet_grid(.~location)+
        theme_minimal() +
        theme(legend.position="none",
                    strip.text.x = element_text(size = 14, color="black",face="bold"),
                    axis.text.x = element_text(vjust = 0.5, size = 6),
                    axis.text.y = element_text(size = 12),
                    axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
                    axis.title = element_text(size = 14, face = "bold"),
                    axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
                    axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)))+
        labs(y="Family", x="Relative abundance", color="Phylum")

4.1.4 Genus

`summarise()` has grouped output by 'sample', 'genus'. You can override using the `.groups` argument.
genus total domestic feral
Prevotella 16.47±15.19 18.73±15.43 13.66±14.58
Collinsella 9.56±9.52 9.92±10.64 9.12±8.03
Phocaeicola 5.36±6.66 5.03±5.72 5.76±7.72
Helicobacter_B 5.21±11.59 6.48±14.28 3.64±6.77
Bifidobacterium 4.37±7.2 4.19±6.03 4.6±8.51
Bacteroides 3.76±4.85 4±5.32 3.46±4.24
Megasphaera 3.43±6.33 3.28±6.04 3.62±6.75
Campylobacter_D 3.16±5.5 3.78±5.89 2.38±4.93
Roseburia 2.6±5.97 2.21±5.31 3.09±6.74
Blautia_A 2.46±2.74 2.08±2.44 2.93±3.04
Anaerobiospirillum 2.21±4.02 2.44±4.06 1.94±4
Escherichia 1.85±7.36 1.2±4.96 2.66±9.55
Negativibacillus 1.84±2.22 1.68±2.14 2.04±2.32
Blautia 1.71±2.37 1.38±2.27 2.11±2.47
Porphyromonas_A 1.67±6.7 1.73±5.87 1.61±7.69
Pauljensenia 1.57±6.05 0.76±3.94 2.58±7.88
Prevotellamassilia 1.49±2.9 1.38±2.5 1.63±3.35
UBA7748 1.41±4.03 0.94±2.67 1.99±5.23
Megamonas 1.4±2.61 1.36±2.74 1.46±2.47
Dialister 1.35±2 1.47±2.07 1.21±1.93
Sutterella 1.31±1.45 1.53±1.59 1.05±1.22
Fusobacterium_B 1.27±2.87 0.94±1.9 1.69±3.73
Ligilactobacillus 1.24±3.91 1.08±3.47 1.44±4.43
Streptococcus 1.14±4.85 0.43±1.78 2.01±6.94
Lactobacillus 1.13±4.48 1.41±5.16 0.78±3.47
Helicobacter_A 1.12±3.23 1.69±3.93 0.43±1.88
Trueperella 1.05±5.29 1.89±7.02 0±0
Ruminococcus_B 0.92±1.84 0.8±1.51 1.07±2.19
Clostridium_Q 0.91±1.77 0.69±1.17 1.19±2.29
Holdemanella 0.9±1.88 0.73±1.99 1.11±1.74
Clostridium_P 0.88±3.25 0.56±1.3 1.27±4.65
Bulleidia 0.79±2 0.56±1.71 1.08±2.31
Faecalibacterium 0.73±1.55 0.75±1.79 0.71±1.23
Catenibacterium 0.73±1.18 0.59±0.88 0.9±1.46
Gemmiger 0.68±1.21 0.76±1.22 0.58±1.2
Peptacetobacter 0.65±0.97 0.41±0.64 0.93±1.22
Peptoniphilus_A 0.62±2.68 0.81±2.86 0.39±2.45
NA 0.57±1.99 0.76±1.98 0.34±1.99
Faecalimonas 0.56±1.04 0.36±0.89 0.8±1.16
Lawsonibacter 0.49±0.73 0.52±0.84 0.46±0.58
Acidaminococcus 0.46±1.26 0.29±0.58 0.67±1.77
Plesiomonas 0.42±2.05 0.72±2.73 0.05±0.19
Histophilus 0.4±2.49 0.38±2.28 0.44±2.76
Eisenbergiella 0.36±0.61 0.35±0.62 0.38±0.61
Alistipes 0.36±1.49 0.42±1.77 0.28±1.05
Odoribacter 0.34±1.06 0.24±1.06 0.46±1.05
Phascolarctobacterium_A 0.29±0.74 0.21±0.59 0.39±0.88
CAG-81 0.29±0.48 0.24±0.42 0.35±0.54
Catenibacillus 0.28±0.46 0.19±0.36 0.38±0.55
Sellimonas 0.26±0.53 0.26±0.52 0.25±0.54
CAG-317 0.25±0.45 0.2±0.34 0.31±0.55
Ruminococcus_A 0.25±0.34 0.2±0.29 0.31±0.39
UMGS905 0.23±0.51 0.16±0.4 0.31±0.61
Parabacteroides 0.22±0.5 0.23±0.51 0.21±0.5
Mediterraneibacter 0.22±0.67 0.24±0.62 0.19±0.73
Fusobacterium_A 0.2±0.78 0.06±0.24 0.38±1.13
Dysosmobacter 0.2±0.34 0.23±0.37 0.17±0.3
Limosilactobacillus 0.19±0.77 0.07±0.36 0.34±1.08
S5-A14a 0.18±1.04 0.31±1.39 0.02±0.09
Schaedlerella 0.18±0.39 0.19±0.5 0.16±0.2
Butyricicoccus 0.17±0.39 0.16±0.35 0.19±0.43
Flavonifractor 0.16±0.33 0.16±0.37 0.16±0.28
Corynebacterium 0.16±1.31 0.29±1.76 0±0
Enterocloster 0.14±0.21 0.11±0.2 0.18±0.23
Parolsenella 0.14±0.34 0.09±0.14 0.2±0.47
CAG-110 0.14±0.81 0.02±0.1 0.28±1.19
CAG-521 0.13±0.62 0.15±0.74 0.12±0.44
VUNA01 0.12±0.49 0.06±0.17 0.19±0.7
Mitsuokella 0.12±0.51 0.16±0.62 0.06±0.32
CAG-877 0.12±0.66 0.15±0.8 0.07±0.44
Lawsonella 0.11±0.57 0.18±0.75 0.02±0.08
Allisonella 0.11±0.24 0.07±0.17 0.15±0.29
Enterococcus_E 0.1±0.6 0.15±0.8 0.04±0.12
Enterococcus_B 0.09±0.41 0.04±0.23 0.16±0.56
Lachnospira 0.09±0.29 0.11±0.36 0.05±0.15
UMGS1472 0.09±0.15 0.08±0.15 0.1±0.15
CAG-279 0.08±0.35 0.05±0.34 0.12±0.36
Desulfovibrio 0.08±0.32 0.05±0.17 0.13±0.43
UMGS1370 0.08±0.21 0.06±0.16 0.1±0.26
UBA9502 0.07±0.13 0.06±0.1 0.1±0.16
Dorea_B 0.07±0.2 0.07±0.23 0.08±0.17
Robinsoniella 0.07±0.17 0.07±0.19 0.08±0.15
Eubacterium_M 0.06±0.35 0.05±0.32 0.08±0.38
Enterococcus 0.06±0.38 0.05±0.37 0.07±0.41
Fusicatenibacter 0.06±0.27 0.06±0.23 0.06±0.32
Emergencia 0.06±0.12 0.04±0.1 0.07±0.15
Anaerotignum 0.06±0.23 0.02±0.07 0.1±0.33
Lactococcus 0.05±0.34 0±0.01 0.12±0.51
Evtepia 0.05±0.14 0.05±0.11 0.06±0.16
Succinivibrio 0.05±0.22 0.07±0.25 0.04±0.16
Bariatricus 0.05±0.18 0.03±0.13 0.08±0.22
Anaerobutyricum 0.04±0.09 0.03±0.07 0.06±0.1
CAG-354 0.04±0.14 0.05±0.17 0.02±0.09
Dorea_A 0.04±0.27 0±0.04 0.08±0.4
Peptoniphilus_C 0.04±0.36 0.07±0.49 0±0
UMGS1872 0.04±0.24 0.01±0.04 0.08±0.36
Hungatella_A 0.04±0.17 0.04±0.21 0.03±0.12
Mobilibacterium 0.04±0.16 0.01±0.04 0.07±0.24
Latilactobacillus 0.04±0.34 0±0 0.08±0.5
Eubacterium_H 0.03±0.23 0±0 0.07±0.35
UBA4855 0.03±0.17 0.03±0.14 0.03±0.2
Helicobacter_C 0.03±0.21 0.02±0.1 0.05±0.29
Clostridium_A 0.03±0.23 0±0 0.07±0.35
Zag111 0.03±0.24 0.05±0.32 0.01±0.05
Barnesiella 0.03±0.11 0.03±0.11 0.03±0.11
Butyricimonas 0.03±0.11 0.03±0.12 0.03±0.1
Anaerobiospirillum_A 0.03±0.14 0.04±0.19 0.01±0.04
Fournierella 0.03±0.12 0.03±0.15 0.02±0.06
Phocea 0.03±0.09 0.03±0.12 0.02±0.04
Ruminococcus_C 0.03±0.13 0.05±0.18 0±0
CAG-495 0.02±0.22 0.05±0.29 0±0
Ruminococcus_E 0.02±0.21 0.04±0.28 0±0.01
Caecibacter 0.02±0.1 0.01±0.05 0.03±0.14
Helicobacter_D 0.02±0.14 0.02±0.17 0.02±0.08
Erysipelatoclostridium 0.02±0.08 0.01±0.07 0.03±0.09
Clostridium 0.02±0.1 0.01±0.04 0.03±0.15
Paraprevotella 0.02±0.15 0.03±0.2 0±0
CAG-145 0.02±0.07 0.02±0.07 0.02±0.06
Eubacterium_G 0.02±0.11 0.03±0.14 0±0
Anaerostipes 0.02±0.07 0±0.02 0.03±0.11
CAG-988 0.01±0.1 0.03±0.14 0±0
Absicoccus 0.01±0.05 0.02±0.06 0.01±0.04
Phascolarctobacterium 0.01±0.11 0.02±0.15 0±0.03
Marseille-P4683 0.01±0.06 0.02±0.07 0±0
AM07-15 0.01±0.05 0.02±0.07 0±0.02
Mailhella 0.01±0.04 0.01±0.04 0.01±0.05
Parasutterella 0.01±0.08 0±0 0.02±0.12
Levilactobacillus 0.01±0.08 0±0 0.02±0.12
CAG-41 0.01±0.07 0±0 0.02±0.11
genus_summary %>%
  left_join(genome_metadata %>% dplyr::select(genus,phylum) %>% unique(),by="genus") %>%
  left_join(sample_metadata,by=join_by(sample==sample)) %>%
  filter(genus %in% genus_arrange[1:20]) %>%
  mutate(genus=factor(genus,levels=rev(genus_arrange[1:20]))) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x=relabun, y=genus, group=genus, color=phylum, fill=phylum)) +
  scale_color_manual(values=phylum_colors[-8]) +
  scale_fill_manual(values=phylum_colors[-8]) +
  geom_boxplot(alpha=0.2)+
  geom_jitter(alpha=0.5) + 
  facet_grid(.~location)+
  theme_minimal() +
  theme(legend.position="none",
              strip.text.x = element_text(size = 14, color="black",face="bold"),
              axis.text.x = element_text(vjust = 0.5, size = 6),
              axis.text.y = element_text(size = 12),
              axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
              axis.title = element_text(size = 14, face = "bold"),
              axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
              axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)))+
  labs(y="Family", x="Relative abundance", color="Phylum")

genus_summary %>%
  left_join(genome_metadata %>% dplyr::select(genus,phylum) %>% unique(),by="genus") %>%
  filter(genus %in% genus_arrange[1:20]) %>%
  mutate(genus=factor(genus,levels=rev(genus_arrange[1:20]))) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x=relabun, y=genus, group=genus, color=phylum, fill=phylum)) +
  scale_color_manual(values=phylum_colors[-8]) +
  scale_fill_manual(values=phylum_colors[-8]) +
  geom_boxplot(alpha=0.2)+
  geom_jitter(alpha=0.5) + 
  facet_grid(.~origin)+
  theme_minimal() +
  theme(legend.position="none",
              strip.text.x = element_text(size = 14, color="black",face="bold"),
              axis.text.x = element_text(vjust = 0.5, size = 6),
              axis.text.y = element_text(size = 12),
              axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
              axis.title = element_text(size = 14, face = "bold"),
              axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
              axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)))+
  labs(y="Family", x="Relative abundance", color="Phylum")

4.2 Core microbiota

genome_counts_rel <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>%
  column_to_rownames(., "genome")
  
genome_counts_rel_pa=1*(genome_counts_rel>0)
#MAGrel_pa[1:6,1:6]
table_upset_analysis_cont=t(aggregate(t(genome_counts_rel_pa),by=list(sample_metadata$location),FUN=sum)[,-1])
colnames(table_upset_analysis_cont)=levels(as.factor(sample_metadata$location))
table_upset_analysis=(table_upset_analysis_cont>0)*1
table_upset_analysis=data.frame(table_upset_analysis)
table_upset_analysis=apply(table_upset_analysis,2,as.integer)
rownames(table_upset_analysis) <- rownames(genome_counts_rel_pa)

#pdf("figures/MAG_intersection.pdf",width=8,height=6, onefile=F)
upset(as.data.frame(table_upset_analysis),
  keep.order = T,
  sets = rev(c("Aruba","Brazil","CaboVerde","Spain","Denmark","Malaysia")),
  sets.bar.color= rev(location_colors),
  mb.ratio = c(0.55, 0.45), order.by = "freq")

#dev.off()
mag_prevalence_location <- genome_counts_filt  %>%
  mutate_at(vars(-genome),~./sum(.)) %>%
  pivot_longer(!genome,names_to="sample",values_to="abundance") %>% 
  left_join(sample_metadata,by="sample") %>% 
  mutate(presence=ifelse(abundance>0,1,0)) %>% 
  select(genome,sample,presence,location,origin) %>%
  group_by(genome,location,origin) %>%
  summarise(presence=ifelse(sum(presence)>0,1,0)) %>% 
  group_by(genome,origin) %>%
  summarise(presence=sum(presence))

mag_prevalence_location %>% 
  group_by(origin) %>%
  summarise(mean=mean(presence),sd=sd(presence)) %>%
  tt()
origin mean sd
Domestic 3.122271 1.933745
Feral 3.187773 2.033627
wilcox.test(presence ~ origin, data=mag_prevalence_location)  %>%
  tidy()
# A tibble: 1 × 4
  statistic p.value method                                            alternative
      <dbl>   <dbl> <chr>                                             <chr>      
1    25706.   0.713 Wilcoxon rank sum test with continuity correction two.sided  
mag_prevalence_location %>% 
  ggplot(aes(x=origin,y=presence, color=origin, fill=origin)) +
    geom_boxplot() +
    geom_jitter() +
    scale_color_manual(values=origin_colors) +
    scale_fill_manual(values=str_c(origin_colors,"50")) +
    theme_minimal()