Chapter 5 Community composition

behaviour_colors <- c(
  tame = "#1f77b4",   
  aggr = "#d62728",   
  unsel = "#2ca02c"   
)

5.1 Taxonomy overview

5.1.1 Stacked barplot

tax_barplot <- genome_counts_filt %>%
  mutate_at(vars(-genome), ~./sum(.)) %>% #apply TSS normalization
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(count > 0)  #filter 0 counts
tax_barplot_filt <- genome_counts_filt %>%
 dplyr::select(-all_of(samples_to_remove))%>%
  mutate_at(vars(-genome), ~./sum(.)) %>% #apply TSS normalization
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% #append sample metadata
  filter(count > 0)  #filter 0 counts

Evaluating the community composition at each of the gut locations.

ggplot(tax_barplot, aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    facet_grid(. ~ gut_location,  scales="free") + #facet per day and treatment
    guides(fill = guide_legend(ncol = 1)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 6),
          axis.ticks.x =element_blank(),
          axis.title.x = element_blank(),
          axis.text.y = element_text(size=8),
          axis.title.y = element_text(size=12),
          panel.background = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
          panel.border = element_rect(colour = "black", fill = NA),
          strip.background = element_rect(fill = "white", color = "black"),
          strip.text = element_text(size = 12, lineheight = 0.6)) +
   labs(fill="Phylum",y = "Relative abundance",x="Samples")

ggplot(tax_barplot_filt, aes(x=sample,y=count, fill=phylum, group=phylum)) + #grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    facet_grid(. ~ gut_location,  scales="free") + #facet per day and treatment
    guides(fill = guide_legend(ncol = 1)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 6),
          axis.ticks.x =element_blank(),
          axis.title.x = element_blank(),
          axis.text.y = element_text(size=8),
          axis.title.y = element_text(size=12),
          panel.background = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
          panel.border = element_rect(colour = "black", fill = NA),
          strip.background = element_rect(fill = "white", color = "black"),
          strip.text = element_text(size = 12, lineheight = 0.6)) +
   labs(fill="Phylum",y = "Relative abundance",x="Samples")

Evaluating community composition between the fox behaviours within the different gut locations:

ggplot(tax_barplot, aes(x=sample,y=count, fill=phylum, group=phylum)) + 
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    facet_nested(. ~ gut_location + fox_behaviour,  scales="free") + #facet per day and treatment
    guides(fill = guide_legend(ncol = 1)) +
    theme(axis.text.x = element_blank(),
          axis.ticks.x =element_blank(),
          axis.title.x = element_blank(),
          axis.text.y = element_text(size=8),
          axis.title.y = element_text(size=12),
          panel.background = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
          panel.border = element_rect(colour = "black", fill = NA),
          strip.background = element_rect(fill = "white", color = "black"),
          strip.text = element_text(size = 12, lineheight = 0.6)) +
   labs(fill="Phylum",y = "Relative abundance",x="Samples")

ggplot(tax_barplot, aes(x=sample,y=count, fill=phylum, group=phylum)) + 
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    facet_nested(. ~  fox_behaviour + gut_location,  scales="free") + #facet per day and treatment
    guides(fill = guide_legend(ncol = 1)) +
    theme(axis.text.x = element_blank(),
          axis.ticks.x =element_blank(),
          axis.title.x = element_blank(),
          axis.text.y = element_text(size=8),
          axis.title.y = element_text(size=12),
          panel.background = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
          panel.border = element_rect(colour = "black", fill = NA),
          strip.background = element_rect(fill = "white", color = "black"),
          strip.text = element_text(size = 12, lineheight = 0.6)) +
   labs(fill="Phylum",y = "Relative abundance",x="Samples")

ggplot(tax_barplot, aes(x=sample,y=count, fill=phylum, group=phylum)) + 
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    facet_nested(. ~  fox_behaviour + sample_type,  scales="free") + #facet per day and treatment
    guides(fill = guide_legend(ncol = 1)) +
    theme(axis.text.x = element_blank(),
          axis.ticks.x =element_blank(),
          axis.title.x = element_blank(),
          axis.text.y = element_text(size=8),
          axis.title.y = element_text(size=12),
          panel.background = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
          panel.border = element_rect(colour = "black", fill = NA),
          strip.background = element_rect(fill = "white", color = "black"),
          strip.text = element_text(size = 12, lineheight = 0.6)) +
   labs(fill="Phylum",y = "Relative abundance",x="Samples")

ggplot(tax_barplot, aes(x=sample,y=count, fill=phylum, group=phylum)) + 
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    facet_nested(. ~   sample_type+ fox_behaviour,  scales="free") + #facet per day and treatment
    guides(fill = guide_legend(ncol = 1)) +
    theme(axis.text.x = element_blank(),
          axis.ticks.x =element_blank(),
          axis.title.x = element_blank(),
          axis.text.y = element_text(size=8),
          axis.title.y = element_text(size=12),
          panel.background = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
          panel.border = element_rect(colour = "black", fill = NA),
          strip.background = element_rect(fill = "white", color = "black"),
          strip.text = element_text(size = 12, lineheight = 0.6)) +
   labs(fill="Phylum",y = "Relative abundance",x="Samples")

### Only with F_colon

tax_barplot_fcolon <- tax_barplot %>%
  filter(gut_location == "F_colon")
ggplot(tax_barplot_fcolon, aes(x=sample,y=count, fill=phylum, group=phylum)) + 
    geom_bar(stat="identity", colour="white", linewidth=0.1) + #plot stacked bars with white borders
    scale_fill_manual(values=phylum_colors) +
    facet_nested(. ~  fox_behaviour,  scales="free") + #facet per day and treatment
    guides(fill = guide_legend(ncol = 1)) +
    theme(axis.text.x = element_blank(),
          axis.ticks.x =element_blank(),
          axis.title.x = element_blank(),
          axis.text.y = element_text(size=8),
          axis.title.y = element_text(size=12),
          panel.background = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
          panel.border = element_rect(colour = "black", fill = NA),
          strip.background = element_rect(fill = "white", color = "black"),
          strip.text = element_text(size = 12, lineheight = 0.6)) +
   labs(fill="Phylum",y = "Relative abundance",x="Samples")

5.1.2 Phylum relative abundances

phylum_summary <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS normalization
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  left_join(genome_metadata, by = join_by(genome == genome)) %>%
  group_by(sample,phylum,fox_behaviour) %>%
  summarise(relabun=sum(count))

phylum_summary %>%
    group_by(phylum) %>%
    summarise(total_mean=mean(relabun*100, na.rm=T),
              total_sd=sd(relabun*100, na.rm=T),
              tame_mean=mean(relabun[fox_behaviour =="tame"]*100, na.rm=T),
              tame_sd=sd(relabun[fox_behaviour =="tame"]*100, na.rm=T),
              aggressive_mean=mean(relabun[fox_behaviour =="aggr"]*100, na.rm=T),
              aggressive_sd=sd(relabun[fox_behaviour=="aggr"]*100, na.rm=T),
              unsel_mean=mean(relabun[fox_behaviour =="unsel"]*100, na.rm=T),
              unsel_sd=sd(relabun[fox_behaviour =="unsel"]*100, na.rm=T)) %>%
    mutate(total=str_c(round(total_mean,2),"±",round(total_sd,2)),
           tame=str_c(round(tame_mean,2),"±",round(tame_sd,2)),
           aggresive=str_c(round(aggressive_mean,2),"±",round(aggressive_sd,2)),
           unselected =str_c(round(unsel_mean,2),"±",round(unsel_sd,2))) %>% 
    arrange(-total_mean) %>% 
   dplyr::select(phylum,total,tame,aggresive, unselected) %>% 
    tt()
phylum total tame aggresive unselected
Bacillota_A 53.84±36.19 58.59±35.17 50.48±39.83 49.55±33.08
Bacteroidota 15.25±19.97 9.6±16.51 16.34±20.1 26.91±24.37
Bacillota 12.28±22.53 15.11±24.53 12.65±24.9 4.34±5.37
Fusobacteriota 4.95±6.83 4.36±7.66 5.36±6.89 5.47±4.84
Pseudomonadota 4.22±13.86 2.75±5.01 6.73±21.62 2.25±2.29
Bacillota_I 2.76±3.46 1.45±2.48 2.68±2.84 6.21±4.68
Campylobacterota 2.69±4.84 3.2±4.91 2.27±5.02 2.35±4.79
Desulfobacterota 1.45±7.98 3.15±12.05 0.05±0.11 0.33±0.33
Spirochaetota 0.91±2.5 0.35±1.12 1.94±3.64 0±0
Deferribacterota 0.73±1.38 0.66±1.11 0.52±1.29 1.38±2.07
Bacillota_C 0.67±0.9 0.51±0.87 0.69±0.88 1.01±1.02
Actinomycetota 0.21±0.37 0.18±0.31 0.26±0.48 0.15±0.25
Bacillota_B 0.04±0.09 0.05±0.1 0.03±0.06 0.04±0.11

Distribution of relative abundance across samples:

phylum_arrange <- phylum_summary %>%
    group_by(phylum) %>%
    summarise(mean=mean(relabun)) %>%
    arrange(-mean) %>%
    dplyr::select(phylum) %>%
    pull()

phylum_summary %>%
  filter(phylum %in% phylum_arrange) %>%
  mutate(phylum=factor(phylum,levels=rev(phylum_arrange))) %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
    filter(relabun > 0) %>%
  ggplot(aes(x=relabun, y=phylum, group=phylum, color=phylum, fill=phylum)) +
  scale_color_manual(values=phylum_colors[rev(phylum_arrange)]) +
        scale_fill_manual(values=phylum_colors[-8]) +
        geom_boxplot(alpha=0.2)+
        geom_jitter(alpha=0.5) + 
        theme_minimal() + 
        theme(legend.position="none") +
        labs(y="Phylum",x="Relative abundance")

## Calculate the phylum order based on mean relative abundance
phylum_arrange <- phylum_summary %>%
    group_by(phylum) %>%
    summarise(mean=mean(relabun)) %>%
    arrange(-mean) %>%
    dplyr::select(phylum) %>%
    pull()

phylum_summary %>%
  filter(phylum %in% phylum_arrange) %>%
  mutate(phylum=factor(phylum,levels=rev(phylum_arrange))) %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
    filter(relabun > 0) %>%
  ggplot(aes(x=relabun, y=phylum, group=phylum, color=phylum, fill=phylum)) +
  scale_color_manual(values=phylum_colors[rev(phylum_arrange)]) +
        scale_fill_manual(values=phylum_colors[-8]) +
        geom_boxplot(alpha=0.2)+
        geom_jitter(alpha=0.5) + 
        facet_nested(. ~ gut_location)+ 
        theme_minimal() + 
        theme(legend.position="none",
              strip.text.x = element_text(size = 14, color="black",face="bold"),
              axis.text.x = element_text(vjust = 0.5, size = 10),
              axis.text.y = element_text(size = 12),
              axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
              axis.title = element_text(size = 14, face = "bold"),
              axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
              axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0))
              ) +
        labs(y="Phylum",x="Relative abundance")

phylum_summary %>%
  filter(phylum %in% phylum_arrange) %>%
  mutate(phylum=factor(phylum,levels=rev(phylum_arrange))) %>%
    filter(relabun > 0) %>%
  ggplot(aes(x=relabun, y=phylum, group=phylum, color=phylum, fill=phylum)) +
  scale_color_manual(values=phylum_colors[rev(phylum_arrange)]) +
        scale_fill_manual(values=phylum_colors[-8]) +
        geom_boxplot(alpha=0.2)+
        geom_jitter(alpha=0.5) + 
        facet_grid(.~fox_behaviour)+
        theme_minimal() + 
        theme(legend.position="none",
              strip.text.x = element_text(size = 14, color="black",face="bold"),
              axis.text.x = element_text(vjust = 0.5, size = 10),
              axis.text.y = element_text(size = 12),
              axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
              axis.title = element_text(size = 14, face = "bold"),
              axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
              axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0))
              ) +
        labs(y="Phylum",x="Relative abundance")

5.1.3 Family

`summarise()` has grouped output by 'sample', 'family'. You can override using the `.groups` argument.
family total tame aggr unsel
Peptostreptococcaceae 45.03±42.9 48.09±43.82 44.45±44.89 38.7±40.68
Bacteroidaceae 14.24±18.56 9±15.28 14.97±18.23 25.71±23.3
Lactobacillaceae 9.31±16.8 13.65±22.13 7.06±12.19 3.54±4.51
Fusobacteriaceae 4.95±6.83 4.36±7.66 5.36±6.89 5.47±4.84
Lachnospiraceae 4.29±6.04 3.43±6.01 4.38±6.09 6.22±6.34
Clostridiaceae 3.1±10.53 6.24±15.54 0.36±0.53 1.45±2.28
Streptococcaceae 2.9±8.69 1.46±2.66 5.44±13.42 0.78±0.84
Enterobacteriaceae 2.04±5.98 1.97±4.82 2.57±8.17 1.02±1.62
Helicobacteraceae 1.93±3.97 1.76±3.39 1.94±4.4 2.32±4.79
Mycoplasmoidaceae 1.78±3.5 0.9±2.51 1.58±2.76 4.43±5.72
CAMWXW01 1.36±7.99 3.12±12.06 0.02±0.08 0±0
Pasteurellaceae 1.32±8.53 0±0 3.37±13.61 0±0
Brachyspiraceae 0.91±2.5 0.35±1.12 1.94±3.64 0±0
Campylobacteraceae 0.76±1.77 1.45±2.39 0.33±0.97 0.02±0.07
Mucispirillaceae 0.73±1.38 0.66±1.11 0.52±1.29 1.38±2.07
Acidaminococcaceae 0.67±0.9 0.51±0.87 0.69±0.88 1.01±1.02
Oscillospiraceae 0.62±0.9 0.31±0.53 0.56±0.74 1.52±1.37
Burkholderiaceae 0.6±0.84 0.57±0.89 0.46±0.71 1±0.93
Muribaculaceae 0.58±1.07 0.23±0.44 0.99±1.55 0.52±0.46
Erysipelotrichaceae 0.53±0.75 0.37±0.66 0.58±0.77 0.8±0.9
Ruminococcaceae 0.41±0.67 0.41±0.84 0.39±0.58 0.43±0.44
UBA932 0.23±0.41 0.19±0.49 0.22±0.35 0.34±0.37
Coprobacillaceae 0.23±0.46 0.12±0.24 0.32±0.65 0.28±0.38
Succinivibrionaceae 0.18±0.37 0.14±0.37 0.27±0.44 0.04±0.1
Anaeroplasmataceae 0.16±0.34 0.07±0.23 0.2±0.44 0.29±0.32
Marinifilaceae 0.14±0.3 0.16±0.37 0.1±0.17 0.22±0.32
CAG-465 0.13±0.38 0.03±0.07 0.07±0.21 0.53±0.77
CAG-508 0.12±0.27 0.03±0.07 0.12±0.28 0.32±0.43
Coriobacteriaceae 0.09±0.28 0.05±0.15 0.17±0.41 0.01±0.03
CAG-239 0.09±0.19 0.07±0.21 0.06±0.15 0.19±0.2
Bifidobacteriaceae 0.09±0.23 0.09±0.25 0.09±0.23 0.08±0.2
Desulfovibrionaceae 0.08±0.19 0.03±0.08 0.03±0.09 0.33±0.33
UBA660 0.07±0.26 0±0 0±0 0.41±0.53
Tannerellaceae 0.06±0.13 0.02±0.07 0.06±0.15 0.12±0.19
Anaerotignaceae 0.05±0.14 0.02±0.06 0.1±0.21 0.04±0.07
Aristaeellaceae 0.05±0.14 0.01±0.03 0.02±0.04 0.23±0.26
Enterococcaceae 0.04±0.13 0.01±0.03 0.08±0.19 0.03±0.06
Peptococcaceae 0.04±0.09 0.05±0.1 0.03±0.06 0.04±0.11
Eggerthellaceae 0.03±0.08 0.04±0.1 0.01±0.02 0.06±0.11
Butyricicoccaceae 0.03±0.08 0.01±0.07 0.02±0.04 0.09±0.13
Turicibacteraceae 0.03±0.11 0±0 0.07±0.17 0±0
Anaerovoracaceae 0.01±0.04 0.01±0.04 0.01±0.02 0.03±0.05
CAG-826 0±0.01 0±0.01 0±0.01 0±0
family_summary %>%
  left_join(genome_metadata %>% dplyr::select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
  left_join(sample_metadata,by=join_by(sample==sample)) %>%
  filter(family %in% family_arrange[1:20]) %>%
  mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x=relabun, y=family, group=family, color=phylum, fill=phylum)) +
      scale_color_manual(values=phylum_colors[-8]) +
      scale_fill_manual(values=phylum_colors[-8]) +
      geom_boxplot(alpha=0.2)+
      geom_jitter(alpha=0.5) + 
      facet_grid(.~gut_location)+
      theme_minimal() +
      theme(legend.position="none",
                  strip.text.x = element_text(size = 14, color="black",face="bold"),
                  axis.text.x = element_text(vjust = 0.5, size = 6),
                  axis.text.y = element_text(size = 12),
                  axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
                  axis.title = element_text(size = 14, face = "bold"),
                  axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
                  axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)))+
      labs(y="Family", x="Relative abundance", color="Phylum")

family_summary %>%
    left_join(genome_metadata %>% dplyr::select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
    left_join(sample_metadata,by=join_by(sample==sample)) %>%
    filter(family %in% family_arrange[1:20]) %>%
    mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
    filter(relabun > 0) %>%
    ggplot(aes(x=relabun, y=family, group=family, color=phylum, fill=phylum)) +
        scale_color_manual(values=phylum_colors[-8]) +
        scale_fill_manual(values=phylum_colors[-8]) +
        geom_boxplot(alpha=0.2)+
        geom_jitter(alpha=0.5) + 
        facet_grid(.~fox_behaviour.x)+
        theme_minimal() +
        theme(legend.position="none",
                    strip.text.x = element_text(size = 14, color="black",face="bold"),
                    axis.text.x = element_text(vjust = 0.5, size = 6),
                    axis.text.y = element_text(size = 12),
                    axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
                    axis.title = element_text(size = 14, face = "bold"),
                    axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
                    axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)))+
        labs(y="Family", x="Relative abundance", color="Phylum")

5.1.4 Genus

`summarise()` has grouped output by 'sample', 'genus'. You can override using the `.groups` argument.
genus total tame aggr unsel
Paraclostridium 44.77±42.86 47.88±44.04 44.02±44.54 38.7±40.68
Bacteroides 5.72±8 2.83±4.62 5.8±6.72 12.77±12.77
Limosilactobacillus 5.1±9.73 5.91±10.31 5.41±11.11 2.4±3.06
Phocaeicola 3.83±5.4 2.45±4.54 5.12±6.63 4.39±3.83
Lactobacillus 3.54±8.66 6.99±12.38 0.94±1.41 0.73±1.1
Fusobacterium_A 3.22±4.67 3.04±5.44 3.36±4.56 3.36±3.04
Sarcina 2.71±9.12 5.38±13.47 0.33±0.49 1.42±2.26
Lactococcus 2.34±8.57 1.09±2.22 4.51±13.44 0.59±0.52
Paraprevotella 2.23±3.46 2.3±4.06 1.33±2.3 4.11±3.63
Alloprevotella 2.07±2.93 1.14±2.01 2.45±3.51 3.53±2.99
Mycoplasmoides 1.78±3.5 0.9±2.51 1.58±2.76 4.43±5.72
Escherichia 1.74±5.83 1.4±4.34 2.44±8.19 0.99±1.63
Fusobacterium_B 1.67±2.21 1.32±2.25 1.85±2.32 2.11±2
CAMWXW01 1.36±7.99 3.12±12.06 0.02±0.08 0±0
Canicola 1.32±8.53 0±0 3.37±13.61 0±0
Blautia 1.2±1.74 1.09±1.91 1.25±1.8 1.37±1.27
Blautia_A 1.01±1.57 0.66±1.17 1.18±1.87 1.5±1.71
Brachyspira 0.91±2.5 0.35±1.12 1.94±3.64 0±0
Helicobacter_A 0.9±2.2 0.64±1.34 0.59±1.02 2.22±4.63
Campylobacter_D 0.76±1.77 1.45±2.39 0.33±0.97 0.02±0.07
Helicobacter_C 0.75±2.24 0.96±2.1 0.86±2.84 0±0
Mucispirillum 0.73±1.38 0.66±1.11 0.52±1.29 1.38±2.07
Phascolarctobacterium_A 0.67±0.9 0.51±0.87 0.69±0.88 1.01±1.02
Ligilactobacillus 0.64±1.09 0.69±1.08 0.69±1.26 0.41±0.72
Faecousia 0.6±0.88 0.3±0.53 0.55±0.73 1.48±1.35
Sutterella 0.6±0.84 0.57±0.89 0.46±0.71 1±0.93
Limisoma 0.58±1.07 0.23±0.44 0.99±1.55 0.52±0.46
Faecalimonas 0.57±0.89 0.44±0.88 0.48±0.73 1.07±1.15
Streptococcus 0.56±1.09 0.36±0.65 0.93±1.54 0.19±0.42
Clostridium 0.39±1.43 0.86±2.11 0.03±0.05 0.03±0.07
Faecalibacterium 0.33±0.58 0.35±0.72 0.33±0.52 0.31±0.28
Plesiomonas 0.31±0.97 0.57±1.35 0.13±0.53 0.03±0.06
Helicobacter_B 0.28±1.27 0.15±0.44 0.49±1.99 0.11±0.18
Faecalimicrobium 0.26±1.02 0.2±0.66 0.44±1.48 0±0
Otoolea 0.24±0.36 0.25±0.43 0.16±0.25 0.42±0.37
Cryptobacteroides 0.23±0.41 0.19±0.49 0.22±0.35 0.34±0.37
Oliverpabstia 0.21±0.42 0.19±0.4 0.28±0.5 0.11±0.24
Allobaculum 0.2±0.32 0.12±0.23 0.3±0.42 0.15±0.16
Prevotella 0.19±0.49 0.22±0.6 0.15±0.45 0.22±0.34
Anaerobiospirillum 0.17±0.37 0.14±0.37 0.25±0.43 0.04±0.1
Holdemanella 0.16±0.33 0.2±0.45 0.12±0.24 0.17±0.18
Brotaphodocola 0.16±0.28 0.08±0.22 0.12±0.2 0.44±0.4
CALUXS01 0.16±0.34 0.07±0.23 0.2±0.44 0.29±0.32
OM05-12 0.15±0.39 0.04±0.17 0.06±0.13 0.61±0.74
Odoribacter 0.14±0.3 0.16±0.37 0.1±0.17 0.22±0.32
UBA9414 0.14±0.24 0.16±0.29 0.07±0.13 0.23±0.27
0.13±0.38 0.03±0.07 0.07±0.21 0.53±0.77
Faecalibacillus 0.13±0.42 0.04±0.09 0.27±0.64 0.04±0.08
JAHHTG01 0.12±0.38 0±0 0.13±0.34 0.39±0.73
JAGZHZ01 0.11±0.23 0.11±0.25 0.1±0.19 0.15±0.26
HGM12587 0.1±0.23 0.02±0.07 0.08±0.19 0.36±0.37
Gallintestinimicrobium 0.09±0.14 0.08±0.14 0.11±0.16 0.08±0.09
Collinsella 0.09±0.28 0.05±0.15 0.17±0.41 0.01±0.03
UMGS1370 0.09±0.16 0.08±0.17 0.11±0.16 0.06±0.14
51-20 0.09±0.19 0.07±0.21 0.06±0.15 0.19±0.2
Bifidobacterium 0.09±0.23 0.09±0.25 0.09±0.23 0.08±0.2
Taurinivorans 0.08±0.19 0.03±0.08 0.03±0.09 0.33±0.33
Frisingicoccus 0.07±0.22 0.08±0.2 0.1±0.28 0±0
UBA3789 0.07±0.26 0±0 0±0 0.41±0.53
Marvinbryantia 0.07±0.17 0.03±0.08 0.07±0.15 0.17±0.32
CAG-269 0.07±0.16 0±0.02 0.1±0.2 0.16±0.22
Schaedlerella 0.06±0.12 0.04±0.1 0.06±0.14 0.09±0.14
Cetobacterium_A 0.06±0.39 0±0 0.15±0.62 0±0
Parabacteroides 0.06±0.13 0.02±0.07 0.06±0.15 0.12±0.19
Ventricola 0.05±0.14 0.01±0.03 0.02±0.04 0.23±0.26
Phocaeicola_A 0.05±0.12 0.02±0.1 0.06±0.13 0.09±0.13
CALVGN01 0.05±0.08 0.05±0.1 0.03±0.05 0.09±0.11
Enterococcus 0.04±0.13 0.01±0.03 0.08±0.19 0.03±0.06
CAJMNU01 0.04±0.11 0.02±0.06 0.07±0.16 0.03±0.05
UMGS1590 0.04±0.09 0.05±0.1 0.03±0.06 0.04±0.11
CALDMQ01 0.04±0.1 0.06±0.14 0.02±0.04 0.03±0.06
Negativibacillus 0.04±0.09 0.02±0.06 0.02±0.05 0.1±0.16
Fimiplasma 0.04±0.15 0±0 0±0 0.21±0.32
Avimicrobium 0.04±0.08 0.04±0.1 0.03±0.06 0.02±0.04
Anaerotignum 0.03±0.08 0.02±0.05 0.06±0.11 0.03±0.06
Slackia 0.03±0.08 0.04±0.1 0.01±0.02 0.06±0.11
Avilachnospira 0.03±0.06 0.03±0.07 0.03±0.06 0.04±0.06
Butyricicoccus 0.03±0.08 0.01±0.07 0.02±0.04 0.09±0.13
Latilactobacillus 0.03±0.1 0.05±0.15 0.02±0.04 0±0
Turicibacter 0.03±0.11 0±0 0.07±0.17 0±0
Laedolimicola 0.03±0.07 0.01±0.03 0.04±0.1 0.04±0.05
UMGS1663 0.03±0.1 0±0 0±0 0.15±0.22
Choladousia 0.03±0.07 0.01±0.04 0.05±0.11 0.01±0.03
Merdicola 0.02±0.07 0.03±0.07 0.03±0.08 0.01±0.01
Beduini 0.02±0.06 0.02±0.06 0.03±0.07 0.01±0.02
Metalachnospira 0.02±0.07 0.01±0.02 0.04±0.1 0.01±0.02
Alitiscatomonas 0.02±0.04 0.01±0.03 0.02±0.04 0.04±0.07
Dorea_B 0.02±0.04 0.03±0.06 0±0.01 0.01±0.01
Ruminococcus_B 0.01±0.05 0.02±0.08 0.01±0.02 0.01±0.03
Evtepia 0.01±0.05 0±0.02 0.01±0.03 0.04±0.1
Gallibacter 0.01±0.04 0.01±0.04 0.01±0.02 0.03±0.05
Succinivibrio 0.01±0.03 0±0 0.02±0.05 0±0
Onthovivens 0±0.01 0±0.01 0±0.01 0±0
genus_summary %>%
  left_join(genome_metadata %>% dplyr::select(genus,phylum) %>% unique(),by="genus") %>%
  left_join(sample_metadata,by=join_by(sample==sample)) %>%
  filter(genus %in% genus_arrange[1:20]) %>%
  mutate(genus=factor(genus,levels=rev(genus_arrange[1:20]))) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x=relabun, y=genus, group=genus, color=phylum, fill=phylum)) +
  scale_color_manual(values=phylum_colors[-8]) +
  scale_fill_manual(values=phylum_colors[-8]) +
  geom_boxplot(alpha=0.2)+
  geom_jitter(alpha=0.5) + 
  facet_grid(.~ gut_location)+
  theme_minimal() +
  theme(legend.position="none",
              strip.text.x = element_text(size = 14, color="black",face="bold"),
              axis.text.x = element_text(vjust = 0.5, size = 6),
              axis.text.y = element_text(size = 12),
              axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
              axis.title = element_text(size = 14, face = "bold"),
              axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
              axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)))+
  labs(y="Family", x="Relative abundance", color="Phylum")

genus_summary %>%
  left_join(genome_metadata %>% dplyr::select(genus,phylum) %>% unique(),by="genus") %>%
  filter(genus %in% genus_arrange[1:20]) %>%
  mutate(genus=factor(genus,levels=rev(genus_arrange[1:20]))) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x=relabun, y=genus, group=genus, color=phylum, fill=phylum)) +
  scale_color_manual(values=phylum_colors[-8]) +
  scale_fill_manual(values=phylum_colors[-8]) +
  geom_boxplot(alpha=0.2)+
  geom_jitter(alpha=0.5) + 
  facet_grid(.~fox_behaviour)+
  theme_minimal() +
  theme(legend.position="none",
              strip.text.x = element_text(size = 14, color="black",face="bold"),
              axis.text.x = element_text(vjust = 0.5, size = 6),
              axis.text.y = element_text(size = 12),
              axis.line = element_line(size = 0.5, linetype = "solid", colour = "black"),
              axis.title = element_text(size = 14, face = "bold"),
              axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
              axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)))+
  labs(y="Family", x="Relative abundance", color="Phylum")

5.2 Core microbiota

library(UpSetR)

genome_counts_rel <- genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>%
  column_to_rownames(., "genome")

#Presence/absence
genome_counts_rel_pa <- (genome_counts_rel > 0) * 1

#Add gut_location info
df <- as.data.frame(t(genome_counts_rel_pa)) %>%
  rownames_to_column("sample") %>%
  left_join(sample_metadata, by="sample") 

#Summarize by location
table_upset_analysis <- df %>%
  group_by(gut_location) %>%
  summarise(across(-sample, ~ as.integer(any(. > 0)))) %>%
  column_to_rownames("gut_location") %>%
  t() %>%
  as.data.frame()

location_colors <- c(
  F_colon = "#d6604d",
  M_colon = "#542788",
  D_ileum = "#fdb863",
  K_ileum = "#bb99d8"
)

#UpSet plot
upset(table_upset_analysis,
      keep.order = TRUE,
      sets = rev(c("F_colon", "M_colon", "D_ileum", "K_ileum")),
      sets.bar.color = rev(location_colors),
      mb.ratio = c(0.55, 0.45),
      order.by = "freq")

# Convert to relative abundance
genome_counts_rel <- genome_counts_filt %>%
  mutate_at(vars(-genome), ~ . / sum(.)) %>%
  column_to_rownames("genome")

# Presence/absence
genome_counts_rel_pa <- (genome_counts_rel > 0) * 1

# Add behaviour info
df <- as.data.frame(t(genome_counts_rel_pa)) %>%
  rownames_to_column("sample") %>%
  left_join(sample_metadata, by = "sample")

# Summarize by behaviour 
table_upset_analysis <- df %>%
  group_by(fox_behaviour) %>%
  summarise(across(-sample, ~ as.integer(any(. > 0)))) %>%
  column_to_rownames("fox_behaviour") %>%
  t() %>%
  as.data.frame()

# Behaviour colors
behaviour_colors <- c(
  tame  = "#1f77b4",  
  aggr  = "#d62728",  
  unsel = "#2ca02c"   
)

# UpSet plot
upset(table_upset_analysis,
      keep.order = TRUE,
      sets = rev(names(behaviour_colors)),       
      sets.bar.color = rev(unname(behaviour_colors)), 
      mb.ratio = c(0.55, 0.45),
      order.by = "freq")

5.3 Wilcoxon Test

H0 (null): The distribution of genome prevalence (presence) is the same across the two sample_type groups (gut tissue or gut content).

mag_prevalence_location <- genome_counts_filt  %>%
  mutate_at(vars(-genome),~./sum(.)) %>%
  pivot_longer(!genome,names_to="sample",values_to="abundance") %>% 
  left_join(sample_metadata,by="sample") %>% 
  mutate(presence=ifelse(abundance>0,1,0)) %>% 
 dplyr::select(genome,sample,presence,gut_location, sample_type) %>%
  group_by(genome,gut_location, sample_type) %>%
  summarise(presence=ifelse(sum(presence)>0,1,0)) %>% 
  group_by(genome,sample_type) %>%
  summarise(presence=sum(presence))

mag_prevalence_location %>% 
  group_by(sample_type) %>%
  summarise(mean=mean(presence),sd=sd(presence)) %>%
  tt()
sample_type mean sd
Gut content 1.8601399 0.3480610
Gut tissue 0.7552448 0.4924326
wilcox.test(presence ~ sample_type, data=mag_prevalence_location)  %>%
  tidy()
# A tibble: 1 × 4
  statistic  p.value method                                            alternative
      <dbl>    <dbl> <chr>                                             <chr>      
1     19123 5.12e-44 Wilcoxon rank sum test with continuity correction two.sided  
mag_prevalence_location %>% 
  ggplot(aes(x=sample_type,y=presence, color=sample_type, fill=sample_type)) +
    geom_boxplot(alpha = 0.5) +
    geom_jitter() +
    theme_minimal()

Comparing the prevalence/presence of the MAGs between aggressive and tame foxes

mag_prevalence_location <- genome_counts_filt  %>%
  mutate_at(vars(-genome),~./sum(.)) %>%
  pivot_longer(!genome,names_to="sample",values_to="abundance") %>% 
  left_join(sample_metadata,by="sample") %>% 
  filter(!grepl("unsel", fox_behaviour))%>% #to remove the unselected foxes
  mutate(presence=ifelse(abundance>0,1,0)) %>% 
 dplyr::select(genome,sample,presence,gut_location, fox_behaviour) %>%
  group_by(genome,gut_location, fox_behaviour) %>%
  summarise(presence=ifelse(sum(presence)>0,1,0)) %>% 
  group_by(genome,fox_behaviour) %>%
  summarise(presence=sum(presence))

mag_prevalence_location %>% 
  group_by(fox_behaviour) %>%
  summarise(mean=mean(presence),sd=sd(presence)) %>%
  tt()
fox_behaviour mean sd
aggr 2.321678 0.8101591
tame 1.566434 0.7922107
wilcox.test(presence ~ fox_behaviour, data=mag_prevalence_location)  %>%
  tidy()
# A tibble: 1 × 4
  statistic  p.value method                                            alternative
      <dbl>    <dbl> <chr>                                             <chr>      
1     15248 2.69e-14 Wilcoxon rank sum test with continuity correction two.sided  
behaviour_colors <- c( "#d62728", "#1f77b4","#2ca02c")

mag_prevalence_location %>% 
  ggplot(aes(x=fox_behaviour,y=presence, color=fox_behaviour, fill=fox_behaviour)) +
    geom_boxplot() +
    geom_jitter() +
    scale_color_manual(values=behaviour_colors) +
    scale_fill_manual(values=str_c(behaviour_colors,"50")) +
    theme_minimal()

5.4 Kruskal-Wallis

mag_prevalence_location_behaviour <- genome_counts_filt  %>%
  mutate_at(vars(-genome),~./sum(.)) %>%
  pivot_longer(!genome,names_to="sample",values_to="abundance") %>% 
  left_join(sample_metadata,by="sample") %>%
  mutate(presence=ifelse(abundance>0,1,0)) %>% 
 dplyr::select(genome,sample,presence,gut_location, fox_behaviour) %>%
  group_by(genome,gut_location, fox_behaviour) %>%
  summarise(presence=ifelse(sum(presence)>0,1,0)) 
`summarise()` has grouped output by 'genome', 'gut_location'. You can override using the `.groups` argument.
# Kruskal–Wallis test (nonparametric ANOVA)
kruskal.test(presence ~ gut_location, data = mag_prevalence_location_behaviour)

    Kruskal-Wallis rank sum test

data:  presence by gut_location
Kruskal-Wallis chi-squared = 692.97, df = 3, p-value < 2.2e-16
pairwise.wilcox.test(mag_prevalence_location_behaviour$presence,
                     mag_prevalence_location_behaviour$gut_location,
                     p.adjust.method = "fdr")

    Pairwise comparisons using Wilcoxon rank sum test with continuity correction 

data:  mag_prevalence_location_behaviour$presence and mag_prevalence_location_behaviour$gut_location 

        D_ileum F_colon K_ileum
F_colon <2e-16  -       -      
K_ileum <2e-16  <2e-16  -      
M_colon 0.68    <2e-16  <2e-16 

P value adjustment method: fdr 

5.5 Generalized linear model

# Fit GLMM: logistic regression with random effect for gut_location
glmm_model <- glmer(
  presence ~ fox_behaviour + (1 | gut_location), 
  data = mag_prevalence_location_behaviour,
  family = binomial   # logistic regression because presence is 0/1
)


summary(glmm_model)
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
 Family: binomial  ( logit )
Formula: presence ~ fox_behaviour + (1 | gut_location)
   Data: mag_prevalence_location_behaviour

      AIC       BIC    logLik -2*log(L)  df.resid 
   1478.5    1500.3    -735.3    1470.5      1712 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-4.9566 -0.7138 -0.0983  0.7018 10.1736 

Random effects:
 Groups       Name        Variance Std.Dev.
 gut_location (Intercept) 5.604    2.367   
Number of obs: 1716, groups:  gut_location, 4

Fixed effects:
                   Estimate Std. Error z value Pr(>|z|)    
(Intercept)          0.2997     1.1914   0.252    0.801    
fox_behaviourtame   -1.3226     0.1627  -8.130 4.28e-16 ***
fox_behaviourunsel  -0.8087     0.1601  -5.053 4.35e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) fx_bhvrt
fox_behvrtm -0.068         
fox_bhvrnsl -0.068  0.536