Chapter 6 Alpha diversity

load("data/data_host_filtered.Rdata")
diet_colors<- c("#F4B02E","#E6771D", "#74C8AE")

6.1 Summary table

# Calculate Hill numbers
richness <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 0) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(richness = 1) %>%
  rownames_to_column(var = "sample")

neutral <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 1) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(neutral = 1) %>%
  rownames_to_column(var = "sample")

phylogenetic <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 1, tree = genome_tree) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(phylogenetic = 1) %>%
  rownames_to_column(var = "sample")

# # Aggregate basal GIFT into elements
genome_counts_filt <- genome_counts_filt[genome_counts_filt$genome %in% rownames(genome_gifts_raw),]
genome_counts_filt <- genome_counts_filt %>%
  column_to_rownames(., "genome") %>%
  filter(rowSums(. != 0, na.rm = TRUE) > 0) %>%
  select_if(~!all(. == 0))%>%
  rownames_to_column(., "genome")

genome_gifts <- genome_gifts_raw[rownames(genome_gifts_raw) %in% genome_counts_filt$genome,]
genome_gifts <- genome_gifts[, colSums(genome_gifts != 0) > 0]

dist <- genome_gifts %>%
  to.elements(., GIFT_db) %>%
  traits2dist(., method = "gower")

functional <- genome_counts_filt %>%
  filter(genome %in% rownames(dist)) %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 1, dist = dist) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(functional = 1) %>%
  rownames_to_column(var = "sample") %>%
  mutate(functional = if_else(is.nan(functional), 1, functional))

# Merge all metrics
alpha_div <- richness %>%
  full_join(neutral, by = join_by(sample == sample)) %>%
  full_join(phylogenetic, by = join_by(sample == sample))%>%
  full_join(functional, by = join_by(sample == sample))

alpha_div %>%
  pivot_longer(-sample, names_to = "alpha", values_to = "value") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
    group_by(alpha)%>%
    summarise(
              Wild_mean=mean(value[diet=="Wild"], na.rm=T),
              Wild_sd=sd(value[diet=="Wild"], na.rm=T),
              Pre_grass_mean=mean(value[diet=="Pre_grass"], na.rm=T),
              Pre_grass_sd=sd(value[diet=="Pre_grass"], na.rm=T),
              Post_grass_mean=mean(value[diet=="Post_grass"], na.rm=T),
              Post_grass_sd=sd(value[diet=="Post_grass"], na.rm=T)) %>%
    mutate(
           Wild=str_c(round(Wild_mean,2),"±",round(Wild_sd,2)),
           Pre_grass=str_c(round(Pre_grass_mean,2),"±",round(Pre_grass_sd,2)),
           Post_grass=str_c(round(Post_grass_mean,2),"±",round(Post_grass_sd,2))) %>% 
    arrange(-Wild_mean) %>% 
    dplyr::select(alpha,Wild,Pre_grass,Post_grass) %>% 
    tt()

alpha	Wild	Pre_grass	Post_grass
richness	795.67±26.45	836.83±16.57	835.67±14.03
neutral	70.62±37.82	112.37±28.26	101.87±41.5
phylogenetic	6.5±1.01	7.61±1.83	6.63±0.86
functional	1.45±0.04	1.53±0.05	1.51±0.03

6.2 Wild vs Captive-bred (pre-grass)

6.2.1 Shapiro test

alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = join_by(sample == sample)) %>%
  filter(!diet =="Post_grass") %>% 
  filter(metric=="richness") %>% 
  summarize(shapiro_p_value = shapiro.test(value)$p.value) %>%
  pull(shapiro_p_value)

[1] 0.00896346

alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = join_by(sample == sample)) %>%
  filter(!diet =="Post_grass") %>% 
  filter(metric=="neutral") %>% 
  summarize(shapiro_p_value = shapiro.test(value)$p.value) %>%
  pull(shapiro_p_value)

[1] 0.03469882

alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = join_by(sample == sample)) %>%
  filter(!diet =="Post_grass") %>% 
  filter(metric=="phylogenetic") %>% 
  summarize(shapiro_p_value = shapiro.test(value)$p.value) %>%
  pull(shapiro_p_value)

[1] 0.9219334

alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = join_by(sample == sample)) %>%
  filter(!diet =="Post_grass") %>% 
  filter(metric=="phylogenetic") %>% 
  summarize(var.test_p_value_phylo = var.test(value ~ diet)$p.value)

# A tibble: 1 × 1
  var.test_p_value_phylo
                   <dbl>
1                 0.0600

alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = join_by(sample == sample)) %>%
  filter(!diet =="Post_grass") %>% 
  filter(metric=="functional") %>% 
  summarize(shapiro_p_value = shapiro.test(value)$p.value) %>%
  pull(shapiro_p_value)

[1] 0.3997293

alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = join_by(sample == sample)) %>%
  filter(!diet =="Post_grass") %>% 
  filter(metric=="functional") %>% 
  summarize(var.test_p_functional = var.test(value ~ diet)$p.value)

# A tibble: 1 × 1
  var.test_p_functional
                  <dbl>
1                 0.859

6.2.2 Plots

alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  mutate(metric=factor(metric,levels=c("richness","neutral","phylogenetic", "functional"))) %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  ggplot(aes(y = value, x = diet, group=diet, color=diet, fill=diet)) +
  geom_boxplot(alpha = 0.5, outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.5) +
  scale_color_manual(values=diet_colors)+
  scale_fill_manual(values=diet_colors) +
  scale_x_discrete(labels=c("Wild" = "Wild", "Pre_grass" = "Captive-born")) +
  facet_wrap(. ~factor(metric, labels=c("richness" = "Richness", "neutral" = "Neutral", "phylogenetic" = "Phylogenetic", "functional"="Functional")), scales = "free", ncol=5) +
  coord_cartesian(xlim = c(1, NA)) +
  stat_compare_means(method = "kruskal.test", show.legend = F, size = 3)+
  theme_classic() +
  theme(
    strip.background = element_blank(),
    strip.text = element_text(size =12, lineheight = 0.6),
    panel.grid.minor.x = element_line(size = .1, color = "grey"),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_blank(), 
    legend.title = element_blank()
  )

6.2.3 Mixed models

alpha_div_meta <- alpha_div %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>% 
  filter(!diet =="Wild")

Model_richness <- glmer.nb(richness ~ diet+(1|individual), data = alpha_div_meta)
summary(Model_richness)

Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
 Family: Negative Binomial(138070152)  ( log )
Formula: richness ~ diet + (1 | individual)
   Data: alpha_div_meta

     AIC      BIC   logLik deviance df.resid 
   219.9    224.6   -105.9    211.9       20 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.5498 -0.1528  0.1297  0.3169  0.6688 

Random effects:
 Groups     Name        Variance  Std.Dev. 
 individual (Intercept) 4.654e-12 2.157e-06
Number of obs: 24, groups:  individual, 12

Fixed effects:
              Estimate Std. Error z value Pr(>|z|)    
(Intercept)   6.728231   0.010080 667.463   <2e-16 ***
dietPre_grass 0.001393   0.014482   0.096    0.923    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr)
dietPr_grss -0.712
optimizer (Nelder_Mead) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')

Model_neutral <- lme(fixed = neutral ~ diet, data = alpha_div_meta, random = ~ 1 | individual)
summary(Model_neutral)

Linear mixed-effects model fit by REML
  Data: alpha_div_meta 
       AIC      BIC    logLik
  232.4667 236.8308 -112.2333

Random effects:
 Formula: ~1 | individual
        (Intercept) Residual
StdDev: 0.001716963 35.50333

Fixed effects:  neutral ~ diet 
                  Value Std.Error DF  t-value p-value
(Intercept)   101.86734  10.24893 11 9.939316   0.000
dietPre_grass  10.49914  14.49417 11 0.724370   0.484
 Correlation: 
              (Intr)
dietPre_grass -0.707

Standardized Within-Group Residuals:
       Min         Q1        Med         Q3        Max 
-2.0371846 -0.2957926  0.1194249  0.4747379  2.1136583 

Number of Observations: 24
Number of Groups: 12

Model_phylo <- lme(fixed = phylogenetic ~ diet, data = alpha_div_meta, random = ~ 1 | individual)
summary(Model_phylo)

Linear mixed-effects model fit by REML
  Data: alpha_div_meta 
       AIC      BIC    logLik
  91.20606 95.57023 -41.60303

Random effects:
 Formula: ~1 | individual
         (Intercept) Residual
StdDev: 0.0001476023 1.432123

Fixed effects:  phylogenetic ~ diet 
                 Value Std.Error DF  t-value p-value
(Intercept)   6.628824 0.4134183 11 16.03418  0.0000
dietPre_grass 0.986097 0.5846618 11  1.68661  0.1198
 Correlation: 
              (Intr)
dietPre_grass -0.707

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-2.35109679 -0.47719193 -0.03827828  0.57082503  1.94927769 

Number of Observations: 24
Number of Groups: 12

Model_funct <- lme(fixed = functional ~ diet, data = alpha_div_meta, random = ~ 1 | individual)
summary(Model_funct)

Linear mixed-effects model fit by REML
  Data: alpha_div_meta 
        AIC       BIC   logLik
  -69.49624 -65.13207 38.74812

Random effects:
 Formula: ~1 | individual
         (Intercept)   Residual
StdDev: 1.631191e-06 0.03713605

Fixed effects:  functional ~ diet 
                  Value  Std.Error DF  t-value p-value
(Intercept)   1.5116555 0.01072026 11 141.0093  0.0000
dietPre_grass 0.0144785 0.01516073 11   0.9550  0.3601
 Correlation: 
              (Intr)
dietPre_grass -0.707

Standardized Within-Group Residuals:
       Min         Q1        Med         Q3        Max 
-1.5122885 -0.6779157  0.1131752  0.4439028  3.1416800 

Number of Observations: 24
Number of Groups: 12