Chapter 5 Alpha diversity

5.1 Load MG data

load("data/data_mg.Rdata")

5.2 Hill numbers - Alpha diversity

# Neutral
neutral <-
  mag_weighted %>% 
  dplyr::select(where(~ !all(. == 0))) %>% 
  hilldiv(., q = 1) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(neutral = 1) %>%
  rownames_to_column(var = "animal_code")

# Phylogenetic
phylogenetic <-
  mag_weighted %>% 
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 1, tree = genome_tree) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(phylogenetic = 1) %>%
  rownames_to_column(var = "animal_code")

# Functional KEGG
dist_kegg <-
  genome_kegg_paths %>%
  column_to_rownames(var = "genome") %>%
  traits2dist(., method = "gower")

functional_kegg <-
  mag_weighted %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 1 , dis = dist_kegg) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(functional_kegg = 1) %>%
  rownames_to_column(var = "animal_code") %>%
  mutate(functional_kegg = if_else(is.nan(functional_kegg), 1, functional_kegg))

# Sequence depth
log_seq_depth <-
  read_counts %>%
  column_to_rownames(var = 'genome') %>%  
  {as.data.frame(log(colSums(.)))} %>%
  rename(seq_depth = 'log(colSums(.))') %>%
  rownames_to_column(var = 'animal_code')

# Merge all metrics
alpha_div <-
  neutral %>%
  full_join(phylogenetic, by = 'animal_code') %>%
  full_join(functional_kegg, by = 'animal_code') %>%
  left_join(chicken_metadata, by = 'animal_code') %>%
  left_join(log_seq_depth, by = 'animal_code')
save(alpha_div, file = "data/alpha_div.Rdata")

5.2.1 Alpha diversity by sampling day

The resulting plot corresponds to Figure 2a.

load("data/alpha_div.Rdata")

# Neutral
p_n <- 
  ggplot(alpha_div, 
         aes(x = sampling_time, y = neutral, group = sampling_time)) +
  geom_jitter(aes_string(color = 'sampling_time'),
              size = 0.3, width = 0.25, alpha = 0.6) +
  geom_boxplot(aes_string(fill = 'sampling_time'),
               width = 0.5,
               alpha = 0.5,
               outlier.color = NA) +
  scale_fill_manual(values = c('#E69F00', '#CC6677', '#56B4E9')) +
  scale_color_manual(values = c('#E69F00', '#CC6677', '#56B4E9')) +
  theme_minimal() +
  ylab(NULL) +
  xlab(NULL) +
  ggtitle("Neutral") +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.title.x = element_text(size = 8),
        axis.title.y = element_text(size = 8),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 8),
        plot.title =  element_text(size = 10),
        legend.position = "none")

# Phylogenetic
p_p <- 
  ggplot(alpha_div, 
         aes(x = sampling_time, y = phylogenetic, group = sampling_time)) +
  geom_jitter(aes_string(colour = 'sampling_time'), 
              size = 0.3, width = 0.25, alpha = 0.6) +
  geom_boxplot(aes_string(fill = 'sampling_time'),
               width = 0.5,
               alpha = 0.5,
               outlier.color = NA) +
  scale_fill_manual(values = c('#E69F00', '#CC6677', '#56B4E9')) +
  scale_color_manual(values = c('#E69F00', '#CC6677', '#56B4E9')) +
  theme_minimal() +
  labs(x = NULL, y = NULL) +
  ggtitle("Phylogenetic") +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.title.x = element_text(size = 8),
        axis.title.y = element_text(size = 8),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 8),
        plot.title =  element_text(size = 10),
        legend.position = 'none')

# Functional KEGGs
p_f_kegg <-
  ggplot(alpha_div,
         aes(x = sampling_time, y = functional_kegg, group = sampling_time)) +
  geom_jitter(aes_string(colour = 'sampling_time'), 
              size = 0.3, width = 0.25, alpha = 0.6) +
  geom_boxplot(aes_string(fill = 'sampling_time'),
               width = 0.5,
               alpha = 0.5,
               outlier.color = NA) +
  scale_fill_manual(values = c("#E69F00", "#CC6677", "#56B4E9")) +
  scale_color_manual(values = c("#E69F00", "#CC6677", "#56B4E9")) +
  theme_minimal() +
  labs(x =NULL, y = NULL) +
  ggtitle("Functional") +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.title.x = element_text(size = 8),
        axis.title.y = element_text(size = 8),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 8),
        plot.title =  element_text(size = 10),
        legend.position = "none")

grid.arrange(p_n, p_p, p_f_kegg, ncol = 3)

5.3 Temporal development

The resulting tables correspond to Supplementary Table S2.

5.3.1 Neutral

# Assuming each variables has an independent effect on diversity                                                                                                    
m_div_n <-
  lme(neutral ~ seq_depth + sex + breed + treatment + trial + age,
      random = ~1|pen,
      data = alpha_div)

anova(m_div_n)
            numDF denDF   F-value p-value
(Intercept)     1   338 2115.9396  <.0001
seq_depth       1   338    4.1326  0.0428
sex             1    42    1.7532  0.1926
breed           1    42    0.2038  0.6540
treatment       2    42    1.4209  0.2529
trial           1    42    2.1672  0.1484
age             1   338   44.2580  <.0001
# Assuming each variable interacts with age                                                                                                 
m_div_n <-
  lme(neutral ~ seq_depth + 
        sex * age + 
        breed * age + 
        treatment * age + 
        trial * age,
      random = ~1|pen,
      data = alpha_div)

anova(m_div_n)
              numDF denDF   F-value p-value
(Intercept)       1   333 2087.4278  <.0001
seq_depth         1   333    4.1861  0.0415
sex               1    42    1.7208  0.1967
age               1   333   43.4715  <.0001
breed             1    42    0.0724  0.7892
treatment         2    42    1.7954  0.1786
trial             1    42    3.0412  0.0885
sex:age           1   333    4.6357  0.0320
age:breed         1   333    0.9477  0.3310
age:treatment     2   333    1.8660  0.1564
age:trial         1   333    0.5478  0.4597
# Best fitted model considered for the manuscript                                                                                               
m_div_n <-
  lme(neutral ~ seq_depth + sex + breed + treatment + trial + age + trial*age,
      random = ~1|pen,
      data = alpha_div)

anova(m_div_n)
            numDF denDF   F-value p-value
(Intercept)     1   337 2131.3662  <.0001
seq_depth       1   337    4.1298  0.0429
sex             1    42    1.7688  0.1907
breed           1    42    0.2047  0.6533
treatment       2    42    1.4325  0.2501
trial           1    42    2.1839  0.1469
age             1   337   44.1644  <.0001
trial:age       1   337    0.7599  0.3840

5.3.2 Phylogenetic

# Assuming each variables has an independent effect on diversity                                                                                                    
m_div_p <-
  lme(phylogenetic ~ seq_depth + sex + breed + treatment + trial + age,
      random = ~ 1|pen,
      data = alpha_div)

anova(m_div_p)
            numDF denDF   F-value p-value
(Intercept)     1   338 22133.936  <.0001
seq_depth       1   338     0.403  0.5258
sex             1    42     4.000  0.0520
breed           1    42    11.533  0.0015
treatment       2    42     1.141  0.3291
trial           1    42     3.813  0.0575
age             1   338  1158.601  <.0001
# Assuming each variable interacts with age                                                                                                 
m_div_p <-
  lme(phylogenetic ~ seq_depth +
        sex * age +
        breed * age +
        treatment * age +
        trial * age,
      random = ~ 1|pen,
      data = alpha_div)

anova(m_div_p)
              numDF denDF   F-value p-value
(Intercept)       1   333 22677.634  <.0001
seq_depth         1   333     0.413  0.5207
sex               1    42     4.099  0.0493
age               1   333  1184.616  <.0001
breed             1    42     4.711  0.0357
treatment         2    42     0.315  0.7316
trial             1    42    15.166  0.0003
sex:age           1   333     0.005  0.9447
age:breed         1   333     0.112  0.7384
age:treatment     2   333     1.960  0.1425
age:trial         1   333    10.298  0.0015
# Best fitted model considered for the manuscript                                                                                               
m_div_p <-
  lme(phylogenetic ~ seq_depth + sex + breed + treatment + trial + age + trial*age,
      random = ~ 1|pen,
      data = alpha_div)

anova(m_div_p)
            numDF denDF   F-value p-value
(Intercept)     1   337 22644.853  <.0001
seq_depth       1   337     0.413  0.5210
sex             1    42     4.093  0.0495
breed           1    42    11.799  0.0013
treatment       2    42     1.168  0.3210
trial           1    42     3.901  0.0549
age             1   337  1185.345  <.0001
trial:age       1   337     9.772  0.0019

5.3.3 Functional KEGG

# Assuming each variables has an independent effect on diversity                                                                                                    
m_div_f_kegg <-
  lme(functional_kegg ~ seq_depth + sex + breed + treatment + trial + age,
      random = ~ 1|pen,
      data = alpha_div)

anova(m_div_f_kegg)
            numDF denDF   F-value p-value
(Intercept)     1   338 306974.78  <.0001
seq_depth       1   338      0.20  0.6539
sex             1    42      0.79  0.3796
breed           1    42      2.99  0.0912
treatment       2    42      1.23  0.3026
trial           1    42      2.50  0.1211
age             1   338    406.32  <.0001
# Assuming each variable interacts with age                                                                                                 
m_div_f_kegg <-
  lme(functional_kegg ~ seq_depth +
        sex * age + 
        breed * age + 
        treatment * age + 
        trial * age,
      random = ~ 1|pen,
      data = alpha_div)

anova(m_div_f_kegg)
              numDF denDF   F-value p-value
(Intercept)       1   333 307850.60  <.0001
seq_depth         1   333      0.20  0.6537
sex               1    42      0.79  0.3796
age               1   333    401.66  <.0001
breed             1    42      1.12  0.2967
treatment         2    42      0.88  0.4208
trial             1    42      6.84  0.0123
sex:age           1   333      0.24  0.6215
age:breed         1   333      0.01  0.9050
age:treatment     2   333      0.55  0.5765
age:trial         1   333      1.21  0.2716
# Best fitted model considered for the manuscript                                                                                               
m_div_f_kegg <-
  lme(functional_kegg ~ seq_depth + sex + breed + treatment + trial + age + trial*age,
      random = ~ 1|pen,
      data = alpha_div)

anova(m_div_f_kegg)
            numDF denDF   F-value p-value
(Intercept)     1   337 307927.81  <.0001
seq_depth       1   337      0.20  0.6535
sex             1    42      0.79  0.3791
breed           1    42      3.00  0.0907
treatment       2    42      1.23  0.3016
trial           1    42      2.52  0.1203
age             1   337    406.41  <.0001
trial:age       1   337      1.17  0.2799
rm(list = ls())