Chapter 9 HMSC analysis

9.1 Load model

# Directory
localDir = "."
ModelDir = file.path(localDir, "temporal_hmsc/models")
MFDir = file.path(localDir, "temporal_hmsc/model_fit")

# Load model
if(!file.exists("temporal_hmsc/models/temporal_model_thin_10_samples_250_chains_4.Rdata")){
  download.file(
    url = 'https://sid.erda.dk/share_redirect/Bd8UfDO2D6/temporal_model_thin_10_samples_250_chains_4.Rdata',
    destfile = 'temporal_hmsc/models/temporal_model_thin_10_samples_250_chains_4.Rdata', method = 'curl'
    )
}

load(file = file.path(ModelDir, 'temporal_model_thin_10_samples_250_chains_4.Rdata'))

9.2 Cross-validation

The cross-validation part is also computationally intense. Make sure you have the right capacity for it.

# Create partitions
set.seed(12)
partition_1 <- createPartition(hM = m, nfolds = 2)
partition_2 <- c(rep(1,sum(m$XData$trial == "CA")),
                 rep(2,sum(m$XData$trial == "CB")))

set.seed(1)

predY.MF <- computePredictedValues(m, expected = TRUE)
MF <- evaluateModelFit(hM = m, predY = predY.MF)

filename <- file.path(MFDir, paste("MF_chains_4_thin_10_samples_250.rds"))
saveRDS(MF, file = filename)

MF <- readRDS(file = filename)
mean(MF$R2)

9.2.1 Model fit using 2-fold CV: samples randomly assigned to folds

set.seed(1)
predY.CV_1 <- computePredictedValues(m,
                                     expected = TRUE,
                                     partition = partition_1,
                                     nChains = 1,
                                     nParallel = 1)

MF.CV_1 <- evaluateModelFit(hM = m, predY = predY.CV_1)

# create file name and save
filename <- file.path(MFDir, paste("MF_CV1_chains_4_thin_10_samples_250.rds"))
saveRDS(MF.CV_1,file = filename)

MF_CV_1 <- readRDS(file = filename)
mean(MF_CV_1$R2)

9.2.2 Model fit using 2-fold CV: each fold contains the samples from one trial

set.seed(1)
predY.CV_2 <- computePredictedValues(m,
                                     expected = TRUE,
                                     partition = partition_2,
                                     nChains = 1,
                                     nParallel = 1)

MF.CV_2 <- evaluateModelFit(hM = m, predY = predY.CV_2)

# create file name and save
filename <- file.path(MFDir, paste("MF_CV2_chains_4_thin_10_samples_250.rds"))
saveRDS(MF.CV_2, file = filename)

MF_CV_2 <- readRDS(file = filename)
mean(MF_CV_2$R2)

9.3 Functional structure

beta_post <- getPostEstimate(m, parName = "Beta")
plotBeta(m, beta_post, supportLevel = 0.95, plotTree = TRUE)

Gradient_age <- constructGradient(m,
                                 focalVariable = "age",
                                 non.focalVariables = 1)

predY_age <- predict(m, Gradient = Gradient_age, expected = TRUE)

# Example using cmag_376
plotGradient(m,
             Gradient_age,
             pred = predY_age,
             yshow = 0,
             measure = "Y",
             index = 376,
             showData = TRUE)

9.5 Correlate bacterial MCI and response to time

The resulting plot corresponds to Figure 2e.

load("data/data_mg.Rdata")
trends <- read_tsv("data/hmsc_mag_trend.tsv")
mci_df <-
  gifts_elements %>%
  as.data.frame() %>% 
  mutate(avg_mci = rowMeans(.)) %>% 
  select(avg_mci) %>% 
  rownames_to_column(var = 'genome')

 
trends %>% 
  left_join(genome_taxonomy %>% select(genome, order), by = 'genome') %>% 
  left_join(mci_df, by = 'genome') %>% 
  ggplot() +
  geom_point(aes(x = parameter, y = avg_mci, color = order), size = 1) +
  scale_color_manual(values = order_colors) +
  geom_smooth(aes(x = parameter, y = avg_mci),
              method = 'lm', linewidth = 0.3) +
  theme_minimal() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.title.x = element_text(size = 8),
        axis.title.y = element_text(size = 8),
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 8),
        legend.position = 'none')

9.6 Variance partitioning

The resulting results correspond to Supplementary Table S4.

VP <- computeVariancePartitioning(m)
plotVariancePartitioning(hM = m, VP = VP)

9.7 Phylogenetic correlogram

The resulting plot corresponds to Figure 2d.

9.7.1 Calcultate phylogenetic distance

pw_phylo_dist <- cophenetic.phylo(genome_tree)
xy <- t(combn(colnames(pw_phylo_dist), 2))
pw_phylo_dist <- data.frame(xy, dist = pw_phylo_dist[xy])
colnames(pw_phylo_dist) <- c('genome.x', 'genome.y', 'distance')

pw_phylo_dist_taxa <-
  pw_phylo_dist %>%
  inner_join(genome_taxonomy, by = c('genome.x' = 'genome')) %>%
  inner_join(genome_taxonomy, by = c('genome.y' = 'genome'))


# Create distance table
tax_table <- c()
for (i in c(1:nrow(pw_phylo_dist_taxa))) {
  pair <- pw_phylo_dist_taxa[i,]
  #Phylum level
  if (!is.na(pair$phylum.x != pair$phylum.y) & pair$phylum.x != pair$phylum.y) {
    row <- c('Phylum', pair$distance)
  } else {
    #Class level
    if (!is.na(pair$class.x != pair$class.y) & pair$class.x != pair$class.y) {
      row <- c('Class', pair$distance)
    } else {
      #Order level
      if (!is.na(pair$order.x != pair$order.y) & pair$order.x != pair$order.y) {
        row <- c('Order', pair$distance)
      } else {
        #Family level
        if (!is.na(pair$family.x != pair$family.y) & pair$family.x != pair$family.y) {
          row <- c('Family', pair$distance)
        } else {
          # Genus
          if (!is.na(pair$genus.x != pair$genus.y) & pair$genus.x != pair$genus.y) {
            row <- c('Genus', pair$distance)
          }
        }
      }
    }
  }
  tax_table <- rbind(tax_table,row)
}

tax_table <-
  tax_table %>%
  data.frame() %>%
  dplyr::rename(taxonomy = 'X1', distance = 'X2') %>%
  mutate(distance = as.numeric(distance)) %>%
  write_tsv("data/taxonomy_distance.tsv")
tax_table <- read_tsv("data/taxonomy_distance.tsv")

tax_table %>%
ggplot(aes(x = distance,
           fill = taxonomy,
           color = taxonomy,
           alpha = 0.1
           )) +
  geom_density(adjust = 5) +
  scale_fill_manual(values = c('#E69F00','#e28cff','#999999','#56B4E9','#99cc00')) +
  scale_color_manual(values = c('#E69F00','#e28cff','#999999','#56B4E9','#99cc00')) +
  theme_void()

9.7.2 Phylogenetic signal

mpost <- convertToCodaObject(m)
quantile(unlist(mpost$Rho), probs = c(.05,.5,.95))

age_parameter <- beta_post$mean[8,]
age_parameter_phyloSorted <-
  data.frame(age_parameter = age_parameter[
    match(m$phyloTree$tip.label,
          names(age_parameter))
    ])
mean(rownames(age_parameter_phyloSorted) == m$phyloTree$tip.label)

new_tree <- m$phyloTree
new_tree$node.label <- NULL

obj <- phylo4d(new_tree, tip.data = age_parameter_phyloSorted)
barplot.phylo4d(obj)

age.cg <- phyloCorrelogram(obj, trait = "age_parameter")
saveRDS(age.cg,file = "age.cg.rds")
age.cg <- readRDS(file = "age.cg.rds")

plot(age.cg)
rm(list = ls())