Chapter 9 Elevation HMSC analysis

9.1 Load data

load("data/data.Rdata")

9.2 Compute variance partitioning

# Select modelchain of interest
load("hmsc_elevation/fit_model3_250_10.Rdata")

# Compute variance partitioning
varpart=computeVariancePartitioning(m)

varpart$vals %>%
   as.data.frame() %>%
   rownames_to_column(var="variable") %>%
   pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
   mutate(variable=factor(variable, levels=c("Elevation","I(Elevation^2)","logseqdepth","Transect","Random: Sampling_point"))) %>%
   group_by(variable) %>%
   summarise(mean=mean(value)*100,sd=sd(value)*100) %>%
   tt()

variable	mean	sd
Elevation	27.924257	8.342730
I(Elevation^2)	29.280193	11.011515
logseqdepth	6.528125	5.916625
Transect	9.031848	8.595048
Random: Sampling_point	27.235577	19.862624

# Basal tree
varpart_tree <- genome_tree %>%
        keep.tip(., tip=m$spNames)

#Varpart table
varpart_table <- varpart$vals %>%
   as.data.frame() %>%
   rownames_to_column(var="variable") %>%
   pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
   mutate(variable=factor(variable, levels=rev(c("Elevation","I(Elevation^2)","logseqdepth","Transect","Random: Sampling_point")))) %>%
   mutate(genome=factor(genome, levels=rev(varpart_tree$tip.label)))

#Phylums
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% varpart_tree$tip.label) %>%
    arrange(match(genome, varpart_tree$tip.label)) %>%
    mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
    column_to_rownames(var = "genome") %>%
    select(phylum)


colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% varpart_tree$tip.label) %>%
    arrange(match(genome, varpart_tree$tip.label)) %>%
     select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

# Basal ggtree
varpart_tree <- varpart_tree %>%
        force.ultrametric(.,method="extend") %>%
        ggtree(., size = 0.3)

***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************

# Add phylum colors next to the tree tips
varpart_tree <- gheatmap(varpart_tree, phylum_colors, offset=-0.2, width=0.1, colnames=FALSE) +
   scale_fill_manual(values=colors_alphabetic)+
      labs(fill="Phylum")

#Reset fill scale to use a different colour profile in the heatmap
varpart_tree <- varpart_tree + new_scale_fill()

# Add variance stacked barplot
vertical_tree <-  varpart_tree +
        scale_fill_manual(values=c("#34738f","#cccccc","#ed8a45","#b2b530","#be3e2b","#83bb90","#f6de6c", "#122f3d"))+
        geom_fruit(
             data=varpart_table,
             geom=geom_bar,
             mapping = aes(x=value, y=genome, fill=variable, group=variable),
             pwidth = 2,
             offset = 0.05,
             width= 1,
             orientation="y",
             stat="identity",
             color = "white",
             size=0.1)+
      labs(fill="Variable")

vertical_tree

9.3 Model fit

MFCV <- evaluateModelFit(hM=m, predY=cv)

mean(MFCV$R2, na.rm = TRUE)

[1] 0.1163624

var_pred_table <- tibble(mag=m$Elevation,
       pred=MFCV$R2,
       var_pred=MFCV$R2 * varpart$vals[1,],
       support=getPostEstimate(hM=m, parName="Beta")$support %>% .[2,],
       estimate=getPostEstimate(hM=m, parName="Beta")$mean %>% .[2,])

# Select desired support threshold
support=0.9
negsupport=1-support

# Basal tree
postestimates_tree <- genome_tree %>%
        keep.tip(., tip=m$spNames)

#plotBeta(hM=m, post=getPostEstimate(hM=m, parName="Beta"), param = "Support", plotTree = TRUE, covNamesNumbers=c(1,0))

# Posterior estimate table
post_estimates <- getPostEstimate(hM=m, parName="Beta")$support %>%
    as.data.frame() %>%
    mutate(variable=m$covNames) %>%
    pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
    mutate(genome=factor(genome, levels=rev(postestimates_tree$tip.label))) %>%
    mutate(trend = case_when(
          value >= support ~ "Positive",
          value <= negsupport ~ "Negative",
          TRUE ~ "Neutral")) 


post_estimate_df<-post_estimates %>%
    select(-trend) %>%
    mutate(value = case_when(
          value >= support~ "Positive",
          value <= negsupport ~ "Negative",
          TRUE ~ "Neutral")) %>%
    mutate(value=factor(value, levels=c("Positive","Neutral","Negative"))) %>%
    pivot_wider(names_from = variable, values_from = value) %>%
    rename(intercept=2,
             Elevation=3,
           Elevation_2=4,
           TransectAran=5,
           TransectSentein=6,
           TransectTourmalet=7,
             logseqdepth=8
           ) %>%
    select(genome,intercept,Elevation,Elevation_2,TransectAran,TransectSentein,TransectTourmalet,logseqdepth) %>%
    column_to_rownames(var="genome")

#Phylums
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% postestimates_tree$tip.label) %>%
    arrange(match(genome, postestimates_tree$tip.label)) %>%
    mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
    column_to_rownames(var = "genome") %>%
    select(phylum)


colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% postestimates_tree$tip.label) %>%
    arrange(match(genome, postestimates_tree$tip.label)) %>%
     select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

# Basal ggtree
postestimates_tree <- postestimates_tree %>%
        force.ultrametric(.,method="extend") %>%
        ggtree(., size = 0.3)

***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************

#Add phylum colors next to the tree tips
postestimates_tree <- gheatmap(postestimates_tree, phylum_colors, offset=-0.2, width=0.1, colnames=FALSE) +
      scale_fill_manual(values=colors_alphabetic)+
      labs(fill="Phylum")

#Reset fill scale to use a different colour profile in the heatmap
postestimates_tree <- postestimates_tree + new_scale_fill()

# Add posterior significant heatmap

postestimates_tree <- gheatmap(postestimates_tree, post_estimate_df, offset=0, width=0.5, colnames=TRUE, colnames_position="top",colnames_angle=90, colnames_offset_y=1, hjust=0) +
        scale_fill_manual(values=c("#be3e2b","#f4f4f4","#b2b530"))+
        labs(fill="Trend")

postestimates_tree +
        vexpand(.25, 1) # expand top

#Compute the residual correlation matrix
OmegaCor = computeAssociations(m)

# Reference tree (for sorting genomes)
genome_tree_subset <- genome_tree %>%
        keep.tip(., tip=m$spNames)


#Co-occurrence matrix at the animal level
supportLevel = 0.95
toPlot = ((OmegaCor[[1]]$support>supportLevel)
          + (OmegaCor[[1]]$support<(1-supportLevel))>0)*OmegaCor[[1]]$mean

matrix <- toPlot %>%
      as.data.frame() %>%
      rownames_to_column(var="genome1") %>%
      pivot_longer(!genome1, names_to = "genome2", values_to = "cor") %>%
      mutate(genome1= factor(genome1, levels=genome_tree_subset$tip.label)) %>%
      mutate(genome2= factor(genome2, levels=genome_tree_subset$tip.label)) %>%
      ggplot(aes(x = genome1, y = genome2, fill = cor)) +
            geom_tile() +
            scale_fill_gradient2(low = "#be3e2b",
                       mid = "#f4f4f4",
                       high = "#b2b530")+
            theme_void()

corr.legend <- get_legend(matrix, position="none") 
corr.legend <- as_ggplot(corr.legend)

vtree <- genome_tree_subset %>%
  force.ultrametric(.,method="extend") %>%
  ggtree(., expand=1.5) +
  hexpand(0.5)

***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************

#Add phylum colors next to the tree tips
vtree <- gheatmap(vtree, phylum_colors, offset=-0.1, width=0.6, colnames=FALSE) +
      scale_fill_manual(values=colors_alphabetic) +
      theme(legend.position = 'none') + scale_y_reverse()

vtreeD <- genome_tree_subset %>%
  force.ultrametric(.,method="extend") %>%
  ggtree(., expand=1.5, layout="dendrogram")

***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************

#Add phylum colors next to the tree tips
vtreeD <- gheatmap(vtreeD, phylum_colors, offset=-0.1, width=0.6, colnames=FALSE) +
      scale_fill_manual(values=colors_alphabetic) +
      theme(legend.position = 'none') 

# properly align trees to corr matrix with package aplot
matrix %>%  insert_left(vtree, width=.25) %>% aplot::insert_top(vtreeD, height=.3) %>% insert_right(corr.legend, width=0.15)

9.4 Phylogenetic signal

mpost <- convertToCodaObject(m)
quantile(unlist(mpost$Rho), probs = c(.05,.5,.95))

  5%  50%  95% 
0.99 1.00 1.00

9.5 Elevation predictions

gradient = seq(940, 2350, by = 100)
gradientlength = length(gradient)

#Treatment-specific gradient predictions
pred_elevation <- constructGradient(m,
                      focalVariable = "Elevation",
                      non.focalVariables = 1,
                      ngrid=gradientlength) %>%
                      predict(m, Gradient = ., expected = TRUE) %>%
                      do.call(rbind,.) %>%
                      as.data.frame() %>%
                      mutate(elevation=rep(gradient,1000)) %>%
                      pivot_longer(-c(elevation), names_to = "genome", values_to = "value")

9.5.1 Predicted adundance at elevational level

pred_elevation %>%
  left_join(genome_metadata, by = join_by(genome == genome)) %>%
  select("elevation", "genome", "value", "phylum")%>%
  ggplot(., aes(x = elevation, y = value, fill = phylum, colour=phylum)) +
  stat_summary(fun = mean, geom = "line", size = 1) +
  stat_summary(fun.data = mean_cl_boot, geom = "ribbon", alpha = 0.2) +
  scale_color_manual(name="phylum",
                       breaks=c("p__Bacillota_A", "p__Bacillota", "p__Bacillota_C", "p__Bacteroidota", "p__Campylobacterota", "p__Desulfobacterota", "p__Fusobacteriota", "p__Pseudomonadota", "p__Verrucomicrobiota"),
                       labels=c("Bacillota_A", "Bacillota", "Bacillota_C", "Bacteroidota", "Campylobacterota", "Desulfobacterota", "Fusobacteriota", "Pseudomonadota", "Verrucomicrobiota"),
                       values=c("#086c98ff","#08d1d1ff","#04384f","#670034","#ff0000","#008000","#ebac82","#ebe082","#b09595ff")) +
      scale_fill_manual(name="phylum",
                       breaks=c("p__Bacillota_A", "p__Bacillota", "p__Bacillota_C", "p__Bacteroidota", "p__Campylobacterota", "p__Desulfobacterota", "p__Fusobacteriota", "p__Pseudomonadota", "p__Verrucomicrobiota"),
                       labels=c("Bacillota_A", "Bacillota", "Bacillota_C", "Bacteroidota", "Campylobacterota", "Desulfobacterota", "Fusobacteriota", "Pseudomonadota", "Verrucomicrobiota"),
                       values=c("#086c98ff","#08d1d1ff","#04384f","#670034","#ff0000","#008000","#ebac82","#ebe082","#b09595ff")) +
  theme_classic() +
  xlab("Elevation (m)") +
  ylab("Predicted Abundance")

9.5.2 Responses to elevation

# Select desired support threshold
support=0.9
negsupport=1-support

#Get phylum colors from the EHI standard
phylum_colors <- genome_metadata %>%
    left_join(read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv"), by=join_by(phylum == phylum)) %>%
    arrange(match(genome, postestimates_tree$tip.label)) %>%
    select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    #slice(2:5) %>%
    select(colors) %>%
    pull()

post_estimates %>%
    filter(variable=="Elevation") %>%
    select(genome, trend) %>%
    left_join(pred_elevation, by=join_by(genome==genome)) %>%
    group_by(genome, trend, elevation) %>%
    summarize(value = mean(value, na.rm = TRUE)) %>%
    left_join(genome_metadata, by=join_by(genome == genome)) %>%
    ggplot(aes(x=elevation, y=value, group=genome, color=phylum, linetype=trend)) +
        geom_line() +
        scale_linetype_manual(values=c("solid","dashed","solid")) +
        scale_color_manual(values=phylum_colors) +
        facet_grid(fct_rev(trend) ~ phylum) +
        labs(y="Genome abundance (log)",x="Elevation") +
        theme(legend.position = "none") +
        theme_minimal() +
         theme(legend.position = "none",
               axis.text.x = element_text(angle = 45, hjust = 0.8,),
               axis.line.x = element_line(size = 0.5, linetype = "solid", colour = "black"),
)

9.6 Functional predictions

9.6.1 Element level

elements_table <- genome_gifts_filt %>%
    to.elements(., GIFT_db) %>%
    as.data.frame()

community_elements <- pred_elevation %>%
  group_by(elevation, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "elevation") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(elements_table,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="elevation")
   })

calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

element_predictions <- map_dfc(community_elements, function(mat) {
      mat %>%
        column_to_rownames(var = "elevation") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_elements[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)

9.6.1.1 Positive associated functions at element level

unique_funct_db<- GIFT_db[c(2,4,5,6)] %>% 
  distinct(Code_element, .keep_all = TRUE)

element_predictions %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  left_join(unique_funct_db, by = join_by(trait == Code_element))%>%
  arrange(Domain,Function)%>%
  paged_table()

9.6.1.2 Negative associated functions at element level

element_predictions %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  left_join(unique_funct_db, by = join_by(trait == Code_element))%>%
  arrange(Domain,Function)%>%
  paged_table()

#Positively associated
positive <- element_predictions %>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)

#Negatively associated
negative <- element_predictions %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

all_elements <- bind_rows(positive,negative) %>%
  left_join(GIFT_db,by=join_by(trait==Code_element)) %>%
  mutate(trait=factor(trait,levels=c(rev(positive$trait),rev(negative$trait)))) %>%
  mutate(Code_function=factor(Code_function)) %>%
  mutate(element_legend=str_c(trait," - ",Element)) %>%
  mutate(function_legend=str_c(Code_function," - ",Function)) %>%
  select(trait,mean,p1,p9,element_legend,function_legend) %>% 
  unique()

gift_colors <- read_tsv("data/gift_colors.tsv") %>% 
  mutate(legend=str_c(Code_function," - ",Function))  %>% 
  filter(legend %in% all_elements$function_legend)

all_elements %>%
  ggplot(aes(x=mean, y=fct_reorder(element_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      xlim(c(-0.15,0.15)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Functional trait")

community_elements %>%
    bind_rows() %>%
    pivot_longer(-elevation, names_to = "trait", values_to = "value") %>%
    filter(trait %in% positive$trait) %>%
    mutate(trait=factor(trait, levels=positive$trait)) %>%
    mutate(elevation=as.numeric(elevation)) %>%
    ggplot(aes(x=elevation, y=value)) +
          geom_smooth(method = lm, formula = y ~ x, se = TRUE) +
          #geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), se = TRUE) +
          facet_wrap(~trait, ncol=5, scales="free") +
          theme_minimal() +
          labs(x="Elevation",y="Metabolic Capacity Index")

9.6.1.3 GIFT test visualization

# Aggregate bundle-level GIFTs into the compound level
#genome_counts_filt_filt <- tibble::rownames_to_column(genome_counts_filt_filt, var = "genome")
GIFTs_elements_filtered <- elements_table[rownames(elements_table) %in% genome_counts_filt$genome, ]
GIFTs_elements_filtered <- as.data.frame(GIFTs_elements_filtered) %>%
  select_if(~ !is.numeric(.) || sum(.) != 0)

# Get community-weighed average GIFTs per sample
GIFTs_elements_community <- to.community(GIFTs_elements_filtered, genome_counts_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db)

GIFTs_elements_community %>%
    as.data.frame() %>%
    rownames_to_column(var="sample") %>%
    pivot_longer(!sample,names_to="trait",values_to="gift") %>%
    left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
    mutate(functionid = substr(trait, 1, 3)) %>%
    mutate(trait = case_when(
      trait %in% GIFT_db$Code_element ~ GIFT_db$Element[match(trait, GIFT_db$Code_element)],
      TRUE ~ trait
    )) %>%
    mutate(functionid = case_when(
      functionid %in% GIFT_db$Code_function ~ GIFT_db$Function[match(functionid, GIFT_db$Code_function)],
      TRUE ~ functionid
    )) %>%
    mutate(trait=factor(trait,levels=unique(GIFT_db$Element))) %>%
    mutate(functionid=factor(functionid,levels=unique(GIFT_db$Function))) %>%
    ggplot(aes(x=sample,y=trait,fill=gift)) +
        geom_tile(colour="white", linewidth=0.2)+
        scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
        facet_grid(functionid ~ Elevation, scales="free",space="free") +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
              strip.text.y = element_text(angle = 0)) +
        labs(y="Traits",x="Samples",fill="GIFT")

9.6.2 Functional level

functions_table <- elements_table %>%
    to.functions(., GIFT_db) %>%
    as.data.frame()

community_functions <- pred_elevation %>%
  group_by(elevation, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "elevation") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(functions_table,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="elevation")
   })

#max-min option
calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

function_predictions <- map_dfc(community_functions, function(mat) {
      mat %>%
        column_to_rownames(var = "elevation") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_functions[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)

9.6.2.1 Positive associated functions at element level

function_predictions %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  paged_table()

9.6.2.2 Negative associated functions at element level

function_predictions %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  paged_table()

positive_func <- function_predictions %>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  dplyr::select(-negative_support) %>%
  rename(support=positive_support)

negative_func <- function_predictions %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  dplyr::select(-positive_support) %>%
  rename(support=negative_support)

all_functions <- bind_rows(positive_func,negative_func) %>%
  left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
  mutate(trait=factor(trait)) %>%
  mutate(function_legend=str_c(trait," - ",Function)) %>%
  select(trait,mean,p1,p9,function_legend) %>% 
  unique()

gift_colors <- read_tsv("data/gift_colors.tsv") %>% 
  mutate(legend=str_c(Code_function," - ",Function))  %>% 
  filter(legend %in% all_functions$function_legend)

all_functions %>%
  ggplot(aes(x=mean, y=fct_reorder(function_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      xlim(c(-0.06,0.06)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Functional trait") +
      guides(col = guide_legend(ncol = 1))

community_functions %>%
    bind_rows() %>%
    pivot_longer(-elevation, names_to = "trait", values_to = "value") %>%
  filter(trait %in% function_predictions$trait) %>%
  mutate(trait=factor(trait, levels=function_predictions$trait)) %>%
    mutate(elevation=as.numeric(elevation)) %>%
    ggplot(aes(x=elevation, y=value)) +
          geom_smooth(method = lm, formula = y ~ x, se = TRUE) +
          #geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), se = TRUE) +
          facet_wrap(~trait, ncol=5, scales="free") +
          theme_minimal() +
          labs(x="Elevation",y="Metabolic Capacity Index")

9.6.2.3 GIFT test visualization

# Aggregate element-level GIFTs into the function level
GIFTs_functions <- to.functions(GIFTs_elements_filtered, GIFT_db)
functions <- GIFTs_functions %>%
  as.data.frame()

# Get community-weighed average GIFTs per sample
GIFTs_functions_community <- to.community(GIFTs_functions, genome_counts_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db)

GIFTs_functions_community %>%
    as.data.frame() %>%
    rownames_to_column(var="sample") %>%
    pivot_longer(!sample,names_to="trait",values_to="gift") %>%
    left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
    ggplot(aes(x=trait,y=sample,fill=gift)) +
        geom_tile(colour="white", size=0.2)+
        scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
        facet_grid(Elevation ~ ., scales="free",space="free")

9.6.3 Domain level

domains_table <- functions_table %>%
    to.domains(., GIFT_db) %>%
    as.data.frame()

community_domains <- pred_elevation %>%
  group_by(elevation, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "elevation") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(domains_table,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="elevation")
   })

#max-min option
calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

domain_predictions <- map_dfc(community_domains, function(mat) {
      mat %>%
        column_to_rownames(var = "elevation") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_domains[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)

9.6.3.1 Positive associated functions at element level (there isn’t)

domain_predictions %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  paged_table()

9.6.3.2 Negative associated functions at element level (there isn’t)

domain_predictions %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  paged_table()

all_domains <- domain_predictions %>%
  left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
  mutate(trait=factor(trait)) %>%
  mutate(function_legend=str_c(trait," - ",Function)) %>%
  select(trait,mean,p1,p9) %>% 
  unique()

all_domains %>%
  ggplot(aes(x=mean, y=fct_reorder(trait, mean), xmin=p1, xmax=p9, color=trait)) +
      geom_point() +
      geom_errorbar() +
      xlim(c(-0.02,0.03)) +
      geom_vline(xintercept=0) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Domain level") +
      guides(col = guide_legend(ncol = 1))

community_domains %>%
    bind_rows() %>%
    pivot_longer(-elevation, names_to = "trait", values_to = "value") %>%
  filter(trait %in% domain_predictions$trait) %>%
  mutate(trait=factor(trait, levels=domain_predictions$trait)) %>%
    mutate(elevation=as.numeric(elevation)) %>%
    ggplot(aes(x=elevation, y=value)) +
          geom_smooth(method = lm, formula = y ~ x, se = TRUE) +
          #geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), se = TRUE) +
          facet_wrap(~trait, ncol=5, scales="free") +
          theme_minimal() +
          labs(x="Elevation",y="Metabolic Capacity Index")

9.6.3.3 GIFT test visualization

# Aggregate function-level GIFTs into overall Biosynthesis, Degradation and Structural GIFTs
GIFTs_domains <- to.domains(GIFTs_functions, GIFT_db)
domains <- GIFTs_domains %>%
  as.data.frame()

# Get community-weighed average GIFTs per sample
GIFTs_domains_community <- to.community(GIFTs_domains, genome_counts_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db)

GIFTs_domains_community %>%
    as.data.frame() %>%
    rownames_to_column(var="sample") %>%
    pivot_longer(!sample,names_to="trait",values_to="gift") %>%
    left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
    ggplot(aes(x=trait,y=sample,fill=gift)) +
        geom_tile(colour="white", size=0.2)+
        scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
        facet_grid(Elevation ~ ., scales="free",space="free")