Chapter 11 HMSC analysis
11.1 Load data
load("data/data.Rdata")
if(!file.exists("hmsc_behaviour/fit_model1_250_10.Rdata")){
download.file(url='https://sid.erda.dk/share_redirect/A6rm945NbY/hmsc_behaviour/fit_model1_250_10.Rdata',
destfile='hmsc/fit_model1_250_10.Rdata', method='curl')
}
load("hmsc_behaviour/fit_model1_250_10.Rdata")
# Select desired support threshold
support_threshold=0.9
negsupport_threshold=1-support_threshold
11.2 Variance partitioning
# Compute variance partitioning
varpart=computeVariancePartitioning(m)
varpart$vals %>%
as.data.frame() %>%
rownames_to_column(var="variable") %>%
pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
mutate(variable=factor(variable, levels=rev(c("behaviour","sex","logseqdepth","Random: location")))) %>%
group_by(variable) %>%
summarise(mean=mean(value)*100,sd=sd(value)*100) %>%
tt()
variable | mean | sd |
---|---|---|
Random: location | 40.1370903 | 24.4507737 |
logseqdepth | 54.0327314 | 25.1272735 |
sex | 4.9385216 | 5.6346956 |
behaviour | 0.8916567 | 0.9764987 |
# Basal tree
varpart_tree <- genome_tree
#Varpart table
varpart_table <- varpart$vals %>%
as.data.frame() %>%
rownames_to_column(var="variable") %>%
pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
mutate(genome=factor(genome, levels=rev(varpart_tree$tip.label))) %>%
mutate(variable=factor(variable, levels=rev(c("behaviour","sex","logseqdepth","Random: location"))))
#Phyla
phylum_groups <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
mutate(phylum=str_remove_all(phylum, "p__"))%>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
filter(genome %in% varpart_tree$tip.label) %>%
arrange(match(genome, varpart_tree$tip.label)) %>%
mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
column_to_rownames(var = "genome") %>%
dplyr::select(phylum)
colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
mutate(phylum=str_remove_all(phylum, "p__"))%>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
filter(genome %in% varpart_tree$tip.label) %>%
arrange(match(genome, varpart_tree$tip.label)) %>%
dplyr::select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
dplyr::select(colors) %>%
pull()
# Basal ggtree
varpart_tree <- varpart_tree %>%
force.ultrametric(.,method="extend") %>%
ggtree(., size = 0.3)
***************************************************************
* Note: *
* force.ultrametric does not include a formal method to *
* ultrametricize a tree & should only be used to coerce *
* a phylogeny that fails is.ultrametric due to rounding -- *
* not as a substitute for formal rate-smoothing methods. *
***************************************************************
# Add phylum colors next to the tree tips
varpart_tree <- gheatmap(varpart_tree, phylum_groups, offset=-0.2, width=0.1, colnames=FALSE) +
scale_fill_manual(values=colors_alphabetic)+
labs(fill="Phylum")
#Reset fill scale to use a different colour profile in the heatmap
varpart_tree <- varpart_tree + new_scale_fill()
# Add variance stacked barplot
vertical_tree <- varpart_tree +
scale_fill_manual(values=c("#506a96","#cccccc","#be3e2b","#f6de6c"))+
geom_fruit(
data=varpart_table,
geom=geom_bar,
mapping = aes(x=value, y=genome, fill=variable, group=variable),
pwidth = 2,
offset = 0.05,
width= 1,
orientation="y",
stat="identity")+
labs(fill="Variable")
vertical_tree
11.3 Model fit
[1] 0.5184004
genome_fit <- tibble(genome=m$spNames, r2 = MFCV[[2]])
genome_counts_filt %>%
mutate_if(is.numeric, ~ . / sum(.)) %>%
left_join(genome_fit, by="genome") %>%
filter(r2>0.5) %>%
select(-c(genome,r2)) %>%
colSums() %>%
hist(main="Predictive capacity")
var_pred_table <- tibble(mag=m$spNames,
pred=MFCV$R2,
var_pred=MFCV$R2 * varpart$vals[1,],
support=getPostEstimate(hM=m, parName="Beta")$support %>% .[2,],
estimate=getPostEstimate(hM=m, parName="Beta")$mean %>% .[2,]) %>%
mutate(enrichment=ifelse(support>=support_threshold,"Feral","Neutral")) %>%
mutate(enrichment=ifelse(support<=negsupport_threshold,"Domestic",enrichment))
var_pred_table %>%
ggplot(aes(x=estimate,y=var_pred, color=enrichment))+
geom_point()+
scale_color_manual(values=c("#bd70ae","#949293","#ebe8e8"))+
geom_hline(yintercept=0.005, linetype="dashed")+
theme_minimal()
11.4 Posterior estimates
# Basal tree
postestimates_tree <- genome_tree
# Posterior estimate table
post_beta <- getPostEstimate(hM=m, parName="Beta")$support %>%
as.data.frame() %>%
mutate(variable=m$covNames) %>%
pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
mutate(genome=factor(genome, levels=rev(postestimates_tree$tip.label))) %>%
mutate(value = case_when(
value >= support_threshold ~ "Positive",
value <= negsupport_threshold ~ "Negative",
TRUE ~ "Neutral")) %>%
mutate(value=factor(value, levels=c("Positive","Neutral","Negative"))) %>%
pivot_wider(names_from = variable, values_from = value) %>%
#select(genome,sp_vulgaris,area_semi,area_urban,sp_vulgarisxarea_semi,sp_vulgarisxarea_urban,season_spring,season_winter,sp_vulgarisxseason_spring,sp_vulgarisxseason_winter) %>%
column_to_rownames(var="genome")
# Genome-specific attributes
genome_depth <- genome_counts_filt %>%
mutate_if(is.numeric, ~ . / sum(.)) %>%
rowwise() %>%
mutate(depth = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
ungroup() %>%
select(genome,depth)
#Phylums
phylum_groups <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
mutate(phylum=str_remove_all(phylum, "p__")) %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
filter(genome %in% postestimates_tree$tip.label) %>%
arrange(match(genome, postestimates_tree$tip.label)) %>%
mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
column_to_rownames(var = "genome") %>%
dplyr::select(phylum)
colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
mutate(phylum=str_remove_all(phylum, "p__")) %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
filter(genome %in% postestimates_tree$tip.label) %>%
arrange(match(genome, postestimates_tree$tip.label)) %>%
dplyr::select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
dplyr::select(colors) %>%
pull()
# Basal ggtree
postestimates_tree <- postestimates_tree %>%
force.ultrametric(.,method="extend") %>%
ggtree(., size = 0.3)
***************************************************************
* Note: *
* force.ultrametric does not include a formal method to *
* ultrametricize a tree & should only be used to coerce *
* a phylogeny that fails is.ultrametric due to rounding -- *
* not as a substitute for formal rate-smoothing methods. *
***************************************************************
#Add phylum colors next to the tree tips
postestimates_tree <- gheatmap(postestimates_tree, phylum_groups, offset=-0.2, width=0.1, colnames=FALSE) +
scale_fill_manual(values=colors_alphabetic)+
labs(fill="Phylum")
#Reset fill scale to use a different colour profile in the heatmap
postestimates_tree <- postestimates_tree + new_scale_fill()
# Add posterior significant heatmap
postestimates_tree <- gheatmap(postestimates_tree, post_beta, offset=0, width=0.5, colnames=TRUE, colnames_position="top",colnames_angle=90, colnames_offset_y=1, hjust=0) +
scale_fill_manual(values=c("#be3e2b","#f4f4f4","#b2b530"))+
labs(fill="Trend")
postestimates_tree +
vexpand(.25, 1) # expand top
11.4.1 Association with cat behaviour
estimate <- getPostEstimate(hM=m, parName="Beta")$mean %>%
as.data.frame() %>%
mutate(variable=m$covNames) %>%
filter(variable=="behaviour") %>%
pivot_longer(!variable, names_to = "genome", values_to = "mean") %>%
dplyr::select(genome,mean)
support <- getPostEstimate(hM=m, parName="Beta")$support %>%
as.data.frame() %>%
mutate(variable=m$covNames) %>%
filter(variable=="behaviour") %>%
pivot_longer(!variable, names_to = "genome", values_to = "support") %>%
dplyr::select(genome,support)
#pdf("figures/sig_Mags_violin.pdf",width=10, height=8)
inner_join(estimate,support,by=join_by(genome==genome)) %>%
mutate(significance=ifelse(support >= 0.9 | support <= 0.1,1,0)) %>%
mutate(support=ifelse(mean<0,1-support,support)) %>%
left_join(genome_metadata, by = join_by(genome == genome)) %>%
mutate(phylum = ifelse(support > 0.9, phylum, NA)) %>%
ggplot(aes(x=mean,y=support,color=phylum))+
geom_point(alpha=0.7, shape=16, size=3)+
scale_color_manual(values = phylum_colors) +
geom_vline(xintercept = 0) +
xlim(c(-0.4,0.4)) +
labs(x = "Beta regression coefficient", y = "Posterior probability") +
theme_minimal()
estimate_quantiles <- getPostEstimate(hM=m, parName="Beta", q=c(0.1,0.9))$q
map_dfr(1:229, ~ {
tibble(
q10 = estimate_quantiles[1, 2, .x], # Extract 10% quantile
q90 = estimate_quantiles[2, 2, .x] # Extract 90% quantile
)
}) %>% mutate(genome=estimate$genome) %>%
right_join(estimate, by="genome") %>%
left_join(genome_metadata, by="genome") %>%
filter(q10>0 | q90<0) %>%
ggplot(aes(x=mean, y=fct_reorder(genome, mean), xmin=q10, xmax=q90, color=phylum)) +
geom_point() +
geom_errorbar() +
#xlim(c(-0.03,0.04)) +
geom_vline(xintercept=0) +
scale_color_manual(values = phylum_colors) +
theme_minimal() +
labs(x="Regression coefficient",y="Bacterial genomes") +
theme(legend.position="none")
estimate_top <- estimate %>%
arrange(desc(mean)) %>%
slice_head(n = 15) %>%
bind_rows(
estimate %>%
arrange(mean) %>%
slice_head(n = 15))
estimate_top_significant <-map_dfr(1:229, ~ {
tibble(
q10 = estimate_quantiles[1, 2, .x], # Extract 10% quantile
q90 = estimate_quantiles[2, 2, .x] # Extract 90% quantile
)
}) %>% mutate(genome=estimate$genome) %>%
right_join(estimate_top, by="genome") %>%
left_join(genome_metadata, by="genome") %>%
filter(q10>0 | q90<0) %>%
arrange(mean)
estimate_top_significant %>%
ggplot(aes(x=mean, y=fct_reorder(genome, mean), xmin=q10, xmax=q90, color=phylum)) +
geom_point() +
geom_errorbar() +
scale_y_discrete(labels = estimate_top_significant$genus) +
geom_vline(xintercept=0) +
scale_color_manual(values = phylum_colors[-c(6,7)]) +
theme_minimal() +
labs(x="Regression coefficient",y="Functional trait") +
theme(legend.position="none")
11.4.1.1 Associated with tame cats
behaviour_tame <- getPostEstimate(hM=m, parName="Beta")$support %>%
as.data.frame() %>%
mutate(variable=m$covNames) %>%
pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
mutate(trend = case_when(
value >= support_threshold ~ "Positive",
value <= negsupport_threshold ~ "Negative",
TRUE ~ "Neutral")) %>%
filter(variable=="behaviour", trend=="Negative") %>%
arrange(-value) %>%
left_join(genome_metadata,by=join_by(genome==genome)) %>%
dplyr::select(genome,phylum,class,order,family,genus, species,value) %>%
arrange(phylum, class, family, genus, species)
behaviour_tame %>%
arrange(value) %>%
paged_table()
11.4.1.2 Associated with aggresive cats
behaviour_aggresive <- getPostEstimate(hM=m, parName="Beta")$support %>%
as.data.frame() %>%
mutate(variable=m$covNames) %>%
pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
mutate(trend = case_when(
value >= support_threshold ~ "Positive",
value <= negsupport_threshold ~ "Negative",
TRUE ~ "Neutral")) %>%
filter(variable=="behaviour", trend=="Positive") %>%
arrange(value) %>%
left_join(genome_metadata,by=join_by(genome==genome)) %>%
dplyr::select(genome,phylum,class,order,family,genus,species,value) %>%
arrange(phylum,class,family,genus,species)
behaviour_aggresive %>%
arrange(-value) %>%
paged_table()
11.6 Microbiome composition predictions
Support values of the cat behaviour variable per genome are interpreted as enrichment for aggresuve or tame cats.
The most likely microbiome compositions are predicted for the gradient from tameness to aggressiveness based on the hmsc models
11.7 Functional predictions
11.7.1 Element level
elements_table <- genome_gifts %>%
to.elements(., GIFT_db) %>%
as.data.frame() %>%
.[predictive_mags,] #keep only predictive mags
#Calculate community-level functional traits for predicted communities
community_elements <- prediction %>%
filter(genome %in% predictive_mags) %>% #keep only predictive mags
group_by(behaviour, genome) %>%
mutate(row_id = row_number()) %>%
pivot_wider(names_from = genome, values_from = value) %>%
ungroup() %>%
group_split(row_id) %>%
as.list() %>%
lapply(., FUN = function(x){x %>%
dplyr::select(-row_id) %>%
column_to_rownames(var = "behaviour") %>%
as.data.frame() %>%
exp() %>%
t() %>%
tss() %>%
to.community(elements_table,.,GIFT_db) %>%
as.data.frame() %>%
rownames_to_column(var="behaviour")
})
calculate_slope <- function(x) {
lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
coef(lm_fit)[2]
}
element_predictions <- map_dfc(community_elements, function(df) {
df %>%
column_to_rownames(var = "behaviour") %>%
t() %>%
as.data.frame() %>%
rowwise() %>%
mutate(slope = calculate_slope(c_across(everything()))) %>%
select(slope) }) %>%
t() %>%
as.data.frame() %>%
set_names(colnames(community_elements[[1]])[-1]) %>%
rownames_to_column(var="iteration") %>%
pivot_longer(!iteration, names_to="trait",values_to="value") %>%
group_by(trait) %>%
summarise(mean=mean(value),
p1 = quantile(value, probs = 0.1),
p9 = quantile(value, probs = 0.9),
positive_support = sum(value > 0)/1000,
negative_support = sum(value < 0)/1000) %>%
arrange(-positive_support)
11.7.1.1 Positive associated functions at element level
# Positively associated
unique_funct_db<- GIFT_db[c(2,4,5,6)] %>%
distinct(Code_element, .keep_all = TRUE)
element_predictions %>%
filter(mean >0) %>%
arrange(-positive_support) %>%
filter(positive_support>=0.9) %>%
left_join(unique_funct_db, by = join_by(trait == Code_element))%>%
arrange(Domain,Function)%>%
paged_table()
11.7.1.2 Negatively associated functions at element level
element_predictions %>%
filter(mean <0) %>%
arrange(-negative_support) %>%
filter(negative_support>=0.9) %>%
left_join(unique_funct_db, by = join_by(trait == Code_element))%>%
arrange(Domain,Function)%>%
paged_table()
positive_behaviour <- element_predictions %>%
filter(mean >0) %>%
arrange(mean) %>%
filter(positive_support>=0.9) %>%
dplyr::select(-negative_support) %>%
rename(support=positive_support)
negative_behaviour <- element_predictions %>%
filter(mean <0) %>%
arrange(mean) %>%
filter(negative_support>=0.9) %>%
dplyr::select(-positive_support) %>%
rename(support=negative_support)
all_elements <- bind_rows(positive_behaviour,negative_behaviour) %>%
left_join(GIFT_db,by=join_by(trait==Code_element)) %>%
mutate(trait=factor(trait,levels=c(rev(positive_behaviour$trait),rev(negative_behaviour$trait)))) %>%
mutate(Code_function=factor(Code_function)) %>%
mutate(element_legend=str_c(trait," - ",Element)) %>%
mutate(function_legend=str_c(Code_function," - ",Function)) %>%
select(trait,mean,p1,p9,element_legend,function_legend) %>%
unique()
gift_colors <- read_tsv("data/gift_colors.tsv") %>%
mutate(legend=str_c(Code_function," - ",Function)) %>%
filter(legend %in% all_elements$function_legend)
all_elements %>%
ggplot(aes(x=mean, y=fct_reorder(element_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
geom_point() +
geom_errorbar() +
xlim(c(-0.03,0.04)) +
geom_vline(xintercept=0) +
scale_color_manual(values = gift_colors$Color) +
theme_minimal() +
labs(x="Regression coefficient",y="Functional trait") +
theme(legend.position="none")
11.7.2 Function level
functions_table <- elements_table %>%
to.functions(., GIFT_db) %>%
as.data.frame()
community_functions <- prediction %>%
filter(genome %in% predictive_mags) %>% #keep only predictive mags
group_by(behaviour, genome) %>%
mutate(row_id = row_number()) %>%
pivot_wider(names_from = genome, values_from = value) %>%
ungroup() %>%
group_split(row_id) %>%
as.list() %>%
lapply(., FUN = function(x){x %>%
dplyr::select(-row_id) %>%
column_to_rownames(var = "behaviour") %>%
as.data.frame() %>%
exp() %>%
t() %>%
tss() %>%
to.community(functions_table,.,GIFT_db) %>%
as.data.frame() %>%
rownames_to_column(var="behaviour")
})
calculate_slope <- function(x) {
lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
coef(lm_fit)[2]
}
function_predictions <- map_dfc(community_functions, function(df) {
df %>%
column_to_rownames(var = "behaviour") %>%
t() %>%
as.data.frame() %>%
rowwise() %>%
mutate(slope = calculate_slope(c_across(everything()))) %>%
select(slope) }) %>%
t() %>%
as.data.frame() %>%
set_names(colnames(community_functions[[1]])[-1]) %>%
rownames_to_column(var="iteration") %>%
pivot_longer(!iteration, names_to="trait",values_to="value") %>%
group_by(trait) %>%
summarise(mean=mean(value),
p1 = quantile(value, probs = 0.1),
p9 = quantile(value, probs = 0.9),
positive_support = sum(value > 0)/1000,
negative_support = sum(value < 0)/1000) %>%
arrange(-positive_support)
positive <- function_predictions %>%
filter(mean >0) %>%
arrange(mean) %>%
filter(positive_support>=0.9) %>%
dplyr::select(-negative_support) %>%
rename(support=positive_support)
negative <- function_predictions %>%
filter(mean <0) %>%
arrange(mean) %>%
filter(negative_support>=0.9) %>%
dplyr::select(-positive_support) %>%
rename(support=negative_support)
all_functions <- function_predictions %>%
left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
mutate(trait=factor(trait)) %>%
mutate(function_legend=str_c(trait," - ",Function)) %>%
select(trait,mean,p1,p9,function_legend) %>%
unique()
gift_colors <- read_tsv("data/gift_colors.tsv") %>%
mutate(legend=str_c(Code_function," - ",Function)) %>%
filter(legend %in% all_functions$function_legend)
all_functions %>%
ggplot(aes(x=mean, y=fct_reorder(function_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
geom_point() +
geom_errorbar() +
geom_vline(xintercept=0) +
scale_color_manual(values = gift_colors$Color) +
theme_minimal() +
labs(x="Regression coefficient",y="Functional trait") +
guides(col = guide_legend(ncol = 1))