Chapter 8 HMSC analysis
8.1 Load data
load("data/data.Rdata")
if(!file.exists("hmsc_origin/fit_model1_250_10.Rdata")){
download.file(url='https://sid.erda.dk/share_redirect/A6rm945NbY/hmsc_origin/fit_model1_250_10.Rdata',
destfile='hmsc_origin/fit_model1_250_10.Rdata', method='curl')
}
load("hmsc_origin/fit_model1_250_10.Rdata")
# Select desired support threshold
support_threshold=0.9
negsupport_threshold=1-support_threshold
8.2 Variance partitioning
# Compute variance partitioning
varpart=computeVariancePartitioning(m)
varpart$vals %>%
as.data.frame() %>%
rownames_to_column(var="variable") %>%
pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
mutate(variable=factor(variable, levels=rev(c("origin","sex","logseqdepth","Random: location")))) %>%
group_by(variable) %>%
summarise(mean=mean(value)*100,sd=sd(value)*100) %>%
tt()
variable | mean | sd |
---|---|---|
Random: location | 40.1111261 | 24.825232 |
logseqdepth | 54.0718559 | 25.120545 |
sex | 4.8180020 | 5.744219 |
origin | 0.9990159 | 1.223566 |
# Basal tree
varpart_tree <- genome_tree
#Varpart table
varpart_table <- varpart$vals %>%
as.data.frame() %>%
rownames_to_column(var="variable") %>%
pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
mutate(genome=factor(genome, levels=rev(varpart_tree$tip.label))) %>%
mutate(variable=factor(variable, levels=rev(c("origin","sex","logseqdepth","Random: location"))))
#Phyla
phylum_groups <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
mutate(phylum=str_remove_all(phylum, "p__"))%>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
filter(genome %in% varpart_tree$tip.label) %>%
arrange(match(genome, varpart_tree$tip.label)) %>%
mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
column_to_rownames(var = "genome") %>%
dplyr::select(phylum)
colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
mutate(phylum=str_remove_all(phylum, "p__"))%>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
filter(genome %in% varpart_tree$tip.label) %>%
arrange(match(genome, varpart_tree$tip.label)) %>%
dplyr::select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
dplyr::select(colors) %>%
pull()
# Basal ggtree
varpart_tree <- varpart_tree %>%
force.ultrametric(.,method="extend") %>%
ggtree(., size = 0.3)
***************************************************************
* Note: *
* force.ultrametric does not include a formal method to *
* ultrametricize a tree & should only be used to coerce *
* a phylogeny that fails is.ultrametric due to rounding -- *
* not as a substitute for formal rate-smoothing methods. *
***************************************************************
# Add phylum colors next to the tree tips
varpart_tree <- gheatmap(varpart_tree, phylum_groups, offset=-0.2, width=0.1, colnames=FALSE) +
scale_fill_manual(values=colors_alphabetic)+
labs(fill="Phylum")
#Reset fill scale to use a different colour profile in the heatmap
varpart_tree <- varpart_tree + new_scale_fill()
# Add variance stacked barplot
vertical_tree <- varpart_tree +
scale_fill_manual(values=c("#506a96","#cccccc","#be3e2b","#f6de6c"))+
geom_fruit(
data=varpart_table,
geom=geom_bar,
mapping = aes(x=value, y=genome, fill=variable, group=variable),
pwidth = 2,
offset = 0.05,
width= 1,
orientation="y",
stat="identity")+
labs(fill="Variable")
vertical_tree
8.3 Model fit
[1] 0.524663
genome_fit <- tibble(genome=m$spNames, r2 = MFCV[[2]])
genome_counts_filt %>%
mutate_if(is.numeric, ~ . / sum(.)) %>%
left_join(genome_fit, by="genome") %>%
filter(r2>0.5) %>%
select(-c(genome,r2)) %>%
colSums() %>%
hist(main="Predictive capacity")
var_pred_table <- tibble(mag=m$spNames,
pred=MFCV$R2,
var_pred=MFCV$R2 * varpart$vals[1,],
support=getPostEstimate(hM=m, parName="Beta")$support %>% .[2,],
estimate=getPostEstimate(hM=m, parName="Beta")$mean %>% .[2,]) %>%
mutate(enrichment=ifelse(support>=support_threshold,"Feral","Neutral")) %>%
mutate(enrichment=ifelse(support<=negsupport_threshold,"Domestic",enrichment))
var_pred_table %>%
ggplot(aes(x=estimate,y=var_pred, color=enrichment))+
geom_point()+
scale_color_manual(values=c("#bd70ae","#949293","#ebe8e8"))+
geom_hline(yintercept=0.005, linetype="dashed")+
theme_minimal()
8.4 Posterior estimates
# Basal tree
postestimates_tree <- genome_tree
# Posterior estimate table
post_beta <- getPostEstimate(hM=m, parName="Beta")$support %>%
as.data.frame() %>%
mutate(variable=m$covNames) %>%
pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
mutate(genome=factor(genome, levels=rev(postestimates_tree$tip.label))) %>%
mutate(value = case_when(
value >= support_threshold ~ "Positive",
value <= negsupport_threshold ~ "Negative",
TRUE ~ "Neutral")) %>%
mutate(value=factor(value, levels=c("Positive","Neutral","Negative"))) %>%
pivot_wider(names_from = variable, values_from = value) %>%
#select(genome,sp_vulgaris,area_semi,area_urban,sp_vulgarisxarea_semi,sp_vulgarisxarea_urban,season_spring,season_winter,sp_vulgarisxseason_spring,sp_vulgarisxseason_winter) %>%
column_to_rownames(var="genome")
# Genome-specific attributes
genome_depth <- genome_counts_filt %>%
mutate_if(is.numeric, ~ . / sum(.)) %>%
rowwise() %>%
mutate(depth = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
ungroup() %>%
select(genome,depth)
#Phylums
phylum_groups <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
mutate(phylum=str_remove_all(phylum, "p__")) %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
filter(genome %in% postestimates_tree$tip.label) %>%
arrange(match(genome, postestimates_tree$tip.label)) %>%
mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
column_to_rownames(var = "genome") %>%
dplyr::select(phylum)
colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
mutate(phylum=str_remove_all(phylum, "p__")) %>%
right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
filter(genome %in% postestimates_tree$tip.label) %>%
arrange(match(genome, postestimates_tree$tip.label)) %>%
dplyr::select(phylum, colors) %>%
unique() %>%
arrange(phylum) %>%
dplyr::select(colors) %>%
pull()
# Basal ggtree
postestimates_tree <- postestimates_tree %>%
force.ultrametric(.,method="extend") %>%
ggtree(., size = 0.3)
***************************************************************
* Note: *
* force.ultrametric does not include a formal method to *
* ultrametricize a tree & should only be used to coerce *
* a phylogeny that fails is.ultrametric due to rounding -- *
* not as a substitute for formal rate-smoothing methods. *
***************************************************************
#Add phylum colors next to the tree tips
postestimates_tree <- gheatmap(postestimates_tree, phylum_groups, offset=-0.2, width=0.1, colnames=FALSE) +
scale_fill_manual(values=colors_alphabetic)+
labs(fill="Phylum")
#Reset fill scale to use a different colour profile in the heatmap
postestimates_tree <- postestimates_tree + new_scale_fill()
# Add posterior significant heatmap
postestimates_tree <- gheatmap(postestimates_tree, post_beta, offset=0, width=0.5, colnames=TRUE, colnames_position="top",colnames_angle=90, colnames_offset_y=1, hjust=0) +
scale_fill_manual(values=c("#be3e2b","#f4f4f4","#b2b530"))+
labs(fill="Trend")
postestimates_tree +
vexpand(.25, 1) # expand top
### Association with cat origin
estimate <- getPostEstimate(hM=m, parName="Beta")$mean %>%
as.data.frame() %>%
mutate(variable=m$covNames) %>%
filter(variable=="originFeral") %>%
pivot_longer(!variable, names_to = "genome", values_to = "mean") %>%
dplyr::select(genome,mean)
support <- getPostEstimate(hM=m, parName="Beta")$support %>%
as.data.frame() %>%
mutate(variable=m$covNames) %>%
filter(variable=="originFeral") %>%
pivot_longer(!variable, names_to = "genome", values_to = "support") %>%
dplyr::select(genome,support)
#pdf("figures/sig_Mags_violin.pdf",width=10, height=8)
inner_join(estimate,support,by=join_by(genome==genome)) %>%
mutate(significance=ifelse(support >= 0.9 | support <= 0.1,1,0)) %>%
mutate(support=ifelse(mean<0,1-support,support)) %>%
left_join(genome_metadata, by = join_by(genome == genome)) %>%
mutate(phylum = ifelse(support > 0.9, phylum, NA)) %>%
ggplot(aes(x=mean,y=support,color=phylum))+
geom_point(alpha=0.7, shape=16, size=3)+
scale_color_manual(values = phylum_colors) +
geom_vline(xintercept = 0) +
xlim(c(-0.4,0.4)) +
labs(x = "Beta regression coefficient", y = "Posterior probability") +
theme_minimal()
estimate_quantiles <- getPostEstimate(hM=m, parName="Beta", q=c(0.1,0.9))$q
map_dfr(1:229, ~ {
tibble(
q10 = estimate_quantiles[1, 2, .x], # Extract 10% quantile
q90 = estimate_quantiles[2, 2, .x] # Extract 90% quantile
)
}) %>% mutate(genome=estimate$genome) %>%
right_join(estimate, by="genome") %>%
left_join(genome_metadata, by="genome") %>%
filter(q10>0 | q90<0) %>%
ggplot(aes(x=mean, y=fct_reorder(genome, mean), xmin=q10, xmax=q90, color=phylum)) +
geom_point() +
geom_errorbar() +
#xlim(c(-0.03,0.04)) +
geom_vline(xintercept=0) +
scale_color_manual(values = phylum_colors) +
theme_minimal() +
labs(x="Regression coefficient",y="Bacterial genome") +
theme(legend.position="none")
estimate_top <- estimate %>%
arrange(desc(mean)) %>%
slice_head(n = 15) %>%
bind_rows(
estimate %>%
arrange(mean) %>%
slice_head(n = 15))
estimate_top_significant <-map_dfr(1:229, ~ {
tibble(
q10 = estimate_quantiles[1, 2, .x], # Extract 10% quantile
q90 = estimate_quantiles[2, 2, .x] # Extract 90% quantile
)
}) %>% mutate(genome=estimate$genome) %>%
right_join(estimate_top, by="genome") %>%
left_join(genome_metadata, by="genome") %>%
filter(q10>0 | q90<0) %>%
arrange(mean)
estimate_top_significant %>%
ggplot(aes(x=mean, y=fct_reorder(genome, mean), xmin=q10, xmax=q90, color=phylum)) +
geom_point() +
geom_errorbar() +
scale_y_discrete(labels = estimate_top_significant$genus) +
geom_vline(xintercept=0) +
scale_color_manual(values = phylum_colors[-c(5,7,9)]) +
theme_minimal() +
labs(x="Regression coefficient",y="Functional trait") +
theme(legend.position="none")
8.4.0.1 Associated with domestic cats
origin_domestic <- getPostEstimate(hM=m, parName="Beta")$support %>%
as.data.frame() %>%
mutate(variable=m$covNames) %>%
pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
mutate(trend = case_when(
value >= support_threshold ~ "Positive",
value <= negsupport_threshold ~ "Negative",
TRUE ~ "Neutral")) %>%
filter(variable=="originFeral", trend=="Negative") %>%
arrange(-value) %>%
left_join(genome_metadata,by=join_by(genome==genome)) %>%
dplyr::select(genome,phylum,class,order,family,species,value) %>%
arrange(phylum, class, family, species)
origin_domestic %>%
paged_table()
8.4.0.2 Associated with feral cats
origin_feral <- getPostEstimate(hM=m, parName="Beta")$support %>%
as.data.frame() %>%
mutate(variable=m$covNames) %>%
pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
mutate(trend = case_when(
value >= support_threshold ~ "Positive",
value <= negsupport_threshold ~ "Negative",
TRUE ~ "Neutral")) %>%
filter(variable=="originFeral", trend=="Positive") %>%
arrange(value) %>%
left_join(genome_metadata,by=join_by(genome==genome)) %>%
dplyr::select(genome,phylum,class,order,family,species,value) %>%
arrange(phylum,class,family,species)
origin_feral %>%
paged_table()
8.6 Microbiome composition predictions
Support values of the cat origin variable per genome are interpreted as enrichment for domestic or feral cats.
The most likely microbiome compositions are predicted for domestic and feral cats based on the hmsc models
Log-abundance differences between domestic and feral cats are plotted for genomes found enriched for either origin.
8.7 Functional predictions
8.7.1 Element level
elements_table <- genome_gifts %>%
to.elements(., GIFT_db) %>%
as.data.frame() %>%
.[predictive_mags,] #keep only predictive mags
#Calculate community-level functional traits for predicted communities
community_elements <- prediction %>%
filter(genome %in% predictive_mags) %>% #keep only predictive mags
group_by(origin, genome) %>%
mutate(row_id = row_number()) %>%
pivot_wider(names_from = genome, values_from = value) %>%
ungroup() %>%
group_split(row_id) %>%
as.list() %>%
lapply(., FUN = function(x){x %>%
dplyr::select(-row_id) %>%
column_to_rownames(var = "origin") %>%
as.data.frame() %>%
exp() %>%
t() %>%
tss() %>%
to.community(elements_table,.,GIFT_db) %>%
as.data.frame() %>%
rownames_to_column(var="origin")
})
element_predictions <- map_dfr(community_elements, function(df) {
domestic_values <- df %>% filter(origin == "domestic") %>% select(-origin)
feral_values <- df %>% filter(origin == "feral") %>% select(-origin)
domestic_values - feral_values
}) %>%
mutate(iteration=c(1:1000)) %>%
pivot_longer(!iteration,names_to="trait",values_to="value") %>%
group_by(trait) %>%
summarise(mean=mean(value),
p1 = quantile(value, probs = 0.1),
p9 = quantile(value, probs = 0.9),
positive_support = sum(value > 0)/1000,
negative_support = sum(value < 0)/1000) %>%
arrange(-positive_support)
8.7.1.1 Positively associated functions at element level
# Positively associated
unique_funct_db<- GIFT_db[c(2,4,5,6)] %>%
distinct(Code_element, .keep_all = TRUE)
element_predictions %>%
filter(mean >0) %>%
arrange(-positive_support) %>%
filter(positive_support>=0.9) %>%
left_join(unique_funct_db, by = join_by(trait == Code_element))%>%
arrange(Domain,Function)%>%
paged_table()
8.7.1.2 Negatively associated functions at element level
element_predictions %>%
filter(mean <0) %>%
arrange(-negative_support) %>%
filter(negative_support>=0.9) %>%
left_join(unique_funct_db, by = join_by(trait == Code_element))%>%
arrange(Domain,Function)%>%
paged_table()
positive_origin <- element_predictions %>%
filter(mean >0) %>%
arrange(mean) %>%
filter(positive_support>=0.9) %>%
dplyr::select(-negative_support) %>%
rename(support=positive_support)
negative_origin <- element_predictions %>%
filter(mean <0) %>%
arrange(mean) %>%
filter(negative_support>=0.9) %>%
dplyr::select(-positive_support) %>%
rename(support=negative_support)
all_elements <- bind_rows(positive_origin,negative_origin) %>%
left_join(GIFT_db,by=join_by(trait==Code_element)) %>%
mutate(trait=factor(trait,levels=c(rev(positive_origin$trait),rev(negative_origin$trait)))) %>%
mutate(Code_function=factor(Code_function)) %>%
mutate(element_legend=str_c(trait," - ",Element)) %>%
mutate(function_legend=str_c(Code_function," - ",Function)) %>%
select(trait,mean,p1,p9,element_legend,function_legend) %>%
unique()
gift_colors <- read_tsv("data/gift_colors.tsv") %>%
mutate(legend=str_c(Code_function," - ",Function)) %>%
filter(legend %in% all_elements$function_legend)
all_elements %>%
ggplot(aes(x=-mean, y=fct_reorder(element_legend, -mean), xmin=-p1, xmax=-p9, color=function_legend)) +
geom_point() +
geom_errorbar() +
xlim(c(-0.04,0.12)) +
geom_vline(xintercept=0) +
scale_color_manual(values = gift_colors$Color) +
theme_minimal() +
labs(x="Regression coefficient",y="Functional trait") +
theme(legend.position="none")
8.7.2 Function level
functions_table <- elements_table %>%
to.functions(., GIFT_db) %>%
as.data.frame()
community_functions <- prediction %>%
filter(genome %in% predictive_mags) %>% #keep only predictive mags
group_by(origin, genome) %>%
mutate(row_id = row_number()) %>%
pivot_wider(names_from = genome, values_from = value) %>%
ungroup() %>%
group_split(row_id) %>%
as.list() %>%
lapply(., FUN = function(x){x %>%
dplyr::select(-row_id) %>%
column_to_rownames(var = "origin") %>%
as.data.frame() %>%
exp() %>%
t() %>%
tss() %>%
to.community(functions_table,.,GIFT_db) %>%
as.data.frame() %>%
rownames_to_column(var="origin")
})
function_predictions <- map_dfr(community_functions, function(df) {
domestic_values <- df %>% filter(origin == "domestic") %>% select(-origin)
feral_values <- df %>% filter(origin == "feral") %>% select(-origin)
domestic_values - feral_values
}) %>%
mutate(iteration=c(1:1000)) %>%
pivot_longer(!iteration,names_to="trait",values_to="value") %>%
group_by(trait) %>%
summarise(mean=mean(value),
p1 = quantile(value, probs = 0.1),
p9 = quantile(value, probs = 0.9),
positive_support = sum(value > 0)/1000,
negative_support = sum(value < 0)/1000) %>%
arrange(-positive_support)
# Positively associated
function_predictions %>%
filter(mean >0) %>%
arrange(-positive_support) %>%
paged_table()
# Negatively associated
function_predictions %>%
filter(mean <0) %>%
arrange(-negative_support) %>%
paged_table()
positive <- function_predictions %>%
filter(mean >0) %>%
arrange(mean) %>%
filter(positive_support>=0.9) %>%
dplyr::select(-negative_support) %>%
rename(support=positive_support)
negative <- function_predictions %>%
filter(mean <0) %>%
arrange(mean) %>%
filter(negative_support>=0.9) %>%
dplyr::select(-positive_support) %>%
rename(support=negative_support)
all_functions <- function_predictions %>%
left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
mutate(trait=factor(trait)) %>%
mutate(function_legend=str_c(trait," - ",Function)) %>%
select(trait,mean,p1,p9,function_legend) %>%
unique()
gift_colors <- read_tsv("data/gift_colors.tsv") %>%
mutate(legend=str_c(Code_function," - ",Function)) %>%
filter(legend %in% all_functions$function_legend)
all_functions %>%
ggplot(aes(x=-mean, y=fct_reorder(function_legend, -mean), xmin=-p1, xmax=-p9, color=function_legend)) +
geom_point() +
geom_errorbar() +
xlim(c(-0.05,0.12)) +
geom_vline(xintercept=0) +
scale_color_manual(values = gift_colors$Color) +
theme_minimal() +
labs(x="Regression coefficient",y="Functional trait") +
guides(col = guide_legend(ncol = 1))