Chapter 5 Heterozigosity analysis
heterozigosity <- read_tsv("data/heterozigosity.tsv") %>%
rename(mouse=1,obs_hom=2,est_hom=3,n_sites=4,f_stat=5) %>%
mutate(obs_het=n_sites - obs_hom) %>%
mutate(est_het=n_sites - est_hom) %>%
mutate(obs_het_perc=obs_het / n_sites * 100) %>%
mutate(est_het_perc=est_het / n_sites * 100) %>%
mutate(cage=substr(mouse, 1, 3))
shapiro.test(heterozigosity$obs_het_perc)
heterozigosity %>%
as.data.frame() %>%
aov(obs_het_perc ~ cage, data = .) %>%
broom::tidy()
heterozigosity %>%
ggplot(aes(y=cage,x=obs_het_perc,group=cage))+
geom_boxplot(fill="#f4f4f4")+
theme_minimal()
mean_heterozigosity <- heterozigosity %>%
group_by(cage) %>%
summarise(mean=mean(obs_het_perc),sd=sd(obs_het_perc))
heterozigosity %>%
group_by(cage) %>%
summarise(mean=mean(obs_het_perc),sd=sd(obs_het_perc)) %>%
tt()
heterozigosity %>%
ggplot(aes(y=fct_rev(cage),x=obs_het_perc,group=cage))+
geom_boxplot(fill="#f4f4f4", color="#666666")+
theme_minimal()
hierarchy_stability %>%
as.data.frame() %>% t() %>% t() %>% as.data.frame() %>%
rownames_to_column(var="cage") %>%
rename(cage=1,stability=2) %>%
left_join(mean_heterozigosity, by=join_by(cage==cage)) %>%
{cor.test(.$stability, .$mean)}
hierarchy_stability %>%
as.data.frame() %>% t() %>% t() %>% as.data.frame() %>%
rownames_to_column(var="cage") %>%
rename(cage=1,stability=2) %>%
left_join(mean_heterozigosity, by=join_by(cage==cage)) %>%
ggplot(aes(x=mean,y=stability)) +
geom_point()
# GWAS analysis
5.1 Association analysis
This step is repeated for each time point separately.
# SNP data
X = read.plink("data/variants_filtered.bed")
X = as(X$genotypes,'numeric')
# Metadata
data = read_table("data/dominance_t1.tsv") %>%
mutate(cage=factor(cage)) %>%
mutate(mouse=factor(mouse)) %>%
as.data.frame()
# Run GWAS
gwas = GridLMM_GWAS(formula = dominance ~ 1 + (1|cage),
test_formula = ~1,
reduced_formula = ~0,
data = data,
X = X,
X_ID = 'mouse',
method = 'REML',
fillNAX = TRUE,
verbose = F,
mc.cores = 8)
5.1.1 Manhattan plot
This step is repeated for each gwas result separately.
# Prepare working table
gwas_result <- gwas$results %>%
#slice_sample(n=100000) %>%
separate(X_ID, into = c("chr", "bp"), sep = "_", remove = FALSE) %>% # separate chromosome and position
mutate(bp=as.numeric(bp)) %>%
mutate(chr=factor(chr, levels=c(c(1:19),"X"))) %>%
rename(p=p_value_REML) %>%
filter(!is.nan(p))
###
# Custom Manhattan
###
# https://r-graph-gallery.com/101_Manhattan_plot.html
# Append chromosome coordinates
gwas_result <- gwas_result %>%
group_by(chr) %>%
summarise(chr_len=max(bp)) %>%
mutate(tot=cumsum(chr_len)-chr_len) %>%
select(-chr_len) %>%
left_join(gwas_result, ., by=c("chr"="chr")) %>%
arrange(chr, bp) %>%
mutate(bp_cum=bp+tot)
# Append significance level
gwas_result_unsig <- gwas_result %>%
filter(p > 0.000001)
# Generate chromosome number code
axisdf <- gwas_result %>%
group_by(chr) %>%
summarize(center=mean(bp_cum))
# Plot gwas
ggplot(gwas_result_unsig, aes(x=bp_cum, y=-log10(p))) +
# Show all points
geom_point(aes(color=as.factor(chr)), alpha=0.8, size=1.3) +
scale_color_manual(values = rep(c("grey", "skyblue"), 22 )) +
# Show significant points
geom_point(data=gwas_result_sig, aes(x=bp_cum, y=-log10(p)), color="#FFB91F", shape=17, alpha=0.9, size=1.6) +
# custom X axis:
scale_x_continuous(label = axisdf$chr, breaks= axisdf$center, expand = c(0.01, 0.01)) +
scale_y_continuous(expand = c(0.01, 0.04)) +
# Custom the theme:
theme_bw() +
labs(x="Chromosome") +
theme(
legend.position="none",
panel.border = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
)