Chapter 6 Candidate gene analysis
A negative value of beta.2 indicates that the alternative allelle is possitively correlated with dominance, while a positive value indicates that the reference allele is possitively correlated with dominance.
# Load SNP tables
snps <- bind_rows(
read_tsv("results/gwas_dominance_t1.tsv") %>% mutate(time="t1"),
read_tsv("results/gwas_dominance_t2.tsv") %>% mutate(time="t2"),
read_tsv("results/gwas_dominance_t3.tsv") %>% mutate(time="t3"),
read_tsv("results/gwas_dominance_t4.tsv") %>% mutate(time="t4"),
read_tsv("results/gwas_dominance_t5.tsv") %>% mutate(time="t5"),
read_tsv("results/gwas_dominance_t6.tsv") %>% mutate(time="t6")) %>%
separate(X_ID, into = c("chr", "position"), sep = "_", convert = TRUE) %>%
select(-Trait)%>%
mutate(chr = as.character(chr))
table(snps$chr)
# Load genome annotation
annot <- read_tsv("https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/635/GCF_000001635.27_GRCm39/GCF_000001635.27_GRCm39_feature_table.txt.gz") %>%
rename(feature=1)
# Merge SNPs and annotation
snps_annot <- annot %>%
filter(feature == "gene") %>%
select(chromosome, start, end, name, symbol, GeneID) %>%
right_join(snps, by = c("chromosome" = "chr"), relationship = "many-to-many") %>%
filter(position >= start, position <= end) %>%
mutate(chromosome = as.numeric(chromosome)) %>%
mutate(time = factor(time)) %>%
select(time, chromosome, position, name, symbol, GeneID, p_value_REML) %>%
arrange(time, chromosome, position)
# Number of different features per time
snps_annot %>%
group_by(time) %>%
summarise(features = n_distinct(name), snps=n_distinct(position))
#Number of times per feature
snps_annot %>%
group_by(symbol) %>%
summarise(time = n_distinct(time)) %>% arrange(-time)
#Time-progress
snps_time <- snps_annot %>%
select(symbol,time) %>%
mutate(indicator = 1) %>%
pivot_wider(names_from = time, values_from = indicator, values_fill = NULL) %>%
mutate(across(starts_with("t"), ~ map_int(., ~ ifelse(is.null(.), 0, length(.)))))
snps_time %>% tt()
snps_time %>%
pivot_longer(!symbol, names_to = "time", values_to = "value") %>%
mutate(symbol=factor(symbol, levels=rev(snps_time$symbol))) %>%
ggplot(aes(y=symbol, x=time, fill=value)) +
geom_tile(color = "white", lwd = 1, linetype = 1) +
scale_fill_gradient(low = "white", high = "#d572ad") +
labs(y="Genes", x="Time") +
theme_classic()
6.1 Extract genotypes
The reference allele is whatever is found in the reference genome. It is not necessarily the major allele. The alternative allele is the allele found in the sample you are studying. 0 indicates the reference allele and 1 indicates the alternative allele. The vertical pipe | indicates that the genotype is phased, and is used to indicate which chromosome the alleles are on. If this is a slash / rather than a vertical pipe, it means we don’t know which chromosome they are on.
snps_annot %>%
unite(snp, chromosome, position, sep = "_") %>%
pull(snp) %>%
unique() %>%
paste(collapse = ", ")
[1] "1_43293781, 10_3176712, 10_3180401, 10_3215733, 10_3222503, 10_3239685, 10_3247274, 10_3255540, 10_3278967, 10_3291671, 10_3291687, 10_3295099, 1_54382637, 1_41965255, 1_43054237, 1_41829441, 1_41911439, 1_41911465, 1_43150828, 1_43166326, 6_118519418, 9_68750520, 17_88356194, 2_170210928, 2_170483712, 2_170583793, 2_170617767, 2_170636806"
module load plink2/2.00a2.3
plink2 --bfile variants_filtered --snps 1_43293781, 10_3176712, 10_3180401, 10_3215733, 10_3222503, 10_3239685, 10_3247274, 10_3255540, 10_3278967, 10_3291671, 10_3291687, 10_3295099, 1_54382637, 1_41965255, 1_43054237, 1_41829441, 1_41911439, 1_41911465, 1_43150828, 1_43166326, 6_118519418, 9_68750520, 17_88356194, 2_170210928, 2_170483712, 2_170583793, 2_170617767, 2_170636806 --out candidate_genotypes --export vcf --threads 1
genotypes <- read_tsv("data/candidate_genotypes.tsv") %>%
select(-c(CHROM,POS,QUAL,FILTER,INFO,FORMAT)) %>%
pivot_longer(!c(ID,REF,ALT),names_to="sample",values_to="genotype") %>%
left_join(snps_annot %>% unite(id,chromosome,position, sep = "_") %>% select(id,time),by=join_by(ID==id)) %>%
left_join(dominance_predictions %>% select(mouse,time,dominance),by=join_by(sample==mouse,time==time)) %>%
filter(!genotype == "./.")
genotypes %>%
ggplot(aes(x=genotype,y=dominance, group=genotype, color=genotype, fill=genotype)) +
geom_boxplot() +
scale_color_manual(values=c("#D5CFCF","#86b9d4","#a6897c"))+
scale_fill_manual(values=c("#D5CFCF50","#86b9d450","#a6897c50"))+
facet_wrap(ID ~ ., ncol=4) +
theme_minimal()
#RORA
genotypes %>%
filter(ID=="9_68750520") %>%
mutate(genotype=factor(genotype, levels=c("0/0","0/1","1/1"))) %>%
ggplot(aes(x=genotype,y=dominance, group=genotype, color=genotype, fill=genotype)) +
geom_boxplot() +
scale_color_manual(values=c("#D5CFCF","#86b9d4","#a6897c"))+
scale_fill_manual(values=c("#D5CFCF50","#86b9d450","#a6897c50"))+
scale_x_discrete(drop = FALSE)+
theme_minimal()+
theme(legend.position = "none")
genotypes %>%
filter(ID=="9_68750520") %>%
select(sample,genotype) %>%
inner_join(dominance_predictions, by=join_by(sample==mouse)) %>%
mutate(time = as.numeric(substr(time, 2, 2))) %>%
mutate(mouse_number = substr(sample, 5, 5)) %>%
ggplot(aes(x=time,y=dominance, group=sample, color=genotype)) +
geom_line(size=1) +
scale_color_manual(values=c("#D5CFCF","#86b9d4","#a6897c"))+
scale_y_continuous(breaks = c(0,0.25,0.5,0.75,1)) +
scale_x_continuous(breaks = c(1:6)) +
facet_wrap(cage ~ ., ncol=4) +
theme_minimal()
#Fbxo11
genotypes %>%
filter(ID=="17_88356194") %>%
mutate(genotype=factor(genotype, levels=c("0/0","0/1","1/1"))) %>%
ggplot(aes(x=genotype,y=dominance, group=genotype, color=genotype, fill=genotype)) +
geom_boxplot() +
scale_color_manual(values=c("#D5CFCF","#86b9d4","#a6897c"))+
scale_fill_manual(values=c("#D5CFCF50","#86b9d450","#a6897c50"))+
scale_x_discrete(drop = FALSE)+
theme_minimal()+
theme(legend.position = "none")
genotypes %>%
filter(ID=="17_88356194") %>%
select(sample,genotype) %>%
inner_join(dominance_predictions, by=join_by(sample==mouse)) %>%
mutate(time = as.numeric(substr(time, 2, 2))) %>%
mutate(mouse_number = substr(sample, 5, 5)) %>%
ggplot(aes(x=time,y=dominance, group=sample, color=genotype)) +
geom_line(size=1) +
scale_color_manual(values=c("#D5CFCF","#86b9d4","#a6897c"))+
scale_y_continuous(breaks = c(0,0.25,0.5,0.75,1)) +
scale_x_continuous(breaks = c(1:6)) +
facet_wrap(cage ~ ., ncol=4) +
theme_minimal()
#Ankrd26
genotypes %>%
filter(ID=="6_118519418") %>%
mutate(genotype=factor(genotype, levels=c("0/0","0/1","1/1"))) %>%
ggplot(aes(x=genotype,y=dominance, group=genotype, color=genotype, fill=genotype)) +
geom_boxplot() +
scale_color_manual(values=c("#D5CFCF","#86b9d4","#a6897c"))+
scale_fill_manual(values=c("#D5CFCF50","#86b9d450","#a6897c50"))+
scale_x_discrete(drop = FALSE)+
theme_minimal()+
theme(legend.position = "none")
genotypes %>%
filter(ID=="6_118519418") %>%
select(sample,genotype) %>%
inner_join(dominance_predictions, by=join_by(sample==mouse)) %>%
mutate(time = as.numeric(substr(time, 2, 2))) %>%
mutate(mouse_number = substr(sample, 5, 5)) %>%
ggplot(aes(x=time,y=dominance, group=sample, color=genotype)) +
geom_line(size=1) +
scale_color_manual(values=c("#D5CFCF","#86b9d4","#a6897c"))+
scale_y_continuous(breaks = c(0,0.25,0.5,0.75,1)) +
scale_x_continuous(breaks = c(1:6)) +
facet_wrap(cage ~ ., ncol=4) +
theme_minimal()
#Bcas1
genotypes %>%
filter(ID=="2_170210928") %>%
mutate(genotype=factor(genotype, levels=c("0/0","0/1","1/1"))) %>%
ggplot(aes(x=genotype,y=dominance, group=genotype, color=genotype, fill=genotype)) +
geom_boxplot() +
scale_color_manual(values=c("#D5CFCF","#86b9d4","#a6897c"))+
scale_fill_manual(values=c("#D5CFCF50","#86b9d450","#a6897c50"))+
scale_x_discrete(drop = FALSE)+
theme_minimal()+
theme(legend.position = "none")
genotypes %>%
filter(ID=="2_170210928") %>%
select(sample,genotype) %>%
inner_join(dominance_predictions, by=join_by(sample==mouse)) %>%
mutate(time = as.numeric(substr(time, 2, 2))) %>%
mutate(mouse_number = substr(sample, 5, 5)) %>%
ggplot(aes(x=time,y=dominance, group=sample, color=genotype)) +
geom_line(size=1) +
scale_color_manual(values=c("#D5CFCF","#86b9d4","#a6897c"))+
scale_y_continuous(breaks = c(0,0.25,0.5,0.75,1)) +
scale_x_continuous(breaks = c(1:6)) +
facet_wrap(cage ~ ., ncol=8) +
theme_minimal()
#all four
genotypes %>%
filter(ID %in% c("9_68750520","17_88356194","6_118519418","2_170210928")) %>%
select(ID,sample,genotype) %>%
inner_join(dominance_predictions, by=join_by(sample==mouse)) %>%
mutate(time = as.numeric(substr(time, 2, 2))) %>%
mutate(mouse_number = substr(sample, 5, 5)) %>%
ggplot(aes(x=time,y=dominance, group=sample, color=genotype)) +
geom_line(size=1) +
scale_color_manual(values=c("#D5CFCF","#86b9d4","#a6897c"))+
scale_y_continuous(breaks = c(0,0.25,0.5,0.75,1)) +
scale_x_continuous(breaks = c(1:6)) +
facet_grid(ID ~ cage) +
theme_minimal()