Chapter 4 Data statistics
4.1 Sequencing reads statistics
# Summarise post-fastp sequencing depth across all samples.
# Each read count is scaled by 150 and divided by 1e9
# (NOTE(review): 150 is presumably the nominal read length in bases, which
# would make the unit gigabases -- confirm).
sample_metadata %>%
  mutate(post_fastp_gbases = reads_post_fastp * 150 / 1000000000) %>%
  summarise(
    Total = round(sum(post_fastp_gbases), 2),
    mean = round(mean(post_fastp_gbases), 2),
    sd = round(sd(post_fastp_gbases), 2)
  ) %>%
  # Merge mean and sd into a single "mean ± sd" text column.
  unite("Average", mean, sd, sep = " ± ", remove = TRUE) %>%
  tt()
Total | Average |
---|---|
652.04 | 6.21 ± 2.77 |
4.3 DNA fractions
# Break every sample's data down into four base-count fractions:
# low-quality, host-mapped, unmapped and MAG-mapped.
sequence_fractions <- read_counts %>%
  # Long format (one row per genome/sample pair), then total counts per sample.
  pivot_longer(-genome, names_to = "sample", values_to = "value") %>%
  group_by(sample) %>%
  summarise(mags = sum(value)) %>%
  left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
  select(
    sample, mags, metagenomic_bases, host_bases, bases_lost_fastp_percent
  ) %>%
  mutate(
    # NOTE(review): 146 is presumably the mean length (in bases) of a mapped
    # read; the read-statistics summary uses 150 instead -- confirm.
    mags_bases = mags * 146,
    # Bases kept after quality filtering (metagenomic + host).
    retained_bases = metagenomic_bases + host_bases,
    # Back-calculate the bases discarded by fastp. The formula treats
    # bases_lost_fastp_percent as a proportion (it is subtracted from 1).
    lowqual_bases =
      (retained_bases / (1 - bases_lost_fastp_percent)) - retained_bases,
    # Metagenomic bases not assigned to MAGs, floored at zero.
    unmapped_bases = pmax(metagenomic_bases - mags_bases, 0)
  ) %>%
  select(sample, lowqual_bases, host_bases, unmapped_bases, mags_bases)
# Render the per-sample fractions as a table, with every column except
# `sample` divided by 1e9.
sequence_fractions %>%
  # across() is the current replacement for the superseded mutate_at().
  mutate(across(-sample, ~ .x / 1000000000)) %>%
  # Rename by column name rather than by position, so a change in the
  # upstream column order cannot silently mislabel the table headers.
  rename(
    "Sample" = sample,
    "Low quality" = lowqual_bases,
    "Mapped to host" = host_bases,
    "Unmapped" = unmapped_bases,
    "Mapped to MAGs" = mags_bases
  ) %>%
  tt()
Sample | Low quality | Mapped to host | Unmapped | Mapped to MAGs |
---|---|---|---|---|
EHI00069 | 1.1441521 | 0.920532571 | 2.0517957 | 4.1058263 |
EHI00070 | 0.2348919 | 0.937599880 | 0.8332809 | 1.6364476 |
EHI00072 | 0.3717090 | 0.078319449 | 0.9507999 | 4.0430716 |
EHI00073 | 0.1504762 | 0.007300164 | 0.6458066 | 1.8453666 |
EHI00074 | 0.5700275 | 0.034671349 | 1.7906564 | 5.7526051 |
EHI00075 | 0.2702205 | 0.444390387 | 0.9599202 | 2.4896265 |
EHI00076 | 0.3350547 | 0.081391903 | 1.4172925 | 3.8389468 |
EHI00077 | 0.1353495 | 0.641770627 | 0.3885242 | 1.3947384 |
EHI00079 | 0.3377906 | 2.721037061 | 0.5653554 | 0.8084465 |
EHI00080 | 0.7291964 | 1.605187153 | 2.3611523 | 2.6883254 |
EHI00081 | 0.2184180 | 0.727448090 | 0.5615342 | 1.3358755 |
EHI00085 | 0.2300087 | 0.212261834 | 0.5665817 | 1.9281604 |
EHI00086 | 0.3471176 | 0.417593612 | 1.0770869 | 1.7440344 |
EHI00088 | 0.2278663 | 0.071769210 | 0.5556301 | 3.0141044 |
EHI00089 | 0.1751910 | 0.034915171 | 0.5162821 | 1.9576155 |
EHI00091 | 0.2622745 | 0.230039482 | 0.8255133 | 2.4924602 |
EHI00092 | 0.3917724 | 0.170842450 | 1.0255541 | 4.9181624 |
EHI00093 | 0.3725739 | 0.199633318 | 1.1938025 | 2.3137473 |
EHI00095 | 0.1768550 | 0.255990148 | 0.3449738 | 1.9242079 |
EHI00097 | 0.5249214 | 0.366551383 | 0.8028468 | 2.9685729 |
EHI00098 | 0.3176395 | 0.040047629 | 1.7271586 | 3.1498120 |
EHI00100 | 0.4386907 | 2.398470758 | 0.8228622 | 4.1093643 |
EHI00101 | 0.7257361 | 0.451928365 | 4.9291736 | 1.9943098 |
EHI00103 | 0.2036111 | 0.843529516 | 0.5714195 | 1.3395113 |
EHI00104 | 0.1769983 | 0.643701669 | 0.6807099 | 1.2574891 |
EHI00105 | 0.2711051 | 0.005694785 | 0.9087805 | 2.7311372 |
EHI00106 | 1.1082626 | 1.058626752 | 1.8709694 | 2.0746217 |
EHI00107 | 0.3818355 | 0.184257358 | 1.5836979 | 3.0592637 |
EHI00108 | 0.3420046 | 0.394171066 | 0.5356632 | 2.7441687 |
EHI00110 | 0.3648881 | 0.591747269 | 0.5588077 | 2.6145261 |
EHI00111 | 0.4392502 | 0.151954279 | 1.1992388 | 3.5773008 |
EHI00112 | 0.7134927 | 0.085183185 | 1.1328903 | 3.6469985 |
EHI00113 | 0.8620893 | 0.617092903 | 1.4939464 | 7.3254896 |
EHI00114 | 0.4127445 | 0.087174427 | 2.3985081 | 4.6641659 |
EHI00115 | 0.6290438 | 0.062352261 | 2.6081814 | 3.0085937 |
EHI00116 | 0.4130247 | 0.039596686 | 0.8559983 | 4.7270478 |
EHI00117 | 0.6850197 | 0.423816725 | 1.2454024 | 7.0782364 |
EHI00118 | 0.3963952 | 0.784391517 | 0.9049477 | 3.7006953 |
EHI00119 | 0.4448016 | 0.038727564 | 1.5277021 | 4.6761692 |
EHI00120 | 0.3549466 | 0.051038023 | 0.7165254 | 3.8122875 |
EHI00121 | 0.2487524 | 1.196143747 | 0.9172490 | 1.9448859 |
EHI00122 | 0.3967333 | 0.056247498 | 1.7140526 | 5.3880487 |
EHI00124 | 0.3101617 | 0.113171041 | 1.6986936 | 3.2732918 |
EHI00125 | 0.3008464 | 0.313488992 | 1.2381544 | 3.1620784 |
EHI00128 | 0.3688688 | 0.149336152 | 1.0558709 | 2.9648899 |
EHI00129 | 0.2827708 | 0.212726094 | 0.7352845 | 2.7738704 |
EHI00130 | 0.2917071 | 0.082344877 | 1.0170695 | 2.1151693 |
EHI00131 | 0.1940832 | 0.943415382 | 0.4863996 | 1.2048589 |
EHI00133 | 0.2569782 | 0.429506925 | 0.3484713 | 3.3747982 |
EHI00134 | 0.5620395 | 1.988142143 | 1.9520876 | 3.9427347 |
EHI00137 | 0.2948339 | 0.250489910 | 1.0341318 | 2.4844583 |
EHI00138 | 0.2673241 | 0.179238573 | 0.5826993 | 2.8578460 |
EHI00139 | 0.3001940 | 0.034225947 | 0.9413847 | 2.1426906 |
EHI00176 | 0.7041777 | 0.100761385 | 0.7196926 | 2.7465205 |
EHI00177 | 0.2555111 | 0.016053624 | 0.5228890 | 2.1639513 |
EHI00178 | 0.2415179 | 0.045338274 | 1.3634101 | 2.6038297 |
EHI00179 | 0.1816121 | 0.023148016 | 0.9401620 | 2.3926575 |
EHI00180 | 0.5363481 | 0.037200326 | 0.7786810 | 2.4699635 |
EHI00181 | 0.1978972 | 0.301399301 | 0.4733087 | 2.4456488 |
EHI00422 | 0.1660426 | 0.058188268 | 0.4631427 | 2.3301737 |
EHI00426 | 0.6048076 | 0.374841259 | 1.6091057 | 5.6754435 |
EHI00428 | 0.4361908 | 0.354862015 | 0.9866519 | 3.7380622 |
EHI00433 | 0.6941201 | 0.730117194 | 1.6017157 | 3.7066518 |
EHI00438 | 0.2938182 | 0.100587490 | 0.8490354 | 4.7546269 |
EHI00441 | 0.5217550 | 0.202212815 | 2.3199030 | 6.8309541 |
EHI00451 | 0.4696437 | 0.318898071 | 1.2315115 | 5.0794188 |
EHI00456 | 0.5747753 | 0.097152439 | 0.6337192 | 4.7496272 |
EHI00458 | 0.5541280 | 0.326249839 | 1.7364884 | 4.9185106 |
EHI00462 | 0.5139086 | 0.330809687 | 1.3870295 | 4.8485639 |
EHI00464 | 0.5604449 | 0.962080506 | 1.5487451 | 5.0533364 |
EHI00465 | 0.1355889 | 0.041795770 | 0.7088416 | 1.4262076 |
EHI00467 | 0.4291035 | 0.854761759 | 1.6444666 | 3.7590494 |
EHI00470 | 0.6625583 | 1.679223689 | 1.8675738 | 8.1840405 |
EHI00472 | 0.3619652 | 0.586418138 | 1.8074071 | 3.3732380 |
EHI00473 | 0.4841385 | 0.477433785 | 1.2481742 | 4.8043613 |
EHI00477 | 0.4782444 | 0.087653441 | 2.7186490 | 3.7317223 |
EHI00479 | 0.8395454 | 0.074374376 | 1.9936025 | 12.8156007 |
EHI00480 | 0.7475415 | 0.793212202 | 3.1481051 | 4.3553735 |
EHI00481 | 0.5040521 | 0.094726409 | 1.3739735 | 5.3792865 |
EHI00483 | 0.4545170 | 1.532171008 | 0.8653232 | 4.7402692 |
EHI00484 | 0.3037426 | 0.269250378 | 1.3870200 | 5.2563238 |
EHI00488 | 0.4900356 | 2.212642337 | 2.0850126 | 4.3632065 |
EHI00490 | 0.6234248 | 0.180662818 | 3.5993286 | 7.8297890 |
EHI00493 | 0.4026458 | 2.030328521 | 2.7098633 | 2.4780854 |
EHI00496 | 0.4536382 | 0.415085470 | 1.7045368 | 6.1699650 |
EHI00499 | 3.5142773 | 2.204637514 | 3.4291586 | 3.2083353 |
EHI00504 | 0.5335873 | 2.167943278 | 1.2709452 | 4.9663386 |
EHI00506 | 0.7953759 | 0.639271744 | 1.7418071 | 8.0883653 |
EHI00507 | 0.4077748 | 0.136492692 | 1.5474048 | 5.5083429 |
EHI00512 | 0.6299478 | 8.067540048 | 0.5964127 | 1.3433168 |
EHI00514 | 0.4590217 | 0.336676348 | 1.5023882 | 6.2571308 |
EHI00518 | 0.4082296 | 0.012341521 | 1.0122951 | 5.8379043 |
EHI00524 | 0.4918121 | 1.320091255 | 2.0577845 | 4.8996823 |
EHI00525 | 0.3238390 | 0.105964164 | 1.1420157 | 5.0531429 |
EHI00527 | 0.5623443 | 0.128178226 | 2.3107744 | 7.6106934 |
EHI00529 | 0.3350620 | 0.129930230 | 1.3102065 | 5.1130080 |
EHI00536 | 0.4725239 | 0.010876654 | 1.2534820 | 7.7176039 |
EHI00537 | 0.3665586 | 0.089579032 | 2.4631156 | 1.4492223 |
EHI00538 | 0.4501201 | 2.638414328 | 0.6397030 | 3.1637601 |
EHI00541 | 0.7086242 | 0.152318595 | 1.4153324 | 6.8350770 |
EHI00547 | 1.0225445 | 2.648359289 | 7.0526142 | 6.1760904 |
EHI00566 | 0.5706734 | 0.218664855 | 1.7237625 | 7.3779017 |
EHI00567 | 0.4565067 | 0.133011864 | 1.9051503 | 4.4466414 |
EHI00568 | 0.5246686 | 0.219110833 | 1.7989251 | 3.5919103 |
EHI00569 | 0.6859077 | 0.169999886 | 2.6761990 | 6.7957645 |
# Stacked bar chart of the four sequence fractions, one bar per sample.
sequence_fractions %>%
  pivot_longer(!sample, names_to = "fraction", values_to = "value") %>%
  mutate(
    value = value / 1000000000,
    # The factor level order fixes the stacking order of the fractions.
    fraction = factor(
      fraction,
      levels = c("lowqual_bases", "host_bases", "unmapped_bases", "mags_bases")
    )
  ) %>%
  ggplot(aes(x = sample, y = value, fill = fraction)) +
  geom_col(position = "stack") +
  scale_fill_manual(
    name = "Sequence type",
    breaks = c("lowqual_bases", "host_bases", "unmapped_bases", "mags_bases"),
    labels = c("Low quality", "Mapped to host", "Unmapped", "Mapped to MAGs"),
    values = c("#CCCCCC", "#bcdee1", "#d8b8a3", "#93655c")
  ) +
  # NOTE(review): the plotted values are base counts / 1e9; confirm that "GB"
  # is the intended unit label.
  labs(x = "Samples", y = "Amount of data (GB)") +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 6),
    legend.position = "bottom"
  )
4.4 Recovered microbial fraction
# Per-sample comparison of two percentages: the share of MAG-mapped bases
# among (MAG-mapped + unmapped) bases, and singlem_fraction * 100 taken from
# the sample metadata.
singlem_table <- sequence_fractions %>%
  mutate(
    mags_proportion =
      round(mags_bases / (mags_bases + unmapped_bases) * 100, 2)
  ) %>%
  left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
  mutate(singlem_proportion = round(100 * singlem_fraction, 2)) %>%
  select(sample, mags_proportion, singlem_proportion) %>%
  # The steps below run in order; each one sees the result of the previous.
  mutate(
    # Where the singlem value is exactly zero, set the MAG value to zero too.
    mags_proportion = ifelse(singlem_proportion == 0, 0, mags_proportion),
    # Then turn those zero singlem values into NA.
    singlem_proportion = na_if(singlem_proportion, 0),
    # Drop singlem values smaller than the MAG value, to simplify the plot.
    singlem_proportion =
      ifelse(singlem_proportion < mags_proportion, NA, singlem_proportion),
    # Cap the singlem value at 100 %.
    singlem_proportion = pmin(singlem_proportion, 100)
  )
# Dot plot with one row per sample: the two percentages are drawn as points
# joined by a line, and samples are faceted by species and sample type.
singlem_table %>%
  pivot_longer(!sample, names_to = "proportion", values_to = "value") %>%
  # Bring the metadata back in; the facets need species and sample_type.
  left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
  mutate(
    proportion = factor(
      proportion,
      levels = c("mags_proportion", "singlem_proportion")
    )
  ) %>%
  ggplot(aes(x = value, y = sample, color = proportion)) +
  # One connecting segment per sample, in a fixed colour.
  geom_line(aes(group = sample), color = "#f8a538") +
  geom_point() +
  scale_color_manual(
    name = "Proportion",
    breaks = c("mags_proportion", "singlem_proportion"),
    labels = c("Recovered", "Estimated"),
    values = c("#52e1e8", "#876b53")
  ) +
  facet_nested(species + sample_type ~ ., scales = "free", space = "free") +
  theme_classic() +
  labs(y = "Samples", x = "Prokaryotic fraction (%)") +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 6),
    legend.position = "right",
    strip.background.y = element_rect(color = NA, fill = "#f4f4f4")
  )