Chapter 3 Data statistics
3.1 Sequencing reads statistics
# Summarise sequencing depth across all samples as a one-row table:
# the total, and the mean ± sd, of the per-sample data amount.
# NOTE(review): 150 is presumably the read length in bp, making the unit
# gigabases -- confirm against the sequencing setup.
sample_metadata %>%
  mutate(gigabases = reads_post_fastp * 150 / 1000000000) %>%
  summarise(
    Total = round(sum(gigabases), 2),
    mean = round(mean(gigabases), 2),
    sd = round(sd(gigabases), 2)
  ) %>%
  # Collapse mean and sd into a single "mean ± sd" display column.
  unite("Average", mean, sd, sep = " ± ", remove = TRUE) %>%
  tt()
Total | Average |
---|---|
695.22 | 7.32 ± 4.23 |
3.2 DNA fractions
# Per-sample breakdown of the sequenced bases into four fractions:
# low quality, mapped to host, unmapped, and mapped to MAGs.
sequence_fractions <- read_counts %>%
  # Long format so the per-genome values can be totalled for each sample.
  pivot_longer(-genome, names_to = "sample", values_to = "value") %>%
  group_by(sample) %>%
  summarise(mags = sum(value)) %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  select(
    sample, mags, metagenomic_bases, host_bases, bases_lost_fastp_percent
  ) %>%
  mutate(
    # NOTE(review): 146 is presumably the mean post-trimming read length;
    # the read statistics above use 150 -- confirm the difference is intended.
    mags_bases = mags * 146,
    # Bases kept after filtering; the formula below treats
    # bases_lost_fastp_percent as a 0-1 fraction to back-calculate how many
    # bases were discarded.
    retained_bases = metagenomic_bases + host_bases,
    lowqual_bases =
      (retained_bases / (1 - bases_lost_fastp_percent)) - retained_bases,
    unmapped_bases = metagenomic_bases - mags_bases,
    # The MAG estimate can exceed the metagenomic total; clamp at zero.
    unmapped_bases = ifelse(unmapped_bases < 0, 0, unmapped_bases)
  ) %>%
  select(sample, lowqual_bases, host_bases, unmapped_bases, mags_bases)
# Render the per-sample fractions as a table, scaled by 1e9 (giga-units).
sequence_fractions %>%
  # across() replaces the superseded mutate_at(vars(...)) scoped verb.
  mutate(across(-sample, ~ .x / 1000000000)) %>%
  # Rename by column name rather than by position, so the headers stay
  # correct even if the column order of sequence_fractions ever changes.
  rename(
    "Sample" = sample,
    "Low quality" = lowqual_bases,
    "Mapped to host" = host_bases,
    "Unmapped" = unmapped_bases,
    "Mapped to MAGs" = mags_bases
  ) %>%
  tt()
Sample | Low quality | Mapped to host | Unmapped | Mapped to MAGs |
---|---|---|---|---|
EHI01087 | 0.6695527 | 3.30228337 | 0.6957268 | 0.20593928 |
EHI01089 | 0.5971062 | 2.29809861 | 1.1973431 | 1.84936214 |
EHI01090 | 1.4964600 | 3.89164336 | 1.3021172 | 3.82801736 |
EHI01095 | 0.7389361 | 0.62896575 | 6.2164847 | 0.21759402 |
EHI01097 | 0.7418323 | 3.27777441 | 0.0000000 | 0.04751351 |
EHI01098 | 0.5212489 | 0.03728883 | 2.4836960 | 0.13056692 |
EHI01101 | 1.5817235 | 4.10712018 | 2.2793399 | 0.17292897 |
EHI01104 | 1.5719678 | 4.61746206 | 1.3723392 | 0.16044130 |
EHI01105 | 0.8644910 | 4.22993999 | 0.8162903 | 0.50547259 |
EHI01106 | 0.5427014 | 3.38227175 | 0.5618769 | 0.41503931 |
EHI01107 | 0.5335754 | 3.40275577 | 0.0000000 | 0.04804378 |
EHI01109 | 0.5278718 | 3.12579584 | 0.0000000 | 0.13027288 |
EHI01110 | 0.9038713 | 4.43491993 | 0.9117838 | 0.35202746 |
EHI01116 | 0.8842878 | 3.04219745 | 0.6159657 | 0.66298980 |
EHI01117 | 0.1999736 | 2.78000317 | 0.0000000 | 0.07682024 |
EHI01119 | 0.7539106 | 3.39389566 | 0.0000000 | 0.03098616 |
EHI01120 | 0.9881967 | 4.06972766 | 0.0000000 | 0.03145833 |
EHI01121 | 0.7124950 | 2.06120663 | 0.0000000 | 0.01973818 |
EHI01125 | 1.0700539 | 3.82587471 | 0.0000000 | 0.06227411 |
EHI01126 | 1.1757436 | 2.77684922 | 0.6792913 | 0.71885772 |
EHI01127 | 1.0904899 | 4.18023815 | 1.2676185 | 0.17519445 |
EHI01128 | 1.5554433 | 6.08276727 | 1.2970400 | 0.05411563 |
EHI01139 | 0.6949708 | 2.48591198 | 0.4907853 | 0.49675098 |
EHI01140 | 0.2040228 | 0.04138970 | 3.4209687 | 0.09867673 |
EHI01143 | 0.5606637 | 1.43370297 | 0.5489274 | 1.92101792 |
EHI01146 | 0.8148967 | 0.47826781 | 2.9751350 | 0.11397213 |
EHI01147 | 0.4981868 | 2.93518753 | 0.0000000 | 0.06260611 |
EHI01152 | 0.8195927 | 4.32232310 | 0.9434188 | 0.31894780 |
EHI01182 | 0.1243809 | 2.24473404 | 0.5261952 | 0.50429889 |
EHI01184 | 0.1589348 | 1.40999961 | 0.5088391 | 0.75227405 |
EHI01185 | 0.1250044 | 1.43317517 | 0.5480802 | 0.58482300 |
EHI01186 | 0.1589065 | 2.99185852 | 0.0000000 | 0.03626757 |
EHI01187 | 0.1349840 | 2.63197664 | 0.0000000 | 0.01337827 |
EHI01188 | 0.1419303 | 2.90636580 | 0.0000000 | 0.03179121 |
EHI01189 | 0.1100434 | 0.05916081 | 3.3216064 | 0.07905141 |
EHI01192 | 0.2128998 | 1.81792336 | 1.0978595 | 0.02249451 |
EHI01193 | 0.1702529 | 3.29318888 | 0.0000000 | 0.02276753 |
EHI01195 | 0.1024397 | 0.60303798 | 1.6713858 | 0.53101791 |
EHI01196 | 0.1378872 | 1.73253564 | 0.4890348 | 0.31340287 |
EHI01219 | 0.2474621 | 4.19371751 | 0.0000000 | 0.06165127 |
EHI01220 | 0.1814526 | 3.39104042 | 0.0000000 | 0.01324760 |
EHI01221 | 0.2745498 | 3.77407857 | 0.0000000 | 0.01578114 |
EHI01223 | 0.1418051 | 0.47832378 | 1.0662283 | 0.65405474 |
EHI01224 | 0.1385980 | 2.20367548 | 0.6199791 | 0.17098644 |
EHI01233 | 0.2434360 | 1.75735032 | 0.6612552 | 0.94703075 |
EHI01634 | 2.5523130 | 11.55227357 | 2.4751009 | 0.73594892 |
EHI01635 | 0.8623835 | 0.14779997 | 12.2568851 | 0.35557351 |
EHI01636 | 1.4007795 | 5.22346113 | 0.0000000 | 0.04366714 |
EHI01637 | 2.3073477 | 9.82065870 | 2.4170127 | 0.53837704 |
EHI01638 | 3.6522065 | 9.89351464 | 3.0457033 | 0.36004710 |
EHI01639 | 1.4419412 | 4.39506794 | 1.4128728 | 0.08734581 |
EHI01640 | 1.5525797 | 3.58260977 | 1.1837202 | 3.57782782 |
EHI01641 | 1.8184598 | 6.00289517 | 1.1916042 | 1.22369418 |
EHI01642 | 2.0553635 | 9.10937416 | 1.5977245 | 0.24488682 |
EHI01643 | 1.7970890 | 3.66560937 | 0.9248028 | 0.04797166 |
EHI01644 | 1.3410079 | 7.47979230 | 1.2521465 | 0.93797788 |
EHI01645 | 2.1784913 | 6.89845361 | 1.4015490 | 1.53186777 |
EHI01646 | 1.6811088 | 6.03852750 | 1.8308237 | 0.25643805 |
EHI01647 | 1.3494908 | 4.63130409 | 2.4088440 | 3.78419605 |
EHI01648 | 1.5150460 | 7.15341473 | 1.6004285 | 0.54419354 |
EHI01649 | 2.0450389 | 1.07543576 | 6.6674915 | 0.25945835 |
EHI01650 | 1.4194483 | 0.09050066 | 6.0447541 | 0.32287331 |
EHI01651 | 1.3454259 | 3.08500982 | 1.1741095 | 4.21210964 |
EHI01652 | 2.6842132 | 9.59602253 | 2.1009544 | 0.09206088 |
EHI01653 | 2.2916182 | 4.80246361 | 1.1851766 | 1.25063702 |
EHI01654 | 1.0943443 | 10.41143649 | 2.1505113 | 0.36602025 |
EHI01655 | 1.6111932 | 1.17984052 | 11.6143980 | 0.41744043 |
EHI01656 | 1.8725629 | 6.37777958 | 1.3681174 | 0.07809993 |
EHI01657 | 1.1052799 | 5.09895183 | 1.2043485 | 0.09463968 |
EHI01658 | 3.2137282 | 7.72093620 | 4.3113679 | 0.33956476 |
EHI01659 | 2.0765416 | 9.18426851 | 1.9212322 | 0.74857675 |
EHI01660 | 1.8125910 | 4.76738632 | 0.8315338 | 0.10031850 |
EHI01661 | 2.4821854 | 11.12555627 | 2.1247322 | 1.28737486 |
EHI01787 | 0.5643141 | 9.53716654 | 1.2746695 | 0.05575579 |
EHI01788 | 0.2538515 | 4.80202263 | 0.8725932 | 0.25281842 |
EHI01789 | 0.3283081 | 7.48009576 | 1.1659081 | 0.05425827 |
EHI01790 | 0.2871278 | 4.61405494 | 0.8506278 | 0.02170173 |
EHI01791 | 0.3534815 | 7.10434327 | 1.2290473 | 0.26873446 |
EHI01792 | 0.2346984 | 3.34290251 | 0.5777317 | 0.66278978 |
EHI01793 | 0.3511796 | 5.72729894 | 1.0781123 | 0.03127393 |
EHI01794 | 0.1452987 | 3.25893510 | 0.0000000 | 0.01176103 |
EHI01795 | 0.2490204 | 3.66019998 | 0.9229186 | 0.50654496 |
EHI01796 | 0.3108897 | 0.83028875 | 2.0772084 | 4.70108481 |
EHI01797 | 0.2016543 | 5.20510228 | 0.0000000 | 0.02059651 |
EHI01798 | 0.6250897 | 10.51469275 | 1.9856256 | 0.29698137 |
EHI01799 | 0.3877622 | 7.35379285 | 1.1085491 | 0.29731338 |
EHI01800 | 0.3650573 | 4.78426949 | 1.2923866 | 0.87831600 |
EHI01801 | 0.4752275 | 7.77474495 | 1.0255181 | 0.19001550 |
EHI01802 | 0.2305928 | 5.26382214 | 1.0324286 | 0.13196400 |
EHI01820 | 0.4205506 | 5.23690074 | 1.3934516 | 0.27801247 |
EHI01821 | 0.7978723 | 15.07314540 | 3.6386058 | 0.37288765 |
EHI01822 | 0.7643386 | 9.42668221 | 4.8100712 | 8.50220065 |
EHI01823 | 0.8624810 | 8.03574332 | 2.5911019 | 3.04705197 |
EHI01824 | 0.3794501 | 7.67185540 | 1.0119067 | 0.04082875 |
EHI01828 | 0.7616001 | 3.83102911 | 4.3959266 | 2.18343102 |
# Stacked bar chart of the four sequence fractions, one bar per sample.
sequence_fractions %>%
  pivot_longer(!sample, names_to = "fraction", values_to = "value") %>%
  mutate(
    value = value / 1000000000,
    # Fix the factor order so the stacking order is explicit.
    fraction = factor(
      fraction,
      levels = c("lowqual_bases", "host_bases", "unmapped_bases", "mags_bases")
    )
  ) %>%
  ggplot(aes(x = sample, y = value, fill = fraction)) +
  geom_col(position = "stack") +
  scale_fill_manual(
    name = "Sequence type",
    breaks = c("lowqual_bases", "host_bases", "unmapped_bases", "mags_bases"),
    labels = c("Low quality", "Mapped to host", "Unmapped", "Mapped to MAGs"),
    values = c("#CCCCCC", "#bcdee1", "#d8b8a3", "#93655c")
  ) +
  labs(x = "Samples", y = "Amount of data (GB)") +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 6),
    legend.position = "bottom"
  )
3.3 Recovered microbial fraction
# Compare, per sample, the percentage of non-host bases recovered in MAGs
# (mags_proportion) with the SingleM-based estimate (singlem_proportion).
singlem_table <- sequence_fractions %>%
  mutate(
    mags_proportion =
      round((mags_bases / (mags_bases + unmapped_bases)) * 100, 2)
  ) %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  mutate(singlem_proportion = round(singlem_fraction * 100, 2)) %>%
  select(sample, mags_proportion, singlem_proportion) %>%
  # The steps below run in order; each one sees the columns as updated by
  # the previous step.
  mutate(
    # Where the SingleM estimate is zero, set the MAG proportion to zero too.
    mags_proportion = ifelse(singlem_proportion == 0, 0, mags_proportion),
    # Convert zero SingleM estimates to NA.
    singlem_proportion =
      ifelse(singlem_proportion == 0, NA, singlem_proportion),
    # Drop estimates smaller than the recovered proportion, to simplify
    # the plot.
    singlem_proportion =
      ifelse(singlem_proportion < mags_proportion, NA, singlem_proportion),
    # Cap estimates at 100 %.
    singlem_proportion =
      ifelse(singlem_proportion > 100, 100, singlem_proportion)
  )
# Dumbbell-style plot: for each sample, the recovered (MAG) and estimated
# (SingleM) prokaryotic percentages joined by a line, faceted by
# species and sample type.
singlem_table %>%
  pivot_longer(!sample, names_to = "proportion", values_to = "value") %>%
  # Re-attach the metadata so the faceting variables are available.
  left_join(sample_metadata, by = join_by(sample)) %>%
  mutate(
    proportion = factor(
      proportion,
      levels = c("mags_proportion", "singlem_proportion")
    )
  ) %>%
  ggplot(aes(x = value, y = sample, color = proportion)) +
  # Connector between the two points of a sample; drawn first so the
  # points sit on top of it.
  geom_line(aes(group = sample), color = "#f8a538") +
  geom_point() +
  scale_color_manual(
    name = "Proportion",
    breaks = c("mags_proportion", "singlem_proportion"),
    labels = c("Recovered", "Estimated"),
    values = c("#52e1e8", "#876b53")
  ) +
  facet_nested(species + sample_type ~ ., scales = "free", space = "free") +
  labs(y = "Samples", x = "Prokaryotic fraction (%)") +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 6),
    legend.position = "right",
    strip.background.y = element_rect(color = NA, fill = "#f4f4f4")
  )