Chapter 4 Data statistics
4.1 Sequencing reads statistics
# Summarise post-filtering sequencing effort across all samples:
# the overall total plus a "mean ± sd" string, each rounded to 2 decimals.
# The per-sample amount is reads_post_filt * 150 / 1e9
# (presumably 150 bases per read, i.e. gigabases -- TODO confirm read length).
sample_preprocessing %>%
  mutate(gb = reads_post_filt * 150 / 1000000000) %>%
  summarise(
    Total = round(sum(gb), 2),
    mean = round(mean(gb), 2),
    sd = round(sd(gb), 2)
  ) %>%
  # Collapse the two statistics into a single display column.
  unite("Average", mean, sd, sep = " ± ", remove = TRUE) %>%
  tt()
Total | Average |
---|---|
712.18 | 7.83 ± 8.08 |
4.2 DNA fractions
# Build a per-sample table splitting the sequenced bases into four fractions:
# low quality, host, unmapped, and mapped to MAGs.
sequence_fractions <- read_counts %>%
  # One row per genome/sample pair, then sum the counts within each sample.
  pivot_longer(cols = -genome, names_to = "sample", values_to = "value") %>%
  group_by(sample) %>%
  summarise(mags = sum(value)) %>%
  # Attach the preprocessing statistics of the matching sample.
  left_join(sample_preprocessing, by = join_by(sample)) %>%
  select(
    sample, mags,
    bases_pre_filt, bases_post_filt,
    host_bases, metagenomic_bases
  ) %>%
  mutate(
    # Counts are scaled by 150 (presumably the read length -- TODO confirm).
    mags_bases = mags * 150,
    # Bases discarded by the filtering step.
    lowqual_bases = bases_pre_filt - bases_post_filt,
    # Metagenomic bases not accounted for by the MAG catalogue.
    unmapped_bases = metagenomic_bases - mags_bases,
    # The difference can go negative; clamp it at zero.
    unmapped_bases = ifelse(unmapped_bases < 0, 0, unmapped_bases)
  ) %>%
  select(sample, lowqual_bases, host_bases, unmapped_bases, mags_bases)
# Print the per-sample fractions as a table, with every numeric column
# divided by 1e9 and the columns given human-readable headers.
sequence_fractions %>%
  # across() replaces the superseded mutate_at(vars(...)) form.
  mutate(across(-sample, ~ .x / 1e9)) %>%
  # Rename by column name rather than by position, so a change in the
  # column order upstream cannot silently mislabel a fraction.
  rename(
    "Sample" = sample,
    "Low quality" = lowqual_bases,
    "Mapped to host" = host_bases,
    "Unmapped" = unmapped_bases,
    "Mapped to MAGs" = mags_bases
  ) %>%
  tt()
Sample | Low quality | Mapped to host | Unmapped | Mapped to MAGs |
---|---|---|---|---|
E18 | 4.7850204 | 6.9637290 | 24.05986617 | 0.52864665 |
E25 | 1.5376473 | 7.9804608 | 6.00871944 | 0.85221990 |
E27 | 0.4230030 | 0.0911577 | 1.95160083 | 0.44069160 |
E30 | 1.7928992 | 1.4413299 | 4.00947792 | 5.12918235 |
E31 | 0.6816818 | 0.0320553 | 0.50570530 | 2.84763450 |
E34 | 2.1948529 | 0.8249310 | 11.67502744 | 2.54218275 |
E44 | 0.4234418 | 5.9270472 | 0.44183940 | 0.24959295 |
E48 | 2.3197861 | 1.9469865 | 7.44564809 | 5.90044110 |
E56 | 0.5108719 | 0.0847401 | 1.09609707 | 1.82499015 |
E58 | 0.4880990 | 2.1264312 | 1.36506017 | 0.11759265 |
H06 | 0.2841785 | 2.0398764 | 1.46660757 | 0.01427955 |
H07 | 1.4500404 | 10.9416210 | 3.39146197 | 0.01941135 |
H08 | 0.5011987 | 0.2131668 | 0.57356153 | 0.45570180 |
H09 | 1.2653958 | 10.8166410 | 7.00507483 | 0.15877860 |
H10 | 0.7942211 | 1.3782558 | 4.03545973 | 0.11882340 |
H12 | 2.5813911 | 8.4009948 | 4.51254046 | 0.84020835 |
H16 | 2.1486462 | 4.0113864 | 9.56424671 | 3.69521370 |
H19 | 0.5557962 | 4.2411288 | 2.50314959 | 0.02286150 |
H20 | 0.9677842 | 4.3890999 | 3.78083553 | 0.07778835 |
H23 | 0.9420728 | 3.9595926 | 1.30102424 | 0.29551170 |
H25 | 1.0738812 | 7.7087100 | 3.42255644 | 0.68422815 |
H32 | 0.5427161 | 5.8415880 | 1.38975295 | 0.06290070 |
H34 | 1.3188004 | 12.4245048 | 5.12752821 | 0.02616405 |
H37 | 2.5737627 | 27.8754720 | 9.15837446 | 0.25084785 |
H39 | 0.2938515 | 2.4124689 | 0.75581958 | 0.02116935 |
H40 | 1.1315544 | 3.8592006 | 5.83728129 | 0.42791340 |
H43 | 0.7161526 | 6.3701718 | 2.50320009 | 0.03324300 |
H45 | 0.9215094 | 1.5407343 | 3.94772969 | 0.81685470 |
H47 | 0.9581476 | 5.6254728 | 2.15816743 | 0.32372385 |
P01 | 0.5733407 | 1.8086574 | 1.33450269 | 0.76149270 |
P05 | 1.1668645 | 9.3271338 | 0.63288528 | 0.36958890 |
P09 | 0.7574713 | 7.1642826 | 0.52521364 | 0.04058895 |
P14 | 0.5034011 | 3.8678286 | 0.03866504 | 0.64239540 |
P25 | 0.2950900 | 4.5486294 | 0.50927300 | 0.38403045 |
P33 | 3.3222731 | 33.2508960 | 7.86558092 | 0.43485525 |
P34 | 0.3459813 | 0.8618043 | 1.84351514 | 0.01439085 |
P36 | 1.2281449 | 10.5844332 | 0.28646419 | 0.71007735 |
P43 | 3.4204299 | 3.2932512 | 7.78711290 | 13.30940625 |
P45 | 1.6790221 | 2.9649723 | 5.42286382 | 0.12008145 |
P47 | 0.4775963 | 2.8718187 | 1.06401988 | 0.80290395 |
P48 | 3.0192702 | 0.9945204 | 17.82268997 | 6.15753405 |
P51 | 1.8101944 | 8.2580928 | 3.46629537 | 2.64862665 |
P53 | 1.5688483 | 24.5797020 | 2.14349384 | 0.12960855 |
P58 | 0.7963638 | 6.4119048 | 1.11305236 | 0.07796910 |
P60 | 2.3930517 | 33.2359848 | 8.57518064 | 0.20491080 |
P64 | 0.2656280 | 0.3023889 | 1.17634560 | 0.47208750 |
P65 | 0.4114934 | 0.2113239 | 0.89754587 | 1.90757895 |
P67 | 1.7026024 | 17.3557032 | 0.62786285 | 2.02223145 |
P72 | 0.2702016 | 3.9134910 | 0.57524887 | 0.20525790 |
P75 | 1.4392490 | 2.6274414 | 6.26632261 | 1.62440925 |
P79 | 1.7376207 | 12.1993284 | 7.73575004 | 1.52807535 |
# Stacked bar chart of the four sequence fractions per sample.
sequence_fractions %>%
  pivot_longer(!sample, names_to = "fraction", values_to = "value") %>%
  mutate(
    # Values are base counts, so dividing by 1e9 yields gigabases.
    value = value / 1e9,
    # Fix the stacking order of the fractions.
    fraction = factor(
      fraction,
      levels = c("lowqual_bases", "host_bases", "unmapped_bases", "mags_bases")
    )
  ) %>%
  ggplot(aes(x = sample, y = value, fill = fraction)) +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity").
  geom_col(position = "stack") +
  scale_fill_manual(
    name = "Sequence type",
    breaks = c("lowqual_bases", "host_bases", "unmapped_bases", "mags_bases"),
    labels = c("Low quality", "Mapped to host", "Unmapped", "Mapped to MAGs"),
    values = c("#CCCCCC", "#bcdee1", "#d8b8a3", "#93655c")
  ) +
  # The y axis shows gigabases (Gb), not gigabytes (GB).
  labs(x = "Samples", y = "Amount of data (Gb)") +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 6),
    legend.position = "bottom"
  )
bat_species | mean_host_perc | sd_host_perc | max_host_perc | min_host_perc |
---|---|---|---|---|
Eptesicus bottae | 17.20026 | 24.00217 | 74.55613 | 0.39599794 |
Hypsugo ariel | 36.45110 | 16.35553 | 58.92606 | 6.53771139 |
Pipistrellus kuhlii | 43.54900 | 27.90509 | 78.35920 | 1.81265533 |
NA | 45.52395 | 33.97328 | 92.13648 | 0.02651141 |