Chapter 3 Data statistics

# Load the saved workspace objects used in this chapter.
# NOTE(review): `sample_metadata` and `read_counts` are used below but are not
# defined in this chapter -- presumably they come from this file; verify.
load("data/data.Rdata")

3.1 Sequencing reads statistics

# Summarise the sequencing effort over all rows of `sample_metadata`:
# the total, plus mean and standard deviation (merged into one
# "mean ± sd" column), of reads_post_fastp * 150 / 1e9, each rounded to 2 dp.
# NOTE(review): 150 is presumably the read length in bp, which would make the
# unit gigabases -- confirm.
sample_metadata %>%
  mutate(gigabases = reads_post_fastp * 150 / 1e9) %>%
  summarise(
    Total = round(sum(gigabases), 2),
    mean = round(mean(gigabases), 2),
    sd = round(sd(gigabases), 2)
  ) %>%
  # unite() drops the input columns by default, leaving Total and Average.
  unite("Average", mean, sd, sep = " ± ") %>%
  tt()
tinytable_9wd16w91izy20lln936y
Total Average
695.22 7.32 ± 4.23

3.2 DNA fractions

# Build a per-sample table that splits the sequenced bases into four
# fractions: low quality, host, unmapped, and mapped to MAGs.
sequence_fractions <- read_counts %>%
  # Long format: one row per genome/sample pair, then total counts per sample.
  pivot_longer(cols = !genome, names_to = "sample", values_to = "value") %>%
  group_by(sample) %>%
  summarise(mags = sum(value)) %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  select(
    sample, mags, metagenomic_bases, host_bases, bases_lost_fastp_percent
  ) %>%
  mutate(
    # NOTE(review): 146 is presumably the mean post-trimming read length
    # (the reads summary uses 150) -- confirm.
    mags_bases = mags * 146,
    # Bases retained after filtering (metagenomic + host).
    retained_bases = metagenomic_bases + host_bases,
    # Back-calculate the pre-filtering total from the lost share and keep only
    # the lost part.
    # NOTE(review): `bases_lost_fastp_percent` is used as a 0-1 fraction here
    # despite its name -- confirm.
    lowqual_bases = (retained_bases / (1 - bases_lost_fastp_percent)) -
      retained_bases,
    # Metagenomic bases not assigned to MAGs; negative differences are
    # clamped to zero.
    unmapped_bases = metagenomic_bases - mags_bases,
    unmapped_bases = ifelse(unmapped_bases < 0, 0, unmapped_bases)
  ) %>%
  select(sample, lowqual_bases, host_bases, unmapped_bases, mags_bases)

# Print the per-sample fractions as a table, with every numeric column
# divided by 1e9 and the columns given display names.
sequence_fractions %>%
  # across() replaces the superseded mutate_at(vars(...)) scoped verb.
  mutate(across(!sample, ~ .x / 1e9)) %>%
  # Columns are renamed by position, so this relies on the column order fixed
  # by the final select() that builds `sequence_fractions`.
  rename(
    "Sample" = 1,
    "Low quality" = 2,
    "Mapped to host" = 3,
    "Unmapped" = 4,
    "Mapped to MAGs" = 5
  ) %>%
  tt()
tinytable_hugzex0acl6h4c7qwwn8
Sample Low quality Mapped to host Unmapped Mapped to MAGs
EHI01087 0.6695527 3.30228337 0.6957268 0.20593928
EHI01089 0.5971062 2.29809861 1.1973431 1.84936214
EHI01090 1.4964600 3.89164336 1.3021172 3.82801736
EHI01095 0.7389361 0.62896575 6.2164847 0.21759402
EHI01097 0.7418323 3.27777441 0.0000000 0.04751351
EHI01098 0.5212489 0.03728883 2.4836960 0.13056692
EHI01101 1.5817235 4.10712018 2.2793399 0.17292897
EHI01104 1.5719678 4.61746206 1.3723392 0.16044130
EHI01105 0.8644910 4.22993999 0.8162903 0.50547259
EHI01106 0.5427014 3.38227175 0.5618769 0.41503931
EHI01107 0.5335754 3.40275577 0.0000000 0.04804378
EHI01109 0.5278718 3.12579584 0.0000000 0.13027288
EHI01110 0.9038713 4.43491993 0.9117838 0.35202746
EHI01116 0.8842878 3.04219745 0.6159657 0.66298980
EHI01117 0.1999736 2.78000317 0.0000000 0.07682024
EHI01119 0.7539106 3.39389566 0.0000000 0.03098616
EHI01120 0.9881967 4.06972766 0.0000000 0.03145833
EHI01121 0.7124950 2.06120663 0.0000000 0.01973818
EHI01125 1.0700539 3.82587471 0.0000000 0.06227411
EHI01126 1.1757436 2.77684922 0.6792913 0.71885772
EHI01127 1.0904899 4.18023815 1.2676185 0.17519445
EHI01128 1.5554433 6.08276727 1.2970400 0.05411563
EHI01139 0.6949708 2.48591198 0.4907853 0.49675098
EHI01140 0.2040228 0.04138970 3.4209687 0.09867673
EHI01143 0.5606637 1.43370297 0.5489274 1.92101792
EHI01146 0.8148967 0.47826781 2.9751350 0.11397213
EHI01147 0.4981868 2.93518753 0.0000000 0.06260611
EHI01152 0.8195927 4.32232310 0.9434188 0.31894780
EHI01182 0.1243809 2.24473404 0.5261952 0.50429889
EHI01184 0.1589348 1.40999961 0.5088391 0.75227405
EHI01185 0.1250044 1.43317517 0.5480802 0.58482300
EHI01186 0.1589065 2.99185852 0.0000000 0.03626757
EHI01187 0.1349840 2.63197664 0.0000000 0.01337827
EHI01188 0.1419303 2.90636580 0.0000000 0.03179121
EHI01189 0.1100434 0.05916081 3.3216064 0.07905141
EHI01192 0.2128998 1.81792336 1.0978595 0.02249451
EHI01193 0.1702529 3.29318888 0.0000000 0.02276753
EHI01195 0.1024397 0.60303798 1.6713858 0.53101791
EHI01196 0.1378872 1.73253564 0.4890348 0.31340287
EHI01219 0.2474621 4.19371751 0.0000000 0.06165127
EHI01220 0.1814526 3.39104042 0.0000000 0.01324760
EHI01221 0.2745498 3.77407857 0.0000000 0.01578114
EHI01223 0.1418051 0.47832378 1.0662283 0.65405474
EHI01224 0.1385980 2.20367548 0.6199791 0.17098644
EHI01233 0.2434360 1.75735032 0.6612552 0.94703075
EHI01634 2.5523130 11.55227357 2.4751009 0.73594892
EHI01635 0.8623835 0.14779997 12.2568851 0.35557351
EHI01636 1.4007795 5.22346113 0.0000000 0.04366714
EHI01637 2.3073477 9.82065870 2.4170127 0.53837704
EHI01638 3.6522065 9.89351464 3.0457033 0.36004710
EHI01639 1.4419412 4.39506794 1.4128728 0.08734581
EHI01640 1.5525797 3.58260977 1.1837202 3.57782782
EHI01641 1.8184598 6.00289517 1.1916042 1.22369418
EHI01642 2.0553635 9.10937416 1.5977245 0.24488682
EHI01643 1.7970890 3.66560937 0.9248028 0.04797166
EHI01644 1.3410079 7.47979230 1.2521465 0.93797788
EHI01645 2.1784913 6.89845361 1.4015490 1.53186777
EHI01646 1.6811088 6.03852750 1.8308237 0.25643805
EHI01647 1.3494908 4.63130409 2.4088440 3.78419605
EHI01648 1.5150460 7.15341473 1.6004285 0.54419354
EHI01649 2.0450389 1.07543576 6.6674915 0.25945835
EHI01650 1.4194483 0.09050066 6.0447541 0.32287331
EHI01651 1.3454259 3.08500982 1.1741095 4.21210964
EHI01652 2.6842132 9.59602253 2.1009544 0.09206088
EHI01653 2.2916182 4.80246361 1.1851766 1.25063702
EHI01654 1.0943443 10.41143649 2.1505113 0.36602025
EHI01655 1.6111932 1.17984052 11.6143980 0.41744043
EHI01656 1.8725629 6.37777958 1.3681174 0.07809993
EHI01657 1.1052799 5.09895183 1.2043485 0.09463968
EHI01658 3.2137282 7.72093620 4.3113679 0.33956476
EHI01659 2.0765416 9.18426851 1.9212322 0.74857675
EHI01660 1.8125910 4.76738632 0.8315338 0.10031850
EHI01661 2.4821854 11.12555627 2.1247322 1.28737486
EHI01787 0.5643141 9.53716654 1.2746695 0.05575579
EHI01788 0.2538515 4.80202263 0.8725932 0.25281842
EHI01789 0.3283081 7.48009576 1.1659081 0.05425827
EHI01790 0.2871278 4.61405494 0.8506278 0.02170173
EHI01791 0.3534815 7.10434327 1.2290473 0.26873446
EHI01792 0.2346984 3.34290251 0.5777317 0.66278978
EHI01793 0.3511796 5.72729894 1.0781123 0.03127393
EHI01794 0.1452987 3.25893510 0.0000000 0.01176103
EHI01795 0.2490204 3.66019998 0.9229186 0.50654496
EHI01796 0.3108897 0.83028875 2.0772084 4.70108481
EHI01797 0.2016543 5.20510228 0.0000000 0.02059651
EHI01798 0.6250897 10.51469275 1.9856256 0.29698137
EHI01799 0.3877622 7.35379285 1.1085491 0.29731338
EHI01800 0.3650573 4.78426949 1.2923866 0.87831600
EHI01801 0.4752275 7.77474495 1.0255181 0.19001550
EHI01802 0.2305928 5.26382214 1.0324286 0.13196400
EHI01820 0.4205506 5.23690074 1.3934516 0.27801247
EHI01821 0.7978723 15.07314540 3.6386058 0.37288765
EHI01822 0.7643386 9.42668221 4.8100712 8.50220065
EHI01823 0.8624810 8.03574332 2.5911019 3.04705197
EHI01824 0.3794501 7.67185540 1.0119067 0.04082875
EHI01828 0.7616001 3.83102911 4.3959266 2.18343102
# Stacked bar chart of the four sequence fractions per sample, with values
# divided by 1e9.
sequence_fractions %>%
  pivot_longer(cols = -sample, names_to = "fraction", values_to = "value") %>%
  mutate(
    value = value / 1e9,
    # The factor levels fix the stacking order of the fractions.
    fraction = factor(
      fraction,
      levels = c("lowqual_bases", "host_bases", "unmapped_bases", "mags_bases")
    )
  ) %>%
  ggplot(aes(x = sample, y = value, fill = fraction)) +
  # geom_col() plots the values as-is (same as geom_bar(stat = "identity")).
  geom_col(position = "stack") +
  scale_fill_manual(
    name = "Sequence type",
    breaks = c("lowqual_bases", "host_bases", "unmapped_bases", "mags_bases"),
    labels = c("Low quality", "Mapped to host", "Unmapped", "Mapped to MAGs"),
    values = c("#CCCCCC", "#bcdee1", "#d8b8a3", "#93655c")
  ) +
  labs(x = "Samples", y = "Amount of data (GB)") +
  theme_classic() +
  # Rotate and shrink the sample labels so they fit on the x axis.
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 6),
    legend.position = "bottom"
  )

3.3 Recovered microbial fraction

# Per sample, pair the share of bases mapped to MAGs
# (mags_bases / (mags_bases + unmapped_bases)) with `singlem_fraction` from
# `sample_metadata`, both expressed as percentages rounded to 2 decimals.
singlem_table <- sequence_fractions %>%
  mutate(
    mags_proportion = round(
      (mags_bases / (mags_bases + unmapped_bases)) * 100, 2
    )
  ) %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  mutate(singlem_proportion = round(singlem_fraction * 100, 2)) %>%
  select(sample, mags_proportion, singlem_proportion) %>%
  # The steps below run in order within one mutate(); each sees the columns
  # as updated by the previous step.
  mutate(
    # Where the estimate is zero, set the recovered proportion to 0 as well.
    # NOTE(review): a missing estimate also turns mags_proportion into NA
    # here (NA == 0 is NA) -- confirm this is intended.
    mags_proportion = ifelse(singlem_proportion == 0, 0, mags_proportion),
    # Convert zero estimates to NA.
    singlem_proportion = ifelse(
      singlem_proportion == 0, NA, singlem_proportion
    ),
    # Drop estimates smaller than the recovered value, to simplify the plot.
    singlem_proportion = ifelse(
      singlem_proportion < mags_proportion, NA, singlem_proportion
    ),
    # Cap estimates at 100 %.
    singlem_proportion = ifelse(
      singlem_proportion > 100, 100, singlem_proportion
    )
  )

# Dumbbell-style plot: for each sample, one point for the recovered (MAG)
# proportion and one for the estimated proportion, joined by a line, with
# samples faceted by species and sample type.
singlem_table %>%
  pivot_longer(cols = -sample, names_to = "proportion", values_to = "value") %>%
  # Bring back the metadata columns needed for faceting.
  left_join(sample_metadata, by = join_by(sample)) %>%
  mutate(
    proportion = factor(
      proportion,
      levels = c("mags_proportion", "singlem_proportion")
    )
  ) %>%
  ggplot(aes(x = value, y = sample, colour = proportion)) +
  # The connecting line is drawn first so the points sit on top of it.
  geom_line(aes(group = sample), colour = "#f8a538") +
  geom_point() +
  facet_nested(species + sample_type ~ ., scales = "free", space = "free") +
  scale_colour_manual(
    name = "Proportion",
    breaks = c("mags_proportion", "singlem_proportion"),
    labels = c("Recovered", "Estimated"),
    values = c("#52e1e8", "#876b53")
  ) +
  labs(y = "Samples", x = "Prokaryotic fraction (%)") +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 6),
    legend.position = "right",
    strip.background.y = element_rect(color = NA, fill = "#f4f4f4")
  )