DNA fractions
# Per-sample base counts split into low-quality, host, unmapped and MAG-mapped
# fractions. `read_counts` is reshaped to long form (every column except
# `genome` is treated as a sample) and its values are summed per sample.
sequence_fractions <- read_counts %>%
  pivot_longer(cols = -genome, names_to = "sample", values_to = "value") %>%
  group_by(sample) %>%
  summarise(mags = sum(value)) %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  select(
    sample, mags, metagenomic_bases, host_bases, bases_lost_fastp_percent
  ) %>%
  mutate(
    # NOTE(review): 146 is presumably the read length (in bases) used to turn
    # the summed counts into bases -- confirm.
    mags_bases = mags * 146,
    retained_bases = metagenomic_bases + host_bases,
    # NOTE(review): bases_lost_fastp_percent is used as a 0-1 proportion here
    # despite its name -- confirm it is not stored on a 0-100 scale.
    lowqual_bases =
      (retained_bases / (1 - bases_lost_fastp_percent)) - retained_bases,
    # Metagenomic bases not accounted for by MAGs; negative values become 0.
    unmapped_bases = pmax(metagenomic_bases - mags_bases, 0)
  ) %>%
  select(sample, lowqual_bases, host_bases, unmapped_bases, mags_bases)
# Render the per-sample fractions as a table, with every base count divided
# by 1e9 (i.e. expressed in billions of bases).
sequence_fractions %>%
  # across() replaces the superseded mutate_at(vars(-sample), ...).
  mutate(across(-sample, ~ .x / 1e9)) %>%
  # Rename by column name rather than by position so the labels cannot drift
  # if the column order of `sequence_fractions` changes.
  rename(
    "Sample" = sample,
    "Low quality" = lowqual_bases,
    "Mapped to host" = host_bases,
    "Unmapped" = unmapped_bases,
    "Mapped to MAGs" = mags_bases
  ) %>%
  tt()
tinytable_068brv1fwp18uo8ttxnp
Sample |
Low quality |
Mapped to host |
Unmapped |
Mapped to MAGs |
EHI01087 |
0.6695527 |
3.30228337 |
0.6957268 |
0.20593928 |
EHI01089 |
0.5971062 |
2.29809861 |
1.1973431 |
1.84936214 |
EHI01090 |
1.4964600 |
3.89164336 |
1.3021172 |
3.82801736 |
EHI01095 |
0.7389361 |
0.62896575 |
6.2164847 |
0.21759402 |
EHI01098 |
0.5212489 |
0.03728883 |
2.4836960 |
0.13056692 |
EHI01101 |
1.5817235 |
4.10712018 |
2.2793399 |
0.17292897 |
EHI01104 |
1.5719678 |
4.61746206 |
1.3723392 |
0.16044130 |
EHI01105 |
0.8644910 |
4.22993999 |
0.8162903 |
0.50547259 |
EHI01106 |
0.5427014 |
3.38227175 |
0.5618769 |
0.41503931 |
EHI01110 |
0.9038713 |
4.43491993 |
0.9117838 |
0.35202746 |
EHI01116 |
0.8842878 |
3.04219745 |
0.6159657 |
0.66298980 |
EHI01126 |
1.1757436 |
2.77684922 |
0.6792913 |
0.71885772 |
EHI01127 |
1.0904899 |
4.18023815 |
1.2676185 |
0.17519445 |
EHI01128 |
1.5554433 |
6.08276727 |
1.2970400 |
0.05411563 |
EHI01139 |
0.6949708 |
2.48591198 |
0.4907853 |
0.49675098 |
EHI01140 |
0.2040228 |
0.04138970 |
3.4209687 |
0.09867673 |
EHI01143 |
0.5606637 |
1.43370297 |
0.5489274 |
1.92101792 |
EHI01146 |
0.8148967 |
0.47826781 |
2.9751350 |
0.11397213 |
EHI01152 |
0.8195927 |
4.32232310 |
0.9434188 |
0.31894780 |
EHI01182 |
0.1243809 |
2.24473404 |
0.5261952 |
0.50429889 |
EHI01184 |
0.1589348 |
1.40999961 |
0.5088391 |
0.75227405 |
EHI01185 |
0.1250044 |
1.43317517 |
0.5480802 |
0.58482300 |
EHI01189 |
0.1100434 |
0.05916081 |
3.3216064 |
0.07905141 |
EHI01192 |
0.2128998 |
1.81792336 |
1.0978595 |
0.02249451 |
EHI01195 |
0.1024397 |
0.60303798 |
1.6713858 |
0.53101791 |
EHI01196 |
0.1378872 |
1.73253564 |
0.4890348 |
0.31340287 |
EHI01223 |
0.1418051 |
0.47832378 |
1.0662283 |
0.65405474 |
EHI01224 |
0.1385980 |
2.20367548 |
0.6199791 |
0.17098644 |
EHI01233 |
0.2434360 |
1.75735032 |
0.6612552 |
0.94703075 |
EHI01634 |
2.5523130 |
11.55227357 |
2.4751009 |
0.73594892 |
EHI01635 |
0.8623835 |
0.14779997 |
12.2568851 |
0.35557351 |
EHI01637 |
2.3073477 |
9.82065870 |
2.4170127 |
0.53837704 |
EHI01638 |
3.6522065 |
9.89351464 |
3.0457033 |
0.36004710 |
EHI01639 |
1.4419412 |
4.39506794 |
1.4128728 |
0.08734581 |
EHI01640 |
1.5525797 |
3.58260977 |
1.1837202 |
3.57782782 |
EHI01641 |
1.8184598 |
6.00289517 |
1.1916042 |
1.22369418 |
EHI01642 |
2.0553635 |
9.10937416 |
1.5977245 |
0.24488682 |
EHI01643 |
1.7970890 |
3.66560937 |
0.9248028 |
0.04797166 |
EHI01644 |
1.3410079 |
7.47979230 |
1.2521465 |
0.93797788 |
EHI01645 |
2.1784913 |
6.89845361 |
1.4015490 |
1.53186777 |
EHI01646 |
1.6811088 |
6.03852750 |
1.8308237 |
0.25643805 |
EHI01647 |
1.3494908 |
4.63130409 |
2.4088440 |
3.78419605 |
EHI01648 |
1.5150460 |
7.15341473 |
1.6004285 |
0.54419354 |
EHI01649 |
2.0450389 |
1.07543576 |
6.6674915 |
0.25945835 |
EHI01650 |
1.4194483 |
0.09050066 |
6.0447541 |
0.32287331 |
EHI01651 |
1.3454259 |
3.08500982 |
1.1741095 |
4.21210964 |
EHI01652 |
2.6842132 |
9.59602253 |
2.1009544 |
0.09206088 |
EHI01653 |
2.2916182 |
4.80246361 |
1.1851766 |
1.25063702 |
EHI01654 |
1.0943443 |
10.41143649 |
2.1505113 |
0.36602025 |
EHI01655 |
1.6111932 |
1.17984052 |
11.6143980 |
0.41744043 |
EHI01656 |
1.8725629 |
6.37777958 |
1.3681174 |
0.07809993 |
EHI01657 |
1.1052799 |
5.09895183 |
1.2043485 |
0.09463968 |
EHI01658 |
3.2137282 |
7.72093620 |
4.3113679 |
0.33956476 |
EHI01659 |
2.0765416 |
9.18426851 |
1.9212322 |
0.74857675 |
EHI01660 |
1.8125910 |
4.76738632 |
0.8315338 |
0.10031850 |
EHI01661 |
2.4821854 |
11.12555627 |
2.1247322 |
1.28737486 |
EHI01787 |
0.5643141 |
9.53716654 |
1.2746695 |
0.05575579 |
EHI01788 |
0.2538515 |
4.80202263 |
0.8725932 |
0.25281842 |
EHI01789 |
0.3283081 |
7.48009576 |
1.1659081 |
0.05425827 |
EHI01790 |
0.2871278 |
4.61405494 |
0.8506278 |
0.02170173 |
EHI01791 |
0.3534815 |
7.10434327 |
1.2290473 |
0.26873446 |
EHI01792 |
0.2346984 |
3.34290251 |
0.5777317 |
0.66278978 |
EHI01793 |
0.3511796 |
5.72729894 |
1.0781123 |
0.03127393 |
EHI01795 |
0.2490204 |
3.66019998 |
0.9229186 |
0.50654496 |
EHI01796 |
0.3108897 |
0.83028875 |
2.0772084 |
4.70108481 |
EHI01798 |
0.6250897 |
10.51469275 |
1.9856256 |
0.29698137 |
EHI01799 |
0.3877622 |
7.35379285 |
1.1085491 |
0.29731338 |
EHI01800 |
0.3650573 |
4.78426949 |
1.2923866 |
0.87831600 |
EHI01801 |
0.4752275 |
7.77474495 |
1.0255181 |
0.19001550 |
EHI01802 |
0.2305928 |
5.26382214 |
1.0324286 |
0.13196400 |
EHI01820 |
0.4205506 |
5.23690074 |
1.3934516 |
0.27801247 |
EHI01821 |
0.7978723 |
15.07314540 |
3.6386058 |
0.37288765 |
EHI01822 |
0.7643386 |
9.42668221 |
4.8100712 |
8.50220065 |
EHI01823 |
0.8624810 |
8.03574332 |
2.5911019 |
3.04705197 |
EHI01824 |
0.3794501 |
7.67185540 |
1.0119067 |
0.04082875 |
EHI01828 |
0.7616001 |
3.83102911 |
4.3959266 |
2.18343102 |
# Stacked bar chart: one bar per sample, split into the low-quality, host,
# unmapped and MAG-mapped fractions (base counts divided by 1e9).
sequence_fractions %>%
  pivot_longer(cols = -sample, names_to = "fraction", values_to = "value") %>%
  mutate(
    value = value / 1e9,
    # Explicit factor levels fix the stacking order of the fractions.
    fraction = factor(
      fraction,
      levels = c("lowqual_bases", "host_bases", "unmapped_bases", "mags_bases")
    )
  ) %>%
  ggplot(aes(x = sample, y = value, fill = fraction)) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_manual(
    name = "Sequence type",
    breaks = c("lowqual_bases", "host_bases", "unmapped_bases", "mags_bases"),
    labels = c("Low quality", "Mapped to host", "Unmapped", "Mapped to MAGs"),
    values = c("#CCCCCC", "#bcdee1", "#d8b8a3", "#93655c")
  ) +
  labs(x = "Samples", y = "Amount of data (GB)") +
  theme_classic() +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(size = 6, angle = 90, hjust = 1, vjust = 0.5)
  )

# MAG-mapped bases per sample
# Smallest number of bases mapped to MAGs in any sample (this is a total
# base count taken from `mags_bases`, not a read length).
min_read <- min(sequence_fractions$mags_bases)
min_read
[1] 21701732
# Metadata row(s) of the sample with the fewest MAG-mapped bases.
# which.min() returns the first minimum, so ties resolve to the first sample.
min_row <- which(
  sample_metadata$sample ==
    sequence_fractions$sample[which.min(sequence_fractions$mags_bases)]
)
# Columns are picked by position in `sample_metadata`.
sample_metadata[min_row, c(1, 3, 5, 7, 20)]
# A tibble: 1 × 5
sample species sample_type sex state
<chr> <chr> <chr> <chr> <chr>
1 EHI01790 Coragyps atratus Stomach contents Female Texas
# Largest number of bases mapped to MAGs in any sample (this is a total base
# count taken from `mags_bases`, not a read length).
max_read <- max(sequence_fractions$mags_bases)
max_read
[1] 8502200646
# Metadata row(s) of the sample with the most MAG-mapped bases.
# which.max() returns the first maximum, so ties resolve to the first sample.
max_row <- which(
  sample_metadata$sample ==
    sequence_fractions$sample[which.max(sequence_fractions$mags_bases)]
)
# Index with max_row (not min_row) so the maximum sample is the one reported.
# Columns are picked by position in `sample_metadata`.
sample_metadata[max_row, c(1, 3, 5, 7, 20)]
# A tibble: 1 × 5
sample species sample_type sex state
<chr> <chr> <chr> <chr> <chr>
1 EHI01790 Coragyps atratus Stomach contents Female Texas
# Genome completeness
# Mean of the `completeness` column across all genomes
mean(genome_metadata[["completeness"]])
[1] 83.13285
# Standard deviation of the `completeness` column across all genomes
sd(genome_metadata[["completeness"]])
[1] 16.04326
# Genome contamination
# Mean of the `contamination` column across all genomes
mean(genome_metadata[["contamination"]])
[1] 2.114044
# Standard deviation of the `contamination` column across all genomes
sd(genome_metadata[["contamination"]])
[1] 2.397883
Recovered microbial fraction
# Per-sample comparison of the recovered fraction (bases mapped to MAGs as a
# percentage of MAG-mapped + unmapped bases) with the `singlem_fraction`
# value joined in from `sample_metadata`, both as percentages with 2 decimals.
singlem_table <- sequence_fractions %>%
  mutate(
    mags_proportion = round(
      (mags_bases / (mags_bases + unmapped_bases)) * 100,
      digits = 2
    )
  ) %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  mutate(singlem_proportion = round(singlem_fraction * 100, digits = 2)) %>%
  select(sample, mags_proportion, singlem_proportion) %>%
  mutate(
    # Where the singlem value is zero, set the recovered value to 0 ...
    mags_proportion = ifelse(singlem_proportion == 0, 0, mags_proportion),
    # ... and turn the zero singlem value itself into NA.
    singlem_proportion = ifelse(
      singlem_proportion == 0, NA, singlem_proportion
    ),
    # A singlem value below the recovered value becomes NA to simplify the plot.
    singlem_proportion = ifelse(
      singlem_proportion < mags_proportion, NA, singlem_proportion
    ),
    # Cap the singlem value at 100 %.
    singlem_proportion = pmin(singlem_proportion, 100)
  )
# Dumbbell-style plot: for every sample, the recovered (MAG) and estimated
# (singlem) percentages are drawn as points joined by a line, with panels
# nested by species and sample type.
singlem_table %>%
  pivot_longer(cols = -sample, names_to = "proportion", values_to = "value") %>%
  # Bring species / sample_type back in for faceting.
  left_join(sample_metadata, by = join_by(sample)) %>%
  mutate(
    proportion = factor(
      proportion,
      levels = c("mags_proportion", "singlem_proportion")
    )
  ) %>%
  ggplot(aes(x = value, y = sample, color = proportion)) +
  # The connecting line is drawn first so the points sit on top of it.
  geom_line(aes(group = sample), color = "#f8a538") +
  geom_point() +
  scale_color_manual(
    name = "Proportion",
    breaks = c("mags_proportion", "singlem_proportion"),
    labels = c("Recovered", "Estimated"),
    values = c("#52e1e8", "#876b53")
  ) +
  facet_nested(species + sample_type ~ ., scales = "free", space = "free") +
  theme_classic() +
  labs(y = "Samples", x = "Prokaryotic fraction (%)") +
  theme(
    legend.position = "right",
    strip.background.y = element_rect(color = NA, fill = "#f4f4f4"),
    axis.text.x = element_text(size = 6, angle = 90, hjust = 1, vjust = 0.5)
  )
