DNA fractions
#Per sample type
data_stats %>%
mutate(mapped_host_perc=mapped_SceUnd/trimmed_reads) %>%
left_join(., sample_metadata, by = join_by(sample == sample)) %>%
group_by(type) %>%
summarise(mean=mean(mapped_host_perc),sd=sd(mapped_host_perc))
# A tibble: 2 × 3
type mean sd
<chr> <dbl> <dbl>
1 cloaca 0.688 0.0254
2 feces 0.0480 0.0557
#Overall
data_stats %>%
mutate(mapped_perc=mapped_mags/trimmed_reads) %>%
summarise(mean=mean(mapped_perc),sd=sd(mapped_perc))
# A tibble: 1 × 2
mean sd
<dbl> <dbl>
1 0.294 0.292
#Per sample type
data_stats %>%
mutate(mapped_perc=mapped_mags/trimmed_reads) %>%
left_join(., sample_metadata, by = join_by(sample == sample)) %>%
group_by(type) %>%
summarise(mean=mean(mapped_perc),sd=sd(mapped_perc))
# A tibble: 2 × 3
type mean sd
<chr> <dbl> <dbl>
1 cloaca 0.0152 0.00906
2 feces 0.573 0.0849
data_stats %>%
left_join(., sample_metadata, by = join_by(sample == sample)) %>%
lmerTest::lmer(mapped_SceUnd ~ type + (1 | individual), data = ., REML = FALSE) %>%
summary()
Warning in as_lmerModLT(model, devfun): Model may not have converged with 1 eigenvalue close to zero: 5.8e-12
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: mapped_SceUnd ~ type + (1 | individual)
Data: .
AIC BIC logLik deviance df.resid
662.4 666.4 -327.2 654.4 16
Scaled residuals:
Min 1Q Median 3Q Max
-1.53104 -0.32590 -0.02621 0.19221 2.31623
Random effects:
Groups Name Variance Std.Dev.
individual (Intercept) 3.180e+12 1783257
Residual 6.856e+12 2618430
Number of obs: 20, groups: individual, 10
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 1.340e+07 1.002e+06 2.307e+01 13.37 2.37e-12 ***
typefeces -1.240e+07 1.171e+06 1.172e+26 -10.59 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr)
typefeces -0.584
data_stats %>%
mutate(
low_quality = raw_reads - trimmed_reads,
unmapped_reads = trimmed_reads - mapped_SceUnd - mapped_mags
) %>%
select(sample, low_quality, mapped_SceUnd, mapped_mags, unmapped_reads) %>%
pivot_longer(-sample) %>%
separate(col = "sample", into = c("sample", "tissue"), sep = "\\.", remove = FALSE) %>%
mutate(name=factor(name,levels=c("low_quality","mapped_SceUnd","unmapped_reads","mapped_mags"))) %>%
mutate(sample=factor(sample,levels=c("Sg1","Sg2","Sg3","Sg4","Sg5","Sg6","Sg7","Sg8","Sg9","Sg10"))) %>%
ggplot(aes(x = sample, y = value, fill = name)) +
geom_bar(stat = "identity", position = "fill") +
scale_fill_manual(name="Sequence type",
breaks=c("low_quality","mapped_SceUnd","unmapped_reads","mapped_mags"),
labels=c("Low quality","Mapped to host","Unmapped","Mapped to MAGs"),
values=c("#CCCCCC", "#bcdee1", "#d8b8a3","#93655c"))+
facet_wrap(~tissue, scales = "free", labeller = labeller(tissue = c(cloaca="Cloaca",feces="Feces"))) +
theme_minimal() +
labs(y="DNA sequence fraction",x="Samples")

Recovered microbial fraction
data_stats %>%
mutate(
unmapped_reads = trimmed_reads - mapped_SceUnd - mapped_mags,
mag_proportion = mapped_mags / (mapped_mags + unmapped_reads),
singlem_read_fraction = singlem_read_fraction
) %>%
select(sample, mag_proportion, singlem_read_fraction) %>%
mutate(
mag_proportion = if_else(singlem_read_fraction == 0, 0, mag_proportion),
singlem_read_fraction = if_else(singlem_read_fraction == 0, NA, singlem_read_fraction),
singlem_read_fraction = if_else(singlem_read_fraction < mag_proportion, NA, singlem_read_fraction),
singlem_read_fraction = if_else(singlem_read_fraction > 1, 1, singlem_read_fraction)
) %>%
pivot_longer(-sample, names_to = "proportion", values_to = "value") %>%
mutate(
proportion = factor(
proportion,
levels = c("mag_proportion", "singlem_read_fraction")
)
) %>%
separate(col = "sample", into = c("sample", "tissue"), sep = "\\.", remove = FALSE) %>%
mutate(sample=factor(sample,levels=rev(c("Sg1","Sg2","Sg3","Sg4","Sg5","Sg6","Sg7","Sg8","Sg9","Sg10")))) %>%
ggplot(aes(x = value, y = sample, color = proportion)) +
geom_line(aes(group = sample), color = "#f8a538") +
geom_point() +
scale_color_manual(name="Proportion",
breaks=c("mag_proportion","singlem_read_fraction"),
labels=c("Recovered","Estimated"),
values=c("#52e1e8", "#876b53"))+
facet_wrap(tissue~., scales = "free", labeller = labeller(tissue = c(cloaca="Cloaca",feces="Feces"))) +
theme_minimal() +
labs(y = "Samples", x = "Prokaryotic fraction") +
scale_x_continuous(limits = c(0, 1)) +
theme(
axis.text.x = element_text( angle = 90, vjust = 0.5, hjust = 1, size = 6),
legend.position = "right"
)

Domain-adjusted mapping rate (DAMR)
data_stats %>%
mutate(
unmapped_reads = trimmed_reads - mapped_SceUnd - mapped_mags,
mag_proportion = mapped_mags / (mapped_mags + unmapped_reads),
singlem_read_fraction = singlem_read_fraction
) %>%
mutate(damr=pmin(1, mag_proportion/singlem_read_fraction)) %>%
select(sample,damr) %>%
separate_wider_delim(sample,".",names=c("sample", "type")) %>%
group_by(type) %>%
summarise(mean=mean(damr),sd=sd(damr)) %>%
tt()
tinytable_la5ixvwaya4qxouk2dwh
type |
mean |
sd |
cloaca |
0.7730484 |
0.1249452 |
feces |
0.8350923 |
0.1097052 |