Loading data
preprocessing <- read_tsv("data/preprocessing.tsv")
Rows: 48 Columns: 9
── Column specification ─────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (1): sample
dbl (8): reads_raw, reads_discarded, reads_host, reads_metagenomic, bases_raw, bases_discarded, bases_host, b...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Total number of bases sequenced (Gb)
preprocessing %>%
summarise(Total=sum(reads_metagenomic * 150 / 1000000000) %>% round(2),
mean=mean(reads_metagenomic * 150 / 1000000000) %>% round(2),
sd=sd(reads_metagenomic * 150 / 1000000000) %>% round(2)) %>%
unite("Average",mean, sd, sep = " ± ", remove = TRUE) %>%
tt()
Total |
Average |
96.56 |
2.01 ± 2.86 |
DNA fractions
sequence_fractions <- read_counts %>%
pivot_longer(-genome, names_to = "sample", values_to = "value") %>%
group_by(sample) %>%
summarise(mags = sum(value)) %>%
left_join(preprocessing, by = join_by(sample == sample)) %>%
dplyr::select(sample,mags,bases_metagenomic,bases_host ,bases_discarded, bases_raw) %>%
mutate(mags_bases = mags*146) %>% #total number of bases mapped to mags (multiplying by effective read length)
mutate(bases_discarded_proportion = bases_discarded/bases_raw) %>%
mutate(lowqual_bases = ((bases_metagenomic+bases_host)/(1-bases_discarded_proportion))-(bases_metagenomic+bases_host)) %>%
mutate(unmapped_bases = bases_metagenomic - mags_bases) %>%
mutate(unmapped_bases = ifelse(unmapped_bases < 0, 0, unmapped_bases))
sequence_fractions %>%
dplyr::select(sample, lowqual_bases, bases_host, unmapped_bases, mags_bases) %>%
mutate_at(vars(-sample), ~./1000000000) %>%
rename("Sample"=1, "Low quality"=2, "Mapped to host"=3, "Unmapped"=4, "Mapped to MAGs"=5) %>%
tt()
Sample |
Low quality |
Mapped to host |
Unmapped |
Mapped to MAGs |
fox10_D_37 |
2.6301023 |
8.1216298 |
0.88459923 |
1.65543326 |
fox10_F_39 |
2.2970487 |
2.8406966 |
2.77779874 |
4.80441528 |
fox10_K_38 |
0.5180657 |
8.8218626 |
0.04823171 |
0.05601246 |
fox10_M_40 |
0.5131760 |
9.5470219 |
0.22409171 |
0.36565627 |
fox14_D_1 |
1.5586674 |
7.5129415 |
0.20575144 |
0.47793027 |
fox14_F_3 |
3.0494709 |
3.4435576 |
1.62980735 |
3.32223394 |
fox14_K_2 |
0.5995430 |
5.6724002 |
0.02957405 |
0.03470770 |
fox14_M_4 |
0.3790174 |
5.2019894 |
0.03456248 |
0.04290575 |
fox16_D_33 |
1.8642653 |
16.0480617 |
0.11617587 |
0.11494288 |
fox16_F_35 |
1.8038610 |
8.1444304 |
1.65409742 |
3.60815903 |
fox16_K_34 |
0.8935251 |
6.1766422 |
0.03076079 |
0.03541931 |
fox16_M_36 |
0.7906496 |
7.2696400 |
0.04106251 |
0.06308295 |
fox1_D_17 |
1.0365225 |
9.5821911 |
0.41629582 |
0.68272520 |
fox1_K_18 |
1.2751933 |
13.7506079 |
0.07496196 |
0.10288401 |
fox1_M_20 |
0.3523016 |
2.9740678 |
0.01786800 |
0.02319049 |
fox22_D_29 |
0.8179759 |
3.9995783 |
1.18966373 |
7.16354957 |
fox22_F_31 |
1.2420612 |
0.5808306 |
3.04907749 |
6.16145112 |
fox22_K_30 |
1.4172912 |
10.1558721 |
0.05224991 |
0.06107603 |
fox22_M_32 |
0.7611953 |
9.4840202 |
0.31379754 |
0.58815662 |
fox24_D_5 |
2.2316394 |
10.1593674 |
0.12140898 |
0.47912002 |
fox24_F_7 |
1.4618429 |
7.2107388 |
0.35280228 |
0.64631557 |
fox24_K_6 |
0.5889175 |
11.1312892 |
0.05502918 |
0.06345175 |
fox24_M_8 |
0.5168462 |
9.9273909 |
0.05831746 |
0.08046586 |
fox26_D_41 |
4.8972087 |
11.8114623 |
0.57036670 |
0.15562082 |
fox26_F_43 |
3.2084935 |
4.3609827 |
2.43630187 |
2.91103049 |
fox26_K_42 |
0.5383661 |
9.6877966 |
0.06740662 |
0.06352402 |
fox26_M_44 |
0.8497864 |
11.0920109 |
0.28636408 |
0.43980879 |
fox29_D_9 |
2.7560848 |
7.8789344 |
0.17264936 |
0.36172230 |
fox29_F_11 |
1.5935373 |
5.6150417 |
0.70333918 |
0.93580540 |
fox29_K_10 |
0.8593511 |
6.4283843 |
0.03036465 |
0.03872504 |
fox29_M_12 |
0.6192141 |
5.8888423 |
0.03290705 |
0.04689389 |
fox30_D_13 |
3.7178316 |
9.4089714 |
0.63316607 |
0.37918361 |
fox30_F_15 |
1.5528117 |
4.8552355 |
2.51266195 |
3.40603999 |
fox30_K_14 |
0.5678181 |
9.7021594 |
0.05598970 |
0.06023989 |
fox30_M_16 |
0.7024185 |
10.0939226 |
0.29300930 |
0.44862749 |
fox3_D_25 |
1.7112057 |
13.1973431 |
0.18071781 |
0.38979255 |
fox3_F_27 |
1.5159373 |
2.0277157 |
1.24050570 |
2.83525985 |
fox3_M_28 |
1.4034647 |
7.1535047 |
0.04753161 |
0.08069362 |
fox5_D_10 |
2.7287199 |
13.6063868 |
0.61487849 |
1.54580318 |
fox5_F_12 |
1.8250414 |
4.3329962 |
1.55799919 |
3.72590584 |
fox5_K_11 |
0.3455828 |
3.3354846 |
0.01929405 |
0.01995294 |
fox5_M_9 |
0.2979118 |
3.3643827 |
0.02800099 |
0.02913255 |
fox8_D_21 |
2.3661247 |
4.0665726 |
2.66481434 |
5.19974240 |
fox8_F_23 |
2.5074581 |
2.2107557 |
2.67704976 |
4.26260403 |
fox8_K_22 |
0.6695787 |
11.3845138 |
0.07137123 |
0.08182030 |
fox8_M_24 |
0.5294341 |
9.9194827 |
0.17877318 |
0.33240900 |
#mean
sequence_fractions %>%
dplyr::select(sample, lowqual_bases, bases_host, unmapped_bases, mags_bases) %>%
mutate_at(vars(-sample), ~./1000000000) %>%
rename("Sample"=1, "Low quality"=2, "Mapped to host"=3, "Unmapped"=4, "Mapped to MAGs"=5) %>%
summarise(across(where(is.numeric), mean, na.rm = TRUE)) %>%
tt()
Warning: There was 1 warning in `summarise()`.
ℹ In argument: `across(where(is.numeric), mean, na.rm = TRUE)`.
Caused by warning:
! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
Supply arguments directly to `.fns` through an anonymous function instead.
# Previously
across(a:b, mean, na.rm = TRUE)
# Now
across(a:b, \(x) mean(x, na.rm = TRUE))
Low quality |
Mapped to host |
Unmapped |
Mapped to MAGs |
1.442664 |
7.590863 |
0.6620315 |
1.269209 |
#SD
sequence_fractions %>%
dplyr::select(sample, lowqual_bases, bases_host, unmapped_bases, mags_bases) %>%
mutate_at(vars(-sample), ~./1000000000) %>%
rename("Sample"=1, "Low quality"=2, "Mapped to host"=3, "Unmapped"=4, "Mapped to MAGs"=5) %>%
summarise(across(where(is.numeric), sd, na.rm = TRUE)) %>%
tt()
Low quality |
Mapped to host |
Unmapped |
Mapped to MAGs |
1.023265 |
3.559173 |
0.9109667 |
1.876046 |
sequence_fractions_plot <- sequence_fractions %>%
dplyr::select(sample, lowqual_bases, bases_host, unmapped_bases, mags_bases) %>%
pivot_longer(!sample, names_to = "fraction", values_to = "value") %>%
mutate(value = value / 1000000000) %>%
mutate(fraction = factor(fraction, levels = c("lowqual_bases","bases_host","unmapped_bases","mags_bases")))
ggplot(sequence_fractions_plot, aes(x = sample, y = value, fill=fraction)) +
geom_bar(position="stack", stat = "identity") +
scale_fill_manual(name="Sequence type",
breaks=c("lowqual_bases","bases_host","unmapped_bases","mags_bases"),
labels=c("Low quality","Mapped to host","Unmapped","Mapped to MAGs"),
values=c("#CCCCCC", "#bcdee1", "#d8b8a3","#93655c"))+
labs(x = "Samples", y = "Amount of data (GB)") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size=6),legend.position = "bottom")

sequence_fractions_type <- sequence_fractions_plot %>%
left_join(sample_metadata)
ggplot(sequence_fractions_type, aes(x = sample, y = value, fill = fraction)) +
geom_bar(position = "stack", stat = "identity") +
scale_fill_manual(name = "Sequence type",
breaks = c("lowqual_bases","bases_host","unmapped_bases","mags_bases"),
labels = c("Low quality","Mapped to host","Unmapped","Mapped to MAGs"),
values = c("#CCCCCC", "#bcdee1", "#d8b8a3","#93655c")) +
labs(x = "Samples", y = "Amount of data (GB)") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 6),
legend.position = "bottom") +
facet_wrap(~ sample_type, scales = "free_x", nrow = 1)

ggplot(sequence_fractions_type, aes(x = sample, y = value, fill = fraction)) +
geom_bar(position = "stack", stat = "identity") +
scale_fill_manual(name = "Sequence type",
breaks = c("lowqual_bases","bases_host","unmapped_bases","mags_bases"),
labels = c("Low quality","Mapped to host","Unmapped","Mapped to MAGs"),
values = c("#CCCCCC", "#bcdee1", "#d8b8a3","#93655c")) +
labs(x = "Samples", y = "Amount of data (GB)") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 6),
legend.position = "bottom") +
facet_wrap(~ gut_location, scales = "free_x", nrow = 1)

ggplot(sequence_fractions_type, aes(x = sample, y = value, fill = fraction)) +
geom_bar(position = "stack", stat = "identity") +
scale_fill_manual(name = "Sequence type",
breaks = c("lowqual_bases","bases_host","unmapped_bases","mags_bases"),
labels = c("Low quality","Mapped to host","Unmapped","Mapped to MAGs"),
values = c("#CCCCCC", "#bcdee1", "#d8b8a3","#93655c")) +
labs(x = "Samples", y = "Amount of data (GB)") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 6),
legend.position = "bottom") +
facet_nested(~ sample_type + gut_location,
scales = "free_x", space = "free_x")

ggplot(sequence_fractions_type, aes(x = sample, y = value, fill = fraction)) +
geom_bar(position = "stack", stat = "identity") +
scale_fill_manual(name = "Sequence type",
breaks = c("lowqual_bases","bases_host","unmapped_bases","mags_bases"),
labels = c("Low quality","Mapped to host","Unmapped","Mapped to MAGs"),
values = c("#CCCCCC", "#bcdee1", "#d8b8a3","#93655c")) +
labs(x = "Samples", y = "Amount of data (GB)") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 6),
legend.position = "bottom") +
facet_wrap(~ fox_behaviour, scales = "free_x", nrow = 1)
