Chapter 2 Metagenomic data preparation
2.1 Load data
Load the original data files outputted by the bioinformatic pipeline.
2.1.2 Taxonomy of metagenome-assembled genomes
genome_taxonomy <-
read_tsv("data/taxonomy_v2.tsv") %>%
rename(genome = 1) %>%
mutate(phylum = case_when(
phylum == "Actinobacteriota" ~ "Actinomycetota",
phylum == "Firmicutes" ~ "Bacillota",
phylum == "Firmicutes_A" ~ "Bacillota_A",
phylum == "Firmicutes_B" ~ "Bacillota_B",
phylum == "Firmicutes_C" ~ "Bacillota_C",
phylum == "Cyanobacteria" ~ "Cyanobacteriota",
phylum == "Proteobacteria" ~ "Pseudomonadota",
TRUE ~ phylum)) %>%
filter(!domain == "Archaea")2.2 Distilling Genome Inferred Functional Traits (GIFTs)
2.2.1 Distilling DRAM annotations
gifts_raw <- distill(genome_annotations, GIFT_db2,
genomecol = 2, annotcol = c(9,10,19), verbosity = F)
# Load ENA to genome_id table
ena_to_mag_id <- read_tsv("data/ena_to_mag_id.tsv")
gifts_matrix <-
gifts_raw %>%
data.frame() %>%
rownames_to_column(var = 'mag_name') %>%
left_join(ena_to_mag_id %>%
select(mag_name, mag_id),
by = 'mag_name') %>%
select(-mag_name) %>%
relocate(mag_id) %>%
filter(mag_id %in% genome_taxonomy$genome) %>%
column_to_rownames('mag_id') %>%
as.matrix()2.3 Create working objects
Transform the original data files into working objects for downstream analyses.
2.3.3 Prepare a phyloseq object
phylo_samples <-
chicken_metadata %>%
mutate(sampling_time = as.factor(sampling_time),
trial_age = paste(trial, sampling_time, sep = "_"),
trial_age = as.factor(trial_age)) %>%
column_to_rownames("animal_code") %>%
sample_data() # Convert to phyloseq sample_data object
phylo_genome <- otu_table(total, taxa_are_rows = TRUE)
phylo_taxonomy <-
genome_taxonomy %>%
column_to_rownames("genome") %>%
as.matrix() %>%
tax_table() # Convert to phyloseq tax_table object
phylo_tree <- phy_tree(genome_tree)
phylo_seq <- phyloseq(phylo_genome, phylo_taxonomy, phylo_samples, phylo_tree)2.4 Prepare color scheme
# As dataframe
taxonomy_colors <- read_tsv("data/taxonomy_colours.tsv")
# As vector
phylum_colors <-
c(Halobacteriota = "#f2f2f2",
Methanobacteriota = "#bfbfbf",
Patescibacteria = "#dacce3",
Campylobacterota = "#cdadca",
Synergistota = "#c08eb3",
Thermoplasmatota = "#777dae",
Verrucomicrobiota = "#0066ae",
Desulfobacterota = "#68b0dc",
Pseudomonadota = "#60cfde",
Bacteroidota = "#4dc87c",
Cyanobacteriota = "#92e09f",
Actinomycetota = "#c9e0af",
Deferribacterota = "#dff77e",
Bacillota = "#ffd366",
Bacillota_A = "#fd8854",
Bacillota_B = "#8b1222",
Bacillota_C = "#5a0c37")
order_colors <-
c(Methanomicrobiales = "#f2f2f2",
Methanobacteriales = "#bfbfbf",
Saccharimonadales = "#dacce3",
Campylobacterales = "#cdadca",
Synergistales = "#c08eb3",
Methanomassiliicoccales = "#777dae",
Victivallales = "#0066ae",
Opitutales = "#1c7ebc",
Verrucomicrobiales = "#4a96cc",
Desulfovibrionales = "#68b0dc",
Enterobacterales = "#60cfde",
RF32 = "#60dfd2",
Burkholderiales = "#5cdfb5",
'Rs-D84' = "#40df91",
Bacteroidales = "#4dc87c",
Flavobacteriales = "#88c88b",
Gastranaerophilales = "#92e09f",
Coriobacteriales = "#c9e0af",
Actinomycetales = "#d8e093",
Deferribacterales = "#dff77e",
RF39 = "#ecf76d",
Lactobacillales = "#feef68",
'ML615J-28' = "#fde671",
Erysipelotrichales = "#ffd366",
Acholeplasmatales = "#fdc151",
Bacillales = "#fc953d",
RFN20 = "#fd8035",
Oscillospirales = "#fd8854",
TANB77 = "#f4814d",
Christensenellales = "#c26340",
Lachnospirales = "#c28c5c",
Clostridiales = "#ce9360",
Monoglobales_A = "#ce734f",
Peptostreptococcales = "#c65631",
Monoglobales = "#b93725",
UBA1212 = "#b10f19",
UBA4068 = "#8b1222",
Selenomonadales = "#7a1a12",
Acidaminococcales = "#64121f",
Veillonellales = "#5a0c37")2.5 Wrap working objects
All working objects are wrapped into a single Rdata object to facilitate downstream usage.
save(
# Chicken data
chicken_metadata,
# Bacterial genome data
genome_taxonomy,
genome_stats,
genome_tree,
genome_kegg_paths,
read_counts,
# Transformed data
mag_weighted,
total,
hel,
# Phyloseq objects
phylo_seq,
# Functions
gifts_elements,
gifts_functions,
gifts_domains,
# Colors
taxonomy_colors,
phylum_colors,
order_colors,
sampling_day_colors,
# Define file path
file = "data/data_mg.Rdata")