Chapter 2 Data preparation
2.1 Load data
Load the original data files produced by the bioinformatic pipeline.
2.1.2 Genome metadata
# Read the genome metadata table and replace six phylum labels with their
# counterparts from the lookup below. Any label that is not a key of the
# lookup (including NA) is kept exactly as read from the file.
genome_metadata <- read_csv("data/genome_metadata.csv") %>%
  mutate(
    phylum = coalesce(
      # Named lookup (old label = new label). Indexing by the phylum column
      # yields NA for every label that has no entry here.
      unname(c(
        Actinobacteriota = "Actinomycetota",
        Firmicutes = "Bacillota",
        Firmicutes_A = "Bacillota_A",
        Firmicutes_C = "Bacillota_C",
        Cyanobacteria = "Cyanobacteriota",
        Proteobacteria = "Pseudomonadota"
      )[phylum]),
      # Fallback for those NA positions: the original label.
      phylum
    )
  )
2.2 Create working objects
Transform the original data files into working objects for downstream analyses.
2.3 Prepare color scheme
AlberdiLab projects use the unified color schemes developed for the Earth Hologenome Initiative to facilitate figure interpretation.
# Build a named character vector of colors, one entry per distinct
# (phylum, color) pair found in genome_metadata, with names set to the phylum.
# NOTE(review): genome_tree is not defined in this section; it must already
# exist in the session when this runs.
phylum_colors <-
  read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  # Drop the "p__" prefix so labels can be matched against genome_metadata.
  mutate(phylum = str_remove_all(phylum, "p__")) %>%
  # right_join keeps every genome_metadata row; a phylum with no entry in the
  # color table ends up with an NA color.
  right_join(genome_metadata, by = join_by(phylum)) %>%
  # Order rows by each genome's position among the tree tip labels.
  arrange(match(genome, genome_tree$tip.label)) %>%
  dplyr::select(phylum, colors) %>%
  distinct() %>%
  # Final ordering of the returned vector is by phylum.
  arrange(phylum) %>%
  pull(colors, name = phylum)
# Fixed palette of six hex colors for the location variable.
location_colors <- c(
  "#3D5C61",
  "#41B6C0",
  "#90C8C5",
  "#E5D388",
  "#BFA366",
  "#6E5244"
)

# Fixed palette of two hex colors for the origin variable.
origin_colors <- c(
  "#bd70ae",
  "#949293"
)