Chapter 6 Diversity analysis

load("data/data_03122025.Rdata")
load("data/03122025gifts.Rdata")
genome_counts_filt <- genome_counts_filt %>% 
  column_to_rownames(var = "genome") %>%
  select(any_of(c("genome",intersect(sample_metadata$Tube_code, colnames(read_counts))))) %>%
  filter(rowSums(. != 0, na.rm = TRUE) > 0)%>% 
  select(where(~ sum(.) > 0)) %>% 
  rownames_to_column(., "genome")

genome_tree <- keep.tip(genome_tree, tip=genome_counts_filt$genome)

table <- genome_counts_filt %>%
  t() %>%
  row_to_names(row_number = 1) %>% 
  as.data.frame() %>% 
  mutate_if(is.character,as.numeric) %>% 
  rownames_to_column(., "sample")
  
master_table <- sample_metadata %>%
  mutate(time_point = case_when(time_point %in% c("Transplant1") ~ "Inoculum1", TRUE ~ time_point)) %>%
  mutate(time_point = case_when(time_point %in% c("Transplant2") ~ "Inoculum2", TRUE ~ time_point)) %>%
  mutate(Population = case_when(Population %in% c("Cold_wet") ~ "Cold", TRUE ~ Population)) %>%
  mutate(Population = case_when(Population %in% c("Hot_dry") ~ "Warm", TRUE ~ Population)) %>%
  mutate(type = case_when(type %in% c("Hot_control") ~ "Warm_control", TRUE ~ type)) %>%
  mutate(type = case_when(type %in% c("Control") ~ "Cold_control", TRUE ~ type)) %>%
  mutate(type = case_when(type %in% c("Treatment") ~ "Cold_intervention", TRUE ~ type)) %>%
  mutate(sample = Tube_code) %>%
  mutate(Tube_code = str_remove_all(Tube_code, "_a")) %>%
  filter(Tube_code %in% table$sample) %>%
#  mutate(treatment = sub("^\\d+_", "", time_point)) %>%
  left_join(., table, by = join_by("Tube_code" == "sample"))

sample_metadata <- master_table %>% 
  select(1,2,3,4:8,10,12)

genome_counts_filt <- master_table %>% 
  select(12,13:322) %>% 
  t() %>%
  row_to_names(row_number = 1) %>% 
  as.data.frame() %>% 
  mutate_if(is.character,as.numeric) %>% 
  filter(rowSums(. != 0, na.rm = TRUE) > 0)%>%
  dplyr::select(where(~ !all(. == 0)))%>% 
  rownames_to_column(., "genome")

genome_tree <- keep.tip(genome_tree, tip=genome_counts_filt$genome)

genome_metadata <- genome_metadata %>% 
  filter(genome %in% genome_counts_filt$genome)

6.1 Phyloseq object

#Phyloseq object
count_phy <- genome_counts_filt %>%
  column_to_rownames(var="genome")%>%
  otu_table(., taxa_are_rows=T)

sample_info_tab_phy <- sample_metadata%>%
  column_to_rownames(var="sample")%>%
  sample_data()

TAX <- genome_metadata%>%
  column_to_rownames(var="genome")%>%
  select(1:7)%>%
  as.matrix()%>%
  tax_table()
tree <- phy_tree(genome_tree)

physeq_all = phyloseq(count_phy, TAX, sample_info_tab_phy, tree)

6.2 Functional data

# Aggregate bundle-level GIFTs into the compound level
GIFTs_elements <- to.elements(genome_gifts, GIFT_db)
GIFTs_elements_filtered <- GIFTs_elements[rownames(GIFTs_elements) %in% genome_counts_filt$genome, ]
GIFTs_elements_filtered <- as.data.frame(GIFTs_elements_filtered) %>%
  select_if(~ !is.numeric(.) || sum(.) != 0)

elements <- GIFTs_elements_filtered %>%
  as.data.frame()

# Aggregate element-level GIFTs into the function level
GIFTs_functions <- to.functions(GIFTs_elements_filtered, GIFT_db)
functions <- GIFTs_functions %>%
  as.data.frame()

# Aggregate function-level GIFTs into overall Biosynthesis, Degradation and Structural GIFTs
GIFTs_domains <- to.domains(GIFTs_functions, GIFT_db)
domains <- GIFTs_domains %>%
  as.data.frame()

# Get community-weighed average GIFTs per sample
GIFTs_elements_community <- to.community(GIFTs_elements_filtered, genome_counts_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db)
GIFTs_functions_community <- to.community(GIFTs_functions, genome_counts_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db)
GIFTs_domains_community <- to.community(GIFTs_domains, genome_counts_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db)

uniqueGIFT_db <- unique(GIFT_db[c(2,3,4,5,6)]) %>% unite("Function",Function:Element, sep= "_", remove=FALSE)
element_gift <- GIFTs_elements_community %>%
  as.data.frame() %>%
  rownames_to_column(., "Tube_code") %>%
  left_join(sample_metadata[c(1, 3, 7, 9)], by = "Tube_code") 
save(sample_metadata, 
     genome_metadata, 
     genome_counts_filt, 
     genome_tree,
     phylum_colors,
     data_stats,
     genome_gifts,
     master_table,
     physeq_all,
     GIFTs_elements_filtered,
     GIFTs_elements_community,
     GIFTs_functions_community,
     GIFTs_domains_community,
     element_gift,
     uniqueGIFT_db,
     file = "data/objects_03122025.Rdata")

6.3 Calculate diversities

6.3.1 Alpha diversity

# Calculate Hill numbers
richness <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 0) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(richness = 1) %>%
  rownames_to_column(var = "sample")

neutral <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 1) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(neutral = 1) %>%
  rownames_to_column(var = "sample")

phylogenetic <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 1, tree = genome_tree) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(phylogenetic = 1) %>%
  rownames_to_column(var = "sample")

genome_gifts1 <- genome_gifts[genome_counts_filt$genome,]
genome_gifts1 <- genome_gifts1[, colSums(genome_gifts1 != 0) > 0]

genome_counts_filt1 <- genome_counts_filt[genome_counts_filt$genome %in% rownames(genome_gifts1),]
genome_counts_filt1 <- genome_counts_filt1 %>% 
  remove_rownames() %>% 
  column_to_rownames(var = "genome") %>% 
      filter(rowSums(. != 0, na.rm = TRUE) > 0) %>% 
  rownames_to_column(., "genome")

# Merge all metrics
alpha_div <- richness %>%
  full_join(neutral, by = join_by(sample == sample)) %>%
  full_join(phylogenetic, by = join_by(sample == sample))
save(alpha_div, 
     file = "data/alpha_16092025.Rdata")

6.3.2 Beta diversity

beta_q0n <- genome_counts_filt %>%
  column_to_rownames(., "genome") %>%
  filter(rowSums(. != 0, na.rm = TRUE) > 0) %>%
  select_if(~!all(. == 0)) %>%
  hillpair(., q = 0)

beta_q1n <- genome_counts_filt %>%
  column_to_rownames(., "genome") %>%
  filter(rowSums(. != 0, na.rm = TRUE) > 0) %>%
  select_if(~!all(. == 0)) %>%
  hillpair(., q = 1)

beta_q1p <- genome_counts_filt %>%
  column_to_rownames(., "genome") %>%
  filter(rowSums(. != 0, na.rm = TRUE) > 0) %>%
  select_if(~!all(. == 0)) %>%
  hillpair(., q = 1, tree = genome_tree)

genome_gifts1 <- genome_gifts[genome_counts_filt$genome,]
genome_gifts1 <- genome_gifts1[, colSums(genome_gifts1 != 0) > 0]
genome_counts_filt1 <- genome_counts_filt[genome_counts_filt$genome %in% rownames(genome_gifts1),]
save(beta_q0n, 
     beta_q1n, 
     beta_q1p, 
     file = "data/beta_16092025.Rdata")