Chapter 12 Behaviour-origin congruence

12.1 Taxonomic congruence

tame_domestic <- intersect(behaviour_tame$genome, origin_domestic$genome) %>% length()
tame_feral <- intersect(behaviour_tame$genome, origin_feral$genome) %>% length()
aggressive_domestic <- intersect(behaviour_aggresive$genome, origin_domestic$genome) %>% length()
aggressive_feral <- intersect(behaviour_aggresive$genome, origin_feral$genome) %>% length()

tibble(origin=c("domestic","feral"),aggressive=c(aggressive_domestic,aggressive_feral),tame=c(tame_domestic,tame_feral)) %>% 
  tt()
origin aggressive tame
domestic 6 15
feral 10 0

12.1.1 Tame - Domestic

behaviour_tame %>% 
  filter(genome %in% intersect(behaviour_tame$genome, origin_domestic$genome)) %>%
  tt()
genome phylum class order family genus species value
bin_CaboVerde.50 Actinomycetota Actinomycetia Actinomycetales Actinomycetaceae Trueperella NA 0.088
bin_Aruba.47 Actinomycetota Actinomycetia Mycobacteriales Mycobacteriaceae Lawsonella NA 0.044
bin_Brazil.109 Bacillota_A Clostridia Oscillospirales Butyricicoccaceae AM07-15 AM07-15 sp003477405 0.028
bin_Aruba.28 Bacillota_A Clostridia Oscillospirales Oscillospiraceae Dysosmobacter NA 0.079
bin_Aruba.54 Bacillota_A Clostridia Tissierellales Peptoniphilaceae Peptoniphilus_A NA 0.034
bin_Aruba.36 Bacillota_A Clostridia Tissierellales Peptoniphilaceae Peptoniphilus_C Peptoniphilus_C sp902363535 0.089
bin_Brazil.70 Desulfobacterota Desulfovibrionia Desulfovibrionales Desulfovibrionaceae Mailhella Mailhella sp003150275 0.016
bin_Brazil.9 Pseudomonadota Alphaproteobacteria RF32 CAG-239 CAG-495 CAG-495 sp000436375 0.021
bin_Brazil.146 Pseudomonadota Alphaproteobacteria RF32 CAG-239 CAG-495 CAG-495 sp001917125 0.015
bin_Brazil.92 Pseudomonadota Gammaproteobacteria Burkholderiales Burkholderiaceae CAG-521 CAG-521 sp000437635 0.033
bin_Spain.43 Pseudomonadota Gammaproteobacteria Burkholderiales Burkholderiaceae CAG-521 CAG-521 sp000437635 0.032
bin_Brazil.64 Pseudomonadota Gammaproteobacteria Burkholderiales Burkholderiaceae CAG-521 NA 0.034
bin_Brazil.51 Pseudomonadota Gammaproteobacteria Burkholderiales Burkholderiaceae Sutterella Sutterella wadsworthensis_A 0.070
bin_Brazil.15 Pseudomonadota Gammaproteobacteria Burkholderiales Burkholderiaceae Sutterella NA 0.063
bin_Aruba.15 Pseudomonadota Gammaproteobacteria Burkholderiales Burkholderiaceae Sutterella NA 0.035

12.1.2 Tame - Feral

behaviour_tame %>% 
  select(-value) %>%
  filter(genome %in% intersect(behaviour_tame$genome, origin_feral$genome))
# A tibble: 0 × 7
# ℹ 7 variables: genome <chr>, phylum <chr>, class <chr>, order <chr>, family <chr>, genus <chr>, species <chr>

12.1.3 Aggressive - Domestic

behaviour_aggresive %>% 
  select(-value) %>%
  filter(genome %in% intersect(behaviour_aggresive$genome, origin_domestic$genome)) %>%
  tt()
genome phylum class order family genus species
bin_Denmark.17 Actinomycetota Coriobacteriia Coriobacteriales Coriobacteriaceae Collinsella Collinsella bouchesdurhonensis
bin_Malaysia.96 Bacillota_C Negativicutes Veillonellales Megasphaeraceae Caecibacter Caecibacter sp003467125
bin_Brazil.58 Bacillota_C Negativicutes Veillonellales Megasphaeraceae Megasphaera Megasphaera elsdenii
bin_Aruba.64 Bacillota_C Negativicutes Veillonellales Megasphaeraceae Megasphaera Megasphaera sp000417505
bin_CaboVerde.38 Bacillota_C Negativicutes Veillonellales Megasphaeraceae Megasphaera Megasphaera sp000417505
bin_Malaysia.81 Bacillota_C Negativicutes Veillonellales Megasphaeraceae Megasphaera Megasphaera stantonii

12.1.4 Aggressive - Feral

behaviour_aggresive %>% 
  select(-value) %>%
  filter(genome %in% intersect(behaviour_aggresive$genome, origin_feral$genome)) %>%
  tt()
genome phylum class order family genus species
bin_Brazil.12 Bacillota Bacilli Lactobacillales Enterococcaceae Enterococcus Enterococcus faecalis
bin_Denmark.27 Bacillota Bacilli Lactobacillales Enterococcaceae Enterococcus_B Enterococcus_B hirae
bin_CaboVerde.60 Bacillota Bacilli Lactobacillales Lactobacillaceae Ligilactobacillus Ligilactobacillus agilis
bin_Malaysia.127 Bacillota Bacilli Lactobacillales Lactobacillaceae Ligilactobacillus Ligilactobacillus agilis
bin_Denmark.117 Bacillota Bacilli Lactobacillales Streptococcaceae Lactococcus Lactococcus garvieae
bin_Denmark.113 Bacillota Bacilli Lactobacillales Streptococcaceae Streptococcus Streptococcus alactolyticus
bin_Denmark.50 Bacillota_A Clostridia Peptostreptococcales Peptostreptococcaceae Peptacetobacter Peptacetobacter sp900539645
bin_Brazil.140 Fusobacteriota Fusobacteriia Fusobacteriales Fusobacteriaceae Fusobacterium_A Fusobacterium_A sp900543175
bin_Brazil.159 Fusobacteriota Fusobacteriia Fusobacteriales Fusobacteriaceae Fusobacterium_B Fusobacterium_B sp900541465
bin_Malaysia.6 Fusobacteriota Fusobacteriia Fusobacteriales Fusobacteriaceae Fusobacterium_B Fusobacterium_B sp900545035

12.2 Domestic - aggressive incongruence

domestic_aggressive <- sample_metadata %>% 
        rowwise() %>%
        mutate(behaviour = sum(c_across(c(bites, hisses,retreats,avoidance)), na.rm = TRUE)/4) %>%
        ungroup() %>%
        filter(origin == "Domestic" & behaviour > 0) %>%
        pull(sample)

domestic_tame <- sample_metadata %>% 
        rowwise() %>%
        mutate(behaviour = sum(c_across(c(bites, hisses,retreats,avoidance)), na.rm = TRUE)/4) %>%
        ungroup() %>%
        filter(origin == "Domestic" & behaviour == 0) %>%
        pull(sample)

focal_genomes <- behaviour_aggresive %>% 
  filter(genome %in% intersect(behaviour_aggresive$genome, origin_domestic$genome))  %>% 
  filter(family == "Megasphaeraceae") %>% 
  pull(genome)


phylo_samples <- sample_metadata %>% 
    filter(sample %in% c(domestic_aggressive,domestic_tame)) %>% 
    mutate(behaviour = case_when(
    sample %in% domestic_aggressive ~ "aggresive",
    sample %in% domestic_tame ~ "tame",
    TRUE ~ NA_character_
  )) %>% 
    column_to_rownames("sample") %>% 
    sample_data()

phylo_genome <- genome_counts_filt %>% 
     filter(genome %in% focal_genomes) %>% 
     column_to_rownames("genome") %>% 
     mutate_all(~ replace(., . == 0, 0.00001)) %>%
      otu_table(., taxa_are_rows = TRUE)

phylo_taxonomy <- genome_metadata %>% 
     filter(genome %in% focal_genomes) %>% 
     mutate(genome2=genome) %>% #create a pseudo genome name column
     column_to_rownames("genome2") %>% 
     dplyr::select(division,phylum,class,order,family,genus,species,genome) %>%
     as.matrix() %>% 
     tax_table()

physeq_genome_filtered <- phyloseq(phylo_genome, phylo_taxonomy, phylo_samples)

ancom_rand_output = ancombc2(data = physeq_genome_filtered, 
                  assay_name = "counts",
                  tax_level = NULL, #change to agglomerate analysis to a higher taxonomic range
                  fix_formula = "behaviour", #fixed variable(s)
#                  rand_formula = "(1|Individual)",
                  p_adj_method = "holm", 
                  pseudo_sens = TRUE,
                  prv_cut = 0, 
                  s0_perc = 0,
                  group = NULL, 
                  struc_zero = FALSE, 
                  neg_lb = FALSE,
                  alpha = 0.05, 
                  n_cl = 2, 
                  verbose = TRUE,
                  global = FALSE, 
                  pairwise = FALSE, 
                  dunnet = FALSE, 
                  trend = FALSE,
                  iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
                  em_control = list(tol = 1e-5, max_iter = 100),
                  lme_control = NULL,
                  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100), 
                  trend_control = NULL)



genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% 
  filter(genome %in% focal_genomes) %>% 
  filter(sample %in% c(domestic_aggressive,domestic_tame)) %>% 
  mutate(behaviour = case_when(
    sample %in% domestic_aggressive ~ "aggresive",
    sample %in% domestic_tame ~ "tame",
    TRUE ~ NA_character_
  )) %>% 
  ggplot(aes(x=behaviour,y=count, group=behaviour, color=genome))+
    geom_jitter()

12.3 Functional

aggressive_domestic <- intersect(positive_behaviour$trait,positive_origin$trait) %>% length()
tame_domestic <- intersect(negative_behaviour$trait,positive_origin$trait) %>% length()
aggressive_feral <- intersect(positive_behaviour$trait, negative_origin$trait) %>% length()
tame_feral <- intersect(negative_behaviour$trait, negative_origin$trait) %>% length()

tibble(origin=c("domestic","feral"),aggressive=c(aggressive_domestic,aggressive_feral),tame=c(tame_domestic,tame_feral)) %>% 
  tt()
origin aggressive tame
domestic 0 0
feral 12 0

12.3.1 Aggresive - Domestic

positive_behaviour %>% 
  inner_join(positive_origin,by="trait")
# A tibble: 0 × 9
# ℹ 9 variables: trait <chr>, mean.x <dbl>, p1.x <dbl>, p9.x <dbl>, support.x <dbl>, mean.y <dbl>, p1.y <dbl>, p9.y <dbl>, support.y <dbl>

12.3.2 Tame - Domestic

negative_behaviour %>% 
  inner_join(positive_origin,by="trait") 
# A tibble: 0 × 9
# ℹ 9 variables: trait <chr>, mean.x <dbl>, p1.x <dbl>, p9.x <dbl>, support.x <dbl>, mean.y <dbl>, p1.y <dbl>, p9.y <dbl>, support.y <dbl>

12.3.3 Aggresive - Feral

positive_behaviour %>% 
  inner_join(negative_origin,by="trait") %>% 
  tt()
trait mean.x p1.x p9.x support.x mean.y p1.y p9.y support.y
D0610 0.0002013966 4.555716e-06 0.0004808042 0.928 -0.0002419241 -0.0004928747 -4.243458e-05 0.963
D0606 0.0012892344 6.049932e-05 0.0029723259 0.938 -0.0015296097 -0.0028249718 -5.316331e-04 0.985
D0104 0.0043756981 3.195157e-04 0.0089221855 0.914 -0.0135005280 -0.0385428493 -1.146699e-05 0.901
B0401 0.0064628168 5.687129e-04 0.0125691510 0.918 -0.0248207028 -0.0554449822 -6.805442e-03 0.981
B0403 0.0066014276 3.016926e-03 0.0098000566 0.985 -0.0199725357 -0.0403179003 -7.305304e-03 0.992
D0302 0.0077732053 6.767830e-04 0.0148435492 0.922 -0.0269396348 -0.0641994039 -5.212827e-03 0.984
B0213 0.0086312286 1.817108e-03 0.0155766545 0.945 -0.0332721937 -0.0826234506 -5.886948e-04 0.908
D0301 0.0093071525 3.421881e-03 0.0150961210 0.966 -0.0298192044 -0.0670681997 -2.964810e-04 0.902
B0307 0.0124627007 3.594703e-03 0.0205566196 0.966 -0.0425026109 -0.0975614479 -1.094176e-02 1.000
D0305 0.0155741873 5.440648e-03 0.0253723277 0.965 -0.0292374751 -0.0605709415 -6.452455e-03 0.968
B0708 0.0164060418 5.603298e-03 0.0265865633 0.964 -0.0365146987 -0.0892306768 -1.833453e-03 0.921
D0502 0.0198776080 7.825305e-03 0.0302026038 0.973 -0.0478939045 -0.0903191601 -1.202898e-02 0.978

12.3.4 Tame - Feral

negative_behaviour %>% 
  inner_join(negative_origin,by="trait")
# A tibble: 0 × 9
# ℹ 9 variables: trait <chr>, mean.x <dbl>, p1.x <dbl>, p9.x <dbl>, support.x <dbl>, mean.y <dbl>, p1.y <dbl>, p9.y <dbl>, support.y <dbl>

12.4 Functional contrast

aggressive_feral <- behaviour_aggresive %>% 
  select(-value) %>%
  filter(genome %in% intersect(behaviour_aggresive$genome, origin_feral$genome)) %>% 
  pull(genome)

aggresive_taxonomy <- behaviour_aggresive$genome
aggresive_function <- positive_behaviour$trait

tame_domestic <-behaviour_tame %>% 
  filter(genome %in% intersect(behaviour_tame$genome, origin_domestic$genome)) %>%
  pull(genome)

tame_taxonomy <- behaviour_tame$genome
tame_function <- negative_behaviour$trait

genome_gifts %>% 
  to.elements(., GIFT_db) %>% 
    as.data.frame() %>%
    rownames_to_column(var="genome") %>%
    pivot_longer(!genome,names_to="trait",values_to="gift") %>%
    filter(genome %in% c(aggresive_taxonomy,tame_taxonomy)) %>% 
    mutate(taxonomy=ifelse(genome %in% aggresive_taxonomy,"aggressive","tame")) %>% 
    filter(trait %in% (positive_behaviour %>% 
    inner_join(negative_origin,by="trait") %>% pull(trait))) %>% 
    mutate(functionid = substr(trait, 1, 3)) %>%
    mutate(trait = case_when(
      trait %in% GIFT_db$Code_element ~ GIFT_db$Element[match(trait, GIFT_db$Code_element)],
      TRUE ~ trait
    )) %>%
    mutate(functionid = case_when(
      functionid %in% GIFT_db$Code_function ~ GIFT_db$Function[match(functionid, GIFT_db$Code_function)],
      TRUE ~ functionid
    )) %>%
    mutate(trait=factor(trait,levels=unique(GIFT_db$Element))) %>%
    mutate(functionid=factor(functionid,levels=unique(GIFT_db$Function))) %>%
    ggplot(aes(x=genome,y=trait,fill=gift)) +
        geom_tile(colour="white", linewidth=0.2)+
        scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
        facet_grid(. ~ taxonomy, scales="free",space="free") +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
              strip.text.y = element_text(angle = 0)) + 
        labs(y="Traits",x="Samples",fill="GIFT")

12.5 Functional ordination

PCoA functional ordination with PCA loadings.

gift_pcoa <- genome_gifts %>%
    to.elements(., GIFT_db) %>%
    as.data.frame() %>%
    vegdist(method="euclidean") %>%
    pcoa()

gift_pcoa_rel_eigen <- gift_pcoa$values$Relative_eig[1:10]


# Get genome positions
gift_pcoa_vectors <- gift_pcoa$vectors %>% #extract vectors
  as.data.frame() %>% 
  select(Axis.1,Axis.2) # keep the first 2 axes

gift_pcoa_eigenvalues <- gift_pcoa$values$Eigenvalues[c(1,2)]

gift_pcoa_gifts <- cov(genome_gifts, scale(gift_pcoa_vectors)) %*% diag((gift_pcoa_eigenvalues/(nrow(genome_gifts)-1))^(-0.5)) %>%
  as.data.frame() %>% 
  rename(Axis.1=1,Axis.2=2) %>% 
  rownames_to_column(var="label") %>% 
  #get function summary vectors
  mutate(func=substr(label,1,5)) %>% 
  group_by(func) %>% 
  summarise(Axis.1=mean(Axis.1),
            Axis.2=mean(Axis.2)) %>% 
  rename(label=func) %>% 
  filter(label %in% (positive_behaviour %>% 
    inner_join(negative_origin,by="trait") %>% pull(trait)))
scale <- 15 # scale for vector loadings
gift_pcoa_vectors %>% 
  rownames_to_column(var="genome") %>% 
  left_join(genome_metadata, by="genome") %>%
  mutate(relation = case_when(genome %in% aggresive_taxonomy ~ "aggresive",
                              genome %in% tame_taxonomy ~ "tame",
                              TRUE ~ "neutral")) %>%
  group_by(relation) %>%
  mutate(x_cen = mean(Axis.1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(Axis.2, na.rm = TRUE)) %>%
  ungroup() %>% 
  mutate(phylum=ifelse(relation=="neutral","NA",phylum)) %>% 
  ggplot() +
      #genome positions
      scale_color_manual(values=phylum_colors,na.value = "#EDEDED")+
      geom_point(aes(x=Axis.1,y=Axis.2, color=phylum, size=length), alpha=0.9, shape=16) +
      new_scale_color() +
      scale_color_manual(values = c("aggresive" = "#949293", "tame" = "#7657A4")) + 
      geom_segment(data = . %>% filter(relation != "neutral"),
                   aes(x = x_cen, y = y_cen, xend = Axis.1, yend = Axis.2, color=relation), 
                   alpha = 0.5) +      
      new_scale_color() +
      #loading positions
      geom_segment(data=gift_pcoa_gifts, 
                   aes(x=0, y=0, xend=Axis.1 * scale, yend=Axis.2 * scale),
                    arrow = arrow(length = unit(0.3, "cm"), 
                    type = "open", 
                    angle = 25),
                    linewidth = 0.5, 
                    color = "black") +
     #Primary and secondary scale adjustments
     scale_x_continuous(name = paste0("PCoA1 (",round(gift_pcoa_rel_eigen[1]*100, digits = 2), " %)"),
                      sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA1")
            ) +
     scale_y_continuous(name = paste0("PCoA2 (",round(gift_pcoa_rel_eigen[2]*100, digits = 2), " %)"),
                      sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA2")
            ) +
    geom_label_repel(data = gift_pcoa_gifts,
                     aes(label = label, x = Axis.1 * scale, y = Axis.2 * scale),
                     segment.color = 'transparent') +
    theme_minimal() + 
    theme(legend.position = "none")