Chapter 12 Behaviour-origin congruence

12.1 Taxonomic congruence

tame_domestic <- intersect(behaviour_tame$genome, origin_domestic$genome) %>% length()
tame_feral <- intersect(behaviour_tame$genome, origin_feral$genome) %>% length()
aggressive_domestic <- intersect(behaviour_aggresive$genome, origin_domestic$genome) %>% length()
aggressive_feral <- intersect(behaviour_aggresive$genome, origin_feral$genome) %>% length()

tibble(origin=c("domestic","feral"),aggressive=c(aggressive_domestic,aggressive_feral),tame=c(tame_domestic,tame_feral)) %>% 
  tt()
origin aggressive tame
domestic 6 15
feral 10 0

12.1.1 Tame - Domestic

behaviour_tame %>% 
  filter(genome %in% intersect(behaviour_tame$genome, origin_domestic$genome)) %>%
  tt()
genome phylum class order family genus species value
bin_CaboVerde.50 Actinomycetota Actinomycetia Actinomycetales Actinomycetaceae Trueperella NA 0.088
bin_Aruba.47 Actinomycetota Actinomycetia Mycobacteriales Mycobacteriaceae Lawsonella NA 0.044
bin_Brazil.109 Bacillota_A Clostridia Oscillospirales Butyricicoccaceae AM07-15 AM07-15 sp003477405 0.028
bin_Aruba.28 Bacillota_A Clostridia Oscillospirales Oscillospiraceae Dysosmobacter NA 0.079
bin_Aruba.54 Bacillota_A Clostridia Tissierellales Peptoniphilaceae Peptoniphilus_A NA 0.034
bin_Aruba.36 Bacillota_A Clostridia Tissierellales Peptoniphilaceae Peptoniphilus_C Peptoniphilus_C sp902363535 0.089
bin_Brazil.70 Desulfobacterota Desulfovibrionia Desulfovibrionales Desulfovibrionaceae Mailhella Mailhella sp003150275 0.016
bin_Brazil.9 Pseudomonadota Alphaproteobacteria RF32 CAG-239 CAG-495 CAG-495 sp000436375 0.021
bin_Brazil.146 Pseudomonadota Alphaproteobacteria RF32 CAG-239 CAG-495 CAG-495 sp001917125 0.015
bin_Brazil.92 Pseudomonadota Gammaproteobacteria Burkholderiales Burkholderiaceae CAG-521 CAG-521 sp000437635 0.033
bin_Spain.43 Pseudomonadota Gammaproteobacteria Burkholderiales Burkholderiaceae CAG-521 CAG-521 sp000437635 0.032
bin_Brazil.64 Pseudomonadota Gammaproteobacteria Burkholderiales Burkholderiaceae CAG-521 NA 0.034
bin_Brazil.51 Pseudomonadota Gammaproteobacteria Burkholderiales Burkholderiaceae Sutterella Sutterella wadsworthensis_A 0.070
bin_Brazil.15 Pseudomonadota Gammaproteobacteria Burkholderiales Burkholderiaceae Sutterella NA 0.063
bin_Aruba.15 Pseudomonadota Gammaproteobacteria Burkholderiales Burkholderiaceae Sutterella NA 0.035

12.1.2 Tame - Feral

behaviour_tame %>% 
  select(-value) %>%
  filter(genome %in% intersect(behaviour_tame$genome, origin_feral$genome))
# A tibble: 0 × 7
# ℹ 7 variables: genome <chr>, phylum <chr>, class <chr>, order <chr>, family <chr>, genus <chr>, species <chr>

12.1.3 Aggressive - Domestic

behaviour_aggresive %>% 
  select(-value) %>%
  filter(genome %in% intersect(behaviour_aggresive$genome, origin_domestic$genome)) %>%
  tt()
genome phylum class order family genus species
bin_Denmark.17 Actinomycetota Coriobacteriia Coriobacteriales Coriobacteriaceae Collinsella Collinsella bouchesdurhonensis
bin_Malaysia.96 Bacillota_C Negativicutes Veillonellales Megasphaeraceae Caecibacter Caecibacter sp003467125
bin_Brazil.58 Bacillota_C Negativicutes Veillonellales Megasphaeraceae Megasphaera Megasphaera elsdenii
bin_Aruba.64 Bacillota_C Negativicutes Veillonellales Megasphaeraceae Megasphaera Megasphaera sp000417505
bin_CaboVerde.38 Bacillota_C Negativicutes Veillonellales Megasphaeraceae Megasphaera Megasphaera sp000417505
bin_Malaysia.81 Bacillota_C Negativicutes Veillonellales Megasphaeraceae Megasphaera Megasphaera stantonii

12.1.4 Aggressive - Feral

behaviour_aggresive %>% 
  select(-value) %>%
  filter(genome %in% intersect(behaviour_aggresive$genome, origin_feral$genome)) %>%
  tt()
genome phylum class order family genus species
bin_Brazil.12 Bacillota Bacilli Lactobacillales Enterococcaceae Enterococcus Enterococcus faecalis
bin_Denmark.27 Bacillota Bacilli Lactobacillales Enterococcaceae Enterococcus_B Enterococcus_B hirae
bin_CaboVerde.60 Bacillota Bacilli Lactobacillales Lactobacillaceae Ligilactobacillus Ligilactobacillus agilis
bin_Malaysia.127 Bacillota Bacilli Lactobacillales Lactobacillaceae Ligilactobacillus Ligilactobacillus agilis
bin_Denmark.117 Bacillota Bacilli Lactobacillales Streptococcaceae Lactococcus Lactococcus garvieae
bin_Denmark.113 Bacillota Bacilli Lactobacillales Streptococcaceae Streptococcus Streptococcus alactolyticus
bin_Denmark.50 Bacillota_A Clostridia Peptostreptococcales Peptostreptococcaceae Peptacetobacter Peptacetobacter sp900539645
bin_Brazil.140 Fusobacteriota Fusobacteriia Fusobacteriales Fusobacteriaceae Fusobacterium_A Fusobacterium_A sp900543175
bin_Brazil.159 Fusobacteriota Fusobacteriia Fusobacteriales Fusobacteriaceae Fusobacterium_B Fusobacterium_B sp900541465
bin_Malaysia.6 Fusobacteriota Fusobacteriia Fusobacteriales Fusobacteriaceae Fusobacterium_B Fusobacterium_B sp900545035

12.2 Domestic - aggressive incongruence

domestic_aggressive <- sample_metadata %>% 
        rowwise() %>%
        mutate(behaviour = sum(c_across(c(bites, hisses,retreats,avoidance)), na.rm = TRUE)/4) %>%
        ungroup() %>%
        filter(origin == "Domestic" & behaviour > 0) %>%
        pull(sample)

domestic_tame <- sample_metadata %>% 
        rowwise() %>%
        mutate(behaviour = sum(c_across(c(bites, hisses,retreats,avoidance)), na.rm = TRUE)/4) %>%
        ungroup() %>%
        filter(origin == "Domestic" & behaviour == 0) %>%
        pull(sample)

focal_genomes <- behaviour_aggresive %>% 
  filter(genome %in% intersect(behaviour_aggresive$genome, origin_domestic$genome))  %>% 
  filter(family == "Megasphaeraceae") %>% 
  pull(genome)


phylo_samples <- sample_metadata %>% 
    filter(sample %in% c(domestic_aggressive,domestic_tame)) %>% 
    mutate(behaviour = case_when(
    sample %in% domestic_aggressive ~ "aggresive",
    sample %in% domestic_tame ~ "tame",
    TRUE ~ NA_character_
  )) %>% 
    column_to_rownames("sample") %>% 
    sample_data()

phylo_genome <- genome_counts_filt %>% 
     filter(genome %in% focal_genomes) %>% 
     column_to_rownames("genome") %>% 
     mutate_all(~ replace(., . == 0, 0.00001)) %>%
      otu_table(., taxa_are_rows = TRUE)

phylo_taxonomy <- genome_metadata %>% 
     filter(genome %in% focal_genomes) %>% 
     mutate(genome2=genome) %>% #create a pseudo genome name column
     column_to_rownames("genome2") %>% 
     dplyr::select(division,phylum,class,order,family,genus,species,genome) %>%
     as.matrix() %>% 
     tax_table()

physeq_genome_filtered <- phyloseq(phylo_genome, phylo_taxonomy, phylo_samples)

ancom_rand_output = ancombc2(data = physeq_genome_filtered, 
                  assay_name = "counts",
                  tax_level = NULL, #change to agglomerate analysis to a higher taxonomic range
                  fix_formula = "behaviour", #fixed variable(s)
#                  rand_formula = "(1|Individual)",
                  p_adj_method = "holm", 
                  pseudo_sens = TRUE,
                  prv_cut = 0, 
                  s0_perc = 0,
                  group = NULL, 
                  struc_zero = FALSE, 
                  neg_lb = FALSE,
                  alpha = 0.05, 
                  n_cl = 2, 
                  verbose = TRUE,
                  global = FALSE, 
                  pairwise = FALSE, 
                  dunnet = FALSE, 
                  trend = FALSE,
                  iter_control = list(tol = 1e-5, max_iter = 20, verbose = FALSE),
                  em_control = list(tol = 1e-5, max_iter = 100),
                  lme_control = NULL,
                  mdfdr_control = list(fwer_ctrl_method = "holm", B = 100), 
                  trend_control = NULL)



genome_counts_filt %>%
  mutate_at(vars(-genome),~./sum(.)) %>% #apply TSS nornalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% #reduce to minimum number of columns
  left_join(., genome_metadata, by = join_by(genome == genome)) %>% #append genome metadata
  left_join(., sample_metadata, by = join_by(sample == sample)) %>% 
  filter(genome %in% focal_genomes) %>% 
  filter(sample %in% c(domestic_aggressive,domestic_tame)) %>% 
  mutate(behaviour = case_when(
    sample %in% domestic_aggressive ~ "aggresive",
    sample %in% domestic_tame ~ "tame",
    TRUE ~ NA_character_
  )) %>% 
  ggplot(aes(x=behaviour,y=count, group=behaviour, color=genome))+
    geom_jitter()

12.3 Functional

aggressive_domestic <- intersect(positive_behaviour$trait,positive_origin$trait) %>% length()
tame_domestic <- intersect(negative_behaviour$trait,positive_origin$trait) %>% length()
aggressive_feral <- intersect(positive_behaviour$trait, negative_origin$trait) %>% length()
tame_feral <- intersect(negative_behaviour$trait, negative_origin$trait) %>% length()

tibble(origin=c("domestic","feral"),aggressive=c(aggressive_domestic,aggressive_feral),tame=c(tame_domestic,tame_feral)) %>% 
  tt()
origin aggressive tame
domestic 0 1
feral 11 0

12.3.1 Aggresive - Domestic

positive_behaviour %>% 
  inner_join(positive_origin,by="trait")
# A tibble: 0 × 9
# ℹ 9 variables: trait <chr>, mean.x <dbl>, p1.x <dbl>, p9.x <dbl>, support.x <dbl>, mean.y <dbl>, p1.y <dbl>,
#   p9.y <dbl>, support.y <dbl>

12.3.2 Tame - Domestic

negative_behaviour %>% 
  inner_join(positive_origin,by="trait") 
# A tibble: 1 × 9
  trait   mean.x     p1.x      p9.x support.x mean.y    p1.y   p9.y support.y
  <chr>    <dbl>    <dbl>     <dbl>     <dbl>  <dbl>   <dbl>  <dbl>     <dbl>
1 D0208 -0.00275 -0.00518 -0.000117     0.912 0.0124 0.00283 0.0239      0.96

12.3.3 Aggresive - Feral

positive_behaviour %>% 
  inner_join(negative_origin,by="trait") %>% 
  tt()
trait mean.x p1.x p9.x support.x mean.y p1.y p9.y support.y
D0610 0.0002189315 7.169404e-06 0.0005506645 0.942 -0.0002451283 -0.0004720608 -3.710366e-05 0.959
D0606 0.0013173362 8.607854e-05 0.0029287549 0.949 -0.0014698813 -0.0026556828 -4.561816e-04 0.981
D0104 0.0042525723 2.538930e-04 0.0087261456 0.919 -0.0128418566 -0.0357366876 -1.303277e-05 0.900
B0401 0.0063799240 2.951833e-04 0.0126829773 0.913 -0.0249512083 -0.0516978300 -6.446289e-03 0.986
B0403 0.0066671349 3.372894e-03 0.0098188489 0.990 -0.0199484439 -0.0377026994 -7.228965e-03 0.991
D0302 0.0075980799 7.317559e-04 0.0144770772 0.921 -0.0262106412 -0.0614788173 -5.412084e-03 0.983
B0213 0.0083508635 1.615050e-03 0.0154868430 0.948 -0.0333934318 -0.0842189386 -1.107973e-03 0.915
B0307 0.0122331069 4.235712e-03 0.0201228396 0.966 -0.0407109436 -0.0924524535 -1.089288e-02 0.999
D0305 0.0155120011 4.525690e-03 0.0254848323 0.958 -0.0284344966 -0.0579412304 -5.129183e-03 0.946
B0708 0.0160958441 5.642192e-03 0.0262116110 0.971 -0.0358940422 -0.0895677790 -1.852451e-03 0.925
D0502 0.0199192005 8.610099e-03 0.0302091623 0.977 -0.0463987237 -0.0873258083 -1.241226e-02 0.970

12.3.4 Tame - Feral

negative_behaviour %>% 
  inner_join(negative_origin,by="trait")
# A tibble: 0 × 9
# ℹ 9 variables: trait <chr>, mean.x <dbl>, p1.x <dbl>, p9.x <dbl>, support.x <dbl>, mean.y <dbl>, p1.y <dbl>,
#   p9.y <dbl>, support.y <dbl>

12.4 Functional contrast

aggressive_feral <- behaviour_aggresive %>% 
  select(-value) %>%
  filter(genome %in% intersect(behaviour_aggresive$genome, origin_feral$genome)) %>% 
  pull(genome)

aggresive_taxonomy <- behaviour_aggresive$genome
aggresive_function <- positive_behaviour$trait

tame_domestic <-behaviour_tame %>% 
  filter(genome %in% intersect(behaviour_tame$genome, origin_domestic$genome)) %>%
  pull(genome)

tame_taxonomy <- behaviour_tame$genome
tame_function <- negative_behaviour$trait

genome_gifts %>% 
  to.elements(., GIFT_db) %>% 
    as.data.frame() %>%
    rownames_to_column(var="genome") %>%
    pivot_longer(!genome,names_to="trait",values_to="gift") %>%
    filter(genome %in% c(aggresive_taxonomy,tame_taxonomy)) %>% 
    mutate(taxonomy=ifelse(genome %in% aggresive_taxonomy,"aggressive","tame")) %>% 
    filter(trait %in% (positive_behaviour %>% 
    inner_join(negative_origin,by="trait") %>% pull(trait))) %>% 
    mutate(functionid = substr(trait, 1, 3)) %>%
    mutate(trait = case_when(
      trait %in% GIFT_db$Code_element ~ GIFT_db$Element[match(trait, GIFT_db$Code_element)],
      TRUE ~ trait
    )) %>%
    mutate(functionid = case_when(
      functionid %in% GIFT_db$Code_function ~ GIFT_db$Function[match(functionid, GIFT_db$Code_function)],
      TRUE ~ functionid
    )) %>%
    mutate(trait=factor(trait,levels=unique(GIFT_db$Element))) %>%
    mutate(functionid=factor(functionid,levels=unique(GIFT_db$Function))) %>%
    ggplot(aes(x=genome,y=trait,fill=gift)) +
        geom_tile(colour="white", linewidth=0.2)+
        scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
        facet_grid(. ~ taxonomy, scales="free",space="free") +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
              strip.text.y = element_text(angle = 0)) + 
        labs(y="Traits",x="Samples",fill="GIFT")

12.5 Functional ordination

PCoA functional ordination with PCA loadings.

gift_pcoa <- genome_gifts %>%
    to.elements(., GIFT_db) %>%
    as.data.frame() %>%
    vegdist(method="euclidean") %>%
    pcoa()

gift_pcoa_rel_eigen <- gift_pcoa$values$Relative_eig[1:10]


# Get genome positions
gift_pcoa_vectors <- gift_pcoa$vectors %>% #extract vectors
  as.data.frame() %>% 
  select(Axis.1,Axis.2) # keep the first 2 axes

gift_pcoa_eigenvalues <- gift_pcoa$values$Eigenvalues[c(1,2)]

gift_pcoa_gifts <- cov(genome_gifts, scale(gift_pcoa_vectors)) %*% diag((gift_pcoa_eigenvalues/(nrow(genome_gifts)-1))^(-0.5)) %>%
  as.data.frame() %>% 
  rename(Axis.1=1,Axis.2=2) %>% 
  rownames_to_column(var="label") %>% 
  #get function summary vectors
  mutate(func=substr(label,1,5)) %>% 
  group_by(func) %>% 
  summarise(Axis.1=mean(Axis.1),
            Axis.2=mean(Axis.2)) %>% 
  rename(label=func) %>% 
  filter(label %in% (positive_behaviour %>% 
    inner_join(negative_origin,by="trait") %>% pull(trait)))
scale <- 15 # scale for vector loadings
gift_pcoa_vectors %>% 
  rownames_to_column(var="genome") %>% 
  left_join(genome_metadata, by="genome") %>%
  mutate(relation = case_when(genome %in% aggresive_taxonomy ~ "aggresive",
                              genome %in% tame_taxonomy ~ "tame",
                              TRUE ~ "neutral")) %>%
  group_by(relation) %>%
  mutate(x_cen = mean(Axis.1, na.rm = TRUE)) %>%
  mutate(y_cen = mean(Axis.2, na.rm = TRUE)) %>%
  ungroup() %>% 
  mutate(phylum=ifelse(relation=="neutral","NA",phylum)) %>% 
  ggplot() +
      #genome positions
      scale_color_manual(values=phylum_colors,na.value = "#EDEDED")+
      geom_point(aes(x=Axis.1,y=Axis.2, color=phylum, size=length), alpha=0.9, shape=16) +
      new_scale_color() +
      scale_color_manual(values = c("aggresive" = "#949293", "tame" = "#7657A4")) + 
      geom_segment(data = . %>% filter(relation != "neutral"),
                   aes(x = x_cen, y = y_cen, xend = Axis.1, yend = Axis.2, color=relation), 
                   alpha = 0.5) +      
      new_scale_color() +
      #loading positions
      geom_segment(data=gift_pcoa_gifts, 
                   aes(x=0, y=0, xend=Axis.1 * scale, yend=Axis.2 * scale),
                    arrow = arrow(length = unit(0.3, "cm"), 
                    type = "open", 
                    angle = 25),
                    linewidth = 0.5, 
                    color = "black") +
     #Primary and secondary scale adjustments
     scale_x_continuous(name = paste0("PCoA1 (",round(gift_pcoa_rel_eigen[1]*100, digits = 2), " %)"),
                      sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA1")
            ) +
     scale_y_continuous(name = paste0("PCoA2 (",round(gift_pcoa_rel_eigen[2]*100, digits = 2), " %)"),
                      sec.axis = sec_axis(~ . / scale, name = "Loadings on PCoA2")
            ) +
    geom_label_repel(data = gift_pcoa_gifts,
                     aes(label = label, x = Axis.1 * scale, y = Axis.2 * scale),
                     segment.color = 'transparent') +
    theme_minimal() + 
    theme(legend.position = "none")