Chapter 5 Alpha diversity
5.1 Hill numbers
# Calculate Hill numbers
richness <- genome_counts_filt %>%
column_to_rownames(var = "genome") %>%
dplyr::select(where(~ !all(. == 0))) %>%
hilldiv(., q = 0) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(richness = 1) %>%
rownames_to_column(var = "sample")
neutral <- genome_counts_filt %>%
column_to_rownames(var = "genome") %>%
dplyr::select(where(~ !all(. == 0))) %>%
hilldiv(., q = 1) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(neutral = 1) %>%
rownames_to_column(var = "sample")
phylogenetic <- genome_counts_filt %>%
column_to_rownames(var = "genome") %>%
dplyr::select(where(~ !all(. == 0))) %>%
hilldiv(., q = 1, tree = genome_tree) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(phylogenetic = 1) %>%
rownames_to_column(var = "sample")
# Aggregate basal GIFT into elements
dist <- genome_gifts %>%
to.elements(., GIFT_db) %>%
traits2dist(., method = "gower")
functional <- genome_counts_filt %>%
column_to_rownames(var = "genome") %>%
dplyr::select(where(~ !all(. == 0))) %>%
hilldiv(., q = 1, dist = dist) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(functional = 1) %>%
rownames_to_column(var = "sample") %>%
mutate(functional = if_else(is.nan(functional), 1, functional))
# Merge all metrics
alpha_div <- richness %>%
full_join(neutral, by = join_by(sample == sample)) %>%
full_join(phylogenetic, by = join_by(sample == sample)) %>%
full_join(functional, by = join_by(sample == sample))
5.2 By location
5.2.1 Plots
richness_mean <- alpha_div %>%
left_join(sample_metadata, by = join_by(sample == sample))%>%
group_by(location) %>%
dplyr::summarise_at(.vars = names(.)[2], .funs = c("Richness mean" = "mean", "Richness sd" = "sd"))
neutral_mean <- alpha_div %>%
left_join(sample_metadata, by = join_by(sample == sample))%>%
group_by(location) %>%
dplyr::summarise_at(.vars = names(.)[3], .funs = c("Neutral mean" = "mean", "Neutral sd" = "sd"))
phylogenetic_mean <- alpha_div %>%
left_join(sample_metadata, by = join_by(sample == sample))%>%
group_by(location) %>%
dplyr::summarise_at(.vars = names(.)[4], .funs = c("Phylogenetic mean" = "mean", "Phylogenetic sd" = "sd"))
cbind(richness_mean, neutral_mean[, 2:3], phylogenetic_mean[, 2:3])%>%
tt()
location | Richness mean | Richness sd | Neutral mean | Neutral sd | Phylogenetic mean | Phylogenetic sd |
---|---|---|---|---|---|---|
Aruba | 22.12500 | 20.75532 | 11.82228 | 9.034244 | 5.522053 | 1.715380 |
Brazil | 74.81250 | 23.70434 | 25.03024 | 11.453472 | 6.319856 | 1.456310 |
CaboVerde | 31.93333 | 14.60659 | 15.22392 | 4.306461 | 6.984720 | 1.414464 |
Spain | 70.93333 | 15.76373 | 24.75182 | 11.327483 | 6.440501 | 1.526864 |
Denmark | 61.80000 | 18.60952 | 20.50564 | 9.170413 | 5.594599 | 1.142487 |
Malaysia | 46.46667 | 25.00248 | 24.35444 | 11.129897 | 6.509729 | 1.569361 |
group_n <- alpha_div %>%
left_join(., sample_metadata, by = join_by(sample == sample))%>%
select(location) %>%
pull() %>%
unique() %>%
length()
#pdf("figures/diversity_location.pdf",width=20, height=9)
alpha_div %>%
pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
left_join(sample_metadata, by = "sample") %>%
mutate(metric=factor(metric,levels=c("richness","neutral","phylogenetic","functional"))) %>%
ggplot(aes(y = value, x = location, group=location, color=location, fill=location)) +
geom_boxplot(outlier.shape = NA, show.legend = FALSE) +
geom_jitter(alpha=0.5) +
scale_color_manual(values = location_colors) +
scale_fill_manual(values = str_c(location_colors, "50")) +
facet_wrap(. ~ metric, scales = "free", ncol=4) +
coord_cartesian(xlim = c(1, NA)) +
theme_classic() +
theme(
axis.ticks.x = element_blank(),
strip.text.x = element_text(size = 12, color="black",face="bold"),
strip.background = element_blank(),
panel.grid.minor.x = element_line(size = .1, color = "grey"),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_blank(),
legend.text=element_text(size=10),
legend.title = element_text(size=12))+
guides(fill = guide_legend(override.aes = list(size=3)))
5.2.2 Mixed models
5.2.2.1 Richness
Analysis of Deviance Table
Model: Negative Binomial(4.3858), link: log
Response: richness
Terms added sequentially (first to last)
Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL 91 165.145
location 5 66.294 86 98.851 6.039e-13 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
R2m R2c
delta 0.4502528 0.4502528
lognormal 0.4782539 0.4782539
trigamma 0.4193938 0.4193938
$emmeans
location emmean SE df asymp.LCL asymp.UCL
Aruba 3.10 0.131 Inf 2.84 3.35
Brazil 4.31 0.123 Inf 4.07 4.56
CaboVerde 3.46 0.131 Inf 3.21 3.72
Spain 4.26 0.127 Inf 4.01 4.51
Denmark 4.12 0.128 Inf 3.87 4.37
Malaysia 3.84 0.129 Inf 3.59 4.09
Results are given on the log (not the response) scale.
Confidence level used: 0.95
$contrasts
contrast estimate SE df z.ratio p.value
Aruba - Brazil -1.2183 0.179 Inf -6.793 <.0001
Aruba - CaboVerde -0.3669 0.185 Inf -1.979 0.3541
Aruba - Spain -1.1650 0.182 Inf -6.392 <.0001
Aruba - Denmark -1.0272 0.183 Inf -5.624 <.0001
Aruba - Malaysia -0.7420 0.184 Inf -4.041 0.0008
Brazil - CaboVerde 0.8513 0.180 Inf 4.732 <.0001
Brazil - Spain 0.0532 0.177 Inf 0.301 0.9997
Brazil - Denmark 0.1911 0.177 Inf 1.079 0.8899
Brazil - Malaysia 0.4762 0.178 Inf 2.674 0.0805
CaboVerde - Spain -0.7981 0.183 Inf -4.365 0.0002
CaboVerde - Denmark -0.6603 0.183 Inf -3.604 0.0042
CaboVerde - Malaysia -0.3751 0.184 Inf -2.036 0.3215
Spain - Denmark 0.1378 0.180 Inf 0.766 0.9732
Spain - Malaysia 0.4230 0.181 Inf 2.337 0.1794
Denmark - Malaysia 0.2852 0.181 Inf 1.572 0.6172
Results are given on the log (not the response) scale.
P value adjustment: tukey method for comparing a family of 6 estimates
5.2.2.2 Neutral
Analysis of Variance Table
Response: neutral
Df Sum Sq Mean Sq F value Pr(>F)
location 5 2438.6 487.72 5.1405 0.0003575 ***
Residuals 86 8159.6 94.88
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
R2m R2c
[1,] 0.2202391 0.2202391
$emmeans
location emmean SE df lower.CL upper.CL
Aruba 11.8 2.44 86 6.98 16.7
Brazil 25.0 2.44 86 20.19 29.9
CaboVerde 15.2 2.52 86 10.22 20.2
Spain 24.8 2.52 86 19.75 29.8
Denmark 20.5 2.52 86 15.51 25.5
Malaysia 24.4 2.52 86 19.35 29.4
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
Aruba - Brazil -13.208 3.44 86 -3.835 0.0032
Aruba - CaboVerde -3.402 3.50 86 -0.972 0.9257
Aruba - Spain -12.930 3.50 86 -3.693 0.0051
Aruba - Denmark -8.683 3.50 86 -2.480 0.1415
Aruba - Malaysia -12.532 3.50 86 -3.580 0.0073
Brazil - CaboVerde 9.806 3.50 86 2.801 0.0669
Brazil - Spain 0.278 3.50 86 0.080 1.0000
Brazil - Denmark 4.525 3.50 86 1.292 0.7884
Brazil - Malaysia 0.676 3.50 86 0.193 1.0000
CaboVerde - Spain -9.528 3.56 86 -2.679 0.0901
CaboVerde - Denmark -5.282 3.56 86 -1.485 0.6747
CaboVerde - Malaysia -9.131 3.56 86 -2.567 0.1168
Spain - Denmark 4.246 3.56 86 1.194 0.8387
Spain - Malaysia 0.397 3.56 86 0.112 1.0000
Denmark - Malaysia -3.849 3.56 86 -1.082 0.8872
P value adjustment: tukey method for comparing a family of 6 estimates
5.2.2.3 Phylogenetic
Analysis of Variance Table
Response: phylogenetic
Df Sum Sq Mean Sq F value Pr(>F)
location 5 24.581 4.9161 2.2328 0.05818 .
Residuals 86 189.353 2.2018
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
R2m R2c
[1,] 0.109275 0.109275
$emmeans
location emmean SE df lower.CL upper.CL
Aruba 5.52 0.371 86 4.78 6.26
Brazil 6.32 0.371 86 5.58 7.06
CaboVerde 6.98 0.383 86 6.22 7.75
Spain 6.44 0.383 86 5.68 7.20
Denmark 5.59 0.383 86 4.83 6.36
Malaysia 6.51 0.383 86 5.75 7.27
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
Aruba - Brazil -0.7978 0.525 86 -1.521 0.6520
Aruba - CaboVerde -1.4627 0.533 86 -2.743 0.0773
Aruba - Spain -0.9184 0.533 86 -1.722 0.5211
Aruba - Denmark -0.0725 0.533 86 -0.136 1.0000
Aruba - Malaysia -0.9877 0.533 86 -1.852 0.4386
Brazil - CaboVerde -0.6649 0.533 86 -1.247 0.8125
Brazil - Spain -0.1206 0.533 86 -0.226 0.9999
Brazil - Denmark 0.7253 0.533 86 1.360 0.7505
Brazil - Malaysia -0.1899 0.533 86 -0.356 0.9992
CaboVerde - Spain 0.5442 0.542 86 1.004 0.9153
CaboVerde - Denmark 1.3901 0.542 86 2.566 0.1172
CaboVerde - Malaysia 0.4750 0.542 86 0.877 0.9511
Spain - Denmark 0.8459 0.542 86 1.561 0.6260
Spain - Malaysia -0.0692 0.542 86 -0.128 1.0000
Denmark - Malaysia -0.9151 0.542 86 -1.689 0.5428
P value adjustment: tukey method for comparing a family of 6 estimates
5.2.2.4 Functional
Analysis of Variance Table
Response: functional
Df Sum Sq Mean Sq F value Pr(>F)
location 5 0.06044 0.0120886 2.3531 0.04726 *
Residuals 86 0.44181 0.0051373
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
R2m R2c
[1,] 0.1144882 0.1144882
$emmeans
location emmean SE df lower.CL upper.CL
Aruba 1.41 0.0179 86 1.37 1.45
Brazil 1.43 0.0179 86 1.39 1.47
CaboVerde 1.46 0.0185 86 1.42 1.50
Spain 1.45 0.0185 86 1.41 1.49
Denmark 1.47 0.0185 86 1.43 1.51
Malaysia 1.49 0.0185 86 1.45 1.52
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
Aruba - Brazil -0.02104 0.0253 86 -0.830 0.9611
Aruba - CaboVerde -0.05095 0.0258 86 -1.978 0.3635
Aruba - Spain -0.04197 0.0258 86 -1.629 0.5818
Aruba - Denmark -0.05914 0.0258 86 -2.296 0.2071
Aruba - Malaysia -0.07819 0.0258 86 -3.035 0.0363
Brazil - CaboVerde -0.02991 0.0258 86 -1.161 0.8539
Brazil - Spain -0.02093 0.0258 86 -0.812 0.9645
Brazil - Denmark -0.03810 0.0258 86 -1.479 0.6784
Brazil - Malaysia -0.05714 0.0258 86 -2.218 0.2402
CaboVerde - Spain 0.00898 0.0262 86 0.343 0.9994
CaboVerde - Denmark -0.00820 0.0262 86 -0.313 0.9996
CaboVerde - Malaysia -0.02724 0.0262 86 -1.041 0.9027
Spain - Denmark -0.01717 0.0262 86 -0.656 0.9861
Spain - Malaysia -0.03622 0.0262 86 -1.384 0.7365
Denmark - Malaysia -0.01904 0.0262 86 -0.728 0.9780
P value adjustment: tukey method for comparing a family of 6 estimates
5.3 By behaviour and location
5.3.1 Plots
5.3.1.1 Richness
alpha_div %>%
pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
left_join(sample_metadata, by = "sample") %>%
filter(!is.na(location)) %>%
filter(metric=="richness") %>%
ggplot(aes(y = value, x = origin, group=origin, color=origin, fill=origin)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.1, alpha=0.5) +
scale_color_manual(values = origin_colors) +
scale_fill_manual(values = str_c(origin_colors, "50")) +
facet_wrap(. ~ location, scales = "fixed", ncol=6) +
coord_cartesian(xlim = c(1, NA)) +
theme_classic() +
theme(
strip.background = element_blank(),
panel.grid.minor.x = element_line(size = .1, color = "grey"),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1)
)
5.3.1.2 Neutral
#pdf("figures/alpha_q1n_behaviour.pdf",width=9, height=5)
alpha_div %>%
pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
left_join(sample_metadata, by = "sample") %>%
filter(!is.na(location)) %>%
filter(metric=="neutral") %>%
ggplot(aes(y = value, x = origin, group=origin, color=origin, fill=origin)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.1, alpha=0.5) +
scale_color_manual(values = origin_colors) +
scale_fill_manual(values = str_c(origin_colors, "50")) +
facet_wrap(. ~ location, scales = "fixed", ncol=6) +
coord_cartesian(xlim = c(1, NA)) +
theme_classic() +
theme(
strip.background = element_blank(),
panel.grid.minor.x = element_line(size = .1, color = "grey"),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1)
)
5.3.1.3 Phylogenetic
#pdf("figures/alpha_q1p_behaviour.pdf",width=9, height=5)
alpha_div %>%
pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
left_join(sample_metadata, by = "sample") %>%
filter(!is.na(location)) %>%
filter(metric=="phylogenetic") %>%
ggplot(aes(y = value, x = origin, group=origin, color=origin, fill=origin)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.1, alpha=0.5) +
scale_color_manual(values = origin_colors) +
scale_fill_manual(values = str_c(origin_colors, "50")) +
facet_wrap(. ~ location, scales = "fixed", ncol=6) +
coord_cartesian(xlim = c(1, NA)) +
theme_classic() +
theme(
strip.background = element_blank(),
panel.grid.minor.x = element_line(size = .1, color = "grey"),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1)
)
5.3.1.4 Functional
alpha_div %>%
pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
left_join(sample_metadata, by = "sample") %>%
filter(!is.na(location)) %>%
filter(metric=="functional") %>%
ggplot(aes(y = value, x = origin, group=origin, color=origin, fill=origin)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.1, alpha=0.5) +
scale_color_manual(values = origin_colors) +
scale_fill_manual(values = str_c(origin_colors, "50")) +
facet_wrap(. ~ location, scales = "fixed", ncol=6) +
coord_cartesian(xlim = c(1, NA)) +
theme_classic() +
theme(
strip.background = element_blank(),
panel.grid.minor.x = element_line(size = .1, color = "grey"),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1)
)
5.3.2 Mixed models
5.3.2.1 Richness
Model_richness <- glmer.nb(richness ~ origin+sex+(1|location), data = alpha_div_meta)
summary(Model_richness)
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
Family: Negative Binomial(4.2906) ( log )
Formula: richness ~ origin + sex + (1 | location)
Data: alpha_div_meta
AIC BIC logLik deviance df.resid
857.1 872.2 -422.5 845.1 86
Scaled residuals:
Min 1Q Median 3Q Max
-1.6859 -0.6041 -0.0643 0.5858 3.2671
Random effects:
Groups Name Variance Std.Dev.
location (Intercept) 0.1653 0.4065
Number of obs: 92, groups: location, 6
Fixed effects:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 3.90133 0.19455 20.053 <2e-16 ***
originFeral 0.00238 0.10976 0.022 0.983
sexMale -0.25024 0.11805 -2.120 0.034 *
sexUnknown 0.23928 0.36462 0.656 0.512
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) orgnFr sexMal
originFeral -0.228
sexMale -0.260 0.044
sexUnknown -0.281 -0.113 0.076
5.3.2.2 Neutral
Model_neutral <- lme(fixed = neutral ~ origin+sex, data = alpha_div_meta,
random = ~ 1 | location)#log(seq_depth)+
summary(Model_neutral)
Linear mixed-effects model fit by REML
Data: alpha_div_meta
AIC BIC logLik
680.5938 695.4578 -334.2969
Random effects:
Formula: ~1 | location
(Intercept) Residual
StdDev: 5.311173 9.630345
Fixed effects: neutral ~ origin + sex
Value Std.Error DF t-value p-value
(Intercept) 20.948125 2.856806 83 7.332709 0.0000
originFeral 1.703197 2.090004 83 0.814925 0.4174
sexMale -4.147994 2.232360 83 -1.858120 0.0667
sexUnknown 0.883327 5.749323 83 0.153640 0.8783
Correlation:
(Intr) orgnFr sexMal
originFeral -0.306
sexMale -0.352 0.058
sexUnknown -0.309 -0.117 0.121
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-1.9366916 -0.6727309 -0.1549049 0.6419273 2.8015816
Number of Observations: 92
Number of Groups: 6
5.3.2.3 Phylogenetic
Model_phylo <- lme(fixed = phylogenetic ~ origin+sex, data = alpha_div_meta,
random = ~ 1 | location)
summary(Model_phylo)
Linear mixed-effects model fit by REML
Data: alpha_div_meta
AIC BIC logLik
345.8789 360.7429 -166.9394
Random effects:
Formula: ~1 | location
(Intercept) Residual
StdDev: 0.3796989 1.473687
Fixed effects: phylogenetic ~ origin + sex
Value Std.Error DF t-value p-value
(Intercept) 6.599600 0.3116529 83 21.176119 0.0000
originFeral 0.019084 0.3174909 83 0.060108 0.9522
sexMale -0.621959 0.3392944 83 -1.833097 0.0704
sexUnknown -0.955378 0.6039183 83 -1.581965 0.1175
Correlation:
(Intr) orgnFr sexMal
originFeral -0.442
sexMale -0.507 0.060
sexUnknown -0.335 -0.124 0.225
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-1.86150185 -0.62279155 0.02054232 0.74108402 2.09713515
Number of Observations: 92
Number of Groups: 6
5.3.2.4 Functional
Model_func <- lme(fixed = functional ~ origin+sex, data = alpha_div_meta,
random = ~ 1 | location)
summary(Model_func)
Linear mixed-effects model fit by REML
Data: alpha_div_meta
AIC BIC logLik
-186.0933 -171.2292 99.04663
Random effects:
Formula: ~1 | location
(Intercept) Residual
StdDev: 0.02356286 0.0712589
Fixed effects: functional ~ origin + sex
Value Std.Error DF t-value p-value
(Intercept) 1.4453479 0.01635760 83 88.35944 0.0000
originFeral 0.0194761 0.01539254 83 1.26530 0.2093
sexMale -0.0148974 0.01644558 83 -0.90586 0.3676
sexUnknown 0.0178121 0.03256538 83 0.54696 0.5859
Correlation:
(Intr) orgnFr sexMal
originFeral -0.404
sexMale -0.465 0.059
sexUnknown -0.330 -0.121 0.192
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-2.0213571 -0.6393834 0.0797046 0.5702719 3.7232590
Number of Observations: 92
Number of Groups: 6