Chapter 4 Alpha diversity
4.1 Metagenomics
load("resources/metagenomics/data_filtered.Rdata")
species_color <- c("#e5bd5b", "#6b7398", "#76b183")
sample_metadata <- sample_metadata %>%
select(1,2,7,8) %>%
mutate(method="Metagenomics")4.1.1 MAG level
richness <- genome_counts_filt %>%
column_to_rownames(var = "genome") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 0) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(richness = 1) %>%
rownames_to_column(var = "sample")
neutral <- genome_counts_filt %>%
column_to_rownames(var = "genome") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 1) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(neutral = 1) %>%
rownames_to_column(var = "sample")
# Merge all metrics
alpha_div_meta <- richness %>%
full_join(neutral, by = join_by(sample == sample))microbial_fraction_mapped <- microbial_fraction %>%
select(sample, mapped_microbial_fraction) %>%
right_join(alpha_div_meta, by="sample") %>%
left_join(sample_metadata, by="sample")
microbial_fraction_mapped %>%
summarise(
Total_mean = mean(mapped_microbial_fraction, na.rm = T),
Total_sd = sd(mapped_microbial_fraction, na.rm = T),
Eb_mean = mean(mapped_microbial_fraction[Species == "Eb"], na.rm = T),
Eb_sd = sd(mapped_microbial_fraction[Species == "Eb"], na.rm = T),
Ha_mean = mean(mapped_microbial_fraction[Species == "Ha"], na.rm = T),
Ha_sd = sd(mapped_microbial_fraction[Species == "Ha"], na.rm = T),
Pk_mean = mean(mapped_microbial_fraction[Species == "Pk"], na.rm = T),
Pk_sd = sd(mapped_microbial_fraction[Species == "Pk"], na.rm = T)
) %>%
mutate(
Total = str_c(round(Total_mean, 3), "±", round(Total_sd, 3)),
Cnephaeus = str_c(round(Eb_mean, 3), "±", round(Eb_sd, 3)),
Hypsugo = str_c(round(Ha_mean, 3), "±", round(Ha_sd, 3)),
Pipistrellus = str_c(round(Pk_mean, 3), "±", round(Pk_sd, 3))
) %>%
arrange(-Eb_mean) %>%
dplyr::select(
Total,
Cnephaeus,
Hypsugo,
Pipistrellus) %>%
tt()| Total | Cnephaeus | Hypsugo | Pipistrellus |
|---|---|---|---|
| 24.104±24.375 | 33.35±26.484 | 9.277±11.338 | 35.238±26.185 |
Kruskal-Wallis rank sum test
data: mapped_microbial_fraction by Species
Kruskal-Wallis chi-squared = 14.513, df = 2, p-value = 0.0007055
Correlation between diversity and estimate microbial fraction
cor.test(microbial_fraction_mapped$mapped_microbial_fraction, microbial_fraction_mapped$richness, method = "spearman")
Spearman's rank correlation rho
data: microbial_fraction_mapped$mapped_microbial_fraction and microbial_fraction_mapped$richness
S = 8375.6, p-value = 0.0006649
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.4834688
cor.test(microbial_fraction_mapped$mapped_microbial_fraction, microbial_fraction_mapped$neutral, method = "spearman")
Spearman's rank correlation rho
data: microbial_fraction_mapped$mapped_microbial_fraction and microbial_fraction_mapped$neutral
S = 10011, p-value = 0.008683
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.3826143
# Histogram
hist(microbial_fraction_mapped$mapped_microbial_fraction, main = "Read Fraction", xlab = "read_fraction")# QQ plots
qqnorm(microbial_fraction_mapped$mapped_microbial_fraction); qqline(microbial_fraction_mapped$mapped_microbial_fraction)
Shapiro-Wilk normality test
data: microbial_fraction_mapped$mapped_microbial_fraction
W = 0.85774, p-value = 4.915e-05
Shapiro-Wilk normality test
data: microbial_fraction_mapped$richness
W = 0.68491, p-value = 3.533e-09
p1 <- ggplot(microbial_fraction_mapped, aes(x = mapped_microbial_fraction, y = richness)) +
geom_point(size = 3, color = "#2E86AB") +
geom_smooth(method = "lm", se = TRUE, color = "black") +
theme_classic() +
labs(x = "Read Fraction (%)", y = "Richness")
p2 <- ggplot(microbial_fraction_mapped, aes(x = mapped_microbial_fraction, y = neutral)) +
geom_point(size = 3, color = "#F39C12") +
geom_smooth(method = "lm", se = TRUE, color = "black") +
theme_classic() +
labs(x = "Read Fraction (%)", y = "Neutral Alpha Diversity")
p1 + p2metagenome_alpha_div <- alpha_div_meta %>%
select(1:3) %>%
left_join(sample_metadata, by = join_by(sample == sample))Diversity values at MAG level
alpha_div_meta %>%
pivot_longer(-sample, names_to = "alpha", values_to = "value") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
group_by(alpha) %>%
summarise(
total_mean = mean(value, na.rm = T),
total_sd = sd(value, na.rm = T),
Eb_mean = mean(value[Species == "Eb"], na.rm = T),
Eb_sd = sd(value[Species == "Eb"], na.rm = T),
Ha_mean = mean(value[Species == "Ha"], na.rm = T),
Ha_sd = sd(value[Species == "Ha"], na.rm = T),
Pk_mean = mean(value[Species == "Pk"], na.rm = T),
Pk_sd = sd(value[Species == "Pk"], na.rm = T)
) %>%
mutate(
Total = str_c(round(total_mean, 2), "±", round(total_sd, 2)),
Cnephaeus = str_c(round(Eb_mean, 2), "±", round(Eb_sd, 2)),
Hypsugo = str_c(round(Ha_mean, 2), "±", round(Ha_sd, 2)),
Pipistrellus = str_c(round(Pk_mean, 2), "±", round(Pk_sd, 2))
) %>%
arrange(-Eb_mean) %>%
dplyr::select(alpha, Total, Cnephaeus, Pipistrellus, Hypsugo) %>%
tt()| alpha | Total | Cnephaeus | Pipistrellus | Hypsugo |
|---|---|---|---|---|
| richness | 9±11.45 | 19.7±18.6 | 8.82±8.73 | 3.58±2.73 |
| neutral | 4.42±5.06 | 7.33±6.87 | 5.05±5.48 | 2.15±1.42 |
Richness
Model_richness <- MASS::glm.nb(richness ~ Species, data = metagenome_alpha_div,trace=TRUE)
summary(Model_richness)
Call:
MASS::glm.nb(formula = richness ~ Species, data = metagenome_alpha_div,
trace = TRUE, init.theta = 1.617425835, link = log)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 2.9806 0.2587 11.523 < 2e-16 ***
SpeciesHa -1.7055 0.3379 -5.048 4.46e-07 ***
SpeciesPk -0.8038 0.3165 -2.540 0.0111 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for Negative Binomial(1.6174) family taken to be 1)
Null deviance: 78.633 on 50 degrees of freedom
Residual deviance: 51.459 on 48 degrees of freedom
AIC: 317.29
Number of Fisher Scoring iterations: 1
Theta: 1.617
Std. Err.: 0.370
2 x log-likelihood: -309.287
Analysis of Deviance Table
Model: Negative Binomial(1.6174), link: log
Response: richness
Terms added sequentially (first to last)
Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL 50 78.633
Species 2 27.173 48 51.459 1.257e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
$emmeans
Species emmean SE df asymp.LCL asymp.UCL
Eb 2.98 0.259 Inf 2.474 3.49
Ha 1.28 0.217 Inf 0.849 1.70
Pk 2.18 0.182 Inf 1.819 2.53
Results are given on the log (not the response) scale.
Confidence level used: 0.95
$contrasts
contrast estimate SE df z.ratio p.value
Eb - Ha 1.706 0.338 Inf 5.048 <.0001
Eb - Pk 0.804 0.316 Inf 2.540 0.0298
Ha - Pk -0.902 0.284 Inf -3.178 0.0042
Results are given on the log (not the response) scale.
P value adjustment: tukey method for comparing a family of 3 estimates
Neutral
Analysis of Variance Table
Response: neutral
Df Sum Sq Mean Sq F value Pr(>F)
Species 2 190.82 95.411 4.1954 0.02093 *
Residuals 48 1091.60 22.742
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
$emmeans
Species emmean SE df lower.CL upper.CL
Eb 7.33 1.51 48 4.2937 10.36
Ha 2.15 1.09 48 -0.0467 4.35
Pk 5.05 1.02 48 3.0060 7.09
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
Eb - Ha 5.17 1.86 48 2.776 0.0210
Eb - Pk 2.28 1.82 48 1.251 0.4295
Ha - Pk -2.90 1.49 48 -1.940 0.1386
P value adjustment: tukey method for comparing a family of 3 estimates
4.1.2 At different taxonomic levels
load("resources/metagenomics/data_filtered.Rdata")
genome_metadata <- genome_metadata %>%
mutate(
class = if_else(str_trim(class) == "", paste0(phylum, "_Unclassified"), class),
order = if_else(str_trim(order) == "",
if_else(str_detect(class, "Unclassified"), class, paste0(class, "_Unclassified")),
order),
family = if_else(str_trim(family) == "",
if_else(str_detect(order, "Unclassified"), order, paste0(order, "_Unclassified")),
family),
genus = if_else(str_trim(genus) == "",
if_else(str_detect(family, "Unclassified"), family, paste0(family, "_Unclassified")),
genus)
)
genome_tax <- genome_counts_filt %>%
left_join(genome_metadata[c(1,3,4,6)], by = "genome")
# Aggregate counts at the Family level
family_level_agg <- genome_tax %>%
select(-genome) %>%
group_by(phylum, class, family) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame() %>%
select(-c(phylum,class))
# Aggregate counts at the Phylum level
phylum_level_agg <- genome_tax %>%
select(-genome) %>%
group_by(phylum) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame() 4.1.2.1 Family level
richness_f_metag <- family_level_agg %>%
column_to_rownames(var = "family") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 0) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(richness = 1) %>%
rownames_to_column(var = "sample")
neutral_f_metag <- family_level_agg %>%
column_to_rownames(var = "family") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 1) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(neutral = 1) %>%
rownames_to_column(var = "sample")
alpha_div_f_metag <- richness_f_metag %>%
full_join(neutral_f_metag, by = join_by(sample == sample))
alpha_div_f_metag_data <- alpha_div_f_metag %>%
left_join(sample_metadata, by = join_by(sample == sample))Diversity values
alpha_div_f_metag %>%
pivot_longer(-sample, names_to = "alpha", values_to = "value") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
group_by(alpha) %>%
summarise(
total_mean = mean(value, na.rm = T),
total_sd = sd(value, na.rm = T),
Eb_mean = mean(value[Species == "Eb"], na.rm = T),
Eb_sd = sd(value[Species == "Eb"], na.rm = T),
Ha_mean = mean(value[Species == "Ha"], na.rm = T),
Ha_sd = sd(value[Species == "Ha"], na.rm = T),
Pk_mean = mean(value[Species == "Pk"], na.rm = T),
Pk_sd = sd(value[Species == "Pk"], na.rm = T)
) %>%
mutate(
Total = str_c(round(total_mean, 2), "±", round(total_sd, 2)),
Cnephaeus = str_c(round(Eb_mean, 2), "±", round(Eb_sd, 2)),
Hypsugo = str_c(round(Ha_mean, 2), "±", round(Ha_sd, 2)),
Pipistrellus = str_c(round(Pk_mean, 2), "±", round(Pk_sd, 2))
) %>%
arrange(-Eb_mean) %>%
dplyr::select(alpha, Total, Cnephaeus, Pipistrellus, Hypsugo) %>%
tt()| alpha | Total | Cnephaeus | Pipistrellus | Hypsugo |
|---|---|---|---|---|
| richness | 6.47±6.67 | 12.2±10.51 | 6.73±5.35 | 3.16±2.27 |
| neutral | 3.2±2.79 | 4.63±4.12 | 3.64±2.75 | 1.94±1.12 |
Richness
Model_richness <- MASS::glm.nb(richness ~ Species, data = alpha_div_f_metag_data,trace=TRUE)
anova(Model_richness)Analysis of Deviance Table
Model: Negative Binomial(2.2583), link: log
Response: richness
Terms added sequentially (first to last)
Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL 50 71.305
Species 2 21.013 48 50.292 2.736e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
$emmeans
Species emmean SE df asymp.LCL asymp.UCL
Eb 2.50 0.229 Inf 2.052 2.95
Ha 1.15 0.200 Inf 0.758 1.54
Pk 1.91 0.164 Inf 1.585 2.23
Results are given on the log (not the response) scale.
Confidence level used: 0.95
$contrasts
contrast estimate SE df z.ratio p.value
Eb - Ha 1.352 0.304 Inf 4.445 <.0001
Eb - Pk 0.595 0.282 Inf 2.113 0.0872
Ha - Pk -0.756 0.259 Inf -2.925 0.0097
Results are given on the log (not the response) scale.
P value adjustment: tukey method for comparing a family of 3 estimates
Neutral
Model_neutral <- lm(formula = neutral ~ Species, data = alpha_div_f_metag_data)
anova(Model_neutral)Analysis of Variance Table
Response: neutral
Df Sum Sq Mean Sq F value Pr(>F)
Species 2 55.07 27.5358 3.9508 0.0258 *
Residuals 48 334.54 6.9696
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
$emmeans
Species emmean SE df lower.CL upper.CL
Eb 4.63 0.835 48 2.95 6.31
Ha 1.94 0.606 48 0.72 3.16
Pk 3.64 0.563 48 2.51 4.77
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
Eb - Ha 2.693 1.030 48 2.611 0.0316
Eb - Pk 0.988 1.010 48 0.981 0.5923
Ha - Pk -1.705 0.827 48 -2.063 0.1085
P value adjustment: tukey method for comparing a family of 3 estimates
4.1.2.2 Phylum level
richness_p_metag <- phylum_level_agg %>%
column_to_rownames(var = "phylum") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 0) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(richness = 1) %>%
rownames_to_column(var = "sample")
neutral_p_metag <- phylum_level_agg %>%
column_to_rownames(var = "phylum") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 1) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(neutral = 1) %>%
rownames_to_column(var = "sample")
alpha_div_phylum_metag <- richness_p_metag %>%
full_join(neutral_p_metag, by = join_by(sample == sample))
alpha_div_phylum_metag_data <- alpha_div_phylum_metag %>%
left_join(sample_metadata, by = join_by(sample == sample))Diversity values
alpha_div_phylum_metag %>%
pivot_longer(-sample, names_to = "alpha", values_to = "value") %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
group_by(alpha) %>%
summarise(
total_mean = mean(value, na.rm = T),
total_sd = sd(value, na.rm = T),
Eb_mean = mean(value[Species == "Eb"], na.rm = T),
Eb_sd = sd(value[Species == "Eb"], na.rm = T),
Ha_mean = mean(value[Species == "Ha"], na.rm = T),
Ha_sd = sd(value[Species == "Ha"], na.rm = T),
Pk_mean = mean(value[Species == "Pk"], na.rm = T),
Pk_sd = sd(value[Species == "Pk"], na.rm = T)
) %>%
mutate(
Total = str_c(round(total_mean, 2), "±", round(total_sd, 2)),
Cnephaeus = str_c(round(Eb_mean, 2), "±", round(Eb_sd, 2)),
Hypsugo = str_c(round(Ha_mean, 2), "±", round(Ha_sd, 2)),
Pipistrellus = str_c(round(Pk_mean, 2), "±", round(Pk_sd, 2))
) %>%
arrange(-Eb_mean) %>%
dplyr::select(alpha, Total, Cnephaeus, Pipistrellus, Hypsugo) %>%
tt()| alpha | Total | Cnephaeus | Pipistrellus | Hypsugo |
|---|---|---|---|---|
| richness | 2.78±2.21 | 5.1±2.96 | 3.09±1.66 | 1.21±0.54 |
| neutral | 1.77±1.11 | 2.37±1.43 | 2.09±1.14 | 1.07±0.2 |
Richness
Model_richness_p <- MASS::glm.nb(richness ~ Species, data = alpha_div_phylum_metag_data,trace=TRUE)
anova(Model_richness_p)Analysis of Deviance Table
Model: Negative Binomial(7888.097), link: log
Response: richness
Terms added sequentially (first to last)
Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL 50 76.955
Species 2 37.613 48 39.342 6.799e-09 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
$emmeans
Species emmean SE df asymp.LCL asymp.UCL
Eb 1.629 0.140 Inf 1.355 1.90
Ha 0.191 0.209 Inf -0.218 0.60
Pk 1.128 0.121 Inf 0.891 1.37
Results are given on the log (not the response) scale.
Confidence level used: 0.95
$contrasts
contrast estimate SE df z.ratio p.value
Eb - Ha 1.438 0.251 Inf 5.725 <.0001
Eb - Pk 0.501 0.185 Inf 2.703 0.0189
Ha - Pk -0.937 0.241 Inf -3.886 0.0003
Results are given on the log (not the response) scale.
P value adjustment: tukey method for comparing a family of 3 estimates
Neutral
Model_neutral <- lm(formula = neutral ~ Species, data = alpha_div_phylum_metag_data)
anova(Model_neutral)Analysis of Variance Table
Response: neutral
Df Sum Sq Mean Sq F value Pr(>F)
Species 2 15.015 7.5073 7.8056 0.001161 **
Residuals 48 46.166 0.9618
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
$emmeans
Species emmean SE df lower.CL upper.CL
Eb 2.37 0.310 48 1.747 2.99
Ha 1.07 0.225 48 0.622 1.53
Pk 2.09 0.209 48 1.666 2.51
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
Eb - Ha 1.297 0.383 48 3.385 0.0040
Eb - Pk 0.285 0.374 48 0.761 0.7284
Ha - Pk -1.012 0.307 48 -3.296 0.0052
P value adjustment: tukey method for comparing a family of 3 estimates
4.2 Amplicon
load("resources/amplicon/data_standard_filt.Rdata")
sample_metadata <- sample_metadata %>%
select(1,9,10,13) %>%
mutate(method="Metabarcoding_standard")
no_archaea <- genome_counts_filt %>%
left_join(genome_metadata[c(1,2)], by="genome") %>%
filter(domain!="Archaea") %>%
select(-domain)4.2.1 ASV level
richness <- genome_counts_filt %>%
column_to_rownames(var = "genome") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 0) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(richness = 1) %>%
rownames_to_column(var = "sample")
neutral <- genome_counts_filt %>%
column_to_rownames(var = "genome") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 1) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(neutral = 1) %>%
rownames_to_column(var = "sample")
amplicon_alpha_div <- richness %>%
full_join(neutral, by = join_by(sample == sample))Correlation between diversity and estimate microbial fraction
fraction_ampli <- read_csv("resources/metagenomics/microbial_fraction_estimation.csv") %>%
mutate(sample = str_remove(sample, "_M_1")) %>%
select(sample, read_fraction) %>%
right_join(amplicon_alpha_div, by="sample") %>%
left_join(sample_metadata, by="sample")
cor.test(fraction_ampli$read_fraction, fraction_ampli$richness, method = "spearman")
Spearman's rank correlation rho
data: fraction_ampli$read_fraction and fraction_ampli$richness
S = 17781, p-value = 0.5231
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
-0.09659512
Spearman's rank correlation rho
data: fraction_ampli$read_fraction and fraction_ampli$neutral
S = 16198, p-value = 0.9948
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.001048412
p1_ampli <- ggplot(fraction_ampli, aes(x = read_fraction, y = richness)) +
geom_point(size = 3, color = "#2E86AB") +
geom_smooth(method = "lm", se = TRUE, color = "black") +
theme_classic() +
labs(x = "Read Fraction (%)", y = "Richness", title= "Correlation between estimated microbial fraction and diversity")
p2_ampli <- ggplot(fraction_ampli, aes(x = read_fraction, y = neutral)) +
geom_point(size = 3, color = "#F39C12") +
geom_smooth(method = "lm", se = TRUE, color = "black") +
theme_classic() +
labs(x = "Read Fraction (%)", y = "Neutral Alpha Diversity")
p1_ampli + p2_ampli Diversity values
| alpha | Total | Cnephaeus | Pipistrellus | Hypsugo |
|---|---|---|---|---|
| richness | 103.63±92.06 | 155.4±140.37 | 87.55±89.59 | 95±49.42 |
| neutral | 20.43±20.03 | 30.59±33.95 | 17.86±17.52 | 18.06±10.46 |
amplicon_alpha_div_data <- amplicon_alpha_div %>%
left_join(sample_metadata, by = join_by(sample == sample))Richness
Model_richness_ampli <- MASS::glm.nb(richness ~ Species, data = amplicon_alpha_div_data,trace=TRUE)
anova(Model_richness_ampli)Analysis of Deviance Table
Model: Negative Binomial(1.8863), link: log
Response: richness
Terms added sequentially (first to last)
Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL 50 60.068
Species 2 4.859 48 55.209 0.08808 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Neutral
Model_neutral <- lm(formula = neutral ~ Species, data = amplicon_alpha_div_data)
anova(Model_neutral)Analysis of Variance Table
Response: neutral
Df Sum Sq Mean Sq F value Pr(>F)
Species 2 1284.1 642.07 1.6408 0.2045
Residuals 48 18783.4 391.32
4.2.2 At different taxonomic level
load("resources/amplicon/data_standard_filt.Rdata")
sample_metadata_amplicon <- sample_metadata[c(1,9,10,13)] %>%
mutate(sample = paste0(sample, "_ampli")) %>%
mutate(method="Metabarcoding_standard")
genome_metadata_ampli <- genome_metadata %>%
mutate(
class = if_else(is.na(class), paste0(phylum, "_Unclassified"), class),
order = if_else(is.na(order),
if_else(str_detect(class, "Unclassified"), class, paste0(class, "_Unclassified")),
order),
family = if_else(is.na(family),
if_else(str_detect(order, "Unclassified"), order, paste0(order, "_Unclassified")),
family),
genus = if_else(is.na(genus),
if_else(str_detect(family, "Unclassified"), family, paste0(family, "_Unclassified")),
genus)
)
genome_tax_ampli <- genome_counts_filt %>%
column_to_rownames(., "genome")%>%
rename_with(~ paste0(., "_ampli")) %>%
rownames_to_column(., "genome")%>%
left_join(genome_metadata_ampli, by = "genome")
family_level_agg_ampli <- genome_tax_ampli %>%
select(-genome) %>%
group_by(phylum, class, family) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame() %>%
select(-c(phylum,class))
phylum_level_agg_ampli <- genome_tax_ampli %>%
select(-genome) %>%
group_by(phylum) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame()4.2.2.1 Family level
richness_f_ampli <- family_level_agg_ampli %>%
column_to_rownames(var = "family") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 0) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(richness = 1) %>%
rownames_to_column(var = "sample")
neutral_f_ampli <- family_level_agg_ampli %>%
column_to_rownames(var = "family") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 1) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(neutral = 1) %>%
rownames_to_column(var = "sample")
alpha_div_f_ampli <- richness_f_ampli %>%
full_join(neutral_f_ampli, by = join_by(sample == sample))
alpha_div_f_ampli_data <- alpha_div_f_ampli %>%
left_join(sample_metadata_amplicon, by = join_by(sample == sample))Diversity values
alpha_div_f_ampli %>%
pivot_longer(-sample, names_to = "alpha", values_to = "value") %>%
left_join(sample_metadata_amplicon, by = join_by(sample == sample)) %>%
group_by(alpha) %>%
summarise(
total_mean = mean(value, na.rm = T),
total_sd = sd(value, na.rm = T),
Eb_mean = mean(value[Species == "Eb"], na.rm = T),
Eb_sd = sd(value[Species == "Eb"], na.rm = T),
Ha_mean = mean(value[Species == "Ha"], na.rm = T),
Ha_sd = sd(value[Species == "Ha"], na.rm = T),
Pk_mean = mean(value[Species == "Pk"], na.rm = T),
Pk_sd = sd(value[Species == "Pk"], na.rm = T)
) %>%
mutate(
Total = str_c(round(total_mean, 2), "±", round(total_sd, 2)),
Cnephaeus = str_c(round(Eb_mean, 2), "±", round(Eb_sd, 2)),
Hypsugo = str_c(round(Ha_mean, 2), "±", round(Ha_sd, 2)),
Pipistrellus = str_c(round(Pk_mean, 2), "±", round(Pk_sd, 2))
) %>%
arrange(-Eb_mean) %>%
dplyr::select(alpha, Total, Cnephaeus, Pipistrellus, Hypsugo) %>%
tt()| alpha | Total | Cnephaeus | Pipistrellus | Hypsugo |
|---|---|---|---|---|
| richness | 38.61±20.88 | 40.4±20.86 | 33.73±22.76 | 43.32±18.28 |
| neutral | 7.93±5.03 | 8.65±6.76 | 6.74±5.19 | 8.94±3.61 |
Richness
Model_richness_ampli_f <- MASS::glm.nb(richness ~ Species, data = alpha_div_f_ampli_data,trace=TRUE)
anova(Model_richness_ampli_f)Analysis of Deviance Table
Model: Negative Binomial(3.6305), link: log
Response: richness
Terms added sequentially (first to last)
Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL 50 55.61
Species 2 2.2101 48 53.40 0.3312
Neutral
Model_neutral_ampli_f <- lm(formula = neutral ~ Species, data = alpha_div_f_ampli_data)
anova(Model_neutral_ampli_f)Analysis of Variance Table
Response: neutral
Df Sum Sq Mean Sq F value Pr(>F)
Species 2 55.89 27.944 1.1072 0.3388
Residuals 48 1211.41 25.238
4.2.2.2 Phylum level
richness_p_ampli <- phylum_level_agg_ampli %>%
column_to_rownames(var = "phylum") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 0) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(richness = 1) %>%
rownames_to_column(var = "sample")
neutral_p_ampli <- phylum_level_agg_ampli %>%
column_to_rownames(var = "phylum") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 1) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(neutral = 1) %>%
rownames_to_column(var = "sample")
alpha_div_phylum_ampli <- richness_p_ampli %>%
full_join(neutral_p_ampli, by = join_by(sample == sample))
alpha_div_phylum_ampli_data <- alpha_div_phylum_ampli %>%
left_join(sample_metadata_amplicon, by = join_by(sample == sample))Diversity values
alpha_div_phylum_ampli %>%
pivot_longer(-sample, names_to = "alpha", values_to = "value") %>%
left_join(sample_metadata_amplicon, by = join_by(sample == sample)) %>%
group_by(alpha) %>%
summarise(
total_mean = mean(value, na.rm = T),
total_sd = sd(value, na.rm = T),
Eb_mean = mean(value[Species == "Eb"], na.rm = T),
Eb_sd = sd(value[Species == "Eb"], na.rm = T),
Ha_mean = mean(value[Species == "Ha"], na.rm = T),
Ha_sd = sd(value[Species == "Ha"], na.rm = T),
Pk_mean = mean(value[Species == "Pk"], na.rm = T),
Pk_sd = sd(value[Species == "Pk"], na.rm = T)
) %>%
mutate(
Total = str_c(round(total_mean, 2), "±", round(total_sd, 2)),
Cnephaeus = str_c(round(Eb_mean, 2), "±", round(Eb_sd, 2)),
Hypsugo = str_c(round(Ha_mean, 2), "±", round(Ha_sd, 2)),
Pipistrellus = str_c(round(Pk_mean, 2), "±", round(Pk_sd, 2))
) %>%
arrange(-Eb_mean) %>%
dplyr::select(alpha, Total, Cnephaeus, Pipistrellus, Hypsugo) %>%
tt()| alpha | Total | Cnephaeus | Pipistrellus | Hypsugo |
|---|---|---|---|---|
| richness | 8.04±3.65 | 10±4.16 | 7.27±3.83 | 7.89±2.9 |
| neutral | 2.55±1.08 | 2.98±1.37 | 2.58±1.19 | 2.28±0.69 |
Richness
Model_richness_ampli_p <- MASS::glm.nb(richness ~ Species, data = alpha_div_phylum_ampli_data,trace=TRUE)
anova(Model_richness_ampli_p)Analysis of Deviance Table
Model: Negative Binomial(18.5682), link: log
Response: richness
Terms added sequentially (first to last)
Df Deviance Resid. Df Resid. Dev Pr(>Chi)
NULL 50 53.958
Species 2 4.2299 48 49.728 0.1206
Neutral
Model_neutral_ampli_p <- lm(formula = neutral ~ Species, data = alpha_div_phylum_ampli_data)
anova(Model_neutral_ampli_p)Analysis of Variance Table
Response: neutral
Df Sum Sq Mean Sq F value Pr(>F)
Species 2 3.311 1.6557 1.4477 0.2452
Residuals 48 54.898 1.1437
4.3 Comparison
4.3.1 At family and phylum level
load("resources/amplicon/data_standard_filt.Rdata")
sample_metadata_amplicon <- sample_metadata[c(1,9,10,13)] %>%
mutate(sample = paste0(sample, "_ampli")) %>%
mutate(method="Metabarcoding_standard")
genome_metadata_ampli <- genome_metadata %>%
mutate(
class = if_else(is.na(class), paste0(phylum, "_Unclassified"), class),
order = if_else(is.na(order),
if_else(str_detect(class, "Unclassified"), class, paste0(class, "_Unclassified")),
order),
family = if_else(is.na(family),
if_else(str_detect(order, "Unclassified"), order, paste0(order, "_Unclassified")),
family),
genus = if_else(is.na(genus),
if_else(str_detect(family, "Unclassified"), family, paste0(family, "_Unclassified")),
genus)
)
genome_tax_ampli <- genome_counts_filt %>%
column_to_rownames(., "genome")%>%
rename_with(~ paste0(., "_ampli")) %>%
rownames_to_column(., "genome")%>%
left_join(genome_metadata_ampli, by = "genome")
family_level_agg_ampli <- genome_tax_ampli %>%
select(-genome) %>%
group_by(phylum, class, family) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame() %>%
select(-c(phylum,class))
phylum_level_agg_ampli <- genome_tax_ampli %>%
select(-genome) %>%
group_by(phylum) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame()load("resources/metagenomics/data_filtered.Rdata")
genome_metadata <- genome_metadata %>%
mutate(
class = if_else(str_trim(class) == "", paste0(phylum, "_Unclassified"), class),
order = if_else(str_trim(order) == "",
if_else(str_detect(class, "Unclassified"), class, paste0(class, "_Unclassified")),
order),
family = if_else(str_trim(family) == "",
if_else(str_detect(order, "Unclassified"), order, paste0(order, "_Unclassified")),
family),
genus = if_else(str_trim(genus) == "",
if_else(str_detect(family, "Unclassified"), family, paste0(family, "_Unclassified")),
genus)
)
genome_tax <- genome_counts_filt %>%
left_join(genome_metadata[c(1,3,4,6)], by = "genome")
# Aggregate counts at the Family level
family_level_agg <- genome_tax %>%
select(-genome) %>%
group_by(phylum, class, family) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame() %>%
select(-c(phylum,class))
# Aggregate counts at the Phylum level
phylum_level_agg <- genome_tax %>%
select(-genome) %>%
group_by(phylum) %>%
summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE)), .groups = "drop")%>%
as.data.frame() merged_table <- full_join(family_level_agg_ampli,family_level_agg,
by = c("family"))%>%
mutate(across(where(is.numeric), ~replace_na(., 0)))
merged_table_phylum <- full_join(phylum_level_agg_ampli,phylum_level_agg,
by = c("phylum"))%>%
mutate(across(where(is.numeric), ~replace_na(., 0)))
sample_shot <- sample_metadata %>%
select(-c(Habitat,Longitude, Latitude, Region))%>%
mutate(method="Metagenomics")
all_samples <- rbind(sample_shot, sample_metadata_amplicon)richness <- merged_table %>%
column_to_rownames(var = "family") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 0) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(richness = 1) %>%
rownames_to_column(var = "sample")
neutral <- merged_table %>%
column_to_rownames(var = "family") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 1) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(neutral = 1) %>%
rownames_to_column(var = "sample")
alpha_div_family <- richness %>%
full_join(neutral, by = join_by(sample == sample))
alpha_div_family_data <- alpha_div_family %>%
left_join(all_samples, by = join_by(sample == sample)) %>%
mutate(sampleid = str_remove(sample, "_ampli"))4.3.1.1 Family level
alpha_div_family_data %>%
select(-sample,-sampleid, -Life_stage, -Sex) %>%
pivot_longer(
cols = -c(method, Species),
names_to = "metric",
values_to = "value"
) %>%
mutate(
metric = factor(
metric,
levels = c("richness", "neutral"),
labels = c("Richness", "Neutral")
),
Species = factor(
Species,
levels = c("Eb", "Pk", "Ha"),
labels = c("C. bottae", "P. kuhlii", "H. ariel")
)
) %>%
ggplot(aes(y = value, x = method, color = Species, fill = Species)) +
geom_boxplot(
width = 0.4,
alpha = 0.5,
outlier.shape = NA,
position = position_dodge(width = 0.75)
) +
geom_jitter(
position = position_jitterdodge(jitter.width = 0.1, dodge.width = 0.75),
size = 1,
alpha = 0.8,
) +
facet_wrap(~metric, scales = "free_y") +
scale_color_manual(values=species_color, labels = c(expression(italic("C. bottae")),
expression(italic("H. ariel")),
expression(italic("P. kuhlii"))))+
scale_fill_manual(values=species_color, labels = c(expression(italic("C. bottae")),
expression(italic("H. ariel")),
expression(italic("P. kuhlii")))) +
theme_classic() +
theme(
strip.background = element_blank(),
panel.grid.minor.x = element_line(size = 0.1, color = "grey"),
axis.text.y = element_text(size = 12),
axis.text.x = element_text(size = 12),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
strip.text = element_text(size = 14, lineheight = 0.6)
) +
labs(
x = "Methodological approach",
y = "Alpha diversity at family level",
color = "Species"
)Richness
Model_richness_stand <- glmer.nb(richness ~ Species*method+(1|sampleid), data = alpha_div_family_data)
summary(Model_richness_stand)Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
Family: Negative Binomial(3.0526) ( log )
Formula: richness ~ Species * method + (1 | sampleid)
Data: alpha_div_family_data
AIC BIC logLik deviance df.resid
742.7 763.7 -363.3 726.7 94
Scaled residuals:
Min 1Q Median 3Q Max
-1.4298 -0.7920 -0.2740 0.5214 2.3835
Random effects:
Groups Name Variance Std.Dev.
sampleid (Intercept) 0.003989 0.06316
Number of obs: 102, groups: sampleid, 51
Fixed effects:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 3.69577 0.19136 19.313 < 2e-16 ***
SpeciesHa 0.07113 0.23368 0.304 0.761
SpeciesPk -0.18218 0.22911 -0.795 0.427
methodMetagenomics -1.20029 0.27796 -4.318 1.57e-05 ***
SpeciesHa:methodMetagenomics -1.41893 0.35967 -3.945 7.98e-05 ***
SpeciesPk:methodMetagenomics -0.41133 0.33999 -1.210 0.226
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) SpecsH SpcsPk mthdMt SpcH:M
SpeciesHa -0.808
SpeciesPk -0.803 0.663
mthdMtgnmcs -0.647 0.537 0.562
SpcsH:mthdM 0.503 -0.636 -0.433 -0.771
SpcsPk:mthM 0.527 -0.438 -0.670 -0.819 0.631
Analysis of Variance Table
npar Sum Sq Mean Sq F value
Species 2 3.879 1.939 1.9394
method 1 199.977 199.977 199.9766
Species:method 2 18.326 9.163 9.1628
$emmeans
Species emmean SE df asymp.LCL asymp.UCL
Eb 3.10 0.147 Inf 2.81 3.38
Ha 2.46 0.117 Inf 2.23 2.69
Pk 2.71 0.108 Inf 2.50 2.92
Results are averaged over the levels of: method
Results are given on the log (not the response) scale.
Confidence level used: 0.95
$contrasts
contrast estimate SE df z.ratio p.value
Eb - Ha 0.638 0.183 Inf 3.489 0.0014
Eb - Pk 0.388 0.171 Inf 2.270 0.0601
Ha - Pk -0.250 0.154 Inf -1.629 0.2334
Results are averaged over the levels of: method
Results are given on the log (not the response) scale.
P value adjustment: tukey method for comparing a family of 3 estimates
$emmeans
Species = Eb:
method emmean SE df asymp.LCL asymp.UCL
Metabarcoding_standard 3.70 0.191 Inf 3.321 4.07
Metagenomics 2.50 0.212 Inf 2.079 2.91
Species = Ha:
method emmean SE df asymp.LCL asymp.UCL
Metabarcoding_standard 3.77 0.138 Inf 3.497 4.04
Metagenomics 1.15 0.186 Inf 0.783 1.51
Species = Pk:
method emmean SE df asymp.LCL asymp.UCL
Metabarcoding_standard 3.51 0.137 Inf 3.245 3.78
Metagenomics 1.90 0.154 Inf 1.601 2.20
Results are given on the log (not the response) scale.
Confidence level used: 0.95
$contrasts
Species = Eb:
contrast estimate SE df z.ratio p.value
Metabarcoding_standard - Metagenomics 1.20 0.278 Inf 4.318 <.0001
Species = Ha:
contrast estimate SE df z.ratio p.value
Metabarcoding_standard - Metagenomics 2.62 0.229 Inf 11.434 <.0001
Species = Pk:
contrast estimate SE df z.ratio p.value
Metabarcoding_standard - Metagenomics 1.61 0.195 Inf 8.265 <.0001
Results are given on the log (not the response) scale.
Neutral
Modelq1 <- lme(fixed = neutral ~ Species*method, data = alpha_div_family_data,
random = ~ 1 | sampleid)
summary(Modelq1)Linear mixed-effects model fit by REML
Data: alpha_div_family_data
AIC BIC logLik
565.6677 586.1824 -274.8338
Random effects:
Formula: ~1 | sampleid
(Intercept) Residual
StdDev: 2.370619 3.237882
Fixed effects: neutral ~ Species * method
Value Std.Error DF t-value p-value
(Intercept) 8.648785 1.269004 48 6.815411 0.0000
SpeciesHa 0.289247 1.567781 48 0.184495 0.8544
SpeciesPk -1.913748 1.530477 48 -1.250426 0.2172
methodMetagenomics -4.018137 1.448025 48 -2.774909 0.0078
SpeciesHa:methodMetagenomics -2.982507 1.788950 48 -1.667183 0.1020
SpeciesPk:methodMetagenomics 0.925954 1.746384 48 0.530212 0.5984
Correlation:
(Intr) SpecsH SpcsPk mthdMt SpcH:M
SpeciesHa -0.809
SpeciesPk -0.829 0.671
methodMetagenomics -0.571 0.462 0.473
SpeciesHa:methodMetagenomics 0.462 -0.571 -0.383 -0.809
SpeciesPk:methodMetagenomics 0.473 -0.383 -0.571 -0.829 0.671
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-1.62037711 -0.57512732 -0.04722462 0.37902692 3.12052590
Number of Observations: 102
Number of Groups: 51
numDF denDF F-value p-value
(Intercept) 1 48 145.46863 <.0001
Species 2 48 0.68907 0.5069
method 1 48 54.41395 <.0001
Species:method 2 48 3.86407 0.0278
$emmeans
Species emmean SE df lower.CL upper.CL
Eb 6.64 1.040 48 4.54 8.74
Ha 5.44 0.756 48 3.92 6.96
Pk 5.19 0.703 48 3.78 6.60
Results are averaged over the levels of: method
Degrees-of-freedom method: containment
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
Eb - Ha 1.202 1.29 48 0.934 0.6220
Eb - Pk 1.451 1.26 48 1.154 0.4860
Ha - Pk 0.249 1.03 48 0.241 0.9685
Results are averaged over the levels of: method
Degrees-of-freedom method: containment
P value adjustment: tukey method for comparing a family of 3 estimates
$emmeans
Species = Eb:
method emmean SE df lower.CL upper.CL
Metabarcoding_standard 8.65 1.270 50 6.0999 11.20
Metagenomics 4.63 1.270 48 2.0791 7.18
Species = Ha:
method emmean SE df lower.CL upper.CL
Metabarcoding_standard 8.94 0.921 48 7.0870 10.79
Metagenomics 1.94 0.921 48 0.0863 3.79
Species = Pk:
method emmean SE df lower.CL upper.CL
Metabarcoding_standard 6.74 0.856 48 5.0148 8.46
Metagenomics 3.64 0.856 48 1.9226 5.36
Degrees-of-freedom method: containment
Confidence level used: 0.95
$contrasts
Species = Eb:
contrast estimate SE df t.ratio p.value
Metabarcoding_standard - Metagenomics 4.02 1.450 48 2.775 0.0078
Species = Ha:
contrast estimate SE df t.ratio p.value
Metabarcoding_standard - Metagenomics 7.00 1.050 48 6.664 <.0001
Species = Pk:
contrast estimate SE df t.ratio p.value
Metabarcoding_standard - Metagenomics 3.09 0.976 48 3.167 0.0027
Degrees-of-freedom method: containment
4.3.1.2 Phylum level
richness_phylum <- merged_table_phylum %>%
column_to_rownames(var = "phylum") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 0) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(richness = 1) %>%
rownames_to_column(var = "sample")
neutral_phylum <- merged_table_phylum %>%
column_to_rownames(var = "phylum") %>%
dplyr::select(where( ~ !all(. == 0))) %>%
hilldiv(., q = 1) %>%
t() %>%
as.data.frame() %>%
dplyr::rename(neutral = 1) %>%
rownames_to_column(var = "sample")
alpha_div_phylum <- richness_phylum %>%
full_join(neutral_phylum, by = join_by(sample == sample))
alpha_div_phylum_data <- alpha_div_phylum %>%
left_join(all_samples, by = join_by(sample == sample)) %>%
mutate(sampleid = str_remove(sample, "_ampli"))alpha_div_phylum_data %>%
select(-sample, -sampleid, -Life_stage, -Sex) %>%
pivot_longer(
cols = -c(method, Species),
names_to = "metric",
values_to = "value"
) %>%
mutate(
metric = factor(
metric,
levels = c("richness", "neutral"),
labels = c("Richness", "Neutral")
),
Species = factor(
Species,
levels = c("Eb", "Pk", "Ha"),
labels = c("C. bottae", "P. kuhlii", "H. ariel")
)
) %>%
ggplot(aes(y = value, x = method, color = Species, fill = Species)) +
geom_boxplot(
width = 0.4,
alpha = 0.5,
outlier.shape = NA,
position = position_dodge(width = 0.75)
) +
geom_jitter(
position = position_jitterdodge(jitter.width = 0.1, dodge.width = 0.75),
size = 1,
alpha = 0.8,
show.legend = FALSE
) +
facet_wrap(~metric, scales = "free_y") +
scale_color_manual(values=species_color, labels = c(expression(italic("C. bottae")),
expression(italic("H. ariel")),
expression(italic("P. kuhlii"))))+
scale_fill_manual(values=species_color, labels = c(expression(italic("C. bottae")),
expression(italic("H. ariel")),
expression(italic("P. kuhlii")))) +
theme_classic() +
theme(
strip.background = element_blank(),
panel.grid.minor.x = element_line(size = 0.1, color = "grey"),
axis.text.y = element_text(size = 12),
axis.text.x = element_text(size = 12),
axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0)),
strip.text = element_text(size = 14, lineheight = 0.6)
) +
labs(
x = "Methodological approach",
y = "Alpha diversity at phylum level",
color = "Species"
)Richness
Model_richness_random <- glmer.nb(richness ~ Species*method+(1|sampleid), data = alpha_div_phylum_data)
summary(Model_richness_random)Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
Family: Negative Binomial(608.3082) ( log )
Formula: richness ~ Species * method + (1 | sampleid)
Data: alpha_div_phylum_data
AIC BIC logLik deviance df.resid
455.4 476.4 -219.7 439.4 94
Scaled residuals:
Min 1Q Median 3Q Max
-1.5919 -0.4741 -0.1426 0.3313 2.6884
Random effects:
Groups Name Variance Std.Dev.
sampleid (Intercept) 0.04368 0.209
Number of obs: 102, groups: sampleid, 51
Fixed effects:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 2.2784 0.1216 18.737 < 2e-16 ***
SpeciesHa -0.2317 0.1540 -1.504 0.1326
SpeciesPk -0.3177 0.1516 -2.096 0.0361 *
methodMetagenomics -0.6733 0.1730 -3.891 9.99e-05 ***
SpeciesHa:methodMetagenomics -1.2017 0.2833 -4.242 2.22e-05 ***
SpeciesPk:methodMetagenomics -0.1820 0.2259 -0.806 0.4204
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) SpecsH SpcsPk mthdMt SpcH:M
SpeciesHa -0.783
SpeciesPk -0.793 0.626
mthdMtgnmcs -0.483 0.381 0.388
SpcsH:mthdM 0.295 -0.388 -0.237 -0.611
SpcsPk:mthM 0.370 -0.292 -0.482 -0.766 0.468
Analysis of Variance Table
npar Sum Sq Mean Sq F value
Species 2 6.856 3.428 3.4279
method 1 98.840 98.840 98.8404
Species:method 2 19.561 9.780 9.7804
$emmeans
Species emmean SE df asymp.LCL asymp.UCL
Eb 1.94 0.1100 Inf 1.726 2.16
Ha 1.11 0.1230 Inf 0.869 1.35
Pk 1.53 0.0864 Inf 1.364 1.70
Results are averaged over the levels of: method
Results are given on the log (not the response) scale.
Confidence level used: 0.95
$contrasts
contrast estimate SE df z.ratio p.value
Eb - Ha 0.833 0.164 Inf 5.079 <.0001
Eb - Pk 0.409 0.139 Inf 2.946 0.0090
Ha - Pk -0.424 0.149 Inf -2.842 0.0124
Results are averaged over the levels of: method
Results are given on the log (not the response) scale.
P value adjustment: tukey method for comparing a family of 3 estimates
$emmeans
Species = Eb:
method emmean SE df asymp.LCL asymp.UCL
Metabarcoding_standard 2.278 0.1220 Inf 2.040 2.517
Metagenomics 1.605 0.1560 Inf 1.299 1.911
Species = Ha:
method emmean SE df asymp.LCL asymp.UCL
Metabarcoding_standard 2.047 0.0959 Inf 1.859 2.235
Metagenomics 0.172 0.2140 Inf -0.249 0.592
Species = Pk:
method emmean SE df asymp.LCL asymp.UCL
Metabarcoding_standard 1.961 0.0923 Inf 1.780 2.142
Metagenomics 1.105 0.1300 Inf 0.850 1.361
Results are given on the log (not the response) scale.
Confidence level used: 0.95
$contrasts
Species = Eb:
contrast estimate SE df z.ratio p.value
Metabarcoding_standard - Metagenomics 0.673 0.173 Inf 3.891 0.0001
Species = Ha:
contrast estimate SE df z.ratio p.value
Metabarcoding_standard - Metagenomics 1.875 0.224 Inf 8.359 <.0001
Species = Pk:
contrast estimate SE df z.ratio p.value
Metabarcoding_standard - Metagenomics 0.855 0.145 Inf 5.887 <.0001
Results are given on the log (not the response) scale.
Neutral
Modelq1_random <- lme(fixed = neutral ~ Species*method, data = alpha_div_phylum_data,
random = ~ 1 | sampleid)
summary(Modelq1_random)Linear mixed-effects model fit by REML
Data: alpha_div_phylum_data
AIC BIC logLik
292.4413 312.9561 -138.2206
Random effects:
Formula: ~1 | sampleid
(Intercept) Residual
StdDev: 0.7637683 0.6851316
Fixed effects: neutral ~ Species * method
Value Std.Error DF t-value p-value
(Intercept) 2.9831154 0.3244607 48 9.194074 0.0000
SpeciesHa -0.7063902 0.4008522 48 -1.762221 0.0844
SpeciesPk -0.4047138 0.3913143 48 -1.034242 0.3062
methodMetagenomics -0.6121400 0.3064002 48 -1.997845 0.0514
SpeciesHa:methodMetagenomics -0.5905534 0.3785395 48 -1.560084 0.1253
SpeciesPk:methodMetagenomics 0.1200440 0.3695325 48 0.324854 0.7467
Correlation:
(Intr) SpecsH SpcsPk mthdMt SpcH:M
SpeciesHa -0.809
SpeciesPk -0.829 0.671
methodMetagenomics -0.472 0.382 0.392
SpeciesHa:methodMetagenomics 0.382 -0.472 -0.317 -0.809
SpeciesPk:methodMetagenomics 0.392 -0.317 -0.472 -0.829 0.671
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-2.7220701 -0.3730800 -0.1714426 0.3351791 3.6366141
Number of Observations: 102
Number of Groups: 51
numDF denDF F-value p-value
(Intercept) 1 48 289.57575 <.0001
Species 2 48 4.76013 0.013
method 1 48 33.08181 <.0001
Species:method 2 48 2.92926 0.063
$emmeans
Species emmean SE df lower.CL upper.CL
Eb 2.68 0.286 48 2.10 3.25
Ha 1.68 0.207 48 1.26 2.09
Pk 2.33 0.193 48 1.94 2.72
Results are averaged over the levels of: method
Degrees-of-freedom method: containment
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
Eb - Ha 1.002 0.353 48 2.835 0.0180
Eb - Pk 0.345 0.345 48 0.999 0.5809
Ha - Pk -0.657 0.283 48 -2.319 0.0626
Results are averaged over the levels of: method
Degrees-of-freedom method: containment
P value adjustment: tukey method for comparing a family of 3 estimates
$emmeans
Species = Eb:
method emmean SE df lower.CL upper.CL
Metabarcoding_standard 2.98 0.324 50 2.331 3.63
Metagenomics 2.37 0.324 48 1.719 3.02
Species = Ha:
method emmean SE df lower.CL upper.CL
Metabarcoding_standard 2.28 0.235 48 1.803 2.75
Metagenomics 1.07 0.235 48 0.601 1.55
Species = Pk:
method emmean SE df lower.CL upper.CL
Metabarcoding_standard 2.58 0.219 48 2.139 3.02
Metagenomics 2.09 0.219 48 1.646 2.53
Degrees-of-freedom method: containment
Confidence level used: 0.95
$contrasts
Species = Eb:
contrast estimate SE df t.ratio p.value
Metabarcoding_standard - Metagenomics 0.612 0.306 48 1.998 0.0514
Species = Ha:
contrast estimate SE df t.ratio p.value
Metabarcoding_standard - Metagenomics 1.203 0.222 48 5.411 <.0001
Species = Pk:
contrast estimate SE df t.ratio p.value
Metabarcoding_standard - Metagenomics 0.492 0.207 48 2.382 0.0212
Degrees-of-freedom method: containment