In this notebook we inspect how different methods to estimate EPS compare.
source("Scripts/vaf_dynamics_functions.R")
##
## Attaching package: 'greta'
## The following objects are masked from 'package:stats':
##
## binomial, cov2cor, poisson
## The following objects are masked from 'package:base':
##
## %*%, apply, backsolve, beta, chol2inv, colMeans, colSums, diag,
## eigen, forwardsolve, gamma, identity, rowMeans, rowSums, sweep,
## tapply
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✔ tibble 3.0.4 ✔ dplyr 1.0.2
## ✔ tidyr 1.1.2 ✔ stringr 1.4.0
## ✔ readr 1.4.0 ✔ forcats 0.5.0
## ✔ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::slice() masks greta::slice()
## This is bayesplot version 1.7.2
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
## * Does _not_ affect other ggplot2 plots
## * See ?bayesplot_theme_set for details on theme setting
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
##
## get_legend
##
## Attaching package: 'extraDistr'
## The following objects are masked from 'package:gtools':
##
## ddirichlet, rdirichlet
## The following object is masked from 'package:purrr':
##
## rdunif
##
## ---------------------
## Welcome to dendextend version 1.14.0
## Type citation('dendextend') for how to cite the package.
##
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
##
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
##
## To suppress this message use: suppressPackageStartupMessages(library(dendextend))
## ---------------------
##
## Attaching package: 'dendextend'
## The following object is masked from 'package:ggpubr':
##
## rotate
## The following object is masked from 'package:stats':
##
## cutree
##
## Attaching package: 'ape'
## The following objects are masked from 'package:dendextend':
##
## ladderize, rotate
## The following object is masked from 'package:ggpubr':
##
## rotate
## Registered S3 method overwritten by 'treeio':
## method from
## root.phylo ape
## ggtree v2.0.4 For help: https://yulab-smu.github.io/treedata-book/
##
## If you use ggtree in published research, please cite the most appropriate paper(s):
##
## [36m-[39m Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods for mapping and visualizing associated data on phylogeny using ggtree. Molecular Biology and Evolution 2018, 35(12):3041-3043. doi: 10.1093/molbev/msy194
## [36m-[39m Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam. ggtree: an R package for visualization and annotation of phylogenetic trees with their covariates and other associated data. Methods in Ecology and Evolution 2017, 8(1):28-36, doi:10.1111/2041-210X.12628
##
## Attaching package: 'ggtree'
## The following object is masked from 'package:ape':
##
## rotate
## The following object is masked from 'package:dendextend':
##
## rotate
## The following object is masked from 'package:tidyr':
##
## expand
## The following object is masked from 'package:ggpubr':
##
## rotate
##
## Attaching package: 'reghelper'
## The following object is masked from 'package:greta':
##
## beta
## The following object is masked from 'package:base':
##
## beta
source("Scripts/make_ultrametric.R")
library(parallel)
library(Matrix)
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:ggtree':
##
## expand
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(castor)
## Loading required package: Rcpp
library(phangorn)
library(phylodyn)
library(minpack.lm)
INLA:::inla.dynload.workaround()
change_point <- function(x, b0, m1, m2, delta) {
b0 + (x*m1) + (sapply(x-delta, function (t) max(0, t)) * m2)
}
plot_tree <- function(obj) {
tree_ultra <- obj$tree_ultra
tree_ultra$edge.length[is.infinite(tree_ultra$edge.length)] <- 0
tree_ultra$tip.label <- obj$tree$S
driver_list <- Filter(function(x) length(x) > 5,obj$driver_list)
if (length(driver_list) > 1) {
colours <- RColorBrewer::brewer.pal(length(driver_list),"Set3")
} else {
colours <- c("black")
}
colour_code <- rep("black",length(tree_ultra$tip.label))
R <- 1:length(driver_list)
if (length(driver_list) == 0) {
R <- numeric(0)
}
for (i in R) {
D <- driver_list[[i]]
colour_code[tree_ultra$tip.label %in% unique(D)] <- colours[i]
}
plot(tree_ultra,tip.color = colour_code)
}
clade_from_mrca <- function(tree,mrca) {
edge_idx <- which(tree$edge[,1] == mrca)
continue <- T
output <- c(which(tree$edge[,2] == mrca))
while (continue == T) {
if (length(edge_idx) > 0) {
output <- c(output,edge_idx)
new_edge_idx <- c()
for (idx in edge_idx) {
nodes <- tree$edge[idx,2]
new_idxs <- which(tree$edge[,1] == nodes)
new_edge_idx <- c(new_edge_idx,new_idxs)
}
edge_idx <- new_edge_idx
} else {
continue <- F
}
}
return(output)
}
build_tree <- function(sub_data,subsample_size=100,
detection_threshold=0) {
sub_af <- sub_data %>%
group_by(CloneID) %>%
summarise(N = max(NClones),.groups = "drop") %>%
arrange(CloneID)
MaxClone <- max(sub_data$CloneID)
MaxMut <- max(sub_data$MutID)
sub_af_sparse <- rep(0,MaxClone)
sub_af_sparse[sub_af$CloneID] <- sub_af$N
Presence <- sparseMatrix(i = sub_data$CloneID,
j = sub_data$MutID,
dims = c(MaxClone,MaxMut))
Presence[cbind(sub_data$CloneID,sub_data$MutID)] <- 1
if (sum(as.logical(sub_af_sparse)) < subsample_size) {
sub_af_sparse <- rep(1,length(sub_af_sparse))
}
S <- sample(MaxClone,subsample_size,replace = F,prob = sub_af_sparse)
af <- sub_af_sparse[S]
Presence <- Presence[S,]
Presence <- as.matrix(rbind(Presence,wt=0))
Presence <- Presence[,colSums(Presence) > 0]
dst <- as.matrix(dist(Presence > 0,method = "manhattan"))
tree <- root(njs(dst),
outgroup = "wt",
resolve.root = T,
edgelabel = T)
tree <- drop.tip(tree,"wt")
return(list(tree = tree,S = S,af = af))
}
read_clonex <- function(path,d = 1000) {
x <- read_tsv(path,
col_names = c("Gen","NClones","CloneID","MutID"),
col_types = c(col_integer(),col_integer(),col_integer(),col_integer()),
progress = F) %>%
mutate(Driver = MutID <= d)
return(x)
}
trajectory_from_td <- function(x=0:1000, td=0, s=0.01, N=2e5, g=1){
s <- s/g
t0 <- td - 1/(s)
t <- x-t0
t <- t*g
y <- pmax(0,t)
if (s==0)
return(y)
td <- 1/(s)
te <- log(N*s -1)/s + td
y[t>td] <- N/(1+exp(-(s*(t[t>td]-te))))
return(y)
}
trajectory_from_t0 <- function(x=0:1000, t0=0, s=0.01, N=2e5, g=1){
s <- s/g
t <- x-t0
t <- t*g
y <- pmax(0,t)
if (s==0)
return(y)
td <- 1/(s)
te <- log(N*s -1)/s + td
y[t>td] <- N/(1+exp(-(s*(t[t>td]-te))))
return(y)
}
In this section we calculate/load all BNPR trajectories for the trees inferred from Wright-Fisher (WF) simulations. These simulations are done for 6 different fitness levels - 0.005, 0.010, 0.015, 0.020, 0.025 and 0.030 - over 800 generations and with a fixed population size (200,000).
The first step is to build the trees, which we then display. One can immediately see that expansions become increasingly prevalent for higher fitness effects, with quite a few cases of a single clone sweeping all of the population.
N <- 2e5
N_DRIVERS <- 50
all_file_paths <- list.files(
"hsc_output_bnpr_complete",pattern = "last_generation",
full.names = T,recursive = T)
all_driver_file_paths <- list.files(
"hsc_output_bnpr_complete",pattern = "driver",
full.names = T,recursive = T)
file_name <- "data_output/simulated_trees_trajectories.RDS"
if (file.exists(file_name) == T) {
tree_traj <- readRDS(file_name)
trees <- tree_traj[[1]]
all_driver_trajectories <- tree_traj[[2]]
} else {
all_driver_trajectories <- all_driver_file_paths %>%
lapply(function(x) {
read.csv(x,header = F) %>%
arrange(V1,V2) %>%
select(Gen = V1,MutID = V2,Count = V3)})
trees <- mclapply(
all_file_paths,
mc.cores = 6,
mc.cleanup = T,
function(path) {
s <- str_match(path,"hsc_[0-9.]+") %>%
gsub(pattern = "hsc_",replacement = "") %>%
as.numeric()
system(sprintf('echo "%s"', path)) # prints during mclapply by using bash
x <- read_clonex(path,d = N_DRIVERS) %>%
subset(Gen == 800) %>%
subset(MutID != 0)
x$R <- as.numeric(str_match(path,"[0-9.]+$"))
x$s <- s
x$drift_threshold <- 1 / (N * (s))
driver_list <- x %>%
subset(Driver == T) %>%
select(CloneID,MutID)
driver_list_out <- list()
for (driver_id in unique(driver_list$MutID)) {
tmp <- driver_list %>%
subset(MutID == driver_id) %>%
select(CloneID) %>%
unlist
driver_list_out[[as.character(driver_id)]] <- tmp
}
tree <- x %>%
build_tree()
ultrametric_tree <- make.ultrametric.tree(tree$tree)
gc()
list(
tree = tree,
tree_ultra = ultrametric_tree,
driver_list = driver_list_out,
path = path
) %>%
return
}
)
names(trees) <- gsub('/last_generation','',all_file_paths)
names(all_driver_trajectories) <- gsub('/driver_trajectory','',all_driver_file_paths)
saveRDS(object = list(trees,all_driver_trajectories),file = file_name)
}
for (tree_name in names(trees)) {
trees[[tree_name]]$tree_ultra$edge.length <- trees[[tree_name]]$tree_ultra$edge.length * 800
}
par(mfrow = c(4,5),mar = c(2,0.5,1,0.5))
for (tree_name in names(trees)) {
tree <- trees[[tree_name]]
fitness <- str_match(tree_name,"[0-9.]+")
plot(tree$tree_ultra,main = fitness)
}
Here we calculate the actual EPS trajectories using BNPR and display them.
file_name <- "data_output/simulated_bnpr_trees.RDS"
if (file.exists(file_name) == T) {
all_estimates_trees <- readRDS(file_name)
} else {
all_estimates_trees <- lapply(
trees,
function(x) {
system(sprintf('echo "%s"', x$path)) # prints during mclapply by using bash
BNPR(x$tree_ultra) %>%
return
}
)
saveRDS(all_estimates_trees,file_name)
}
mcmc.popsize
and skyline trajectories for the whole treesWe accompany our BNPR estimates with those obtained through mcmc.popsize
and skyline estimations to prove that what we are observing are not artefacts specific to BNPR. Interestingly, mcmc.popsize
appears to suffer from its own bias - in some trajectories a very large initial EPS followed by a dip is observable. This holds for both possible priors - constant population size and a skyline-process population size, with the latter ameliorating this effect slightly.
all_estimates_trees_skyline <- mclapply(
trees,
mc.cores = 2,
mc.cleanup = T,
function(x) skyline(x$tree_ultra,epsilon = 0.01)
)
all_estimates_trees_skyline_wide <- mclapply(
trees,
mc.cores = 2,
mc.cleanup = T,
function(x) skyline(x$tree_ultra,epsilon = 0.05)
)
all_estimates_trees_mcmc <- mclapply(
trees,
mc.cores = 2,
mc.cleanup = T,
function(x) mcmc.popsize(x$tree_ultra,nstep=10000,thinning = 100,
progress.bar = F,
burn.in = 1000,lambda = 0.1,
method.prior.heights = "constant")
)
all_estimates_trees_mcmc_skyline <- mclapply(
trees,
mc.cores = 2,
mc.cleanup = T,
function(x) mcmc.popsize(x$tree_ultra,nstep=10000,thinning = 100,
progress.bar = F,
burn.in = 1000,lambda = 0.1,
method.prior.heights = "skyline")
)
par(mfrow = c(4,5),mar = c(1.5,2,2,1))
for (name in 1:length(all_estimates_trees)) {
plot_BNPR(all_estimates_trees[[name]])
lines(all_estimates_trees_skyline[[name]],col = "green")
lines(all_estimates_trees_skyline_wide[[name]],col = "#026440")
lines(extract.popsize(all_estimates_trees_mcmc[[name]]),col = "red")
lines(extract.popsize(all_estimates_trees_mcmc_skyline[[name]]),col = "purple")
}
file_name <- "data_output/simulated_bnpr_clades.RDS"
if (file.exists(file_name) == T) {
all_estimates_full <- readRDS(file_name)
all_estimates <- all_estimates_full[[1]]
all_estimates_trimmed_5_years <- all_estimates_full[[2]]
all_estimates_trimmed_10_years <- all_estimates_full[[3]]
} else {
all_estimates <- list()
i <- 1
for (obj_name in names(trees)) {
obj <- trees[[obj_name]]
print(obj_name)
s <- str_match(obj_name,"[0-9.]+") %>%
as.numeric
Tree <- list(tree = obj$tree_ultra)
Tree$tree$tip.label <- obj$tree$S
Tree$tree$edge.length[is.infinite(Tree$tree$edge.length)] <- 0
tree <- Tree$tree
driver_id_list <- Filter(function(x) length(x) > 5,obj$driver_list)
all_drivers <- do.call(c,driver_id_list)
clades <- cut_tree(tree = tree,depth = 0.1) %>%
Filter(f = function(x) length(x) >= 5)
clades_ <- list()
for (clade in clades) {
clade_tips <- tree$tip.label[clade]
for (driver in names(driver_id_list)) {
if (any(clade_tips %in% driver_id_list[[driver]])) {
clades_[[length(clades_)+1]] <- list(
clade = clade,
driver = driver
)
}
}
}
clades <- clades_
if (!(obj_name %in% names(all_estimates))) {
bnpr_estimates <- mclapply(
clades,
mc.cores = 2,
mc.cleanup = T,
function(clad) {
#tip_in_clade <- which(Tree$tree$tip.label %in% clad)
tip_in_clade <- clad$clade
if (length(tip_in_clade) > 4) {
sub_tree <- keep.tip(Tree$tree,tip_in_clade) %>%
multi2di()
if (!is.null(sub_tree)) {
bnpr_estimates <- NULL
bnpr_estimate <- BNPR(sub_tree)
if (!is.null(bnpr_estimate)) {
list(
tree = tree,
bnpr = bnpr_estimate,
clade = clad$clade,
tree = sub_tree,
driver = clad$driver) %>%
return
}
}
}
}
) %>%
Filter(f = function(x) !is.null(x))
all_estimates[[obj_name]] <- bnpr_estimates
}
gc()
}
saveRDS(all_estimates,file_name)
}
mcmc.popsize
trajectories for each cladeall_estimates_mcmc <- list()
i <- 1
par(mfrow = c(4,5),mar=c(1,2,2,1))
for (obj_name in names(all_estimates)) {
print(obj_name)
if (!(obj_name %in% names(all_estimates_mcmc))) {
obj <- all_estimates[[obj_name]]
estimates <- list()
for (child_obj in obj) {
Tr <- child_obj$tree %>%
keep.tip(child_obj$clade) %>%
multi2di()
pr <- ifelse(length(child_obj$tree$tip.labels) > 10,"skyline","constant")
pop_size_eps <- mcmc.popsize(Tr,10000,100,1000,
lambda = 0.1,
progress.bar = F,
method.prior.heights = pr)
estimates[[length(estimates)+1]] <- list(pop_size_eps = pop_size_eps)
}
estimates <- estimates %>%
Filter(f = function(x) !is.null(x))
all_estimates_mcmc[[obj_name]] <- estimates
}
gc()
}
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_1"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_10"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_11"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_12"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_13"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_14"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_15"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_16"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_17"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_18"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_19"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_2"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_20"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_3"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_4"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_5"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_6"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_7"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_8"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_9"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_1"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_10"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_11"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_12"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_13"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_14"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_15"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_16"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_17"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_18"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_19"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_2"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_20"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_3"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_4"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_5"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_6"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_7"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_8"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_9"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_1"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_10"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_11"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_12"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_13"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_14"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_15"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_16"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_17"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_18"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_19"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_2"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_20"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_3"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_4"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_5"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_6"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_7"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_8"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_9"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_1"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_10"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_11"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_12"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_13"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_14"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_15"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_16"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_17"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_18"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_19"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_2"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_20"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_3"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_4"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_5"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_6"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_7"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_8"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_9"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_1"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_10"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_11"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_12"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_13"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_14"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_15"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_16"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_17"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_18"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_19"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_2"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_20"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_3"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_4"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_5"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_6"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_7"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_8"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_9"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_1"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_10"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_11"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_12"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_13"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_14"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_15"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_16"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_17"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_18"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_19"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_2"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_20"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_3"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_4"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_5"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_6"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_7"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_8"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_9"
all_estimates_skyline <- list()
names_done <- names(all_estimates_skyline)
i <- 1
par(mfrow = c(4,5),mar=c(1,2,2,1))
for (obj_name in names(all_estimates)) {
print(obj_name)
if (!(obj_name %in% names_done)) {
obj <- all_estimates[[obj_name]]
estimates <- list()
for (child_obj in obj) {
Tr <- child_obj$tree %>%
keep.tip(child_obj$clade) %>%
multi2di()
sk <- skyline(Tr,epsilon = 0.02)
estimates[[length(estimates)+1]] <- list(skyline = sk)
}
estimates <- estimates %>%
Filter(f = function(x) !is.null(x))
all_estimates_skyline[[obj_name]] <- estimates
}
gc()
}
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_1"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_10"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_11"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_12"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_13"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_14"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_15"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_16"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_17"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_18"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_19"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_2"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_20"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_3"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_4"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_5"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_6"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_7"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_8"
## [1] "hsc_output_bnpr_complete/hsc_0.005_200_9"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_1"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_10"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_11"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_12"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_13"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_14"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_15"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_16"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_17"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_18"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_19"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_2"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_20"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_3"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_4"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_5"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_6"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_7"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_8"
## [1] "hsc_output_bnpr_complete/hsc_0.01_50_9"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_1"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_10"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_11"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_12"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_13"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_14"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_15"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_16"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_17"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_18"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_19"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_2"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_20"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_3"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_4"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_5"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_6"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_7"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_8"
## [1] "hsc_output_bnpr_complete/hsc_0.015_20_9"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_1"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_10"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_11"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_12"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_13"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_14"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_15"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_16"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_17"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_18"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_19"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_2"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_20"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_3"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_4"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_5"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_6"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_7"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_8"
## [1] "hsc_output_bnpr_complete/hsc_0.02_15_9"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_1"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_10"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_11"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_12"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_13"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_14"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_15"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_16"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_17"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_18"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_19"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_2"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_20"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_3"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_4"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_5"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_6"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_7"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_8"
## [1] "hsc_output_bnpr_complete/hsc_0.025_8_9"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_1"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_10"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_11"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_12"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_13"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_14"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_15"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_16"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_17"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_18"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_19"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_2"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_20"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_3"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_4"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_5"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_6"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_7"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_8"
## [1] "hsc_output_bnpr_complete/hsc_0.03_5_9"
mcmc.popsize
and skyline trajectories# fewer than 10 tips per clade
par(mfrow = c(4,5),mar = c(1,1,1,1))
for (name in names(all_estimates)) {
obj <- all_estimates[[name]]
obj_mcmc <- all_estimates_mcmc[[name]]
obj_skyl <- all_estimates_skyline[[name]]
if (length(obj) > 0) {
for (i in 1:length(obj)) {
if (length(obj[[i]]$clade) <= 10) {
bnpr_traj <- obj[[i]]$bnpr
mcmc_traj <- obj_mcmc[[i]]$pop_size_eps
skyl_traj <- obj_skyl[[i]]$skyline
plot_BNPR(bnpr_traj)
try(lines(extract.popsize(mcmc_traj),col = "red"))
lines(skyl_traj,col = "green")
}
}
}
}
## Error in prep$pos[index.right] : subscript out of bounds
## Error in prep$pos[index.right] : subscript out of bounds
## Warning in plot.window(...): nonfinite axis limits [GScale(-inf,173.085,2, .);
## log=1]
# 10-50 tips
par(mfrow = c(4,5),mar = c(1,1,1,1))
for (name in names(all_estimates)) {
obj <- all_estimates[[name]]
obj_mcmc <- all_estimates_mcmc[[name]]
obj_skyl <- all_estimates_skyline[[name]]
if (length(obj) > 0) {
for (i in 1:length(obj)) {
if (length(obj[[i]]$clade) < 50 & length(obj[[i]]$clade) > 10) {
bnpr_traj <- obj[[i]]$bnpr
mcmc_traj <- obj_mcmc[[i]]$pop_size_eps
skyl_traj <- obj_skyl[[i]]$skyline
plot_BNPR(bnpr_traj)
try(lines(extract.popsize(mcmc_traj),col = "red"))
lines(skyl_traj,col = "green")
}
}
}
}
## Error in prep$pos[index.right] : subscript out of bounds
# 50-100 clades
par(mfrow = c(4,5),mar = c(1,1,1,1))
for (name in names(all_estimates)) {
obj <- all_estimates[[name]]
obj_mcmc <- all_estimates_mcmc[[name]]
obj_skyl <- all_estimates_skyline[[name]]
if (length(obj) > 0) {
for (i in 1:length(obj)) {
if (length(obj[[i]]$clade) >= 50) {
bnpr_traj <- obj[[i]]$bnpr
mcmc_traj <- obj_mcmc[[i]]$pop_size_eps
skyl_traj <- obj_skyl[[i]]$skyline
plot_BNPR(bnpr_traj)
try(lines(extract.popsize(mcmc_traj),col = "red"))
# extracting population size fails for a few cases
lines(skyl_traj,col = "green")
}
}
}
}
## Error in prep$pos[index.right] : subscript out of bounds
## Error in prep$pos[index.right] : subscript out of bounds
set.seed(4242)
samples_to_plot <- sample(names(all_estimates),replace = F,size = 9)
data_for_plot <- samples_to_plot %>%
lapply(
function(i) {
data_list <- list()
obj <- all_estimates[[i]]
obj_mcmc <- all_estimates_mcmc[[i]]
obj_skyl <- all_estimates_skyline[[i]]
if (length(obj) > 0) {
for (j in 1:length(obj)) {
try(silent = T,{
bnpr_traj <- obj[[j]]$bnpr
mcmc_traj <- obj_mcmc[[j]]$pop_size_eps
skyl_traj <- obj_skyl[[j]]$skyline
pop_size <- extract.popsize(mcmc_traj) %>%
as.matrix
data_list[[j]] <- data.frame(
X = bnpr_traj$summary$time,
Y = bnpr_traj$summary$quant0.5,
Y025 = bnpr_traj$summary$quant0.025,
Y975 = bnpr_traj$summary$quant0.975,
MCMC_Y = approx(x = pop_size[,1],
y = pop_size[,3],
xout = bnpr_traj$summary$time)$y,
MCMC_Y025 = approx(x = pop_size[,1],
y = pop_size[,4],
xout = bnpr_traj$summary$time)$y,
MCMC_Y975 = approx(x = pop_size[,1],
y = pop_size[,5],
xout = bnpr_traj$summary$time)$y,
SKYL_Y = approx(x = skyl_traj$time,
y = skyl_traj$population.size,
xout = bnpr_traj$summary$time,rule = 2)$y,
id = i,
clade_no = j,
clade_size = length(obj[[j]]$clade)
)
})
}
}
return(do.call(rbind,data_list))
}
)
data_for_plot %>%
do.call(what = rbind) %>%
mutate(fitness = str_match(id,pattern = "[0-9.]+")) %>%
mutate(traj_id = as.numeric(as.factor(paste(id,clade_no)))) %>%
ggplot(aes(x = 800 - X)) +
geom_ribbon(aes(ymin = Y025,ymax = Y975,fill = "BNPR"),alpha = 0.3) +
geom_line(aes(y = Y,colour = "BNPR"),size = 0.25) +
geom_ribbon(aes(ymin = MCMC_Y025,ymax = MCMC_Y975,fill = "mcmc.popsize"),alpha = 0.3) +
geom_line(aes(y = MCMC_Y,colour = "mcmc.popsize"),size = 0.25) +
geom_line(aes(y = SKYL_Y,colour = "skyline",fill = "skyline"),size = 0.25) +
facet_wrap(~ reorder(sprintf("id=%s (N=%s;s=%s)",traj_id, clade_size,fitness),clade_size)) +
scale_y_continuous(trans = 'log10') +
coord_cartesian(ylim = c(1e-1,1e8)) +
scale_colour_aaas(name = NULL) +
scale_fill_aaas(name = NULL) +
theme_gerstung(base_size = 6) +
theme(strip.text = element_text(margin = margin()),
legend.position = "bottom",
legend.key.size = unit(0.1,"cm")) +
xlab("Time (generations)") +
ylab("Neff") +
ggsave(filename = sprintf("figures/simulations/eps_comparison.pdf"),
useDingbats = F,
width = 5.5,height = 3)
## Warning: Ignoring unknown aesthetics: fill
mcmc.popsize
and skylineThere is quite good agreement between log-linear trajectories fitted to skyline estimation and BNPR. This does not hold as well when comparing mcmc.popsize
and BNPR - probably due to the initial dip in population size, the EPS estimates derived from mcmc.popsize
lead to a slight underestimation of log-linear growth trajectories. As for early/late growth rate comparisons we observe little agreement between BNPR and mcmc.popsize
or skyline estimation.
linear_estimates <- list()
early_late_estimates <- list()
idx <- 1
for (x in names(all_estimates)) {
obj <- all_estimates[[x]]
obj_mcmc <- all_estimates_mcmc[[x]]
obj_skyl <- all_estimates_skyline[[x]]
linear_estimates[[x]] <- list()
early_late_estimates[[x]] <- list()
if (length(obj) > 0){
for (i in 1:length(obj)) {
try(
{
bnpr_data <- data.frame(
X = (800 - obj[[i]]$bnpr$summary$time),
Y = log(obj[[i]]$bnpr$summary$mean),
W = (log(obj[[i]]$bnpr$summary$quant0.975)-log(obj[[i]]$bnpr$summary$quant0.025))^2/16)
ps <- extract.popsize(obj_mcmc[[i]]$pop_size_eps)
X_ <- seq(min(obj_skyl[[i]]$skyline$time),max(obj_skyl[[i]]$skyline$time),length.out = 100)
sk_Y <- obj_skyl[[i]]$skyline$population.size[as.numeric(cut(X_,c(0,obj_skyl[[i]]$skyline$time)))]
skyl_data <- data.frame(
X = (800 - X_),
Y = log(sk_Y)
) %>%
na.omit
mcmc_data <- data.frame(
X = (800 - ps[,1]),
Y = log(ps[,3])) %>%
na.omit
linear_estimates[[x]][[i]] <- list(
id = idx,
bnpr = lm(Y ~ X,data = bnpr_data),
skyl = lm(Y ~ X,data = skyl_data),
mcmc = lm(Y ~ X,data = mcmc_data)
)
opt_fn_bnpr <- function(par) {
se <- (bnpr_data$Y - change_point(bnpr_data$X,par[1],par[2],par[3],par[4]))^2
return(mean(se / bnpr_data$W))
}
cpr_bnpr <- optim(
par = c(linear_estimates[[x]][[i]]$bnpr$coefficients[1],
linear_estimates[[x]][[i]]$bnpr$coefficients[2],
0,
mean(bnpr_data$X)),
method = "L-BFGS-B",
lower = c(NA,0,NA,min(bnpr_data$X) + diff(range(bnpr_data$X)) * 0.25),
upper = c(NA,NA,NA,max(bnpr_data$X) - diff(range(bnpr_data$X)) * 0.25),
fn = opt_fn_bnpr)
opt_fn_skyl <- function(par) {
se <- (skyl_data$Y - change_point(skyl_data$X,par[1],par[2],par[3],par[4]))^2
return(mean(se))
}
cpr_skyl <- optim(
par = c(linear_estimates[[x]][[i]]$skyl$coefficients[1],
linear_estimates[[x]][[i]]$skyl$coefficients[2],
0,
mean(bnpr_data$X)),
method = "L-BFGS-B",
lower = c(NA,0,NA,min(bnpr_data$X) + diff(range(bnpr_data$X)) * 0.25),
upper = c(NA,NA,NA,max(bnpr_data$X) - diff(range(bnpr_data$X)) * 0.25),
fn = opt_fn_skyl)
opt_fn_mcmc <- function(par) {
se <- (mcmc_data$Y - change_point(mcmc_data$X,par[1],par[2],par[3],par[4]))^2
return(mean(se))
}
cpr_mcmc <- optim(
par = c(linear_estimates[[x]][[i]]$mcmc$coefficients[1],
linear_estimates[[x]][[i]]$mcmc$coefficients[2],
0,
mean(bnpr_data$X)),
method = "L-BFGS-B",
lower = c(NA,0,NA,min(bnpr_data$X) + diff(range(bnpr_data$X)) * 0.25),
upper = c(NA,NA,NA,max(bnpr_data$X) - diff(range(bnpr_data$X)) * 0.25),
fn = opt_fn_mcmc)
early_late_estimates[[x]][[i]] <- list(
id = idx,
bnpr = cpr_bnpr,
skyl = cpr_skyl,
mcmc = cpr_mcmc,
W_sum = mean(bnpr_data$W)
)
}
)
}
}
idx <- idx + 1
}
## Error in prep$pos[index.right] : subscript out of bounds
## Error in prep$pos[index.right] : subscript out of bounds
## Error in prep$pos[index.right] : subscript out of bounds
## Error in prep$pos[index.right] : subscript out of bounds
## Error in prep$pos[index.right] : subscript out of bounds
linear_estimates_bnpr_mcmc_skyl <- linear_estimates %>%
lapply(function(x) do.call(rbind,
lapply(x, function(y) c(y$id,
y$bnpr$coefficient[2],
y$mcmc$coefficient[2],
y$skyl$coefficient[2])))) %>%
do.call(what = rbind) %>%
as.data.frame()
colnames(linear_estimates_bnpr_mcmc_skyl) <- c("id","BNPR","mcmc.popsize","Skyline")
linear_estimates_bnpr_mcmc_skyl$id <- names(all_estimates)[linear_estimates_bnpr_mcmc_skyl$id]
linear_estimates_bnpr_mcmc_skyl$fitness <- linear_estimates_bnpr_mcmc_skyl$id %>%
str_match("[0-9.]+") %>%
as.numeric
early_estimates_bnpr_mcmc_skyl <- early_late_estimates %>%
lapply(function(x) do.call(rbind,
lapply(x, function(y) c(y$id,
y$bnpr$par[2],
y$mcmc$par[2],
y$skyl$par[2],
y$W_sum)))) %>%
do.call(what = rbind) %>%
as.data.frame()
colnames(early_estimates_bnpr_mcmc_skyl) <- c("id","BNPR","mcmc.popsize","Skyline","W_sum")
early_estimates_bnpr_mcmc_skyl$id <- names(all_estimates)[early_estimates_bnpr_mcmc_skyl$id]
early_estimates_bnpr_mcmc_skyl$fitness <- early_estimates_bnpr_mcmc_skyl$id %>%
str_match("[0-9.]+") %>%
as.numeric
late_estimates_bnpr_mcmc_skyl <- early_late_estimates %>%
Filter(f = function(f) !is.null(x)) %>%
lapply(function(x) do.call(
rbind,
lapply(x, function(y) c(y$id,
ifelse(!is.null(y$bnpr),
sum(y$bnpr$par[2:3]),NA),
sum(y$mcmc$par[2:3]),
sum(y$skyl$par[2:3]))))) %>%
do.call(what = rbind) %>%
as.data.frame()
## Warning in (function (..., deparse.level = 1) : number of columns of result is
## not a multiple of vector length (arg 1)
## Warning in (function (..., deparse.level = 1) : number of columns of result is
## not a multiple of vector length (arg 3)
## Warning in (function (..., deparse.level = 1) : number of columns of result is
## not a multiple of vector length (arg 3)
colnames(late_estimates_bnpr_mcmc_skyl) <- c("id","BNPR","mcmc.popsize","Skyline")
late_estimates_bnpr_mcmc_skyl$id <- names(all_estimates)[late_estimates_bnpr_mcmc_skyl$id]
late_estimates_bnpr_mcmc_skyl$fitness <- late_estimates_bnpr_mcmc_skyl$id %>%
str_match("[0-9.]+") %>%
as.numeric
A <- linear_estimates_bnpr_mcmc_skyl %>%
ggplot() +
geom_point(aes(x = Skyline,y = BNPR),size = 0.5) +
geom_abline(slope = 1,linetype = 2,size = 0.25) +
theme_gerstung(base_size = 6) +
coord_cartesian(xlim = c(-0.05,0.05),
ylim = c(-0.05,0.05))
B <- early_estimates_bnpr_mcmc_skyl %>%
ggplot() +
geom_point(aes(x = Skyline,y = BNPR),size = 0.5) +
geom_abline(slope = 1,linetype = 2,size = 0.25) +
theme_gerstung(base_size = 6) +
coord_cartesian(xlim = c(-0.05,0.05),
ylim = c(-0.05,0.05))
C <- late_estimates_bnpr_mcmc_skyl %>%
ggplot() +
geom_point(aes(x = Skyline,y = BNPR),size = 0.5) +
geom_abline(slope = 1,linetype = 2,size = 0.25) +
theme_gerstung(base_size = 6) +
coord_cartesian(xlim = c(-0.05,0.05),
ylim = c(-0.05,0.05))
D <- linear_estimates_bnpr_mcmc_skyl %>%
ggplot() +
geom_point(aes(x = mcmc.popsize,y = BNPR),size = 0.5) +
geom_abline(slope = 1,linetype = 2,size = 0.25) +
theme_gerstung(base_size = 6) +
coord_cartesian(xlim = c(-0.05,0.05),
ylim = c(-0.05,0.05))
E <- early_estimates_bnpr_mcmc_skyl %>%
ggplot() +
geom_point(aes(x = mcmc.popsize,y = BNPR),size = 0.5) +
geom_abline(slope = 1,linetype = 2,size = 0.25) +
theme_gerstung(base_size = 6) +
coord_cartesian(xlim = c(-0.05,0.05),
ylim = c(-0.05,0.05))
FF <- late_estimates_bnpr_mcmc_skyl %>%
ggplot() +
geom_point(aes(x = mcmc.popsize,y = BNPR),size = 0.5) +
geom_abline(slope = 1,linetype = 2,size = 0.25) +
theme_gerstung(base_size = 6) +
coord_cartesian(xlim = c(-0.05,0.05),
ylim = c(-0.05,0.05))
plot_grid(A,B,C,D,E,FF,nrow = 2)
## Warning: Removed 3 rows containing missing values (geom_point).
data.frame(
BNPR = c(cor(linear_estimates_bnpr_mcmc_skyl$fitness,linear_estimates_bnpr_mcmc_skyl$BNPR),
cor(early_estimates_bnpr_mcmc_skyl$fitness,early_estimates_bnpr_mcmc_skyl$BNPR))^2,
Skyline = c(cor(linear_estimates_bnpr_mcmc_skyl$fitness,linear_estimates_bnpr_mcmc_skyl$Skyline),
cor(early_estimates_bnpr_mcmc_skyl$fitness,early_estimates_bnpr_mcmc_skyl$Skyline))^2,
mcmc.popsize = c(cor(linear_estimates_bnpr_mcmc_skyl$fitness,linear_estimates_bnpr_mcmc_skyl$mcmc.popsize),
cor(early_estimates_bnpr_mcmc_skyl$fitness,early_estimates_bnpr_mcmc_skyl$mcmc.popsize))^2,
fit = c("Log-linear","Early log-linear"))
data.frame(
BNPR = c(cor(linear_estimates_bnpr_mcmc_skyl$fitness,linear_estimates_bnpr_mcmc_skyl$BNPR),
cor(early_estimates_bnpr_mcmc_skyl$fitness,early_estimates_bnpr_mcmc_skyl$BNPR))^2,
Skyline = c(cor(linear_estimates_bnpr_mcmc_skyl$fitness,linear_estimates_bnpr_mcmc_skyl$Skyline),
cor(early_estimates_bnpr_mcmc_skyl$fitness,early_estimates_bnpr_mcmc_skyl$Skyline))^2,
mcmc.popsize = c(cor(linear_estimates_bnpr_mcmc_skyl$fitness,linear_estimates_bnpr_mcmc_skyl$mcmc.popsize),
cor(early_estimates_bnpr_mcmc_skyl$fitness,early_estimates_bnpr_mcmc_skyl$mcmc.popsize))^2,
fit = c("Log-linear","Early log-linear")) %>%
gather(key = "key",value = "value",BNPR,Skyline,mcmc.popsize) %>%
ggplot(aes(x = key,y = fit,fill = value)) +
geom_tile() +
geom_text(aes(label = round(value,2)),size = 2.6,colour = "white") +
theme_gerstung(base_size = 6) +
theme(legend.position = "bottom",legend.key.height = unit(0.1,"cm"),
legend.key.width = unit(0.4,"cm"),
legend.box.spacing = unit(0,"cm")) +
scale_y_discrete(expand = c(0,0)) +
scale_x_discrete(expand = c(0,0)) +
ylab("") +
xlab("") +
scale_fill_material(palette = "deep-purple",name = "R2") +
ggsave(filename = sprintf("figures/simulations/eps_comparison_heatmap.pdf"),
useDingbats = F,
width = 2.2,height = 1)