1 setup

## 
## Attaching package: 'greta'
## The following objects are masked from 'package:stats':
## 
##     binomial, cov2cor, poisson
## The following objects are masked from 'package:base':
## 
##     %*%, apply, backsolve, beta, chol2inv, colMeans, colSums, diag,
##     eigen, forwardsolve, gamma, identity, rowMeans, rowSums, sweep,
##     tapply
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✔ tibble  3.0.4     ✔ dplyr   1.0.2
## ✔ tidyr   1.1.2     ✔ stringr 1.4.0
## ✔ readr   1.4.0     ✔ forcats 0.5.0
## ✔ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::slice()  masks greta::slice()
## This is bayesplot version 1.7.2
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
## 
##     get_legend
## 
## Attaching package: 'extraDistr'
## The following objects are masked from 'package:gtools':
## 
##     ddirichlet, rdirichlet
## The following object is masked from 'package:purrr':
## 
##     rdunif
## 
## ---------------------
## Welcome to dendextend version 1.14.0
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## Attaching package: 'dendextend'
## The following object is masked from 'package:ggpubr':
## 
##     rotate
## The following object is masked from 'package:stats':
## 
##     cutree
## 
## Attaching package: 'ape'
## The following objects are masked from 'package:dendextend':
## 
##     ladderize, rotate
## The following object is masked from 'package:ggpubr':
## 
##     rotate
## Registered S3 method overwritten by 'treeio':
##   method     from
##   root.phylo ape
## ggtree v2.0.4  For help: https://yulab-smu.github.io/treedata-book/
## 
## If you use ggtree in published research, please cite the most appropriate paper(s):
## 
## - Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods for mapping and visualizing associated data on phylogeny using ggtree. Molecular Biology and Evolution 2018, 35(12):3041-3043. doi: 10.1093/molbev/msy194
## - Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam. ggtree: an R package for visualization and annotation of phylogenetic trees with their covariates and other associated data. Methods in Ecology and Evolution 2017, 8(1):28-36, doi:10.1111/2041-210X.12628
## 
## Attaching package: 'ggtree'
## The following object is masked from 'package:ape':
## 
##     rotate
## The following object is masked from 'package:dendextend':
## 
##     rotate
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following object is masked from 'package:ggpubr':
## 
##     rotate
## 
## Attaching package: 'reghelper'
## The following object is masked from 'package:greta':
## 
##     beta
## The following object is masked from 'package:base':
## 
##     beta

2 bnpr estimates

3 plotting estimates

3.1 fitting models and creating plots

## Warning in nls(Y_exp ~ SSlogis(X, Asym, b2, b3), start = list(Asym = max(Y_exp)
## + : Convergence failure: singular convergence (7)
## Warning in nls(Y_exp ~ SSlogis(X, Asym, b2, b3), start = list(Asym = max(Y_exp)
## + : Convergence failure: initial par violates constraints
## Warning: `data_frame()` is deprecated as of tibble 1.1.0.
## Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: `mutate_()` is deprecated as of dplyr 0.7.0.
## Please use `mutate()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: Using size for a discrete variable is not advised.

## Warning: Using size for a discrete variable is not advised.
## Warning in nls(Y_exp ~ SSlogis(X, Asym, b2, b3), start = list(Asym = max(Y_exp)
## + : Convergence failure: singular convergence (7)
## Warning: Using size for a discrete variable is not advised.
## Warning in nls(Y_exp ~ SSlogis(X, Asym, b2, b3), start = list(Asym = max(Y_exp)
## + : Convergence failure: false convergence (8)
## Warning: Using size for a discrete variable is not advised.
all_growth_estimates_df <- do.call(
  rbind,
  lapply(
    c("KX003","KX004","KX007","KX008"),
    function(x) do.call(rbind,lapply(all_growth_estimates[[x]],function(x) x$data))
  )
)

final_pop_size <- all_growth_estimates_df %>%
  group_by(clade,id,clade_id) %>%
  summarise(N = Y[which.max(X)],N025 = log(Y025[which.max(X)]),N975 = log(Y975[which.max(X)]),
            n_samples = n_samples[1]) %>%
  as.data.frame %>% 
  rbind(
    data.frame(
      clade = rep(NA,4),
      id = c("KX003","KX004","KX007","KX008"),
      clade_id = rep("Total",4),
      N = sapply(individual_bnpr_estimates,function(x) log(x$summary$quant0.5[1])),
      N025 = sapply(individual_bnpr_estimates,function(x) log(x$summary$quant0.025[1])),
      N975 = sapply(individual_bnpr_estimates,function(x) log(x$summary$quant0.975[1])),
      n_samples = sapply(trees,function(x) length(x$tip.label))
    )
  )

all_growth_estimates_values_df <- lapply(
  c("KX003","KX004","KX007","KX008"),
  function(x) data.frame(
    linear = unlist(lapply(all_growth_estimates[[x]],
                           function(x) x$linear$coefficients[2])),
    linear_se = unlist(lapply(all_growth_estimates[[x]],
                              function(x) summary(x$linear)$coefficients[2,2])),
    nonlinear = unlist(lapply(all_growth_estimates[[x]],
                              function(x) 1/x$nonlinear$m$getPars()[3])),
    nonlinear_vaf = unlist(lapply(all_growth_estimates[[x]],
                                  function(x) 1/x$nonlinear_vaf$m$getPars()[2])),
    linear_early = unlist(lapply(all_growth_estimates[[x]],
                                 function(x) x$linear_early$coefficients[2])),
    linear_late = unlist(lapply(all_growth_estimates[[x]],
                                function(x) x$linear_late$coefficients[2])),
    offset = unlist(lapply(all_growth_estimates[[x]],
                           function(x) x$change_point_regression$par[1])),
    cp_1 = unlist(lapply(all_growth_estimates[[x]],
                         function(x) x$change_point_regression$par[2])),
    cp_2 = unlist(
      lapply(
        all_growth_estimates[[x]],
        function(x) sum(x$change_point_regression$par[c(3,2)]))),
    midpoint = unlist(lapply(all_growth_estimates[[x]],
                             function(x) x$change_point_regression$par[4])),
    id = x,
    vaf_from_clade = unlist(lapply(all_growth_estimates[[x]],
                                   function(x) x$vaf_from_clade)),
    vaf_from_clade_005 = unlist(lapply(all_growth_estimates[[x]],
                                       function(x) x$vaf_from_clade_005)),
    vaf_from_clade_095 = unlist(lapply(all_growth_estimates[[x]],
                                       function(x) x$vaf_from_clade_095)),
    expected_vaf = unlist(lapply(all_growth_estimates[[x]],
                                 function(x) x$expected_vaf)),
    min_age = unlist(lapply(all_growth_estimates[[x]],
                            function(x) min(x$data$X))),
    max_age = unlist(lapply(all_growth_estimates[[x]],
                            function(x) max(x$data$X))),
    w_sum = unlist(lapply(all_growth_estimates[[x]],
                          function(x) mean(x$data$W))),
    clade = unlist(lapply(all_growth_estimates[[x]],
                          function(x) x$data$clade_id[1])),
    clade_no = unlist(lapply(all_growth_estimates[[x]],
                             function(x) x$data$clade[1])),
    N_samples = clade_identification[[x]]$number_samples[
      c(1:length(all_growth_estimates[[x]]))]
  )
) %>% 
  do.call(what = rbind)

size_estimates_df <- lapply(
  c("KX003","KX004","KX007","KX008"),
  function(x) data.frame(
    no_cap = unlist(lapply(all_growth_estimates[[x]],function(x) x$nonlinear$m$getPars()[1])),
    id = x,
    clade = clade_identification[[x]]$variant_ID[c(1:length(all_growth_estimates[[x]]))],
    N_samples = clade_identification[[x]]$number_samples[c(1:length(all_growth_estimates[[x]]))]
  )
) %>% 
  do.call(what = rbind)

3.2 Population size estimates at last timepoint

3.3 plotting coefficients for linear and sigmoidal fits

all_growth_estimates_values_df %>%
  group_by(id) %>%
  mutate(clade = factor(clade,levels = clade_identification[[id[1]]]$variant_ID)) %>% 
  mutate(clade_N = seq(1,length(clade))) %>%
  mutate(Color = grepl("DNMT3A",clade)) %>% 
  subset(N_samples >= 5) %>% 
  ggplot(aes(x = clade,y = linear,
             ymin = linear - linear_se,ymax = linear + linear_se,
             colour = Color)) + 
  geom_hline(yintercept = 0.1,size = 0.25) +
  geom_hline(yintercept = 0.3,size = 0.25) +
  geom_linerange(size = 0.25) +
  geom_point(size = 0.25,shape = 3) + 
  facet_grid(~ id,scales = "free_x",space = "free_x") + 
  scale_color_manual(breaks = c(F,T),
                     values = colours[1:2],
                     guide = F) +
  theme_gerstung(base_size = 6) + 
  coord_cartesian(ylim = c(0,NA)) +
  ylab("Annual growth") + 
  xlab("Clade") + 
  rotate_x_text(angle = 45) + 
  ggtitle("Plotting coefficients for linear fit") +
  ggsave("figures/model_ch/trees/mitchell_coefficients_linear.pdf",
         height = 2,
         width = 4)

all_growth_estimates_values_df %>%
  group_by(id) %>%
  mutate(clade = factor(clade,levels = clade_identification[[id[1]]]$variant_ID)) %>% 
  mutate(clade_N = seq(1,length(clade))) %>%
  mutate(Color = grepl("DNMT3A",clade)) %>% 
  subset(N_samples >= 5) %>% 
  ggplot(aes(x = clade,y = nonlinear,
             colour = as.factor(Color))) + 
  geom_hline(yintercept = 0.1,size = 0.25) +
  geom_hline(yintercept = 0.3,size = 0.25) +
  geom_point(size = 0.25,shape = 3) + 
  facet_grid(~ id,scales = "free_x",space = "free_x") + 
  scale_color_manual(breaks = c(F,T),
                     values = colours[1:2],
                     guide = F) +
  theme_gerstung(base_size = 6) + 
  coord_cartesian(ylim = c(0,NA)) +
  ylab("Annual growth") + 
  xlab("Clade") + 
  rotate_x_text(angle = 45) + 
  ggtitle("Plotting coefficients for sigmoidal fit") +
  ggsave("figures/model_ch/trees/mitchell_coefficients_nonlinear.pdf",
         height = 2,
         width = 4)

3.4 Growth rates when using early and late coefficients

all_growth_estimates_values_df %>%
  subset(N_samples >= 5) %>% 
  gather(key = "key",value = "value",cp_1,cp_2) %>%
  mutate(key = ifelse(key == "cp_1","Early","Late")) %>% 
  ggplot(aes(x = key,y = exp(value) - 1)) + 
  geom_point(size = 0.5) +
  geom_line(aes(group = paste(id,clade)),alpha = 0.4,size = 0.25) + 
  geom_boxplot(size = 0.25,outlier.colour = NA) + 
  theme_gerstung(base_size = 6) + 
  coord_cartesian(ylim = c(0,1.1)) + 
  ylab("Annual growth rate") + 
  xlab("") + 
  ggtitle("Comparing growth rates estimates when\nconsidering early and late growth rates") +
  ggsave("figures/model_ch/trees/mitchell_coefficients_early_late.pdf",
         height = 2.5,
         width = 2)

3.5 Trajectories for different types of fit

min_clades <- 5
all_growth_estimates_df_var <- all_growth_estimates_df %>% 
  group_by(id,clade) %>% 
  mutate(X = X - min(X)) %>% 
  group_by(clade,id) %>%
  mutate(V = mean(W)) %>% 
  ungroup %>% 
  mutate(alpha_W = W <= 5) %>% 
  subset(n_samples >= min_clades) %>%
  mutate(col = ifelse(grepl("DNMT3A",clade_id),"DNMT3A",
                      ifelse(grepl("Clade",clade_id),"No known driver","Other driver"))) %>%
  group_by(clade,id) %>%
  mutate(C = cumsum(c(0,diff(alpha_W) == 1)))

subset(all_growth_estimates_df_var,alpha_W == T) %>%
  ggplot(aes(x = X,y = exp(Y),colour = col,group = paste(clade,C))) + 
  geom_hline(linetype = 2,size = 0.25,yintercept = 50e3) +
  geom_hline(linetype = 2,size = 0.25,yintercept = 200e3) +
  geom_line(data = all_growth_estimates_df_var,aes(group = clade),alpha = 0.3,size = 0.25) +
  geom_line(size = 0.25) + 
  facet_wrap(~ id) + 
  scale_colour_manual(values = c(DNMT3A = "#BE0032",
                                 `Other driver` = "#18A558",
                                 `No known driver` = "grey"),
                      guide = F) + 
  scale_y_continuous(trans = 'log10',expand = c(0,0),breaks = c(1e-2,1,1e2,1e4,1e6)) +
  scale_x_continuous(expand = c(0,0)) +
  coord_cartesian(ylim = c(0.5,1e7)) +
  theme_gerstung(base_size = 6) + 
  xlab("Time since first coal. (years)") +
  ylab("Neff") +
  theme(strip.text = element_text(margin = margin(b = 0.5))) +
  ggsave("figures/model_ch/trees/mitchell_trajectories.pdf",
         height = 1.5,
         width = 1.8)

all_growth_estimates_df %>% 
  group_by(clade,id) %>% 
  mutate(X = X - min(X)) %>% 
  subset(n_samples >= min_clades) %>%
  ggplot(aes(x = X,y = pred_l,colour = as.factor(clade))) + 
  geom_line(aes(y = Y),size = 0.25) + 
  geom_line(size = 0.25,linetype = 2) + 
  facet_wrap(~ id) + 
  scale_color_manual(breaks = c(1:18),
                     values = c(colours[1:18]),
                     guide = F) + 
  scale_y_continuous(breaks = seq(-21,21,by = 3),labels = 10^seq(-21,21,by = 3)) + 
  theme_gerstung(base_size = 6) + 
  xlab("Time since first coalescence (years)") +
  ylab("Effective population size") +
  ggtitle("Linear fit") +
  ggsave("figures/model_ch/trees/mitchell_trajectories_predicted.pdf",
         height = 2,
         width = 3)

all_growth_estimates_df %>% 
  group_by(clade,id) %>% 
  mutate(X = X - min(X)) %>% 
  subset(n_samples >= min_clades) %>%
  ggplot(aes(x = X,y = pred_nl,colour = as.factor(clade))) + 
  geom_line(aes(y = exp(Y)),size = 0.25) + 
  geom_line(size = 0.25,linetype = 2) + 
  facet_wrap(~ id) + 
  scale_color_manual(breaks = c(1:18),
                     values = c(colours[1:18]),
                     guide = F) + 
  scale_y_continuous(trans = 'log10') + 
  theme_gerstung(base_size = 6) + 
  xlab("Time since first coalescence (years)") +
  ylab("Effective population size") +
  ggtitle("Sigmoidal fit") +
  ggsave("figures/model_ch/trees/mitchell_trajectories_predicted_nl.pdf",
         height = 2,
         width = 3)

nl_sc_pop_size <- data.frame(
  clade = rep(NA,4),
  id = c("KX003","KX004","KX007","KX008"),
  clade_id = rep("Total",4),
  N = sapply(individual_bnpr_estimates,function(x) log(x$summary$quant0.5[1])),
  N025 = sapply(individual_bnpr_estimates,function(x) log(x$summary$quant0.025[1])),
  N975 = sapply(individual_bnpr_estimates,function(x) log(x$summary$quant0.975[1])),
  n_samples = sapply(trees,function(x) length(x$tip.label))
)

3.6 Trajectories for different types of fit (using only trajectories fitted at coalescence points)

min_clades <- 5
all_growth_estimates_df_var <- all_growth_estimates_df %>% 
  group_by(id,clade) %>% 
  mutate(X = X - min(X)) %>% 
  group_by(clade,id) %>%
  mutate(V = mean(W)) %>% 
  ungroup %>% 
  mutate(alpha_W = W <= 5) %>% 
  subset(n_samples >= min_clades) %>%
  mutate(col = ifelse(grepl("DNMT3A",clade_id),"DNMT3A",
                      ifelse(grepl("Clade",clade_id),"No known driver","Other driver"))) %>%
  group_by(clade,id) %>%
  mutate(C = cumsum(c(0,diff(alpha_W) == 1)))

subset(all_growth_estimates_df_var,alpha_W == T) %>%
  ggplot(aes(x = X,y = exp(Y),colour = col,group = paste(clade,C))) + 
  geom_rect(aes(xmin = 0,xmax = max(all_growth_estimates_df_var$X),
                ymin = 50e3,ymax = 200e3),fill = "grey92",colour = NA) +
  geom_line(data = all_growth_estimates_df_var,aes(group = clade),linetype = 3,size = 0.25) +
  geom_line(size = 0.25) + 
  facet_wrap(~ id) + 
  scale_colour_manual(values = c(DNMT3A = "#BE0032",
                                 `Other driver` = "#18A558",
                                 `No known driver` = "grey40"),
                      guide = F) + 
  scale_x_continuous(expand = c(0,0)) +
  scale_y_continuous(trans = 'log10',expand = c(0,0),breaks = c(1e-2,1,1e2,1e4,1e6)) +
  coord_cartesian(ylim = c(0.5,1e7)) +
  theme_gerstung(base_size = 6) + 
  xlab("Time since first coal. (years)") +
  ylab("Neff") +
  theme(strip.text = element_text(margin = margin(b = 0.5))) +
  ggsave("figures/model_ch/trees/mitchell_trajectories.pdf",
         height = 1.5,
         width = 1.8)

all_growth_estimates_df %>% 
  group_by(clade,id) %>% 
  mutate(X = X - min(X)) %>% 
  subset(n_samples >= min_clades) %>%
  ggplot(aes(x = X,y = pred_l,colour = as.factor(clade))) + 
  geom_line(aes(y = Y),size = 0.25) + 
  geom_line(size = 0.25,linetype = 2) + 
  facet_wrap(~ id) + 
  scale_color_manual(breaks = c(1:18),
                     values = c(colours[1:18]),
                     guide = F) + 
  scale_y_continuous(breaks = seq(-21,21,by = 3),labels = 10^seq(-21,21,by = 3)) + 
  theme_gerstung(base_size = 6) + 
  xlab("Time since first coalescence (years)") +
  ylab("Effective population size") +
  ggtitle("Linear fit") +
  ggsave("figures/model_ch/trees/mitchell_trajectories_predicted.pdf",
         height = 2,
         width = 3)

all_growth_estimates_df %>% 
  group_by(clade,id) %>% 
  mutate(X = X - min(X)) %>% 
  subset(n_samples >= min_clades) %>%
  ggplot(aes(x = X,y = pred_nl,colour = as.factor(clade))) + 
  geom_line(aes(y = exp(Y)),size = 0.25) + 
  geom_line(size = 0.25,linetype = 2) + 
  facet_wrap(~ id) + 
  scale_color_manual(breaks = c(1:18),
                     values = c(colours[1:18]),
                     guide = F) + 
  scale_y_continuous(trans = 'log10') + 
  theme_gerstung(base_size = 6) + 
  xlab("Time since first coalescence (years)") +
  ylab("Effective population size") +
  ggtitle("Sigmoidal fit") +
  ggsave("figures/model_ch/trees/mitchell_trajectories_predicted_nl.pdf",
         height = 2,
         width = 3)

all_growth_estimates_df %>% 
  group_by(clade,id) %>% 
  mutate(X = X - min(X)) %>% 
  subset(n_samples >= min_clades) %>%
  ggplot(aes(x = X,y = exp(pred_cp),colour = as.factor(clade))) + 
  geom_line(aes(y = exp(Y)),size = 0.25) + 
  geom_line(size = 0.25,linetype = 2) + 
  facet_wrap(~ id) + 
  scale_color_manual(breaks = c(1:18),
                     values = c(colours[1:18]),
                     guide = F) + 
  scale_y_continuous(trans = 'log10') + 
  theme_gerstung(base_size = 6) + 
  xlab("Time since first coalescence (years)") +
  ylab("Effective population size") +
  ggtitle("Changepoint regression") +
  ggsave("figures/model_ch/trees/mitchell_trajectories_predicted_nl.pdf",
         height = 2,
         width = 3)

nl_sc_pop_size <- data.frame(
  clade = rep(NA,4),
  id = c("KX003","KX004","KX007","KX008"),
  clade_id = rep("Total",4),
  N = sapply(individual_bnpr_estimates,function(x) log(x$summary$quant0.5[1])),
  N025 = sapply(individual_bnpr_estimates,function(x) log(x$summary$quant0.025[1])),
  N975 = sapply(individual_bnpr_estimates,function(x) log(x$summary$quant0.975[1])),
  n_samples = sapply(trees,function(x) length(x$tip.label))
)

3.7 Trees and trajectories

plot_grid(
  plot_grid(all_plots$KX003$tree,all_plots$KX003$bnpr,ncol = 1,rel_heights = c(1,0.5)),
  plot_grid(all_plots$KX004$tree,all_plots$KX004$bnpr,ncol = 1,rel_heights = c(1,0.5)),
  nrow = 1
) + 
  ggsave("figures/model_ch/trees/mitchell_1.pdf",
         height = 6,
         width = 7)
## Warning: Removed 642 rows containing missing values (geom_label_repel).
## Warning: Removed 886 rows containing missing values (geom_label_repel).
## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

plot_grid(
  plot_grid(all_plots$KX007$tree,all_plots$KX007$bnpr,ncol = 1,rel_heights = c(1,0.5)),
  plot_grid(all_plots$KX008$tree,all_plots$KX008$bnpr,ncol = 1,rel_heights = c(1,0.5)),
  nrow = 1
) + 
  ggsave("figures/model_ch/trees/mitchell_2.pdf",
         height = 6,
         width = 7)
## Warning: Removed 611 rows containing missing values (geom_label_repel).
## Warning: Removed 721 rows containing missing values (geom_label_repel).
## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

## Warning in numnotnull("lwd"): NAs introduced by coercion

write.csv(all_growth_estimates_values_df,"data_output/mitchell_bnpr_coefficients.csv",row.names = T)

3.8 Representation of early and biphasic growth

coefs <- all_growth_estimates_values_df[1,]
bnpr_trajectory <- bnpr_estimates$KX003[[1]]$summary %>%
  mutate(time = ages$KX003 - time)

b_early <- coefs$cp_1
b_late <- coefs$cp_2
X <- seq(min(bnpr_trajectory$time),81)
plotting_scheme_exp <- data.frame(
  time = X,
  traj_true = exp(change_point(X,coefs$offset,b_early,b_late - b_early,coefs$midpoint)),
  traj_early = exp(X * b_early + coefs$offset))

plotting_scheme_exp %>%
  mutate(extrapol = traj_true != traj_early) %>% 
  ggplot(aes(x = time,y = traj_early)) + 
  geom_ribbon(data = bnpr_trajectory,aes(x = time,ymin = quant0.025,ymax = quant0.975),
              inherit.aes = F,
              alpha = 0.2) +
  geom_line(data = bnpr_trajectory,aes(x = time,y = quant0.5),
            inherit.aes = F,
            alpha = 0.75,size = 0.25) +
  geom_point(aes(x = 81,y = coefs$vaf_from_clade * 2e5),
             inherit.aes = F,
             colour = "grey50") +
  geom_line(aes(y = traj_true),colour = "blue",size = 0.25) +
  geom_line(colour = "red",size = 0.25,aes(linetype = extrapol)) + 
  scale_y_continuous(trans = 'log10',breaks = NULL) + 
  ylab("Neff") +
  xlab("Time") + 
  theme_gerstung(base_size = 6) + 
  scale_x_continuous(breaks = NULL) + 
  scale_linetype_manual(values = c(1,3),guide = F) +
  ggsave(filename = sprintf("figures/model_ch/dynamic_coefficients/early_late_difference_trajectory.pdf"),
         useDingbats = F,
         width = 0.8,height = 0.8)

all_vafs <- list()

for (i in 1:nrow(all_growth_estimates_values_df)) {
  coefs <- all_growth_estimates_values_df[i,]
  ID <- coefs$id
  clade_number <- coefs$clade_no

  all_vafs[[i]] <- data.frame(
    vaf = coefs$vaf_from_clade,
    vaf_005 = coefs$vaf_from_clade_005,
    vaf_095 = coefs$vaf_from_clade_095,
    clade = coefs$clade,
    id = ID,
    age = ages[[ID]]
  )
}

all_vafs_df <- all_vafs %>%
  do.call(what = rbind)

low_var_estimates <- all_growth_estimates_df %>%
  mutate(clade = clade_id) %>% 
  group_by(clade,id) %>% 
  mutate(WW = mean(W)) %>%
  subset(WW < 10) 

all_vafs_df <- all_vafs_df %>%
  subset(paste(clade,id) %in% paste(low_var_estimates$clade,low_var_estimates$id)) %>%
  arrange(vaf)

low_var_estimates %>% 
  mutate(extrapol = pred_cp != pred_cp_early) %>% 
  ggplot(aes(x = X)) +
  geom_ribbon(aes(ymin = Y025,ymax = Y975),
              alpha = 0.2) +
  geom_line(aes(y = exp(Y)),size = 0.25) +
  geom_line(aes(y = exp(pred_cp)),colour = "blue") +
  geom_line(aes(y = exp(pred_cp_early),linetype = extrapol),colour = "red") + 
  # geom_linerange(data = all_vafs_df,
  #                aes(x = age,ymin = vaf_005*1000e3,ymax = vaf_095*1000e3)) +
  geom_point(data = all_vafs_df,aes(x = age,y = vaf * 1000e3),size = 0.5) +
  geom_point(data = all_vafs_df,aes(x = age,y = vaf * 200e3),size = 0.5,colour = "red") +
  facet_wrap(~ factor(sprintf("%s\n%s",id,clade),paste(all_vafs_df$id,all_vafs_df$clade,sep = "\n")))+ 
  theme_gerstung(base_size = 6) + 
  scale_y_continuous(trans = 'log10') + 
  theme(strip.text = element_text(margin = margin())) + 
  coord_cartesian(ylim = c(1,1e8)) + 
  xlab("Time (years)") + 
  ylab("Neff") + 
  scale_linetype_manual(values = c(1,3),guide = F) +
  ggsave(filename = sprintf("figures/model_ch/trees/early_late_difference_trajectory_all.pdf"),
         useDingbats = F,
         width = 5,height = 3)