analyses/phylogenetic/reports-ius/importationSummaryMultiState.Rmd

---
title: "SARS-CoV-2 State Introductions"
subtitle: "Importation Summary for states"
date: '`r format(Sys.time(), "Last modified: %d %b %Y")`'
output:
  pdf_document:
    toc: true
    toc_depth: 3
    number_sections: true
    keep_tex: false
    fig_crop: false
layout: page
editor_options: 
  chunk_output_type: inline
params: 
  inputpath       :  "../../../data/phylogenetic/"
  casefile        : "../data/cases-rki-by-state.csv"
  infectionfile   : "../../../data/epidemiological/flaxman-results.csv"
  epimobilitypath : "../../epidemiological/results/"
  asymptomatic    : 31
  cluster_f       : "DTA"
  alpha           : 0.7189865
  beta            : 28.91369
  startDate       : "2020-10-01"
  endDate         : "2021-06-01"
  device          : "pdf"
  metadata        : "../../../../data/data/gisaid-20210602-metadata.tsv"
  outputfolder    : "../results/beast/run/lin-ius/"

---


```{r rsetup, include=FALSE}

  # metadata        : "../results/gisaid-20210602-metadata-sampled-unsampled.tsv"
  # outputpath      : "../results/trees-gisaid-Hamburg-20210417-lin-rich/"
  # state           : "Hamburg"
  
    library(lubridate)
    library(plyr)
    library(gplots)
    library(beastio)
    library(knitr)
    library(tictoc)
    library(stringr)
    source("../reports/palettes.R")
    source("../reports/plotutils.R")
    source("../reports/clusterutils.R")

    # Unpack the YAML `params` into plain variables used by the chunks below.
    inputpath    <- params$inputpath
    casefile     <- params$casefile
    infectionfile<- params$infectionfile
    # Both names are read from the same `epimobilitypath` parameter.
    epipath      <- params$epimobilitypath
    mobilitypath <- params$epimobilitypath
    asymptomatic <- params$asymptomatic
    cluster_f    <- params$cluster_f
    # Lag-model coefficients; the load-data chunk uses them as
    # shift = alpha + beta / seqs.
    alpha        <- params$alpha
    beta         <- params$beta
    
    startDate    <- as.Date(params$startDate)
    endDate      <- as.Date(params$endDate)
  
    outputfolder <- params$outputfolder
    # outputpath   <- params$outputpath
    # state        <- params$state

    # Figures go to a device-specific folder under the output folder; it is
    # created here (including parents) if it does not exist yet.
    figpath      <- paste0(outputfolder, "figures/importation_figures_", params$device, "/")
    dir.create(figpath, recursive = TRUE, showWarnings = FALSE)
    
    cachepath  <- paste0(outputfolder, "figures/cache/importationSummary_", params$device, "/")


    # Global chunk defaults: code is hidden (echo=FALSE), messages are dropped,
    # warnings and errors are shown, and caching is off unless a chunk opts in.
    knitr::opts_chunk$set(tidy=FALSE, cache=FALSE, cache.path = cachepath,
                          dev=params$device, fig.path=figpath, dpi=300,
                          message=FALSE, error=TRUE, warning=TRUE, echo=FALSE)
    
    # NOTE(review): plotList and travelDates are not referenced in the active
    # code of the chunks visible here; the 2020 dates look like leftovers from
    # an earlier analysis -- confirm whether they are still needed.
    plotList     <- c("China",
                      "Italy",
                      "Spain", 
                      "France", 
                      "Belgium",
                      "Netherlands",
                      "Ireland", 
                      "Switzerland", 
                      "US")

    travelDates <- list(china    = as.Date("2020-01-28"), 
                        italy    = as.Date("2020-02-25"), 
                        lockdown = as.Date("2020-03-23"))

    ############
    # Metadata #
    ############
    #metadata              <- read.csv(paste0(inputpath, "metadata.csv"))
    # Read the tab-separated metadata table; "NA" and empty fields become NA.
    metadata <- read.table(params$metadata, sep="\t", header=TRUE, na.strings=c("NA", ""), fill=TRUE, stringsAsFactors=FALSE, quote="|")
    # Drop rows whose Virus.name is the literal column name (presumably header
    # lines repeated inside the file). Rows with a missing Virus.name are kept
    # intact instead of being turned into all-NA rows by the comparison.
    is_header_row <- !is.na(metadata$Virus.name) & metadata$Virus.name == "Virus.name"
    metadata <- metadata[!is_header_row,]
    metadata$sample_date_orig  <- ymd(metadata$Collection.date)
    # metadata$sample_date  <- ymd(metadata$date_corrected)
    metadata$sample_date  <- metadata$sample_date_orig
    metadata$decimal_date <- decimal_date(metadata$sample_date)    
    metadata$taxon_label  <- metadata$Accession.ID
    # Country is the second "/"-separated field of Location. Single-bracket
    # indexing yields NA (rather than an out-of-bounds error) when Location is
    # missing or has fewer than two fields.
    location_fields   <- str_split(metadata$Location, "/")
    metadata$country  <- str_trim(vapply(location_fields, function(fields) fields[2], character(1)))
    # metadata$state    <- str_trim(sapply(str_split(paste0(metadata$Location,"/",metadata$Additional.location.information), "/"), "[[", 3))
    
    # Flag the metadata rows that belong to a given German state.
    #
    # Location and Additional.location.information are joined with "/" and
    # split into fields. A row is flagged when the second field (trimmed) is
    # "Germany" and `state` occurs as a fixed substring of the third field or,
    # if present, the fourth field.
    #
    # metadata: data frame with columns Location and
    #           Additional.location.information.
    # state:    state name to search for (matched literally, not as a regex).
    # Returns a logical vector with one element per row of `metadata`
    # (logical(0) for zero rows).
    set_instate <- function(metadata, state) {
      location_fields <- str_split(paste0(metadata$Location, "/", metadata$Additional.location.information), "/")
      vapply(location_fields, function(x) {
        in_germany <- str_trim(x[2]) == "Germany"
        # grepl() returns FALSE for a missing (NA) field, so short locations
        # simply do not match.
        in_state <- grepl(state, x[3], fixed=TRUE) ||
          (length(x) >= 4 && grepl(state, x[4], fixed=TRUE))
        in_germany & in_state
      }, logical(1))
    }
    # metadata$instate      <- set_instate(metadata, state)
    

    # Convert a cumulative series into a trailing 7-step average of increments.
    #
    # For positions 8..n the value is (cs[i] - cs[i - 7]) / 7. For the first
    # seven positions, where no full window exists, the value is cs[i] / i.
    #
    # cs: numeric vector of cumulative values.
    # Returns a vector of the same length as `cs`. Inputs shorter than eight
    # elements (including empty input) are handled without error and without
    # padding the result with NA.
    smooth <- function(cs) {
      n <- length(cs)
      if (n == 0) {
        return(cs)
      }
      x <- cs
      head_idx <- seq_len(min(n, 7))
      x[head_idx] <- cs[head_idx] / head_idx
      if (n > 7) {
        x[8:n] <- (cs[8:n] - cs[1:(n - 7)]) / 7
      }
      x
    }
    
    # case_data <- read.csv(params$casefile)
    # case_data$case <- smooth(case_data$sum_cases)
    # case_data$date <- ymd(as.POSIXct(case_data$time_iso8601))
    # case_data$seq <- sapply(case_data$date, function(x) sum(!is.na(metadata$sample_date_orig) & metadata$sample_date_orig == x & metadata$instate))
    # case_data$seq_smooth <- smooth(cumsum(case_data$seq))

    # One row per region to summarise:
    #   state      - label used for column names and in the text
    #   state.name - string searched for in the location fields (adm.level 3)
    #   adm.level  - 2: match on metadata$country, 3: match via set_instate()
    #   outputpath - folder holding the cluster tables for this region
    # stringsAsFactors = FALSE keeps the labels as character on every R
    # version, so they can be used directly as column names.
    stateFiles <- data.frame(state = c("Germany"), 
                             state.name = c("Germany"),
                             adm.level = c(2),
                             outputpath = c("../results/beast/run/lin-ius/"),
                             stringsAsFactors = FALSE)
                            # outputpath=c("../results/beast/run/lin/") )
                            # outputpath=c("../results/beast/run/lin-rich-sk/") )
    stateInfo <- list()
    # One logical column per row of stateFiles (same order), flagging the
    # metadata rows that belong to that region.
    stateInfo$metadata_instate <- data.frame(matrix(0, nrow=nrow(metadata), ncol=0))
    for (i in seq_len(nrow(stateFiles))) {
      state <- stateFiles$state[i]
      state.name <- stateFiles$state.name[i]
      if (stateFiles$adm.level[i] == 3) {
        instate <- set_instate(metadata, state.name)
      } else if (stateFiles$adm.level[i] == 2) {
        instate <- metadata$country == state
      } else {
        # Without this, no column would be added and the rename below would
        # silently relabel the previous region's column.
        stop("Unsupported adm.level for ", state, ": ", stateFiles$adm.level[i], call. = FALSE)
      }
      stateInfo$metadata_instate <- cbind(stateInfo$metadata_instate, instate)
      colnames(stateInfo$metadata_instate)[ncol(stateInfo$metadata_instate)] <- state
    }

    stateInfo$stateOrder <- c(1)
    
    # Region labels as plain strings, for inline use in the prose sections.
    states <- paste(stateFiles$state)

    # Build one "<state> : <value>" string per row of stateFiles.
    #
    # f: function called as f(state, index, clusterStats), where clusterStats
    #    is stateInfo$clusterStatsMCC[[index]]; its result is pasted after the
    #    state label.
    # Returns the sapply() result over all rows (empty when stateFiles has no
    # rows, instead of indexing rows 1 and 0).
    rsumstate <- function(f) {
      sapply(seq_len(nrow(stateFiles)), function(s) {
        paste(stateFiles$state[s], ":", f(stateFiles$state[s], s, stateInfo$clusterStatsMCC[[s]]))
      })
    }
    
```


# Summary
This notebook plots figures about the dataset, applies the importation lag model to the `r states` transmission lineage TMRCAs and plots figures with lineage importations. 

## Data and Method
- GISAID tree until ???? as initial tree.
- The tree contains ??? `r states` sequences.
- The tree is time-calibrated by TreeTime.
- Sankoff algorithm is used to assign location (`r states` and non-`r states`) to inner vertices of the tree.


```{r load-data, cache=TRUE}
    
    # Load the MCC lineage (cluster) tables for every row of stateFiles and
    # shift each lineage TMRCA by the lag model (alpha + beta / seqs).

    # Reduce a cluster label to fields 3, 6 and 7 of its "_"-separated form
    # ("-" is treated as "_" before splitting).
    shortenClusterName <- function(cluster) {
      paste(strsplit(gsub("-", "_", cluster), split="_")[[1]][c(3,6,7)], collapse="_")
    }

    stateInfo$clusterStatsMCC <- stateInfo$clusterSamplesMCC <- list()
    for (i in seq_len(nrow(stateFiles))) {
      state <- stateFiles$state[i]
      outputpath <- stateFiles$outputpath[i]

      #################################
      # Load and shift lineage TMRCAs #
      #################################
      #clusterStats      <- read.csv(paste0(outputpath, "clusters_", cluster_f, ".csv"))
      #clusterStatsMCC   <- read.csv(paste0(outputpath, "clusters_", cluster_f, "_MCC_0.5.csv"))
      # clusterStatsMCC   <- read.csv(paste0(outputpath, "clusters_", cluster_f, "_MCC.csv"))
      clusterStatsMCC   <- read.table(paste0(outputpath, "clusters_", cluster_f, "_MCC_0.5.tsv"), sep="\t", header=TRUE, na.strings=c("NA", ""), fill=TRUE, stringsAsFactors=FALSE, quote="|")
      #clusterSamplesMCC <- read.csv(paste0(outputpath, "clusterSamples_", cluster_f, "_MCC_0.5.csv"))
      # clusterSamplesMCC <- read.csv(paste0(outputpath, "clusterSamples_", cluster_f, "_MCC.csv"))
      clusterSamplesMCC <- read.table(paste0(outputpath, "clusterSamples_", cluster_f, "_MCC_0.5.tsv"), sep="\t", header=TRUE, na.strings=c("NA", ""), fill=TRUE, stringsAsFactors=FALSE, quote="|")

      # vapply keeps these columns character even when a table has no rows.
      clusterStatsMCC$cluster   <- vapply(clusterStatsMCC$cluster, shortenClusterName, character(1), USE.NAMES = FALSE)
      clusterSamplesMCC$cluster <- vapply(clusterSamplesMCC$cluster, shortenClusterName, character(1), USE.NAMES = FALSE)

      # Country is the second "/"-separated field of Location; single-bracket
      # indexing gives NA instead of an out-of-bounds error when Location is
      # missing or has fewer than two fields.
      clusterSamplesMCC$country <- str_trim(vapply(str_split(clusterSamplesMCC$Location, "/"), function(fields) fields[2], character(1)))

      # clusterStats$tmrca_calendar    <- ymd(clusterStats$tmrca_calendar)
      clusterStatsMCC$tmrca_calendar <- ymd(clusterStatsMCC$tmrca_calendar)
      #clusterSamplesMCC$sample_date  <- ymd(clusterSamplesMCC$sample_date)

      # Lag shift per lineage, decreasing with the number of sequences.
      clusterStatsMCC$shift <- alpha + beta/clusterStatsMCC$seqs
      # NOTE(review): the shift is divided by 366 before being subtracted from
      # the decimal-year TMRCA, i.e. it is presumably in days and a 366-day
      # year is assumed -- confirm this is acceptable for TMRCAs in 2021.
      clusterStatsMCC$tmrca_shifted <- clusterStatsMCC$tmrca - (clusterStatsMCC$shift/366)
      clusterStatsMCC$tmrca_shifted_calendar <- as.Date(round_date(date_decimal(clusterStatsMCC$tmrca_shifted), unit = "day"))
      # Time between the shifted TMRCA and the `oldest` date of the lineage.
      clusterStatsMCC$detection_lag <- as.Date(round_date(date_decimal(clusterStatsMCC$oldest), unit = "day")) - clusterStatsMCC$tmrca_shifted_calendar

      # #write.csv(clusterStatsMCC, paste0(outputpath, "clusters_", cluster_f, "_MCC_0.5_shifted.csv"), quote=FALSE, row.names=FALSE)
      # write.csv(clusterStatsMCC, paste0(outputpath, "clusters_", cluster_f, "_MCC_shifted.csv"), quote=FALSE, row.names=FALSE)

      stateInfo$clusterStatsMCC[[i]] <- clusterStatsMCC
      stateInfo$clusterSamplesMCC[[i]] <- clusterSamplesMCC
    }

    # The remainder of this chunk is disabled legacy code kept for reference.

#    ############################################
#    # Germany weekly cases and estimated infections #
#    ############################################
#    
#    ukCases      <- read.csv(casefile)
#    ukCases$date <- as.Date(ukCases$date)
#
#    europeInfections      <- read.csv(infectionfile)
#    europeInfections$time <- as.Date(europeInfections$time)
#    
#    weekBreaks <- seq.Date(as.Date("2020-01-19"), as.Date("2020-06-28"), by="weeks")
#    ukWeekly   <- data.frame(date       = weekBreaks[2:length(weekBreaks)], 
#                             seqs       = hist(metadata$sample_date[metadata$country == "UK"], breaks=weekBreaks, plot=FALSE, right=FALSE)$counts, 
#                             cases      = getHistogram(ukCases$date, ukCases$newCasesBySpecimenDate,  
#                                                       breaks=weekBreaks)$counts,
#                             infections = getHistogram(europeInfections$time[europeInfections$country == "United_Kingdom"], 
#                                                       europeInfections$predicted_infections_mean[europeInfections$country == "United_Kingdom"],
#                                                       breaks=weekBreaks)$counts,
#                             infectionsLower = getHistogram(europeInfections$time[europeInfections$country == "United_Kingdom"], 
#                                                            europeInfections$predicted_infections_lower_CI_95[europeInfections$country == "United_Kingdom"],
#                                                            breaks=weekBreaks)$counts,
#                             infectionsUpper = getHistogram(europeInfections$time[europeInfections$country == "United_Kingdom"], 
#                                                            europeInfections$predicted_infections_higher_CI_95_cumulative.1[europeInfections$country == "United_Kingdom"],
#                                                            breaks=weekBreaks)$counts)
#    ukWeekly$seqsCum       <- cumsum(ukWeekly$seqs)
#    ukWeekly$casesCum      <- cumsum(ukWeekly$cases)
#    ukWeekly$infectionsCum <- cumsum(ukWeekly$infections)
#    ukWeekly$infectionsLowerCum <- cumsum(ukWeekly$infectionsLower)
#    ukWeekly$infectionsUpperCum <- cumsum(ukWeekly$infectionsUpper)
#    
#    
#    # dayBreaks <- seq.Date(as.Date("2020-01-19"), as.Date("2020-06-28"), by="day")
#    # ukDaily   <- data.frame(date       = dayBreaks[2:length(dayBreaks)], 
#    #                          seqs       = hist(metadata$sample_date[metadata$country == "UK"], breaks=dayBreaks, plot=FALSE, right=FALSE)$counts, 
#    #                          cases      = getHistogram(ukCases$date, ukCases$newCasesBySpecimenDate,  
#    #                                                    breaks=dayBreaks)$counts,
#    #                          infections = getHistogram(europeInfections$time[europeInfections$country == "United_Kingdom"], 
#    #                                                    europeInfections$predicted_infections_mean[europeInfections$country == "United_Kingdom"],
#    #                                                    breaks=dayBreaks)$counts,
#    #                          infectionsLower = getHistogram(europeInfections$time[europeInfections$country == "United_Kingdom"], 
#    #                                                         europeInfections$predicted_infections_lower_CI_95[europeInfections$country == "United_Kingdom"],
#    #                                                         breaks=dayBreaks)$counts,
#    #                          infectionsUpper = getHistogram(europeInfections$time[europeInfections$country == "United_Kingdom"], 
#    #                                                         europeInfections$predicted_infections_higher_CI_95_cumulative.1[europeInfections$country == "United_Kingdom"],
#    #                                                         breaks=dayBreaks)$counts)
#    # ukDaily$seqsCum       <- cumsum(ukDaily$seqs)
#    # ukDaily$casesCum      <- cumsum(ukDaily$cases)
#    # ukDaily$infectionsCum <- cumsum(ukDaily$infections)
#    # ukDaily$infectionsLowerCum <- cumsum(ukDaily$infectionsLower)
#    # ukDaily$infectionsUpperCum <- cumsum(ukDaily$infectionsUpper)
#    
#    
#    
#    #######
#    # EII #
#    #######
#    
#    eii      <- read.csv(paste0(epipath, "estimated-introduction-index-", asymptomatic, ".csv"))
#    eii$date <- as.Date(eii$date)
#    colnames(eii)[2] <- "location"
#    eii$location <- revalue(eii$location, c("other"="Other"))
#
#    countryList <- levels(eii$location)
#    countryList <- countryList[-which(countryList == "Other")]
#    
#    dates  <- sort(unique(eii$date))
#    eiiAll <- data.frame(date       = dates, 
#                         location   = rep("all", length(dates)),
#                         num_intros = sapply(dates, function(x) sum(eii$num_intros[eii$date == x])))
#    
#    eii <- rbind(eiiAll, eii)
#
#    # plot(eii$date[eii$location == "all"], eii$num_intros[eii$location == "all"], type='l')
#    # lines(eii$date[eii$location == "Spain"], eii$num_intros[eii$location == "Spain"], type='l', lty=2)
#    # lines(eii$date[eii$location == "France"], eii$num_intros[eii$location == "France"], type='l', lty=2)
#    
#    
#    #########################################
#    # Estimated daily numbers of infections #
#    #########################################
#    
#    infections <- read.csv(paste0(epipath, "estimated-daily-infections.csv"))
#    infections$date <- as.Date(infections$date)
#    
#    infectionsSpecific <- remove <- c()
#    for (country in countryList) {
#      mask   <- which(infections$location == country)
#      remove <- c(remove, mask)
#      infectionsSpecific <- rbind(infectionsSpecific, infections[mask, ])
#    }
#    infectionsSpecific$location <- as.character(infectionsSpecific$location)
#    
#    dates <- sort(unique(infections$date))
#    infectionsAll <- data.frame(date     = dates, 
#                                num_infs = sapply(dates, function(x) sum(infections$num_infs[infections$date == x])),
#                                location = rep("all", length(dates)))
#    
#    infections <- infections[-remove, ]
#    infectionsOther <- data.frame(date     = dates, 
#                                  num_infs = sapply(dates, function(x) sum(infections$num_infs[infections$date == x])),
#                                  location = rep("Other", length(dates)))
#    
#    infections <- rbind(infectionsAll, rbind(infectionsSpecific, infectionsOther))
#    
#    # Smooth with 7-day rolling mean
#    infections$num_infs_smoothed <- rep(0, nrow(infections))
#    for (country in levels(infections$location)) {
#      
#      mask      <- which(infections$location == country)
#      dateOrder <- mask[order(infections$date[mask])]
#      
#      infections$num_infs_smoothed[dateOrder] <- sapply(1:length(dateOrder), function(i) mean(infections$num_infs[dateOrder[max(i-3,1):min(i+3, length(dateOrder))]], na.rm = TRUE))
#    }
#    
#    # Total number of infected in each country by the end of the dataset
#    totalInfected <- sort(sapply(levels(infections$location), function(x) sum(infections$num_infs[infections$location == x], na.rm=TRUE)), decreasing = TRUE)
#    
#    # loc <- "Switzerland"
#    # plot(infections$date[infections$location == loc], infections$num_infs[infections$location == loc], type='l', lty=2)
#    # lines(infections$date[infections$location == loc], infections$num_infs_smoothed[infections$location == loc])
#    
#    
#    ############################
#    # Estimated daily arrivals #
#    ############################
#       
#    arrivals <- read.csv(paste0(epipath, "estimated-arrivals.csv"))
#    arrivals$date    <- as.Date(arrivals$date)
#    arrivals$location <- revalue(arrivals$location, c("United States"  = "US",
#                                                      "Czech Republic" = "Czechia",
#                                                      "Dominican Rep"  = "Dominican Republic", 
#                                                      "Korea (South)"  = "Korea, South"))
#    
#    arrivalsSpecific <- remove <- c()
#    for (country in countryList) {
#      mask   <- which(arrivals$location == country)
#      remove <- c(remove, mask)
#      arrivalsSpecific <- rbind(arrivalsSpecific, arrivals[mask, c("date", "location", "estimate")])
#    }
#    arrivalsSpecific$location <- as.character(arrivalsSpecific$location)
#    
#    dates <- sort(unique(arrivals$date))
#    arrivalsAll <- data.frame(date     = dates, 
#                              location = rep("all", length(dates)),
#                              estimate = sapply(dates, function(x) sum(arrivals$estimate[arrivals$date == x])))
#    
#    arrivals <- arrivals[-remove, ]
#    arrivalsOther <- data.frame(date     = dates, 
#                                location = rep("Other", length(dates)),
#                                estimate = sapply(dates, function(x) sum(arrivals$estimate[arrivals$date == x])))
#    
#    arrivals <- rbind(arrivalsAll, rbind(arrivalsSpecific, arrivalsOther))
#    
#    # Smooth with 7-day rolling mean
#    arrivals$estimate_smoothed <- rep(0, nrow(arrivals))
#    for (country in levels(arrivals$location)) {
#      
#      mask      <- which(arrivals$location == country)
#      dateOrder <- mask[order(arrivals$date[mask])]
#      
#      arrivals$estimate_smoothed[dateOrder] <- sapply(1:length(dateOrder), function(i) mean(arrivals$estimate[dateOrder[max(i-3,1):min(i+3, length(dateOrder))]], na.rm = TRUE))
#    }
#    
#    # plot(arrivals$date[arrivals$location == "Spain"], arrivals$estimate[arrivals$location == "Spain"], type='l')
#    #lines(arrivals$date[arrivals$location == "Spain"], arrivals$estimate_smoothed[arrivals$location == "Spain"], type='l', lty=2)
#    #lines(arrivals$date[arrivals$location == "France"], arrivals$estimate[arrivals$location == "France"], type='l', lty=2)
#    
#    
#    #################################
#    # Lag model parameter estimates #
#    #################################
    
    # lagmodel <- read.csv(paste0(epipath, "tree-varying-lag-estimates.csv"))

```

\clearpage

# `r states` Sequenced Genomes


```{r sample-histogram, fig.width=7, fig.height=3, fig.cap = paste0("Collection dates of the ", nrow(metadata), " genomes analysed here (left-hand axis). Genomes are coloured by sampling location.")}

   # Daily bin edges from startDate to the day after the latest collection date.
   sampleBreaks <- seq.Date(startDate, 
                            max(metadata$sample_date_orig, na.rm = TRUE)+1, by="1 day")

   # Samples with a known collection date strictly after startDate.
   # NOTE(review): ">" leaves the first bin [startDate, startDate + 1) always
   # empty -- confirm that ">=" is not what was intended.
   after_start <- !is.na(metadata$sample_date_orig) & metadata$sample_date_orig > startDate

   # Histogram of all samples; used below only for the number of daily bins.
   seq_hist_oth <- hist(metadata$sample_date_orig[after_start],  
                        breaks=sampleBreaks, plot=FALSE, right=FALSE)
   
   # Two series per row of stateFiles: "<state>" and "Non <state>".
   n_states <- nrow(stateFiles)
   seq_hist_names <- character(2 * n_states)
   seq_hist_counts <- matrix(0L, nrow=length(seq_hist_oth$counts), ncol=2 * n_states)
   
   for (s in seq_len(n_states)) {
      # as.character() keeps the label a string even if the column is a factor.
      state <- as.character(stateFiles$state[s])
      # [[s]] extracts the logical column itself rather than a one-column
      # data frame.
      in_state <- stateInfo$metadata_instate[[s]]

      # which() drops rows whose membership is NA.
      seq_hist_state <- hist(metadata$sample_date_orig[which(in_state & after_start)], 
                       breaks=sampleBreaks, plot=FALSE, right=FALSE)
      seq_hist_counts[, 2 * s - 1] <- seq_hist_state$counts
      seq_hist_names[2 * s - 1] <- state

      seq_hist_state <- hist(metadata$sample_date_orig[which(!in_state & after_start)], 
                       breaks=sampleBreaks, plot=FALSE, right=FALSE)
      seq_hist_counts[, 2 * s] <- seq_hist_state$counts
      seq_hist_names[2 * s] <- paste("Non", state)
   }
   
   # One row per day, one column per series.
   seq_hist_breakdown <- as.data.frame(seq_hist_counts)
   colnames(seq_hist_breakdown) <- seq_hist_names
   
   # seq_hist_eng <- hist(metadata$sample_date[metadata$adm1 == "UK-ENG"], breaks=sampleBreaks, plot=FALSE, right=FALSE)
   # seq_hist_sct <- hist(metadata$sample_date[metadata$adm1 == "UK-SCT"], breaks=sampleBreaks, plot=FALSE, right=FALSE)
   # seq_hist_wls <- hist(metadata$sample_date[metadata$adm1 == "UK-WLS"], breaks=sampleBreaks, plot=FALSE, right=FALSE)
   # seq_hist_nir <- hist(metadata$sample_date[metadata$adm1 == "UK-NIR"], breaks=sampleBreaks, plot=FALSE, right=FALSE)
   # seq_hist_oth <- hist(metadata$sample_date[metadata$country != "UK"],  breaks=sampleBreaks, plot=FALSE, right=FALSE)

                                    
   # seq_hist_breakdown <- data.frame("eng" = seq_hist_eng$counts,
   #                                  "sct" = seq_hist_sct$counts,
   #                                  "wls" = seq_hist_wls$counts,
   #                                  "nir" = seq_hist_nir$counts,
   #                                  "oth" = seq_hist_oth$counts)
   
   # Plot the daily genome counts of the selected series with a legend.
   #
   # Reads seq_hist_names, seq_hist_breakdown, sampleBreaks and dePal from the
   # enclosing environment and draws via dateFreqDistribution() and legend().
   #
   # selected_names: names of the series (columns of seq_hist_breakdown) to
   #                 draw; names not present in seq_hist_names are ignored.
   # ymax:           upper y limit; when NA it is 1.05 times the largest
   #                 per-day total over the selected series.
   # Stops with an error if none of the requested names is available.
   draw_seq_freq_states <- function(selected_names, ymax = NA) {
     
     selected_columns <- seq_hist_names %in% selected_names
     if (!any(selected_columns)) {
       stop("None of the requested series are available: ",
            paste(selected_names, collapse = ", "), call. = FALSE)
     }

     if (is.na(ymax)) {
       # drop = FALSE keeps a data frame for a single series, so rowSums()
       # covers both the one-series and the multi-series case.
       ymax <- max(rowSums(seq_hist_breakdown[, selected_columns, drop = FALSE])) * 1.05
     }
     
     # Palette entries looked up by series name, in plotting order.
     pal <- dePal[match(seq_hist_names[selected_columns], names(dePal))]
     
     # Diagnostic only: emitted as a message so it does not end up in the
     # rendered report (chunks are knitted with message=FALSE).
     message(paste(pal, collapse = " "))
     
     dateFreqDistribution(t(seq_hist_breakdown[,selected_columns]), sampleBreaks, plot.ci=FALSE, barplot=TRUE,
                          startDate = sampleBreaks[1], 
                          endDate = sampleBreaks[length(sampleBreaks)-1],    
                          col=mPal(unlist(pal), 0.75), border=mPal(unlist(pal)), ymax=ymax,
                          ylab = "Frequency of genomes\n(per day)")
     
     # Legend above the plot region (xpd=TRUE allows drawing in the margin).
     legend("top", horiz=FALSE, inset=c(0,-0.3), bty='n', xpd=TRUE, ncol=3,
            fill=mPal(unlist(pal), 0.75), border = mPal(unlist(pal)),
            legend = seq_hist_names[selected_columns],
            cex=0.8)
     
   }
   
   # Sequence histogram
   # Margins and axis/label text sizes for the plots drawn below.
   par(mar=c(4,6,2.5,6), cex.axis=0.7, cex.lab=0.8, mgp=c(3,0.75,0))
   # Daily genome counts for the Germany and non-Germany series in one plot.
   draw_seq_freq_states(c("Germany", "Non Germany"))
   # Germany only; the explicit ymax equals what the function would compute by
   # default for a single series.
   draw_seq_freq_states(c("Germany"), ymax=max(seq_hist_breakdown[, "Germany"]) * 1.05)
   # Fixed y limit; referenced only by the commented-out per-state calls below.
   ymax=400 * 1.05
   # draw_seq_freq_states(c("Hamburg"), ymax=ymax)
   # draw_seq_freq_states(c("Bavaria"), ymax=ymax)
   # draw_seq_freq_states(c("North_Rhine-Westphalia"), ymax=ymax)
   # draw_seq_freq_states(c("Saarland"), ymax=ymax)
   # # draw_seq_freq_states(c("Saxony"))
   # draw_seq_freq_states(c("Lower_Saxony"), ymax=ymax)
   # 
   # draw_seq_freq_states(c("Munich"), ymax=ymax)
   # draw_seq_freq_states(c("Dusseldorf"), ymax=ymax)

   # # dateFreqDistribution(t(seq_hist_breakdown), sampleBreaks, plot.ci=FALSE, barplot=TRUE,
   # #                      startDate = sampleBreaks[1], endDate = sampleBreaks[length(sampleBreaks)-1], col=mPal(unlist(ukPal)[1], 0.75), border=mPal(unlist(ukPal))[1], ymax=1100,
   # #                      ylab = "Frequency of genomes\n(per day)")
   # 
   # dateFreqDistribution(t(seq_hist_breakdown), sampleBreaks, plot.ci=FALSE, barplot=TRUE,
   #                      startDate = sampleBreaks[1], 
   #                      endDate = sampleBreaks[length(sampleBreaks)-1],    
   #                      col=mPal(unlist(dePal), 0.75), border=mPal(unlist(dePal)), ymax=max(seq_hist_breakdown)+2,
   #                      ylab = "Frequency of genomes\n(per day)")
   # 
   

      # 
   # legend("top", horiz=FALSE, inset=c(0,-0.3), bty='n', xpd=TRUE, ncol=3,
   #        fill=mPal(unlist(ukPal), 0.75), border = mPal(unlist(ukPal)),
   #        legend = seq_hist_names,
   #        # legend = c("Germany", "Other countries"),
   #        #legend = c("England", "Scotland", "Wales", "Northern Ireland", "Other countries"),
   #        cex=0.8)
   # 
   # dateFreqDistribution(t(seq_hist_breakdown), sampleBreaks, plot.ci=FALSE, barplot=TRUE,
   #                      startDate = sampleBreaks[1], 
   #                      endDate = sampleBreaks[length(sampleBreaks)-1],    
   #                      col=mPal(unlist(dePal), 0.75), border=mPal(unlist(dePal)), ymax=max(seq_hist_breakdown)+2,
   #                      ylab = "Frequency of genomes\n(per day)")
   # 
   # 
   # 
   # legend("top", horiz=FALSE, inset=c(0,-0.3), bty='n', xpd=TRUE, ncol=3,
   #        fill=mPal(unlist(ukPal), 0.75), border = mPal(unlist(ukPal)),
   #        legend = seq_hist_names,
   #        # legend = c("Germany", "Other countries"),
   #        #legend = c("England", "Scotland", "Wales", "Northern Ireland", "Other countries"),
   #        cex=0.8)

   
   #   seq_hist_breakdown <- data.frame("de" = seq_hist_de$counts)  
   #   
   #   dateFreqDistribution(t(seq_hist_breakdown), sampleBreaks, plot.ci=FALSE, barplot=TRUE,
   #                      startDate = sampleBreaks[1], endDate = sampleBreaks[length(sampleBreaks)-1], col=mPal(unlist(dePal)[1], 0.75), border=mPal(unlist(dePal))[1], ymax=max(seq_hist_breakdown)+2,
   #                      ylab = "Frequency of genomes\n(per day)")
   # 
   # 
   # legend("top", horiz=FALSE, inset=c(0,-0.3), bty='n', xpd=TRUE, ncol=3,
   #        fill=mPal(unlist(ukPal), 0.75), border = mPal(unlist(ukPal)),
   #        legend = c(state, "Other places"),
   #        # legend = c("Germany", "Other countries"),
   #        #legend = c("England", "Scotland", "Wales", "Northern Ireland", "Other countries"),
   #        cex=0.8)

   # Re-weight histogram counts by a per-date factor and drop bins without one.
   #
   # Each bin is identified by its left edge (breaks without the last element),
   # which is looked up in `normalization_break`; the bin count is multiplied
   # by the matching element of `normalization_value`. Bins whose weighted
   # value is NA/NaN (no match, or a missing factor) are removed.
   #
   # counts:              bin counts, one per bin (length(breaks) - 1).
   # breaks:              bin edges.
   # normalization_break: values to match the left bin edges against.
   # normalization_value: factors, parallel to `normalization_break`.
   # Returns list(hist = weighted counts of the kept bins,
   #              breaks = left edges of the kept bins plus the right edge of
   #                       the last kept bin).
   # If no bin can be weighted, both elements are empty.
   # Note: if the kept bins are not contiguous, the returned breaks no longer
   # delimit the kept bins one-to-one.
   normalize_hist <- function(counts, breaks, normalization_break, normalization_value) {
     hist_index <- match(breaks[-length(breaks)], normalization_break)
     hist_weighted <- counts * normalization_value[hist_index]
     # is.na() is also TRUE for NaN. Integer positions are used instead of a
     # logical mask so that the mask (one shorter than `breaks`) is never
     # recycled onto the final break.
     valid_bins <- which(!is.na(hist_weighted))
     if (length(valid_bins) == 0) {
       return(list("hist"=hist_weighted[0], "breaks"=breaks[0]))
     }
     list("hist"=hist_weighted[valid_bins],
          "breaks"=breaks[c(valid_bins, max(valid_bins) + 1)])
   }
   
   # x <- normalize_hist(seq_hist_de$counts, sampleBreaks, case_data$date, case_data$case / case_data$seq_smooth)
   # seq_hist_de_weighted = x$hist;
   # sampleBreaks_weighted = x$breaks;
   # 
   # # hist_index <- match(sampleBreaks[-length(sampleBreaks)], case_data$date)
   # # seq_hist_de_weighted <- seq_hist_de$counts / case_data$seq_smooth[hist_index] * case_data$case[hist_index]
   # # 
   # # valid_index <- !is.na(seq_hist_de_weighted) & !is.nan(seq_hist_de_weighted)
   # # seq_hist_de_weighted <- seq_hist_de_weighted[valid_index]
   # # valid_index[max(which(valid_index))+1] <- TRUE
   # # sampleBreaks_weighted <- sampleBreaks[valid_index]
   # 
   # seq_hist_breakdown <- data.frame("de" = seq_hist_de_weighted)  
   # 
   # # dateFreqDistribution(t(seq_hist_breakdown), sampleBreaks_weighted, plot.ci=FALSE, barplot=TRUE,
   # #                      startDate = sampleBreaks_weighted[1], endDate = sampleBreaks_weighted[length(sampleBreaks_weighted)-1], col=mPal(unlist(dePal)[1], 0.75), border=mPal(unlist(dePal))[1], ymax=50000,
   # #                      ylab = "Frequency of genomes\n(per day)")
   # # 
   # # 
   # # legend("top", horiz=FALSE, inset=c(0,-0.3), bty='n', xpd=TRUE, ncol=3,
   # #        fill=mPal(unlist(ukPal), 0.75), border = mPal(unlist(ukPal)),
   # #        legend = c("Germany", "Other countries"),
   # #        #legend = c("England", "Scotland", "Wales", "Northern Ireland", "Other countries"),
   # #        cex=0.8)
   # # 
   # # Cumulative sequences, cases and infections
   # # par(new = TRUE)
   # # ylims <- c(1,1E7)
   # # plot(1, type='n', xlim=c(startDate, as.Date("2020-06-21")), ylim=ylims,
   # #      axes=FALSE, xlab="", ylab="", log='y', yaxs='i', xaxs='i')
   # # 
   # # #axis(4, las=1, at=axTicks(4), labels=format(axTicks(4), scientific=FALSE))
   # # plotLogAxis(lim=ylims, side=4)
   # # mtext(side=4, text="Cumulative cases and genomes", line=2, cex=0.8)
   # 
   # 
   # #ukseqs_daily <- data.frame(date = sampleBreaks[1:(length(sampleBreaks)-1)],
   # #                          seqs = rowSums(seq_hist_breakdown[, -c(5)]))
   # #lines(ukseqs_daily$date, cumsum(ukseqs_daily$seqs), lwd=2)
   # 
   # # polygon(c(europeInfections$time[europeInfections$country == "United_Kingdom"],
   # #           rev(europeInfections$time[europeInfections$country == "United_Kingdom"])),
   # #         c(europeInfections$predicted_infections_higher_CI_95_cumulative[europeInfections$country == "United_Kingdom"],
   # #           rev(europeInfections$predicted_infections_lower_CI_95_cumulative[europeInfections$country == "United_Kingdom"])),
   # #           border=NA, col = mPal(ukPal$oth, 0.75))
   # # lines(europeInfections$time[europeInfections$country == "United_Kingdom"], europeInfections$predicted_infections_mean_cumulative[europeInfections$country == "United_Kingdom"],
   # #       lty=3, lwd=2)
   # #lines(c(ukCases$date, min(ukCases$date)-1), c(ukCases$cumCasesBySpecimenDate, 1E-10), lwd=2, lty=2)
   # 
   # # idx <- which(ukWeekly$date == as.Date("2020-05-17"))
   # # text(x = as.Date("2020-05-17"), y = ukWeekly$infectionsCum[idx]+5E6, "Estimated UK\ninfections",
   # #      pos=1, xpd=TRUE, cex=0.6)
   # # text(x = as.Date("2020-05-17"), y = ukWeekly$casesCum[idx]+1.5E5, "Reported UK\ncases",
   # #      pos=1, xpd=TRUE, cex=0.6)
   # # text(x = as.Date("2020-05-17"), y = ukWeekly$seqsCum[idx], "UK genomes\nsequenced",
   # #      pos=1, xpd=TRUE, cex=0.6)
   # 

```


```{r sample-density, fig.width=6, fig.height=3, fig.cap = "Top row: Proportions of laboratory-confirmed cases sequenced. Bottom row: Proportions of the estimated number of infections (estimate from Flaxman et al. 2020)."}

#    layout(matrix(1:4, nrow = 2, byrow=FALSE))
#
#    idx <- which(ukWeekly$date == as.Date("2020-04-05"))
#
#    # Sequencing proportions
#    par(mar=c(0,0,1,2))
#    plotPieProp(ukWeekly$seqsCum[idx], ukWeekly$casesCum[idx], title="Reported cases by 5 April", col=ukPal$sct, line=0)
#    
#    par(mar=c(0,0,1,2))
#    plotPieProp(ukWeekly$seqsCum[idx], ukWeekly$infectionsCum[idx], title="Estimated infections by 5 April", col=ukPal$sct, line=0)
#    
#    idx <- which(ukWeekly$date == as.Date("2020-05-03"))
#
#    # Sequencing proportions
#    par(mar=c(0,0,1,2))
#    plotPieProp(ukWeekly$seqsCum[idx], ukWeekly$casesCum[idx], title="Reported cases by May 3", col=ukPal$sct, line=0)
#    
#    par(mar=c(0,0,1,2))
#    plotPieProp(ukWeekly$seqsCum[idx], ukWeekly$infectionsCum[idx], title="Estimated infections by May 3", col=ukPal$sct, line=0)
#

```

```{r sampling-proportion-weekly, fig.width=7, fig.height=3, fig.cap = "Proportion of weekly reported cases (solid line) and estimated infections (dotted line; Flaxman et al. 2020) included in our genome sequence dataset."}

#    cleanProp <- function(x) {
#        x[is.nan(x)] <- NA
#        x[is.infinite(x)] <- NA
#        x[x > 1] <- 1
#        return(x)
#    }
#
#    caseProp <- cleanProp(ukWeekly$seqs/ukWeekly$cases)
#    infProp  <- cleanProp(ukWeekly$seqs/ukWeekly$infections)
#    infMask  <- !is.na(infProp)
#
#    par(mar=c(4,6,2.5,6), cex.axis=0.7, cex.lab=0.8, mgp=c(3,0.75,0))
#
#    plotShadedAxes(xlim=c(startDate, as.Date("2020-06-21")), ylim=c(0, 1.015), yaxs='i', ylab="Proportion")
#  
#    #polygon(c(ukWeekly$date[infMask], rev(ukWeekly$date[infMask])), c(cleanProp(ukWeekly$seqs/ukWeekly$infectionsLower)[infMask], rev(cleanProp(ukWeekly$seqs/ukWeekly$infectionsUpper)[infMask])), 
#    #        col = mPal(ukPal$oth, 0.75), border=NA)
#    infPropLower <- cleanProp(ukWeekly$seqs/ukWeekly$infectionsLower)
#    infPropUpper <- cleanProp(ukWeekly$seqs/ukWeekly$infectionsUpper)
#    for (i in 2:length(infMask)) {
#        rect(ukWeekly$date[i-1], infPropLower[i], ukWeekly$date[i], infPropUpper[i], col = mPal(ukPal$oth, 0.75), border=NA)
#    }
#    lines(ukWeekly$date, infProp, lty=3, lwd=2, type='S')
#    idx <- min(which(infMask))
#    lines(ukWeekly$date[(idx-1):idx], rep(infProp[idx],2), lty=3, lwd=2)
#    lines(ukWeekly$date, caseProp, lty=1, lwd=2, type='S')
#    
#    
#    legend("bottom", horiz=FALSE, inset=c(0,1), bty='n', xpd=TRUE, ncol=2,
#           lty=c(1,3), lwd=2,
#           legend = c("reported cases", "estimated infections"), title="Sequenced proportion of weekly", cex=par("cex.lab"))
#    
#    # Inset start
#    ymax <- 0.1
#    insetStart <- as.Date("2020-02-16")
#    insetEnd   <- as.Date("2020-05-10")
#    
#    rect(insetStart, 0, insetEnd, ymax, lty=2)
#
#    par(mar=c(2,2,0.5,0.5), cex.axis=0.7, cex.lab=0.8, mgp=c(3,0.75,0), fig=c(0.445, 0.875, 0.45, 0.88), new=TRUE)
#    
#    plotShadedAxes(xlim=c(insetStart, insetEnd), ylim=c(0, ymax), yaxs='i', side=4)
#  
#    #polygon(c(ukWeekly$date[infMask], rev(ukWeekly$date[infMask])), c(cleanProp(ukWeekly$seqs/ukWeekly$infectionsLower)[infMask], rev(cleanProp(ukWeekly$seqs/ukWeekly$infectionsUpper)[infMask])), 
#    #        col = mPal(ukPal$oth, 0.75), border=NA)
#    for (i in 2:length(infMask)) {
#        rect(ukWeekly$date[i-1], infPropLower[i], ukWeekly$date[i], infPropUpper[i], col = mPal(ukPal$oth, 0.75), border=NA)
#    }
#    lines(ukWeekly$date, infProp, lty=3, lwd=2, type='S')
#    idx <- min(which(infMask))
#    lines(ukWeekly$date[(idx-1):idx], rep(infProp[idx],2), lty=3, lwd=2)
#    lines(ukWeekly$date, caseProp, lty=1, lwd=2, type='S')
#
#    rect(insetStart, 0, insetEnd, ymax, xpd=TRUE)
#
#
#    #plotShadedAxes(xlim=c(startDate, as.Date("2020-06-21")), ylim=c(0, 10000), yaxs='i')
#    #lines(ukCases$date, ukCases$newCasesBySpecimenDate)
#    
#    #dateFreqDistribution(rev(ukCases$newCasesBySpecimenDate[2:nrow(ukCases)]), rev(ukCases$date), startDate="2020-01-31", endDate="2020-06-21", barplot=TRUE)
    
```


```{r sampling-proportion-weekly-flipped, fig.width=7, fig.height=3, fig.cap = "Proportion of weekly reported cases (solid line) and estimated infections (dotted line; Flaxman et al. 2020) included in our genome sequence dataset."}

#    cleanProp <- function(x) {
#        x[is.nan(x)] <- NA
#        x[is.infinite(x)] <- NA
#        x[x > 1] <- 1
#        return(x)
#    }
#
#    caseProp <- cleanProp(ukWeekly$seqs/ukWeekly$cases)
#    infProp  <- cleanProp(ukWeekly$seqs/ukWeekly$infections)
#    infMask  <- !is.na(infProp)
#
#    par(mar=c(4,6,2.5,6), cex.axis=0.7, cex.lab=0.8, mgp=c(3,0.75,0))
#
#    plotShadedAxes(xlim=c(startDate, as.Date("2020-06-21")), ylim=c(0, 0.1), yaxs='i', ylab="Proportion")
#  
#    infPropLower <- cleanProp(ukWeekly$seqs/ukWeekly$infectionsLower)
#    infPropUpper <- cleanProp(ukWeekly$seqs/ukWeekly$infectionsUpper)
#    for (i in 2:length(infMask)) {
#        rect(ukWeekly$date[i-1], infPropLower[i], ukWeekly$date[i], infPropUpper[i], col = mPal(ukPal$oth, 0.75), border=NA)
#    }
#    lines(ukWeekly$date, infProp, lty=3, lwd=2, type='S')
#    idx <- min(which(infMask))
#    lines(ukWeekly$date[(idx-1):idx], rep(infProp[idx],2), lty=3, lwd=2)
#    lines(ukWeekly$date, caseProp, lty=1, lwd=2, type='S')
#    
#    
#    legend("bottomleft", horiz=FALSE, inset=c(0,1), bty='n', xpd=TRUE, ncol=2,
#           lty=c(1,3), lwd=2,
#           legend = c("reported cases", "estimated infections"), title="Sequenced proportion of weekly", cex=par("cex.lab"))
#
#    # Inset start
#    ymax <- 1
#    insetStart <- as.Date("2020-01-19")
#    insetEnd   <- as.Date("2020-06-21")
#    
#    #rect(insetStart, 0, insetEnd, ymax, lty=2)
#    
#    par(mar=c(2,2,0.5,0.5), cex.axis=0.7, cex.lab=0.8, mgp=c(3,0.75,0), fig=c(0.65, 0.95, 0.55, 0.95), new=TRUE)
#    
#    plotShadedAxes(xlim=c(insetStart, insetEnd), ylim=c(0, ymax), yaxs='i', side=2, smallBreaks = "weeks", thinXLabel = 4, thinYLabel=5)
#    
#    #polygon(c(ukWeekly$date[infMask], rev(ukWeekly$date[infMask])), c(cleanProp(ukWeekly$seqs/ukWeekly$infectionsLower)[infMask], rev(cleanProp(ukWeekly$seqs/ukWeekly$infectionsUpper)[infMask])),
#    #        col = mPal(ukPal$oth, 0.75), border=NA)
#    for (i in 2:length(infMask)) {
#        rect(ukWeekly$date[i-1], infPropLower[i], ukWeekly$date[i], infPropUpper[i], col = mPal(ukPal$oth, 0.75), border=NA)
#    }
#    lines(ukWeekly$date, infProp, lty=3, lwd=2, type='S')
#    idx <- min(which(infMask))
#    lines(ukWeekly$date[(idx-1):idx], rep(infProp[idx],2), lty=3, lwd=2)
#    lines(ukWeekly$date, caseProp, lty=1, lwd=2, type='S')
#    
#    rect(insetStart, 0, insetEnd, ymax, xpd=TRUE)
    
```


```{r sampling-proportion-weekly-infections, fig.width=7, fig.height=3, fig.cap = "Proportion of weekly estimated infections (Flaxman et al. 2020) included in our genome sequence dataset."}
#
#    cleanProp <- function(x) {
#        x[is.nan(x)] <- NA
#        x[is.infinite(x)] <- NA
#        x[x > 1] <- 1
#        return(x)
#    }
#
#
#    par(mar=c(4,6,2.5,6), cex.axis=0.7, cex.lab=0.8, mgp=c(3,0.75,0))
#
#    plotShadedAxes(xlim=c(startDate, as.Date("2020-06-21")), ylim=c(0, 0.1), yaxs='i', ylab="Proportion")
#
#    infProp  <- cleanProp(ukWeekly$seqs/ukWeekly$infections)
#    infMask  <- !is.na(infProp)
#    infPropLower <- cleanProp(ukWeekly$seqs/ukWeekly$infectionsLower)
#    infPropUpper <- cleanProp(ukWeekly$seqs/ukWeekly$infectionsUpper)
#    for (i in 2:length(infMask)) {
#        rect(ukWeekly$date[i-1], infPropLower[i], ukWeekly$date[i], infPropUpper[i], col = mPal(ukPal$oth, 0.75), border=NA)
#    }
#    lines(ukWeekly$date, infProp, lty=3, lwd=2, type='S')
#    idx <- min(which(infMask))
#    lines(ukWeekly$date[(idx-1):idx], rep(infProp[idx],2), lty=3, lwd=2)
#    
#    #legend("bottomleft", horiz=FALSE, inset=c(0,1), bty='n', xpd=TRUE, ncol=2,
#    #       lty=c(1,3), lwd=2,
#    #       legend = c("reported cases", "estimated infections"), title="Sequenced proportion of weekly", cex=par("cex.lab"))
#
#    
#    
```


```{r sampling-proportion-cumulative, fig.width=7, fig.height=3, fig.cap = "Proportion of the cumulative weekly reported cases (solid line) and estimated infections (dotted line; Flaxman et al. 2020) included in our genome sequence dataset over time."}
#
#    getCumulativeSequences <- function(dateRange, seqDates) {
#        
#        return <- data.frame(date = dateRange, 
#                             seqs = sapply(dateRange, function(x) sum(seqDates <= x)))
#    
#    }
#    
#    seqsCum     <- getCumulativeSequences(ukCases$date, metadata$sample_date[metadata$country == "UK"])
#    cumCaseProp <- cleanProp(seqsCum$seqs/ukCases$cumCasesBySpecimenDate)
#    
#    seqsCum     <- getCumulativeSequences(europeInfections$time[europeInfections$country == "United_Kingdom"], metadata$sample_date[metadata$country == "UK"])
#    cumInfProp  <- cleanProp(seqsCum$seqs/europeInfections$predicted_infections_mean_cumulative[europeInfections$country == "United_Kingdom"])
#    cumInfPropL <- cleanProp(seqsCum$seqs/europeInfections$predicted_infections_lower_CI_95_cumulative[europeInfections$country == "United_Kingdom"])
#    cumInfPropU <- cleanProp(seqsCum$seqs/europeInfections$predicted_infections_higher_CI_95_cumulative[europeInfections$country == "United_Kingdom"])  
#    
#    par(mar=c(4,6,2.5,6), cex.axis=0.7, cex.lab=0.8, mgp=c(3,0.75,0))
#    
#    plotShadedAxes(xlim=c(startDate, as.Date("2020-06-21")), ylim=c(0, 1.015), yaxs='i', ylab="Cumulative proportion")
#    
#    polygon(c(seqsCum$date, rev(seqsCum$date)), c(cumInfPropL, rev(cumInfPropU)), col = mPal(ukPal$oth, 0.75), border=NA)
#    lines(seqsCum$date, cumInfProp, lty=3, lwd=2)
#    lines(ukCases$date, cumCaseProp, lty=1, lwd=2)
#    
#    legend("bottom", horiz=FALSE, inset=c(0,1), bty='n', xpd=TRUE, ncol=2,
#           lty=c(1,3), lwd=2,
#           legend = c("reported cases", "estimated infections"), title="Sequenced proportion of cumulative", cex=par("cex.lab"))
#    
#    # Inset start
#    ymax <- 0.05
#    insetStart <- as.Date("2020-02-16")
#    insetEnd   <- as.Date("2020-05-10")
#    
#    rect(insetStart, 0, insetEnd, ymax, lty=2)
#    
#    par(mar=c(2,2,0.5,0.5), cex.axis=0.7, cex.lab=0.8, mgp=c(3,0.75,0), fig=c(0.445, 0.875, 0.45, 0.88), new=TRUE)
#    
#    plotShadedAxes(xlim=c(insetStart, insetEnd), ylim=c(0, ymax), yaxs='i', side=4)
#    
#    polygon(c(seqsCum$date, rev(seqsCum$date)), c(cumInfPropL, rev(cumInfPropU)), col = mPal(ukPal$oth, 0.75), border=NA)
#    lines(seqsCum$date, cumInfProp, lty=3, lwd=2)
#    lines(ukCases$date, cumCaseProp, lty=1, lwd=2)
#    
#    rect(insetStart, 0, insetEnd, ymax, xpd=TRUE)
#
```


```{r travel-cases-all, fig.width=5, fig.height=3, fig.cap="Estimated number of inbound travellers to the per day (black) and estimated number of infectious cases worldwide (dashed red, 7-day rolling average). Arrows here shows from left to right the first self-isolation advice for returning travellers from China, Italy, and the start of the national lockdown."}
#
#    par(mar=c(4,6,2.5,6), cex.axis=0.7, cex.lab=0.8, mgp=c(3.25,0.75,0))
#    plotArrivalsInfections(arrivals, infections, location="all", startDate = startDate, endDate = endDate, cex.lab=0.8)
#    
#    # Mark lockdown    
#    points(x=travelDates$lockdown, y=0.1*1E6, pch=175, font=5, cex=1.5, xpd=TRUE, col=mPal(ukPal$eng))
#    
#    # Mark self-isolation advice
#    points(x=travelDates$china, y=0.1*1E6, pch=175, font=5, cex=1.5, xpd=TRUE, col=mPal(countryPal$China))
#    points(x=travelDates$italy, y=0.1*1E6, pch=175, font=5, cex=1.5, xpd=TRUE, col=mPal(countryPal$Italy))
```


```{r travel-cases-countries-top12, fig.width=15, fig.height=12, fig.cap="Estimated numbers of inbound travellers to the `r state` per day, and estimated number of new infections per day, for the 12 countries we estimate to have been responsible for the most importations to the (see Table 3). We estimate that these 12 countries contributed 97.6% of importations to the `r states`"}
#
#    layout(matrix(c(1:12), nrow=4, byrow=TRUE))
#    par(mar=c(4,6,2.5,6), cex.axis=0.8, cex.lab=1.2, cex.main=1.5, mgp=c(3.25,0.5,0))
#
#    countries1 <- c("Spain", 
#                    "France", 
#                    "Italy",
#                    "US",
#                    "Netherlands",
#                    "Belgium",
#                    "Ireland", 
#                    "Germany",
#                    "Switzerland",
#                    "Portugal",
#                    "Sweden", 
#                    "China")
#    
#    # Specific countries
#    for (i in 1:length(countries1)) {
#        plotArrivalsInfections(arrivals, infections, location=countries1[i], startDate = startDate, endDate = endDate, cex.lab=0.8, label=LETTERS[i])
#        title(capitalise(countries1[i]))
#    }
    
```

```{r travel-cases-countries-other, fig.width=15, fig.height=12, fig.cap="Estimated numbers of inbound travellers to the `r states` per day, and estimated number of new infections per day, for a range of countries. (A-F) shows the 6 countries that made the largest contribution to importations after the 12 in the previous figure (see Table 3). Together these 6 countries contributed 1.19% of estimated importations to the `r states` (G-L) 6 countries with large epidemics that did not contribute many importations to the `r states`, either because of low numbers of inbound travellers or because their epidemics started later. These 6 countries combined contributed less than 0.5% of importations into `r states`."}
#
#    layout(matrix(c(1:12), nrow=4, byrow=TRUE))
#    par(mar=c(4,6,2.5,6), cex.axis=0.7, cex.lab=1.2, cex.main=1.5, mgp=c(3.25,0.5,0))
#
#    countries2 <- c("Denmark", "Austria", "Romania", "Norway", "Poland", "Canada", "Iran", "Brazil", "India", "Russia", "Mexico", "South Africa") # Japan, Korea, South
#    
#    # Specific countries
#    for (i in 1:length(countries2)) {
#        plotArrivalsInfections(arrivals, infections, location=countries2[i], startDate = startDate, endDate = endDate, cex.lab=0.8, label=LETTERS[i])
#        title(capitalise(countries2[i]))
#    }
#    
    
```


```{r eii-tmrca-all, fig.width=5, fig.height=3, fig.cap="Estimated importation intensity (EII) curve (black) and the histogram of lineage TMRCAs (grey). Arrows here shows from left to right the first self-isolation advice for returning travellers from China, Italy, and the start of the `r states` national lockdown."}


    #tmrcaBreaks <- seq.Date(as.Date("2019-12-01"), as.Date("2020-06-26"), by="days")
    # tmrcaBreaks <- seq.Date(min(clusterStatsMCC$tmrca_shifted_calendar), max(clusterStatsMCC$tmrca_calendar), by="1 days")
    # tmrca_hist  <- hist(clusterStatsMCC$tmrca_calendar, breaks=tmrcaBreaks, plot=FALSE, right=FALSE)   
    

    # par(mar=c(4,6,2.5,6), cex.axis=0.7, cex.lab=0.8, mgp=c(3,0.75,0))
    # dateFreqDistribution(tmrca_hist$counts, tmrcaBreaks, plot.ci=FALSE, side=4, ylab="",
    #                      #startDate = startDate, endDate = endDate, col=mPal(ukPal$oth), ymax=70)
    #                      startDate = tmrcaBreaks[1], endDate = tmrcaBreaks[length(tmrcaBreaks)-1], col=mPal(ukPal$oth), ymax=30)
    # mtext(side=4, text="Frequency of TMRCAs\n(per day)", line=3, cex=par("cex.lab"))
    # 
    # par(new=TRUE)
    # ymax <- 1200
    # 
    # plot(1, type='n', xlim=c(startDate, endDate), ylim=c(0,1200), axes=FALSE, 
    #      xaxs='i', yaxs='i', xlab="", ylab="")
    # lines(eii$date[eii$location == "all"], eii$num_intros[eii$location == "all"], lwd=2)
    # axis(2, las=1)
    # mtext(side=2, text="Estimated importation intensity\n(solid line)", line=2, cex=par("cex.lab"))
    # 
    # # Mark lockdown    
    # points(x=travelDates$lockdown, y=0.1*ymax, pch=175, font=5, cex=1.5, xpd=TRUE, col=mPal(ukPal$eng))
    # 
    # # Mark self-isolation advice
    # points(x=travelDates$china, y=0.1*ymax, pch=175, font=5, cex=1.5, xpd=TRUE, col=mPal(countryPal$China))
    # points(x=travelDates$italy, y=0.1*ymax, pch=175, font=5, cex=1.5, xpd=TRUE, col=mPal(countryPal$Italy))

```


```{r eii-tmrca-countries, fig.width=10, fig.height=8, fig.cap="Estimated importation intensity (EII) curves for the 12 countries estimated to have contributed the most importations to the `r states` epidemic (see Table 3). Panel A shows the EII for all countries. The red arrows indicate the start of the `r states` lockdown."}
#    
#    layout(matrix(c(1,2,5,6,3,4,7,8), nrow=4, byrow=TRUE), heights=c(3,1,3,1))
#    par(mar=c(4,6,2.5,6), cex.axis=1, cex.lab=1.2, cex.main=1.5, mgp=c(3,0.75,0))
#  
#    plotShadedAxes(xlim=c(startDate, endDate), ylim=c(0, 1200), axes=FALSE, label="A", line=0, yaxs='i', 
#                   ylab = "Estimated importation intensity")
#    lines(eii$date[eii$location == "all"], eii$num_intros[eii$location == "all"], lwd=2)
#    abline(v=as.Date(c("2020-03-12", "2020-03-16", "2020-03-20")), lty=3, col="black")
#    abline(v=as.Date("2020-03-23"), lty=1, col=ukPal$eng)
#    
#    #points(x=travelDates$lockdown, y=0.075*1200, pch=175, font=5, cex=1.5, xpd=TRUE, col=mPal(ukPal$eng))
#
#    
#    ymax <- list(B=500, C=100, D=20)
#    panelCountries <- list(B=c("Spain", "France", "Italy", "Belgium"), 
#                           C=c("Netherlands", "Ireland", "Switzerland", "US"), 
#                           D=c("Germany", "Sweden", "Portugal", "China", "Other"))
#    for (i in names(panelCountries)) {
#      
#      
#        plotCountries <- panelCountries[[i]]
#        
#        plotShadedAxes(xlim=c(startDate, endDate), ylim=c(0, ymax[[i]]), axes=FALSE, label=i, line=0, yaxs='i', 
#                       ylab = "Estimated importation intensity")
#    
#        for (country in plotCountries) {
#            lines(eii$date[eii$location == country], eii$num_intros[eii$location == country], col=mPal(countryPal[[country]]), lwd=2)
#            #points(eii$date[eii$location == country], eii$num_intros[eii$location == country], col=mPal(countryPal[[country]]), pch=20)
#        }
#        
#        legend('bottom', horiz=FALSE, inset=c(0, 1), bty='n', 
#               col = sapply(plotCountries, function(x) countryPal[[x]]), 
#               legend=plotCountries, 
#               ncol=ceiling(length(plotCountries)/2), seg.len=1, lwd=2, lty=1, xpd=TRUE, cex=par("cex.axis"))
#        
#        # Mark lockdown    
#        #points(x=travelDates$lockdown, y=0.075*ymax[[i]], pch=175, font=5, cex=1.5, xpd=TRUE, col=mPal(ukPal$eng))
#        abline(v=as.Date("2020-03-23"), lty=1, col=ukPal$eng)
#        
#        if (i == "B") { 
#            abline(v=as.Date("2020-02-25"), lty=3, col=countryPal$Italy)
#        } else 
#        if (i == "D") {
#            abline(v=as.Date("2020-01-28"), lty=3, col=countryPal$China)
#        }
#
#    }
#    
#    
#    
#    ###################################
#    # NPIs and travel recommendations #
#    ###################################
#    par(mar=c(1,6,0,6))
#
#    plot(1, type='n', ylim=c(0,8.5), xlim=c(startDate, endDate),
#         xaxs='i', yaxs='i', bty='n', axes=FALSE, xlab="", ylab="", las=2)
#    
#    # Self isolate if symptomatic (experiencing a cough or fever symptoms)
#    rect(as.Date("2020-03-12"), 6, endDate, 7, border=NA, col=mPal(ukPal$oth))
#    
#    # Social distancing encouraged (advice against non-essential travel and contact with others, 
#    # avoid pubs, clubs, theatres and work from home if possible)
#    rect(as.Date("2020-03-16"), 4.5, endDate, 5.5, border=NA, col=mPal(ukPal$oth))
#    
#    # School closure ordered, closure of public venues (pubs, restaurants, gyms, leisure centres, nightclubs, theatres, cinemas)
#    rect(as.Date("2020-03-20"), 3, endDate, 4, border=NA, col=mPal(ukPal$oth))
#    
#    # Lockdown
#    rect(as.Date("2020-03-23"), 1.5, endDate, 2.5, border=NA, col=mPal(ukPal$eng))
#    
#    text(x=endDate, y=7.5, "Non-pharmaceutical interventions ", cex=par("cex.lab"), pos=2)
#    text(x=endDate, y=c(2, 3.5, 5, 6.5)-0.1,
#           c("Lockdown", 
#             "School closure", 
#             "Social distancing", 
#             "Self-isolation"),
#         pos=2, srt=0, cex=par("cex.axis"), col="white", srt=0, xpd=TRUE)
#
#    
#    
#    
#    
#    plot(1, type='n', ylim=c(0,8.5), xlim=c(startDate, endDate),
#         xaxs='i', yaxs='i', bty='n', axes=FALSE, xlab="", ylab="", las=2)
#    
#    # Italy, Iran, Vietnam, Cambodia, Laos, Myanmar
#    rect(as.Date("2020-02-25"), 6, endDate, 7, border=NA, col=mPal(countryPal$Italy))
#    rect(as.Date("2020-02-25"), 6, as.Date("2020-03-05"), 7, border=NA, angle=45, density = 50, col="white")
#
#
#    text(x=endDate, y=7.5, "Self-isolation advice", cex=par("cex.lab"), pos=2)
#    text(x=endDate, y=6.5, "Italy", pos=2, srt=0, cex=par("cex.axis"), col="white", srt=0, xpd=TRUE)
#
#    
#        
#    plot(1, type='n', axes=FALSE, ylab="", xlab="")
#    
#    
#    
#    plot(1, type='n', ylim=c(0,8.5), xlim=c(startDate, endDate),
#         xaxs='i', yaxs='i', bty='n', axes=FALSE, xlab="", ylab="", las=2)
#
#    # China
#    rect(as.Date("2020-01-28"), 6, endDate, 7, border=NA, col=mPal(countryPal$China))
#    rect(as.Date("2020-01-28"), 6, as.Date("2020-01-31"), 7, border=NA, angle=45, density = 50, col="white")
#    
#    text(x=endDate, y=7.5, "Self-isolation advice", cex=par("cex.lab"), pos=2)
#    text(x=endDate, y=6.5, "China", pos=2, srt=0, cex=par("cex.axis"), col="white", srt=0, xpd=TRUE)
#
```


\clearpage


# Lineage importation distribution (shifted TMRCA distribution)
- GISAID tree until `r endDate` as initial tree.
- The tree is time-calibrated by TreeTime.
- The Sankoff algorithm is used to assign a location (`r states` and non-`r states`) to the inner vertices of the tree, for each state separately.
- Dataset contains 
  `r rsumstate(function(state, i, clusterStatsMCC) length(levels(as.factor(clusterStatsMCC$cluster))))` 
  transmission lineages (2 or more sequences), comprising 
  `r rsumstate(function(state, i, clusterStatsMCC)  sum(clusterStatsMCC$seqs))` 
  sequences from the `r states`, as well as a further 
  `r rsumstate(function(state, i, clusterStatsMCC)  sum(stateInfo$metadata_instate[,i]) - sum(clusterStatsMCC$seqs))` 
  singletons.
- Mean and SD of the importation (shifted TMRCA) distribution: 
  `r rsumstate(function(state, i, clusterStatsMCC) paste(round_date(date_decimal(mean(clusterStatsMCC$tmrca_shifted)), unit="day"), "±",  round(sd(clusterStatsMCC$tmrca_shifted)*366,3) ) )` days (singletons excluded).
- Median and interquartile range of the importation (shifted TMRCA) distribution `r rsumstate(function(state, i, clusterStatsMCC) round_date(date_decimal(median(clusterStatsMCC$tmrca_shifted)), unit="day") )` [`r rsumstate(function(state, i, clusterStatsMCC) round_date(date_decimal(quantile(clusterStatsMCC$tmrca_shifted, c(0.25, 0.75), na.rm=TRUE)), unit="day") )`] (singletons excluded).
- 80% of importations fall in [`r rsumstate(function(state, i, clusterStatsMCC) round_date(date_decimal(quantile(clusterStatsMCC$tmrca_shifted, c(0.1, 0.9), na.rm=TRUE)), unit="day") )`].


```{r shift-distribution, results="asis"}

  # Builds, for every state in stateFiles:
  #   * sizeShiftTable - one row per lineage size class and seven columns per
  #     state (number of lineages, importation lag mean/SD and median/IQR,
  #     detection lag mean/SD and median/IQR, number and percentage of
  #     sequences);
  #   * weekInfo[[s]]  - the per-week importation lags, detection lags and
  #     lineage sizes, consumed by the weekly table further down in this chunk.
  # weekBreaks and weekShiftNames are also read by that weekly table.
  sizeShiftTable <- data.frame(size = c("All", "2 to 10", "11 to 100", "101 to 1000", "Bigger than 1000"))
  colnames(sizeShiftTable) <- c("Lineages of size")

  weekInfo <- list()

  # The weekly bins depend only on the report period, so they are computed once
  # rather than once per state. seq_len() keeps the binning well defined when
  # the period is shorter than a week (a single break, hence no bins).
  weekBreaks     <- seq.Date(startDate, endDate, by="weeks")
  weekStarts     <- weekBreaks[seq_len(length(weekBreaks) - 1)]
  weekShiftNames <- format.Date(weekStarts, format="%y %b %d")

  # Split a per-lineage vector into the five size classes of the table rows;
  # `seqs` is the number of sequences of each lineage.
  splitBySizeClass <- function(values, seqs) {
    list(all   = values,
         small = values[seqs <= 10],
         med   = values[seqs > 10 & seqs <= 100],
         big   = values[seqs > 100 & seqs <= 1000],
         huge  = values[seqs > 1000])
  }

  # Split a per-lineage vector by the half-open week
  # [weekBreaks[i], weekBreaks[i+1]) that contains the lineage's importation date.
  splitByWeek <- function(values, importDates) {
    bins <- lapply(seq_along(weekStarts), function(i) {
      values[importDates >= weekBreaks[i] & importDates < weekBreaks[i + 1]]
    })
    names(bins) <- weekShiftNames
    bins
  }

  # Text summaries used in the table cells.
  meanSDText    <- function(x) paste0(round(mean(x),2), " ± ", round(sd(x),2))
  medianIQRText <- function(x) paste0(round(median(x),2), " [", paste(round(quantile(x, c(0.25, 0.75), na.rm=TRUE),2), collapse="-"), "]")

  for (s in seq_len(nrow(stateFiles))) {
    state <- stateFiles$state[s]
    clusterStatsMCC <- stateInfo$clusterStatsMCC[[s]]
    clusterSamplesMCC <- stateInfo$clusterSamplesMCC[[s]]

    # Per size class: importation lags, detection lags and lineage sizes
    sizeShifts <- splitBySizeClass(clusterStatsMCC$shift,         clusterStatsMCC$seqs)
    sizeLags   <- splitBySizeClass(clusterStatsMCC$detection_lag, clusterStatsMCC$seqs)
    sizeSeqs   <- splitBySizeClass(clusterStatsMCC$seqs,          clusterStatsMCC$seqs)

    # Per week of importation: the same three quantities
    weekShifts <- splitByWeek(clusterStatsMCC$shift,         clusterStatsMCC$tmrca_shifted_calendar)
    weekLags   <- splitByWeek(clusterStatsMCC$detection_lag, clusterStatsMCC$tmrca_shifted_calendar)
    weekSizes  <- splitByWeek(clusterStatsMCC$seqs,          clusterStatsMCC$tmrca_shifted_calendar)

    seqsPerClass <- sapply(sizeSeqs, sum)

    sizeShiftTable <- cbind(sizeShiftTable,
                            lineages    = sapply(sizeShifts, length),
                            shiftMeanSD = sapply(sizeShifts, meanSDText),
                            ShiftMedIQR = sapply(sizeShifts, medianIQRText),
                            lagMeanSD   = sapply(sizeLags, meanSDText),
                            lagMedIQR   = sapply(sizeLags, medianIQRText),
                            seqs        = seqsPerClass,
                            # Every lineage is counted twice in sum(seqsPerClass)
                            # (once in "all", once in its own size class), so the
                            # factor 2 turns the ratio into a share of all sequences.
                            seqsRatio   = round(seqsPerClass / sum(seqsPerClass) * 2 * 100)
                            )

    # Label the seven columns that were just appended with the state name
    newCols <- (ncol(sizeShiftTable) - 6):ncol(sizeShiftTable)
    colnames(sizeShiftTable)[newCols] <- c(paste("No. of lineages of ", state),
                                  paste("Importation lag (mean ± SD)", state), paste("Importation lag (median and IQR)", state),
                                  paste("Detection lag (mean ± SD)", state),   paste("Detection lag (median and IQR)", state),
                                  "No sequences", "% of sequences")

    weekInfo[[s]] <- list(weekShifts = weekShifts, weekLags = weekLags, weekSizes = weekSizes)

  }


    capSizeShifts            <- paste0("Estimated importation lags for ", "multistate", " transmission lineages of different sizes. Importation lag is the waiting time between importation date and the TMRCA of the sampled genomes in the transmission lineage. Detection lag is the waiting time from the importation date to the sampling time of the oldest (first) sampled genome in the transmission lineage.")

    # Print the table into the report and also export it next to the figures
    kable(sizeShiftTable, row.names=FALSE, caption = capSizeShifts)
    cat(knitr::kable(sizeShiftTable, row.names=FALSE, caption = capSizeShifts, format="latex"), file = paste0(figpath, "shift-size-distribution.tex"))
    cat(knitr::kable(sizeShiftTable, row.names=FALSE, caption = capSizeShifts, format="html"),  file = paste0(figpath, "shift-size-distribution.html"))
    # write.csv(sizeShiftTable, file = paste0(figpath, "shift-size-distribution.csv"), row.names=FALSE, quote=FALSE)

    # cat(kable(sizeShiftTable[,c(1, seq(2, ncol(sizeShiftTable), 5))], row.names=FALSE, caption = capSizeShifts, format="html"))
    

    # Cell text for a mean/SD summary of a numeric vector:
    #   no values      -> "-"
    #   a single value -> that value rounded to 2 decimals (numeric, no SD)
    #   otherwise      -> "<mean> ± <sd>", both rounded to 2 decimals
    getMeanSDText <- function(x) {
        nValues <- length(x)
        if (nValues == 0) {
            return("-")
        }
        if (nValues == 1) {
            return(round(x, 2))
        }
        avg    <- round(mean(x), 2)
        spread <- round(sd(x), 2)
        paste0(avg, " ± ", spread)
    }
    
    # Cell text for a median/IQR summary of a numeric vector:
    #   no values      -> "-"
    #   a single value -> that value rounded to 2 decimals (numeric, no IQR)
    #   otherwise      -> "<median> [<q25>-<q75>]", all rounded to 2 decimals
    #                     (quartiles ignore NAs, the median does not)
    getMedianIQRText <- function(x) {
        nValues <- length(x)
        if (nValues == 0) {
            return("-")
        }
        if (nValues == 1) {
            return(round(x, 2))
        }
        quartiles <- round(quantile(x, c(0.25, 0.75), na.rm=TRUE), 2)
        paste0(round(median(x), 2), " [", paste(quartiles, collapse="-"), "]")
    }
    
    cat("\n\\clearpage\n")

    # Weekly table: one row per week bin (labelled by the bin's start date and
    # its epi-week) and four columns per state, filled from weekInfo.
    # weekShiftNames, weekBreaks and weekInfo are produced earlier in this chunk.
    weekShiftTable <- data.frame(week = weekShiftNames,
                                 epiweek  = date_epiweek(weekBreaks[seq_len(length(weekBreaks) - 1)]))
    colnames(weekShiftTable) <- c("Week starting", "Epi-week")

    for (s in seq_len(nrow(stateFiles))) {
        state <- stateFiles$state[s]

        weekShifts <- weekInfo[[s]]$weekShifts
        weekLags   <- weekInfo[[s]]$weekLags
        weekSizes  <- weekInfo[[s]]$weekSizes

        # sapply() is kept on purpose: the text helpers return a number for
        # single-lineage weeks and a string otherwise, which sapply() unifies.
        weekShiftTable <- cbind(weekShiftTable,
                                lineages = sapply(weekShifts, length),
                                seqs     = sapply(weekSizes,  getMedianIQRText),
                                shifts   = sapply(weekShifts, getMeanSDText),
                                lags     = sapply(weekLags,   getMeanSDText))

        # Label the four columns that were just appended with the state name
        newCols <- (ncol(weekShiftTable) - 3):ncol(weekShiftTable)
        colnames(weekShiftTable)[newCols] <- c(
            paste("Estimated no. of importations of", state),
            paste("Lineage sizes (median and IQR)", state),
            paste("Importation lag (mean ± SD)", state), paste("Detection lag (mean ± SD)", state))

    }

    # The table holds every state, so the caption names all of them rather than
    # whichever state the loop above happened to visit last.
    capWeekShifts            <- paste0("3. Estimated importation and detection lags for ", paste(stateFiles$state, collapse = ", "), " transmission lineages ordered by importation date and aggregated by epi-week. Importation lag is the waiting time between importation date and the TMRCA of the sampled genomes in the transmission lineage. Detection lag is the waiting time from the importation date to the sampling time of the oldest (first) sampled genome in the transmission lineage. All statistics show means and standard deviations computed from the MCC trees.")

    # Print the table into the report and also export it next to the figures
    kable(weekShiftTable, row.names=FALSE, caption = capWeekShifts)
    cat(knitr::kable(weekShiftTable, row.names=FALSE, caption = capWeekShifts, format="latex"), file = paste0(figpath, "shift-week-distribution.tex"))
    cat(knitr::kable(weekShiftTable, row.names=FALSE, caption = capWeekShifts, format="html"),  file = paste0(figpath, "shift-week-distribution.html"))
    # write.csv(weekShiftTable, file = paste0(figpath, "shift-week-distribution.csv"), row.names=FALSE, quote=FALSE)


```

- Among `sum(metadata$country == "Germany")` initial samples from Germany, `dim(clusterSamplesMCC)[1]` samples have been kept in the final lineages (after removing incomplete information and small sub-trees) and `sum(clusterStatsMCC$seqs)` appear in non-singleton lineages. 


```{r size-distribution, results="asis"}

  # weekInfo <- list()
  # Lineage-size table: one row per size class and, for every state in
  # stateFiles (in that order), three columns: number of lineages, number of
  # sequences, and the fraction of the state's lineage sequences in that class.
  sizeTable <- data.frame(size = c("All", "2 to 10", "11 to 100", "101 to 1000", "Bigger than 1000"))
  colnames(sizeTable) <- c("Lineages of size")

  for (s in seq_len(nrow(stateFiles))) {
    state <- stateFiles$state[s]
    clusterStatsMCC <- stateInfo$clusterStatsMCC[[s]]
    clusterSamplesMCC <- stateInfo$clusterSamplesMCC[[s]]

    lineageSeqs <- clusterStatsMCC$seqs
    sizes <- list(all   = lineageSeqs,
                  small = lineageSeqs[lineageSeqs <= 10],
                  med   = lineageSeqs[lineageSeqs > 10 & lineageSeqs <= 100],
                  big   = lineageSeqs[lineageSeqs > 100 & lineageSeqs <= 1000],
                  huge  = lineageSeqs[lineageSeqs > 1000])

    seqsPerClass <- sapply(sizes, sum)

    sizeTable <- cbind(sizeTable,
                       lineages     = sapply(sizes, length),
                       size         = seqsPerClass,
                       # a fraction in [0, 1], despite the column name
                       size_percent = seqsPerClass / sum(clusterStatsMCC$seqs)
                       )

  }


    capSizeShifts            <- paste0("Population of each lineage size.")

    kable(sizeTable, row.names=FALSE, caption = capSizeShifts)
    # cat(knitr::kable(sizeTable, row.names=FALSE, caption = capSizeShifts, format="latex"), file = paste0(figpath, "shift-size-distribution.tex"))
    # cat(knitr::kable(sizeTable, row.names=FALSE, caption = capSizeShifts, format="html"),  file = paste0(figpath, "shift-size-distribution.html"))

    # From here on clusterStatsMCC / clusterSamplesMCC are the ones left by the
    # last loop iteration, i.e. the table below describes the last state only.
    clusterStatSizeOrder <- order(-clusterStatsMCC$seqs)

    # At most 8 lineages: indexing with 1:8 would produce NA rows (and make the
    # location lookup below fail) for a state with fewer than 8 lineages.
    topLineages <- head(clusterStatSizeOrder, 8)

    # Third "/"-separated field of the Location of the earliest-sampled
    # genome(s) of a lineage; ties on the earliest date are comma-separated.
    # A "/" is appended first so that two-level locations yield an empty field
    # instead of an out-of-bounds error.
    stateOfOldest <- function(cluster) {
        inCluster <- clusterSamplesMCC$cluster == cluster
        firstDate <- min(clusterSamplesMCC$sample_date[inCluster])
        locations <- clusterSamplesMCC[clusterSamplesMCC$sample_date == firstDate & inCluster, "Location"]
        fields    <- str_split(paste0(locations, "/"), "/")
        toString(str_trim(sapply(fields, "[[", 3)))
    }

    bigestLineages <- cbind(cluster = clusterStatsMCC$cluster[topLineages],
                            size = clusterStatsMCC$seqs[topLineages],
                            oldest = as.character(as.Date(date_decimal(clusterStatsMCC$oldest[topLineages]))),
                            state_of_oldest = sapply(clusterStatsMCC$cluster[topLineages], stateOfOldest)
                            )
    kable(bigestLineages, row.names=FALSE, caption = "Biggest lineages")

```

- Number of sequences in the 8 largest lineages = `sum(clusterStatsMCC[order(-clusterStatsMCC$seqs)[1:8],"seqs"])`
- Percentage of sequences in the 8 largest lineages = `sum(clusterStatsMCC[order(-clusterStatsMCC$seqs)[1:8],"seqs"]) / dim(clusterSamplesMCC)[1]` including singletons
- The largest lineage contains `clusterStatsMCC$seqs[clusterStatSizeOrder[1]]` sequences and has a duration of `difftime(as.Date(date_decimal(clusterStatsMCC$mostrecent[clusterStatSizeOrder[1]])),  clusterStatsMCC$tmrca_calendar[clusterStatSizeOrder[1]], units = "day")` days
- The second largest lineage contains `clusterStatsMCC$seqs[clusterStatSizeOrder[2]]` sequences and has a duration of `difftime(as.Date(date_decimal(clusterStatsMCC$mostrecent[clusterStatSizeOrder[2]])),  clusterStatsMCC$tmrca_calendar[clusterStatSizeOrder[2]], units = "day")` days


```{r importation-distribution-weekly, fig.width=7, fig.height=5, fig.cap="Boxplots of the estimated importation lags for `r states` transmission lineages ordered by importation date and aggregated by epi-week."}
  # One plot per state: the per-week importation-lag statistics stored in
  # weekInfo[[s]]$weekShifts, drawn with plotStats().
  # seq_len() instead of 1:nrow() so an empty stateFiles draws nothing
  # rather than iterating over c(1, 0).
  for (s in seq_len(nrow(stateFiles))) {
    state      <- stateFiles$state[s]
    weekShifts <- weekInfo[[s]]$weekShifts

    par(mar=c(5,6,2.5,6), cex.axis=0.7, cex.lab=0.8, mgp=c(3,0.75,0))
    plotStats(weekShifts, ylim=c(0,20), names=names(weekShifts), ylab="Importation lag", las=2, ny=16,
              xlab=paste("TMRCAs for week starting on", "(", state, ")"))
  }
```

```{r detection-distribution-weekly, fig.width=7, fig.height=5, fig.cap="Boxplots of the estimated detection lags for `r states` transmission lineages ordered by importation date and aggregated by epi-week."}
  # One plot per state: the per-week detection-lag statistics stored in
  # weekInfo[[s]]$weekLags, drawn with plotStats().
  # seq_len() instead of 1:nrow() so an empty stateFiles draws nothing
  # rather than iterating over c(1, 0).
  for (s in seq_len(nrow(stateFiles))) {
    state    <- stateFiles$state[s]
    weekLags <- weekInfo[[s]]$weekLags

    par(mar=c(5,6,2.5,6), cex.axis=0.7, cex.lab=0.8, mgp=c(3,0.75,0))
    plotStats(weekLags, ylim=c(0,110), names=names(weekLags), ylab="Detection lag", las=2, ny=16,
              xlab=paste("TMRCAs for week starting on", "(", state, ")"))
  }
```


```{r tmrca-shift-comparison, fig.width=5, fig.height=3, fig.cap="Estimated importation intensity (EII) curve (black) and the histogram of lineage TMRCAs (grey) and shifted TMRCAs representing importaitons (red). Arrow here shows the start of the `r states` lockdown."}

    # tmrca_hist         <- hist(clusterStatsMCC$tmrca_calendar, breaks=tmrcaBreaks, plot=FALSE, right=FALSE)   
    # tmrca_hist_shifted <- hist(clusterStatsMCC$tmrca_shifted_calendar, breaks=tmrcaBreaks, plot=FALSE, right=FALSE)   
    
#     plotEnd <- endDate
#     
#     par(mar=c(4,6,2.5,6), cex.axis=0.7, cex.lab=0.8, mgp=c(3,0.75,0))
#     
#     dateFreqDistribution(tmrca_hist$counts, tmrcaBreaks, plot.ci=FALSE, side=4, ylab="", 
#                          #startDate = startDate, endDate = plotEnd, col=mPal(ukPal$oth), ymax=70)
#                          startDate = tmrcaBreaks[1], endDate = tmrcaBreaks[length(tmrcaBreaks)-1], col=mPal(ukPal$oth), ymax=1300)
#     
#     dateFreqDistribution(tmrca_hist_shifted$counts, tmrcaBreaks, plot.ci=FALSE, add=TRUE, 
#                          #startDate = startDate, endDate = plotEnd, col=mPal(ukPal$eng, 0.25), ymax=70)
#                          startDate = tmrcaBreaks[1], endDate = tmrcaBreaks[length(tmrcaBreaks)-1], col=mPal(ukPal$oth), ymax=1300)
#     
#     mtext(side=4, text="Frequency of TMRCAs\n(per day)", line=3, cex=par("cex.lab"))
# 
#     par(new=TRUE)
#     ymax <- 1200
#     plot(1, type='n', xlim=c(startDate, plotEnd), ylim=c(0,ymax), axes=FALSE, 
#          xaxs='i', yaxs='i', xlab="", ylab="")
# #    lines(eii$date[eii$location == "all"], eii$num_intros[eii$location == "all"], lwd=2)
#     axis(2, las=1)
#     mtext(side=2, text="Estimated importation intensity\n(solid line)", line=3, cex=par("cex.lab"))
#     
#     
#     # Mark lockdown    
#     points(x=travelDates$lockdown, y=0.1*ymax, pch=175, font=5, cex=1.5, xpd=TRUE, col=mPal(ukPal$eng))
```
    
        
```{r tmrca-shift-comparison-sizes-large-ones, fig.width=7, fig.height=6, fig.cap="LARGE (A) Histogram of lineage TMRCAs, coloured by lineage size. (B) Histogram of lineage importations, coloured by lineage size.", eval=TRUE}

  # One two-panel figure per state, visiting states in stateInfo$stateOrder:
  #   panel A - weekly histogram of lineage TMRCAs (tmrca_calendar)
  #   panel B - weekly histogram of shifted TMRCAs (tmrca_shifted_calendar)
  # Both panels are stacked bars split by lineage size class.
  for (s in stateInfo$stateOrder) {
    state <- stateFiles$state[s]
    clusterStatsMCC <- stateInfo$clusterStatsMCC[[s]]

    # Weekly bin edges from the earliest shifted TMRCA to the latest TMRCA.
    # seq.Date() stops at or before the upper limit, so the last edge can lie
    # before the latest TMRCA; such dates are filtered out below (inRange).
    tmrcaBreaks <- seq.Date(min(clusterStatsMCC$tmrca_shifted_calendar), max(clusterStatsMCC$tmrca_calendar), by="week")

    layout(matrix(1:2, nrow=2, byrow=TRUE))
    par(mar=c(4,6,2.5,6), cex.axis=0.7, cex.lab=0.8, cex.main=1.5, mgp=c(3,0.75,0))

    tmrcaStat <- c("tmrca_calendar", "tmrca_shifted_calendar")
    plotStart <- tmrcaBreaks[1]
    plotEnd   <- tmrcaBreaks[length(tmrcaBreaks)-1]
    # Bins are one week wide, so the counts are per week (labels previously
    # said "per day", a leftover from daily breaks).
    ylabs <- c("Frequency of TMRCAs\n(per week)", "Frequency of importations\n(per week)")

    # Lineage size classes, listed in stacking/legend order.
    lineageSizes <- clusterStatsMCC$seqs
    sizeClass <- list(huge  = lineageSizes > 1000,
                      big   = lineageSizes > 100 & lineageSizes <= 1000,
                      med   = lineageSizes > 10  & lineageSizes <= 100,
                      small = lineageSizes <= 10)

    for (i in seq_along(tmrcaStat)) {

        dates   <- clusterStatsMCC[[tmrcaStat[i]]]
        # hist() fails on values outside the breaks, so keep only dates that
        # lie within the first and last bin edge.
        inRange <- dates >= tmrcaBreaks[1] & dates <= tmrcaBreaks[length(tmrcaBreaks)]

        # Per-bin counts for each size class: one column per class, one row per bin.
        tmrca_hist_breakdown <- as.data.frame(lapply(sizeClass, function(inClass) {
            hist(dates[inRange & inClass], breaks=tmrcaBreaks, plot=FALSE, right=FALSE)$counts
        }))

        # Stacked barplot; y-axis limit is the tallest stacked bar plus a margin of 2.
        dateFreqDistribution(t(tmrca_hist_breakdown), tmrcaBreaks, plot.ci=FALSE, barplot=TRUE, label=LETTERS[i], ylab=ylabs[i],
                             startDate = plotStart, endDate = plotEnd,
                             col=mPal(unlist(ukPal), 0.75), border=mPal(unlist(ukPal)), ymax=max(rowSums(tmrca_hist_breakdown))+2)

        # Legend swatches use the same 0.75 alpha as the bars. The alpha was
        # previously passed to unlist() (as its `recursive` argument) instead
        # of mPal(), so the legend fill did not match the bar fill.
        legend("top", horiz=FALSE, inset=c(0,-0.33), bty='n', xpd=TRUE, ncol=2,
               fill=mPal(unlist(ukPal), 0.75), border = mPal(unlist(ukPal)),
               legend = c("Bigger than 1000", "101 to 1000", "11 to 100", "10 or smaller"), title = paste("Transmission lineage size of", state),
               cex=0.8)
    }
  }
    
```


```{r tmrca-shift-comparison-sizes, fig.width=7, fig.height=6, fig.cap="(A) Histogram of lineage TMRCAs, coloured by lineage size. (B) Histogram of lineage importations, coloured by lineage size.", eval=TRUE}

  # One two-panel figure per state:
  #   panel A - daily histogram of lineage TMRCAs (tmrca_calendar)
  #   panel B - daily histogram of shifted TMRCAs (tmrca_shifted_calendar)
  # Both panels are stacked bars split by lineage size class.
  # seq_len() instead of 1:nrow() so an empty stateFiles draws nothing.
  for (s in seq_len(nrow(stateFiles))) {
    state <- stateFiles$state[s]
    clusterStatsMCC <- stateInfo$clusterStatsMCC[[s]]

    layout(matrix(1:2, nrow=2, byrow=TRUE))
    par(mar=c(4,6,2.5,6), cex.axis=0.7, cex.lab=0.8, cex.main=1.5, mgp=c(3,0.75,0))
    # Daily bin edges from the earliest shifted TMRCA to the latest TMRCA.
    tmrcaBreaks <- seq.Date(min(clusterStatsMCC$tmrca_shifted_calendar), max(clusterStatsMCC$tmrca_calendar), by="1 days")

    tmrcaStat <- c("tmrca_calendar", "tmrca_shifted_calendar")
    # First label was missing its closing parenthesis.
    ylabs <- c("Frequency of TMRCAs\n(per day)", "Frequency of importations\n(per day)")

    # Lineage size classes, listed in stacking/legend order.
    lineageSizes <- clusterStatsMCC$seqs
    sizeClass <- list(huge  = lineageSizes > 1000,
                      big   = lineageSizes > 100 & lineageSizes <= 1000,
                      med   = lineageSizes > 10  & lineageSizes <= 100,
                      small = lineageSizes <= 10)

    for (i in seq_along(tmrcaStat)) {

        dates <- clusterStatsMCC[[tmrcaStat[i]]]

        # Per-bin counts for each size class: one column per class, one row per bin.
        tmrca_hist_breakdown <- as.data.frame(lapply(sizeClass, function(inClass) {
            hist(dates[inClass], breaks=tmrcaBreaks, plot=FALSE, right=FALSE)$counts
        }))

        # Stacked barplot; y-axis limit is the tallest stacked bar plus a margin of 2.
        dateFreqDistribution(t(tmrca_hist_breakdown), tmrcaBreaks, plot.ci=FALSE, barplot=TRUE, label=LETTERS[i], ylab=ylabs[i],
                             startDate = tmrcaBreaks[1], endDate = tmrcaBreaks[length(tmrcaBreaks)-1],
                             col=mPal(unlist(ukPal), 0.75), border=mPal(unlist(ukPal)), ymax=max(rowSums(tmrca_hist_breakdown))+2)

        # Marker at the date(s) in travelDates$lockdown.
        # NOTE(review): y=130, font=50 and cex=50 look like leftovers from
        # experimentation -- confirm the marker is drawn as intended.
        points(x=travelDates$lockdown, y=130, pch=25, font=50, cex=50, xpd=TRUE, col=mPal(ukPal$eng))

        # Legend swatches use the same 0.75 alpha as the bars. The alpha was
        # previously passed to unlist() (as its `recursive` argument) instead
        # of mPal(), so the legend fill did not match the bar fill.
        legend("top", horiz=FALSE, inset=c(0,-0.33), bty='n', xpd=TRUE, ncol=2,
               fill=mPal(unlist(ukPal), 0.75), border = mPal(unlist(ukPal)),
               legend = c("Bigger than 1000", "101 to 1000", "11 to 100", "10 or smaller"), title = paste("Transmission lineage size of", state),
               cex=0.8)

    }
  }
```


```{r tmrca-shift-comparison-sizes-weekly, fig.width=7, fig.height=6, fig.cap="(A) Histogram of lineage TMRCAs, coloured by lineage size. (B) Histogram of lineage importations, coloured by lineage size.", eval=TRUE}

  # One two-panel figure per state:
  #   panel A - 7-day histogram of lineage TMRCAs (tmrca_calendar)
  #   panel B - 7-day histogram of shifted TMRCAs (tmrca_shifted_calendar)
  # Both panels are stacked bars split by lineage size class.
  # seq_len() instead of 1:nrow() so an empty stateFiles draws nothing.
  for (s in seq_len(nrow(stateFiles))) {
    state <- stateFiles$state[s]
    clusterStatsMCC <- stateInfo$clusterStatsMCC[[s]]

    # 7-day bin edges starting at the earliest shifted TMRCA. The upper limit
    # is pushed out by 7 days so the last edge is not before the latest TMRCA.
    tmrcaBreaks <- seq.Date(min(clusterStatsMCC$tmrca_shifted_calendar), max(clusterStatsMCC$tmrca_calendar)+7, by="7 days")

    layout(matrix(1:2, nrow=2, byrow=TRUE))
    par(mar=c(4,6,2.5,6), cex.axis=0.7, cex.lab=0.8, cex.main=1.5, mgp=c(3,0.75,0))

    tmrcaStat <- c("tmrca_calendar", "tmrca_shifted_calendar")
    ylabs <- c("Frequency of TMRCAs\n(per week)", "Frequency of importations\n(per week)")

    # Lineage size classes, listed in stacking/legend order.
    lineageSizes <- clusterStatsMCC$seqs
    sizeClass <- list(huge  = lineageSizes > 1000,
                      big   = lineageSizes > 100 & lineageSizes <= 1000,
                      med   = lineageSizes > 10  & lineageSizes <= 100,
                      small = lineageSizes <= 10)

    for (i in seq_along(tmrcaStat)) {

        dates <- clusterStatsMCC[[tmrcaStat[i]]]

        # Per-bin counts for each size class: one column per class, one row per bin.
        tmrca_hist_breakdown <- as.data.frame(lapply(sizeClass, function(inClass) {
            hist(dates[inClass], breaks=tmrcaBreaks, plot=FALSE, right=FALSE)$counts
        }))

        # Stacked barplot with a fixed y-axis maximum of 20 (the data-driven
        # alternative, max(rowSums(tmrca_hist_breakdown))+10, is not used).
        dateFreqDistribution(t(tmrca_hist_breakdown), tmrcaBreaks, plot.ci=FALSE, barplot=TRUE, label=LETTERS[i], ylab=ylabs[i],
                             startDate = tmrcaBreaks[1], endDate = tmrcaBreaks[length(tmrcaBreaks)-1],
                             col=mPal(unlist(ukPal), 0.75), border=mPal(unlist(ukPal)), ymax=20)

        # Marker at the date(s) in travelDates$lockdown.
        # NOTE(review): y=130 lies far above the fixed ymax of 20, and font=50 /
        # cex=50 look like leftovers -- confirm the marker is drawn as intended.
        points(x=travelDates$lockdown, y=130, pch=25, font=50, cex=50, xpd=TRUE, col=mPal(ukPal$eng))

        # Legend swatches use the same 0.75 alpha as the bars. The alpha was
        # previously passed to unlist() (as its `recursive` argument) instead
        # of mPal(), so the legend fill did not match the bar fill.
        legend("top", horiz=FALSE, inset=c(0,-0.33), bty='n', xpd=TRUE, ncol=2,
               fill=mPal(unlist(ukPal), 0.75), border = mPal(unlist(ukPal)),
               legend = c("Bigger than 1000", "101 to 1000", "11 to 100", "10 or smaller"), title = paste("Transmission lineage size of", state),
               cex=0.8)

    }
  }
   
```


```{r import-proportion-data}

#    getImportsByDate <- function(eii, countryList, tmrca_hist_shifted, breaks) {
#
#        # Proportion of imports on any day attributable to a country
#        importProp <- data.frame(date = sort(unique(eii$date)))
#        for (country in countryList) {
#            # Have to make very sure date order is correct
#            importProp[[country]] <- sapply(importProp$date, function(x) eii$num_intros[eii$location == country & eii$date == x] / 
#                                                                         eii$num_intros[eii$location == "all" & eii$date == x])
#        }
#        importDates      <- importProp$date
#        importProp$all   <- NULL
#        importProp$date  <- NULL
#        rownames(importProp) <- format.Date(importDates)
#        
#        z <- importProp$Other
#        importProp$Other <- NULL
#        importProp$Other <- rep(1, nrow(importProp)) - rowSums(importProp)
#        
#        # Count of imports from each country (using shifted TMRCA distribution)
#        # Not absolute count, can have fractions of importations!
#        start <- which(breaks == min(importDates))
#        end   <- which(breaks == max(importDates))
#        importCount <- apply(importProp, 2, function(x) x * tmrca_hist_shifted$counts[start:end])
#    
#        # Need to add one date so the props and counts act like a histogram (dates delimit breaks)
#        importDates <- c(importDates, importDates[length(importDates)] + (importDates[length(importDates)] - importDates[length(importDates)-1]))
#        
#        return(list(importDates = importDates, importProp = importProp, importCount = importCount))
#    }
#
#    importsAll <- getImportsByDate(eii, levels(eii$location), tmrca_hist_shifted, tmrcaBreaks)
#    importsSub <- getImportsByDate(eii, plotList, tmrca_hist_shifted, tmrcaBreaks)

```
    
  
```{r totalCountsTable, cache=TRUE}
#
#    # Sort and aggregate top 40 on MCC tree
   #  totalCounts <- colSums(importsAll$importCount) %>% sort(decreasing=TRUE)
   #  idx <- which(names(totalCounts) == "Other")
   #  totalCounts <- totalCounts[c(1:(idx-1), (idx+1):41, idx)]    
   #  
   #  # Get statistics across all posterior trees for the same 40 countries
   #  nreps  <- nrow(lagmodel)
   #  failed <- 0
   #  countryOrder <- names(totalCounts)
   #  totalCountsReps <- c()
   #  
   #  tic(paste0("\n\nNumbers of importations across ", nreps, " posterior trees: "))
   #  for (i in 1:nreps) {
   #    
   #      if (lagmodel$exit_code[i] == 1) {
   #          clusterStatsTemp <- clusterStats[clusterStats$tree == i, ]
   #          clusterStatsTemp$shift <- lagmodel$alpha[i] + lagmodel$beta[i]/clusterStatsTemp$seqs
   #          clusterStatsTemp$tmrca_shifted <- clusterStatsTemp$tmrca - (clusterStatsTemp$shift/366)
   #          clusterStatsTemp$tmrca_shifted_calendar <- as.Date(round_date(date_decimal(clusterStatsTemp$tmrca_shifted), unit = "day"))
   #          clusterStatsTemp$detection_lag <- as.Date(round_date(date_decimal(clusterStatsTemp$oldest), unit = "day")) - clusterStatsTemp$tmrca_shifted_calendar
   #          
   #          importHistTemp <- hist(clusterStatsTemp$tmrca_shifted_calendar, breaks=tmrcaBreaks, plot=FALSE, right=FALSE)  
   #          importsTemp    <- getImportsByDate(eii, levels(eii$location), importHistTemp, tmrcaBreaks)
   #          
   #          # Sort and aggregate top 40
   #          totalCountsTemp <- colSums(importsTemp$importCount) 
   #          totalCountsReps <- cbind(totalCountsReps, totalCountsTemp[countryOrder])
   #      } else {
   #          failed <- failed + 1
   #      }
   #  }
   #  totalPercReps <- apply(totalCountsReps, 2, function(x) 100*x/sum(x))
   #  toc()
   #  
   #  
   #  
   #  getHPDText <- function(x, digits=3) {
   #     res <- round(getHPD.boa(x), digits)
   #     return(paste0(res[2], " [",res[1],"-",res[3],"]"))
   # }
   # 
   # 
   # 
   # totalCountsTable <- data.frame(importsMCC        = round(totalCounts,2),
   #                                importsHPD        = apply(totalCountsReps, 1, getHPDText, digits=2),
   #                                #importsMeanSD    = apply(totalCountsReps, 1, getMeanSDText),
   #                                #importsMedIQR    = apply(totalCountsReps, 1, function(x) paste0(round(median(x),2), " [", paste(round(quantile(x, c(0.25, 0.75)),2), collapse="-"), "]")),
   #                                percentageMCC     = round(100*totalCounts/sum(totalCounts), 3),
   #                                percentageHPD     = apply(totalPercReps, 1, getHPDText))
   #                                #percentageMeanSD = apply(totalPercReps, 1, getMeanSDText),
   #                                #percentageMedIQR = apply(totalPercReps, 1, function(x) paste0(round(median(x),2), " [", paste(round(quantile(x, c(0.25, 0.75)),2), collapse="-"), "]")))
   # 
   # colnames(totalCountsTable) <- c("Observed importations (MCC tree)",
   #                                 "Observed importations (median, 95% HPD)",
   #                                 "Percentage (MCC tree)",
   #                                 "Percentage (median, 95% HPD)")
   # capTotalCounts <- paste0("Number of observed importations in our dataset and the percentage of the total that can be attributed to the 40 countries inferred to be sources for the most importations on the MCC tree and across a set of ", nreps-failed, " posterior trees (the optimisation procedure failed to converge on ", failed, " posterior trees).")
   # 
   # 
   # rownames(totalCountsTable) <- sub("Korea, South", "South Korea", rownames(totalCountsTable))
   # kable(totalCountsTable, caption=capTotalCounts)
   # cat(knitr::kable(totalCountsTable, caption = capTotalCounts, format="latex"), file = paste0(figpath, "import-counts.tex"))
   # cat(knitr::kable(totalCountsTable, caption = capTotalCounts, format="html"),  file = paste0(figpath, "import-counts.html"))
   # write.csv(totalCountsTable, file = paste0(figpath, "import-counts.csv"), row.names=TRUE, quote=FALSE)
   # 
   #  
```

```{r import-proportions, fig.width=7, fig.height=3, fig.cap="The estimated proportion of importation events that are attributable to inbound travellers from each of several source countries over time.", eval=TRUE}

   # par(mar=c(4,6,2.5,6), cex.axis=0.7, cex.lab=0.8)
   # 
   # dateFreqDistribution(t(importsSub$importProp), importsSub$importDates, plot.ci=FALSE,  barplot=FALSE,
   #                      startDate = startDate, endDate = endDate,
   #                      col=sapply(colnames(importsSub$importProp), function(x) mPal(countryPal[[x]], 0.75)), border = "#000000",
   #                      ymax = 1, ylab = "Proportion of samples")
   # rect(startDate, 0, endDate, 1, xpd=TRUE)
   # 
   # legend("top", horiz=FALSE, inset=c(0,-0.35), bty='n', xpd=TRUE, ncol=5,
   #        fill=unname(sapply(colnames(importsSub$importProp), function(x) mPal(countryPal[[x]], 0.75))),
   #        legend <- colnames(importsSub$importProp), cex=0.8)

```


```{r import-hist, fig.width=9.5, fig.height=4, fig.cap="Estimated histogram of virus lineage importation events per day, obtained from our lag model. Colours show the proportion attributable each day to inbound travel from various countries. This assignment is statistical, i.e. we cannot ascribe a specific source location to any given lineage."}
#
#    par(mar=c(4,6,2.5,8), cex.axis=0.7, cex.lab=0.8, mgp=c(3,0.75,0))
#
#    dateFreqDistribution(t(importsSub$importCount), importsSub$importDates, plot.ci=FALSE,  barplot=TRUE, 
#                         startDate = startDate, endDate = endDate, 
#                         col=sapply(colnames(importsSub$importCount), function(x) mPal(countryPal[[x]], 0.75)), 
#                         border = sapply(colnames(importsSub$importCount), function(x) mPal(countryPal[[x]])),
#                         ymax = 70, ylab = "Importation events leading to an\nobserved transmission lineage")
#    
#    legend("topleft", horiz=FALSE, inset=c(1,0), bty='n', xpd=TRUE, ncol=1,
#           border = unname(sapply(colnames(importsSub$importProp), function(x) mPal(countryPal[[x]]))), 
#           fill   = unname(sapply(colnames(importsSub$importProp), function(x) mPal(countryPal[[x]], 0.75))), 
#           legend <- colnames(importsSub$importProp), cex=1)

```

```{r import-hist-extended, fig.width=9, fig.height=4, fig.cap="Estimated histogram of virus lineage importation events per day, obtained from our lag model. Colours show the proportion attributable each day to inbound travel from various countries. This assignment is statistical, i.e. we cannot ascribe a specific source location to any given lineage. **This is the same as the previous plot but the axes have been extended. Lineages with TMRCAs after 30 April are not plotted, since data on inbound travellers after 30 April are not available.**"}
#
#    par(mar=c(4,6,2.5,7), cex.axis=0.7, cex.lab=0.8, mgp=c(3,0.75,0))
#
#    # Extend with zeros to plot on same axes as TMRCAs (need post-processing, since unassigned TMRCAs > Apr 30 are NOT plotted)
#    tempImportDates  <- c(importsSub$importDates, seq(max(importsSub$importDates+1), as.Date("2020-07-01"), by="days"))
#    tempImportCounts <- rbind(importsSub$importCount, matrix(nrow=(length(tempImportDates)-nrow(importsSub$importCount)-1), ncol=ncol(importsSub$importCount)))
#    
#
#    dateFreqDistribution(t(tempImportCounts), tempImportDates, plot.ci=FALSE,  barplot=TRUE, 
#                         startDate = startDate, endDate = "2020-06-21", 
#                         col=sapply(colnames(importsSub$importCount), function(x) mPal(countryPal[[x]], 0.75)), 
#                         border = sapply(colnames(importsSub$importCount), function(x) mPal(countryPal[[x]])),
#                         ymax = 70, ylab = "Importation events leading to an\nobserved transmission lineage")
#    
#    legend("topleft", horiz=FALSE, inset=c(1,0), bty='n', xpd=TRUE, ncol=1,
#           border = unname(sapply(colnames(importsSub$importProp), function(x) mPal(countryPal[[x]]))), 
#           fill   = unname(sapply(colnames(importsSub$importProp), function(x) mPal(countryPal[[x]], 0.75))), 
#           legend <- colnames(importsSub$importProp), cex=1)

```


```{r import-hist-cumulative, fig.width=5, fig.height=3, fig.cap="Estimated cumulative number of virus lineage importation events per day, obtained from our lag model. Colours show the proportion attributable each day to inbound travel from various countries. This assignment is statistical, i.e. we cannot ascribe a specific source location to any given lineage. The black line shows the cumulative global EII curve. **This plot is to check the fit of the importation distribution to the EII curve by eye.**"}
#
#    par(mar=c(4,5,2.5,6), cex.axis=0.7, cex.lab=0.8)
#
#    heights <- t(apply(importsSub$importCount, 2, cumsum))
#
#    dateFreqDistribution(heights, importsSub$importDates, plot.ci=FALSE,  barplot=FALSE, 
#                         startDate = startDate, endDate = endDate, 
#                         col=sapply(colnames(importsSub$importCount), function(x) mPal(countryPal[[x]], 0.75)), border = NA,
#                         ymax = max(colSums(heights))*1.1, ylab = "Importation events leading to an\nobserved transmission lineage")
#    
#    legend("top", horiz=FALSE, inset=c(0,-0.35), bty='n', xpd=TRUE, ncol=4,
#           border = unname(sapply(colnames(importsSub$importProp), function(x) mPal(countryPal[[x]]))), 
#           fill   = unname(sapply(colnames(importsSub$importProp), function(x) mPal(countryPal[[x]], 0.75))), 
#           legend <- colnames(importsSub$importProp), cex=0.8)
#    
#    par(new=TRUE)
#    
#    y <- cumsum(eii$num_intros[eii$location == "all"])
#    
#    plot(1, type='n', xlim=as.Date(c(startDate, endDate)), ylim=c(0,max(y)*1.1), axes=FALSE, 
#         xaxs='i', yaxs='i', xlab="", ylab="")
#    lines(eii$date[eii$location == "all"], y, lwd=2)
#    axis(4, las=1)
#    mtext(side=4, text="Cumulative estimated importation intensity\n(solid line)", line=4, cex=0.8)

```

```{r import-proportions-total, fig.width=6, fig.height=3, fig.cap="The estimated total fraction of importation events that are attributable to inbound travellers from each country."}
#
#    # Total proportion of observed (seen a TMRCA) importations attributable to a country
#    totalProps <- colSums(importsSub$importCount)/sum(importsSub$importCount)
#    
#    par(mar=c(0,0,0,0))
#    pie(totalProps, labels=sapply(names(totalProps), function(x) paste0(x, " (", round(totalProps[x]*100, 2), "%)")), 
#        col=sapply(names(totalProps), function(x) mPal(countryPal[[x]], 0.75)), border=NA, cex=1)

```


```{r import-hist-countries, fig.width=15, fig.height=12, fig.cap="Estimated histogram of virus lineage importation events per day, obtained from our lag model, for the 12 countries estimated to have contributed the most importations (see Table 3). This assignment is statistical, i.e. we cannot ascribe a specific source location to any given lineage. **This plot is just a sanity check and isn't useful for drawing any conclusions.**"}
#
#    # Adjust margins
#    ymax1 <- list(Spain       = 30, 
#                  France      = 30, 
#                  Italy       = 30, 
#                  Belgium     = 10, 
#                  Netherlands = 10, 
#                  Ireland     = 10, 
#                  Switzerland = 10, 
#                  US          = 10,
#                  Germany     = 3,
#                  Portugal    = 1,
#                  Sweden      = 1,
#                  China       = 4) 
#
#    ymax2 <- list(Spain       = 500, 
#                  France      = 500, 
#                  Italy       = 500, 
#                  Belgium     = 140, 
#                  Netherlands = 140, 
#                  Ireland     = 140, 
#                  Switzerland = 140, 
#                  US          = 140,
#                  Germany     = 50,
#                  Portugal    = 15,
#                  Sweden      = 15,
#                  China       = 1)
#
#    par(mar=c(4,6,2.5,6), cex.axis=1, cex.lab=1.2, cex.main=1.5, mgp=c(2,0.75,0))
#
#    layout(matrix(c(1:12), nrow=4, byrow=TRUE))
#    for (country in names(ymax1)) {
#
#        dateFreqDistribution(unname(importsAll$importCount[, country]), importsAll$importDates, plot.ci=FALSE,  barplot=TRUE, 
#                             startDate = startDate, endDate = endDate, 
#                             col=mPal(countryPal[[country]], 0.75), border = NA, 
#                             ymax = ymax1[[country]], ylab = "Importation events leading to an\nobserved transmission lineage", label=LETTERS[which(names(ymax1) == country)])
#        
#        par(new=TRUE)
#        plot(1, type='n', xlim=as.Date(c(startDate, endDate)), ylim=c(0,ymax2[[country]]), axes=FALSE, 
#             xaxs='i', yaxs='i', xlab="", ylab="")
#        lines(eii$date[eii$location == country], eii$num_intros[eii$location == country], lwd=2)
#        axis(4, las=1)
#        mtext(side=4, text="Estimated importation intensity\n(solid line)", line=4, cex=0.8)
#        title(capitalise(country))
#
#    }
#
```


\clearpage

# Session info

```{r sessionInfo, results='markup'}
    # Print the R version, platform and loaded package versions used for this
    # render, so the report can be reproduced later.
    sessionInfo()
```