diff --git a/R/walk.forward.R b/R/walk.forward.R index 3b0379b..3345cd5 100644 --- a/R/walk.forward.R +++ b/R/walk.forward.R @@ -2,7 +2,7 @@ # R (http://r-project.org/) Quantitative Strategy Model Framework # # Copyright (c) 2009-2015 -# Peter Carl, Dirk Eddelbuettel, Brian G. Peterson, Jeffrey Ryan, and Joshua Ulrich +# Peter Carl, Dirk Eddelbuettel, Brian G. Peterson, Jeffrey Ryan, and Joshua Ulrich # # This library is distributed under the terms of the GNU Public License (GPL) # for full details see the file COPYING @@ -16,7 +16,7 @@ ############################################################################### #' Rolling Walk Forward Analysis -#' +#' #' A wrapper for \code{\link{apply.paramset}} and \code{\link{applyStrategy}}, #' implementing a Rolling Walk Forward Analysis (WFA). #' @@ -28,30 +28,30 @@ #' following the training window (testing window). Once completed, the training #' window is shifted forward by a time period equal to the testing window size, #' and the process is repeated. The final testing window may be shorter than the -#' full testing window, if the length of the time series does not allow a full +#' full testing window, if the length of the time series does not allow a full #' testing window. -#' +#' #' 'anchored' walk forward forces all training windows to start on the first #' observation of the market data. This can be useful if the indicators make #' use of all the data, e.g. for a risk metric such as a volatility estimator, #' for a regime model, or for a long-memory process of some sort. If #' \code{anchored=TRUE} and you have specified \code{k.training}, then the -#' performance of each paramset will only be evaluated on the rolling training -#' window, even though larger (anchored) periods are used for input calculations. +#' performance of each paramset will only be evaluated on the rolling training +#' window, even though larger (anchored) periods are used for input calculations. #' #' Note that walk.forward will generate out of sample (OOS) transactions using #' the chosen parameter set into the portfolio designated by portfolio.st. So -#' walk.forward shoud be supplied with a 'clean' portfolio environment to avoid -#' issues such as out of order transactions. +#' walk.forward shoud be supplied with a 'clean' portfolio environment to avoid +#' issues such as out of order transactions. #' #' The \code{psgc} argument is a tradeoff between memory efficiency and speed. -#' \R does garbage collection promarily when it is running low on RAM, but this +#' \R does garbage collection promarily when it is running low on RAM, but this #' automatic detection works poorly in parallel processes. If TRUE, the default, #' \code{walk.proward} and \code{\link{apply.paramset}} will call \code{gc()} -#' at key points to limit RAM usage. For small tests, this is probably +#' at key points to limit RAM usage. For small tests, this is probably #' unecessary and will only slow the test. For large tests, even on substantial #' hardware, it may be the difference between completing the test and crashing \R. -#' +#' #' @param portfolio.st the name of the portfolio object #' @param account.st the name of the account object #' @param strategy.st the name of the strategy object @@ -70,15 +70,15 @@ #' @param savewf boolean, default FALSE. if TRUE, saves audit information on training and testing periods to working directory for later analysis #' @param saveenv boolean, default FALSE. if TRUE, save the paramset environment information for each trial, and not just the tradeStats and chosen paramset #' @param psgc boolean, if TRUE, the default, run gc() in each worker session to conserve RAM. -#' +#' #' @return a list consisting of a slot containing detailed results for each training + testing period, as well as the portfolio and the tradeStats() for the portfolio #' #' @references Tomasini, E. and Jaekle, U. Trading Systems. 2009. Chapter 6 -#' @seealso +#' @seealso #' \code{\link{applyStrategy}} , #' \code{\link{apply.paramset}} , #' \code{\link{chart.forward}} , -#' \code{\link{chart.forward.training}} , +#' \code{\link{chart.forward.training}} , #' \code{\link{endpoints}} , #' \code{\link[blotter]{tradeStats}} #' @@ -120,27 +120,37 @@ walk.forward <- function( strategy.st ep <- endpoints(symbol.data, on=period) - total.start <- ep[1] - total.timespan <- paste(index(symbol.data[total.start]), '', sep='/', index(last(symbol.data))) + total.start <- ep[2] # ep[1] is always 0 + total.timespan <- paste(index(symbol.data[total.start]), index(last(symbol.data)), sep='/') # construct the subsets to use for training/testing - training.end.v <- ep[c(k.training,k.training+cumsum(rep(k.testing,as.integer((length(ep)-k.training)/k.testing))))] - if( is.na(last(training.end.v)) ) { - training.end.v <- training.end.v[-length(training.end.v)] - } - - training.start.v <- c(1,1+ep[cumsum(rep(k.testing,as.integer((length(ep)-k.training)/k.testing)))]) - + #define first Training interval + first.training.end<-1+k.training + #Calculate how many complete training periods are in the Dataset. + len<-length(ep[-length(ep)]) #Removed last period, because ep[nrow(ep)]-ep[nrow(ep)-1] isnt always a full period (e.g.period = 'months'= 01.01-01.15) + trainingperiods.total<-as.integer((len-first.training.end)/k.testing) + training.steps<-rep(k.testing,trainingperiods.total) + + #construct index for ep + ii<-first.training.end+cumsum(training.steps) + i<-c(first.training.end,ii) + training.end.v <- ep[i] + + #construct Training starting points by subtracting k.training from calculated training endpoints + first.training.start<-1 + i<-ii-k.training + training.start.v <- c(first.training.start,ep[i]) + if(anchored || anchored=='anchored' || anchored=='rolling.subset'){ perf.start.v <- training.start.v perf.start <- index(symbol.data[training.start.v]) } else { perf.start <- perf.start.v <- rep(NA,length(training.start.v)) } - + testing.start.v <- 1+training.end.v testing.end.v <- c(training.end.v[-1],last(ep)) - + training.start <- index(symbol.data[training.start.v]) if(anchored || anchored=='anchored' || anchored=='rolling.subset'){ training.start.v <- rep(1,length(training.start.v)) @@ -149,7 +159,7 @@ walk.forward <- function( strategy.st training.end <- index(symbol.data[training.end.v]) testing.start <- index(symbol.data[testing.start.v]) testing.end <- index(symbol.data[testing.end.v]) - + wf.subsets <- data.frame( training.start=training.start , training.end=training.end , testing.start=testing.start @@ -166,7 +176,7 @@ walk.forward <- function( strategy.st # set up our control variables old.param.combo <- NULL - + # now loop over training and testing periods, collecting output # do the traditional rolling method, computationally expensive for(i in 1:nrow(wf.subsets)) @@ -177,7 +187,7 @@ walk.forward <- function( strategy.st } else { .audit=NULL } - + training.timespan <- paste(wf.subsets[i,'training.start'], wf.subsets[i,'training.end'], sep='/') testing.timespan <- paste(wf.subsets[i,'testing.start'], wf.subsets[i,'testing.end'], sep='/') @@ -187,16 +197,16 @@ walk.forward <- function( strategy.st } else { perf.subset <- training.timespan } - + t.start <- wf.subsets[i,'training.start'] t.end <- wf.subsets[i,'training.end'] - + result$training.timespan <- training.timespan result$testing.timespan <- testing.timespan - + print(paste('=== training', paramset.label, 'on', training.timespan)) - - + + # run backtests on training window result$apply.paramset <- apply.paramset( strategy.st=strategy.st , paramset.label=paramset.label @@ -211,14 +221,14 @@ walk.forward <- function( strategy.st , perf.subset=perf.subset , ...=... ) - + tradeStats.list <- result$apply.paramset$tradeStats - + if(!missing(k.testing) && k.testing>0) { if(!is.function(obj.func)) stop(paste(obj.func, 'unknown obj function', sep=': ')) - + # select best param.combo param.combo.idx <- try(do.call(obj.func, obj.args)) if(length(param.combo.idx) == 0 || class(param.combo.idx)=="try-error"){ @@ -233,37 +243,37 @@ walk.forward <- function( strategy.st } } else { if(length(param.combo.idx)>1){ - # choose the last row because expand.grid in paramsets will make - # the last row the row with the largest parameter values, roughly - # equivalent to highest stability of data usage, + # choose the last row because expand.grid in paramsets will make + # the last row the row with the largest parameter values, roughly + # equivalent to highest stability of data usage, # or lowest degrees of freedom param.combo.idx <- last(param.combo.idx) } param.combo <- tradeStats.list[param.combo.idx, 1:grep('Portfolio', names(tradeStats.list)) - 1] param.combo.nr <- row.names(tradeStats.list)[param.combo.idx] } - + old.param.combo<-param.combo - + result$testing.param.combo <- param.combo result$testing.param.combo.idx <- param.combo.idx - + if(!is.null(.audit)) { assign('obj.func', obj.func, envir=.audit) assign('param.combo.idx', param.combo.idx, envir=.audit) assign('param.combo.nr', param.combo.nr, envir=.audit) assign('param.combo', param.combo, envir=.audit) - } - + } + # configure strategy to use selected param.combo strategy <- install.param.combo(strategy, param.combo, paramset.label) - + result$testing.timespan <- testing.timespan - + print(paste('=== testing param.combo', param.combo.nr, 'on', testing.timespan)) print(param.combo) - + # run backtest using selected param.combo # NOTE, this will generate OOS transactions in the portfolio identified, # so strart with a clean portfolio environment. @@ -281,11 +291,11 @@ walk.forward <- function( strategy.st iso.format <- "%Y%m%dT%H%M%S" time.range <- paste(format(index(symbol.data[t.start]), iso.format), format(index(symbol.data[t.end]), iso.format), sep=".") - + if(!is.null(.audit) && !is.null(audit.prefix)){ - + result$audit <- .audit - + if(savewf){ filestr<-paste(audit.prefix, symbol.st, time.range, "RData", sep=".") if(verbose) cat('Saving .audit env in file: ',filestr,'\n') @@ -296,19 +306,19 @@ walk.forward <- function( strategy.st if(include.insamples){ results[[time.range]] <- result } - + } # end full rolling training/testing loop if(include.insamples){ # run apply.paramset on the entire period if(!is.null(.audit)){ - # only keep the debug auditing information if we are + # only keep the debug auditing information if we are # keeping it for the rest of the simulation .insampleaudit <- new.env() } else { .insampleaudit <- NULL } - results$insample.apply.paramset <- + results$insample.apply.paramset <- apply.paramset( strategy.st=strategy.st , paramset.label=paramset.label , portfolio.st=portfolio.st @@ -335,12 +345,12 @@ walk.forward <- function( strategy.st results$blotter <- .blotter results$strategy <- .strategy results$wf.subsets <- wf.subsets - + results$portfolio.st <- portfolio.st - + results$testing.parameters <- NULL for (tp in ls(pattern='*.[0-9]+',pos=results)){ - tr <- cbind(results[[tp]][['testing.param.combo']], + tr <- cbind(results[[tp]][['testing.param.combo']], results[[tp]][['testing.timespan']]) if(is.null(results$testing.parameters)){ results$testing.parameters <- tr @@ -349,17 +359,17 @@ walk.forward <- function( strategy.st } } colnames(results$testing.parameters)[ncol(results$testing.parameters)] <- 'testing.timespan' - + if(!is.null(.audit) && !is.null(audit.prefix)) { results$audit <- .audit } - + if(savewf){ filestr<-paste(audit.prefix, symbol.st, time.range,"Results","RData", sep=".") cat('\n','Saving final results env in file: ',filestr,'\n') save(results, file = filestr) } - + return(results) }