# MuSSCRat_UCLN_script.Rev

################################################################################
#
# RevBayes: Bayesian inference of rates of evolution under a
#                   state-dependent Brownian-motion model (MuSSCRat)
#
# This script performs the full implementation of MuSSCRat (May and Moore 2020. Syst. Biol.)
#
# base script author: Michael R. May
# script modified by: Edward D. Burress & Katherine Corn
#
################################################################################

####################
# Read in the Data #
####################


### Read in the tree
# readTrees() returns a vector of trees; only the first element is kept.
T <- readTrees("data/recalibrated_reef_fishes_dated.tre")[1]
ntips <- T.ntips()
# NOTE(review): 2 * ntips - 2 is the branch count of a rooted, fully
# bifurcating tree -- confirm the input tree contains no polytomies.
nbranches <- 2 * ntips - 2

### Read in the continuous-character data
cont <- readContinuousCharacterData("data/input_rfmd_continuous_characters.nex")
# Character 2 is excluded before counting; the reason is not recorded in this
# script. nchar is read after the exclusion and is used below as the dimension
# of the rate simplex and of the correlation matrix.
# NOTE(review): presumably nchar() counts only the included characters -- confirm.
cont.excludeCharacter([2])
nchar <- cont.nchar()

### Read in the discrete-character data
disc <- readDiscreteCharacterData("data/input_rfmd_discrete_character.nex")
# Number of discrete states, taken from the size of the state descriptions.
num_disc_states <- disc.getStateDescriptions().size()

# Create the vectors that collect the moves and monitors of this analysis
moves    = VectorMoves()
monitors = VectorMonitors()

# Specify the run: run_id is appended to the output prefix so that the output
# files of different runs get different names.
# NOTE: output_fn is used below as a bare filename prefix with no separator
# added, e.g. output_fn + "params.log".
run_id = 3
output_fn = "output_UCLN_state_dependent_" + run_id

##########################
# Specify the tree model #
##########################

# The tree read above is assigned as a constant node; this script attaches
# no moves to it, so it is held fixed during the analysis.
tree <- T

########################################
# Specify the discrete-character model #
########################################

# make the Q matrix (Jukes-Cantor over the num_disc_states discrete states)
Q <- fnJC(num_disc_states)

# make the transition rate parameter:
# log-uniform prior on (1e-10, 100), updated with a scale move
lambda ~ dnLoguniform(1e-10, 100)
moves.append( mvScale(lambda, weight=10.0) )
# Initialise lambda from the tree length.
# NOTE(review): presumably this targets ~500 expected changes over the tree.
# The starting value must lie inside the prior bounds, i.e. this assumes
# tree.treeLength() >= 5 -- confirm for the input tree.
lambda.setValue( 500.0 / tree.treeLength() )
# Bare expression: echoes the starting rate to the console when the script runs.
"rate: " + lambda

# make the data-augmented CTMC model for the single discrete character
# (one site, rate matrix Q, branch rate lambda) and condition it on the data
X ~ dnPhyloCTMCDASiteIID(tree, Q, branchRates=lambda, type="Standard", nSites=1)
X.clamp(disc)

# include proposals for the discrete character history:
# one move with graph="node" and one with graph="branch", both using the
# "rejection" proposal and the same rate matrix Q
moves.append( mvCharacterHistory(ctmc=X, qmap_site=Q, graph="node",   proposal="rejection", weight=20.0) )
moves.append( mvCharacterHistory(ctmc=X, qmap_site=Q, graph="branch", proposal="rejection", weight=20.0) )

# tally the character changes on every branch of the augmented history,
# then add the per-branch counts up into one tree-wide total
for (branch_idx in 1:nbranches) {
    num_changes[branch_idx] := sum( X.numCharacterChanges(branch_idx) )
}
total_num_changes := sum(num_changes)


########################################################################################################
#specify the correlation matrix and build the variance-covariance matrix for the continuous characters #
########################################################################################################

# specify the proportional rates:
# a symmetric Dirichlet prior (concentration alpha) over one entry per
# continuous character
alpha <- 1.0
proportional_rates ~ dnDirichlet( rep(alpha, nchar) )
# multiply the simplex by nchar so the relative rates sum to nchar (mean 1)
relative_rates := proportional_rates * nchar
moves.append( mvBetaSimplex(proportional_rates, weight=10.0) )

# LKJ prior (parameter eta) on the nchar x nchar partial-correlation matrix
eta <- 1.0
P ~ dnLKJPartial( eta, nchar )

# a random-walk proposal on P as a whole ...
moves.append( mvCorrelationMatrixRandomWalk(P, weight=10.0, sigma=0.0001) )
# moves.append( mvCorrelationMatrixSingleElementBeta(P, weight=10.0) )

# ... plus one element-specific beta proposal for every pair (r_idx < c_idx)
for (r_idx in 1:(nchar - 1)) {
    for (c_idx in (r_idx + 1):nchar) {
        moves.append( mvCorrelationMatrixSpecificElementBeta(P, r_idx, c_idx, alpha=10.0, weight=5.0, tuneTarget=0.4) )
    }
}


# convert the partial-correlation matrix P into the correlation matrix R
R := fnPartialToCorr(P)

# expose the upper triangle of R as its own deterministic variable
# NOTE(review): presumably so the pairwise correlations are written by the
# model monitor -- confirm.
correlations := R.upperTriangle()

# construct the variance-covariance matrix from the correlation matrix R and
# the square roots of the relative rates
V := fnDecompVarCovar( relative_rates^0.5, R )


#####################################################
# Define the lognormal-distributed background rates #
#####################################################

# background_mean_log is the base-10 logarithm of the background rate
# (uniform prior on (-8, 3)); background_mean converts it back with 10^x.
background_mean_log ~ dnUniform(-8,3)
moves.append( mvSlide(background_mean_log, weight=20.0) )
background_mean := 10^background_mean_log

# background_mean_log.setValue( 1 )

# Exponential prior on the standard deviation of the log-scale branch rates,
# parameterised by its rate 1 / expected_sd (prior mean = expected_sd = 0.5).
expected_sd = 0.5
background_sd ~ dnExponential(1 / expected_sd)
moves.append(mvScale(background_sd, weight=10.0))

# for(i in 1:num_branches) {
#     background_branch_rate_log[i] ~ dnNormal(ln(background_mean), background_sd)
#     moves[move_index++] = mvSlide(background_branch_rate_log[i], weight=2.0)
#     background_branch_rate[i] := exp(background_branch_rate_log[i])
# }
# moves[move_index++] = mvVectorSlideRecenter(background_branch_rate_log, background_mean_log, weight=5.0, tuneTarget=0.234)
# moves[move_index++] = mvShrinkExpand(background_branch_rate_log, background_sd, weight=5.0, tuneTarget=0.234)

# one background rate per branch: the log rate is normally distributed around
# ln(background_mean) with standard deviation background_sd, gets its own slide
# move, and is exponentiated to give the rate on the natural scale
for (branch_idx in 1:nbranches) {
    background_branch_rate_log[branch_idx] ~ dnNormal( ln(background_mean), background_sd )
    moves.append( mvSlide(background_branch_rate_log[branch_idx], weight=0.5) )
    background_branch_rate[branch_idx] := exp( background_branch_rate_log[branch_idx] )
}

# joint proposals on the whole vector of log rates together with its
# hyperparameters
# NOTE(review): background_mean_log is on the log10 scale while the log rates
# are on the natural-log scale; confirm the recentering move mixes as intended.
moves.append( mvShrinkExpand(background_branch_rate_log, background_sd, weight=5.0) )
moves.append( mvVectorSlideRecenter(background_branch_rate_log, background_mean_log, weight=5.0) )

# moves.append( mvVectorScale(background_branch_rate, weight=10.0))


####################################################################################################################################
# specify the relative state-dependent rates (with sum 1) and calculate posterior probability that these rates are state-dependent #
####################################################################################################################################

# Reversible-jump mixture for the relative state-dependent rates:
#   - a constant component: the simplex built from rep(1, num_disc_states),
#     i.e. every state has the same relative rate (no state dependence);
#   - a Dirichlet(concentration, ...) component with freely varying rates.
# p=0.5 is the prior mixture weight, so both components are equally weighted.
concentration <- 1.0
proportional_zeta ~ dnReversibleJumpMixture( simplex(rep(1,num_disc_states)), dnDirichlet( rep(concentration, num_disc_states) ), p=0.5 )
# mvRJSwitch is the reversible-jump switch move on the mixture; mvBetaSimplex
# updates the simplex values themselves.
moves.append( mvRJSwitch(proportional_zeta, weight=10.0) )
moves.append( mvBetaSimplex(proportional_zeta, weight=1.0) )
# Indicator: 0 when proportional_zeta equals the equal-rates simplex, 1
# otherwise. Its posterior mean is the posterior probability that the rates
# are state-dependent.
is_state_dependent := ifelse( proportional_zeta == simplex(rep(1,num_disc_states)), 0.0, 1.0)


##############################################################
# compute the state dependent rates and overall branch rates #
##############################################################

# rescale the simplex by the number of states so the state-dependent rates
# have mean 1
zeta := proportional_zeta * num_disc_states

# state-dependent rate of each branch: the state rates weighted by the
# relative time the sampled history spends in each state on that branch
# (second argument 1: presumably the site index of the single discrete character)
for (branch_idx in 1:nbranches) {
    state_branch_rate[branch_idx] := sum( X.relativeTimeInStates(branch_idx, 1) * abs(zeta) )
}

# overall branch rates: background rate times state-dependent rate
branch_rates := background_branch_rate * state_branch_rate


##########################
# Specify the BM process #
##########################

# Multivariate Brownian-motion model for the continuous characters on the
# fixed tree, with rate matrix V and per-branch rates passed as the square
# root of branch_rates.
# NOTE(review): presumably branchRates is applied on the standard-deviation
# scale, hence the ^0.5 -- confirm against the distribution's documentation.
Y ~ dnPhyloMultivariateBrownianREML(tree, branchRates=branch_rates^0.5, rateMatrix=V)
# condition the model on the observed continuous-character data
Y.clamp(cont)


#############
# The Model #
#############

# build the model object from the graph connected to Y
mymodel = model(Y)

### monitors: every model parameter goes to file each 10th generation, and a
### short summary goes to the screen every generation
param_monitor  = mnModel(filename=output_fn + "params.log", printgen=10)
screen_monitor = mnScreen(printgen=1, total_num_changes, lambda, background_mean)
monitors.append( param_monitor )
monitors.append( screen_monitor )


###############################################################################
# add monitors for the state branch rates, background rates, and branch rates #
###############################################################################

# one extended-Newick monitor per rate vector; each one writes the tree
# annotated with that vector every 10th generation
state_rate_monitor      = mnExtNewick(filename=output_fn + "state_branch_rates.trees", tree, state_branch_rate, printgen=10, isNodeParameter=true)
branch_rate_monitor     = mnExtNewick(filename=output_fn + "branch_rates.trees", tree, branch_rates, printgen=10, isNodeParameter=true)
background_rate_monitor = mnExtNewick(filename=output_fn + "background_rates.trees", tree, background_branch_rate, printgen=10, isNodeParameter=true)

monitors.append( state_rate_monitor )
monitors.append( branch_rate_monitor )
monitors.append( background_rate_monitor )


################
# The Analysis #
################

### workspace mcmc ###
# chain lengths, named so they are easy to find and change
burnin_generations   = 20000
burnin_tuning_every  = 50
sampling_generations = 200000

# mymcmc = mcmc(mymodel, monitors, moves, nruns=1, combine="mixed", moveschedule="single")
mymcmc = mcmc(mymodel, monitors, moves, nruns=1, combine="mixed")

### run the MCMC ###
# burn-in with move tuning, report the operator summary, then sample
mymcmc.burnin(generations=burnin_generations, tuningInterval=burnin_tuning_every)
mymcmc.operatorSummary()
mymcmc.run(generations=sampling_generations)


##################################################################################################
# summarize state branch rates, background rates, and branch rates as maximum a posteriori trees #
##################################################################################################

# The three rate traces are summarised the same way, in this order: read the
# tree trace written by the monitor, then write its summary tree next to it.
rate_traces = ["state_branch_rates", "branch_rates", "background_rates"]
for (trace_idx in 1:rate_traces.size()) {
    treetrace = readTreeTrace(output_fn + rate_traces[trace_idx] + ".trees", treetype="clock")
    map_tree = mapTree(treetrace, output_fn + rate_traces[trace_idx] + "_tree.tre")
}


## quit ##
#q()