// restaurant_choice.wppl (forked from agentmodels/webppl-agents)
// make sure the code runs both in the browser and on the command line:
var inBrowser = typeof(argv) === 'undefined',
_argv = inBrowser ? {} : argv;
// agent parameters:
var params = extend({
maxLambda: 0.99, // upper bound on local relative aspiration in each step (must be minLambda...1)
minLambda: 0.01, // lower bound on local relative aspiration in each step (must be 0...maxLambda)
lossCoeff4variance: 100.0, // weight of variance in loss function, must be >= 0
lossCoeff4KLdiv: 0.0, // weight of KL divergence in loss function, must be >= 0
lossCoeff4entropy: 1.0, // weight of entropy in loss function, must be >= 0
lossCoeff4random: 0.0, // weight of random noise in loss function, must be >= 0
softmaxInvTemp: 1.0, // inverse temperature of softmax mixture of actions, must be > 0
rescaling4Successors: 1.0, // degree (0...1) of aspiration rescaling. (expectation is only preserved if this is 1.0)
}, _argv);
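// (A hedged usage sketch: when run from the command line, any of the parameters
// above can presumably be overridden via webppl's argv mechanism, e.g.
//   webppl restaurant_choice.wppl -- --minLambda 0.05 --softmaxInvTemp 2.0
// where the flag values are merely illustrative.)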
// simulation options:
var options = extend({
aleph0: [1.2, 1.4], // initial global aspiration interval (desired range of expected return)
debug: false, // if true, print debug messages
}, _argv),
aleph0 = typeof(options.aleph0Lo) === 'undefined' ? options.aleph0 : [options.aleph0Lo, options.aleph0Hi];
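// (Likewise, the aspiration interval can be set via the two scalar flags read above,
// e.g. with the hypothetical values
//   webppl restaurant_choice.wppl -- --aleph0Lo 1.0 --aleph0Hi 1.5
// which would yield aleph0 = [1.0, 1.5].)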
// fold: restaurant constants, tableToIndicatorIncrementFct
var ___ = ' ';
var G1 = { name : 'Donut N' };
var DS = { name : 'Donut S' };
var V = { name : 'Veg' };
var N = { name : 'Noodle' };
// turn a lookup table into an increment function (state, action) -> increment:
var tableToIndicatorIncrementFct = function(table, feature) {
return function(state, action) {
// the state's feature is either a restaurant object (looked up by name)
// or a plain road character (looked up via its first character):
var f = feature(state),
stateFeatureName = f.name,
inc = stateFeatureName ? table[stateFeatureName] : table[f['0']];
return inc;
};
};
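// (Illustration with hypothetical states vegState, whose feature is the Veg
// restaurant object, and roadState, whose feature is the road character 's',
// using the feature function defined further below:
//   var inc = tableToIndicatorIncrementFct({'Veg': 3, 's': -0.1}, feature);
//   inc(vegState, 'l');  // --> 3    (looked up via f.name)
//   inc(roadState, 'l'); // --> -0.1 (looked up via the character itself)
// )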
//
// Construct world
var grid = [
['#', '#', '#', '#', V , '#'],
['#', '#', '#', ___, ___, ___],
['#', '#', G1 , 's', '#', ___],
['#', '#', '#', 's', '#', ___],
['#', '#', '#', ___, ___, ___],
['#', '#', '#', ___, '#', N ],
[___, ___, ___, ___, '#', '#'],
[DS , '#', '#', ___, '#', '#']
];
var mdp = makeGridWorldMDP({
grid,
start: [3, 1],
totalTime: 9
});
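// (Note: following the agentmodels gridworld convention, start is presumably
// [x, y] with [0, 0] at the bottom-left cell of the grid.)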
var world = mdp.world;
var transition = world.transition;
var stateToActions = world.stateToActions;
// Specify distribution of indicator increments (via expected value and variance)
// (Note: under a "maximization" paradigm, this would be called "reward" or "utility")
var expectedIndicatorIncrementTable = {
'Donut S': 1,
'Donut N': 1,
'Veg': 3,
'Noodle': 2,
's': -0.1, // sand roads take time
' ': -0.1
};
var varianceOfIndicatorIncrementTable = {
'Donut S': 0,
'Donut N': 0,
'Veg': 0,
'Noodle': 0,
's': 0, // sand roads have a deterministic cost
' ': 0.01 // normal roads might have traffic jams
};
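// (Worked example: the per-step variance of 0.01 on normal roads amounts to a
// standard deviation of sqrt(0.01) = 0.1 per step around the expected -0.1.)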
var feature = world.feature; // returns a restaurant object or a road character
var expectedDelta = tableToIndicatorIncrementFct(expectedIndicatorIncrementTable, feature),
varianceOfDelta = tableToIndicatorIncrementFct(varianceOfIndicatorIncrementTable, feature);
// uninformed policy, used in the Shannon entropy term:
var uninformedPolicy = function(state) {
return Categorical({vs: ["u", "d", "l", "r"], ps: [0.25, 0.25, 0.25, 0.25]});
};
// (Note: when an action a is refined into variants a' and a'', the probability of a
// under uninformedPolicy should be split additively between a' and a'', since that
// leaves behavior invariant. In particular, cloning an action and splitting its
// uninformedPolicy probability in any way should not change the agent's behavior.)
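// (A minimal sketch of that invariance, with a hypothetical refinement of 'u'
// into variants 'u1' and 'u2' splitting its probability 0.25 additively:
//   var refinedUninformedPolicy = function(state) {
//     return Categorical({vs: ["u1", "u2", "d", "l", "r"],
//                         ps: [0.125, 0.125, 0.25, 0.25, 0.25]});
//   };
// )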
// Reference policy for use in KL divergence
// (can be used to steer agent towards certain actions):
var refPolicy = function(state) {
return Categorical({vs: ["u", "d", "l", "r"], ps: [0.7, 0.1, 0.1, 0.1]});
};
// initialize the agent
var agent = makeMDPAgentSatisfia(extend(params, {
expectedIndicatorIncrement: expectedDelta, varianceOfIndicatorIncrement: varianceOfDelta,
uninformedPolicy, refPolicy,
options
}), world);
// extract its methods:
var getLocalPolicy = agent.getLocalPolicy, propagateAspiration = agent.propagateAspiration,
getAspiration4state = agent.getAspiration4state,
V = agent.V, V2 = agent.V2,
entropy = agent.entropy, KLdiv = agent.KLdiv;
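// (Note: V here shadows the Veg restaurant constant defined above; this is safe
// because the grid has already captured that object. getLocalPolicy(state, aleph)
// returns a distribution over actions that sample() can draw from, as used below.)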
// Recursively simulate a trajectory from a state and an aspiration:
var simulate = function(state, aleph) {
if (options.debug) console.log("simulate", state, aleph);
var localPolicy = getLocalPolicy(state, aleph),
action = sample(localPolicy),
r = expectedDelta(state, action),
r2 = squared(r) + varianceOfDelta(state, action);
var sa = [state, action];
if (state.terminateAfterAction) {
return {
trajectory: [sa], // sequence of [state, action] pairs
conditionalExpectedIndicator: r, // expected indicator conditional on this trajectory
conditionalExpectedSquaredIndicator: r2 // expected squared indicator conditional on this trajectory
};
} else {
var nextState = transition(state, action),
nextAleph = propagateAspiration(state, aleph, action, r, nextState),
nextOut = simulate(nextState, nextAleph);
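// The squared-indicator recursion below uses, conditional on the trajectory,
//   E[(Delta + X)^2] = E[Delta^2] + 2 E[Delta] E[X] + E[X^2]
//                    = r2 + 2*r*E[X] + E[X^2],
// assuming this step's increment Delta and the future return X are independent
// given the trajectory (r = E[Delta], r2 = E[Delta^2] as computed above).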
return {
trajectory: [sa].concat(nextOut.trajectory),
conditionalExpectedIndicator: r + nextOut.conditionalExpectedIndicator,
conditionalExpectedSquaredIndicator: r2 + 2*r*nextOut.conditionalExpectedIndicator + nextOut.conditionalExpectedSquaredIndicator
};
}
};
// calculate the expected return and its std. dev. using the built-in expectation function:
var expectedIndicator = expectation(Infer({ model() {
return simulate(mdp.startState, aleph0).conditionalExpectedIndicator;
}}));
console.log("in expectation, we desired indicator", aleph0, "and actually get", expectedIndicator);
var expectedSquaredIndicator = expectation(Infer({ model() {
return simulate(mdp.startState, aleph0).conditionalExpectedSquaredIndicator;
}})),
stddev = Math.sqrt(expectedSquaredIndicator - Math.pow(expectedIndicator,2));
console.log("indicator has a std.dev. of", stddev,
"which should equal", Math.sqrt(V2(mdp.startState, aleph0) - Math.pow(V(mdp.startState, aleph0),2)));
console.log("Entropy of policy is", entropy(mdp.startState, aleph0));
console.log("KL divergence from reference policy is", KLdiv(mdp.startState, aleph0));
if (inBrowser) {
// simulate and show a single trajectory:
var out = simulate(mdp.startState, aleph0);
viz.gridworld(world, { trajectory: map(first, out.trajectory) });
}
if (false) { // flip to true to append results to a JSON file
// (this presumably requires running webppl with --require webppl-json)
// filename for the JSON output:
var filename = 'output_rotateminlambda.json';
// read existing JSON data from the file (if it exists):
var existingData = json.read(filename) || [];
// collect the quantities of interest into a single record:
var data = {
expectedIndicator: expectedIndicator,
aleph0: aleph0,
minLambda: params.minLambda
};
// append the new record and write the updated data back to the JSON file
// (concat rather than push, since WebPPL discourages mutation):
json.write(filename, existingData.concat([data]));
}