% faq_episode.m

function [q, stats] = facl_episode( q, rates, segments, qualities, complexities, capacities, alpha, lambda, tau, markov, parallel, benchmark)
% EPISODE = a video episode using the FA(Q) learner
%
%  q = initial Q-value table
%  rates = the available video bitrates
%  segments = the length of the video in segments
%  qualities = the quality matrix for every segment and every bitrate
%  complexities = vector of the complexity indexes
%  capacities = channel vector
%  alpha = learning rate for the episode
%  lambda = exponential discount factor
%  tau = exploration temperature for the episode
%  markov = 1 if the channel is Markovian, 0 if its values are given at
%           10ms intervals
%  parallel = (optional, default 0) flag forwarded to faq_state; the Q-value
%             update below only runs when parallel + benchmark == 0
%  benchmark = (optional, default 0) flag; the Q-value update below only
%              runs when parallel + benchmark == 0
%
%  q = the updated Q-value table at the end of the episode
%  stats = a matrix containing the episode statistic, one row per segment.
%          The columns contain:
%            1 : 4   s_t (1) | w_t (1) | s_t (2) | w_t (2)
%                    (s_t is divided into the components of the linear
%                    combination, as returned by faq_state)
%            5       h_t (channel capacity recorded for the segment)
%            6       a_t (index of the chosen action in rates)
%            7       B_t (buffer level after the segment)
%            8       r_t (total reward = col 10 - col 11 - col 12)
%            9       q_t (quality of the downloaded segment)
%            10      r_t^q
%            11      r_t^b (1)
%            12      r_t^b (2)
%            13      t (elapsed time after the segment)
%            14      exp (1 if the action is exploratory)
%            15 : 22 Q(s_t, *)
%
%  NOTE(review): the file header names this file faq_episode.m while the
%  function is declared as facl_episode; MATLAB calls the main function of
%  a file by the file name - confirm which name callers are meant to use.

% optional arguments: the original code read parallel and benchmark without
% ever defining them; they default to 0 so that the Q-value update is
% enabled for existing 10-argument callers
if (nargin < 11 || isempty(parallel)),
    parallel = 0;
end
if (nargin < 12 || isempty(benchmark)),
    benchmark = 0;
end

% initialization
timer = 0;
buffer = 0;
last_update = 1;
download_time = 0;
update = false;
% 22 columns: 14 scalar statistics plus 8 Q-values (columns 15 : 22)
stats = zeros(segments, 22);

% segment-by-segment decision loop
for segment = 1 : segments,
    
    % state value initialization (necessary for segment 1)
    capacity = 0;
    old_capacity = 0;
    prev_quality = 0;
    
    % state value retrieval
    if (segment > 1),
        capacity = stats(segment - 1, 5);
        % for segment 2 there is no segment - 2, so row 1 is used again
        old_capacity = stats(max(segment - 2, 1), 5);
        % NOTE(review): column 8 holds the total reward r_t, while the
        % quality q_t is stored in column 9 - confirm which one faq_state
        % expects as prev_quality
        prev_quality = stats(segment - 1, 8);
    end
    % state calculation
    states = faq_state(capacity, old_capacity, prev_quality, complexities(segment), buffer, parallel, segment);
    % stat collection: s_t
    stats(segment, 1 : 4) = states;
    
    % Q-value update for the offline algorithm: performed when the previous
    % action was flagged by choose_action (update) or at the last segment
    if (parallel + benchmark == 0 && (update || segment == segments)),
        q = update_q(q, segment, last_update, stats(:, 1 : 4), stats(:, 6), stats(:, 8), lambda, alpha);
        last_update = segment;
    end
    
    % FA(Q) Softmax policy
    q_a = find_q(q, states);
    stats(segment, 15 : 22) = q_a;
    [update, action] = choose_action(q_a, tau);
    
    % stat collection: a_t and exp
    stats(segment, 6) = action;
    if (update),
        stats(segment, 14) = 1;
    end
    
    % Segment download (for Markovian and instantaneous channels)
    if (markov == 1),
        download_time = download_markov(rates(action), capacities(segment));
        stats(segment, 5) = capacities(segment);
    else
        % the chosen bitrate is rates(action), as in the Markovian branch
        % (the original passed an undefined variable named rate)
        [download_time, stats(segment, 5)] = download_step(capacities, rates(action), timer);
    end
    
    % reward calculation and stat collection
    stats(segment, 9) = qualities(segment, action);
    if (segment > 1),
        [r_u, r_b, pen] = faq_reward(stats(segment, 6), stats(segment - 1, 6), buffer, download_time);
        stats(segment, 10) = r_u;
        stats(segment, 11) = r_b;
        stats(segment, 12) = pen;
    end
    % for segment 1 the three components stay at 0, so r_t is 0
    stats(segment, 8) = stats(segment, 10) - stats(segment, 11) - stats(segment, 12);
    
    % buffer update: the buffer drains during the download (never below 0)
    % and then grows by 2 for the downloaded segment
    buffer = max(0, buffer - download_time) + 2;
    timer = timer + download_time;
    % buffer overflow control: above 20 the segment takes 2 time units
    % overall instead of download_time, and the buffer change is adjusted
    % accordingly
    if (buffer > 20),
        timer = timer + 2 - download_time;
        buffer = buffer - 2 + download_time;
    end
    stats(segment, 7) = buffer;
    stats(segment, 13) = timer;
end

end