function run_algorithm(task, cooplev, method, cfg)
% RUN_ALGORITHM Runs a given MARL algorithm on a given type of task.
% RUN_ALGORITHM(TASK, COOPLEV, METHOD, CFG)
%
% Runs a MARL algorithm on a given type of task. Uses 5x5 robot
% gridworlds as tasks. Specifically designed for use with alg_gui, but can
% also be used independently. Saves the algorithm data in a field of the
% 'algdata' structure in the base workspace. Creates the variable if not
% present. The field name is formed as a combination of the task type and
% the algorithm type. Any previous data for that task/algorithm pair is
% cleared and the resources associated with it are released.
%
% Parameters:
% TASK - specifies the type of task. One of 'nav', 'rescue',
% for the navigation task and the search & rescue task, respectively.
% COOPLEV - specifies the cooperation level in the task. One of 'coop', 'comp',
% 'mixed', for fully cooperative, fully competitive, and mixed tasks,
% respectively.
% METHOD - specifies the algorithm to run. One of 'q' (single-agent
% Q-learning), 'fsq' (full-state Q-learning), 'asf' (adaptive state
% focus Q-learning), 'teamq' (team Q-learning), 'wolf'
% (Win-or-Learn-Fast policy hill climbing), 'opt' (predefined optimal
% sequences on the given task).
% CFG - configuration of learning process. Structure with the
% following fields (all optional)
% 'stopearly' - if learning should be stopped upon
% convergence. Default 1 (yes).
% 'showafter' - make the world visible only after this number of
% trials. If Inf, the world remains invisible throughout the learning
% process. Default Inf.
% 'slowdownafter' - like 'showafter', but for slowing down the learning
% process for visualization. Takes effect only after 'showafter'
% trials. Default Inf.
% 'pausebeforerun' - if world should be held on screen until
% keypress before starting the learning. Default 1 (hold on
% screen).
% 'useseeds' - whether the random number generator should be
% deterministically initialized for reproducible behavior of the
% algorithms. Default 1 (use reproducible behavior).
%
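% Example (illustrative call, using only parameter values documented above):
%   cfg.stopearly = 0;      % run all trials, do not stop early at convergence
%   cfg.useseeds = 1;       % reproducible random seeds
%   run_algorithm('nav', 'coop', 'q', cfg);
%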
% See also replay_algorithm
% Author: Lucian Busoniu
% Version: 2.0
% History: 6 Sept 2006, 2.0 -- added support for robot rescue gridworld, removed competitive /
% minimax code
DEF_CFG.stopearly = 1;
DEF_CFG.pausebeforerun = 1;
DEF_CFG.useseeds = 1;
DEF_VIEWCFG.repres = 'abstract'; % no longer selectable in the GUI, always the abstract representation
DEF_VIEWCFG.showafter = Inf;
DEF_VIEWCFG.slowdownafter = Inf;
DEF_CFG.viewcfg = DEF_VIEWCFG;
if nargin < 4, cfg = DEF_CFG;       % cfg is the 4th argument and is optional
else cfg = checkparams(cfg, DEF_CFG); end;
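% checkparams is assumed to merge the user-supplied cfg over DEF_CFG, filling
% in any missing fields; a minimal sketch of such a merge (hypothetical, the
% actual checkparams may perform additional validation):
%   fnames = fieldnames(DEF_CFG);
%   for k = 1:length(fnames),
%       if ~isfield(cfg, fnames{k}), cfg.(fnames{k}) = DEF_CFG.(fnames{k}); end;
%   end;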
% task
% cooplev
% method
% cfg
% viewcfg = cfg.viewcfg
% return
% top-level def learning params
Nag = 2; % two agents in most cases
lp.trials = 100;
lp.maxiter = 1000;
if cfg.stopearly, lp.convgoal = 0.001;
else lp.convgoal = -1;
end;
lp.avgtrials = 10;
lp.showconv = 1; % show online conv plot for all algorithms
lp.closeconv = 0;
% top-level default agent learning params
alp.alpha = .1;     % learning rate
alp.gamma = .98;    % discount factor
alp.lambda = 0.5;   % eligibility trace decay
alp.epsilon = .7;   % exploration probability (epsilon-greedy)
% init structs
seeds = struct();
OVWR = struct();
% ---------------------------
% By task type and cooperation level
% setup task structure + reward function
% setup common learning params
% setup learning params overwrites
switch task,
% ------------------------------------------
% Navigation task
case 'nav',
% common learning parameters/overwrites for navigation task
alp.window = 256; % for ASF
lp.trials = 100;
lp.maxiter = 1000;
alp.alpha = .2;
alp.gamma = .98;
alp.lambda = 0.5;
alp.epsilon = .7;
% A set of optimal action sequences
OPTSEQ.coop = {'ursruusu', 'ulluurul'};
OPTSEQ.mixed = {'ursruuurr', 'ulluuull'};
switch cooplev,
case 'coop',
grid = [1 0 4 0 1; % Y
0 0 0 0 0; % |
1 1 0 1 1; % |
0 0 0 0 0; % |
2 0 0 0 3]; % |
% 0 -- -------> X 0
goal.type = 'coop';
goal.rew = [-.1 NaN 10];
% random seeds
seeds.q = 1e12;
seeds.wolf = 1e6;
seeds.teamq = 1e3;
seeds.fsq = 1e10;
seeds.asf = 1e9;
% max trials settings, -1 = default
max_trials.q = 40;
max_trials.wolf = -1;
max_trials.teamq = -1;
max_trials.fsq = -1;
max_trials.asf = -1;
case 'mixed',
% agent 1: 2, agent 2: 3, goal 1: 4, goal 2: 5
grid = [5 0 0 0 4; % Y
0 0 0 0 0; % |
1 1 0 1 1; % |
0 0 0 0 0; % |
2 0 0 0 3]; % |
% 0 -- -------> X 0
goal.type = 'mixed';
goal.rew = [-.1 -.1 10];
% random seeds
seeds.q = 1e12;
seeds.wolf = 1e6; % 15 steps, but can't get it to work better...
seeds.teamq = 1e3;
seeds.fsq = 1e10;
seeds.asf = 1e9;
% max trials settings, -1 = default
max_trials.q = 40;
max_trials.wolf = -1;
max_trials.fsq = -1;
max_trials.asf = -1;
otherwise, error(['Unknown cooperation level [' cooplev ']']);
end; % cooplev switch for navigation task
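% fliplr(grid') converts the matrix as typed above (rows listed top to
% bottom) into (x, y) indexing with the origin at the bottom-left corner,
% matching the X / Y axis annotations next to the grids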
grid = fliplr(grid');
% process grid matrix into world data structures
[i j] = find(grid == 1);
obstacles = [i j]';                 % obstacle coordinates, one [x; y] column per obstacle
worldsize = size(grid)';
[x1 y1] = find(grid == 2); [x2 y2] = find(grid == 3);       % agent start positions
[xg1 yg1] = find(grid == 4); [xg2 yg2] = find(grid == 5);   % goal positions
positions = [x1 y1; x2 y2]'; goal.pos = [xg1 yg1; xg2 yg2]'; % one [x; y] column per agent / goal
% -----------------------------------
% param overwrites for methods
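% each OVWR entry is presumably applied (e.g., eval'd as "name = value")
% right before the corresponding method is run, overriding the defaults set
% above (assumption; the code that applies the overwrites is elsewhere)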
% custom rewards for teamq
ovwr.name = 'goal.rew';
ovwr.value = '[0 NaN 10]';
OVWR.teamq = {ovwr};
% END navigation task CASE
% ------------------------------------------
% Rescue robotics simulated task
case 'rescue',
% common learning params / overwrites for the rescue task
lp.avgtrials = 20;
% common learning params for algorithms on this task
lp.trials = 100;
lp.maxiter = 1000;
alp.alpha = .2;
alp.gamma = .98;
alp.lambda = 0.5;
alp.epsilon = .7;
switch cooplev,
case 'coop',
% % Agents can pass the narrow way, have to coordinate upon return to the home base
% grid = [0 0 0 0 0 0;
% 0 0 0 101.1 101 101.2; % |
% 0 0 0 -1 -1 -1; % |
% 0 0 0 0 0 2;
% 1 10 10 10 -1 0]; % |
% % 0 -- -------> X 0
% % random seeds
% seeds.q = 1e10; % OK, stop near target
% seeds.wolf = 0;