// Model type: stochastic multi-player game (PRISM-games "smg").
smg

// Player "controller" owns module cont, whose commands are all unlabelled.
player controller
  cont
endplayer

// Player "shield" owns the four labelled actions used by module sh.
// The list is grouped by axis (NS first, then EW); the order carries no meaning.
player shield
  [reduceNS_Shield],
  [reduceNS_ShieldDev],
  [reduceEW_Shield],
  [reduceEW_ShieldDev]
endplayer

// Player "environment" owns module env, whose single command is unlabelled.
player environment
  env
endplayer

// Upper bound of each direction counter. It has no value here, so it must be
// supplied from outside the model (e.g. on the command line).
const int lmax;

// Turn marker: 0 = env moves, 1 = cont moves, 2 = sh moves.
global move : [0..2] init 0;

// One saturating counter per direction, each kept within 0..lmax.
global N : [0..lmax] init 0;
global E : [0..lmax] init 0;
global S : [0..lmax] init 0;
global W : [0..lmax] init 0;

// Environment step (enabled when move=0).
//
// Picks one non-empty subset of the counters {N, E, S, W}, increments every
// counter in that subset by one (saturating at lmax), and hands the turn to the
// controller (move'=1). Each of the 15 subsets appears exactly once:
//   4 singletons  x 1/8    = 60/120
//   6 pairs       x 7/120  = 42/120
//   4 triples     x 3/120  = 12/120
//   1 quadruple   x 6/120  =  6/120
//                           --------
//                            120/120
//
// Fix relative to the previous version: the S+W pair and the N+E+W triple were
// each listed twice, so the E+W pair and the N+E+S triple could never occur.
module env
  [] move=0 ->
               // singletons
               1/8 : (N'=min(lmax,N+1)) & (move'=1) +
               1/8 : (S'=min(lmax,S+1)) & (move'=1) +
               1/8 : (E'=min(lmax,E+1)) & (move'=1) +
               1/8 : (W'=min(lmax,W+1)) & (move'=1) +

               // pairs
               7/120 : (N'=min(lmax,N+1)) & (S'=min(lmax,S+1)) & (move'=1) +
               7/120 : (E'=min(lmax,E+1)) & (W'=min(lmax,W+1)) & (move'=1) +
               7/120 : (E'=min(lmax,E+1)) & (N'=min(lmax,N+1)) & (move'=1) +
               7/120 : (W'=min(lmax,W+1)) & (N'=min(lmax,N+1)) & (move'=1) +
               7/120 : (E'=min(lmax,E+1)) & (S'=min(lmax,S+1)) & (move'=1) +
               7/120 : (W'=min(lmax,W+1)) & (S'=min(lmax,S+1)) & (move'=1) +

               // triples (named by the counter that is left out: S, W, N, E)
               3/120 : (E'=min(lmax,E+1)) & (N'=min(lmax,N+1)) & (W'=min(lmax,W+1)) & (move'=1) +
               3/120 : (N'=min(lmax,N+1)) & (E'=min(lmax,E+1)) & (S'=min(lmax,S+1)) & (move'=1) +
               3/120 : (E'=min(lmax,E+1)) & (S'=min(lmax,S+1)) & (W'=min(lmax,W+1)) & (move'=1) +
               3/120 : (W'=min(lmax,W+1)) & (S'=min(lmax,S+1)) & (N'=min(lmax,N+1)) & (move'=1) +

               // all four
               6/120 : (W'=min(lmax,W+1)) & (S'=min(lmax,S+1)) & (E'=min(lmax,E+1)) & (N'=min(lmax,N+1)) & (move'=1);
endmodule

// Controller step (enabled when move=1).
// Chooses nondeterministically between two options, records the choice in
// `action`, and passes the turn on (move'=2).
module cont
  // Last choice made by the controller; module sh reads it in its guards.
  action : [0..1] init 0;

  [] move=1 -> (move'=2) & (action'=0);
  [] move=1 -> (move'=2) & (action'=1);
endmodule

// Shield step (enabled when move=2). Every command decrements one pair of
// counters (never below 0) and returns the turn to the environment (move'=0).
//
//   action=0 : reduceNS_Shield    (N,S)   or   reduceEW_ShieldDev (E,W)
//   action=1 : reduceEW_Shield    (E,W)   or   reduceNS_ShieldDev (N,S)
//
// The plain *_Shield command is the one paired with the controller's recorded
// action; the *_ShieldDev command reduces the other pair instead.
// NOTE(review): these labelled commands update global variables, which stock
// PRISM documents as disallowed. Confirm the target tool accepts it.
module sh
  [reduceNS_Shield] action=0 & move=2    -> (move'=0) & (N'=max(0,N-1)) & (S'=max(0,S-1));
  [reduceEW_Shield] action=1 & move=2    -> (move'=0) & (E'=max(0,E-1)) & (W'=max(0,W-1));

  [reduceNS_ShieldDev] action=1 & move=2 -> (move'=0) & (N'=max(0,N-1)) & (S'=max(0,S-1));
  [reduceEW_ShieldDev] action=0 & move=2 -> (move'=0) & (E'=max(0,E-1)) & (W'=max(0,W-1));
endmodule

// Absolute difference between the two axis totals: |(N+S) - (E+W)|.
// Written as the larger of the two signed differences.
formula diff = max((N+S)-(E+W), (E+W)-(N+S));

// State reward: every state is charged its current axis imbalance.
rewards "difference"
  true : diff;
endrewards

// Weight on the diff term in the reward structure below; the interference term
// is weighted by (1 - lambda).
const double lambda = 0.8;
// Fixed cost added (scaled by 1 - lambda) to *_ShieldDev actions. 2 * lmax is
// also the largest value diff can take, since each counter is at most lmax.
const double interference = 2 * lmax;

// Transition rewards on the shield's actions.
// Every shield action is charged lambda * diff, evaluated in the state where
// the action is taken; the *_ShieldDev actions additionally pay the
// (1 - lambda) * interference term.
rewards "differenceWithInterferenceCost"
  // N/S axis
  [reduceNS_Shield] true : lambda * diff;
  [reduceNS_ShieldDev] true : lambda * diff + (1 - lambda) * interference;

  // E/W axis
  [reduceEW_Shield] true : lambda * diff;
  [reduceEW_ShieldDev] true : lambda * diff + (1 - lambda) * interference;
endrewards