6-832-spring-2009-underactuated-robotics/pendfun.m

function [J,dJdalpha] = pendfun(alpha_in)

% dynamics dt
dt = 0.01; T = 2.5;

% pendulum parameters
global m g l I b xdes;
m=1; g = 9.8; l = 1; I = m*l*l; b = 0.1;
xdes = [pi 0]'; % the desired final state

N = floor(T/dt)+1;
xtape = zeros(2,N);
utape = zeros(1,N);
alpha = zeros(N,1);
if nargin>0
    alpha = alpha_in;
end

% Simulate forward
IC = [0 0]';
x = IC; % arbitrary (but fixed) initial condition
for i=1:N
    xtape(:,i) = x;
    u = alpha(i);
    utape(i) = u;
    x = x + dynamics(x,u).*dt;
end

figure(24)
plot(xtape(1,:),xtape(2,:)); drawnow;

dJdalpha = compute_gradients(xtape,utape,dt);
J = sum(cost(xtape,utape,dt)) + finalCost(xtape(:,N));

end % of pendfun


% =========================================================
% This function returns the gradients by
% integrating the adjoint equations
% =========================================================
function dJdalpha = compute_gradients(x,u,dt)
global xdes; %desired x location
N = size(x,2);

[Q,R,Qend] = get_QR;
Q = Q.*dt; R = R.*dt;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%FILL IN THIS SECTION TO INTEGRATE ADJOINT EQUATIONS TO PERFORM BPTT
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
[dfdx,dfdu] = gradients(x(:,N),u(N)); 
dgdu = u(N)*R;
dudalpha = zeros(1,N); dudalpha(N) = 1; %gradient of u w.r.t. parameters for open loop policy at N
F_alpha = dfdu*dudalpha;
G_alpha = dgdu*dudalpha;

y = ; %give terminal condition for y
dJdalpha = (G_alpha'-F_alpha'*y).*dt; % dJdalpha for first time step
for n=N-1:-1:1 %integrate adjoint equations backwards in time
    dgdx = ; %gradient of cost with respect to current state 
    dgdu = ; %gradient of cost with respect to current action
    [dfdx,dfdu] = gradients(x(:,n),u(n)); %gradient of f w.r.t. current position, action
    F_x = dfdx;
    G_x = dgdx;
    dudalpha = zeros(1,N); dudalpha(n) = 1; %gradient of u w.r.t. parameters for open loop policy at current time
    F_alpha = dfdu*dudalpha;
    G_alpha = dgdu*dudalpha;
    y = ; %solve for y
    dJdalpha = ; %add this step's contribution to dJdalpha
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
end

% ============================================================
% This function evaulates the gradients at a particular x,u
% ============================================================
function [dfdx,dfdu] = gradients(x,u)
% pend parameters
global m g l I b;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Give the gradients of the dynamics with respect to 
% a particular state and action
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
dfdx = ;
dfdu = ;

end

% =============================================================
% This function defines the continuous dynamics of the pendulum
% =============================================================
function xdot = dynamics(x,u)
% pendulum parameters
global m g l I b;

xdot = [x(2,:); (u-m*g*l*sin(x(1,:))-b*x(2,:))./I];
end


% =============================================================
% This function defines the instantaneous cost (i.e. g(x,u))
% =============================================================
function C = cost(X,u,dt)
global xdes;


Xerr = X - repmat(xdes,1,size(X,2));
Xerr(1,:) = mod(Xerr(1,:)+pi,2*pi)-pi;

% implement a quadratic cost

C = ;
end

% =============================================================
% Implements a final cost
% =============================================================
function C = finalCost(X)
global xdes;

Xerr = X - repmat(xdes,1,size(X,2));
Xerr(1,:) = mod(Xerr(1,:)+pi,2*pi)-pi;

% implement the final cost cost

C = ;
end

% ============================================================
% Returns the cost matrices
% ============================================================
function [Q,R,Qend] = get_QR

% penalty matrices

end