%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Project < Implement q_learning big matrix >
% Motive : This is a very simple example showing how Q-learning works
% Date : 2016/09/10
% Author : Kun Da Lin
% Comments: Language: MATLAB.
% This is the most important formula of Q-learning:
% Q(state,x1)= oldQ + alpha * (R(state,x1)+ (gamma * MaxQ(x1)) - oldQ);
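% A worked one-step example of the update (hypothetical numbers, not taken
% from an actual run), using alpha = 0.9 and gamma = 0.8 as in the code below:
%   a move that reaches the goal:  oldQ = 0, R = 1, bootstrap dropped (gamma*MaxQ = 0)
%       newQ = 0 + 0.9*(1 + 0 - 0)        = 0.9
%   a later move into that cell:   oldQ = 0, R = 0, MaxQ(next state) = 0.9
%       newQ = 0 + 0.9*(0 + 0.8*0.9 - 0)  = 0.648
% so the reward at the goal spreads backwards through the table, one cell per visit.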
% Here is the state layout (coordinates on the left, wall map on the right):
%   (1,1) (1,2) (1,3) (1,4) (1,5) (1,6)      wall wall wall wall wall wall
%   (2,1) (2,2) (2,3) (2,4) (2,5) (2,6)      wall                    wall
%   (3,1) (3,2) (3,3) (3,4) (3,5) (3,6)      wall                    wall
%   (4,1) (4,2) (4,3) (4,4) (4,5) (4,6)      wall                    wall
%   (5,1) (5,2) (5,3) (5,4) (5,5) (5,6)      wall                    wall
%   (6,1) (6,2) (6,3) (6,4) (6,5) (6,6)      wall wall wall wall wall wall
% start position: (2,2)
% goal  position: (5,5)
% Through this program you can see how the agent learns the best way to reach
% its goal. If the agent bumps into a wall, the move is blocked and it stays
% where it was (wall cells are marked -1 in the map); if it reaches the goal,
% it receives +1 as a positive reward. You will see the Q table gradually
% improve and converge to the optimal values.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% The grid above is a 4*4 matrix surrounded by one extra layer of wall, so it
% is a 6*6 matrix in total. If you want a bigger matrix, feel free to change
% goal_x and goal_y to the size you want, and don't forget to increase
% max_round as well.
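% For example (untested illustration, not values from the original author), a
% 50*50 interior could be tried with goal_x = 50, goal_y = 50 and a larger
% training budget such as max_round = 5000.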
clear;
%% Set your goal position
goal_x=20;
goal_y=20;
max_round=1000;
qtable = zeros(goal_x+1,goal_y+1,4);    % Q(x,y,action): actions are 1=up, 2=down, 3=left, 4=right
round = 0;
while round<max_round
    % Rebuild the map each round: 0 = free cell, -1 = wall on the border
    map_matrix=zeros(goal_x+1,goal_y+1);
    map_matrix(1:goal_x+1,1)=-1;
    map_matrix(1:goal_x+1,goal_y+1)=-1;
    map_matrix(1,1:goal_y+1)=-1;
    map_matrix(goal_x+1,1:goal_y+1)=-1;
    % map_matrix = [1,1,1,1,1,1;
    %               1,0,0,0,0,1;
    %               1,0,0,0,0,1;
    %               1,0,0,0,0,1;
    %               1,0,0,0,0,1;
    %               1,1,1,1,1,1];
    round=round+1;
    position_x=2;           % start position (2,2)
    position_y=2;
    count=0;
    %input('');
    while ~(position_x==goal_x && position_y==goal_y)
        a=0.9;              % learning rate (alpha)
        b=0.8;              % discount factor (gamma)
        reward=0;
        count=count+1;
        rand_action = randi(4);     % pick one of the 4 actions uniformly at random
        [max_q, max_index] = max([qtable(position_x,position_y,1) qtable(position_x,position_y,2) qtable(position_x,position_y,3) qtable(position_x,position_y,4)]);
        % Take the random action unless the greedy action is strictly better
        if(qtable(position_x,position_y,rand_action)>=qtable(position_x,position_y,max_index))
            action = rand_action;
        else
            action = max_index;
        end
        map_matrix(position_x,position_y)=count;
        pre_position_x=position_x;
        pre_position_y=position_y;
        switch action
            case 1
                position_x = pre_position_x-1; %up
            case 2
                position_x = pre_position_x+1; %down
            case 3
                position_y = pre_position_y-1; %left
            case 4
                position_y = pre_position_y+1; %right
        end
        % Bumped into the wall: stay in place, no reward, and drop the bootstrap term
        if(position_x==1 || position_x==goal_x+1 || position_y==1 || position_y==goal_y+1)
            position_x = pre_position_x;
            position_y = pre_position_y;
            reward=0;
            b=0;
            %disp('wall');
        end
        % Reached the goal: reward +1; terminal state, so drop the bootstrap term
        if(position_x==goal_x && position_y==goal_y)
            reward=1;
            b=0;
        end
        [max_qtable, max_qtable_index] = max([qtable(position_x,position_y,1) qtable(position_x,position_y,2) qtable(position_x,position_y,3) qtable(position_x,position_y,4)]);
        % You can also uncomment this to see how the agent moves step by step
        % disp(['position_x: ',num2str(position_x),' position_y: ',num2str(position_y)]);
        %% This is where the magic happens: the Q-learning update
        old_q=qtable(pre_position_x,pre_position_y,action);
        new_q=old_q+a*(reward+b*max_qtable-old_q);
        qtable(pre_position_x,pre_position_y,action)=new_q;
    end
    save_round(round,:)= count;
    disp(['round:',num2str(round),' step:',num2str(count)]);
end
disp(['If you see the minimum step count, ',num2str(goal_x+goal_y-4),', at the end, it']);
disp('means the agent has already found the best way');
disp('to reach the goal. If not, you should change max_round');
disp('to a bigger number.');
disp(' ');
if(goal_x<10 && goal_y<10)
    disp('Here is how the agent moved (step number at each visited cell, walls are -1):');
    disp(map_matrix);
end
plot(save_round);
title(['Q learning--',num2str(goal_x),'*',num2str(goal_y),' matrix']);
xlabel('episode');
ylabel('step');
%plot_action(qtable,position_x,position_y);
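
%% Optional extra (not part of the original script): read the greedy path out
% of the learned Q table. A minimal sketch under the assumption that the Q
% table has converged; the step limit and the clamp to the interior guard
% against loops and wall cells if it has not.
px = 2; py = 2;
greedy_path = [px py];
for k = 1:2*(goal_x+goal_y)                     % generous step limit
    if px==goal_x && py==goal_y
        break;
    end
    [~, best_a] = max(squeeze(qtable(px,py,:)));    % greedy action at (px,py)
    switch best_a
        case 1
            px = px-1;  %up
        case 2
            px = px+1;  %down
        case 3
            py = py-1;  %left
        case 4
            py = py+1;  %right
    end
    px = min(max(px,2),goal_x);                 % keep the sketch inside the interior
    py = min(max(py,2),goal_y);
    greedy_path = [greedy_path; px py];         %#ok<AGROW>
end
disp('Greedy path from the start (row, column):');
disp(greedy_path);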