forked from revenol/LyDROO
-
Notifications
You must be signed in to change notification settings - Fork 0
/
LyDROO.py
185 lines (140 loc) · 7.16 KB
/
LyDROO.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# #################################################################
#
# This file contains the main code of LyDROO.
#
# References:
# [1] Suzhi Bi, Liang Huang, Hui Wang, and Ying-Jun Angela Zhang, "Lyapunov-guided Deep Reinforcement Learning for Stable Online Computation Offloading in Mobile-Edge Computing Networks," IEEE Transactions on Wireless Communications, 2021, doi:10.1109/TWC.2021.3085319.
# [2] Liang Huang, Suzhi Bi, and Ying-Jun Angela Zhang, "Deep Reinforcement Learning for Online Offloading in Wireless Powered Mobile-Edge Computing Networks," in IEEE Transactions on Mobile Computing, vol. 19, no. 11, pp. 2581-2593, November 2020.
# [3] S. Bi and Y. J. Zhang, “Computation rate maximization for wireless powered mobile-edge computing with binary computation offloading,” IEEE Trans. Wireless Commun., vol. 17, no. 6, pp. 4177-4190, Jun. 2018.
#
# version 1.0 -- July 2020. Written by Liang Huang (lianghuang AT zjut.edu.cn)
# #################################################################
import scipy.io as sio # import scipy.io for .mat file I/
import numpy as np # import numpy
# Implemented based on PyTorch
from memory import MemoryDNN
# import the resource allocation function
# replace it with your algorithm when applying LyDROO in other problems
from ResourceAllocation import Algo1_NUM
import math
def plot_rate(rate_his, rolling_intv=50, ylabel='Normalized Computation Rate'):
    """Plot the rolling mean of a per-frame series with a min/max band.

    Args:
        rate_his: one-dimensional sequence with one value per time frame.
        rolling_intv: width (in frames) of the rolling window.
        ylabel: label drawn on the y axis.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # Plotting libraries are imported lazily so that importing this module
    # does not require them.
    import matplotlib.pyplot as plt
    import pandas as pd
    import matplotlib as mpl

    rate_array = np.asarray(rate_his)
    df = pd.DataFrame(rate_his)

    # The bundled seaborn style was renamed to 'seaborn-v0_8' in newer
    # matplotlib releases and the old name was later removed; use whichever
    # one this installation provides and fall back to the default style.
    for style_name in ('seaborn-v0_8', 'seaborn'):
        if style_name in mpl.style.available:
            mpl.style.use(style_name)
            break

    frames = np.arange(len(rate_array)) + 1           # 1-based frame index
    window = df.rolling(rolling_intv, min_periods=1)  # shared by mean/min/max

    plt.subplots(figsize=(15, 8))
    plt.plot(frames, np.hstack(window.mean().values), 'b')
    plt.fill_between(
        frames,
        np.hstack(window.min()[0].values),
        np.hstack(window.max()[0].values),
        color='b',
        alpha=0.2,
    )
    plt.ylabel(ylabel)
    plt.xlabel('Time Frames')
    plt.show()
# Generate a Rician fading channel with average power h and line-of-sight ratio `factor`.
# Replace it with your own channel generation when necessary.
def racian_mec(h, factor):
    """Draw one Rician-fading realization of the channel power gains.

    Args:
        h: one-dimensional array-like of average channel power gains
            (lists and tuples are accepted as well as NumPy arrays).
        factor: line-of-sight ratio; ``h * factor`` is the power of the
            line-of-sight component and ``h * (1 - factor)`` the scattered
            power.

    Returns:
        NumPy array of the same length as ``h`` with the instantaneous
        channel power gains.

    Random numbers come from the global ``np.random`` state: the in-phase
    noise is drawn first, then the quadrature noise.
    """
    # Coerce so that list/tuple input works with the arithmetic below.
    h = np.asarray(h, dtype=float)
    n = len(h)

    beta = np.sqrt(h * factor)              # LOS channel amplitude
    sigma = np.sqrt(h * (1 - factor) / 2)   # scattering std per component

    # Keep the draw order (in-phase, then quadrature) so seeded runs are
    # reproducible.
    x = sigma * np.random.randn(n) + beta
    y = sigma * np.random.randn(n)
    return np.power(x, 2) + np.power(y, 2)
if __name__ == "__main__":
    # LyDROO algorithm composed of four steps:
    #     1) 'Actor module'
    #     2) 'Critic module'
    #     3) 'Policy update module'
    #     4) 'Queueing module'

    # ------------------------------------------------------------------
    # Simulation parameters
    # ------------------------------------------------------------------
    N = 10                       # number of users
    n = 500                      # number of time frames
    K = N                        # initialize K = N
    decoder_mode = 'OPN'         # the quantization mode could be 'OP' (Order-preserving) or 'KNN' or 'OPN' (Order-Preserving with noise)
    Memory = 1024                # capacity of memory structure
    Delta = 32                   # Update interval for adaptive K
    CHFACT = 10**10              # The factor for scaling channel value
    energy_thresh = np.ones((N))*0.08   # energy consumption threshold in J per time slot
    nu = 1000                    # energy queue factor
    w = [1.5 if i % 2 == 0 else 1 for i in range(N)]   # weights for each user
    V = 20
    arrival_lambda = 3*np.ones((N))     # average data arrival, 3 Mbps per user

    print('#user = %d, #channel=%d, K=%d, decoder = %s, Memory = %d, Delta = %d'%(N,n,K,decoder_mode, Memory, Delta))

    # initialize data
    channel = np.zeros((n, N))   # channel gains
    dataA = np.zeros((n, N))     # arrival data size

    # ------------------------------------------------------------------
    # Average channel gain of every user from a path-loss model
    # ------------------------------------------------------------------
    dist_v = np.linspace(start=120, stop=255, num=N)
    Ad = 3
    fc = 915*10**6
    loss_exponent = 3            # path loss exponent
    light = 3*10**8
    h0 = np.array([Ad*(light/4/math.pi/fc/d)**(loss_exponent) for d in dist_v])

    mem = MemoryDNN(net=[N*3, 256, 128, N],
                    learning_rate=0.01,
                    training_interval=20,
                    batch_size=128,
                    memory_size=Memory
                    )

    mode_his = []                # store the offloading mode
    k_idx_his = []               # store the index of optimal offloading actor
    Q = np.zeros((n, N))         # data queue in Mbits
    Y = np.zeros((n, N))         # virtual energy queue in mJ
    Obj = np.zeros(n)            # objective values after solving problem (26)
    energy = np.zeros((n, N))    # energy consumption
    rate = np.zeros((n, N))      # achieved computation rate

    # Print progress roughly every 10% of the frames; clamp to 1 so that
    # runs with n < 10 do not divide by zero.
    progress_step = max(n // 10, 1)

    for i in range(n):
        if i % progress_step == 0:
            print("%0.1f"%(i/n))

        # Adaptive K: every Delta frames, shrink the number of candidate
        # actions according to the recently selected indices.
        if i > 0 and i % Delta == 0:
            # index counts from 0
            if Delta > 1:
                # NOTE(review): the slice [-Delta:-1] leaves out the most
                # recent index; kept unchanged -- confirm whether
                # [-Delta:] was intended.
                max_k = max(np.array(k_idx_his[-Delta:-1])%K) + 1
            else:
                max_k = k_idx_his[-1] + 1
            K = min(max_k + 1, N)

        # real-time channel generation
        h_tmp = racian_mec(h0, 0.3)
        # increase h to close to 1 for better training; it is a trick widely adopted in deep learning
        h = h_tmp*CHFACT
        channel[i, :] = h
        # real-time arrival generation
        dataA[i, :] = np.random.exponential(arrival_lambda)

        # 4) 'Queueing module' of LyDROO
        if i > 0:
            # update queues; both are clamped at zero to absorb float error
            Q[i, :] = np.maximum(Q[i-1, :] + dataA[i-1, :] - rate[i-1, :], 0)            # current data queue
            Y[i, :] = np.maximum(Y[i-1, :] + (energy[i-1, :] - energy_thresh)*nu, 0)     # current energy queue

        # scale Q and Y to close to 1; a deep learning trick
        nn_input = np.concatenate((h, Q[i, :]/10000, Y[i, :]/10000))

        # 1) 'Actor module' of LyDROO
        # generate a batch of actions
        m_list = mem.decode(nn_input, K, decoder_mode)

        r_list = []   # all results of candidate offloading modes
        v_list = []   # the objective values of candidate offloading modes
        for m in m_list:
            # 2) 'Critic module' of LyDROO
            # allocate resource for all generated offloading modes saved in m_list
            r_list.append(Algo1_NUM(m, h, w, Q[i, :], Y[i, :], V))
            v_list.append(r_list[-1][0])

        # record the index of largest reward
        best_idx = np.argmax(v_list)
        k_idx_his.append(best_idx)

        # 3) 'Policy update module' of LyDROO
        # encode the mode with largest reward
        mem.encode(nn_input, m_list[best_idx])
        mode_his.append(m_list[best_idx])

        # store max result: (objective, rate, energy) of the chosen mode
        Obj[i], rate[i, :], energy[i, :] = r_list[best_idx]

    mem.plot_cost()

    plot_rate(Q.sum(axis=1)/N, 100, 'Average Data Queue')
    plot_rate(energy.sum(axis=1)/N, 100, 'Average Energy Consumption')

    # save all data ('rate' and 'data_rate' intentionally hold the same array,
    # as in the original output format)
    sio.savemat('./result_%d.mat'%N, {
        'input_h': channel/CHFACT,
        'data_arrival': dataA,
        'data_queue': Q,
        'energy_queue': Y,
        'off_mode': mode_his,
        'rate': rate,
        'energy_consumption': energy,
        'data_rate': rate,
        'objective': Obj,
    })