diff --git a/2024/01/30/dl_summary/index.html b/2024/01/30/dl_summary/index.html index 4f288e2..2fcc46c 100644 --- a/2024/01/30/dl_summary/index.html +++ b/2024/01/30/dl_summary/index.html @@ -25,7 +25,7 @@ - + @@ -211,7 +211,7 @@ - 11k words + 17k words @@ -222,7 +222,7 @@ - 96 mins + 144 mins @@ -435,6 +435,46 @@

Batch Normalization

Transformer结构

import torch
import torch.nn as nn
import torch.nn.functional as F

class MultiHeadAttention(nn.Module):
def __init__(self, embed_dim, num_heads):
super(MultiHeadAttention, self).__init__()
self.num_heads = num_heads
self.head_dim = embed_dim // num_heads

self.query_fc = nn.Linear(embed_dim, embed_dim)
self.key_fc = nn.Linear(embed_dim, embed_dim)
self.value_fc = nn.Linear(embed_dim, embed_dim)
self.fc_out = nn.Linear(embed_dim, embed_dim)

def forward(self, query, key, value, mask=None):
batch_size = query.shape[0]

# Linearly project queries, keys, and values
Q = self.query_fc(query)
K = self.key_fc(key)
V = self.value_fc(value)

# Split the embedding into num_heads
Q = Q.view(batch_size, -1, self.num_heads, self.head_dim).permute(0, 2, 1, 3)
K = K.view(batch_size, -1, self.num_heads, self.head_dim).permute(0, 2, 1, 3)
V = V.view(batch_size, -1, self.num_heads, self.head_dim).permute(0, 2, 1, 3)

# Calculate the attention scores
scores = torch.matmul(Q, K.permute(0, 1, 3, 2)) / torch.sqrt(torch.tensor(self.head_dim, dtype=torch.float32))

if mask is not None:
scores = scores.masked_fill(mask == 0, float("-1e20"))

# Apply softmax to get attention probabilities
attention_weights = F.softmax(scores, dim=-1)

# Apply dropout
attention_weights = F.dropout(attention_weights, p=0.1, training=self.training)

# Multiply the attention weights with the values
output = torch.matmul(attention_weights, V)

# Concatenate multi-heads and project
output = output.permute(0, 2, 1, 3).contiguous().view(batch_size, -1, self.num_heads * self.head_dim)
output = self.fc_out(output)

return output, attention_weights

class PositionwiseFeedforward(nn.Module):
def __init__(self, embed_dim, hidden_dim):
super(PositionwiseFeedforward, self).__init__()
self.fc1 = nn.Linear(embed_dim, hidden_dim)
self.fc2 = nn.Linear(hidden_dim, embed_dim)

def forward(self, x):
x = F.relu(self.fc1(x))
x = self.fc2(x)
return x

class EncoderLayer(nn.Module):
def __init__(self, embed_dim, num_heads, hidden_dim):
super(EncoderLayer, self).__init__()
self.multihead_attention = MultiHeadAttention(embed_dim, num_heads)
self.feed_forward = PositionwiseFeedforward(embed_dim, hidden_dim)
self.layer_norm1 = nn.LayerNorm(embed_dim)
self.layer_norm2 = nn.LayerNorm(embed_dim)

def forward(self, x, mask=None):
# Multi-Head Attention
residual = x
x, _ = self.multihead_attention(x, x, x, mask)
x = self.layer_norm1(x + residual)

# Feed Forward
residual = x
x = self.feed_forward(x)
x = self.layer_norm2(x + residual)

return x

class TransformerEncoder(nn.Module):
def __init__(self, vocab_size, embed_dim, num_layers, num_heads, hidden_dim):
super(TransformerEncoder, self).__init__()
self.embedding = nn.Embedding(vocab_size, embed_dim)
self.layers = nn.ModuleList([EncoderLayer(embed_dim, num_heads, hidden_dim) for _ in range(num_layers)])

def forward(self, x, mask=None):
x = self.embedding(x)
for layer in self.layers:
x = layer(x, mask)
return x
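
上面的 TransformerEncoder 可以按如下方式调用。以下尺寸仅为示意值(并非固定实现),假设输入是已经转成 token id 的整数序列:

import torch

# 示意参数:词表 10000、嵌入维度 512、6 层、8 头、前馈隐层 2048(均为假设值)
encoder = TransformerEncoder(vocab_size=10000, embed_dim=512,
                             num_layers=6, num_heads=8, hidden_dim=2048)

# 两条长度为 16 的 token id 序列,形状 (batch_size=2, seq_len=16)
tokens = torch.randint(0, 10000, (2, 16))
out = encoder(tokens)
print(out.shape)  # torch.Size([2, 16, 512])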

import torch
import torch.nn as nn
import torch.optim as optim

class BasicNet(nn.Module):
def __init__(self, input_dim=784, hidden_dim1=256, hidden_dim2=128, output_dim=10):
super(BasicNet, self).__init__()
self.fc1 = nn.Linear(input_dim, hidden_dim1)
self.fc2 = nn.Linear(hidden_dim1, hidden_dim2)
self.fc3 = nn.Linear(hidden_dim2, output_dim)
self.relu = nn.ReLU()
self.batchnorm1 = nn.BatchNorm1d(hidden_dim1)
self.batchnorm2 = nn.BatchNorm1d(hidden_dim2)

def forward(self, x):
x = self.relu(self.batchnorm1(self.fc1(x)))
x = self.relu(self.batchnorm2(self.fc2(x)))
x = self.fc3(x)
return x

# 创建模型
model = BasicNet()

# 定义损失函数
criterion = nn.CrossEntropyLoss()

# 定义优化器
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
def train(model, train_loader, criterion, optimizer, num_epochs=5):
model.train()
for epoch in range(num_epochs):
running_loss = 0.0
for i, (inputs, labels) in enumerate(train_loader):
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
if (i+1) % 100 == 0:
print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {running_loss/100:.4f}')
running_loss = 0.0

# 保存模型
def save_model(model, filepath):
torch.save(model.state_dict(), filepath)

# 加载模型
def load_model(model, filepath):
model.load_state_dict(torch.load(filepath))
model.eval() # 设置为评估模式

# 示例数据加载
# train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)

# 示例训练过程
# train(model, train_loader, criterion, optimizer)

# 示例模型保存
# save_model(model, 'model.pth')

# 示例模型加载
# loaded_model = BasicNet()
# load_model(loaded_model, 'model.pth')
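
作为补充,下面是一个推理阶段的调用示意。输入维度 784 对应展平后的 28×28 图像,这里用随机张量代替真实数据,仅作演示:

# 推理示意:切换到 eval 模式后,BatchNorm 使用训练期间统计的全局均值/方差
model.eval()
with torch.no_grad():
    dummy_inputs = torch.randn(8, 784)   # batch_size=8 的随机样本,实际应替换为真实测试数据
    logits = model(dummy_inputs)
    preds = logits.argmax(dim=1)
    print(preds)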


DQN算法

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
from collections import namedtuple

class DQN(nn.Module):
def __init__(self, input_dim, output_dim):
super(DQN, self).__init__()
self.fc1 = nn.Linear(input_dim, 128)
self.fc2 = nn.Linear(128, 128)
self.fc3 = nn.Linear(128, output_dim)

def forward(self, x):
x = torch.relu(self.fc1(x))
x = torch.relu(self.fc2(x))
x = self.fc3(x)
return x

class DQNAgent:
def __init__(self, input_dim, output_dim, gamma=0.99, lr=0.001):
self.input_dim = input_dim
self.output_dim = output_dim
self.gamma = gamma
self.lr = lr
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.policy_net = DQN(input_dim, output_dim).to(self.device)
self.target_net = DQN(input_dim, output_dim).to(self.device)
self.target_net.load_state_dict(self.policy_net.state_dict())
self.target_net.eval()
self.optimizer = optim.Adam(self.policy_net.parameters(), lr=lr)
self.loss_fn = nn.MSELoss()

def select_action(self, state, epsilon):
if np.random.rand() < epsilon:
return np.random.randint(self.output_dim)
state = torch.FloatTensor(state).unsqueeze(0).to(self.device)
with torch.no_grad():
q_values = self.policy_net(state)
return q_values.max(1)[1].item()

def train(self, replay_buffer, batch_size):
if len(replay_buffer) < batch_size:
return
transitions = replay_buffer.sample(batch_size)
batch = Transition(*zip(*transitions))
state_batch = torch.FloatTensor(batch.state).to(self.device)
next_state_batch = torch.FloatTensor(batch.next_state).to(self.device)
action_batch = torch.LongTensor(batch.action).unsqueeze(1).to(self.device)
reward_batch = torch.FloatTensor(batch.reward).unsqueeze(1).to(self.device)
done_batch = torch.FloatTensor(batch.done).unsqueeze(1).to(self.device)

current_q_values = self.policy_net(state_batch).gather(1, action_batch)
next_q_values = self.target_net(next_state_batch).max(1)[0].unsqueeze(1)
target_q_values = reward_batch + (1 - done_batch) * self.gamma * next_q_values

loss = self.loss_fn(current_q_values, target_q_values.detach())
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()

def update_target_net(self):
self.target_net.load_state_dict(self.policy_net.state_dict())

class ReplayBuffer:
def __init__(self, capacity):
self.capacity = capacity
self.buffer = []
self.position = 0

def push(self, state, action, reward, next_state, done):
if len(self.buffer) < self.capacity:
self.buffer.append(None)
self.buffer[self.position] = (state, action, reward, next_state, done)
self.position = (self.position + 1) % self.capacity

def sample(self, batch_size):
return random.sample(self.buffer, batch_size)

def __len__(self):
return len(self.buffer)

Transition = namedtuple('Transition', ('state', 'action', 'reward', 'next_state', 'done'))
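
上面的 DQNAgent 和 ReplayBuffer 可以按下面的方式组合成训练循环。注意这只是一个示意:其中的 env 假定是一个 gym 风格的环境(reset 返回状态,step 返回 (next_state, reward, done, info)),状态维度 4、动作数 2 等参数也都是假设值:

agent = DQNAgent(input_dim=4, output_dim=2)
buffer = ReplayBuffer(capacity=10000)
epsilon = 0.1

for episode in range(500):
    state = env.reset()            # env 为假设的 gym 风格环境
    done = False
    while not done:
        action = agent.select_action(state, epsilon)
        next_state, reward, done, _ = env.step(action)
        buffer.push(state, action, reward, next_state, done)
        agent.train(buffer, batch_size=64)
        state = next_state
    if episode % 10 == 0:
        agent.update_target_net()  # 周期性同步目标网络参数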


PPO算法

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

class ActorCritic(nn.Module):
def __init__(self, input_dim, output_dim):
super(ActorCritic, self).__init__()
self.fc1 = nn.Linear(input_dim, 128)
self.fc2 = nn.Linear(128, 128)
self.fc_actor = nn.Linear(128, output_dim)
self.fc_critic = nn.Linear(128, 1)

def forward(self, x):
x = torch.relu(self.fc1(x))
x = torch.relu(self.fc2(x))
logits = self.fc_actor(x)
value = self.fc_critic(x)
return logits, value

class PPOAgent:
def __init__(self, input_dim, output_dim, gamma=0.99, lr=0.001):
self.input_dim = input_dim
self.output_dim = output_dim
self.gamma = gamma
self.lr = lr
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.policy_net = ActorCritic(input_dim, output_dim).to(self.device)
self.optimizer = optim.Adam(self.policy_net.parameters(), lr=lr)
self.loss_fn = nn.MSELoss()

def select_action(self, state):
state = torch.FloatTensor(state).unsqueeze(0).to(self.device)
with torch.no_grad():
logits, _ = self.policy_net(state)
action_probs = torch.softmax(logits, dim=-1)
action = np.random.choice(np.arange(self.output_dim), p=action_probs.cpu().numpy().ravel())
return action

def train(self, states, actions, rewards, next_states, dones, old_log_probs, epsilon_clip=0.2, num_epochs=10):
states = torch.FloatTensor(states).to(self.device)
actions = torch.LongTensor(actions).unsqueeze(-1).to(self.device)
rewards = torch.FloatTensor(rewards).unsqueeze(-1).to(self.device)
next_states = torch.FloatTensor(next_states).to(self.device)
dones = torch.FloatTensor(dones).unsqueeze(-1).to(self.device)
old_log_probs = torch.FloatTensor(old_log_probs).unsqueeze(-1).to(self.device)

for _ in range(num_epochs):
logits, values = self.policy_net(states)
new_log_probs = torch.log_softmax(logits, dim=-1).gather(1, actions)
ratio = (new_log_probs - old_log_probs).exp()
advantages = rewards - values.detach()

surr1 = ratio * advantages
surr2 = torch.clamp(ratio, 1.0 - epsilon_clip, 1.0 + epsilon_clip) * advantages
actor_loss = -torch.min(surr1, surr2).mean()

critic_loss = 0.5 * (rewards - values).pow(2).mean()

loss = actor_loss + 0.5 * critic_loss

self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
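
上面的 train 需要传入采样时记录的 old_log_probs,但 select_action 只返回了动作。下面给出一种可能的补充写法(仅为示意,在采样阶段额外记录所选动作在旧策略下的对数概率):

# 示意:采样时同时返回动作及其对数概率,供 PPO 计算重要性采样比值
def collect_action_and_log_prob(agent, state):
    state_t = torch.FloatTensor(state).unsqueeze(0).to(agent.device)
    with torch.no_grad():
        logits, _ = agent.policy_net(state_t)
        log_probs = torch.log_softmax(logits, dim=-1)
    probs = log_probs.exp().cpu().numpy().ravel()
    action = int(np.random.choice(np.arange(agent.output_dim), p=probs))
    old_log_prob = log_probs[0, action].item()   # 旧策略下该动作的对数概率
    return action, old_log_prob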


自动驾驶和强化学习


设计和实现一个基于强化学习的自动驾驶决策规划系统是一个复杂而且需要深入思考的任务。下面是一个基本的设计和实现方案:

设计方案:

  1. 环境建模:首先,需要对自动驾驶的环境进行建模,包括车辆、道路、交通规则、障碍物、目标等。环境可以使用基于物理的仿真环境,例如CARLA,或者基于虚拟仿真的环境。
  2. 状态空间定义:定义自动驾驶车辆的状态空间,这可能包括车辆的位置、速度、方向、周围车辆的位置和速度等信息。
  3. 动作空间定义:定义自动驾驶车辆可以执行的动作空间,如加速、减速、转向等。
  4. 奖励函数设计:设计奖励函数来评估每个状态下执行的动作,以指导智能体的学习。奖励函数应该考虑安全性、效率、舒适性等因素(本节末尾给出一个简单的奖励函数示意)。
  5. 智能体模型选择:选择合适的强化学习算法和模型架构来训练智能体,例如深度Q网络(DQN)、深度确定性策略梯度(DDPG)、双重深度确定性策略梯度(TD3)等。
  6. 训练策略:定义训练策略,包括学习率、优化器、探索策略等。
  7. 评估与测试:在仿真环境中对训练好的智能体进行评估和测试,检查其性能和鲁棒性。

实现方案:

  1. 环境建模:使用CARLA等仿真平台进行环境建模。
  2. 状态空间和动作空间定义:编写代码从环境中获取车辆状态信息,并定义可以执行的动作。
  3. 奖励函数设计:根据项目需求和目标设计奖励函数。
  4. 智能体模型选择:根据任务选择合适的强化学习算法和模型架构,例如使用深度神经网络作为Q值函数的估计器。
  5. 训练智能体:使用收集的数据对智能体进行训练,调整网络参数,以最大化奖励函数。
  6. 评估与测试:在仿真环境中评估训练后的智能体的性能,分析其行为是否符合预期,并且能否有效应对各种情况。
  7. 迭代优化:根据评估结果,对模型和训练策略进行调整和优化,不断改进智能体的性能和稳定性。

以上是一个基本的设计和实现方案,实际项目中可能还涉及到更多的细节和挑战,如安全性保障、仿真环境与真实环境的一致性等问题。因此,在实施过程中需要综合考虑各种因素,以确保系统的稳定性和安全性。
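
作为上文"奖励函数设计"一条的补充,下面给出一个极简的奖励函数示意。其中的状态字段(collision、speed、lane_offset、jerk)和各项权重都是假设值,仅用来说明如何把安全、效率、舒适三类因素加权组合:

# 示意:综合安全、效率、舒适的奖励函数(字段与权重均为假设值,需按实际仿真接口调整)
def compute_reward(state):
    if state["collision"]:                       # 安全:发生碰撞直接给大额负奖励
        return -100.0
    reward = 0.1 * state["speed"]                # 效率:鼓励保持合理车速
    reward -= 0.5 * abs(state["lane_offset"])    # 安全:惩罚偏离车道中心
    reward -= 0.05 * abs(state["jerk"])          # 舒适:惩罚急加减速
    return reward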

diff --git a/local-search.xml b/local-search.xml index 441abe6..5a9c48b 100644 --- a/local-search.xml +++ b/local-search.xml @@ -64,7 +64,7 @@ /2024/01/30/dl_summary/ - 深度学习知识汇总

深度学习八股文,这里将会收集深度学习中的基本概念和常见的问题

https://blog.csdn.net/weixin_42693876/article/details/120345924

L2范数https://blog.csdn.net/u010725283/article/details/79212762

L1L2范数 https://blog.csdn.net/weixin_35849560/article/details/113395018

Transformer为什么用多头 https://www.zhihu.com/question/341222779

Transformer里的LN https://blog.csdn.net/weixin_45069761/article/details/107834049

https://zhuanlan.zhihu.com/p/560482252

batch和minibatch https://link.zhihu.com/?target=https%3A//blog.csdn.net/xys430381_1/article/details/80680167

优化器 https://zhuanlan.zhihu.com/p/78622301

BN https://zhuanlan.zhihu.com/p/93643523

神经网络权重初始化 https://blog.csdn.net/kebu12345678/article/details/103084851

https://zhuanlan.zhihu.com/p/667048896

https://zhuanlan.zhihu.com/p/643560888

bert模型细节 https://www.zhihu.com/question/534763354

为什么Bert三个embedding可以相加 https://www.zhihu.com/question/374835153/answer/1080315948

LLAMA2结构https://blog.csdn.net/sikh_0529/article/details/134375318

旋转位置嵌入https://www.zhihu.com/tardis/zm/art/647109286?source_id=1005

Qlora https://zhuanlan.zhihu.com/p/618894919

RLHF https://zhuanlan.zhihu.com/p/631238431

    https://zhuanlan.zhihu.com/p/599016986

逻辑回归和线性回归

线性回归解决的是回归问题,逻辑回归相当于是线性回归的基础上,来解决分类问题

线性回归(Linear Regression) \[f_{w, b}(x)=\sum_i w_i x_i+b\] 逻辑回归(Logistic Regression) \[f_{w, b}(x)=\sigma\left(\sum_i w_i x_i+b\right)\]

逻辑回归可以理解为在线性回归后加了一个 sigmoid 函数,把线性回归变成一个输出在 0~1 之间的分类模型。逻辑回归本质上仍是线性模型:除去 sigmoid 映射之外,其他步骤和算法都与线性回归相同。可以说,逻辑回归以线性回归为理论支持,只不过它可以轻松解决 0/1 分类问题。

为什么希望深度学习模型的参数都比较小(例如落在 0~1 之间)

因为参数越小代表模型越简单,越是复杂的模型,越是尝试对所有样本进行拟合,包括异常点。这就会造成在较小的区间中产生较大的波动,这个较大的波动也会反映在这个区间的导数比较大。只有越大的参数才可能产生较大的导数。因此参数越小,模型就越简单。

实现参数稀疏

参数的稀疏,在一定程度上实现了特征的选择。一般而言,大部分特征对模型是没有贡献的。这些没有用的特征虽然可以减少训练集上的误差,但是对测试集的样本,反而会产生干扰。稀疏参数的引入,可以将那些无用的特征的权重置为0

Batch_size的大小对学习率的影响

为什么batch-size小,学习率取的大会发生nan?学习率较高的情况下,直接影响到每次更新值的程度比较大,走的步伐因此也会大起来。如下图,过大的学习率会导致无法顺利地到达最低点,稍有不慎就会跳出可控制区域,此时我们将要面对的就是损失成倍增大(跨量级)

优化器optimizer和损失函数loss function的区别:

  1. 优化器定义了哪些参数是要用来更新的,并且设置了更新的方式(学习率、动量、SGD等),还有一些权重衰减的设置。
  2. 损失函数是用来计算损失的,也可以说损失函数是负责反向传播求导用的

残差结构设计思想:残差网络的本质也是解决梯度消失/爆炸的问题,只不过是在网络结构层面的改变。残差网络的出现解决了构建深层神经网络时网络退化(即梯度消失/爆炸)的问题。残差结构主要有两个设计:快捷连接(shortcut connection)和恒等映射(identity mapping)。快捷连接使得残差变得可能,而恒等映射使得网络变深;恒等映射主要有两个:跳跃连接和激活函数。

Adam与SGD的区别

SGD缺点是其更新方向完全依赖于当前batch计算出的梯度,因而十分不稳定。

Adam的优点主要在于:为每个参数维护自适应的学习率,并结合了动量信息,通常收敛更快,对稀疏梯度也比较友好。

但是Adam也有其自身问题:可能会对前期出现的特征过拟合,后期才出现的特征很难纠正前期的拟合效果。二者似乎都没法很好避免局部最优问题。

softmax如何防止指数上溢

在计算softmax函数时,指数上溢是一个常见的问题,特别是当输入的数值非常大时,指数函数的计算结果可能会溢出。为了解决这个问题,可以采取以下几种方法:

  1. 数值稳定性技巧:为了避免指数函数的溢出,可以将输入的数值减去一个常数,使得输入相对较小,从而减少指数函数的值。通常,可以通过找到输入向量中的最大值,并将所有元素减去这个最大值来实现数值稳定性。

    \[\operatorname{softmax}(x_i)=\frac{e^{x_i-\max_j x_j}}{\sum_k e^{x_k-\max_j x_j}}\]

    这样做可以保持相对稳定,防止指数函数的溢出。

  2. 利用性质:softmax函数的分子和分母同时除以一个相同的常数并不会改变函数的值。因此,我们可以在计算softmax时,将所有输入向量的值都减去向量中的最大值,然后进行softmax计算。

以上两种方法都可以有效地避免指数上溢的问题,并保持softmax函数的数值稳定性。在实际应用中,通常会使用这些技巧来计算softmax函数,以确保模型的稳定性和数值精度。

训练过程中发现loss快速增大应该从哪些方面考虑?

    1. 学习率过大
    2. 训练样本中有坏数据
  1. model.eval() 和 torch.no_grad() 的区别

    • model.eval():依然计算梯度,但是不反传;dropout层保留概率为1;batchnorm层使用全局的mean和var
    • with torch.no_grad: 不计算梯度
  2. Dropout和Batch norm能否一起使用?

  3. 可以,但是只能将Dropout放在Batchnorm之后使用。因为Dropout训练时会改变输入X的方差,从而影响Batchnorm训练过程中统计的滑动方差值;而测试时没有Dropout,输入X的方差和训练时不一致,这就导致Batchnorm测试时期望的方差和训练时统计的有偏差。

  4. 梯度消失和梯度爆炸

  5. 梯度消失的原因和解决办法

  6. (1)隐藏层的层数过多

  7. 反向传播求梯度时的链式求导法则,某部分梯度小于1,则多层连乘后出现梯度消失

  8. (2)采用了不合适的激活函数

  9. 如sigmoid函数的最大梯度为1/4,这意味着隐藏层每一层的梯度均小于1(权值小于1时),出现梯度消失。

  10. 解决方法:1、relu激活函数,使导数恒为1 2、batch norm 3、残差结构

  11. 梯度爆炸的原因和解决办法

  12. (1)隐藏层的层数过多,某部分梯度大于1,则多层连乘后,梯度呈指数增长,产生梯度爆炸。

  13. (2)权重初始值太大,求导时会乘上权重

  14. 解决方法:1、梯度裁剪 2、权重L1/L2正则化 3、残差结构 4、batchnorm

  15. Batch Normalization(Batch Norm)缺点:在处理序列数据(如文本)时,BatchNorm可能不会表现得很好,因为序列数据通常长度不一,并且一次训练的Batch中的句子的长度可能会有很大的差异;此外,BatchNorm对于Batch大小也非常敏感。对于较小的Batch大小,BatchNorm可能会表现得不好,因为每个Batch的统计特性可能会有较大的波动。

  16. Layer Normalization(Layer Norm)优点:LayerNorm是对每个样本进行归一化,因此它对Batch大小不敏感,这使得它在处理序列数据时表现得更好;另外,LayerNorm在处理不同长度的序列时也更为灵活。

  17. Instance Normalization(Instance Norm)优点:InstanceNorm是对每个样本的每个特征进行归一化,因此它可以捕捉到更多的细节信息。InstanceNorm在某些任务,如风格迁移,中表现得很好,因为在这些任务中,细节信息很重要。缺点:InstanceNorm可能会过度强调细节信息,忽视了更宏观的信息。此外,InstanceNorm的计算成本相比Batch Norm和Layer Norm更高。

  18. Group Normalization(Group Norm)优点:Group Norm是Batch Norm和InstanceNorm的折中方案,它在Batch的一个子集(即组)上进行归一化。这使得GroupNorm既可以捕捉到Batch的统计特性,又可以捕捉到样本的细节信息。此外,GroupNorm对Batch大小也不敏感。 缺点:GroupNorm的性能取决于组的大小,需要通过实验来确定最优的组大小。此外,GroupNorm的计算成本也比Batch Norm和Layer Norm更高。

pytorch实现自注意力和多头注意力

自注意力

from math import sqrt
import torch
import torch.nn as nn

class SelfAttention(nn.Module):
def __init__(self, dim_in, dim_k, dim_v):
super(SelfAttention, self).__init__()
self.dim_in = dim_in
self.dim_k = dim_k
self.dim_v = dim_v
self.linear_q = nn.Linear(dim_in, dim_k, bias=False)
self.linear_k = nn.Linear(dim_in, dim_k, bias=False)
self.linear_v = nn.Linear(dim_in, dim_v, bias=False)
self._norm_fact = 1/sqrt(dim_k)


def forward(self, x):
batch, n, dim_in = x.shape
assert dim_in == self.dim_in

q = self.linear_q(x) #batch, n, dim_k
k = self.linear_k(x)
v = self.linear_v(x)

dist = torch.bmm(q, k.transpose(1,2))* self._norm_fact #batch, n, n
dist = torch.softmax(dist, dim=-1)

att = torch.bmm(dist, v)
return att

多头注意力机制

from math import sqrt
import torch
import torch.nn as nn

class MultiHeadAttention(nn.Module):
# dim_in: input dimension
# dim_k: key/query dimension
# dim_v: value dimension
# num_heads: number of heads

def __init__(self, dim_in, dim_k, dim_v, num_heads=8):
super(MultiHeadAttention, self).__init__()
assert dim_k% num_heads ==0 and dim_v% num_heads ==0

self.dim_in = dim_in
self.dim_k = dim_k
self.dim_v = dim_v
self.num_heads = num_heads
self.linear_q = nn.Linear(dim_in, dim_k, bias=False)
self.linear_k = nn.Linear(dim_in, dim_k, bias=False)
self.linear_v = nn.Linear(dim_in, dim_v, bias=False)
self._norm_fact = 1/sqrt(dim_k//num_heads)

def forward(self, x):
# x: tensor of shape(batch, n, dim_in)
batch, n, dim_in = x.shape
assert dim_in == self.dim_in

nh = self.num_heads
dk = self.dim_k // nh
dv = self.dim_v // nh

q = self.linear_q(x).reshape(batch, n, nh, dk).transpose(1, 2)
k = self.linear_k(x).reshape(batch, n, nh, dk).transpose(1, 2)
v = self.linear_v(x).reshape(batch, n, nh, dv).transpose(1, 2)

dist = torch.matmul(q, k.transpose(2,3))*self._norm_fact
dist = torch.softmax(dist, dim=-1)

att = torch.matmul(dist, v)
att = att.transpose(1,2).reshape(batch, n, self.dim_v)
return att

Batch Normalization

import numpy as np

class MyBN:
def __init__(self, momentum=0.01, eps=1e-5, feat_dim=2):
self._running_mean = np.zeros(shape = (feat_dim,))
self._running_var = np.ones(shape = (feat_dim,))
self._momentum = momentum
#防止分母计算为0
self._eps = eps
#训练/推理标志:推理时应设为 False,以使用滑动平均统计量
self.training = True

#对应batch norm中需要更新beta 和 gamma, 采用pytorch文档中的初始化
self._beta = np.zeros(shape=(feat_dim,))
self._gamma = np.ones(shape=(feat_dim,))


def batch_norm(self, x):
if self.training:
x_mean = x.mean(axis=0)
x_var = x.var(axis=0)
#对应running_mean的更新公式
self._running_mean = (1-self._momentum)*x_mean +self._momentum*self._running_mean
self._running_var = (1-self._momentum)*x_var + self._momentum*self._running_var
#对应论文中计算BN公式
x_hat = (x-x_mean)/np.sqrt(x_var+self._eps)
else:
x_hat = (x-self._running_mean)/np.sqrt(self._running_var+self._eps)
return self._gamma*x_hat + self._beta

Transformer结构

import torch
import torch.nn as nn
import torch.nn.functional as F

class MultiHeadAttention(nn.Module):
def __init__(self, embed_dim, num_heads):
super(MultiHeadAttention, self).__init__()
self.num_heads = num_heads
self.head_dim = embed_dim // num_heads

self.query_fc = nn.Linear(embed_dim, embed_dim)
self.key_fc = nn.Linear(embed_dim, embed_dim)
self.value_fc = nn.Linear(embed_dim, embed_dim)
self.fc_out = nn.Linear(embed_dim, embed_dim)

def forward(self, query, key, value, mask=None):
batch_size = query.shape[0]

# Linearly project queries, keys, and values
Q = self.query_fc(query)
K = self.key_fc(key)
V = self.value_fc(value)

# Split the embedding into num_heads
Q = Q.view(batch_size, -1, self.num_heads, self.head_dim).permute(0, 2, 1, 3)
K = K.view(batch_size, -1, self.num_heads, self.head_dim).permute(0, 2, 1, 3)
V = V.view(batch_size, -1, self.num_heads, self.head_dim).permute(0, 2, 1, 3)

# Calculate the attention scores
scores = torch.matmul(Q, K.permute(0, 1, 3, 2)) / torch.sqrt(torch.tensor(self.head_dim, dtype=torch.float32))

if mask is not None:
scores = scores.masked_fill(mask == 0, float("-1e20"))

# Apply softmax to get attention probabilities
attention_weights = F.softmax(scores, dim=-1)

# Apply dropout
attention_weights = F.dropout(attention_weights, p=0.1, training=self.training)

# Multiply the attention weights with the values
output = torch.matmul(attention_weights, V)

# Concatenate multi-heads and project
output = output.permute(0, 2, 1, 3).contiguous().view(batch_size, -1, self.num_heads * self.head_dim)
output = self.fc_out(output)

return output, attention_weights

class PositionwiseFeedforward(nn.Module):
def __init__(self, embed_dim, hidden_dim):
super(PositionwiseFeedforward, self).__init__()
self.fc1 = nn.Linear(embed_dim, hidden_dim)
self.fc2 = nn.Linear(hidden_dim, embed_dim)

def forward(self, x):
x = F.relu(self.fc1(x))
x = self.fc2(x)
return x

class EncoderLayer(nn.Module):
def __init__(self, embed_dim, num_heads, hidden_dim):
super(EncoderLayer, self).__init__()
self.multihead_attention = MultiHeadAttention(embed_dim, num_heads)
self.feed_forward = PositionwiseFeedforward(embed_dim, hidden_dim)
self.layer_norm1 = nn.LayerNorm(embed_dim)
self.layer_norm2 = nn.LayerNorm(embed_dim)

def forward(self, x, mask=None):
# Multi-Head Attention
residual = x
x, _ = self.multihead_attention(x, x, x, mask)
x = self.layer_norm1(x + residual)

# Feed Forward
residual = x
x = self.feed_forward(x)
x = self.layer_norm2(x + residual)

return x

class TransformerEncoder(nn.Module):
def __init__(self, vocab_size, embed_dim, num_layers, num_heads, hidden_dim):
super(TransformerEncoder, self).__init__()
self.embedding = nn.Embedding(vocab_size, embed_dim)
self.layers = nn.ModuleList([EncoderLayer(embed_dim, num_heads, hidden_dim) for _ in range(num_layers)])

def forward(self, x, mask=None):
x = self.embedding(x)
for layer in self.layers:
x = layer(x, mask)
return x

import torch
import torch.nn as nn
import torch.optim as optim

class BasicNet(nn.Module):
def __init__(self, input_dim=784, hidden_dim1=256, hidden_dim2=128, output_dim=10):
super(BasicNet, self).__init__()
self.fc1 = nn.Linear(input_dim, hidden_dim1)
self.fc2 = nn.Linear(hidden_dim1, hidden_dim2)
self.fc3 = nn.Linear(hidden_dim2, output_dim)
self.relu = nn.ReLU()
self.batchnorm1 = nn.BatchNorm1d(hidden_dim1)
self.batchnorm2 = nn.BatchNorm1d(hidden_dim2)

def forward(self, x):
x = self.relu(self.batchnorm1(self.fc1(x)))
x = self.relu(self.batchnorm2(self.fc2(x)))
x = self.fc3(x)
return x

# 创建模型
model = BasicNet()

# 定义损失函数
criterion = nn.CrossEntropyLoss()

# 定义优化器
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
def train(model, train_loader, criterion, optimizer, num_epochs=5):
model.train()
for epoch in range(num_epochs):
running_loss = 0.0
for i, (inputs, labels) in enumerate(train_loader):
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
if (i+1) % 100 == 0:
print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {running_loss/100:.4f}')
running_loss = 0.0

# 保存模型
def save_model(model, filepath):
torch.save(model.state_dict(), filepath)

# 加载模型
def load_model(model, filepath):
model.load_state_dict(torch.load(filepath))
model.eval() # 设置为评估模式

# 示例数据加载
# train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)

# 示例训练过程
# train(model, train_loader, criterion, optimizer)

# 示例模型保存
# save_model(model, 'model.pth')

# 示例模型加载
# loaded_model = BasicNet()
# load_model(loaded_model, 'model.pth')

]]>
+ 深度学习知识汇总

深度学习八股文,这里将会收集深度学习中的基本概念和常见的问题

https://blog.csdn.net/weixin_42693876/article/details/120345924

L2范数https://blog.csdn.net/u010725283/article/details/79212762

L1L2范数 https://blog.csdn.net/weixin_35849560/article/details/113395018

Transformer为什么用多头 https://www.zhihu.com/question/341222779

Transformer里的LN https://blog.csdn.net/weixin_45069761/article/details/107834049

https://zhuanlan.zhihu.com/p/560482252

batch和minibatch https://link.zhihu.com/?target=https%3A//blog.csdn.net/xys430381_1/article/details/80680167

优化器 https://zhuanlan.zhihu.com/p/78622301

BN https://zhuanlan.zhihu.com/p/93643523

神经网络权重初始化 https://blog.csdn.net/kebu12345678/article/details/103084851

https://zhuanlan.zhihu.com/p/667048896

https://zhuanlan.zhihu.com/p/643560888

bert模型细节 https://www.zhihu.com/question/534763354

为什么Bert三个embedding可以相加 https://www.zhihu.com/question/374835153/answer/1080315948

LLAMA2结构https://blog.csdn.net/sikh_0529/article/details/134375318

旋转位置嵌入https://www.zhihu.com/tardis/zm/art/647109286?source_id=1005

Qlora https://zhuanlan.zhihu.com/p/618894919

RLHF https://zhuanlan.zhihu.com/p/631238431

    https://zhuanlan.zhihu.com/p/599016986

逻辑回归和线性回归

线性回归解决的是回归问题,逻辑回归相当于是线性回归的基础上,来解决分类问题

线性回归(Linear Regression) \[f_{w, b}(x)=\sum_i w_i x_i+b\] 逻辑回归(Logistic Regression) \[f_{w, b}(x)=\sigma\left(\sum_i w_i x_i+b\right)\]

逻辑回归可以理解为在线性回归后加了一个 sigmoid 函数,把线性回归变成一个输出在 0~1 之间的分类模型。逻辑回归本质上仍是线性模型:除去 sigmoid 映射之外,其他步骤和算法都与线性回归相同。可以说,逻辑回归以线性回归为理论支持,只不过它可以轻松解决 0/1 分类问题。

为什么希望深度学习模型的参数都比较小(例如落在 0~1 之间)

因为参数越小代表模型越简单,越是复杂的模型,越是尝试对所有样本进行拟合,包括异常点。这就会造成在较小的区间中产生较大的波动,这个较大的波动也会反映在这个区间的导数比较大。只有越大的参数才可能产生较大的导数。因此参数越小,模型就越简单。

实现参数稀疏

参数的稀疏,在一定程度上实现了特征的选择。一般而言,大部分特征对模型是没有贡献的。这些没有用的特征虽然可以减少训练集上的误差,但是对测试集的样本,反而会产生干扰。稀疏参数的引入,可以将那些无用的特征的权重置为0

Batch_size的大小对学习率的影响

为什么batch-size小,学习率取的大会发生nan?学习率较高的情况下,直接影响到每次更新值的程度比较大,走的步伐因此也会大起来。如下图,过大的学习率会导致无法顺利地到达最低点,稍有不慎就会跳出可控制区域,此时我们将要面对的就是损失成倍增大(跨量级)

优化器optimizer和损失函数loss function的区别:

  1. 优化器定义了哪些参数是要用来更新的,并且设置了更新的方式(学习率、动量、SGD等),还有一些权重衰减的设置。
  2. 损失函数是用来计算损失的,也可以说损失函数是负责反向传播求导用的

残差结构设计思想:残差网络的本质也是解决梯度消失/爆炸的问题,只不过是在网络结构层面的改变。残差网络的出现解决了构建深层神经网络时网络退化(即梯度消失/爆炸)的问题。残差结构主要有两个设计:快捷连接(shortcut connection)和恒等映射(identity mapping)。快捷连接使得残差变得可能,而恒等映射使得网络变深;恒等映射主要有两个:跳跃连接和激活函数。

Adam与SGD的区别

SGD缺点是其更新方向完全依赖于当前batch计算出的梯度,因而十分不稳定。

Adam的优点主要在于:为每个参数维护自适应的学习率,并结合了动量信息,通常收敛更快,对稀疏梯度也比较友好。

但是Adam也有其自身问题:可能会对前期出现的特征过拟合,后期才出现的特征很难纠正前期的拟合效果。二者似乎都没法很好避免局部最优问题。

softmax如何防止指数上溢

在计算softmax函数时,指数上溢是一个常见的问题,特别是当输入的数值非常大时,指数函数的计算结果可能会溢出。为了解决这个问题,可以采取以下几种方法:

  1. 数值稳定性技巧:为了避免指数函数的溢出,可以将输入的数值减去一个常数,使得输入相对较小,从而减少指数函数的值。通常,可以通过找到输入向量中的最大值,并将所有元素减去这个最大值来实现数值稳定性。

    \[\operatorname{softmax}(x_i)=\frac{e^{x_i-\max_j x_j}}{\sum_k e^{x_k-\max_j x_j}}\]

    这样做可以保持相对稳定,防止指数函数的溢出。

  2. 利用性质:softmax函数的分子和分母同时除以一个相同的常数并不会改变函数的值。因此,我们可以在计算softmax时,将所有输入向量的值都减去向量中的最大值,然后进行softmax计算。

以上两种方法都可以有效地避免指数上溢的问题,并保持softmax函数的数值稳定性。在实际应用中,通常会使用这些技巧来计算softmax函数,以确保模型的稳定性和数值精度。

训练过程中发现loss快速增大应该从哪些方面考虑?

    1. 学习率过大
    2. 训练样本中有坏数据
  1. model.eval() 和 torch.no_grad() 的区别

    • model.eval():依然计算梯度,但是不反传;dropout层保留概率为1;batchnorm层使用全局的mean和var
    • with torch.no_grad: 不计算梯度
  2. Dropout和Batch norm能否一起使用?

  3. 可以,但是只能将Dropout放在Batchnorm之后使用。因为Dropout训练时会改变输入X的方差,从而影响Batchnorm训练过程中统计的滑动方差值;而测试时没有Dropout,输入X的方差和训练时不一致,这就导致Batchnorm测试时期望的方差和训练时统计的有偏差。

  4. 梯度消失和梯度爆炸

  5. 梯度消失的原因和解决办法

  6. (1)隐藏层的层数过多

  7. 反向传播求梯度时的链式求导法则,某部分梯度小于1,则多层连乘后出现梯度消失

  8. (2)采用了不合适的激活函数

  9. 如sigmoid函数的最大梯度为1/4,这意味着隐藏层每一层的梯度均小于1(权值小于1时),出现梯度消失。

  10. 解决方法:1、relu激活函数,使导数恒为1 2、batch norm 3、残差结构

  11. 梯度爆炸的原因和解决办法

  12. (1)隐藏层的层数过多,某部分梯度大于1,则多层连乘后,梯度呈指数增长,产生梯度爆炸。

  13. (2)权重初始值太大,求导时会乘上权重

  14. 解决方法:1、梯度裁剪 2、权重L1/L2正则化 3、残差结构 4、batchnorm

  15. Batch Normalization(Batch Norm)缺点:在处理序列数据(如文本)时,BatchNorm可能不会表现得很好,因为序列数据通常长度不一,并且一次训练的Batch中的句子的长度可能会有很大的差异;此外,BatchNorm对于Batch大小也非常敏感。对于较小的Batch大小,BatchNorm可能会表现得不好,因为每个Batch的统计特性可能会有较大的波动。

  16. Layer Normalization(Layer Norm)优点:LayerNorm是对每个样本进行归一化,因此它对Batch大小不敏感,这使得它在处理序列数据时表现得更好;另外,LayerNorm在处理不同长度的序列时也更为灵活。

  17. Instance Normalization(Instance Norm)优点:InstanceNorm是对每个样本的每个特征进行归一化,因此它可以捕捉到更多的细节信息。InstanceNorm在某些任务,如风格迁移,中表现得很好,因为在这些任务中,细节信息很重要。缺点:InstanceNorm可能会过度强调细节信息,忽视了更宏观的信息。此外,InstanceNorm的计算成本相比Batch Norm和Layer Norm更高。

  18. Group Normalization(Group Norm)优点:Group Norm是Batch Norm和InstanceNorm的折中方案,它在Batch的一个子集(即组)上进行归一化。这使得GroupNorm既可以捕捉到Batch的统计特性,又可以捕捉到样本的细节信息。此外,GroupNorm对Batch大小也不敏感。 缺点:GroupNorm的性能取决于组的大小,需要通过实验来确定最优的组大小。此外,GroupNorm的计算成本也比Batch Norm和Layer Norm更高。

pytorch实现自注意力和多头注意力

自注意力

from math import sqrt
import torch
import torch.nn as nn

class SelfAttention(nn.Module):
def __init__(self, dim_in, dim_k, dim_v):
super(SelfAttention, self).__init__()
self.dim_in = dim_in
self.dim_k = dim_k
self.dim_v = dim_v
self.linear_q = nn.Linear(dim_in, dim_k, bias=False)
self.linear_k = nn.Linear(dim_in, dim_k, bias=False)
self.linear_v = nn.Linear(dim_in, dim_v, bias=False)
self._norm_fact = 1/sqrt(dim_k)


def forward(self, x):
batch, n, dim_in = x.shape
assert dim_in == self.dim_in

q = self.linear_q(x) #batch, n, dim_k
k = self.linear_k(x)
v = self.linear_v(x)

dist = torch.bmm(q, k.transpose(1,2))* self._norm_fact #batch, n, n
dist = torch.softmax(dist, dim=-1)

att = torch.bmm(dist, v)
return att

多头注意力机制

from math import sqrt
import torch
import torch.nn as nn

class MultiHeadAttention(nn.Module):
# dim_in: input dimension
# dim_k: key/query dimension
# dim_v: value dimension
# num_heads: number of heads

def __init__(self, dim_in, dim_k, dim_v, num_heads=8):
super(MultiHeadAttention, self).__init__()
assert dim_k% num_heads ==0 and dim_v% num_heads ==0

self.dim_in = dim_in
self.dim_k = dim_k
self.dim_v = dim_v
self.num_heads = num_heads
self.linear_q = nn.Linear(dim_in, dim_k, bias=False)
self.linear_k = nn.Linear(dim_in, dim_k, bias=False)
self.linear_v = nn.Linear(dim_in, dim_v, bias=False)
self._norm_fact = 1/sqrt(dim_k//num_heads)

def forward(self, x):
# x: tensor of shape(batch, n, dim_in)
batch, n, dim_in = x.shape
assert dim_in == self.dim_in

nh = self.num_heads
dk = self.dim_k // nh
dv = self.dim_v // nh

q = self.linear_q(x).reshape(batch, n, nh, dk).transpose(1, 2)
k = self.linear_k(x).reshape(batch, n, nh, dk).transpose(1, 2)
v = self.linear_v(x).reshape(batch, n, nh, dv).transpose(1, 2)

dist = torch.matmul(q, k.transpose(2,3))*self._norm_fact
dist = torch.softmax(dist, dim=-1)

att = torch.matmul(dist, v)
att = att.transpose(1,2).reshape(batch, n, self.dim_v)
return att

Batch Normalization

import numpy as np

class MyBN:
def __init__(self, momentum=0.01, eps=1e-5, feat_dim=2):
self._running_mean = np.zeros(shape = (feat_dim,))
self._running_var = np.ones(shape = (feat_dim,))
self._momentum = momentum
#防止分母计算为0
self._eps = eps
#训练/推理标志:推理时应设为 False,以使用滑动平均统计量
self.training = True

#对应batch norm中需要更新beta 和 gamma, 采用pytorch文档中的初始化
self._beta = np.zeros(shape=(feat_dim,))
self._gamma = np.ones(shape=(feat_dim,))


def batch_norm(self, x):
if self.training:
x_mean = x.mean(axis=0)
x_var = x.var(axis=0)
#对应running_mean的更新公式
self._running_mean = (1-self._momentum)*x_mean +self._momentum*self._running_mean
self._running_var = (1-self._momentum)*x_var + self._momentum*self._running_var
#对应论文中计算BN公式
x_hat = (x-x_mean)/np.sqrt(x_var+self._eps)
else:
x_hat = (x-self._running_mean)/np.sqrt(self._running_var+self._eps)
return self._gamma*x_hat + self._beta

Transformer结构

import torch
import torch.nn as nn
import torch.nn.functional as F

class MultiHeadAttention(nn.Module):
def __init__(self, embed_dim, num_heads):
super(MultiHeadAttention, self).__init__()
self.num_heads = num_heads
self.head_dim = embed_dim // num_heads

self.query_fc = nn.Linear(embed_dim, embed_dim)
self.key_fc = nn.Linear(embed_dim, embed_dim)
self.value_fc = nn.Linear(embed_dim, embed_dim)
self.fc_out = nn.Linear(embed_dim, embed_dim)

def forward(self, query, key, value, mask=None):
batch_size = query.shape[0]

# Linearly project queries, keys, and values
Q = self.query_fc(query)
K = self.key_fc(key)
V = self.value_fc(value)

# Split the embedding into num_heads
Q = Q.view(batch_size, -1, self.num_heads, self.head_dim).permute(0, 2, 1, 3)
K = K.view(batch_size, -1, self.num_heads, self.head_dim).permute(0, 2, 1, 3)
V = V.view(batch_size, -1, self.num_heads, self.head_dim).permute(0, 2, 1, 3)

# Calculate the attention scores
scores = torch.matmul(Q, K.permute(0, 1, 3, 2)) / torch.sqrt(torch.tensor(self.head_dim, dtype=torch.float32))

if mask is not None:
scores = scores.masked_fill(mask == 0, float("-1e20"))

# Apply softmax to get attention probabilities
attention_weights = F.softmax(scores, dim=-1)

# Apply dropout
attention_weights = F.dropout(attention_weights, p=0.1, training=self.training)

# Multiply the attention weights with the values
output = torch.matmul(attention_weights, V)

# Concatenate multi-heads and project
output = output.permute(0, 2, 1, 3).contiguous().view(batch_size, -1, self.num_heads * self.head_dim)
output = self.fc_out(output)

return output, attention_weights

class PositionwiseFeedforward(nn.Module):
def __init__(self, embed_dim, hidden_dim):
super(PositionwiseFeedforward, self).__init__()
self.fc1 = nn.Linear(embed_dim, hidden_dim)
self.fc2 = nn.Linear(hidden_dim, embed_dim)

def forward(self, x):
x = F.relu(self.fc1(x))
x = self.fc2(x)
return x

class EncoderLayer(nn.Module):
def __init__(self, embed_dim, num_heads, hidden_dim):
super(EncoderLayer, self).__init__()
self.multihead_attention = MultiHeadAttention(embed_dim, num_heads)
self.feed_forward = PositionwiseFeedforward(embed_dim, hidden_dim)
self.layer_norm1 = nn.LayerNorm(embed_dim)
self.layer_norm2 = nn.LayerNorm(embed_dim)

def forward(self, x, mask=None):
# Multi-Head Attention
residual = x
x, _ = self.multihead_attention(x, x, x, mask)
x = self.layer_norm1(x + residual)

# Feed Forward
residual = x
x = self.feed_forward(x)
x = self.layer_norm2(x + residual)

return x

class TransformerEncoder(nn.Module):
def __init__(self, vocab_size, embed_dim, num_layers, num_heads, hidden_dim):
super(TransformerEncoder, self).__init__()
self.embedding = nn.Embedding(vocab_size, embed_dim)
self.layers = nn.ModuleList([EncoderLayer(embed_dim, num_heads, hidden_dim) for _ in range(num_layers)])

def forward(self, x, mask=None):
x = self.embedding(x)
for layer in self.layers:
x = layer(x, mask)
return x

import torch
import torch.nn as nn
import torch.optim as optim

class BasicNet(nn.Module):
def __init__(self, input_dim=784, hidden_dim1=256, hidden_dim2=128, output_dim=10):
super(BasicNet, self).__init__()
self.fc1 = nn.Linear(input_dim, hidden_dim1)
self.fc2 = nn.Linear(hidden_dim1, hidden_dim2)
self.fc3 = nn.Linear(hidden_dim2, output_dim)
self.relu = nn.ReLU()
self.batchnorm1 = nn.BatchNorm1d(hidden_dim1)
self.batchnorm2 = nn.BatchNorm1d(hidden_dim2)

def forward(self, x):
x = self.relu(self.batchnorm1(self.fc1(x)))
x = self.relu(self.batchnorm2(self.fc2(x)))
x = self.fc3(x)
return x

# 创建模型
model = BasicNet()

# 定义损失函数
criterion = nn.CrossEntropyLoss()

# 定义优化器
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
def train(model, train_loader, criterion, optimizer, num_epochs=5):
model.train()
for epoch in range(num_epochs):
running_loss = 0.0
for i, (inputs, labels) in enumerate(train_loader):
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
if (i+1) % 100 == 0:
print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {running_loss/100:.4f}')
running_loss = 0.0

# 保存模型
def save_model(model, filepath):
torch.save(model.state_dict(), filepath)

# 加载模型
def load_model(model, filepath):
model.load_state_dict(torch.load(filepath))
model.eval() # 设置为评估模式

# 示例数据加载
# train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)

# 示例训练过程
# train(model, train_loader, criterion, optimizer)

# 示例模型保存
# save_model(model, 'model.pth')

# 示例模型加载
# loaded_model = BasicNet()
# load_model(loaded_model, 'model.pth')

DQN算法

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
from collections import namedtuple

class DQN(nn.Module):
def __init__(self, input_dim, output_dim):
super(DQN, self).__init__()
self.fc1 = nn.Linear(input_dim, 128)
self.fc2 = nn.Linear(128, 128)
self.fc3 = nn.Linear(128, output_dim)

def forward(self, x):
x = torch.relu(self.fc1(x))
x = torch.relu(self.fc2(x))
x = self.fc3(x)
return x

class DQNAgent:
def __init__(self, input_dim, output_dim, gamma=0.99, lr=0.001):
self.input_dim = input_dim
self.output_dim = output_dim
self.gamma = gamma
self.lr = lr
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.policy_net = DQN(input_dim, output_dim).to(self.device)
self.target_net = DQN(input_dim, output_dim).to(self.device)
self.target_net.load_state_dict(self.policy_net.state_dict())
self.target_net.eval()
self.optimizer = optim.Adam(self.policy_net.parameters(), lr=lr)
self.loss_fn = nn.MSELoss()

def select_action(self, state, epsilon):
if np.random.rand() < epsilon:
return np.random.randint(self.output_dim)
state = torch.FloatTensor(state).unsqueeze(0).to(self.device)
with torch.no_grad():
q_values = self.policy_net(state)
return q_values.max(1)[1].item()

def train(self, replay_buffer, batch_size):
if len(replay_buffer) < batch_size:
return
transitions = replay_buffer.sample(batch_size)
batch = Transition(*zip(*transitions))
state_batch = torch.FloatTensor(batch.state).to(self.device)
next_state_batch = torch.FloatTensor(batch.next_state).to(self.device)
action_batch = torch.LongTensor(batch.action).unsqueeze(1).to(self.device)
reward_batch = torch.FloatTensor(batch.reward).unsqueeze(1).to(self.device)
done_batch = torch.FloatTensor(batch.done).unsqueeze(1).to(self.device)

current_q_values = self.policy_net(state_batch).gather(1, action_batch)
next_q_values = self.target_net(next_state_batch).max(1)[0].unsqueeze(1)
target_q_values = reward_batch + (1 - done_batch) * self.gamma * next_q_values

loss = self.loss_fn(current_q_values, target_q_values.detach())
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()

def update_target_net(self):
self.target_net.load_state_dict(self.policy_net.state_dict())

class ReplayBuffer:
def __init__(self, capacity):
self.capacity = capacity
self.buffer = []
self.position = 0

def push(self, state, action, reward, next_state, done):
if len(self.buffer) < self.capacity:
self.buffer.append(None)
self.buffer[self.position] = (state, action, reward, next_state, done)
self.position = (self.position + 1) % self.capacity

def sample(self, batch_size):
return random.sample(self.buffer, batch_size)

def __len__(self):
return len(self.buffer)

Transition = namedtuple('Transition', ('state', 'action', 'reward', 'next_state', 'done'))

PPO算法

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

class ActorCritic(nn.Module):
def __init__(self, input_dim, output_dim):
super(ActorCritic, self).__init__()
self.fc1 = nn.Linear(input_dim, 128)
self.fc2 = nn.Linear(128, 128)
self.fc_actor = nn.Linear(128, output_dim)
self.fc_critic = nn.Linear(128, 1)

def forward(self, x):
x = torch.relu(self.fc1(x))
x = torch.relu(self.fc2(x))
logits = self.fc_actor(x)
value = self.fc_critic(x)
return logits, value

class PPOAgent:
def __init__(self, input_dim, output_dim, gamma=0.99, lr=0.001):
self.input_dim = input_dim
self.output_dim = output_dim
self.gamma = gamma
self.lr = lr
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.policy_net = ActorCritic(input_dim, output_dim).to(self.device)
self.optimizer = optim.Adam(self.policy_net.parameters(), lr=lr)
self.loss_fn = nn.MSELoss()

def select_action(self, state):
state = torch.FloatTensor(state).unsqueeze(0).to(self.device)
with torch.no_grad():
logits, _ = self.policy_net(state)
action_probs = torch.softmax(logits, dim=-1)
action = np.random.choice(np.arange(self.output_dim), p=action_probs.cpu().numpy().ravel())
return action

def train(self, states, actions, rewards, next_states, dones, old_log_probs, epsilon_clip=0.2, num_epochs=10):
states = torch.FloatTensor(states).to(self.device)
actions = torch.LongTensor(actions).unsqueeze(-1).to(self.device)
rewards = torch.FloatTensor(rewards).unsqueeze(-1).to(self.device)
next_states = torch.FloatTensor(next_states).to(self.device)
dones = torch.FloatTensor(dones).unsqueeze(-1).to(self.device)
old_log_probs = torch.FloatTensor(old_log_probs).unsqueeze(-1).to(self.device)

for _ in range(num_epochs):
logits, values = self.policy_net(states)
new_log_probs = torch.log_softmax(logits, dim=-1).gather(1, actions)
ratio = (new_log_probs - old_log_probs).exp()
advantages = rewards - values.detach()

surr1 = ratio * advantages
surr2 = torch.clamp(ratio, 1.0 - epsilon_clip, 1.0 + epsilon_clip) * advantages
actor_loss = -torch.min(surr1, surr2).mean()

critic_loss = 0.5 * (rewards - values).pow(2).mean()

loss = actor_loss + 0.5 * critic_loss

self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()

自动驾驶和强化学习

设计和实现一个基于强化学习的自动驾驶决策规划系统是一个复杂而且需要深入思考的任务。下面是一个基本的设计和实现方案:

设计方案:
  1. 环境建模:首先,需要对自动驾驶的环境进行建模,包括车辆、道路、交通规则、障碍物、目标等。环境可以使用基于物理的仿真环境,例如CARLA,或者基于虚拟仿真的环境。
  2. 状态空间定义:定义自动驾驶车辆的状态空间,这可能包括车辆的位置、速度、方向、周围车辆的位置和速度等信息。
  3. 动作空间定义:定义自动驾驶车辆可以执行的动作空间,如加速、减速、转向等。
  4. 奖励函数设计:设计奖励函数来评估每个状态下执行的动作,以指导智能体的学习。奖励函数应该考虑安全性、效率、舒适性等因素。
  5. 智能体模型选择:选择合适的强化学习算法和模型架构来训练智能体,例如深度Q网络(DQN)、深度确定性策略梯度(DDPG)、双重深度确定性策略梯度(TD3)等。
  6. 训练策略:定义训练策略,包括学习率、优化器、探索策略等。
  7. 评估与测试:在仿真环境中对训练好的智能体进行评估和测试,检查其性能和鲁棒性。
实现方案:
  1. 环境建模: 使用CARLA等仿真平台进行环境建模。
  2. 状态空间和动作空间定义:编写代码从环境中获取车辆状态信息,并定义可以执行的动作。
  3. 奖励函数设计:根据项目需求和目标设计奖励函数。
  4. 智能体模型选择:根据任务选择合适的强化学习算法和模型架构,例如使用深度神经网络作为Q值函数的估计器。
  5. 训练智能体:使用收集的数据对智能体进行训练,调整网络参数,以最大化奖励函数。
  6. 评估与测试:在仿真环境中评估训练后的智能体的性能,分析其行为是否符合预期,并且能否有效应对各种情况。
  7. 迭代优化:根据评估结果,对模型和训练策略进行调整和优化,不断改进智能体的性能和稳定性。

以上是一个基本的设计和实现方案,实际项目中可能还涉及到更多的细节和挑战,如安全性保障、仿真环境与真实环境的一致性等问题。因此,在实施过程中需要综合考虑各种因素,以确保系统的稳定性和安全性。

]]>
diff --git a/search.xml b/search.xml index 74da45e..509931a 100644 --- a/search.xml +++ b/search.xml @@ -4264,6 +4264,46 @@ id="pytorch实现自注意力和多头注意力">pytorch实现自注意力和多

Transformer结构

import torch
import torch.nn as nn
import torch.nn.functional as F

class MultiHeadAttention(nn.Module):
def __init__(self, embed_dim, num_heads):
super(MultiHeadAttention, self).__init__()
self.num_heads = num_heads
self.head_dim = embed_dim // num_heads

self.query_fc = nn.Linear(embed_dim, embed_dim)
self.key_fc = nn.Linear(embed_dim, embed_dim)
self.value_fc = nn.Linear(embed_dim, embed_dim)
self.fc_out = nn.Linear(embed_dim, embed_dim)

def forward(self, query, key, value, mask=None):
batch_size = query.shape[0]

# Linearly project queries, keys, and values
Q = self.query_fc(query)
K = self.key_fc(key)
V = self.value_fc(value)

# Split the embedding into num_heads
Q = Q.view(batch_size, -1, self.num_heads, self.head_dim).permute(0, 2, 1, 3)
K = K.view(batch_size, -1, self.num_heads, self.head_dim).permute(0, 2, 1, 3)
V = V.view(batch_size, -1, self.num_heads, self.head_dim).permute(0, 2, 1, 3)

# Calculate the attention scores
scores = torch.matmul(Q, K.permute(0, 1, 3, 2)) / torch.sqrt(torch.tensor(self.head_dim, dtype=torch.float32))

if mask is not None:
scores = scores.masked_fill(mask == 0, float("-1e20"))

# Apply softmax to get attention probabilities
attention_weights = F.softmax(scores, dim=-1)

# Apply dropout
attention_weights = F.dropout(attention_weights, p=0.1, training=self.training)

# Multiply the attention weights with the values
output = torch.matmul(attention_weights, V)

# Concatenate multi-heads and project
output = output.permute(0, 2, 1, 3).contiguous().view(batch_size, -1, self.num_heads * self.head_dim)
output = self.fc_out(output)

return output, attention_weights

class PositionwiseFeedforward(nn.Module):
def __init__(self, embed_dim, hidden_dim):
super(PositionwiseFeedforward, self).__init__()
self.fc1 = nn.Linear(embed_dim, hidden_dim)
self.fc2 = nn.Linear(hidden_dim, embed_dim)

def forward(self, x):
x = F.relu(self.fc1(x))
x = self.fc2(x)
return x

class EncoderLayer(nn.Module):
def __init__(self, embed_dim, num_heads, hidden_dim):
super(EncoderLayer, self).__init__()
self.multihead_attention = MultiHeadAttention(embed_dim, num_heads)
self.feed_forward = PositionwiseFeedforward(embed_dim, hidden_dim)
self.layer_norm1 = nn.LayerNorm(embed_dim)
self.layer_norm2 = nn.LayerNorm(embed_dim)

def forward(self, x, mask=None):
# Multi-Head Attention
residual = x
x, _ = self.multihead_attention(x, x, x, mask)
x = self.layer_norm1(x + residual)

# Feed Forward
residual = x
x = self.feed_forward(x)
x = self.layer_norm2(x + residual)

return x

class TransformerEncoder(nn.Module):
def __init__(self, vocab_size, embed_dim, num_layers, num_heads, hidden_dim):
super(TransformerEncoder, self).__init__()
self.embedding = nn.Embedding(vocab_size, embed_dim)
self.layers = nn.ModuleList([EncoderLayer(embed_dim, num_heads, hidden_dim) for _ in range(num_layers)])

def forward(self, x, mask=None):
x = self.embedding(x)
for layer in self.layers:
x = layer(x, mask)
return x

import torch
import torch.nn as nn
import torch.optim as optim

class BasicNet(nn.Module):
def __init__(self, input_dim=784, hidden_dim1=256, hidden_dim2=128, output_dim=10):
super(BasicNet, self).__init__()
self.fc1 = nn.Linear(input_dim, hidden_dim1)
self.fc2 = nn.Linear(hidden_dim1, hidden_dim2)
self.fc3 = nn.Linear(hidden_dim2, output_dim)
self.relu = nn.ReLU()
self.batchnorm1 = nn.BatchNorm1d(hidden_dim1)
self.batchnorm2 = nn.BatchNorm1d(hidden_dim2)

def forward(self, x):
x = self.relu(self.batchnorm1(self.fc1(x)))
x = self.relu(self.batchnorm2(self.fc2(x)))
x = self.fc3(x)
return x

# 创建模型
model = BasicNet()

# 定义损失函数
criterion = nn.CrossEntropyLoss()

# 定义优化器
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
def train(model, train_loader, criterion, optimizer, num_epochs=5):
model.train()
for epoch in range(num_epochs):
running_loss = 0.0
for i, (inputs, labels) in enumerate(train_loader):
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
if (i+1) % 100 == 0:
print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(train_loader)}], Loss: {running_loss/100:.4f}')
running_loss = 0.0

# 保存模型
def save_model(model, filepath):
torch.save(model.state_dict(), filepath)

# 加载模型
def load_model(model, filepath):
model.load_state_dict(torch.load(filepath))
model.eval() # 设置为评估模式

# 示例数据加载
# train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)

# 示例训练过程
# train(model, train_loader, criterion, optimizer)

# 示例模型保存
# save_model(model, 'model.pth')

# 示例模型加载
# loaded_model = BasicNet()
# load_model(loaded_model, 'model.pth')


DQN算法

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
from collections import namedtuple

class DQN(nn.Module):
def __init__(self, input_dim, output_dim):
super(DQN, self).__init__()
self.fc1 = nn.Linear(input_dim, 128)
self.fc2 = nn.Linear(128, 128)
self.fc3 = nn.Linear(128, output_dim)

def forward(self, x):
x = torch.relu(self.fc1(x))
x = torch.relu(self.fc2(x))
x = self.fc3(x)
return x

class DQNAgent:
def __init__(self, input_dim, output_dim, gamma=0.99, lr=0.001):
self.input_dim = input_dim
self.output_dim = output_dim
self.gamma = gamma
self.lr = lr
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.policy_net = DQN(input_dim, output_dim).to(self.device)
self.target_net = DQN(input_dim, output_dim).to(self.device)
self.target_net.load_state_dict(self.policy_net.state_dict())
self.target_net.eval()
self.optimizer = optim.Adam(self.policy_net.parameters(), lr=lr)
self.loss_fn = nn.MSELoss()

def select_action(self, state, epsilon):
if np.random.rand() < epsilon:
return np.random.randint(self.output_dim)
state = torch.FloatTensor(state).unsqueeze(0).to(self.device)
with torch.no_grad():
q_values = self.policy_net(state)
return q_values.max(1)[1].item()

def train(self, replay_buffer, batch_size):
if len(replay_buffer) < batch_size:
return
transitions = replay_buffer.sample(batch_size)
batch = Transition(*zip(*transitions))
state_batch = torch.FloatTensor(batch.state).to(self.device)
next_state_batch = torch.FloatTensor(batch.next_state).to(self.device)
action_batch = torch.LongTensor(batch.action).unsqueeze(1).to(self.device)
reward_batch = torch.FloatTensor(batch.reward).unsqueeze(1).to(self.device)
done_batch = torch.FloatTensor(batch.done).unsqueeze(1).to(self.device)

current_q_values = self.policy_net(state_batch).gather(1, action_batch)
next_q_values = self.target_net(next_state_batch).max(1)[0].unsqueeze(1)
target_q_values = reward_batch + (1 - done_batch) * self.gamma * next_q_values

loss = self.loss_fn(current_q_values, target_q_values.detach())
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()

def update_target_net(self):
self.target_net.load_state_dict(self.policy_net.state_dict())

class ReplayBuffer:
def __init__(self, capacity):
self.capacity = capacity
self.buffer = []
self.position = 0

def push(self, state, action, reward, next_state, done):
if len(self.buffer) < self.capacity:
self.buffer.append(None)
self.buffer[self.position] = (state, action, reward, next_state, done)
self.position = (self.position + 1) % self.capacity

def sample(self, batch_size):
return random.sample(self.buffer, batch_size)

def __len__(self):
return len(self.buffer)

Transition = namedtuple('Transition', ('state', 'action', 'reward', 'next_state', 'done'))


PPO算法

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

class ActorCritic(nn.Module):
def __init__(self, input_dim, output_dim):
super(ActorCritic, self).__init__()
self.fc1 = nn.Linear(input_dim, 128)
self.fc2 = nn.Linear(128, 128)
self.fc_actor = nn.Linear(128, output_dim)
self.fc_critic = nn.Linear(128, 1)

def forward(self, x):
x = torch.relu(self.fc1(x))
x = torch.relu(self.fc2(x))
logits = self.fc_actor(x)
value = self.fc_critic(x)
return logits, value

class PPOAgent:
def __init__(self, input_dim, output_dim, gamma=0.99, lr=0.001):
self.input_dim = input_dim
self.output_dim = output_dim
self.gamma = gamma
self.lr = lr
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.policy_net = ActorCritic(input_dim, output_dim).to(self.device)
self.optimizer = optim.Adam(self.policy_net.parameters(), lr=lr)
self.loss_fn = nn.MSELoss()

def select_action(self, state):
state = torch.FloatTensor(state).unsqueeze(0).to(self.device)
with torch.no_grad():
logits, _ = self.policy_net(state)
action_probs = torch.softmax(logits, dim=-1)
action = np.random.choice(np.arange(self.output_dim), p=action_probs.cpu().numpy().ravel())
return action

def train(self, states, actions, rewards, next_states, dones, old_log_probs, epsilon_clip=0.2, num_epochs=10):
states = torch.FloatTensor(states).to(self.device)
actions = torch.LongTensor(actions).unsqueeze(-1).to(self.device)
rewards = torch.FloatTensor(rewards).unsqueeze(-1).to(self.device)
next_states = torch.FloatTensor(next_states).to(self.device)
dones = torch.FloatTensor(dones).unsqueeze(-1).to(self.device)
old_log_probs = torch.FloatTensor(old_log_probs).unsqueeze(-1).to(self.device)

for _ in range(num_epochs):
logits, values = self.policy_net(states)
new_log_probs = torch.log_softmax(logits, dim=-1).gather(1, actions)
ratio = (new_log_probs - old_log_probs).exp()
advantages = rewards - values.detach()

surr1 = ratio * advantages
surr2 = torch.clamp(ratio, 1.0 - epsilon_clip, 1.0 + epsilon_clip) * advantages
actor_loss = -torch.min(surr1, surr2).mean()

critic_loss = 0.5 * (rewards - values).pow(2).mean()

loss = actor_loss + 0.5 * critic_loss

self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()


自动驾驶和强化学习


设计和实现一个基于强化学习的自动驾驶决策规划系统是一个复杂而且需要深入思考的任务。下面是一个基本的设计和实现方案:

设计方案:

  1. 环境建模:首先,需要对自动驾驶的环境进行建模,包括车辆、道路、交通规则、障碍物、目标等。环境可以使用基于物理的仿真环境,例如CARLA,或者基于虚拟仿真的环境。
  2. 状态空间定义:定义自动驾驶车辆的状态空间,这可能包括车辆的位置、速度、方向、周围车辆的位置和速度等信息。
  3. 动作空间定义:定义自动驾驶车辆可以执行的动作空间,如加速、减速、转向等。
  4. 奖励函数设计:设计奖励函数来评估每个状态下执行的动作,以指导智能体的学习。奖励函数应该考虑安全性、效率、舒适性等因素。
  5. 智能体模型选择:选择合适的强化学习算法和模型架构来训练智能体,例如深度Q网络(DQN)、深度确定性策略梯度(DDPG)、双重深度确定性策略梯度(TD3)等。
  6. 训练策略:定义训练策略,包括学习率、优化器、探索策略等。
  7. 评估与测试:在仿真环境中对训练好的智能体进行评估和测试,检查其性能和鲁棒性。

实现方案:

  1. 环境建模:使用CARLA等仿真平台进行环境建模。
  2. 状态空间和动作空间定义:编写代码从环境中获取车辆状态信息,并定义可以执行的动作。
  3. 奖励函数设计:根据项目需求和目标设计奖励函数。
  4. 智能体模型选择:根据任务选择合适的强化学习算法和模型架构,例如使用深度神经网络作为Q值函数的估计器。
  5. 训练智能体:使用收集的数据对智能体进行训练,调整网络参数,以最大化奖励函数。
  6. 评估与测试:在仿真环境中评估训练后的智能体的性能,分析其行为是否符合预期,并且能否有效应对各种情况。
  7. 迭代优化:根据评估结果,对模型和训练策略进行调整和优化,不断改进智能体的性能和稳定性。

以上是一个基本的设计和实现方案,实际项目中可能还涉及到更多的细节和挑战,如安全性保障、仿真环境与真实环境的一致性等问题。因此,在实施过程中需要综合考虑各种因素,以确保系统的稳定性和安全性。

]]> 深度学习