婴儿版训练GPT

昆曲之源_娄江河畔

269人浏览 · 2026-04-21 09:25:48

昆曲之源_娄江河畔 · 2026-04-21 09:25:48 发布

import numpy as np

# ==============================
# 1. 固定词典（你建立的字典）
# ==============================
# Fixed character-level vocabulary: each token's id is its position in the list.
vocab = {
    token: index
    for index, token in enumerate(["我", "喜", "欢", "中", "国", "美", "食", "[END]"])
}
# Reverse lookup (id -> token), used to turn a predicted id back into text.
idx2word = dict(zip(vocab.values(), vocab.keys()))
vocab_size = len(vocab)

# Hyper-parameters.
d_model = 8  # width of every embedding / attention vector
lr = 0.1  # learning rate (step size of each parameter update)

# ==============================
# 2. 初始化所有矩阵！【随机，但会被训练】
# ==============================
# Seed NumPy's global RNG so every run starts from the same random weights.
np.random.seed(42)

# The draw order below (embedding, Wq, Wk, Wv, output_layer) determines the
# initial values, so it must not be rearranged.
embedding = np.random.randn(vocab_size, d_model)  # one row per vocabulary token
Wq, Wk, Wv = (np.random.randn(d_model, d_model) for _ in range(3))  # attention Q / K / V projections
output_layer = np.random.randn(d_model, vocab_size)  # projects a d_model vector to vocabulary logits

# ==============================
# 3. 定义 softmax（稳定版）
# ==============================
def softmax(x, axis=-1):
    """Return the softmax of ``x`` along ``axis``.

    The maximum along ``axis`` is subtracted before exponentiating, so large
    inputs do not overflow ``np.exp``; the result is mathematically unchanged.

    Args:
        x: Array-like of scores (a NumPy array, or anything ``np.asarray``
            accepts such as a list of numbers).
        axis: Axis along which the values are normalised to sum to 1.
            Defaults to the last axis, which is what the rest of this file
            relies on.

    Returns:
        A NumPy array with the same shape as ``x`` whose entries along
        ``axis`` are non-negative and sum to 1.
    """
    x = np.asarray(x)
    # Shifting by the max keeps every exponent <= 0 (no overflow).
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

# ==============================
# 4. 【核心】前向传播（模型预测）
# ==============================
def forward(input_ids):
    """Run one forward pass and score the token that follows ``input_ids``.

    Reads the module-level parameters ``embedding``, ``Wq``, ``Wk``, ``Wv``,
    ``output_layer`` and the constant ``d_model``; nothing is modified.

    Args:
        input_ids: Sequence of token ids used to index rows of ``embedding``.

    Returns:
        A 10-tuple ``(x, Q, K, V, scores, attn_weights, attn_out, last_vec,
        logits, probs)`` containing every intermediate value in the order it
        is computed. ``probs`` is the softmax of ``logits``, i.e. the
        predicted distribution over the vocabulary for the next token.
    """
    # Token ids -> embedding vectors.
    token_vecs = embedding[input_ids]

    # Single-head self-attention; no mask is applied to the score matrix.
    queries, keys, values = token_vecs @ Wq, token_vecs @ Wk, token_vecs @ Wv
    scaled_scores = (queries @ keys.T) / np.sqrt(d_model)
    weights = softmax(scaled_scores)
    context = weights @ values

    # Only the last position's output is used to predict the next token.
    final_vec = context[-1]
    next_logits = final_vec @ output_layer
    next_probs = softmax(next_logits)

    return (
        token_vecs,
        queries,
        keys,
        values,
        scaled_scores,
        weights,
        context,
        final_vec,
        next_logits,
        next_probs,
    )

# ==============================
# 5. 【核心】训练！反向传播（调参）
# 让模型从错误中学习
# ==============================
def train(input_ids, target_id):
    """Run one training step on a single (context, next-token) example.

    Performs a forward pass, computes the cross-entropy loss against
    ``target_id`` and updates ``output_layer`` and one row of ``embedding``
    in place using the module-level learning rate ``lr``.

    Args:
        input_ids: Sequence of token ids forming the context.
        target_id: Id of the token that should follow the context.

    Returns:
        ``(loss, probs)`` where ``loss`` is the cross-entropy of the
        prediction made *before* this step's update and ``probs`` is that
        prediction's distribution over the vocabulary.
    """
    global embedding, Wq, Wk, Wv, output_layer

    # 1. Forward pass; only the last attention output and the final
    #    probabilities are needed for this simplified backward pass.
    *_, last_vec, _logits, probs = forward(input_ids)

    # 2. Cross-entropy loss; the epsilon guards against log(0).
    loss = -np.log(probs[target_id] + 1e-10)

    # 3. Gradient of the loss w.r.t. the logits: probs - one_hot(target).
    grad_logits = probs.copy()
    grad_logits[target_id] -= 1

    # Compute every gradient BEFORE mutating any parameter, so the backward
    # signal is derived from the same weights that produced the forward pass.
    grad_output_layer = np.outer(last_vec, grad_logits)
    grad_last = grad_logits @ output_layer.T

    # 4. Apply the updates.
    output_layer -= lr * grad_output_layer
    # NOTE: simplified update. The gradient w.r.t. the attention output is
    # applied directly to the last input token's embedding row; it is not
    # back-propagated through the attention weights or Wv, and Wq / Wk / Wv
    # are left unchanged by this step.
    embedding[input_ids[-1]] -= lr * grad_last

    return loss, probs

# ==============================
# 6. 开始训练！
# 输入：我喜欢中国 → 目标：输出美（ID=5）
# ==============================
# Single training example: the context "我喜欢中国" should be followed by "美".
input_text = "我喜欢中国"
input_ids = [vocab[ch] for ch in input_text]
target_id = 5  # id of the expected next token ("美")

print("===== 开始训练（越训练，越准）=====\n")
for step in range(200):  # 200 update steps on the same example
    loss, probs = train(input_ids, target_id)
    pred_id = np.argmax(probs)
    pred_word, true_word = idx2word[pred_id], idx2word[target_id]

    # Report progress only on every 10th step.
    if step % 10:
        continue
    print(f"训练步数 {step:3d} | 损失:{loss:.4f} | 预测:{pred_word} | 正确:{true_word}")

# ==============================
# 训练完成，最终测试
# ==============================
# Final evaluation: run the trained model once more on the training context.
print("\n===== 训练完成！最终预测 =====")
*_, probs = forward(input_ids)  # only the final probabilities are needed
pred_id = np.argmax(probs)
pred_word = idx2word[pred_id]
print(f"输入：{input_text}")
print(f"模型预测下一个字：【 {pred_word} 】")
# Report success only when the prediction actually matches the target;
# previously the success line was printed unconditionally.
if pred_id == target_id:
    # NOTE(review): the leading "?" looks like a mis-encoded emoji — confirm
    # the intended character before changing it.
    print("? 训练成功！模型学会了！")
else:
    print(f"训练未成功：期望【 {idx2word[target_id]} 】，模型预测【 {pred_word} 】")

DeepSeek技术社区

欢迎加入DeepSeek 技术社区。在这里，你可以找到志同道合的朋友，共同探索AI技术的奥秘。

更多推荐

AI时代下的前端求生之路

DeepSeek技术社区

2026 年有哪些真正适合学生写开题的 AI 辅助写作工具，实测无套路分享

DeepSeek技术社区

DeepSeek 集成大全：100+ 工具和框架，覆盖主流开发场景

桌面应用部分的选择最多。IDE 集成方面，VS Code 有 Continue、Cline 这样的 AI 编程助手，JetBrains 系列也有对应的插件，比如 AutoDev 和 Onegai Copilot。整个集合覆盖了从轻量级客户端到企业级平台的完整生态，既有成熟的开源项目如 LibreChat 和 LobeChat，也有专门解决某个痛点的小工具。主要分类包括桌面应用、浏览器插件、IDE