视频演示讲解:

https://www.bilibili.com/video/BV1ENopBqEgv/?pop_share=1&spm_id_from=333.40164.0.0&vd_source=66998a92e3158aac4fbba5005183878c

结果显示:

代码:

# requirements:
#   pip install transformers==4.39.0 peft accelerate evaluate torch
import multiprocessing
from multiprocessing import freeze_support
import os
import json
from datasets import Dataset


def main():
    import warnings
    warnings.filterwarnings("ignore")

    import torch
    from transformers import (
        AutoTokenizer, AutoModelForCausalLM,
        TrainingArguments, Trainer,
        DataCollatorForLanguageModeling
    )
    from peft import LoraConfig, get_peft_model, PeftModel, TaskType

    # ===================== 模型配置 =====================
    BASE_MODEL = "Qwen/Qwen2.5-0.5B-Instruct"
    LORA_SAVE_DIR = "./qwen2.5-power-supply-lora"
    MAX_LENGTH = 512  # 全局固定长度

    # 加载分词器
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    # 加载模型(CPU 兼容)
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,
        trust_remote_code=True,
        device_map="cpu"
    )

    # LoRA 配置
    lora_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
        r=16,
        lora_alpha=32,
        lora_dropout=0.05,
        target_modules=["q_proj", "v_proj"]
    )
    model = get_peft_model(base_model, lora_config)
    model.print_trainable_parameters()

    # ===================== 读取数据集 =====================
    with open("train_qa_供电数据集.json", "r", encoding="utf-8") as f:
        data = json.load(f)
    train_ds = Dataset.from_list(data)

    # ===================== 终极修复:强制所有样本长度完全一致 =====================
    def preprocess_train(ex):
        prompt = f"问题:{ex['question']}\n回答:"
        answer = ex['expert_answer']
        full_text = prompt + answer

        # 1. 强制填充+截断到固定长度(所有样本长度一模一样)
        tokenized = tokenizer(
            full_text,
            truncation=True,
            padding="max_length",  # 核心:直接填充到最大长度
            max_length=MAX_LENGTH,
            add_special_tokens=False,
        )

        # 2. 计算问题长度,构建labels(严格和input_ids等长)
        prompt_len = len(tokenizer(prompt, add_special_tokens=False)["input_ids"])
        input_ids = tokenized["input_ids"]

        # 3. 生成标签,强制长度=MAX_LENGTH
        labels = [-100] * prompt_len + input_ids[prompt_len:]
        labels = labels[:MAX_LENGTH]  # 截断
        labels += [-100] * (MAX_LENGTH - len(labels))  # 填充

        tokenized["labels"] = labels
        return tokenized

    train_ds = train_ds.map(
        preprocess_train,
        remove_columns=train_ds.column_names,
        num_proc=1
    )

    # 数据对齐器
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False,
    )

    # ===================== 批量微调训练参数(CPU 稳跑) =====================
    training_args = TrainingArguments(
        # 1. 模型输出配置
        output_dir=LORA_SAVE_DIR,  # 训练好的模型、日志、检查点 保存的文件夹路径

        # 2. 批次大小(核心:一次训练多少条数据)
        per_device_train_batch_size=16,  # 每个设备(你的CPU)一次喂给模型**16条数据**训练
        # 👉 CPU建议改 2/4,16太大容易内存不足报错

        # 3. 梯度累积(模拟大批次训练,提升效果)
        gradient_accumulation_steps=16,  # 每累积16步才更新一次模型参数
        # 👉 小批次+累积 = 模拟大批次训练,效果更好

        # 4. 训练轮次
        num_train_epochs=5,  # 把整个数据集**完整训练5遍**

        # 5. 学习率(核心:模型学习的速度)
        learning_rate=2e-4,  # 学习率=0.0002,LoRA微调标准参数
        # 👉 太大模型不收敛,太小训练太慢

        # 6. 混合精度训练(CPU专用:必须关闭)
        fp16=False,  # 不使用FP16半精度(CPU不支持,必须关)
        bf16=False,  # 不使用BF16半精度(CPU不支持,必须关)

        # 7. 日志与保存
        logging_steps=5,  # 每训练5步,打印一次训练日志(损失值、步数)
        save_steps=200,  # 每训练200步,保存一次模型
        save_total_limit=1,  # 最多只保留1个模型文件(节省磁盘空间)

        # 8. Windows CPU 专属配置
        dataloader_num_workers=0,  # 数据加载线程数=0
        # 👉 Windows CPU必须设0,否则报错/卡死

        # 9. 评估与上报
        evaluation_strategy="no",  # 不做模型评估(只训练,不测试)
        report_to="none",  # 不向任何平台上报日志(本地训练专用)

        # 10. 优化器与稳定性
        optim="adamw_torch",  # 使用 AdamW 优化器(深度学习最常用)
        max_grad_norm=1.0,  # 梯度裁剪,防止模型训练时梯度爆炸(训练更稳定)
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_ds,
        tokenizer=tokenizer,
        data_collator=data_collator,
    )

    # 开始训练
    trainer.train()

    # 保存模型
    model.save_pretrained(LORA_SAVE_DIR)
    tokenizer.save_pretrained(LORA_SAVE_DIR)

    # ===================== 推理测试 =====================
    base_model_infer = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,
        trust_remote_code=True,
        device_map="cpu"
    )
    lora_model = PeftModel.from_pretrained(base_model_infer, LORA_SAVE_DIR)
    lora_model.eval()

    def answer_question(question, max_new_tokens=512):
        prompt = f"问题:{question}\n回答:"
        inputs = tokenizer(prompt, return_tensors="pt")
        with torch.no_grad():
            out = lora_model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=False,
                temperature=0.1,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.pad_token_id,
            )
        text = tokenizer.decode(out[0], skip_special_tokens=True)
        return text[len(prompt):].strip()

    test_questions = [
        "运营期间巡检时发现交直流屏整流模块1台停机该如何处理?",
        "江西路降压所35kV I段开关柜停电会影响哪些供电分区?"
    ]

    print("\n=== 地铁供电运维问答 测试结果 ===")
    for q in test_questions:
        a = answer_question(q)
        print(f"Q: {q}")
        print(f"A: {a}")
        print("-" * 80)


if __name__ == "__main__":
    freeze_support()
    main()

Logo

欢迎加入DeepSeek 技术社区。在这里,你可以找到志同道合的朋友,共同探索AI技术的奥秘。

更多推荐