Fine-tuning takes a large pretrained base model and trains it for a small number of additional iterations on a domain- or task-specific dataset, adjusting its parameters to improve performance on that task. This approach leverages the broad knowledge of the pretrained model while optimizing it for a particular application, yielding more accurate and efficient results.

Unsloth

Unsloth is an open-source project for accelerating large-model training. It can significantly speed up training (by 2-5x) and reduce VRAM usage (by up to 80%).

Environment setup

Create the environment
conda create --name unsloth_env python=3.10
conda activate unsloth_env

# Alternatively, create the environment with PyTorch, CUDA 12.1, and xformers installed via conda:
conda create --name unsloth_env \
    python=3.11 \
    pytorch-cuda=12.1 \
    pytorch cudatoolkit xformers -c pytorch -c nvidia -c xformers \
    -y
conda activate unsloth_env

Install dependencies
# In a notebook (e.g. Colab/Jupyter), prefix the commands with "!":
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps "xformers<0.0.26" trl peft accelerate bitsandbytes
!pip install modelscope

# Or from a regular shell:
pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
pip install --no-deps trl peft accelerate bitsandbytes

Verify the installation
python -m torch.utils.collect_env
python -m xformers.info
python -m bitsandbytes
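
You can also sanity-check the GPU setup from Python (a minimal sketch; the printed values depend on your machine):

import torch

print(torch.__version__)              # installed PyTorch version
print(torch.cuda.is_available())      # should be True on a CUDA machine
print(torch.cuda.get_device_name(0))  # name of the first GPU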

Model inference with Unsloth

from unsloth import FastLanguageModel

max_seq_length = 2048
dtype = None          # None = auto-detect (bfloat16 on newer GPUs, float16 otherwise)
load_in_4bit = False  # set True to load 4-bit quantized weights and save VRAM

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "./DeepSeek-R1-Distill-Llama-8B",  # local model directory
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

FastLanguageModel.for_inference(model)  # switch the model into Unsloth's optimized inference mode

question = "天空为什么是蓝色?"  # "Why is the sky blue?"
# inputs = tokenizer([question], return_tensors="pt").to("cuda")

# Wrap the question in a Q&A prompt template (the Chinese instructions ask the model
# to act as a helpful assistant and write an appropriate response)
prompt_style_chat = """请写出一个恰当的回答来完成当前对话任务

### Instruction:
你是一名助人为乐的助手。

### Question:
{}

### Response:
<think>{}"""
inputs = tokenizer([prompt_style_chat.format(question, "")], return_tensors="pt").to("cuda")

outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)
response = tokenizer.batch_decode(outputs)

print(response[0].split("### Response:")[1])
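
If you would rather not see special tokens (such as the EOS marker) in the output, batch_decode also accepts skip_special_tokens=True:

response = tokenizer.batch_decode(outputs, skip_special_tokens=True)
print(response[0].split("### Response:")[1])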

Test before fine-tuning
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. 
Please answer the following medical question. 

### Question:
{}

### Response:
<think>{}"""

question_1 = "A 61-year-old woman with a long history of involuntary urine loss during activities like coughing or sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, what would cystometry most likely reveal about her residual volume and detrusor contractions?"

question_2 = "Given a patient who experiences sudden-onset chest pain radiating to the neck and left arm, with a past medical history of hypercholesterolemia and coronary artery disease, elevated troponin I levels, and tachycardia, what is the most likely coronary artery involved based on this presentation?"

inputs1 = tokenizer([prompt_style.format(question_1, "")], return_tensors="pt").to("cuda")

outputs1 = model.generate(
    input_ids=inputs1.input_ids,
    attention_mask=inputs1.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)
response1 = tokenizer.batch_decode(outputs1)

print(response1[0].split("### Response:")[1])

Minimal viable fine-tuning

Download the dataset
!pip install datasets
from datasets import load_dataset

# Load the first 500 examples of the English split for a quick experiment
dataset = load_dataset("FreedomIntelligence/medical-o1-reasoning-SFT", "en", split="train[0:500]", trust_remote_code=True)

# dataset[0]
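
As a quick sanity check on the download, you can inspect the slice size and columns (a minimal sketch; the column names are the ones formatting_prompts_func below relies on):

print(len(dataset))          # 500 examples in this slice
print(dataset.column_names)  # expect ['Question', 'Complex_CoT', 'Response']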
Parameter setup
# Prompt template used to build the training text
train_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. 
Please answer the following medical question. 

### Question:
{}

### Response:
<think>
{}
</think>
{}"""

# End-of-sequence token; appended to each example so the model learns where to stop
EOS_TOKEN = tokenizer.eos_token



Process the dataset
# Format the medical-o1-reasoning-SFT examples: fill the template with the Question,
# the Complex_CoT reasoning, and the Response, then append the end-of-text marker
def formatting_prompts_func(examples):
    inputs = examples["Question"]
    cots = examples["Complex_CoT"]
    outputs = examples["Response"]
    texts = []
    for input, cot, output in zip(inputs, cots, outputs):
        text = train_prompt_style.format(input, cot, output) + EOS_TOKEN
        texts.append(text)
    return {
        "text": texts,
    }

# Apply the formatting across the whole dataset in batches
dataset = dataset.map(formatting_prompts_func, batched = True,)

# dataset["text"][0]

Start fine-tuning
# Attach LoRA adapters so that only a small fraction of the weights is trained
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # LoRA rank; higher values add capacity at the cost of memory
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,      # scaling factor for the LoRA updates
    lora_dropout=0,     # 0 is the Unsloth-optimized setting
    bias="none",        # "none" is the Unsloth-optimized setting
    use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
    random_state=3407,
    use_rslora=False,   # rank-stabilized LoRA, off by default
    loftq_config=None,  # no LoftQ quantization-aware initialization
)
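
To see how small the trainable fraction is, the LoRA-wrapped model is a standard PEFT model and exposes print_trainable_parameters() (a quick check, assuming the stock PEFT wrapper):

model.print_trainable_parameters()  # prints trainable vs. total parameter counts; well under 1% with r=16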


Create the supervised fine-tuning trainer

from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        # Use num_train_epochs = 1, warmup_ratio for full training runs!
        warmup_steps=5,
        max_steps=60,
        learning_rate=2e-4,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=10,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
    ),
)
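
Before launching training, it can be useful to record a VRAM baseline so you can measure the training footprint afterwards (a minimal sketch using standard torch.cuda utilities):

import torch

gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024**3, 3)
max_memory = round(gpu_stats.total_memory / 1024**3, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")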
 

Set up wandb (optional)
pip install wandb
import wandb

wandb.login(key="YOUR_WANDB_API_KEY")  # replace with your own API key

Depending on your transformers version, you may also need to pass report_to="wandb" in TrainingArguments for the trainer to send its metrics to wandb.
Start training
trainer_stats = trainer.train()
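
trainer.train() returns a TrainOutput object whose metrics include the total runtime and final loss, which is handy for comparing runs (a minimal sketch using the standard transformers return value):

print(trainer_stats.metrics["train_runtime"])  # total training time in seconds
print(trainer_stats.metrics["train_loss"])     # average training loss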

Test and validate

After fine-tuning, Unsloth keeps the updated weights on the in-memory model, so the fine-tuned model can be called directly without merging the adapter weights first.

FastLanguageModel.for_inference(model)

inputs = tokenizer([prompt_style.format(question_1, "")], return_tensors="pt").to("cuda")

outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)
response = tokenizer.batch_decode(outputs)

print(response[0].split("### Response:")[1])

Merge the model
new_model_local = "DeepSeek-R1-Medical-COT-Tiny"
model.save_pretrained(new_model_local)      # saves only the LoRA adapter weights
tokenizer.save_pretrained(new_model_local)

# Merge the LoRA adapters into the base weights and save a standalone 16-bit model
model.save_pretrained_merged(new_model_local, tokenizer, save_method = "merged_16bit",)
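
To verify the merged checkpoint, you can reload it from the local directory with the same API used at the start of this guide (a minimal sketch; the path matches new_model_local above):

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "DeepSeek-R1-Medical-COT-Tiny",
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = False,
)
FastLanguageModel.for_inference(model)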

Full fine-tuning experiment

The steps below mirror the minimal run, but train on the full split for 3 epochs; the template and formatting function are repeated so this section is self-contained.

train_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. 
Please answer the following medical question. 

### Question:
{}

### Response:
<think>
{}
</think>
{}"""

EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples):
    inputs = examples["Question"]
    cots = examples["Complex_CoT"]
    outputs = examples["Response"]
    texts = []
    for input, cot, output in zip(inputs, cots, outputs):
        text = train_prompt_style.format(input, cot, output) + EOS_TOKEN
        texts.append(text)
    return {
        "text": texts,
    }

# Use the full English training split this time
dataset = load_dataset("FreedomIntelligence/medical-o1-reasoning-SFT", "en", split="train", trust_remote_code=True)
dataset = dataset.map(formatting_prompts_func, batched = True,)
# dataset["text"][0] 

model = FastLanguageModel.get_peft_model(
    model,
    r=16,  
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,  
    bias="none",  
    use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
    random_state=3407,
    use_rslora=False,  
    loftq_config=None,
)

from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Train for 3 epochs (three full passes over the dataset) instead of capping max_steps
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        num_train_epochs = 3,
        warmup_steps=5,
        # max_steps=60,
        learning_rate=2e-4,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=10,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
    ),
)

trainer_stats = trainer.train()

Test with a sample question
question = "A 61-year-old woman with a long history of involuntary urine loss during activities like coughing or sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, what would cystometry most likely reveal about her residual volume and detrusor contractions?"


FastLanguageModel.for_inference(model)  # Unsloth has 2x faster inference!
inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")

outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)
response = tokenizer.batch_decode(outputs)
print(response[0].split("### Response:")[1])
