Fine-Tuning the Large Model DeepSeek-R1 with Unsloth
Fine-tuning means taking a large pre-trained base model and training it for a small number of iterations on a domain- or task-specific dataset, adjusting the model's parameters to improve its performance on that task. This approach leverages the broad knowledge of the pre-trained model while optimizing it for a specific application, yielding more accurate and efficient results.
Unsloth
Unsloth is an open-source project for accelerating large-model training. It can speed up training significantly (by 2-5x) and reduce VRAM usage by up to 80%.
Environment Setup
Create the environment
# Basic environment
conda create --name unsloth_env python=3.10
conda activate unsloth_env

###########################################

# Alternative: have conda install PyTorch, CUDA, and xformers as well
conda create --name unsloth_env \
    python=3.11 \
    pytorch-cuda=12.1 \
    pytorch cudatoolkit xformers -c pytorch -c nvidia -c xformers \
    -y
conda activate unsloth_env
Install dependencies
# In a notebook (e.g. Colab/Jupyter)
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps "xformers<0.0.26" trl peft accelerate bitsandbytes
!pip install modelscope

############################

# Or from a terminal
pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
pip install --no-deps trl peft accelerate bitsandbytes
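The first variant also installs modelscope, which is convenient for downloading the DeepSeek weights inside mainland China. A minimal sketch for fetching the model referenced later as ./DeepSeek-R1-Distill-Llama-8B (the model id here is an assumption; verify the exact name on the ModelScope hub):

from modelscope import snapshot_download

# Download the distilled DeepSeek-R1 weights into the current directory
model_dir = snapshot_download(
    "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",  # assumed ModelScope model id
    cache_dir="./",
)
print(model_dir)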
Verify the installation
python -m torch.utils.collect_env
python -m xformers.info
python -m bitsandbytes
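As a further sanity check, you can confirm from Python that PyTorch sees the GPU before loading an 8B model onto it:

import torch

# Verify the CUDA build of PyTorch and GPU visibility
print(torch.__version__)
print(torch.cuda.is_available())       # should print True
print(torch.cuda.get_device_name(0))   # name of the GPU that will be used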
Model Inference with Unsloth
from unsloth import FastLanguageModel

max_seq_length = 2048
dtype = None          # None = auto-detect (bfloat16 on newer GPUs, float16 otherwise)
load_in_4bit = False  # set True to load the model with 4-bit quantization

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "./DeepSeek-R1-Distill-Llama-8B",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
FastLanguageModel.for_inference(model)

question = "Why is the sky blue?"
# inputs = tokenizer([question], return_tensors="pt").to("cuda")

# Add a Q&A prompt template
prompt_style_chat = """Write an appropriate response that completes the current conversation.

### Instruction:
You are a helpful assistant.

### Question:
{}

### Response:
<think>{}"""

inputs = tokenizer([prompt_style_chat.format(question, "")], return_tensors="pt").to("cuda")

outputs = model.generate(
    input_ids=inputs.input_ids,
    max_new_tokens=1200,
    use_cache=True,
)
response = tokenizer.batch_decode(outputs)
print(response[0].split("### Response:")[1])
Test before fine-tuning
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning.
Please answer the following medical question.

### Question:
{}

### Response:
<think>{}"""

question_1 = "A 61-year-old woman with a long history of involuntary urine loss during activities like coughing or sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, what would cystometry most likely reveal about her residual volume and detrusor contractions?"

question_2 = "Given a patient who experiences sudden-onset chest pain radiating to the neck and left arm, with a past medical history of hypercholesterolemia and coronary artery disease, elevated troponin I levels, and tachycardia, what is the most likely coronary artery involved based on this presentation?"

inputs1 = tokenizer([prompt_style.format(question_1, "")], return_tensors="pt").to("cuda")
outputs1 = model.generate(
    input_ids=inputs1.input_ids,
    max_new_tokens=1200,
    use_cache=True,
)
response1 = tokenizer.batch_decode(outputs1)
print(response1[0].split("### Response:")[1])
A Minimal Viable Fine-Tuning Run
Download the dataset
!pip install datasets
import os
from datasets import load_dataset

# Load only the first 500 examples of the English split for a quick experiment
dataset = load_dataset("FreedomIntelligence/medical-o1-reasoning-SFT", "en",
                       split = "train[0:500]", trust_remote_code = True)
# dataset[0]
Parameter setup
# Prompt template used for training
train_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning.
Please answer the following medical question.

### Question:
{}

### Response:
<think>
{}
</think>
{}"""

# Token that marks the end of generated text
EOS_TOKEN = tokenizer.eos_token
Process the dataset
# Reformat the medical-o1-reasoning-SFT dataset: splice the Complex_CoT and
# Response columns into the template and append the end-of-text token
def formatting_prompts_func(examples):
    inputs = examples["Question"]
    cots = examples["Complex_CoT"]
    outputs = examples["Response"]
    texts = []
    for input, cot, output in zip(inputs, cots, outputs):
        text = train_prompt_style.format(input, cot, output) + EOS_TOKEN
        texts.append(text)
    return {
        "text": texts,
    }

# Apply the structuring to the dataset
dataset = dataset.map(formatting_prompts_func, batched = True,)
# dataset["text"][0]
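Before training, it is worth eyeballing one formatted sample to confirm that the template, the chain of thought, and the EOS token all landed where expected. A quick check on the mapped dataset:

# Inspect the first formatted example and verify it is terminated correctly
sample = dataset["text"][0]
print(sample[:500])                  # preview the prompt portion
print(sample.endswith(EOS_TOKEN))    # should print True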
Enable fine-tuning
# Switch the model into PEFT (LoRA) fine-tuning mode
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # LoRA rank: size of the low-rank update matrices
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)
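With rank-16 adapters on the seven attention and MLP projection modules, only a small fraction of the 8B parameters is actually trained. To confirm this, the PEFT-wrapped model exposes print_trainable_parameters() (a standard PEFT method; treat its availability as an assumption if your Unsloth version differs):

# Report how many parameters the LoRA adapters make trainable
model.print_trainable_parameters()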
Create the supervised fine-tuning trainer
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
model=model,
tokenizer=tokenizer,
train_dataset=dataset,
dataset_text_field="text",
max_seq_length=max_seq_length,
dataset_num_proc=2,
args=TrainingArguments(
per_device_train_batch_size=2,
gradient_accumulation_steps=4,
# Use num_train_epochs = 1, warmup_ratio for full training runs!
warmup_steps=5,
max_steps=60,
learning_rate=2e-4,
fp16=not is_bfloat16_supported(),
bf16=is_bfloat16_supported(),
logging_steps=10,
optim="adamw_8bit",
weight_decay=0.01,
lr_scheduler_type="linear",
seed=3407,
output_dir="outputs",
),
)
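With per_device_train_batch_size=2 and gradient_accumulation_steps=4, the effective batch size is 2 × 4 = 8 samples per optimizer step, so max_steps=60 processes about 480 samples, roughly one pass over the 500-example subset loaded earlier.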
Set up wandb (optional)
pip install wandb
import wandb

wandb.login(key="YOUR_WANDB_API_KEY")
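Logging in is usually enough for transformers to pick wandb up automatically, but you can also opt in explicitly via the standard report_to parameter of TrainingArguments (the run_name below is a hypothetical label):

# Same arguments as above, with explicit wandb reporting enabled
args = TrainingArguments(
    output_dir="outputs",
    report_to="wandb",                   # send training metrics to Weights & Biases
    run_name="deepseek-r1-medical-cot",  # hypothetical run name
    # ... remaining arguments unchanged ...
)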
Start training
trainer_stats = trainer.train()
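trainer.train() returns a TrainOutput object; its metrics dictionary includes timing and loss statistics, which is handy for comparing runs:

# Inspect runtime statistics from the finished run
print(trainer_stats.metrics["train_runtime"])  # total training time in seconds
print(trainer_stats.metrics)                   # full metrics dict (loss, steps/s, ...)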
Test and validate
After fine-tuning finishes, Unsloth automatically updates the model weights in memory, so the fine-tuned model can be called directly without manually merging the LoRA weights first.
FastLanguageModel.for_inference(model)

inputs = tokenizer([prompt_style.format(question_1, "")], return_tensors="pt").to("cuda")
outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)
response = tokenizer.batch_decode(outputs)
print(response[0].split("### Response:")[1])
Merge the model
new_model_local = "DeepSeek-R1-Medical-COT-Tiny"

# Save the LoRA adapters and tokenizer
model.save_pretrained(new_model_local)
tokenizer.save_pretrained(new_model_local)

# Merge the adapters into the base model and save standalone 16-bit weights
model.save_pretrained_merged(new_model_local, tokenizer, save_method = "merged_16bit",)
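To run the merged model under llama.cpp or Ollama, Unsloth also offers a GGUF export. A minimal sketch, assuming your Unsloth version supports save_pretrained_gguf and the q4_k_m quantization option (check the Unsloth docs for the supported methods):

# Export a quantized GGUF file for llama.cpp / Ollama
model.save_pretrained_gguf(
    "DeepSeek-R1-Medical-COT-Tiny-GGUF",  # hypothetical output directory
    tokenizer,
    quantization_method = "q4_k_m",       # common 4-bit quantization choice
)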
Full-Scale Efficient Fine-Tuning Experiment
train_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning.
Please answer the following medical question.

### Question:
{}

### Response:
<think>
{}
</think>
{}"""

EOS_TOKEN = tokenizer.eos_token

def formatting_prompts_func(examples):
    inputs = examples["Question"]
    cots = examples["Complex_CoT"]
    outputs = examples["Response"]
    texts = []
    for input, cot, output in zip(inputs, cots, outputs):
        text = train_prompt_style.format(input, cot, output) + EOS_TOKEN
        texts.append(text)
    return {
        "text": texts,
    }

# This time, load the full training split
dataset = load_dataset("FreedomIntelligence/medical-o1-reasoning-SFT", "en",
                       split = "train", trust_remote_code = True)
dataset = dataset.map(formatting_prompts_func, batched = True,)
# dataset["text"][0]

model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Set num_train_epochs to 3 so training makes three passes over the dataset
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        num_train_epochs=3,
        warmup_steps=5,
        # max_steps=60,
        learning_rate=2e-4,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=10,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
    ),
)

trainer_stats = trainer.train()
Test by plugging in the two questions from before
question = "A 61-year-old woman with a long history of involuntary urine loss during activities like coughing or sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, what would cystometry most likely reveal about her residual volume and detrusor contractions?"
FastLanguageModel.for_inference(model) # Unsloth has 2x faster inference!
inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")
outputs = model.generate(
input_ids=inputs.input_ids,
attention_mask=inputs.attention_mask,
max_new_tokens=1200,
use_cache=True,
)
response = tokenizer.batch_decode(outputs)
print(response[0].split("### Response:")[1])