5.1 Unsloth LoRA Configuration
Source: Unsloth Documentation
Model Loading with 4-bit Quantization
from unsloth import FastLanguageModel

# Load model with 4-bit quantization
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/llama-3-8b-bnb-4bit",
    max_seq_length=2048,
    load_in_4bit=True,  # QLoRA
    dtype=None,         # None auto-detects: float16 on older GPUs, bfloat16 on Ampere+
)
Apply LoRA Adapters
# Apply LoRA adapters
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # Rank: common choices are 8, 16, 32, 64, 128
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha=16,    # equal to r, or 2*r for more aggressive learning
    lora_dropout=0,   # 0 is optimized in Unsloth
    bias="none",      # "none" is optimized in Unsloth
    use_gradient_checkpointing="unsloth",  # ~30% memory reduction
    random_state=3407,
    use_rslora=False,    # rank-stabilized LoRA disabled
    loftq_config=None,   # no LoftQ initialization
)
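A quick sanity check after attaching the adapters is to count how many parameters will actually receive gradients. This is plain PyTorch rather than an Unsloth-specific API; for r=16 on Llama-3-8B with the seven target modules above, the count comes out to roughly 42M:

# Only the LoRA A/B matrices should be trainable after get_peft_model.
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Trainable parameters: {trainable_params:,}")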
Training Configuration
from transformers import TrainingArguments
from trl import SFTTrainer

training_args = TrainingArguments(
    output_dir="./outputs",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # effective batch size = 2 * 4 = 8
    warmup_steps=5,
    max_steps=60,
    learning_rate=2e-4,
    fp16=True,  # use bf16=True instead on GPUs that support bfloat16
    logging_steps=1,
    optim="adamw_8bit",  # 8-bit AdamW cuts optimizer-state memory
    weight_decay=0.01,
    lr_scheduler_type="linear",
)
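The trainer below references a dataset variable with a single "text" column. A minimal sketch of preparing one, using the Alpaca-style yahma/alpaca-cleaned dataset purely as an illustrative example (any instruction dataset works once each row is collapsed into one string):

from datasets import load_dataset

# Illustrative dataset; substitute your own instruction data.
raw_dataset = load_dataset("yahma/alpaca-cleaned", split="train")

alpaca_prompt = (
    "### Instruction:\n{instruction}\n\n"
    "### Input:\n{input}\n\n"
    "### Response:\n{output}"
)

def format_example(example):
    # Collapse each record into one training string and append EOS so generation learns to stop.
    text = alpaca_prompt.format(
        instruction=example["instruction"],
        input=example["input"],
        output=example["output"],
    ) + tokenizer.eos_token
    return {"text": text}

dataset = raw_dataset.map(format_example)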
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=2048,
    args=training_args,
)
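Training, saving, and a quick generation test then follow the usual Unsloth notebook pattern. The sketch below assumes a CUDA GPU; saving this way writes only the small LoRA adapter weights rather than the 4-bit base model:

trainer.train()

# Save only the LoRA adapter weights and tokenizer (much smaller than the base model).
model.save_pretrained("lora_model")
tokenizer.save_pretrained("lora_model")

# Switch to Unsloth's faster inference path and generate a short sample.
FastLanguageModel.for_inference(model)
inputs = tokenizer(
    ["### Instruction:\nExplain LoRA in one sentence.\n\n### Response:\n"],
    return_tensors="pt",
).to("cuda")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])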
Key Insights
- Start with QLoRA (4-bit) for efficiency
- Rank 16 is a good default; increase it for more complex tasks (see the parameter-count sketch after this list)
- Setting lora_dropout=0 enables Unsloth optimizations
- Target all major linear layers for best results
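For choosing a rank, a back-of-the-envelope count shows how adapter size scales linearly with r. The sketch below assumes Llama-3-8B dimensions (hidden size 4096, MLP size 14336, 1024-wide grouped-query K/V projections, 32 layers) and is an estimate, not an Unsloth API:

# LoRA adds r * (d_in + d_out) parameters per targeted linear layer.
LLAMA3_8B_PROJECTIONS = {      # (d_in, d_out) for each targeted projection per decoder layer
    "q_proj": (4096, 4096),
    "k_proj": (4096, 1024),    # grouped-query attention: 8 KV heads of dim 128
    "v_proj": (4096, 1024),
    "o_proj": (4096, 4096),
    "gate_proj": (4096, 14336),
    "up_proj": (4096, 14336),
    "down_proj": (14336, 4096),
}
NUM_LAYERS = 32

def lora_param_count(r: int) -> int:
    per_layer = sum(r * (d_in + d_out) for d_in, d_out in LLAMA3_8B_PROJECTIONS.values())
    return per_layer * NUM_LAYERS

for r in (8, 16, 32, 64, 128):
    print(f"r={r:>3}: {lora_param_count(r):,} trainable parameters")
# r=16 gives about 42M parameters; each doubling of r doubles the adapter and its
# optimizer state, so raise the rank only when the task needs the extra capacity.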