databricks/databricks-dolly-15k
Viewer • Updated • 15k • 32.6k • 982
How to use Kurapika993/qwen2.5-7b-qlora-dolly15k with PEFT:
from peft import PeftModel
from transformers import AutoModelForCausalLM
# Load the base checkpoint that the adapter is applied to on the next line.
base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-7B-Instruct")
model = PeftModel.from_pretrained(base_model, "Kurapika993/qwen2.5-7b-qlora-dolly15k")

This repository contains a QLoRA adapter fine-tuned from Qwen/Qwen2.5-7B-Instruct on databricks/databricks-dolly-15k.
This is a supervised fine-tuning experiment for learning and demonstrating the full 7B QLoRA workflow:
- Trainer: SFTTrainer
- LoRA target modules: q_proj, k_proj, v_proj, o_proj, gate_proj, up_proj, down_proj

This adapter is intended for instruction-following experiments and PEFT/QLoRA learning.
Example use cases:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import torch
# Hub IDs of the base checkpoint and of the QLoRA adapter applied on top of it.
base_model = "Qwen/Qwen2.5-7B-Instruct"
adapter = "Kurapika993/qwen2.5-7b-qlora-dolly15k"

# Load the base weights in 4-bit NF4 with double quantization, using
# bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

# The tokenizer is read from the adapter repository, not the base model.
tokenizer = AutoTokenizer.from_pretrained(adapter)

# trust_remote_code is deliberately not set: Qwen2.5 is implemented natively
# in transformers, so there is no need to allow execution of code shipped in
# the model repository.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
)

# Wrap the quantized base model with the adapter weights and switch to
# evaluation mode for inference.
model = PeftModel.from_pretrained(model, adapter)
model.eval()
def generate_response(
    model,
    tokenizer,
    user_prompt: str,
    max_new_tokens: int = 250,
    *,
    system_prompt: str = "You are a helpful assistant.",
    temperature: float = 0.7,
    top_p: float = 0.9,
    repetition_penalty: float = 1.05,
) -> str:
    """Generate a single chat reply to ``user_prompt``.

    Args:
        model: Causal LM exposing ``generate`` and ``device``.
        tokenizer: Tokenizer exposing ``apply_chat_template``, ``decode`` and
            ``eos_token_id``; it is also called directly to encode the prompt.
        user_prompt: Content of the user turn.
        max_new_tokens: Upper bound on the number of generated tokens.
        system_prompt: Content of the system turn placed before the user turn.
        temperature: Sampling temperature passed to ``generate``.
        top_p: Nucleus-sampling threshold passed to ``generate``.
        repetition_penalty: Repetition penalty passed to ``generate``.

    Returns:
        The decoded reply with special tokens removed and surrounding
        whitespace stripped.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    # Render the conversation to text, ending with the assistant header so
    # the model continues with its own turn.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # The rendered template already contains every special token it needs;
    # letting the tokenizer add its own again could duplicate BOS/EOS markers.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        add_special_tokens=False,
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_tokens = outputs[0][prompt_length:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return response.strip()
# Demo: send one sample instruction through the adapted model and print
# the reply.
prompt = "Explain instruction tuning to a beginner using a simple analogy."
response = generate_response(
    model=model,
    tokenizer=tokenizer,
    user_prompt=prompt,
)
print(response)