import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
= "qnnpack"
torch.backends.quantized.engine # Load the pre-trained model and tokenizer
= "meta-llama/Meta-Llama-3-8B-Instruct"
model_name
= AutoModelForCausalLM.from_pretrained(model_name)
model = AutoTokenizer.from_pretrained(model_name)
tokenizer
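Note: Meta-Llama-3-8B-Instruct is a gated repository on the Hugging Face Hub, so the download above assumes you have already accepted Meta's license and authenticated locally (for example with huggingface-cli login).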
# Apply dynamic quantization to the model
quantized_model = torch.quantization.quantize_dynamic(
    model, {torch.nn.Linear}, dtype=torch.qint8
)
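As a quick sanity check, the nn.Linear modules should now have been swapped out for dynamically quantized equivalents. A minimal sketch (the .model.layers[0].mlp.gate_proj path assumes the standard Llama layout in transformers):

# One of the MLP projections; after quantize_dynamic this should print a
# dynamically quantized Linear class rather than torch.nn.Linear
print(type(quantized_model.model.layers[0].mlp.gate_proj))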
# Create a pipeline with the quantized model
quantized_pipeline = pipeline(
    "text-generation",
    model=quantized_model,
    tokenizer=tokenizer,
    device=-1  # run on CPU
)

# ...and one with the original fp32 model for comparison
normal_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=-1  # run on CPU
)
def get_model_size(model):
    # Sum bytes across all parameters
    param_size = 0
    for param in model.parameters():
        param_size += param.numel() * param.element_size()

    # Sum bytes across all buffers
    buffer_size = 0
    for buffer in model.buffers():
        buffer_size += buffer.numel() * buffer.element_size()

    # Total size in megabytes
    size_all_mb = (param_size + buffer_size) / 1024**2
    return size_all_mb
print('model', get_model_size(model))
print('quantized', get_model_size(quantized_model))
model 30633.023681640625
quantized 2005.023681640625
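One caveat on these numbers: quantize_dynamic stores the int8 weights of the converted Linear layers in packed parameter objects that model.parameters() and model.buffers() do not traverse, so get_model_size undercounts the quantized model. The ~2 GB reported appears to be mostly the still-fp32 embedding table (128256 × 4096 × 4 bytes ≈ 2004 MB); a straight int8 copy of all 8B parameters would be closer to 8 GB, versus the ~30 GB measured at fp32.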
def test_pipeline(pipe, prompt="Tell a dad joke that involves socks with sandals"):
    # Generate a short completion from the given pipeline
    output = pipe(prompt, max_length=50)
    print(output)

test_pipeline(quantized_pipeline)
test_pipeline(normal_pipeline)
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
[{'generated_text': "Tell a dad joke that involves socks with sandals\nHere's one:\n\nWhy did the sock go to the party with the sandal?\n\nBecause it was a sole-ful occasion! (get it? sole, like the bottom of the foot,"}]
[{'generated_text': "Tell a dad joke that involves socks with sandals\nHere's one: Why did the sock go with the sandal? Because it was a sole-ful match! (get it? sole-ful, like soulful, but also a reference"}]
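Both models tell essentially the same joke. Since dynamic quantization mainly targets CPU inference speed, a natural follow-up is to time the two pipelines. A minimal sketch (the time_pipeline helper and its run count are my own additions, not part of the original run):

import time

def time_pipeline(pipe, prompt="Tell a dad joke that involves socks with sandals", runs=3):
    # Average wall-clock seconds per generation over a few runs
    start = time.perf_counter()
    for _ in range(runs):
        pipe(prompt, max_length=50)
    return (time.perf_counter() - start) / runs

print('quantized', time_pipeline(quantized_pipeline))
print('normal', time_pipeline(normal_pipeline))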