Skip to content

Commit

Permalink
fix llama 3 tokenizer issues
Browse files Browse the repository at this point in the history
  • Loading branch information
eisenzopf committed May 14, 2024
1 parent 5345d7c commit 1b5e1e7
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 3 deletions.
12 changes: 10 additions & 2 deletions llm_eval/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,21 @@ def __init__(self, config):

def generate_output(self, text):
    """Generate a model response for a single user input.

    Renders a chat-template prompt from the configured system prompt plus
    the user text, tokenizes it, runs generation, and decodes the result.

    Args:
        text: The user message to generate a response for.

    Returns:
        A ``(prompt, responses)`` tuple: the rendered chat-template prompt
        string and the decoded model output (special tokens stripped).
    """
    # Build the chat messages and render them with the model's template.
    messages = [
        {"role": "system", "content": self.system_prompt},
        {"role": "user", "content": text},
    ]
    prompt = self.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = self.tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=self.max_length,
    ).to(self.device)

    if self.current_model == 'meta-llama/Meta-Llama-3-8B-Instruct':
        # Llama 3 ends an assistant turn with <|eot_id|>, not only the
        # regular EOS token, so both must be passed as terminators or
        # generation runs past the end of the turn.
        terminators = [
            self.tokenizer.eos_token_id,
            self.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
        ]
        # BUG FIX: was `self.modelgenerate(inputs, ...)` — missing the
        # attribute dot (AttributeError at runtime) and missing the `**`
        # unpacking of the BatchEncoding into keyword arguments.
        outputs = self.model.generate(
            **inputs,
            eos_token_id=terminators,
            max_new_tokens=self.max_new_tokens,
            do_sample=True,
            temperature=self.temperature,
            top_p=self.top_p,
        )
    else:
        # BUG FIX: restore the `**` unpacking (was `generate(inputs, ...)`),
        # matching the original call shape `generate(**inputs, ...)`;
        # passing the BatchEncoding positionally misfeeds it as input_ids.
        outputs = self.model.generate(
            **inputs,
            max_new_tokens=self.max_new_tokens,
            do_sample=True,
            temperature=self.temperature,
            top_p=self.top_p,
        )
    responses = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
    return prompt, responses

Expand Down Expand Up @@ -92,6 +99,7 @@ def prepare_output(self):
def process_dataset(self):
    """Iterate the prepared dataset grouped by model, loading each model once.

    NOTE(review): body continues beyond this view; documentation covers
    only the visible portion.
    """
    df = self.prepare_output()
    for model_name, group in df.groupby('model'):
        # Record which model is active so generate_output() can apply
        # model-specific handling (e.g. Llama 3 terminator tokens).
        self.current_model = model_name
        print(f"Loading {model_name}...")
        # Load tokenizer/model once per group rather than once per row.
        self.tokenizer, self.model = self.load_model_and_tokenizer(model_name)
        for index, row in group.iterrows(): # Process the correct group rather than the entire df
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "llm-eval"
version = "0.3.17"
version = "0.3.18"
authors = [
{name = "Jonathan Eisenzopf", email = "[email protected]"},
]
Expand Down

0 comments on commit 1b5e1e7

Please sign in to comment.