Text Generation · Transformers · Safetensors · qwen2 · conversational · text-generation-inference · Inference Endpoints
chaoscodes committed · verified · Commit ad6607c · 1 Parent(s): 69a5035

Update README.md

Files changed (1)
  1. README.md +57 -3
README.md CHANGED
@@ -1,3 +1,57 @@
- ---
- license: apache-2.0
- ---
+ ---
+ license: apache-2.0
+ ---
+
+ ```python
+ from transformers import AutoTokenizer
+ from vllm import LLM, SamplingParams
+
+ def generate(question_list, model_path):
+     # Load the model with vLLM and decode greedily
+     # (temperature 0.0, one sample per prompt).
+     llm = LLM(
+         model=model_path,
+         trust_remote_code=True,
+         tensor_parallel_size=1,
+     )
+     sampling_params = SamplingParams(
+         max_tokens=4096,
+         temperature=0.0,
+         n=1,
+     )
+     outputs = llm.generate(question_list, sampling_params, use_tqdm=True)
+     # Collect the n candidate completions for each prompt.
+     completions = [[output.text for output in output_item.outputs] for output_item in outputs]
+     return completions
+
+ def prepare_prompt(question, tokenizer):
+     # apply_chat_template adds the <|im_start|>/<|im_end|> markers itself,
+     # so the message content carries only the instruction text.
+     content = (
+         "Solve the following math problem efficiently and clearly.\n"
+         "Please reason step by step, and put your final answer within \\boxed{}.\n"
+         f"Problem: {question}"
+     )
+     msg = [
+         {"role": "user", "content": content}
+     ]
+     prompt = tokenizer.apply_chat_template(
+         msg,
+         tokenize=False,
+         add_generation_prompt=True,
+     )
+     return prompt
+
+ def run():
+     model_path = "Satori-reasoning/Satori-round2"
+     all_problems = [
+         "Which number is larger, 9.11 or 9.9?",
+     ]
+     tokenizer = AutoTokenizer.from_pretrained(model_path)
+     completions = generate(
+         [prepare_prompt(problem_data, tokenizer) for problem_data in all_problems],
+         model_path,
+     )
+
+     for completion in completions:
+         print(completion[0])
+
+ if __name__ == "__main__":
+     run()
+ ```
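+
+ As a minimal sketch, the same prompt can also be run through plain `transformers` generation (assuming the checkpoint loads via `AutoModelForCausalLM`; greedy decoding stands in for the `temperature=0.0` setting above):
+
+ ```python
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ model_path = "Satori-reasoning/Satori-round2"
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_path,
+     torch_dtype=torch.bfloat16,  # assumption: the checkpoint runs in bf16
+     device_map="auto",
+ )
+
+ msg = [{"role": "user", "content": "Which number is larger, 9.11 or 9.9?"}]
+ input_ids = tokenizer.apply_chat_template(
+     msg,
+     tokenize=True,
+     add_generation_prompt=True,
+     return_tensors="pt",
+ ).to(model.device)
+
+ # Greedy decoding mirrors temperature=0.0 in the vLLM example above.
+ output_ids = model.generate(input_ids, max_new_tokens=4096, do_sample=False)
+ print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))
+ ```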