Update README.md
README.md (changed): the section following `tokenizer = AutoTokenizer.from_pretrained(model_name)` now reads:
#### Function to interact with the model

```
def generate_response(text_input="What is spider silk?",
                      system_prompt='',
                      num_return_sequences=1,
                      temperature=1.,            # the higher the temperature, the more creative the model becomes
                      max_new_tokens=127,
                      device='cuda',
                      add_special_tokens=False,  # tokenizer.apply_chat_template already adds <|begin_of_text|>, so keep this False
                      num_beams=1,
                      eos_token_id=[
                          128001,
                          128008,
                          128009,
                      ],
                      verbatim=False,
                      top_k=50,
                      top_p=0.9,
                      repetition_penalty=1.1,
                      messages=None,             # None rather than a mutable [] default, so each call starts a fresh conversation
                      ):

    if not messages:  # start a new messages list
        messages = []
        if system_prompt != '':  # include the system prompt if provided
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": text_input})
    else:  # if messages are provided, extend them (make sure to add the previous response as an assistant message first)
        messages.append({"role": "user", "content": text_input})

    text_input = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = tokenizer([text_input], add_special_tokens=add_special_tokens, return_tensors='pt').to(device)
    if verbatim:
        print(inputs)
    with torch.no_grad():
        outputs = model.generate(**inputs,
                                 max_new_tokens=max_new_tokens,
                                 temperature=temperature,
                                 num_beams=num_beams,
                                 top_k=top_k,
                                 eos_token_id=eos_token_id,
                                 top_p=top_p,
                                 num_return_sequences=num_return_sequences,
                                 do_sample=True,
                                 repetition_penalty=repetition_penalty,
                                 )
    outputs = outputs[:, inputs["input_ids"].shape[1]:]
    return tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True), messages
```
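The hard-coded `eos_token_id` values are the Llama-3 family's terminator tokens (`<|end_of_text|>`, `<|eom_id|>`, `<|eot_id|>`). As a sanity check (a minimal sketch, assuming `tokenizer` is the Llama-3 tokenizer loaded above), the IDs can be derived from the tokenizer instead of hard-coded, and you can confirm that `apply_chat_template` already prepends `<|begin_of_text|>`, which is why `add_special_tokens=False` is the default:
```
# Sanity-check sketch; assumes `tokenizer` is the Llama-3 tokenizer loaded above.
terminators = [tokenizer.convert_tokens_to_ids(t)
               for t in ("<|end_of_text|>", "<|eom_id|>", "<|eot_id|>")]
print(terminators)  # expected [128001, 128008, 128009] for Llama-3 tokenizers

demo = tokenizer.apply_chat_template(
    [{"role": "user", "content": "What is spider silk?"}],
    tokenize=False,
    add_generation_prompt=True,
)
print(demo.startswith("<|begin_of_text|>"))  # True: the template adds it, so add_special_tokens stays False
```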
Usage:
```
res, _ = generate_response(text_input="What is collagen?",
                           system_prompt='You are a materials scientist.',
                           num_return_sequences=1,
                           temperature=1.,  # the higher the temperature, the more creative the model becomes
                           max_new_tokens=127,
                           )
```