KingNish committed on
Commit
55b49ed
·
verified ·
1 Parent(s): 1ad03b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -48
app.py CHANGED
@@ -8,18 +8,27 @@ from llama_cpp_agent.chat_history.messages import Roles
8
  import gradio as gr
9
  from huggingface_hub import hf_hub_download
10
 
11
- llm = None
12
- llm_model = None
 
13
 
14
- # Download the new model
15
  hf_hub_download(
16
  repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
17
  filename="llama-3.2-1b-instruct-q4_k_m.gguf",
18
  local_dir="./models"
19
  )
20
 
21
- def get_messages_formatter_type(model_name):
22
- return MessagesFormatterType.LLAMA_3
 
 
 
 
 
 
 
 
23
 
24
  def respond(
25
  message,
@@ -32,26 +41,10 @@ def respond(
32
  top_k,
33
  repeat_penalty,
34
  ):
35
- global llm
36
- global llm_model
37
-
38
- chat_template = get_messages_formatter_type(model)
39
-
40
- if llm is None or llm_model != model:
41
- llm = Llama(
42
- model_path=f"models/{model}",
43
- n_gpu_layers=0,
44
- n_batch=32000,
45
- n_ctx=2048,
46
- )
47
- llm_model = model
48
-
49
- provider = LlamaCppPythonProvider(llm)
50
-
51
  agent = LlamaCppAgent(
52
  provider,
53
- system_prompt=f"{system_message}",
54
- predefined_messages_formatter_type=chat_template,
55
  debug_output=True
56
  )
57
 
@@ -101,10 +94,9 @@ Meta Llama 3.2 (1B) is a multilingual large language model (LLM) optimized for c
101
  demo = gr.ChatInterface(
102
  respond,
103
  additional_inputs=[
104
- gr.Dropdown([
105
- "llama-3.2-1b-instruct-q4_k_m.gguf"
106
- ],
107
- value="llama-3.2-1b-instruct-q4_k_m.gguf",
108
  label="Model"
109
  ),
110
  gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta. Your capabilities include:
@@ -120,27 +112,9 @@ Always strive for accuracy, clarity, and helpfulness in your responses. If you'r
120
  """, label="System message"),
121
  gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max tokens"),
122
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
123
- gr.Slider(
124
- minimum=0.1,
125
- maximum=2.0,
126
- value=0.95,
127
- step=0.05,
128
- label="Top-p",
129
- ),
130
- gr.Slider(
131
- minimum=0,
132
- maximum=100,
133
- value=40,
134
- step=1,
135
- label="Top-k",
136
- ),
137
- gr.Slider(
138
- minimum=0.0,
139
- maximum=2.0,
140
- value=1.1,
141
- step=0.1,
142
- label="Repetition penalty",
143
- ),
144
  ],
145
  theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
146
  body_background_fill_dark="#16141c",
 
8
  import gradio as gr
9
  from huggingface_hub import hf_hub_download
10
 
11
+ # Global variables
12
+ MODEL_PATH = "models/llama-3.2-1b-instruct-q4_k_m.gguf"
13
+ CHAT_TEMPLATE = MessagesFormatterType.LLAMA_3
14
 
15
+ # Download the model (if not already present)
16
  hf_hub_download(
17
  repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
18
  filename="llama-3.2-1b-instruct-q4_k_m.gguf",
19
  local_dir="./models"
20
  )
21
 
22
+ # Initialize the model globally
23
+ llm = Llama(
24
+ model_path=MODEL_PATH,
25
+ n_gpu_layers=0,
26
+ n_batch=32000,
27
+ n_ctx=2048,
28
+ )
29
+
30
+ # Initialize the provider globally
31
+ provider = LlamaCppPythonProvider(llm)
32
 
33
  def respond(
34
  message,
 
41
  top_k,
42
  repeat_penalty,
43
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  agent = LlamaCppAgent(
45
  provider,
46
+ system_prompt=system_message,
47
+ predefined_messages_formatter_type=CHAT_TEMPLATE,
48
  debug_output=True
49
  )
50
 
 
94
  demo = gr.ChatInterface(
95
  respond,
96
  additional_inputs=[
97
+ gr.Dropdown(
98
+ [MODEL_PATH.split("/")[-1]],
99
+ value=MODEL_PATH.split("/")[-1],
 
100
  label="Model"
101
  ),
102
  gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta. Your capabilities include:
 
112
  """, label="System message"),
113
  gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max tokens"),
114
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
115
+ gr.Slider(minimum=0.1, maximum=2.0, value=0.95, step=0.05, label="Top-p"),
116
+ gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
117
+ gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  ],
119
  theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
120
  body_background_fill_dark="#16141c",