KingNish committed on
Commit
55b49ed
·
verified ·
1 Parent(s): 1ad03b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -48
app.py CHANGED
@@ -8,18 +8,27 @@ from llama_cpp_agent.chat_history.messages import Roles
8
  import gradio as gr
9
  from huggingface_hub import hf_hub_download
10
 
11
- llm = None
12
- llm_model = None
 
13
 
14
- # Download the new model
15
  hf_hub_download(
16
  repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
17
  filename="llama-3.2-1b-instruct-q4_k_m.gguf",
18
  local_dir="./models"
19
  )
20
 
21
- def get_messages_formatter_type(model_name):
22
- return MessagesFormatterType.LLAMA_3
 
 
 
 
 
 
 
 
23
 
24
  def respond(
25
  message,
@@ -32,26 +41,10 @@ def respond(
32
  top_k,
33
  repeat_penalty,
34
  ):
35
- global llm
36
- global llm_model
37
-
38
- chat_template = get_messages_formatter_type(model)
39
-
40
- if llm is None or llm_model != model:
41
- llm = Llama(
42
- model_path=f"models/{model}",
43
- n_gpu_layers=0,
44
- n_batch=32000,
45
- n_ctx=2048,
46
- )
47
- llm_model = model
48
-
49
- provider = LlamaCppPythonProvider(llm)
50
-
51
  agent = LlamaCppAgent(
52
  provider,
53
- system_prompt=f"{system_message}",
54
- predefined_messages_formatter_type=chat_template,
55
  debug_output=True
56
  )
57
 
@@ -101,10 +94,9 @@ Meta Llama 3.2 (1B) is a multilingual large language model (LLM) optimized for c
101
  demo = gr.ChatInterface(
102
  respond,
103
  additional_inputs=[
104
- gr.Dropdown([
105
- "llama-3.2-1b-instruct-q4_k_m.gguf"
106
- ],
107
- value="llama-3.2-1b-instruct-q4_k_m.gguf",
108
  label="Model"
109
  ),
110
  gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta. Your capabilities include:
@@ -120,27 +112,9 @@ Always strive for accuracy, clarity, and helpfulness in your responses. If you'r
120
  """, label="System message"),
121
  gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max tokens"),
122
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
123
- gr.Slider(
124
- minimum=0.1,
125
- maximum=2.0,
126
- value=0.95,
127
- step=0.05,
128
- label="Top-p",
129
- ),
130
- gr.Slider(
131
- minimum=0,
132
- maximum=100,
133
- value=40,
134
- step=1,
135
- label="Top-k",
136
- ),
137
- gr.Slider(
138
- minimum=0.0,
139
- maximum=2.0,
140
- value=1.1,
141
- step=0.1,
142
- label="Repetition penalty",
143
- ),
144
  ],
145
  theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
146
  body_background_fill_dark="#16141c",
 
8
  import gradio as gr
9
  from huggingface_hub import hf_hub_download
10
 
11
+ # Global variables
12
+ MODEL_PATH = "models/llama-3.2-1b-instruct-q4_k_m.gguf"
13
+ CHAT_TEMPLATE = MessagesFormatterType.LLAMA_3
14
 
15
+ # Download the model (if not already present)
16
  hf_hub_download(
17
  repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
18
  filename="llama-3.2-1b-instruct-q4_k_m.gguf",
19
  local_dir="./models"
20
  )
21
 
22
+ # Initialize the model globally
23
+ llm = Llama(
24
+ model_path=MODEL_PATH,
25
+ n_gpu_layers=0,
26
+ n_batch=32000,
27
+ n_ctx=2048,
28
+ )
29
+
30
+ # Initialize the provider globally
31
+ provider = LlamaCppPythonProvider(llm)
32
 
33
  def respond(
34
  message,
 
41
  top_k,
42
  repeat_penalty,
43
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  agent = LlamaCppAgent(
45
  provider,
46
+ system_prompt=system_message,
47
+ predefined_messages_formatter_type=CHAT_TEMPLATE,
48
  debug_output=True
49
  )
50
 
 
94
  demo = gr.ChatInterface(
95
  respond,
96
  additional_inputs=[
97
+ gr.Dropdown(
98
+ [MODEL_PATH.split("/")[-1]],
99
+ value=MODEL_PATH.split("/")[-1],
 
100
  label="Model"
101
  ),
102
  gr.TextArea(value="""You are Meta Llama 3.2 (1B), an advanced AI assistant created by Meta. Your capabilities include:
 
112
  """, label="System message"),
113
  gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max tokens"),
114
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
115
+ gr.Slider(minimum=0.1, maximum=2.0, value=0.95, step=0.05, label="Top-p"),
116
+ gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
117
+ gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  ],
119
  theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
120
  body_background_fill_dark="#16141c",