Update app.py
app.py CHANGED
@@ -34,27 +34,18 @@ def llm_chat_response(text: str) -> str:
             raise HTTPException(status_code=500, detail="HF_TOKEN not configured")
 
         logger.info("Initializing InferenceClient...")
-        client = InferenceClient(token=HF_TOKEN)
-
-        messages = [
-            {
-                "role": "user",
-                "content": [
-                    {
-                        "type": "text",
-                        "text": text + str(' describe in one line only')
-                    }
-                ]
-            }
-        ]
+        client = InferenceClient(token=HF_TOKEN)
 
         logger.info("Sending request to model...")
-        response = client.chat_completion(
+        # Using text-generation endpoint instead of chat
+        response = client.text_generation(
+            text + str(' describe in one line only'),
             model="meta-llama/Llama-3.2-11B-Vision-Instruct",
-            messages=messages,
-            max_tokens=500
+            max_new_tokens=500,
+            temperature=0.7
         )
-        return
+        return response
+
     except Exception as e:
         logger.error(f"Error in llm_chat_response: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
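For reference, the new code path can be exercised outside the Space with a minimal standalone sketch. This assumes huggingface_hub is installed and an HF_TOKEN environment variable is set; the example prompt and the print wrapper are illustrative, not part of app.py — only the text_generation arguments are taken from the diff.

import os

from huggingface_hub import InferenceClient

# Minimal sketch of the new text-generation path (assumptions noted above).
client = InferenceClient(token=os.environ["HF_TOKEN"])
response = client.text_generation(
    "What is a llama?" + str(' describe in one line only'),  # hypothetical prompt
    model="meta-llama/Llama-3.2-11B-Vision-Instruct",
    max_new_tokens=500,
    temperature=0.7,
)
print(response)  # text_generation returns the generated text as a plain string

This is also why the bare return response now works: text_generation returns the generated string directly, whereas the removed chat path (apparently client.chat_completion with the messages list) returns a completion object whose text lives at response.choices[0].message.content.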