khurrameycon committed · verified
Commit 6114052 · 1 Parent(s): 4c2b726

Update app.py

Files changed (1): app.py (+8 −17)
app.py CHANGED
@@ -34,27 +34,18 @@ def llm_chat_response(text: str) -> str:
             raise HTTPException(status_code=500, detail="HF_TOKEN not configured")
 
         logger.info("Initializing InferenceClient...")
-        client = InferenceClient(token=HF_TOKEN)  # Changed from api_key to token
-
-        messages = [
-            {
-                "role": "user",
-                "content": [
-                    {
-                        "type": "text",
-                        "text": text + str(' describe in one line only')
-                    }
-                ]
-            }
-        ]
+        client = InferenceClient(token=HF_TOKEN)
 
         logger.info("Sending request to model...")
-        response_from_llama = client.chat.completions.create(
+        # Using text-generation endpoint instead of chat
+        response = client.text_generation(
+            text + str(' describe in one line only'),
             model="meta-llama/Llama-3.2-11B-Vision-Instruct",
-            messages=messages,
-            max_tokens=500
+            max_new_tokens=500,
+            temperature=0.7
         )
-        return response_from_llama.choices[0].message['content']
+        return response
+
     except Exception as e:
         logger.error(f"Error in llm_chat_response: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
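
For context, here is a minimal sketch of how llm_chat_response plausibly reads after this commit. Only the hunk above is confirmed by the diff; the imports, the HF_TOKEN environment lookup, the logger setup, and the `if not HF_TOKEN` guard sit outside the hunk and are assumptions filled in from the visible context lines.

import logging
import os

from fastapi import HTTPException
from huggingface_hub import InferenceClient

logger = logging.getLogger(__name__)
HF_TOKEN = os.getenv("HF_TOKEN")  # assumed: token is read from the environment

def llm_chat_response(text: str) -> str:
    try:
        if not HF_TOKEN:  # assumed guard; only the raise below appears in the diff
            raise HTTPException(status_code=500, detail="HF_TOKEN not configured")

        logger.info("Initializing InferenceClient...")
        client = InferenceClient(token=HF_TOKEN)

        logger.info("Sending request to model...")
        # Using text-generation endpoint instead of chat
        response = client.text_generation(
            text + str(' describe in one line only'),
            model="meta-llama/Llama-3.2-11B-Vision-Instruct",
            max_new_tokens=500,
            temperature=0.7
        )
        # InferenceClient.text_generation returns the generated text as a plain
        # str by default, so no choices[0].message indexing is needed anymore.
        return response
    except Exception as e:
        logger.error(f"Error in llm_chat_response: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))

Note the trade-off in the swap: text_generation sends the prompt as raw text rather than through the model's chat template, the chat-style max_tokens becomes max_new_tokens in the text-generation signature, and an explicit temperature=0.7 is added.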