khurrameycon committed
Commit ad93aea · verified · 1 Parent(s): 5332468

Update app.py

Files changed (1)
  1. app.py +29 -24
app.py CHANGED
@@ -4,7 +4,7 @@ import logging
 from fastapi import FastAPI, HTTPException
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
-from huggingface_hub import InferenceClient, HfApi
+from huggingface_hub import InferenceClient
 from typing import Optional
 
 # Set up logging
@@ -25,34 +25,39 @@ class ChatResponse(BaseModel):
     response: str
     status: str
 
-# Initialize HF client at startup
-try:
-    HF_TOKEN = os.getenv("HF_TOKEN")
-    if not HF_TOKEN:
-        logger.warning("HF_TOKEN not found in environment variables")
-    api = HfApi(token=HF_TOKEN)
-    client = InferenceClient(
-        model="meta-llama/Llama-3.2-11B-Vision-Instruct",  # You might need to change this
-        token=HF_TOKEN
-    )
-    logger.info("Successfully initialized HuggingFace client")
-except Exception as e:
-    logger.error(f"Error initializing HuggingFace client: {str(e)}")
-
 def llm_chat_response(text: str) -> str:
     try:
-        logger.info(f"Processing text: {text}")
+        HF_TOKEN = os.getenv("HF_TOKEN")
+        logger.info("Checking HF_TOKEN...")
+        if not HF_TOKEN:
+            logger.error("HF_TOKEN not found in environment variables")
+            raise HTTPException(status_code=500, detail="HF_TOKEN not configured")
 
-        # Direct text generation
-        response = client.text_generation(
-            text + " describe in one line only",
-            max_new_tokens=100,
-            temperature=0.7,
-            repetition_penalty=1.2
+        logger.info("Initializing InferenceClient...")
+        client = InferenceClient(
+            provider="sambanova",
+            api_key=HF_TOKEN
         )
 
-        logger.info(f"Generated response: {response}")
-        return response
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": text + " describe in one line only"
+                    }
+                ]
+            }
+        ]
+
+        logger.info("Sending request to model...")
+        completion = client.chat.completions.create(
+            model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+            messages=messages,
+            max_tokens=500
+        )
+        return completion.choices[0].message['content']
 
     except Exception as e:
         logger.error(f"Error in llm_chat_response: {str(e)}")