Spaces:

sagaxlearn
/

Chat

Running

khurrameycon committed 15 days ago

Commit

ad93aea

verified ·

1 Parent(s): 5332468

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import logging
 from fastapi import FastAPI, HTTPException
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
-from huggingface_hub import InferenceClient, HfApi
 from typing import Optional
 # Set up logging
@@ -25,34 +25,39 @@ class ChatResponse(BaseModel):
     response: str
     status: str
-# Initialize HF client at startup
-try:
-    HF_TOKEN = os.getenv("HF_TOKEN")
-    if not HF_TOKEN:
-        logger.warning("HF_TOKEN not found in environment variables")
-    api = HfApi(token=HF_TOKEN)
-    client = InferenceClient(
-        model="meta-llama/Llama-3.2-11B-Vision-Instruct",  # You might need to change this
-        token=HF_TOKEN
-    )
-    logger.info("Successfully initialized HuggingFace client")
-except Exception as e:
-    logger.error(f"Error initializing HuggingFace client: {str(e)}")
 def llm_chat_response(text: str) -> str:
     try:
-        logger.info(f"Processing text: {text}")
-        # Direct text generation
-        response = client.text_generation(
-            text + " describe in one line only",
-            max_new_tokens=100,
-            temperature=0.7,
-            repetition_penalty=1.2
         )
-        logger.info(f"Generated response: {response}")
-        return response
     except Exception as e:
         logger.error(f"Error in llm_chat_response: {str(e)}")

 from fastapi import FastAPI, HTTPException
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
+from huggingface_hub import InferenceClient
 from typing import Optional
 # Set up logging
     response: str
     status: str
 def llm_chat_response(text: str) -> str:
     try:
+        HF_TOKEN = os.getenv("HF_TOKEN")
+        logger.info("Checking HF_TOKEN...")
+        if not HF_TOKEN:
+            logger.error("HF_TOKEN not found in environment variables")
+            raise HTTPException(status_code=500, detail="HF_TOKEN not configured")
+        logger.info("Initializing InferenceClient...")
+        client = InferenceClient(
+            provider="sambanova",
+            api_key=HF_TOKEN
         )
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": text + " describe in one line only"
+                    }
+                ]
+            }
+        ]
+        logger.info("Sending request to model...")
+        completion = client.chat.completions.create(
+            model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+            messages=messages,
+            max_tokens=500
+        )
+        return completion.choices[0].message['content']
     except Exception as e:
         logger.error(f"Error in llm_chat_response: {str(e)}")