khurrameycon committed
Commit 9f75a74 · verified · 1 Parent(s): 6114052

Update app.py

Files changed (1): app.py (+23 -15)
app.py CHANGED
@@ -4,7 +4,7 @@ import logging
 from fastapi import FastAPI, HTTPException
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
-from huggingface_hub import InferenceClient
+from huggingface_hub import InferenceClient, HfApi
 from typing import Optional
 
 # Set up logging
@@ -25,25 +25,33 @@ class ChatResponse(BaseModel):
     response: str
     status: str
 
+# Initialize HF client at startup
+try:
+    HF_TOKEN = os.getenv("HF_TOKEN")
+    if not HF_TOKEN:
+        logger.warning("HF_TOKEN not found in environment variables")
+    api = HfApi(token=HF_TOKEN)
+    client = InferenceClient(
+        model="meta-llama/Llama-3.2-11B-Vision-Instruct",  # You might need to change this
+        token=HF_TOKEN
+    )
+    logger.info("Successfully initialized HuggingFace client")
+except Exception as e:
+    logger.error(f"Error initializing HuggingFace client: {str(e)}")
+
 def llm_chat_response(text: str) -> str:
     try:
-        HF_TOKEN = os.getenv("HF_TOKEN")
-        logger.info("Checking HF_TOKEN...")
-        if not HF_TOKEN:
-            logger.error("HF_TOKEN not found in environment variables")
-            raise HTTPException(status_code=500, detail="HF_TOKEN not configured")
+        logger.info(f"Processing text: {text}")
 
-        logger.info("Initializing InferenceClient...")
-        client = InferenceClient(token=HF_TOKEN)
-
-        logger.info("Sending request to model...")
-        # Using text-generation endpoint instead of chat
+        # Direct text generation
         response = client.text_generation(
-            text + str(' describe in one line only'),
-            model="meta-llama/Llama-3.2-11B-Vision-Instruct",
-            max_new_tokens=500,
-            temperature=0.7
+            text + " describe in one line only",
+            max_new_tokens=100,
+            temperature=0.7,
+            repetition_penalty=1.2
         )
+
+        logger.info(f"Generated response: {response}")
         return response
 
     except Exception as e:
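
The diff truncates at the final except handler, but the shape of the change is clear: client construction moves out of the per-request path and happens once at import. Below is a minimal sketch of app.py after this commit, for orientation only; the ChatRequest model, the /chat route, and the body of the final except handler are assumptions, not shown in the diff.

app.py (after this commit, abridged sketch):

import os
import logging

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from huggingface_hub import InferenceClient, HfApi

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI()

class ChatRequest(BaseModel):   # assumed request model, not part of the diff
    text: str

class ChatResponse(BaseModel):
    response: str
    status: str

# Client is now built once at import time and reused by every request.
try:
    HF_TOKEN = os.getenv("HF_TOKEN")
    if not HF_TOKEN:
        logger.warning("HF_TOKEN not found in environment variables")
    api = HfApi(token=HF_TOKEN)
    client = InferenceClient(
        model="meta-llama/Llama-3.2-11B-Vision-Instruct",
        token=HF_TOKEN,
    )
    logger.info("Successfully initialized HuggingFace client")
except Exception as e:
    logger.error(f"Error initializing HuggingFace client: {str(e)}")

def llm_chat_response(text: str) -> str:
    try:
        logger.info(f"Processing text: {text}")
        # Reuses the module-level client; no per-request construction.
        response = client.text_generation(
            text + " describe in one line only",
            max_new_tokens=100,
            temperature=0.7,
            repetition_penalty=1.2,
        )
        logger.info(f"Generated response: {response}")
        return response
    except Exception as e:
        # Assumed handler body; the diff cuts off at the except line.
        logger.error(f"Error in llm_chat_response: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/chat", response_model=ChatResponse)   # assumed route
def chat(request: ChatRequest) -> ChatResponse:
    return ChatResponse(response=llm_chat_response(request.text), status="success")

One consequence of the module-level try/except in the committed code: if initialization fails, client is never assigned and the error surfaces as a NameError on the first request rather than at startup.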