Spaces:
Reality123b
committed on
Update app.py
app.py CHANGED
@@ -13,7 +13,6 @@ import networkx as nx
 from collections import Counter
 import json
 from datetime import datetime
-from transformers import pipeline
 
 @dataclass
 class ChatMessage:
@@ -33,8 +32,6 @@ class XylariaChat:
             model="Qwen/QwQ-32B-Preview",
             token=self.hf_token
         )
-
-        self.image_captioning_pipe = pipeline("image-text-to-text", model="llava-onevision-qwen2-0.5b-si-hf")
 
         self.image_api_url = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
         self.image_api_headers = {"Authorization": f"Bearer {self.hf_token}"}
@@ -404,29 +401,6 @@
             print(f"Error resetting API client: {e}")
 
         return None
-
-    def caption_image_llava(self, image_path, prompt):
-        try:
-            with open(image_path, "rb") as img_file:
-                image_data = base64.b64encode(img_file.read()).decode("utf-8")
-
-            messages = [
-                {
-                    "role": "user",
-                    "content": [
-                        {"type": "image", "url": image_path},
-                        {"type": "text", "text": prompt},
-                    ],
-                },
-            ]
-
-            caption_result = self.image_captioning_pipe(text=messages, max_new_tokens=50)
-
-            caption = caption_result[0]['generated_text'] if caption_result else "No caption generated"
-
-            return caption
-        except Exception as e:
-            return f"Error captioning image with llava: {str(e)}"
 
     def caption_image(self, image):
         try:
@@ -510,9 +484,8 @@ class XylariaChat:
             messages.append(msg)
 
         if image:
-
-
-            user_input = f"Here is a description of an image: {caption}\n\n{user_input}"
+            image_caption = self.caption_image(image)
+            user_input = f"description of an image: {image_caption}\n\nUser's message about it: {user_input}"
 
         messages.append(ChatMessage(
             role="user",