Reality123b committed
Commit 24594e9 · verified · 1 Parent(s): 0c17752

Update app.py

Files changed (1)
  1. app.py +61 -372
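The functional change in this commit is small: the chat client now targets Qwen/QwQ-32B-Preview instead of Qwen/Qwen-32B-Preview and passes the token as api_key= rather than token=, while the voice-mode code (edge_tts, speech_recognition, and the related Gradio UI) is removed. A minimal sketch of the new client setup, assuming a recent huggingface_hub release in which api_key is accepted as an alias for token:

    import os
    from huggingface_hub import InferenceClient

    # Sketch of the client construction this commit switches to.
    # api_key= is an alias for token= in recent huggingface_hub releases;
    # on older releases, pass token= instead.
    client = InferenceClient(
        model="Qwen/QwQ-32B-Preview",
        api_key=os.getenv("HF_TOKEN"),
    )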
app.py CHANGED
@@ -11,10 +11,6 @@ import torch
11
  import numpy as np
12
  import networkx as nx
13
  from collections import Counter
14
- import asyncio
15
- import edge_tts
16
- import speech_recognition as sr
17
- import random
18
 
19
  @dataclass
20
  class ChatMessage:
@@ -25,14 +21,14 @@ class ChatMessage:
25
  return {"role": self.role, "content": self.content}
26
 
27
  class XylariaChat:
28
- def __init__(self):
29
  self.hf_token = os.getenv("HF_TOKEN")
30
  if not self.hf_token:
31
  raise ValueError("HuggingFace token not found in environment variables")
32
 
33
  self.client = InferenceClient(
34
- model="Qwen/Qwen-32B-Preview",
35
- token=self.hf_token
36
  )
37
 
38
  self.image_api_url = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
@@ -53,7 +49,7 @@ class XylariaChat:
53
  "bias_detection": 0.0,
54
  "strategy_adjustment": ""
55
  }
56
-
57
  self.internal_state = {
58
  "emotions": {
59
  "valence": 0.5,
@@ -82,7 +78,7 @@ class XylariaChat:
82
  ]
83
 
84
  self.system_prompt = """You are a helpful and harmless assistant. You are Xylaria developed by Sk Md Saad Amin. You should think step-by-step """
85
-
86
  self.causal_rules_db = {
87
  "rain": ["wet roads", "flooding"],
88
  "fire": ["heat", "smoke"],
@@ -96,11 +92,8 @@ class XylariaChat:
96
  "democracy": "government by the people",
97
  "photosynthesis": "process used by plants to convert light to energy"
98
  }
99
-
100
- # === Voice Mode Initialization (Start) ===
101
- self.voice_mode_active = False
102
- self.selected_voice = "en-US-JennyNeural" # Default voice
103
- # === Voice Mode Initialization (End) ===
104
 
105
  def update_internal_state(self, emotion_deltas, cognitive_load_deltas, introspection_delta, engagement_delta):
106
  for emotion, delta in emotion_deltas.items():
@@ -128,7 +121,7 @@ class XylariaChat:
128
 
129
  def update_belief_system(self, statement, belief_score):
130
  self.belief_system[statement] = belief_score
131
-
132
  def dynamic_belief_update(self, user_message):
133
  sentences = [s.strip() for s in user_message.split('.') if s.strip()]
134
  sentence_counts = Counter(sentences)
@@ -234,7 +227,7 @@ class XylariaChat:
234
  return "Current strategy is effective. Continue with the current approach."
235
  else:
236
  return " ".join(adjustments)
237
-
238
  def introspect(self):
239
  introspection_report = "Introspection Report:\n"
240
  introspection_report += f" Current Emotional State:\n"
@@ -284,7 +277,7 @@ class XylariaChat:
284
  response = "I'm feeling quite energized and ready to assist! " + response
285
  else:
286
  response = "I'm in a good mood and happy to help. " + response
287
-
288
  if curiosity > 0.7:
289
  response += " I'm very curious about this topic, could you tell me more?"
290
  if frustration > 0.5:
@@ -310,7 +303,7 @@ class XylariaChat:
310
  if goal["goal"] == "Provide helpful, informative, and contextually relevant responses":
311
  goal["priority"] = max(goal["priority"] - 0.1, 0.0)
312
  goal["progress"] = max(goal["progress"] - 0.2, 0.0)
313
-
314
  if "learn more" in feedback_lower:
315
  for goal in self.goals:
316
  if goal["goal"] == "Actively learn and adapt from interactions to improve conversational abilities":
@@ -321,7 +314,7 @@ class XylariaChat:
321
  if goal["goal"] == "Maintain a coherent, engaging, and empathetic conversation flow":
322
  goal["priority"] = max(goal["priority"] - 0.1, 0.0)
323
  goal["progress"] = max(goal["progress"] - 0.2, 0.0)
324
-
325
  if self.internal_state["emotions"]["curiosity"] > 0.8:
326
  for goal in self.goals:
327
  if goal["goal"] == "Identify and fill knowledge gaps by seeking external information":
@@ -398,8 +391,8 @@ class XylariaChat:
398
 
399
  try:
400
  self.client = InferenceClient(
401
- model="Qwen/Qwen-32B-Preview",
402
- token=self.hf_token
403
  )
404
  except Exception as e:
405
  print(f"Error resetting API client: {e}")
@@ -432,7 +425,7 @@ class XylariaChat:
432
 
433
  except Exception as e:
434
  return f"Error processing image: {str(e)}"
435
-
436
  def generate_image(self, prompt):
437
  try:
438
  image = self.image_gen_client.text_to_image(prompt)
@@ -447,58 +440,9 @@ class XylariaChat:
447
  return text.strip()
448
  except Exception as e:
449
  return f"Error during Math OCR: {e}"
450
-
451
- # === Voice Mode Methods (Start) ===
452
- async def speak_text(self, text):
453
- if not text:
454
- return None, None
455
-
456
- temp_file = "temp_audio.mp3"
457
- try:
458
- communicator = edge_tts.Communicate(text, self.selected_voice)
459
- await communicator.save(temp_file)
460
- return temp_file
461
- except Exception as e:
462
- print(f"Error during text-to-speech: {e}")
463
- return None, None
464
-
465
- def recognize_speech(self, timeout=10, phrase_time_limit=10):
466
- recognizer = sr.Recognizer()
467
- recognizer.energy_threshold = 4000
468
- recognizer.dynamic_energy_threshold = True
469
-
470
- with sr.Microphone() as source:
471
- print("Listening...")
472
- try:
473
- audio_data = recognizer.listen(source, timeout=timeout, phrase_time_limit=phrase_time_limit)
474
- print("Processing speech...")
475
- text = recognizer.recognize_whisper_api(audio_data, api_key=self.hf_token)
476
- print(f"Recognized: {text}")
477
- return text
478
- except sr.WaitTimeoutError:
479
- print("No speech detected within the timeout period.")
480
- return ""
481
- except sr.UnknownValueError:
482
- print("Speech recognition could not understand audio")
483
- return ""
484
- except sr.RequestError as e:
485
- print(f"Could not request results from Whisper API; {e}")
486
- return ""
487
- except Exception as e:
488
- print(f"An error occurred during speech recognition: {e}")
489
- return ""
490
- # === Voice Mode Methods (End) ===
491
-
492
  def get_response(self, user_input, image=None):
493
  try:
494
- # === Voice Mode Adaptation (Start) ===
495
- if self.voice_mode_active:
496
- print("Voice mode is active, using speech recognition.")
497
- user_input = self.recognize_speech() # Get input from speech
498
- if not user_input:
499
- return "I didn't hear anything." , None
500
- # === Voice Mode Adaptation (End) ===
501
-
502
  messages = []
503
 
504
  messages.append(ChatMessage(
@@ -525,7 +469,7 @@ class XylariaChat:
525
  role="user",
526
  content=user_input
527
  ).to_dict())
528
-
529
  entities = []
530
  relationships = []
531
 
@@ -535,19 +479,19 @@ class XylariaChat:
535
  extracted_relationships = self.extract_relationships(message['content'])
536
  entities.extend(extracted_entities)
537
  relationships.extend(extracted_relationships)
538
-
539
  self.update_knowledge_graph(entities, relationships)
540
  self.run_metacognitive_layer()
541
-
542
  for message in messages:
543
  if message['role'] == 'user':
544
  self.dynamic_belief_update(message['content'])
545
-
546
  for cause, effects in self.causal_rules_db.items():
547
  if any(cause in msg['content'].lower() for msg in messages if msg['role'] == 'user') and any(
548
  effect in msg['content'].lower() for msg in messages for effect in effects):
549
  self.store_information("Causal Inference", f"It seems {cause} might be related to {', '.join(effects)}.")
550
-
551
  for concept, generalization in self.concept_generalizations.items():
552
  if any(concept in msg['content'].lower() for msg in messages if msg['role'] == 'user'):
553
  self.store_information("Inferred Knowledge", f"This reminds me of a general principle: {generalization}.")
@@ -555,54 +499,28 @@ class XylariaChat:
555
  if self.internal_state["emotions"]["curiosity"] > 0.8 and any("?" in msg['content'] for msg in messages if msg['role'] == 'user'):
556
  print("Simulating external knowledge seeking...")
557
  self.store_information("External Knowledge", "This is a placeholder for external information I would have found")
558
-
559
  self.store_information("User Input", user_input)
560
 
561
  input_tokens = sum(len(msg['content'].split()) for msg in messages)
562
  max_new_tokens = 16384 - input_tokens - 50
563
 
564
  max_new_tokens = min(max_new_tokens, 10020)
565
-
566
- # === Voice Mode Output (Start) ===
567
- if self.voice_mode_active:
568
- stream = self.client.chat_completion(
569
- messages=messages,
570
- model="Qwen/Qwen-32B-Preview",
571
- temperature=0.7,
572
- max_tokens=max_new_tokens,
573
- top_p=0.9,
574
- stream=True
575
- )
576
-
577
- full_response = ""
578
- for chunk in stream:
579
- if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
580
- full_response += chunk.choices[0].delta.content
581
-
582
- full_response = self.adjust_response_based_on_state(full_response)
583
- audio_file = asyncio.run(self.speak_text(full_response))
584
 
585
- # Update conversation history
586
- self.conversation_history.append(ChatMessage(role="user", content=user_input).to_dict())
587
- self.conversation_history.append(ChatMessage(role="assistant", content=full_response).to_dict())
588
-
589
- return full_response, audio_file
590
-
591
- # === Voice Mode Output (End) ===
592
- else:
593
- stream = self.client.chat_completion(
594
- messages=messages,
595
- model="Qwen/Qwen-32B-Preview",
596
- temperature=0.7,
597
- max_tokens=max_new_tokens,
598
- top_p=0.9,
599
- stream=True
600
- )
601
-
602
- return stream
603
  except Exception as e:
604
  print(f"Detailed error in get_response: {e}")
605
- return f"Error generating response: {str(e)}", None
606
 
607
  def extract_entities(self, text):
608
  words = text.split()
@@ -619,7 +537,7 @@ class XylariaChat:
619
  if words[i].istitle() and words[i+2].istitle():
620
  relationships.append((words[i], words[i+1], words[i+2]))
621
  return relationships
622
-
623
  def messages_to_prompt(self, messages):
624
  prompt = ""
625
  for msg in messages:
@@ -633,149 +551,30 @@ class XylariaChat:
633
  return prompt
634
 
635
  def create_interface(self):
636
- # === Voice-Specific UI Elements (Start) ===
637
- def toggle_voice_mode(active_state):
638
- self.voice_mode_active = active_state
639
- if self.voice_mode_active:
640
- # Get the list of available voices
641
- voices = asyncio.run(edge_tts.list_voices())
642
- voice_names = [voice['ShortName'] for voice in voices]
643
-
644
- # Select a random voice from the list
645
- random_voice = random.choice(voice_names)
646
- self.selected_voice = random_voice
647
-
648
- return gr.Button.update(value="Stop Voice Mode"), gr.Dropdown.update(value=random_voice)
649
- else:
650
- return gr.Button.update(value="Start Voice Mode"), gr.Dropdown.update(value=self.selected_voice)
651
-
652
- def update_selected_voice(voice_name):
653
- self.selected_voice = voice_name
654
- return voice_name
655
-
656
- # === Voice-Specific UI Elements (End) ===
657
-
658
- def streaming_response(message, chat_history, image_filepath, math_ocr_image_path, voice_mode_state, selected_voice):
659
- if self.voice_mode_active:
660
- response_text, audio_output = self.get_response(message)
661
-
662
- if isinstance(response_text, str):
663
- updated_history = chat_history + [[message, response_text]]
664
- if audio_output:
665
- yield updated_history, audio_output, None, None, ""
666
- else:
667
- yield updated_history, None, None, None, ""
668
- else:
669
- full_response = ""
670
- updated_history = chat_history + [[message, ""]]
671
- try:
672
- for chunk in response_text:
673
- if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
674
- chunk_content = chunk.choices[0].delta.content
675
- full_response += chunk_content
676
- updated_history[-1][1] = full_response
677
- if audio_output:
678
- yield updated_history, audio_output, None, None, ""
679
- else:
680
- yield updated_history, None, None, None, ""
681
- except Exception as e:
682
- print(f"Streaming error: {e}")
683
- updated_history[-1][1] = f"Error during response: {e}"
684
- if audio_output:
685
- yield updated_history, audio_output, None, None, ""
686
- else:
687
- yield updated_history, None, None, None, ""
688
- return
689
-
690
- full_response = self.adjust_response_based_on_state(full_response)
691
-
692
- audio_file = asyncio.run(self.speak_text(full_response))
693
-
694
- self.update_goals(message)
695
-
696
- emotion_deltas = {}
697
- cognitive_load_deltas = {}
698
- engagement_delta = 0
699
-
700
- if any(word in message.lower() for word in ["sad", "unhappy", "depressed", "down"]):
701
- emotion_deltas.update({"valence": -0.2, "arousal": 0.1, "confidence": -0.1, "sadness": 0.3, "joy": -0.2})
702
- engagement_delta = -0.1
703
- elif any(word in message.lower() for word in ["happy", "good", "great", "excited", "amazing"]):
704
- emotion_deltas.update({"valence": 0.2, "arousal": 0.2, "confidence": 0.1, "sadness": -0.2, "joy": 0.3})
705
- engagement_delta = 0.2
706
- elif any(word in message.lower() for word in ["angry", "mad", "furious", "frustrated"]):
707
- emotion_deltas.update({"valence": -0.3, "arousal": 0.3, "dominance": -0.2, "frustration": 0.2, "sadness": 0.1, "joy": -0.1})
708
- engagement_delta = -0.2
709
- elif any(word in message.lower() for word in ["scared", "afraid", "fearful", "anxious"]):
710
- emotion_deltas.update({"valence": -0.2, "arousal": 0.4, "dominance": -0.3, "confidence": -0.2, "sadness": 0.2})
711
- engagement_delta = -0.1
712
- elif any(word in message.lower() for word in ["surprise", "amazed", "astonished"]):
713
- emotion_deltas.update({"valence": 0.1, "arousal": 0.5, "dominance": 0.1, "curiosity": 0.3, "sadness": -0.1, "joy": 0.1})
714
- engagement_delta = 0.3
715
- elif any(word in message.lower() for word in ["confused", "uncertain", "unsure"]):
716
- cognitive_load_deltas.update({"processing_intensity": 0.2})
717
- emotion_deltas.update({"curiosity": 0.2, "confidence": -0.1, "sadness": 0.1})
718
- engagement_delta = 0.1
719
- else:
720
- emotion_deltas.update({"valence": 0.05, "arousal": 0.05})
721
- engagement_delta = 0.05
722
-
723
- if "learn" in message.lower() or "explain" in message.lower() or "know more" in message.lower():
724
- emotion_deltas.update({"curiosity": 0.3})
725
- cognitive_load_deltas.update({"processing_intensity": 0.1})
726
- engagement_delta = 0.2
727
-
728
- self.update_internal_state(emotion_deltas, cognitive_load_deltas, 0.1, engagement_delta)
729
-
730
- self.conversation_history.append(ChatMessage(role="user", content=message).to_dict())
731
- self.conversation_history.append(ChatMessage(role="assistant", content=full_response).to_dict())
732
-
733
- if len(self.conversation_history) > 10:
734
- self.conversation_history = self.conversation_history[-10:]
735
-
736
- if audio_file:
737
- yield updated_history, audio_file, None, None, ""
738
- else:
739
- yield updated_history, None, None, None, ""
740
-
741
- # Handling /image command for image generation
742
  if "/image" in message:
743
  image_prompt = message.replace("/image", "").strip()
744
 
745
- # Updated placeholder SVG with animation and text
746
  placeholder_image = "data:image/svg+xml," + requests.utils.quote(f'''
747
  <svg width="256" height="256" viewBox="0 0 256 256" xmlns="http://www.w3.org/2000/svg">
748
- <style>
749
- rect {{
750
- animation: fillAnimation 3s ease-in-out infinite;
751
- }}
752
- @keyframes fillAnimation {{
753
- 0% {{ fill: #626262; }}
754
- 50% {{ fill: #111111; }}
755
- 100% {{ fill: #626262; }}
756
- }}
757
- text {{
758
- font-family: 'Helvetica Neue', Arial, sans-serif; /* Choose a good font */
759
- font-weight: 300; /* Slightly lighter font weight */
760
- text-shadow: 0px 2px 4px rgba(0, 0, 0, 0.4); /* Subtle shadow */
761
- }}
762
- </style>
763
- <rect width="256" height="256" rx="20" fill="#888888" />
764
- <text x="50%" y="50%" dominant-baseline="middle" text-anchor="middle" font-size="24" fill="white" opacity="0.8">
765
- <tspan>creating your image</tspan>
766
- <tspan x="50%" dy="1.2em">with xylaria iris</tspan>
767
- </text>
768
  </svg>
769
  ''')
770
 
771
  updated_history = chat_history + [[message, gr.Image(value=placeholder_image, type="pil", visible=True)]]
772
- yield updated_history, None, None, None, ""
773
 
774
  try:
775
  generated_image = self.generate_image(image_prompt)
776
 
777
  updated_history[-1][1] = gr.Image(value=generated_image, type="pil", visible=True)
778
- yield updated_history, None, None, None, ""
779
 
780
  self.conversation_history.append(ChatMessage(role="user", content=message).to_dict())
781
  self.conversation_history.append(ChatMessage(role="assistant", content="Image generated").to_dict())
@@ -783,15 +582,15 @@ class XylariaChat:
783
  return
784
  except Exception as e:
785
  updated_history[-1][1] = f"Error generating image: {e}"
786
- yield updated_history, None, None, None, ""
787
  return
788
-
789
  ocr_text = ""
790
  if math_ocr_image_path:
791
  ocr_text = self.perform_math_ocr(math_ocr_image_path)
792
  if ocr_text.startswith("Error"):
793
  updated_history = chat_history + [[message, ocr_text]]
794
- yield updated_history, None, None, None, ""
795
  return
796
  else:
797
  message = f"Math OCR Result: {ocr_text}\n\nUser's message: {message}"
@@ -800,10 +599,10 @@ class XylariaChat:
800
  response_stream = self.get_response(message, image_filepath)
801
  else:
802
  response_stream = self.get_response(message)
803
-
804
  if isinstance(response_stream, str):
805
  updated_history = chat_history + [[message, response_stream]]
806
- yield updated_history, None, None, None, ""
807
  return
808
 
809
  full_response = ""
@@ -814,13 +613,13 @@ class XylariaChat:
814
  if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
815
  chunk_content = chunk.choices[0].delta.content
816
  full_response += chunk_content
817
-
818
  updated_history[-1][1] = full_response
819
- yield updated_history, None, None, None, ""
820
  except Exception as e:
821
  print(f"Streaming error: {e}")
822
  updated_history[-1][1] = f"Error during response: {e}"
823
- yield updated_history, None, None, None, ""
824
  return
825
 
826
  full_response = self.adjust_response_based_on_state(full_response)
@@ -853,14 +652,14 @@ class XylariaChat:
853
  else:
854
  emotion_deltas.update({"valence": 0.05, "arousal": 0.05})
855
  engagement_delta = 0.05
856
-
857
  if "learn" in message.lower() or "explain" in message.lower() or "know more" in message.lower():
858
  emotion_deltas.update({"curiosity": 0.3})
859
  cognitive_load_deltas.update({"processing_intensity": 0.1})
860
  engagement_delta = 0.2
861
-
862
  self.update_internal_state(emotion_deltas, cognitive_load_deltas, 0.1, engagement_delta)
863
-
864
  self.conversation_history.append(ChatMessage(role="user", content=message).to_dict())
865
  self.conversation_history.append(ChatMessage(role="assistant", content=full_response).to_dict())
866
 
@@ -874,74 +673,6 @@ class XylariaChat:
874
  background-color: #f5f5f5;
875
  font-family: 'Source Sans Pro', sans-serif;
876
  }
877
-
878
- .voice-mode-button {
879
- background-color: #4CAF50; /* Green */
880
- border: none;
881
- color: white;
882
- padding: 15px 32px;
883
- text-align: center;
884
- text-decoration: none;
885
- display: inline-block;
886
- font-size: 16px;
887
- margin: 4px 2px;
888
- cursor: pointer;
889
- border-radius: 10px; /* Rounded corners */
890
- transition: all 0.3s ease; /* Smooth transition for hover effect */
891
- }
892
-
893
- /* Style when voice mode is active */
894
- .voice-mode-button.active {
895
- background-color: #f44336; /* Red */
896
- }
897
-
898
- /* Hover effect */
899
- .voice-mode-button:hover {
900
- opacity: 0.8;
901
- }
902
-
903
- /* Style for the voice mode overlay */
904
- .voice-mode-overlay {
905
- position: fixed; /* Stay in place */
906
- left: 0;
907
- top: 0;
908
- width: 100%; /* Full width */
909
- height: 100%; /* Full height */
910
- background-color: rgba(0, 0, 0, 0.7); /* Black w/ opacity */
911
- z-index: 10; /* Sit on top */
912
- display: flex;
913
- justify-content: center;
914
- align-items: center;
915
- border-radius: 10px;
916
- }
917
-
918
- /* Style for the growing circle */
919
- .voice-mode-circle {
920
- width: 100px;
921
- height: 100px;
922
- background-color: #4CAF50;
923
- border-radius: 50%;
924
- display: flex;
925
- justify-content: center;
926
- align-items: center;
927
- animation: grow 2s infinite;
928
- }
929
-
930
- /* Keyframes for the growing animation */
931
- @keyframes grow {
932
- 0% {
933
- transform: scale(1);
934
- opacity: 0.8;
935
- }
936
- 50% {
937
- transform: scale(1.5);
938
- opacity: 0.5;
939
- }
940
- 100% {
941
- transform: scale(1);
942
- opacity: 0.8;
943
- }
944
- }
945
 
946
  .gradio-container {
947
  max-width: 900px;
@@ -1102,23 +833,6 @@ class XylariaChat:
1102
  display: flex;
1103
  align-items: center;
1104
  }
1105
-
1106
- .audio-container {
1107
- display: flex;
1108
- align-items: center;
1109
- margin-top: 10px;
1110
- }
1111
-
1112
- .audio-player {
1113
- width: 100%;
1114
- border-radius: 15px;
1115
- }
1116
-
1117
- .audio-icon {
1118
- width: 30px;
1119
- height: 30px;
1120
- margin-right: 10px;
1121
- }
1122
  """
1123
 
1124
  with gr.Blocks(theme=gr.themes.Soft(
@@ -1139,30 +853,6 @@ class XylariaChat:
1139
  )
1140
  )
1141
 
1142
- # === Voice Mode UI (Start) ===
1143
- voice_mode_btn = gr.Button("Start Voice Mode", elem_classes="voice-mode-button")
1144
-
1145
- voices = asyncio.run(edge_tts.list_voices())
1146
- voice_names = [voice['ShortName'] for voice in voices]
1147
-
1148
- voice_dropdown = gr.Dropdown(
1149
- label="Select Voice",
1150
- choices=voice_names,
1151
- value=self.selected_voice,
1152
- interactive=True
1153
- )
1154
- voice_dropdown.input(
1155
- fn=update_selected_voice,
1156
- inputs=voice_dropdown,
1157
- outputs=voice_dropdown
1158
- )
1159
- voice_mode_btn.click(
1160
- fn=toggle_voice_mode,
1161
- inputs=voice_mode_btn,
1162
- outputs=[voice_mode_btn, voice_dropdown]
1163
- )
1164
- # === Voice Mode UI (End) ===
1165
-
1166
  with gr.Accordion("Image Input", open=False, elem_classes="gr-accordion"):
1167
  with gr.Row(elem_classes="image-container"):
1168
  with gr.Column(elem_classes="image-upload"):
@@ -1193,16 +883,15 @@ class XylariaChat:
1193
  clear = gr.Button("Clear Conversation", variant="stop")
1194
  clear_memory = gr.Button("Clear Memory")
1195
 
1196
- # Pass voice_mode_state and selected_voice to the streaming_response function
1197
  btn.click(
1198
  fn=streaming_response,
1199
- inputs=[txt, chatbot, img, math_ocr_img, voice_mode_btn, voice_dropdown],
1200
- outputs=[chatbot, gr.Audio(label="Audio Response", type="filepath", autoplay=True, visible=True), img, math_ocr_img, txt]
1201
  )
1202
  txt.submit(
1203
  fn=streaming_response,
1204
- inputs=[txt, chatbot, img, math_ocr_img, voice_mode_btn, voice_dropdown],
1205
- outputs=[chatbot, gr.Audio(label="Audio Response", type="filepath", autoplay=True, visible=True), img, math_ocr_img, txt]
1206
  )
1207
 
1208
  clear.click(
@@ -1231,5 +920,5 @@ def main():
1231
  debug=True
1232
  )
1233
 
1234
- if __name__ == "__main__":
1235
  main()
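The new get_response (shown in the new-file pane below) drops the voice-mode branch and simply returns the raw stream from client.chat_completion(..., stream=True); the Gradio callback then accumulates the per-chunk deltas. A minimal, self-contained sketch of that consumption pattern (the 512-token cap is an arbitrary example; the app computes max_new_tokens from the prompt length):

    import os
    from huggingface_hub import InferenceClient

    client = InferenceClient(model="Qwen/QwQ-32B-Preview", api_key=os.getenv("HF_TOKEN"))
    messages = [{"role": "user", "content": "Hello!"}]  # example payload

    stream = client.chat_completion(
        messages=messages,
        model="Qwen/QwQ-32B-Preview",
        temperature=0.7,
        max_tokens=512,  # the app uses a computed max_new_tokens here
        top_p=0.9,
        stream=True,
    )

    full_response = ""
    for chunk in stream:
        # Each chunk carries an incremental delta, exactly as consumed in streaming_response.
        if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
            full_response += chunk.choices[0].delta.content
    print(full_response)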
 
app.py (new file, right-hand pane of the diff)

11   import numpy as np
12   import networkx as nx
13   from collections import Counter
14
15   @dataclass
16   class ChatMessage:

21   return {"role": self.role, "content": self.content}
22
23   class XylariaChat:
24 + def _init_(self):
25   self.hf_token = os.getenv("HF_TOKEN")
26   if not self.hf_token:
27   raise ValueError("HuggingFace token not found in environment variables")
28
29   self.client = InferenceClient(
30 + model="Qwen/QwQ-32B-Preview",
31 + api_key=self.hf_token
32   )
33
34   self.image_api_url = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"

49   "bias_detection": 0.0,
50   "strategy_adjustment": ""
51   }
52 +
53   self.internal_state = {
54   "emotions": {
55   "valence": 0.5,

78   ]
79
80   self.system_prompt = """You are a helpful and harmless assistant. You are Xylaria developed by Sk Md Saad Amin. You should think step-by-step """
81 +
82   self.causal_rules_db = {
83   "rain": ["wet roads", "flooding"],
84   "fire": ["heat", "smoke"],

92   "democracy": "government by the people",
93   "photosynthesis": "process used by plants to convert light to energy"
94   }
95 +
96 + # ... (other methods: update_internal_state, update_knowledge_graph, etc.) ...
97
98   def update_internal_state(self, emotion_deltas, cognitive_load_deltas, introspection_delta, engagement_delta):
99   for emotion, delta in emotion_deltas.items():

121
122   def update_belief_system(self, statement, belief_score):
123   self.belief_system[statement] = belief_score
124 +
125   def dynamic_belief_update(self, user_message):
126   sentences = [s.strip() for s in user_message.split('.') if s.strip()]
127   sentence_counts = Counter(sentences)

227   return "Current strategy is effective. Continue with the current approach."
228   else:
229   return " ".join(adjustments)
230 +
231   def introspect(self):
232   introspection_report = "Introspection Report:\n"
233   introspection_report += f" Current Emotional State:\n"

277   response = "I'm feeling quite energized and ready to assist! " + response
278   else:
279   response = "I'm in a good mood and happy to help. " + response
280 +
281   if curiosity > 0.7:
282   response += " I'm very curious about this topic, could you tell me more?"
283   if frustration > 0.5:

303   if goal["goal"] == "Provide helpful, informative, and contextually relevant responses":
304   goal["priority"] = max(goal["priority"] - 0.1, 0.0)
305   goal["progress"] = max(goal["progress"] - 0.2, 0.0)
306 +
307   if "learn more" in feedback_lower:
308   for goal in self.goals:
309   if goal["goal"] == "Actively learn and adapt from interactions to improve conversational abilities":

314   if goal["goal"] == "Maintain a coherent, engaging, and empathetic conversation flow":
315   goal["priority"] = max(goal["priority"] - 0.1, 0.0)
316   goal["progress"] = max(goal["progress"] - 0.2, 0.0)
317 +
318   if self.internal_state["emotions"]["curiosity"] > 0.8:
319   for goal in self.goals:
320   if goal["goal"] == "Identify and fill knowledge gaps by seeking external information":

391
392   try:
393   self.client = InferenceClient(
394 + model="Qwen/QwQ-32B-Preview",
395 + api_key=self.hf_token
396   )
397   except Exception as e:
398   print(f"Error resetting API client: {e}")

425
426   except Exception as e:
427   return f"Error processing image: {str(e)}"
428 +
429   def generate_image(self, prompt):
430   try:
431   image = self.image_gen_client.text_to_image(prompt)

440   return text.strip()
441   except Exception as e:
442   return f"Error during Math OCR: {e}"
443 +
444   def get_response(self, user_input, image=None):
445   try:
446   messages = []
447
448   messages.append(ChatMessage(

469   role="user",
470   content=user_input
471   ).to_dict())
472 +
473   entities = []
474   relationships = []
475

479   extracted_relationships = self.extract_relationships(message['content'])
480   entities.extend(extracted_entities)
481   relationships.extend(extracted_relationships)
482 +
483   self.update_knowledge_graph(entities, relationships)
484   self.run_metacognitive_layer()
485 +
486   for message in messages:
487   if message['role'] == 'user':
488   self.dynamic_belief_update(message['content'])
489 +
490   for cause, effects in self.causal_rules_db.items():
491   if any(cause in msg['content'].lower() for msg in messages if msg['role'] == 'user') and any(
492   effect in msg['content'].lower() for msg in messages for effect in effects):
493   self.store_information("Causal Inference", f"It seems {cause} might be related to {', '.join(effects)}.")
494 +
495   for concept, generalization in self.concept_generalizations.items():
496   if any(concept in msg['content'].lower() for msg in messages if msg['role'] == 'user'):
497   self.store_information("Inferred Knowledge", f"This reminds me of a general principle: {generalization}.")

499   if self.internal_state["emotions"]["curiosity"] > 0.8 and any("?" in msg['content'] for msg in messages if msg['role'] == 'user'):
500   print("Simulating external knowledge seeking...")
501   self.store_information("External Knowledge", "This is a placeholder for external information I would have found")
502 +
503   self.store_information("User Input", user_input)
504
505   input_tokens = sum(len(msg['content'].split()) for msg in messages)
506   max_new_tokens = 16384 - input_tokens - 50
507
508   max_new_tokens = min(max_new_tokens, 10020)
509
510 + stream = self.client.chat_completion(
511 + messages=messages,
512 + model="Qwen/QwQ-32B-Preview",
513 + temperature=0.7,
514 + max_tokens=max_new_tokens,
515 + top_p=0.9,
516 + stream=True
517 + )
518 +
519 + return stream
520 +
521   except Exception as e:
522   print(f"Detailed error in get_response: {e}")
523 + return f"Error generating response: {str(e)}"
524
525   def extract_entities(self, text):
526   words = text.split()

537   if words[i].istitle() and words[i+2].istitle():
538   relationships.append((words[i], words[i+1], words[i+2]))
539   return relationships
540 +
541   def messages_to_prompt(self, messages):
542   prompt = ""
543   for msg in messages:

551   return prompt
552
553   def create_interface(self):
554 + def streaming_response(message, chat_history, image_filepath, math_ocr_image_path):
555 +
556   if "/image" in message:
557   image_prompt = message.replace("/image", "").strip()
558
559 + # Placeholder for image generation
560   placeholder_image = "data:image/svg+xml," + requests.utils.quote(f'''
561   <svg width="256" height="256" viewBox="0 0 256 256" xmlns="http://www.w3.org/2000/svg">
562 + <rect width="256" height="256" rx="20" fill="#888888" />
563 + <animate attributeName="fill" values="#888888;#000000;#888888" dur="2s" repeatCount="indefinite" />
564 + <text x="50%" y="50%" dominant-baseline="middle" text-anchor="middle" font-size="48" fill="white">
565 + <tspan>/</tspan>
566 + </text>
567   </svg>
568   ''')
569
570   updated_history = chat_history + [[message, gr.Image(value=placeholder_image, type="pil", visible=True)]]
571 + yield "", updated_history, None, None, ""
572
573   try:
574   generated_image = self.generate_image(image_prompt)
575
576   updated_history[-1][1] = gr.Image(value=generated_image, type="pil", visible=True)
577 + yield "", updated_history, None, None, ""
578
579   self.conversation_history.append(ChatMessage(role="user", content=message).to_dict())
580   self.conversation_history.append(ChatMessage(role="assistant", content="Image generated").to_dict())

582   return
583   except Exception as e:
584   updated_history[-1][1] = f"Error generating image: {e}"
585 + yield "", updated_history, None, None, ""
586   return
587 +
588   ocr_text = ""
589   if math_ocr_image_path:
590   ocr_text = self.perform_math_ocr(math_ocr_image_path)
591   if ocr_text.startswith("Error"):
592   updated_history = chat_history + [[message, ocr_text]]
593 + yield "", updated_history, None, None, ""
594   return
595   else:
596   message = f"Math OCR Result: {ocr_text}\n\nUser's message: {message}"

599   response_stream = self.get_response(message, image_filepath)
600   else:
601   response_stream = self.get_response(message)
602 +
603   if isinstance(response_stream, str):
604   updated_history = chat_history + [[message, response_stream]]
605 + yield "", updated_history, None, None, ""
606   return
607
608   full_response = ""

613   if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
614   chunk_content = chunk.choices[0].delta.content
615   full_response += chunk_content
616 +
617   updated_history[-1][1] = full_response
618 + yield "", updated_history, None, None, ""
619   except Exception as e:
620   print(f"Streaming error: {e}")
621   updated_history[-1][1] = f"Error during response: {e}"
622 + yield "", updated_history, None, None, ""
623   return
624
625   full_response = self.adjust_response_based_on_state(full_response)

652   else:
653   emotion_deltas.update({"valence": 0.05, "arousal": 0.05})
654   engagement_delta = 0.05
655 +
656   if "learn" in message.lower() or "explain" in message.lower() or "know more" in message.lower():
657   emotion_deltas.update({"curiosity": 0.3})
658   cognitive_load_deltas.update({"processing_intensity": 0.1})
659   engagement_delta = 0.2
660 +
661   self.update_internal_state(emotion_deltas, cognitive_load_deltas, 0.1, engagement_delta)
662 +
663   self.conversation_history.append(ChatMessage(role="user", content=message).to_dict())
664   self.conversation_history.append(ChatMessage(role="assistant", content=full_response).to_dict())
665

673   background-color: #f5f5f5;
674   font-family: 'Source Sans Pro', sans-serif;
675   }
676
677   .gradio-container {
678   max-width: 900px;

833   display: flex;
834   align-items: center;
835   }
836   """
837
838   with gr.Blocks(theme=gr.themes.Soft(

853   )
854   )
855
856   with gr.Accordion("Image Input", open=False, elem_classes="gr-accordion"):
857   with gr.Row(elem_classes="image-container"):
858   with gr.Column(elem_classes="image-upload"):

883   clear = gr.Button("Clear Conversation", variant="stop")
884   clear_memory = gr.Button("Clear Memory")
885
886   btn.click(
887   fn=streaming_response,
888 + inputs=[txt, chatbot, img, math_ocr_img],
889 + outputs=[txt, chatbot, img, math_ocr_img, txt]
890   )
891   txt.submit(
892   fn=streaming_response,
893 + inputs=[txt, chatbot, img, math_ocr_img],
894 + outputs=[txt, chatbot, img, math_ocr_img, txt]
895   )
896
897   clear.click(

920   debug=True
921   )
922
923 + if _name_ == "_main_":
924   main()
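For reference, the rewired event handlers above bind streaming_response as a generator: each yield must supply one value per output component, matching outputs=[txt, chatbot, img, math_ocr_img, txt] and the yield "", updated_history, None, None, "" pattern in the diff. A minimal sketch of that wiring; the component names follow the diff, while the simplified layout and the fake token stream are illustrative only:

    import gradio as gr

    def streaming_response(message, chat_history, image_filepath, math_ocr_image_path):
        # Generator callback: every yield emits one value per output component.
        chat_history = chat_history + [[message, ""]]
        for token in ["Hello", ", ", "world"]:  # stand-in for the model stream
            chat_history[-1][1] += token
            yield "", chat_history, None, None, ""

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot()
        txt = gr.Textbox()
        img = gr.Image(type="filepath")
        math_ocr_img = gr.Image(type="filepath")
        btn = gr.Button("Send")

        btn.click(
            fn=streaming_response,
            inputs=[txt, chatbot, img, math_ocr_img],
            outputs=[txt, chatbot, img, math_ocr_img, txt],
        )
        txt.submit(
            fn=streaming_response,
            inputs=[txt, chatbot, img, math_ocr_img],
            outputs=[txt, chatbot, img, math_ocr_img, txt],
        )

    if __name__ == "__main__":
        demo.launch()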