timeki committed on
Commit
e1d592e
·
2 Parent(s): 868be0d 63b4037

Merge branch 'feature/add_graphs_in_separate_pannel' into pr/15

Browse files
app.py CHANGED
@@ -32,7 +32,7 @@ from utils import create_user_id
32
  # ClimateQ&A imports
33
  from climateqa.engine.llm import get_llm
34
  from climateqa.engine.vectorstore import get_pinecone_vectorstore
35
- from climateqa.knowledge.retriever import ClimateQARetriever
36
  from climateqa.engine.reranker import get_reranker
37
  from climateqa.engine.embeddings import get_embeddings_function
38
  from climateqa.engine.chains.prompts import audience_prompts
@@ -43,7 +43,7 @@ from climateqa.engine.keywords import make_keywords_chain
43
  # from climateqa.engine.chains.answer_rag import make_rag_papers_chain
44
  from climateqa.engine.graph import make_graph_agent,display_graph
45
 
46
- from front.utils import make_html_source,parse_output_llm_with_sources,serialize_docs,make_toolbox
47
 
48
  # Load environment variables in local mode
49
  try:
@@ -133,6 +133,7 @@ async def chat(query,history,audience,sources,reports):
133
  output_keywords = ""
134
  gallery = []
135
  start_streaming = False
 
136
 
137
  steps_display = {
138
  "categorize_intent":("🔄️ Analyzing user message",True),
@@ -151,10 +152,12 @@ async def chat(query,history,audience,sources,reports):
151
  try:
152
  docs = event["data"]["output"]["documents"]
153
  docs_html = []
154
- for i, d in enumerate(docs, 1):
155
- docs_html.append(make_html_source(d, i))
 
 
156
 
157
- used_documents = used_documents + [d.metadata["name"] for d in docs]
158
  history[-1].content = "Adding sources :\n\n - " + "\n - ".join(np.unique(used_documents))
159
 
160
  docs_html = "".join(docs_html)
@@ -184,7 +187,7 @@ async def chat(query,history,audience,sources,reports):
184
  if event["name"] == "categorize_intent" and event["event"] == "on_chain_start":
185
  print("X")
186
 
187
- yield history,docs_html,output_query,output_language,gallery #,output_query,output_keywords
188
 
189
  except Exception as e:
190
  print(event, "has failed")
@@ -212,12 +215,49 @@ async def chat(query,history,audience,sources,reports):
212
  print(f"Error logging on Azure Blob Storage: {e}")
213
  raise gr.Error(f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)")
214
 
215
- image_dict = {}
216
- for i,doc in enumerate(docs):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
 
 
 
218
  if doc.metadata["chunk_type"] == "image":
219
  try:
220
  key = f"Image {i+1}"
 
221
  image_path = doc.metadata["image_path"].split("documents/")[1]
222
  img = get_image_from_azure_blob_storage(image_path)
223
 
@@ -225,31 +265,18 @@ async def chat(query,history,audience,sources,reports):
225
  buffered = BytesIO()
226
  img.save(buffered, format="PNG")
227
  img_str = base64.b64encode(buffered.getvalue()).decode()
 
 
 
 
228
 
229
- # Embedding the base64 string in Markdown
230
- markdown_image = f"![Alt text](data:image/png;base64,{img_str})"
231
- image_dict[key] = {"img":img,"md":markdown_image,"caption":doc.page_content,"key":key,"figure_code":doc.metadata["figure_code"]}
232
  except Exception as e:
233
  print(f"Skipped adding image {i} because of {e}")
 
 
 
234
 
235
- if len(image_dict) > 0:
236
-
237
- gallery = [x["img"] for x in list(image_dict.values())]
238
- img = list(image_dict.values())[0]
239
- img_md = img["md"]
240
- img_caption = img["caption"]
241
- img_code = img["figure_code"]
242
- if img_code != "N/A":
243
- img_name = f"{img['key']} - {img['figure_code']}"
244
- else:
245
- img_name = f"{img['key']}"
246
-
247
- history.append(ChatMessage(role="assistant", content = f"\n\n{img_md}\n<p class='chatbot-caption'><b>{img_name}</b> - {img_caption}</p>"))
248
- # answer_yet = history[-1][1] + f"\n\n{img_md}\n<p class='chatbot-caption'><b>{img_name}</b> - {img_caption}</p>"
249
- # history[-1] = (history[-1][0],answer_yet)
250
- # history = [tuple(x) for x in history]
251
-
252
- yield history,docs_html,output_query,output_language,gallery#,output_query,output_keywords
253
 
254
 
255
  def save_feedback(feed: str, user_id):
@@ -382,6 +409,9 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
382
  with gr.Tab("Sources",elem_id = "tab-citations",id = 1):
383
  sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
384
  docs_textbox = gr.State("")
 
 
 
385
 
386
  # with Modal(visible = False) as config_modal:
387
  with gr.Tab("Configuration",elem_id = "tab-config",id = 2):
@@ -415,6 +445,10 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
415
  output_language = gr.Textbox(label="Language",show_label = True,elem_id = "language",lines = 1,interactive = False)
416
 
417
 
 
 
 
 
418
 
419
  #---------------------------------------------------------------------------------------
420
  # OTHER TABS
@@ -463,13 +497,13 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
463
 
464
  (textbox
465
  .submit(start_chat, [textbox,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
466
- .then(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component],concurrency_limit = 8,api_name = "chat_textbox")
467
  .then(finish_chat, None, [textbox],api_name = "finish_chat_textbox")
468
  )
469
 
470
  (examples_hidden
471
  .change(start_chat, [examples_hidden,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_examples")
472
- .then(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component],concurrency_limit = 8,api_name = "chat_examples")
473
  .then(finish_chat, None, [textbox],api_name = "finish_chat_examples")
474
  )
475
 
 
32
  # ClimateQ&A imports
33
  from climateqa.engine.llm import get_llm
34
  from climateqa.engine.vectorstore import get_pinecone_vectorstore
35
+ # from climateqa.knowledge.retriever import ClimateQARetriever
36
  from climateqa.engine.reranker import get_reranker
37
  from climateqa.engine.embeddings import get_embeddings_function
38
  from climateqa.engine.chains.prompts import audience_prompts
 
43
  # from climateqa.engine.chains.answer_rag import make_rag_papers_chain
44
  from climateqa.engine.graph import make_graph_agent,display_graph
45
 
46
+ from front.utils import make_html_source, make_html_figure_sources,parse_output_llm_with_sources,serialize_docs,make_toolbox
47
 
48
  # Load environment variables in local mode
49
  try:
 
133
  output_keywords = ""
134
  gallery = []
135
  start_streaming = False
136
+ figures = '<div class="figures-container"> <p> Go to the "Figures" tab at the top of the page to see full size images </p> </div>'
137
 
138
  steps_display = {
139
  "categorize_intent":("🔄️ Analyzing user message",True),
 
152
  try:
153
  docs = event["data"]["output"]["documents"]
154
  docs_html = []
155
+ textual_docs = [d for d in docs if d.metadata["chunk_type"] == "text"]
156
+ for i, d in enumerate(textual_docs, 1):
157
+ if d.metadata["chunk_type"] == "text":
158
+ docs_html.append(make_html_source(d, i))
159
 
160
+ used_documents = used_documents + [f"{d.metadata['short_name']} - {d.metadata['name']}" for d in docs]
161
  history[-1].content = "Adding sources :\n\n - " + "\n - ".join(np.unique(used_documents))
162
 
163
  docs_html = "".join(docs_html)
 
187
  if event["name"] == "categorize_intent" and event["event"] == "on_chain_start":
188
  print("X")
189
 
190
+ yield history,docs_html,output_query,output_language,gallery, figures #,output_query,output_keywords
191
 
192
  except Exception as e:
193
  print(event, "has failed")
 
215
  print(f"Error logging on Azure Blob Storage: {e}")
216
  raise gr.Error(f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)")
217
 
218
+
219
+
220
+
221
+ # image_dict = {}
222
+ # for i,doc in enumerate(docs):
223
+
224
+ # if doc.metadata["chunk_type"] == "image":
225
+ # try:
226
+ # key = f"Image {i+1}"
227
+ # image_path = doc.metadata["image_path"].split("documents/")[1]
228
+ # img = get_image_from_azure_blob_storage(image_path)
229
+
230
+ # # Convert the image to a byte buffer
231
+ # buffered = BytesIO()
232
+ # img.save(buffered, format="PNG")
233
+ # img_str = base64.b64encode(buffered.getvalue()).decode()
234
+
235
+ # # Embedding the base64 string in Markdown
236
+ # markdown_image = f"![Alt text](data:image/png;base64,{img_str})"
237
+ # image_dict[key] = {"img":img,"md":markdown_image,"short_name": doc.metadata["short_name"],"figure_code":doc.metadata["figure_code"],"caption":doc.page_content,"key":key,"figure_code":doc.metadata["figure_code"], "img_str" : img_str}
238
+ # except Exception as e:
239
+ # print(f"Skipped adding image {i} because of {e}")
240
+
241
+ # if len(image_dict) > 0:
242
+
243
+ # gallery = [x["img"] for x in list(image_dict.values())]
244
+ # img = list(image_dict.values())[0]
245
+ # img_md = img["md"]
246
+ # img_caption = img["caption"]
247
+ # img_code = img["figure_code"]
248
+ # if img_code != "N/A":
249
+ # img_name = f"{img['key']} - {img['figure_code']}"
250
+ # else:
251
+ # img_name = f"{img['key']}"
252
+
253
+ # history.append(ChatMessage(role="assistant", content = f"\n\n{img_md}\n<p class='chatbot-caption'><b>{img_name}</b> - {img_caption}</p>"))
254
 
255
+ docs_figures = [d for d in docs if d.metadata["chunk_type"] == "image"]
256
+ for i, doc in enumerate(docs_figures):
257
  if doc.metadata["chunk_type"] == "image":
258
  try:
259
  key = f"Image {i+1}"
260
+
261
  image_path = doc.metadata["image_path"].split("documents/")[1]
262
  img = get_image_from_azure_blob_storage(image_path)
263
 
 
265
  buffered = BytesIO()
266
  img.save(buffered, format="PNG")
267
  img_str = base64.b64encode(buffered.getvalue()).decode()
268
+
269
+ figures = figures + make_html_figure_sources(doc, i, img_str)
270
+
271
+ gallery.append(img)
272
 
 
 
 
273
  except Exception as e:
274
  print(f"Skipped adding image {i} because of {e}")
275
+
276
+
277
+
278
 
279
+ yield history,docs_html,output_query,output_language,gallery, figures#,output_query,output_keywords
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
 
281
 
282
  def save_feedback(feed: str, user_id):
 
409
  with gr.Tab("Sources",elem_id = "tab-citations",id = 1):
410
  sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
411
  docs_textbox = gr.State("")
412
+
413
+
414
+
415
 
416
  # with Modal(visible = False) as config_modal:
417
  with gr.Tab("Configuration",elem_id = "tab-config",id = 2):
 
445
  output_language = gr.Textbox(label="Language",show_label = True,elem_id = "language",lines = 1,interactive = False)
446
 
447
 
448
+ with gr.Tab("Figures",elem_id = "tab-figures",id = 3):
449
+ figures_cards = gr.HTML(show_label=False, elem_id="sources-figures")
450
+
451
+
452
 
453
  #---------------------------------------------------------------------------------------
454
  # OTHER TABS
 
497
 
498
  (textbox
499
  .submit(start_chat, [textbox,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
500
+ .then(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component,figures_cards],concurrency_limit = 8,api_name = "chat_textbox")
501
  .then(finish_chat, None, [textbox],api_name = "finish_chat_textbox")
502
  )
503
 
504
  (examples_hidden
505
  .change(start_chat, [examples_hidden,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_examples")
506
+ .then(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component, figures_cards],concurrency_limit = 8,api_name = "chat_examples")
507
  .then(finish_chat, None, [textbox],api_name = "finish_chat_examples")
508
  )
509
 
front/utils.py CHANGED
@@ -55,7 +55,7 @@ def make_html_source(source,i):
55
  score = meta['reranking_score']
56
  if score > 0.8:
57
  color = "score-green"
58
- elif score > 0.4:
59
  color = "score-orange"
60
  else:
61
  color = "score-red"
@@ -91,8 +91,9 @@ def make_html_source(source,i):
91
  <div class="card card-image">
92
  <div class="card-content">
93
  <h2>Image {i} - {title} - Page {int(meta['page_number'])}</h2>
94
- <p>{content}</p>
95
  <p class='ai-generated'>AI-generated description</p>
 
 
96
  {relevancy_score}
97
  </div>
98
  <div class="card-footer">
@@ -107,6 +108,53 @@ def make_html_source(source,i):
107
  return card
108
 
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
  def make_toolbox(tool_name,description = "",checked = False,elem_id = "toggle"):
112
 
 
55
  score = meta['reranking_score']
56
  if score > 0.8:
57
  color = "score-green"
58
+ elif score > 0.5:
59
  color = "score-orange"
60
  else:
61
  color = "score-red"
 
91
  <div class="card card-image">
92
  <div class="card-content">
93
  <h2>Image {i} - {title} - Page {int(meta['page_number'])}</h2>
 
94
  <p class='ai-generated'>AI-generated description</p>
95
+ <p>{content}</p>
96
+
97
  {relevancy_score}
98
  </div>
99
  <div class="card-footer">
 
108
  return card
109
 
110
 
111
+ def make_html_figure_sources(source,i,img_str):
112
+ meta = source.metadata
113
+ content = source.page_content.strip()
114
+
115
+ score = meta['reranking_score']
116
+ if score > 0.8:
117
+ color = "score-green"
118
+ elif score > 0.5:
119
+ color = "score-orange"
120
+ else:
121
+ color = "score-red"
122
+
123
+ toc_levels = []
124
+ if len(toc_levels) > 0:
125
+ name = f"<b>{toc_levels}</b><br/>{meta['name']}"
126
+ else:
127
+ name = meta['name']
128
+
129
+ relevancy_score = f"<p class=relevancy-score>Relevancy score: <span class='{color}'>{score:.1%}</span></p>"
130
+
131
+ if meta["figure_code"] != "N/A":
132
+ title = f"{meta['figure_code']} - {meta['short_name']}"
133
+ else:
134
+ title = f"{meta['short_name']}"
135
+
136
+ card = f"""
137
+ <div class="card card-image">
138
+ <div class="card-content">
139
+ <h2>Image {i} - {title} - Page {int(meta['page_number'])}</h2>
140
+ <p class='ai-generated'>AI-generated description</p>
141
+ <img src="data:image/png;base64, { img_str } alt="Alt text" />
142
+
143
+ <p>{content}</p>
144
+
145
+ {relevancy_score}
146
+ </div>
147
+ <div class="card-footer">
148
+ <span>{name}</span>
149
+ <a href="{meta['url']}#page={int(meta['page_number'])}" target="_blank" class="pdf-link">
150
+ <span role="img" aria-label="Open PDF">🔗</span>
151
+ </a>
152
+ </div>
153
+ </div>
154
+ """
155
+ return card
156
+
157
+
158
 
159
  def make_toolbox(tool_name,description = "",checked = False,elem_id = "toggle"):
160
 
sandbox/20240310 - CQA - Semantic Routing 1.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
style.css CHANGED
@@ -206,6 +206,11 @@ label.selected{
206
  overflow-y: auto !important;
207
  }
208
 
 
 
 
 
 
209
  div#tab-config{
210
  height:calc(100vh - 190px) !important;
211
  overflow-y: auto !important;
@@ -475,7 +480,7 @@ span.chatbot > p > img{
475
  color:orange !important;
476
  }
477
 
478
- .score-orange{
479
  color:red !important;
480
  }
481
  .message-buttons-left.panel.message-buttons.with-avatar {
 
206
  overflow-y: auto !important;
207
  }
208
 
209
+ div#sources-figures{
210
+ height:calc(100vh - 190px) !important;
211
+ overflow-y: auto !important;
212
+ }
213
+
214
  div#tab-config{
215
  height:calc(100vh - 190px) !important;
216
  overflow-y: auto !important;
 
480
  color:orange !important;
481
  }
482
 
483
+ .score-red{
484
  color:red !important;
485
  }
486
  .message-buttons-left.panel.message-buttons.with-avatar {