Merge branch 'main' into feature/graph_recommandation
Browse files- .gitignore +1 -0
- README.md +1 -1
- app.py +84 -170
- climateqa/engine/chains/answer_ai_impact.py +0 -1
- climateqa/engine/chains/intent_categorization.py +5 -32
- climateqa/engine/chains/query_transformation.py +1 -0
- climateqa/engine/chains/retrieve_documents.py +6 -4
- climateqa/engine/graph.py +1 -141
- climateqa/engine/llm/openai.py +1 -1
- climateqa/engine/reranker.py +1 -2
- front/utils.py +50 -2
- requirements.txt +1 -0
- style.css +65 -13
.gitignore
CHANGED
@@ -10,3 +10,4 @@ notebooks/
|
|
10 |
**/.flashrank_cache/
|
11 |
|
12 |
data/
|
|
|
|
10 |
**/.flashrank_cache/
|
11 |
|
12 |
data/
|
13 |
+
sandbox/
|
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: 🌍
|
|
4 |
colorFrom: blue
|
5 |
colorTo: red
|
6 |
sdk: gradio
|
7 |
-
sdk_version:
|
8 |
app_file: app.py
|
9 |
fullWidth: true
|
10 |
pinned: false
|
|
|
4 |
colorFrom: blue
|
5 |
colorTo: red
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 5.0.2
|
8 |
app_file: app.py
|
9 |
fullWidth: true
|
10 |
pinned: false
|
app.py
CHANGED
@@ -33,7 +33,7 @@ from collections import defaultdict
|
|
33 |
# ClimateQ&A imports
|
34 |
from climateqa.engine.llm import get_llm
|
35 |
from climateqa.engine.vectorstore import get_pinecone_vectorstore
|
36 |
-
from climateqa.knowledge.retriever import ClimateQARetriever
|
37 |
from climateqa.engine.reranker import get_reranker
|
38 |
from climateqa.engine.embeddings import get_embeddings_function
|
39 |
from climateqa.engine.chains.prompts import audience_prompts
|
@@ -47,6 +47,8 @@ from climateqa.engine.embeddings import get_embeddings_function
|
|
47 |
|
48 |
from front.utils import make_html_source,parse_output_llm_with_sources,serialize_docs,make_toolbox,generate_html_graphs
|
49 |
|
|
|
|
|
50 |
# Load environment variables in local mode
|
51 |
try:
|
52 |
from dotenv import load_dotenv
|
@@ -88,13 +90,12 @@ share_client = service.get_share_client(file_share_name)
|
|
88 |
user_id = create_user_id()
|
89 |
|
90 |
|
91 |
-
embeddings_function = get_embeddings_function()
|
92 |
-
llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
|
93 |
-
reranker = get_reranker("nano")
|
94 |
|
95 |
# Create vectorstore and retriever
|
96 |
vectorstore = get_pinecone_vectorstore(embeddings_function)
|
97 |
-
|
|
|
|
|
98 |
|
99 |
# agent = make_graph_agent(llm,vectorstore,reranker)
|
100 |
agent = make_graph_agent(llm=llm, vectorstore_ipcc=vectorstore, vectorstore_graphs=vectorstore_graphs, reranker=reranker)
|
@@ -140,6 +141,7 @@ async def chat(query,history,audience,sources,reports,current_graphs):
|
|
140 |
gallery = []
|
141 |
updates = []
|
142 |
start_streaming = False
|
|
|
143 |
|
144 |
steps_display = {
|
145 |
"categorize_intent":("🔄️ Analyzing user message",True),
|
@@ -151,11 +153,6 @@ async def chat(query,history,audience,sources,reports,current_graphs):
|
|
151 |
answer_message_content = ""
|
152 |
try:
|
153 |
async for event in result:
|
154 |
-
|
155 |
-
# if event["event"] == "on_chat_model_stream" and event["metadata"]["langgraph_node"] in ["answer_rag", "answer_rag_no_docs", "answer_chitchat", "answer_ai_impact"]:
|
156 |
-
# if start_streaming == False:
|
157 |
-
# start_streaming = True
|
158 |
-
# history[-1] = (query,"")
|
159 |
if "langgraph_node" in event["metadata"]:
|
160 |
node = event["metadata"]["langgraph_node"]
|
161 |
|
@@ -163,10 +160,12 @@ async def chat(query,history,audience,sources,reports,current_graphs):
|
|
163 |
try:
|
164 |
docs = event["data"]["output"]["documents"]
|
165 |
docs_html = []
|
166 |
-
|
167 |
-
|
|
|
|
|
168 |
|
169 |
-
used_documents = used_documents + [d.metadata[
|
170 |
history[-1].content = "Adding sources :\n\n - " + "\n - ".join(np.unique(used_documents))
|
171 |
|
172 |
docs_html = "".join(docs_html)
|
@@ -180,15 +179,15 @@ async def chat(query,history,audience,sources,reports,current_graphs):
|
|
180 |
if not hasattr(history[-1], 'metadata') or history[-1].metadata["title"] != event_description: # if a new step begins
|
181 |
history.append(ChatMessage(role="assistant", content = "", metadata={'title' :event_description}))
|
182 |
|
183 |
-
elif event["name"] != "transform_query" and event["event"] == "on_chat_model_stream" and node in ["answer_rag", "answer_search"]:# if streaming answer
|
184 |
if start_streaming == False:
|
185 |
start_streaming = True
|
186 |
history.append(ChatMessage(role="assistant", content = ""))
|
187 |
answer_message_content += event["data"]["chunk"].content
|
188 |
answer_message_content = parse_output_llm_with_sources(answer_message_content)
|
189 |
history[-1] = ChatMessage(role="assistant", content = answer_message_content)
|
190 |
-
|
191 |
-
|
192 |
elif event["name"] in ["retrieve_graphs", "retrieve_graphs_ai"] and event["event"] == "on_chain_end":
|
193 |
try:
|
194 |
recommended_content = event["data"]["output"]["recommended_content"]
|
@@ -239,116 +238,9 @@ async def chat(query,history,audience,sources,reports,current_graphs):
|
|
239 |
|
240 |
except Exception as e:
|
241 |
print(f"Error getting graphs: {e}")
|
242 |
-
|
243 |
-
|
244 |
-
# history.append(ChatMessage(role="assistant", content = new_message_content))
|
245 |
-
|
246 |
-
# if docs_used is True and event["metadata"]["langgraph_node"] in ["answer_rag_no_docs", "answer_chitchat", "answer_ai_impact"]:
|
247 |
-
# docs_used = False
|
248 |
-
|
249 |
-
# elif docs_used is True and event["name"] == "retrieve_documents" and event["event"] == "on_chain_end":
|
250 |
-
# try:
|
251 |
-
# docs = event["data"]["output"]["documents"]
|
252 |
-
# docs_html = []
|
253 |
-
# for i, d in enumerate(docs, 1):
|
254 |
-
# docs_html.append(make_html_source(d, i))
|
255 |
-
# docs_html = "".join(docs_html)
|
256 |
-
|
257 |
-
# except Exception as e:
|
258 |
-
# print(f"Error getting documents: {e}")
|
259 |
-
# print(event)
|
260 |
-
|
261 |
-
# # elif event["name"] == "retrieve_documents" and event["event"] == "on_chain_start":
|
262 |
-
# # print(event)
|
263 |
-
# # questions = event["data"]["input"]["questions"]
|
264 |
-
# # questions = "\n".join([f"{i+1}. {q['question']} ({q['source']})" for i,q in enumerate(questions)])
|
265 |
-
# # answer_yet = "🔄️ Searching in the knowledge base\n{questions}"
|
266 |
-
# # history[-1] = (query,answer_yet)
|
267 |
-
|
268 |
-
# elif event["name"] in ["retrieve_graphs", "retrieve_graphs_ai"] and event["event"] == "on_chain_end":
|
269 |
-
# try:
|
270 |
-
# recommended_content = event["data"]["output"]["recommended_content"]
|
271 |
-
# # graphs = [
|
272 |
-
# # {
|
273 |
-
# # "embedding": x.metadata["returned_content"],
|
274 |
-
# # "metadata": {
|
275 |
-
# # "source": x.metadata["source"],
|
276 |
-
# # "category": x.metadata["category"]
|
277 |
-
# # }
|
278 |
-
# # } for x in recommended_content if x.metadata["source"] == "OWID"
|
279 |
-
# # ]
|
280 |
-
|
281 |
-
# unique_graphs = []
|
282 |
-
# seen_embeddings = set()
|
283 |
-
|
284 |
-
# for x in recommended_content:
|
285 |
-
# embedding = x.metadata["returned_content"]
|
286 |
-
|
287 |
-
# # Check if the embedding has already been seen
|
288 |
-
# if embedding not in seen_embeddings:
|
289 |
-
# unique_graphs.append({
|
290 |
-
# "embedding": embedding,
|
291 |
-
# "metadata": {
|
292 |
-
# "source": x.metadata["source"],
|
293 |
-
# "category": x.metadata["category"]
|
294 |
-
# }
|
295 |
-
# })
|
296 |
-
# # Add the embedding to the seen set
|
297 |
-
# seen_embeddings.add(embedding)
|
298 |
-
|
299 |
-
|
300 |
-
# categories = {}
|
301 |
-
# for graph in unique_graphs:
|
302 |
-
# category = graph['metadata']['category']
|
303 |
-
# if category not in categories:
|
304 |
-
# categories[category] = []
|
305 |
-
# categories[category].append(graph['embedding'])
|
306 |
-
|
307 |
-
# # graphs_html = ""
|
308 |
-
# for category, embeddings in categories.items():
|
309 |
-
# # graphs_html += f"<h3>{category}</h3>"
|
310 |
-
# # current_graphs.append(f"<h3>{category}</h3>")
|
311 |
-
# for embedding in embeddings:
|
312 |
-
# current_graphs.append([embedding, category])
|
313 |
-
# # graphs_html += f"<div>{embedding}</div>"
|
314 |
-
|
315 |
-
# except Exception as e:
|
316 |
-
# print(f"Error getting graphs: {e}")
|
317 |
-
|
318 |
-
# for event_name,(event_description,display_output) in steps_display.items():
|
319 |
-
# if event["name"] == event_name:
|
320 |
-
# if event["event"] == "on_chain_start":
|
321 |
-
# # answer_yet = f"<p><span class='loader'></span>{event_description}</p>"
|
322 |
-
# # answer_yet = make_toolbox(event_description, "", checked = False)
|
323 |
-
# answer_yet = event_description
|
324 |
-
|
325 |
-
# history[-1] = (query,answer_yet)
|
326 |
-
# # elif event["event"] == "on_chain_end":
|
327 |
-
# # answer_yet = ""
|
328 |
-
# # history[-1] = (query,answer_yet)
|
329 |
-
# # if display_output:
|
330 |
-
# # print(event["data"]["output"])
|
331 |
-
|
332 |
-
# # if op['path'] == path_reformulation: # reforulated question
|
333 |
-
# # try:
|
334 |
-
# # output_language = op['value']["language"] # str
|
335 |
-
# # output_query = op["value"]["question"]
|
336 |
-
# # except Exception as e:
|
337 |
-
# # raise gr.Error(f"ClimateQ&A Error: {e} - The error has been noted, try another question and if the error remains, you can contact us :)")
|
338 |
-
|
339 |
-
# # if op["path"] == path_keywords:
|
340 |
-
# # try:
|
341 |
-
# # output_keywords = op['value']["keywords"] # str
|
342 |
-
# # output_keywords = " AND ".join(output_keywords)
|
343 |
-
# # except Exception as e:
|
344 |
-
# # pass
|
345 |
|
346 |
|
347 |
|
348 |
-
# history = [tuple(x) for x in history]
|
349 |
-
# yield history,docs_html,output_query,output_language,gallery,current_graphs #,output_query,output_keywords
|
350 |
-
|
351 |
-
|
352 |
if event["name"] == "transform_query" and event["event"] =="on_chain_end":
|
353 |
if hasattr(history[-1],"content"):
|
354 |
history[-1].content += "Decompose question into sub-questions: \n\n - " + "\n - ".join([q["question"] for q in event["data"]["output"]["remaining_questions"]])
|
@@ -356,7 +248,7 @@ async def chat(query,history,audience,sources,reports,current_graphs):
|
|
356 |
if event["name"] == "categorize_intent" and event["event"] == "on_chain_start":
|
357 |
print("X")
|
358 |
|
359 |
-
yield history,docs_html,output_query,output_language,gallery, current_graphs #,output_query,output_keywords
|
360 |
|
361 |
except Exception as e:
|
362 |
print(event, "has failed")
|
@@ -368,7 +260,7 @@ async def chat(query,history,audience,sources,reports,current_graphs):
|
|
368 |
if os.getenv("GRADIO_ENV") != "local":
|
369 |
timestamp = str(datetime.now().timestamp())
|
370 |
file = timestamp + ".json"
|
371 |
-
prompt = history[
|
372 |
logs = {
|
373 |
"user_id": str(user_id),
|
374 |
"prompt": prompt,
|
@@ -376,7 +268,7 @@ async def chat(query,history,audience,sources,reports,current_graphs):
|
|
376 |
"question":output_query,
|
377 |
"sources":sources,
|
378 |
"docs":serialize_docs(docs),
|
379 |
-
"answer": history[-1]
|
380 |
"time": timestamp,
|
381 |
}
|
382 |
log_on_azure(file, logs, share_client)
|
@@ -384,12 +276,49 @@ async def chat(query,history,audience,sources,reports,current_graphs):
|
|
384 |
print(f"Error logging on Azure Blob Storage: {e}")
|
385 |
raise gr.Error(f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)")
|
386 |
|
387 |
-
|
388 |
-
|
|
|
|
|
|
|
389 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
390 |
if doc.metadata["chunk_type"] == "image":
|
391 |
try:
|
392 |
key = f"Image {i+1}"
|
|
|
393 |
image_path = doc.metadata["image_path"].split("documents/")[1]
|
394 |
img = get_image_from_azure_blob_storage(image_path)
|
395 |
|
@@ -397,45 +326,18 @@ async def chat(query,history,audience,sources,reports,current_graphs):
|
|
397 |
buffered = BytesIO()
|
398 |
img.save(buffered, format="PNG")
|
399 |
img_str = base64.b64encode(buffered.getvalue()).decode()
|
|
|
|
|
|
|
|
|
400 |
|
401 |
-
# Embedding the base64 string in Markdown
|
402 |
-
markdown_image = f"![Alt text](data:image/png;base64,{img_str})"
|
403 |
-
image_dict[key] = {"img":img,"md":markdown_image,"caption":doc.page_content,"key":key,"figure_code":doc.metadata["figure_code"]}
|
404 |
except Exception as e:
|
405 |
print(f"Skipped adding image {i} because of {e}")
|
|
|
|
|
|
|
406 |
|
407 |
-
|
408 |
-
|
409 |
-
gallery = [x["img"] for x in list(image_dict.values())]
|
410 |
-
img = list(image_dict.values())[0]
|
411 |
-
img_md = img["md"]
|
412 |
-
img_caption = img["caption"]
|
413 |
-
img_code = img["figure_code"]
|
414 |
-
if img_code != "N/A":
|
415 |
-
img_name = f"{img['key']} - {img['figure_code']}"
|
416 |
-
else:
|
417 |
-
img_name = f"{img['key']}"
|
418 |
-
|
419 |
-
history.append(ChatMessage(role="assistant", content = f"\n\n{img_md}\n<p class='chatbot-caption'><b>{img_name}</b> - {img_caption}</p>"))
|
420 |
-
|
421 |
-
# print(f"\n\nImages:\n{gallery}")
|
422 |
-
|
423 |
-
# # gallery = [x.metadata["image_path"] for x in docs if (len(x.metadata["image_path"]) > 0 and "IAS" in x.metadata["image_path"])]
|
424 |
-
# # if len(gallery) > 0:
|
425 |
-
# # gallery = list(set("|".join(gallery).split("|")))
|
426 |
-
# # gallery = [get_image_from_azure_blob_storage(x) for x in gallery]
|
427 |
-
|
428 |
-
# yield history,docs_html,output_query,output_language,gallery,current_graphs #,output_query,output_keywords
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
# # else:
|
433 |
-
# # docs_string = "No relevant passages found in the climate science reports (IPCC and IPBES)"
|
434 |
-
# # complete_response = "**No relevant passages found in the climate science reports (IPCC and IPBES), you may want to ask a more specific question (specifying your question on climate issues).**"
|
435 |
-
# # messages.append({"role": "assistant", "content": complete_response})
|
436 |
-
# # gradio_format = make_pairs([a["content"] for a in messages[1:]])
|
437 |
-
# # yield gradio_format, messages, docs_string
|
438 |
-
yield history,docs_html,output_query,output_language,gallery, current_graphs#,output_query,output_keywords
|
439 |
|
440 |
|
441 |
def save_feedback(feed: str, user_id):
|
@@ -498,6 +400,10 @@ Hello, I am ClimateQ&A, a conversational assistant designed to help you understa
|
|
498 |
⚠️ Limitations
|
499 |
*Please note that the AI is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
|
500 |
|
|
|
|
|
|
|
|
|
501 |
What do you want to learn ?
|
502 |
"""
|
503 |
|
@@ -517,8 +423,7 @@ def save_graph(saved_graphs_state, embedding, category):
|
|
517 |
return saved_graphs_state, gr.Button("Graph Saved")
|
518 |
|
519 |
|
520 |
-
|
521 |
-
# user_id_state = gr.State([user_id])
|
522 |
|
523 |
# chat_completed_state = gr.State(0)
|
524 |
# current_graphs = gr.State([])
|
@@ -532,7 +437,6 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
532 |
|
533 |
with gr.Row(elem_id="chatbot-row"):
|
534 |
with gr.Column(scale=2):
|
535 |
-
# state = gr.State([system_template])
|
536 |
chatbot = gr.Chatbot(
|
537 |
value = [ChatMessage(role="assistant", content=init_prompt)],
|
538 |
type = "messages",
|
@@ -541,6 +445,8 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
541 |
elem_id="chatbot",
|
542 |
layout = "panel",
|
543 |
avatar_images = (None,"https://i.ibb.co/YNyd5W2/logo4.png"),
|
|
|
|
|
544 |
)
|
545 |
|
546 |
# bot.like(vote,None,None)
|
@@ -585,6 +491,9 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
585 |
with gr.Tab("Sources",elem_id = "tab-citations",id = 1):
|
586 |
sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
|
587 |
docs_textbox = gr.State("")
|
|
|
|
|
|
|
588 |
|
589 |
# with Modal(visible = False) as config_modal:
|
590 |
with gr.Tab("Configuration",elem_id = "tab-config",id = 2):
|
@@ -656,6 +565,10 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
656 |
# )
|
657 |
|
658 |
|
|
|
|
|
|
|
|
|
659 |
|
660 |
#---------------------------------------------------------------------------------------
|
661 |
# OTHER TABS
|
@@ -752,7 +665,7 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
752 |
# history = history + [(query,None)]
|
753 |
# history = [tuple(x) for x in history]
|
754 |
history = history + [ChatMessage(role="user", content=query)]
|
755 |
-
return (gr.update(interactive = False),gr.update(selected=
|
756 |
|
757 |
def finish_chat():
|
758 |
return (gr.update(interactive = True,value = ""),gr.update(selected=3))
|
@@ -765,10 +678,11 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
765 |
|
766 |
(textbox
|
767 |
.submit(start_chat, [textbox,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
|
768 |
-
.then(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports, current_graphs], [chatbot,sources_textbox,output_query,output_language,gallery_component, current_graphs],concurrency_limit = 8,api_name = "chat_textbox")
|
769 |
.then(finish_chat, None, [textbox,tabs],api_name = "finish_chat_textbox")
|
770 |
.then(change_completion_status, [chat_completed_state], [chat_completed_state])
|
771 |
# .then(lambda graphs : generate_html_graphs(graphs), [current_graphs], [graphs_container],)
|
|
|
772 |
)
|
773 |
|
774 |
(examples_hidden
|
@@ -777,7 +691,7 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
777 |
# .then(finish_chat, None, [textbox,tabs],api_name = "finish_chat_examples")
|
778 |
# .then(change_completion_status, [chat_completed_state], [chat_completed_state])
|
779 |
.change(start_chat, [examples_hidden,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_examples")
|
780 |
-
.then(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports, current_graphs], [chatbot,sources_textbox,output_query,output_language,gallery_component, current_graphs],concurrency_limit = 8,api_name = "chat_examples")
|
781 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_examples")
|
782 |
# .then(lambda graphs : graphs, [current_graphs], [graphs_container])
|
783 |
|
@@ -797,4 +711,4 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
797 |
|
798 |
demo.queue()
|
799 |
|
800 |
-
demo.launch(
|
|
|
33 |
# ClimateQ&A imports
|
34 |
from climateqa.engine.llm import get_llm
|
35 |
from climateqa.engine.vectorstore import get_pinecone_vectorstore
|
36 |
+
# from climateqa.knowledge.retriever import ClimateQARetriever
|
37 |
from climateqa.engine.reranker import get_reranker
|
38 |
from climateqa.engine.embeddings import get_embeddings_function
|
39 |
from climateqa.engine.chains.prompts import audience_prompts
|
|
|
47 |
|
48 |
from front.utils import make_html_source,parse_output_llm_with_sources,serialize_docs,make_toolbox,generate_html_graphs
|
49 |
|
50 |
+
from front.utils import make_html_source, make_html_figure_sources,parse_output_llm_with_sources,serialize_docs,make_toolbox
|
51 |
+
|
52 |
# Load environment variables in local mode
|
53 |
try:
|
54 |
from dotenv import load_dotenv
|
|
|
90 |
user_id = create_user_id()
|
91 |
|
92 |
|
|
|
|
|
|
|
93 |
|
94 |
# Create vectorstore and retriever
|
95 |
vectorstore = get_pinecone_vectorstore(embeddings_function)
|
96 |
+
llm = get_llm(provider="openai",max_tokens = 1024,temperature = 0.0)
|
97 |
+
reranker = get_reranker("large")
|
98 |
+
agent = make_graph_agent(llm,vectorstore,reranker)
|
99 |
|
100 |
# agent = make_graph_agent(llm,vectorstore,reranker)
|
101 |
agent = make_graph_agent(llm=llm, vectorstore_ipcc=vectorstore, vectorstore_graphs=vectorstore_graphs, reranker=reranker)
|
|
|
141 |
gallery = []
|
142 |
updates = []
|
143 |
start_streaming = False
|
144 |
+
figures = '<div class="figures-container"> <p> Go to the "Figures" tab at the top of the page to see full size images </p> </div>'
|
145 |
|
146 |
steps_display = {
|
147 |
"categorize_intent":("🔄️ Analyzing user message",True),
|
|
|
153 |
answer_message_content = ""
|
154 |
try:
|
155 |
async for event in result:
|
|
|
|
|
|
|
|
|
|
|
156 |
if "langgraph_node" in event["metadata"]:
|
157 |
node = event["metadata"]["langgraph_node"]
|
158 |
|
|
|
160 |
try:
|
161 |
docs = event["data"]["output"]["documents"]
|
162 |
docs_html = []
|
163 |
+
textual_docs = [d for d in docs if d.metadata["chunk_type"] == "text"]
|
164 |
+
for i, d in enumerate(textual_docs, 1):
|
165 |
+
if d.metadata["chunk_type"] == "text":
|
166 |
+
docs_html.append(make_html_source(d, i))
|
167 |
|
168 |
+
used_documents = used_documents + [f"{d.metadata['short_name']} - {d.metadata['name']}" for d in docs]
|
169 |
history[-1].content = "Adding sources :\n\n - " + "\n - ".join(np.unique(used_documents))
|
170 |
|
171 |
docs_html = "".join(docs_html)
|
|
|
179 |
if not hasattr(history[-1], 'metadata') or history[-1].metadata["title"] != event_description: # if a new step begins
|
180 |
history.append(ChatMessage(role="assistant", content = "", metadata={'title' :event_description}))
|
181 |
|
182 |
+
elif event["name"] != "transform_query" and event["event"] == "on_chat_model_stream" and node in ["answer_rag", "answer_search","answer_chitchat"]:# if streaming answer
|
183 |
if start_streaming == False:
|
184 |
start_streaming = True
|
185 |
history.append(ChatMessage(role="assistant", content = ""))
|
186 |
answer_message_content += event["data"]["chunk"].content
|
187 |
answer_message_content = parse_output_llm_with_sources(answer_message_content)
|
188 |
history[-1] = ChatMessage(role="assistant", content = answer_message_content)
|
189 |
+
# history.append(ChatMessage(role="assistant", content = new_message_content))
|
190 |
+
|
191 |
elif event["name"] in ["retrieve_graphs", "retrieve_graphs_ai"] and event["event"] == "on_chain_end":
|
192 |
try:
|
193 |
recommended_content = event["data"]["output"]["recommended_content"]
|
|
|
238 |
|
239 |
except Exception as e:
|
240 |
print(f"Error getting graphs: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
241 |
|
242 |
|
243 |
|
|
|
|
|
|
|
|
|
244 |
if event["name"] == "transform_query" and event["event"] =="on_chain_end":
|
245 |
if hasattr(history[-1],"content"):
|
246 |
history[-1].content += "Decompose question into sub-questions: \n\n - " + "\n - ".join([q["question"] for q in event["data"]["output"]["remaining_questions"]])
|
|
|
248 |
if event["name"] == "categorize_intent" and event["event"] == "on_chain_start":
|
249 |
print("X")
|
250 |
|
251 |
+
yield history,docs_html,output_query,output_language,gallery, figures, current_graphs #,output_query,output_keywords
|
252 |
|
253 |
except Exception as e:
|
254 |
print(event, "has failed")
|
|
|
260 |
if os.getenv("GRADIO_ENV") != "local":
|
261 |
timestamp = str(datetime.now().timestamp())
|
262 |
file = timestamp + ".json"
|
263 |
+
prompt = history[1]["content"]
|
264 |
logs = {
|
265 |
"user_id": str(user_id),
|
266 |
"prompt": prompt,
|
|
|
268 |
"question":output_query,
|
269 |
"sources":sources,
|
270 |
"docs":serialize_docs(docs),
|
271 |
+
"answer": history[-1].content,
|
272 |
"time": timestamp,
|
273 |
}
|
274 |
log_on_azure(file, logs, share_client)
|
|
|
276 |
print(f"Error logging on Azure Blob Storage: {e}")
|
277 |
raise gr.Error(f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)")
|
278 |
|
279 |
+
|
280 |
+
|
281 |
+
|
282 |
+
# image_dict = {}
|
283 |
+
# for i,doc in enumerate(docs):
|
284 |
|
285 |
+
# if doc.metadata["chunk_type"] == "image":
|
286 |
+
# try:
|
287 |
+
# key = f"Image {i+1}"
|
288 |
+
# image_path = doc.metadata["image_path"].split("documents/")[1]
|
289 |
+
# img = get_image_from_azure_blob_storage(image_path)
|
290 |
+
|
291 |
+
# # Convert the image to a byte buffer
|
292 |
+
# buffered = BytesIO()
|
293 |
+
# img.save(buffered, format="PNG")
|
294 |
+
# img_str = base64.b64encode(buffered.getvalue()).decode()
|
295 |
+
|
296 |
+
# # Embedding the base64 string in Markdown
|
297 |
+
# markdown_image = f"![Alt text](data:image/png;base64,{img_str})"
|
298 |
+
# image_dict[key] = {"img":img,"md":markdown_image,"short_name": doc.metadata["short_name"],"figure_code":doc.metadata["figure_code"],"caption":doc.page_content,"key":key,"figure_code":doc.metadata["figure_code"], "img_str" : img_str}
|
299 |
+
# except Exception as e:
|
300 |
+
# print(f"Skipped adding image {i} because of {e}")
|
301 |
+
|
302 |
+
# if len(image_dict) > 0:
|
303 |
+
|
304 |
+
# gallery = [x["img"] for x in list(image_dict.values())]
|
305 |
+
# img = list(image_dict.values())[0]
|
306 |
+
# img_md = img["md"]
|
307 |
+
# img_caption = img["caption"]
|
308 |
+
# img_code = img["figure_code"]
|
309 |
+
# if img_code != "N/A":
|
310 |
+
# img_name = f"{img['key']} - {img['figure_code']}"
|
311 |
+
# else:
|
312 |
+
# img_name = f"{img['key']}"
|
313 |
+
|
314 |
+
# history.append(ChatMessage(role="assistant", content = f"\n\n{img_md}\n<p class='chatbot-caption'><b>{img_name}</b> - {img_caption}</p>"))
|
315 |
+
|
316 |
+
docs_figures = [d for d in docs if d.metadata["chunk_type"] == "image"]
|
317 |
+
for i, doc in enumerate(docs_figures):
|
318 |
if doc.metadata["chunk_type"] == "image":
|
319 |
try:
|
320 |
key = f"Image {i+1}"
|
321 |
+
|
322 |
image_path = doc.metadata["image_path"].split("documents/")[1]
|
323 |
img = get_image_from_azure_blob_storage(image_path)
|
324 |
|
|
|
326 |
buffered = BytesIO()
|
327 |
img.save(buffered, format="PNG")
|
328 |
img_str = base64.b64encode(buffered.getvalue()).decode()
|
329 |
+
|
330 |
+
figures = figures + make_html_figure_sources(doc, i, img_str)
|
331 |
+
|
332 |
+
gallery.append(img)
|
333 |
|
|
|
|
|
|
|
334 |
except Exception as e:
|
335 |
print(f"Skipped adding image {i} because of {e}")
|
336 |
+
|
337 |
+
|
338 |
+
|
339 |
|
340 |
+
yield history,docs_html,output_query,output_language,gallery, figures#,output_query,output_keywords
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
341 |
|
342 |
|
343 |
def save_feedback(feed: str, user_id):
|
|
|
400 |
⚠️ Limitations
|
401 |
*Please note that the AI is not perfect and may sometimes give irrelevant answers. If you are not satisfied with the answer, please ask a more specific question or report your feedback to help us improve the system.*
|
402 |
|
403 |
+
🛈 Information
|
404 |
+
Please note that we log your questions for meta-analysis purposes, so avoid sharing any sensitive or personal information.
|
405 |
+
|
406 |
+
|
407 |
What do you want to learn ?
|
408 |
"""
|
409 |
|
|
|
423 |
return saved_graphs_state, gr.Button("Graph Saved")
|
424 |
|
425 |
|
426 |
+
with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=theme,elem_id = "main-component") as demo:
|
|
|
427 |
|
428 |
# chat_completed_state = gr.State(0)
|
429 |
# current_graphs = gr.State([])
|
|
|
437 |
|
438 |
with gr.Row(elem_id="chatbot-row"):
|
439 |
with gr.Column(scale=2):
|
|
|
440 |
chatbot = gr.Chatbot(
|
441 |
value = [ChatMessage(role="assistant", content=init_prompt)],
|
442 |
type = "messages",
|
|
|
445 |
elem_id="chatbot",
|
446 |
layout = "panel",
|
447 |
avatar_images = (None,"https://i.ibb.co/YNyd5W2/logo4.png"),
|
448 |
+
max_height="80vh",
|
449 |
+
height="100vh"
|
450 |
)
|
451 |
|
452 |
# bot.like(vote,None,None)
|
|
|
491 |
with gr.Tab("Sources",elem_id = "tab-citations",id = 1):
|
492 |
sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
|
493 |
docs_textbox = gr.State("")
|
494 |
+
|
495 |
+
|
496 |
+
|
497 |
|
498 |
# with Modal(visible = False) as config_modal:
|
499 |
with gr.Tab("Configuration",elem_id = "tab-config",id = 2):
|
|
|
565 |
# )
|
566 |
|
567 |
|
568 |
+
with gr.Tab("Figures",elem_id = "tab-figures",id = 3):
|
569 |
+
figures_cards = gr.HTML(show_label=False, elem_id="sources-figures")
|
570 |
+
|
571 |
+
|
572 |
|
573 |
#---------------------------------------------------------------------------------------
|
574 |
# OTHER TABS
|
|
|
665 |
# history = history + [(query,None)]
|
666 |
# history = [tuple(x) for x in history]
|
667 |
history = history + [ChatMessage(role="user", content=query)]
|
668 |
+
return (gr.update(interactive = False),gr.update(selected=1),history)
|
669 |
|
670 |
def finish_chat():
|
671 |
return (gr.update(interactive = True,value = ""),gr.update(selected=3))
|
|
|
678 |
|
679 |
(textbox
|
680 |
.submit(start_chat, [textbox,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
|
681 |
+
.then(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports, current_graphs], [chatbot,sources_textbox,output_query,output_language,gallery_component, figures_cards, current_graphs],concurrency_limit = 8,api_name = "chat_textbox")
|
682 |
.then(finish_chat, None, [textbox,tabs],api_name = "finish_chat_textbox")
|
683 |
.then(change_completion_status, [chat_completed_state], [chat_completed_state])
|
684 |
# .then(lambda graphs : generate_html_graphs(graphs), [current_graphs], [graphs_container],)
|
685 |
+
|
686 |
)
|
687 |
|
688 |
(examples_hidden
|
|
|
691 |
# .then(finish_chat, None, [textbox,tabs],api_name = "finish_chat_examples")
|
692 |
# .then(change_completion_status, [chat_completed_state], [chat_completed_state])
|
693 |
.change(start_chat, [examples_hidden,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_examples")
|
694 |
+
.then(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports, current_graphs], [chatbot,sources_textbox,output_query,output_language,gallery_component, figures_cards, current_graphs],concurrency_limit = 8,api_name = "chat_examples")
|
695 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_examples")
|
696 |
# .then(lambda graphs : graphs, [current_graphs], [graphs_container])
|
697 |
|
|
|
711 |
|
712 |
demo.queue()
|
713 |
|
714 |
+
demo.launch(ssr_mode=False)
|
climateqa/engine/chains/answer_ai_impact.py
CHANGED
@@ -38,7 +38,6 @@ def make_ai_impact_chain(llm):
|
|
38 |
def make_ai_impact_node(llm):
|
39 |
|
40 |
ai_impact_chain = make_ai_impact_chain(llm)
|
41 |
-
|
42 |
|
43 |
async def answer_ai_impact(state,config):
|
44 |
answer = await ai_impact_chain.ainvoke({"question":state["user_input"]},config)
|
|
|
38 |
def make_ai_impact_node(llm):
|
39 |
|
40 |
ai_impact_chain = make_ai_impact_chain(llm)
|
|
|
41 |
|
42 |
async def answer_ai_impact(state,config):
|
43 |
answer = await ai_impact_chain.ainvoke({"question":state["user_input"]},config)
|
climateqa/engine/chains/intent_categorization.py
CHANGED
@@ -7,34 +7,6 @@ from langchain_core.utils.function_calling import convert_to_openai_function
|
|
7 |
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
|
8 |
|
9 |
|
10 |
-
# class IntentCategorizer(BaseModel):
|
11 |
-
# """Analyzing the user message input"""
|
12 |
-
|
13 |
-
# language: str = Field(
|
14 |
-
# description="Find the language of the message input in full words (ex: French, English, Spanish, ...), defaults to English",
|
15 |
-
# default="English",
|
16 |
-
# )
|
17 |
-
# intent: str = Field(
|
18 |
-
# enum=[
|
19 |
-
# "ai",
|
20 |
-
# # "geo_info",
|
21 |
-
# # "esg"
|
22 |
-
# "search",
|
23 |
-
# "chitchat",
|
24 |
-
# ],
|
25 |
-
# description="""
|
26 |
-
# Categorize the user input in one of the following category
|
27 |
-
# Any question
|
28 |
-
|
29 |
-
# Examples:
|
30 |
-
# - ai = any question related to AI: "What are the environmental consequences of AI", "How does AI affect the environment"
|
31 |
-
# - search = Searching for any question about climate change, energy, biodiversity, nature, and everything we can find the IPCC or IPBES reports or scientific papers. Also questions about individual actions or anything loosely related to the environment.
|
32 |
-
# - chitchat = Any chit chat or any question that is not related to the environment or climate change or for which it is not necessary to look for the answer in the IPCC, IPBES, IPOS or scientific reports.
|
33 |
-
# """,
|
34 |
-
# # - geo_info = Geolocated info about climate change: Any question where the user wants to know localized impacts of climate change, eg: "What will be the temperature in Marseille in 2050"
|
35 |
-
# # - esg = Any question about the ESG regulation, frameworks and standards like the CSRD, TCFD, SASB, GRI, CDP, etc.
|
36 |
-
# )
|
37 |
-
|
38 |
class IntentCategorizer(BaseModel):
|
39 |
"""Analyzing the user message input"""
|
40 |
|
@@ -44,9 +16,9 @@ class IntentCategorizer(BaseModel):
|
|
44 |
)
|
45 |
intent: str = Field(
|
46 |
enum=[
|
47 |
-
"
|
48 |
# "geo_info",
|
49 |
-
# "esg"
|
50 |
"search",
|
51 |
"chitchat",
|
52 |
],
|
@@ -55,12 +27,13 @@ class IntentCategorizer(BaseModel):
|
|
55 |
Any question
|
56 |
|
57 |
Examples:
|
58 |
-
-
|
59 |
- search = Searching for any quesiton about climate change, energy, biodiversity, nature, and everything we can find the IPCC or IPBES reports or scientific papers,
|
60 |
- chitchat = Any general question that is not related to the environment or climate change or just conversational, or if you don't think searching the IPCC or IPBES reports would be relevant
|
61 |
""",
|
62 |
# - geo_info = Geolocated info about climate change: Any question where the user wants to know localized impacts of climate change, eg: "What will be the temperature in Marseille in 2050"
|
63 |
# - esg = Any question about the ESG regulation, frameworks and standards like the CSRD, TCFD, SASB, GRI, CDP, etc.
|
|
|
64 |
)
|
65 |
|
66 |
|
@@ -71,7 +44,7 @@ def make_intent_categorization_chain(llm):
|
|
71 |
llm_with_functions = llm.bind(functions = openai_functions,function_call={"name":"IntentCategorizer"})
|
72 |
|
73 |
prompt = ChatPromptTemplate.from_messages([
|
74 |
-
("system", "You are a helpful assistant, you will analyze, and categorize the user input message using the function provided. Categorize the user input as ai ONLY if it is related to Artificial Intelligence, search if it is related to the environment, climate change, energy, biodiversity, nature, etc. and chitchat if it is just general conversation."),
|
75 |
("user", "input: {input}")
|
76 |
])
|
77 |
|
|
|
7 |
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
|
8 |
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
class IntentCategorizer(BaseModel):
|
11 |
"""Analyzing the user message input"""
|
12 |
|
|
|
16 |
)
|
17 |
intent: str = Field(
|
18 |
enum=[
|
19 |
+
"ai_impact",
|
20 |
# "geo_info",
|
21 |
+
# "esg",
|
22 |
"search",
|
23 |
"chitchat",
|
24 |
],
|
|
|
27 |
Any question
|
28 |
|
29 |
Examples:
|
30 |
+
- ai_impact = Environmental impacts of AI: "What are the environmental impacts of AI", "How does AI affect the environment"
|
31 |
- search = Searching for any quesiton about climate change, energy, biodiversity, nature, and everything we can find the IPCC or IPBES reports or scientific papers,
|
32 |
- chitchat = Any general question that is not related to the environment or climate change or just conversational, or if you don't think searching the IPCC or IPBES reports would be relevant
|
33 |
""",
|
34 |
# - geo_info = Geolocated info about climate change: Any question where the user wants to know localized impacts of climate change, eg: "What will be the temperature in Marseille in 2050"
|
35 |
# - esg = Any question about the ESG regulation, frameworks and standards like the CSRD, TCFD, SASB, GRI, CDP, etc.
|
36 |
+
|
37 |
)
|
38 |
|
39 |
|
|
|
44 |
llm_with_functions = llm.bind(functions = openai_functions,function_call={"name":"IntentCategorizer"})
|
45 |
|
46 |
prompt = ChatPromptTemplate.from_messages([
|
47 |
+
("system", "You are a helpful assistant, you will analyze, translate and categorize the user input message using the function provided. Categorize the user input as ai ONLY if it is related to Artificial Intelligence, search if it is related to the environment, climate change, energy, biodiversity, nature, etc. and chitchat if it is just general conversation."),
|
48 |
("user", "input: {input}")
|
49 |
])
|
50 |
|
climateqa/engine/chains/query_transformation.py
CHANGED
@@ -161,6 +161,7 @@ def make_query_transform_node(llm,k_final=15):
|
|
161 |
question_state = {"question":question}
|
162 |
analysis_output = rewriter_chain.invoke({"input":question})
|
163 |
|
|
|
164 |
# The case when the llm does not return any sources
|
165 |
if not analysis_output["sources"] or not all(source in ["IPCC", "IPBS", "IPOS"] for source in analysis_output["sources"]):
|
166 |
analysis_output["sources"] = ["IPCC", "IPBES", "IPOS"]
|
|
|
161 |
question_state = {"question":question}
|
162 |
analysis_output = rewriter_chain.invoke({"input":question})
|
163 |
|
164 |
+
# TODO(warning): the LLM should always return something
|
165 |
# The case when the llm does not return any sources
|
166 |
if not analysis_output["sources"] or not all(source in ["IPCC", "IPBS", "IPOS"] for source in analysis_output["sources"]):
|
167 |
analysis_output["sources"] = ["IPCC", "IPBES", "IPOS"]
|
climateqa/engine/chains/retrieve_documents.py
CHANGED
@@ -84,11 +84,13 @@ def make_retriever_node(vectorstore,reranker,llm,rerank_by_question=True, k_fina
|
|
84 |
# # Option 2 - Get 100/n documents by question and rerank the total
|
85 |
# if rerank_by_question:
|
86 |
# k_by_question = divide_into_parts(k_final,len(questions))
|
|
|
|
|
|
|
|
|
87 |
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
docs = []
|
92 |
k_by_question = k_final // state["n_questions"]
|
93 |
|
94 |
sources = current_question["sources"]
|
|
|
84 |
# # Option 2 - Get 100/n documents by question and rerank the total
|
85 |
# if rerank_by_question:
|
86 |
# k_by_question = divide_into_parts(k_final,len(questions))
|
87 |
+
if "documents" in state and state["documents"] is not None:
|
88 |
+
docs = state["documents"]
|
89 |
+
else:
|
90 |
+
docs = []
|
91 |
|
92 |
+
|
93 |
+
|
|
|
|
|
94 |
k_by_question = k_final // state["n_questions"]
|
95 |
|
96 |
sources = current_question["sources"]
|
climateqa/engine/graph.py
CHANGED
@@ -92,10 +92,9 @@ def make_graph_agent(llm, vectorstore_ipcc, vectorstore_graphs, reranker, thresh
|
|
92 |
transform_query = make_query_transform_node(llm)
|
93 |
translate_query = make_translation_node(llm)
|
94 |
answer_chitchat = make_chitchat_node(llm)
|
95 |
-
|
96 |
retrieve_documents = make_retriever_node(vectorstore_ipcc, reranker, llm)
|
97 |
retrieve_graphs = make_graph_retriever_node(vectorstore_graphs, reranker)
|
98 |
-
# answer_rag_graph = make_rag_graph_node(llm)
|
99 |
answer_rag = make_rag_node(llm, with_docs=True)
|
100 |
answer_rag_no_docs = make_rag_node(llm, with_docs=False)
|
101 |
chitchat_categorize_intent = make_chitchat_intent_categorization_node(llm)
|
@@ -190,142 +189,3 @@ def display_graph(app):
|
|
190 |
)
|
191 |
)
|
192 |
)
|
193 |
-
|
194 |
-
# import sys
|
195 |
-
# import os
|
196 |
-
# from contextlib import contextmanager
|
197 |
-
|
198 |
-
# from langchain.schema import Document
|
199 |
-
# from langgraph.graph import END, StateGraph
|
200 |
-
# from langchain_core.runnables.graph import CurveStyle, NodeColors, MermaidDrawMethod
|
201 |
-
|
202 |
-
# from typing_extensions import TypedDict
|
203 |
-
# from typing import List
|
204 |
-
|
205 |
-
# from IPython.display import display, HTML, Image
|
206 |
-
|
207 |
-
# from .chains.answer_chitchat import make_chitchat_node
|
208 |
-
# from .chains.answer_ai_impact import make_ai_impact_node
|
209 |
-
# from .chains.query_transformation import make_query_transform_node
|
210 |
-
# from .chains.translation import make_translation_node
|
211 |
-
# from .chains.intent_categorization import make_intent_categorization_node
|
212 |
-
# from .chains.retriever import make_retriever_node
|
213 |
-
# from .chains.answer_rag import make_rag_node
|
214 |
-
|
215 |
-
|
216 |
-
# class GraphState(TypedDict):
|
217 |
-
# """
|
218 |
-
# Represents the state of our graph.
|
219 |
-
# """
|
220 |
-
# user_input : str
|
221 |
-
# language : str
|
222 |
-
# intent : str
|
223 |
-
# query: str
|
224 |
-
# questions : List[dict]
|
225 |
-
# answer: str
|
226 |
-
# audience: str = "experts"
|
227 |
-
# sources_input: List[str] = ["auto"]
|
228 |
-
# documents: List[Document]
|
229 |
-
|
230 |
-
# def search(state):
|
231 |
-
# return {}
|
232 |
-
|
233 |
-
# def route_intent(state):
|
234 |
-
# intent = state["intent"]
|
235 |
-
# if intent in ["chitchat","esg"]:
|
236 |
-
# return "answer_chitchat"
|
237 |
-
# elif intent == "ai_impact":
|
238 |
-
# return "answer_ai_impact"
|
239 |
-
# else:
|
240 |
-
# # Search route
|
241 |
-
# return "search"
|
242 |
-
|
243 |
-
# def route_translation(state):
|
244 |
-
# if state["language"].lower() == "english":
|
245 |
-
# return "transform_query"
|
246 |
-
# else:
|
247 |
-
# return "translate_query"
|
248 |
-
|
249 |
-
# def route_based_on_relevant_docs(state,threshold_docs=0.2):
|
250 |
-
# docs = [x for x in state["documents"] if x.metadata["reranking_score"] > threshold_docs]
|
251 |
-
# if len(docs) > 0:
|
252 |
-
# return "answer_rag"
|
253 |
-
# else:
|
254 |
-
# return "answer_rag_no_docs"
|
255 |
-
|
256 |
-
|
257 |
-
# def make_id_dict(values):
|
258 |
-
# return {k:k for k in values}
|
259 |
-
|
260 |
-
# def make_graph_agent(llm,vectorstore,reranker,threshold_docs = 0.2):
|
261 |
-
|
262 |
-
# workflow = StateGraph(GraphState)
|
263 |
-
|
264 |
-
# # Define the node functions
|
265 |
-
# categorize_intent = make_intent_categorization_node(llm)
|
266 |
-
# transform_query = make_query_transform_node(llm)
|
267 |
-
# translate_query = make_translation_node(llm)
|
268 |
-
# answer_chitchat = make_chitchat_node(llm)
|
269 |
-
# answer_ai_impact = make_ai_impact_node(llm)
|
270 |
-
# retrieve_documents = make_retriever_node(vectorstore,reranker)
|
271 |
-
# answer_rag = make_rag_node(llm,with_docs=True)
|
272 |
-
# answer_rag_no_docs = make_rag_node(llm,with_docs=False)
|
273 |
-
|
274 |
-
# # Define the nodes
|
275 |
-
# workflow.add_node("categorize_intent", categorize_intent)
|
276 |
-
# workflow.add_node("search", search)
|
277 |
-
# workflow.add_node("transform_query", transform_query)
|
278 |
-
# workflow.add_node("translate_query", translate_query)
|
279 |
-
# workflow.add_node("answer_chitchat", answer_chitchat)
|
280 |
-
# workflow.add_node("answer_ai_impact", answer_ai_impact)
|
281 |
-
# workflow.add_node("retrieve_documents",retrieve_documents)
|
282 |
-
# workflow.add_node("answer_rag",answer_rag)
|
283 |
-
# workflow.add_node("answer_rag_no_docs",answer_rag_no_docs)
|
284 |
-
|
285 |
-
# # Entry point
|
286 |
-
# workflow.set_entry_point("categorize_intent")
|
287 |
-
|
288 |
-
# # CONDITIONAL EDGES
|
289 |
-
# workflow.add_conditional_edges(
|
290 |
-
# "categorize_intent",
|
291 |
-
# route_intent,
|
292 |
-
# make_id_dict(["answer_chitchat","answer_ai_impact","search"])
|
293 |
-
# )
|
294 |
-
|
295 |
-
# workflow.add_conditional_edges(
|
296 |
-
# "search",
|
297 |
-
# route_translation,
|
298 |
-
# make_id_dict(["translate_query","transform_query"])
|
299 |
-
# )
|
300 |
-
|
301 |
-
# workflow.add_conditional_edges(
|
302 |
-
# "retrieve_documents",
|
303 |
-
# lambda x : route_based_on_relevant_docs(x,threshold_docs=threshold_docs),
|
304 |
-
# make_id_dict(["answer_rag","answer_rag_no_docs"])
|
305 |
-
# )
|
306 |
-
|
307 |
-
# # Define the edges
|
308 |
-
# workflow.add_edge("translate_query", "transform_query")
|
309 |
-
# workflow.add_edge("transform_query", "retrieve_documents")
|
310 |
-
# workflow.add_edge("retrieve_documents", "answer_rag")
|
311 |
-
# workflow.add_edge("answer_rag", END)
|
312 |
-
# workflow.add_edge("answer_rag_no_docs", END)
|
313 |
-
# workflow.add_edge("answer_chitchat", END)
|
314 |
-
# workflow.add_edge("answer_ai_impact", END)
|
315 |
-
|
316 |
-
# # Compile
|
317 |
-
# app = workflow.compile()
|
318 |
-
# return app
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
# def display_graph(app):
|
324 |
-
|
325 |
-
# display(
|
326 |
-
# Image(
|
327 |
-
# app.get_graph(xray = True).draw_mermaid_png(
|
328 |
-
# draw_method=MermaidDrawMethod.API,
|
329 |
-
# )
|
330 |
-
# )
|
331 |
-
# )
|
|
|
92 |
transform_query = make_query_transform_node(llm)
|
93 |
translate_query = make_translation_node(llm)
|
94 |
answer_chitchat = make_chitchat_node(llm)
|
95 |
+
answer_ai_impact = make_ai_impact_node(llm)
|
96 |
retrieve_documents = make_retriever_node(vectorstore_ipcc, reranker, llm)
|
97 |
retrieve_graphs = make_graph_retriever_node(vectorstore_graphs, reranker)
|
|
|
98 |
answer_rag = make_rag_node(llm, with_docs=True)
|
99 |
answer_rag_no_docs = make_rag_node(llm, with_docs=False)
|
100 |
chitchat_categorize_intent = make_chitchat_intent_categorization_node(llm)
|
|
|
189 |
)
|
190 |
)
|
191 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
climateqa/engine/llm/openai.py
CHANGED
@@ -7,7 +7,7 @@ try:
|
|
7 |
except Exception:
|
8 |
pass
|
9 |
|
10 |
-
def get_llm(model="gpt-
|
11 |
|
12 |
llm = ChatOpenAI(
|
13 |
model=model,
|
|
|
7 |
except Exception:
|
8 |
pass
|
9 |
|
10 |
+
def get_llm(model="gpt-4o-mini",max_tokens=1024, temperature=0.0, streaming=True,timeout=30, **kwargs):
|
11 |
|
12 |
llm = ChatOpenAI(
|
13 |
model=model,
|
climateqa/engine/reranker.py
CHANGED
@@ -6,7 +6,7 @@ from sentence_transformers import CrossEncoder
|
|
6 |
|
7 |
load_dotenv()
|
8 |
|
9 |
-
def get_reranker(model = "
|
10 |
|
11 |
assert model in ["nano","tiny","small","large", "jina"]
|
12 |
|
@@ -34,7 +34,6 @@ def rerank_docs(reranker,docs,query):
|
|
34 |
# Get a list of texts from langchain docs
|
35 |
input_docs = [x.page_content for x in docs]
|
36 |
|
37 |
-
print(f"\n\nDOCS:{input_docs}\n\n")
|
38 |
# Rerank using rerankers library
|
39 |
results = reranker.rank(query=query, docs=input_docs)
|
40 |
|
|
|
6 |
|
7 |
load_dotenv()
|
8 |
|
9 |
+
def get_reranker(model = "nano", cohere_api_key = None):
|
10 |
|
11 |
assert model in ["nano","tiny","small","large", "jina"]
|
12 |
|
|
|
34 |
# Get a list of texts from langchain docs
|
35 |
input_docs = [x.page_content for x in docs]
|
36 |
|
|
|
37 |
# Rerank using rerankers library
|
38 |
results = reranker.rank(query=query, docs=input_docs)
|
39 |
|
front/utils.py
CHANGED
@@ -134,7 +134,7 @@ def make_html_source(source,i):
|
|
134 |
score = meta['reranking_score']
|
135 |
if score > 0.8:
|
136 |
color = "score-green"
|
137 |
-
elif score > 0.
|
138 |
color = "score-orange"
|
139 |
else:
|
140 |
color = "score-red"
|
@@ -170,8 +170,9 @@ def make_html_source(source,i):
|
|
170 |
<div class="card card-image">
|
171 |
<div class="card-content">
|
172 |
<h2>Image {i} - {title} - Page {int(meta['page_number'])}</h2>
|
173 |
-
<p>{content}</p>
|
174 |
<p class='ai-generated'>AI-generated description</p>
|
|
|
|
|
175 |
{relevancy_score}
|
176 |
</div>
|
177 |
<div class="card-footer">
|
@@ -186,6 +187,53 @@ def make_html_source(source,i):
|
|
186 |
return card
|
187 |
|
188 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
|
190 |
def make_toolbox(tool_name,description = "",checked = False,elem_id = "toggle"):
|
191 |
|
|
|
134 |
score = meta['reranking_score']
|
135 |
if score > 0.8:
|
136 |
color = "score-green"
|
137 |
+
elif score > 0.5:
|
138 |
color = "score-orange"
|
139 |
else:
|
140 |
color = "score-red"
|
|
|
170 |
<div class="card card-image">
|
171 |
<div class="card-content">
|
172 |
<h2>Image {i} - {title} - Page {int(meta['page_number'])}</h2>
|
|
|
173 |
<p class='ai-generated'>AI-generated description</p>
|
174 |
+
<p>{content}</p>
|
175 |
+
|
176 |
{relevancy_score}
|
177 |
</div>
|
178 |
<div class="card-footer">
|
|
|
187 |
return card
|
188 |
|
189 |
|
190 |
+
def make_html_figure_sources(source,i,img_str):
    """Build the HTML card for a figure source with its image embedded inline.

    Args:
        source: Document-like object exposing ``metadata`` (a mapping) and
            ``page_content`` (a string). The metadata keys read here are
            ``reranking_score``, ``name``, ``figure_code``, ``short_name``,
            ``page_number`` and ``url``.
        i: Index displayed in the card title ("Image {i} - ...").
        img_str: Image payload inserted into a ``data:image/png;base64,`` URI
            (presumably an already base64-encoded PNG -- confirm with caller).

    Returns:
        The HTML markup of the card as a string.

    Raises:
        KeyError: If one of the metadata keys listed above is missing.
    """
    meta = source.metadata
    content = source.page_content.strip()

    # Colour class for the relevancy score: > 0.8 green, > 0.5 orange, else red.
    score = meta['reranking_score']
    if score > 0.8:
        color = "score-green"
    elif score > 0.5:
        color = "score-orange"
    else:
        color = "score-red"

    # The original built an always-empty ``toc_levels`` list and branched on
    # it; only the "no TOC levels" branch was reachable, so the footer name is
    # simply the document name.
    name = meta['name']

    relevancy_score = f"<p class=relevancy-score>Relevancy score: <span class='{color}'>{score:.1%}</span></p>"

    # "N/A" marks a figure without a figure code: show the short name alone.
    if meta["figure_code"] != "N/A":
        title = f"{meta['figure_code']} - {meta['short_name']}"
    else:
        title = f"{meta['short_name']}"

    page_number = int(meta['page_number'])

    # NOTE: the ``src`` attribute must be closed before ``alt``; the previous
    # markup was missing the closing quote, which produced a malformed <img>.
    card = f"""
    <div class="card card-image">
        <div class="card-content">
            <h2>Image {i} - {title} - Page {page_number}</h2>
            <p class='ai-generated'>AI-generated description</p>
            <img src="data:image/png;base64,{img_str}" alt="Alt text" />

            <p>{content}</p>

            {relevancy_score}
        </div>
        <div class="card-footer">
            <span>{name}</span>
            <a href="{meta['url']}#page={page_number}" target="_blank" class="pdf-link">
                <span role="img" aria-label="Open PDF">🔗</span>
            </a>
        </div>
    </div>
    """
    return card
|
235 |
+
|
236 |
+
|
237 |
|
238 |
def make_toolbox(tool_name,description = "",checked = False,elem_id = "toggle"):
|
239 |
|
requirements.txt
CHANGED
@@ -17,3 +17,4 @@ torch==2.3.0
|
|
17 |
nvidia-cudnn-cu12==8.9.2.26
|
18 |
langchain-community==0.2
|
19 |
msal==1.31
|
|
|
|
17 |
nvidia-cudnn-cu12==8.9.2.26
|
18 |
langchain-community==0.2
|
19 |
msal==1.31
|
20 |
+
matplotlib==3.9.2
|
style.css
CHANGED
@@ -11,7 +11,23 @@
|
|
11 |
margin: 0px;
|
12 |
}
|
13 |
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
background-color: #fff3cd;
|
16 |
border: 1px solid #ffeeba;
|
17 |
border-radius: 4px;
|
@@ -194,41 +210,59 @@ label.selected{
|
|
194 |
padding:0px !important;
|
195 |
}
|
196 |
|
197 |
-
|
198 |
@media screen and (min-width: 1024px) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
199 |
div#tab-examples{
|
200 |
height:calc(100vh - 190px) !important;
|
201 |
-
overflow-y: auto;
|
202 |
}
|
203 |
|
204 |
div#sources-textbox{
|
205 |
height:calc(100vh - 190px) !important;
|
206 |
-
overflow-y: auto !important;
|
|
|
|
|
|
|
|
|
|
|
207 |
}
|
208 |
|
209 |
div#tab-config{
|
210 |
height:calc(100vh - 190px) !important;
|
211 |
-
overflow-y: auto !important;
|
|
|
|
|
|
|
|
|
|
|
|
|
212 |
}
|
213 |
|
|
|
214 |
div#chatbot-row{
|
215 |
height:calc(100vh - 90px) !important;
|
|
|
216 |
}
|
217 |
|
218 |
-
|
219 |
-
height:calc(100vh - 170px) !important;
|
220 |
-
}
|
221 |
|
222 |
.max-height{
|
223 |
height:calc(100vh - 90px) !important;
|
|
|
224 |
overflow-y: auto;
|
225 |
}
|
|
|
226 |
|
227 |
-
/* .tabitem:nth-child(n+3) {
|
228 |
-
padding-top:30px;
|
229 |
-
padding-left:40px;
|
230 |
-
padding-right:40px;
|
231 |
-
} */
|
232 |
}
|
233 |
|
234 |
footer {
|
@@ -508,4 +542,22 @@ div#tab-saved-graphs {
|
|
508 |
}
|
509 |
.message-buttons-left.panel.message-buttons.with-avatar {
|
510 |
display: none;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
511 |
}
|
|
|
11 |
margin: 0px;
|
12 |
}
|
13 |
|
14 |
+
|
15 |
+
/* fix for huggingface infinite growth*/
|
16 |
+
main.flex.flex-1.flex-col {
|
17 |
+
max-height: 95vh !important;
|
18 |
+
}
|
19 |
+
|
20 |
+
|
21 |
+
.avatar-container.svelte-1x5p6hu:not(.thumbnail-item) img {
|
22 |
+
width: 100%;
|
23 |
+
height: 100%;
|
24 |
+
object-fit: cover;
|
25 |
+
border-radius: 50%;
|
26 |
+
padding: 0px;
|
27 |
+
margin: 0px;
|
28 |
+
}
|
29 |
+
|
30 |
+
.warning-box {
|
31 |
background-color: #fff3cd;
|
32 |
border: 1px solid #ffeeba;
|
33 |
border-radius: 4px;
|
|
|
210 |
padding:0px !important;
|
211 |
}
|
212 |
|
|
|
213 |
@media screen and (min-width: 1024px) {
|
214 |
+
.gradio-container {
|
215 |
+
max-height: calc(100vh - 190px) !important;
|
216 |
+
overflow: hidden;
|
217 |
+
}
|
218 |
+
|
219 |
+
/* div#chatbot{
|
220 |
+
height:calc(100vh - 170px) !important;
|
221 |
+
max-height:calc(100vh - 170px) !important;
|
222 |
+
|
223 |
+
} */
|
224 |
+
|
225 |
div#tab-examples{
|
226 |
height:calc(100vh - 190px) !important;
|
227 |
+
/* overflow-y: auto; */
|
228 |
}
|
229 |
|
230 |
div#sources-textbox{
|
231 |
height:calc(100vh - 190px) !important;
|
232 |
+
/* overflow-y: auto !important; */
|
233 |
+
}
|
234 |
+
|
235 |
+
div#sources-figures{
|
236 |
+
height:calc(100vh - 190px) !important;
|
237 |
+
/* overflow-y: auto !important; */
|
238 |
}
|
239 |
|
240 |
div#tab-config{
|
241 |
height:calc(100vh - 190px) !important;
|
242 |
+
/* overflow-y: auto !important; */
|
243 |
+
}
|
244 |
+
|
245 |
+
/* Force container to respect height limits */
|
246 |
+
.main-component{
|
247 |
+
contain: size layout;
|
248 |
+
overflow: hidden;
|
249 |
}
|
250 |
|
251 |
+
/*
|
252 |
div#chatbot-row{
|
253 |
height:calc(100vh - 90px) !important;
|
254 |
+
max-height:calc(100vh - 90px) !important;
|
255 |
}
|
256 |
|
257 |
+
|
|
|
|
|
258 |
|
259 |
.max-height{
|
260 |
height:calc(100vh - 90px) !important;
|
261 |
+
max-height:calc(100vh - 90px) !important;
|
262 |
overflow-y: auto;
|
263 |
}
|
264 |
+
*/
|
265 |
|
|
|
|
|
|
|
|
|
|
|
266 |
}
|
267 |
|
268 |
footer {
|
|
|
542 |
}
|
543 |
.message-buttons-left.panel.message-buttons.with-avatar {
|
544 |
display: none;
|
545 |
+
}
|
546 |
+
.score-red{
|
547 |
+
color:red !important;
|
548 |
+
}
|
549 |
+
.message-buttons-left.panel.message-buttons.with-avatar {
|
550 |
+
display: none;
|
551 |
+
}
|
552 |
+
|
553 |
+
/* Specific fixes for Hugging Face Space iframe */
|
554 |
+
.h-full {
|
555 |
+
height: auto !important;
|
556 |
+
min-height: 0 !important;
|
557 |
+
}
|
558 |
+
|
559 |
+
.space-content {
|
560 |
+
height: auto !important;
|
561 |
+
max-height: 100vh !important;
|
562 |
+
overflow: hidden;
|
563 |
}
|