Юра Цепліцький committed
Commit 1a0f750 · 1 Parent(s): af8b652

Switch to Cohere Command R model

app.py CHANGED
@@ -11,8 +11,7 @@ model_config = ConfigDict(protected_namespaces=())
 setting_keys = gr.Interface(
     fn=set_keys,
     inputs=[
-        #gr.Textbox(label="Enter your CO_API_KEY"),
-        gr.Textbox(label="Enter your OPENAI_API_KEY"),
+        gr.Textbox(label="Enter your CO_API_KEY"),
         gr.Textbox(label="Enter your LLAMA_CLOUD_API_KEY"),
     ],
     outputs=gr.Textbox(label="Status")
@@ -31,7 +30,10 @@ uploading_files = gr.Interface(
 qa = gr.Interface(
     fn=answer_query,
     inputs=gr.Textbox(label="Enter your question"),
-    outputs=gr.Textbox(label="Answer"),
+    outputs=[
+        gr.Textbox(label="Answer"),
+        gr.Textbox(label="Relevant Nodes"),
+    ],
     title="Document Q&A System"
 )
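Note: with outputs now a list of two gr.Textbox components, Gradio expects the wired function to return a two-element tuple; the matching change to answer_query is in main.py below. A minimal, self-contained sketch of that contract, with answer_query stubbed (the stub body is illustrative, not from this commit):

import gradio as gr

def answer_query(query: str):
    # Stub: the real function queries the index and returns
    # (answer_text, formatted_retrieved_nodes).
    return f"Answer to: {query}", "Node: ...\nScore: 0.812\nText: ..."

qa = gr.Interface(
    fn=answer_query,
    inputs=gr.Textbox(label="Enter your question"),
    outputs=[
        gr.Textbox(label="Answer"),          # receives the first tuple element
        gr.Textbox(label="Relevant Nodes"),  # receives the second
    ],
    title="Document Q&A System",
)

if __name__ == "__main__":
    qa.launch()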
 
main.py CHANGED
@@ -1,13 +1,13 @@
-from utils.retriever import get_query_engine
+from utils.retriever import get_engine
 from utils.index import create_index
 from utils.constant import INDEX_PATH, DATA_PATH
 import os
 import shutil
 
-def set_keys(openai_api_key: str, llama_cloud_api_key: str) -> str:
+
+def set_keys(co_api_key: str, llama_cloud_api_key: str) -> str:
     try:
-        #os.environ["CO_API_KEY"] = co_api_key
-        os.environ["OPENAI_API_KEY"] = openai_api_key
+        os.environ["CO_API_KEY"] = co_api_key
         os.environ["LLAMA_CLOUD_API_KEY"] = llama_cloud_api_key
         return "Keys are set successfully"
@@ -23,6 +23,10 @@ def handle_file(uploaded_file):
         shutil.move(temp_file_path, save_file_path)
 
         create_index(DATA_PATH, INDEX_PATH)
+
+        engine_manager = get_engine()[1]
+        engine_manager.initialize_index()
+
         return "File uploaded and indexed"
 
     except Exception as e:
@@ -30,19 +34,20 @@ def handle_file(uploaded_file):
 
 def answer_query(query: str) -> str:
-    query_engine = get_query_engine(semantic=True)
+    query_engine = get_engine(semantic=True)[0]
     response = query_engine.query(query)
 
     nodes = query_engine.retriever.retrieve(query)
+    nodes_str = ""
 
     for node in nodes:
         score = node.get_score()
         text = node.text
-        response += f"\nNode: {node.node_id}\nScore: {score:0.3f}\nText: {text[:1000]}\n"
+        nodes_str += f"\nNode: {node.node_id}\nScore: {score:0.3f}\nText: {text[:1000]}\n"
 
-    return response
+    return response, nodes_str
 
 if __name__ == "__main__":
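Note: get_engine now returns a (query_engine, engine_manager) tuple, hence the [0]/[1] indexing at the two call sites, and answer_query now returns (response, nodes_str) to feed the two output boxes in app.py, although its -> str annotation no longer matches. A hedged usage sketch (only the names shown in this diff are from the commit; the query string is illustrative):

from utils.retriever import get_engine

# After an upload: take the manager and refresh the loaded index.
engine_manager = get_engine()[1]
engine_manager.initialize_index()

# At question time: take the semantic query engine and run the query.
query_engine = get_engine(semantic=True)[0]
response = query_engine.query("What is the document about?")
nodes = query_engine.retriever.retrieve("What is the document about?")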
 
utils/__pycache__/retriever.cpython-312.pyc CHANGED
Binary files a/utils/__pycache__/retriever.cpython-312.pyc and b/utils/__pycache__/retriever.cpython-312.pyc differ
 
utils/__pycache__/settings.cpython-312.pyc CHANGED
Binary files a/utils/__pycache__/settings.cpython-312.pyc and b/utils/__pycache__/settings.cpython-312.pyc differ
 
utils/retriever.py CHANGED
@@ -25,14 +25,18 @@ class QueryEngineManager:
         self.reranker = None
         self.query_engine = None
         self._configure()
 
-    def _configure(self):
-        configure_settings()
+    def initialize_index(self):
         self.index = load_index(path=INDEX_PATH)
         self.nodes = list(self.index.docstore.docs.values())
+
+    def _configure(self):
+        configure_settings()
+        self.initialize_index()
         self.reranker = LLMRerank(top_n=TOP_N_RERANKER)
 
     def get_engine(self, bm25: bool = False, semantic: bool = False):
+
         if bm25:
             self.retriever = BM25Retriever.from_defaults(
                 nodes=self.nodes,
@@ -59,6 +63,6 @@ class QueryEngineManager:
 
         return self.query_engine
 
-def get_query_engine(bm25: bool = False, semantic: bool = False):
+def get_engine(bm25: bool = False, semantic: bool = False):
     engine_manager = QueryEngineManager()
-    return engine_manager.get_engine(bm25, semantic)
+    return engine_manager.get_engine(bm25, semantic), engine_manager
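Note: the refactor pulls index loading out of _configure into a public initialize_index so callers can reload a freshly written index, and renames the module-level helper from get_query_engine to get_engine, returning the manager alongside the engine. One side effect, judging from these hunks: QueryEngineManager() runs _configure, which already calls initialize_index, so the handle_file pattern loads the index twice. Sketched below for clarity (the lighter variant is an assumption, not part of this commit):

# As written in this commit:
engine_manager = get_engine()[1]   # __init__ -> _configure() -> initialize_index()
engine_manager.initialize_index()  # loads INDEX_PATH a second time

# A lighter variant (illustrative) would keep one module-level manager
# and call initialize_index() only when the data on disk has changed.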
utils/settings.py CHANGED
@@ -1,6 +1,7 @@
 from llama_index.core import Settings
-#from llama_index.llms.cohere import Cohere
-from llama_index.llms.openai import OpenAI
+from llama_index.llms.cohere import Cohere
+#from llama_index.llms.anthropic import Anthropic
+#from llama_index.llms.openai import OpenAI
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 from llama_index.core.node_parser import SemanticSplitterNodeParser
 
@@ -16,15 +17,20 @@ def load_llm():
     - Do not include information from external sources not provided by the user.
     '''
 
-    # llm = Cohere(
-    #     system_prompt=system_prompt,
-    # )
+    llm = Cohere(
+        system_prompt=system_prompt,
+    )
 
-    llm = OpenAI(
-        model = "gpt-4o-mini",
-        system_prompt=system_prompt
-    )
+    # llm = OpenAI(
+    #     model = "gpt-3.5-turbo",
+    #     system_prompt=system_prompt
+    # )
+    # llm = Anthropic(
+    #     model="claude-3-opus-20240229",
+    #     system_prompt=system_prompt
+    # )
 
     return llm
@@ -43,6 +49,7 @@ def configure_settings():
     llm_replicate = load_llm()
     embed_model = load_embed_model()
 
+    #Settings.tokenizer = Anthropic().tokenizer
     Settings.llm = llm_replicate
     Settings.embed_model = embed_model
     Settings.node_parser = SemanticSplitterNodeParser(
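Note: Cohere(...) is constructed without an explicit model, so which model actually runs depends on the default of the installed llama-index-llms-cohere version; the commit title targets Command R, which can be pinned explicitly. A hedged sketch (the model value and the api_key fallback are assumptions, not part of this diff; system_prompt stands in for the prompt defined in load_llm):

import os
from llama_index.llms.cohere import Cohere

system_prompt = "Answer only from the provided documents."  # illustrative

# Pin Command R instead of relying on the library default. set_keys()
# exports CO_API_KEY; passing the key explicitly avoids depending on
# which environment variable the underlying Cohere client reads.
llm = Cohere(
    model="command-r",
    api_key=os.environ.get("CO_API_KEY"),
    system_prompt=system_prompt,
)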