File size: 1,843 Bytes
693d949
1a0f750
 
 
693d949
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a0f750
 
693d949
1a0f750
 
 
 
 
 
 
 
 
816e0e9
 
693d949
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a0f750
693d949
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from llama_index.core import Settings
from llama_index.llms.cohere import Cohere
#from llama_index.llms.anthropic import Anthropic
#from llama_index.llms.openai import OpenAI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.node_parser import SemanticSplitterNodeParser

def load_llm():
    """Build and return the Cohere LLM client used for answering queries.

    The client is constructed with a grounding ``system_prompt`` that restricts
    answers strictly to the scholarly papers supplied by the user.  To swap
    providers, construct the alternative client with the same prompt, e.g.
    ``OpenAI(model="gpt-3.5-turbo", system_prompt=system_prompt)`` or
    ``Anthropic(model="claude-3-opus-20240229", system_prompt=system_prompt)``.

    Returns:
        Cohere: configured LLM client using the package's default model
        (presumably reads the API key from the environment — TODO confirm).
    """
    print("Loading LLM model...")

    # Grounding instructions: confine the assistant to the provided documents.
    # NOTE: fixed typo "Your roles is" -> "Your role is" — this text is sent
    # to the model verbatim, so the grammar matters.
    system_prompt = '''
    You are an academic assistant specialized in synthesizing and analyzing information from scholarly papers provided by the user.
    Your role is to:
    - Base your answers solely on the content of these papers. 
    - Ensure that your explanations are clear, concise, and accurately reflect the information and insights contained within the supplied documents. 
    - Integrate information from the relevant papers seamlessly, if a question pertains to multiple topics
    - Do not include information from external sources not provided by the user.
    '''

    llm = Cohere(system_prompt=system_prompt)
    return llm

def load_embed_model():
    """Return the HuggingFace sentence-transformer embedding model.

    Uses ``sentence-transformers/all-mpnet-base-v2``, a general-purpose
    sentence embedding model, downloaded/cached by the HuggingFace hub.
    """
    print("Loading embedding model...")
    return HuggingFaceEmbedding(
        model_name="sentence-transformers/all-mpnet-base-v2",
    )

def configure_settings():
    """Populate llama-index's global ``Settings`` for the application.

    Side effects (module-wide): sets ``Settings.llm``, ``Settings.embed_model``
    and ``Settings.node_parser``.  Call once at startup, before building any
    index or query engine.
    """
    print("Configuring settings...")

    # Renamed from `llm_replicate`: the client returned is Cohere, not
    # Replicate — the old name was misleading.
    llm = load_llm()
    embed_model = load_embed_model()

    #Settings.tokenizer = Anthropic().tokenizer
    Settings.llm = llm
    Settings.embed_model = embed_model
    # Semantic chunking: split documents at embedding-similarity boundaries
    # instead of fixed-size windows, reusing the same embedding model.
    Settings.node_parser = SemanticSplitterNodeParser(
        embed_model=embed_model,
    )