Spaces:
Sleeping
Sleeping
from utils.settings import configure_settings | |
from utils.constant import * | |
from llama_parse import LlamaParse | |
from llama_index.core import Settings | |
from llama_index.core import VectorStoreIndex | |
from llama_index.core import SimpleDirectoryReader | |
from llama_index.core import StorageContext, load_index_from_storage | |
def get_documents(path: str):
    """Load every document found under *path*.

    Args:
        path: Directory handed to ``SimpleDirectoryReader`` as ``input_dir``.

    Returns:
        Whatever ``SimpleDirectoryReader.load_data()`` yields for that
        directory.
    """
    print("Getting documents...")
    # Only the ".pdf" extension is mapped to a custom extractor here; how any
    # other file type is handled is left entirely to SimpleDirectoryReader.
    # NOTE(review): LlamaParse() is built with no arguments, so any
    # credentials/options must come from its own defaults — confirm.
    reader = SimpleDirectoryReader(
        input_dir=path,
        file_extractor={".pdf": LlamaParse()},
    )
    return reader.load_data()
def create_index(doc_path: str, index_path: str):
    """Build a vector index from the documents in *doc_path* and persist it.

    Args:
        doc_path: Directory the source documents are read from.
        index_path: Directory the index's storage context is persisted to.

    Returns:
        The freshly built ``VectorStoreIndex``.
    """
    print("Indexing documents...")
    # Must run before Settings.node_parser is read below.
    # NOTE(review): presumably installs the node parser / models on the
    # global Settings object — confirm against utils.settings.
    configure_settings()

    loaded_docs = get_documents(doc_path)

    # Split the loaded documents into nodes with the globally configured parser.
    splitter = Settings.node_parser
    parsed_nodes = splitter.get_nodes_from_documents(loaded_docs)

    index = VectorStoreIndex(parsed_nodes, show_progress=True)
    # Write the index to disk so load_index() can restore it later.
    index.storage_context.persist(persist_dir=index_path)
    return index
def load_index(path: str):
    """Restore a previously persisted index from *path*.

    Args:
        path: Directory passed to ``StorageContext.from_defaults`` as
            ``persist_dir``.

    Returns:
        The index object produced by ``load_index_from_storage``.
    """
    print("Loading index...")
    # NOTE(review): unlike create_index(), this function does not call
    # configure_settings(); callers presumably need Settings configured
    # beforehand — confirm.
    return load_index_from_storage(
        StorageContext.from_defaults(persist_dir=path)
    )
if __name__ == "__main__":
    # Script entry point: build and persist the index using the project
    # constants (presumably supplied by the utils.constant star import —
    # verify).
    create_index(doc_path=DATA_PATH, index_path=INDEX_PATH)