"""Build, persist and reload a vector index over a directory of documents."""

from utils.settings import configure_settings
from utils.constant import *
from llama_parse import LlamaParse
from llama_index.core import (
    Settings,
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)


def get_documents(path: str):
    """Read the documents found under ``path``.

    Files with a ``.pdf`` extension are handed to a default-constructed
    ``LlamaParse`` instance; other files use whatever
    ``SimpleDirectoryReader`` does by default.

    Args:
        path: Directory passed to ``SimpleDirectoryReader`` as ``input_dir``.

    Returns:
        The result of ``SimpleDirectoryReader.load_data()``.
    """
    print("Getting documents...")
    pdf_parser = LlamaParse()
    reader = SimpleDirectoryReader(
        input_dir=path,
        file_extractor={".pdf": pdf_parser},
    )
    return reader.load_data()


def create_index(doc_path: str, index_path: str):
    """Index the documents in ``doc_path`` and persist the index.

    Args:
        doc_path: Directory of source documents (see ``get_documents``).
        index_path: Directory the index's storage context is persisted to.

    Returns:
        The freshly built ``VectorStoreIndex``.
    """
    print("Indexing documents...")
    # Settings must be configured before Settings.node_parser is read below.
    configure_settings()

    parsed_nodes = Settings.node_parser.get_nodes_from_documents(
        get_documents(doc_path)
    )
    index = VectorStoreIndex(parsed_nodes, show_progress=True)
    index.storage_context.persist(persist_dir=index_path)
    return index


def load_index(path: str):
    """Load a previously persisted index.

    Args:
        path: Directory used as ``persist_dir`` when the index was saved.

    Returns:
        The index returned by ``load_index_from_storage``.
    """
    print("Loading index...")
    return load_index_from_storage(
        StorageContext.from_defaults(persist_dir=path)
    )


if __name__ == "__main__":
    # DATA_PATH and INDEX_PATH come from the wildcard import of utils.constant.
    create_index(DATA_PATH, INDEX_PATH)