Spaces:
Running
on
Zero
Running
on
Zero
Liam Dyer
committed on
add filenames because of a gradio client bug
Browse files
app.py
CHANGED
@@ -86,7 +86,7 @@ def convert_pandoc(input_file, filename) -> str:
|
|
86 |
|
87 |
|
88 |
@spaces.GPU
|
89 |
-
def convert(input_file) -> str:
|
90 |
plain_text_filetypes = [
|
91 |
".txt",
|
92 |
".csv",
|
@@ -99,14 +99,14 @@ def convert(input_file) -> str:
|
|
99 |
".jsonc",
|
100 |
]
|
101 |
# Already a plain text file that wouldn't benefit from pandoc so return the content
|
102 |
-
if any(
|
103 |
with open(input_file, "r") as f:
|
104 |
return f.read()
|
105 |
|
106 |
-
if
|
107 |
return convert_pdf(input_file)
|
108 |
|
109 |
-
return convert_pandoc(input_file,
|
110 |
|
111 |
|
112 |
def chunk_to_length(text, max_length=512):
|
@@ -119,11 +119,14 @@ def chunk_to_length(text, max_length=512):
|
|
119 |
|
120 |
|
121 |
@spaces.GPU
|
122 |
-
def predict(queries, documents, max_characters) -> list[list[str]]:
|
123 |
queries = queries.split("\n")
|
|
|
124 |
|
125 |
-
#
|
126 |
-
converted_docs = [
|
|
|
|
|
127 |
|
128 |
# Return if the total length is less than the max characters
|
129 |
total_doc_lengths = sum([len(doc) for doc in converted_docs])
|
@@ -193,6 +196,7 @@ gr.Interface(
|
|
193 |
inputs=[
|
194 |
gr.Textbox(label="Queries separated by newline"),
|
195 |
gr.File(label="Upload File", file_count="multiple"),
|
|
|
196 |
gr.Number(label="Max output characters", value=16384),
|
197 |
],
|
198 |
outputs=[gr.JSON(label="Embedded documents")],
|
|
|
86 |
|
87 |
|
88 |
@spaces.GPU
|
89 |
+
def convert(input_file, filename) -> str:
|
90 |
plain_text_filetypes = [
|
91 |
".txt",
|
92 |
".csv",
|
|
|
99 |
".jsonc",
|
100 |
]
|
101 |
# Already a plain text file that wouldn't benefit from pandoc so return the content
|
102 |
+
if any(filename.endswith(ft) for ft in plain_text_filetypes):
|
103 |
with open(input_file, "r") as f:
|
104 |
return f.read()
|
105 |
|
106 |
+
if filename.endswith(".pdf"):
|
107 |
return convert_pdf(input_file)
|
108 |
|
109 |
+
return convert_pandoc(input_file, filename)
|
110 |
|
111 |
|
112 |
def chunk_to_length(text, max_length=512):
|
|
|
119 |
|
120 |
|
121 |
@spaces.GPU
|
122 |
+
def predict(queries, documents, document_filenames, max_characters) -> list[list[str]]:
|
123 |
queries = queries.split("\n")
|
124 |
+
document_filenames = document_filenames.split("\n")
|
125 |
|
126 |
+
# Convert the documents to text
|
127 |
+
converted_docs = [
|
128 |
+
convert(doc, filename) for doc, filename in zip(documents, document_filenames)
|
129 |
+
]
|
130 |
|
131 |
# Return if the total length is less than the max characters
|
132 |
total_doc_lengths = sum([len(doc) for doc in converted_docs])
|
|
|
196 |
inputs=[
|
197 |
gr.Textbox(label="Queries separated by newline"),
|
198 |
gr.File(label="Upload File", file_count="multiple"),
|
199 |
+
gr.Textbox(label="Filenames separated by newline"),
|
200 |
gr.Number(label="Max output characters", value=16384),
|
201 |
],
|
202 |
outputs=[gr.JSON(label="Embedded documents")],
|