yasserrmd committed on
Commit
e2d728a
·
verified ·
1 Parent(s): 9f6f5de

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -0
app.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from docling.document_converter import DocumentConverter
3
+
4
def convert_document(file, output_format):
    """Convert an uploaded document with Docling and collect basic metadata.

    Args:
        file: The upload handed over by the UI. Either a filesystem path
            (``str`` / ``os.PathLike``) or an object that exposes the path
            as ``.name``. ``None`` means nothing was uploaded.
        output_format: ``"Markdown"`` or ``"JSON"``; any other value yields
            the text ``"Unsupported format"``.

    Returns:
        A ``(converted_text, metadata)`` tuple. ``converted_text`` is the
        exported document (or a short message when there is nothing to
        export) and ``metadata`` is a dict with the keys ``"Title"``,
        ``"Author"``, ``"Language"`` and ``"References"``. When no file was
        uploaded the metadata dict is empty.
    """
    # Function-scope imports keep this block self-contained.
    import json
    import os

    # Submitting the form without a file passes None; answer with a message
    # instead of failing on ``None.name``.
    if file is None:
        return "No document uploaded", {}

    # Accept both a plain path and a file-like wrapper carrying ``.name``.
    source = file if isinstance(file, (str, os.PathLike)) else file.name

    # Load document and convert it using Docling
    converter = DocumentConverter()
    document = converter.convert(source).document

    # Choose the output format
    if output_format == "Markdown":
        converted_text = document.export_to_markdown()
    elif output_format == "JSON":
        export_to_json = getattr(document, "export_to_json", None)
        if callable(export_to_json):
            converted_text = export_to_json()
        else:
            # No ``export_to_json`` on this document object: serialize the
            # dict export instead.
            converted_text = json.dumps(
                document.export_to_dict(), indent=2, ensure_ascii=False
            )
    else:
        converted_text = "Unsupported format"

    # Extract metadata. These attributes are not guaranteed to exist on the
    # document object; a missing one is reported as None rather than raising
    # AttributeError and discarding the converted text.
    metadata = {
        "Title": getattr(document, "title", None),
        "Author": getattr(document, "author", None),
        "Language": getattr(document, "language", None),
        "References": getattr(document, "references", None),
    }

    return converted_text, metadata
26
+
27
# Define the Gradio interface.
# Components come from the top-level ``gr`` namespace: the legacy
# ``gr.inputs`` / ``gr.outputs`` namespaces are deprecated and no longer
# available in current Gradio releases.
input_file = gr.File(label="Upload Document")
output_format = gr.Radio(["Markdown", "JSON"], label="Choose Output Format")
output_text = gr.Textbox(label="Converted Document")
output_metadata = gr.JSON(label="Metadata")

# Wire the converter to the UI: two inputs (file, format) map onto the
# function's parameters, two outputs onto its returned tuple.
app = gr.Interface(
    fn=convert_document,
    inputs=[input_file, output_format],
    outputs=[output_text, output_metadata],
    title="Document Converter with Docling",
    description="Upload a document (PDF, DOCX, or image), choose the output format, and get the converted document text along with metadata.",
)

# Start the web server when this script is executed.
app.launch()