episbase / app.py
Andrii Fedorenko
Implement OCR functionality with Gradio interface and model downloads
5925098
import gradio as gr
import numpy as np
import cv2
import onnxruntime as ort
import os
from huggingface_hub import hf_hub_download
def download_model_files():
    """Download the detection, recognition and classification ONNX models.

    Every file is requested from the ``SWHL/RapidOCR`` repository through
    ``hf_hub_download``, in detection / recognition / classification order.

    Returns:
        A 3-tuple ``(det_model_path, rec_model_path, cls_model_path)`` holding
        the value ``hf_hub_download`` returned for each file.
    """
    # NOTE(review): the detection file is named "PP-OCRv3" but is requested
    # from the "PP-OCRv4/" folder, and the classifier name ends in "_train" --
    # confirm that these exact paths exist in the repository.
    repo = "SWHL/RapidOCR"
    wanted = (
        "PP-OCRv4/en_PP-OCRv3_det_infer.onnx",  # detection
        "PP-OCRv4/ch_PP-OCRv4_rec_server_infer.onnx",  # recognition
        "PP-OCRv3/ch_ppocr_mobile_v2.0_cls_train.onnx",  # classification
    )
    det_path, rec_path, cls_path = (
        hf_hub_download(repo_id=repo, filename=name) for name in wanted
    )
    return det_path, rec_path, cls_path
# Fetch the detection, recognition and classification model files up front.
det_model_path, rec_model_path, cls_model_path = download_model_files()

# Create one ONNX Runtime session per model, in det / rec / cls order.
# NOTE(review): none of these sessions is referenced by the visible code
# (ocr_predict goes through the RapidOCR engine) -- confirm they are needed.
det_session, rec_session, cls_session = (
    ort.InferenceSession(path)
    for path in (det_model_path, rec_model_path, cls_model_path)
)
def preprocess_image(image):
    """Return a single-channel grayscale version of ``image``.

    No resizing or padding is applied.

    Args:
        image: Image array. Multi-channel input is converted with
            ``cv2.COLOR_BGR2GRAY``, so it is assumed to be in BGR channel
            order -- TODO confirm against callers.

    Returns:
        A 2-D grayscale array. Input that is already single-channel is
        returned without a colour conversion (and without copying).
    """
    # Already grayscale: cv2.cvtColor rejects 1-channel input for BGR2GRAY,
    # so hand such images back instead of crashing.
    if image.ndim == 2:
        return image
    if image.ndim == 3 and image.shape[2] == 1:
        # Drop the trailing channel axis so the result is 2-D like cvtColor's.
        return image[:, :, 0]

    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
import cv2
from rapidocr_onnxruntime import RapidOCR
# Module-level RapidOCR engine, constructed once with default arguments and
# reused by ocr_predict for every request.
ocr_engine = RapidOCR()
def ocr_predict(image):
    """Run OCR on an RGB image and return the recognised text.

    Args:
        image: RGB image array as delivered by the Gradio image input, or
            ``None`` when the form is submitted without an uploaded image.

    Returns:
        The recognised text fragments joined with newlines, or an empty
        string when there is no image or no text was recognised.
    """
    # Gradio passes None when nothing was uploaded; cvtColor would raise on it.
    if image is None:
        return ""

    # The engine is fed BGR data, so swap the channel order first.
    image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    result, _ = ocr_engine(image_bgr)

    # RapidOCR yields None rather than an empty list when no text is found,
    # so iterating the result unguarded would raise TypeError.
    if not result:
        return ""

    # Index 1 of every result entry holds the recognised text.
    return "\n".join(item[1] for item in result)
# Build the input and output widgets first, then wire them to the OCR callback.
image_input = gr.Image(type="numpy", label="Upload Image")
text_output = gr.Textbox(label="Detected Text")

iface = gr.Interface(
    fn=ocr_predict,
    inputs=image_input,
    outputs=text_output,
    title="RapidOCR Gradio Demo",
    description="Upload an image to extract text using RapidOCR.",
)

# Start the web UI only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()