import cv2
import gradio as gr
import numpy as np
import onnxruntime as ort
from huggingface_hub import hf_hub_download
from rapidocr_onnxruntime import RapidOCR


# Download the detection, recognition, and classification models from Hugging Face.
def download_model_files():
    model_repo = "SWHL/RapidOCR"
    det_model_filename = "PP-OCRv4/en_PP-OCRv3_det_infer.onnx"
    rec_model_filename = "PP-OCRv4/ch_PP-OCRv4_rec_server_infer.onnx"
    cls_model_filename = "PP-OCRv3/ch_ppocr_mobile_v2.0_cls_train.onnx"

    det_model_path = hf_hub_download(repo_id=model_repo, filename=det_model_filename)
    rec_model_path = hf_hub_download(repo_id=model_repo, filename=rec_model_filename)
    cls_model_path = hf_hub_download(repo_id=model_repo, filename=cls_model_filename)

    return det_model_path, rec_model_path, cls_model_path


# Download model files
det_model_path, rec_model_path, cls_model_path = download_model_files()

# Load the raw ONNX models. Note: these sessions are not used by the RapidOCR
# pipeline below, which manages its own models internally.
det_session = ort.InferenceSession(det_model_path)
rec_session = ort.InferenceSession(rec_model_path)
cls_session = ort.InferenceSession(cls_model_path)


def preprocess_image(image):
    """Optional helper (currently unused): convert an image to grayscale."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Resize or pad the image to the desired size here if necessary.
    return gray


# Initialize the RapidOCR engine (uses its bundled default models).
ocr_engine = RapidOCR()


def ocr_predict(image):
    """Perform OCR on the input image and return the extracted text."""
    # Gradio provides the image as an RGB array; convert to BGR for OpenCV.
    image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # Run OCR; the result is a list of [box, text, score] entries,
    # or None when no text is detected.
    result, _ = ocr_engine(image_bgr)
    if not result:
        return ""

    # Join the recognized text lines.
    extracted_text = "\n".join(item[1] for item in result)
    return extracted_text


# Define the Gradio interface.
iface = gr.Interface(
    fn=ocr_predict,
    inputs=gr.Image(type="numpy", label="Upload Image"),
    outputs=gr.Textbox(label="Detected Text"),
    title="RapidOCR Gradio Demo",
    description="Upload an image to extract text using RapidOCR.",
)

if __name__ == "__main__":
    iface.launch()
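# ---------------------------------------------------------------------------
# Notes (assumptions, not part of the original pipeline):
# - A requirements.txt for this demo would likely need: gradio, numpy,
#   opencv-python, onnxruntime, rapidocr-onnxruntime, huggingface_hub.
# - rapidocr_onnxruntime also documents a custom-model interface, e.g.
#   RapidOCR(det_model_path=..., rec_model_path=..., cls_model_path=...),
#   which could reuse the files downloaded above instead of the bundled
#   defaults; verify against the installed rapidocr_onnxruntime version.
# - Run with `python app.py` (filename assumed) and open the local URL that
#   Gradio prints.
# ---------------------------------------------------------------------------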