|
import functools
import logging

import pytesseract
import torch
from PIL import Image
from torchvision import transforms
from transformers import ResNetForImageClassification
|
|
|
def resize_image(image, max_size=(800, 600)):
    """Return a copy of *image* shrunk to fit within *max_size*.

    The resize is delegated to the image's ``thumbnail`` method, which
    works in place. To keep the caller's image untouched (so it can still
    be used at full resolution afterwards), the work is done on a copy.

    Args:
        image: Image object exposing ``copy()`` and ``thumbnail()``
            (e.g. a ``PIL.Image.Image``).
        max_size: ``(width, height)`` bound handed to ``thumbnail``.

    Returns:
        The resized copy; the input object is left unmodified.
    """
    # thumbnail() mutates its receiver, so never call it on the caller's object.
    resized = image.copy()
    resized.thumbnail(max_size)
    return resized
|
|
|
def extract_text_from_image(image):
    """Run OCR on *image* and return the recognised text.

    The image is passed through ``resize_image`` before being handed to
    ``pytesseract.image_to_string``. This is a best-effort helper: any
    exception raised along the way is logged and an empty string is
    returned instead of propagating the error.
    """
    ocr_text = ""
    try:
        downsized = resize_image(image)
        ocr_text = pytesseract.image_to_string(downsized)
    except Exception as e:
        # Deliberate catch-all: callers rely on always getting a string back.
        logging.error(f"Error extracting text from image: {e}")
    return ocr_text
|
|
|
@functools.lru_cache(maxsize=1)
def _load_resnet_model():
    """Load the pre-trained ResNet-50 classifier once and reuse it across calls."""
    model = ResNetForImageClassification.from_pretrained("microsoft/resnet-50")
    # Inference only: switch the module to evaluation mode.
    model.eval()
    return model


def analyze_image(image):
    """Analyze image content using a pre-trained model.

    The image is converted to RGB if needed, resized to 256, center-cropped
    to 224, turned into a tensor, normalised with the per-channel mean/std
    constants below, and given a leading batch dimension before being fed
    to the ``microsoft/resnet-50`` classifier with gradients disabled.

    Args:
        image: Image object exposing ``mode`` and ``convert()``
            (e.g. a ``PIL.Image.Image``).

    Returns:
        The object returned by calling the model on the preprocessed
        tensor, or ``None`` if any step fails (the error is logged).
    """
    try:
        # Normalize below uses three-channel statistics; grayscale, palette
        # or RGBA inputs would otherwise fail with a channel mismatch.
        if image.mode != "RGB":
            image = image.convert("RGB")

        preprocess = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        # unsqueeze(0) adds the batch dimension the model call expects.
        image_tensor = preprocess(image).unsqueeze(0)

        # Cached loader: avoids re-instantiating the model on every call.
        model = _load_resnet_model()

        with torch.no_grad():
            output = model(image_tensor)
        return output
    except Exception as e:
        # Best-effort contract: report the failure and signal it with None.
        logging.error(f"Error analyzing image: {e}")
        return None