# Filename   : app.py
# Description: A Streamlit application that turns an image into an audio story.
# Author     : Georgios Ioannou
# Copyright © 2024 by Georgios Ioannou
# Import libraries.

import os  # Load environment variable(s).
import requests  # Send HTTP GET request to Hugging Face models for inference.
import streamlit as st  # Build the GUI of the application.

from langchain.chat_models import ChatOpenAI  # Access to OpenAI gpt-3.5-turbo model.
from langchain.chains import LLMChain  # Chain to run queries against LLMs.
# A prompt template. It accepts a set of parameters from the user that can be used to generate a prompt for a language model.
from langchain.prompts import PromptTemplate
from transformers import pipeline  # Access to Hugging Face models.

# Load environment variable(s).


# Function to apply local CSS.

def local_css(file_name):
    """Inject the contents of a local CSS file into the Streamlit page.

    Args:
        file_name: Path of the CSS file to read.
    """
    with open(file_name) as css_file:
        css_rules = css_file.read()

    # Streamlit only honours raw <style> tags when unsafe HTML is allowed.
    style_tag = f"<style>{css_rules}</style>"
    st.markdown(style_tag, unsafe_allow_html=True)

# Return the text generated by the model for the image.
# Using pipeline.

def img_to_text(image_path):
    """Return the caption generated for an image by a Hugging Face pipeline.

    Task : "image-to-text" (see https://huggingface.co/tasks).
    Model: "Salesforce/blip-image-captioning-base".
    Backup models that can be swapped in through the ``model`` argument:
    "nlpconnect/vit-gpt2-image-captioning" and
    "Salesforce/blip-image-captioning-large".

    Args:
        image_path: Path of the image to caption; passed unchanged to the
            pipeline.

    Returns:
        The "generated_text" field of the first result returned by the
        pipeline.
    """
    # The pipeline call was left unclosed in the original, which made the
    # whole module fail to parse.
    image_to_text = pipeline(
        "image-to-text", model="Salesforce/blip-image-captioning-base"
    )

    scenario = image_to_text(image_path)[0]["generated_text"]

    return scenario

# Return the story generated by the model for the scenario.
# Using Langchain.

def generate_story(scenario, personality):
    """Return a short story about ``scenario`` told in the voice of ``personality``.

    Uses a LangChain ``LLMChain`` backed by the OpenAI "gpt-3.5-turbo" chat
    model. The template can be customized to meet other needs, e.g. to
    generate the lyrics of a song instead of a story.

    Args:
        scenario: Text describing the scene the story is based on.
        personality: Name of the person the story teller must sound like.

    Returns:
        The text predicted by the chain for the formatted prompt.
    """
    template = """
    You are a story teller.
    You must sound like {personality}.
    The story should be less than 50 words.
    Generate a story based on the above constraints and the following scenario: {scenario}.
    """

    prompt = PromptTemplate(
        template=template, input_variables=["scenario", "personality"]
    )

    story_llm = LLMChain(
        # temperature=0 keeps the output as repeatable as possible; raise it
        # to make the model more creative.
        llm=ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0),
        prompt=prompt,
        verbose=True,  # Print intermediate values to the console.
    )

    # Format the prompt with the keyword arguments and pass it to the LLM.
    story = story_llm.predict(scenario=scenario, personality=personality)

    return story

# Return the speech generated by the model for the story.
# Using inference api.

def text_to_speech(story):
    """Convert ``story`` to speech and save it as "audio.flac".

    Sends the text to the Hugging Face Inference API and writes the raw
    response body to "audio.flac" in the current working directory.

    Model used here: "espnet/kan-bayashi_ljspeech_vits".
    Backup model   : "facebook/mms-tts-eng"
    (https://api-inference.huggingface.co/models/facebook/mms-tts-eng).

    Args:
        story: Text to be spoken.

    Raises:
        RuntimeError: If the HUGGINGFACEHUB_API_TOKEN environment variable
            is not set.
        requests.HTTPError: If the Inference API answers with an error status.
    """
    # NOTE(review): URL rebuilt from the model named above and the pattern of
    # the backup URL; the original assignment was left empty - confirm.
    API_URL = (
        "https://api-inference.huggingface.co/models/"
        "espnet/kan-bayashi_ljspeech_vits"
    )

    # The token was referenced but never defined in this file, so read it here.
    # NOTE(review): the environment variable name is assumed to match the
    # constant name the original code referenced - confirm.
    token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
    if not token:
        raise RuntimeError(
            "HUGGINGFACEHUB_API_TOKEN environment variable is not set."
        )

    headers = {"Authorization": f"Bearer {token}"}

    payload = {"inputs": story}

    response = requests.post(API_URL, headers=headers, json=payload)

    # Fail loudly instead of saving an error payload as if it were audio.
    response.raise_for_status()

    with open("audio.flac", "wb") as file:
        file.write(response.content)

# Main function to create the Streamlit web application.

def main():
    """Build and run the Streamlit web application.

    Renders the page header, lets the user pick a personality and upload an
    image, then captions the image, generates a story from the caption,
    converts the story to speech and displays the scenario, the story and
    the audio. Any exception raised along the way is shown on the page.
    """
    try:
        # Page title and favicon.

        st.set_page_config(page_title="Image To Audio Story", page_icon="🖼️")

        # Load CSS.
        # NOTE(review): the stylesheet path is not visible in this file;
        # "styles/style.css" is an assumption - confirm. The existence check
        # keeps a missing stylesheet from aborting the whole page.

        css_path = "styles/style.css"
        if os.path.exists(css_path):
            local_css(css_path)

        # Title.

        title = """<h1 align="center" style="font-family: monospace; font-size: 2.1rem; margin-top: -4rem">
                    Turn Image to Audio Story</h1>"""
        st.markdown(title, unsafe_allow_html=True)

        # Subtitle.

        title = """<h2 align="center" style="font-family: monospace; font-size: 1.5rem; margin-top: -2rem">
                    CUNY Tech Prep Tutorial 1</h2>"""
        st.markdown(title, unsafe_allow_html=True)

        # Image, shown in the middle one of three columns to center it.

        image = "./ctp.png"
        left_co, cent_co, last_co = st.columns(3)
        with cent_co:
            st.image(image=image)

        # Define the personalities for the dropdown menu.

        personalities = [
            "Donald Trump",
            "Abraham Lincoln",
            "Cardi B",
            "Kanye West",
        ]
        personality = st.selectbox("Select a personality:", personalities)

        # Upload an image.

        uploaded_file = st.file_uploader("Choose an image:")

        if uploaded_file is not None:
            # Save the upload to disk so the captioning model can read it.
            # The file name comes from the client, so keep only its final
            # component to avoid writing outside the working directory.

            image_path = os.path.basename(uploaded_file.name)
            bytes_data = uploaded_file.getvalue()
            with open(image_path, "wb") as file:
                file.write(bytes_data)

            # Display the uploaded image.

            st.image(uploaded_file, caption="Uploaded Image.", use_column_width=True)

            # Spinner to keep the application interactive.
            with st.spinner(text="Model Inference..."):
                # Model inference.

                scenario = img_to_text(image_path)
                story = generate_story(scenario=scenario, personality=personality)
                text_to_speech(story)  # Writes "audio.flac".

                # Display the scenario and story.

                with st.expander("Scenario"):
                    st.write(scenario)
                with st.expander("Story"):
                    st.write(story)

            # Display the audio.

            st.audio("audio.flac")

    except Exception as e:
        # Display any errors.

        st.error(e)

if __name__ == "__main__":