"""Patient Sim: a Gradio chat UI backed by an OpenAI assistant.

The user types (or records) a message, the assistant's reply is streamed
into the chatbot, and the finished reply is converted to speech and handed
to the page for playback.

Configuration is read from environment variables:

* ``api_key`` -- the OpenAI API key.
* ``users``   -- a JSON object mapping usernames to passwords, used by
  ``login`` for the Gradio auth prompt.
"""

import base64
import codecs
import hmac
import json
import os

import gradio as gr
from openai import OpenAI

api_key = os.environ.get('api_key')


def login(username, password):
    """Return True when *username*/*password* match the ``users`` env var.

    ``users`` must hold a JSON object of ``{username: password}``. Any
    problem (variable missing, invalid JSON, unknown user, non-string
    values) results in ``False`` rather than an exception, so a
    misconfiguration denies access instead of crashing the auth callback.
    """
    try:
        users = json.loads(os.environ.get('users'))
    except (TypeError, ValueError):
        # TypeError: variable not set (None); ValueError: malformed JSON.
        return False
    if not isinstance(users, dict):
        return False
    expected = users.get(username)
    if not isinstance(expected, str) or not isinstance(password, str):
        return False
    # Constant-time comparison so response timing does not leak how much of
    # the password matched.
    return hmac.compare_digest(
        expected.encode("utf-8"), password.encode("utf-8")
    )


print("Start")
client = OpenAI(api_key=api_key)

# Remove assistants left over from previous runs before creating a new one.
assistants = client.beta.assistants.list()
for a in assistants:
    # An assistant may have no name; skip those instead of failing on
    # `in None`.
    if a.name and "Patient Sim" in a.name:
        client.beta.assistants.delete(a.id)

with codecs.open("instruction.txt", "r", "utf-8") as instruction_file:
    instruction = instruction_file.read()
# NOTE(review): the file contents are overridden by the hard-coded prompt
# below, and "gpt-4o" by "gpt-3.5-turbo". Kept as found -- confirm which
# values are intended.
instruction = "You are helpful assistant. Keep your responses clear and concise."
model = "gpt-4o"
model = "gpt-3.5-turbo"
assistant = client.beta.assistants.create(
    name="Patient Sim", instructions=instruction, model=model
)

with open("toggle_speech.js") as js_file:
    toggle_js = js_file.read()
with open("play.js") as js_file:
    play_js = js_file.read()


def start_thread():
    """Create and return a new conversation thread (one per UI session)."""
    return client.beta.threads.create()


def user(text, audio, history, thread):
    """Post the user's message to the thread and append it to the chat.

    When *audio* (a recording file path) is given it is transcribed and the
    transcript replaces *text*.

    Returns a pair: an empty string (clears the textbox) and the history
    extended with ``[text, None]``, where ``None`` is the slot ``bot``
    fills in.
    """
    if audio:
        text = transcribe(audio)
    print(f"User: {text}")
    client.beta.threads.messages.create(
        thread_id=thread.id, role="user", content=text
    )
    return "", history + [[text, None]]


def bot(history, thread):
    """Stream the assistant's reply into the last history entry.

    Starts a streaming run on *thread* and yields *history* after every
    text delta so the chatbot updates incrementally.
    """
    stream = client.beta.threads.runs.create(
        thread_id=thread.id, assistant_id=assistant.id, stream=True
    )
    history[-1][1] = ""
    for event in stream:
        try:
            delta = event.data.delta.content[0].text.value
        except (AttributeError, IndexError, TypeError):
            # Not a text-delta event (no delta / no text content); ignore.
            continue
        if not isinstance(delta, str):
            continue
        history[-1][1] += delta
        # The yield is deliberately outside the try block: closing the
        # generator raises GeneratorExit here and it must propagate.
        yield history


def transcribe(file):
    """Transcribe the audio file at path *file* and return the text."""
    print(f"Transcribe: {file}")
    with open(file, "rb") as audio_file:
        response = client.audio.transcriptions.create(
            file=audio_file,
            model="whisper-1",
            language="en",
            response_format="text",
        )
    return response


def speak(history):
    """Synthesize the latest assistant reply and return it as HTML.

    The audio is embedded as a base64 ``data:`` URI inside an ``<audio>``
    element. An empty reply produces an empty string (nothing to play).
    """
    text = history[-1][1]
    print(f"Assistant: {text}")
    if not text:
        # Nothing to say; skip the speech request.
        return ""
    speech = client.audio.speech.create(model="tts-1", voice="alloy", input=text)
    audio = base64.b64encode(speech.read()).decode("utf-8")
    src = f"data:audio/mpeg;base64,{audio}"
    # NOTE(review): the original returned an empty string and never used
    # `src`. play.js is not visible here -- confirm the markup/selector it
    # expects inside the "player" element.
    audio_element = f'<audio src="{src}" controls></audio>'
    return audio_element


def vote(data: gr.LikeData):
    """Log whether the user liked or disliked a chatbot response."""
    # f-strings so a non-string `data.value` cannot raise on concatenation.
    if data.liked:
        print(f"You upvoted this response: {data.value}")
    else:
        print(f"You downvoted this response: {data.value}")


css = """
.container {height: 100vh;}
#title_markdown {height: 10%;}
#chatbot {height: 60%;}
#speech {height: 10%;}
#textbox {height: 60%;}
footer{display:none !important}
"""

with gr.Blocks(title="Patient Sim", css=css) as demo:
    with gr.Column(elem_classes=["container"]):
        gr.Markdown(
            """
            # Patient Sim
            Say goodbye to end the session and receive a feedback.
            """,
            elem_id="title_markdown",
        )
        chatbot = gr.Chatbot(label="Messages", elem_id="chatbot")
        chatbot.like(vote, None, None)
        speech = gr.Button("Record", size="sm", elem_id="speech")
        speech.click(None, js=toggle_js)
        msg = gr.Textbox(label="Say something.", elem_id="textbox")
        # Hidden recorder; the "Record" button runs toggle_js on click.
        mic = gr.Microphone(
            type="filepath",
            format="mp3",
            editable=False,
            waveform_options={"show_controls": False},
            visible=False,
            elem_id="recorder",
        )
        thread = gr.State(start_thread)
        audio_html = gr.HTML(visible=False, elem_id="player")

        # Typed message: post -> stream reply -> synthesize -> play.
        msg.submit(user, [msg, mic, chatbot, thread], [msg, chatbot]).then(
            bot, [chatbot, thread], chatbot).then(
            speak, chatbot, audio_html).then(
            None, None, None, js=play_js
        )
        # Recorded message: same pipeline, plus clearing the recorder so the
        # old clip is not re-sent with the next text submission.
        mic.stop_recording(user, [msg, mic, chatbot, thread], [msg, chatbot]).then(
            lambda: None, None, mic).then(
            bot, [chatbot, thread], chatbot).then(
            speak, chatbot, audio_html).then(
            None, None, None, js=play_js
        )

#demo.queue()
demo.launch(auth=login)