Spaces:
Sleeping
Sleeping
import gradio as gr | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
model_name = "Qwen/Qwen2.5-7B-Instruct" # Replace with a smaller model if needed | |
# Load tokenizer and model | |
tokenizer = AutoTokenizer.from_pretrained(model_name) | |
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype="auto") | |
# Define the inference function | |
def chat_with_model(input_text): | |
inputs = tokenizer(input_text, return_tensors="pt").to("cuda") | |
outputs = model.generate(inputs["input_ids"], max_length=200) | |
return tokenizer.decode(outputs[0], skip_special_tokens=True) | |
# Create the Gradio interface | |
iface = gr.Interface( | |
fn=chat_with_model, | |
inputs=gr.Textbox(label="Enter your message"), | |
outputs=gr.Textbox(label="Model's Response"), | |
title="AI Chat with Qwen", | |
) | |
iface.launch() | |