RakeshUtekar
committed on
Upload app.py
app.py
ADDED
@@ -0,0 +1,172 @@
import streamlit as st
import torch
from langchain.chains import LLMChain
from langchain.prompts import ChatPromptTemplate
from langchain_community.llms import HuggingFacePipeline  # Updated import
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# ---------------------------------------
# Utility functions
# ---------------------------------------

def load_model_pipeline(model_name: str):
    """Load a Hugging Face text-generation pipeline for the given model name.

    For large models, ensure you have the model downloaded and configured.
    Adjust device_map, load_in_8bit, etc., as needed for your hardware.
    """
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",  # Adjust as needed (e.g. "cpu", "cuda:0")
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            trust_remote_code=True,
        )
        # Wrap the model and tokenizer in a text-generation pipeline
        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_length=512,  # consider max_new_tokens instead if prompts grow long
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.eos_token_id,
        )
        return pipe
    except Exception as e:
        st.error(f"Error loading model {model_name}: {e}")
        return None


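# The docstring above mentions load_in_8bit as a hardware-dependent option. A
# minimal sketch of that path, assuming the optional bitsandbytes and accelerate
# packages are installed (they are not used elsewhere in this file), would swap
# the from_pretrained call for something like:
#
#     from transformers import BitsAndBytesConfig
#     model = AutoModelForCausalLM.from_pretrained(
#         model_name,
#         device_map="auto",
#         quantization_config=BitsAndBytesConfig(load_in_8bit=True),
#         trust_remote_code=True,
#     )
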
def create_prompt(name: str, persona_style: str):
    """Create the chat prompt template for the persona conversation."""
    # Persona description and response guidelines are combined into a single
    # template; {chat_history} and {input} are filled in by the chain at call time.
    prompt_template_str = f"""
In this interaction, your role is to embody the personality of a human named {name}, with characteristics defined as {persona_style}. Engage in a natural, human-like conversation, responding to each query or comment as if you were {name} with the specific traits and style of {persona_style}. Remember, your responses should reflect a human perspective, complete with emotions, opinions, and personal insights that align with {persona_style}.

You are {name}, communicating with a personality that reflects {persona_style}. Keep these guidelines in mind:
- Be brief and natural, like texting a friend
- Share genuine thoughts and feelings ({persona_style} style)
- Use everyday language
- Keep responses to 1-2 short sentences
- Show interest in others through questions
- React naturally to what others say
- Use emojis sparingly and naturally when they fit your {persona_style}
- Don't overuse emojis (1-2 max per message)

Current conversation:
{{chat_history}}
Human: {{input}}
AI:
"""
    return ChatPromptTemplate.from_template(prompt_template_str)


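# For illustration only, this is roughly how the two placeholders get filled at
# call time (hypothetical values):
#     prompt.format(chat_history="Human: hi\nAI: hey there!\n", input="How was your day?")
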
def simulate_conversation(chain: LLMChain, turns: int = 15):
    """Simulate a conversation for a given number of turns.

    The "Human" side cycles through a fixed set of generic messages, and the
    AI's response is generated for each one. The chat history is accumulated
    turn by turn, and the full transcript is returned once all turns complete.
    """

    chat_history = ""
    # Rotating set of simple human messages to keep the conversation moving.
    human_messages = [
        "Hey, what's up?",
        "That's interesting, tell me more!",
        "Really? How does that make you feel?",
        "What do you think about that?",
        "Haha, that’s funny. Why do you say that?",
        "Hmm, I see. Can you elaborate?",
        "What would you do in that situation?",
        "Any personal experience with that?",
        "Oh, I didn’t know that. Explain more.",
        "Do you have any other thoughts?",
        "That's a unique perspective. Why?",
        "How would you handle it differently?",
        "Can you share an example?",
        "That sounds complicated. Are you sure?",
        "So what’s your conclusion?"
    ]

    try:
        for i in range(turns):
            human_input = human_messages[i % len(human_messages)]
            # Generate the AI response for this turn
            response = chain.run(chat_history=chat_history, input=human_input)
            # Append the exchange to the running chat history
            chat_history += f"Human: {human_input}\nAI: {response}\n"
        # Return the full conversation after all turns are done
        return chat_history
    except Exception as e:
        st.error(f"Error during conversation simulation: {e}")
        return None


def summarize_conversation(chain: LLMChain, conversation: str):
    """Use the LLM to summarize the completed conversation."""
    # A simple summary prompt, run through the same chain with an empty history.
    summary_prompt = (
        "Summarize the following conversation in a few short sentences "
        "highlighting the main points, tone, and conclusion:\n\n"
        f"{conversation}\nSummary:"
    )
    try:
        response = chain.run(chat_history="", input=summary_prompt)
        return response.strip()
    except Exception as e:
        st.error(f"Error summarizing conversation: {e}")
        return "No summary available due to error."


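# Note: simulate_conversation and summarize_conversation drive the model through
# the legacy LLMChain.run(...) interface. On newer LangChain releases the same
# call can be written in the runnable (LCEL) style; an alternative sketch, not
# what this app uses:
#     chain = prompt | llm
#     response = chain.invoke({"chat_history": chat_history, "input": human_input})
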
# ---------------------------------------
# Streamlit App
# ---------------------------------------
def main():
    st.title("LLM Conversation Simulation")

    # Model selection
    model_names = [
        "meta-llama/Llama-3.3-70B-Instruct",
        "meta-llama/Llama-3.1-405B-Instruct",
        "lmsys/vicuna-13b-v1.5"
    ]
    selected_model = st.selectbox("Select a model:", model_names)

    # Persona inputs
    name = st.text_input("Enter the persona's name:", value="Alex")
    persona_style = st.text_area("Enter the persona style characteristics:",
                                 value="friendly, curious, and a bit sarcastic")

    # Button to start the simulation
    if st.button("Start Conversation Simulation"):
        with st.spinner("Loading model and starting simulation..."):
            pipe = load_model_pipeline(selected_model)
            if pipe is not None:
                # Wrap the transformers pipeline in a LangChain LLM
                llm = HuggingFacePipeline(pipeline=pipe)

                # Build the persona prompt template and the chain
                prompt = create_prompt(name, persona_style)
                chain = LLMChain(llm=llm, prompt=prompt)

                # Simulate the conversation
                conversation = simulate_conversation(chain, turns=15)
                if conversation:
                    st.subheader("Conversation:")
                    st.text(conversation)

                    # Summarize the conversation
                    st.subheader("Summary:")
                    summary = summarize_conversation(chain, conversation)
                    st.write(summary)


if __name__ == "__main__":
    main()
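
To try the app outside of the Space, a typical local run (assuming streamlit, torch, transformers, langchain, langchain-community, and accelerate are installed) is: streamlit run app.py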