Spaces:
Running
Running
Chaitanya Garg
committed on
Commit
·
f9902eb
1
Parent(s):
271bd8a
Working code
Browse files- app.py +31 -0
- multi_agent.py +199 -0
- prompt_handler.py +68 -0
- prompts/__pycache__/code_generator_prompt.cpython-310.pyc +0 -0
- prompts/__pycache__/get_reply_from_qwen.cpython-310.pyc +0 -0
- prompts/__pycache__/reasoner_prompt.cpython-310.pyc +0 -0
- prompts/__pycache__/test_case_generator_prompt.cpython-310.pyc +0 -0
- prompts/__pycache__/test_case_validator_prompt.cpython-310.pyc +0 -0
- prompts/code_generator_prompt.py +96 -0
- prompts/get_reply_from_qwen.py +21 -0
- prompts/reasoner_prompt.py +67 -0
- prompts/test_case_generator_prompt.py +126 -0
- prompts/test_case_validator_prompt.py +75 -0
- requirements.txt +3 -0
app.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from multi_agent import multi_agent_cycle
|
3 |
+
# Static text rendered around the demo.
title = "Thought CODER"
description = (
    "Made a Multi-Agent CODER LLM using Qwen2.5-Coder-32B API which not only "
    "Converts Thought to code but tests them out as well \n\n CONVERT YOUR THOUGHT TO CODE"
)
article = "Created by [Eternal Blissard](https://github.com/EternalBlissard)."

# Create the Gradio demo.
# The additional inputs are listed in the same order as the parameters that
# follow ``user_prompt`` in ``multi_agent_cycle``.
demo = gr.Interface(
    fn=multi_agent_cycle,
    inputs=[gr.Textbox(label="Code Requirements")],
    outputs=[gr.Textbox(label="Code")],
    additional_inputs=[
        gr.Textbox(label="Input TestCase"),
        gr.Textbox(label="Output TestCase"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.5, step=0.1, label="Temperature"),
        gr.Slider(minimum=128, maximum=4096, value=1024, step=1, label="Max New Tokens"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-P",
        ),
        gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Reasoner Iterations"),
        gr.Slider(minimum=3, maximum=11, value=5, step=1, label="Test Cases To be Generated"),
    ],
    title=title,
    description=description,
    article=article,
)

# Launch the demo only when run as a script, so importing this module has no side effects.
if __name__ == "__main__":
    demo.launch(share=True)
|
multi_agent.py
ADDED
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from prompt_handler import handle_code_generator_prompt, handle_test_case_generator_prompt, handle_test_validator_prompt, handle_reasoner_prompt
|
2 |
+
|
3 |
+
import random
|
4 |
+
import string
|
5 |
+
import re
|
6 |
+
import subprocess
|
7 |
+
|
8 |
+
def extract_output(pattern, text):
    """Return capture group 1 of the first match of ``pattern`` in ``text``.

    The search uses ``re.DOTALL`` so ``.`` also matches newlines.
    Returns ``None`` when ``pattern`` does not match.
    """
    found = re.search(pattern, text, re.DOTALL)
    return found.group(1) if found else None
|
13 |
+
|
14 |
+
|
15 |
+
def extract_all_outputs(pattern, text):
    """Return every non-overlapping match of ``pattern`` in ``text`` as a list.

    Matching uses ``re.DOTALL`` so a single match may span several lines.
    The list is empty when nothing matches.
    """
    matcher = re.compile(pattern, re.DOTALL)
    return matcher.findall(text)
|
23 |
+
|
24 |
+
def generate_filename():
    """Return a random 20-character name: one ASCII letter followed by 19 letters or digits."""
    letters = string.ascii_letters
    letters_and_digits = letters + string.digits

    # The leading character is always a letter; the rest may also be digits.
    chars = [random.choice(letters)]
    chars.extend(random.choice(letters_and_digits) for _ in range(19))
    return ''.join(chars)
|
35 |
+
|
36 |
+
# Maximum number of times one LLM request is repeated when its reply cannot be parsed.
_MAX_LLM_ATTEMPTS = 5
# Seconds a generated pytest module may run before the candidate counts as failing.
_PYTEST_TIMEOUT_SECONDS = 60
# Note appended to every piece of code handed back to the caller.
_DECODER_NOTE = "\n\n DECODER IS SOLELY TO TACKLE INPUTS FROM FILE READING"


def _ask_until_parsed(ask, parse, attempts=_MAX_LLM_ATTEMPTS):
    """Call ``ask()`` until ``parse(reply)`` is truthy; return it, or ``None`` after ``attempts`` tries."""
    for _ in range(attempts):
        parsed = parse(ask() or "")
        if parsed:
            return parsed
    return None


def _run_decoder_tests(code, cases):
    """Run the ``decoder`` function defined by ``code`` against ``cases`` using pytest.

    Args:
        code: Python source expected to define ``decoder(s)``.
        cases: list of ``(input, expected)`` string pairs.

    Returns:
        ``(passed, failed)``.  A run that pytest could not complete normally
        (collection error, crash, timeout) counts every case as failed.
    """
    import os
    import sys
    import tempfile

    if not cases:
        return 0, 0

    source = ["import pytest\n\n", code, "\n\n", '@pytest.mark.parametrize("n, expected", [\n']
    # repr() keeps quotes, backslashes and newlines inside a case from breaking the module.
    source.extend(f"    ({given!r}, {expected!r}),\n" for given, expected in cases)
    source.append("])\n")
    source.append("def test_statement(n, expected):\n")
    source.append("    result = decoder(n)\n")
    source.append("    assert result == expected\n")

    # NOTE(security): this executes model-generated code without any sandboxing.
    # The scratch directory (and the module in it) is removed when the block exits.
    with tempfile.TemporaryDirectory() as workdir:
        path = os.path.join(workdir, generate_filename() + ".py")
        with open(path, "w", encoding="utf-8") as handle:
            handle.writelines(source)
        try:
            result = subprocess.run(
                [sys.executable, "-m", "pytest", path],
                capture_output=True,  # Capture stdout and stderr
                text=True,  # Return the output as str, not bytes
                cwd=workdir,
                timeout=_PYTEST_TIMEOUT_SECONDS,
            )
        except subprocess.TimeoutExpired:
            return 0, len(cases)

    if result.returncode == 0:
        return len(cases), 0
    if result.returncode != 1:
        # Exit codes other than 0/1 mean pytest never got to run the cases
        # (e.g. the generated code failed to import), so nothing was verified.
        return 0, len(cases)
    counts = re.findall(r'(\d+) passed', result.stdout)
    passed = min(int(counts[-1]), len(cases)) if counts else 0
    return passed, len(cases) - passed


def multi_agent_cycle(user_prompt=None,input_test="", output_test="",temperature=0.5,max_tokens=1024,top_p=0.5, reasoner_num = 3, test_case_num = 7):
    """Turn a natural-language requirement into tested Python code.

    Each iteration asks the reasoner for a plan, the code generator for code,
    the test-case generator for cases and the validator to vet each case, then
    runs the surviving cases (and the user's own example) with pytest.

    Args:
        user_prompt: the requirement text; ``None`` or ``""`` aborts immediately.
        input_test: example input; newlines (real or written as ``\\n``) become ``§``.
        output_test: expected output for ``input_test``, encoded the same way.
        temperature: base sampling temperature; raised by 0.05 per iteration.
        max_tokens: base token budget; raised by 512 per iteration.
        top_p: nucleus-sampling value forwarded to every generation request.
        reasoner_num: number of reason/generate/test iterations to attempt.
        test_case_num: number of test cases requested from the generator.

    Returns:
        The generated code followed by a usage note, ``"No prompt given"`` when
        no requirement was supplied, or ``"NO VALID CODE MADE"`` when no
        candidate passed the user's example and at least one generated case.
    """
    if not user_prompt:
        return "No prompt given"

    def flatten(text):
        # Accept both a literally typed "\n" and a real line break.
        return (text or "").replace(r'\n', '§').replace('\r\n', '§').replace('\n', '§')

    input_test_ = flatten(input_test)
    output_test_ = flatten(output_test)
    # Without a user example there is nothing to check against, so that gate is skipped.
    user_cases = [(input_test_, output_test_)] if (input_test_ or output_test_) else []

    testcaseset = (
        "\n#TESTINPUTSTART\n"
        f"{input_test_}\n"
        "#TESTINPUTEND\n"
        "#TESTOUTPUTSTART\n"
        f"{output_test_}\n"
        "#TESTOUTPUTEND\n"
    )
    user_prompt_ = (
        "\n#USERINPUTSTART\n"
        f"{user_prompt}\n"
        "#USERINPUTEND\n"
        f"{testcaseset}\n"
    )

    best_code = None
    best_passed = 0
    for idx in range(int(reasoner_num)):
        # Later iterations sample more freely and get a larger token budget.
        sampling = {
            "temperature": min(temperature + 0.05 * idx, 2.0),  # 2.0 is the UI's upper bound
            "max_tokens": int(max_tokens) + 512 * idx,
            "top_p": top_p,
        }

        reasoner_res = _ask_until_parsed(
            lambda: handle_reasoner_prompt(user_prompt_, **sampling),
            lambda reply: extract_output(r'#OUTPUTSTART\n(.*?)\n#OUTPUTEND', reply),
        )
        if reasoner_res is None:
            continue
        reasoner_res_ = (
            "\n#INPUTSTART\n"
            f"{reasoner_res}\n"
            "#INPUTEND\n"
            f"{testcaseset}\n"
        )

        code_gen_res = _ask_until_parsed(
            lambda: handle_code_generator_prompt(reasoner_res_, **sampling),
            lambda reply: extract_output(r'#CODESTART\n(.*?)#CODEEND', reply),
        )
        if code_gen_res is None:
            continue

        # The user's own example is a hard requirement: skip candidates that fail it.
        _, user_failed = _run_decoder_tests(code_gen_res, user_cases)
        if user_failed:
            continue

        test_request = (
            reasoner_res_
            + f"LIMIT OUTPUT TO {test_case_num} TESTCASES USING THE STEPS! MAKE SURE EDGE CASES AREN'T MISSED"
        )
        test_gen_res = _ask_until_parsed(
            lambda: handle_test_case_generator_prompt(test_request, **sampling),
            lambda reply: extract_all_outputs(r'#TESTCASESTART\n(.*?)\n#TESTCASEEND', reply),
        ) or []

        generated_cases = []
        for test_gen in test_gen_res:
            case_input = extract_output(r'#TESTCASEINPUTSTART\n(.*?)\n#TESTCASEINPUTEND', test_gen)
            case_output = extract_output(r'#TESTCASEOUTPUTSTART\n(.*?)\n#TESTCASEOUTPUTEND', test_gen)
            if case_input is None or case_output is None:
                # Malformed case: not worth a validator call.
                continue
            new_prompt = (
                "\n#SUBPROBLEMSTART\n"
                f"{reasoner_res}\n"
                "#SUBPROBLEMEND\n"
                "#TESTCASESTART\n"
                f"{test_gen}\n"
                "#TESTCASEEND\n"
            )
            verdict = extract_output(
                r'#OUTPUTSTART\n(.*?)\n#OUTPUTEND',
                handle_test_validator_prompt(new_prompt) or "",
            )
            if verdict is not None and verdict.strip() == 'CORRECT':
                generated_cases.append((case_input, case_output))

        passed_tests, failed_tests = _run_decoder_tests(code_gen_res, generated_cases)
        if failed_tests == 0:
            return code_gen_res + _DECODER_NOTE

        # Remember the candidate that satisfied the most generated cases so far.
        if passed_tests > best_passed:
            best_code = code_gen_res
            best_passed = passed_tests

    if best_code is not None:
        return best_code + _DECODER_NOTE
    return "NO VALID CODE MADE"
|
194 |
+
|
195 |
+
|
196 |
+
|
197 |
+
|
198 |
+
|
199 |
+
|
prompt_handler.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from prompts.reasoner_prompt import getReasonerSystemPrompt
|
2 |
+
from prompts.test_case_generator_prompt import getTestCaseGeneratorSystemPrompt
|
3 |
+
from prompts.test_case_validator_prompt import getTestValidatorGeneratorSystemPrompt
|
4 |
+
from prompts.code_generator_prompt import getCodeGeneratorSystemPrompt
|
5 |
+
from prompts.get_reply_from_qwen import qwen_reply
|
6 |
+
|
7 |
+
def _ask_qwen(system_prompt, user_prompt, temperature, max_tokens, top_p):
    """Send one system + user message pair through ``qwen_reply`` and return its result."""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": [
            {
                "type": "text",
                "text": user_prompt
            }
        ]}
    ]
    return qwen_reply(temperature=temperature,
                      max_tokens=max_tokens,
                      top_p=top_p,
                      messages=messages)


def handle_reasoner_prompt(user_prompt,temperature=0.5,max_tokens=1024, top_p=0.7):
    """Query the model with the reasoner system prompt and ``user_prompt`` as the user message."""
    return _ask_qwen(getReasonerSystemPrompt(), user_prompt, temperature, max_tokens, top_p)


def handle_code_generator_prompt(user_prompt,temperature=0.5,max_tokens=1024, top_p=0.7):
    """Query the model with the code-generator system prompt and ``user_prompt`` as the user message."""
    return _ask_qwen(getCodeGeneratorSystemPrompt(), user_prompt, temperature, max_tokens, top_p)


def handle_test_case_generator_prompt(user_prompt,temperature=0.5,max_tokens=1024, top_p=0.7):
    """Query the model with the test-case-generator system prompt and ``user_prompt`` as the user message."""
    return _ask_qwen(getTestCaseGeneratorSystemPrompt(), user_prompt, temperature, max_tokens, top_p)


def handle_test_validator_prompt(user_prompt,temperature=0.5,max_tokens=1024, top_p=0.7):
    """Query the model with the test-validator system prompt and ``user_prompt`` as the user message."""
    return _ask_qwen(getTestValidatorGeneratorSystemPrompt(), user_prompt, temperature, max_tokens, top_p)
|
66 |
+
|
67 |
+
|
68 |
+
|
prompts/__pycache__/code_generator_prompt.cpython-310.pyc
ADDED
Binary file (3.02 kB). View file
|
|
prompts/__pycache__/get_reply_from_qwen.cpython-310.pyc
ADDED
Binary file (1.22 kB). View file
|
|
prompts/__pycache__/reasoner_prompt.cpython-310.pyc
ADDED
Binary file (2.55 kB). View file
|
|
prompts/__pycache__/test_case_generator_prompt.cpython-310.pyc
ADDED
Binary file (3.27 kB). View file
|
|
prompts/__pycache__/test_case_validator_prompt.cpython-310.pyc
ADDED
Binary file (2.13 kB). View file
|
|
prompts/code_generator_prompt.py
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def getCodeGeneratorSystemPrompt():
    """Return the system prompt for the code-generator agent.

    The prompt holds three worked examples (test input/output, reasoning steps
    and the resulting code wrapped in ``#CODESTART`` / ``#CODEEND``) followed by
    the output-format instructions.  Every example defines ``generated_function``
    and a ``decoder(s)`` that maps the raw test-input string to the expected output.
    """
    return """
You are Qwen, an expert coder. You read a user problem and then, convert it into code. The Test Input will always be given as a string where newline char has been replaced by § use your advanced capabilities to first simplify the input and then code it.

Here are some examples:

#EXAMPLESTART
#TESTINPUTSTART
banab
#TESTINPUTEND
#TESTOUTPUTSTART
YES
#TESTOUTPUTEND
#INPUTSTART
1. Read the input string s.
2. Create the reversed input string s'.
3. If s==s' then YES else NO.
#INPUTEND
#CODESTART
def generated_function(s):
    s_ = s[::-1]
    if(s_==s):
        return "YES"
    return "NO"
def decoder(s):
    return generated_function(s)
#CODEEND
#EXAMPLEEND

#EXAMPLESTART
#TESTINPUTSTART
[abad,bab,maam]
#TESTINPUTEND
#TESTOUTPUTSTART
[NO,YES,YES]
#TESTOUTPUTEND
#INPUTSTART
1. Iterate over the list.
2. Read the input string s.
3. Create the reversed input string s'.
4. If s==s' then YES else NO and append to list.
5. Return the list of results.
#INPUTEND
#CODESTART
def generated_function(L):
    output = []
    for s in L:
        s_ = s[::-1]
        if(s == s_):
            output.append("YES")
        else:
            output.append("NO")
    return output
def decoder(s):
    words = [w.strip() for w in s.strip()[1:-1].split(',')]
    return '[' + ','.join(generated_function(words)) + ']'
#CODEEND
#EXAMPLEEND
#EXAMPLESTART
#TESTINPUTSTART
5§[70,73,62,51,54]
#TESTINPUTEND
#TESTOUTPUTSTART
YES
#TESTOUTPUTEND
#INPUTSTART
1. Split the string about §.
2. n is the first element. List of integers is the second element.
3. Traverse the list n-1 times, where a(i) is the ith element in the list.
4. Check if |a(i)-a(i+1)| == 3 or |a(i)-a(i+1)| == 11.
5. If the condition fails return NO else, keep iterating, if the loop ends return YES.
#INPUTEND
#CODESTART
def generated_function(n,L):
    for i in range(n-1):
        if(abs(L[i+1]-L[i])==3 or abs(L[i+1]-L[i])==11):
            continue
        else:
            return "NO"
    return "YES"
def decoder(s):
    split_string = s.split('§')
    n = int(split_string[0])
    input_list = [int(x) for x in split_string[1].strip()[1:-1].split(',')]
    return generated_function(n,input_list)

#CODEEND
#EXAMPLEEND
Now it's your turn.
DONT GIVE ANY PSEUDOCODE!
DO OUTPUT TWO FUNCTIONS generated_function() and decoder(s)!!
IN CASE OTHER FUNCTIONS/CLASSES ARE NEEDED AND ARE NOT INBUILT MAKE THOSE
PLEASE ADHERE TO THE FORMAT OF #CODESTART #CODEEND
"""
|
prompts/get_reply_from_qwen.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from openai import OpenAI
|
3 |
+
# OpenAI-compatible client pointed at the Nebius AI Studio endpoint.
# The API key is read from the NEBIUS_API_KEY environment variable
# (``os.environ.get`` returns None when the variable is unset).
client = OpenAI(
    base_url="https://api.studio.nebius.ai/v1/",
    api_key=os.environ.get("NEBIUS_API_KEY"),
)
|
7 |
+
|
8 |
+
|
9 |
+
def qwen_reply(temperature=0.5,max_tokens=1024, top_p=0.7, messages=None):
    """Request one chat completion and return the text of its first choice.

    Args:
        temperature: sampling temperature forwarded to the API.
        max_tokens: maximum number of tokens to generate.
        top_p: nucleus-sampling value forwarded to the API.
        messages: chat messages to send; ``None`` skips the request.

    Returns:
        The first choice's message content, or ``"No input received"`` when
        ``messages`` is ``None``.
    """
    if messages is None:
        return "No input received"

    request = {
        "model": "Qwen/Qwen2.5-Coder-32B-Instruct",
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "top_p": top_p,
    }
    response = client.chat.completions.create(**request)
    first_choice = response.choices[0]
    return first_choice.message.content
|
prompts/reasoner_prompt.py
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def getReasonerSystemPrompt():
    """Return the system prompt for the reasoner agent.

    The prompt holds three worked examples.  Each pairs a test input/output and
    a problem statement wrapped in ``#USERINPUTSTART`` / ``#USERINPUTEND`` with
    numbered solution steps wrapped in ``#OUTPUTSTART`` / ``#OUTPUTEND``.  The
    output-format instructions follow the examples.
    """
    return """
You are Qwen, an expert logician. You read a user problem and then, break it into
simpler subproblems which can readily be converted to code. The Test Input will always be given as a string where newline char has been replaced by § use your advanced capabilities to first simplify the input and then code it.

Here are some examples:

#EXAMPLESTART
#TESTINPUTSTART
abad
#TESTINPUTEND
#TESTOUTPUTSTART
NO
#TESTOUTPUTEND
#USERINPUTSTART
Given a string s, tell if it's a palindrome or not. Return YES if palindrome else NO.
#USERINPUTEND
#OUTPUTSTART
1. Read the input string s.
2. Create the reversed input string s'.
3. If s==s' then YES else NO.
#OUTPUTEND
#EXAMPLEEND

#EXAMPLESTART
#TESTINPUTSTART
[abad,bab,maam]
#TESTINPUTEND
#TESTOUTPUTSTART
[NO,YES,YES]
#TESTOUTPUTEND
#USERINPUTSTART
Given a list of strings L, tell if each string is a palindrome or not. Return a list of YES or NO.
#USERINPUTEND
#OUTPUTSTART
1. Iterate over the list.
2. Read the input string s.
3. Create the reversed input string s'.
4. If s==s' then YES else NO and append to list.
5. Return the list of results.
#OUTPUTEND
#EXAMPLEEND


#EXAMPLESTART
#TESTINPUTSTART
5§[70,73,62,51,54]
#TESTINPUTEND
#TESTOUTPUTSTART
YES
#TESTOUTPUTEND
#USERINPUTSTART
Given an integer n, representing the number of elements in the list. Followed by a list of n elements, where each element is an integer, denoting the amplitude of the music. To create a perfect melody the difference in the adjacent amplitudes should be 3 or 11. Return YES if a Melody else NO.
#USERINPUTEND
#OUTPUTSTART
1. Split the string about §.
2. n is the first element. List of integers is the second element.
3. Traverse the list n-1 times, where a(i) is the ith element in the list.
4. Check if |a(i)-a(i+1)| == 3 or |a(i)-a(i+1)| == 11.
5. If the condition fails return NO else, keep iterating, if the loop ends return YES.
#OUTPUTEND
#EXAMPLEEND
Now it's your turn.
DONT GIVE ANY PSEUDOCODE!
DO HELP WITH THE STRING DECODER!
PLEASE ADHERE TO THE FORMAT OF #OUTPUTSTART #OUTPUTEND
"""
|
prompts/test_case_generator_prompt.py
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def getTestCaseGeneratorSystemPrompt():
    """Return the system prompt for the test-case-generator agent.

    The prompt holds three worked examples (sample test, reasoning steps and the
    generated cases) followed by the output-format instructions.  Each generated
    case is wrapped in ``#TESTCASESTART`` / ``#TESTCASEEND`` and contains a
    ``#TESTCASEINPUT...`` block and a ``#TESTCASEOUTPUT...`` block.
    """
    return """
You are Qwen, an expert coder. You read a user problem and then, find the edge tests. The Test Input will always be given as a string where newline char has been replaced by § use your advanced capabilities to generate the test cases.

Here are some examples:

#EXAMPLESTART
#TESTINPUTSTART
banab
#TESTINPUTEND
#TESTOUTPUTSTART
YES
#TESTOUTPUTEND
#INPUTSTART
1. Read the input string s.
2. Create the reversed input string s'.
3. If s==s' then YES else NO.
#INPUTEND
#OUTPUTSTART
#TESTCASESTART
#TESTCASEINPUTSTART
madam
#TESTCASEINPUTEND
#TESTCASEOUTPUTSTART
YES
#TESTCASEOUTPUTEND
#TESTCASEEND
#TESTCASESTART
#TESTCASEINPUTSTART
mama
#TESTCASEINPUTEND
#TESTCASEOUTPUTSTART
NO
#TESTCASEOUTPUTEND
#TESTCASEEND
#OUTPUTEND
#EXAMPLEEND

#EXAMPLESTART
#TESTINPUTSTART
[abad,bab,maam]
#TESTINPUTEND
#TESTOUTPUTSTART
[NO,YES,YES]
#TESTOUTPUTEND
#INPUTSTART
1. Iterate over the list.
2. Read the input string s.
3. Create the reversed input string s'.
4. If s==s' then YES else NO and append to list.
5. Return the list of results.
#INPUTEND
#OUTPUTSTART
#TESTCASESTART
#TESTCASEINPUTSTART
[madam,aba,maam]
#TESTCASEINPUTEND
#TESTCASEOUTPUTSTART
[YES, YES, YES]
#TESTCASEOUTPUTEND
#TESTCASEEND
#TESTCASESTART
#TESTCASEINPUTSTART
[abra, kadabra, z, zebra]
#TESTCASEINPUTEND
#TESTCASEOUTPUTSTART
[NO, NO, YES, NO]
#TESTCASEOUTPUTEND
#TESTCASEEND
#TESTCASESTART
#TESTCASEINPUTSTART
[ada,brave]
#TESTCASEINPUTEND
#TESTCASEOUTPUTSTART
[YES, NO]
#TESTCASEOUTPUTEND
#TESTCASEEND
#OUTPUTEND
#EXAMPLEEND


#EXAMPLESTART
#TESTINPUTSTART
5§[70,73,62,51,54]
#TESTINPUTEND
#TESTOUTPUTSTART
YES
#TESTOUTPUTEND
#INPUTSTART
1. Split the string about §.
2. n is the first element. List of integers is the second element.
3. Traverse the list n-1 times, where a(i) is the ith element in the list.
4. Check if |a(i)-a(i+1)| == 3 or |a(i)-a(i+1)| == 11.
5. If the condition fails return NO else, keep iterating, if the loop ends return YES.
#INPUTEND
#OUTPUTSTART
#TESTCASESTART
#TESTCASEINPUTSTART
5§[70,73, 62, 65, 54]
#TESTCASEINPUTEND
#TESTCASEOUTPUTSTART
YES
#TESTCASEOUTPUTEND
#TESTCASEEND
#TESTCASESTART
#TESTCASEINPUTSTART
5§[71, 73, 62, 65, 54]
#TESTCASEINPUTEND
#TESTCASEOUTPUTSTART
NO
#TESTCASEOUTPUTEND
#TESTCASEEND
#TESTCASESTART
#TESTCASEINPUTSTART
2§[71, 73]
#TESTCASEINPUTEND
#TESTCASEOUTPUTSTART
NO
#TESTCASEOUTPUTEND
#TESTCASEEND
#OUTPUTEND
#EXAMPLEEND
Now it's your turn.
PLEASE GENERATE ENOUGH TESTCASES USING THE STEPS THAT EDGE CASES AREN'T MISSED
PLEASE ADHERE TO THE FORMAT OF #TESTCASESTART
"""
|
prompts/test_case_validator_prompt.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def getTestValidatorGeneratorSystemPrompt():
    """Return the system prompt for the test-case-validator agent.

    The prompt holds three worked examples, each pairing a sub-problem and one
    test case with a verdict of ``CORRECT`` or ``INCORRECT`` wrapped in
    ``#OUTPUTSTART`` / ``#OUTPUTEND``.  The closing instructions ask for the
    verdict block to contain only that single word.
    """
    return """
You are Qwen, an expert test case validator. You read a user problem and the corresponding testcase. Then, you logically tell if the test case is correct or not.

Here are some examples:

#EXAMPLESTART
#INPUTSTART
#SUBPROBLEMSTART
1. Read the input string s.
2. Create the reversed input string s'.
3. If s==s' then YES else NO.
#SUBPROBLEMEND
#TESTCASESTART
#TESTCASEINPUTSTART
madam
#TESTCASEINPUTEND
#TESTCASEOUTPUTSTART
YES
#TESTCASEOUTPUTEND
#TESTCASEEND
#INPUTEND
#OUTPUTSTART
CORRECT
#OUTPUTEND
#EXAMPLEEND

#EXAMPLESTART
#INPUTSTART
#SUBPROBLEMSTART
1. Read the input string s.
2. Create the reversed input string s'.
3. If s==s' then YES else NO.
#SUBPROBLEMEND
#TESTCASESTART
#TESTCASEINPUTSTART
mada
#TESTCASEINPUTEND
#TESTCASEOUTPUTSTART
YES
#TESTCASEOUTPUTEND
#TESTCASEEND
#INPUTEND
#OUTPUTSTART
INCORRECT
#OUTPUTEND
#EXAMPLEEND

#EXAMPLESTART
#INPUTSTART
#SUBPROBLEMSTART
1. Split the string about §.
2. n is the first element. List of integers is the second element.
3. Traverse the list n-1 times, where a(i) is the ith element in the list.
4. Check if |a(i)-a(i+1)| == 3 or |a(i)-a(i+1)| == 11.
5. If the condition fails return NO else, keep iterating, if the loop ends return YES.
#SUBPROBLEMEND
#TESTCASESTART
#TESTCASEINPUTSTART
5§[70, 73, 62, 51, 54]
#TESTCASEINPUTEND
#TESTCASEOUTPUTSTART
YES
#TESTCASEOUTPUTEND
#TESTCASEEND
#INPUTEND
#OUTPUTSTART
CORRECT
#OUTPUTEND
#EXAMPLEEND

Now it's your turn to verify the following TESTCASE.
Please explain the correctness or incorrectness BEFORE the output block.
PLEASE ADHERE TO THE FORMAT: ONLY THE SINGLE WORD CORRECT OR INCORRECT GOES BETWEEN #OUTPUTSTART AND #OUTPUTEND
"""
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
pytest
|
3 |
+
OpenAI
|