Spaces:

eternalBlissard
/

ThoughtCODER

Running

App Files Files Community

Chaitanya Garg committed 15 days ago

Commit

f9902eb

1 Parent(s): 271bd8a

Working code

Browse files

Files changed (14) hide show

app.py +31 -0
multi_agent.py +199 -0
prompt_handler.py +68 -0
prompts/__pycache__/code_generator_prompt.cpython-310.pyc +0 -0
prompts/__pycache__/get_reply_from_qwen.cpython-310.pyc +0 -0
prompts/__pycache__/reasoner_prompt.cpython-310.pyc +0 -0
prompts/__pycache__/test_case_generator_prompt.cpython-310.pyc +0 -0
prompts/__pycache__/test_case_validator_prompt.cpython-310.pyc +0 -0
prompts/code_generator_prompt.py +96 -0
prompts/get_reply_from_qwen.py +21 -0
prompts/reasoner_prompt.py +67 -0
prompts/test_case_generator_prompt.py +126 -0
prompts/test_case_validator_prompt.py +75 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,31 @@

+import gradio as gr
+from multi_agent import multi_agent_cycle
+title = "Thought CODER"
+description = "Made a Multi-Agent CODER LLM using Qwen2.5-72B API which not only Converts Thought to code but tests them out as well \n\n CONVERT YOUR THOUGHT TO CODE"
+article = "Created by [Eternal Bliassard](https://github.com/EternalBlissard)."
+# Create the Gradio demo
+demo = gr.Interface(fn=multi_agent_cycle,
+                    inputs=[gr.Textbox(label="Code Requirements")],
+                    outputs=[gr.Textbox(label="Code")],
+                    additional_inputs = [
+        gr.Textbox(label="Input TestCase"),
+        gr.Textbox(label="Output TestCase"),
+        gr.Slider(minimum=0.1, maximum=2.0, value=0.5, step=0.1, label="Temperature"),
+        gr.Slider(minimum=128, maximum=4096, value=1024, step=1, label="Max New Tokens"),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-P",
+        ),
+        gr.Slider(minimum=1, maximum=5, value=3, step=1, label="Reasoner Iterations"),
+        gr.Slider(minimum=3, maximum=11, value=5, step=1, label="Test Cases To be Generated"),
+],
+                    title=title,
+                    description=description,
+                    article=article)
+# Launch the demo!
+demo.launch(share=True)

multi_agent.py ADDED Viewed

	@@ -0,0 +1,199 @@

+import os
+import random
+import re
+import string
+import subprocess
+import sys
+import tempfile
+import textwrap
+from prompt_handler import handle_code_generator_prompt, handle_test_case_generator_prompt, handle_test_validator_prompt, handle_reasoner_prompt
+def extract_output(pattern, text):
+    match = re.search(pattern, text, re.DOTALL)
+    if match:
+        return match.group(1)
+    return None
+def extract_all_outputs(pattern,text):
+    # Regular expression to match content between #OUTPUTSTART\n and #OUTPUTEND
+    # pattern = r'#OUTPUTSTART\n(.*?)#OUTPUTEND'
+    # Using re.findall to find all occurrences
+    outputs = re.findall(pattern, text, re.DOTALL)
+    return outputs
+def generate_filename():
+    # Generate a random alphabetic character for the first character
+    first_char = random.choice(string.ascii_letters)
+    # Generate the remaining 19 characters, which can be letters or digits
+    remaining_chars = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(19))
+    # Combine the first character with the remaining characters to form the 20-character file name
+    filename = first_char + remaining_chars
+    return filename
+def multi_agent_cycle(user_prompt=None,input_test="", output_test="",temperature=0.5,max_tokens=1024,top_p=0.5, reasoner_num = 3, test_case_num = 7):
+    if(user_prompt is None or user_prompt==""):
+        return "No prompt given"
+    filename = None
+    if(filename is None):
+        filename = generate_filename()
+    currCorrect = 0
+    bestCode = None
+    input_test_ = input_test.replace(r'\n','§')
+    output_test_ = output_test.replace(r'\n','§')
+    testcaseset = f"""
+    #TESTINPUTSTART
+    {input_test_}
+    #TESTINPUTEND
+    #TESTOUTPUTSTART
+    {output_test_}
+    #TESTOUTPUTEND
+    """
+    user_prompt_ = f"""
+    #USERINPUTSTART
+    {user_prompt}
+    #USERINPUTEND
+    {testcaseset}
+    """
+    for idx in range(reasoner_num):
+        print(idx)
+        reasoner_res = extract_output(r'#OUTPUTSTART\n(.*?)\n#OUTPUTEND',handle_reasoner_prompt(user_prompt_,temperature=temperature+(0.05)*idx,max_tokens = max_tokens+((512)*idx)))
+        while(reasoner_res is None):
+            reasoner_res = extract_output(r'#OUTPUTSTART\n(.*?)\n#OUTPUTEND',handle_reasoner_prompt(user_prompt,temperature=temperature+(0.05)*idx,max_tokens = max_tokens+((512)*idx)))
+        reasoner_res_ = f"""
+        #INPUTSTART
+        {reasoner_res}
+        #INPUTEND
+        {testcaseset}
+        """
+        code_gen_res = extract_output(r'#CODESTART\n(.*?)#CODEEND',handle_code_generator_prompt(reasoner_res_,temperature=temperature+(0.05)*idx,max_tokens = max_tokens+((512)*idx)))
+        while(code_gen_res is None):
+            code_gen_res = extract_output(r'#CODESTART\n(.*?)#CODEEND',handle_code_generator_prompt(reasoner_res_,temperature=temperature+(0.05)*idx,max_tokens = max_tokens+((512)*idx)))
+        print("hush2")
+        test_gen_res = extract_all_outputs(r'#TESTCASESTART\n(.*?)\n#TESTCASEEND', handle_test_case_generator_prompt(reasoner_res_+f"LIMIT OUTPUT TO{test_case_num} TESTCASES USING THE STEPS! MAKE SURE EDGE CASES AREN'T MISSED",temperature=temperature+(0.05)*idx,max_tokens = max_tokens+((512)*idx)))
+        while(test_case_num is None):
+            test_gen_res = extract_all_outputs(r'#TESTCASESTART\n(.*?)\n#TESTCASEEND', handle_test_case_generator_prompt(user_prompt+f"LIMIT OUTPUT TO{test_case_num} TESTCASES USING THE STEPS! MAKE SURE EDGE CASES AREN'T MISSED",temperature=temperature+(0.05)*idx,max_tokens = max_tokens+((512)*idx)))
+        print("hush3")
+        fresh_tc = []
+        for test_gen in test_gen_res:
+            new_prompt = f"""
+#SUBPROBLEMSTART
+{reasoner_res}
+#SUBPROBLEMEND
+#TESTCASESTART
+{test_gen}
+#TESTCASEEND
+"""
+            val_res = extract_output(r'#OUTPUTSTART\n(.*?)\n#OUTPUTEND',handle_test_validator_prompt(new_prompt))
+            if(val_res=='CORRECT'):
+                fresh_tc.append(test_gen)
+        with open(filename+'.py','w') as file:
+            file.write("import pytest\n\n")
+            code_split = code_gen_res.split('\n')
+            file.write(code_gen_res)
+            # print('\n'.join(new_code_split))
+            # f.write(make_statement1(1,extract_output(r'#TESTCASEINPUTSTART\n(.*?)\n#TESTCASEINPUTEND',test_gen_res[0]),extract_output(r'#TESTCASEOUTPUTSTART\n(.*?)#TESTCASEOUTPUTEND',test_gen_res[0])))
+            file.write("@pytest.mark.parametrize(\"n, expected\", [\n")
+            # Loop through the test cases and write them to the file
+            for case in fresh_tc:
+                input_result = extract_output(r'#TESTCASEINPUTSTART\n(.*?)\n#TESTCASEINPUTEND', case)
+                output_result = extract_output(r'#TESTCASEOUTPUTSTART\n(.*?)\n#TESTCASEOUTPUTEND',case)
+                if(input_result is None and output_result is None):
+                    continue
+                else:
+                    file.write(f"    ('{input_result}', '{output_result}'),  #\n")
+            # Close the parametrize list
+            file.write("])\n\n")
+            # Write the test function using the parametrize decorator
+            file.write("def test_statement(n, expected):\n")
+            file.write("    result = decoder(n)\n")
+            file.write("    assert result == expected\n")
+        result = subprocess.run(
+        ['pytest', filename+'.py'],  # Specify the test file here
+        capture_output=True,  # Capture stdout and stderr
+        text=True             # Ensure the output is returned as a string (not bytes)
+        )
+        # print(result)
+        # Get the output and error
+        output = result.stdout  # The standard output from pytest (test results)
+        error = result.stderr   # The error output (if any)
+        # print("Error",error)
+        # print(error is None)
+        # print(error=="")
+        # Use regular expressions to find the counts of passed and failed tests
+        failed_tests = int(len(re.findall(r'FAIL', output))/2)
+        passed_tests = len(fresh_tc) - failed_tests
+        command = ['rm', '-f', filename + ".py"]
+        try:
+            # Run the command
+            subprocess.run(command, check=True, shell=True)
+            print(f"File '{filename}' has been deleted.")
+        except subprocess.CalledProcessError as e:
+            print(f"Error: Could not delete file '{filename}'. {e}")
+        with open(filename+'1.py','w') as file:
+            file.write("import pytest\n\n")
+            # code_split = code_gen_res.split('\n')
+            file.write(code_gen_res)
+            file.write("@pytest.mark.parametrize(\"n, expected\", [\n")
+            # file.write(f"    ('{input_result.replace("\\n","§")}', '{output_result.replace("§","\\n")}'),  #\n")
+            file.write(f"    ('{input_test_}', '{output_test_}'),  #\n")
+            file.write("])\n\n")
+        # Write the test function using the parametrize decorator
+            file.write("def test_statement(n, expected):\n")
+            file.write("    result = decoder(n)\n")
+            file.write("    assert result == expected\n")
+            result1 = subprocess.run(
+            ['pytest', filename+'1.py'],  # Specify the test file here
+            capture_output=True,  # Capture stdout and stderr
+            text=True             # Ensure the output is returned as a string (not bytes)
+            )
+            try:
+                command1 = ['rm', '-f', filename + "1.py"]
+                subprocess.run(command1, check=True, shell=True )
+                print(f"File '{filename}'1 has been deleted.")
+            except subprocess.CalledProcessError as e:
+                print(f"Error: Could not delete file '{filename}'. {e}")
+                output1 = result1.stdout  # The standard output from pytest (test results)
+                error1 = result1.stderr   # The error output (if any)
+                # print("Error",error)
+                # print(error is None)
+                # print(error=="")
+                # Use regular expressions to find the counts of passed and failed tests
+                failed_tests1 = int(len(re.findall(r'FAIL', output1)))
+                print(failed_tests1)
+                passed_tests1 = 1 - failed_tests1
+                if(failed_tests1>=1):
+                    continue
+        if(failed_tests == 0):
+            return code_gen_res + "\n\n DECODER IS SOLELY TO TACKLE INPUTS FROM FILE READING"
+        if(passed_tests>currCorrect):
+            bestCode = code_gen_res
+    if(bestCode is not None):
+        return bestCode + "\n\n DECODER IS SOLELY TO TACKLE INPUTS FROM FILE READING"
+    else:
+        return "NO VALID CODE MADE"

prompt_handler.py ADDED Viewed

	@@ -0,0 +1,68 @@

+from prompts.reasoner_prompt import getReasonerSystemPrompt
+from prompts.test_case_generator_prompt import getTestCaseGeneratorSystemPrompt
+from prompts.test_case_validator_prompt import getTestValidatorGeneratorSystemPrompt
+from prompts.code_generator_prompt import getCodeGeneratorSystemPrompt
+from prompts.get_reply_from_qwen import qwen_reply
+def handle_reasoner_prompt(user_prompt,temperature=0.5,max_tokens=1024, top_p=0.7):
+    messages = [
+    { "role":"system", "content":getReasonerSystemPrompt()},
+	{ "role": "user", "content": [
+        {
+            "type": "text",
+            "text": user_prompt
+        }
+    ]}
+    ]
+    return qwen_reply(temperature=temperature,
+                    max_tokens=max_tokens,
+                    top_p=top_p,
+                    messages=messages)
+def handle_code_generator_prompt(user_prompt,temperature=0.5,max_tokens=1024, top_p=0.7):
+    messages = [
+    { "role":"system", "content":getCodeGeneratorSystemPrompt()},
+	{ "role": "user", "content": [
+        {
+            "type": "text",
+            "text": user_prompt
+        }
+    ]}
+    ]
+    return qwen_reply(temperature=temperature,
+                    max_tokens=max_tokens,
+                    top_p=top_p,
+                    messages=messages)
+def handle_test_case_generator_prompt(user_prompt,temperature=0.5,max_tokens=1024, top_p=0.7):
+    messages = [
+    { "role":"system", "content": getTestCaseGeneratorSystemPrompt()},
+	{ "role": "user", "content": [
+        {
+            "type": "text",
+            "text": user_prompt
+        }
+    ]}
+    ]
+    return qwen_reply(temperature=temperature,
+                    max_tokens=max_tokens,
+                    top_p=top_p,
+                    messages=messages)
+def handle_test_validator_prompt(user_prompt,temperature=0.5,max_tokens=1024, top_p=0.7):
+    messages = [
+    { "role":"system", "content": getTestValidatorGeneratorSystemPrompt()},
+	{ "role": "user", "content": [
+        {
+            "type": "text",
+            "text": user_prompt
+        }
+    ]}
+    ]
+    return qwen_reply(temperature=temperature,
+                    max_tokens=max_tokens,
+                    top_p=top_p,
+                    messages=messages)

prompts/__pycache__/code_generator_prompt.cpython-310.pyc ADDED Viewed

Binary file (3.02 kB). View file

prompts/__pycache__/get_reply_from_qwen.cpython-310.pyc ADDED Viewed

Binary file (1.22 kB). View file

prompts/__pycache__/reasoner_prompt.cpython-310.pyc ADDED Viewed

Binary file (2.55 kB). View file

prompts/__pycache__/test_case_generator_prompt.cpython-310.pyc ADDED Viewed

Binary file (3.27 kB). View file

prompts/__pycache__/test_case_validator_prompt.cpython-310.pyc ADDED Viewed

Binary file (2.13 kB). View file

prompts/code_generator_prompt.py ADDED Viewed

	@@ -0,0 +1,96 @@

+def getCodeGeneratorSystemPrompt():
+    return """
+    You are Qwen, an expert coder. You read a user problem and then, convert it into code. The Test Input will always be given as a string where newline char has been replaced by § use your advanced capabilities to first simplify the input and then code it.
+    Here are some examples:
+    #EXAMPLESTART
+    #TESTINPUTSTART
+    banab
+    #TESTINPUTEND
+    #TESTINPUTSTART
+    YES
+    #TESTOUTPUTEND
+    #INPUTSTART
+    1. Read the input string s.
+    2. Create the reversed input string s'.
+    3. If s==s' then YES else NO.
+    #INPUTEND
+    #CODESTART
+    def generated_function(s):
+        s_ = s[::-1]
+        if(s_==s):
+            return "YES"
+        return "NO"
+    def decoder(s):
+        return generated_function(s)
+    #CODEEND
+    #EXAMPLEEND
+    #EXAMPLESTART
+    #TESTINPUTSTART
+    [abad,bab,maam]
+    #TESTINPUTEND
+    #TESTOUTPUTSTART
+    [NO,YES,YES]
+    #TESTOUTPUTEND
+    #INPUTSTART
+    1. Iterate over the list.
+    2. Read the input string s.
+    3. Create the reversed input string s'.
+    4. If s==s' then YES else NO and append to list.
+    5. Return the list of results.
+    #INPUTEND
+    #CODESTART
+    def generated_function(L):
+        output = []
+        for i in range(n):
+            s  = L[i]
+            s_ = s[::-1]
+            if(s == s_):
+                output.append("YES")
+            else:
+                output.append("NO")
+        return output
+    def decoder(s):
+        list = eval(s)
+        return str(generated_function(list))
+    #CODEEND
+    #EXAMPLEEND
+    #EXAMPLESTART
+    #TESTINPUTSTART
+    5§[70,73,62,51,54]
+    #TESTINPUTEND
+    #TESTOUTPUTSTART
+    YES
+    #TESTOUTPUTEND
+    #INPUTSTART
+    1. Split the string about §.
+    2. n is the first element. List of integers is the second element.
+    3. Traverse the list n-1 times, where a(i) is the ith element in the list.
+    4. Check if |a(i)-a(i+1)| == 3 or |a(i)-a(i+1)| == 11.
+    5. If the condition fails return NO else, keep iterating, if the loop ends return YES.
+    #INPUTEND
+    #CODESTART
+    def generated_function(n,L):
+        output = []
+        for i in range(n-1):
+            if(abs(L[i+1]-L[i])==3 or abs(L[i+1]-L[i])==11):
+                continue
+            else:
+                return "NO"
+        return "YES"
+    def decoder(s):
+        split_string = s.split('§')
+        n = eval(split_string[0])
+        input_list = []
+        return generated_function(eval(n),eval(split_string[1]))
+    #CODEEND
+    #EXAMPLEEND
+    Now Its your turn.
+    DONT GIVE ANY PSEUDOCODE!
+    DO OUTPUT TWO FUNCTIONS generated_function() and decoder(s)!!
+    IN CASE OTHER FUNCTIONS/CLASSES ARE NEEDED AND ARE NOT INBUILT MAKE THOSE
+    PLEASE ADHERE TO THE FORMAT OF #CODESTART #CODEEND
+    """

prompts/get_reply_from_qwen.py ADDED Viewed

	@@ -0,0 +1,21 @@

+import os
+from openai import OpenAI
+client = OpenAI(
+    base_url="https://api.studio.nebius.ai/v1/",
+    api_key=os.get_environ("NEBIUS_API_KEY")
+)
+def qwen_reply(temperature=0.5,max_tokens=1024, top_p=0.7, messages=None):
+    if(messages is None):
+        return "No input received"
+    completion = client.chat.completions.create(
+    model="Qwen/Qwen2.5-Coder-32B-Instruct",
+    messages=messages,
+    temperature=temperature,
+    max_tokens= max_tokens,
+    top_p=top_p
+    # prompt=messages[-1]["content"]
+    )
+    return completion.choices[0].message.content

prompts/reasoner_prompt.py ADDED Viewed

	@@ -0,0 +1,67 @@

+def getReasonerSystemPrompt():
+    return """
+    You are Qwen, an expert logician. You read a user problem and then, break it into
+    simpler subproblems which can be readily be converted to code. The Test Input will always be given as a string where newline char has been replaced by § use your advanced capabilities to first simplify the input and then code it.
+    Here are some examples:
+    #EXAMPLESTART
+    #TESTINPUTSTART
+    abad
+    #TESTINPUTEND
+    #TESTOUTPUTSTART
+    NO
+    #TESTOUTPUTEND
+    #USERINPUTSTART
+    Given a string s, tell if its a pallindrome or not. Return YES if palindrome else NO.
+    #USERINPUTEND
+    #OUTPUTSTART
+    1. Read the input string s.
+    2. Create the reversed input string s'.
+    3. If s==s' then YES else NO.
+    #OUTPUTEND
+    #EXAMPLEEND
+    #EXAMPLESTART
+    #TESTINPUTSTART
+    [abad,bab,maam]
+    #TESTINPUTEND
+    #TESTOUTPUTSTART
+    [NO,YES,YES]
+    #TESTOUTPUTEND
+    #USERINPUTSTART
+    Given a list of strings L, tell if each string is a palindrome or not. Return a list of YES or NO.
+    #USERINPUTEND
+    #OUTPUTSTART
+    1. Iterate over the list.
+    2. Read the input string s.
+    3. Create the reversed input string s'.
+    4. If s==s' then YES else NO and append to list.
+    5. Return the list of results.
+    #OUTPUTEND
+    #EXAMPLEEND
+    #EXAMPLESTART
+    #TESTINPUTSTART
+    5§[70,73,62,51,54]
+    #TESTINPUTEND
+    #TESTOUTPUTSTART
+    Yes
+    #TESTOUTPUTEND
+    #INPUTSTART
+    Given an integer n, representing the number of elements in the list. Followed by a list of n elements, where each elements is an integer, denoting the amplitude of the music. To create a perfect melody the difference in the adjacent amplitudes should be 3 or 11. Return Yes if a Melody else No.
+    #INPUTEND
+    #OUTPUTSTART
+    1. Split the string about §.
+    2. n is the first element. List of integers is the second element.
+    3. Traverse the list n-1 times, where a(i) is the ith element in the list.
+    4. Check if |a(i)-a(i+1)| == 3 or |a(i)-a(i+1)| == 11.
+    5. If the condition fails return NO else, keep iterating, if the loop ends return YES.
+    #OUTPUTEND
+    #EXAMPLEEND
+    Now Its your turn.
+    DONT GIVE ANY PSEUDOCODE!
+    DO HELP WITH THE STRING DECODER!
+    PLEASE ADHERE TO THE FORMAT OF #OUTPUTSTART #OUTPUTEND
+    """

prompts/test_case_generator_prompt.py ADDED Viewed

	@@ -0,0 +1,126 @@

+def getTestCaseGeneratorSystemPrompt():
+    return """
+    You are Qwen, an expert coder. You read a user problem and then, find the edge tests. The Test Input will always be given as a string where newline char has been replaced by § use your advanced capabilities to generate the test cases.
+    Here are some examples:
+    #EXAMPLESTART
+    #TESTINPUTSTART
+    banab
+    #TESTINPUTEND
+    #TESTINPUTSTART
+    YES
+    #TESTOUTPUTEND
+    #INPUTSTART
+    1. Read the input string s.
+    2. Create the reversed input string s'.
+    3. If s==s' then YES else NO.
+    #INPUTEND
+    #OUTPUTSTART
+    #TESTCASESTART
+    #TESTCASEINPUTSTART
+    madam
+    #TESTCASEINPUTEND
+    #TESTCASEOUTPUTSTART
+    YES
+    #TESTCASEOUTPUTEND
+    #TESTCASEEND
+    #TESTCASESTART
+    #TESTCASEINPUTSTART
+    mama
+    #TESTCASEINPUTEND
+    #TESTCASEOUTPUTSTART
+    NO
+    #TESTCASEOUTPUTEND
+    #TESTCASEEND
+    #OUTPUTEND
+    #EXAMPLEEND
+    #EXAMPLESTART
+    #TESTINPUTSTART
+    [abad,bab,maam]
+    #TESTINPUTEND
+    #TESTOUTPUTSTART
+    [NO,YES,YES]
+    #TESTOUTPUTEND
+    #INPUTSTART
+    1. Iterate over the list.
+    2. Read the input string s.
+    3. Create the reversed input string s'.
+    4. If s==s' then YES else NO and append to list.
+    5. Return the list of results.
+    #INPUTEND
+    #OUTPUTSTART
+    #TESTCASESTART
+    #TESTCASEINPUTSTART
+    [madam,aba,maam]
+    #TESTCASEINPUTEND
+    #TESTCASEOUTPUTSTART
+    [YES, YES, YES]
+    #TESTCASEOUTPUTEND
+    #TESTCASEEND
+    #TESTCASESTART
+    #TESTCASEINPUTSTART
+    [abra, kadabra, z, zebra]
+    #TESTCASEINPUTEND
+    #TESTCASEOUTPUTSTART
+    [NO, NO, YES, NO]
+    #TESTCASEOUTPUTEND
+    #TESTCASEEND
+    #TESTCASESTART
+    #TESTCASEINPUTSTART
+    [ada,brave]
+    #TESTCASEINPUTEND
+    #TESTCASEOUTPUTSTART
+    [YES< NO]
+    #TESTCASEOUTPUTEND
+    #TESTCASEEND
+    #OUTPUTEND
+    #EXAMPLEEND
+    #EXAMPLESTART
+    #TESTINPUTSTART
+    5§[70,73,62,51,54]
+    #TESTINPUTEND
+    #TESTOUTPUTSTART
+    YES
+    #TESTOUTPUTEND
+    #INPUTSTART
+    1. Split the string about §.
+    2. n is the first element. List of integers is the second element.
+    3. Traverse the list n-1 times, where a(i) is the ith element in the list.
+    4. Check if |a(i)-a(i+1)| == 3 or |a(i)-a(i+1)| == 11.
+    5. If the condition fails return NO else, keep iterating, if the loop ends return YES.
+    #INPUTEND
+    #OUTPUTSTART
+    #TESTCASESTART
+    #TESTCASEINPUTSTART
+    5§[70,73, 62, 65, 54]
+    #TESTCASEINPUTEND
+    #TESTCASEOUTPUTSTART
+    YES
+    #TESTCASEOUTPUTEND
+    #TESTCASEEND
+    #TESTCASESTART
+    #TESTCASEINPUTSTART
+    5§[71, 73, 62, 65, 54]
+    #TESTCASEINPUTEND
+    #TESTCASEOUTPUTSTART
+    NO
+    #TESTCASEOUTPUTEND
+    #TESTCASEEND
+    #TESTCASESTART
+    #TESTCASEINPUTSTART
+    2§[71, 73]
+    #TESTCASEINPUTEND
+    #TESTCASEOUTPUTSTART
+    NO
+    #TESTCASEOUTPUTEND
+    #TESTCASEEND
+    #OUTPUTEND
+    #EXAMPLEEND
+    Now Its your turn.
+    PLEASE GENERATE ENOUGH TESTCASES USING THE STEPS THAT EDGE CASES AREN'T MISSED
+    PLEASE ADHERE TO THE FORMAT OF #TESTCASESTART
+    """

prompts/test_case_validator_prompt.py ADDED Viewed

	@@ -0,0 +1,75 @@

+def getTestValidatorGeneratorSystemPrompt():
+    return """
+    You are Qwen, an expert test case validator. You read a user problem and the corresponding testcase. Then, you logically tell if the test case is correct or not.
+    Here are some examples:
+    #EXAMPLESTART
+    #INPUTSTART
+    #SUBPROBLEMSTART
+    1. Read the input string s.
+    2. Create the reversed input string s'.
+    3. If s==s' then YES else NO.
+    #SUBPROBLEMEND
+    #TESTCASESTART
+    #TESTCASEINPUTSTART
+    madam
+    #TESTCASEINPUTEND
+    #TESTCASEOUTPUTSTART
+    YES
+    #TESTCASEOUTPUTEND
+    #TESTCASEEND
+    #INPUTEND
+    #OUTPUTSTART
+    CORRECT
+    #OUTPUTEND
+    #EXAMPLEEND
+    #EXAMPLESTART
+    #INPUTSTART
+    #SUBPROBLEMSTART
+    1. Read the input string s.
+    2. Create the reversed input string s'.
+    3. If s==s' then YES else NO.
+    #SUBPROBLEMEND
+    #TESTCASESTART
+    #TESTCASEINPUTSTART
+    mada
+    #TESTCASEINPUTEND
+    #TESTCASEOUTPUTSTART
+    YES
+    #TESTCASEOUTPUTEND
+    #TESTCASEEND
+    #INPUTEND
+    #OUTPUTSTART
+    INCORRECT
+    #OUTPUTEND
+    #EXAMPLEEND
+    #EXAMPLESTART
+    #INPUTSTART
+    #SUBPROBLEMSTART
+    1. Split the string about §.
+    2. n is the first element. List of integers is the second element.
+    3. Traverse the list n-1 times, where a(i) is the ith element in the list.
+    4. Check if |a(i)-a(i+1)| == 3 or |a(i)-a(i+1)| == 11.
+    5. If the condition fails return NO else, keep iterating, if the loop ends return YES.
+    #SUBPROBLEMEND
+    #TESTCASESTART
+    #TESTCASEINPUTSTART
+    5§[70, 73, 62, 51, 54]
+    #TESTCASEINPUTEND
+    #TESTCASEOUTPUTSTART
+    YES
+    #TESTCASEOUTPUTEND
+    #TESTCASEEND
+    #INPUTEND
+    #OUTPUTSTART
+    CORRECT
+    #OUTPUTEND
+    #EXAMPLEEND
+    Now Its your turn to verify the following TESTCASE.
+    Please explain the correctness or incorrectness
+    PLEASE ADHERE TO THE FORMAT OF #OUTPUTSTART
+    """

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio
+pytest
+OpenAI