# Hugging Face Spaces status: Running
import os
import random
import re
from datetime import datetime

import gradio as gr
from datasets import load_dataset, Dataset, DatasetDict
from huggingface_hub import whoami, InferenceClient
# Inference client used by check_code() to grade submissions with an LLM.
# The API key is read from the HF_API_KEY environment variable.
client = InferenceClient(
    api_key=os.getenv("HF_API_KEY"),
)

# Quiz configuration. Environment variables are strings when set, so both
# values are converted once here; otherwise the constants would be `str` when
# configured and `int`/`float` when defaulted.
EXAM_MAX_QUESTIONS = int(os.getenv("EXAM_MAX_QUESTIONS") or 5)
EXAM_PASSING_SCORE = float(os.getenv("EXAM_PASSING_SCORE") or 0.7)
EXAM_DATASET_ID = "burtenshaw/dummy-code-quiz"

# Load the questions, shuffle them once at import time, and keep at most
# EXAM_MAX_QUESTIONS of them (the limit was previously defined but never used).
ds = load_dataset(EXAM_DATASET_ID, split="train")
quiz_data = ds.to_list()
random.shuffle(quiz_data)
quiz_data = quiz_data[:EXAM_MAX_QUESTIONS]
def _parse_verdict(response: str) -> tuple[bool, str]:
    """Split an LLM grading reply into ``(is_correct, explanation)``.

    The reply is expected to start with the word CORRECT or INCORRECT (any
    case, optionally wrapped in Markdown emphasis or other punctuation).
    Everything after that word is the explanation, whether it follows on the
    same line or on a new one. A reply that does not start with a verdict is
    treated as incorrect and returned whole as the explanation.
    """
    match = re.match(r"[\W_]*(INCORRECT|CORRECT)\b", response, re.IGNORECASE)
    if match is None:
        return False, response
    is_correct = match.group(1).upper() == "CORRECT"
    # Drop the separator between the verdict and the explanation (": ", " - ",
    # closing "**", newline, ...).
    explanation = response[match.end():].lstrip(" \t\r\n:;,.!*_-")
    return is_correct, explanation


def check_code(user_code, solution, challenge):
    """
    Use an LLM to evaluate whether the user's code solution is correct.

    Args:
        user_code: The code submitted by the student.
        solution: The reference solution for the challenge.
        challenge: The text of the challenge being solved.

    Returns:
        True if the solution is judged correct, False otherwise.

    The verdict and explanation are shown to the user with ``gr.Info``. If the
    LLM call fails, a ``gr.Warning`` is shown and the result falls back to an
    exact comparison of the stripped submission and reference solution.
    """
    # NOTE(review): user_code is interpolated verbatim into the prompt, so a
    # submission can contain text addressed to the grader (prompt injection).
    # Treat the verdict as advisory unless the prompt is hardened.
    prompt = f"""You are an expert Python programming instructor evaluating a student's code solution.
Challenge:
{challenge}
Reference Solution:
{solution}
Student's Solution:
{user_code}
Evaluate if the student's solution is functionally equivalent to the reference solution.
Consider:
1. Does it solve the problem correctly?
2. Does it handle edge cases appropriately?
3. Does it follow the requirements of the challenge?
Respond with ONLY "CORRECT" or "INCORRECT" followed by a brief explanation.
"""
    messages = [{"role": "user", "content": prompt}]

    # Only the remote call is guarded; parsing and UI notifications happen
    # afterwards so their errors are not misreported as API failures.
    try:
        completion = client.chat.completions.create(
            model="Qwen/Qwen2.5-Coder-32B-Instruct",
            messages=messages,
            max_tokens=500,
        )
        # content may be None; treat that as an empty reply.
        response = (completion.choices[0].message.content or "").strip()
    except Exception as e:
        gr.Warning(f"Error checking code: {str(e)}")
        # Fall back to simple string comparison if the LLM call fails
        is_correct = user_code.strip() == solution.strip()
        status = "✅ Correct!" if is_correct else "❌ Incorrect!"
        gr.Info(f"{status} (Fallback comparison)")
        return is_correct

    is_correct, explanation = _parse_verdict(response)
    status = "✅ Correct!" if is_correct else "❌ Incorrect!"
    gr.Info(f"{status}\n\n{explanation}")
    return is_correct
def on_user_logged_in(token: gr.OAuthToken | None):
    """Return visibility updates for the login/start controls.

    The first update hides its target when a token is present and shows it
    otherwise; the second update does the opposite.
    """
    logged_in = token is not None
    hide_or_show_first = gr.update(visible=not logged_in)
    hide_or_show_second = gr.update(visible=logged_in)
    return hide_or_show_first, hide_or_show_second
def _split_name_for(username: str) -> str:
    """Turn a Hub username into a string usable as a dataset split name.

    Split names may only contain word characters, so every other character
    (for example the hyphen in ``jane-doe``) is replaced with an underscore.
    """
    return re.sub(r"\W", "_", username)


def push_results_to_hub(
    user_answers: list, token: gr.OAuthToken | None, signed_in_message: str
):
    """Push the user's quiz results to the Hugging Face Hub.

    Args:
        user_answers: Answer records; each must have an ``is_correct`` key.
        token: OAuth token of the logged-in user, or None when logged out.
        signed_in_message: Value of the second wired input; only logged.

    Returns:
        A status message describing the outcome. Nothing is pushed when there
        are no answers, the user is not logged in, or the grade is below
        ``EXAM_PASSING_SCORE``.
    """
    print(f"signed_in_message: {signed_in_message}")

    if not user_answers:  # Check if there are any answers to submit
        gr.Warning("No answers to submit!")
        return "No answers to submit!"
    if token is None:
        gr.Warning("Please log in to Hugging Face before pushing!")
        return "Please log in to Hugging Face before pushing!"

    # Calculate grade (user_answers is non-empty here, so no division by zero)
    correct_count = sum(1 for answer in user_answers if answer["is_correct"])
    grade = correct_count / len(user_answers)
    passing_score = float(EXAM_PASSING_SCORE)
    if grade < passing_score:
        gr.Warning(
            f"Score {grade:.1%} below passing threshold of {passing_score:.1%}"
        )
        return f"You scored {grade:.1%}. Please try again to achieve at least {passing_score:.1%}"

    gr.Info("Submitting answers to the Hub. Please wait...", duration=2)
    user_info = whoami(token=token.token)
    username = user_info["name"]
    repo_id = f"{EXAM_DATASET_ID}_responses"
    submission_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # One row per answer, each tagged with the user, time and overall grade
    submission_data = [
        {
            "username": username,
            "datetime": submission_time,
            "grade": grade,
            **answer,  # Include all answer data
        }
        for answer in user_answers
    ]

    try:
        # Try to load existing dataset
        existing_ds = load_dataset(repo_id)
        # Convert to DatasetDict if it isn't already
        if not isinstance(existing_ds, dict):
            existing_ds = DatasetDict({"default": existing_ds})
    except Exception:
        # NOTE(review): any load failure (not only "repo does not exist") ends
        # up here and the push below then contains this user's split only.
        existing_ds = DatasetDict()

    # Add or update the split for this user. The raw username is kept in the
    # rows; only the split key is sanitised.
    existing_ds[_split_name_for(username)] = Dataset.from_list(submission_data)

    # Push the updated dataset to the Hub
    existing_ds.push_to_hub(
        repo_id,
        private=True,  # Private by default since it contains student submissions
    )
    return f"Your responses have been submitted to the Hub! Final grade: {grade:.1%}"
def _format_results(user_answers, grade):
    """Build the Markdown summary shown once the quiz is finished."""
    parts = [
        "**Quiz Complete!**\n\n"
        f"Your score: {grade:.1%}\n"
        f"Passing score: {float(EXAM_PASSING_SCORE):.1%}\n\n"
        "Your answers:\n\n"
    ]
    for idx, answer in enumerate(user_answers):
        mark = "✅" if answer["is_correct"] else "❌"
        parts.append(f"Question {idx + 1}: {mark}\n")
        parts.append(f"Your code:\n```python\n{answer['submitted_code']}\n```\n\n")
    return "".join(parts)


def handle_quiz(question_idx, user_answers, submitted_code, is_start):
    """Handle quiz state and progression.

    Args:
        question_idx: Index into ``quiz_data`` of the question just answered.
        user_answers: Answer records collected so far.
        submitted_code: Code for the current question; may be empty or None.
        is_start: True when triggered by "Start", False for "Next".

    Returns:
        A 7-tuple matching the wired outputs: question text, code input
        update, status text, new question index, answers list, start button
        update and final results update.

    On start the quiz restarts at question 0 with an empty answer list.
    Otherwise the submission for the current question is graded and recorded.
    An empty submission is recorded as incorrect without calling the grader,
    so skipped questions still count towards the grade.
    """
    if is_start:
        # Hide the start button once the first question is shown
        start_btn_update = gr.update(visible=False)
        question_idx = 0
        # Drop anything recorded before the quiz was (re)started
        user_answers = []
    else:
        # No-op update: leave the start button exactly as it is
        start_btn_update = gr.update()
        if question_idx < len(quiz_data):
            current_q = quiz_data[question_idx]
            code = submitted_code or ""
            # Only call the grader if there's code
            is_correct = bool(code.strip()) and check_code(
                code, current_q["solution"], current_q["challenge"]
            )
            user_answers.append(
                {
                    "challenge": current_q["challenge"],
                    "submitted_code": code,
                    "correct_solution": current_q["solution"],
                    "is_correct": is_correct,
                }
            )
        question_idx += 1

    # If we've reached the end, show final results
    if question_idx >= len(quiz_data):
        correct_count = sum(1 for answer in user_answers if answer["is_correct"])
        # Guard against an empty answer list (e.g. an empty question set)
        grade = correct_count / len(user_answers) if user_answers else 0.0
        passed = grade >= float(EXAM_PASSING_SCORE)
        return (
            "",  # question_text becomes blank
            gr.update(value="", visible=False),  # clear and hide code input
            "✅ Passed!" if passed else "❌ Did not pass",
            question_idx,
            user_answers,
            start_btn_update,
            gr.update(
                value=_format_results(user_answers, grade), visible=True
            ),  # show final_markdown
        )

    # Show the next question
    q = quiz_data[question_idx]
    challenge_text = f"## Question {question_idx + 1} \n### {q['challenge']}"
    return (
        challenge_text,
        gr.update(value=q["placeholder"], visible=True),
        "Submit your code solution and click 'Next' to continue.",
        question_idx,
        user_answers,
        start_btn_update,
        gr.update(visible=False),  # Hide final_markdown
    )
# Build the quiz UI and wire its events to the handlers defined above.
with gr.Blocks() as demo:
    demo.title = f"Coding Quiz: {EXAM_DATASET_ID}"

    # State variables passed into and returned from handle_quiz
    question_idx = gr.State(value=0)
    user_answers = gr.State(value=[])

    with gr.Row(variant="compact"):
        gr.Markdown(f"## Welcome to the {EXAM_DATASET_ID} Quiz")

    with gr.Row(variant="compact"):
        gr.Markdown(
            "Log in first, then click 'Start' to begin. Complete each coding challenge, click 'Next', "
            "and finally click 'Submit' to publish your results to the Hugging Face Hub."
        )

    with gr.Row(variant="panel"):
        question_text = gr.Markdown("")
        code_input = gr.Code(language="python", label="Your Solution", visible=False)

    with gr.Row(variant="compact"):
        status_text = gr.Markdown("")

    with gr.Row(variant="compact"):
        final_markdown = gr.Markdown("", visible=False)

    # NOTE(review): the original indentation was lost; these two buttons are
    # assumed to sit directly under the Blocks container - confirm the layout.
    next_btn = gr.Button("Next ⏭️")
    submit_btn = gr.Button("Submit ✅")

    with gr.Row(variant="compact"):
        login_btn = gr.LoginButton()
        start_btn = gr.Button("Start", visible=False)

    login_btn.click(fn=on_user_logged_in, inputs=None, outputs=[login_btn, start_btn])

    # Start and Next share one handler and one set of outputs; they differ
    # only in the is_start flag passed as the last input. The order must match
    # the tuple returned by handle_quiz.
    quiz_outputs = [
        question_text,
        code_input,
        status_text,
        question_idx,
        user_answers,
        start_btn,
        final_markdown,
    ]
    start_btn.click(
        fn=handle_quiz,
        inputs=[question_idx, user_answers, code_input, gr.State(True)],
        outputs=quiz_outputs,
    )
    next_btn.click(
        fn=handle_quiz,
        inputs=[question_idx, user_answers, code_input, gr.State(False)],
        outputs=quiz_outputs,
    )

    # Only two inputs are wired for a three-parameter handler; presumably
    # Gradio supplies `token` from its gr.OAuthToken annotation - confirm.
    submit_btn.click(
        fn=push_results_to_hub,
        inputs=[user_answers, login_btn],
        outputs=status_text,
    )

if __name__ == "__main__":
    demo.launch()