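"""Gradio quiz app for a Hugging Face Space.

Loads coding challenges from a Hub dataset, asks an LLM judge to grade each
submission against a reference solution, and pushes the results back to the Hub.
"""
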
import os
from datetime import datetime
import random
from typing import List

import gradio as gr
from datasets import load_dataset, Dataset, DatasetDict
from huggingface_hub import whoami, InferenceClient
import black  # Used to normalize code formatting before comparison

# Initialize the inference client
client = InferenceClient(
    api_key=os.getenv("HF_API_KEY"),  # Make sure to set this environment variable
)

# Quiz configuration
EXAM_MAX_QUESTIONS = int(
    os.getenv("EXAM_MAX_QUESTIONS", 5)
)  # Limit the quiz to at most this many questions
EXAM_PASSING_SCORE = float(os.getenv("EXAM_PASSING_SCORE", 0.8))
EXAM_DATASET_ID = "burtenshaw/dummy-code-quiz"

# Load and prep the questions from the Hugging Face dataset
ds = load_dataset(EXAM_DATASET_ID, split="train", download_mode="force_redownload")
quiz_data = list(ds)  # Convert the dataset rows to a plain list of dicts
random.shuffle(quiz_data)
if EXAM_MAX_QUESTIONS:
    quiz_data = quiz_data[:EXAM_MAX_QUESTIONS]
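
# Note: each record in quiz_data is expected to expose the fields used below:
# "challenge", "placeholder", "solution", "assessment_criteria", and "image".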


def format_python_code(code: str) -> str:
    """Format Python code using black."""
    try:
        return black.format_str(code, mode=black.Mode())
    except Exception as e:
        gr.Warning(f"Code formatting failed: {str(e)}")
        return code


def check_code(
    user_code: str, solution: str, challenge: str, assessment_criteria: List[str]
):
    """
    Use an LLM to evaluate whether the user's code solution is correct.
    Returns True if the solution is correct, False otherwise.
    """
    # Format both the user code and the reference solution
    formatted_user_code = format_python_code(user_code)
    formatted_solution = format_python_code(solution)

    assessment_criteria_str = "\n".join(
        [f"{i + 1}. {c}" for i, c in enumerate(assessment_criteria)]
    )

    prompt = f"""You are an expert Python programming instructor evaluating a student's code solution.

Challenge:
{challenge}

Reference Solution:
{formatted_solution}

Student's Solution:
{formatted_user_code}

Assessment Criteria:
{assessment_criteria_str}

Evaluate if the student's solution is functionally equivalent to the reference solution.
Consider:
1. Does it solve the problem correctly?
2. Does it handle edge cases appropriately?
3. Does it follow the requirements of the challenge?
4. Does it meet the assessment criteria?

Respond with ONLY "CORRECT" or "INCORRECT" followed by a brief explanation.
"""

    messages = [{"role": "user", "content": prompt}]

    try:
        completion = client.chat.completions.create(
            model="Qwen/Qwen2.5-Coder-32B-Instruct",
            messages=messages,
            max_tokens=500,
        )
        response = completion.choices[0].message.content.strip()

        # Extract the verdict from the response
        is_correct = response.upper().startswith("CORRECT")

        # Surface the explanation in the status message with an emoji
        explanation = response.split("\n", 1)[1] if "\n" in response else ""
        status = "✅ Correct!" if is_correct else "❌ Incorrect!"
        gr.Info(f"{status}\n\n{explanation}")

        return is_correct
    except Exception as e:
        gr.Warning(f"Error checking code: {str(e)}")
        # Fall back to a simple string comparison if the LLM call fails
        is_correct = formatted_user_code.strip() == formatted_solution.strip()
        status = "✅ Correct!" if is_correct else "❌ Incorrect!"
        gr.Info(f"{status} (Fallback comparison)")
        return is_correct


def on_user_logged_in(token: gr.OAuthToken | None):
    """
    Handle user login state.

    On a valid token, hide the login button and reveal the Start button while
    keeping Next and Submit hidden. Also clear the question text, code input,
    status, and image.
    """
    if token is not None:
        return (
            gr.update(visible=False),  # login_btn hidden
            gr.update(visible=True),  # start_btn shown
            gr.update(visible=False),  # next_btn hidden
            gr.update(visible=False),  # submit_btn hidden
            "",  # clear question_text
            gr.update(value="", visible=False),  # clear code_input
            "",  # clear status_text
            gr.update(value="", visible=False),  # clear question_image
        )
    else:
        return (
            gr.update(visible=True),  # login_btn visible
            gr.update(visible=False),  # start_btn hidden
            gr.update(visible=False),  # next_btn hidden
            gr.update(visible=False),  # submit_btn hidden
            "",  # clear question_text
            gr.update(value="", visible=False),  # clear code_input
            "",  # clear status_text
            gr.update(value="", visible=False),  # clear question_image
        )


def push_results_to_hub(
    user_answers: list, token: gr.OAuthToken | None, signed_in_message: str
):
    """Push results to the Hugging Face Hub."""
    print(f"signed_in_message: {signed_in_message}")

    if not user_answers:  # Check if there are any answers to submit
        gr.Warning("No answers to submit!")
        return "No answers to submit!"

    if token is None:
        gr.Warning("Please log in to Hugging Face before pushing!")
        return "Please log in to Hugging Face before pushing!"

    # Calculate grade
    correct_count = sum(1 for answer in user_answers if answer["is_correct"])
    total_questions = len(user_answers)
    grade = correct_count / total_questions if total_questions > 0 else 0

    if grade < float(EXAM_PASSING_SCORE):
        gr.Warning(
            f"Score {grade:.1%} below passing threshold of {float(EXAM_PASSING_SCORE):.1%}"
        )
        return f"You scored {grade:.1%}. Please try again to achieve at least {float(EXAM_PASSING_SCORE):.1%}"

    gr.Info("Submitting answers to the Hub. Please wait...", duration=2)

    user_info = whoami(token=token.token)
    username = user_info["name"]
    repo_id = f"{EXAM_DATASET_ID}_responses"
    submission_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Create a dataset with the user's answers and metadata
    submission_data = [
        {
            "username": username,
            "datetime": submission_time,
            "grade": grade,
            **answer,  # Include all answer data
        }
        for answer in user_answers
    ]

    try:
        # Try to load the existing dataset
        existing_ds = load_dataset(repo_id)
        # Convert to a DatasetDict if it isn't one already
        if not isinstance(existing_ds, dict):
            existing_ds = DatasetDict({"default": existing_ds})
    except Exception:
        # If the dataset doesn't exist, create an empty DatasetDict
        existing_ds = DatasetDict()

    # Create a new dataset from the submission
    new_ds = Dataset.from_list(submission_data)

    # Add or update the split for this user
    existing_ds[username] = new_ds

    # Push the updated dataset to the Hub
    existing_ds.push_to_hub(
        repo_id,
        private=True,  # Private by default since it contains student submissions
    )

    return f"Your responses have been submitted to the Hub! Final grade: {grade:.1%}"


def handle_quiz(question_idx, user_answers, submitted_code, is_start):
    """Handle quiz state and progression."""
    # Hide the start button once the first question is shown
    start_btn_update = gr.update(visible=False) if is_start else None

    # If this is the first time (start=True), begin at question_idx=0
    if is_start:
        question_idx = 0
    else:
        # If not the first question and there's a submission, store the user's last submission
        if (
            question_idx < len(quiz_data) and submitted_code.strip()
        ):  # Only check if there's code
            current_q = quiz_data[question_idx]
            # Format the submitted code before checking
            formatted_code = format_python_code(submitted_code)
            is_correct = check_code(
                formatted_code,
                current_q["solution"],
                current_q["challenge"],
                current_q["assessment_criteria"],
            )
            user_answers.append(
                {
                    "challenge": current_q["challenge"],
                    "submitted_code": formatted_code,  # Store the formatted code
                    "correct_solution": current_q["solution"],
                    "assessment_criteria": current_q["assessment_criteria"],
                    "is_correct": is_correct,
                }
            )
        question_idx += 1

    # If we've reached the end, show the final results
    if question_idx >= len(quiz_data):
        correct_count = sum(1 for answer in user_answers if answer["is_correct"])
        grade = correct_count / len(user_answers)
        results_text = (
            f"**Quiz Complete!**\n\n"
            f"Your score: {grade:.1%}\n"
            f"Passing score: {float(EXAM_PASSING_SCORE):.1%}\n\n"
            f"Your answers:\n\n"
        )
        for idx, answer in enumerate(user_answers):
            results_text += (
                f"Question {idx + 1}: {'✅' if answer['is_correct'] else '❌'}\n"
            )

        return (
            "",  # question_text cleared
            gr.update(value="", visible=False),  # hide code_input
            f"{'✅ Passed!' if grade >= EXAM_PASSING_SCORE else '❌ Did not pass'}",  # status_text
            question_idx,  # updated question index
            user_answers,  # accumulated answers
            gr.update(visible=False),  # start_btn hidden for quiz-in-progress
            gr.update(visible=False),  # next_btn hidden on completion
            gr.update(visible=True),  # submit_btn shown
            gr.update(value=results_text, visible=True),  # final_markdown with results
            gr.update(visible=False),  # question_image hidden on completion
        )
    else:
        # Show the next question
        q = quiz_data[question_idx]
        challenge_text = f"## Question {question_idx + 1} \n### {q['challenge']}"

        return (
            challenge_text,  # question_text
            gr.update(value=q["placeholder"], visible=True),  # code_input
            "Submit your code solution and click 'Next' to continue.",  # status_text
            question_idx,  # updated question_idx
            user_answers,  # user_answers
            gr.update(visible=False),  # start_btn hidden
            gr.update(visible=True),  # next_btn visible
            gr.update(visible=False),  # submit_btn hidden
            gr.update(visible=False),  # final_markdown hidden
            gr.update(
                value=q["image"], visible=True if q["image"] else False
            ),  # question_image with current question image
        )


with gr.Blocks() as demo:
    demo.title = f"Coding Quiz: {EXAM_DATASET_ID}"

    # State variables
    question_idx = gr.State(value=0)
    user_answers = gr.State(value=[])

    with gr.Row(variant="compact"):
        gr.Markdown(f"## Welcome to the {EXAM_DATASET_ID} Quiz")

    with gr.Row(variant="compact"):
        gr.Markdown(
            "Log in first, then click 'Start' to begin. Complete each coding challenge, click 'Next', "
            "and finally click 'Submit' to publish your results to the Hugging Face Hub."
        )

    with gr.Row(variant="panel"):
        with gr.Column():
            question_text = gr.Markdown("")
            question_image = gr.Image(
                label="Question Image", visible=True, type="pil"
            )  # Optional image attached to the current question
        with gr.Column():
            code_input = gr.Code(language="python", label="Your Solution", visible=False)

    with gr.Row(variant="compact"):
        status_text = gr.Markdown("")

    with gr.Row(variant="compact"):
        login_btn = gr.LoginButton()
        start_btn = gr.Button("Start")
        next_btn = gr.Button("Next ⏭️", visible=False)
        submit_btn = gr.Button("Submit ✅", visible=False)

    with gr.Row(variant="compact"):
        final_markdown = gr.Markdown("", visible=False)
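
    # Note: on_user_logged_in takes a gr.OAuthToken parameter; Gradio is expected to
    # inject it from the OAuth session based on the type annotation, which is why
    # inputs=None below still yields a token after login.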
    login_btn.click(
        fn=on_user_logged_in,
        inputs=None,
        outputs=[
            login_btn,
            start_btn,
            next_btn,
            submit_btn,
            question_text,
            code_input,
            status_text,
            question_image,
        ],
    )

    start_btn.click(
        fn=handle_quiz,
        inputs=[question_idx, user_answers, code_input, gr.State(True)],
        outputs=[
            question_text,  # Markdown with question text
            code_input,  # Code input field
            status_text,  # Status text (instructions/status messages)
            question_idx,  # Updated question index (state)
            user_answers,  # Updated user answers (state)
            start_btn,  # Update for start button (will be hidden)
            next_btn,  # Update for next button (shown for in-progress quiz)
            submit_btn,  # Update for submit button (hidden until end)
            final_markdown,  # Final results markdown (hidden until quiz ends)
            question_image,  # Image update for the quiz question
        ],
    )

    next_btn.click(
        fn=handle_quiz,
        inputs=[question_idx, user_answers, code_input, gr.State(False)],
        outputs=[
            question_text,
            code_input,
            status_text,
            question_idx,
            user_answers,
            start_btn,
            next_btn,
            submit_btn,
            final_markdown,
            question_image,
        ],
    )
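
    # Note: only user_answers and the LoginButton value (mapped to signed_in_message)
    # are listed as inputs here; the token parameter of push_results_to_hub should
    # again be injected by Gradio via its gr.OAuthToken annotation.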
    submit_btn.click(
        fn=push_results_to_hub,
        inputs=[user_answers, login_btn],
        outputs=status_text,
    )


if __name__ == "__main__":
    demo.launch()