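"""Gradio coding quiz app.

Loads coding challenges from a Hugging Face dataset, grades each submission
with an LLM via the Inference API (falling back to a plain string comparison),
and pushes every user's results to a private dataset on the Hub.
"""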
import os
from datetime import datetime
import random
import gradio as gr
from datasets import load_dataset, Dataset, DatasetDict
from huggingface_hub import whoami, InferenceClient
# Initialize the inference client
client = InferenceClient(
api_key=os.getenv("HF_API_KEY"), # Make sure to set this environment variable
)
# Load questions from Hugging Face dataset
EXAM_MAX_QUESTIONS = int(os.getenv("EXAM_MAX_QUESTIONS", 5))  # The dataset has 5 questions total
EXAM_PASSING_SCORE = float(os.getenv("EXAM_PASSING_SCORE", 0.7))
EXAM_DATASET_ID = "burtenshaw/dummy-code-quiz"
# Prepare the dataset for the quiz: shuffle, then cap at EXAM_MAX_QUESTIONS
ds = load_dataset(EXAM_DATASET_ID, split="train")
quiz_data = ds.to_list()
random.shuffle(quiz_data)
quiz_data = quiz_data[:EXAM_MAX_QUESTIONS]
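# Each row is expected to provide "challenge", "solution", and "placeholder"
# fields, which the quiz logic below relies on.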
def check_code(user_code, solution, challenge):
"""
Use LLM to evaluate if the user's code solution is correct.
Returns True if the solution is correct, False otherwise.
"""
prompt = f"""You are an expert Python programming instructor evaluating a student's code solution.
Challenge:
{challenge}
Reference Solution:
{solution}
Student's Solution:
{user_code}
Evaluate if the student's solution is functionally equivalent to the reference solution.
Consider:
1. Does it solve the problem correctly?
2. Does it handle edge cases appropriately?
3. Does it follow the requirements of the challenge?
Respond with ONLY "CORRECT" or "INCORRECT" followed by a brief explanation.
"""
messages = [{"role": "user", "content": prompt}]
try:
completion = client.chat.completions.create(
model="Qwen/Qwen2.5-Coder-32B-Instruct",
messages=messages,
max_tokens=500,
)
response = completion.choices[0].message.content.strip()
# Extract the verdict from the response
is_correct = response.upper().startswith("CORRECT")
# Add the explanation to the status text with emoji
explanation = response.split("\n", 1)[1] if "\n" in response else ""
status = "βœ… Correct!" if is_correct else "❌ Incorrect!"
gr.Info(f"{status}\n\n{explanation}")
return is_correct
except Exception as e:
gr.Warning(f"Error checking code: {str(e)}")
# Fall back to simple string comparison if LLM fails
is_correct = user_code.strip() == solution.strip()
status = "βœ… Correct!" if is_correct else "❌ Incorrect!"
gr.Info(f"{status} (Fallback comparison)")
return is_correct
def on_user_logged_in(token: gr.OAuthToken | None):
"""Handle user login state"""
if token is not None:
return gr.update(visible=False), gr.update(visible=True)
else:
return gr.update(visible=True), gr.update(visible=False)
def push_results_to_hub(
user_answers: list, token: gr.OAuthToken | None, signed_in_message: str
):
"""Push results to Hugging Face Hub."""
print(f"signed_in_message: {signed_in_message}")
if not user_answers: # Check if there are any answers to submit
gr.Warning("No answers to submit!")
return "No answers to submit!"
if token is None:
gr.Warning("Please log in to Hugging Face before pushing!")
return "Please log in to Hugging Face before pushing!"
# Calculate grade
correct_count = sum(1 for answer in user_answers if answer["is_correct"])
total_questions = len(user_answers)
grade = correct_count / total_questions if total_questions > 0 else 0
    if grade < EXAM_PASSING_SCORE:
        gr.Warning(
            f"Score {grade:.1%} below passing threshold of {EXAM_PASSING_SCORE:.1%}"
        )
        return f"You scored {grade:.1%}. Please try again to achieve at least {EXAM_PASSING_SCORE:.1%}"
gr.Info("Submitting answers to the Hub. Please wait...", duration=2)
user_info = whoami(token=token.token)
username = user_info["name"]
repo_id = f"{EXAM_DATASET_ID}_responses"
submission_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# Create a dataset with the user's answers and metadata
submission_data = [
{
"username": username,
"datetime": submission_time,
"grade": grade,
**answer, # Include all answer data
}
for answer in user_answers
]
try:
# Try to load existing dataset
existing_ds = load_dataset(repo_id)
# Convert to DatasetDict if it isn't already
if not isinstance(existing_ds, dict):
existing_ds = DatasetDict({"default": existing_ds})
except Exception:
# If dataset doesn't exist, create empty DatasetDict
existing_ds = DatasetDict()
# Create new dataset from submission
new_ds = Dataset.from_list(submission_data)
# Add or update the split for this user
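    # (a re-submission replaces that user's previous split rather than appending)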
existing_ds[username] = new_ds
# Push the updated dataset to the Hub
existing_ds.push_to_hub(
repo_id,
private=True, # Make it private by default since it contains student submissions
)
return f"Your responses have been submitted to the Hub! Final grade: {grade:.1%}"
def handle_quiz(question_idx, user_answers, submitted_code, is_start):
"""Handle quiz state and progression"""
# Hide the start button once the first question is shown
    start_btn_update = gr.update(visible=False) if is_start else gr.update()  # gr.update() is a no-op
# If this is the first time (start=True), begin at question_idx=0
if is_start:
question_idx = 0
else:
# If not the first question and there's a submission, store the user's last submission
        if (
            question_idx < len(quiz_data) and submitted_code and submitted_code.strip()
        ):  # Only grade when there is actually a submission
current_q = quiz_data[question_idx]
is_correct = check_code(
submitted_code, current_q["solution"], current_q["challenge"]
)
user_answers.append(
{
"challenge": current_q["challenge"],
"submitted_code": submitted_code,
"correct_solution": current_q["solution"],
"is_correct": is_correct,
}
)
question_idx += 1
# If we've reached the end, show final results
if question_idx >= len(quiz_data):
correct_count = sum(1 for answer in user_answers if answer["is_correct"])
        grade = correct_count / len(user_answers) if user_answers else 0
results_text = (
f"**Quiz Complete!**\n\n"
f"Your score: {grade:.1%}\n"
f"Passing score: {float(EXAM_PASSING_SCORE):.1%}\n\n"
f"Your answers:\n\n"
)
for idx, answer in enumerate(user_answers):
results_text += (
f"Question {idx + 1}: {'βœ…' if answer['is_correct'] else '❌'}\n"
)
results_text += (
f"Your code:\n```python\n{answer['submitted_code']}\n```\n\n"
)
return (
"", # question_text becomes blank
gr.update(value="", visible=False), # clear and hide code input
f"{'βœ… Passed!' if grade >= float(EXAM_PASSING_SCORE) else '❌ Did not pass'}",
question_idx,
user_answers,
start_btn_update,
gr.update(value=results_text, visible=True), # show final_markdown
)
else:
# Show the next question
q = quiz_data[question_idx]
challenge_text = f"## Question {question_idx + 1} \n### {q['challenge']}"
return (
challenge_text,
gr.update(value=q["placeholder"], visible=True),
"Submit your code solution and click 'Next' to continue.",
question_idx,
user_answers,
start_btn_update,
gr.update(visible=False), # Hide final_markdown
)
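# Note: handle_quiz returns a 7-tuple whose order must match the `outputs`
# lists wired to the Start and Next buttons below.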
with gr.Blocks(title=f"Coding Quiz: {EXAM_DATASET_ID}") as demo:
# State variables
question_idx = gr.State(value=0)
user_answers = gr.State(value=[])
with gr.Row(variant="compact"):
gr.Markdown(f"## Welcome to the {EXAM_DATASET_ID} Quiz")
with gr.Row(variant="compact"):
gr.Markdown(
"Log in first, then click 'Start' to begin. Complete each coding challenge, click 'Next', "
"and finally click 'Submit' to publish your results to the Hugging Face Hub."
)
with gr.Row(variant="panel"):
question_text = gr.Markdown("")
code_input = gr.Code(language="python", label="Your Solution", visible=False)
with gr.Row(variant="compact"):
status_text = gr.Markdown("")
with gr.Row(variant="compact"):
final_markdown = gr.Markdown("", visible=False)
next_btn = gr.Button("Next ⏭️")
submit_btn = gr.Button("Submit βœ…")
with gr.Row(variant="compact"):
login_btn = gr.LoginButton()
start_btn = gr.Button("Start", visible=False)
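    # Gradio injects gr.OAuthToken into on_user_logged_in from the session
    # based on its type hint, so no explicit inputs are needed here.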
login_btn.click(fn=on_user_logged_in, inputs=None, outputs=[login_btn, start_btn])
start_btn.click(
fn=handle_quiz,
inputs=[question_idx, user_answers, code_input, gr.State(True)],
outputs=[
question_text,
code_input,
status_text,
question_idx,
user_answers,
start_btn,
final_markdown,
],
)
next_btn.click(
fn=handle_quiz,
inputs=[question_idx, user_answers, code_input, gr.State(False)],
outputs=[
question_text,
code_input,
status_text,
question_idx,
user_answers,
start_btn,
final_markdown,
],
)
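    # The LoginButton's value (the signed-in message) feeds `signed_in_message`;
    # the OAuth token is again injected automatically via the type hint.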
submit_btn.click(
fn=push_results_to_hub,
inputs=[user_answers, login_btn],
outputs=status_text,
)
if __name__ == "__main__":
demo.launch()