import streamlit as st
import subprocess
import os
import shutil
import requests
from huggingface_hub import snapshot_download, login, HfApi
from pathlib import Path
import tempfile
# Locations of the llama.cpp sources, binaries, and conversion script
LLAMA_CPP_PATH = "https://huggingface.co/spaces/KBaba7/llama.cpp/tree/main/llama.cpp"
LLAMA_CPP_BIN = "build/bin"
BUILD_DIR = "build"
CONVERT_SCRIPT = "convert-hf-to-gguf.py"  # Ensure correct path
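# Note: LLAMA_CPP_BIN, BUILD_DIR and CONVERT_SCRIPT are resolved relative to the working
# directory after the KBaba7/llama.cpp Space has been downloaded below; this assumes the
# Space ships the conversion script and prebuilt binaries under build/bin.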
def run_command(command):
    """Run a shell command and return its stdout and stderr."""
    result = subprocess.run(
        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, text=True
    )
    return result.stdout, result.stderr
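# Example (hypothetical): stdout, stderr = run_command("ls build/bin")
# The command string is passed to the shell, so paths containing spaces should be quoted.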
st.title("LLAMA Quantization Pipeline")
st.markdown(
    """
    This tool downloads a model from Hugging Face, converts it to GGUF format, quantizes it, and provides an option to download the final model.
    """
)
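# Pipeline overview (as implemented below): 1) download the llama.cpp Space and the source
# model, 2) convert the model to GGUF with convert-hf-to-gguf.py, 3) quantize the GGUF file
# with llama-quantize, 4) offer the result for download and optionally upload it to the Hub.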
st.sidebar.header("Settings")
st.sidebar.write("Provide a Hugging Face access token if you want to upload the quantized model to the Hub.")
hf_token = st.sidebar.text_input("Hugging Face Access Token", type="password")
model_repo_id = st.sidebar.text_input("Model Repository ID", "Qwen/Qwen2.5-3B")
quantization_options = ["q4_k_m", "q4_0", "q4_1"]
quantization_type = st.sidebar.selectbox("Select Quantization Type", quantization_options)
quant_options = ["f32", "f16", "bf16", "q8_0", "auto"]
quant_type = st.sidebar.selectbox("Select GGUF Output Type", quant_options)
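# The GGUF output type is the precision used by convert-hf-to-gguf.py for the intermediate
# GGUF file; the quantization type is what llama-quantize applies afterwards to produce the
# final, smaller model.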
upload_option = st.sidebar.checkbox("Upload quantized model to Hugging Face?", value=False)
run_button = st.button("Run Pipeline")
if run_button:
    st.info("Starting the pipeline. Please be patient...")
    log_area = st.empty()
    logs = []

    def log(message):
        logs.append(message)
        log_area.text("\n".join(logs))
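    # log() re-renders the accumulated messages into the st.empty() placeholder,
    # so the full pipeline log stays visible as new steps complete.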
    try:
        # Download the llama.cpp repository
        snapshot_download(repo_id="KBaba7/llama.cpp", local_dir="llama.cpp", repo_type="space")

        # Create temporary directories for the original and quantized models
        temp_path = Path(tempfile.gettempdir())
        original_model_dir = temp_path / "original_model"
        quantized_model_dir = temp_path / "quantized_model"
        original_model_dir.mkdir(parents=True, exist_ok=True)
        quantized_model_dir.mkdir(parents=True, exist_ok=True)
log("Downloading model from Hugging Face...") | |
snapshot_download(repo_id=model_repo_id, local_dir=str(original_model_dir), local_dir_use_symlinks=False) | |
log(f"Model downloaded to: {original_model_dir}") | |
log("Converting model to GGUF format...") | |
conversion_outfile = quantized_model_dir / "model_converted.gguf" | |
conversion_cmd = ( | |
f"python3 convert-hf-to-gguf.py {original_model_dir} --outtype {quant_type} " | |
f"--outfile {conversion_outfile}" | |
) | |
conv_stdout, conv_stderr = run_command(conversion_cmd) | |
log(conv_stdout + conv_stderr) | |
if not conversion_outfile.exists(): | |
log("Error: GGUF conversion failed! No output file found.") | |
st.error("GGUF conversion failed. Check logs.") | |
st.stop() | |
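        # llama-quantize is invoked as: llama-quantize <input.gguf> <output.gguf> <type>;
        # the binary is assumed to be available at build/bin/llama-quantize in the working directory.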
log("Quantizing the model...") | |
quantized_model_outfile = quantized_model_dir / f"model_quantized_{quantization_type}.gguf" | |
quantize_cmd = f"build/bin/llama-quantize {conversion_outfile} {quantized_model_outfile} {quantization_type}" | |
quant_stdout, quant_stderr = run_command(quantize_cmd) | |
log(quant_stdout + quant_stderr) | |
if not quantized_model_outfile.exists(): | |
log("Error: Quantization failed! No output file found.") | |
st.error("Quantization failed. Check logs.") | |
st.stop() | |
log("Pipeline completed successfully!") | |
st.success("Quantized model ready for download.") | |
with open(quantized_model_outfile, "rb") as file: | |
st.download_button(label="Download Quantized Model", data=file, file_name=quantized_model_outfile.name) | |
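        # st.download_button reads the file contents when the button is rendered, so the
        # temporary directory can be removed afterwards without breaking the download.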
        # Upload if selected
        if upload_option:
            log("Uploading quantized model to Hugging Face...")
            login(token=hf_token)
            api = HfApi()
            # Note: creating a repo under the automated-quantization namespace requires
            # write access to that organization.
            target_repo = f"automated-quantization/{quantized_model_outfile.stem}"
            api.create_repo(target_repo, exist_ok=True, repo_type="model")
            api.upload_file(
                path_or_fileobj=str(quantized_model_outfile),
                path_in_repo=quantized_model_outfile.name,
                repo_id=target_repo,
            )
            log("Upload complete!")
    except Exception as e:
        log(f"An error occurred: {e}")
    finally:
        # Remove temporary directories (rmdir only removes empty dirs, so use shutil.rmtree)
        temp_root = Path(tempfile.gettempdir())
        shutil.rmtree(temp_root / "original_model", ignore_errors=True)
        shutil.rmtree(temp_root / "quantized_model", ignore_errors=True)