import streamlit as st
import subprocess
import os
import shutil
import requests
from huggingface_hub import snapshot_download, login, HfApi
from pathlib import Path
import tempfile
# Locations of the llama.cpp sources, binaries, and conversion script
LLAMA_CPP_PATH = "https://huggingface.co/spaces/KBaba7/llama.cpp/tree/main/llama.cpp"
LLAMA_CPP_BIN = "build/bin"
BUILD_DIR = "build"
CONVERT_SCRIPT = "convert-hf-to-gguf.py"  # Ensure correct path
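# Note: LLAMA_CPP_BIN, BUILD_DIR and CONVERT_SCRIPT are resolved relative to the working
# directory after the KBaba7/llama.cpp Space has been downloaded below; this assumes the
# Space ships the conversion script and prebuilt binaries under build/bin.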
def run_command(command):
    """Run a shell command and return its stdout and stderr."""
    result = subprocess.run(
        command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, text=True
    )
    return result.stdout, result.stderr
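# Example (hypothetical): stdout, stderr = run_command("ls build/bin")
# The command string is passed to the shell, so paths containing spaces should be quoted.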
st.title("LLAMA Quantization Pipeline")
st.markdown(
    """
    This tool downloads a model from Hugging Face, converts it to GGUF format, quantizes it, and provides an option to download the final model.
    """
)
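# Pipeline overview (as implemented below): 1) download the llama.cpp Space and the source
# model, 2) convert the model to GGUF with convert-hf-to-gguf.py, 3) quantize the GGUF file
# with llama-quantize, 4) offer the result for download and optionally upload it to the Hub.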
st.sidebar.header("Settings")
st.sidebar.write("Provide a Hugging Face access token if you want to upload the quantized model to the Hub.")
hf_token = st.sidebar.text_input("Hugging Face Access Token", type="password")
model_repo_id = st.sidebar.text_input("Model Repository ID", "Qwen/Qwen2.5-3B")
quantization_options = ["q4_k_m", "q4_0", "q4_1"]
quantization_type = st.sidebar.selectbox("Select Quantization Type", quantization_options)
quant_options = ["f32", "f16", "bf16", "q8_0", "auto"]
quant_type = st.sidebar.selectbox("Select GGUF Output Type", quant_options)
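# The GGUF output type is the precision used by convert-hf-to-gguf.py for the intermediate
# GGUF file; the quantization type is what llama-quantize applies afterwards to produce the
# final, smaller model.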
upload_option = st.sidebar.checkbox("Upload quantized model to Hugging Face?", value=False)
run_button = st.button("Run Pipeline")
if run_button:
    st.info("Starting the pipeline. Please be patient...")
    log_area = st.empty()
    logs = []

    def log(message):
        logs.append(message)
        log_area.text("\n".join(logs))
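    # log() re-renders the accumulated messages into the st.empty() placeholder,
    # so the full pipeline log stays visible as new steps complete.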
    try:
        # Download the llama.cpp repository
        snapshot_download(repo_id="KBaba7/llama.cpp", local_dir="llama.cpp", repo_type="space")

        # Create temporary directories for the original and quantized models
        temp_path = Path(tempfile.gettempdir())
        original_model_dir = temp_path / "original_model"
        quantized_model_dir = temp_path / "quantized_model"
        original_model_dir.mkdir(parents=True, exist_ok=True)
        quantized_model_dir.mkdir(parents=True, exist_ok=True)
log("Downloading model from Hugging Face...") | |
snapshot_download(repo_id=model_repo_id, local_dir=str(original_model_dir), local_dir_use_symlinks=False) | |
log(f"Model downloaded to: {original_model_dir}") | |
log("Converting model to GGUF format...") | |
conversion_outfile = quantized_model_dir / "model_converted.gguf" | |
conversion_cmd = ( | |
f"python3 convert-hf-to-gguf.py {original_model_dir} --outtype {quant_type} " | |
f"--outfile {conversion_outfile}" | |
) | |
conv_stdout, conv_stderr = run_command(conversion_cmd) | |
log(conv_stdout + conv_stderr) | |
if not conversion_outfile.exists(): | |
log("Error: GGUF conversion failed! No output file found.") | |
st.error("GGUF conversion failed. Check logs.") | |
st.stop() | |
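        # llama-quantize is invoked as: llama-quantize <input.gguf> <output.gguf> <type>;
        # the binary is assumed to be available at build/bin/llama-quantize in the working directory.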
log("Quantizing the model...") | |
quantized_model_outfile = quantized_model_dir / f"model_quantized_{quantization_type}.gguf" | |
quantize_cmd = f"build/bin/llama-quantize {conversion_outfile} {quantized_model_outfile} {quantization_type}" | |
quant_stdout, quant_stderr = run_command(quantize_cmd) | |
log(quant_stdout + quant_stderr) | |
if not quantized_model_outfile.exists(): | |
log("Error: Quantization failed! No output file found.") | |
st.error("Quantization failed. Check logs.") | |
st.stop() | |
log("Pipeline completed successfully!") | |
st.success("Quantized model ready for download.") | |
with open(quantized_model_outfile, "rb") as file: | |
st.download_button(label="Download Quantized Model", data=file, file_name=quantized_model_outfile.name) | |
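        # st.download_button reads the file contents when the button is rendered, so the
        # temporary directory can be removed afterwards without breaking the download.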
        # Upload if selected
        if upload_option:
            log("Uploading quantized model to Hugging Face...")
            login(token=hf_token)
            api = HfApi()
            # Note: creating a repo under the automated-quantization namespace requires
            # write access to that organization.
            target_repo = f"automated-quantization/{quantized_model_outfile.stem}"
            api.create_repo(target_repo, exist_ok=True, repo_type="model")
            api.upload_file(
                path_or_fileobj=str(quantized_model_outfile),
                path_in_repo=quantized_model_outfile.name,
                repo_id=target_repo,
            )
            log("Upload complete!")
    except Exception as e:
        log(f"An error occurred: {e}")
    finally:
        # Remove temporary directories (rmdir only removes empty dirs, so use shutil.rmtree)
        temp_root = Path(tempfile.gettempdir())
        shutil.rmtree(temp_root / "original_model", ignore_errors=True)
        shutil.rmtree(temp_root / "quantized_model", ignore_errors=True)