import streamlit as st
import subprocess
import os
import requests
from huggingface_hub import snapshot_download, login, HfApi
from pathlib import Path
import tempfile
import shutil
# Define paths for llama.cpp binaries
LLAMA_CPP_PATH = "https://huggingface.co/spaces/KBaba7/llama.cpp/tree/main/llama.cpp"
LLAMA_CPP_BIN = "build/bin"
BUILD_DIR = "build"
CONVERT_SCRIPT = "convert-hf-to-gguf.py" # Ensure correct path
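# NOTE (assumption): LLAMA_CPP_BIN and CONVERT_SCRIPT are resolved relative to the
# working directory, so the llama.cpp checkout downloaded below is expected to provide
# the conversion script and prebuilt binaries at these paths.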
def run_command(command):
""" Run a shell command and return its output. """
result = subprocess.run(
command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True, text=True
)
return result.stdout, result.stderr
st.title("LLAMA Quantization Pipeline")
st.markdown(
"""
This tool downloads a model from Hugging Face, converts it to GGUF format, quantizes it, and provides an option to download the final model.
"""
)
st.sidebar.header("Settings")
st.sidebar.write("Provide a Hugging Face access token if you want to upload the quantized model to the Hub.")
hf_token = st.sidebar.text_input("Hugging Face Access Token", type="password")
model_repo_id = st.sidebar.text_input("Model Repository ID", "Qwen/Qwen2.5-3B")
quantization_options = ["q4_k_m", "q4_0", "q4_1"]
quantization_type = st.sidebar.selectbox("Select Quantization Type", quantization_options)
quant_options = ["f32", "f16", "bf16", "q8_0", "auto"]
quant_type = st.sidebar.selectbox("Select GGUF Output Type", quant_options)
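# quant_type is the --outtype passed to convert-hf-to-gguf.py for the intermediate GGUF
# file; quantization_type is the final format passed to llama-quantize.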
upload_option = st.sidebar.checkbox("Upload quantized model to Hugging Face?", value=False)
run_button = st.button("Run Pipeline")
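# Pipeline: download the base model, convert it to GGUF, quantize it with llama.cpp,
# then offer the result for download (and optionally upload it to the Hub).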
if run_button:
    st.info("Starting the pipeline. Please be patient...")
    log_area = st.empty()
    logs = []

    def log(message):
        logs.append(message)
        log_area.text("\n".join(logs))
    try:
        # Download the llama.cpp repository
        snapshot_download(repo_id="KBaba7/llama.cpp", local_dir="llama.cpp", repo_type="space")

        # Create temporary directories for the original and quantized models
        temp_path = Path(tempfile.gettempdir())
        original_model_dir = temp_path / "original_model"
        quantized_model_dir = temp_path / "quantized_model"
        original_model_dir.mkdir(parents=True, exist_ok=True)
        quantized_model_dir.mkdir(parents=True, exist_ok=True)

        log("Downloading model from Hugging Face...")
        snapshot_download(repo_id=model_repo_id, local_dir=str(original_model_dir), local_dir_use_symlinks=False)
        log(f"Model downloaded to: {original_model_dir}")
log("Converting model to GGUF format...")
conversion_outfile = quantized_model_dir / "model_converted.gguf"
conversion_cmd = (
f"python3 convert-hf-to-gguf.py {original_model_dir} --outtype {quant_type} "
f"--outfile {conversion_outfile}"
)
conv_stdout, conv_stderr = run_command(conversion_cmd)
log(conv_stdout + conv_stderr)
if not conversion_outfile.exists():
log("Error: GGUF conversion failed! No output file found.")
st.error("GGUF conversion failed. Check logs.")
st.stop()
log("Quantizing the model...")
quantized_model_outfile = quantized_model_dir / f"model_quantized_{quantization_type}.gguf"
quantize_cmd = f"build/bin/llama-quantize {conversion_outfile} {quantized_model_outfile} {quantization_type}"
quant_stdout, quant_stderr = run_command(quantize_cmd)
log(quant_stdout + quant_stderr)
if not quantized_model_outfile.exists():
log("Error: Quantization failed! No output file found.")
st.error("Quantization failed. Check logs.")
st.stop()
log("Pipeline completed successfully!")
st.success("Quantized model ready for download.")
with open(quantized_model_outfile, "rb") as file:
st.download_button(label="Download Quantized Model", data=file, file_name=quantized_model_outfile.name)
        # Upload if selected
        if upload_option:
            log("Uploading quantized model to Hugging Face...")
            login(token=hf_token)
            api = HfApi()
            target_repo = f"automated-quantization/{quantized_model_outfile.stem}"
            api.create_repo(target_repo, exist_ok=True, repo_type="model")
            api.upload_file(
                path_or_fileobj=str(quantized_model_outfile),
                path_in_repo=quantized_model_outfile.name,
                repo_id=target_repo,
            )
            log("Upload complete!")
    except Exception as e:
        log(f"An error occurred: {e}")
    finally:
        # Remove the temporary model directories and their contents
        temp_root = Path(tempfile.gettempdir())
        shutil.rmtree(temp_root / "original_model", ignore_errors=True)
        shutil.rmtree(temp_root / "quantized_model", ignore_errors=True)