amiguel committed
Commit e65b516 · verified
1 Parent(s): fb280d1

Update app.py

Files changed (1)
  1. app.py +48 -33
app.py CHANGED
@@ -1,5 +1,5 @@
 import streamlit as st
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextIteratorStreamer
+from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 from huggingface_hub import login
 from threading import Thread
 import PyPDF2
@@ -24,8 +24,12 @@ st.set_page_config(
 )

 # Model names
-BASE_MODEL_NAME ="HuggingFaceTB/SmolLM2-360M" #"HuggingFaceTB/SmolLM2-1.7B-Instruct" #"google/flan-t5-base" # Base FLAN-T5 model
-PEFT_ADAPTER_NAME ="amiguel/enterpriseFTmodel" #"amiguel/cerebrasFTdeepseek" #"amiguel/classFinetuned_deepseek" # PEFT adapter
+BASE_MODEL_NAME = "HuggingFaceTB/SmolLM2-360M"
+MODEL_OPTIONS = {
+    "Full Fine-Tuned": "amiguel/SmolLM2-360M-concise-reasoning",
+    "LoRA Adapter": "amiguel/SmolLM2-360M-concise-reasoning-lora",
+    "QLoRA Adapter": "amiguel/SmolLM2-360M-concise-reasoning-qlora"  # Hypothetical, adjust if needed
+}

 # Title with rocket emojis
 st.title("🚀 WizNerd Insp 🚀")
@@ -40,6 +44,10 @@ with st.sidebar:
     hf_token = st.text_input("Hugging Face Token", type="password",
                              help="Get your token from https://huggingface.co/settings/tokens")

+    st.header("Model Selection 🤖")
+    model_type = st.selectbox("Choose Model Type", list(MODEL_OPTIONS.keys()), index=0)
+    selected_model = MODEL_OPTIONS[model_type]
+
     st.header("Upload Documents 📂")
     uploaded_file = st.file_uploader(
         "Choose a PDF or XLSX file",
@@ -70,7 +78,7 @@ def process_file(uploaded_file):

 # Model loading function
 @st.cache_resource
-def load_model(hf_token):
+def load_model(hf_token, model_type, selected_model):
     try:
         if not hf_token:
             st.error("🔐 Authentication required! Please provide a Hugging Face token.")
@@ -78,37 +86,42 @@ def load_model(hf_token):

         login(token=hf_token)

-        # Load base FLAN-T5 model
-        peft_model_base = AutoModelForSeq2SeqLM.from_pretrained(
-            BASE_MODEL_NAME,
-            torch_dtype=torch.bfloat16,
-            device_map="auto",
-            token=hf_token
-        )
-
-        # Load PEFT adapter and merge with base model
-        peft_model = PeftModel.from_pretrained(
-            peft_model_base,
-            PEFT_ADAPTER_NAME,
-            torch_dtype=torch.bfloat16,
-            is_trainable=False, # Set to False for inference
-            token=hf_token
-        )
-
         # Load tokenizer
-        tokenizer = AutoTokenizer.from_pretrained(
-            BASE_MODEL_NAME,
-            token=hf_token
-        )
+        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME, token=hf_token)
+
+        # Load model based on type
+        if model_type == "Full Fine-Tuned":
+            # Load full fine-tuned model directly
+            model = AutoModelForCausalLM.from_pretrained(
+                selected_model,
+                torch_dtype=torch.bfloat16,
+                device_map="auto",
+                token=hf_token
+            )
+        else:
+            # Load base model and apply PEFT adapter
+            base_model = AutoModelForCausalLM.from_pretrained(
+                BASE_MODEL_NAME,
+                torch_dtype=torch.bfloat16,
+                device_map="auto",
+                token=hf_token
+            )
+            model = PeftModel.from_pretrained(
+                base_model,
+                selected_model,
+                torch_dtype=torch.bfloat16,
+                is_trainable=False,  # Inference mode
+                token=hf_token
+            )

-        return peft_model, tokenizer
+        return model, tokenizer

     except Exception as e:
         st.error(f"🤖 Model loading failed: {str(e)}")
         return None

 # Generation function with KV caching
-def generate_with_kv_cache(prompt, file_context, use_cache=True):
+def generate_with_kv_cache(prompt, file_context, model, tokenizer, use_cache=True):
     full_prompt = f"Analyze this context:\n{file_context}\n\nQuestion: {prompt}\nAnswer:"

     streamer = TextIteratorStreamer(
@@ -120,7 +133,8 @@ def generate_with_kv_cache(prompt, file_context, use_cache=True):
     inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

     generation_kwargs = {
-        **inputs,
+        "input_ids": inputs["input_ids"],
+        "attention_mask": inputs["attention_mask"],
         "max_new_tokens": 1024,
         "temperature": 0.7,
         "top_p": 0.9,
@@ -149,14 +163,15 @@ if prompt := st.chat_input("Ask your inspection question..."):
         st.error("🔑 Authentication required!")
         st.stop()

-    # Load model if not already loaded
-    if "model" not in st.session_state:
-        model_data = load_model(hf_token)
+    # Load model if not already loaded or if model type changed
+    if "model" not in st.session_state or st.session_state.get("model_type") != model_type:
+        model_data = load_model(hf_token, model_type, selected_model)
         if model_data is None:
             st.error("Failed to load model. Please check your token and try again.")
             st.stop()

         st.session_state.model, st.session_state.tokenizer = model_data
+        st.session_state.model_type = model_type

     model = st.session_state.model
     tokenizer = st.session_state.tokenizer
@@ -174,7 +189,7 @@ if prompt := st.chat_input("Ask your inspection question..."):
         try:
             with st.chat_message("assistant", avatar=BOT_AVATAR):
                 start_time = time.time()
-                streamer = generate_with_kv_cache(prompt, file_context, use_cache=True)
+                streamer = generate_with_kv_cache(prompt, file_context, model, tokenizer, use_cache=True)

                 response_container = st.empty()
                 full_response = ""
@@ -209,4 +224,4 @@ if prompt := st.chat_input("Ask your inspection question..."):
         except Exception as e:
             st.error(f"⚡ Generation error: {str(e)}")
         else:
-            st.error("🤖 Model not loaded!")
+            st.error("🤖 Model not loaded!")
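
Note: the hunks above assemble generation_kwargs for the causal-LM model, but the lines that launch generation in a background thread and consume the streamer fall outside the changed ranges. The following is a minimal standalone sketch of the TextIteratorStreamer pattern generate_with_kv_cache relies on; the model name mirrors BASE_MODEL_NAME from the diff, while the prompt placeholders, the do_sample flag, and the final join are illustrative assumptions rather than the app's exact code.

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Standalone sketch (not the app's code); model name taken from BASE_MODEL_NAME in the diff
model_name = "HuggingFaceTB/SmolLM2-360M"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Prompt format mirrors full_prompt in generate_with_kv_cache; placeholders are illustrative
prompt = "Analyze this context:\n<file text>\n\nQuestion: <question>\nAnswer:"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# The streamer yields decoded text chunks as generate() produces tokens
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

generation_kwargs = {
    "input_ids": inputs["input_ids"],
    "attention_mask": inputs["attention_mask"],
    "max_new_tokens": 1024,
    "do_sample": True,   # assumption: sampling enabled so temperature/top_p take effect
    "temperature": 0.7,
    "top_p": 0.9,
    "use_cache": True,   # KV caching, as in generate_with_kv_cache
    "streamer": streamer,
}

# generate() blocks until completion, so it runs in a background thread while the
# caller iterates the streamer (the app appends each chunk to an st.empty() placeholder)
Thread(target=model.generate, kwargs=generation_kwargs).start()
full_response = "".join(chunk for chunk in streamer)
print(full_response)

Without the background thread, model.generate() would only return once the full answer is ready, so nothing could be streamed to the chat container incrementally.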