from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Replace with your target Qwen model on Hugging Face
MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"

# Initialize tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",  # or "cuda", etc. if you want to specify
    trust_remote_code=True
)

# Create the text-generation pipeline
qwen_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer
)

def generate_response(retrieved_texts, query, max_new_tokens=200):
    # Join the retrieved passages into a single context block
    context = "\n".join(retrieved_texts)
    prompt = f"This is the detail about the image:\n{context}\n\nQuestion: {query}\nAnswer:"
    # Generate; the pipeline returns a list of dicts with a "generated_text" key
    result = qwen_pipeline(prompt, max_new_tokens=max_new_tokens)
    generated_text = result[0]["generated_text"]
    # The pipeline echoes the prompt, so keep only the text after "Answer:"
    if "Answer:" in generated_text:
        final_answer = generated_text.split("Answer:")[-1].strip()
    else:
        final_answer = generated_text
    return final_answer
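
For reference, here is a minimal sketch of how generate_response could be called once the pipeline is loaded. The retrieved_texts and query values are purely illustrative placeholders; in practice they would come from your retrieval step.

# Example usage (illustrative values; substitute your own retrieval results)
retrieved_texts = [
    "The image shows a red bicycle leaning against a brick wall.",
    "A wicker basket is attached to the handlebars.",
]
query = "What object is attached to the handlebars?"

answer = generate_response(retrieved_texts, query, max_new_tokens=100)
print(answer)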