import torch
from transformers import AutoProcessor, Qwen2AudioForConditionalGeneration

# The processor and the model weights are loaded from two different
# identifiers: the processor from "Qwen/Qwen2-Audio-7B", the weights from the
# checkpoint below.
# NOTE(review): presumably the checkpoint was fine-tuned from that base model
# and ships no processor files of its own -- confirm.
processor = AutoProcessor.from_pretrained("Qwen/Qwen2-Audio-7B")

# `torch_dtype=torch.float16` requests half-precision weights; this is the
# only use of `torch` here and the reason it must be imported explicitly.
model = Qwen2AudioForConditionalGeneration.from_pretrained(
    "danielwusg/svco_qw7b_len256_ep2_ckpt468_20250304",
    torch_dtype=torch.float16,
)