Garbled output
# Inside a FastAPI SSE endpoint handler; requires `import asyncio` and the
# surrounding `input_text`, `max_length`, `request`, and `model_manager` objects.
async def event_generator():
    input_ids = model_manager.tokenizer.encode(input_text, return_tensors="pt")
    if model_manager.model.device.type == 'cuda':
        input_ids = input_ids.cuda()
    generated_ids = []
    for _ in range(max_length):
        if await request.is_disconnected():
            break
        # Generate one token at a time so it can be streamed to the client.
        output = model_manager.model.generate(
            input_ids, max_new_tokens=1,
            return_dict_in_generate=True, output_scores=True
        )
        next_token_id = output.sequences[0, -1].item()
        generated_ids.append(next_token_id)
        input_ids = output.sequences
        # Check that the token id is within the tokenizer's range.
        if next_token_id < len(model_manager.tokenizer):
            # Decode and print the token that was just generated.
            print(f"Decoding current token; tokenizer: {model_manager.tokenizer}")
            print(f"Decoding current token; next_token_id: {next_token_id}")
            current_word = model_manager.tokenizer.decode([next_token_id], skip_special_tokens=True)
            print(current_word, flush=True)
        else:
            print(f"Invalid token ID: {next_token_id}", flush=True)
            current_word = ""
        event_data = {
            "event": "message",
            "data": current_word
        }
        # Log the SSE payload before sending it.
        print(f"Sending SSE data: {event_data}")
        yield event_data
        await asyncio.sleep(0.1)
        print(f"model_manager.tokenizer.eos_token_id: {model_manager.tokenizer.eos_token_id}")
        print(f"next_token_id: {next_token_id}")
        # Stop once the end-of-sequence token is generated.
        if next_token_id == model_manager.tokenizer.eos_token_id:
            break
Output:
Decoding current token; tokenizer: LlamaTokenizerFast(name_or_path='../DeepSeek-R1-Distill-Qwen-7B', vocab_size=151643, model_max_length=16384, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': '<|begin▁of▁sentence|>', 'eos_token': '<|end▁of▁sentence|>', 'pad_token': '<|end▁of▁sentence|>'}, clean_up_tokenization_spaces=False), added_tokens_decoder={
151643: AddedToken("<|end▁of▁sentence|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151644: AddedToken("<|User|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151645: AddedToken("<|Assistant|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151646: AddedToken("<|begin▁of▁sentence|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151647: AddedToken("<|EOT|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151648: AddedToken("<think>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151649: AddedToken("</think>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151650: AddedToken("<|quad_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151651: AddedToken("<|quad_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151652: AddedToken("<|vision_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151653: AddedToken("<|vision_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151654: AddedToken("<|vision_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151655: AddedToken("<|image_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151656: AddedToken("<|video_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151657: AddedToken("<tool_call>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151658: AddedToken("</tool_call>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151659: AddedToken("<|fim_prefix|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151660: AddedToken("<|fim_middle|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151661: AddedToken("<|fim_suffix|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151662: AddedToken("<|fim_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151663: AddedToken("<|repo_name|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151664: AddedToken("<|file_sep|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
}
Decoding current token; next_token_id: 242
�
Sending SSE data: {'event': 'message', 'data': '�'}
model_manager.tokenizer.eos_token_id: 151643
next_token_id: 242
Could anyone tell me what is causing this garbled character (�)?
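For reference, the single-token decode can be isolated from the SSE service with a small standalone check. This is only an illustrative sketch, not part of the service code; it assumes the transformers library and the same local model directory as above, and the test string is arbitrary:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("../DeepSeek-R1-Distill-Qwen-7B")

# The id taken from the log above.
print(repr(tokenizer.decode([242], skip_special_tokens=True)))

# Encode a short Chinese string and decode it token by token.
ids = tokenizer.encode("你好，世界", add_special_tokens=False)
for i in ids:
    # A token that covers only part of a multi-byte UTF-8 character
    # may decode to "�" on its own.
    print(i, repr(tokenizer.decode([i], skip_special_tokens=True)))

# Decoding the whole sequence at once gives readable text.
print(tokenizer.decode(ids, skip_special_tokens=True))

If the individual ids decode to "�" while the full sequence decodes cleanly, the byte-level BPE tokenizer is splitting multi-byte UTF-8 characters across tokens, which would match the symptom above.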
I've run into the same garbled-output problem; my output looks like this:
</אּí(
δŸㅿ�mericfsr」。
November │
》-->
!」____
,
’
[
0™*__襀
200Ð[
409ꯉ0۹!
OK
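In case it helps, one common workaround is to keep all generated ids, decode the accumulated sequence each step, and send only the newly completed text, holding back a trailing partial character. The sketch below is not the original poster's code; it reuses the model_manager, request, input_text, max_length, and asyncio names from the question and is untested:

async def event_generator():
    input_ids = model_manager.tokenizer.encode(input_text, return_tensors="pt")
    if model_manager.model.device.type == 'cuda':
        input_ids = input_ids.cuda()
    generated_ids = []
    emitted_text = ""
    for _ in range(max_length):
        if await request.is_disconnected():
            break
        output = model_manager.model.generate(
            input_ids, max_new_tokens=1, return_dict_in_generate=True
        )
        next_token_id = output.sequences[0, -1].item()
        generated_ids.append(next_token_id)
        input_ids = output.sequences
        # Decode everything generated so far; an incomplete multi-byte
        # character only appears as a trailing "�", so hold it back until
        # the next token completes it.
        full_text = model_manager.tokenizer.decode(
            generated_ids, skip_special_tokens=True
        )
        new_text = full_text[len(emitted_text):]
        if new_text and not new_text.endswith("\ufffd"):
            emitted_text = full_text
            yield {"event": "message", "data": new_text}
        await asyncio.sleep(0.1)
        if next_token_id == model_manager.tokenizer.eos_token_id:
            break

This is similar in spirit to how the library's built-in text streamers accumulate tokens before emitting text, so the client only ever receives fully decoded characters.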