Python code with Pipeline

import transformers
import torch

model_id = "VIRNECT/llama-3-Korean-8B-V2"

# Build a text-generation pipeline with bfloat16 weights and automatic
# device placement across the available GPUs/CPU
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

# Switch to inference mode (disables dropout)
pipeline.model.eval()

# System prompt (Korean): "You are a friendly chatbot that converses with humans.
# You provide detailed, situation-appropriate information in response to questions.
# If you do not know the answer to a question, you say that you do not know."
PROMPT = '''당신은 인간과 λŒ€ν™”ν•˜λŠ” μΉœμ ˆν•œ μ±—λ΄‡μž…λ‹ˆλ‹€. μ§ˆλ¬Έμ— λŒ€ν•œ 정보λ₯Ό 상황에 맞게 μžμ„Ένžˆ μ œκ³΅ν•©λ‹ˆλ‹€. 당신이 μ§ˆλ¬Έμ— λŒ€ν•œ 닡을 λͺ¨λ₯Έλ‹€λ©΄, 사싀은 λͺ¨λ₯Έλ‹€κ³  λ§ν•©λ‹ˆλ‹€.'''
# User question (Korean): "How does chemical engineering differ from other engineering fields?"
instruction = "화학곡학이 λ‹€λ₯Έ 곡학 뢄야와 μ–΄λ–»κ²Œ λ‹€λ₯Έκ°€μš”?"

messages = [
    {"role": "system", "content": PROMPT},
    {"role": "user", "content": instruction},
]

# Render the chat messages into a single prompt string using the model's
# chat template; add_generation_prompt appends the assistant header so the
# model starts generating its reply
prompt = pipeline.tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
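
# For reference, the rendered prompt follows the Llama-3 chat layout,
# approximately (exact whitespace depends on the tokenizer's template):
#   <|begin_of_text|><|start_header_id|>system<|end_header_id|>
#   ...system prompt...<|eot_id|><|start_header_id|>user<|end_header_id|>
#   ...question...<|eot_id|><|start_header_id|>assistant<|end_header_id|>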

# Stop on either the tokenizer's EOS token or Llama-3's end-of-turn token
# <|eot_id|>, which instruct-tuned Llama-3 models emit at the end of a turn
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

# Generate with nucleus sampling; eos_token_id accepts a list of stop-token ids
outputs = pipeline(
    prompt,
    max_new_tokens=2048,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
)

# generated_text contains the prompt followed by the completion;
# slice off the prompt to print only the model's answer
print(outputs[0]["generated_text"][len(prompt):])
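
Python code with AutoModel

For comparison, the same generation can be run without the pipeline wrapper by loading the model and tokenizer directly. Below is a minimal sketch using the standard transformers AutoModelForCausalLM and AutoTokenizer APIs, reusing PROMPT and instruction from the example above:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "VIRNECT/llama-3-Korean-8B-V2"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
model.eval()

# Same Korean system prompt and question as in the pipeline example
messages = [
    {"role": "system", "content": PROMPT},
    {"role": "user", "content": instruction},
]

# Tokenize the chat directly to input ids instead of rendering to a string
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)

terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

with torch.no_grad():
    output = model.generate(
        input_ids,
        max_new_tokens=2048,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )

# Decode only the newly generated tokens after the prompt
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))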