Python code with Pipeline

import transformers
import torch

# Hugging Face Hub repository that the model and tokenizer are loaded from.
model_id = "VIRNECT/llama-3-Korean-8B-r-v2"

# Build a text-generation pipeline for that checkpoint. The weights are
# requested in bfloat16, and device_map="auto" leaves device placement to
# the library.
pipeline = transformers.pipeline(
    task="text-generation",
    model=model_id,
    device_map="auto",
    model_kwargs={"torch_dtype": torch.bfloat16},
)

# Switch the wrapped model to evaluation mode before generating.
pipeline.model.eval()

# System prompt, given in English and then in Korean.
# NOTE(review): the Korean text here and in `instruction` was restored from
# mis-decoded (mojibake) characters -- confirm against the upstream model card.
PROMPT = '''You are a helpful AI assistant. Please answer the user's questions kindly. 당신은 유능한 AI 어시스턴트 입니다. 사용자의 질문에 대해 친절하게 답변해주세요.'''

# Example user question (Korean): "How does chemical engineering differ from
# other engineering fields?"
instruction = "화학공학이 다른 공학 분야와 어떻게 다른가요?"

# Chat-format conversation: one system turn followed by one user turn.
messages = [
    {"role": "system", "content": PROMPT},
    {"role": "user", "content": instruction},
]

# Render the chat messages into one prompt string (tokenize=False returns
# text rather than token ids) with the generation prompt appended.
tokenizer = pipeline.tokenizer
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# Generation stops on either the tokenizer's EOS token or "<|eot_id|>".
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

# Sample up to 2048 new tokens with temperature / nucleus (top-p) sampling.
outputs = pipeline(
    prompt,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    max_new_tokens=2048,
    eos_token_id=terminators,
)

# Skip the first len(prompt) characters of the returned text so that only
# what follows the prompt is printed.
completion = outputs[0]["generated_text"][len(prompt):]
print(completion)
Downloads last month
2,009
Safetensors
Model size
8.03B params
Tensor type
FP16
·
Inference Providers NEW
This model is not currently available via any of the supported Inference Providers.

Model tree for VIRNECT/llama-3-Korean-8B-r-v2

Quantizations
1 model

Spaces using VIRNECT/llama-3-Korean-8B-r-v2 5