---
# Model metadata: base checkpoint, tags, license, and language.
base_model: unsloth/Qwen2.5-3B-Instruct
tags:
  - text-generation-inference
  - transformers
  - qwen2
  - trl
  - grpo
license: apache-2.0
language:
  - en
---
Training settings: r = 64, steps = 800