---
library_name: transformers
pipeline_tag: text-generation
inference: true
widget:
  - text: Hello!
    example_title: Hello world
    group: Python
---

This model is for debugging. It is randomly initialized with the config from [ibm-fms/Bamba-9B](https://huggingface.co/ibm-fms/Bamba-9B) but is of smaller size.

Code:

```python
import os

import torch
import transformers
from transformers import (AutoConfig, AutoModelForCausalLM, AutoTokenizer,
                          GenerationConfig, pipeline, set_seed)

# Checkpoint whose config and tokenizer are reused, and where the tiny
# model is written locally.
model_id = "ibm-fms/Bamba-9B"
repo_id = "tiny-random/bamba"
save_path = f"/tmp/{repo_id}"

# Start from the reference config and override its size-related fields so
# the resulting model is small.
config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
tiny_overrides = {
    "attn_layer_indices": [1],
    "attn_rotary_emb": 4,
    "hidden_size": 16,
    "intermediate_size": 32,
    "num_attention_heads": 2,
    "num_hidden_layers": 2,
    "num_key_value_heads": 1,
    "mamba_expand": 4,
    "mamba_d_head": 8,
    "mamba_n_heads": 8,
    "mamba_d_state": 8,
}
for field_name, field_value in tiny_overrides.items():
    setattr(config, field_name, field_value)

# The tokenizer is copied unchanged next to the model weights.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.save_pretrained(save_path)

# Build the model from the config alone, in bfloat16.
model = AutoModelForCausalLM.from_config(
    config,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
# model.generation_config = GenerationConfig.from_pretrained(
#     model_id, trust_remote_code=True
# )

# Seed first, then re-draw every parameter from a normal distribution
# (mean 0, std 0.5). Walking the parameters in sorted-name order keeps the
# order of the random draws fixed.
set_seed(42)
params_by_name = dict(model.named_parameters())
with torch.no_grad():
    for param_name in sorted(params_by_name):
        param = params_by_name[param_name]
        torch.nn.init.normal_(param, 0, 0.5)
        print(param_name, param.shape)
model.save_pretrained(save_path)

# Smoke test: reload what was just saved, move it to the GPU and generate
# two new tokens. `.cuda()` means this part needs a CUDA device.
model = AutoModelForCausalLM.from_pretrained(save_path)
model = model.cuda()
tokenizer = AutoTokenizer.from_pretrained(save_path)

prompts = ["Hello, world!"]
encoded = tokenizer(prompts, return_tensors='pt', return_token_type_ids=False)
encoded = encoded.to(model.device)
generated = model.generate(**encoded, max_new_tokens=2)
print(tokenizer.convert_ids_to_tokens(generated[0], skip_special_tokens=False))
```