Error while loading processor: TypeError: expected str, bytes or os.PathLike object, not NoneType
I'm getting an error while loading the processor:
```python
from transformers import SiglipProcessor, SiglipModel

device = "cuda"  # the device to load the model onto
ckpt = "google/siglip2-base-patch16-224"
processor = SiglipProcessor.from_pretrained(ckpt, trust_remote_code=True)
```
```
The tokenizer class you load from this checkpoint is 'GemmaTokenizer'.
The class this function is called from is 'SiglipTokenizer'.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[1], line 5
2 device = "cuda" # the device to load the model onto
4 ckpt = "google/siglip2-base-patch16-224"
----> 5 processor = SiglipProcessor.from_pretrained(ckpt, trust_remote_code=True)
File /opt/conda/lib/python3.10/site-packages/transformers/processing_utils.py:1070, in ProcessorMixin.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, **kwargs)
1067 if token is not None:
1068 kwargs["token"] = token
-> 1070 args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
1071 processor_dict, kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
1073 return cls.from_args_and_dict(args, processor_dict, **kwargs)
File /opt/conda/lib/python3.10/site-packages/transformers/processing_utils.py:1116, in ProcessorMixin._get_arguments_from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
1113 else:
1114 attribute_class = getattr(transformers_module, class_name)
-> 1116 args.append(attribute_class.from_pretrained(pretrained_model_name_or_path, **kwargs))
1117 return args
File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2052, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, trust_remote_code, *init_inputs, **kwargs)
2049 else:
2050 logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 2052 return cls._from_pretrained(
2053 resolved_vocab_files,
2054 pretrained_model_name_or_path,
2055 init_configuration,
2056 *init_inputs,
2057 token=token,
2058 cache_dir=cache_dir,
2059 local_files_only=local_files_only,
2060 _commit_hash=commit_hash,
2061 _is_local=is_local,
2062 trust_remote_code=trust_remote_code,
2063 **kwargs,
2064 )
File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2292, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, token, cache_dir, local_files_only, _commit_hash, _is_local, trust_remote_code, *init_inputs, **kwargs)
2290 # Instantiate the tokenizer.
2291 try:
-> 2292 tokenizer = cls(*init_inputs, **init_kwargs)
2293 except import_protobuf_decode_error():
2294 logger.info(
2295 "Unable to load tokenizer model from SPM, loading from TikToken will be attempted instead."
2296 "(Google protobuf error: Tried to load SPM model with non-SPM vocab file).",
2297 )
File /opt/conda/lib/python3.10/site-packages/transformers/models/siglip/tokenization_siglip.py:123, in SiglipTokenizer.__init__(self, vocab_file, eos_token, unk_token, pad_token, additional_special_tokens, sp_model_kwargs, model_max_length, do_lower_case, **kwargs)
120 self.do_lower_case = do_lower_case
121 self.vocab_file = vocab_file
--> 123 self.sp_model = self.get_spm_processor()
124 self.vocab_file = vocab_file
126 super().__init__(
127 eos_token=eos_token,
128 unk_token=unk_token,
(...)
134 **kwargs,
135 )
File /opt/conda/lib/python3.10/site-packages/transformers/models/siglip/tokenization_siglip.py:139, in SiglipTokenizer.get_spm_processor(self)
137 def get_spm_processor(self):
138 tokenizer = spm.SentencePieceProcessor(**self.sp_model_kwargs)
--> 139 with open(self.vocab_file, "rb") as f:
140 sp_model = f.read()
141 model_pb2 = import_protobuf()
TypeError: expected str, bytes or os.PathLike object, not NoneType
```
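For context, the warning above suggests what is going on: the SigLIP2 checkpoints ship a Gemma tokenizer, while `SiglipProcessor` in 4.49.0 tries to instantiate the SentencePiece-based `SiglipTokenizer`, whose `vocab_file` resolves to `None`. A quick way to check, assuming a transformers version that ships SigLIP2 support, is to load the tokenizer through `AutoTokenizer`, which follows the checkpoint's `tokenizer_config.json`:

```python
from transformers import AutoTokenizer

# Resolves the tokenizer class declared in the checkpoint's
# tokenizer_config.json instead of hard-coding SiglipTokenizer.
tok = AutoTokenizer.from_pretrained("google/siglip2-base-patch16-224")
print(type(tok).__name__)  # expected to be a Gemma tokenizer class
```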
I have the same issue using the AutoModel and AutoProcessor classes (both from the transformers 4.49.0 package).
You need to install the latest version of transformers; for now:
```
pip install git+https://github.com/huggingface/transformers
```
`pip install transformers==4.49.0` does not work.
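After the source install, loading via the Auto classes should work; a minimal sketch, using the checkpoint from the original post:

```python
from transformers import AutoModel, AutoProcessor

ckpt = "google/siglip2-base-patch16-224"
model = AutoModel.from_pretrained(ckpt).eval()
processor = AutoProcessor.from_pretrained(ckpt)
```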
I had this same issue and was wondering when the SigLIP2 checkpoints will be supported in a PyPI release?
Edit: I was using this, specifically the section "Using the model yourself". I installed the version from GitHub, but that gives another error; I've pasted it below:
```
Asking to pad to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no padding.
Traceback (most recent call last):
  File "/rsp/data/ML/sarthakExperiments/test/venv/lib64/python3.9/site-packages/transformers/tokenization_utils_base.py", line 777, in convert_to_tensors
    tensor = as_tensor(value)
  File "/rsp/data/ML/sarthakExperiments/test/venv/lib64/python3.9/site-packages/transformers/tokenization_utils_base.py", line 739, in as_tensor
    return torch.tensor(value)
ValueError: expected sequence of length 10 at dim 1 (got 9)

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/rsp/data/ML/sarthakExperiments/test/test.py", line 16, in <module>
    inputs = processor(text = texts,
  File "/rsp/data/ML/sarthakExperiments/test/venv/lib64/python3.9/site-packages/transformers/models/siglip/processing_siglip.py", line 108, in __call__
    encoding = self.tokenizer(
  File "/rsp/data/ML/sarthakExperiments/test/venv/lib64/python3.9/site-packages/transformers/tokenization_utils_base.py", line 2877, in __call__
    encodings = self._call_one(text=text, text_pair=text_pair, **all_kwargs)
  File "/rsp/data/ML/sarthakExperiments/test/venv/lib64/python3.9/site-packages/transformers/tokenization_utils_base.py", line 2965, in _call_one
    return self.batch_encode_plus(
  File "/rsp/data/ML/sarthakExperiments/test/venv/lib64/python3.9/site-packages/transformers/tokenization_utils_base.py", line 3167, in batch_encode_plus
    return self._batch_encode_plus(
  File "/rsp/data/ML/sarthakExperiments/test/venv/lib64/python3.9/site-packages/transformers/tokenization_utils_fast.py", line 587, in _batch_encode_plus
    return BatchEncoding(sanitized_tokens, sanitized_encodings, tensor_type=return_tensors)
  File "/rsp/data/ML/sarthakExperiments/test/venv/lib64/python3.9/site-packages/transformers/tokenization_utils_base.py", line 241, in __init__
    self.convert_to_tensors(tensor_type=tensor_type, prepend_batch_axis=prepend_batch_axis)
```
Just change `AutoProcessor` to `AutoImageProcessor`.
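For image-only embeddings that route works; a minimal sketch, assuming a transformers build with SigLIP2 support (the image path is hypothetical):

```python
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModel

ckpt = "google/siglip2-base-patch16-224"
image_processor = AutoImageProcessor.from_pretrained(ckpt)
model = AutoModel.from_pretrained(ckpt).eval()

image = Image.open("dog.jpg")  # hypothetical local file
pixel_inputs = image_processor(images=image, return_tensors="pt")

# The image processor only yields pixel_values, which is exactly
# what get_image_features expects.
with torch.no_grad():
    image_embeddings = model.get_image_features(**pixel_inputs)
print(image_embeddings.shape)
```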
@kurisu0306
Tried that and it worked, but the inputs only embed the image and not the texts, which throws another error.
Code:
```python
inputs = processor(text=texts,
                   images=image,
                   padding="max_length",
                   return_tensors="pt").to('cuda')
```
Then I pass these inputs to the model:
```python
with torch.no_grad():
    with torch.autocast('cuda'):
        outputs = model(**inputs)
```
But it throws this error:
```
.......
958, in forward
    raise ValueError("You have to specify input_ids")
ValueError: You have to specify input_ids
```
Having a look at the processor's output, this is what `inputs` contains:
```
(Pdb) inputs.keys()
dict_keys(['pixel_values'])
(Pdb) inputs['pixel_values'].shape
torch.Size([1, 3, 224, 224])
```
Now it makes sense that my input image got encoded, but shouldn't the texts be encoded as well? That is presumably what is throwing the error.
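That is expected: `AutoImageProcessor` only produces `pixel_values`, so there are no `input_ids` for the model. To encode both modalities you need the full `AutoProcessor` from a transformers version with SigLIP2 support, and, if I read the model card examples correctly, an explicit `max_length` (the Gemma tokenizer has no predefined maximum length, which also explains the earlier padding warning). A hedged sketch:

```python
import torch
from PIL import Image
from transformers import AutoModel, AutoProcessor

ckpt = "google/siglip2-base-patch16-224"
model = AutoModel.from_pretrained(ckpt).eval()
processor = AutoProcessor.from_pretrained(ckpt)

image = Image.open("dog.jpg")  # hypothetical local file
texts = ["2 cats", "2 dogs"]

# An explicit max_length avoids both the "no maximum length" warning and the
# ragged-batch ValueError when texts tokenize to different lengths.
inputs = processor(text=texts, images=image,
                   padding="max_length", max_length=64,
                   return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs)
print(inputs.keys())  # should now include input_ids and pixel_values
```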
> You need to install the latest version of transformers; for now:
> `pip install git+https://github.com/huggingface/transformers`
Has anyone succeeded in running SigLIP2?
I've installed transformers this way, but then I get another error. On top of the new transformers, I also tried updating timm to version 1.0.13, and yet another error comes up.
I'm stuck on this bug and would appreciate any advice, thanks!
My code is as follows:
```python
import torch
from PIL import Image
from transformers import AutoModel, AutoProcessor

# load the model and processor
ckpt = "xxxxx/siglip2-large-patch16-256"
device = "cuda"
model = AutoModel.from_pretrained(ckpt, device_map="auto").eval()
processor = AutoProcessor.from_pretrained(ckpt, use_fast=True)

# load the image and the candidate labels
image = Image.open("./dog.jpg")
candidate_labels = ["2 cats", "2 dogs"]

# the Gemma tokenizer has no predefined maximum length, so pass one explicitly
inputs = processor(text=candidate_labels, images=image,
                   padding="max_length", max_length=64,
                   return_tensors="pt").to(device)

# get features
with torch.no_grad():
    outputs = model(**inputs)
print(type(outputs))
print(dir(outputs))
print(outputs)

# run inference: get_image_features expects pixel_values only, not input_ids
with torch.no_grad():
    image_embeddings = model.get_image_features(pixel_values=inputs["pixel_values"])
print(image_embeddings.shape)
```
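If the forward pass goes through, the zero-shot scores live in `outputs.logits_per_image`; SigLIP models are trained with a sigmoid loss, so scores are usually mapped to probabilities with `torch.sigmoid` rather than softmax. A short continuation of the snippet above:

```python
# Continuing from the snippet above (uses `outputs` and `candidate_labels`).
logits_per_image = outputs.logits_per_image  # shape: [num_images, num_texts]
probs = torch.sigmoid(logits_per_image)
for label, p in zip(candidate_labels, probs[0]):
    print(f"{p.item():.1%} that the image matches '{label}'")
```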