Files
Machine-Learning-Collection/ML/Pytorch/more_advanced/finetuning_whisper/test.py

8 lines
271 B
Python

from transformers import WhisperTokenizer
# Sanity check of Whisper special-token ids: load the tokenizer of the
# "openai/whisper-tiny" checkpoint with task="transcribe" and print two ids
# so they can be compared by eye against the expected values noted below.
tokenizer = WhisperTokenizer.from_pretrained(
    "openai/whisper-tiny", task="transcribe"
)
# The input text is empty, so whatever ids come back are tokens the tokenizer
# inserted itself; only the first id of that sequence is inspected here.
encoded_string = tokenizer.encode("")[0]
print(encoded_string)  # should print 50258
print(tokenizer.bos_token_id)  # should print 50257