# Tokenizer exercised by ``test()``. It is left unset in this file; assign an
# object that provides ``encode(text)`` and ``convert_ids_to_tokens(ids)``
# before running the demo.
tokenizer = None


def test():
    """Encode a sample string and print every token id next to its token text.

    The module-level ``tokenizer`` is used. When it is still ``None`` a short
    notice is written to stderr and the function returns without printing any
    tokens, instead of failing with an opaque ``AttributeError``.

    Returns:
        None. One ``<token_id> : <token>`` line per token is printed to stdout.
    """
    if tokenizer is None:
        import sys  # local import: only needed on this diagnostic path

        print(
            "tokenizer is not configured; assign one before calling test()",
            file=sys.stderr,
        )
        return

    # NOTE(review): the sample text says "8个空格" (8 spaces) but contains a
    # single space here; the whitespace may have been collapsed. Confirm.
    token_ids = tokenizer.encode("测试华为手机10086 8个空格")
    for token_id in token_ids:
        # Look tokens up one at a time so each id is shown with its own piece.
        token = tokenizer.convert_ids_to_tokens([token_id])[0]
        if isinstance(token, (bytes, bytearray)):
            # If the vocabulary is byte-level, one token may hold only part of
            # a multi-byte UTF-8 character; strict decoding would raise
            # UnicodeDecodeError, so undecodable bytes are shown as U+FFFD.
            text = token.decode("utf-8", errors="replace")
        else:
            # Tokens that are already text are printed as they are.
            text = str(token)
        print(token_id, ":", text)


if __name__ == "__main__":
    test()