---
# For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
# Doc / guide: https://huggingface.co/docs/hub/model-cards
{}
---

# Model Card for master-thesis-hell/llama-7b_sft-v5

## Model Details

## Uses

As of 2023/04/12, Llama has not yet been included in a stable `transformers` release, so please [install transformers from source](https://huggingface.co/docs/transformers/installation#install-from-source).

```python
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig

transformers_model = "master-thesis-hell/llama-7b_sft-v5"

model = LlamaForCausalLM.from_pretrained(transformers_model, device_map="auto", torch_dtype=torch.float16)
tokenizer = LlamaTokenizer.from_pretrained(transformers_model)

# The special tokens (bos_token, eos_token, pad_token) have already been added to this tokenizer,
# and its `add_bos_token` parameter is set to False.
# If you use the original LlamaTokenizer instead, you have to configure both of these yourself.
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens(
        {
            "eos_token": "</s>",
            "bos_token": "<s>",
            "pad_token": "[PAD]"
        }
    )

def generate_a_response(prompt, generation_config):
    # Wrap the prompt as "<bos><prompt><eos>"; the eos token doubles as the
    # separator between the prompt and the model's answer.
    segmenter = tokenizer.eos_token
    prompt = tokenizer.bos_token + prompt + segmenter
    input_ids = tokenizer.encode(prompt, return_tensors='pt').cuda()

    beam_output = model.generate(
        input_ids,
        max_length=1024,
        generation_config=generation_config
    )
    ans = tokenizer.decode(beam_output[0], skip_special_tokens=False)
    # The decoded text is "<prompt><eos><answer>...", so the answer is the second segment.
    return ans.split(segmenter)[1].lstrip()

generation_config = GenerationConfig(
    temperature=0.1,
    top_p=0.65,
    num_beams=4,
    no_repeat_ngram_size=7,
)

prompt = "台灣最高的建築物是?"  # "What is the tallest building in Taiwan?"
response = generate_a_response(prompt, generation_config)
print(response)
```
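
Note that the `GenerationConfig` above leaves `do_sample` at its default of `False`, so `model.generate` runs beam search and the `temperature`/`top_p` values are not actually applied (recent `transformers` versions emit a warning about this). If you want sampling instead of beam search, a minimal variant such as the one below enables it; this is an illustrative sketch that reuses `generate_a_response` and `prompt` from the example above, and the specific values are assumptions rather than settings from the original card.

```python
# Illustrative alternative: sampling-based decoding instead of beam search.
# These values are assumptions for demonstration, not recommendations from the card.
sampling_config = GenerationConfig(
    do_sample=True,            # required for temperature / top_p to take effect
    temperature=0.1,
    top_p=0.65,
    no_repeat_ngram_size=7,
)

response = generate_a_response(prompt, sampling_config)
print(response)
```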