!pip -q install git+https://github.com/huggingface/transformers  # install from source for up-to-date Llama 3 support
!pip -q install bitsandbytes accelerate xformers einops
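# Quick sanity check: bitsandbytes 4-bit loading assumes a CUDA GPU is visible,
# so it is worth confirming that before downloading an 8B checkpoint.
import torch
assert torch.cuda.is_available(), "4-bit quantization via bitsandbytes needs a CUDA GPU"
print(torch.cuda.get_device_name(0))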
import os
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline

model_name = "herisan/llama-3-8b_mental_health_counseling_conversations"
# 4-bit quantization config: in bf16 the 8B model needs ~16 GB for weights alone,
# while NF4 brings that down to roughly 5-6 GB
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # quantize weights to 4-bit at load time
    bnb_4bit_use_double_quant=True,         # also quantize the quantization constants
    bnb_4bit_quant_type="nf4",              # NormalFloat4, suited to normally distributed weights
    bnb_4bit_compute_dtype=torch.bfloat16,  # run matmuls in bfloat16
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config,
    device_map="auto",         # let accelerate place the quantized weights on the GPU
    # low_cpu_mem_usage=True,  # optional: reduce peak CPU RAM while loading
)
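# To confirm the quantization took effect, report the model's in-memory size
# with get_memory_footprint (a standard transformers method); expect a figure
# well under the ~16 GB a bf16 load would need.
print(f"memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")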
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Llama 3 tokenizers already define their special tokens (BOS <|begin_of_text|>,
# id 128000), so there is no need to override bos_token_id here.

# Llama 3 chat models mark the end of each turn with <|eot_id|>; including it
# alongside the regular EOS token stops generation after the assistant's reply.
# This assumes the fine-tune kept the stock Llama 3 chat template.
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    use_cache=True,
    # the model was loaded with device_map="auto", so no device argument is needed here;
    # sampling settings (temperature, top_k/top_p, max_new_tokens) are passed per call below
    eos_token_id=terminators,
    pad_token_id=tokenizer.eos_token_id,  # Llama 3 ships without a pad token, so reuse EOS
)
messages = [
    {
        "role": "system",
        "content": "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.",
    },
    {
        "role": "user",
        "content": "I'm going through some things with my feelings and myself. I barely sleep and I do nothing but think about how I'm worthless and how I shouldn't be here. I've never tried or contemplated suicide. I've always wanted to fix my issues, but I never get around to it. How can I change my feeling of being worthless to everyone?",
    },
]
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
outputs = pipe(prompt, max_new_tokens=2046, do_sample=True, temperature=0.7, top_k=50, top_p=0.95, truncation=True)
print(outputs[0]["generated_text"])
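# The pipeline returns the prompt followed by the completion (the default
# return_full_text=True behavior); slicing off the prompt leaves just the
# model's reply.
response = outputs[0]["generated_text"][len(prompt):]
print(response)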