GGUF
Not-For-All-Audiences
Inference Endpoints
vdpappu's picture
Update README.md
756fc8a verified
metadata
license: apache-2.0
datasets:
  - jkhedri/psychology-dataset
tags:
  - not-for-all-audiences

Usage (download the GGUF file from the Hugging Face Hub and run one prompt with llama-cpp-python)

from llama_cpp import Llama
from typing import Optional
import time
from huggingface_hub import hf_hub_download

def generate_prompt(input_text: str, instruction: Optional[str] = None) -> str:
    """Format a question (and optional instruction) into the model prompt.

    Args:
        input_text: The question inserted after the ``### Question:`` marker.
        instruction: Optional text placed in a leading ``### Instruction:``
            section. A falsy value (``None`` or an empty string) omits that
            section entirely.

    Returns:
        The prompt string, always ending with ``### Answer: `` so that the
        model's completion continues from there.
    """
    question_block = f"### Question: {input_text}\n\n### Answer: "
    # Guard clause: with no instruction the question block is the whole prompt.
    if not instruction:
        return question_block
    return f"### Instruction: {instruction}\n\n{question_block}"

# Where the GGUF file lives on the Hugging Face Hub and where to store it locally.
repo_id = "vdpappu/gemma2_psychologist_1_gguf"
filename = "gemma2_psychologist_1.gguf"
local_dir = "."

# Fetch the model file and report the path handed back by the hub client.
downloaded_file_path = hf_hub_download(
    repo_id=repo_id,
    filename=filename,
    local_dir=local_dir,
)
print(f"File downloaded to: {downloaded_file_path}")

# Load the model from the downloaded file.
llm = Llama(model_path=downloaded_file_path)

# Build the prompt for a single example question (no instruction section).
question = "I feel lonely. What should I do?"
prompt = generate_prompt(input_text=question)

# Sampling settings, grouped in one place so they are easy to adjust.
sampling_params = {
    "temperature": 0.7,
    "top_p": 0.9,
    "top_k": 50,
    "repeat_penalty": 1.5,
    "max_tokens": 200,
    # Stop sequences: cut generation at a new "Question:" marker or "<eos>".
    "stop": ["Question:", "<eos>"],
}

# Time only the generation call, not the download or model load.
start = time.time()
output = llm(prompt, **sampling_params)
end = time.time()

print(f"Inference time: {end-start:.2f} seconds \n")
# The generated text is read from the first entry of the 'choices' list.
print(output['choices'][0]['text'])