govtech
/

lionguard-v1

Inference Endpoints

Model card Files Files and versions Community

lionguard-v1 / inference.py

shaunkhoo's picture

fix: correct typo

d8fdbcd 3 months ago

No virus

3.72 kB

	import numpy as np
	import pandas as pd
	import torch
	from transformers import AutoTokenizer, AutoModel
	from huggingface_hub import hf_hub_download
	import sys
	import json
	import onnxruntime as rt

	# Download model config
	# `repo_path` names the HuggingFace Hub repository; the parsed `config`
	# dict is read by the functions below through its 'embedding' and
	# 'classifier' sections.
	repo_path = "govtech/lionguard-v1"
	config_path = hf_hub_download(repo_id=repo_path, filename="config.json")
	# Pass the encoding explicitly so parsing the JSON file does not depend on
	# the platform's locale default.
	with open(config_path, 'r', encoding='utf-8') as f:
	    config = json.load(f)

	def get_embeddings(device, data):
	    """Embed a batch of texts with the model named in the config.

	    The tokenizer and model listed under ``config['embedding']`` are loaded
	    on every call, the model is switched to eval mode and moved to
	    ``device``, and ``data`` is processed in slices of
	    ``config['embedding']['batch_size']`` items.

	    Args:
	        device: Torch device (or device string) that the model and the
	            encoded inputs are moved to.
	        data: Sliceable sequence of input texts (assumed to be a list of
	            strings -- confirm against callers).

	    Returns:
	        ``np.ndarray`` holding one L2-normalised vector per input, taken
	        from index 0 along the second axis of the model's first output
	        (presumably the first-token embedding -- confirm for the
	        configured model).
	    """

	    # Load the model and tokenizer
	    tokenizer = AutoTokenizer.from_pretrained(config['embedding']['tokenizer'])
	    model = AutoModel.from_pretrained(config['embedding']['model'])
	    model.eval()
	    model.to(device)

	    # Generate the embeddings
	    batch_size = config['embedding']['batch_size']
	    max_length = config['embedding']['max_length']
	    output = []
	    # Stepping by batch_size visits exactly ceil(len(data) / batch_size)
	    # slices, the last of which may be shorter than batch_size.
	    for start in range(0, len(data), batch_size):
	        sentences = data[start:start + batch_size]
	        encoded_input = tokenizer(
	            sentences,
	            max_length=max_length,
	            padding=True,
	            truncation=True,
	            return_tensors='pt',
	        )
	        encoded_input = encoded_input.to(device)
	        # Inference only: skip gradient tracking.
	        with torch.no_grad():
	            model_output = model(**encoded_input)
	        sentence_embeddings = model_output[0][:, 0]
	        sentence_embeddings = torch.nn.functional.normalize(sentence_embeddings, p=2, dim=1)
	        output.extend(sentence_embeddings.cpu().numpy())

	    return np.array(output)

	def predict(batch_text):
	    """Score a batch of texts with every classifier listed in the config.

	    The texts are embedded with ``get_embeddings`` (on CUDA when available,
	    otherwise on CPU). Each entry of ``config['classifier']`` is then
	    downloaded from the hub and run through ONNX Runtime on those
	    embeddings.

	    Args:
	        batch_text: Batch of input texts, passed straight to
	            ``get_embeddings``.

	    Returns:
	        Dict keyed by classifier category. Each value holds ``'scores'``
	        (one score per text) and ``'predictions'``, which maps
	        ``'high_recall'``, ``'balanced'`` and ``'high_precision'`` to lists
	        of 0/1 flags (1 when the score reaches that threshold).
	    """

	    if torch.cuda.is_available():
	        device = torch.device("cuda")
	    else:
	        device = "cpu"

	    # Prepare input data: the classifiers are fed a float32 matrix
	    embedding_frame = pd.DataFrame(get_embeddings(device, batch_text))
	    features = np.array(embedding_frame, dtype=np.float32)

	    results = {}
	    for category, details in config['classifier'].items():

	        # Download the classifier from HuggingFace hub
	        onnx_path = hf_hub_download(repo_id=repo_path, filename=details['model_fp'])

	        # Run the inference
	        session = rt.InferenceSession(onnx_path)
	        feed = {session.get_inputs()[0].name: features}
	        raw_outputs = session.run(None, feed)

	        if details['calibrated']:
	            # If calibrated, keep only the prediction for the unsafe class
	            scores = [row[1] for row in raw_outputs[1]]
	        else:
	            # If not calibrated, we only get a 1D array for the unsafe class
	            scores = raw_outputs[1].flatten()

	        # Generate the predictions depending on the recommended threshold score
	        results[category] = {
	            'scores': scores,
	            'predictions': {
	                level: [1 if score >= details['threshold'][level] else 0 for score in scores]
	                for level in ('high_recall', 'balanced', 'high_precision')
	            },
	        }

	    return results

	if __name__ == "__main__":

	    # Load the data: the first command-line argument is a JSON-encoded
	    # batch of texts. Exit with a usage message instead of an IndexError
	    # when it is missing.
	    if len(sys.argv) < 2:
	        sys.exit("Usage: python inference.py '<JSON array of texts>'")
	    input_data = sys.argv[1]
	    batch_text = json.loads(input_data)

	    # Generate the scores and predictions
	    results = predict(batch_text)
	    for i, text in enumerate(batch_text):
	        print(f"Text: '{text}'")
	        for category, result in results.items():
	            flags = result['predictions']
	            # HR / B / HP are the high_recall / balanced / high_precision flags.
	            print(f"[Text {i+1}] {category} score: {result['scores'][i]:.3f} | HR: {flags['high_recall'][i]}, B: {flags['balanced'][i]}, HP: {flags['high_precision'][i]}")
	        print('---------------------------------------------')