"""Deploy PygmalionAI/pygmalion-6b from the Hugging Face Hub to SageMaker.

The script looks up an IAM role ARN, creates a ``HuggingFaceModel`` configured
through Hub environment variables, deploys it to a real-time endpoint, and
sends a single conversational request to that endpoint.

Replace the ``{IAM_ROLE_WITH_SAGEMAKER_PERMISSIONS}`` placeholder below with
the name of an existing IAM role before running.
"""
import boto3
from sagemaker.huggingface import HuggingFaceModel

# Resolve the role name to its ARN, which is what HuggingFaceModel is given.
iam_client = boto3.client('iam')
role = iam_client.get_role(RoleName='{IAM_ROLE_WITH_SAGEMAKER_PERMISSIONS}')['Role']['Arn']

# Hub Model configuration. https://huggingface.co/models
hub = {
    'HF_MODEL_ID': 'PygmalionAI/pygmalion-6b',
    'HF_TASK': 'conversational',
}

# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
    transformers_version='4.17.0',
    pytorch_version='1.10.2',
    py_version='py38',
    env=hub,
    role=role,
)

# deploy model to SageMaker Inference
# NOTE(review): a 6B-parameter model may not fit in memory on ml.m5.xlarge --
# confirm the instance type is large enough before relying on this endpoint.
predictor = huggingface_model.deploy(
    initial_instance_count=1,  # number of instances
    instance_type='ml.m5.xlarge',  # ec2 instance type
)

# Send one conversational turn: prior user inputs, the model's prior replies,
# and the new user message under "text".
response = predictor.predict({
    'inputs': {
        "past_user_inputs": ["Which movie is the best ?"],
        "generated_responses": ["It's Die Hard for sure."],
        "text": "Can you explain why ?",
    }
})

# Show the result; previously the return value was silently discarded.
print(response)

# NOTE(review): the endpoint is not deleted here; presumably it keeps running
# (and billing) until removed, e.g. via predictor.delete_endpoint() -- confirm.