thak123 committed
Commit 7dcc89a
1 Parent(s): ff19902

Update README.md

Files changed (1):
  1. README.md +74 -0
README.md CHANGED
@@ -29,6 +29,80 @@ It achieves the following results on the evaluation set:
 
 More information needed
 
+ ## Usage
+ To use the model, run the following script.
+ Refer to [app.py](https://huggingface.co/spaces/FFZG-cleopatra/M2SA-demo-multimodal/blob/main/app.py) for the `Transform` and `VisionTextDualEncoderModel` class definitions.
+
+ ```python
+ import numpy as np
+ import torch
+ import torch.nn as nn
+
+ import torchvision
+ from torchvision import transforms
+ from torchvision.transforms import CenterCrop, ConvertImageDtype, Normalize, Resize
+ from torchvision.transforms.functional import InterpolationMode
+ from torchvision.io import ImageReadMode, read_image
+
+ from transformers import CLIPModel, AutoModel
+ from transformers import (
+     AutoConfig,
+     AutoImageProcessor,
+     AutoModelForSequenceClassification,
+     AutoTokenizer,
+     logging,
+ )
+ from huggingface_hub import hf_hub_download
+ from safetensors.torch import load_model
+
+ id2label = {0: "negative", 1: "neutral", 2: "positive"}
+ label2id = {"negative": 0, "neutral": 1, "positive": 2}
+
+ tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-xlm-roberta-base-sentiment-multilingual")
+
+ # Transform and VisionTextDualEncoderModel are defined in the linked app.py.
+ model = VisionTextDualEncoderModel(num_classes=3)
+ config = model.vision_encoder.config
+
+ # Download the fine-tuned weights and load them into the model.
+ # https://huggingface.co/FFZG-cleopatra/M2SA/blob/main/model.safetensors
+ sf_filename = hf_hub_download("FFZG-cleopatra/M2SA", filename="model.safetensors")
+ load_model(model, sf_filename)
+ model.eval()
+
+ image_processor = AutoImageProcessor.from_pretrained("openai/clip-vit-base-patch32")
+
+
+ def predict_sentiment(text, image):
+     # Read the image file as an RGB tensor.
+     image = read_image(image, mode=ImageReadMode.RGB)
+
+     # Tokenize the text.
+     text_inputs = tokenizer(
+         text,
+         max_length=512,
+         padding="max_length",
+         truncation=True,
+         return_tensors="pt",
+     )
+
+     # Resize, crop and normalize the image to match the vision encoder.
+     image_transformations = Transform(
+         config.vision_config.image_size,
+         image_processor.image_mean,
+         image_processor.image_std,
+     )
+     image_transformations = torch.jit.script(image_transformations)
+     pixel_values = image_transformations(image)
+     text_inputs["pixel_values"] = pixel_values.unsqueeze(0)
+
+     with torch.no_grad():
+         outputs = model(**text_inputs)
+     prediction = np.argmax(outputs["logits"], axis=-1)
+     return id2label[prediction[0].item()]
+
+
+ # Example usage (replace with your own text and image path):
+ # predict_sentiment("Beautiful day at the beach!", "path/to/image.jpg")
+ ```
+
 ## Intended uses & limitations
 
 More information needed
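
For reference, the sketch below shows what the two classes pulled from app.py might look like. It is an illustrative assumption, not the authoritative code: `Transform` follows the image pipeline from the Hugging Face contrastive image-text example, and this `VisionTextDualEncoderModel` wrapper is a hypothetical reconstruction (the encoder checkpoints, projection size, and classification head are guesses). Use the definitions in the linked app.py so that the released model.safetensors loads correctly.

```python
import torch
import torch.nn as nn
from torchvision.transforms import CenterCrop, ConvertImageDtype, Normalize, Resize
from torchvision.transforms.functional import InterpolationMode
# Alias the transformers class to avoid clashing with the custom wrapper below.
from transformers import VisionTextDualEncoderModel as HFDualEncoder


class Transform(torch.nn.Module):
    """Resize, crop, cast and normalize an image tensor for the vision encoder.

    Mirrors the Transform module from the Hugging Face contrastive
    image-text example; the authoritative version lives in app.py.
    """

    def __init__(self, image_size, mean, std):
        super().__init__()
        self.transforms = torch.nn.Sequential(
            Resize([image_size], interpolation=InterpolationMode.BICUBIC),
            CenterCrop(image_size),
            ConvertImageDtype(torch.float),
            Normalize(mean, std),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        with torch.no_grad():
            return self.transforms(x)


class VisionTextDualEncoderModel(nn.Module):
    """Hypothetical sketch of the classification wrapper used in the usage script.

    Assumption: a CLIP vision encoder and an XLM-R text encoder are paired via
    transformers' VisionTextDualEncoderModel, and their projected embeddings are
    concatenated and fed to a linear classification head. The real definition
    (the one matching model.safetensors) is in app.py.
    """

    def __init__(self, num_classes: int = 3):
        super().__init__()
        self.vision_encoder = HFDualEncoder.from_vision_text_pretrained(
            "openai/clip-vit-base-patch32",
            "cardiffnlp/twitter-xlm-roberta-base-sentiment-multilingual",
        )
        embed_dim = self.vision_encoder.config.projection_dim
        self.classifier = nn.Linear(2 * embed_dim, num_classes)

    def forward(self, input_ids=None, attention_mask=None, pixel_values=None, **kwargs):
        outputs = self.vision_encoder(
            input_ids=input_ids,
            attention_mask=attention_mask,
            pixel_values=pixel_values,
        )
        # Concatenate the projected text and image embeddings and classify.
        features = torch.cat([outputs.text_embeds, outputs.image_embeds], dim=-1)
        return {"logits": self.classifier(features)}
```

If you reconstruct the classes this way, check that the parameter names match the keys in model.safetensors before calling `load_model`; otherwise copy the definitions from app.py verbatim.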