Update README.md
README.md CHANGED
````diff
@@ -32,7 +32,7 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 import torchaudio
-from transformers import AutoConfig,
+from transformers import AutoConfig, Wav2Vec2FeatureExtractor
 
 import librosa
 import IPython.display as ipd
@@ -44,8 +44,8 @@ import pandas as pd
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model_name_or_path = "m3hrdadfi/wav2vec2-xlsr-greek-speech-emotion-recognition"
 config = AutoConfig.from_pretrained(model_name_or_path)
-
-sampling_rate =
+feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name_or_path)
+sampling_rate = feature_extractor.sampling_rate
 model = Wav2Vec2ForSpeechClassification.from_pretrained(model_name_or_path).to(device)
 ```
 
@@ -59,13 +59,11 @@ def speech_file_to_array_fn(path, sampling_rate):
 
 def predict(path, sampling_rate):
     speech = speech_file_to_array_fn(path, sampling_rate)
-
-
-    input_values = features.input_values.to(device)
-    attention_mask = features.attention_mask.to(device)
+    inputs = feature_extractor(speech, sampling_rate=sampling_rate, return_tensors="pt", padding=True)
+    inputs = {key: inputs[key].to(device) for key in inputs}
 
     with torch.no_grad():
-        logits = model(
+        logits = model(**inputs).logits
 
     scores = F.softmax(logits, dim=1).detach().cpu().numpy()[0]
     outputs = [{"Emotion": config.id2label[i], "Score": f"{round(score * 100, 3):.1f}%"} for i, score in enumerate(scores)]
@@ -73,17 +71,17 @@ def predict(path, sampling_rate):
 ```
 
 ```python
-path = "/path/to/
+path = "/path/to/disgust.wav"
 outputs = predict(path, sampling_rate)
 ```
 
 ```bash
 [
-
-
-
-
-
+    {'Emotion': 'anger', 'Score': '0.0%'},
+    {'Emotion': 'disgust', 'Score': '99.2%'},
+    {'Emotion': 'fear', 'Score': '0.1%'},
+    {'Emotion': 'happiness', 'Score': '0.3%'},
+    {'Emotion': 'sadness', 'Score': '0.5%'}
 ]
 ```
 
````
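For reference, here is how the updated snippet fits together end to end. This is a minimal sketch, not the card's exact code: the body of `speech_file_to_array_fn` is not shown in this diff, so the torchaudio-based version below is an assumed typical implementation, and `Wav2Vec2ForSpeechClassification` is the custom classification model defined earlier in the model card rather than a class shipped by `transformers`.

```python
import torch
import torch.nn.functional as F
import torchaudio
from transformers import AutoConfig, Wav2Vec2FeatureExtractor

# Assumes Wav2Vec2ForSpeechClassification (the custom wav2vec2 classification
# head defined earlier in the README) is already defined in this scope.

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_name_or_path = "m3hrdadfi/wav2vec2-xlsr-greek-speech-emotion-recognition"

config = AutoConfig.from_pretrained(model_name_or_path)
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name_or_path)
sampling_rate = feature_extractor.sampling_rate  # 16000 for this checkpoint
model = Wav2Vec2ForSpeechClassification.from_pretrained(model_name_or_path).to(device)


def speech_file_to_array_fn(path, sampling_rate):
    # Assumed implementation: load the file and resample to the model's rate.
    speech_array, original_rate = torchaudio.load(path)
    resampler = torchaudio.transforms.Resample(original_rate, sampling_rate)
    return resampler(speech_array).squeeze().numpy()


def predict(path, sampling_rate):
    speech = speech_file_to_array_fn(path, sampling_rate)
    # The feature extractor normalizes the waveform and returns input_values
    # (plus attention_mask for this checkpoint) as PyTorch tensors.
    inputs = feature_extractor(speech, sampling_rate=sampling_rate, return_tensors="pt", padding=True)
    inputs = {key: inputs[key].to(device) for key in inputs}

    with torch.no_grad():
        logits = model(**inputs).logits

    scores = F.softmax(logits, dim=1).detach().cpu().numpy()[0]
    return [
        {"Emotion": config.id2label[i], "Score": f"{round(score * 100, 3):.1f}%"}
        for i, score in enumerate(scores)
    ]
```

Calling `predict("/path/to/disgust.wav", sampling_rate)` then yields the per-emotion score list shown in the output block above.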