kingabzpro committed on
Commit
d036b29
1 Parent(s): 32ccf9b

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +17 -14
README.md CHANGED
@@ -51,21 +51,24 @@ python eval.py --model_id kingabzpro/wav2vec2-large-xls-r-300m-Urdu --dataset mo
51
  ### Inference With LM
52
 
53
  ```python
54
- import torch
55
- from datasets import load_dataset
56
- from transformers import AutoModelForCTC, AutoProcessor
57
- import torchaudio.functional as F
58
- model_id = "kingabzpro/wav2vec2-large-xls-r-300m-Urdu"
59
- sample_iter = iter(load_dataset("mozilla-foundation/common_voice_8_0", "ur", split="test", streaming=True, use_auth_token=True))
 
 
 
 
60
  sample = next(sample_iter)
61
- resampled_audio = F.resample(torch.tensor(sample["audio"]["array"]), 48_000, 16_000).numpy()
62
- model = AutoModelForCTC.from_pretrained(model_id)
63
- processor = AutoProcessor.from_pretrained(model_id)
64
- input_values = processor(resampled_audio, return_tensors="pt").input_values
65
- with torch.no_grad():
66
- logits = model(input_values).logits
67
- transcription = processor.batch_decode(logits.numpy()).text
68
- # => "اب نے ٹپیدسون دیتے ہیں"
69
  ```
70
 
71
 
 
51
  ### Inference With LM
52
 
53
  ```python
54
+ from datasets import load_dataset, Audio
55
+ from transformers import pipeline
56
+ model = "kingabzpro/wav2vec2-large-xls-r-300m-Urdu"
57
+ data = load_dataset("mozilla-foundation/common_voice_8_0",
58
+ "ur",
59
+ split="test",
60
+ streaming=True,
61
+ use_auth_token=True)
62
+
63
+ sample_iter = iter(data.cast_column("path", Audio(sampling_rate=16_000)))
64
  sample = next(sample_iter)
65
+
66
+ asr = pipeline("automatic-speech-recognition", model=model)
67
+ prediction = asr(sample["path"]["array"],
68
+ chunk_length_s=5,
69
+ stride_length_s=1)
70
+ prediction
71
+ # => {'text': 'اب یہ ونگین لمحاتانکھار دلمیں میںفوث کریلیا اجائ'}
 
72
  ```
73
 
74