zeimoto committed on
Commit
f4abbca
1 Parent(s): d292d3f

first commit

Browse files
Files changed (7) hide show
  1. .gitignore +2 -0
  2. app.py +52 -58
  3. favicon.ico +0 -0
  4. nameder.py +62 -0
  5. resources.py +41 -0
  6. speech2text.py +63 -0
  7. translation.py +26 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ **venv
2
+ main.py
app.py CHANGED
@@ -1,63 +1,57 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
-
9
-
10
- def respond(
11
- message,
12
- history: list[tuple[str, str]],
13
- system_message,
14
- max_tokens,
15
- temperature,
16
- top_p,
17
- ):
18
- messages = [{"role": "system", "content": system_message}]
19
-
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
25
-
26
- messages.append({"role": "user", "content": message})
27
-
28
- response = ""
29
-
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
- top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
38
-
39
- response += token
40
- yield response
41
-
42
- """
43
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
44
- """
45
- demo = gr.ChatInterface(
46
- respond,
47
- additional_inputs=[
48
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
49
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
50
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
51
- gr.Slider(
52
- minimum=0.1,
53
- maximum=1.0,
54
- value=0.95,
55
- step=0.05,
56
- label="Top-p (nucleus sampling)",
57
- ),
58
- ],
59
  )
60
-
61
 
62
  if __name__ == "__main__":
63
  demo.launch()
 
1
  import gradio as gr
2
+ import json
3
+ from nameder import init_model_ner, get_entity_results
4
+ from speech2text import init_model_trans, transcribe
5
+ from translation import translate
6
+ from resources import NER_Response, NER_Request, entity_labels_sample, set_start, audit_elapsedtime
7
+
8
+
9
def translation_to_english(text: str):
    """Translate ``text`` by delegating to ``translate`` and return its result."""
    return translate(text)
12
+
13
def transcription(audio: bytes):
    """Transcribe ``audio`` with a freshly initialised speech-to-text pipeline.

    The pipeline is created through ``init_model_trans`` on every call and
    then handed, together with the audio, to ``transcribe``.
    """
    speech_pipeline = init_model_trans()
    transcript = transcribe(audio, speech_pipeline)
    return transcript
17
+
18
def named_entity_recognition(text: str):
    """Extract entities from ``text`` using the sample entity labels.

    The tokenizer and model are loaded through ``init_model_ner`` on every
    call. The result of ``get_entity_results`` is printed together with its
    type and then returned unchanged.
    """
    ner_tokenizer, ner_model = init_model_ner()
    result = get_entity_results(
        tokenizer=ner_tokenizer,
        model=ner_model,
        text=text,
        entities_list=entity_labels_sample,
    )
    print('result:',result,type(result))
    return result
27
+
28
def get_lead(audio: bytes):
    """Run the whole voice pipeline: transcribe, translate, extract entities.

    Args:
        audio: The recording, forwarded as-is to ``transcription``.

    Returns:
        Whatever ``named_entity_recognition`` returns for the translated
        transcript.
    """
    start = set_start()
    # Local names deliberately differ from the imported ``transcribe`` and
    # ``translate`` functions so those are not shadowed inside this function.
    transcript = transcription(audio)
    english_text = translation_to_english(transcript)
    # ``named_entity_recognition`` expects the plain text and supplies the
    # entity labels itself; wrapping the text in an ``NER_Request`` handed a
    # pydantic object to the tokenizer instead of a string.
    entities = named_entity_recognition(english_text)
    audit_elapsedtime("VoiceLead", start)
    return entities
38
+
39
# Microphone recorder. ``type="filepath"`` makes Gradio hand the callback a
# path to the recorded file, which the speech-recognition pipeline can read;
# the default is a ``(sample_rate, ndarray)`` tuple, which it cannot.
audio_input = gr.Microphone(
    label="Record your audio",
    type="filepath",
)

# Multi-line text box that shows the extracted entities.
text_output = gr.Textbox(
    label="Labels",
    info="",
    lines=9,
    value="",
)

# The input is an audio recording, so the interface has to run the full
# audio -> transcript -> translation -> entities pipeline (``get_lead``),
# not the text-only ``named_entity_recognition`` step.
demo = gr.Interface(
    fn=get_lead,
    description="Record a voice message to get the entities detected in it.",
    inputs=[audio_input],
    outputs=[text_output],
    title="VoiceLead",
)


if __name__ == "__main__":
    demo.launch()
favicon.ico ADDED
nameder.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ from resources import set_start, audit_elapsedtime, entities_list_to_dict
3
+ from transformers import BertTokenizer, BertForTokenClassification
4
+ import torch
5
+
6
+ #Named-Entity Recognition model
7
+
8
def init_model_ner():
    """Load the tokenizer and token-classification model used for NER.

    Both are loaded from the same checkpoint: the model's embeddings are
    indexed by that checkpoint's (cased) vocabulary, so a tokenizer from a
    different checkpoint would produce ids that mean something else to it.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    print("Initiating NER model...")
    start = set_start()

    # Load pre-trained tokenizer and model from one checkpoint.
    checkpoint = "dbmdz/bert-large-cased-finetuned-conll03-english"
    tokenizer = BertTokenizer.from_pretrained(checkpoint)
    model = BertForTokenClassification.from_pretrained(checkpoint)

    audit_elapsedtime(function="Initiating NER model", start=start)
    return tokenizer, model
18
+
19
def get_entity_results(tokenizer, model, text: str, entities_list: List[str]):
    """Run token classification on ``text`` and return the matching entities.

    Args:
        tokenizer: Tokenizer providing ``encode`` and ``convert_ids_to_tokens``.
        model: Token-classification model whose ``config.id2label`` yields
            ``B-``/``I-`` prefixed tags for entity tokens.
        text: The text to analyse.
        entities_list: Values an extracted entity must equal to be returned.

    Returns:
        The list of extracted entity strings that appear in ``entities_list``.
    """
    print("Initiating entity recognition...")
    start = set_start()

    # Encode once and derive the token strings from those same ids, so that
    # tokens and predictions are guaranteed to line up one-to-one.
    input_ids = tokenizer.encode(text, return_tensors="pt")
    tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist())

    # Perform NER prediction
    with torch.no_grad():
        outputs = model(input_ids)

    # Most likely label id for every token of the single input sequence.
    predicted_labels = torch.argmax(outputs.logits, dim=2)[0]

    # Map predicted labels to actual entities
    entities = []
    current_entity = ""
    for token, label_id in zip(tokens, predicted_labels):
        label = model.config.id2label[label_id.item()]
        if label.startswith('B-'):  # Beginning of a new entity
            if current_entity:
                entities.append(current_entity.strip())
            current_entity = token
        elif label.startswith('I-'):  # Inside of an entity
            if token.startswith("##") and current_entity:
                # WordPiece continuation: glue it onto the previous piece.
                current_entity += token[2:]
            else:
                current_entity += " " + token
        else:  # Outside of any entity
            if current_entity:
                entities.append(current_entity.strip())
                current_entity = ""
    # Flush an entity that runs up to the very last token.
    if current_entity:
        entities.append(current_entity.strip())

    # Filter out only the entities you are interested in.
    # NOTE(review): this compares the entity *text* with the entries of
    # ``entities_list``; callers in this commit pass label names such as
    # "team", so the filter will rarely match -- confirm the intended
    # semantics before relying on the result.
    wanted = set(entities_list)
    filtered_entities = [entity for entity in entities if entity in wanted]

    audit_elapsedtime(function="Retreiving entity labels from text", start=start)
    return filtered_entities
resources.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import Optional
3
+ from typing import List
4
+ import time
5
+
6
# Request payload pairing a text with a list of entity names.
class NER_Request (BaseModel):
    # Text to analyse.
    text: str
    # Entity names to look for -- presumably label names such as those in
    # entity_labels_sample; verify against callers.
    entities: List[str]
9
+
10
# Response envelope: two required fields plus optional description and
# error details.
class NER_Response (BaseModel):
    # NOTE(review): looks like a success flag encoded as an int -- confirm
    # the expected values with the producer of this response.
    success: int
    # Result payload as a string.
    result: str
    # Optional free-text description; defaults to an empty string.
    description: Optional[str] = ""
    # Optional numeric error code; defaults to 0.
    errorCode: Optional[int] = 0
    # Optional error text; defaults to an empty string. The spelling of the
    # field name is part of the model's interface, so it is left as is.
    errorDescriptin: Optional[str] = ""
16
+
17
# Sample entity label names, in a fixed order.
entity_labels_sample = (
    "team developer technology tool amount "
    "duration capacity company currency"
).split()
28
+
29
def entities_list_to_dict(entitiesList: List[str]):
    """Return a dict mapping every entry of ``entitiesList`` to ``'string'``."""
    return dict.fromkeys(entitiesList, 'string')
31
+
32
def set_start() -> float:
    """Return the current ``time.time()`` value to use as a start mark."""
    return time.time()
34
+
35
def audit_elapsedtime(function: str, start: float) -> float:
    """Print and return the seconds elapsed since ``start``.

    Args:
        function: Label printed in front of the elapsed time.
        start: A ``time.time()`` value taken earlier, e.g. from ``set_start``.

    Returns:
        The difference between the current ``time.time()`` and ``start``.
    """
    end = time.time()
    elapsedtime = end - start
    print("------------------")
    print(f"[{function}] Elapsed time: {elapsedtime}")
    print("------------------")
    return elapsedtime
speech2text.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
2
+ import torch
3
+ from resources import set_start, audit_elapsedtime
4
+
5
+ #Speech to text transcription model
6
+
7
def init_model_trans():
    """Build and return the automatic-speech-recognition pipeline.

    Uses the first CUDA device with float16 weights when CUDA is available,
    otherwise the CPU with float32. Initialisation time is reported through
    ``audit_elapsedtime``.
    """
    print("Initiating transcription model...")
    started_at = set_start()

    checkpoint = "openai/whisper-large-v3"
    if torch.cuda.is_available():
        target_device, weights_dtype = "cuda:0", torch.float16
    else:
        target_device, weights_dtype = "cpu", torch.float32

    whisper = AutoModelForSpeechSeq2Seq.from_pretrained(
        checkpoint,
        torch_dtype=weights_dtype,
        low_cpu_mem_usage=True,
        use_safetensors=True,
    )
    whisper.to(target_device)

    whisper_processor = AutoProcessor.from_pretrained(checkpoint)

    pipeline_options = {
        "model": whisper,
        "tokenizer": whisper_processor.tokenizer,
        "feature_extractor": whisper_processor.feature_extractor,
        "max_new_tokens": 128,
        "chunk_length_s": 30,
        "batch_size": 16,
        "return_timestamps": True,
        "torch_dtype": weights_dtype,
        "device": target_device,
    }
    asr_pipeline = pipeline("automatic-speech-recognition", **pipeline_options)

    print(f'Init model successful')
    audit_elapsedtime(function="Init transc model", start=started_at)
    return asr_pipeline
38
+
39
def transcribe(audio_sample: bytes, pipe) -> str:
    """Run ``pipe`` on ``audio_sample`` and return the ``"text"`` entry of its result.

    The elapsed time is reported through ``audit_elapsedtime`` and the full
    pipeline result is printed before the text is returned.
    """
    print("Initiating transcription...")
    started_at = set_start()

    output = pipe(audio_sample)

    audit_elapsedtime(function="Transcription", start=started_at)
    print("transcription result",output)
    return output["text"]
49
+
50
+ # def translate (audio_sample: bytes, pipe) -> str:
51
+ # print("Initiating Translation...")
52
+ # start = set_start()
53
+ # # dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
54
+ # # sample = dataset[0]["audio"]
55
+
56
+ # #result = pipe(audio_sample)
57
+ # result = pipe(audio_sample, generate_kwargs={"task": "translate"})
58
+
59
+ # audit_elapsedtime(function="Translation", start=start)
60
+ # print("Translation result",result)
61
+
62
+ # #st.write('trancription: ', result["text"])
63
+ # return result["text"]
translation.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
2
+ from resources import set_start, audit_elapsedtime
3
+ from pydantic import BaseModel
4
+
5
+ #def get_model_name(languageCode: str) -> str:
6
+ # match languageCode:
7
+ # case "pt":
8
+ # model_name = "Helsinki-NLP/opus-mt-pt-en"
9
+ # case _:
10
+ # model_name
11
+ #
12
+ # return model_name
13
+
14
def translate(text_to_translate: str) -> str:
    """Translate Portuguese text to English.

    Args:
        text_to_translate: The Portuguese source text.

    Returns:
        The generated English text, or an empty string when the input is
        empty or contains only whitespace.
    """
    start = set_start()
    print("Initiating translation model...")

    # Nothing to translate: skip loading the model. This also avoids asking
    # the generator for ``max_new_tokens=0`` on an empty string.
    if not text_to_translate or not text_to_translate.strip():
        audit_elapsedtime(function="Finished translation", start=start)
        return ""

    # Generation budget scales with the input length (two tokens per character).
    text_size = len(text_to_translate) * 2
    tokenizer = AutoTokenizer.from_pretrained("unicamp-dl/translation-pt-en-t5")
    model = AutoModelForSeq2SeqLM.from_pretrained("unicamp-dl/translation-pt-en-t5")
    pten_pipeline = pipeline('text2text-generation', model=model, tokenizer=tokenizer)

    # The checkpoint is a T5 model driven by a task prefix; its model card
    # prepends this instruction to the source sentence.
    prompt = "translate Portuguese to English: " + text_to_translate
    translated_text = pten_pipeline(prompt, max_new_tokens=text_size)[0]['generated_text']

    audit_elapsedtime(function="Finished translation", start=start)
    print("Translated text:", translated_text)
    return translated_text