Cosmos48 committed
Commit
cbced27
1 Parent(s): e1c82aa

Update Voice_Distinction.py

Files changed (1):
  1. Voice_Distinction.py  +100 -100
Voice_Distinction.py CHANGED
@@ -1,100 +1,100 @@
 # type: ignore

 # Importing the required libraries
 import io
 import streamlit as st
 import numpy as np
 import librosa
 from tensorflow.keras.models import Sequential
 from tensorflow.keras.layers import Dense, Dropout
 import matplotlib.pyplot as plt
 from scipy.io.wavfile import write, read as wav_read
 from st_audiorec import st_audiorec

 # Function to convert audio to spectrogram image
 def audio_to_spectrogram(file_path):
     y, sr = librosa.load(file_path, sr=22050)
     mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, hop_length=512)
     mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
     plt.figure(figsize=(4, 4))
     plt.axis('off')
     plt.imshow(mel_spec_db, aspect='auto', origin='lower')
     plt.tight_layout()
     plt.savefig("spectrogram.png")
     plt.close()

 # Function to create the gender classification model
 def create_model(vector_length=128):
     model = Sequential([
         Dense(256, input_shape=(vector_length,), activation='relu'),
         Dropout(0.3),
         Dense(256, activation='relu'),
         Dropout(0.3),
         Dense(128, activation='relu'),
         Dropout(0.3),
         Dense(128, activation='relu'),
         Dropout(0.3),
         Dense(64, activation='relu'),
         Dropout(0.3),
         Dense(1, activation='sigmoid')
     ])
     model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='adam')
     model.summary()
     return model

 # Load the pre-trained model
 model = create_model()
 model.load_weights("saved_model.h5")

 # Streamlit app
 st.title("Voice Gender Detection")
 st.write("This application detects the gender from recorded voice using a Multilayer Perceptron")

 # Option to upload a file
 uploaded_file = st.file_uploader("Upload an audio file", type=['wav', 'mp3'])

 # Function to extract features from audio file
 def extract_feature(file_name):
     X, sample_rate = librosa.core.load(file_name)
     result = np.array([])
     mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T,axis=0)
     result = np.hstack((result, mel))
     return result

 # Function to classify gender
 def classify_gender(file_path):
     features = extract_feature(file_path).reshape(1, -1)
     male_prob = model.predict(features, verbose=0)[0][0]
     female_prob = 1 - male_prob
     gender = "male" if male_prob > female_prob else "female"
-    probability = round(male_prob, 2) if gender == "male" else round(female_prob, 2)
+    probability = "{:.2f}".format(male_prob) if gender == "male" else "{:.2f}".format(female_prob)
     return gender, probability

 if uploaded_file is not None:
     with open("uploaded_audio.wav", "wb") as f:
         f.write(uploaded_file.getbuffer())
     st.audio(uploaded_file, format='audio/wav')
     if st.button("Submit"):
         audio_to_spectrogram("uploaded_audio.wav")
         st.image("spectrogram.png", caption="Mel Spectrogram of the uploaded audio file", use_column_width="auto", width=200)
         gender, probability = classify_gender("uploaded_audio.wav")
         st.write(f"Predicted Gender: {gender}")
         st.write(f"Probability: {probability}")

 wav_audio_data = st_audiorec()

 if wav_audio_data is not None:
     # Convert byte string to numpy array
     wav_io = io.BytesIO(wav_audio_data)
     sr, audio_data = wav_read(wav_io)

     # Save numpy array to WAV file
     wav_file_path = "recorded_audio.wav"
     write(wav_file_path, sr, audio_data)

     st.audio(wav_audio_data, format='audio/wav')
     audio_to_spectrogram(wav_file_path)
     st.image("spectrogram.png", caption="Mel Spectrogram of the uploaded audio file", use_column_width="auto", width=200)
     gender, probability = classify_gender(wav_file_path)
     st.write(f"Predicted Gender: {gender}")
     st.write(f"Probability: {probability}")