In [12]:
pip install tensorflow librosa numpy pandas matplotlib scikit-learn resampy xgboost

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [8]:
import os
import numpy as np
import librosa
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import xgboost as xgb
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint

# Path to the dataset
dataset_path = 'soundclips'

# List of categories
categories = ['belly_pain', 'burping', 'discomfort', 'hungry', 'tired']

# Function to extract features from audio files
def extract_features(file_name):
 try:
 audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
 mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
 mfccs_scaled = np.mean(mfccs.T, axis=0)
 
 return mfccs_scaled
 except Exception as e:
 print(f"Error encountered while parsing file: {file_name}, {e}")
 return None

# Create DataFrame to hold features and labels
features = []
labels = []

# Iterate through each category
for category in categories:
 category_path = os.path.join(dataset_path, category)
 if not os.path.exists(category_path):
 print(f"Directory does not exist: {category_path}")
 continue
 
 for file in os.listdir(category_path):
 file_path = os.path.join(category_path, file)
 data = extract_features(file_path)
 if data is not None and len(data) > 0:
 features.append(data)
 labels.append(category)
 else:
 print(f"Feature extraction failed for file: {file_path}")

# Convert to numpy arrays
features = np.array(features)
labels = np.array(labels)

# Check if features array is empty
if features.size == 0:
 raise ValueError("No features extracted. Please check the dataset and ensure audio files are present and readable.")

# Encode the labels
le = LabelEncoder()
labels_encoded = le.fit_transform(labels)
labels_categorical = to_categorical(labels_encoded)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(features, labels_categorical, test_size=0.2, random_state=42)


 audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')


Error encountered while parsing file: soundclips\discomfort\Minta Gendong AUD-20150509-WA0000.wav, 
Feature extraction failed for file: soundclips\discomfort\Minta Gendong AUD-20150509-WA0000.wav
Error encountered while parsing file: soundclips\discomfort\recordgntipopok.wav, 
Feature extraction failed for file: soundclips\discomfort\recordgntipopok.wav
Error encountered while parsing file: soundclips\hungry\Lapar AUD-20150509-WA0001.wav, 
Feature extraction failed for file: soundclips\hungry\Lapar AUD-20150509-WA0001.wav
Error encountered while parsing file: soundclips\hungry\record-baby-1 cari puting.wav, 
Feature extraction failed for file: soundclips\hungry\record-baby-1 cari puting.wav
Error encountered while parsing file: soundclips\hungry\record-baby2 puting dilepas.wav, 
Feature extraction failed for file: soundclips\hungry\record-baby2 puting dilepas.wav
Error encountered while parsing file: soundclips\tired\Bangun Tidur AUD-20150509-WA0002.wav, 
Feature extraction failed for 

In [9]:


# Define the model
model = Sequential()

model.add(Dense(256, input_shape=(40,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(len(categories)))
model.add(Activation('softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

# Train the model
num_epochs = 100
num_batch_size = 32

checkpointer = ModelCheckpoint(filepath='audio_classification.keras', 
 verbose=1, save_best_only=True)

history = model.fit(X_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(X_test, y_test), callbacks=[checkpointer], verbose=1)

# Evaluate the model
test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f'Test accuracy: {test_accuracy[1] * 100:.2f}%')


Epoch 1/100


 super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m 1/13[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m9s[0m 770ms/step - accuracy: 0.1250 - loss: 92.2737
Epoch 1: val_loss improved from inf to 11.10916, saving model to audio_classification.keras
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.2303 - loss: 55.0404 - val_accuracy: 0.7282 - val_loss: 11.1092
Epoch 2/100
[1m 1/13[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 24ms/step - accuracy: 0.7500 - loss: 10.0197
Epoch 2: val_loss improved from 11.10916 to 8.67415, saving model to audio_classification.keras
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7144 - loss: 12.5967 - val_accuracy: 0.7282 - val_loss: 8.6742
Epoch 3/100
[1m 1/13[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 20ms/step - accuracy: 0.6875 - loss: 17.6288
Epoch 3: val_loss improved from 8.67415 to 4.57907, saving model to audio_classification.keras
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6958 - loss: 1.6243 - val_accuracy: 0.7282 - val_loss: 1.0526
Epoch 25/100
[1m 1/13[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 19ms/step - accuracy: 0.6250 - loss: 1.3346
Epoch 25: val_loss did not improve from 0.98988
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6771 - loss: 1.2211 - val_accuracy: 0.7282 - val_loss: 1.0524
Epoch 26/100
[1m 1/13[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 21ms/step - accuracy: 0.5312 - loss: 2.1067
Epoch 26: val_loss did not improve from 0.98988
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6778 - loss: 1.6074 - val_accuracy: 0.7282 - val_loss: 1.0376
Epoch 27/100
[1m 1/13[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - accuracy: 0.5312 - loss: 1.5559
Epoch 27: val_loss did not improve from 0.98988
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

[1m 1/13[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 19ms/step - accuracy: 0.7812 - loss: 1.1724
Epoch 50: val_loss improved from 0.98988 to 0.98715, saving model to audio_classification.keras
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7758 - loss: 1.0263 - val_accuracy: 0.7282 - val_loss: 0.9871
Epoch 51/100
[1m 1/13[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 21ms/step - accuracy: 0.7500 - loss: 1.0490
Epoch 51: val_loss improved from 0.98715 to 0.98223, saving model to audio_classification.keras
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7547 - loss: 1.0155 - val_accuracy: 0.7282 - val_loss: 0.9822
Epoch 52/100
[1m 1/13[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 20ms/step - accuracy: 0.7500 - loss: 1.2764
Epoch 52: val_loss did not improve from 0.98223
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7949 - loss: 0.9619 - val

Epoch 74/100
[1m 1/13[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 23ms/step - accuracy: 0.8438 - loss: 0.7377
Epoch 74: val_loss did not improve from 0.92060
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8034 - loss: 0.8272 - val_accuracy: 0.7282 - val_loss: 0.9297
Epoch 75/100
[1m 1/13[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 19ms/step - accuracy: 0.8125 - loss: 0.8221
Epoch 75: val_loss did not improve from 0.92060
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8141 - loss: 0.8071 - val_accuracy: 0.7282 - val_loss: 0.9264
Epoch 76/100
[1m 1/13[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 21ms/step - accuracy: 0.8438 - loss: 0.6241
Epoch 76: val_loss did not improve from 0.92060
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8313 - loss: 0.7314 - val_accuracy: 0.7282 - val_loss: 0.9241
Epoch 77/100
[1m 1/13[0m [32m━[0m[37m━━━━

Epoch 99/100
[1m 1/13[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 20ms/step - accuracy: 0.7188 - loss: 1.1051
Epoch 99: val_loss did not improve from 0.90286
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8075 - loss: 0.7663 - val_accuracy: 0.7282 - val_loss: 0.9030
Epoch 100/100
[1m 1/13[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 20ms/step - accuracy: 0.8125 - loss: 0.6395
Epoch 100: val_loss did not improve from 0.90286
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7935 - loss: 0.7532 - val_accuracy: 0.7282 - val_loss: 0.9033
Test accuracy: 72.82%


In [10]:
# Save the model
model.save('infant_cry_classification_model.keras')