Spaces:

Arsalan8
/

ham_or_spam

Sleeping

App Files Files Community

ham_or_spam / main.py

Arsalan8

Upload 9 files

a3e307c verified 12 months ago

raw

history blame contribute delete

1.82 kB

	import pandas as pd
	from sklearn.model_selection import train_test_split
	import tensorflow as tf
	import numpy as np

	# Load the labelled messages. The CSV is expected to have a header row with
	# at least a "class" column and a "message" column.
	df = pd.read_csv("ham or spam rnn/spam.csv", encoding="ISO-8859-1")

	# Encode the label as an integer: "ham" -> 0, every other value -> 1.
	# A vectorized comparison replaces the per-row Python lambda.
	df["class"] = (df["class"] != "ham").astype("int64")
	X = df["message"]
	y = df["class"]

	# Hold out 20% of the rows for testing (80% train / 20% test); the fixed
	# random_state keeps the split reproducible between runs.
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

	# Wrap each split as a tf.data.Dataset of (message, label) pairs.
	train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
	test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))










	# Batch both splits first so the vectorizer can be adapted on batches of
	# text instead of iterating over one message at a time.
	batch_size = 32
	train_dataset = train_dataset.batch(batch_size)
	test_dataset = test_dataset.batch(batch_size)

	# Build the vocabulary from the training messages only (labels are dropped
	# by the map), keeping at most VOCAB_SIZE tokens.
	VOCAB_SIZE = 1000
	encoder = tf.keras.layers.TextVectorization(max_tokens=VOCAB_SIZE)
	encoder.adapt(train_dataset.map(lambda text, label: text))












	# Assemble the classifier layer by layer:
	# raw text -> token ids -> embeddings -> bidirectional LSTM -> dense head.
	model = tf.keras.Sequential()
	model.add(encoder)
	model.add(
	    tf.keras.layers.Embedding(
	        # One embedding row per entry in the adapted vocabulary.
	        input_dim=len(encoder.get_vocabulary()),
	        output_dim=64,
	        mask_zero=True,
	    )
	)
	model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))
	model.add(tf.keras.layers.Dense(64, activation='relu'))
	# Single sigmoid unit: the output is a probability for the positive class (label 1).
	model.add(tf.keras.layers.Dense(1, activation='sigmoid'))





	# Binary cross-entropy on probabilities (the model ends in a sigmoid, hence
	# from_logits=False), optimized with Adam at a 1e-4 learning rate.
	model.compile(
	    loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
	    optimizer=tf.keras.optimizers.Adam(1e-4),
	    metrics=['accuracy'],
	)

	# Train for 15 epochs, validating on the whole held-out set after each one.
	# validation_steps is intentionally omitted: the dataset is finite, so Keras
	# runs it to exhaustion instead of stopping after a fixed number of batches
	# (or running out of data when fewer batches than requested exist).
	history = model.fit(train_dataset, epochs=15, validation_data=test_dataset)

	# Score the trained model on the held-out batches; evaluate() yields the
	# loss followed by the compiled metric (accuracy).
	test_loss, test_acc = model.evaluate(test_dataset)

	# Report both numbers, one per line.
	for label, value in (('Test Loss:', test_loss), ('Test Accuracy:', test_acc)):
	    print(label, value)

	# Persist the trained model next to the dataset.
	# NOTE(review): the path has no file extension; newer Keras releases may
	# require ".keras" or ".h5" here -- confirm against the installed version.
	model.save('ham or spam rnn/model')