Spaces:
Sleeping
Sleeping
Upload 9 files
Browse files- .gitattributes +1 -0
- app.py +22 -0
- main.py +73 -0
- model/fingerprint.pb +3 -0
- model/keras_metadata.pb +3 -0
- model/saved_model.pb +3 -0
- model/variables/variables.data-00000-of-00001 +3 -0
- model/variables/variables.index +0 -0
- requirements.txt +2 -0
- spam.csv +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import tensorflow as tf
|
2 |
+
import gradio as gr
|
3 |
+
|
4 |
+
|
5 |
+
# Load the trained classifier once at start-up from the "model" directory
# (resolved relative to the working directory) so every request reuses it.
model = tf.keras.models.load_model('model')
|
6 |
+
|
7 |
+
def predict_spam(message):
    """Classify a text message as spam or ham.

    Args:
        message: The raw message text entered by the user.

    Returns:
        A string such as "Spam (97.31%)" or "Ham (88.02%)", where the
        percentage is the model's confidence in the reported label. When the
        message is missing, empty, or only whitespace, a short prompt asking
        for input is returned instead of a prediction.
    """
    # Blank input carries no signal, so do not report a classification for it.
    if message is None or not message.strip():
        return "Please enter a message to classify."

    # Pass an explicit string tensor holding a batch of one message; a bare
    # Python list of str is not a documented input type for Model.predict.
    # The model emits one sigmoid value per message: the spam probability.
    pred_prob = model.predict(tf.constant([message]))[0][0]

    if pred_prob > 0.5:
        label, confidence = "Spam", pred_prob
    else:
        # Confidence in "Ham" is the complement of the spam probability.
        label, confidence = "Ham", 1 - pred_prob
    return f"{label} ({confidence * 100:.2f}%)"
|
12 |
+
|
13 |
+
|
14 |
+
# Wire predict_spam into a simple Gradio UI: one free-text input box and one
# text output that shows the label and confidence string it returns.
iface = gr.Interface(
    predict_spam,
    "text",
    "text",
    title="Ham or Spam Classifier",
    description=(
        "A Ham or Spam Classifier created using TensorFlow. "
        "Input a message to see if it's classified as Ham or Spam!"
    ),
)

# Start the web app; share=True asks Gradio for a public share link.
iface.launch(share=True)
|
main.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from sklearn.model_selection import train_test_split
|
3 |
+
import tensorflow as tf
|
4 |
+
import numpy as np
|
5 |
+
|
6 |
+
def _label_to_int(raw_label):
    """Encode a CSV class label: 0 for "ham", 1 for any other value."""
    if raw_label == "ham":
        return 0
    return 1


# The CSV needs a header row that provides the "class" and "message" columns.
df = pd.read_csv("ham or spam rnn/spam.csv", encoding="ISO-8859-1")

# Replace the textual labels with integer targets, then pick out the
# feature (message text) and target (encoded class) columns.
df["class"] = df["class"].apply(_label_to_int)
X, y = df["message"], df["class"]
|
12 |
+
|
13 |
+
# Split the data into training and testing sets (80% training, 20% testing)
|
14 |
+
def _to_dataset(texts, labels):
    """Wrap parallel text/label columns as a tf.data dataset of (text, label) pairs."""
    return tf.data.Dataset.from_tensor_slices((texts, labels))


def _text_only(text, label):
    """Drop the label so the encoder sees only the message text."""
    return text


# random_state pins the shuffle so the split is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

train_dataset = _to_dataset(X_train, y_train)
test_dataset = _to_dataset(X_test, y_test)

# Fit the vectorizer's vocabulary on the training text only, while the
# dataset is still unbatched.
VOCAB_SIZE = 1000
encoder = tf.keras.layers.TextVectorization(max_tokens=VOCAB_SIZE)
encoder.adapt(train_dataset.map(_text_only))

# Group both datasets into mini-batches for training and evaluation.
batch_size = 32
train_dataset = train_dataset.batch(batch_size)
test_dataset = test_dataset.batch(batch_size)
|
37 |
+
|
38 |
+
|
39 |
+
|
40 |
+
|
41 |
+
|
42 |
+
|
43 |
+
|
44 |
+
|
45 |
+
|
46 |
+
|
47 |
+
|
48 |
+
|
49 |
+
# Assemble the network layer by layer: raw text -> token ids -> embeddings ->
# bidirectional LSTM -> dense head producing a single probability.
embedding = tf.keras.layers.Embedding(
    input_dim=len(encoder.get_vocabulary()),
    output_dim=64,
    mask_zero=True,
)
recurrent = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64))
hidden = tf.keras.layers.Dense(64, activation='relu')
# Sigmoid squashes the output to (0, 1) for binary classification.
output = tf.keras.layers.Dense(1, activation='sigmoid')

model = tf.keras.Sequential([encoder, embedding, recurrent, hidden, output])

# from_logits=False because the final layer already applies the sigmoid.
loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=False)
optimizer = tf.keras.optimizers.Adam(1e-4)
model.compile(loss=loss_fn, optimizer=optimizer, metrics=['accuracy'])

history = model.fit(
    train_dataset,
    epochs=15,
    validation_data=test_dataset,
    validation_steps=30,
)

# Final metrics on the full held-out set.
test_loss, test_acc = model.evaluate(test_dataset)

print('Test Loss:', test_loss)
print('Test Accuracy:', test_acc)

# Persist the trained model (including the text encoder) to disk.
model.save('ham or spam rnn/model')
|
73 |
+
|
model/fingerprint.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:288d85bd41a744288499f8596f65a154c23438bd90fd77c8ed4d5336f1216ecc
|
3 |
+
size 57
|
model/keras_metadata.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:00c476b4fe4ca5ff031f780c8601c5f16b27e113d29c563f0c5c67c0f1dad91a
|
3 |
+
size 20219
|
model/saved_model.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c8e8742c3789996f30e06495898505dcbc38c9974bfaf5893514d47439e9a8a
|
3 |
+
size 5050101
|
model/variables/variables.data-00000-of-00001
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a2ac8bb8d57c7c73d1b9c7f3ed0fd0637af012c554a39120ceef1d39b696d7a
|
3 |
+
size 1673052
|
model/variables/variables.index
ADDED
Binary file (2.94 kB). View file
|
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
tensorflow
|
2 |
+
gradio
|
spam.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|