sashdev committed
Commit e2f13a4
1 Parent(s): 6ecbb4f

Update app.py

Files changed (1)
  1. app.py +95 -94
app.py CHANGED
@@ -1,107 +1,108 @@
- # Import dependencies
- import gradio as gr
- from transformers import AutoTokenizer, AutoModelForSequenceClassification, T5Tokenizer, T5ForConditionalGeneration
- import torch
- import nltk
  import spacy
- from nltk.corpus import wordnet
- import subprocess
-
- # Download NLTK data (if not already downloaded)
- nltk.download('punkt')
- nltk.download('stopwords')
- nltk.download('wordnet')  # Download WordNet
 
- # Download spaCy model if not already installed
- try:
-     nlp = spacy.load("en_core_web_sm")
- except OSError:
-     subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
-     nlp = spacy.load("en_core_web_sm")
-
- # Check for GPU and set the device accordingly
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
- # Load AI Detector model and tokenizer from Hugging Face (DistilBERT)
- tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
- model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english").to(device)
-
- # Load SRDdev Paraphrase model and tokenizer for humanizing text
- paraphrase_tokenizer = T5Tokenizer.from_pretrained("SRDdev/Paraphrase")
- paraphrase_model = T5ForConditionalGeneration.from_pretrained("SRDdev/Paraphrase").to(device)
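- # NOTE: this checkpoint is an SST-2 sentiment classifier rather than a purpose-built
- # AI-text detector; its class-1 (positive) score is used below only as a rough proxy.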
 
- # Function to find synonyms using WordNet via NLTK
- def get_synonyms(word):
-     synonyms = set()
-     for syn in wordnet.synsets(word):
-         for lemma in syn.lemmas():
-             synonyms.add(lemma.name())
-     return list(synonyms)
 
- # Replace words with synonyms using spaCy and WordNet
- def replace_with_synonyms(text):
-     doc = nlp(text)
-     processed_text = []
-     for token in doc:
-         synonyms = get_synonyms(token.text.lower())
-         if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"}:  # Only replace certain types of words
-             replacement = synonyms[0]  # Replace with the first synonym
-             if token.is_title:
-                 replacement = replacement.capitalize()
-             processed_text.append(replacement)
-         else:
-             processed_text.append(token.text)
-     return " ".join(processed_text)
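- # NOTE: get_synonyms() builds its list from a set, so synonyms[0] is an arbitrary candidate
- # rather than the best sense for the context; replacements can therefore read oddly.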
 
- # AI detection scoring using the DistilBERT classifier
- def detect_ai_generated(text):
-     inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).to(device)
-     with torch.no_grad():
-         outputs = model(**inputs)
-     probabilities = torch.softmax(outputs.logits, dim=1)
-     ai_probability = probabilities[0][1].item()  # Class-1 probability, treated here as the AI-generated score
-     return ai_probability
 
- # Humanize the AI-detected text using the SRDdev Paraphrase model
- def humanize_text(AI_text):
-     paragraphs = AI_text.split("\n")
-     paraphrased_paragraphs = []
-     for paragraph in paragraphs:
-         if paragraph.strip():
-             inputs = paraphrase_tokenizer(paragraph, return_tensors="pt", max_length=512, truncation=True).to(device)
-             paraphrased_ids = paraphrase_model.generate(
-                 inputs['input_ids'],
-                 max_length=inputs['input_ids'].shape[-1] + 20,  # Slightly more than the original input length
-                 num_beams=4,
-                 early_stopping=True,
-                 length_penalty=1.0,
-                 no_repeat_ngram_size=3,
-             )
-             paraphrased_text = paraphrase_tokenizer.decode(paraphrased_ids[0], skip_special_tokens=True)
-             paraphrased_paragraphs.append(paraphrased_text)
-     return "\n\n".join(paraphrased_paragraphs)
 
- # Main function to handle the overall process
- def main_function(AI_text):
-     # Replace words with synonyms
-     text_with_synonyms = replace_with_synonyms(AI_text)
-
-     # Detect AI-generated content
-     ai_probability = detect_ai_generated(text_with_synonyms)
-
-     # Humanize AI text
-     humanized_text = humanize_text(text_with_synonyms)
-
-     return f"AI-Generated Content: {ai_probability * 100:.2f}%\n\nHumanized Text:\n{humanized_text}"
 
- # Gradio interface definition
- interface = gr.Interface(
-     fn=main_function,
-     inputs="textbox",
-     outputs="textbox",
-     title="AI Text Humanizer with Synonym Replacement",
-     description="Enter AI-generated text and get a human-written version, with synonyms replaced for more natural output. This space uses models from Hugging Face directly."
- )
-
- # Launch the Gradio app
- interface.launch(debug=True)
 
+ from nltk.tokenize import word_tokenize
+ from nltk.tag import pos_tag
+ from nltk.corpus import wordnet as wn
+ from sematch.semantic.similarity import WordNetSimilarity
+ from vocabulary.vocabulary import Vocabulary as vb
+ import json
+ from random import randint
  import spacy
+ import os.path
+
+ nlp = spacy.load('en_core_web_sm')
 
+ # Function to tag a sentence with part-of-speech labels
+ def tag(sentence):
+     words = word_tokenize(sentence)
+     words = pos_tag(words)
+     return words
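+ # e.g., tag("Dogs run fast") typically yields [('Dogs', 'NNS'), ('run', 'VBP'), ('fast', 'RB')]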
 
+ # Determine the POS to paraphrase
+ def paraphraseable(tag):
+     return tag.startswith('NN') or tag == 'VB' or tag.startswith('JJ')
 
+ # Map a Penn Treebank tag to a WordNet POS constant
+ def pos(tag):
+     if tag.startswith('NN'):
+         return wn.NOUN
+     elif tag.startswith('V'):
+         return wn.VERB
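+ # NOTE: adjectives ('JJ') fall through and return None; wn.synsets(word, None) then searches every POS.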
 
+ # Function to collect synonyms for a word using WordNet via NLTK
+ def synonyms(word, tag):
+     listOfLemmas = [baseWord.lemmas() for baseWord in wn.synsets(word, pos(tag))]
+     if len(listOfLemmas) > 0:
+         listOfLemmas = listOfLemmas[0]  # Keep only the first (most common) synset
+         lemmas = [lemma.name() for lemma in listOfLemmas]
+         return set(lemmas)
+     else:
+         return set()
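+ # e.g., synonyms('help', 'VB') typically returns {'help', 'assist', 'aid'} from the first verb synset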
 
+ # Collect dictionary synonyms via the vocabulary package
+ def dictonarySynonums(word):
+     synJSON = vb.synonym(word)
+     if synJSON != False:
+         synonyms_lists = [dictSyno["text"] for dictSyno in json.loads(synJSON)]
+         return set(synonyms_lists)
+     else:
+         return set()
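+ # NOTE: vb.synonym() queries an online dictionary API and returns a JSON string, or False on
+ # failure, so this lookup needs network access and adds latency.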
 
+ # Control set: keep only synonyms semantically similar to the base word, scored with spaCy
+ def controlledSetSpacy(word, similarWords):
+     baseDoc = nlp(word)
+     for similarWord in similarWords.copy():
+         similarDoc = nlp(similarWord)
+         if baseDoc.similarity(similarDoc) < .76:  # Threshold controlling how strict the control set is
+             similarWords.discard(similarWord)
+     return similarWords
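+ # NOTE: en_core_web_sm ships without static word vectors, so .similarity() warns and its scores
+ # are rough; a model with vectors (en_core_web_md/lg) would give more reliable numbers.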
 
+ # Control set: keep only synonyms semantically similar to the base word, using WordNetSimilarity
+ def controlledSetWordNetSimilarity(word, similarWords):
+     wns = WordNetSimilarity()
+     for similarWord in similarWords.copy():
+         if wns.word_similarity(word, similarWord, 'li') < 0.9996:  # Threshold controlling how strict the control set is
+             similarWords.discard(similarWord)
+     return similarWords
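+ # NOTE: 'li' selects sematch's Li et al. WordNet measure; a cutoff as tight as 0.9996 keeps
+ # only near-identical senses.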
 
+ # Gather synonyms for each word from WordNet (NLTK) as well as from the dictionary synonyms
+ def synonymIfExists(sentence):
+     for (word, t) in tag(sentence):
+         if paraphraseable(t) and word not in ["i", "I"]:
+             syns = synonyms(word, t)
+             syns.update(dictonarySynonums(word))
+             if syns:
+                 syns = controlledSetWordNetSimilarity(word, syns)  # Or use the commented control set below
+                 #syns = controlledSetSpacy(word, syns)
+                 if len(syns) > 1:
+                     yield [word, list(syns)]
+                     continue
+         yield [word, []]
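+ # Each word yields [word, [synonym, ...]] when filtered synonyms exist, otherwise [word, []].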
 
+ # Build a bag of candidate words per token and count the sentence variants they allow
+ def paraphrase(sentence):
+     bagOfWords = []
+     counter = 1
+     for tempArray in synonymIfExists(sentence):
+         eachBoW = []
+         eachBoW.append(tempArray[0])
+         eachBoW.extend(tempArray[1])
+         eachBoW = list(set(eachBoW))
+         counter *= len(eachBoW)
+         bagOfWords.append(eachBoW)
+     return bagOfWords, counter
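+ # NOTE: counter is the product of the per-word candidate counts, i.e. the number of distinct
+ # sentence variants that are possible.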
 
+ # Re-create the sentence with synonyms chosen in random order
+ def paraPhraseThisSentence(sentence):
+     ppList = []
+     vList, count = paraphrase(sentence)
+     allWordsCount = len(vList)
+     for y in range(count):
+         wordChoices = []
+         for w in range(allWordsCount):
+             wordChoices.append(vList[w][randint(0, len(vList[w]) - 1)].replace("_", " "))
+         ppList.append(" ".join(wordChoices))
+     ppList = list(set(ppList))  # Random draws can repeat, so deduplicate
+     print(ppList)
+     return ppList
+
+ paraPhraseThisSentence("Financial Institutes have always helped the society to become a better version of itself.")