humanizer.py
import re
import random
import spacy
import nltk
import textstat
from nltk.corpus import wordnet

# Download the required NLTK datasets if they are not already present
nltk.download('wordnet', quiet=True)
nltk.download('omw-1.4', quiet=True)

# spaCy English model
nlp = spacy.load("en_core_web_sm")
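# Note: en_core_web_sm is not bundled with spaCy itself; install it once with
#   python -m spacy download en_core_web_sm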
# Regex-based simplifications
RE_RULES = {
    r"\butilize\b": "use",
    r"\bmoreover\b": "also",
    r"\bfurthermore\b": "also",
    r"\bin order to\b": "to",
    r"\bsubsequently\b": "then",
    r"\bthus\b": "so",
    r"\btherefore\b": "so",
    r"\badditionally\b": "also",
    r"\bprior to\b": "before",
    r"\bassist\b": "help",
    r"\bcommence\b": "start",
    r"\bterminate\b": "end",
    r"\bendeavor\b": "try",
    r"\bpertaining to\b": "about",
    r"\bnotwithstanding\b": "despite",
    r"\bfacilitate\b": "make easier",
    r"\bnecessitate\b": "require",
    r"\bwith regard to\b": "about",
    r"\bsubstantial\b": "significant",
    r"\boptimize\b": "improve",
    # Common AI boilerplate phrases
    r"It is important to note that": "",
    r"As per our analysis": "We think",
    r"It can be observed that": "",
    r"In conclusion": "So",
    r"This clearly shows that": "This shows",
}

def apply_regex(text: str) -> str:
    for patt, rep in RE_RULES.items():
        text = re.sub(patt, rep, text, flags=re.IGNORECASE)
    return text
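
# Illustrative example (hypothetical input):
#   apply_regex("We utilize caching in order to optimize reads")
# returns "We use caching to improve reads".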

def synonym_replace(token):
    """Simple heuristic: try a synonym only for adjectives, adverbs, verbs, and nouns."""
    wn_pos = None
    if token.pos_ in ('ADJ', 'ADV', 'VERB', 'NOUN'):
        wn_pos = {'ADJ': 'a', 'ADV': 'r', 'VERB': 'v', 'NOUN': 'n'}[token.pos_]
    if not wn_pos:
        return token.text
    syns = wordnet.synsets(token.text, pos=wn_pos)
    if not syns:
        return token.text
    # Pick a random lemma that differs from the original word
    lemmas = [l.name().replace('_', ' ')
              for s in syns for l in s.lemmas()
              if l.name().lower() != token.text.lower()]
    return random.choice(lemmas) if lemmas else token.text
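
# Note: WordNet lemma names are uninflected base forms, so an inflected token
# like "running" may be swapped for a form such as "run"; this light-touch
# rewriter accepts that rather than re-inflecting the synonym.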

def humanize_sentence(sent: str) -> str:
    # Simplify with the regex rules first
    sent = apply_regex(sent)
    # Tokenize with spaCy and occasionally swap in a synonym
    doc = nlp(sent)
    pieces = []
    for tok in doc:
        # Replace with a synonym 25% of the time (a light touch, not a full rewrite)
        word = synonym_replace(tok) if random.random() < 0.25 else tok.text
        # Keep each token's original trailing whitespace so punctuation stays attached
        pieces.append(word + tok.whitespace_)
    return "".join(pieces)
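
# Illustrative run (random, so output varies):
#   humanize_sentence("We utilize caching in order to assist users.")
# typically yields something close to "We use caching to help users.";
# call random.seed(...) first if you need reproducible output.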

def humanize_text(text: str) -> str:
    # Split into sentences with spaCy, humanize each, and rejoin
    doc = nlp(text)
    sentences = [s.text.strip() for s in doc.sents]
    humanized = [humanize_sentence(s) for s in sentences]
    return " ".join(humanized)

def report_readability(original: str, transformed: str):
    o_score = textstat.flesch_reading_ease(original)
    t_score = textstat.flesch_reading_ease(transformed)
    print(f"\n🔍 Readability (Flesch) → Before: {o_score:.1f}, After: {t_score:.1f}\n")
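
# Flesch reading ease rises as words get shorter, so since the regex rules swap
# long formal words for short ones, the "After" score should usually be at
# least as high as the "Before" score.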

def main():
    import argparse
    parser = argparse.ArgumentParser(description="Advanced AI→Human Text Converter")
    parser.add_argument("input", help="Input .txt file path")
    parser.add_argument("output", help="Output .txt file path")
    args = parser.parse_args()
    with open(args.input, 'r', encoding='utf-8') as f:
        ai_text = f.read()
    print("== Converting... ==")
    transformed = humanize_text(ai_text)
    report_readability(ai_text, transformed)
    with open(args.output, 'w', encoding='utf-8') as f:
        f.write(transformed)
    print(f"✅ Done! Saved to {args.output}")

if __name__ == "__main__":
    main()
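
# Example invocation (draft.txt / humanized.txt are placeholder file names):
#   python humanizer.py draft.txt humanized.txt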