go00od committed on
Commit
f23978c
•
1 Parent(s): 35113c3

Upload mor.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. mor.py +39 -0
mor.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+
4
# Check that Java is installed; install it via apt when the `java` binary is missing.
import shutil  # imported here because only this setup section needs it

try:
    subprocess.run(["java", "-version"], check=True)
except FileNotFoundError:
    print("Java is not installed. Installing Java...")
    subprocess.run(["apt-get", "update"], check=True)
    subprocess.run(["apt-get", "install", "-y", "default-jdk"], check=True)  # or 'openjdk-17-jdk'


def _find_java_home(preferred):
    """Return the first existing Java home directory, or None if none is found.

    Candidates are tried in this order:
      1. *preferred* -- the path this script has always used, so any setup
         that worked before resolves to exactly the same directory;
      2. the JAVA_HOME value already present in the environment;
      3. the directory two levels above the resolved `java` executable
         (``<home>/bin/java`` -> ``<home>``).

    The extra candidates matter because `default-jdk` is not guaranteed to be
    version 17 or to live under an ``-amd64`` directory.
    """
    candidates = [preferred, os.environ.get("JAVA_HOME")]
    java_bin = shutil.which("java")
    if java_bin:
        # `java` on PATH is typically a symlink chain; follow it to the real
        # install location before stripping the trailing ``bin/java``.
        resolved = os.path.realpath(java_bin)
        candidates.append(os.path.dirname(os.path.dirname(resolved)))
    for candidate in candidates:
        if candidate and os.path.exists(candidate):
            return candidate
    return None


# Set the JAVA_HOME environment variable.
java_home = _find_java_home("/usr/lib/jvm/java-17-openjdk-amd64")
if java_home is None:
    raise EnvironmentError("JAVA_HOME could not be set because the path does not exist.")
os.environ['JAVA_HOME'] = java_home

print(f"JAVA_HOME is set to {java_home}")


# NOTE(review): this import is kept after the JAVA_HOME setup, as in the
# original; presumably konlpy's JVM lookup relies on it -- confirm before moving.
from konlpy.tag import Okt, Komoran

# Module-level analyzers shared by the functions defined later in this file.
komoran = Komoran()
okt = Okt()
27
+
28
# Morphological analysis used for tokenization.
def tokenize(data):
    """Tokenize *data* with Okt, re-analyzing verbs and adjectives with Komoran.

    The input is tagged by ``okt.pos(data, norm=True, stem=False)``. Every
    token tagged ``Verb`` or ``Adjective`` is passed to ``komoran.pos`` and
    replaced by the first element of each pair Komoran returns. Tokens with
    any other tag are kept as Okt produced them.

    Returns:
        A flat list of the collected tokens, in their original order.
    """
    predicate_tags = ('Verb', 'Adjective')
    tokens = []
    for surface, tag in okt.pos(data, norm=True, stem=False):
        if tag not in predicate_tags:
            tokens.append(surface)
            continue
        # Verbs/adjectives are split further by the second analyzer.
        tokens.extend(morpheme for morpheme, _ in komoran.pos(surface))
    return tokens