go00od's picture
Upload mor.py with huggingface_hub
f23978c verified
raw
history blame
1.17 kB
import os
import subprocess
# Check whether Java is installed; install it if it is missing
try:
    # Probe for a `java` launcher on PATH. With check=True a non-zero exit
    # status raises CalledProcessError, which is not handled here and propagates.
    subprocess.run(["java", "-version"], check=True)
except FileNotFoundError:
    # No `java` executable was found: install a JDK through apt.
    # (Running apt-get typically requires root privileges and network access.)
    print("Java is not installed. Installing Java...")
    subprocess.run(["apt-get", "update"], check=True)
    subprocess.run(["apt-get", "install", "-y", "default-jdk"], check=True)  # or 'openjdk-17-jdk'

# Set the JAVA_HOME environment variable.
#
# `default-jdk` is a metapackage, so the JDK version/architecture it installs
# differs between distributions and the OpenJDK 17 amd64 directory is not
# guaranteed to exist. Candidates are tried in order:
#   1. the OpenJDK 17 amd64 directory (always wins when present, as before),
#   2. a JAVA_HOME already present in the environment,
#   3. the `default-java` symlink that Debian/Ubuntu default JDK/JRE packages
#      are expected to provide.
_java_home_candidates = [
    "/usr/lib/jvm/java-17-openjdk-amd64",
    os.environ.get("JAVA_HOME"),
    "/usr/lib/jvm/default-java",
]
java_home = next(
    (path for path in _java_home_candidates if path and os.path.exists(path)),
    None,
)
if java_home is None:
    # None of the candidate directories exists; fail loudly with the same
    # exception type and message as before.
    raise EnvironmentError("JAVA_HOME could not be set because the path does not exist.")
os.environ['JAVA_HOME'] = java_home
print(f"JAVA_HOME is set to {java_home}")
# NOTE(review): this import is kept below the JAVA_HOME setup instead of at the
# top of the file — presumably because KoNLPy's taggers need a JVM located via
# JAVA_HOME. Confirm before moving it.
from konlpy.tag import Okt, Komoran
# Module-level tagger instances used by tokenize() below.
komoran = Komoran()
okt = Okt()
# 토큰화λ₯Ό μœ„ν•œ ν˜•νƒœμ†Œ 뢄석
def tokenize(data):
    """Split *data* into a flat list of token strings.

    The text is first tagged with ``okt.pos(data, norm=True, stem=False)``.
    Every token tagged ``'Verb'`` or ``'Adjective'`` is passed on to
    ``komoran.pos`` and replaced by the first element of each item that call
    returns; every other token is kept unchanged.

    Args:
        data: Text handed to ``okt.pos``.

    Returns:
        A list of token strings in the order they were produced.
    """
    pieces = []
    for token in okt.pos(data, norm=True, stem=False):
        surface, tag = token[0], token[1]
        if tag not in ('Verb', 'Adjective'):
            # Not a verb/adjective: keep Okt's token as-is.
            pieces.append(surface)
            continue
        # Verbs and adjectives are split further with Komoran.
        pieces.extend(morph[0] for morph in komoran.pos(surface))
    return pieces