# Spaces: Sleeping
import os | |
import subprocess | |
# Check whether Java is installed, and install it if it is missing.
try:
    # `check=True` raises CalledProcessError if `java -version` exits non-zero;
    # only a missing executable (FileNotFoundError) triggers the install below.
    subprocess.run(["java", "-version"], check=True)
except FileNotFoundError:
    print("Java is not installed. Installing Java...")
    subprocess.run(["apt-get", "update"], check=True)
    # Install JDK 17 explicitly rather than 'default-jdk': the default JDK
    # version differs between distro releases, while the JAVA_HOME path
    # configured later in this script is the Java 17 directory.
    subprocess.run(["apt-get", "install", "-y", "openjdk-17-jdk"], check=True)
# Set the JAVA_HOME environment variable.
java_home = "/usr/lib/jvm/java-17-openjdk-amd64"
# Fail fast when the expected JVM directory is absent, so the problem is
# reported here rather than later when a JVM-backed library is loaded.
if not os.path.exists(java_home):
    raise EnvironmentError("JAVA_HOME could not be set because the path does not exist.")
os.environ['JAVA_HOME'] = java_home
print(f"JAVA_HOME is set to {java_home}")
from konlpy.tag import Okt, Komoran | |
# Module-level analyzer instances, built once at import time (Komoran first,
# then Okt) and shared by tokenize().
komoran, okt = Komoran(), Okt()
# Morphological analysis for tokenization
def tokenize(data):
    """Tokenize *data* with Okt, re-splitting verbs and adjectives via Komoran.

    The text is first tagged with ``okt.pos`` (``norm=True``, ``stem=False``).
    Tokens tagged ``'Verb'`` or ``'Adjective'`` are passed to ``komoran.pos``
    and replaced by the first element of each pair it returns; every other
    token is kept exactly as Okt produced it.

    Args:
        data: Text to tokenize, handed directly to ``okt.pos``.

    Returns:
        A list of token strings in the order they were produced.
    """
    predicate_tags = ['Verb', 'Adjective']
    tokens = []
    for surface, tag in okt.pos(data, norm=True, stem=False):
        if tag not in predicate_tags:
            tokens.append(surface)
            continue
        # Verbs/adjectives: keep only the morpheme text from Komoran's
        # (morpheme, tag) pairs, discarding Komoran's own tags.
        tokens.extend(morpheme for morpheme, _ in komoran.pos(surface))
    return tokens