|
|
|
|
|
|
|
from .parsercode.DFG import DFG_python,DFG_java,DFG_ruby,DFG_go,DFG_php,DFG_javascript,DFG_csharp |
|
from .parsercode.utils import (remove_comments_and_docstrings, |
|
tree_to_token_index, |
|
index_to_code_token, |
|
tree_to_variable_index) |
|
from tree_sitter import Language, Parser |
|
import os |
|
|
|
# Lookup table from a language identifier to the matching ``DFG_*`` function
# imported from ``.parsercode.DFG``. The keys are the plain-string language
# names; note that C# is registered under 'c_sharp'.
dfg_function = {
    'python': DFG_python,
    'java': DFG_java,
    'ruby': DFG_ruby,
    'go': DFG_go,
    'php': DFG_php,
    'javascript': DFG_javascript,
    'c_sharp': DFG_csharp,
}
|
|
|
def calc_syntax_match(references, candidate, lang):
    """Score one candidate against its references.

    Convenience wrapper that wraps ``references`` and ``candidate`` into
    one-element lists and delegates to ``corpus_syntax_match``.

    Args:
        references: The references belonging to ``candidate``; passed through
            as the single entry of the corpus-level reference list.
        candidate: The candidate to score.
        lang: Language identifier forwarded unchanged.

    Returns:
        Whatever ``corpus_syntax_match`` returns for this one-sample corpus.
    """
    single_sample_refs = [references]
    single_sample_cands = [candidate]
    return corpus_syntax_match(
        references=single_sample_refs,
        candidates=single_sample_cands,
        lang=lang,
    )
|
|
|
def _strip_comments(source):
    """Best-effort comment removal; return *source* unchanged if it fails."""
    try:
        # NOTE(review): 'java' is hard-coded here regardless of the ``lang``
        # passed to corpus_syntax_match. Kept as-is so existing scores do not
        # shift -- confirm whether ``lang`` should be forwarded instead.
        return remove_comments_and_docstrings(source, 'java')
    except Exception:
        # Stripping is only a normalisation step; fall back to the raw text.
        # ``Exception`` (not a bare except) so Ctrl-C / SystemExit propagate.
        return source


def _get_all_sub_trees(root_node):
    """Collect the S-expression and depth of the subtrees under *root_node*.

    The tree is walked iteratively with an explicit stack. The root is always
    recorded (at depth 1); below it, only children that themselves have
    children are pushed, so leaf nodes never get an entry of their own.

    Returns:
        A list of ``[sexp, depth]`` pairs in stack (LIFO) visiting order.
    """
    sub_tree_sexp_list = []
    node_stack = [(root_node, 1)]
    while node_stack:
        cur_node, cur_depth = node_stack.pop()
        sub_tree_sexp_list.append([cur_node.sexp(), cur_depth])
        for child_node in cur_node.children:
            if child_node.children:
                node_stack.append((child_node, cur_depth + 1))
    return sub_tree_sexp_list


def corpus_syntax_match(references, candidates, lang):
    """Return the share of reference subtrees that also occur in the candidate.

    Each candidate and each of its references is comment-stripped (best
    effort), parsed with the grammar ``lang`` loaded from
    ``parsercode/my-languages.so`` next to this file, and flattened into
    subtree S-expressions. A reference subtree counts as matched when an
    identical S-expression exists anywhere in the candidate's subtrees.

    Args:
        references: Indexable collection where ``references[i]`` is an
            iterable of reference source strings for ``candidates[i]``.
        candidates: Sequence of candidate source strings.
        lang: Grammar name handed to ``Language``.

    Returns:
        ``match_count / total_count`` accumulated over the whole corpus, or
        ``0.0`` when there is nothing to compare (no candidates, or no
        references at all) instead of raising ``ZeroDivisionError``.

    Raises:
        IndexError: If ``references`` has fewer entries than ``candidates``.
    """
    # Nothing to score: skip loading the grammar and avoid dividing by zero.
    if len(candidates) == 0:
        return 0.0

    curr_path = os.path.dirname(os.path.abspath(__file__))
    language = Language(
        os.path.join(curr_path, 'parsercode', 'my-languages.so'), lang
    )
    parser = Parser()
    parser.set_language(language)

    match_count = 0
    total_count = 0

    for i, candidate in enumerate(candidates):
        references_sample = references[i]

        # The candidate is the same for every reference of this sample, so
        # strip and parse it once and keep its subtrees in a set for O(1)
        # membership tests.
        candidate_tree = parser.parse(
            bytes(_strip_comments(candidate), 'utf8')
        ).root_node
        cand_sexps = {sexp for sexp, _ in _get_all_sub_trees(candidate_tree)}

        for reference in references_sample:
            reference_tree = parser.parse(
                bytes(_strip_comments(reference), 'utf8')
            ).root_node
            ref_sexps = _get_all_sub_trees(reference_tree)

            match_count += sum(
                1 for sexp, _ in ref_sexps if sexp in cand_sexps
            )
            total_count += len(ref_sexps)

    # Every parsed reference contributes at least its root, so this is only
    # zero when no sample had any reference.
    if total_count == 0:
        return 0.0
    return match_count / total_count
|
|