Spaces:

giulio98
/

codebleu

Runtime error

App Files Files Community

codebleu / bleu.py

giulio98

Updating module

d7607a1 about 2 years ago

raw

history blame

4.88 kB

	# Copyright 2017 Google Inc. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# ==============================================================================

	"""Python implementation of BLEU and smooth-BLEU.

	This module provides a Python implementation of BLEU and smooth-BLEU.
	Smooth BLEU is computed following the method outlined in the paper:
	Chin-Yew Lin, Franz Josef Och. ORANGE: a method for evaluating automatic
	evaluation metrics for machine translation. COLING 2004.
	"""

	import collections
	import math


	def _get_ngrams(segment, max_order):
	    """Counts every n-gram of length 1..max_order found in a segment.

	    Args:
	      segment: sliceable sequence of tokens to extract n-grams from.
	      max_order: largest n-gram length (in tokens) to extract.

	    Returns:
	      A collections.Counter keyed by n-gram (as a tuple of tokens) whose
	      values are the number of times that n-gram occurs in segment.
	    """
	    # Shorter n-grams are visited first, each order scanned left to right.
	    return collections.Counter(
	        tuple(segment[start:start + width])
	        for width in range(1, max_order + 1)
	        for start in range(len(segment) - width + 1)
	    )


	def compute_bleu(reference_corpus, translation_corpus, max_order=4,
	                 smooth=False):
	    """Computes BLEU score of translated segments against one or more references.

	    Args:
	      reference_corpus: list of lists of references for each translation. Each
	        reference should be tokenized into a list of tokens.
	      translation_corpus: list of translations to score. Each translation
	        should be tokenized into a list of tokens.
	      max_order: Maximum n-gram order to use when computing BLEU score.
	      smooth: Whether or not to apply Lin et al. 2004 smoothing.

	    Returns:
	      6-Tuple (bleu, precisions, bp, ratio, translation_length,
	      reference_length) where
	        bleu: geometric mean of the n-gram precisions times the brevity penalty.
	        precisions: list with one precision per n-gram order (1..max_order).
	        bp: brevity penalty; 0.0 when the total translation length is 0.
	        ratio: translation_length / reference_length; 0.0 when both lengths
	          are 0 and float("inf") when only reference_length is 0.
	        translation_length: total number of tokens in the scored translations.
	        reference_length: sum over segments of the shortest reference length.
	    """
	    matches_by_order = [0] * max_order
	    possible_matches_by_order = [0] * max_order
	    reference_length = 0
	    translation_length = 0
	    for references, translation in zip(reference_corpus, translation_corpus):
	        # The shortest reference of each segment contributes to the length.
	        reference_length += min(len(r) for r in references)
	        translation_length += len(translation)

	        # Union of Counters keeps, per n-gram, the max count over references.
	        merged_ref_ngram_counts = collections.Counter()
	        for reference in references:
	            merged_ref_ngram_counts |= _get_ngrams(reference, max_order)
	        translation_ngram_counts = _get_ngrams(translation, max_order)
	        # Intersection keeps the min count, i.e. the clipped matches.
	        overlap = translation_ngram_counts & merged_ref_ngram_counts
	        for ngram, count in overlap.items():
	            matches_by_order[len(ngram) - 1] += count
	        for order in range(1, max_order + 1):
	            possible_matches = len(translation) - order + 1
	            if possible_matches > 0:
	                possible_matches_by_order[order - 1] += possible_matches

	    precisions = [0] * max_order
	    for i in range(max_order):
	        if smooth:
	            precisions[i] = ((matches_by_order[i] + 1.) /
	                             (possible_matches_by_order[i] + 1.))
	        elif possible_matches_by_order[i] > 0:
	            precisions[i] = (float(matches_by_order[i]) /
	                             possible_matches_by_order[i])
	        else:
	            precisions[i] = 0.0

	    if min(precisions) > 0:
	        p_log_sum = sum((1. / max_order) * math.log(p) for p in precisions)
	        geo_mean = math.exp(p_log_sum)
	    else:
	        geo_mean = 0

	    # Guard both divisions: an empty corpus (or only empty references) gives
	    # reference_length == 0, and only empty translations give ratio == 0.
	    if reference_length > 0:
	        ratio = float(translation_length) / reference_length
	    elif translation_length > 0:
	        ratio = float("inf")
	    else:
	        ratio = 0.0

	    if ratio > 1.0:
	        bp = 1.
	    elif ratio > 0:
	        bp = math.exp(1 - 1. / ratio)
	    else:
	        # Limit of exp(1 - 1/ratio) as ratio -> 0: nothing was translated.
	        bp = 0.

	    bleu = geo_mean * bp

	    return (bleu, precisions, bp, ratio, translation_length, reference_length)


	def _bleu(ref_file, trans_file, subword_option=None):
	    """Computes the smoothed BLEU score of a translation file against a reference file.

	    Both files are read as UTF-8 text, one segment per line; every line is
	    stripped and split on whitespace to obtain its tokens.

	    Args:
	      ref_file: path of the file holding one reference per line.
	      trans_file: path of the file holding one translation per line; line i
	        is scored against line i of ref_file.
	      subword_option: accepted for interface compatibility; not used here.

	    Returns:
	      The BLEU score (n-gram order up to 4, smoothing enabled) multiplied by
	      100 and rounded to 2 decimal places.
	    """
	    max_order = 4
	    smooth = True
	    ref_files = [ref_file]
	    reference_text = []
	    for reference_filename in ref_files:
	        # Explicit encoding: the platform default is locale dependent.
	        with open(reference_filename, encoding="utf-8") as fh:
	            reference_text.append(fh.readlines())
	    # Transpose so that each entry lists all references of one segment.
	    per_segment_references = [
	        [reference.strip().split() for reference in references]
	        for references in zip(*reference_text)
	    ]
	    with open(trans_file, encoding="utf-8") as fh:
	        translations = [line.strip().split() for line in fh]
	    bleu_score = compute_bleu(per_segment_references, translations,
	                              max_order, smooth)[0]
	    return round(100 * bleu_score, 2)