#!/usr/bin/env python
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import itertools
import operator
import sys
from collections import OrderedDict
from run_eval import datetime_now, run_generate
from utils import ROUGE_KEYS
# A table of supported tasks and, for each, the list of scores to sort by in order of importance.
# To add a new task, simply list the score names that `run_eval.run_generate()` returns for it
# (see the example below the table).
task_score_names = {
"translation": ["bleu"],
"summarization": ROUGE_KEYS,
}
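# For example, a hypothetical question-answering task scored by exact match and F1 (score names
# chosen for illustration only; they must match what `run_eval.run_generate()` actually returns)
# would be registered as:
#   task_score_names["qa"] = ["em", "f1"]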
def parse_search_arg(search):
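    """
    Split the ``--search`` value into a cartesian-product matrix of ``--key value`` argument
    strings plus the list of hparam names. For example (values are arbitrary)::

        parse_search_arg("num_beams=5:10 early_stopping=true:false")

    returns::

        ([["--num_beams 5", "--early_stopping true"],
          ["--num_beams 5", "--early_stopping false"],
          ["--num_beams 10", "--early_stopping true"],
          ["--num_beams 10", "--early_stopping false"]],
         ["num_beams", "early_stopping"])
    """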
groups = search.split()
entries = dict((g.split("=") for g in groups))
entry_names = list(entries.keys())
sets = [[f"--{k} {v}" for v in vs.split(":")] for k, vs in entries.items()]
matrix = [list(x) for x in itertools.product(*sets)]
return matrix, entry_names
def run_search():
"""
    Run a parametric search over the desired hparam space with the help of ``run_eval.py``.
    All the arguments except ``--search`` are passed to ``run_eval.py`` as is. The values inside ``--search`` are parsed, reformatted and fed to ``run_eval.py`` as additional args.
    The format of the ``--search`` value is a simple string of hparams with colon-separated values to try, e.g.:
```
--search "num_beams=5:10 length_penalty=0.8:1.0:1.2 early_stopping=true:false"
```
    which will generate ``12`` ``(2*3*2)`` runs, one for each combination in the cartesian product of the hparam values. The example above will invoke ``run_eval.py`` repeatedly with:
```
--num_beams 5 --length_penalty 0.8 --early_stopping true
--num_beams 5 --length_penalty 0.8 --early_stopping false
[...]
--num_beams 10 --length_penalty 1.2 --early_stopping false
```
    On completion, this function prints a markdown table of the results sorted by the task's scores (BLEU for translation, ROUGE for summarization), followed by the winning arguments.
"""
prog = sys.argv[0]
parser = argparse.ArgumentParser(
        usage=(
            "\n\nImportant: this script accepts all the arguments `run_eval.py` accepts, plus a few extra of its"
            " own; refer to `run_eval.py -h` for the complete list."
        )
)
parser.add_argument(
"--search",
type=str,
required=False,
help='param space to search, e.g. "num_beams=5:10 length_penalty=0.8:1.0:1.2"',
)
parser.add_argument(
"--bs", type=int, default=8, required=False, help="initial batch size (may get reduced if it's too big)"
)
parser.add_argument("--task", type=str, help="used for task_specific_params + metrics")
parser.add_argument(
"--info",
nargs="?",
type=str,
const=datetime_now(),
help=(
"add custom notes to be printed before the results table. If no value is passed, the current datetime"
" string will be used."
),
)
args, args_main = parser.parse_known_args()
# we share some of the args
args_main.extend(["--task", args.task])
args_normal = [prog] + args_main
    # to support variations like translation_en_to_de
task = "translation" if "translation" in args.task else "summarization"
matrix, col_names = parse_search_arg(args.search)
col_names[0:0] = task_score_names[task] # score cols first
col_widths = {col: len(str(col)) for col in col_names}
results = []
for r in matrix:
hparams = dict((x.replace("--", "").split() for x in r))
args_exp = " ".join(r).split()
args_exp.extend(["--bs", str(args.bs)]) # in case we need to reduce its size due to CUDA OOM
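        # run_generate() parses its arguments from sys.argv, so emulate a fresh command line for
        # this hparam combination before invoking it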
sys.argv = args_normal + args_exp
# XXX: need to trap CUDA OOM and lower args.bs if that happens and retry
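        # One possible approach (a sketch, not implemented here): catch a CUDA out-of-memory
        # RuntimeError around run_generate(), halve the --bs value in args_exp, rebuild sys.argv
        # and retry.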
scores = run_generate(verbose=False)
# make sure scores are first in the table
result = OrderedDict()
for score in task_score_names[task]:
result[score] = scores[score]
result.update(hparams)
results.append(result)
# find widest entries
for k, v in result.items():
            width = len(str(v))
            if width > col_widths[k]:
                col_widths[k] = width
results_sorted = sorted(results, key=operator.itemgetter(*task_score_names[task]), reverse=True)
print(" | ".join([f"{col:{col_widths[col]}}" for col in col_names]))
print(" | ".join([f"{'-'*col_widths[col]}" for col in col_names]))
for row in results_sorted:
print(" | ".join([f"{row[col]:{col_widths[col]}}" for col in col_names]))
best = results_sorted[0]
for score in task_score_names[task]:
del best[score]
best_args = [f"--{k} {v}" for k, v in best.items()]
dyn_args = ["--bs", str(args.bs)]
if args.info:
print(f"\nInfo: {args.info}")
print("\nBest score args:")
print(" ".join(args_main + best_args + dyn_args))
return results_sorted
if __name__ == "__main__":
# Usage:
    # [normal run_eval_search.py cmd plus] \
# --search="num_beams=1:5:10 length_penalty=0.8:1:1.2 early_stopping=true:false"
#
# Example:
# PYTHONPATH="src:examples/seq2seq" python examples/seq2seq/run_eval_search.py $MODEL_NAME \
# $DATA_DIR/val.source $SAVE_DIR/test_translations.txt --reference_path $DATA_DIR/val.target \
# --score_path $SAVE_DIR/test_bleu.json --bs $BS --task translation \
# --search="num_beams=1:5:10 length_penalty=0.8:1:1.2 early_stopping=true:false"
run_search()