Spaces:
Runtime error
Runtime error
#
# Pyserini: Reproducible IR research with sparse and dense representations
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os | |
import unittest | |
from pyserini import search | |
class TestLoadTopics(unittest.TestCase): | |
def test_trec1_adhoc(self):
    """Load 'trec1-adhoc'; expect 50 topics whose first key is an int."""
    loaded = search.get_topics('trec1-adhoc')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 50)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_trec2_adhoc(self):
    """Load 'trec2-adhoc'; expect 50 topics whose first key is an int."""
    loaded = search.get_topics('trec2-adhoc')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 50)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_trec3_adhoc(self):
    """Load 'trec3-adhoc'; expect 50 topics whose first key is an int."""
    loaded = search.get_topics('trec3-adhoc')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 50)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_robust04(self):
    """Load 'robust04'; expect 250 topics whose first key is an int."""
    loaded = search.get_topics('robust04')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 250)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_robust05(self):
    """Load 'robust05'; expect 50 topics whose first key is an int."""
    loaded = search.get_topics('robust05')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 50)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_core17(self):
    """Load 'core17'; expect 50 topics whose first key is an int."""
    loaded = search.get_topics('core17')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 50)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_core18(self):
    """Load 'core18'; expect 50 topics whose first key is an int."""
    loaded = search.get_topics('core18')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 50)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_wt10g(self):
    """Load 'wt10g'; expect 100 topics whose first key is an int."""
    loaded = search.get_topics('wt10g')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 100)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_trec2004_terabyte(self):
    """Load 'trec2004-terabyte'; expect 50 topics whose first key is an int."""
    loaded = search.get_topics('trec2004-terabyte')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 50)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_trec2005_terabyte(self):
    """Load 'trec2005-terabyte'; expect 50 topics whose first key is an int."""
    loaded = search.get_topics('trec2005-terabyte')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 50)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_trec2006_terabyte(self):
    """Load 'trec2006-terabyte'; expect 50 topics whose first key is an int."""
    loaded = search.get_topics('trec2006-terabyte')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 50)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_trec2007_million_query(self):
    """Load 'trec2007-million-query'; expect 10000 topics with an int first key."""
    loaded = search.get_topics('trec2007-million-query')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 10000)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_trec2008_million_query(self):
    """Load 'trec2008-million-query'; expect 10000 topics with an int first key."""
    loaded = search.get_topics('trec2008-million-query')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 10000)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_trec2009_million_query(self):
    """Load 'trec2009-million-query'; expect 40000 topics with an int first key."""
    loaded = search.get_topics('trec2009-million-query')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 40000)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_trec2010_web(self):
    """Load 'trec2010-web'; expect 50 topics whose first key is an int."""
    loaded = search.get_topics('trec2010-web')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 50)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_trec2011_web(self):
    """Load 'trec2011-web'; expect 50 topics whose first key is an int."""
    loaded = search.get_topics('trec2011-web')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 50)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_trec2012_web(self):
    """Load 'trec2012-web'; expect 50 topics whose first key is an int."""
    loaded = search.get_topics('trec2012-web')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 50)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_trec2013_web(self):
    """Load 'trec2013-web'; expect 50 topics whose first key is an int."""
    loaded = search.get_topics('trec2013-web')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 50)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_trec2014_web(self):
    """Load 'trec2014-web'; expect 50 topics whose first key is an int."""
    loaded = search.get_topics('trec2014-web')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 50)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_mb11(self):
    """Load 'mb11'; expect 50 topics whose first key is an int."""
    loaded = search.get_topics('mb11')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 50)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_mb12(self):
    """Load 'mb12'; expect 60 topics whose first key is an int."""
    loaded = search.get_topics('mb12')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 60)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_mb13(self):
    """Load 'mb13'; expect 60 topics whose first key is an int."""
    loaded = search.get_topics('mb13')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 60)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_mb14(self):
    """Load 'mb14'; expect 55 topics whose first key is an int."""
    loaded = search.get_topics('mb14')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 55)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_car15(self):
    """Load 'car17v1.5-benchmarkY1test'; expect 2125 topics with a non-int first key."""
    loaded = search.get_topics('car17v1.5-benchmarkY1test')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 2125)
    first_key = next(iter(loaded.keys()))
    # Unlike most collections in this file, the first key must NOT be an int.
    self.assertFalse(isinstance(first_key, int))
def test_car20(self):
    """Load 'car17v2.0-benchmarkY1test'; expect 2254 topics with a non-int first key."""
    loaded = search.get_topics('car17v2.0-benchmarkY1test')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 2254)
    first_key = next(iter(loaded.keys()))
    # Unlike most collections in this file, the first key must NOT be an int.
    self.assertFalse(isinstance(first_key, int))
# MS MARCO V1
def test_msmarco_doc(self):
    """Load each 'msmarco-doc-*' topic set below; check its size and int first key."""
    expected_sizes = (
        ('msmarco-doc-dev', 5193),
        ('msmarco-doc-dev-unicoil', 5193),
        ('msmarco-doc-dev-unicoil-noexp', 5193),
        ('msmarco-doc-test', 5793),
    )
    # Checks run in listed order; the first failing assertion aborts the test.
    for name, size in expected_sizes:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), size)
        first_key = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_key, int))
def test_msmarco_passage(self):
    """Load each 'msmarco-passage-*' topic set below; check its size and int first key."""
    expected_sizes = (
        ('msmarco-passage-dev-subset', 6980),
        ('msmarco-passage-dev-subset-unicoil', 6980),
        ('msmarco-passage-dev-subset-unicoil-noexp', 6980),
        ('msmarco-passage-test-subset', 6837),
    )
    # Checks run in listed order; the first failing assertion aborts the test.
    for name, size in expected_sizes:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), size)
        first_key = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_key, int))
def test_msmarco_passage_deepimpact(self):
    """Load 'msmarco-passage-dev-subset-deepimpact'; expect 6980 topics, int first key."""
    loaded = search.get_topics('msmarco-passage-dev-subset-deepimpact')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 6980)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_msmarco_passage_unicoil_tidle(self):
    """Load 'msmarco-passage-dev-subset-unicoil-tilde'; expect 6980 topics, int first key."""
    # NOTE(review): the method name spells "tidle" while the topic set is
    # "...-tilde"; the name is kept as-is so existing test IDs stay stable.
    loaded = search.get_topics('msmarco-passage-dev-subset-unicoil-tilde')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 6980)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_msmarco_passage_distill_splade_max(self):
    """Load 'msmarco-passage-dev-subset-distill-splade-max'; expect 6980 topics, int first key."""
    loaded = search.get_topics('msmarco-passage-dev-subset-distill-splade-max')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 6980)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_dl19_doc(self):
    """Load each 'dl19-doc*' topic set; expect 43 topics and a non-str first key."""
    names = ('dl19-doc', 'dl19-doc-unicoil', 'dl19-doc-unicoil-noexp')
    for name in names:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), 43)
        first_key = next(iter(loaded.keys()))
        # The check is phrased negatively: the key must not be a str.
        self.assertFalse(isinstance(first_key, str))
def test_dl19_passage(self):
    """Load each 'dl19-passage*' topic set; expect 43 topics and a non-str first key."""
    names = ('dl19-passage', 'dl19-passage-unicoil', 'dl19-passage-unicoil-noexp')
    for name in names:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), 43)
        first_key = next(iter(loaded.keys()))
        # The check is phrased negatively: the key must not be a str.
        self.assertFalse(isinstance(first_key, str))
def test_dl20(self):
    """Load each 'dl20*' topic set; expect 200 topics and a non-str first key."""
    names = ('dl20', 'dl20-unicoil', 'dl20-unicoil-noexp')
    for name in names:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), 200)
        first_key = next(iter(loaded.keys()))
        # The check is phrased negatively: the key must not be a str.
        self.assertFalse(isinstance(first_key, str))
# MS MARCO V2
def test_msmarco_v2_doc(self):
    """Check the 'msmarco-v2-doc-*' topic sets, then the 'msmarco-v2-doc-dev2' qrels.

    Every topic set must load, have the listed size, and yield an int first
    key; the same three checks are then applied to the qrels.
    """
    expected_sizes = (
        ('msmarco-v2-doc-dev', 4552),
        ('msmarco-v2-doc-dev-unicoil', 4552),
        ('msmarco-v2-doc-dev-unicoil-noexp', 4552),
        ('msmarco-v2-doc-dev2', 5000),
        ('msmarco-v2-doc-dev2-unicoil', 5000),
        ('msmarco-v2-doc-dev2-unicoil-noexp', 5000),
    )
    for name, size in expected_sizes:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), size)
        first_key = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_key, int))

    # Relevance judgments are fetched through get_qrels rather than get_topics.
    judgments = search.get_qrels('msmarco-v2-doc-dev2')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 5000)
    first_qrel_key = next(iter(judgments.keys()))
    self.assertTrue(isinstance(first_qrel_key, int))
def test_msmarco_v2_passage(self):
    """Load each 'msmarco-v2-passage-*' topic set below; check its size and int first key."""
    expected_sizes = (
        ('msmarco-v2-passage-dev', 3903),
        ('msmarco-v2-passage-dev-unicoil', 3903),
        ('msmarco-v2-passage-dev-unicoil-noexp', 3903),
        ('msmarco-v2-passage-dev2', 4281),
        ('msmarco-v2-passage-dev2-unicoil', 4281),
        ('msmarco-v2-passage-dev2-unicoil-noexp', 4281),
    )
    for name, size in expected_sizes:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), size)
        first_key = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_key, int))
# TODO: Add DL21
# Various multi-lingual test collections
def test_ntcir8_zh(self):
    """Load 'ntcir8-zh'; expect 73 topics whose first key is a str."""
    loaded = search.get_topics('ntcir8-zh')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 73)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, str))
def test_clef2006_fr(self):
    """Load 'clef2006-fr'; expect 49 topics whose first key is a str."""
    loaded = search.get_topics('clef2006-fr')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 49)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, str))
def test_trec2002_ar(self):
    """Load 'trec2002-ar'; expect 50 topics whose first key is an int."""
    loaded = search.get_topics('trec2002-ar')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 50)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_fire2012_bn(self):
    """Load 'fire2012-bn'; expect 50 topics whose first key is an int."""
    loaded = search.get_topics('fire2012-bn')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 50)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_fire2012_hi(self):
    """Load 'fire2012-hi'; expect 50 topics whose first key is an int."""
    loaded = search.get_topics('fire2012-hi')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 50)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_fire2012_en(self):
    """Load 'fire2012-en'; expect 50 topics whose first key is an int."""
    loaded = search.get_topics('fire2012-en')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 50)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
# Epidemic QA
def test_epidemic_qa_expert_prelim(self):
    """Load 'epidemic-qa-expert-prelim'; expect 45 topics whose first key is an int."""
    loaded = search.get_topics('epidemic-qa-expert-prelim')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 45)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_epidemic_qa_consumer_prelim(self):
    """Load 'epidemic-qa-consumer-prelim'; expect 42 topics whose first key is an int."""
    loaded = search.get_topics('epidemic-qa-consumer-prelim')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 42)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
# DPR datasets
def test_dpr_nq_dev(self):
    """Load 'dpr-nq-dev'; expect 8757 topics whose first key is an int."""
    loaded = search.get_topics('dpr-nq-dev')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 8757)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_dpr_nq_test(self):
    """Load 'dpr-nq-test'; expect 3610 topics whose first key is an int."""
    loaded = search.get_topics('dpr-nq-test')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 3610)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_dpr_wq_test(self):
    """Load 'dpr-wq-test'; expect 2032 topics whose first key is an int."""
    loaded = search.get_topics('dpr-wq-test')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 2032)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_dpr_squad_test(self):
    """Load 'dpr-squad-test'; expect 10570 topics whose first key is an int."""
    loaded = search.get_topics('dpr-squad-test')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 10570)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_dpr_curated_test(self):
    """Load 'dpr-curated-test'; expect 694 topics whose first key is an int."""
    loaded = search.get_topics('dpr-curated-test')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 694)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_dpr_trivia_test(self):
    """Load 'dpr-trivia-test'; expect 11313 topics whose first key is an int."""
    loaded = search.get_topics('dpr-trivia-test')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 11313)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_dpr_trivia_dev(self):
    """Load 'dpr-trivia-dev'; expect 8837 topics whose first key is an int."""
    loaded = search.get_topics('dpr-trivia-dev')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 8837)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
# GarT5 topics
def test_gart5_nq_test(self):
    """Load each 'nq-test-gar-t5-*' topic set; expect 3610 topics and an int first key."""
    names = (
        'nq-test-gar-t5-answers',
        'nq-test-gar-t5-titles',
        'nq-test-gar-t5-sentences',
        'nq-test-gar-t5-all',
    )
    for name in names:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), 3610)
        first_key = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_key, int))
def test_gart5_trivia_test(self):
    """Load each 'dpr-trivia-test-gar-t5-*' topic set; expect 11313 topics and an int first key."""
    names = (
        'dpr-trivia-test-gar-t5-answers',
        'dpr-trivia-test-gar-t5-titles',
        'dpr-trivia-test-gar-t5-sentences',
        'dpr-trivia-test-gar-t5-all',
    )
    for name in names:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), 11313)
        first_key = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_key, int))
# TREC-COVID
def test_covid_round1(self):
    """Check 'covid-round1' and 'covid-round1-udel'.

    Each set must load, hold 30 topics, carry the expected 'query' text for
    topics 1 and 30, and yield an int first key.
    """
    cases = (
        ('covid-round1', 'coronavirus origin', 'coronavirus remdesivir'),
        ('covid-round1-udel',
         'coronavirus origin origin COVID-19',
         'coronavirus remdesivir remdesivir effective treatment COVID-19'),
    )
    for name, first_query, last_query in cases:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), 30)
        self.assertEqual(first_query, loaded[1]['query'])
        self.assertEqual(last_query, loaded[30]['query'])
        first_key = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_key, int))
def test_covid_round2(self):
    """Check 'covid-round2' and 'covid-round2-udel'.

    Each set must load, hold 35 topics, carry the expected 'query' text for
    topics 1 and 35, and yield an int first key.
    """
    cases = (
        ('covid-round2', 'coronavirus origin', 'coronavirus public datasets'),
        ('covid-round2-udel',
         'coronavirus origin origin COVID-19',
         'coronavirus public datasets public datasets COVID-19'),
    )
    for name, first_query, last_query in cases:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), 35)
        self.assertEqual(first_query, loaded[1]['query'])
        self.assertEqual(last_query, loaded[35]['query'])
        first_key = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_key, int))
def test_covid_round3(self):
    """Check 'covid-round3' and 'covid-round3-udel'.

    Each set must load, hold 40 topics, carry the expected 'query' text for
    topics 1 and 40, and yield an int first key.
    """
    cases = (
        ('covid-round3', 'coronavirus origin', 'coronavirus mutations'),
        ('covid-round3-udel',
         'coronavirus origin origin COVID-19',
         'coronavirus mutations observed mutations SARS-CoV-2 genome mutations'),
    )
    for name, first_query, last_query in cases:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), 40)
        self.assertEqual(first_query, loaded[1]['query'])
        self.assertEqual(last_query, loaded[40]['query'])
        first_key = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_key, int))
def test_covid_round4(self):
    """Check 'covid-round4' and 'covid-round4-udel'.

    Each set must load, hold 45 topics, carry the expected 'query' text for
    topics 1 and 45, and yield an int first key.
    """
    cases = (
        ('covid-round4', 'coronavirus origin', 'coronavirus mental health impact'),
        ('covid-round4-udel',
         'coronavirus origin origin COVID-19',
         'coronavirus mental health impact COVID-19 pandemic impacted mental health'),
    )
    for name, first_query, last_query in cases:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), 45)
        self.assertEqual(first_query, loaded[1]['query'])
        self.assertEqual(last_query, loaded[45]['query'])
        first_key = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_key, int))
def test_covid_round5(self):
    """Check 'covid-round5' and 'covid-round5-udel'.

    Each set must load, hold 50 topics, carry the expected 'query' text for
    topics 1 and 50, and yield an int first key.

    The key-type assertion used to run twice for 'covid-round5' (before and
    after the query checks). The redundant first copy is dropped so this test
    follows the same assertion sequence as the other covid round tests.
    """
    cases = (
        ('covid-round5', 'coronavirus origin', 'mRNA vaccine coronavirus'),
        ('covid-round5-udel',
         'coronavirus origin origin COVID-19',
         'mRNA vaccine coronavirus mRNA vaccine SARS-CoV-2 virus'),
    )
    for name, first_query, last_query in cases:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), 50)
        self.assertEqual(first_query, loaded[1]['query'])
        self.assertEqual(last_query, loaded[50]['query'])
        first_key = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_key, int))
# TREC News Tracks
def test_trec2018_bl(self):
    """Load 'trec2018-bl'; expect 50 topics, two known 'title' values, and an int first key."""
    loaded = search.get_topics('trec2018-bl')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 50)
    # Spot-check the 'title' field of two specific topic ids.
    expected_titles = (
        (393, 'fef0f232a9bd94bdb96bac48c7705503'),
        (825, 'a1c41a70-35c7-11e3-8a0e-4e2cf80831fc'),
    )
    for topic_id, title in expected_titles:
        self.assertEqual(title, loaded[topic_id]['title'])
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_trec2019_bl(self):
    """Load 'trec2019-bl'; expect 60 topics, two known 'title' values, and an int first key."""
    loaded = search.get_topics('trec2019-bl')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 60)
    # Spot-check the 'title' field of two specific topic ids.
    expected_titles = (
        (870, 'd7d906991e2883889f850de9ae06655e'),
        (829, '0d7f5e24cafc019265d3ee4b9745e7ea'),
    )
    for topic_id, title in expected_titles:
        self.assertEqual(title, loaded[topic_id]['title'])
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_trec2020_bl(self):
    """Load 'trec2020-bl'; expect 50 topics whose first key is an int."""
    loaded = search.get_topics('trec2020-bl')
    self.assertIsNotNone(loaded)
    self.assertEqual(len(loaded), 50)
    first_key = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_key, int))
def test_mrtydi_11_ar(self):
    """Load the 'mrtydi-v1.1-arabic-*' train/dev/test topics; check sizes and int first key."""
    expected_sizes = (
        ('mrtydi-v1.1-arabic-train', 12377),
        ('mrtydi-v1.1-arabic-dev', 3115),
        ('mrtydi-v1.1-arabic-test', 1081),
    )
    for name, size in expected_sizes:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), size)
        first_key = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_key, int))
def test_mrtydi_11_bn(self):
    """Load the 'mrtydi-v1.1-bengali-*' train/dev/test topics; check sizes and int first key."""
    expected_sizes = (
        ('mrtydi-v1.1-bengali-train', 1713),
        ('mrtydi-v1.1-bengali-dev', 440),
        ('mrtydi-v1.1-bengali-test', 111),
    )
    for name, size in expected_sizes:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), size)
        first_key = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_key, int))
def test_mrtydi_11_en(self):
    """Load the 'mrtydi-v1.1-english-*' train/dev/test topics; check sizes and int first key."""
    expected_sizes = (
        ('mrtydi-v1.1-english-train', 3547),
        ('mrtydi-v1.1-english-dev', 878),
        ('mrtydi-v1.1-english-test', 744),
    )
    for name, size in expected_sizes:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), size)
        first_key = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_key, int))
def test_mrtydi_11_fi(self):
    """Load the 'mrtydi-v1.1-finnish-*' train/dev/test topics; check sizes and int first key."""
    expected_sizes = (
        ('mrtydi-v1.1-finnish-train', 6561),
        ('mrtydi-v1.1-finnish-dev', 1738),
        ('mrtydi-v1.1-finnish-test', 1254),
    )
    for name, size in expected_sizes:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), size)
        first_key = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_key, int))
def test_mrtydi_11_id(self):
    """Load the 'mrtydi-v1.1-indonesian-*' train/dev/test topics; check sizes and int first key."""
    expected_sizes = (
        ('mrtydi-v1.1-indonesian-train', 4902),
        ('mrtydi-v1.1-indonesian-dev', 1224),
        ('mrtydi-v1.1-indonesian-test', 829),
    )
    for name, size in expected_sizes:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), size)
        first_key = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_key, int))
def test_mrtydi_11_ja(self):
    """Load the 'mrtydi-v1.1-japanese-*' train/dev/test topics; check sizes and int first key."""
    expected_sizes = (
        ('mrtydi-v1.1-japanese-train', 3697),
        ('mrtydi-v1.1-japanese-dev', 928),
        ('mrtydi-v1.1-japanese-test', 720),
    )
    for name, size in expected_sizes:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), size)
        first_key = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_key, int))
def test_mrtydi_11_ko(self):
    """Load the 'mrtydi-v1.1-korean-*' train/dev/test topics; check sizes and int first key."""
    expected_sizes = (
        ('mrtydi-v1.1-korean-train', 1295),
        ('mrtydi-v1.1-korean-dev', 303),
        ('mrtydi-v1.1-korean-test', 421),
    )
    for name, size in expected_sizes:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), size)
        first_key = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_key, int))
def test_mrtydi_11_ru(self):
    """Load the 'mrtydi-v1.1-russian-*' train/dev/test topics; check sizes and int first key."""
    expected_sizes = (
        ('mrtydi-v1.1-russian-train', 5366),
        ('mrtydi-v1.1-russian-dev', 1375),
        ('mrtydi-v1.1-russian-test', 995),
    )
    for name, size in expected_sizes:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), size)
        first_key = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_key, int))
def test_mrtydi_11_sw(self):
    """Load the 'mrtydi-v1.1-swahili-*' train/dev/test topics; check sizes and int first key."""
    expected_sizes = (
        ('mrtydi-v1.1-swahili-train', 2072),
        ('mrtydi-v1.1-swahili-dev', 526),
        ('mrtydi-v1.1-swahili-test', 670),
    )
    for name, size in expected_sizes:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), size)
        first_key = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_key, int))
def test_mrtydi_11_te(self):
    """Load the 'mrtydi-v1.1-telugu-*' train/dev/test topics; check sizes and int first key."""
    expected_sizes = (
        ('mrtydi-v1.1-telugu-train', 3880),
        ('mrtydi-v1.1-telugu-dev', 983),
        ('mrtydi-v1.1-telugu-test', 646),
    )
    for name, size in expected_sizes:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), size)
        first_key = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_key, int))
def test_mrtydi_11_th(self):
    """Load the 'mrtydi-v1.1-thai-*' train/dev/test topics; check sizes and int first key."""
    expected_sizes = (
        ('mrtydi-v1.1-thai-train', 3319),
        ('mrtydi-v1.1-thai-dev', 807),
        ('mrtydi-v1.1-thai-test', 1190),
    )
    for name, size in expected_sizes:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), size)
        first_key = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_key, int))
def test_beir(self):
    """Load every 'beir-v1.0.0-*-test' topic set below and check its size.

    Only non-None and length are asserted here; key types are not checked.
    """
    expected_sizes = (
        ('beir-v1.0.0-trec-covid-test', 50),
        ('beir-v1.0.0-bioasq-test', 500),
        ('beir-v1.0.0-nfcorpus-test', 323),
        ('beir-v1.0.0-nq-test', 3452),
        ('beir-v1.0.0-hotpotqa-test', 7405),
        ('beir-v1.0.0-fiqa-test', 648),
        ('beir-v1.0.0-signal1m-test', 97),
        ('beir-v1.0.0-trec-news-test', 57),
        ('beir-v1.0.0-robust04-test', 249),
        ('beir-v1.0.0-arguana-test', 1406),
        ('beir-v1.0.0-webis-touche2020-test', 49),
        ('beir-v1.0.0-cqadupstack-android-test', 699),
        ('beir-v1.0.0-cqadupstack-english-test', 1570),
        ('beir-v1.0.0-cqadupstack-gaming-test', 1595),
        ('beir-v1.0.0-cqadupstack-gis-test', 885),
        ('beir-v1.0.0-cqadupstack-mathematica-test', 804),
        ('beir-v1.0.0-cqadupstack-physics-test', 1039),
        ('beir-v1.0.0-cqadupstack-programmers-test', 876),
        ('beir-v1.0.0-cqadupstack-stats-test', 652),
        ('beir-v1.0.0-cqadupstack-tex-test', 2906),
        ('beir-v1.0.0-cqadupstack-unix-test', 1072),
        ('beir-v1.0.0-cqadupstack-webmasters-test', 506),
        ('beir-v1.0.0-cqadupstack-wordpress-test', 541),
        ('beir-v1.0.0-quora-test', 10000),
        ('beir-v1.0.0-dbpedia-entity-test', 400),
        ('beir-v1.0.0-scidocs-test', 1000),
        ('beir-v1.0.0-fever-test', 6666),
        ('beir-v1.0.0-climate-fever-test', 1535),
        ('beir-v1.0.0-scifact-test', 300),
    )
    # Checks run in listed order; the first failing assertion aborts the test.
    for name, size in expected_sizes:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), size)
def test_hc4_1_0_fa(self):
    """Load each HC4 v1.0 'fa' topic set and check its size and key type."""
    # (topic-set key, expected number of topics), in the order they are loaded.
    expected_sizes = [
        ('hc4-v1.0-fa-dev-title', 10),
        ('hc4-v1.0-fa-dev-desc', 10),
        ('hc4-v1.0-fa-dev-desc-title', 10),
        ('hc4-v1.0-fa-test-title', 50),
        ('hc4-v1.0-fa-test-desc', 50),
        ('hc4-v1.0-fa-test-desc-title', 50),
        ('hc4-v1.0-fa-en-test-title', 50),
        ('hc4-v1.0-fa-en-test-desc', 50),
        ('hc4-v1.0-fa-en-test-desc-title', 50),
    ]
    for name, size in expected_sizes:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), size)
        # Only the first key is inspected; it is expected to be an int.
        first_qid = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_qid, int))
def test_hc4_1_0_ru(self):
    """Load each HC4 v1.0 'ru' topic set and check its size and key type."""
    # (topic-set key, expected number of topics), in the order they are loaded.
    expected_sizes = [
        ('hc4-v1.0-ru-dev-title', 4),
        ('hc4-v1.0-ru-dev-desc', 4),
        ('hc4-v1.0-ru-dev-desc-title', 4),
        ('hc4-v1.0-ru-test-title', 50),
        ('hc4-v1.0-ru-test-desc', 50),
        ('hc4-v1.0-ru-test-desc-title', 50),
        ('hc4-v1.0-ru-en-test-title', 50),
        ('hc4-v1.0-ru-en-test-desc', 50),
        ('hc4-v1.0-ru-en-test-desc-title', 50),
    ]
    for name, size in expected_sizes:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), size)
        # Only the first key is inspected; it is expected to be an int.
        first_qid = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_qid, int))
def test_hc4_1_0_zh(self):
    """Load each HC4 v1.0 'zh' topic set and check its size and key type."""
    # NOTE(review): the sibling fa/ru tests also load '<lang>-test-*' sets;
    # no 'hc4-v1.0-zh-test-*' keys are loaded here -- confirm that is intended.
    # (topic-set key, expected number of topics), in the order they are loaded.
    expected_sizes = [
        ('hc4-v1.0-zh-dev-title', 10),
        ('hc4-v1.0-zh-dev-desc', 10),
        ('hc4-v1.0-zh-dev-desc-title', 10),
        ('hc4-v1.0-zh-en-test-title', 50),
        ('hc4-v1.0-zh-en-test-desc', 50),
        ('hc4-v1.0-zh-en-test-desc-title', 50),
    ]
    for name, size in expected_sizes:
        loaded = search.get_topics(name)
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), size)
        # Only the first key is inspected; it is expected to be an int.
        first_qid = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_qid, int))
def test_neurclir22(self):
    """Load every 'neuclir22-*' topic set and check its size and key type.

    Covers the three English sets plus, for each of 'fa', 'ru' and 'zh', the
    'ht' and 'mt' variants of the title / desc / desc-title sets. Every set is
    expected to hold 114 topics whose first key is an int.

    The keys are generated from the field tuple rather than spelled out, so a
    copy-paste slip cannot silently test one set twice and skip another (the
    English list previously repeated 'neuclir22-en-title' and never loaded
    'neuclir22-en-desc').
    """
    fields = ('title', 'desc', 'desc-title')

    # English sets first, then <lang>-<variant> sets, in a stable order.
    keys = [f'neuclir22-en-{field}' for field in fields]
    for lang in ('fa', 'ru', 'zh'):
        for variant in ('ht', 'mt'):
            keys.extend(f'neuclir22-{lang}-{variant}-{field}' for field in fields)

    for key in keys:
        # subTest makes a failure report name the offending topic-set key.
        with self.subTest(key=key):
            topics = search.get_topics(key)
            self.assertIsNotNone(topics)
            self.assertEqual(len(topics), 114)
            self.assertIsInstance(next(iter(topics.keys())), int)
def test_miracl_10(self):
    """Load each 'miracl-v1.0-<lang>-dev' topic set and check size and key type."""
    # (language code, expected number of topics, expected type of the first
    # topic id), in the order they are loaded. Some sets are expected to be
    # keyed by int and others by str.
    dev_sets = [
        ('ar', 2896, int),
        ('bn', 411, int),
        ('en', 799, int),
        ('es', 648, str),
        ('fa', 632, str),
        ('fi', 1271, int),
        ('fr', 343, str),
        ('hi', 350, str),
        ('id', 960, int),
        ('ja', 860, int),
        ('ko', 213, int),
        ('ru', 1252, int),
        ('sw', 482, int),
        ('te', 828, int),
        ('th', 733, int),
        ('zh', 393, str),
        ('de', 305, str),
        ('yo', 119, str),
    ]
    for lang, size, qid_type in dev_sets:
        loaded = search.get_topics(f'miracl-v1.0-{lang}-dev')
        self.assertIsNotNone(loaded)
        self.assertEqual(len(loaded), size)
        first_qid = next(iter(loaded.keys()))
        self.assertTrue(isinstance(first_qid, qid_type))
# General test cases
def test_tsv_int_topicreader(self):
    """Read the MS MARCO doc dev topics file with TsvIntTopicReader.

    Checks the topic count and key type, and that the result equals what
    ``search.get_topics('msmarco-doc-dev')`` returns.
    """
    # From the command line the working directory is the repo root; from an
    # IDE it is tests/, in which case the file sits one directory up.
    relative = 'tools/topics-and-qrels/topics.msmarco-doc.dev.txt'
    topics_file = relative if os.path.exists(relative) else f'../{relative}'
    self.assertTrue(os.path.exists(topics_file))

    reader_class = 'io.anserini.search.topicreader.TsvIntTopicReader'
    loaded = search.get_topics_with_reader(reader_class, topics_file)
    self.assertEqual(len(loaded), 5193)
    first_qid = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_qid, int))
    self.assertEqual(search.get_topics('msmarco-doc-dev'), loaded)
def test_trec_topicreader(self):
    """Read the robust04 topics file with TrecTopicReader.

    Checks the topic count and key type, and that the result equals what
    ``search.get_topics('robust04')`` returns.
    """
    # From the command line the working directory is the repo root; from an
    # IDE it is tests/, in which case the file sits one directory up.
    relative = 'tools/topics-and-qrels/topics.robust04.txt'
    topics_file = relative if os.path.exists(relative) else f'../{relative}'
    self.assertTrue(os.path.exists(topics_file))

    reader_class = 'io.anserini.search.topicreader.TrecTopicReader'
    loaded = search.get_topics_with_reader(reader_class, topics_file)
    self.assertEqual(len(loaded), 250)
    first_qid = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_qid, int))
    self.assertEqual(search.get_topics('robust04'), loaded)
def test_trec_topicreader_nonint_qid(self):
    """Read a sample TSV whose query ids are not integers.

    Checks that three topics are loaded, that the keys are strings, and that
    the key set is exactly {'30_1', '30_2', '30_3'}.
    """
    # NOTE(review): the method name says "trec" but the reader used below is
    # TsvStringTopicReader -- confirm whether the name should be updated.
    # From the command line the working directory is the repo root; from an
    # IDE it is tests/, in which case the file sits one directory up.
    relative = 'tests/resources/sample_queries_nonint_qid.tsv'
    topics_file = relative if os.path.exists(relative) else f'../{relative}'
    self.assertTrue(os.path.exists(topics_file))

    reader_class = 'io.anserini.search.topicreader.TsvStringTopicReader'
    loaded = search.get_topics_with_reader(reader_class, topics_file)
    self.assertEqual(len(loaded), 3)
    first_qid = next(iter(loaded.keys()))
    self.assertTrue(isinstance(first_qid, str))
    self.assertEqual({'30_1', '30_2', '30_3'}, set(loaded))
if __name__ == '__main__':
    # Executed directly as a script: hand control to unittest's runner.
    unittest.main()