# ord / app.py — Hugging Face Space file (author: unijoh, commit b605bba, verified)
import itertools
import re

import gradio as gr
import pandas as pd
# Load and parse the CSV file from Hugging Face
def load_data(url="https://huggingface.co/datasets/unijoh/RAVNlex/resolve/main/RAVNlex_small.csv"):
    """Load the RAVNlex lexicon from a tab-separated file into a lemma dict.

    Parameters
    ----------
    url : str
        Path or URL readable by ``pandas.read_csv`` (defaults to the
        RAVNlex_small dataset on Hugging Face).

    Returns
    -------
    dict[str, list[dict]]
        Maps each lemma to a list of form dicts with keys ``word``,
        ``PPOS``, ``PHON1``, ``PHON2``, ``COMM`` and ``pronunciations``
        (any columns beyond the first five).

    Notes
    -----
    The file groups consecutive rows per lemma; a row whose first field
    is ``---`` separates one lemma's forms from the next.
    """
    df = pd.read_csv(url, delimiter='\t', encoding='iso-8859-10', dtype=str)
    lemmas = {}
    current_lemma = None
    for row in df.itertuples(index=False, name=None):
        if len(row) < 5:
            print(f"Skipping problematic line: {row}")
            continue
        # pandas yields NaN (a float) for empty cells; normalise everything to "".
        orto, ppos, phon1, phon2, comm, *pronunciations = (
            x if isinstance(x, str) else "" for x in row
        )
        if orto == '---':
            # Separator row: the next data row starts a new lemma.
            current_lemma = None
            continue
        word = orto.replace("ORTO:", "")
        if current_lemma is None:
            # First row of a group: its (stripped) ORTO field names the lemma.
            current_lemma = word
            lemmas[current_lemma] = []
        # Both the head row and subsequent form rows carry the same fields,
        # so one dict literal serves both (the original duplicated it).
        lemmas[current_lemma].append({
            'word': word,
            'PPOS': ppos.replace("PPOS:", "") if ppos else "",
            'PHON1': phon1.replace("PHON:", "") if phon1 else "",
            'PHON2': phon2.replace("PHON:", "") if phon2 else "",
            'COMM': comm if comm else "",
            'pronunciations': pronunciations
        })
    print("Loaded lemmas:", lemmas)  # Debugging output
    return lemmas
# Module-level cache: the full lexicon is loaded once at import/startup.
lemmas = load_data()
def expand_ppos(ppos):
    """Expand bracketed character alternations in a PPOS tag.

    A tag like ``ncms[nad]==iuu`` is shorthand for the three tags
    ``ncmsn==iuu``, ``ncmsa==iuu`` and ``ncmsd==iuu``.  Every ``[...]``
    group is expanded (the original implementation only expanded the
    first group, leaving later brackets unresolved so they could never
    match a table key).

    Parameters
    ----------
    ppos : str
        The raw (lowercased) PPOS tag.

    Returns
    -------
    list[str]
        All concrete tags; ``[ppos]`` unchanged when no brackets occur.
    """
    # re.split with a capture group interleaves literal text (even
    # indices) with bracket contents (odd indices).
    segments = re.split(r'\[([^\]]+)\]', ppos)
    literals = segments[0::2]
    groups = segments[1::2]
    if not groups:
        return [ppos]
    expanded = []
    # Cartesian product: one character chosen from each bracket group.
    for combo in itertools.product(*groups):
        parts = [literals[0]]
        for ch, lit in zip(combo, literals[1:]):
            parts.append(ch)
            parts.append(lit)
        expanded.append(''.join(parts))
    return expanded
def create_noun_table(lemma, forms):
    """Render an HTML declension table for a noun lemma.

    Args:
        lemma: The headword, used only for the debug printout.
        forms: List of form dicts (as built by ``load_data``) whose
            ``PPOS`` tags are matched against the key grid below.

    Returns:
        An HTML ``<table>`` string with singular/plural columns, each
        split into indefinite ("Óbundið") and definite ("Bundið"), and
        one row per case (nominative, accusative, dative, genitive).
    """
    # Grid of expected lowercase PPOS tags, initialised empty.  Key shape
    # appears to be nc + gender(m/f/n) + number + case + '==' + suffix;
    # NOTE(review): feminine/neuter plural keys use 'ncfnn'/'ncnnn' where
    # masculine uses 'ncmpn' — presumably dataset convention, but confirm
    # these are not typos for 'ncfpn'/'ncnpn'.
    table_data = {
        'ncmsn==iuu': '', 'ncmsn==duu': '', 'ncfsn==iuu': '', 'ncfsn==duu': '', 'ncnsn==iuu': '', 'ncnsn==duu': '',
        'ncmsa==iuu': '', 'ncmsa==duu': '', 'ncfsa==iuu': '', 'ncfsa==duu': '', 'ncnsa==iuu': '', 'ncnsa==duu': '',
        'ncmsd==iuu': '', 'ncmsd==duu': '', 'ncfsd==iuu': '', 'ncfsd==duu': '', 'ncnsd==iuu': '', 'ncnsd==duu': '',
        'ncmsg==iou': '', 'ncmsg==dou': '', 'ncfsg==iou': '', 'ncfsg==dou': '', 'ncnsg==iou': '', 'ncnsg==dou': '',
        'ncmpn==iuu': '', 'ncmpn==duu': '', 'ncfnn==iuu': '', 'ncfnn==duu': '', 'ncnnn==iuu': '', 'ncnnn==duu': '',
        'ncmpa==iuu': '', 'ncmpa==duu': '', 'ncfna==iuu': '', 'ncfna==duu': '', 'ncnna==iuu': '', 'ncnna==duu': '',
        'ncmpd==iuu': '', 'ncmpd==duu': '', 'ncmpg==iou': '', 'ncmpg==dou': '', 'ncfnn==iou': '', 'ncfnn==dou': '', 'ncnnn==iou': '', 'ncnnn==dou': ''
    }
    # Fill the grid: every expanded PPOS variant that matches a key slots
    # the word form into that cell; unmatched variants are logged.
    for form in forms:
        ppos = form['PPOS'].lower()  # Normalize to lowercase
        word = form['word']
        print(f"Processing: word={word}, ppos={ppos}, key={ppos}")
        expanded_ppos_list = expand_ppos(ppos)
        for expanded_ppos in expanded_ppos_list:
            key = expanded_ppos
            if key in table_data:
                table_data[key] = word
            else:
                print(f"Unmatched key: {key} for word: {word} with PPOS: {ppos}")
    print(f"Final table data for {lemma}: {table_data}")  # Debugging output
    # Each cell falls back across genders (masc or fem or neut) since a
    # given lemma fills only one gender's keys.
    # NOTE(review): the dative-plural and genitive-plural rows fall back
    # to 'ncfnn'/'ncnnn' nominative-suffix keys rather than case-specific
    # ones — looks intentional given the key grid above, but verify
    # against the dataset.
    table = f"""
<table border="1">
    <thead>
        <tr>
            <th colspan="2">Eintal</th>
            <th colspan="2">Fleirtal</th>
        </tr>
        <tr>
            <th>Óbundið</th>
            <th>Bundið</th>
            <th>Óbundið</th>
            <th>Bundið</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <td>{table_data['ncmsn==iuu'] or table_data['ncfsn==iuu'] or table_data['ncnsn==iuu']}</td>
            <td>{table_data['ncmsn==duu'] or table_data['ncfsn==duu'] or table_data['ncnsn==duu']}</td>
            <td>{table_data['ncmpn==iuu'] or table_data['ncfnn==iuu'] or table_data['ncnnn==iuu']}</td>
            <td>{table_data['ncmpn==duu'] or table_data['ncfnn==duu'] or table_data['ncnnn==duu']}</td>
        </tr>
        <tr>
            <td>{table_data['ncmsa==iuu'] or table_data['ncfsa==iuu'] or table_data['ncnsa==iuu']}</td>
            <td>{table_data['ncmsa==duu'] or table_data['ncfsa==duu'] or table_data['ncnsa==duu']}</td>
            <td>{table_data['ncmpa==iuu'] or table_data['ncfna==iuu'] or table_data['ncnna==iuu']}</td>
            <td>{table_data['ncmpa==duu'] or table_data['ncfna==duu'] or table_data['ncnna==duu']}</td>
        </tr>
        <tr>
            <td>{table_data['ncmsd==iuu'] or table_data['ncfsd==iuu'] or table_data['ncnsd==iuu']}</td>
            <td>{table_data['ncmsd==duu'] or table_data['ncfsd==duu'] or table_data['ncnsd==duu']}</td>
            <td>{table_data['ncmpd==iuu'] or table_data['ncfnn==iuu'] or table_data['ncnnn==iuu']}</td>
            <td>{table_data['ncmpd==duu'] or table_data['ncfnn==duu'] or table_data['ncnnn==duu']}</td>
        </tr>
        <tr>
            <td>{table_data['ncmsg==iou'] or table_data['ncfsg==iou'] or table_data['ncnsg==iou']}</td>
            <td>{table_data['ncmsg==dou'] or table_data['ncfsg==dou'] or table_data['ncnsg==dou']}</td>
            <td>{table_data['ncmpg==iou'] or table_data['ncfnn==iou'] or table_data['ncnnn==iou']}</td>
            <td>{table_data['ncmpg==dou'] or table_data['ncfnn==dou'] or table_data['ncnnn==dou']}</td>
        </tr>
    </tbody>
</table>
"""
    return table
def search_lemma(lemma, data=None):
    """Look up a lemma and return its declension table as HTML.

    Parameters
    ----------
    lemma : str
        The headword to search for.
    data : dict | None
        Optional lexicon mapping (lemma -> list of form dicts); defaults
        to the module-level ``lemmas`` loaded at startup.

    Returns
    -------
    str
        An HTML table for nouns, otherwise a plain-text message.
    """
    source = lemmas if data is None else data
    results = source.get(lemma)
    if not results:
        return f"No results found for {lemma}"
    # The POS category is the first character of the PPOS tag; the
    # original substring test ('n' in tag) also matched gender/number
    # markers elsewhere in non-noun tags.
    if results[0]['PPOS'].lower().startswith('n'):
        return create_noun_table(lemma, results)
    return "Only noun tables are currently supported."
# Gradio wiring: single text input -> HTML output rendered by search_lemma.
iface = gr.Interface(
    fn=search_lemma,
    inputs="text",
    outputs="html",
    title="Lemma Search",
    description="Enter a lemma to search for its declensions and pronunciations."
)
if __name__ == "__main__":
    # Launch the web UI only when run as a script (not on import).
    iface.launch()