Spaces:
Runtime error
Runtime error
""" | |
The text processing functionality. | |
""" | |
from typing import List, Optional | |
import streamlit as st | |
import pandas as pd | |
import textdescriptives as td | |
def text_to_metrics(
    string: str,
    language_short: str,
    model_size_short: str,
    metrics: List[str],
    split_by_line: bool,
    filename: Optional[str],
) -> pd.DataFrame:
    """
    Extract the requested metrics from a text with `textdescriptives`.

    The text is stripped of surrounding whitespace and, when requested,
    split into one text per line. Empty (or whitespace-only) lines are
    dropped before the remaining texts are passed to
    `td.extract_metrics`.

    Parameters
    ----------
    string : str
        The raw text to process.
    language_short : str
        Forwarded as the `lang` argument of `td.extract_metrics`.
    model_size_short : str
        Forwarded as the `spacy_model_size` argument of
        `td.extract_metrics`.
    metrics : list of str
        Forwarded as the `metrics` argument of `td.extract_metrics`.
    split_by_line : bool
        Whether to treat every line of `string` as a separate text.
    filename : str or None
        When not `None`, a "File" column holding this value is added
        as the first column of the output.

    Returns
    -------
    `pandas.DataFrame`
        The data frame returned by `td.extract_metrics`, optionally with
        the "File" column in front.
    """
    # Clean and (optionally) split the text
    string = string.strip()
    if split_by_line:
        # Strip every line individually: splitting on "\n" leaves a trailing
        # "\r" on each line of text with Windows ("\r\n") line endings, which
        # would otherwise make blank lines look non-empty ("\r") and slip
        # through the empty-string filter below.
        strings = [line.strip() for line in string.split("\n")]
    else:
        strings = [string]

    # Remove empty strings
    # E.g. due to consecutive newlines
    strings = [s for s in strings if s]

    # NOTE(review): when the input contains no text at all, `strings` is an
    # empty list here; how `td.extract_metrics` handles that is not visible
    # from this function - confirm against the caller / library.

    # Will automatically download the relevant model and extract all metrics
    # TODO: Download beforehand to speed up inference
    df = td.extract_metrics(
        text=strings,
        lang=language_short,
        spacy_model_size=model_size_short,
        metrics=metrics,
    )

    # Add filename
    if filename is not None:
        df["File"] = filename
        move_column_inplace(df=df, col="File", pos=0)

    return df
def move_column_inplace(df: pd.DataFrame, col: str, pos: int) -> None:
    """
    Relocate a single column of a data frame, modifying the frame itself.

    Taken from the `utipy` package.

    Parameters
    ----------
    df : `pandas.DataFrame`.
        The data frame to modify. Nothing is returned.
    col : str
        Name of the column to relocate.
    pos : int
        Target column index for `col`. Must satisfy
        `0 <= pos < len(df.columns)`.
    """
    n_columns = len(df.columns)
    assert (
        pos >= 0 and pos < n_columns
    ), f"`pos` must be between 0 (incl.) and the number of columns -1. Was {pos}."

    # Take the column out of the frame, then put it back at the wanted index
    moved = df.pop(col)
    df.insert(pos, moved.name, moved)