Spaces:
Runtime error
Runtime error
File size: 1,588 Bytes
a177196 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
"""
The text processing functionality.
"""
from typing import List, Optional
import streamlit as st
import pandas as pd
import textdescriptives as td
@st.cache_data
def text_to_metrics(
    string: str,
    language_short: str,
    model_size_short: str,
    metrics: List[str],
    split_by_line: bool,
    filename: Optional[str],
) -> pd.DataFrame:
    """
    Extract textdescriptives metrics from a text.

    The result is cached by Streamlit via `st.cache_data`.

    Parameters
    ----------
    string : str
        The raw text. Leading and trailing whitespace is stripped.
    language_short : str
        Passed to `td.extract_metrics` as `lang`.
    model_size_short : str
        Passed to `td.extract_metrics` as `spacy_model_size`.
    metrics : list of str
        Passed to `td.extract_metrics` as `metrics`.
    split_by_line : bool
        Whether to split the text on newlines and treat each non-empty
        line as a separate text. Otherwise the text is processed as one.
    filename : str or None
        When not `None`, a "File" column holding this value is added
        as the first column of the output.

    Returns
    -------
    `pandas.DataFrame`
        The data frame returned by `td.extract_metrics`,
        optionally with the leading "File" column.
    """
    # Clean and (optionally) split the text
    cleaned = string.strip()
    candidates = cleaned.split("\n") if split_by_line else [cleaned]

    # Drop empty strings (e.g. caused by consecutive newlines)
    texts = list(filter(None, candidates))

    # Will automatically download the relevant model and extract all metrics
    # TODO: Download beforehand to speed up inference
    metrics_df = td.extract_metrics(
        text=texts,
        lang=language_short,
        spacy_model_size=model_size_short,
        metrics=metrics,
    )

    # Without a filename there is nothing more to add
    if filename is None:
        return metrics_df

    # Add the filename as the first column
    metrics_df["File"] = filename
    move_column_inplace(df=metrics_df, col="File", pos=0)
    return metrics_df
def move_column_inplace(df: pd.DataFrame, col: str, pos: int) -> None:
    """
    Move a column to a given column-index position.

    The data frame is modified in place; nothing is returned.

    Taken from the `utipy` package.

    Parameters
    ----------
    df : `pandas.DataFrame`.
        Data frame to reorder in place.
    col : str
        Name of column to move.
    pos : int
        Column index to move `col` to.
        Must satisfy `0 <= pos < len(df.columns)`.

    Raises
    ------
    AssertionError
        If `pos` is outside the valid range. `df` is left untouched.
    """
    # Explicit check instead of an `assert` statement: asserts are stripped
    # under `python -O`, in which case an invalid `pos` would let `pop`
    # remove the column before `insert` fails, losing the column.
    # `AssertionError` is kept so callers see the same exception type.
    if not 0 <= pos < len(df.columns):
        raise AssertionError(
            f"`pos` must be between 0 (incl.) and the number of columns -1. Was {pos}."
        )

    # Remove the column, then re-insert it under its own name at `pos`
    moved = df.pop(col)
    df.insert(pos, moved.name, moved)
|