Skip to content

Commit

Permalink
Make parse_text private static method
Browse files Browse the repository at this point in the history
  • Loading branch information
hagenw committed Jul 25, 2024
1 parent bb3820b commit 7b4aaaf
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 45 deletions.
34 changes: 33 additions & 1 deletion audbcards/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import inspect
import os
import pickle
import re
import typing

import jinja2
Expand Down Expand Up @@ -166,6 +167,33 @@ def _load_pickled(path: str):
with open(path, "rb") as f:
return pickle.load(f)

@staticmethod
def _parse_text(text: str) -> str:
"""Remove unsupported characters and restrict length.
The text is stripped from HTML tags or newlines,
and limited to a maximum length of 100 characters.
Args:
text: input text
Returns:
parsed text
"""
# Missing text
if pd.isna(text):
return ""
# Remove newlines
text = text.replace("\n", "\\n")
# Remove HTML tags
text = re.sub("<[^<]+?>", "", text)
# Limit length
max_characters_per_entry = 100
if len(text) > max_characters_per_entry:
text = text[: max_characters_per_entry - 3] + "..."
return text

@staticmethod
def _save_pickled(obj, path: str):
"""Save object instance to path as pickle."""
Expand Down Expand Up @@ -523,7 +551,7 @@ def tables_preview(self) -> typing.Dict[str, typing.List[typing.List[str]]]:
header = [df.columns.tolist()]
body = df.head(5).astype("string").values.tolist()
# Remove unwanted chars and limit length of each entry
body = [[utils.parse_text(column) for column in row] for row in body]
body = [[self._parse_text(column) for column in row] for row in body]
preview[table] = header + body
return preview

Expand Down Expand Up @@ -824,6 +852,10 @@ def _load_pickled(path: str):
ds = _Dataset._load_pickled(path)
return ds

@staticmethod
def _parse_text(text: str) -> str:
    """Forward ``text`` to ``_Dataset._parse_text`` and return its result.

    Args:
        text: input text

    Returns:
        parsed text

    """
    parsed = _Dataset._parse_text(text)
    return parsed

@staticmethod
def _save_pickled(obj, path: str):
"""Save object instance to path as pickle."""
Expand Down
29 changes: 0 additions & 29 deletions audbcards/core/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import re
import typing

import matplotlib.pyplot as plt
import pandas as pd

import audeer
import audformat
Expand Down Expand Up @@ -101,33 +99,6 @@ def limit_presented_samples(
return samples


def parse_text(text: str) -> str:
    """Escape newlines, strip HTML tags and restrict length.

    A missing value results in an empty string.
    Newline characters are not deleted,
    but replaced by a literal backslash followed by ``n``.
    Afterwards HTML tags are removed,
    and the text is limited to a maximum length of 100 characters,
    using ``...`` as the last three characters if it was shortened.

    Args:
        text: input text

    Returns:
        parsed text

    """
    # None, NaN and pd.NA all count as missing
    if pd.isna(text):
        return ""

    max_length = 100
    # Newlines are escaped before the tag pattern is applied
    parsed = re.sub("<[^<]+?>", "", text.replace("\n", "\\n"))
    too_long = len(parsed) > max_length
    # Keep 97 characters and append the three dots when shortening
    return parsed[: max_length - 3] + "..." if too_long else parsed


def set_plot_margins(
*,
left=0,
Expand Down
15 changes: 15 additions & 0 deletions tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,21 @@ def test_dataset_cache_loading(audb_cache, tmpdir, repository, db, request):
assert dataset.repository_object == repository


@pytest.mark.parametrize(
    "text, expected",
    [
        # Newline is escaped, not dropped
        ("abc\ndef", "abc\\ndef"),
        # One character over the limit: 97 characters plus "..."
        ("a" * 101, "a" * 97 + "..."),
        # HTML tags are removed, their content is kept
        ('<a href="http://www.google.de">text link</a>', "text link"),
        # Missing values result in an empty string
        (None, ""),
        (pd.NA, ""),
    ],
)
def test_dataset_parse_text(text, expected):
    """Test parsing of text with ``audbcards.Dataset._parse_text``."""
    parsed = audbcards.Dataset._parse_text(text)
    assert parsed == expected


class TestDatasetLoadTables:
r"""Test load_tables argument of audbcards.Dataset."""

Expand Down
15 changes: 0 additions & 15 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,18 +69,3 @@ def test_limit_presented_samples(sample, limit, replacement_text, expected):
sample, limit, replacement_text
)
assert limited_sample == expected


@pytest.mark.parametrize(
    "text, expected",
    [
        # Newline is escaped, not dropped
        ("abc\ndef", "abc\\ndef"),
        # One character over the limit: 97 characters plus "..."
        ("a" * 101, "a" * 97 + "..."),
        # HTML tags are removed, their content is kept
        ('<a href="http://www.google.de">text link</a>', "text link"),
        # Missing values result in an empty string
        (None, ""),
        (pd.NA, ""),
    ],
)
def test_parse_text(text, expected):
    """Test parsing of text with ``audbcards.core.utils.parse_text``."""
    parsed = audbcards.core.utils.parse_text(text)
    assert parsed == expected

0 comments on commit 7b4aaaf

Please sign in to comment.