Skip to content

Commit

Permalink
fixed code according to comments
Browse files: browse the repository at this point in the history
  • Loading branch information
Alexander Golodkov committed Jul 13, 2023
1 parent d2832a0 commit bdc6c5c
Show file tree
Hide file tree
Showing 47 changed files with 93 additions and 456 deletions.
2 changes: 1 addition & 1 deletion tests/api_tests/content_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def __check_metadata(self, metadata: dict) -> None:
if "other_fields" in metadata:
self.assertIsInstance(metadata["other_fields"], dict)

def check_english_doc(self, result: dict) -> None:
def _check_english_doc(self, result: dict) -> None:
content = result["content"]
structure = content["structure"]
self._check_tree_sanity(structure)
Expand Down
10 changes: 0 additions & 10 deletions tests/api_tests/test_api_doctype_law.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,16 +616,6 @@ def test_foiv_html(self) -> None:
self.assertEqual("4.2.1.", node["text"].strip())
self.assertEqual("item", node['metadata']['paragraph_type'])

@unittest.skip("TODO fix this")
def test_number_not_part(self) -> None:
file_name = "31(1).txt"
result = self._send_request(file_name, dict(document_type="law"), expected_code=200)
document_tree = result["content"]["structure"]
self.__test_law_tree_sanity(document_tree)
node = self._get_by_tree_path(document_tree, "0.0.3.5.0.0")
self.assertTrue(node["text"].strip().endswith("2 настоящей статьи."))
self.assertEqual("raw_text", node['metadata']['paragraph_type'])

def test_html_invisible_table(self) -> None:
file_name = "invisibly_table4.html"
result = self._send_request(file_name, dict(document_type="law"), expected_code=200)
Expand Down
2 changes: 1 addition & 1 deletion tests/api_tests/test_api_doctype_tz.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from tests.test_utils import tree2linear


class TestLawApiDocReader(AbstractTestApiDocReader):
class TestTZApiDocReader(AbstractTestApiDocReader):

def test_doc_tz(self) -> None:
file_name = "alpaca_tz.doc"
Expand Down
4 changes: 2 additions & 2 deletions tests/api_tests/test_api_format_archives.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def _check_archive_with_english_doc(self, file_name: str) -> None:
result = self._send_request(file_name, dict(with_attachments="True"))
self.assertEqual(len(result['attachments']), 4)
english_doc = [doc for doc in result['attachments'] if doc["metadata"]["file_name"].startswith("english_doc")][0]
self.check_english_doc(english_doc)
self._check_english_doc(english_doc)

def test_zip(self) -> None:
file_name = "arch_with_attachs.zip"
Expand Down Expand Up @@ -77,4 +77,4 @@ def test_broken_archive(self) -> None:
result = self._send_request(file_name, dict(with_attachments="True"))
self.assertEqual(len(result['attachments']), 7)
english_doc = [doc for doc in result['attachments'] if doc["metadata"]["file_name"].startswith("english_doc")][0]
self.check_english_doc(english_doc)
self._check_english_doc(english_doc)
6 changes: 0 additions & 6 deletions tests/api_tests/test_api_format_docx.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,18 +102,12 @@ def test_tricky_doc(self) -> None:
file_name = "doc.docx"
_ = self._send_request(file_name)

def test_broken_docx(self) -> None:
self._send_request("broken.docx", expected_code=415)

def test_not_stripped_xml(self) -> None:
self._send_request("not_stripped_xml.docx", expected_code=200)

def test_docx_with_comments(self) -> None:
_ = self._send_request("with_comments.docx", expected_code=200)

def test_send_wo_file(self) -> None:
self._send_request_wo_file(expected_code=422)

def test_return_html(self) -> None:
file_name = "example.doc"
result = self._send_request(file_name, data={"structure_type": "tree", "return_format": "html"})
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion tests/api_tests/test_api_format_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from tests.api_tests.abstract_api_test import AbstractTestApiDocReader


class TestApiCSVReader(AbstractTestApiDocReader):
class TestApiJSONReader(AbstractTestApiDocReader):

def _get_abs_path(self, file_name: str) -> str:
return os.path.abspath(os.path.join(self.data_directory_path, "json", file_name))
Expand Down
6 changes: 3 additions & 3 deletions tests/api_tests/test_api_format_pdf_auto_text_layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,21 +44,21 @@ def test_auto_pdf_with_text_layer(self) -> None:
file_name = os.path.join("..", "pdf_with_text_layer", "english_doc.pdf")
result = self._send_request(file_name, dict(pdf_with_text_layer="auto"))
self.assertIn("Assume document has a correct textual layer", result["warnings"])
self.check_english_doc(result)
self._check_english_doc(result)

def test_auto_pdf_with_wrong_text_layer(self) -> None:
file_name = "english_doc_bad_text.pdf"
result = self._send_request(file_name, dict(pdf_with_text_layer="auto"))
self.assertIn("Assume document has incorrect textual layer", result["warnings"])
self.check_english_doc(result)
self._check_english_doc(result)

def test_auto_document_mixed(self) -> None:
file_name = "mixed_pdf.pdf"
for pdf_with_text_layer in "auto", "auto_tabby":
result = self._send_request(file_name, dict(pdf_with_text_layer=pdf_with_text_layer))
self.assertIn("Assume document has a correct textual layer", result["warnings"])
self.assertIn("Assume the first page hasn't a textual layer", result["warnings"])
self.check_english_doc(result)
self._check_english_doc(result)
structure = result["content"]["structure"]
list_items = structure["subparagraphs"][1]["subparagraphs"]
self.assertEqual("3) продолжаем список\n", list_items[2]["text"])
Expand Down
2 changes: 1 addition & 1 deletion tests/api_tests/test_api_format_pdf_page_limit.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from tests.api_tests.abstract_api_test import AbstractTestApiDocReader


class TestApiPdfReader(AbstractTestApiDocReader):
class TestApiPdfPageLimit(AbstractTestApiDocReader):

def _get_abs_path(self, file_name: str) -> str:
return os.path.join(self.data_directory_path, "pdf_with_text_layer", file_name)
Expand Down
89 changes: 1 addition & 88 deletions tests/api_tests/test_api_format_pdf_tabby_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def __filter_by_name(self, annotations: List[dict], name: str) -> List[dict]:
def test_example_file(self) -> None:
file_name = "english_doc.pdf"
result = self._send_request(file_name, data=dict(pdf_with_text_layer="tabby"))
self.check_english_doc(result)
self._check_english_doc(result)

@unittest.skip("TODO: add two layers output order support, e.g footnotes and main text.")
def test_former_txt_file(self) -> None:
Expand Down Expand Up @@ -231,93 +231,6 @@ def test_pdf_with_tables(self) -> None:
self.assertEqual("", table[1][0])
self.assertEqual("Прогноз", table[1][1])
self.assertEqual("Прогноз бюджета", table[1][2])
self.assertEqual("Прогноз бюджета", table[1][3])
self.assertEqual("Прогноз бюджета", table[1][4])
self.assertEqual("Расходы", table[2][0])
self.assertEqual("19,8", table[2][1])
self.assertEqual("18,6", table[2][2])
self.assertEqual("17,3", table[2][3])
self.assertEqual("16,1", table[2][4])
self.assertEqual("Доходы", table[3][0])
self.assertEqual("16,1", table[3][1])
self.assertEqual("15,4", table[3][2])
self.assertEqual("15,1", table[3][3])
self.assertEqual("15,0", table[3][4])
self.assertEqual("Нефтегазовые\nдоходы", table[4][0])
self.assertEqual("5,8", table[4][1])
self.assertEqual("5,8", table[4][2])
self.assertEqual("5,5", table[4][3])
self.assertEqual("5,4", table[4][4])
self.assertEqual("Ненефтегазов\nые доходы", table[5][0])
self.assertEqual("10,4", table[5][1])
self.assertEqual("9,6", table[5][2])
self.assertEqual("9,6", table[5][3])
self.assertEqual("9,6", table[5][4])
self.assertEqual("Сальдо\nбюджета", table[6][0])
self.assertEqual("-3,7", table[6][1])
self.assertEqual("-3,2", table[6][2])
self.assertEqual("-2,2", table[6][3])
self.assertEqual("-1,2", table[6][4])
self.assertEqual("", table[7][0])
self.assertEqual("2016", table[7][1])
self.assertEqual("2017", table[7][2])
self.assertEqual("2018", table[7][3])
self.assertEqual("2019", table[7][4])
self.assertEqual("", table[8][0])
self.assertEqual("Прогноз", table[8][1])
self.assertEqual("Прогноз бюджета", table[8][2])
self.assertEqual("Прогноз бюджета", table[8][3])
self.assertEqual("Прогноз бюджета", table[8][4])
self.assertEqual("Расходы", table[9][0])
self.assertEqual("19,8", table[9][1])
self.assertEqual("18,6", table[9][2])
self.assertEqual("17,3", table[9][3])
self.assertEqual("16,1", table[9][4])
self.assertEqual("Доходы", table[10][0])
self.assertEqual("16,1", table[10][1])
self.assertEqual("15,4", table[10][2])
self.assertEqual("15,1", table[10][3])
self.assertEqual("15,0", table[10][4])
self.assertEqual("Нефтегазовые\nдоходы", table[11][0])
self.assertEqual("5,8", table[11][1])
self.assertEqual("5,8", table[11][2])
self.assertEqual("5,5", table[11][3])
self.assertEqual("5,4", table[11][4])
self.assertEqual("Ненефтегазов\nые доходы", table[12][0])
self.assertEqual("10,4", table[12][1])
self.assertEqual("9,6", table[12][2])
self.assertEqual("9,6", table[12][3])
self.assertEqual("9,6", table[12][4])
self.assertEqual("Сальдо\nбюджета", table[13][0])
self.assertEqual("-3,7", table[13][1])
self.assertEqual("-3,2", table[13][2])
self.assertEqual("-2,2", table[13][3])
self.assertEqual("-1,2", table[13][4])
self.assertEqual("", table[14][0])
self.assertEqual("2016", table[14][1])
self.assertEqual("2017", table[14][2])
self.assertEqual("2018", table[14][3])
self.assertEqual("2019", table[14][4])
self.assertEqual("", table[15][0])
self.assertEqual("Прогноз", table[15][1])
self.assertEqual("Прогноз бюджета", table[15][2])
self.assertEqual("Прогноз бюджета", table[15][3])
self.assertEqual("Прогноз бюджета", table[15][4])
self.assertEqual("Расходы", table[16][0])
self.assertEqual("19,8", table[16][1])
self.assertEqual("18,6", table[16][2])
self.assertEqual("17,3", table[16][3])
self.assertEqual("16,1", table[16][4])
self.assertEqual("Доходы", table[17][0])
self.assertEqual("16,1", table[17][1])
self.assertEqual("15,4", table[17][2])
self.assertEqual("15,1", table[17][3])
self.assertEqual("15,0", table[17][4])
self.assertEqual("Нефтегазовые\nдоходы", table[18][0])
self.assertEqual("5,8", table[18][1])
self.assertEqual("5,8", table[18][2])
self.assertEqual("5,5", table[18][3])
self.assertEqual("5,4", table[18][4])
self.assertEqual("Ненефтегазов\nые доходы", table[19][0])
self.assertEqual("10,4", table[19][1])
self.assertEqual("9,6", table[19][2])
Expand Down
2 changes: 1 addition & 1 deletion tests/api_tests/test_api_format_pdf_with_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from tests.api_tests.abstract_api_test import AbstractTestApiDocReader


class TestApiPdfReader(AbstractTestApiDocReader):
class TestApiPdfWithText(AbstractTestApiDocReader):

def _get_abs_path(self, file_name: str) -> str:
return os.path.join(self.data_directory_path, "pdf_with_text_layer", file_name)
Expand Down
2 changes: 1 addition & 1 deletion tests/api_tests/test_api_format_pptx.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from tests.api_tests.abstract_api_test import AbstractTestApiDocReader


class TestApiExcelReader(AbstractTestApiDocReader):
class TestApiPPTXReader(AbstractTestApiDocReader):

data_directory_path = os.path.join(AbstractTestApiDocReader.data_directory_path, "pptx")

Expand Down
10 changes: 5 additions & 5 deletions tests/api_tests/test_api_misc_language.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,26 @@
from tests.api_tests.abstract_api_test import AbstractTestApiDocReader


class TestApiDocReader(AbstractTestApiDocReader):
class TestLanguage(AbstractTestApiDocReader):

data_directory_path = os.path.join(AbstractTestApiDocReader.data_directory_path, "docx")

def test_en_doc(self) -> None:
file_name = "english_doc.doc"
result = self._send_request(file_name, dict(language="eng", structure_type="tree"))
self.check_english_doc(result)
self._check_english_doc(result)

def test_en_docx(self) -> None:
file_name = "english_doc.docx"
result = self._send_request(file_name, dict(language="eng", structure_type="tree"))
self.check_english_doc(result)
self._check_english_doc(result)

def test_en_odt(self) -> None:
file_name = "english_doc.odt"
result = self._send_request(file_name, dict(language="eng", structure_type="tree"))
self.check_english_doc(result)
self._check_english_doc(result)

def test_en_pdf(self) -> None:
file_name = "../pdf_with_text_layer/english_doc.pdf"
result = self._send_request(file_name, dict(language="eng"))
self.check_english_doc(result)
self._check_english_doc(result)
2 changes: 1 addition & 1 deletion tests/api_tests/test_api_misc_list_patching.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from tests.api_tests.abstract_api_test import AbstractTestApiDocReader


class TestApiDocReader(AbstractTestApiDocReader):
class TestListPatching(AbstractTestApiDocReader):

def test_list_patching(self) -> None:
file_name = "docx/13_moloko_1_polug.docx"
Expand Down
6 changes: 0 additions & 6 deletions tests/api_tests/test_api_misc_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@

from tests.api_tests.abstract_api_test import AbstractTestApiDocReader

# test_structure
# тесты на ошибки вынести в отдельный файл
# вести файлик с темами тестов

class TestApi(AbstractTestApiDocReader):

Expand Down Expand Up @@ -45,6 +42,3 @@ def test_text(self) -> None:
self.assertEqual(content["subparagraphs"][1]["subparagraphs"][0]["text"].strip(), '1. Элемент нумерованного списка')
self.assertEqual(content["subparagraphs"][1]["subparagraphs"][0]["metadata"]['paragraph_type'], 'list_item')
self._check_metainfo(result['metadata'], 'text/plain', file_name)

def test_bin_file(self) -> None:
self._send_request("file.bin", expected_code=415)
2 changes: 1 addition & 1 deletion tests/api_tests/test_api_misc_multipage_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from tests.api_tests.abstract_api_test import AbstractTestApiDocReader


class TestRecognizedTable(AbstractTestApiDocReader):
class TestMultipageTable(AbstractTestApiDocReader):

def _get_abs_path(self, file_name: str) -> str:
return os.path.join(self.data_directory_path, "tables", file_name)
Expand Down
2 changes: 1 addition & 1 deletion tests/api_tests/test_api_misc_nesting_list.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from tests.api_tests.abstract_api_test import AbstractTestApiDocReader


class TestApiDocReader(AbstractTestApiDocReader):
class TestNestingList(AbstractTestApiDocReader):

def test_list_nesting_content(self) -> None:
file_name = "docx/pr14tz_v5_2007_03_01.docx"
Expand Down
2 changes: 1 addition & 1 deletion tests/api_tests/test_api_misc_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from tests.api_tests.abstract_api_test import AbstractTestApiDocReader


class TestApiDocReader(AbstractTestApiDocReader):
class TestStructure(AbstractTestApiDocReader):

data_directory_path = os.path.join(AbstractTestApiDocReader.data_directory_path, "docx")

Expand Down
14 changes: 1 addition & 13 deletions tests/api_tests/test_api_misc_with_attachments.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,6 @@ class TestApiAttachmentsReader(AbstractTestApiDocReader):

data_directory_path = AbstractTestApiDocReader.data_directory_path

def check_pdf_1(self, pdf: dict) -> None:
content = pdf["content"]['structure']
self.assertEqual("Глава 543\n", content["subparagraphs"][0]["text"])
self.assertEqual("Какой-то текст.\n", content["subparagraphs"][0]["subparagraphs"][0]["text"])
self.assertEqual(content["subparagraphs"][0]["subparagraphs"][1]["subparagraphs"][0]['text'], '1.\n')
self.assertEqual(content["subparagraphs"][0]["subparagraphs"][1]["subparagraphs"][1]['text'], '2.\n')
self.assertEqual(content["subparagraphs"][0]["subparagraphs"][1]["subparagraphs"][2]['text'], '3.\n')

def check_pdf_2(self, pdf: dict) -> None:
content = pdf["content"]
self.assertEqual("Пример документа\n", content['structure']['subparagraphs'][0]["text"])

def _check_attachments(self, attachments: List[dict]) -> None:
for attachment in attachments:
self.assertTrue(attachment["attachments"] is not None)
Expand Down Expand Up @@ -172,7 +160,7 @@ def test_docx_images_base64(self) -> None:
with open(path, "wb") as file_out:
file_out.write(base64.decodebytes(base64_encode.encode()))
result_english = self._send_request(file_name=path, data={})
self.check_english_doc(result_english)
self._check_english_doc(result_english)

def test_docx_images_no_base64(self) -> None:
metadata = self.__check_base64(False)
Expand Down
6 changes: 0 additions & 6 deletions tests/unit_tests/abstract_converter_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@


class AbstractConverterTest(TestCase):
"""
Class for testing abstract converter
"""
path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "data"))

def setUp(self) -> None:
Expand All @@ -27,9 +24,6 @@ def tearDown(self) -> None:
self.tmp_dir.cleanup()

def _convert(self, filename: str, extension: str, converter: AbstractConverter) -> None:
"""
Method for converting file into another extension
"""
filename_with_extension = filename + extension
file = os.path.join(self.path, filename_with_extension)
tmp_file = os.path.join(self.tmp_dir.name, filename_with_extension)
Expand Down
Loading

0 comments on commit bdc6c5c

Please sign in to comment.