diff --git a/.flake8 b/.flake8 index 401f544b..c0511db7 100644 --- a/.flake8 +++ b/.flake8 @@ -28,6 +28,7 @@ exclude = *__init__.py, resources, venv, + .venv, build, dedoc.egg-info, docs/_build, @@ -48,5 +49,5 @@ per-file-ignores = scripts/*:T201 scripts/benchmark_pdf_performance*:JS101 tests/custom_test_runner.py:ANN001,ANN201,ANN202,ANN204,N802 - docs/source/_static/code_examples/*:I251 + docs/source/_static/code_examples/*:I251,T201 docs/source/_static/code_examples/langchain/*:FOL001,FOL002,FOL003,FOL004,FOL005,I100,I202,I251 diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 4cb468e8..262c3ba5 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -33,3 +33,4 @@ jobs: python dedoc_usage_tutorial.py python dedoc_add_new_doc_type_tutorial.py python dedoc_add_new_structure_type_tutorial.py + python dedoc_using_patterns_tutorial.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 09231202..2b5eae7a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ repos: rev: 5.0.4 hooks: - id: flake8 - exclude: \.github|.*__init__\.py|resources|docs|venv|build|dedoc\.egg-info|scripts/fintoc2022/metric.py + exclude: \.github|.*__init__\.py|resources|docs|venv|\.venv|build|dedoc\.egg-info|scripts/fintoc2022/metric.py args: - "--config=.flake8" additional_dependencies: [ diff --git a/dedoc/api/api_args.py b/dedoc/api/api_args.py index 8f3e1415..8ffdc7b9 100644 --- a/dedoc/api/api_args.py +++ b/dedoc/api/api_args.py @@ -8,6 +8,7 @@ class QueryParameters: # type of document structure parsing document_type: str = Form("", enum=["", "law", "tz", "diploma", "article", "fintoc"], description="Document domain") + patterns: str = Form("", description='Patterns for default document type (when document_type="")') structure_type: str = Form("tree", enum=["linear", "tree"], description="Output structure type") return_format: str = Form("json", enum=["json", "html", "plain_text", "tree", "collapsed_tree", "ujson", "pretty_json"], description="Response representation, most types (except json) are used for debug purposes only") diff --git a/dedoc/api/web/index.html b/dedoc/api/web/index.html index 423dbcfe..d0c8b984 100644 --- a/dedoc/api/web/index.html +++ b/dedoc/api/web/index.html @@ -28,7 +28,7 @@

Parameters configuration

Type of document structure parsing

-
document_type, structure_type, return_format +
document_type, patterns, structure_type, return_format

Type of document structure parsing

+

+

+ Patterns for default structure extractor (document_type="other")
+
+ +
+

+