From 4ee4791e661f8872b229278d49c3b6a1efccb4cf Mon Sep 17 00:00:00 2001
From: Nasty <bogatenkova.anastasiya@mail.ru>
Date: Mon, 26 Aug 2024 18:09:38 +0300
Subject: [PATCH] TLDR-748 docs for classes

---
 .../default_structure_extractor.py            |  3 +
 .../patterns/abstract_pattern.py              | 33 ++++++-
 .../patterns/bracket_list_pattern.py          | 37 +++++++-
 .../patterns/bracket_roman_list_pattern.py    | 43 ++++++++-
 .../patterns/bullet_list_pattern.py           | 39 +++++++-
 .../patterns/dotted_list_pattern.py           | 46 ++++++++-
 .../patterns/letter_list_pattern.py           | 49 +++++++++-
 .../patterns/regexp_pattern.py                | 67 ++++++++++++-
 .../patterns/roman_list_pattern.py            | 43 ++++++++-
 .../patterns/start_word_pattern.py            | 61 +++++++++++-
 .../patterns/tag_header_pattern.py            |  4 +-
 .../patterns/tag_list_pattern.py              |  4 +-
 .../patterns/tag_pattern.py                   | 80 +++++++++++++++-
 docs/source/dedoc_api_usage/api.rst           |  6 +-
 docs/source/index.rst                         |  5 +-
 docs/source/modules/structure_extractors.rst  | 94 +++++++++++++++++++
 docs/source/readers_output/line_types.rst     | 10 +-
 docs/source/structure_types/other.rst         |  5 +
 docs/source/tutorials/using_patterns.rst      | 12 +++
 19 files changed, 608 insertions(+), 33 deletions(-)
 create mode 100644 docs/source/tutorials/using_patterns.rst

diff --git a/dedoc/structure_extractors/concrete_structure_extractors/default_structure_extractor.py b/dedoc/structure_extractors/concrete_structure_extractors/default_structure_extractor.py
index 911780fb..3037476b 100644
--- a/dedoc/structure_extractors/concrete_structure_extractors/default_structure_extractor.py
+++ b/dedoc/structure_extractors/concrete_structure_extractors/default_structure_extractor.py
@@ -20,6 +20,9 @@ def extract(self, document: UnstructuredDocument, parameters: Optional[dict] = N
         Extract basic structure from the given document and add additional information to the lines' metadata.
         To get the information about the method's parameters look at the documentation of the class \
         :class:`~dedoc.structure_extractors.AbstractStructureExtractor`.
+
+        ``parameters`` can contain patterns for configuring line types and their levels in the output document tree ("patterns" key).
+        Please see :ref:`dedoc_structure_extractors_patterns` and :ref:`using_patterns` to learn how to use patterns for making your custom structure.
         """
         parameters = {} if parameters is None else parameters
         patterns = self.__get_patterns(parameters)
diff --git a/dedoc/structure_extractors/patterns/abstract_pattern.py b/dedoc/structure_extractors/patterns/abstract_pattern.py
index 199eec22..20ac26de 100644
--- a/dedoc/structure_extractors/patterns/abstract_pattern.py
+++ b/dedoc/structure_extractors/patterns/abstract_pattern.py
@@ -1,14 +1,28 @@
 from abc import ABC, abstractmethod
-from typing import Optional
+from typing import Optional, Union
 
 from dedoc.data_structures.hierarchy_level import HierarchyLevel
 from dedoc.data_structures.line_with_meta import LineWithMeta
 
 
 class AbstractPattern(ABC):
+    """
+    Base class for all patterns to configure structure extraction by :class:`~dedoc.structure_extractors.DefaultStructureExtractor`.
+    """
     _name = ""
 
-    def __init__(self, line_type: Optional[str], level_1: Optional[int], level_2: Optional[int], can_be_multiline: Optional[bool or str]) -> None:
+    def __init__(self, line_type: Optional[str], level_1: Optional[int], level_2: Optional[int], can_be_multiline: Optional[Union[bool, str]]) -> None:
+        """
+        Initialise pattern with default values of :class:`~dedoc.data_structures.HierarchyLevel` attributes.
+        They can be used in :meth:`~dedoc.structure_extractors.patterns.abstract_pattern.AbstractPattern.get_hierarchy_level`
+        according to specific pattern logic.
+
+        :param line_type: type of the line, e.g. "header", "bullet_list_item", "chapter", etc.
+        :param level_1: value of a line primary importance
+        :param level_2: level of the line inside specific class
+        :param can_be_multiline: is used to unify lines inside tree node by :class:`~dedoc.structure_constructors.TreeConstructor`,
+            if line can be multiline, it can be joined with another line. If ``None`` is given, can_be_multiline is set to ``True``.
+        """
         from dedoc.utils.parameter_utils import get_bool_value
 
         self._line_type = line_type
@@ -18,12 +32,27 @@ def __init__(self, line_type: Optional[str], level_1: Optional[int], level_2: Op
 
     @classmethod
     def name(cls: "AbstractPattern") -> str:
+        """
+        Return the ``_name`` attribute, which is used in parameters configuration to choose a specific pattern.
+        Each pattern has a unique non-empty name.
+        """
         return cls._name
 
     @abstractmethod
     def match(self, line: LineWithMeta) -> bool:
+        """
+        Check if the given line satisfies the pattern requirements.
+        Line text, annotations or metadata (``metadata.tag_hierarchy_level``) can be used to decide whether the line matches the pattern.
+        """
         pass
 
     @abstractmethod
     def get_hierarchy_level(self, line: LineWithMeta) -> HierarchyLevel:
+        """
+        This method should be applied only when :meth:`~dedoc.structure_extractors.patterns.abstract_pattern.AbstractPattern.match`
+        returned ``True`` for the given line.
+
+        Get :class:`~dedoc.data_structures.HierarchyLevel` for initialising ``line.metadata.hierarchy_level`` attribute.
+        Please see :ref:`add_structure_type_hierarchy_level` to get more information about :class:`~dedoc.data_structures.HierarchyLevel`.
+        """
         pass
diff --git a/dedoc/structure_extractors/patterns/bracket_list_pattern.py b/dedoc/structure_extractors/patterns/bracket_list_pattern.py
index 524d2fe1..35a78d98 100644
--- a/dedoc/structure_extractors/patterns/bracket_list_pattern.py
+++ b/dedoc/structure_extractors/patterns/bracket_list_pattern.py
@@ -1,11 +1,44 @@
-from typing import Optional
+from typing import Optional, Union
 
 from dedoc.structure_extractors.feature_extractors.list_features.prefix.bracket_prefix import BracketPrefix
 from dedoc.structure_extractors.patterns.regexp_pattern import RegexpPattern
 
 
 class BracketListPattern(RegexpPattern):
+    """
+    Pattern for matching numbered lists with brackets, e.g.
+
+    ::
+
+        1) first element
+        2) second element
+
+    Example of library usage:
+
+    .. code-block:: python
+
+        from dedoc.structure_extractors import DefaultStructureExtractor
+        from dedoc.structure_extractors.patterns import BracketListPattern
+
+        reader = ...
+        structure_extractor = DefaultStructureExtractor()
+        patterns = [BracketListPattern(line_type="list_item", level_1=1, level_2=1, can_be_multiline=False)]
+        document = reader.read(file_path=file_path)
+        document = structure_extractor.extract(document=document, parameters={"patterns": patterns})
+
+    Example of API usage:
+
+    .. code-block:: python
+
+        import requests
+
+        patterns = [{"name": "bracket_list", "line_type": "list_item", "level_1": 1, "level_2": 1, "can_be_multiline": "false"}]
+        parameters = {"patterns": str(patterns)}
+        with open(file_path, "rb") as file:
+            files = {"file": (file_name, file)}
+            r = requests.post("http://localhost:1231/upload", files=files, data=parameters)
+    """
     _name = "bracket_list"
 
-    def __init__(self, line_type: str, level_1: int, level_2: int, can_be_multiline: Optional[bool or str] = None) -> None:
+    def __init__(self, line_type: str, level_1: int, level_2: int, can_be_multiline: Optional[Union[bool, str]] = None) -> None:
         super().__init__(regexp=BracketPrefix.regexp, line_type=line_type, level_1=level_1, level_2=level_2, can_be_multiline=can_be_multiline)
diff --git a/dedoc/structure_extractors/patterns/bracket_roman_list_pattern.py b/dedoc/structure_extractors/patterns/bracket_roman_list_pattern.py
index 35965cd6..281299e1 100644
--- a/dedoc/structure_extractors/patterns/bracket_roman_list_pattern.py
+++ b/dedoc/structure_extractors/patterns/bracket_roman_list_pattern.py
@@ -1,11 +1,50 @@
-from typing import Optional
+from typing import Optional, Union
 
 from dedoc.structure_extractors.feature_extractors.list_features.prefix.bracket_roman_prefix import BracketRomanPrefix
 from dedoc.structure_extractors.patterns.regexp_pattern import RegexpPattern
 
 
 class BracketRomanListPattern(RegexpPattern):
+    """
+    Pattern for matching roman lists with brackets, e.g.
+
+    ::
+
+        i) first item
+        ii) second item
+        iii) third item
+        iv) fourth item
+
+    .. note::
+
+        The pattern is case-insensitive (lowercase and uppercase letters are not distinguished).
+
+    Example of library usage:
+
+    .. code-block:: python
+
+        from dedoc.structure_extractors import DefaultStructureExtractor
+        from dedoc.structure_extractors.patterns import BracketRomanListPattern
+
+        reader = ...
+        structure_extractor = DefaultStructureExtractor()
+        patterns = [BracketRomanListPattern(line_type="list_item", level_1=1, level_2=1, can_be_multiline=False)]
+        document = reader.read(file_path=file_path)
+        document = structure_extractor.extract(document=document, parameters={"patterns": patterns})
+
+    Example of API usage:
+
+    .. code-block:: python
+
+        import requests
+
+        patterns = [{"name": "bracket_roman_list", "line_type": "list_item", "level_1": 1, "level_2": 1, "can_be_multiline": "false"}]
+        parameters = {"patterns": str(patterns)}
+        with open(file_path, "rb") as file:
+            files = {"file": (file_name, file)}
+            r = requests.post("http://localhost:1231/upload", files=files, data=parameters)
+    """
     _name = "bracket_roman_list"
 
-    def __init__(self, line_type: str, level_1: int, level_2: int, can_be_multiline: Optional[bool or str] = None) -> None:
+    def __init__(self, line_type: str, level_1: int, level_2: int, can_be_multiline: Optional[Union[bool, str]] = None) -> None:
         super().__init__(regexp=BracketRomanPrefix.regexp, line_type=line_type, level_1=level_1, level_2=level_2, can_be_multiline=can_be_multiline)
diff --git a/dedoc/structure_extractors/patterns/bullet_list_pattern.py b/dedoc/structure_extractors/patterns/bullet_list_pattern.py
index e0dacd4b..0695abc1 100644
--- a/dedoc/structure_extractors/patterns/bullet_list_pattern.py
+++ b/dedoc/structure_extractors/patterns/bullet_list_pattern.py
@@ -1,11 +1,46 @@
-from typing import Optional
+from typing import Optional, Union
 
 from dedoc.structure_extractors.feature_extractors.list_features.prefix.bullet_prefix import BulletPrefix
 from dedoc.structure_extractors.patterns.regexp_pattern import RegexpPattern
 
 
 class BulletListPattern(RegexpPattern):
+    """
+    Pattern for matching bulleted lists, e.g.
+
+    ::
+
+        - first item
+        - second item
+
+    or with other bullet markers ``-, —, −, –, ®, ., •, ,, ‚, ©, ⎯, °, *, >, ●, ♣, ①, ▪, *, +``.
+
+    Example of library usage:
+
+    .. code-block:: python
+
+        from dedoc.structure_extractors import DefaultStructureExtractor
+        from dedoc.structure_extractors.patterns import BulletListPattern
+
+        reader = ...
+        structure_extractor = DefaultStructureExtractor()
+        patterns = [BulletListPattern(line_type="list_item", level_1=1, level_2=1, can_be_multiline=False)]
+        document = reader.read(file_path=file_path)
+        document = structure_extractor.extract(document=document, parameters={"patterns": patterns})
+
+    Example of API usage:
+
+    .. code-block:: python
+
+        import requests
+
+        patterns = [{"name": "bullet_list", "line_type": "list_item", "level_1": 1, "level_2": 1, "can_be_multiline": "false"}]
+        parameters = {"patterns": str(patterns)}
+        with open(file_path, "rb") as file:
+            files = {"file": (file_name, file)}
+            r = requests.post("http://localhost:1231/upload", files=files, data=parameters)
+    """
     _name = "bullet_list"
 
-    def __init__(self, line_type: str, level_1: int, level_2: int, can_be_multiline: Optional[bool or str] = None) -> None:
+    def __init__(self, line_type: str, level_1: int, level_2: int, can_be_multiline: Optional[Union[bool, str]] = None) -> None:
         super().__init__(regexp=BulletPrefix.regexp, line_type=line_type, level_1=level_1, level_2=level_2, can_be_multiline=can_be_multiline)
diff --git a/dedoc/structure_extractors/patterns/dotted_list_pattern.py b/dedoc/structure_extractors/patterns/dotted_list_pattern.py
index ef4a071b..d085ddb2 100644
--- a/dedoc/structure_extractors/patterns/dotted_list_pattern.py
+++ b/dedoc/structure_extractors/patterns/dotted_list_pattern.py
@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Optional, Union
 
 from dedoc.data_structures.hierarchy_level import HierarchyLevel
 from dedoc.data_structures.line_with_meta import LineWithMeta
@@ -7,9 +7,51 @@
 
 
 class DottedListPattern(RegexpPattern):
+    """
+    Pattern for matching numbered lists with dots, e.g.
+
+    ::
+
+        1. first element
+            1.1. first sub-element
+            1.2. second sub-element
+        2. second element
+
+    The number of dots is unlimited.
+    There is no ``level_2`` parameter in this pattern, ``level_2`` is calculated as the number of numbers between dots, e.g.
+
+    * ``1.`` → ``level_2=1``
+    * ``1.1`` or ``1.1.`` → ``level_2=2``
+    * ``1.2.3.4`` or ``1.2.3.4.`` → ``level_2=4``
+
+    Example of library usage:
+
+    .. code-block:: python
+
+        from dedoc.structure_extractors import DefaultStructureExtractor
+        from dedoc.structure_extractors.patterns import DottedListPattern
+
+        reader = ...
+        structure_extractor = DefaultStructureExtractor()
+        patterns = [DottedListPattern(line_type="list_item", level_1=1, can_be_multiline=False)]
+        document = reader.read(file_path=file_path)
+        document = structure_extractor.extract(document=document, parameters={"patterns": patterns})
+
+    Example of API usage:
+
+    .. code-block:: python
+
+        import requests
+
+        patterns = [{"name": "dotted_list", "line_type": "list_item", "level_1": 1, "can_be_multiline": "false"}]
+        parameters = {"patterns": str(patterns)}
+        with open(file_path, "rb") as file:
+            files = {"file": (file_name, file)}
+            r = requests.post("http://localhost:1231/upload", files=files, data=parameters)
+    """
     _name = "dotted_list"
 
-    def __init__(self, line_type: str, level_1: int, can_be_multiline: Optional[bool or str] = None) -> None:
+    def __init__(self, line_type: str, level_1: int, can_be_multiline: Optional[Union[bool, str]] = None) -> None:
         super().__init__(regexp=DottedPrefix.regexp, line_type=line_type, level_1=level_1, level_2=None, can_be_multiline=can_be_multiline)
 
     def get_hierarchy_level(self, line: LineWithMeta) -> HierarchyLevel:
diff --git a/dedoc/structure_extractors/patterns/letter_list_pattern.py b/dedoc/structure_extractors/patterns/letter_list_pattern.py
index ae24ba73..b9c39591 100644
--- a/dedoc/structure_extractors/patterns/letter_list_pattern.py
+++ b/dedoc/structure_extractors/patterns/letter_list_pattern.py
@@ -1,11 +1,56 @@
-from typing import Optional
+from typing import Optional, Union
 
 from dedoc.structure_extractors.feature_extractors.list_features.prefix.any_letter_prefix import AnyLetterPrefix
 from dedoc.structure_extractors.patterns.regexp_pattern import RegexpPattern
 
 
 class LetterListPattern(RegexpPattern):
+    """
+    Pattern for matching lists with letters and brackets, e.g.
+
+    ::
+
+        a) first element
+        b) second element
+
+    or (example for Armenian language)
+
+    ::
+
+        ա) տեղաբաշխել
+        բ) Հայաստանի Հանրապետության
+        գ) սահմանապահ վերակարգերի
+
+    .. note::
+
+        The pattern is case-insensitive (lowercase and uppercase letters are not distinguished).
+
+    Example of library usage:
+
+    .. code-block:: python
+
+        from dedoc.structure_extractors import DefaultStructureExtractor
+        from dedoc.structure_extractors.patterns import LetterListPattern
+
+        reader = ...
+        structure_extractor = DefaultStructureExtractor()
+        patterns = [LetterListPattern(line_type="list_item", level_1=1, level_2=1, can_be_multiline=False)]
+        document = reader.read(file_path=file_path)
+        document = structure_extractor.extract(document=document, parameters={"patterns": patterns})
+
+    Example of API usage:
+
+    .. code-block:: python
+
+        import requests
+
+        patterns = [{"name": "letter_list", "line_type": "list_item", "level_1": 1, "level_2": 1, "can_be_multiline": "false"}]
+        parameters = {"patterns": str(patterns)}
+        with open(file_path, "rb") as file:
+            files = {"file": (file_name, file)}
+            r = requests.post("http://localhost:1231/upload", files=files, data=parameters)
+    """
     _name = "letter_list"
 
-    def __init__(self, line_type: str, level_1: int, level_2: int, can_be_multiline: Optional[bool or str] = None) -> None:
+    def __init__(self, line_type: str, level_1: int, level_2: int, can_be_multiline: Optional[Union[bool, str]] = None) -> None:
         super().__init__(regexp=AnyLetterPrefix.regexp, line_type=line_type, level_1=level_1, level_2=level_2, can_be_multiline=can_be_multiline)
diff --git a/dedoc/structure_extractors/patterns/regexp_pattern.py b/dedoc/structure_extractors/patterns/regexp_pattern.py
index b8fc2276..a456983a 100644
--- a/dedoc/structure_extractors/patterns/regexp_pattern.py
+++ b/dedoc/structure_extractors/patterns/regexp_pattern.py
@@ -1,5 +1,5 @@
 import re
-from typing import Optional
+from typing import Optional, Union
 
 from dedoc.data_structures.hierarchy_level import HierarchyLevel
 from dedoc.data_structures.line_with_meta import LineWithMeta
@@ -7,6 +7,47 @@
 
 
 class RegexpPattern(AbstractPattern):
+    """
+    Pattern for matching line text by a regular expression.
+
+    .. note::
+
+        The pattern is case-insensitive (lowercase and uppercase letters are not distinguished).
+        Before regular expression matching, the line text is stripped (space symbols are deleted from both sides).
+
+    .. seealso::
+
+        Syntax for writing regular expressions is described in the `Python documentation <https://docs.python.org/3/library/re.html>`_.
+
+    Example of library usage:
+
+    .. code-block:: python
+
+        import re
+        from dedoc.structure_extractors import DefaultStructureExtractor
+        from dedoc.structure_extractors.patterns import RegexpPattern
+
+        reader = ...
+        structure_extractor = DefaultStructureExtractor()
+        patterns = [
+            RegexpPattern(regexp=r"^chapter\s\d+\.", line_type="chapter", level_1=1, can_be_multiline=False),
+            RegexpPattern(regexp=re.compile(r"^part\s\d+\.\d+\."), line_type="part", level_1=2, can_be_multiline=False)
+        ]
+        document = reader.read(file_path=file_path)
+        document = structure_extractor.extract(document=document, parameters={"patterns": patterns})
+
+    Example of API usage:
+
+    .. code-block:: python
+
+        import requests
+
+        patterns = [{"name": "regexp", "regexp": r"^chapter\s\d+\.", "line_type": "chapter", "level_1": 1, "can_be_multiline": "false"}]
+        parameters = {"patterns": str(patterns)}
+        with open(file_path, "rb") as file:
+            files = {"file": (file_name, file)}
+            r = requests.post("http://localhost:1231/upload", files=files, data=parameters)
+    """ # noqa
     _name = "regexp"
 
     def __init__(self,
@@ -14,14 +55,36 @@ def __init__(self,
                  line_type: str,
                  level_1: Optional[int] = None,
                  level_2: Optional[int] = None,
-                 can_be_multiline: Optional[bool or str] = None) -> None:
+                 can_be_multiline: Optional[Union[bool, str]] = None) -> None:
+        """
+        Initialise pattern with default values of :class:`~dedoc.data_structures.HierarchyLevel` attributes.
+
+        :param regexp: regular expression for checking whether the line text matches the pattern.
+            Note that regular expression is used on the lowercase and stripped line.
+        :param line_type: type of the line, e.g. "header", "bullet_list_item", "chapter", etc.
+        :param level_1: value of a line primary importance
+        :param level_2: level of the line inside specific class
+        :param can_be_multiline: is used to unify lines inside tree node by :class:`~dedoc.structure_constructors.TreeConstructor`,
+            if line can be multiline, it can be joined with another line. If ``None`` is given, can_be_multiline is set to ``True``.
+        """
         super().__init__(line_type=line_type, level_1=level_1, level_2=level_2, can_be_multiline=can_be_multiline)
         self._regexp = re.compile(regexp) if isinstance(regexp, str) else regexp
 
     def match(self, line: LineWithMeta) -> bool:
+        """
+        Check if the pattern is suitable for the given line.
+        Line text is checked by applying pattern's regular expression, text is stripped and made lowercase beforehand.
+        """
         text = line.line.strip().lower()
         match = self._regexp.match(text)
         return match is not None
 
     def get_hierarchy_level(self, line: LineWithMeta) -> HierarchyLevel:
+        """
+        This method should be applied only when :meth:`~dedoc.structure_extractors.patterns.RegexpPattern.match`
+        returned ``True`` for the given line.
+
+        Return :class:`~dedoc.data_structures.HierarchyLevel` for initialising ``line.metadata.hierarchy_level``.
+        The attributes ``line_type``, ``level_1``, ``level_2``, ``can_be_multiline`` are equal to values given during class initialisation.
+        """
         return HierarchyLevel(line_type=self._line_type, level_1=self._level_1, level_2=self._level_2, can_be_multiline=self._can_be_multiline)
diff --git a/dedoc/structure_extractors/patterns/roman_list_pattern.py b/dedoc/structure_extractors/patterns/roman_list_pattern.py
index 619ba420..03e8761a 100644
--- a/dedoc/structure_extractors/patterns/roman_list_pattern.py
+++ b/dedoc/structure_extractors/patterns/roman_list_pattern.py
@@ -1,11 +1,50 @@
-from typing import Optional
+from typing import Optional, Union
 
 from dedoc.structure_extractors.feature_extractors.list_features.prefix.roman_prefix import RomanPrefix
 from dedoc.structure_extractors.patterns.regexp_pattern import RegexpPattern
 
 
 class RomanListPattern(RegexpPattern):
+    """
+    Pattern for matching roman lists with dots, e.g.
+
+    ::
+
+        I. first item
+        II. second item
+        III. third item
+        IV. fourth item
+
+    .. note::
+
+        The pattern is case-insensitive (lowercase and uppercase letters are not distinguished).
+
+    Example of library usage:
+
+    .. code-block:: python
+
+        from dedoc.structure_extractors import DefaultStructureExtractor
+        from dedoc.structure_extractors.patterns import RomanListPattern
+
+        reader = ...
+        structure_extractor = DefaultStructureExtractor()
+        patterns = [RomanListPattern(line_type="list_item", level_1=1, level_2=1, can_be_multiline=False)]
+        document = reader.read(file_path=file_path)
+        document = structure_extractor.extract(document=document, parameters={"patterns": patterns})
+
+    Example of API usage:
+
+    .. code-block:: python
+
+        import requests
+
+        patterns = [{"name": "roman_list", "line_type": "list_item", "level_1": 1, "level_2": 1, "can_be_multiline": "false"}]
+        parameters = {"patterns": str(patterns)}
+        with open(file_path, "rb") as file:
+            files = {"file": (file_name, file)}
+            r = requests.post("http://localhost:1231/upload", files=files, data=parameters)
+    """
     _name = "roman_list"
 
-    def __init__(self, line_type: str, level_1: int, level_2: int, can_be_multiline: Optional[bool or str] = None) -> None:
+    def __init__(self, line_type: str, level_1: int, level_2: int, can_be_multiline: Optional[Union[bool, str]] = None) -> None:
         super().__init__(regexp=RomanPrefix.regexp, line_type=line_type, level_1=level_1, level_2=level_2, can_be_multiline=can_be_multiline)
diff --git a/dedoc/structure_extractors/patterns/start_word_pattern.py b/dedoc/structure_extractors/patterns/start_word_pattern.py
index a7b9b46f..27bb71c8 100644
--- a/dedoc/structure_extractors/patterns/start_word_pattern.py
+++ b/dedoc/structure_extractors/patterns/start_word_pattern.py
@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Optional, Union
 
 from dedoc.data_structures.hierarchy_level import HierarchyLevel
 from dedoc.data_structures.line_with_meta import LineWithMeta
@@ -6,6 +6,41 @@
 
 
 class StartWordPattern(AbstractPattern):
+    """
+    Pattern for lines that begin with some specific text (e.g. Introduction, Chapter, etc.).
+
+    .. note::
+
+        The pattern is case-insensitive (lowercase and uppercase letters are not distinguished).
+        Before matching, the line text is stripped (space symbols are deleted from both sides).
+        The start word for matching is also stripped and made lowercase.
+
+    Example of library usage:
+
+    .. code-block:: python
+
+        import re
+        from dedoc.structure_extractors import DefaultStructureExtractor
+        from dedoc.structure_extractors.patterns import StartWordPattern
+
+        reader = ...
+        structure_extractor = DefaultStructureExtractor()
+        patterns = [StartWordPattern(start_word="chapter", line_type="chapter", level_1=1, can_be_multiline=False)]
+        document = reader.read(file_path=file_path)
+        document = structure_extractor.extract(document=document, parameters={"patterns": patterns})
+
+    Example of API usage:
+
+    .. code-block:: python
+
+        import requests
+
+        patterns = [{"name": "start_word", "start_word": "chapter", "line_type": "chapter", "level_1": 1, "can_be_multiline": "false"}]
+        parameters = {"patterns": str(patterns)}
+        with open(file_path, "rb") as file:
+            files = {"file": (file_name, file)}
+            r = requests.post("http://localhost:1231/upload", files=files, data=parameters)
+    """
     _name = "start_word"
 
     def __init__(self,
@@ -13,13 +48,35 @@ def __init__(self,
                  line_type: str,
                  level_1: Optional[int] = None,
                  level_2: Optional[int] = None,
-                 can_be_multiline: Optional[bool or str] = None) -> None:
+                 can_be_multiline: Optional[Union[bool, str]] = None) -> None:
+        """
+        Initialise pattern with default values of :class:`~dedoc.data_structures.HierarchyLevel` attributes.
+
+        :param start_word: string that the line text should begin with.
+            Note that start_word will be stripped and made lowercase, and will be used on the lowercase and stripped line.
+        :param line_type: type of the line, e.g. "header", "bullet_list_item", "chapter", etc.
+        :param level_1: value of a line primary importance
+        :param level_2: level of the line inside specific class
+        :param can_be_multiline: is used to unify lines inside tree node by :class:`~dedoc.structure_constructors.TreeConstructor`,
+            if line can be multiline, it can be joined with another line. If ``None`` is given, can_be_multiline is set to ``True``.
+        """
         super().__init__(line_type=line_type, level_1=level_1, level_2=level_2, can_be_multiline=can_be_multiline)
         self.__start_word = start_word.strip().lower()
 
     def match(self, line: LineWithMeta) -> bool:
+        """
+        Check if the pattern is suitable for the given line.
+        The line matches if its text starts with the given ``start_word``; the text is stripped and made lowercase beforehand.
+        """
         text = line.line.strip().lower()
         return text.startswith(self.__start_word)
 
     def get_hierarchy_level(self, line: LineWithMeta) -> HierarchyLevel:
+        """
+        This method should be applied only when :meth:`~dedoc.structure_extractors.patterns.StartWordPattern.match`
+        returned ``True`` for the given line.
+
+        Return :class:`~dedoc.data_structures.HierarchyLevel` for initialising ``line.metadata.hierarchy_level``.
+        The attributes ``line_type``, ``level_1``, ``level_2``, ``can_be_multiline`` are equal to values given during class initialisation.
+        """
         return HierarchyLevel(line_type=self._line_type, level_1=self._level_1, level_2=self._level_2, can_be_multiline=self._can_be_multiline)
diff --git a/dedoc/structure_extractors/patterns/tag_header_pattern.py b/dedoc/structure_extractors/patterns/tag_header_pattern.py
index 927e148d..e6639d04 100644
--- a/dedoc/structure_extractors/patterns/tag_header_pattern.py
+++ b/dedoc/structure_extractors/patterns/tag_header_pattern.py
@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Optional, Union
 
 from dedoc.data_structures.hierarchy_level import HierarchyLevel
 from dedoc.data_structures.line_with_meta import LineWithMeta
@@ -12,7 +12,7 @@ def __init__(self,
                  line_type: Optional[str] = None,
                  level_1: Optional[int] = None,
                  level_2: Optional[int] = None,
-                 can_be_multiline: Optional[bool or str] = None,
+                 can_be_multiline: Optional[Union[bool, str]] = None,
                  default_line_type: str = HierarchyLevel.header,
                  default_level_1: int = 1,
                  default_level_2: Optional[int] = None) -> None:
diff --git a/dedoc/structure_extractors/patterns/tag_list_pattern.py b/dedoc/structure_extractors/patterns/tag_list_pattern.py
index 44da867c..d237143d 100644
--- a/dedoc/structure_extractors/patterns/tag_list_pattern.py
+++ b/dedoc/structure_extractors/patterns/tag_list_pattern.py
@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Optional, Union
 
 from dedoc.data_structures.hierarchy_level import HierarchyLevel
 from dedoc.data_structures.line_with_meta import LineWithMeta
@@ -12,7 +12,7 @@ def __init__(self,
                  line_type: Optional[str] = None,
                  level_1: Optional[int] = None,
                  level_2: Optional[int] = None,
-                 can_be_multiline: Optional[bool or str] = None,
+                 can_be_multiline: Optional[Union[bool, str]] = None,
                  default_line_type: str = HierarchyLevel.list_item,
                  default_level_1: int = 2,
                  default_level_2: Optional[int] = None) -> None:
diff --git a/dedoc/structure_extractors/patterns/tag_pattern.py b/dedoc/structure_extractors/patterns/tag_pattern.py
index 88adf384..9e59437e 100644
--- a/dedoc/structure_extractors/patterns/tag_pattern.py
+++ b/dedoc/structure_extractors/patterns/tag_pattern.py
@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Optional, Union
 
 from dedoc.data_structures.hierarchy_level import HierarchyLevel
 from dedoc.data_structures.line_with_meta import LineWithMeta
@@ -6,16 +6,69 @@
 
 
 class TagPattern(AbstractPattern):
+    """
+    Pattern for using information from readers saved in ``line.metadata.tag_hierarchy_level``.
+    Can be useful for paragraph extraction in PDF documents and images,
+    because PDF and image readers save information about paragraphs in ``line.metadata.tag_hierarchy_level.can_be_multiline``.
+
+    .. seealso::
+
+        Please see :ref:`readers_line_types` for information about which line types can be extracted by each reader.
+
+    Example of library usage:
+
+    .. code-block:: python
+
+        import re
+        from dedoc.structure_extractors import DefaultStructureExtractor
+        from dedoc.structure_extractors.patterns import TagPattern
+
+        reader = ...
+        structure_extractor = DefaultStructureExtractor()
+        patterns = [TagPattern(default_line_type="raw_text")]
+        document = reader.read(file_path=file_path)
+        document = structure_extractor.extract(document=document, parameters={"patterns": patterns})
+
+    Example of API usage:
+
+    .. code-block:: python
+
+        import requests
+
+        patterns = [{"name": "tag", "default_line_type": "raw_text"}]
+        parameters = {"patterns": str(patterns)}
+        with open(file_path, "rb") as file:
+            files = {"file": (file_name, file)}
+            r = requests.post("http://localhost:1231/upload", files=files, data=parameters)
+    """
     _name = "tag"
 
     def __init__(self,
                  line_type: Optional[str] = None,
                  level_1: Optional[int] = None,
                  level_2: Optional[int] = None,
-                 can_be_multiline: Optional[bool or str] = None,
+                 can_be_multiline: Optional[Union[bool, str]] = None,
                  default_line_type: str = HierarchyLevel.raw_text,
                  default_level_1: Optional[int] = None,
                  default_level_2: Optional[int] = None) -> None:
+        """
+        Initialise pattern for configuring values of :class:`~dedoc.data_structures.HierarchyLevel` attributes.
+        It is recommended to configure ``default_*`` values in case ``line.metadata.tag_hierarchy_level`` misses some values.
+        If you want to use values from ``line.metadata.tag_hierarchy_level``, it is recommended to leave
+        ``line_type``, ``level_1``, ``level_2``, ``can_be_multiline`` empty.
+
+        ``can_be_multiline`` is filled in PDF and image readers during paragraph detection, so if you want to extract paragraphs,
+        you shouldn't set ``can_be_multiline`` during pattern initialization.
+
+        :param line_type: type of the line, replaces line_type from tag_hierarchy_level if non-empty.
+        :param level_1: value of a line primary importance, replaces level_1 from tag_hierarchy_level if non-empty.
+        :param level_2: level of the line inside specific class, replaces level_2 from tag_hierarchy_level if non-empty.
+        :param can_be_multiline: is used to unify lines inside tree node by :class:`~dedoc.structure_constructors.TreeConstructor`,
+            if line can be multiline, it can be joined with another line. If not None, replaces can_be_multiline from tag_hierarchy_level.
+        :param default_line_type: type of the line, is used when tag_hierarchy_level.line_type == "unknown".
+        :param default_level_1: value of a line primary importance, is used when tag_hierarchy_level.level_1 is None.
+        :param default_level_2: level of the line inside specific class, is used when tag_hierarchy_level.level_2 is None.
+        """
         super().__init__(line_type=line_type, level_1=level_1, level_2=level_2, can_be_multiline=can_be_multiline)
         self._can_be_multiline_none = can_be_multiline is None
         self._default_line_type = default_line_type
@@ -23,9 +76,32 @@ def __init__(self,
         self._default_level_2 = default_level_2
 
     def match(self, line: LineWithMeta) -> bool:
+        """
+        Check if the pattern is suitable for the given line: ``line.metadata.tag_hierarchy_level`` should not be empty.
+        ``line.metadata.tag_hierarchy_level`` is filled during the reading step; some readers can skip ``tag_hierarchy_level`` initialisation.
+        """
         return line.metadata.tag_hierarchy_level is not None
 
     def get_hierarchy_level(self, line: LineWithMeta) -> HierarchyLevel:
+        """
+        This method should be applied only when :meth:`~dedoc.structure_extractors.patterns.TagPattern.match`
+        returned ``True`` for the given line.
+
+        Return :class:`~dedoc.data_structures.HierarchyLevel` for initialising ``line.metadata.hierarchy_level``.
+        The attribute ``line_type`` is initialised according to the following rules:
+
+        * if non-empty ``line_type`` is given during pattern initialisation, then its value is used in the result;
+        * if ``line_type`` is not given (or ``None`` is given) and ``line.metadata.tag_hierarchy_level.line_type`` is not ``unknown``, \
+            the ``line_type`` value from ``line.metadata.tag_hierarchy_level`` is used in the result;
+        * otherwise (``line_type`` is empty and ``line.metadata.tag_hierarchy_level.line_type`` is ``unknown``) ``default_line_type`` is used in the result.
+
+        Similar rules apply to ``level_1`` and ``level_2``, with values compared against ``None`` instead of ``unknown``.
+
+        The ``can_be_multiline`` attribute is initialised according to the following rules:
+
+        * if ``can_be_multiline`` (not ``None``) is given during pattern initialisation, then its value is used in the result;
+        * otherwise ``can_be_multiline`` value from ``line.metadata.tag_hierarchy_level`` is used in the result.
+        """
         return HierarchyLevel(
             line_type=self._get_line_type(line),
             level_1=self._get_level_1(line),
diff --git a/docs/source/dedoc_api_usage/api.rst b/docs/source/dedoc_api_usage/api.rst
index 13cd0eaf..96896125 100644
--- a/docs/source/dedoc_api_usage/api.rst
+++ b/docs/source/dedoc_api_usage/api.rst
@@ -42,10 +42,10 @@ Post-requests should be sent to ``http://localhost:1231/upload``.
         "is_one_column_document": "true",
         "return_format": 'html'
     }
-    with open(filename, 'rb') as file:
-        files = {'file': (filename, file)}
+    with open(filename, "rb") as file:
+        files = {"file": (filename, file)}
         r = requests.post("http://localhost:1231/upload", files=files, data=data)
-        result = r.content.decode('utf-8')
+        result = r.content.decode("utf-8")
 
 The ``data`` dictionary in the example contains some parameters to parse the given file.
 They are described in the section :ref:`api_parameters`.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 0f2aed1d..b9dc7e2e 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -213,6 +213,7 @@ Currently the following domains can be handled:
 
 For a document of unknown or unsupported domain there is an option to use default structure extractor
 (``document_type=other`` at :ref:`api_parameters`), the default document structure described :ref:`here <other_structure>`.
+This type of structure is configurable (see :ref:`using_patterns`).
 
 
 .. toctree::
@@ -223,14 +224,16 @@ For a document of unknown or unsupported domain there is an option to use defaul
    getting_started/usage
    parameters/parameters
 
+
 .. toctree::
    :maxdepth: 1
    :caption: Tutorials
 
    tutorials/add_new_doc_format
    tutorials/add_new_structure_type
-   tutorials/creating_document_classes
    tutorials/add_new_language
+   tutorials/creating_document_classes
+   tutorials/using_patterns
 
 
 .. toctree::
diff --git a/docs/source/modules/structure_extractors.rst b/docs/source/modules/structure_extractors.rst
index 86ded2c3..14313786 100644
--- a/docs/source/modules/structure_extractors.rst
+++ b/docs/source/modules/structure_extractors.rst
@@ -64,3 +64,97 @@ dedoc.structure_extractors
     :members:
 
     .. autoattribute:: document_type
+
+
+.. _dedoc_structure_extractors_patterns:
+
+Patterns for :class:`~dedoc.structure_extractors.DefaultStructureExtractor`
+---------------------------------------------------------------------------
+
+Structure patterns are used for more flexible configuration of line types and levels during the structure extraction step.
+They are useful only for :class:`~dedoc.structure_extractors.DefaultStructureExtractor` (in API when "document_type"="other").
+Please see :ref:`using_patterns` for examples of pattern usage.
+
+
+.. autoclass:: dedoc.structure_extractors.patterns.abstract_pattern.AbstractPattern
+    :special-members: __init__
+    :members:
+
+    .. autoattribute:: _name
+
+.. autoclass:: dedoc.structure_extractors.patterns.RegexpPattern
+    :show-inheritance:
+    :special-members: __init__
+    :members:
+
+    .. autoattribute:: _name
+
+.. autoclass:: dedoc.structure_extractors.patterns.StartWordPattern
+    :show-inheritance:
+    :special-members: __init__
+    :members:
+
+    .. autoattribute:: _name
+
+.. autoclass:: dedoc.structure_extractors.patterns.TagPattern
+    :show-inheritance:
+    :special-members: __init__
+    :members:
+
+    .. autoattribute:: _name
+
+.. autoclass:: dedoc.structure_extractors.patterns.BracketListPattern
+    :show-inheritance:
+    :special-members: __init__
+    :members:
+
+    .. autoattribute:: _name
+
+.. autoclass:: dedoc.structure_extractors.patterns.BracketRomanListPattern
+    :show-inheritance:
+    :special-members: __init__
+    :members:
+
+    .. autoattribute:: _name
+
+.. autoclass:: dedoc.structure_extractors.patterns.BulletListPattern
+    :show-inheritance:
+    :special-members: __init__
+    :members:
+
+    .. autoattribute:: _name
+
+.. autoclass:: dedoc.structure_extractors.patterns.DottedListPattern
+    :show-inheritance:
+    :special-members: __init__
+    :members:
+
+    .. autoattribute:: _name
+
+.. autoclass:: dedoc.structure_extractors.patterns.LetterListPattern
+    :show-inheritance:
+    :special-members: __init__
+    :members:
+
+    .. autoattribute:: _name
+
+.. autoclass:: dedoc.structure_extractors.patterns.RomanListPattern
+    :show-inheritance:
+    :special-members: __init__
+    :members:
+
+    .. autoattribute:: _name
+
+.. autoclass:: dedoc.structure_extractors.patterns.TagHeaderPattern
+    :show-inheritance:
+    :special-members: __init__
+    :members:
+
+    .. autoattribute:: _name
+
+.. autoclass:: dedoc.structure_extractors.patterns.TagListPattern
+    :show-inheritance:
+    :special-members: __init__
+    :members:
+
+    .. autoattribute:: _name
diff --git a/docs/source/readers_output/line_types.rst b/docs/source/readers_output/line_types.rst
index 666a8d35..7dc696af 100644
--- a/docs/source/readers_output/line_types.rst
+++ b/docs/source/readers_output/line_types.rst
@@ -4,8 +4,8 @@ Types of textual lines
 ======================
 
 Each reader returns :class:`~dedoc.data_structures.UnstructuredDocument` with textual lines.
-Readers don't fill ``hierarchy_level`` metadata field (structure extractors do this), but they can fill ``hierarchy_level_tag`` with information about line types.
-Below the readers are enlisted that can return non-empty ``hierarchy_level_tag`` in document lines metadata:
+Readers don't fill ``hierarchy_level`` metadata field (structure extractors do this), but they can fill ``tag_hierarchy_level`` with information about line types.
+The readers that can return non-empty ``tag_hierarchy_level`` in document lines metadata are listed below:
 
 * `+` means that the reader can return lines of this type.
 * `-` means that the reader doesn't return  lines of this type due to complexity of the task or lack of information provided by the format.
@@ -42,7 +42,7 @@ Below the readers are enlisted that can return non-empty ``hierarchy_level_tag``
 
    * - :class:`~dedoc.readers.RawTextReader`
      - `-`
-     - `+`
+     - `-`
      - `+`
      - `-`
 
@@ -54,7 +54,7 @@ Below the readers are enlisted that can return non-empty ``hierarchy_level_tag``
 
    * - :class:`~dedoc.readers.PdfImageReader`
      - `-`
-     - `+`
+     - `-`
      - `+`
      - `-`
 
@@ -66,6 +66,6 @@ Below the readers are enlisted that can return non-empty ``hierarchy_level_tag``
 
    * - :class:`~dedoc.readers.PdfTxtlayerReader`
      - `-`
-     - `+`
+     - `-`
      - `+`
      - `-`
diff --git a/docs/source/structure_types/other.rst b/docs/source/structure_types/other.rst
index 13a4e716..022fafd6 100644
--- a/docs/source/structure_types/other.rst
+++ b/docs/source/structure_types/other.rst
@@ -3,6 +3,11 @@
 Default document structure type
 ===============================
 
+.. note::
+
+    This structure type is configurable: you can change line types and levels in the tree hierarchy.
+    Please see :ref:`using_patterns` for more details.
+
 Below we will consider document lines as nodes of the document tree.
 In some cases document lines are paragraphs of the text (e.g. in docx).
 
diff --git a/docs/source/tutorials/using_patterns.rst b/docs/source/tutorials/using_patterns.rst
new file mode 100644
index 00000000..3b25a3db
--- /dev/null
+++ b/docs/source/tutorials/using_patterns.rst
@@ -0,0 +1,12 @@
+.. _using_patterns:
+
+Configure structure extraction using patterns
+=============================================
+
+
+Use patterns in Dedoc library
+-----------------------------
+
+
+Use patterns in Dedoc API
+-------------------------