Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft: Speeding up pysigma by using a faster yaml parser #133

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
394 changes: 189 additions & 205 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ pyyaml = "^6.0"
pyparsing = "^3.0.7"
requests = "^2.28.1"
packaging = "^22.0"
ryaml = { version = "^0.4.0", optional = true }

[tool.poetry.dev-dependencies]
pytest = "^6.2.2"
Expand All @@ -34,6 +35,9 @@ pylint = "^2.15.7"
Sphinx = "^4.2.0"
mypy = "^0.931"

[tool.poetry.extras]
rust = ["ryaml"]

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
3 changes: 2 additions & 1 deletion sigma/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

from sigma.rule import SigmaRule
from sigma.exceptions import SigmaCollectionError, SigmaError, SigmaRuleLocation
import yaml
from sigma import yaml


@dataclass
class SigmaCollection:
Expand Down
3 changes: 2 additions & 1 deletion sigma/modifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def apply(self, val : Union[SigmaType, Sequence[SigmaType]]) -> List[SigmaType]:
])
]
else:
print(val)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Forgotten test code? 😉 Please remove.

if not self.type_check(val):
raise SigmaTypeError(f"Modifier {self.__class__.__name__} incompatible to value type of '{ val }'", source=self.source)
r = self.modify(val)
Expand Down Expand Up @@ -304,4 +305,4 @@ def modify(self, val : SigmaString) -> SigmaString:
reverse_modifier_mapping : Dict[str, str] = {
modifier_class.__name__: identifier
for identifier, modifier_class in modifier_mapping.items()
}
}
4 changes: 2 additions & 2 deletions sigma/processing/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from sigma.processing.transformations import transformations, Transformation
from sigma.processing.conditions import rule_conditions, RuleProcessingCondition, detection_item_conditions, DetectionItemProcessingCondition, field_name_conditions, FieldNameProcessingCondition
from sigma.exceptions import SigmaConfigurationError, SigmaTypeError
import yaml
from sigma import yaml

from sigma.types import SigmaFieldReference, SigmaType

Expand Down Expand Up @@ -304,4 +304,4 @@ def __radd__(self, other : Literal[0]) -> "ProcessingPipeline":
if other == 0:
return self
else:
return NotImplemented
return NotImplemented
27 changes: 10 additions & 17 deletions sigma/rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@
from uuid import UUID
from enum import Enum, auto
from datetime import date, datetime
import yaml
import sigma
from sigma.types import SigmaType, SigmaNull, SigmaString, SigmaNumber, sigma_type
from sigma.modifiers import SigmaModifier, modifier_mapping, reverse_modifier_mapping, SigmaValueModifier, SigmaListModifier
from sigma.conditions import SigmaCondition, ConditionAND, ConditionOR, ConditionFieldEqualsValueExpression, ConditionValueExpression, ParentChainMixin
from sigma.processing.tracking import ProcessingItemTrackingMixin
import sigma.exceptions as sigma_exceptions
from sigma.exceptions import SigmaRuleLocation, SigmaValueError, SigmaError
from sigma import yaml


class EnumLowercaseStringMixin:
def __str__(self) -> str:
Expand Down Expand Up @@ -506,18 +507,6 @@ def __getitem__(self, key : str) -> SigmaDetection:
"""Get detection by name"""
return self.detections[key]

class SigmaYAMLLoader(yaml.SafeLoader):
    """Custom YAML loader implementing additional functionality for Sigma.

    Extends ``yaml.SafeLoader`` so that a mapping containing the same key
    more than once is rejected with an error.
    """

    def construct_mapping(self, node, deep=...):
        """Construct a mapping from ``node`` after verifying its keys are unique.

        :param node: mapping node; ``node.value`` is iterated as pairs of
            (key node, value node).
        :param deep: passed through unchanged to ``construct_object`` and to
            the parent ``construct_mapping``. The ``...`` default is kept
            from the original signature for compatibility.
        :raises yaml.error.YAMLError: if a key occurs more than once.
        :return: whatever the parent ``construct_mapping`` returns.
        """
        seen_keys = set()
        for key_node, _ in node.value:
            key = self.construct_object(key_node, deep=deep)
            if key in seen_keys:
                # Report the constructed key value; the message previously
                # lacked the f-prefix and emitted the literal text "{k}".
                raise yaml.error.YAMLError(f"Duplicate key '{key}'")
            seen_keys.add(key)

        return super().construct_mapping(node, deep)

@dataclass
class SigmaRule(ProcessingItemTrackingMixin):
Expand Down Expand Up @@ -595,12 +584,16 @@ def from_dict(
if rule_date is not None:
if not isinstance(rule_date, date) and not isinstance(rule_date, datetime):
try:
rule_date = date(*(int(i) for i in rule_date.split("/")))
rule_date = datetime.strptime(rule_date, "%Y-%m-%dT%H:%M:%S")
except ValueError:
try:
rule_date = date(*(int(i) for i in rule_date.split("-")))
rule_date = date(*(int(i) for i in rule_date.split("/")))
except ValueError:
errors.append(sigma_exceptions.SigmaDateError(f"Rule date '{ rule_date }' is invalid, must be yyyy/mm/dd or yyyy-mm-dd", source=source))
try:
rule_date = date(*(int(i) for i in rule_date.split("-")))
except ValueError:
msg = f"Rule date '{ rule_date }' is invalid, must be yyyy/mm/dd, yyyy-mm-dd or yyyy-mm-ddTHH:MM:SS"
errors.append(sigma_exceptions.SigmaDateError(msg, source=source))

# parse log source
logsource = None
Expand Down Expand Up @@ -649,7 +642,7 @@ def from_dict(
@classmethod
def from_yaml(cls, rule : str, collect_errors : bool = False) -> "SigmaRule":
"""Convert YAML input string with single document into SigmaRule object."""
parsed_rule = yaml.load(rule, SigmaYAMLLoader)
parsed_rule = yaml.load(rule)
return cls.from_dict(parsed_rule, collect_errors)

def to_dict(self) -> dict:
Expand Down
5 changes: 3 additions & 2 deletions sigma/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
from sigma.exceptions import SigmaConfigurationError
from sigma.rule import SigmaRule
from sigma.validators.base import SigmaRuleValidator, SigmaValidationIssue
import yaml
from sigma import yaml


class SigmaValidator:
"""
Expand Down Expand Up @@ -135,4 +136,4 @@ def validate_rules(self, rules : Iterator[SigmaRule]) -> List[SigmaValidationIss
issue
for rule in rules
for issue in self.validate_rule(rule)
] + self.finalize()
] + self.finalize()
50 changes: 50 additions & 0 deletions sigma/yaml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
try:
import ryaml
import yaml
using_ryaml = True
except ImportError:
import yaml
using_ryaml = False

class SigmaYAMLLoader(yaml.SafeLoader):
    """Custom YAML loader implementing additional functionality for Sigma.

    Extends ``yaml.SafeLoader`` so that a mapping containing the same key
    more than once is rejected with an error.
    """

    def construct_mapping(self, node, deep=...):
        """Construct a mapping from ``node`` after verifying its keys are unique.

        :param node: mapping node; ``node.value`` is iterated as pairs of
            (key node, value node).
        :param deep: passed through unchanged to ``construct_object`` and to
            the parent ``construct_mapping``. The ``...`` default is kept
            from the original signature for compatibility.
        :raises yaml.error.YAMLError: if a key occurs more than once.
        :return: whatever the parent ``construct_mapping`` returns.
        """
        seen_keys = set()
        for key_node, _ in node.value:
            key = self.construct_object(key_node, deep=deep)
            if key in seen_keys:
                # Report the constructed key value; the message previously
                # lacked the f-prefix and emitted the literal text "{k}".
                raise yaml.error.YAMLError(f"Duplicate key '{key}'")
            seen_keys.add(key)
        return super().construct_mapping(node, deep)


def safe_load_all(yaml_str):
    """Parse all YAML documents contained in ``yaml_str``.

    When ryaml could be imported, ``str`` input goes to ``ryaml.loads_all``
    and any other input goes to ``ryaml.load_all``; otherwise the call is
    delegated to ``yaml.safe_load_all``.
    """
    if using_ryaml:
        # ryaml exposes separate entry points for strings and other inputs.
        parse_all = ryaml.loads_all if isinstance(yaml_str, str) else ryaml.load_all
        return parse_all(yaml_str)
    return yaml.safe_load_all(yaml_str)


def load(yaml_str):
    """Parse a single YAML document from ``yaml_str``.

    Without ryaml the document is loaded through ``yaml.load`` using
    ``SigmaYAMLLoader``. With ryaml, ``str`` input goes to ``ryaml.loads``
    and any other input goes to ``ryaml.load``; no key validation is
    performed on that path yet (see the TODO below).
    """
    if using_ryaml:
        parse = ryaml.loads if isinstance(yaml_str, str) else ryaml.load
        document = parse(yaml_str)
        # TODO validate keys here.
        return document

    return yaml.load(yaml_str, SigmaYAMLLoader)


def safe_load(yaml_str):
    """Parse a single YAML document from ``yaml_str``.

    Delegates to ``yaml.safe_load`` when ryaml is unavailable. Otherwise
    ``str`` input is handled by ``ryaml.loads`` and any other input by
    ``ryaml.load``.
    """
    if using_ryaml:
        if isinstance(yaml_str, str):
            return ryaml.loads(yaml_str)
        return ryaml.load(yaml_str)
    return yaml.safe_load(yaml_str)
5 changes: 4 additions & 1 deletion tests/test_rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,8 @@ def test_sigmarule_bad_status():
def test_sigmarule_bad_date():
with pytest.raises(sigma_exceptions.SigmaDateError, match="Rule date.*test.yml"):
SigmaRule.from_dict({ "date": "bad" }, source=sigma_exceptions.SigmaRuleLocation("test.yml"))


def test_sigmarule_date():
expected_date = date(3000,1,2)
rule = SigmaRule.from_yaml("""
Expand All @@ -539,6 +541,7 @@ def test_sigmarule_date():
assert rule is not None
assert rule.date == expected_date


def test_sigmarule_datetime():
expected_date = datetime(3000,1,2,3,4,5)
rule = SigmaRule.from_yaml("""
Expand Down Expand Up @@ -811,4 +814,4 @@ def test_sigma_rule_overlapping_selections():
[
len(ands.args)
for ands in cond.args
] == [2, 4]
] == [2, 4]