Skip to content

Commit

Permalink
compiler: use "github.com/dlclark/regexp2" instead of stdlib regexp (#…
Browse files Browse the repository at this point in the history
…143)

Package "github.com/dlclark/regexp2" supports negative lookahead,
which is required by ory/oathkeeper#321

Signed-off-by: Aynur Zulkarnaev <[email protected]>
  • Loading branch information
ayzu authored and aeneasr committed Jan 8, 2020
1 parent 0e3588c commit 5c9c93e
Show file tree
Hide file tree
Showing 7 changed files with 130 additions and 42 deletions.
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,11 @@ var pol = &ladon.DefaultPolicy{

// Which resources this policy affects.
// Again, you can put regular expressions in inside < >.
Resources: []string{"myrn:some.domain.com:resource:123", "myrn:some.domain.com:resource:345", "myrn:something:foo:<.+>"},
Resources: []string{
"myrn:some.domain.com:resource:123", "myrn:some.domain.com:resource:345",
"myrn:something:foo:<.+>", "myrn:some.domain.com:resource:<(?!protected).*>",
"myrn:some.domain.com:resource:<[[:digit:]]+>"
},

// Which actions this policy affects. Supports RegExp
// Again, you can put regular expressions in inside < >.
Expand Down Expand Up @@ -698,7 +702,7 @@ Ladon's limitations are listed here.
### Regular expressions
Matching regular expressions has a complexity of `O(n)` and databases such as MySQL or Postgres can not
Matching regular expressions has a complexity of `O(n)` ([except](https://groups.google.com/d/msg/golang-nuts/7qgSDWPIh_E/OHTAm4wRZL0J) lookahead/lookbehind assertions) and databases such as MySQL or Postgres can not
leverage indexes when parsing regular expressions. Thus, there is considerable overhead when using regular
expressions.
Expand Down
65 changes: 36 additions & 29 deletions compiler/regex.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,40 +36,45 @@ package compiler
// Use of this source code is governed by a BSD-style
// license as follows:

//Copyright (c) 2012 Rodrigo Moraes. All rights reserved.
// Copyright (c) 2012 Rodrigo Moraes. All rights reserved.
//
//Redistribution and use in source and binary forms, with or without
//modification, are permitted provided that the following conditions are
//met:
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//* Redistributions of source code must retain the above copyright
//notice, this list of conditions and the following disclaimer.
//* Redistributions in binary form must reproduce the above
//copyright notice, this list of conditions and the following disclaimer
//in the documentation and/or other materials provided with the
//distribution.
//* Neither the name of Google Inc. nor the names of its
//contributors may be used to endorse or promote products derived from
//this software without specific prior written permission.
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
//THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
//"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
//LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
//A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
//OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
//SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
//LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
//DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
//THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
//(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
//OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import (
"bytes"
"fmt"
"regexp"
"time"

"github.com/dlclark/regexp2"
)

const regexp2MatchTimeout = time.Millisecond * 250

// delimiterIndices returns the first level delimiter indices from a string.
// It returns an error in case of unbalanced delimiters.
func delimiterIndices(s string, delimiterStart, delimiterEnd byte) ([]int, error) {
Expand Down Expand Up @@ -105,18 +110,17 @@ func delimiterIndices(s string, delimiterStart, delimiterEnd byte) ([]int, error
// reg, err := compiler.CompileRegex("foo:bar.baz:<[0-9]{2,10}>", '<', '>')
// // if err != nil ...
// reg.MatchString("foo:bar.baz:123")
func CompileRegex(tpl string, delimiterStart, delimiterEnd byte) (*regexp.Regexp, error) {
func CompileRegex(tpl string, delimiterStart, delimiterEnd byte) (*regexp2.Regexp, error) {
// Check if it is well-formed.
idxs, errBraces := delimiterIndices(tpl, delimiterStart, delimiterEnd)
if errBraces != nil {
return nil, errBraces
}
varsR := make([]*regexp.Regexp, len(idxs)/2)
varsR := make([]*regexp2.Regexp, len(idxs)/2)
pattern := bytes.NewBufferString("")
pattern.WriteByte('^')

var end int
var err error
for i := 0; i < len(idxs); i += 2 {
// Set all values we are interested in.
raw := tpl[end:idxs[i]]
Expand All @@ -125,10 +129,12 @@ func CompileRegex(tpl string, delimiterStart, delimiterEnd byte) (*regexp.Regexp
// Build the regexp pattern.
varIdx := i / 2
fmt.Fprintf(pattern, "%s(%s)", regexp.QuoteMeta(raw), patt)
varsR[varIdx], err = regexp.Compile(fmt.Sprintf("^%s$", patt))
reg, err := regexp2.Compile(fmt.Sprintf("^%s$", patt), regexp2.RE2)
if err != nil {
return nil, err
}
reg.MatchTimeout = regexp2MatchTimeout
varsR[varIdx] = reg
}

// Add the remaining.
Expand All @@ -137,10 +143,11 @@ func CompileRegex(tpl string, delimiterStart, delimiterEnd byte) (*regexp.Regexp
pattern.WriteByte('$')

// Compile full regexp.
reg, errCompile := regexp.Compile(pattern.String())
reg, errCompile := regexp2.Compile(pattern.String(), regexp2.RE2)
if errCompile != nil {
return nil, errCompile
}
reg.MatchTimeout = regexp2MatchTimeout

return reg, nil
}
18 changes: 15 additions & 3 deletions compiler/regex_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
package compiler

import (
"regexp"
"testing"

"github.com/dlclark/regexp2"
"github.com/stretchr/testify/assert"
)

Expand All @@ -45,8 +45,18 @@ func TestRegexCompiler(t *testing.T) {
{"urn:foo.bar.com:{.*{}", '{', '}', true, "", true},
{"urn:foo:<.*>", '<', '>', false, "urn:foo:bar:baz", false},

{`urn:foo:<user=(?!admin).*>`, '<', '>', false, "urn:foo:user=john", false},
{`urn:foo:<user=(?!admin).*>`, '<', '>', false, "urn:foo:user=admin", true},

{`urn:foo:user=<[[:digit:]]*>`, '<', '>', false, "urn:foo:user=admin", true},
{`urn:foo:user=<[[:digit:]]*>`, '<', '>', false, "urn:foo:user=62235", false},

{`urn:foo:user={(?P<id>\d{3})}`, '{', '}', false, "urn:foo:user=622", false},
{`urn:foo:user=<(?P<id>\d{3})>`, '<', '>', false, "urn:foo:user=622", false},
{`urn:foo:user=<(?P<id>\d{3})>`, '<', '>', false, "urn:foo:user=aaa", true},

// Ignoring this case for now...
//{"urn:foo.bar.com:{.*\\{}", '{', '}', false, "", true},
// {"urn:foo.bar.com:{.*\\{}", '{', '}', false, "", true},
} {
k++
result, err := CompileRegex(c.template, c.delimiterStart, c.delimiterEnd)
Expand All @@ -56,8 +66,10 @@ func TestRegexCompiler(t *testing.T) {
}

t.Logf("Case %d compiled to: %s", k, result.String())
ok, err := regexp.MatchString(result.String(), c.matchAgainst)
re := regexp2.MustCompile(result.String(), regexp2.RE2)
ok, err := re.MatchString(c.matchAgainst)
assert.Nil(t, err, "Case %d", k)
assert.Equal(t, !c.failMatch, ok, "Case %d", k)

}
}
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ module github.com/ory/ladon

require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dlclark/regexp2 v1.2.0
github.com/golang/mock v1.1.1
github.com/hashicorp/golang-lru v0.5.0
github.com/ory/pagination v0.0.1
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dlclark/regexp2 v1.2.0 h1:8sAhBGEM0dRWogWqWyQeIJnxjWO6oIjl8FKqREDsGfk=
github.com/dlclark/regexp2 v1.2.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
github.com/golang/mock v1.1.1 h1:G5FRp8JnTd7RQH5kemVNlMeyXQAztQ3mOWV95KxsXH8=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/google/uuid v1.0.0 h1:b4Gk+7WdP/d3HZH8EJsZpvV7EtDOgaZLtnaNGIu1adA=
Expand Down
52 changes: 52 additions & 0 deletions ladon_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,22 @@ var pols = []Policy{
Resources: []string{"<.*>"},
Effect: DenyAccess,
},
&DefaultPolicy{
ID: "4",
Description: "This policy allows swen to update any resource except `protected` resources",
Subjects: []string{"swen"},
Actions: []string{"update"},
Resources: []string{"myrn:some.domain.com:resource:<(?!protected).*>"},
Effect: AllowAccess,
},
&DefaultPolicy{
ID: "5",
Description: "This policy allows richard to update resources which names consists of digits only",
Subjects: []string{"richard"},
Actions: []string{"update"},
Resources: []string{"myrn:some.domain.com:resource:<[[:digit:]]+>"},
Effect: AllowAccess,
},
}

// Some test cases
Expand Down Expand Up @@ -154,6 +170,42 @@ var cases = []struct {
},
expectErr: true,
},
{
description: "should pass because swen is allowed to update all resources except `protected` resources.",
accessRequest: &Request{
Subject: "swen",
Action: "update",
Resource: "myrn:some.domain.com:resource:123",
},
expectErr: false,
},
{
description: "should fail because swen is not allowed to update `protected` resource",
accessRequest: &Request{
Subject: "swen",
Action: "update",
Resource: "myrn:some.domain.com:resource:protected123",
},
expectErr: true,
},
{
description: "should fail because richard is not allowed to update a resource with alphanumeric name",
accessRequest: &Request{
Subject: "richard",
Action: "update",
Resource: "myrn:some.domain.com:resource:protected123",
},
expectErr: true,
},
{
description: "should pass because richard is allowed to update a resources with a name containing digits only",
accessRequest: &Request{
Subject: "richard",
Action: "update",
Resource: "myrn:some.domain.com:resource:25222",
},
expectErr: false,
},
}

func TestLadon(t *testing.T) {
Expand Down
26 changes: 18 additions & 8 deletions matcher_regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
package ladon

import (
"regexp"
"strings"

"github.com/dlclark/regexp2"
"github.com/hashicorp/golang-lru"
"github.com/pkg/errors"

Expand All @@ -45,26 +45,26 @@ func NewRegexpMatcher(size int) *RegexpMatcher {
type RegexpMatcher struct {
*lru.Cache

C map[string]*regexp.Regexp
C map[string]*regexp2.Regexp
}

func (m *RegexpMatcher) get(pattern string) *regexp.Regexp {
func (m *RegexpMatcher) get(pattern string) *regexp2.Regexp {
if val, ok := m.Cache.Get(pattern); !ok {
return nil
} else if reg, ok := val.(*regexp.Regexp); !ok {
} else if reg, ok := val.(*regexp2.Regexp); !ok {
return nil
} else {
return reg
}
}

func (m *RegexpMatcher) set(pattern string, reg *regexp.Regexp) {
func (m *RegexpMatcher) set(pattern string, reg *regexp2.Regexp) {
m.Cache.Add(pattern, reg)
}

// Matches a needle with an array of regular expressions and returns true if a match was found.
func (m *RegexpMatcher) Matches(p Policy, haystack []string, needle string) (bool, error) {
var reg *regexp.Regexp
var reg *regexp2.Regexp
var err error
for _, h := range haystack {

Expand All @@ -80,7 +80,12 @@ func (m *RegexpMatcher) Matches(p Policy, haystack []string, needle string) (boo
}

if reg = m.get(h); reg != nil {
if reg.MatchString(needle) {
if matched, err := reg.MatchString(needle); err != nil {
// according to regexp2 documentation: https://github.com/dlclark/regexp2#usage
// The only error that the *Match* methods should return is a Timeout if you set the
// re.MatchTimeout field. Any other error is a bug in the regexp2 package.
return false, errors.WithStack(err)
} else if matched {
return true, nil
}
continue
Expand All @@ -92,7 +97,12 @@ func (m *RegexpMatcher) Matches(p Policy, haystack []string, needle string) (boo
}

m.set(h, reg)
if reg.MatchString(needle) {
if matched, err := reg.MatchString(needle); err != nil {
// according to regexp2 documentation: https://github.com/dlclark/regexp2#usage
// The only error that the *Match* methods should return is a Timeout if you set the
// re.MatchTimeout field. Any other error is a bug in the regexp2 package.
return false, errors.WithStack(err)
} else if matched {
return true, nil
}
}
Expand Down

0 comments on commit 5c9c93e

Please sign in to comment.