From eca4daefccaf005e36f2e831b966ef7ba8512258 Mon Sep 17 00:00:00 2001 From: Wismill Date: Mon, 3 Jun 2019 14:42:04 +0200 Subject: [PATCH] Use xmlbf for XML parsing (#67) A new implementation of the RDF/XML parser, using the xmlbf library. The previous RDF/XML parser was based on HXT, but failed 65 of the W3C parsing unit tests. This new xmlbf based parser fixes all these failing tests (all but one, being worked on), and is ~25% faster too. --- .gitignore | 1 + .hlint.yaml | 65 + .travis.yml | 8 +- bench/MainCriterion.hs | 95 +- data/ttl/conformance/test-00.out | 2 +- examples/ESWC.hs | 6 +- examples/ParseURLs.hs | 2 +- rdf-tests | 2 +- rdf4h.cabal | 58 +- src/Data/RDF.hs | 2 + src/Data/RDF/Graph/AdjHashMap.hs | 13 +- src/Data/RDF/Graph/AlgebraicGraph.hs | 113 ++ src/Data/RDF/Graph/HashMapSP.hs | 48 +- src/Data/RDF/Graph/MapSP.hs | 40 +- src/Data/RDF/Graph/TList.hs | 8 +- src/Data/RDF/Graph/TPatriciaTree.hs | 46 +- src/Data/RDF/IRI.hs | 49 +- src/Data/RDF/Namespace.hs | 74 +- src/Data/RDF/Query.hs | 6 +- src/Data/RDF/Types.hs | 38 +- src/Rdf4hParseMain.hs | 39 +- src/Rdf4hQueryMain.hs | 16 +- src/Text/RDF/RDF4H/NTriplesParser.hs | 20 +- src/Text/RDF/RDF4H/ParserUtils.hs | 77 +- src/Text/RDF/RDF4H/TurtleParser.hs | 51 +- src/Text/RDF/RDF4H/TurtleSerializer.hs | 16 +- src/Text/RDF/RDF4H/XmlParser.hs | 1126 +++++++++-------- src/Text/RDF/RDF4H/XmlParser/Identifiers.hs | 107 ++ src/Text/RDF/RDF4H/XmlParserHXT.hs | 12 +- stack.yaml | 12 +- testsuite/tests/Data/RDF/IRITests.hs | 3 - testsuite/tests/Data/RDF/PropertyTests.hs | 47 +- testsuite/tests/Test.hs | 28 +- .../RDF/RDF4H/TurtleParser_ConformanceTest.hs | 19 +- .../tests/Text/RDF/RDF4H/XmlParser_Test.hs | 36 +- testsuite/tests/W3C/Manifest.hs | 15 +- testsuite/tests/W3C/NTripleTest.hs | 4 +- testsuite/tests/W3C/RdfXmlTest.hs | 35 +- testsuite/tests/W3C/TurtleTest.hs | 58 +- testsuite/tests/W3C/W3CAssertions.hs | 11 +- 40 files changed, 1471 insertions(+), 937 deletions(-) create mode 100644 .hlint.yaml create mode 100644 src/Data/RDF/Graph/AlgebraicGraph.hs create mode 100644 src/Text/RDF/RDF4H/XmlParser/Identifiers.hs diff --git a/.gitignore b/.gitignore index 346cc90..42b31db 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ TAGS *.backup /.cabal-sandbox cabal.sandbox.config +cabal.project.local countries.ttl *.prof bench/MainCriterion diff --git a/.hlint.yaml b/.hlint.yaml new file mode 100644 index 0000000..4057568 --- /dev/null +++ b/.hlint.yaml @@ -0,0 +1,65 @@ +# HLint configuration file +# https://github.com/ndmitchell/hlint +########################## + +# This file contains a template configuration file, which is typically +# placed as .hlint.yaml in the root of your project + + +# Specify additional command line arguments +# +# - arguments: [--color, --cpp-simple, -XQuasiQuotes] + + +# Control which extensions/flags/modules/functions can be used +# +# - extensions: +# - default: false # all extension are banned by default +# - name: [PatternGuards, ViewPatterns] # only these listed extensions can be used +# - {name: CPP, within: CrossPlatform} # CPP can only be used in a given module +# +# - flags: +# - {name: -w, within: []} # -w is allowed nowhere +# +# - modules: +# - {name: [Data.Set, Data.HashSet], as: Set} # if you import Data.Set qualified, it must be as 'Set' +# - {name: Control.Arrow, within: []} # Certain modules are banned entirely +# +# - functions: +# - {name: unsafePerformIO, within: []} # unsafePerformIO can only appear in no modules + + +# Add custom hints for this project +# +# Will suggest replacing "wibbleMany [myvar]" with "wibbleOne myvar" +# - error: {lhs: "wibbleMany [x]", rhs: wibbleOne x} + + +# Turn on hints that are off by default +# +# Ban "module X(module X) where", to require a real export list +# - warn: {name: Use explicit module export list} +# +# Replace a $ b $ c with a . b $ c +# - group: {name: dollar, enabled: true} +# +# Generalise map to fmap, ++ to <> +- group: {name: generalise, enabled: true} + + +# Ignore some builtin hints +# - ignore: {name: Use let} +# - ignore: {name: Use const, within: SpecialModule} # Only within certain modules + + +# Define some custom infix operators +# - fixity: infixr 3 ~^#^~ + + +# To generate a suitable file for HLint do: +# $ hlint --default > .hlint.yaml + +- ignore: {name: Eta reduce} +- ignore: {name: Redundant bracket} +- ignore: {name: Reduce duplication} +- ignore: {name: Use camelCase} diff --git a/.travis.yml b/.travis.yml index 500904b..6d809b6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -81,10 +81,6 @@ matrix: compiler: ": #stack 7.10.3" addons: {apt: {packages: [libgmp-dev]}} - - env: BUILD=stack ARGS="--resolver lts-7" - compiler: ": #stack 8.0.1" - addons: {apt: {packages: [libgmp-dev]}} - - env: BUILD=stack ARGS="--resolver lts-9" compiler: ": #stack 8.0.2" addons: {apt: {packages: [libgmp-dev]}} @@ -97,6 +93,10 @@ matrix: compiler: ": #stack 8.4.3" addons: {apt: {packages: [libgmp-dev]}} + - env: BUILD=stack ARGS="--resolver lts-13" + compiler: ": #stack 8.6.5" + addons: {apt: {packages: [libgmp-dev]}} + # Nightly builds are allowed to fail # - env: BUILD=stack ARGS="--resolver nightly" # compiler: ": #stack nightly" diff --git a/bench/MainCriterion.hs b/bench/MainCriterion.hs index ffa8d67..6077c0c 100644 --- a/bench/MainCriterion.hs +++ b/bench/MainCriterion.hs @@ -4,6 +4,7 @@ module Main where import Prelude hiding (readFile) +import Data.Semigroup (Semigroup(..)) import Criterion import Criterion.Types import Criterion.Main @@ -18,21 +19,15 @@ import Control.DeepSeq (NFData) -- $ gzip -d bills.099.actions.rdf.gz parseXmlRDF :: Rdf a => T.Text -> RDF a -parseXmlRDF s = - let (Right rdf) = parseString (XmlParser Nothing Nothing) s - in rdf +parseXmlRDF = either (error . show) id . parseString (XmlParser Nothing Nothing) {-# INLINE parseXmlRDF #-} parseNtRDF :: Rdf a => T.Text -> RDF a -parseNtRDF s = - let (Right rdf) = parseString NTriplesParser s - in rdf +parseNtRDF = either (error . show) id . parseString NTriplesParser {-# INLINE parseNtRDF #-} parseTtlRDF :: Rdf a => T.Text -> RDF a -parseTtlRDF s = - let (Right rdf) = parseString (TurtleParser Nothing Nothing) s - in rdf +parseTtlRDF = either (error . show) id . parseString (TurtleParser Nothing Nothing) {-# INLINE parseTtlRDF #-} queryGr :: Rdf a => (Maybe Node,Maybe Node,Maybe Node,RDF a) -> [Triple] @@ -48,15 +43,19 @@ main :: IO () main = defaultMainWith (defaultConfig {resamples = 100}) [ env + -- [FIXME] Do not rely on system's defaults to read files. (do fawltyContentTurtle <- readFile "data/ttl/fawlty1.ttl" fawltyContentNTriples <- readFile "data/nt/all-fawlty-towers.nt" - rdf1' <- parseFile (XmlParser Nothing Nothing) xmlFile - rdf2' <- parseFile (XmlParser Nothing Nothing) xmlFile - let rdf1 = either (error . show) id rdf1' :: RDF TList + xmlContent <- readFile xmlFile + let rdf1' = parseString (XmlParser Nothing Nothing) xmlContent + rdf2' = parseString (XmlParser Nothing Nothing) xmlContent + rdf3' =parseString (XmlParser Nothing Nothing) xmlContent + rdf1 = either (error . show) id rdf1' :: RDF TList rdf2 = either (error . show) id rdf2' :: RDF AdjHashMap + rdf3 = either (error . show) id rdf3' :: RDF AlgebraicGraph triples = triplesOf rdf1 - return (rdf1, rdf2, triples, fawltyContentNTriples, fawltyContentTurtle)) $ - \ ~(triplesList, adjMap, triples, fawltyContentNTriples, fawltyContentTurtle) -> + return (rdf1, rdf2, rdf3, triples, fawltyContentNTriples, fawltyContentTurtle, xmlContent)) $ + \ ~(triplesList, adjMap, algGraph, triples, fawltyContentNTriples, fawltyContentTurtle, xmlContent) -> bgroup "rdf4h" [ bgroup @@ -81,42 +80,56 @@ main = defaultMainWith let res = parseString (TurtleParserCustom Nothing Nothing Attoparsec) t :: Either ParseFailure (RDF TList) in either (error . show) id res ) fawltyContentTurtle + , bench "xml-xmlbf" $ + nf (\t -> + let res = parseString (XmlParser Nothing Nothing) t :: Either ParseFailure (RDF TList) + in either (error . show) id res + ) xmlContent + , bench "xml-xht" $ + nf (\t -> + let res = parseString (XmlParserHXT Nothing Nothing) t :: Either ParseFailure (RDF TList) + in either (error . show) id res + ) xmlContent ] , bgroup "query" - (queryBench "TList" triplesList ++ - queryBench "AdjHashMap" adjMap - -- queryBench "SP" mapSP ++ queryBench "HashSP" hashMapSP + (queryBench "TList" triplesList <> + queryBench "AdjHashMap" adjMap <> + queryBench "AlgebraicGraph" algGraph + -- queryBench "SP" mapSP <> queryBench "HashSP" hashMapSP ) , bgroup "select" - (selectBench "TList" triplesList ++ - selectBench "AdjHashMap" adjMap - -- selectBench "SP" mapSP ++ selectBench "HashSP" hashMapSP + (selectBench "TList" triplesList <> + selectBench "AdjHashMap" adjMap <> + selectBench "AlgebraicGraph" algGraph + -- selectBench "SP" mapSP <> selectBench "HashSP" hashMapSP ) , bgroup "add-remove-triples" - (addRemoveTriples "TList" triples (empty :: RDF TList) triplesList - ++ addRemoveTriples "AdjHashMap" triples (empty :: RDF AdjHashMap) adjMap + (addRemoveTriples "TList" triples (empty :: RDF TList) triplesList <> + addRemoveTriples "AdjHashMap" triples (empty :: RDF AdjHashMap) adjMap <> + addRemoveTriples "AlgebraicGraph" triples (empty :: RDF AlgebraicGraph) algGraph ) , bgroup "count_triples" [ bench "TList" (nf (length . triplesOf) triplesList) , bench "AdjHashMap" (nf (length . triplesOf) adjMap) + , bench "AlgebraicGraph" (nf (length . triplesOf) algGraph) ] ] ] selectBench :: Rdf a => String -> RDF a -> [Benchmark] selectBench label gr = - [ bench (label ++ " SPO") $ nf selectGr (subjSelect,predSelect,objSelect,gr) - , bench (label ++ " SP") $ nf selectGr (subjSelect,predSelect,selectNothing,gr) - , bench (label ++ " S") $ nf selectGr (subjSelect,selectNothing,selectNothing,gr) - , bench (label ++ " PO") $ nf selectGr (selectNothing,predSelect,objSelect,gr) - , bench (label ++ " SO") $ nf selectGr (subjSelect,selectNothing,objSelect,gr) - , bench (label ++ " P") $ nf selectGr (selectNothing,predSelect,selectNothing,gr) - , bench (label ++ " O") $ nf selectGr (selectNothing,selectNothing,objSelect,gr) + [ bench (label <> " SPO") $ nf selectGr (subjSelect,predSelect,objSelect,gr) + , bench (label <> " SP") $ nf selectGr (subjSelect,predSelect,selectNothing,gr) + , bench (label <> " S") $ nf selectGr (subjSelect,selectNothing,selectNothing,gr) + , bench (label <> " PO") $ nf selectGr (selectNothing,predSelect,objSelect,gr) + , bench (label <> " SO") $ nf selectGr (subjSelect,selectNothing,objSelect,gr) + , bench (label <> " P") $ nf selectGr (selectNothing,predSelect,selectNothing,gr) + , bench (label <> " O") $ nf selectGr (selectNothing,selectNothing,objSelect,gr) ] subjSelect, predSelect, objSelect, selectNothing :: Maybe (Node -> Bool) @@ -133,25 +146,25 @@ queryNothing = Nothing queryBench :: Rdf a => String -> RDF a -> [Benchmark] queryBench label gr = - [ bench (label ++ " SPO") $ nf queryGr (subjQuery,predQuery,objQuery,gr) - , bench (label ++ " SP") $ nf queryGr (subjQuery,predQuery,queryNothing,gr) - , bench (label ++ " S") $ nf queryGr (subjQuery,queryNothing,queryNothing,gr) - , bench (label ++ " PO") $ nf queryGr (queryNothing,predQuery,objQuery,gr) - , bench (label ++ " SO") $ nf queryGr (subjQuery,queryNothing,objQuery,gr) - , bench (label ++ " P") $ nf queryGr (queryNothing,predQuery,queryNothing,gr) - , bench (label ++ " O") $ nf queryGr (queryNothing,queryNothing,objQuery,gr) + [ bench (label <> " SPO") $ nf queryGr (subjQuery,predQuery,objQuery,gr) + , bench (label <> " SP") $ nf queryGr (subjQuery,predQuery,queryNothing,gr) + , bench (label <> " S") $ nf queryGr (subjQuery,queryNothing,queryNothing,gr) + , bench (label <> " PO") $ nf queryGr (queryNothing,predQuery,objQuery,gr) + , bench (label <> " SO") $ nf queryGr (subjQuery,queryNothing,objQuery,gr) + , bench (label <> " P") $ nf queryGr (queryNothing,predQuery,queryNothing,gr) + , bench (label <> " O") $ nf queryGr (queryNothing,queryNothing,objQuery,gr) ] -addRemoveTriples :: (NFData a,NFData (RDF a), Rdf a) => String -> Triples -> RDF a -> RDF a -> [Benchmark] +addRemoveTriples :: (NFData (RDF a), Rdf a) => String -> Triples -> RDF a -> RDF a -> [Benchmark] addRemoveTriples lbl triples emptyGr populatedGr = - [ bench (lbl ++ "-add-triples") $ nf addTriples (triples,emptyGr) - , bench (lbl ++ "-remove-triples") $ nf removeTriples (triples,populatedGr) + [ bench (lbl <> "-add-triples") $ nf addTriples (triples,emptyGr) + , bench (lbl <> "-remove-triples") $ nf removeTriples (triples,populatedGr) ] addTriples :: Rdf a => (Triples,RDF a) -> RDF a addTriples (triples,emptyGr) = - foldr (\t g -> addTriple g t) emptyGr triples + foldr (flip addTriple) emptyGr triples removeTriples :: Rdf a => (Triples,RDF a) -> RDF a removeTriples (triples,populatedGr) = - foldr (\t g -> removeTriple g t) populatedGr triples + foldr (flip removeTriple) populatedGr triples diff --git a/data/ttl/conformance/test-00.out b/data/ttl/conformance/test-00.out index 1209817..7f8b717 100644 --- a/data/ttl/conformance/test-00.out +++ b/data/ttl/conformance/test-00.out @@ -1 +1 @@ -_:genid1 . +_:genid1 . diff --git a/examples/ESWC.hs b/examples/ESWC.hs index b0b8f39..2ef8cc7 100644 --- a/examples/ESWC.hs +++ b/examples/ESWC.hs @@ -13,8 +13,8 @@ heldByProp = "swc:heldBy" eswcCommitteeMembers :: RDF TList -> [T.Text] eswcCommitteeMembers graph = let triples = query graph (Just (unode eswcCommitteeURI)) (Just (unode heldByProp)) Nothing - memberURIs = map objectOf triples - in map + memberURIs = fmap objectOf triples + in fmap (\memberURI -> let (LNode (PlainL (firstName::T.Text))) = objectOf $ head $ query graph (Just memberURI) (Just (unode "foaf:firstName")) Nothing @@ -22,7 +22,7 @@ eswcCommitteeMembers graph = objectOf $ head $ query graph (Just memberURI) (Just (unode "foaf:lastName")) Nothing in (T.append firstName (T.append (T.pack " ") lastName))) memberURIs - + main :: IO () main = do result <- parseURL diff --git a/examples/ParseURLs.hs b/examples/ParseURLs.hs index bbfc286..8986ebe 100644 --- a/examples/ParseURLs.hs +++ b/examples/ParseURLs.hs @@ -11,7 +11,7 @@ timBernersLee :: IO () timBernersLee = do Right (rdf::RDF TList) <- parseURL (XmlParser Nothing Nothing) "http://www.w3.org/People/Berners-Lee/card.rdf" let ts = query rdf (Just (UNode "http://www.w3.org/2011/Talks/0331-hyderabad-tbl/data#talk")) (Just (UNode "dct:title")) Nothing - let talks = map (\(Triple _ _ (LNode (PlainL s))) -> s) ts + let talks = fmap (\(Triple _ _ (LNode (PlainL s))) -> s) ts print talks main :: IO () diff --git a/rdf-tests b/rdf-tests index e24f243..280e9de 160000 --- a/rdf-tests +++ b/rdf-tests @@ -1 +1 @@ -Subproject commit e24f243f79087a61a1b1aa72f5c7c27470155c33 +Subproject commit 280e9de3aaefa6b292a151bd455204d49a0c09db diff --git a/rdf4h.cabal b/rdf4h.cabal index 0b0ec3b..c5dc581 100644 --- a/rdf4h.cabal +++ b/rdf4h.cabal @@ -19,18 +19,32 @@ cabal-version: >= 1.8 build-type: Simple category: RDF stability: Experimental -tested-with: GHC==7.10.2, GHC==8.0.2 +tested-with: GHC==7.10.3, GHC==8.0.2, GHC==8.8.2, GHC==8.4.3, GHC==8.6.5 extra-tmp-files: test extra-source-files: examples/ParseURLs.hs , examples/ESWC.hs + +source-repository head + type: git + location: https://github.com/robstewart57/rdf4h.git + + +flag dev + description: Developer build + manual: True + default: False + + library + hs-source-dirs: src exposed-modules: Data.RDF , Data.RDF.IRI , Data.RDF.Namespace , Data.RDF.Types , Data.RDF.Query , Data.RDF.Graph.AdjHashMap + , Data.RDF.Graph.AlgebraicGraph , Data.RDF.Graph.TList , Text.RDF.RDF4H.TurtleParser , Text.RDF.RDF4H.TurtleSerializer @@ -38,6 +52,7 @@ library , Text.RDF.RDF4H.NTriplesSerializer , Text.RDF.RDF4H.XmlParser , Text.RDF.RDF4H.XmlParserHXT + , Text.RDF.RDF4H.XmlParser.Identifiers , Text.RDF.RDF4H.ParserUtils build-depends: attoparsec , base >= 4.8.0.0 @@ -48,6 +63,7 @@ library , HTTP >= 4000.0.0 , hxt >= 9.3.1.2 , text >= 1.2.1.0 + , algebraic-graphs >= 0.3 && < 0.5 , unordered-containers , hashable , deepseq @@ -65,8 +81,12 @@ library if !impl(ghc >= 8.0) build-depends: semigroups == 0.18.* - hs-source-dirs: src - ghc-options: -Wall -funbox-strict-fields + ghc-options: -Wall -Wcompat -Wincomplete-record-updates -Wincomplete-uni-patterns -Wredundant-constraints + if flag(dev) + ghc-options: -O0 + else + ghc-options: -O2 -funbox-strict-fields + executable rdf4h main-is: src/Rdf4hParseMain.hs @@ -78,11 +98,17 @@ executable rdf4h if impl(ghc < 7.6) build-depends: ghc-prim - ghc-options: -Wall -funbox-strict-fields + ghc-options: -Wall -Wcompat -Wincomplete-record-updates -Wincomplete-uni-patterns -Wredundant-constraints + if flag(dev) + ghc-options: -O0 + else + ghc-options: -O2 -funbox-strict-fields + test-suite test-rdf4h type: exitcode-stdio-1.0 main-is: Test.hs + hs-source-dirs: testsuite/tests other-modules: Data.RDF.PropertyTests Data.RDF.GraphImplTests Data.RDF.IRITests @@ -90,9 +116,9 @@ test-suite test-rdf4h Text.RDF.RDF4H.XmlParser_Test W3C.Manifest W3C.NTripleTest + W3C.TurtleTest W3C.RdfXmlTest W3C.W3CAssertions - ghc-options: -Wall -fno-warn-orphans -funbox-strict-fields build-depends: base >= 4.8.0.0 && < 6 , rdf4h , tasty @@ -108,9 +134,15 @@ test-suite test-rdf4h if impl(ghc < 7.6) build-depends: ghc-prim + if !impl(ghc >= 8.0) + build-depends: semigroups == 0.18.* + + ghc-options: -Wall -Wcompat -Wincomplete-record-updates -Wincomplete-uni-patterns -Wredundant-constraints + if flag(dev) + ghc-options: -O0 + else + ghc-options: -O2 -funbox-strict-fields - other-modules: W3C.TurtleTest - hs-source-dirs: testsuite/tests benchmark rdf4h-bench type: exitcode-stdio-1.0 @@ -121,8 +153,12 @@ benchmark rdf4h-bench criterion, rdf4h, text >= 1.2.1.0 - ghc-options: -Wall -source-repository head - type: git - location: https://github.com/robstewart57/rdf4h.git + if !impl(ghc >= 8.0) + build-depends: semigroups == 0.18.* + + ghc-options: -Wall -Wcompat -Wincomplete-record-updates -Wincomplete-uni-patterns -Wredundant-constraints + if flag(dev) + ghc-options: -O0 + else + ghc-options: -O2 -funbox-strict-fields diff --git a/src/Data/RDF.hs b/src/Data/RDF.hs index ee68d6a..2173371 100644 --- a/src/Data/RDF.hs +++ b/src/Data/RDF.hs @@ -15,6 +15,7 @@ module Data.RDF ( -- * RDF type class instances module Data.RDF.Graph.TList, module Data.RDF.Graph.AdjHashMap, + module Data.RDF.Graph.AlgebraicGraph, -- module Data.RDF.Graph.HashMapSP, -- module Data.RDF.Graph.MapSP, @@ -30,6 +31,7 @@ module Data.RDF ( import Data.RDF.Namespace import Data.RDF.Graph.TList import Data.RDF.Graph.AdjHashMap +import Data.RDF.Graph.AlgebraicGraph -- import Data.RDF.Graph.HashMapSP -- import Data.RDF.Graph.MapSP import Text.RDF.RDF4H.NTriplesSerializer diff --git a/src/Data/RDF/Graph/AdjHashMap.hs b/src/Data/RDF/Graph/AdjHashMap.hs index 4c93c9f..1148725 100644 --- a/src/Data/RDF/Graph/AdjHashMap.hs +++ b/src/Data/RDF/Graph/AdjHashMap.hs @@ -3,7 +3,6 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE TupleSections, GeneralizedNewtypeDeriving #-} {-# LANGUAGE EmptyDataDecls #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -13,11 +12,11 @@ module Data.RDF.Graph.AdjHashMap (AdjHashMap) where import Prelude hiding (pred) +import Data.Semigroup ((<>)) import Data.List import Data.Binary (Binary) import Data.RDF.Types import Data.RDF.Query -import Data.RDF.Namespace import Data.Hashable () import Data.HashMap.Strict (HashMap) import qualified Data.HashMap.Strict as HashMap @@ -103,16 +102,16 @@ instance Rdf AdjHashMap where -- show (AdjHashMap ((spoMap, _), _, _)) = -- let ts = concatMap (uncurry tripsSubj) subjPredMaps -- where subjPredMaps = HashMap.toList spoMap --- in concatMap (\t -> show t ++ "\n") ts +-- in concatMap (\t -> show t <> "\n") ts -showGraph' :: RDF AdjHashMap -> [Char] +showGraph' :: RDF AdjHashMap -> String showGraph' ((AdjHashMap ((spoMap, _), _, _))) = let ts = concatMap (uncurry tripsSubj) subjPredMaps where subjPredMaps = HashMap.toList spoMap - in concatMap (\t -> show t ++ "\n") ts + in concatMap (\t -> show t <> "\n") ts -- instance Show (RDF AdjHashMap) where --- show gr = concatMap (\t -> show t ++ "\n") (triplesOf gr) +-- show gr = concatMap (\t -> show t <> "\n") (triplesOf gr) -- some convenience type alias for readability @@ -132,7 +131,7 @@ prefixMappings' (AdjHashMap (_, _, pms)) = pms addPrefixMappings' :: RDF AdjHashMap -> PrefixMappings -> Bool -> RDF AdjHashMap addPrefixMappings' (AdjHashMap (ts, baseURL, pms)) pms' replace = - let merge = if replace then flip mergePrefixMappings else mergePrefixMappings + let merge = if replace then flip (<>) else (<>) in AdjHashMap (ts, baseURL, merge pms pms') empty' :: RDF AdjHashMap diff --git a/src/Data/RDF/Graph/AlgebraicGraph.hs b/src/Data/RDF/Graph/AlgebraicGraph.hs new file mode 100644 index 0000000..9d2c16f --- /dev/null +++ b/src/Data/RDF/Graph/AlgebraicGraph.hs @@ -0,0 +1,113 @@ +{-# OPTIONS_GHC -fno-warn-orphans #-} -- [TODO] Remove when the missing NFData instance is added to Alga. +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE EmptyDataDecls #-} + + +module Data.RDF.Graph.AlgebraicGraph + ( AlgebraicGraph + ) where + + +import Data.Semigroup (Semigroup(..)) +import Control.DeepSeq (NFData(..)) +import Data.Binary +import Data.RDF.Namespace +import Data.RDF.Query +import Data.RDF.Types (RDF, Rdf(..), BaseUrl, Triples, Triple(..), Node, Subject, Predicate, Object, NodeSelector) +import qualified Algebra.Graph.Labelled as G +import Data.HashSet (HashSet) +import qualified Data.HashSet as HS +import GHC.Generics + + +data AlgebraicGraph deriving (Generic) + +instance Binary AlgebraicGraph +instance NFData AlgebraicGraph + +data instance RDF AlgebraicGraph = AlgebraicGraph + { _graph :: G.Graph (HashSet Node) Node + , _baseUrl :: Maybe BaseUrl + , _prefixMappings :: PrefixMappings + } deriving (Generic, NFData) + +-- [TODO] Remove this orphan instance when the missing NFData instance is added to Alga. +instance (NFData e, NFData a) => NFData (G.Graph e a) where + rnf G.Empty = () + rnf (G.Vertex x ) = rnf x + rnf (G.Connect e x y) = e `seq` rnf x `seq` rnf y + +instance Rdf AlgebraicGraph where + baseUrl = _baseUrl + prefixMappings = _prefixMappings + addPrefixMappings = addPrefixMappings' + empty = empty' + mkRdf = mkRdf' + addTriple = addTriple' + removeTriple = removeTriple' + triplesOf = triplesOf' + uniqTriplesOf = triplesOf' + select = select' + query = query' + showGraph = showGraph' + +toEdge :: Triple -> (HashSet Predicate, Subject, Object) +toEdge (Triple s p o) = (HS.singleton p, s, o) + +toTriples :: (HashSet Predicate, Subject, Object) -> Triples +toTriples (ps, s, o) = [Triple s p o | p <- HS.toList ps] + +showGraph' :: RDF AlgebraicGraph -> String +showGraph' r = concatMap (\t -> show t ++ "\n") (expandTriples r) + +addPrefixMappings' :: RDF AlgebraicGraph -> PrefixMappings -> Bool -> RDF AlgebraicGraph +addPrefixMappings' (AlgebraicGraph g baseURL pms) pms' replace = + let merge = if replace then flip (<>) else (<>) + in AlgebraicGraph g baseURL (merge pms pms') + +empty' :: RDF AlgebraicGraph +empty' = AlgebraicGraph G.empty mempty (PrefixMappings mempty) + +mkRdf' :: Triples -> Maybe BaseUrl -> PrefixMappings -> RDF AlgebraicGraph +mkRdf' ts baseURL pms = + let g = G.edges . fmap toEdge $ ts + in AlgebraicGraph g baseURL pms + +addTriple' :: RDF AlgebraicGraph -> Triple -> RDF AlgebraicGraph +addTriple' (AlgebraicGraph g baseURL pms) (Triple s p o) = + let g' = G.edge (HS.singleton p) s o + in AlgebraicGraph (G.overlay g g') baseURL pms + +removeTriple' :: RDF AlgebraicGraph -> Triple -> RDF AlgebraicGraph +removeTriple' (AlgebraicGraph g baseURL pms) (Triple s p o) = + let ps = G.edgeLabel s o g + g' + | HS.null ps = g + | elem p ps = G.replaceEdge (HS.delete p ps) s o g + | otherwise = g + in AlgebraicGraph g' baseURL pms + +triplesOf' :: RDF AlgebraicGraph -> Triples +triplesOf' (AlgebraicGraph g _ _) = mconcat $ toTriples <$> G.edgeList g + +select' :: RDF AlgebraicGraph -> NodeSelector -> NodeSelector -> NodeSelector -> Triples +select' r Nothing Nothing Nothing = triplesOf r +select' (AlgebraicGraph g _ _) s p o = let (res, _, _) = G.foldg e v c g in res + where + e = (mempty, mempty, mempty) + v x = (mempty, s ?? x, o ?? x) + (??) f x' = let xs = HS.singleton x' in maybe xs (`HS.filter` xs) f + c ps (ts1, ss1, os1) (ts2, ss2, os2) = (ts3, ss3, os3) + where + ss3 = ss1 <> ss2 + os3 = os1 <> os2 + ts3 + | HS.null ps' = ts1 <> ts2 + | otherwise = ts1 <> ts2 <> [Triple s' p' o' | s' <- HS.toList ss3, p' <- HS.toList ps', o' <- HS.toList os3] + ps' = maybe ps (`HS.filter` ps) p + +query' :: RDF AlgebraicGraph -> Maybe Subject -> Maybe Predicate -> Maybe Object -> Triples +query' r Nothing Nothing Nothing = triplesOf r +query' r s p o = select r ((==) <$> s) ((==) <$> p) ((==) <$> o) diff --git a/src/Data/RDF/Graph/HashMapSP.hs b/src/Data/RDF/Graph/HashMapSP.hs index 4bc6a95..2893fc7 100644 --- a/src/Data/RDF/Graph/HashMapSP.hs +++ b/src/Data/RDF/Graph/HashMapSP.hs @@ -3,7 +3,6 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE TupleSections, GeneralizedNewtypeDeriving #-} {-# LANGUAGE EmptyDataDecls #-} -- |A graph implementation mapping (S,P) pairs to O, backed by 'Data.Map'. @@ -11,6 +10,7 @@ module Data.RDF.Graph.HashMapSP (HashSP) where import Prelude hiding (pred) +import Data.Semigroup ((<>)) import Control.DeepSeq (NFData) import Data.RDF.Types import Data.RDF.Query @@ -47,15 +47,15 @@ instance Rdf HashSP where -- instance Show (HashSP) where -- show (HashSP (tsMap,_,_)) = -- let ts = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . HashMap.toList) tsMap --- in concatMap (\t -> show t ++ "\n") ts +-- in concatMap (\t -> show t <> "\n") ts -showGraph' :: RDF HashSP -> [Char] +showGraph' :: RDF HashSP -> String showGraph' (HashSP (tsMap,_,_)) = - let ts = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . HashMap.toList) tsMap - in concatMap (\t -> show t ++ "\n") ts - + let ts = (concatMap (\((s,p),oList) -> fmap (Triple s p) oList) . HashMap.toList) tsMap + in concatMap (\t -> show t <> "\n") ts + -- instance Show (HashSP) where --- show gr = concatMap (\t -> show t ++ "\n") (triplesOf gr) +-- show gr = concatMap (\t -> show t <> "\n") (triplesOf gr) type SPMap = HashMap (Subject,Predicate) [Object] @@ -67,7 +67,7 @@ prefixMappings' (HashSP (_, _, pms)) = pms addPrefixMappings' :: RDF HashSP -> PrefixMappings -> Bool -> RDF HashSP addPrefixMappings' (HashSP (tsMap, baseURL, pms)) pms' replace = - let merge = if replace then flip mergePrefixMappings else mergePrefixMappings + let merge = if replace then flip (<>) else (<>) in HashSP (tsMap, baseURL, merge pms pms') empty' :: RDF HashSP @@ -77,10 +77,10 @@ mkRdf' :: Triples -> Maybe BaseUrl -> PrefixMappings -> RDF HashSP mkRdf' triples baseURL pms = HashSP (tsMap, baseURL, pms) where tsMap = sortAndGroup triples - sortAndGroup xs = HashMap.fromListWith (++) [((s,p), [o]) | Triple s p o <- xs] + sortAndGroup xs = HashMap.fromListWith (<>) [((s,p), [o]) | Triple s p o <- xs] triplesOf' :: RDF HashSP -> Triples -triplesOf' (HashSP (tsMap,_,_)) = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . HashMap.toList) tsMap +triplesOf' (HashSP (tsMap,_,_)) = (concatMap (\((s,p),oList) -> fmap (Triple s p) oList) . HashMap.toList) tsMap uniqTriplesOf' :: RDF HashSP -> Triples uniqTriplesOf' = nub . expandTriples @@ -93,47 +93,47 @@ select' (HashSP (tsMap,_,_)) Nothing (Just pSelector) Nothing = HashMap.foldrWithKey findTripleWithP [] tsMap where findTripleWithP (s,p) oList ts = if pSelector p - then map (Triple s p) oList ++ ts + then fmap (Triple s p) oList <> ts else ts select' (HashSP (tsMap,_,_)) Nothing Nothing (Just oSelector) = HashMap.foldrWithKey findTripleWithS [] tsMap where - findTripleWithS (s,p) oList ts = map (Triple s p) (filter oSelector oList) ++ ts + findTripleWithS (s,p) oList ts = fmap (Triple s p) (filter oSelector oList) <> ts select' (HashSP (tsMap,_,_)) Nothing (Just pSelector) (Just oSelector) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if pSelector p - then map (Triple s p) (filter oSelector oList) ++ ts + then fmap (Triple s p) (filter oSelector oList) <> ts else ts select' (HashSP (tsMap,_,_)) (Just sSelector) Nothing Nothing = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s - then map (Triple s p) oList ++ ts + then fmap (Triple s p) oList <> ts else ts select' (HashSP (tsMap,_,_)) (Just sSelector) (Just pSelector) Nothing = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s && pSelector p - then map (Triple s p) oList ++ ts + then fmap (Triple s p) oList <> ts else ts select' (HashSP (tsMap,_,_)) (Just sSelector) Nothing (Just oSelector) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s - then map (Triple s p) (filter oSelector oList) ++ ts + then fmap (Triple s p) (filter oSelector oList) <> ts else ts select' (HashSP (tsMap,_,_)) (Just sSelector) (Just pSelector) (Just oSelector) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s && pSelector p - then map (Triple s p) (filter oSelector oList) ++ ts + then fmap (Triple s p) (filter oSelector oList) <> ts else ts query' :: RDF HashSP -> Maybe Subject -> Maybe Predicate -> Maybe Object -> Triples @@ -144,42 +144,42 @@ query' (HashSP (tsMap,_,_)) Nothing (Just p) Nothing = HashMap.foldrWithKey findTripleWithP [] tsMap where findTripleWithP (s,p') oList ts = if p == p' - then map (Triple s p) oList ++ ts + then fmap (Triple s p) oList <> ts else ts query' (HashSP (tsMap,_,_)) Nothing Nothing (Just o) = HashMap.foldrWithKey findTripleWithS [] tsMap where - findTripleWithS (s,p) oList ts = map (Triple s p) (filter (== o) oList) ++ ts + findTripleWithS (s,p) oList ts = fmap (Triple s p) (filter (== o) oList) <> ts query' (HashSP (tsMap,_,_)) Nothing (Just p) (Just o) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p') oList ts = if p == p' - then map (Triple s p) (filter (== o) oList) ++ ts + then fmap (Triple s p) (filter (== o) oList) <> ts else ts query' (HashSP (tsMap,_,_)) (Just s) Nothing Nothing = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p) oList ts = if s == s' - then map (Triple s p) oList ++ ts + then fmap (Triple s p) oList <> ts else ts -- optimal pattern for this RDF HashSP instance query' (HashSP (tsMap,_,_)) (Just s) (Just p) Nothing = - (map (Triple s p) . HashMap.lookupDefault [] (s,p)) tsMap + (fmap (Triple s p) . HashMap.lookupDefault [] (s,p)) tsMap query' (HashSP (tsMap,_,_)) (Just s) Nothing (Just o) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p) oList ts = if s == s' - then map (Triple s p) (filter (== o) oList) ++ ts + then fmap (Triple s p) (filter (== o) oList) <> ts else ts query' (HashSP (tsMap,_,_)) (Just s) (Just p) (Just o) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p') oList ts = if s == s' && p == p' - then map (Triple s p) (filter (== o) oList) ++ ts + then fmap (Triple s p) (filter (== o) oList) <> ts else ts diff --git a/src/Data/RDF/Graph/MapSP.hs b/src/Data/RDF/Graph/MapSP.hs index 9b4f58b..036047e 100644 --- a/src/Data/RDF/Graph/MapSP.hs +++ b/src/Data/RDF/Graph/MapSP.hs @@ -3,8 +3,6 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE EmptyDataDecls #-} @@ -73,12 +71,12 @@ instance Rdf SP where -- instance Show SP where -- show (SP (tsMap,_,_)) = -- let ts = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . Map.toList) tsMap --- in concatMap (\t -> show t ++ "\n") ts +-- in concatMap (\t -> show t <> "\n") ts showGraph' :: RDF SP -> String showGraph' (SP (tsMap,_,_)) = - let ts = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . Map.toList) tsMap - in concatMap (\t -> show t ++ "\n") ts + let ts = (concatMap (\((s,p),oList) -> fmap (Triple s p) oList) . Map.toList) tsMap + in concatMap (\t -> show t <> "\n") ts type SPMap = Map (Subject,Predicate) [Object] @@ -100,10 +98,10 @@ mkRdf' :: Triples -> Maybe BaseUrl -> PrefixMappings -> RDF SP mkRdf' triples baseURL pms = SP (tsMap, baseURL, pms) where tsMap = sortAndGroup triples - sortAndGroup xs = Map.fromListWith (++) [((s,p), [o]) | Triple s p o <- xs] + sortAndGroup xs = Map.fromListWith (<>) [((s,p), [o]) | Triple s p o <- xs] triplesOf' :: RDF SP -> Triples -triplesOf' (SP (tsMap,_,_)) = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . Map.toList) tsMap +triplesOf' (SP (tsMap,_,_)) = (concatMap (\((s,p),oList) -> fmap (Triple s p) oList) . Map.toList) tsMap uniqTriplesOf' :: RDF SP -> Triples uniqTriplesOf' = nub . expandTriples @@ -116,47 +114,47 @@ select' (SP (tsMap,_,_)) Nothing (Just pSelector) Nothing = Map.foldrWithKey findTripleWithP [] tsMap where findTripleWithP (s,p) oList ts = if pSelector p - then map (Triple s p) oList ++ ts + then fmap (Triple s p) oList <> ts else ts select' (SP (tsMap,_,_)) Nothing Nothing (Just oSelector) = Map.foldrWithKey findTripleWithS [] tsMap where - findTripleWithS (s,p) oList ts = map (Triple s p) (filter oSelector oList) ++ ts + findTripleWithS (s,p) oList ts = fmap (Triple s p) (filter oSelector oList) <> ts select' (SP (tsMap,_,_)) Nothing (Just pSelector) (Just oSelector) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if pSelector p - then map (Triple s p) (filter oSelector oList) ++ ts + then fmap (Triple s p) (filter oSelector oList) <> ts else ts select' (SP (tsMap,_,_)) (Just sSelector) Nothing Nothing = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s - then map (Triple s p) oList ++ ts + then fmap (Triple s p) oList <> ts else ts select' (SP (tsMap,_,_)) (Just sSelector) (Just pSelector) Nothing = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s && pSelector p - then map (Triple s p) oList ++ ts + then fmap (Triple s p) oList <> ts else ts select' (SP (tsMap,_,_)) (Just sSelector) Nothing (Just oSelector) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s - then map (Triple s p) (filter oSelector oList) ++ ts + then fmap (Triple s p) (filter oSelector oList) <> ts else ts select' (SP (tsMap,_,_)) (Just sSelector) (Just pSelector) (Just oSelector) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s && pSelector p - then map (Triple s p) (filter oSelector oList) ++ ts + then fmap (Triple s p) (filter oSelector oList) <> ts else ts query' :: RDF SP -> Maybe Subject -> Maybe Predicate -> Maybe Object -> Triples @@ -167,42 +165,42 @@ query' (SP (tsMap,_,_)) Nothing (Just p) Nothing = Map.foldrWithKey findTripleWithP [] tsMap where findTripleWithP (s,p') oList ts = if p == p' - then map (Triple s p) oList ++ ts + then fmap (Triple s p) oList <> ts else ts query' (SP (tsMap,_,_)) Nothing Nothing (Just o) = Map.foldrWithKey findTripleWithS [] tsMap where - findTripleWithS (s,p) oList ts = map (Triple s p) (filter (== o) oList) ++ ts + findTripleWithS (s,p) oList ts = fmap (Triple s p) (filter (== o) oList) <> ts query' (SP (tsMap,_,_)) Nothing (Just p) (Just o) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p') oList ts = if p == p' - then map (Triple s p) (filter (== o) oList) ++ ts + then fmap (Triple s p) (filter (== o) oList) <> ts else ts query' (SP (tsMap,_,_)) (Just s) Nothing Nothing = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p) oList ts = if s == s' - then map (Triple s p) oList ++ ts + then fmap (Triple s p) oList <> ts else ts -- optimal pattern for this SP instance query' (SP (tsMap,_,_)) (Just s) (Just p) Nothing = - (map (Triple s p) . Map.findWithDefault [] (s,p)) tsMap + (fmap (Triple s p) . Map.findWithDefault [] (s,p)) tsMap query' (SP (tsMap,_,_)) (Just s) Nothing (Just o) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p) oList ts = if s == s' - then map (Triple s p) (filter (== o) oList) ++ ts + then fmap (Triple s p) (filter (== o) oList) <> ts else ts query' (SP (tsMap,_,_)) (Just s) (Just p) (Just o) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p') oList ts = if s == s' && p == p' - then map (Triple s p) (filter (== o) oList) ++ ts + then fmap (Triple s p) (filter (== o) oList) <> ts else ts diff --git a/src/Data/RDF/Graph/TList.hs b/src/Data/RDF/Graph/TList.hs index 7c76435..bffa82a 100644 --- a/src/Data/RDF/Graph/TList.hs +++ b/src/Data/RDF/Graph/TList.hs @@ -2,7 +2,6 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE EmptyDataDecls #-} @@ -20,6 +19,7 @@ module Data.RDF.Graph.TList (TList) where import Prelude +import Data.Semigroup ((<>)) import Control.DeepSeq (NFData) import Data.Binary import Data.RDF.Namespace @@ -70,15 +70,15 @@ instance Rdf TList where query = query' showGraph = showGraph' -showGraph' :: RDF TList -> [Char] -showGraph' gr = concatMap (\t -> show t ++ "\n") (expandTriples gr) +showGraph' :: RDF TList -> String +showGraph' gr = concatMap (\t -> show t <> "\n") (expandTriples gr) prefixMappings' :: RDF TList -> PrefixMappings prefixMappings' (TListC(_, _, pms)) = pms addPrefixMappings' :: RDF TList -> PrefixMappings -> Bool -> RDF TList addPrefixMappings' (TListC(ts, baseURL, pms)) pms' replace = - let merge = if replace then flip mergePrefixMappings else mergePrefixMappings + let merge = if replace then flip (<>) else (<>) in TListC(ts, baseURL, merge pms pms') baseUrl' :: RDF TList -> Maybe BaseUrl diff --git a/src/Data/RDF/Graph/TPatriciaTree.hs b/src/Data/RDF/Graph/TPatriciaTree.hs index 09e3276..eae2b4a 100644 --- a/src/Data/RDF/Graph/TPatriciaTree.hs +++ b/src/Data/RDF/Graph/TPatriciaTree.hs @@ -2,7 +2,7 @@ {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE GeneralizedNewtypeDeriving, FlexibleInstances, BangPatterns #-} +{-# LANGUAGE FlexibleInstances, BangPatterns #-} {-# LANGUAGE EmptyDataDecls #-} module Data.RDF.Graph.TPatriciaTree (TPatriciaTree) where @@ -49,7 +49,7 @@ prefixMappings' (TPatriciaTree (_,_,_,pms')) = pms' addPrefixMappings' :: RDF TPatriciaTree -> PrefixMappings -> Bool -> RDF TPatriciaTree addPrefixMappings' (TPatriciaTree (g, idxLookup, baseURL, pms)) pms' replace = - let merge = if replace then flip mergePrefixMappings else mergePrefixMappings + let merge = if replace then flip (<>) else (<>) in TPatriciaTree (g, idxLookup, baseURL, merge pms pms') baseUrl' :: RDF TPatriciaTree -> Maybe BaseUrl @@ -101,28 +101,28 @@ mkRdf' ts base' pms' = triplesOf' :: RDF TPatriciaTree -> Triples triplesOf' (TPatriciaTree (g,idxLookup,_,_)) = - map (\(sIdx,oIdx,p) -> - let [s,o] = map (\idx -> fromJust $ IntMap.lookup idx idxLookup) [sIdx,oIdx] + fmap (\(sIdx,oIdx,p) -> + let [s,o] = fmap (\idx -> fromJust $ IntMap.lookup idx idxLookup) [sIdx,oIdx] in Triple s p o) (G.labEdges g) uniqTriplesOf' :: RDF TPatriciaTree -> Triples uniqTriplesOf' ptG@(TPatriciaTree (g,idxLookup,_,_)) = - nub $ map (\(sIdx,oIdx,p) -> - let [s,o] = map (\idx -> fromJust $ IntMap.lookup idx idxLookup) [sIdx,oIdx] + nub $ fmap (\(sIdx,oIdx,p) -> + let [s,o] = fmap (\idx -> fromJust $ IntMap.lookup idx idxLookup) [sIdx,oIdx] in expandTriple (prefixMappings ptG) (Triple s p o)) (G.labEdges g) mkTriples :: IntMap.IntMap Node -> Node -> [(Node, IntMap.Key)] -> [(Node, IntMap.Key)] -> [Triple] mkTriples idxLookup thisNode adjsIn adjsOut = - let ts1 = map (\(predNode,subjIdx) -> + let ts1 = fmap (\(predNode,subjIdx) -> let s = fromJust (IntMap.lookup subjIdx idxLookup) in Triple s predNode thisNode ) adjsIn - ts2 = map (\(predNode,objIdx) -> + ts2 = fmap (\(predNode,objIdx) -> let o = fromJust (IntMap.lookup objIdx idxLookup) in Triple thisNode predNode o ) adjsOut - in ts1 ++ ts2 + in ts1 <> ts2 select' :: RDF TPatriciaTree -> NodeSelector -> NodeSelector -> NodeSelector -> Triples select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel = @@ -137,7 +137,7 @@ select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel ts2 = if fromJust maybeSubjSel thisNode then mkTriples idxLookup thisNode [] adjsOut else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isNothing maybeSubjSel && isJust maybePredSel && isNothing maybeObjSel = let adjsIn' = filter (\(p,_idxSubj) -> fromJust maybePredSel p ) adjsIn adjsOut' = filter (\(p,_idxObj) -> fromJust maybePredSel p ) adjsOut @@ -147,7 +147,7 @@ select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel ts2 = if not (null adjsOut') then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isNothing maybeSubjSel && isNothing maybePredSel && isJust maybeObjSel = let adjsOut' = filter (\(_p,idxObj) -> fromJust maybeObjSel (fromJust (IntMap.lookup idxObj idxLookup)) ) adjsOut @@ -155,7 +155,7 @@ select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel ts2 = if fromJust maybeObjSel thisNode then mkTriples idxLookup thisNode adjsIn [] else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isJust maybeSubjSel && isJust maybePredSel && isNothing maybeObjSel = let adjsIn' = filter (\(p,idxSubj) -> fromJust maybeSubjSel (fromJust (IntMap.lookup idxSubj idxLookup)) @@ -165,7 +165,7 @@ select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel ts2 = if fromJust maybeSubjSel thisNode then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isJust maybeSubjSel && isNothing maybePredSel && isJust maybeObjSel = let adjsIn' = filter (\(_p,idxSubj) -> fromJust maybeSubjSel (fromJust (IntMap.lookup idxSubj idxLookup)) ) adjsIn @@ -176,7 +176,7 @@ select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel ts2 = if fromJust maybeSubjSel thisNode then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isNothing maybeSubjSel && isJust maybePredSel && isJust maybeObjSel = let adjsIn' = filter (\(p,_idxSubj) -> fromJust maybePredSel p ) adjsIn @@ -186,7 +186,7 @@ select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel then mkTriples idxLookup thisNode adjsIn' [] else [] ts2 = mkTriples idxLookup thisNode [] adjsOut' - in ts1 ++ ts2 + in ts1 <> ts2 | isJust maybeSubjSel && isJust maybePredSel && isJust maybeObjSel = let adjsIn' = filter (\(p,idxSubj) -> fromJust maybeSubjSel (fromJust (IntMap.lookup idxSubj idxLookup)) @@ -199,7 +199,7 @@ select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel ts2 = if fromJust maybeSubjSel thisNode then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 cfun ( _ , _ , _ , _) = undefined -- not sure why this pattern is needed to exhaust cfun arg patterns @@ -218,7 +218,7 @@ query' (TPatriciaTree (g,idxLookup,_,_)) maybeSubj maybePred maybeObj = ts2 = if thisNode == fromJust maybeSubj then mkTriples idxLookup thisNode [] adjsOut else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isNothing maybeSubj && isJust maybePred && isNothing maybeObj = let adjsIn' = filter (\(p,_idxSubj) -> p == fromJust maybePred ) adjsIn @@ -229,7 +229,7 @@ query' (TPatriciaTree (g,idxLookup,_,_)) maybeSubj maybePred maybeObj = ts2 = if not (null adjsOut') then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isNothing maybeSubj && isNothing maybePred && isJust maybeObj = let adjsOut' = filter (\(_p,idxObj) -> fromJust (IntMap.lookup idxObj idxLookup) == fromJust maybeObj ) adjsOut @@ -237,7 +237,7 @@ query' (TPatriciaTree (g,idxLookup,_,_)) maybeSubj maybePred maybeObj = ts2 = if thisNode == fromJust maybeObj then mkTriples idxLookup thisNode adjsIn [] else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isJust maybeSubj && isJust maybePred && isNothing maybeObj = let adjsIn' = filter (\(p,idxSubj) -> fromJust (IntMap.lookup idxSubj idxLookup) == fromJust maybeSubj @@ -247,7 +247,7 @@ query' (TPatriciaTree (g,idxLookup,_,_)) maybeSubj maybePred maybeObj = ts2 = if thisNode == fromJust maybeSubj then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isJust maybeSubj && isNothing maybePred && isJust maybeObj = let adjsIn' = filter (\(_p,idxSubj) -> fromJust (IntMap.lookup idxSubj idxLookup) == fromJust maybeSubj ) adjsIn @@ -258,7 +258,7 @@ query' (TPatriciaTree (g,idxLookup,_,_)) maybeSubj maybePred maybeObj = ts2 = if thisNode == fromJust maybeSubj then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isNothing maybeSubj && isJust maybePred && isJust maybeObj = let adjsIn' = filter (\(p,_idxSubj) -> p == fromJust maybePred ) adjsIn @@ -268,7 +268,7 @@ query' (TPatriciaTree (g,idxLookup,_,_)) maybeSubj maybePred maybeObj = then mkTriples idxLookup thisNode adjsIn' [] else [] ts2 = mkTriples idxLookup thisNode [] adjsOut' - in ts1 ++ ts2 + in ts1 <> ts2 | isJust maybeSubj && isJust maybePred && isJust maybeObj = let adjsIn' = filter (\(p,idxSubj) -> fromJust (IntMap.lookup idxSubj idxLookup) == fromJust maybeSubj @@ -281,7 +281,7 @@ query' (TPatriciaTree (g,idxLookup,_,_)) maybeSubj maybePred maybeObj = ts2 = if thisNode == fromJust maybeSubj then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 cfun ( _ , _ , _ , _ ) = undefined -- not sure why this pattern is needed to exhaust cfun arg patterns diff --git a/src/Data/RDF/IRI.hs b/src/Data/RDF/IRI.hs index 28fdcc5..0cdd7ce 100644 --- a/src/Data/RDF/IRI.hs +++ b/src/Data/RDF/IRI.hs @@ -16,15 +16,16 @@ module Data.RDF.IRI , serializeIRI , parseIRI, parseRelIRI , validateIRI, resolveIRI + , removeIRIFragment ) where import Data.Semigroup (Semigroup(..)) -import Data.Maybe (fromMaybe, isJust) +import Data.Maybe (maybe, isJust) import Data.Functor import Data.List (intersperse) import Control.Applicative import Control.Monad (guard) -import Control.Arrow ((***), (&&&), (>>>)) +import Control.Arrow (first, (&&&), (>>>)) import Data.Char (isAlpha, isDigit, isAlphaNum, toUpper, toLower) import Data.Text (Text) import qualified Data.Text as T @@ -92,21 +93,24 @@ data SchemaError | MissingColon -- ^ Schemas must be followed by a colon deriving (Show, Eq) +removeIRIFragment :: IRIRef -> IRIRef +removeIRIFragment (IRIRef s a p q _) = IRIRef s a p q Nothing + -- [TODO] use Builder serializeIRI :: IRIRef -> Text serializeIRI (IRIRef s a p q f) = mconcat - [ fromMaybe mempty (scheme <$> s) - , fromMaybe mempty (authority <$> a) + [ maybe mempty scheme s + , maybe mempty authority a , path p - , fromMaybe mempty (query <$> q) - , fromMaybe mempty (fragment <$> f)] + , maybe mempty query q + , maybe mempty fragment f ] where scheme (Scheme s') = s' <> ":" authority (Authority u (Host h) p') = mconcat [ "//" - , fromMaybe mempty (userInfo <$> u) + , maybe mempty userInfo u , h - , fromMaybe mempty (port <$> p') ] + , maybe mempty port p' ] userInfo (UserInfo u) = u <> "@" port (Port p') = (":" <>) . T.pack . show $ p' path (Path p') = p' @@ -123,12 +127,15 @@ parseRelIRI :: Text -> Either String IRIRef parseRelIRI = P.parseOnly $ irelativeRefParser <* (P.endOfInput "Unexpected characters at the end") validateIRI :: Text -> Either String Text -validateIRI t = const t <$> parseIRI t +validateIRI t = t <$ parseIRI t -- | IRI parsing and resolution according to algorithm 5.2 from RFC3986 -- See: http://www.ietf.org/rfc/rfc3986.txt --- [FIXME] Currently, this is a correct but naive implemenation. -resolveIRI :: Text -> Text -> Either String Text +-- [FIXME] Currently, this is a correct but naive implementation. +resolveIRI + :: Text -- ^ Base URI + -> Text -- ^ URI to resolve + -> Either String Text resolveIRI baseIri iri = serializeIRI <$> resolvedIRI where resolvedIRI = either (const resolvedRelativeIRI) resolveAbsoluteIRI (parseIRI iri) @@ -169,8 +176,8 @@ iriParser = do scheme <- Just <$> schemeParser _ <- P.string ":" "Missing colon after scheme" (authority, path) <- ihierPartParser - query <- P.option Nothing (Just <$> iqueryParser) - fragment <- P.option Nothing (Just <$> ifragmentParser) + query <- optional iqueryParser + fragment <- optional ifragmentParser return (IRIRef scheme authority path query fragment) -- ihier-part = "//" iauthority ipath-abempty @@ -194,8 +201,8 @@ ihierPartParser = irelativeRefParser :: Parser IRIRef irelativeRefParser = do (authority, path) <- irelativePartParser - query <- P.option Nothing (Just <$> iqueryParser) - fragment <- P.option Nothing (Just <$> ifragmentParser) + query <- optional iqueryParser + fragment <- optional ifragmentParser return (IRIRef Nothing authority path query fragment) -- irelative-part = "//" iauthority ipath-abempty @@ -212,9 +219,9 @@ irelativePartParser = -- iauthority = [ iuserinfo "@" ] ihost [ ":" port ] iauthorityParser :: Parser Authority iauthorityParser = - Authority <$> P.option Nothing (Just <$> (iuserInfoParser <* P.string "@")) + Authority <$> optional (iuserInfoParser <* P.string "@") <*> ihostParser - <*> P.option Nothing (Just <$> (P.string ":" *> portParser)) + <*> optional (P.string ":" *> portParser) "Authority" -- iuserinfo = *( iunreserved / pct-encoded / sub-delims / ":" ) @@ -270,7 +277,7 @@ ipathRootlessParser' = mconcat <$> sequence [isegmentNzParser, ipathAbEmptyParse -- ipath-empty = 0 ipathEmptyParser :: Parser (Maybe Authority, Path) -ipathEmptyParser = const (Nothing, mempty) <$> ipathEmptyParser' +ipathEmptyParser = (Nothing, mempty) <$ ipathEmptyParser' ipathEmptyParser' :: Parser Text ipathEmptyParser' = P.string mempty "Empty path" @@ -406,7 +413,7 @@ ipV6AddressParser = do h16 = parseBetween 1 4 (P.takeWhile isHexaDigit) ipNotElided (leading, lengthL) = guard (lengthL == 7 && isDecOctet (last leading)) *> partialIpV4 <|> - guard (lengthL == 8) *> pure mempty + (guard (lengthL == 8) $> mempty) ipElided (_, lengthL) = do guard $ lengthL <= 8 elision <- P.string "::" @@ -476,10 +483,10 @@ isSubDelims c = c `elem` ("!$&'()*+,;=" :: String) iauthWithPathParser :: Parser (Maybe Authority, Path) iauthWithPathParser = do void (P.string "//") - curry (Just *** id) <$> iauthorityParser <*> ipathAbEmptyParser + curry (first Just) <$> iauthorityParser <*> ipathAbEmptyParser isHexaDigit :: Char -> Bool -isHexaDigit c = (c >= '0' && c <= '9') || +isHexaDigit c = (isDigit c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') diff --git a/src/Data/RDF/Namespace.hs b/src/Data/RDF/Namespace.hs index afc3eb2..8c48674 100644 --- a/src/Data/RDF/Namespace.hs +++ b/src/Data/RDF/Namespace.hs @@ -6,75 +6,73 @@ module Data.RDF.Namespace( -- * Namespace types and functions Namespace(..), mkPlainNS, mkPrefixedNS, mkPrefixedNS', PrefixMapping(PrefixMapping), PrefixMappings(PrefixMappings), toPMList, - mergePrefixMappings, mkUri, prefixOf, uriOf, -- * Predefined namespace values - rdf, rdfs, dc, dct, owl, xsd, skos, foaf, ex, ex2, + rdf, rdfs, dc, dct, owl, xml, xsd, skos, foaf, ex, ex2, standard_ns_mappings, ns_mappings ) where import qualified Data.Text as T import Data.RDF.Types import qualified Data.Map as Map +import Data.Semigroup ((<>)) standard_namespaces :: [Namespace] standard_namespaces = [rdf, rdfs, dc, dct, owl, xsd, skos, foaf, ex, ex2] -- |The set of common predefined namespaces as a 'PrefixMappings' value. -standard_ns_mappings :: PrefixMappings +standard_ns_mappings :: PrefixMappings standard_ns_mappings = ns_mappings standard_namespaces -- |Takes a list of 'Namespace's and returns 'PrefixMappings'. ns_mappings :: [Namespace] -> PrefixMappings -ns_mappings ns = PrefixMappings $ Map.fromList $ - map (\(PrefixedNS pre uri) -> (pre, uri)) ns +ns_mappings ns = PrefixMappings $ Map.fromList $ + fmap (\(PrefixedNS pre uri) -> (pre, uri)) ns -- |The RDF namespace. -rdf :: Namespace -rdf = mkPrefixedNS' "rdf" "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +rdf :: Namespace +rdf = mkPrefixedNS' "rdf" "http://www.w3.org/1999/02/22-rdf-syntax-ns#" -- |The RDF Schema namespace. rdfs :: Namespace -rdfs = mkPrefixedNS' "rdfs" "http://www.w3.org/2000/01/rdf-schema#" +rdfs = mkPrefixedNS' "rdfs" "http://www.w3.org/2000/01/rdf-schema#" -- |The Dublin Core namespace. -dc :: Namespace -dc = mkPrefixedNS' "dc" "http://purl.org/dc/elements/1.1/" +dc :: Namespace +dc = mkPrefixedNS' "dc" "http://purl.org/dc/elements/1.1/" -- |The Dublin Core terms namespace. -dct :: Namespace -dct = mkPrefixedNS' "dct" "http://purl.org/dc/terms/" +dct :: Namespace +dct = mkPrefixedNS' "dct" "http://purl.org/dc/terms/" -- |The OWL namespace. -owl :: Namespace -owl = mkPrefixedNS' "owl" "http://www.w3.org/2002/07/owl#" +owl :: Namespace +owl = mkPrefixedNS' "owl" "http://www.w3.org/2002/07/owl#" -- |The XML Schema namespace. -xsd :: Namespace -xsd = mkPrefixedNS' "xsd" "http://www.w3.org/2001/XMLSchema#" +xml :: Namespace +xml = mkPrefixedNS' "xml" "http://www.w3.org/XML/1998/namespace" + +-- |The XML Schema namespace. +xsd :: Namespace +xsd = mkPrefixedNS' "xsd" "http://www.w3.org/2001/XMLSchema#" -- |The SKOS namespace. skos :: Namespace -skos = mkPrefixedNS' "skos" "http://www.w3.org/2004/02/skos/core#" +skos = mkPrefixedNS' "skos" "http://www.w3.org/2004/02/skos/core#" -- |The friend of a friend namespace. foaf :: Namespace -foaf = mkPrefixedNS' "foaf" "http://xmlns.com/foaf/0.1/" +foaf = mkPrefixedNS' "foaf" "http://xmlns.com/foaf/0.1/" -- |Example namespace #1. -ex :: Namespace -ex = mkPrefixedNS' "ex" "http://www.example.org/" +ex :: Namespace +ex = mkPrefixedNS' "ex" "http://www.example.org/" -- |Example namespace #2. -ex2 :: Namespace -ex2 = mkPrefixedNS' "ex2" "http://www2.example.org/" - - --- |Perform a left-biased merge of the two sets of prefix mappings. -mergePrefixMappings :: PrefixMappings -> PrefixMappings -> PrefixMappings -mergePrefixMappings (PrefixMappings p1s) (PrefixMappings p2s) = - PrefixMappings $ Map.union p1s p2s +ex2 :: Namespace +ex2 = mkPrefixedNS' "ex2" "http://www2.example.org/" -- |View the prefix mappings as a list of key-value pairs. The PM in -- in the name is to reduce name clashes if used without qualifying. @@ -87,13 +85,13 @@ mkUri ns local = uriOf ns `T.append` local -- |Make a namespace for the given URI reference. -mkPlainNS :: T.Text -> Namespace -mkPlainNS = PlainNS +mkPlainNS :: T.Text -> Namespace +mkPlainNS = PlainNS -- |Make a namespace having the given prefix for the given URI reference, -- respectively. -mkPrefixedNS :: T.Text -> T.Text -> Namespace -mkPrefixedNS = PrefixedNS +mkPrefixedNS :: T.Text -> T.Text -> Namespace +mkPrefixedNS = PrefixedNS -- |Make a namespace having the given prefix for the given URI reference, -- respectively, using strings which will be converted to bytestrings @@ -102,11 +100,11 @@ mkPrefixedNS' :: String -> String -> Namespace mkPrefixedNS' s1 s2 = mkPrefixedNS (T.pack s1) (T.pack s2) -- |Determine the URI of the given namespace. -uriOf :: Namespace -> T.Text -uriOf (PlainNS uri) = uri -uriOf (PrefixedNS _ uri) = uri +uriOf :: Namespace -> T.Text +uriOf (PlainNS uri) = uri +uriOf (PrefixedNS _ uri) = uri -- |Determine the prefix of the given namespace, if it has one. -prefixOf :: Namespace -> Maybe T.Text -prefixOf (PlainNS _) = Nothing -prefixOf (PrefixedNS p _) = Just p +prefixOf :: Namespace -> Maybe T.Text +prefixOf (PlainNS _) = Nothing +prefixOf (PrefixedNS p _) = Just p diff --git a/src/Data/RDF/Query.hs b/src/Data/RDF/Query.hs index 2cbe131..8a34757 100644 --- a/src/Data/RDF/Query.hs +++ b/src/Data/RDF/Query.hs @@ -20,6 +20,8 @@ module Data.RDF.Query ( import Prelude hiding (pred) import Data.List +import Data.Maybe (fromMaybe) +import Data.Semigroup ((<>)) import Data.RDF.Types import qualified Data.RDF.Namespace as NS import Data.Text (Text) @@ -132,7 +134,7 @@ isGraphIsomorphic g1 g2 = Automorphism.isIsomorphic g1' g2' where triples = expandTriples g triplesHashMap :: HashMap (Subject,Predicate) [Object] - triplesHashMap = HashMap.fromListWith (++) [((s,p), [o]) | Triple s p o <- triples] + triplesHashMap = HashMap.fromListWith (<>) [((s,p), [o]) | Triple s p o <- triples] triplesGrouped :: [((Subject,Predicate),[Object])] triplesGrouped = HashMap.toList triplesHashMap (dataGraph,_,_) = (graphFromEdges . fmap (\((s,p),os) -> (s,p,os))) triplesGrouped @@ -156,7 +158,7 @@ expandNode _ n = n -- Also expands "a" to "http://www.w3.org/1999/02/22-rdf-syntax-ns#type". expandURI :: PrefixMappings -> Text -> Text expandURI _ "a" = NS.mkUri NS.rdf "type" -expandURI pms iri = maybe iri id $ foldl' f Nothing (NS.toPMList pms) +expandURI pms iri = fromMaybe iri $ foldl' f Nothing (NS.toPMList pms) where f :: Maybe Text -> (Text, Text) -> Maybe Text f x (p, u) = x <|> (T.append u <$> T.stripPrefix (T.append p ":") iri) diff --git a/src/Data/RDF/Types.hs b/src/Data/RDF/Types.hs index 47b4cde..066ce15 100644 --- a/src/Data/RDF/Types.hs +++ b/src/Data/RDF/Types.hs @@ -40,7 +40,7 @@ module Data.RDF.Types ( PrefixMappings(PrefixMappings),PrefixMapping(PrefixMapping), -- * Supporting types - BaseUrl(BaseUrl), NodeSelector, ParseFailure(ParseFailure) + BaseUrl(..), NodeSelector, ParseFailure(ParseFailure) ) where @@ -188,10 +188,10 @@ uriValidate = either (const Nothing) Just . isRdfURI -- |Same as 'uriValidate', but on 'String' rather than 'Text' uriValidateString :: String -> Maybe String -uriValidateString = liftA T.unpack . uriValidate . fromString +uriValidateString = fmap T.unpack . uriValidate . fromString isRdfURI :: Text -> Either ParseError Text -isRdfURI t = parse (iriFragment <* eof) ("Invalid URI: " ++ T.unpack t) t +isRdfURI t = parse (iriFragment <* eof) ("Invalid URI: " <> T.unpack t) t -- IRIREF from NTriples spec (without <> enclosing) -- [8] IRIREF ::= '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>' @@ -274,9 +274,9 @@ type Triples = [Triple] -- /subj/ must be a 'UNode' or 'BNode', and /pred/ must be a 'UNode'. triple :: Subject -> Predicate -> Object -> Triple triple s p o - | isLNode s = error $ "subject must be UNode or BNode: " ++ show s - | isLNode p = error $ "predicate must be UNode, not LNode: " ++ show p - | isBNode p = error $ "predicate must be UNode, not BNode: " ++ show p + | isLNode s = error $ "subject must be UNode or BNode: " <> show s + | isLNode p = error $ "predicate must be UNode, not LNode: " <> show p + | isBNode p = error $ "predicate must be UNode, not BNode: " <> show p | otherwise = Triple s p o -- |Answer if given node is a URI Ref node. @@ -462,7 +462,7 @@ class RdfSerializer s where -- |The base URL of an RDF. -newtype BaseUrl = BaseUrl Text +newtype BaseUrl = BaseUrl { unBaseUrl :: Text } deriving (Eq, Ord, Show, NFData, Semigroup, Generic) instance Binary BaseUrl @@ -569,8 +569,8 @@ instance Show Namespace where show (PrefixedNS prefix uri) = printf "(PrefixNS %s %s)" (T.unpack prefix) (T.unpack uri) -- |An alias for a map from prefix to namespace URI. -newtype PrefixMappings = PrefixMappings (Map Text Text) - deriving (Eq, Ord,NFData, Generic) +newtype PrefixMappings = PrefixMappings (Map Text Text) + deriving (Eq, Ord, NFData, Semigroup, Monoid, Generic) instance Binary PrefixMappings @@ -579,7 +579,7 @@ instance Show PrefixMappings where -- worth optimizing yet. show (PrefixMappings pmap) = printf "PrefixMappings [%s]" mappingsStr where showPM = show . PrefixMapping - mappingsStr = List.intercalate ", " (map showPM (Map.toList pmap)) + mappingsStr = List.intercalate ", " (fmap showPM (Map.toList pmap)) -- |A mapping of a prefix to the URI for that prefix. newtype PrefixMapping = PrefixMapping (Text, Text) @@ -592,7 +592,7 @@ instance Show PrefixMapping where -- | Resolve a prefix using the given prefix mappings. resolveQName :: Text -> PrefixMappings -> Maybe Text -resolveQName prefix (PrefixMappings pms') = Map.lookup prefix pms' +resolveQName prefix (PrefixMappings pms) = Map.lookup prefix pms {-# INLINE mkAbsoluteUrl #-} {-# DEPRECATED mkAbsoluteUrl "Use resolveIRI instead, because mkAbsoluteUrl is a partial function" #-} @@ -624,12 +624,10 @@ canonicalizerTable = doubleUri = "http://www.w3.org/2001/XMLSchema#double" _integerStr, _decimalStr, _doubleStr :: Text -> Text -_integerStr t = - if T.length t == 1 - then t - else if T.head t == '0' - then _integerStr (T.tail t) - else t +_integerStr t + | T.length t == 1 = t + | T.head t == '0' = _integerStr (T.tail t) + | otherwise = t -- exponent: [eE] ('-' | '+')? [0-9]+ -- ('-' | '+') ? ( [0-9]+ '.' [0-9]* exponent | '.' ([0-9])+ exponent | ([0-9])+ exponent ) @@ -649,12 +647,12 @@ fileSchemeToFilePath (UNode fileScheme) | otherwise = Nothing where textToFilePath = pure . fromString <=< stringToFilePath . T.unpack - stringToFilePath = fixPrefix <=< pure . Network.uriPath <=< Network.parseURI + stringToFilePath = fixPrefix <=< pure . unEscapeString . Network.uriPath <=< Network.parseURI fixPrefix "" = Nothing fixPrefix p@(p':p'') | p' == FP.pathSeparator = Just (FP.normalise p) -- Posix path | p' == '/' = Just (FP.normalise p'') -- Windows classic Path - | otherwise = Just ("\\\\" ++ FP.normalise p) -- Windows UNC Path + | otherwise = Just ("\\\\" <> FP.normalise p) -- Windows UNC Path fileSchemeToFilePath _ = Nothing -- | Converts a file path to a URI with "file:" scheme @@ -663,7 +661,7 @@ filePathToUri p | FP.isRelative p = Nothing | otherwise = Just . fromString . as_uri . FP.normalise $ p where - as_uri = ("file://" ++) . escapeURIString isAllowedInURI . as_posix . fix_prefix + as_uri = ("file://" <>) . escapeURIString isAllowedInURI . as_posix . fix_prefix fix_prefix p' = case (FP.takeDrive p') of "/" -> p' '\\':'\\':_ -> drop 2 p' diff --git a/src/Rdf4hParseMain.hs b/src/Rdf4hParseMain.hs index efd159a..26992a8 100644 --- a/src/Rdf4hParseMain.hs +++ b/src/Rdf4hParseMain.hs @@ -7,6 +7,7 @@ module Main where import Data.RDF +import Data.Semigroup ((<>)) import qualified Data.Text as T import qualified Data.Text.IO as TIO @@ -32,7 +33,7 @@ main = when (null args) (ioError (userError - ("\n\n" ++ "INPUT-URI required\n\n" ++ usageInfo header options))) + ("\n\n" <> "INPUT-URI required\n\n" <> usageInfo header options))) let debug = Debug `elem` opts inputUri = head args inputFormat = getWithDefault (InputFormat "turtle") opts @@ -41,8 +42,8 @@ main = outputBaseUri = getWithDefault (OutputBaseUri inputBaseUri) opts unless (outputFormat == "ntriples" || outputFormat == "turtle") (hPrintf stderr - ("'" ++ - outputFormat ++ + ("'" <> + outputFormat <> "' is not a valid output format. Supported output formats are: ntriples, turtle\n") >> exitWith (ExitFailure 1)) when debug @@ -88,7 +89,7 @@ main = >>= \ (res :: Either ParseFailure (RDF TList)) -> write outputFormat docUri emptyPms res - (str, _) -> putStrLn ("Invalid format: " ++ str) >> exitFailure + (str, _) -> putStrLn ("Invalid format: " <> str) >> exitFailure write :: (Rdf a) => String -> Maybe T.Text -> PrefixMappings -> Either ParseFailure (RDF a) -> IO () write format docUri pms res = case res of @@ -98,7 +99,7 @@ write format docUri pms res = case res of doWriteRdf rdf = case format of "turtle" -> writeRdf (TurtleSerializer docUri pms) rdf "ntriples" -> writeRdf NTriplesSerializer rdf - unknown -> error $ "Unknown output format: " ++ unknown + unknown -> error $ "Unknown output format: " <> unknown -- Get the input base URI from the argument list or flags, using the -- first string arg as the default if not found in string args (as @@ -135,7 +136,7 @@ strValue (InputFormat s) = s strValue (InputBaseUri s) = s strValue (OutputFormat s) = s strValue (OutputBaseUri s) = s -strValue flag = error $ "No string value for flag: " ++ show flag +strValue flag = error $ "No string value for flag: " <> show flag -- The commandline arguments we accept. None are required. data Flag @@ -160,28 +161,28 @@ instance Eq Flag where -- The top part of the usage output. header :: String header = - "\nrdf4h_parse: an RDF parser and serializer\n\n" ++ - "\nUsage: rdf4h_parse [OPTION...] INPUT-URI [INPUT-BASE-URI]\n\n" ++ - " INPUT-URI a filename, URI or '-' for standard input (stdin).\n" ++ - " INPUT-BASE-URI the input/parser base URI or '-' for none.\n" ++ - " Default is INPUT-URI\n" ++ + "\nrdf4h_parse: an RDF parser and serializer\n\n" <> + "\nUsage: rdf4h_parse [OPTION...] INPUT-URI [INPUT-BASE-URI]\n\n" <> + " INPUT-URI a filename, URI or '-' for standard input (stdin).\n" <> + " INPUT-BASE-URI the input/parser base URI or '-' for none.\n" <> + " Default is INPUT-URI\n" <> " Equivalent to -I INPUT-BASE-URI, --input-base-uri INPUT-BASE-URI\n\n" options :: [OptDescr Flag] options = [ Option "h" ["help"] (NoArg Help) "Display this help, then exit" , Option "d" ["debug"] (NoArg Debug) "Print debug info (like INPUT-BASE-URI used, etc.)" - , Option "i" ["input"] (ReqArg InputFormat "FORMAT") $ "Set input format/parser to one of:\n" ++ - " turtle Turtle (default)\n" ++ - " ntriples N-Triples\n" ++ + , Option "i" ["input"] (ReqArg InputFormat "FORMAT") $ "Set input format/parser to one of:\n" <> + " turtle Turtle (default)\n" <> + " ntriples N-Triples\n" <> " xml RDF/XML" - , Option "I" ["input-base-uri"] (ReqArg InputBaseUri "URI") $ "Set the input/parser base URI. '-' for none.\n" ++ + , Option "I" ["input-base-uri"] (ReqArg InputBaseUri "URI") $ "Set the input/parser base URI. '-' for none.\n" <> " Default is INPUT-BASE-URI argument value.\n\n" - , Option "o" ["output"] (ReqArg OutputFormat "FORMAT") $ "Set output format/serializer to one of:\n" ++ - " ntriples N-Triples (default)\n" ++ + , Option "o" ["output"] (ReqArg OutputFormat "FORMAT") $ "Set output format/serializer to one of:\n" <> + " ntriples N-Triples (default)\n" <> " turtle Turtle" - , Option "O" ["output-base-uri"] (ReqArg OutputBaseUri "URI") $ "Set the output format/serializer base URI. '-' for none.\n" ++ + , Option "O" ["output-base-uri"] (ReqArg OutputBaseUri "URI") $ "Set the output format/serializer base URI. '-' for none.\n" <> " Default is input/parser base URI." ] @@ -189,4 +190,4 @@ compilerOpts :: [String] -> IO ([Flag], [String]) compilerOpts argv = case getOpt Permute options argv of (o,n,[] ) -> return (o,n) - (_,_,errs) -> ioError (userError ("\n\n" ++ concat errs ++ usageInfo header options)) + (_,_,errs) -> ioError (userError ("\n\n" <> concat errs <> usageInfo header options)) diff --git a/src/Rdf4hQueryMain.hs b/src/Rdf4hQueryMain.hs index ed07bfd..7ebe05d 100644 --- a/src/Rdf4hQueryMain.hs +++ b/src/Rdf4hQueryMain.hs @@ -12,21 +12,21 @@ data Flag header :: String header = - "\nrdf4h_query: utility for querying RDF data\n\n" ++ - "\nUsage: rdf4h_query [OPTION...] INPUT-URI [INPUT-BASE-URI]\n\n" ++ - " INPUT-URI a filename, URI or '-' for standard input (stdin).\n" ++ - " INPUT-BASE-URI the input/parser base URI or '-' for none.\n" ++ - " Default is INPUT-URI\n" ++ + "\nrdf4h_query: utility for querying RDF data\n\n" <> + "\nUsage: rdf4h_query [OPTION...] INPUT-URI [INPUT-BASE-URI]\n\n" <> + " INPUT-URI a filename, URI or '-' for standard input (stdin).\n" <> + " INPUT-BASE-URI the input/parser base URI or '-' for none.\n" <> + " Default is INPUT-URI\n" <> " Equivalent to -I INPUT-BASE-URI, --input-base-uri INPUT-BASE-URI\n\n" options :: [OptDescr Flag] options = [ Option "h" ["help"] (NoArg Help) "Display this help, then exit" , Option "v" ["verbose"] (NoArg Verbose) "Display extra information messages to stderr" - , Option "i" ["input"] (ReqArg InputFormat "FORMAT") $ "Set input format/parser to one of:\n" ++ - " turtle Turtle (default)\n" ++ + , Option "i" ["input"] (ReqArg InputFormat "FORMAT") $ "Set input format/parser to one of:\n" <> + " turtle Turtle (default)\n" <> " ntriples N-Triples" - , Option "I" ["input-base-uri"] (ReqArg InputBaseUri "URI") $ "Set the input/parser base URI. '-' for none.\n" ++ + , Option "I" ["input-base-uri"] (ReqArg InputBaseUri "URI") $ "Set the input/parser base URI. '-' for none.\n" <> " Default is INPUT-BASE-URI argument value.\n\n" ] diff --git a/src/Text/RDF/RDF4H/NTriplesParser.hs b/src/Text/RDF/RDF4H/NTriplesParser.hs index f1d22ee..2295ca9 100644 --- a/src/Text/RDF/RDF4H/NTriplesParser.hs +++ b/src/Text/RDF/RDF4H/NTriplesParser.hs @@ -13,7 +13,7 @@ module Text.RDF.RDF4H.NTriplesParser import Prelude hiding (readFile) import Data.Semigroup ((<>)) -import Data.Char (isDigit, isLetter, isAlphaNum) +import Data.Char (isDigit, isLetter, isAlphaNum, isAsciiUpper, isAsciiLower) import Control.Applicative import Control.Monad (void) @@ -39,7 +39,7 @@ import System.IO (IOMode(..), withFile, hSetNewlineMode, noNewlineTranslation, h -- class. data NTriplesParser = NTriplesParser -data NTriplesParserCustom = NTriplesParserCustom Parser +newtype NTriplesParserCustom = NTriplesParserCustom Parser -- |'NTriplesParser' is an instance of 'RdfParser' using parsec based parsers. instance RdfParser NTriplesParser where @@ -109,13 +109,13 @@ nt_langtag :: (CharParsing m, Monad m) => m T.Text nt_langtag = do ss <- char '@' *> some (satisfy isLetter) rest <- concat <$> many (char '-' *> some (satisfy isAlphaNum) >>= \lang_str -> pure ('-':lang_str)) - pure (T.pack (ss ++ rest)) + pure (T.pack (ss <> rest)) -- [8] IRIREF nt_iriref :: (CharParsing m, Monad m) => m T.Text nt_iriref = between (char '<') (char '>') $ do raw_iri <- iriFragment - either (const empty) pure (validateIRI raw_iri) "Only absolute IRIs allowed in NTriples format, which this isn't: " ++ show raw_iri + either (const empty) pure (validateIRI raw_iri) "Only absolute IRIs allowed in NTriples format, which this isn't: " <> show raw_iri -- [153s] ECHAR nt_echar :: (CharParsing m, Monad m) => m Char @@ -165,8 +165,8 @@ nt_blank_node_label = do -- [157s] PN_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] nt_pn_chars_base :: CharParsing m => m Char nt_pn_chars_base = try $ satisfy isBaseChar - where isBaseChar c = (c >= 'A' && c <= 'Z') - || (c >= 'a' && c <= 'z') + where isBaseChar c = (isAsciiUpper c) + || (isAsciiLower c) || (c >= '\x00C0' && c <= '\x00D6') || (c >= '\x00D8' && c <= '\x00F6') || (c >= '\x00F8' && c <= '\x02FF') @@ -251,11 +251,11 @@ handleAttoparsec :: (Rdf a) => T.Text -> Either ParseFailure (RDF a) handleAttoparsec bs = handleResult $ parse nt_ntripleDoc (T.encodeUtf8 bs) where handleResult res = case res of - Fail _i _contexts err -> Left $ ParseFailure $ "Parse failure: \n" ++ show err + Fail _i _contexts err -> Left $ ParseFailure $ "Parse failure: \n" <> show err -- error $ - -- "\nnot consumed: " ++ show i - -- ++ "\ncontexts: " ++ show contexts - -- ++ "\nerror: " ++ show err + -- "\nnot consumed: " <> show i + -- <> "\ncontexts: " <> show contexts + -- <> "\nerror: " <> show err Partial f -> handleResult (f (T.encodeUtf8 mempty)) Done _ ts -> Right $ mkRdf ts Nothing (PrefixMappings mempty) diff --git a/src/Text/RDF/RDF4H/ParserUtils.hs b/src/Text/RDF/RDF4H/ParserUtils.hs index f4d6dcd..dda1d22 100644 --- a/src/Text/RDF/RDF4H/ParserUtils.hs +++ b/src/Text/RDF/RDF4H/ParserUtils.hs @@ -1,16 +1,31 @@ +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} -module Text.RDF.RDF4H.ParserUtils( - parseFromURL, - Parser(..) -) where +module Text.RDF.RDF4H.ParserUtils + ( Parser(..) + , parseFromURL + -- RDF + , rdfTypeNode, rdfNilNode, rdfFirstNode, rdfRestNode + , rdfSubjectNode, rdfPredicateNode, rdfObjectNode, rdfStatementNode + , rdfTag, rdfID, rdfAbout, rdfParseType, rdfResource, rdfNodeID, rdfDatatype + , rdfType, rdfLi, rdfListIndex + , rdfDescription, rdfXmlLiteral + , rdfAboutEach, rdfAboutEachPrefix, rdfBagID + -- XML + , xmlLang + -- XSD + , xsdIntUri, xsdDoubleUri, xsdDecimalUri, xsdBooleanUri + ) where import Data.RDF.Types +import Data.RDF.Namespace import Control.Exception.Lifted import Network.HTTP.Conduit import Data.Text.Encoding (decodeUtf8) +import Data.Semigroup ((<>)) import qualified Data.ByteString.Lazy as BS +import Data.Text (Text) import qualified Data.Text as T data Parser = Parsec | Attoparsec @@ -24,15 +39,61 @@ parseFromURL :: (Rdf rdfImpl) => (T.Text -> Either ParseFailure (RDF rdfImpl)) - parseFromURL parseFunc url = do result <- Control.Exception.Lifted.try $ simpleHttp url case result of - Left (ex::HttpException) -> - case ex of + Left (err :: HttpException) -> + case err of (HttpExceptionRequest _req content) -> case content of - ConnectionTimeout -> do + ConnectionTimeout -> return $ errResult "Connection timed out" - _ -> return $ errResult ("HttpExceptionRequest content: " ++ show content) + _ -> return $ errResult ("HttpExceptionRequest content: " <> show content) (InvalidUrlException{}) -> return $ errResult "Invalid URL exception" Right bs -> do let s = decodeUtf8 $ BS.toStrict bs return (parseFunc s) + +rdfTypeNode, rdfNilNode, rdfFirstNode, rdfRestNode :: Node +rdfTypeNode = UNode $ mkUri rdf "type" +rdfNilNode = UNode $ mkUri rdf "nil" +rdfFirstNode = UNode $ mkUri rdf "first" +rdfRestNode = UNode $ mkUri rdf "rest" + +rdfSubjectNode, rdfPredicateNode, rdfObjectNode, rdfStatementNode :: Node +rdfSubjectNode = UNode $ mkUri rdf "subject" +rdfPredicateNode = UNode $ mkUri rdf "predicate" +rdfObjectNode = UNode $ mkUri rdf "object" +rdfStatementNode = UNode $ mkUri rdf "Statement" + +-- Core terms +rdfTag, rdfID, rdfAbout, rdfParseType, rdfResource, rdfNodeID, rdfDatatype :: Text +rdfTag = mkUri rdf "RDF" +rdfID = mkUri rdf "ID" +rdfAbout = mkUri rdf "about" +rdfParseType = mkUri rdf "parseType" +rdfResource = mkUri rdf "resource" +rdfNodeID = mkUri rdf "nodeID" +rdfDatatype = mkUri rdf "datatype" + +rdfType, rdfLi, rdfListIndex :: Text +rdfType = mkUri rdf "type" +rdfLi = mkUri rdf "li" +rdfListIndex = mkUri rdf "_" + +rdfXmlLiteral, rdfDescription :: Text +rdfXmlLiteral = mkUri rdf "XMLLiteral" +rdfDescription = mkUri rdf "Description" + +-- Old terms +rdfAboutEach, rdfAboutEachPrefix, rdfBagID :: Text +rdfAboutEach = mkUri rdf "aboutEach" +rdfAboutEachPrefix = mkUri rdf "aboutEachPrefix" +rdfBagID = mkUri rdf "bagID" + +xmlLang :: Text +xmlLang = mkUri xml "lang" + +xsdIntUri, xsdDoubleUri, xsdDecimalUri, xsdBooleanUri :: Text +xsdIntUri = mkUri xsd "integer" +xsdDoubleUri = mkUri xsd "double" +xsdDecimalUri = mkUri xsd "decimal" +xsdBooleanUri = mkUri xsd "boolean" diff --git a/src/Text/RDF/RDF4H/TurtleParser.hs b/src/Text/RDF/RDF4H/TurtleParser.hs index 675c2e8..ec4e8d6 100644 --- a/src/Text/RDF/RDF4H/TurtleParser.hs +++ b/src/Text/RDF/RDF4H/TurtleParser.hs @@ -8,6 +8,7 @@ module Text.RDF.RDF4H.TurtleParser ( TurtleParser(TurtleParser) , TurtleParserCustom(TurtleParserCustom) + , parseTurtleDebug ) where import Prelude hiding (readFile) @@ -16,20 +17,23 @@ import Data.Char (toLower, toUpper, isDigit, isHexDigit) import qualified Data.Map as Map import Data.Map (Map) import Data.Maybe +import Data.Either +import Data.Semigroup ((<>)) import Data.RDF.Types import Data.RDF.IRI -import Data.RDF.Namespace +import Data.RDF.Graph.TList import Text.RDF.RDF4H.ParserUtils import Text.RDF.RDF4H.NTriplesParser import Text.Parsec (runParser, ParseError) import qualified Data.Text as T import Data.Sequence (Seq, (|>)) +import Data.Functor (($>)) import qualified Data.Foldable as F import Control.Monad import Text.Parser.Char import Text.Parser.Combinators import Text.Parser.LookAhead -import Control.Applicative +import Control.Applicative hiding (empty) import Control.Monad.State.Class import Control.Monad.State.Strict @@ -73,6 +77,9 @@ type ParseState = , Seq Triple -- the triples encountered while parsing; always added to on the right side , Map String Integer ) -- map blank node names to generated id. +parseTurtleDebug :: String -> IO (RDF TList) +parseTurtleDebug f = fromRight empty <$> parseFile (TurtleParserCustom (Just . BaseUrl $ "http://base-url.com/") (Just "http://doc-url.com/") Attoparsec) f + -- grammar rule: [1] turtleDoc t_turtleDoc :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m (Seq Triple, PrefixMappings) t_turtleDoc = @@ -176,7 +183,7 @@ t_sparql_base = do updateBaseUrl (Just $ Just newBaseIri) t_verb :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m () -t_verb = try t_predicate <|> (char 'a' *> pure rdfTypeNode) >>= setPredicate +t_verb = try t_predicate <|> (char 'a' $> rdfTypeNode) >>= setPredicate -- grammar rule: [11] predicate ::= iri t_predicate :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m Node @@ -189,7 +196,7 @@ t_pname_ns = do (_, _, _, pms, _, _, _, _) <- get case resolveQName pre pms of Just n -> pure n - Nothing -> unexpected ("Cannot resolve QName prefix: " ++ T.unpack pre) + Nothing -> unexpected ("Cannot resolve QName prefix: " <> T.unpack pre) -- grammar rules: [168s] PN_LOCAL -- [168s] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))? @@ -199,9 +206,9 @@ t_pn_local = do xs <- option "" $ try $ do let recsve = (t_pn_chars_str <|> string ":" <|> t_plx) <|> (t_pn_chars_str <|> string ":" <|> t_plx <|> try (string "." <* lookAhead (try recsve))) <|> - (t_pn_chars_str <|> string ":" <|> t_plx <|> try (string "." *> notFollowedBy t_ws *> pure ".")) + (t_pn_chars_str <|> string ":" <|> t_plx <|> try (string "." *> notFollowedBy t_ws $> ".")) concat <$> many recsve - pure (T.pack (x ++ xs)) + pure (T.pack (x <> xs)) where satisfy_str = pure <$> satisfy isDigit t_pn_chars_str = pure <$> t_pn_chars @@ -235,7 +242,7 @@ t_subject = iri <|> t_blankNode <|> t_collection >>= setSubject -- [137s] BlankNode ::= BLANK_NODE_LABEL | ANON t_blankNode :: (CharParsing m, MonadState ParseState m) => m Node t_blankNode = do - genID <- try t_blank_node_label <|> (t_anon *> pure mempty) + genID <- try t_blank_node_label <|> (t_anon $> mempty) mp <- currGenIdLookup maybe (newBN genID) getExistingBN (Map.lookup genID mp) where @@ -297,7 +304,7 @@ t_collection = withConstantSubjectPredicate $ void (many t_ws) return root where - empty_list = lookAhead (char ')') *> return rdfNilNode + empty_list = lookAhead (char ')') $> rdfNilNode non_empty_list = do ns <- sepEndBy1 element (some t_ws) addTripleForObject rdfNilNode @@ -313,18 +320,6 @@ t_collection = withConstantSubjectPredicate $ return bn getSubject = get >>= \(_, _, _, _, s, _, _, _) -> pure s -rdfTypeNode, rdfNilNode, rdfFirstNode, rdfRestNode :: Node -rdfTypeNode = UNode $ mkUri rdf "type" -rdfNilNode = UNode $ mkUri rdf "nil" -rdfFirstNode = UNode $ mkUri rdf "first" -rdfRestNode = UNode $ mkUri rdf "rest" - -xsdIntUri, xsdDoubleUri, xsdDecimalUri, xsdBooleanUri :: T.Text -xsdIntUri = mkUri xsd "integer" -xsdDoubleUri = mkUri xsd "double" -xsdDecimalUri = mkUri xsd "decimal" -xsdBooleanUri = mkUri xsd "boolean" - t_literal :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m Node t_literal = LNode <$> try t_rdf_literal <|> @@ -507,7 +502,7 @@ updateBaseUrl val = _modifyState val no no no no no -- combines get_current and increment into a single function nextIdCounter :: MonadState ParseState m => m Integer nextIdCounter = get >>= \(bUrl, dUrl, i, pms, s, p, ts, genMap) -> - put (bUrl, dUrl, i+1, pms, s, p, ts, genMap) *> pure i + put (bUrl, dUrl, i+1, pms, s, p, ts, genMap) $> i nextBlankNode :: MonadState ParseState m => m Node nextBlankNode = BNodeGen . fromIntegral <$> nextIdCounter @@ -570,8 +565,8 @@ addTripleForObject obj = do t <- getTriple s p put (bUrl, dUrl, i, pms, s, p, ts |> t, genMap) where - getTriple Nothing _ = unexpected $ "No Subject with which to create triple for: " ++ show obj - getTriple _ Nothing = unexpected $ "No Predicate with which to create triple for: " ++ show obj + getTriple Nothing _ = unexpected $ "No Subject with which to create triple for: " <> show obj + getTriple _ Nothing = unexpected $ "No Predicate with which to create triple for: " <> show obj getTriple (Just s') (Just p') = pure $ Triple s' p' obj @@ -631,7 +626,7 @@ parseStringAttoparsec bUrl docUrl t = handleResult' $ parse (evalStateT t_turtle where handleResult' res = case res of Fail _ _ err -> -- error err - Left $ ParseFailure $ "Parse failure: \n" ++ show err + Left $ ParseFailure $ "Parse failure: \n" <> show err Partial f -> handleResult' (f mempty) Done _ (ts,pms) -> Right $! mkRdf (F.toList ts) bUrl pms @@ -650,12 +645,12 @@ parseURLAttoparsec bUrl docUrl = parseFromURL (parseStringAttoparsec bUrl docUrl --------------------------------- initialState :: Maybe BaseUrl -> Maybe T.Text -> ParseState -initialState bUrl docUrl = (bUrl, docUrl, 1, PrefixMappings mempty, Nothing, Nothing, mempty, mempty) +initialState bUrl docUrl = (BaseUrl <$> docUrl <|> bUrl, docUrl, 1, PrefixMappings mempty, Nothing, Nothing, mempty, mempty) handleResult :: Rdf a => Maybe BaseUrl -> Either ParseError (Seq Triple, PrefixMappings) -> Either ParseFailure (RDF a) handleResult bUrl result = case result of - (Left err) -> Left (ParseFailure $ "Parse failure: \n" ++ show err) + (Left err) -> Left (ParseFailure $ "Parse failure: \n" <> show err) (Right (ts, pms)) -> Right $! mkRdf (F.toList ts) bUrl pms @@ -668,7 +663,7 @@ caseInsensitiveChar c = char (toLower c) <|> char (toUpper c) -- Match the string 's', accepting either lowercase or uppercase form of each character caseInsensitiveString :: (CharParsing m, Monad m) => String -> m String -caseInsensitiveString s = try (mapM caseInsensitiveChar s) "\"" ++ s ++ "\"" +caseInsensitiveString s = try (mapM caseInsensitiveChar s) "\"" <> s <> "\"" tryIriResolution :: (CharParsing m, Monad m) => Maybe BaseUrl -> Maybe T.Text -> T.Text -> m T.Text tryIriResolution mbUrl mdUrl iriFrag = tryIriResolution' mbUrl mdUrl @@ -676,4 +671,4 @@ tryIriResolution mbUrl mdUrl iriFrag = tryIriResolution' mbUrl mdUrl tryIriResolution' (Just (BaseUrl bIri)) _ = either err pure (resolveIRI bIri iriFrag) tryIriResolution' _ (Just dIri) = either err pure (resolveIRI dIri iriFrag) tryIriResolution' _ _ = either err pure (resolveIRI mempty iriFrag) - err m = unexpected $ "Cannot resolve IRI: " ++ m ++ " " ++ show (mbUrl, mdUrl, iriFrag) + err m = unexpected $ mconcat ["Cannot resolve IRI: ", m, " ", show (mbUrl, mdUrl, iriFrag)] diff --git a/src/Text/RDF/RDF4H/TurtleSerializer.hs b/src/Text/RDF/RDF4H/TurtleSerializer.hs index ace29b1..46ffcd7 100644 --- a/src/Text/RDF/RDF4H/TurtleSerializer.hs +++ b/src/Text/RDF/RDF4H/TurtleSerializer.hs @@ -1,4 +1,4 @@ --- |An RDF serializer for Turtle +-- |An RDF serializer for Turtle -- . module Text.RDF.RDF4H.TurtleSerializer( @@ -23,7 +23,7 @@ data TurtleSerializer = TurtleSerializer (Maybe T.Text) PrefixMappings instance RdfSerializer TurtleSerializer where hWriteRdf (TurtleSerializer docUrl pms) h rdf = _writeRdf h docUrl (addPrefixMappings rdf pms False) writeRdf s = hWriteRdf s stdout - hWriteH (TurtleSerializer _ pms) h rdf = writeHeader h (baseUrl rdf) (mergePrefixMappings (prefixMappings rdf) pms) + hWriteH (TurtleSerializer _ pms) h rdf = writeHeader h (baseUrl rdf) (prefixMappings rdf <> pms) writeH s = hWriteRdf s stdout -- TODO: should use mdUrl to render <> where appropriate hWriteTs (TurtleSerializer docUrl pms) h = writeTriples h docUrl pms @@ -31,7 +31,7 @@ instance RdfSerializer TurtleSerializer where hWriteT (TurtleSerializer docUrl pms) h = writeTriple h docUrl pms writeT s = hWriteT s stdout hWriteN (TurtleSerializer docUrl (PrefixMappings pms)) h n = writeNode h docUrl n pms - writeN s = hWriteN s stdout + writeN s = hWriteN s stdout -- TODO: writeRdf currently merges standard namespace prefix mappings with -- the ones that the RDF already contains, so that if the RDF has none @@ -72,10 +72,10 @@ writeTriples :: Handle -> Maybe T.Text -> PrefixMappings -> Triples -> IO () writeTriples h mdUrl (PrefixMappings pms) ts = mapM_ (writeSubjGroup h mdUrl revPms) (groupBy equalSubjects ts) where - revPms = Map.fromList $ map (\(k,v) -> (v,k)) $ Map.toList pms + revPms = Map.fromList $ (\(k,v) -> (v,k)) <$> Map.toList pms writeTriple :: Handle -> Maybe T.Text -> PrefixMappings -> Triple -> IO () -writeTriple h mdUrl (PrefixMappings pms) t = +writeTriple h mdUrl (PrefixMappings pms) t = w subjectOf >> space >> w predicateOf >> space >> w objectOf where w :: (Triple -> Node) -> IO () @@ -100,9 +100,9 @@ writeSubjGroup h dUrl pms ts@(t:_) = writePredGroup :: Handle -> Maybe T.Text -> Map T.Text T.Text -> Triples -> IO () writePredGroup _ _ _ [] = return () writePredGroup h docUrl pms (t:ts) = - -- The doesn't rule out <> in either the predicate or object (as well as subject), + -- The doesn't rule out <> in either the predicate or object (as well as subject), -- so we pass the docUrl through to writeNode in all cases. - writeNode h docUrl (predicateOf t) pms >> hPutChar h ' ' >> + writeNode h docUrl (predicateOf t) pms >> hPutChar h ' ' >> writeNode h docUrl (objectOf t) pms >> mapM_ (\t' -> hPutStr h ", " >> writeNode h docUrl (objectOf t') pms) ts @@ -131,7 +131,7 @@ _debugPMs pms = mapM_ (\(k, v) -> T.putStr k >> putStr "__" >> T.putStrLn v) (M -- Expects a map from uri to prefix, and returns the (prefix, uri_expansion) -- from the mappings such that uri_expansion is a prefix of uri, or Nothing if --- there is no such mapping. This function does a linear-time search over the +-- there is no such mapping. This function does a linear-time search over the -- map, but the prefix mappings should always be very small, so it's okay for now. findMapping :: Map T.Text T.Text -> T.Text -> Maybe (T.Text, T.Text) findMapping pms uri = diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index ebd143d..9958fed 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -1,532 +1,654 @@ {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE DoAndIfThenElse #-} -{-# LANGUAGE DeriveDataTypeable #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE OverloadedLists #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE DoAndIfThenElse #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE LambdaCase #-} -- |An parser for the RDF/XML format -- . module Text.RDF.RDF4H.XmlParser - -- ( - -- XmlParser'(XmlParser') - -- , xmlEg - -- ) -where - -import Text.RDF.RDF4H.ParserUtils (parseFromURL) - -import Debug.Trace -import qualified Control.Applicative as Applicative -import Control.Exception -import Control.Monad -import qualified Data.HashMap.Strict as HashMap + ( XmlParser(..) + , parseXmlDebug + ) where + +import Data.RDF.Types hiding (empty, resolveQName) +import qualified Data.RDF.Types as RDF +import Data.RDF.IRI +import Data.RDF.Graph.TList +import Text.RDF.RDF4H.ParserUtils hiding (Parser) +import Text.RDF.RDF4H.XmlParser.Identifiers + +import Control.Applicative +import Control.Monad +import Control.Monad.Except +import Control.Monad.State.Strict +import Data.Semigroup ((<>)) +import Data.Set (Set) +import qualified Data.Set as S import qualified Data.Map as Map -import Data.Maybe -import Data.RDF.IRI -import Data.RDF.Types -import Data.RDF.Graph.TList +import Data.Maybe +import Data.Either +import Data.Bifunctor +import Data.HashSet (HashSet) +import qualified Data.HashSet as HS +import Data.HashMap.Strict (HashMap) import qualified Data.HashMap.Strict as HM -import Data.Text (Text) -import Data.Text.Encoding +import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.IO as TIO import qualified Data.Text.Lazy as TL import qualified Data.Text.Encoding as T -import Xmlbf hiding (Node) -import qualified Xmlbf (Node) +import qualified Data.ByteString.Lazy as BL +import qualified Data.ByteString.Builder as BB +import Xmlbf hiding (Node, State) import qualified Xmlbf.Xeno as Xeno - -data XmlParser = XmlParser (Maybe BaseUrl) (Maybe Text) + instance RdfParser XmlParser where - parseString (XmlParser bUrl dUrl) = parseXmlRDF bUrl dUrl - parseFile (XmlParser bUrl dUrl) = parseFile' bUrl dUrl - parseURL (XmlParser bUrl dUrl) = parseURL' bUrl dUrl + parseString (XmlParser bUrl dUrl) = parseXmlRDF bUrl dUrl + parseFile (XmlParser bUrl dUrl) = parseFile' bUrl dUrl + parseURL (XmlParser bUrl dUrl) = parseURL' bUrl dUrl + +-- |Configuration for the XML parser +data XmlParser = XmlParser + (Maybe BaseUrl) + -- ^ The /default/ base URI to parse the document. + (Maybe Text) + -- ^ The /retrieval URI/ of the XML document. + +parseFile' :: (Rdf a) + => Maybe BaseUrl + -> Maybe Text + -> FilePath + -> IO (Either ParseFailure (RDF a)) +parseFile' bUrl dUrl fpath = parseXmlRDF bUrl dUrl <$> TIO.readFile fpath -parseFile' :: - (Rdf a) +parseURL' :: (Rdf a) => Maybe BaseUrl + -- ^ The optional base URI of the document. -> Maybe Text + -- ^ The document URI (i.e., the URI of the document itself); if Nothing, use location URI. -> String + -- ^ The location URI from which to retrieve the XML document. -> IO (Either ParseFailure (RDF a)) -parseFile' bUrl dUrl fpath = - TIO.readFile fpath >>= return . parseXmlRDF bUrl dUrl - -parseURL' :: - (Rdf a) - => Maybe BaseUrl -- ^ The optional base URI of the document. - -> Maybe Text -- ^ The document URI (i.e., the URI of the document itself); if Nothing, use location URI. - -> String -- ^ The location URI from which to retrieve the XML document. - -> IO (Either ParseFailure (RDF a)) -- ^ The parse result, which is either a @ParseFailure@ or the RDF - -- corresponding to the XML document. + -- ^ The parse result, which is either a @ParseFailure@ or the RDF + -- corresponding to the XML document. parseURL' bUrl docUrl = parseFromURL (parseXmlRDF bUrl docUrl) --- -- |Global state for the parser --- data GParseState = GParseState --- { stateGenId :: Int --- } deriving (Show) +-- |The parser monad. +type Parser = ParserT (ExceptT String (State ParseState)) -- |Local state for the parser (dependant on the parent xml elements) -data ParseState = ParseState { stateBaseUrl :: Maybe BaseUrl - , stateLang :: Maybe Text - , stateSubject :: Subject - , stateGenId :: Int - } - deriving(Show) - -data ParserException = ParserException String - deriving (Show) -instance Exception ParserException - -testXeno :: Text -> Either String [Xmlbf.Node] -testXeno = Xeno.nodes . T.encodeUtf8 +data ParseState = ParseState + { stateBaseUri :: Maybe BaseUrl + -- ^ The local base URI. + , stateIdSet :: Set Text + -- ^ The set of @rdf:ID@ found in the scope of the current base URI. + , statePrefixMapping :: PrefixMappings + -- ^ The namespace mapping. + , stateLang :: Maybe Text + -- ^ The local @xml:lang@ + , stateNodeAttrs :: HashMap Text Text + -- ^ Current node RDF attributes. + , stateSubject :: Maybe Subject + -- ^ Current subject for triple construction. + , stateCollectionIndex :: Int + -- ^ Current collection index. + , stateGenId :: Int + } deriving(Show) -- |Parse a xml Text to an RDF representation parseXmlRDF :: (Rdf a) - => Maybe BaseUrl -- ^ The base URL for the RDF if required - -> Maybe Text -- ^ DocUrl: The request URL for the RDF if available - -> Text -- ^ The contents to parse - -> Either ParseFailure (RDF a) -- ^ The RDF representation of the triples or ParseFailure -parseXmlRDF bUrl dUrl xmlStr = - case Xeno.nodes (T.encodeUtf8 xmlStr) of - Left xmlParseError -> Left (ParseFailure xmlParseError) - Right nodes -> -- error (show nodes) - case runParser (rdfParser bUrl dUrl) nodes of - Left rdfParseError -> Left (ParseFailure rdfParseError) - Right rdf -> Right rdf --- TODO: use bUrl and dUrl - -rdfParser :: Rdf a => Maybe BaseUrl -> Maybe Text -> Parser (RDF a) -rdfParser bUrl dUrl = do - let initState = ParseState bUrl Nothing undefined 0 - rdf <- rdfDescription initState - newlines - -- tree <- showTree - -- error (show tree) - void pEndOfInput - return rdf - --- Text "\n" --- TODO: check that all that follows from \n is zero or more ' ' characters. -newline :: Parser () -newline = do - t <- pText - if not $ anyUsefulChars (TL.toStrict t) - then pure () - else pFail "not a newline text node" - where - anyUsefulChars t = - if T.length t == 0 - then False - else - let c = T.head t - in if (c /= '\n' && c /= '\r' && c /= ' ') - then True - else anyUsefulChars (T.tail t) - -newlines :: Parser () -newlines = void (many newline) - -pNodeNot :: Text -> Parser () -pNodeNot t = do - n <- pName - if (n /= t) - then pure () - else pFail ("forbidden element name: " ++ show t) - -{- -[ ("xmlns:si","https://www.w3schools.com/rdf/") -, ("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#") -] --} -prefixes :: Parser [(Text,Text)] -prefixes = do - xs <- HashMap.toList <$> pAttrs - pure (map (\(k,v) -> (fromJust (T.stripPrefix "xmlns:" k),v)) xs) - -oneAttr :: Parser (Text,Text) -oneAttr = do - xs <- pAttrs - case length (HashMap.toList xs) of - 1 -> pure $ head (HashMap.toList xs) - _ -> pFail "not one attr" - -rdfTriplesP :: ParseState -> Parser (Triples,ParseState) -rdfTriplesP st = do - newlines - pElement "rdf:Description" $ do - newlines - ((subj, reifiedTriples), st') <- subjP st - (ts) <- concat <$> many (predObjP st') - newlines - -- tree <- showTree - -- error (show tree) - void pEndOfInput - pure (ts ++ reifiedTriples,st') - -- pure $ ((map (\(p, o) -> triple subj p o) predObjs ++ reifiedTriples), st') - -{- NOTE: - remember to use `showTree` in the fork of xmlbf when pEndOfInput needs - debugging. --} - -subjP :: ParseState -> Parser ((Node,Triples),ParseState) -subjP st = do - -- void (pNodeNot "rdf:RDF") -- rdfms-rdf-names-use-error-001 - (do - s <- unode <$> pAttr "rdf:about" - pure ((s,[]),st { stateSubject = s } ) - <|> do - -- theId <- pAttr "rdf:ID" - let theBnode = BNodeGen (stateGenId st) - st' = st { stateGenId = stateGenId st + 1} - pure ((theBnode,[]),st')) - --- predObjP :: Parser ((Node,Node)) -predObjP :: ParseState -> Parser Triples -predObjP st = do - void newlines - (do pAnyElement $ do - void (pNodeNot "rdf:Description") -- rdfms-rdf-names-use-error-011 - p <- unode <$> pName - (ts) <- - (do - -- typed literal - theType <- pAttr "rdf:datatype" - theText <- pText - pure [triple (stateSubject st) p ((lnode (typedL (TL.toStrict theText) theType)))]) - <|> - -- blank node - (do (p1,o1) <- oneAttr - -- TODO: increment stateGenId - let bnode = BNodeGen (stateGenId st) - t1 = triple (stateSubject st) p bnode - a = case stateBaseUrl st of - Nothing -> T.pack "" - Just (BaseUrl uri) -> uri - Right txt = resolveIRI a p1 - p2 = unode txt - -- TODO: typed and lang literals - t2 = triple bnode p2 (lnode (plainL o1)) - pure [t1,t2]) - <|> - (do - -- plain literal - theText <- pText - newlines - pure [triple - (stateSubject st) - p - (lnode (plainL (TL.toStrict theText)))]) - newlines - pure ts - <|> - pFail "unable to parse predicate/object pair" - ) - -- TODO: reify triple - --- TODO: unodes, and all different kinds of plain text nodes --- objP :: Parser (Node) --- objP {- st -} = do --- -- unode --- -- xs <- head <$> prefixes --- -- error (show xs) --- -- TODO for: --- -- Element "eg:Creator" --- -- [("eg:named","D\252rst")] --- -- [] --- pure (unode "http://www.example.com") --- <|> do --- -- typed literal --- theType <- pAttr "rdf:datatype" --- theText <- pText --- pure ((lnode (typedL (TL.toStrict theText) theType))) --- <|> do --- -- plain literal --- theText <- pText --- pure (lnode (plainL (TL.toStrict theText))) - - - -rdfDescription' :: ParseState -> Parser (PrefixMappings,Maybe BaseUrl,Triples) -rdfDescription' st = do - newlines - pfixes <- prefixes - (_,(triples,st')) <- pElement' (rdfTriplesP st) - newlines - pure (PrefixMappings (Map.fromList pfixes), Nothing, triples) - -rdfDescription :: Rdf a => ParseState -> Parser (RDF a) -rdfDescription st = do - (pfixes,bUrl,triples) <- pElement "rdf:RDF" (rdfDescription' st) - pure $ mkRdf triples bUrl pfixes - -{- -[ Text "\n" -, Element - "rdf:RDF" - [ ("xmlns:si","https://www.w3schools.com/rdf/") - , ("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#") - ] - [ Text "\n" - , Element - "rdf:Description" - [ ("rdf:about","https://www.w3schools.com") ] - [ Text "\n" - , Element - "si:title" - [] - [ Text "W3Schools" ] - , Text "\n" - , Element - "si:author" - [] - [ Text "Jan Egil Refsnes" ] - , Text "\n" - ] - ,Text "\n" - ] -,Text "\n" -] --} -xmlEg = T.pack $ unlines - [ "" - , "" - , "" - , "W3Schools" - , "Jan Egil Refsnes" - , "" - , "" - ] - -test1 :: Bool -test1 = triplesOf got == expected + => Maybe BaseUrl + -- ^ The base URI for the RDF if required + -> Maybe Text + -- ^ The request URI for the document to if available + -> Text + -- ^ The contents to parse + -> Either ParseFailure (RDF a) + -- ^ The RDF representation of the triples or ParseFailure +parseXmlRDF bUrl dUrl = parseRdf . parseXml where - Right (got::RDF TList) = parseXmlRDF Nothing Nothing xmlEg - expected = - [ Triple - (UNode "https://www.w3schools.com") - (UNode "si:title") - (LNode (PlainL "W3Schools")) - , Triple - (UNode "https://www.w3schools.com") - (UNode "si:author") - (LNode (PlainL "Jan Egil Refsnes")) - ] - --- missing in Xmlbf - --- | @'pElement'' p@ runs a 'Parser' @p@ inside a element node and --- returns a pair with the name of the parsed element and result of --- @p@. This fails if such element does not exist at the current --- position. + bUrl' = BaseUrl <$> dUrl <|> bUrl + parseXml = Xeno.fromRawXml . T.encodeUtf8 + parseRdf = first ParseFailure . join . second parseRdf' + parseRdf' ns = join $ evalState (runExceptT (parseM rdfParser ns)) initState + initState = ParseState bUrl' mempty mempty empty mempty empty 0 0 + +-- |A parser for debugging purposes. +parseXmlDebug + :: FilePath + -- ^ Path of the file to parse. + -> IO (RDF TList) +parseXmlDebug f = fromRight RDF.empty <$> parseFile (XmlParser (Just . BaseUrl $ "http://base-url.com/") (Just "http://doc-url.com/")) f + +-- |Document parser +rdfParser :: Rdf a => Parser (RDF a) +rdfParser = do + bUri <- currentBaseUri + triples <- (pRdf <* pWs) <|> pNodeElementList + pEndOfInput + mkRdf triples bUri <$> currentPrefixMappings + +-- |Parser for @rdf:RDF@, if present. +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#RDF +pRdf :: Parser Triples +pRdf = pAnyElement $ do + attrs <- pRDFAttrs + uri <- pName >>= pQName + guard (uri == rdfTag) + unless (null attrs) $ throwError "rdf:RDF: The set of attributes should be empty." + pNodeElementList + +-- |Parser for XML QName: resolve the namespace with the mapping in context. -- --- Leading whitespace is ignored. If you need to preserve that whitespace for --- some reason, capture it using 'pText' before using 'pElement''. +-- Throws an error if the namespace is not defined. +pQName :: Text -> Parser Text +pQName qn = do + pm <- currentPrefixMappings + let qn' = resolveQName pm qn >>= validateIRI + either throwError pure qn' + +-- |Process the attributes of an XML element. +-- +-- To be called __once__ per XML element. +pRDFAttrs :: Parser (HashMap Text Text) +pRDFAttrs = do + -- Language (xml:lang) + liftA2 (<|>) pLang currentLang >>= setLang + -- Base URI (xml:base) + liftA2 (<|>) pBase currentBaseUri >>= setBaseUri + bUri <- currentBaseUri + -- Process the rest of the attributes + attrs <- pAttrs + -- Get the namespace definitions (xmlns:) + pm <- updatePrefixMappings (PrefixMappings $ HM.foldlWithKey' mkNameSpace mempty attrs) + -- Filter and resolve RDF attributes + let as = HM.foldlWithKey' (mkRdfAttribute pm bUri) mempty attrs + setNodeAttrs as + pure as + where + -- |Check if an XML attribute is a namespace definition + -- and if so add it to the mapping. + mkNameSpace + :: Map.Map Text Text + -- ^ Current namespace mapping + -> Text + -- ^ XML attribute to process + -> Text + -- ^ Value of the attribute + -> Map.Map Text Text + mkNameSpace ns qn iri = + let qn' = parseQName qn + ns' = f <$> qn' <*> validateIRI iri + f (Nothing , "xmlns") iri' = Map.insert mempty iri' ns + f (Just "xmlns", prefix ) iri' = Map.insert prefix iri' ns + f _ _ = ns + in either (const ns) id ns' + -- |Check if an XML attribute is an RDF attribute + -- and if so resolve its URI and keep it. + mkRdfAttribute + :: PrefixMappings + -- ^ Namespace mapping + -> Maybe BaseUrl + -- ^ Base URI + -> HM.HashMap Text Text + -- ^ Current set of RDF attributes + -> Text + -- ^ XML attribute to process + -> Text + -- ^ Value of the attribute + -> HM.HashMap Text Text + mkRdfAttribute pm bUri as qn v = + let as' = parseQName qn >>= f + -- [NOTE] Ignore XML reserved names + f (Nothing, n) + | T.isPrefixOf "xml" n = Right as + | otherwise = case bUri of + Nothing -> Right as -- [FIXME] manage missing base URI + Just (BaseUrl bUri') -> (\a -> HM.insert a v as) <$> resolveIRI bUri' n + f qn'@(Just prefix, _) + | T.isPrefixOf "xml" prefix = Right as + | otherwise = (\a -> HM.insert a v as) <$> resolveQName' pm qn' + in either (const as) id as' + +-- |Return the value of the requested RDF attribute using its URI. -- --- Consumes the element from the parser state. -pElement' :: Parser a -> Parser (T.Text,a) -pElement' p = do - res <- p - name <- pName - return (name,res) - -pText' :: TL.Text -> Parser TL.Text -pText' t = do - let pTextFail = pFail ("Missing text node " ++ show t) - (do t' <- pText - if t == t' then pure t - else pTextFail - <|> pTextFail) - - --- parser combinators missing in Xmlbf -between :: Parser a -> Parser b -> Parser c -> Parser c -between open close thing = open *> thing <* close - -manyTill :: Parser a -> Parser end -> Parser [a] -manyTill thing z = many thing <* z - -(<|>) :: Parser a -> Parser a -> Parser a -(<|>) a b = a Applicative.<|> b - -some :: Parser a -> Parser [a] -some = Applicative.some - -many :: Parser a -> Parser [a] -many = Applicative.many - --- pElem :: Text -> Parser Text --- oneOf :: Parser [a] -> Parser a --- noneOf :: Parser [a] -> Parser a - - ---------------------------- --- Example trees from xeno - --- data/xml/example07.rdf -{- -[ Text "\n" -, Element "rdf:RDF" - [ ("xmlns:dc","http://purl.org/dc/elements/1.1/") - , ("xmlns:ex","http://example.org/stuff/1.0/") - , ("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#") - ] - [ Text "\n " - , Element "rdf:Description" - [ ("rdf:about","http://www.w3.org/TR/rdf-syntax-grammar") - , ("dc:title","RDF/XML Syntax Specification (Revised)") - ] - [ Text "\n " - , Element "ex:editor" - [] - [ Text "\n " - , Element "rdf:Description" - [ ("ex:fullName","Dave Beckett") - ] - [ Text "\n " - , Element "ex:homePage" - [ ("rdf:resource","http://purl.org/net/dajobe/") - ] - [] - , Text "\n " - ] - , Text "\n " - ] - , Text "\n " - ] - , Text "\n" - ] -, Text "\n" -] --} - - -{- rdf-tests/rdf-xml/amp-in-url/test001.rdf - -[ Text "\n\n" -, Element "rdf:RDF" - [("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#") - ] - [ Text "\n\n " - , Element "rdf:Description" - [("rdf:about","http://example/q?abc=1&def=2") - ] - [ Text "\n " - , Element "rdf:value" - [] - [Text "xxx"] - , Text "\n " - ] - , Text "\n\n" - ] -, Text "\n" -] --} - -{- "rdf-tests/rdf-xml/rdfms-rdf-names-use/error-001.rdf" - -[ Text "\n\n\n" -, Element "rdf:RDF" - [("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#")] - [ Text "\n " - , Element "rdf:RDF" - [] - [] - , Text "\n" - ] -, Text "\n" -] - --} - - - -{- "rdf-tests/rdf-xml/rdfms-rdf-names-use/error-011.rdf" -Description is forbidden as a property element name. - -[ Text "\n\n\n" -, Element "rdf:RDF" - [("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#")] - [ Text "\n " - , Element "rdf:Description" - [("rdf:about","http://example.org/node1")] - [ Text "\n " - , Element "rdf:Description" - [("rdf:resource","http://example.org/node2")] - [] - , Text "\n " - ] - , Text "\n" - ] -, Text "\n" -] - --} - -{- "rdf-tests/rdf-xml/rdf-charmod-literals/test001.rdf" - -[ Text "\n\n\n" -, Element "rdf:RDF" - [("xmlns:eg","http://example.org/"),("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#")] - [ Text "\n \n\n " - , Element "rdf:Description" - [("rdf:about","http://www.w3.org/TR/2002/WD-charmod-20020220")] - [ Text "\n\n \n " - , Element "eg:Creator" - [("eg:named","D\252rst")] - [] - , Text "\n \n\n " - ] - , Text "\n" - ] -, Text "\n" -] - --} - -{- rdf-tests/rdf-xml/rdf-charmod-uris/test001.rdf - -[ Text "\r\n\r\n\r\n" -, Element "rdf:RDF" - [("xmlns:eg","http://example.org/#"),("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#")] - [ Text "\r\n\r\n \r\n\r\n " - , Element "rdf:Description" - [("rdf:about","http://example.org/#Andr\233")] - [ Text "\r\n " - , Element "eg:owes" - [] - [Text "2000"] - , Text "\r\n " - ] - , Text "\r\n" - ] -, Text "\r\n" -] - --} - -{- "rdf-tests/rdf-xml/rdf-charmod-uris/test002.rdf" - -[ Text "\r\n\r\n" -, Element "rdf:RDF" - [("xmlns:eg","http://example.org/#"),("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#")] - [ Text "\r\n \r\n \r\n\r\n " - , Element "rdf:Description" - [("rdf:about","http://example.org/#Andr%C3%A9")] - [ Text "\r\n " - , Element "eg:owes" - [] - [Text "2000"] - , Text "\r\n " - ] - , Text "\r\n" - ] -, Text " \r\n" -] - --} +-- Fails if the attribute is not defined. +pRDFAttr :: Text -> Parser Text +pRDFAttr a = do + as <- currentNodeAttrs + maybe + (fail . mconcat $ ["Attribute \"", T.unpack a, "\" not found."]) + pure + (HM.lookup a as) + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#nodeElementList +pNodeElementList :: Parser Triples +pNodeElementList = pWs *> (mconcat <$> some (keepState pNodeElement <* pWs)) + +-- |White spaces parser +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#ws +pWs :: Parser () +pWs = maybe True (T.all ws . TL.toStrict) <$> optional pText >>= guard + where + -- See: https://www.w3.org/TR/2000/REC-xml-20001006#NT-S + ws c = c == '\x20' || c == '\x09' || c == '\x0d' || c == '\x0a' + +-- https://www.w3.org/TR/rdf-syntax-grammar/#nodeElement +pNodeElement :: Parser Triples +pNodeElement = pAnyElement $ do + -- Process attributes + void pRDFAttrs + -- Process URI, subject and @rdf:type@. + (s, mt) <- pSubject + ts1 <- pPropertyAttrs s + -- Process propertyEltList + ts2 <- keepState pPropertyEltList + setSubject (Just s) + let ts = ts1 <> ts2 + pure $ maybe ts (:ts) mt + +-- |Process the following parts of a @nodeElement@: URI, subject and @rdf:type@. +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#nodeElement +pSubject :: Parser (Node, Maybe Triple) +pSubject = do + -- Create the subject + -- [TODO] check the attributes that only one of the following may work + s <- pUnodeId <|> pBnode <|> pUnode <|> pBnodeGen + setSubject (Just s) + -- Resolve URI + uri <- pName >>= pQName + -- Check that the URI is allowed + unless (checkNodeUri uri) (throwError $ "URI not allowed: " <> T.unpack uri) + -- Optional rdf:type triple + mtype <- optional (pType1 s uri) + pure (s, mtype) + where + checkNodeUri uri = isNotCoreSyntaxTerm uri && uri /= rdfLi && isNotOldTerm uri + pUnodeId = (pIdAttr >>= mkUNodeID) <* removeNodeAttr rdfID + pBnode = (BNode <$> pNodeIdAttr) <* removeNodeAttr rdfNodeID + pUnode = (unode <$> pAboutAttr) <* removeNodeAttr rdfAbout + -- Default subject: a new blank node + pBnodeGen = newBNode + pType1 n uri = + if uri /= rdfDescription + then pure $ Triple n rdfTypeNode (unode uri) + else empty + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#propertyAttr +pPropertyAttrs :: Node -> Parser Triples +pPropertyAttrs s = do + attrs <- currentNodeAttrs + HM.elems <$> HM.traverseWithKey f attrs + where + f attr value + | not (isPropertyAttrURI attr) = throwError $ "URI not allowed for attribute: " <> T.unpack attr + | attr == rdfType = pure $ Triple s rdfTypeNode (unode value) + | otherwise = do + lang <- currentLang + pure $ let mkLiteral = maybe plainL (flip plainLL) lang + in Triple s (unode attr) (lnode (mkLiteral value)) + +pLang :: Parser (Maybe Text) +pLang = optional (pAttr "xml:lang") + +-- [TODO] resolve base uri in context +pBase :: Parser (Maybe BaseUrl) +pBase = optional $ do + uri <- pAttr "xml:base" + -- Parse and remove fragment + BaseUrl <$> either + throwError + (pure . serializeIRI . removeIRIFragment) + (parseIRI uri) + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#propertyEltList +pPropertyEltList :: Parser Triples +pPropertyEltList = pWs + *> resetCollectionIndex + *> fmap mconcat (many (pPropertyElt <* pWs)) + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#propertyElt +pPropertyElt :: Parser Triples +pPropertyElt = pAnyElement $ do + -- Process attributes + void pRDFAttrs + -- Process the predicate from the URI + uri <- pName >>= pQName >>= listExpansion + unless (isPropertyAttrURI uri) (throwError $ "URI not allowed for propertyElt: " <> T.unpack uri) + let p = unode uri + -- Process 'propertyElt' + pParseTypeLiteralPropertyElt p + <|> pParseTypeResourcePropertyElt p + <|> pParseTypeCollectionPropertyElt p + <|> pParseTypeOtherPropertyElt p + <|> pResourcePropertyElt p + <|> pLiteralPropertyElt p + <|> pEmptyPropertyElt p + where + listExpansion u + | u == rdfLi = nextCollectionIndex + | otherwise = pure u + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#resourcePropertyElt +pResourcePropertyElt :: Node -> Parser Triples +pResourcePropertyElt p = do + pWs + -- [NOTE] We need to restore part of the state after exploring the element' children. + (ts1, o) <- keepState $ liftA2 (,) pNodeElement currentSubject + pWs + mi <- optional pIdAttr <* removeNodeAttr rdfID + -- No other attribute is allowed. + checkAllowedAttributes [] + -- Generated triple + s <- currentSubject + let mt = flip Triple p <$> s <*> o + -- Reify the triple + ts2 <- maybe (pure mempty) (uncurry reifyTriple) (liftA2 (,) mi mt) + pure $ maybe (ts1 <> ts2) (:(ts1 <> ts2)) mt + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#literalPropertyElt +pLiteralPropertyElt :: Node -> Parser Triples +pLiteralPropertyElt p = do + l <- pText + -- No children + pChildren >>= guard . null + mi <- optional pIdAttr <* removeNodeAttr rdfID + checkAllowedAttributes [rdfDatatype] + dt <- optional pDatatypeAttr + s <- currentSubject + lang <- currentLang + -- Generated triple + let l' = TL.toStrict l + o = lnode . fromMaybe (plainL l') $ (typedL l' <$> dt) <|> (plainLL l' <$> lang) + mt = (\s' -> Triple s' p o) <$> s + -- Reify the triple + ts <- maybe (pure mempty) (uncurry reifyTriple) (liftA2 (,) mi mt) + pure $ maybe ts (:ts) mt + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#parseTypeLiteralPropertyElt +pParseTypeLiteralPropertyElt :: Node -> Parser Triples +pParseTypeLiteralPropertyElt p = do + pt <- pRDFAttr rdfParseType + guard (pt == "Literal") + mi <- optional pIdAttr <* removeNodeAttr rdfID + checkAllowedAttributes [rdfParseType] + l <- pXMLLiteral + -- Generated triple + s <- currentSubject + let o = lnode (typedL l rdfXmlLiteral) + mt = (\s' -> Triple s' p o) <$> s + -- Reify the triple + ts <- maybe (pure mempty) (uncurry reifyTriple) (liftA2 (,) mi mt) + pure $ maybe ts (:ts) mt + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#parseTypeResourcePropertyElt +pParseTypeResourcePropertyElt :: Node -> Parser Triples +pParseTypeResourcePropertyElt p = do + pt <- pRDFAttr rdfParseType + guard (pt == "Resource") + mi <- optional pIdAttr <* removeNodeAttr rdfID + checkAllowedAttributes [rdfParseType] + -- Generated triple + s <- currentSubject + o <- newBNode + let mt = (\s' -> Triple s' p o) <$> s + -- Reify the triple + ts1 <- maybe (pure mempty) (uncurry reifyTriple) (liftA2 (,) mi mt) + setSubject (Just o) + -- Explore children + ts2 <- keepCollectionIndex pPropertyEltList + --setSubject s + pure $ maybe (ts1 <> ts2) ((<> ts2) . (:ts1)) mt + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#parseTypeCollectionPropertyElt +pParseTypeCollectionPropertyElt :: Node -> Parser Triples +pParseTypeCollectionPropertyElt p = do + pt <- pRDFAttr rdfParseType + guard (pt == "Collection") + mi <- optional pIdAttr <* removeNodeAttr rdfID + checkAllowedAttributes [rdfParseType] + s <- currentSubject + case s of + Nothing -> pure mempty + Just s' -> do + r <- optional pNodeElement + case r of + Nothing -> + -- Empty collection + let t = Triple s' p rdfNilNode + in ([t] <>) <$> maybe (pure mempty) (`reifyTriple` t) mi + Just ts1 -> do + -- Non empty collection + s'' <- currentSubject + n <- newBNode + -- Triples corresping to the first item + let t = Triple s' p n + ts2 = maybe mempty (\s''' -> [t, Triple n rdfFirstNode s''']) s'' + -- Process next item + ts3 <- go n + -- Reify triple + ts4 <- maybe (pure mempty) (`reifyTriple` t) mi + pure $ mconcat [ts1, ts2, ts3, ts4] + where + go s = do + -- Generate the triples of the current item. + r <- optional pNodeElement + case r of + -- End of the collection + Nothing -> pure [Triple s rdfRestNode rdfNilNode] + -- Add the item to the collection and process the next item + Just ts1 -> do + s' <- currentSubject + n <- newBNode + let ts2 = maybe mempty (\s'' -> [Triple s rdfRestNode n, Triple n rdfFirstNode s'']) s' + -- Next item + ts3 <- go n + pure $ mconcat [ts1, ts2, ts3] + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#parseTypeOtherPropertyElt +pParseTypeOtherPropertyElt :: Node -> Parser Triples +pParseTypeOtherPropertyElt _p = do + pt <- pRDFAttr rdfParseType + guard (pt /= "Resource" && pt /= "Literal" && pt /= "Collection") + checkAllowedAttributes [rdfParseType] + _mi <- optional pIdAttr <* removeNodeAttr rdfID + -- [FIXME] Implement 'parseTypeOtherPropertyElt' + throwError "Not implemented: rdf:parseType = other" + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#emptyPropertyElt +pEmptyPropertyElt :: Node -> Parser Triples +pEmptyPropertyElt p = do + s <- currentSubject + case s of + Nothing -> pure mempty + Just s' -> do + mi <- optional pIdAttr <* removeNodeAttr rdfID + o <- pResourceAttr' <|> pNodeIdAttr' <|> newBNode + let t = Triple s' p o + -- Reify triple + ts1 <- maybe (pure mempty) (`reifyTriple` t) mi + ts2 <- pPropertyAttrs o + pure (t:ts1 <> ts2) + where + pResourceAttr' = unode <$> pResourceAttr <* removeNodeAttr rdfResource + pNodeIdAttr' = BNode <$> pNodeIdAttr <* removeNodeAttr rdfNodeID + +checkAllowedAttributes :: HashSet Text -> Parser () +checkAllowedAttributes as = do + attrs <- currentNodeAttrs + let diff = HS.difference (HM.keysSet attrs) as + unless (null diff) (throwError $ "Attributes not allowed: " <> show diff) + +-- See: https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-xmlliteral, +-- https://www.w3.org/TR/rdf-syntax-grammar/#literal +pXMLLiteral :: Parser Text +pXMLLiteral = + T.decodeUtf8 . BL.toStrict . BB.toLazyByteString . encode <$> pChildren + +pIdAttr :: Parser Text +pIdAttr = do + i <- pRDFAttr rdfID + i' <- either throwError pure (checkRdfId i) + -- Check the uniqueness of the ID in the context of the current base URI. + checkIdIsUnique i' + pure i' + +checkIdIsUnique :: Text -> Parser () +checkIdIsUnique i = do + notUnique <- S.member i <$> currentIdSet + when notUnique (throwError $ "rdf:ID already used in this context: " <> T.unpack i) + updateIdSet i + +pNodeIdAttr :: Parser Text +pNodeIdAttr = do + i <- pRDFAttr rdfNodeID + either throwError pure (checkRdfId i) + +pAboutAttr :: Parser Text +pAboutAttr = pRDFAttr rdfAbout >>= checkIRI "rdf:about" + +pResourceAttr :: Parser Text +pResourceAttr = pRDFAttr rdfResource >>= checkIRI "rdf:resource" + +pDatatypeAttr :: Parser Text +pDatatypeAttr = pRDFAttr rdfDatatype >>= checkIRI "rdf:datatype" + +reifyTriple :: Text -> Triple -> Parser Triples +reifyTriple i (Triple s p' o) = do + n <- mkUNodeID i + pure [ Triple n rdfTypeNode rdfStatementNode + , Triple n rdfSubjectNode s + , Triple n rdfPredicateNode p' + , Triple n rdfObjectNode o ] + +-------------------------------------------------------------------------------- +-- URI checks + +checkIRI :: String -> Text -> Parser Text +checkIRI msg iri = do + bUri <- maybe mempty unBaseUrl <$> currentBaseUri + case uriValidate iri of + Nothing -> throwError $ mconcat ["Malformed IRI for \"", msg, "\": ", T.unpack iri] + Just iri' -> either throwError pure (resolveIRI bUri iri') + +-- https://www.w3.org/TR/rdf-syntax-grammar/#propertyAttributeURIs +isPropertyAttrURI :: Text -> Bool +isPropertyAttrURI uri + = isNotCoreSyntaxTerm uri + && uri /= rdfDescription + && uri /= rdfLi + && isNotOldTerm uri + +-- https://www.w3.org/TR/rdf-syntax-grammar/#coreSyntaxTerms +isNotCoreSyntaxTerm :: Text -> Bool +isNotCoreSyntaxTerm uri + = uri /= rdfTag && uri /= rdfID && uri /= rdfAbout + && uri /= rdfParseType && uri /= rdfResource + && uri /= rdfNodeID && uri /= rdfDatatype + +-- https://www.w3.org/TR/rdf-syntax-grammar/#oldTerms +isNotOldTerm :: Text -> Bool +isNotOldTerm uri = uri /= rdfAboutEach + && uri /= rdfAboutEachPrefix + && uri /= rdfBagID + +-------------------------------------------------------------------------------- +-- Parser's state utils + +-- |Create a new unique blank node +newBNode :: Parser Node +newBNode = do + modify $ \st -> st { stateGenId = stateGenId st + 1 } + BNodeGen . stateGenId <$> get + +-- |Process a parser, restoring the state except for stateGenId and stateIdSet +keepState :: Parser a -> Parser a +keepState p = do + st <- get + let bUri = stateBaseUri st + is = stateIdSet st + p <* do + st' <- get + let i = stateGenId st' + bUri' = stateBaseUri st' + is' = stateIdSet st' + -- Update the set of ID if necessary + if bUri /= bUri' + then put (st { stateGenId = i }) + else put (st { stateGenId = i, stateIdSet = is <> is' }) + +currentIdSet :: Parser (Set Text) +currentIdSet = stateIdSet <$> get + +updateIdSet :: Text -> Parser () +updateIdSet i = do + is <- currentIdSet + modify (\st -> st { stateIdSet = S.insert i is }) + +currentNodeAttrs :: Parser (HashMap Text Text) +currentNodeAttrs = stateNodeAttrs <$> get + +setNodeAttrs :: HashMap Text Text -> Parser () +setNodeAttrs as = modify (\st -> st { stateNodeAttrs = as }) + +removeNodeAttr :: Text -> Parser () +removeNodeAttr a = HM.delete a <$> currentNodeAttrs >>= setNodeAttrs + +currentPrefixMappings :: Parser PrefixMappings +currentPrefixMappings = statePrefixMapping <$> get + +updatePrefixMappings :: PrefixMappings -> Parser PrefixMappings +updatePrefixMappings pm = do + pm' <- (<> pm) <$> currentPrefixMappings + modify (\st -> st { statePrefixMapping = pm' }) + pure pm' + +currentCollectionIndex :: Parser Int +currentCollectionIndex = stateCollectionIndex <$> get + +setCollectionIndex :: Int -> Parser () +setCollectionIndex i = modify (\st -> st { stateCollectionIndex = i }) + +keepCollectionIndex :: Parser a -> Parser a +keepCollectionIndex p = do + i <- currentCollectionIndex + p <* setCollectionIndex i + +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#section-List-Expand +nextCollectionIndex :: Parser Text +nextCollectionIndex = do + modify $ \st -> st { stateCollectionIndex = stateCollectionIndex st + 1 } + (rdfListIndex <>) . T.pack . show . stateCollectionIndex <$> get + +resetCollectionIndex :: Parser () +resetCollectionIndex = modify $ \st -> st { stateCollectionIndex = 0 } + +currentBaseUri :: Parser (Maybe BaseUrl) +currentBaseUri = stateBaseUri <$> get + +setBaseUri :: (Maybe BaseUrl) -> Parser () +setBaseUri u = modify (\st -> st { stateBaseUri = u }) + +mkUNodeID :: Text -> Parser Node +mkUNodeID t = mkUnode <$> currentBaseUri + where + mkUnode = unode . \case + Nothing -> t + Just (BaseUrl u) -> mconcat [u, "#", t] + +currentSubject :: Parser (Maybe Subject) +currentSubject = stateSubject <$> get + +setSubject :: (Maybe Subject) -> Parser () +setSubject s = modify (\st -> st { stateSubject = s }) + +currentLang :: Parser (Maybe Text) +currentLang = stateLang <$> get + +setLang :: (Maybe Text) -> Parser () +setLang lang = modify (\st -> st { stateLang = lang }) diff --git a/src/Text/RDF/RDF4H/XmlParser/Identifiers.hs b/src/Text/RDF/RDF4H/XmlParser/Identifiers.hs new file mode 100644 index 0000000..6592525 --- /dev/null +++ b/src/Text/RDF/RDF4H/XmlParser/Identifiers.hs @@ -0,0 +1,107 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TupleSections #-} + +module Text.RDF.RDF4H.XmlParser.Identifiers + ( -- rdf:ID validation + checkRdfId + -- Qualified names + , resolveQName, resolveQName' + , parseQName + ) where + + +import Data.Functor ((<$)) +import Control.Applicative (liftA2, Alternative(..)) +import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.Map as Map +import Data.Attoparsec.Text (Parser, ()) +import qualified Data.Attoparsec.Text as P + +import Data.RDF.Namespace + +-------------------------------------------------------------------------------- +-- rdf:ID + +-- |Validate the value of @rdf:ID@. +-- +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#rdf-id +checkRdfId + :: Text + -- ^ Value of a @rdf:ID@ attribute to validate. + -> Either String Text +checkRdfId t = t <$ parseId t + +parseId :: Text -> Either String Text +parseId = P.parseOnly $ pNCName <* (P.endOfInput "Unexpected characters at the end") + +-------------------------------------------------------------------------------- +-- Qualified names + +-- |Parse and resolve a qualified name. +-- +-- See: https://www.w3.org/TR/xml-names/#ns-qualnames +resolveQName + :: PrefixMappings + -- ^ Namespace mapping to resolve q qualified name. + -> Text + -- ^ Raw qualified name to process. + -> Either String Text +resolveQName pm qn = parseQName qn >>= resolveQName' pm + +-- |Resolve a qualified name. +resolveQName' + :: PrefixMappings + -- ^ Namespace mapping to resolve q qualified name. + -> (Maybe Text, Text) + -- ^ (namespace, local name) + -> Either String Text +resolveQName' (PrefixMappings pm) (Nothing, name) = + case Map.lookup mempty pm of + Nothing -> Left $ mconcat ["Cannot resolve QName \"", T.unpack name, "\": no default namespace defined."] + Just iri -> Right $ iri <> name +resolveQName' (PrefixMappings pm) (Just prefix, name) = + case Map.lookup prefix pm of + Nothing -> Left $ mconcat ["Cannot resolve QName: prefix \"", T.unpack prefix, "\" not defined"] + Just iri -> Right $ iri <> name + +-- |Parse a qualified name. +-- +-- See: https://www.w3.org/TR/xml-names/#ns-qualnames +parseQName :: Text -> Either String (Maybe Text, Text) +parseQName = P.parseOnly $ pQName <* (P.endOfInput "Unexpected characters at the end of a QName") + +-- https://www.w3.org/TR/xml-names/#ns-qualnames +-- https://www.w3.org/TR/xml-names/#NT-QName +pQName :: Parser (Maybe Text, Text) +pQName = pPrefixedName <|> pUnprefixedNamed + where pUnprefixedNamed = (empty,) <$> pLocalPart + +-- https://www.w3.org/TR/xml-names/#NT-PrefixedName +pPrefixedName :: Parser (Maybe Text, Text) +pPrefixedName = do + prefix <- pLocalPart <* P.char ':' + localPart <- pLocalPart + pure (Just prefix, localPart) + +-- https://www.w3.org/TR/xml-names/#NT-LocalPart +pLocalPart :: Parser Text +pLocalPart = pNCName + +-- http://www.w3.org/TR/REC-xml-names/#NT-NCName +pNCName :: Parser Text +pNCName = liftA2 T.cons pNameStartChar pNameRest + where + pNameStartChar = P.satisfy isValidFirstCharId + pNameRest = P.takeWhile isValidRestCharId + isValidFirstCharId c + = ('A' <= c && c <= 'Z') || c == '_' || ('a' <= c && c <= 'z') + || ('\xC0' <= c && c <= '\xD6') || ('\xD8' <= c && c <= '\xF6') + || ('\xF8' <= c && c <= '\x2FF') || ('\x370' <= c && c <= '\x37D') + || ('\x37F' <= c && c <= '\x1FFF') || ('\x200C' <= c && c <= '\x200D') + || ('\x2070' <= c && c <= '\x218F') || ('\x2C00' <= c && c <= '\x2FEF') + || ('\x3001' <= c && c <= '\xD7FF') || ('\xF900' <= c && c <= '\xFDCF') + || ('\xFDF0' <= c && c <= '\xFFFD') || ('\x10000' <= c && c <= '\xEFFFF') + isValidRestCharId c = isValidFirstCharId c + || c == '-' || c == '.' || ('0' <= c && c <= '9') + || ('\x0300' <= c && c <= '\x036F') || ('\x203F' <= c && c <= '\x2040') diff --git a/src/Text/RDF/RDF4H/XmlParserHXT.hs b/src/Text/RDF/RDF4H/XmlParserHXT.hs index 843ff78..8bec792 100644 --- a/src/Text/RDF/RDF4H/XmlParserHXT.hs +++ b/src/Text/RDF/RDF4H/XmlParserHXT.hs @@ -21,7 +21,7 @@ import Data.List (isPrefixOf) import qualified Data.Map as Map (fromList) import Data.Maybe import Data.Typeable -import Text.RDF.RDF4H.ParserUtils +import Text.RDF.RDF4H.ParserUtils hiding (rdfType) import Data.RDF.IRI import Data.RDF.Types (Rdf,RDF,RdfParser(..),Node(BNodeGen),BaseUrl(..),Triple(..),Triples,Subject,Predicate,Object,PrefixMappings(..),ParseFailure(ParseFailure),mkRdf,lnode,plainL,plainLL,typedL,unode,bnode,unodeValidate) import Data.Text (Text) @@ -220,7 +220,7 @@ isMetaAttr = isA (== "rdf:about") -- -- And that specifically: -- --- +-- -- foo -- -- @@ -278,7 +278,7 @@ parsePredicatesFromChildren = updateState , second hasPredicateAttr :-> (defaultA <+> (mkBlankNode &&& arr id >>> arr2A parsePredicateAttr)) , this :-> defaultA ] - + -- See: Issue http://www.w3.org/2000/03/rdf-tracking/#rdfms-rdf-names-use -- section: Illegal or unusual use of names from the RDF namespace -- @@ -317,7 +317,7 @@ validPropElementName = proc (state,predXml) -> do parseObjectsFromChildren :: forall a. (ArrowIf a, ArrowXml a, ArrowState GParseState a) => LParseState -> Predicate -> a XmlTree Triple parseObjectsFromChildren s p = - choiceA + choiceA [ isText :-> (neg( isWhiteSpace) >>> getText >>> arr (Triple (stateSubject s) p . mkLiteralNode s)) , isElem :-> (parseObjectDescription) ] @@ -405,10 +405,8 @@ validNodeElementName = neg (hasName "rdf:RDF") >>> neg (hasName "rdf:aboutEach") >>> neg (hasName "rdf:aboutEachPrefix") -rdfXmlLiteral :: Text rdfFirst,rdfRest,rdfNil,rdfType,rdfStatement,rdfSubject,rdfPredicate,rdfObject :: Node -rdfXmlLiteral = T.pack "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" rdfFirst = (unode . T.pack) "rdf:first" rdfRest = (unode . T.pack) "rdf:rest" rdfNil = (unode . T.pack) "rdf:nil" @@ -466,7 +464,7 @@ xmlName str = go [] str else Nothing isValid c = isAlphaNum c || '_' == c - -- || '-' == c + -- '-' == c || '.' == c || ':' == c diff --git a/stack.yaml b/stack.yaml index 666abb9..d755ed7 100644 --- a/stack.yaml +++ b/stack.yaml @@ -2,12 +2,10 @@ resolver: lts-12.1 compiler-check: newer-minor packages: - '.' -# - /home/rob/code/haskell/xmlbf/xmlbf -# - /home/rob/code/haskell/xmlbf/xmlbf-xeno extra-deps: - hgal-2.0.0.2 -- git: git@gitlab.com:k0001/xmlbf.git - commit: f46b96a401ac2ef6f30f8939c9f7bf92f38df383 - subdirs: - - xmlbf - - xmlbf-xeno \ No newline at end of file + - git: git@gitlab.com:k0001/xmlbf.git + commit: ce65be6366c0aaafcc282268d767c52380e459e6 + subdirs: + - xmlbf + - xmlbf-xeno diff --git a/testsuite/tests/Data/RDF/IRITests.hs b/testsuite/tests/Data/RDF/IRITests.hs index 09d0664..3b5197a 100644 --- a/testsuite/tests/Data/RDF/IRITests.hs +++ b/testsuite/tests/Data/RDF/IRITests.hs @@ -1,6 +1,3 @@ ---{-# LANGUAGE DeriveGeneric #-} ---{-# LANGUAGE TypeFamilies #-} ---{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} module Data.RDF.IRITests diff --git a/testsuite/tests/Data/RDF/PropertyTests.hs b/testsuite/tests/Data/RDF/PropertyTests.hs index f4417d9..9205d59 100644 --- a/testsuite/tests/Data/RDF/PropertyTests.hs +++ b/testsuite/tests/Data/RDF/PropertyTests.hs @@ -11,6 +11,7 @@ import Data.RDF.Namespace hiding (rdf) import qualified Data.Text as T import Test.QuickCheck import Data.List +import Data.Semigroup ((<>)) import qualified Data.Set as Set import qualified Data.Map as Map import Control.Monad @@ -105,7 +106,7 @@ instance Arbitrary PrefixMappings where arbitraryBaseUrl :: Gen BaseUrl arbitraryBaseUrl = oneof $ - map + fmap (return . BaseUrl . T.pack) ["http://example.org/", "http://example.com/a", "http://asdf.org/b", "http://asdf.org/c"] @@ -131,7 +132,7 @@ arbitraryPrefixMappings = p_empty :: Rdf rdf => RDF rdf -> Bool -p_empty empty = triplesOf empty == [] +p_empty empty = null (triplesOf empty) -- triplesOf any RDF should return unique triples used to create it p_mkRdf_triplesOf @@ -445,7 +446,7 @@ p_remove_triple_from_singleton_graph_query_s :: (Rdf rdf) => RDF rdf -> SingletonGraph rdf -> Bool p_remove_triple_from_singleton_graph_query_s _unused singletonGraph = - query newGr (Just s) Nothing Nothing == [] + null (query newGr (Just s) Nothing Nothing) where tripleInGraph@(Triple s _p _o) = head (triplesOf (rdfGraph singletonGraph)) newGr = removeTriple (rdfGraph singletonGraph) tripleInGraph @@ -457,7 +458,7 @@ p_remove_triple_from_singleton_graph_query_p :: (Rdf rdf) => RDF rdf -> SingletonGraph rdf -> Bool p_remove_triple_from_singleton_graph_query_p _unused singletonGraph = - query newGr Nothing (Just p) Nothing == [] + null (query newGr Nothing (Just p) Nothing) where tripleInGraph@(Triple _s p _o) = head (triplesOf (rdfGraph singletonGraph)) newGr = removeTriple (rdfGraph singletonGraph) tripleInGraph @@ -469,7 +470,7 @@ p_remove_triple_from_singleton_graph_query_o :: (Rdf rdf) => RDF rdf -> SingletonGraph rdf -> Bool p_remove_triple_from_singleton_graph_query_o _unused singletonGraph = - query newGr Nothing Nothing (Just o) == [] + null (query newGr Nothing Nothing (Just o)) where tripleInGraph@(Triple _s _p o) = head (triplesOf (rdfGraph singletonGraph)) newGr = removeTriple (rdfGraph singletonGraph) tripleInGraph @@ -482,10 +483,10 @@ p_add_then_remove_triples p_add_then_remove_triples _empty genTriples = let emptyGraph = _empty populatedGraph = - foldr (\t gr -> addTriple gr t) emptyGraph genTriples + foldr (flip addTriple) emptyGraph genTriples emptiedGraph = - foldr (\t gr -> removeTriple gr t) populatedGraph genTriples - in triplesOf emptiedGraph == [] + foldr (flip removeTriple) populatedGraph genTriples + in null (triplesOf emptiedGraph) equivNode :: (Node -> Node -> Bool) -> (Triple -> Node) @@ -546,7 +547,7 @@ tripleFromGen tripleFromGen _triplesOf rdf = if null ts then return Nothing - else oneof $ map (return . Just) ts + else oneof $ fmap (return . Just) ts where ts = _triplesOf rdf @@ -560,12 +561,13 @@ languages :: [T.Text] languages = [T.pack "fr", T.pack "en"] datatypes :: [T.Text] -datatypes = map (mkUri xsd . T.pack) ["string", "int", "token"] +datatypes = fmap (mkUri xsd . T.pack) ["string", "int", "token"] uris :: [T.Text] -uris = - map (mkUri ex) [T.pack n `T.append` T.pack (show (i::Int)) | n <- ["foo", "bar", "quz", "zak"], i <- [0..2]] - ++ [T.pack "ex:" `T.append` T.pack n `T.append` T.pack (show (i::Int)) | n <- ["s", "p", "o"], i <- [1..3]] +uris = [mkUri ex (n <> T.pack (show (i :: Int))) + | n <- ["foo", "bar", "quz", "zak"], i <- [0 .. 2]] + <> ["ex:" <> n <> T.pack (show (i::Int)) + | n <- ["s", "p", "o"], i <- [1..3]] plainliterals :: [LValue] plainliterals = [plainLL lit lang | lit <- litvalues, lang <- languages] @@ -574,16 +576,16 @@ typedliterals :: [LValue] typedliterals = [typedL lit dtype | lit <- litvalues, dtype <- datatypes] litvalues :: [T.Text] -litvalues = map T.pack ["hello", "world", "peace", "earth", "", "haskell"] +litvalues = fmap T.pack ["hello", "world", "peace", "earth", "", "haskell"] unodes :: [Node] -unodes = map UNode uris +unodes = fmap UNode uris bnodes :: [ Node] -bnodes = map (BNode . \i -> T.pack ":_genid" `T.append` T.pack (show (i::Int))) [1..5] +bnodes = fmap (BNode . \i -> T.pack ":_genid" <> T.pack (show (i::Int))) [1..5] lnodes :: [Node] -lnodes = [LNode lit | lit <- plainliterals ++ typedliterals] +lnodes = [LNode lit | lit <- plainliterals <> typedliterals] -- maximum number of triples maxN :: Int @@ -601,11 +603,10 @@ instance Arbitrary Triple where arbitrary = do s <- arbitraryS p <- arbitraryP - o <- arbitraryO - return (triple s p o) + triple s p <$> arbitraryO instance Arbitrary Node where - arbitrary = oneof $ map return unodes + arbitrary = oneof $ fmap return unodes arbitraryTs :: Gen Triples arbitraryTs = do @@ -613,9 +614,9 @@ arbitraryTs = do sequence [arbitrary | _ <- [1 .. n]] arbitraryS, arbitraryP, arbitraryO :: Gen Node -arbitraryS = oneof $ map return $ unodes ++ bnodes -arbitraryP = oneof $ map return unodes -arbitraryO = oneof $ map return $ unodes ++ bnodes ++ lnodes +arbitraryS = oneof $ fmap return $ unodes <> bnodes +arbitraryP = oneof $ fmap return unodes +arbitraryO = oneof $ fmap return $ unodes <> bnodes <> lnodes ---------------------------------------------------- -- Unit test cases -- diff --git a/testsuite/tests/Test.hs b/testsuite/tests/Test.hs index 9fb0053..96c4c52 100644 --- a/testsuite/tests/Test.hs +++ b/testsuite/tests/Test.hs @@ -25,13 +25,13 @@ suiteFilesDirXml = "rdf-tests/rdf-xml/" suiteFilesDirNTriples = "rdf-tests/ntriples/" mfPathTurtle,mfPathXml,mfPathNTriples :: T.Text -mfPathTurtle = T.concat [suiteFilesDirTurtle, "manifest.ttl"] -mfPathXml = T.concat [suiteFilesDirXml, "manifest.ttl"] -mfPathNTriples = T.concat [suiteFilesDirNTriples, "manifest.ttl"] +mfPathTurtle = mconcat [suiteFilesDirTurtle, "manifest.ttl"] +mfPathXml = mconcat [suiteFilesDirXml, "manifest.ttl"] +mfPathNTriples = mconcat [suiteFilesDirNTriples, "manifest.ttl"] mfBaseURITurtle,mfBaseURIXml,mfBaseURINTriples :: BaseUrl -mfBaseURITurtle = BaseUrl "http://www.w3.org/2013/TurtleTests/" -mfBaseURIXml = BaseUrl "http://www.w3.org/2013/RDFXMLTests/" +mfBaseURITurtle = W3CTurtleTest.mfBaseURITurtle +mfBaseURIXml = W3CRdfXmlTest.mfBaseURIXml mfBaseURINTriples = BaseUrl "http://www.w3.org/2013/N-TriplesTests/" main :: IO () @@ -41,9 +41,8 @@ main = do dir <- getCurrentDirectory let fileSchemeUri suitesDir = fromJust . filePathToUri $ (dir T.unpack suitesDir) - turtleManifest <- - loadManifest mfPathTurtle (fileSchemeUri suiteFilesDirTurtle) - xmlManifest <- loadManifest mfPathXml (fileSchemeUri suiteFilesDirXml) + turtleManifest <- loadManifest mfPathTurtle (unBaseUrl mfBaseURITurtle) + xmlManifest <- loadManifest mfPathXml (unBaseUrl mfBaseURIXml) nTriplesManifest <- loadManifest mfPathNTriples (fileSchemeUri suiteFilesDirNTriples) -- run tests @@ -61,7 +60,12 @@ main = do (graphTests "AdjHashMap" (empty :: RDF AdjHashMap) - (mkRdf :: Triples -> Maybe BaseUrl -> PrefixMappings -> RDF AdjHashMap))] + (mkRdf :: Triples -> Maybe BaseUrl -> PrefixMappings -> RDF AdjHashMap)) + , + (graphTests + "AlgebraicGraph" + (empty :: RDF AlgebraicGraph) + (mkRdf :: Triples -> Maybe BaseUrl -> PrefixMappings -> RDF AlgebraicGraph))] , testGroup "graph-impl-unit-tests" @@ -95,15 +99,15 @@ main = do "parser-w3c-tests-turtle" [ testGroup "parser-w3c-tests-turtle-parsec" - [W3CTurtleTest.testsParsec turtleManifest] + [W3CTurtleTest.testsParsec (dir T.unpack suiteFilesDirTurtle) turtleManifest] , testGroup "parser-w3c-tests-turtle-attoparsec" - [W3CTurtleTest.testsAttoparsec turtleManifest] + [W3CTurtleTest.testsAttoparsec (dir T.unpack suiteFilesDirTurtle) turtleManifest] ] , testGroup "parser-w3c-tests-xml" - [ W3CRdfXmlTest.tests xmlManifest + [ W3CRdfXmlTest.tests (dir T.unpack suiteFilesDirXml) xmlManifest ] ] ) diff --git a/testsuite/tests/Text/RDF/RDF4H/TurtleParser_ConformanceTest.hs b/testsuite/tests/Text/RDF/RDF4H/TurtleParser_ConformanceTest.hs index 62cdc02..382c039 100644 --- a/testsuite/tests/Text/RDF/RDF4H/TurtleParser_ConformanceTest.hs +++ b/testsuite/tests/Text/RDF/RDF4H/TurtleParser_ConformanceTest.hs @@ -4,6 +4,7 @@ module Text.RDF.RDF4H.TurtleParser_ConformanceTest ( tests ) where +import Data.Semigroup ((<>)) -- Testing imports import Test.Tasty import Test.Tasty.HUnit as TU @@ -48,7 +49,7 @@ fpath :: String -> Int -> String -> String fpath name i ext = printf "data/ttl/conformance/%s-%02d.%s" name i ext :: String tests :: [TestTree] -tests = ts1 ++ ts2 ++ ts3 +tests = ts1 <> ts2 <> ts3 where ts1 = fmap checkGoodConformanceTest [0..29] ts2 = fmap checkBadConformanceTest [0..15] ts3 = fmap (uncurry checkGoodOtherTest) otherTestFiles @@ -72,7 +73,7 @@ doGoodConformanceTest expGr inGr testname = let t1 = assertLoadSuccess (printf "expected (%s): " testname) expGr t2 = assertLoadSuccess (printf " input (%s): " testname) inGr t3 = assertEquivalent testname expGr inGr - in testGroup (printf "conformance-%s" testname) $ map (uncurry testCase) [("loading-expected-graph-data", t1), ("loading-input-graph-data", t2), ("comparing-graphs", t3)] + in testGroup (printf "conformance-%s" testname) $ fmap (uncurry testCase) [("loading-expected-graph-data", t1), ("loading-input-graph-data", t2), ("comparing-graphs", t3)] checkBadConformanceTest :: Int -> TestTree checkBadConformanceTest i = @@ -82,21 +83,21 @@ checkBadConformanceTest i = -- Determines if graphs are equivalent, returning Nothing if so or else a diagnostic message. -- First graph is expected graph, second graph is actual. equivalent :: Rdf a => Either ParseFailure (RDF a) -> Either ParseFailure (RDF a) -> Maybe String -equivalent (Left e) _ = Just $ "Parse failure of the expected graph: " ++ show e -equivalent _ (Left e) = Just $ "Parse failure of the input graph: " ++ show e +equivalent (Left e) _ = Just $ "Parse failure of the expected graph: " <> show e +equivalent _ (Left e) = Just $ "Parse failure of the input graph: " <> show e equivalent (Right gr1) (Right gr2) = checkSize <|> (test $! zip gr1ts gr2ts) where gr1ts = uordered $ triplesOf gr1 gr2ts = uordered $ triplesOf gr2 length1 = length gr1ts length2 = length gr2ts - checkSize = if (length1 == length2) then Nothing else (Just $ "Size different. Expected: " ++ (show length1) ++ ", got: " ++ (show length2)) + checkSize = if (length1 == length2) then Nothing else (Just $ "Size different. Expected: " <> (show length1) <> ", got: " <> (show length2)) test [] = Nothing test ((t1,t2):ts) = maybe (test ts) pure (compareTriple t1 t2) compareTriple t1@(Triple s1 p1 o1) t2@(Triple s2 p2 o2) = if equalNodes s1 s2 && equalNodes p1 p2 && equalNodes o1 o2 then Nothing - else Just ("Expected:\n " ++ show t1 ++ "\nFound:\n " ++ show t2 ++ "\n") + else Just ("Expected:\n " <> show t1 <> "\nFound:\n " <> show t2 <> "\n") -- I'm not sure it's right to compare blank nodes with generated -- blank nodes. This is because parsing an already generated blank @@ -140,14 +141,14 @@ assertLoadSuccess, assertLoadFailure :: String -> IO (Either ParseFailure (RDF T assertLoadSuccess idStr exprGr = do g <- exprGr case g of - Left (ParseFailure err) -> TU.assertFailure $ idStr ++ err + Left (ParseFailure err) -> TU.assertFailure $ idStr <> err Right _ -> return () assertLoadFailure idStr exprGr = do g <- exprGr case g of Left _ -> return () - Right _ -> TU.assertFailure $ "Bad test " ++ idStr ++ " loaded successfully." + Right _ -> TU.assertFailure $ "Bad test " <> idStr <> " loaded successfully." assertEquivalent :: Rdf a => String -> IO (Either ParseFailure (RDF a)) -> IO (Either ParseFailure (RDF a)) -> TU.Assertion assertEquivalent testname r1 r2 = do @@ -155,7 +156,7 @@ assertEquivalent testname r1 r2 = do gr2 <- r2 case equivalent gr1 gr2 of Nothing -> return () - (Just msg) -> fail $ "Graph " ++ testname ++ " not equivalent to expected:\n" ++ msg + (Just msg) -> fail $ "Graph " <> testname <> " not equivalent to expected:\n" <> msg mkDocUrl :: Text -> String -> Int -> Maybe Text mkDocUrl baseDocUrl fname testNum = Just . fromString $ printf "%s%s-%02d.ttl" baseDocUrl fname testNum diff --git a/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs b/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs index 8d8f979..530dfdc 100644 --- a/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs +++ b/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs @@ -7,9 +7,9 @@ module Text.RDF.RDF4H.XmlParser_Test -- todo: QuickCheck tests +import Data.Semigroup ((<>)) -- Testing imports import Test.Tasty -import Test.Tasty.HUnit import Test.Tasty.HUnit as TU -- Import common libraries to facilitate tests @@ -34,8 +34,8 @@ tests = , testCase "NML2" test_parseXmlRDF_NML2 , testCase "NML3" test_parseXmlRDF_NML3 ] - ++ - map (uncurry checkGoodOtherTest) otherTestFiles + <> + fmap (uncurry checkGoodOtherTest) otherTestFiles otherTestFiles :: [(String, String)] otherTestFiles = [ ("data/xml", "example07") @@ -70,8 +70,8 @@ loadExpectedGraph1 fname = do loadInputGraph1 :: String -> String -> IO (Either ParseFailure (RDF TList)) loadInputGraph1 dir fname = - TIO.readFile (printf "%s/%s.rdf" dir fname :: String) >>= - return . parseString (XmlParser Nothing (mkDocUrl1 testBaseUri fname)) >>= return . handleLoad + (parseString (XmlParser Nothing (mkDocUrl1 testBaseUri dir fname)) <$> + TIO.readFile (printf "%s/%s.rdf" dir fname :: String)) doGoodConformanceTest :: IO (Either ParseFailure (RDF TList)) -> IO (Either ParseFailure (RDF TList)) -> @@ -80,7 +80,7 @@ doGoodConformanceTest expGr inGr testname = let t1 = assertLoadSuccess (printf "expected (%s): " testname) expGr t2 = assertLoadSuccess (printf " input (%s): " testname) inGr t3 = assertEquivalent testname expGr inGr - in testGroup (printf "conformance-%s" testname) $ map (uncurry testCase) [("loading-expected-graph-data", t1), ("loading-input-graph-data", t2), ("comparing-graphs", t3)] + in testGroup (printf "conformance-%s" testname) $ fmap (uncurry testCase) [("loading-expected-graph-data", t1), ("loading-input-graph-data", t2), ("comparing-graphs", t3)] mkTextNode :: T.Text -> Node mkTextNode = lnode . plainL @@ -90,7 +90,7 @@ testParse exRDF ex = case parsed of Right result -> assertBool - ("expected: " ++ show ex ++ "but got: " ++ show result) + ("expected: " <> show ex <> "but got: " <> show result) (isIsomorphic (result :: RDF TList) (ex :: RDF TList)) Left (ParseFailure err) -> assertFailure err @@ -174,7 +174,7 @@ test_parseXmlRDF_vCardPersonal :: Assertion test_parseXmlRDF_vCardPersonal = testParse "\ - \\ + \\ \Corky Crystal\ \Corks\ \\ @@ -353,7 +353,7 @@ assertEquivalent testname r1 r2 = do gr2 <- r2 case equivalent gr1 gr2 of Nothing -> return () - (Just msg) -> fail $ "Graph " ++ testname ++ " not equivalent to expected:\n" ++ msg + (Just msg) -> fail $ "Graph " <> testname <> " not equivalent to expected:\n" <> msg -- Determines if graphs are equivalent, returning Nothing if so or else a diagnostic message. -- First graph is expected graph, second graph is actual. @@ -372,13 +372,13 @@ equivalent (Right gr1) (Right gr2) = test $! zip gr1ts gr2ts compareTriple t1 t2 = if equalNodes s1 s2 && equalNodes p1 p2 && equalNodes o1 o2 then Nothing - else Just ("Expected:\n " ++ show t1 ++ "\nFound:\n " ++ show t2 ++ "\n") + else Just ("Expected:\n " <> show t1 <> "\nFound:\n " <> show t2 <> "\n") where (s1, p1, o1) = f t1 (s2, p2, o2) = f t2 f t = (subjectOf t, predicateOf t, objectOf t) - -- equalNodes (BNode fs1) (BNodeGen i) = T.reverse fs1 == T.pack ("_:genid" ++ show i) - -- equalNodes (BNode fs1) (BNodeGen i) = fs1 == T.pack ("_:genid" ++ show i) + -- equalNodes (BNode fs1) (BNodeGen i) = T.reverse fs1 == T.pack ("_:genid" <> show i) + -- equalNodes (BNode fs1) (BNodeGen i) = fs1 == T.pack ("_:genid" <> show i) -- I'm not sure it's right to compare blank nodes with generated -- blank nodes. This is because parsing an already generated blank @@ -403,20 +403,20 @@ assertLoadSuccess :: String -> IO (Either ParseFailure (RDF TList)) -> TU.Assert assertLoadSuccess idStr exprGr = do g <- exprGr case g of - Left (ParseFailure err) -> TU.assertFailure $ idStr ++ err + Left (ParseFailure err) -> TU.assertFailure $ idStr <> err Right _ -> return () -- assertLoadFailure idStr exprGr = do -- g <- exprGr -- case g of -- Left _ -> return () --- Right _ -> TU.assertFailure $ "Bad test " ++ idStr ++ " loaded successfully." +-- Right _ -> TU.assertFailure $ "Bad test " <> idStr <> " loaded successfully." handleLoad :: Either ParseFailure (RDF TList) -> Either ParseFailure (RDF TList) handleLoad res = case res of l@(Left _) -> l - (Right gr) -> Right $ mkRdf (map normalize (triplesOf gr)) (baseUrl gr) (prefixMappings gr) + (Right gr) -> Right $ mkRdf (fmap normalize (triplesOf gr)) (baseUrl gr) (prefixMappings gr) normalize :: Triple -> Triple normalize t = let s' = normalizeN $ subjectOf t @@ -424,12 +424,12 @@ normalize t = let s' = normalizeN $ subjectOf t o' = normalizeN $ objectOf t in triple s' p' o' normalizeN :: Node -> Node -normalizeN (BNodeGen i) = BNode (T.pack $ "_:genid" ++ show i) +normalizeN (BNodeGen i) = BNode (T.pack $ "_:genid" <> show i) normalizeN n = n -- The Base URI to be used for all conformance tests: testBaseUri :: String testBaseUri = "http://www.w3.org/2001/sw/DataAccess/df1/tests/" -mkDocUrl1 :: String -> String -> Maybe T.Text -mkDocUrl1 baseDocUrl fname = Just $ T.pack $ printf "%s%s.rdf" baseDocUrl fname +mkDocUrl1 :: String -> String -> String -> Maybe T.Text +mkDocUrl1 baseDocUrl dir fname = Just . T.pack $ printf "%s/%s/%s.rdf" baseDocUrl dir fname diff --git a/testsuite/tests/W3C/Manifest.hs b/testsuite/tests/W3C/Manifest.hs index 736db43..cd46877 100644 --- a/testsuite/tests/W3C/Manifest.hs +++ b/testsuite/tests/W3C/Manifest.hs @@ -7,6 +7,7 @@ module W3C.Manifest ( TestEntry(..) ) where +import Data.Semigroup ((<>)) import Data.RDF.Graph.TList import Data.RDF.Query import Data.RDF.Types @@ -125,18 +126,18 @@ mfUnrecognizedDatatypes = unode "http://www.w3.org/2001/sw/DataAccess/tests/test -- | Load the manifest from the given file; -- apply the given namespace as the base IRI of the manifest. loadManifest :: T.Text -> T.Text -> IO Manifest -loadManifest manifestPath baseIRI = do - parseFile testParser (T.unpack manifestPath) >>= return . rdfToManifest . fromEither +loadManifest manifestPath baseIRI = + (rdfToManifest . fromEither) <$> parseFile testParser (T.unpack manifestPath) where testParser = TurtleParser (Just $ BaseUrl baseIRI) Nothing rdfToManifest :: RDF TList -> Manifest rdfToManifest rdf = Manifest desc tpls - where desc = lnodeText $ objectOf $ headDef (error ("query empty: subject mf:node & predicate mf:name in:\n\n" ++ show (triplesOf rdf))) descNode + where desc = lnodeText $ objectOf $ headDef (error ("query empty: subject mf:node & predicate mf:name in:\n\n" <> show (triplesOf rdf))) descNode -- FIXME: Inconsistent use of nodes for describing the manifest (W3C bug) descNode = query rdf (Just manifestNode) (Just rdfsLabel) Nothing - ++ query rdf (Just manifestNode) (Just mfName) Nothing + <> query rdf (Just manifestNode) (Just mfName) Nothing -- descNode = query rdf (Just manifestNode) (Just mfName) Nothing - tpls = map (rdfToTestEntry rdf) $ rdfCollectionToList rdf collectionHead + tpls = (rdfToTestEntry rdf) <$> rdfCollectionToList rdf collectionHead collectionHead = objectOf $ headDef (error "query: mf:node & mf:entries") $ query rdf (Just manifestNode) (Just mfEntries) Nothing manifestNode = headDef (error "manifestSubjectNodes yielding empty list") $ manifestSubjectNodes rdf @@ -156,7 +157,7 @@ triplesToTestEntry rdf ts = (UNode "http://www.w3.org/ns/rdftest#TestXMLNegativeSyntax") -> mkTestXMLNegativeSyntax ts (UNode "http://www.w3.org/ns/rdftest#TestNTriplesPositiveSyntax") -> mkTestNTriplesPositiveSyntax ts (UNode "http://www.w3.org/ns/rdftest#TestNTriplesNegativeSyntax") -> mkTestNTriplesNegativeSyntax ts - n -> error ("Unknown test case: " ++ show n) + n -> error ("Unknown test case: " <> show n) mkTestTurtleEval :: Triples -> TestEntry mkTestTurtleEval ts = TestTurtleEval { @@ -273,7 +274,7 @@ manifestSubjectNodes :: RDF TList -> [Subject] manifestSubjectNodes rdf = subjectNodes rdf [mfManifest] subjectNodes :: RDF TList -> [Object] -> [Subject] -subjectNodes rdf = (map subjectOf) . concatMap queryType +subjectNodes rdf = (fmap subjectOf) . concatMap queryType where queryType n = query rdf Nothing (Just rdfType) (Just n) -- | Text of the literal node. diff --git a/testsuite/tests/W3C/NTripleTest.hs b/testsuite/tests/W3C/NTripleTest.hs index e4ed915..3126545 100644 --- a/testsuite/tests/W3C/NTripleTest.hs +++ b/testsuite/tests/W3C/NTripleTest.hs @@ -3,6 +3,7 @@ module W3C.NTripleTest , testsAttoparsec ) where +import Data.Semigroup ((<>)) import Data.Maybe (fromJust) import Test.Tasty import qualified Test.Tasty.HUnit as TU @@ -36,11 +37,10 @@ mfEntryToTest testParser (TestNTriplesNegativeSyntax nm _ _ act') = let act = (UNode . fromJust . fileSchemeToFilePath) act' rdf = parseFile testParser (nodeURI act) :: IO (Either ParseFailure (RDF TList)) in TU.testCase (T.unpack nm) $ assertIsNotParsed rdf -mfEntryToTest _ x = error $ "unknown TestEntry pattern in mfEntryToTest: " ++ show x +mfEntryToTest _ x = error $ "unknown TestEntry pattern in mfEntryToTest: " <> show x testParserParsec :: NTriplesParserCustom testParserParsec = NTriplesParserCustom Parsec testParserAttoparsec :: NTriplesParserCustom testParserAttoparsec = NTriplesParserCustom Attoparsec - diff --git a/testsuite/tests/W3C/RdfXmlTest.hs b/testsuite/tests/W3C/RdfXmlTest.hs index 6da83dd..0bce804 100644 --- a/testsuite/tests/W3C/RdfXmlTest.hs +++ b/testsuite/tests/W3C/RdfXmlTest.hs @@ -2,8 +2,10 @@ module W3C.RdfXmlTest ( tests + , mfBaseURIXml ) where +import Data.Semigroup ((<>)) import Data.Maybe (fromJust) import Test.Tasty import qualified Test.Tasty.HUnit as TU @@ -18,29 +20,34 @@ import Text.RDF.RDF4H.XmlParser import Text.RDF.RDF4H.NTriplesParser import Data.RDF.Graph.TList -tests :: Manifest -> TestTree -tests = runManifestTests mfEntryToTest +tests :: String -> Manifest -> TestTree +tests = runManifestTests . mfEntryToTest -- Functions to map manifest test entries to unit tests. -- They are defined here to avoid cluttering W3C.Manifest -- with functions that may not be needed to those who -- just want to parse Manifest files. -- TODO: They should probably be moved to W3C.Manifest after all. -mfEntryToTest :: TestEntry -> TestTree -mfEntryToTest (TestXMLEval nm _ _ act' res') = - let act = (UNode . fromJust . fileSchemeToFilePath) act' - res = (UNode . fromJust . fileSchemeToFilePath) res' - parsedRDF = parseFile testParser (nodeURI act) >>= return . fromEither :: IO (RDF TList) - expectedRDF = parseFile NTriplesParser (nodeURI res) >>= return . fromEither :: IO (RDF TList) +mfEntryToTest :: String -> TestEntry -> TestTree +mfEntryToTest dir (TestXMLEval nm _ _ act res) = + let pathExpected = getFilePath dir res + pathAction = getFilePath dir act + parsedRDF = (fromEither <$> parseFile (testParser (nodeURI act)) pathAction) :: IO (RDF TList) + expectedRDF = (fromEither <$> parseFile NTriplesParser pathExpected) :: IO (RDF TList) in TU.testCase (T.unpack nm) $ assertIsIsomorphic parsedRDF expectedRDF -mfEntryToTest (TestXMLNegativeSyntax nm _ _ act') = - let act = (UNode . fromJust . fileSchemeToFilePath) act' - rdf = parseFile testParser (nodeURI act) :: IO (Either ParseFailure (RDF TList)) +mfEntryToTest dir (TestXMLNegativeSyntax nm _ _ act) = + let pathAction = getFilePath dir act + rdf = parseFile (testParser (nodeURI act)) pathAction :: IO (Either ParseFailure (RDF TList)) in TU.testCase (T.unpack nm) $ assertIsNotParsed rdf -mfEntryToTest x = error $ "unknown TestEntry pattern in mfEntryToTest: " ++ show x +mfEntryToTest _ x = error $ "unknown TestEntry pattern in mfEntryToTest: " <> show x + +getFilePath :: String -> Node -> String +getFilePath dir (UNode iri) = fixFilePath' iri + where fixFilePath' = (dir <>) . T.unpack . fromJust . T.stripPrefix (unBaseUrl mfBaseURIXml) +getFilePath _ _ = error "Unexpected node" mfBaseURIXml :: BaseUrl mfBaseURIXml = BaseUrl "http://www.w3.org/2013/RDFXMLTests/" -testParser :: XmlParser -testParser = XmlParser (Just mfBaseURIXml) Nothing +testParser :: String -> XmlParser +testParser dUri = XmlParser (Just mfBaseURIXml) (Just . T.pack $ dUri) diff --git a/testsuite/tests/W3C/TurtleTest.hs b/testsuite/tests/W3C/TurtleTest.hs index 8e30ede..68c19c2 100644 --- a/testsuite/tests/W3C/TurtleTest.hs +++ b/testsuite/tests/W3C/TurtleTest.hs @@ -3,11 +3,13 @@ module W3C.TurtleTest ( testsParsec , testsAttoparsec + , mfBaseURITurtle ) where import Test.Tasty import qualified Test.Tasty.HUnit as TU +import Data.Semigroup ((<>)) import Data.Maybe (fromJust) import qualified Data.Text as T @@ -21,41 +23,47 @@ import Text.RDF.RDF4H.NTriplesParser import Text.RDF.RDF4H.ParserUtils import Data.RDF.Graph.TList -testsParsec :: Manifest -> TestTree -testsParsec = runManifestTests (mfEntryToTest testParserParsec) +testsParsec :: String -> Manifest -> TestTree +testsParsec = runManifestTests . (`mfEntryToTest` testParserParsec) -testsAttoparsec :: Manifest -> TestTree -testsAttoparsec = runManifestTests (mfEntryToTest testParserAttoparsec) +testsAttoparsec :: String -> Manifest -> TestTree +testsAttoparsec = runManifestTests . (`mfEntryToTest` testParserAttoparsec) -mfEntryToTest :: TurtleParserCustom -> TestEntry -> TestTree -mfEntryToTest parser (TestTurtleEval nm _ _ act' res') = - let act = (UNode . fromJust . fileSchemeToFilePath) act' - res = (UNode . fromJust . fileSchemeToFilePath) res' - parsedRDF = parseFile parser (nodeURI act) >>= return . fromEither :: IO (RDF TList) - expectedRDF = parseFile NTriplesParser (nodeURI res) >>= return . fromEither :: IO (RDF TList) +mfEntryToTest :: String -> (String -> TurtleParserCustom) -> TestEntry -> TestTree +mfEntryToTest dir parser (TestTurtleEval nm _ _ act res) = + let pathExpected = getFilePath dir res + pathAction = getFilePath dir act + parsedRDF = (fromEither <$> parseFile (parser (nodeURI act)) pathAction) :: IO (RDF TList) + expectedRDF = (fromEither <$> parseFile NTriplesParser pathExpected) :: IO (RDF TList) in TU.testCase (T.unpack nm) $ assertIsIsomorphic parsedRDF expectedRDF -mfEntryToTest parser (TestTurtleNegativeEval nm _ _ act') = - let act = (UNode . fromJust . fileSchemeToFilePath) act' - rdf = parseFile parser (nodeURI act) :: IO (Either ParseFailure (RDF TList)) +mfEntryToTest dir parser (TestTurtleNegativeEval nm _ _ act) = + let pathAction = getFilePath dir act + rdf = parseFile (parser (nodeURI act)) pathAction :: IO (Either ParseFailure (RDF TList)) in TU.testCase (T.unpack nm) $ assertIsNotParsed rdf -mfEntryToTest parser (TestTurtlePositiveSyntax nm _ _ act') = - let act = (UNode . fromJust . fileSchemeToFilePath) act' - rdf = parseFile parser (nodeURI act) :: IO (Either ParseFailure (RDF TList)) +mfEntryToTest dir parser (TestTurtlePositiveSyntax nm _ _ act) = + let pathAction = getFilePath dir act + rdf = parseFile (parser (nodeURI act)) pathAction :: IO (Either ParseFailure (RDF TList)) in TU.testCase (T.unpack nm) $ assertIsParsed rdf -mfEntryToTest parser (TestTurtleNegativeSyntax nm _ _ act') = - let act = (UNode . fromJust . fileSchemeToFilePath) act' - rdf = parseFile parser (nodeURI act) :: IO (Either ParseFailure (RDF TList)) +mfEntryToTest dir parser (TestTurtleNegativeSyntax nm _ _ act) = + let pathAction = getFilePath dir act + rdf = parseFile (parser (nodeURI act)) pathAction :: IO (Either ParseFailure (RDF TList)) in TU.testCase (T.unpack nm) $ assertIsNotParsed rdf -mfEntryToTest _ x = error $ "unknown TestEntry pattern in mfEntryToTest: " ++ show x +mfEntryToTest _ _ x = error $ "unknown TestEntry pattern in mfEntryToTest: " <> show x +-- [NOTE] Was previously: http://www.w3.org/2013/TurtleTests/ mfBaseURITurtle :: BaseUrl -mfBaseURITurtle = BaseUrl "http://www.w3.org/2013/TurtleTests/" +mfBaseURITurtle = BaseUrl "http://w3c.github.io/rdf-tests/turtle/" -- testParser :: TurtleParser -- testParser = TurtleParser (Just mfBaseURITurtle) Nothing -testParserParsec :: TurtleParserCustom -testParserParsec = TurtleParserCustom (Just mfBaseURITurtle) Nothing Parsec +testParserParsec :: String -> TurtleParserCustom +testParserParsec dUrl = TurtleParserCustom (Just mfBaseURITurtle) (Just . T.pack $ dUrl) Parsec -testParserAttoparsec :: TurtleParserCustom -testParserAttoparsec = TurtleParserCustom (Just mfBaseURITurtle) Nothing Attoparsec +testParserAttoparsec :: String -> TurtleParserCustom +testParserAttoparsec dUrl = TurtleParserCustom (Just mfBaseURITurtle) (Just . T.pack $ dUrl) Attoparsec + +getFilePath :: String -> Node -> String +getFilePath dir (UNode iri) = fixFilePath' iri + where fixFilePath' = (dir <>) . T.unpack . fromJust . T.stripPrefix (unBaseUrl mfBaseURITurtle) +getFilePath _ _ = error "Unexpected node" diff --git a/testsuite/tests/W3C/W3CAssertions.hs b/testsuite/tests/W3C/W3CAssertions.hs index 8cf6a88..2ee45c9 100644 --- a/testsuite/tests/W3C/W3CAssertions.hs +++ b/testsuite/tests/W3C/W3CAssertions.hs @@ -6,6 +6,7 @@ module W3C.W3CAssertions , nodeURI ) where +import Data.Semigroup ((<>)) import qualified Data.Text as T import Data.RDF import qualified Test.HUnit as TU @@ -14,13 +15,13 @@ import W3C.Manifest runManifestTests :: (TestEntry -> TestTree) -> Manifest -> TestTree runManifestTests mfEntryToTest manifest = - testGroup (T.unpack $ description manifest) $ map mfEntryToTest $ entries manifest + testGroup (T.unpack $ description manifest) $ mfEntryToTest <$> entries manifest assertIsIsomorphic :: IO (RDF TList) -> IO (RDF TList) -> IO () assertIsIsomorphic r1 r2 = do gr1 <- r1 gr2 <- r2 - TU.assertBool ("not isomorphic: " ++ show gr1 ++ " compared with " ++ show gr2) (isSame gr1 gr2) -- (isGraphIsomorphic gr1 gr2) + TU.assertBool ("not isomorphic: " <> show gr1 <> " compared with " <> show gr2) (isSame gr1 gr2) -- (isGraphIsomorphic gr1 gr2) where noBlankNodes g = (all noBlanks . expandTriples) g noBlanks (Triple s p o) = not (blankNode s) @@ -41,12 +42,12 @@ assertIsIsomorphic r1 r2 = do assertIsParsed :: IO (Either ParseFailure (RDF TList)) -> TU.Assertion assertIsParsed r1 = do gr1 <- r1 - TU.assertBool ("unable to parse, reason:\n" ++ show gr1) (isParsed gr1) + TU.assertBool ("unable to parse, reason:\n" <> show gr1) (isParsed gr1) assertIsNotParsed :: IO (Either ParseFailure (RDF TList)) -> TU.Assertion assertIsNotParsed r1 = do gr1 <- r1 - TU.assertBool ("parsed unexpectantly:\n" ++ show gr1) (not (isParsed gr1)) + TU.assertBool ("parsed unexpectantly:\n" <> show gr1) (not (isParsed gr1)) isParsed :: Either a b -> Bool isParsed (Left _) = False @@ -54,4 +55,4 @@ isParsed (Right _) = True nodeURI :: Node -> String nodeURI (UNode u) = T.unpack u -nodeURI node = error $ "W3CAssertions: unexpected node in `nodeURI`: " ++ show node +nodeURI node = error $ "W3CAssertions: unexpected node in `nodeURI`: " <> show node