From 1a3b60e76bceedbca426feea74a657186488f9b9 Mon Sep 17 00:00:00 2001 From: John Pavel Date: Wed, 23 Jan 2019 21:10:47 +0000 Subject: [PATCH 01/39] Hlint cleanups --- .hlint.yaml | 65 +++++++++++++++++++ bench/MainCriterion.hs | 4 +- rdf-tests | 2 +- src/Data/RDF/Graph/AdjHashMap.hs | 2 +- src/Data/RDF/Graph/HashMapSP.hs | 2 +- src/Data/RDF/Graph/TList.hs | 2 +- src/Data/RDF/IRI.hs | 26 ++++---- src/Data/RDF/Query.hs | 3 +- src/Data/RDF/Types.hs | 12 ++-- src/Text/RDF/RDF4H/NTriplesParser.hs | 6 +- src/Text/RDF/RDF4H/ParserUtils.hs | 2 +- src/Text/RDF/RDF4H/TurtleParser.hs | 11 ++-- src/Text/RDF/RDF4H/XmlParser.hs | 4 +- testsuite/tests/Data/RDF/PropertyTests.hs | 17 +++-- .../tests/Text/RDF/RDF4H/XmlParser_Test.hs | 5 +- testsuite/tests/W3C/Manifest.hs | 4 +- testsuite/tests/W3C/RdfXmlTest.hs | 4 +- testsuite/tests/W3C/TurtleTest.hs | 4 +- 18 files changed, 119 insertions(+), 56 deletions(-) create mode 100644 .hlint.yaml diff --git a/.hlint.yaml b/.hlint.yaml new file mode 100644 index 0000000..3f5ae3b --- /dev/null +++ b/.hlint.yaml @@ -0,0 +1,65 @@ +# HLint configuration file +# https://github.com/ndmitchell/hlint +########################## + +# This file contains a template configuration file, which is typically +# placed as .hlint.yaml in the root of your project + + +# Specify additional command line arguments +# +# - arguments: [--color, --cpp-simple, -XQuasiQuotes] + + +# Control which extensions/flags/modules/functions can be used +# +# - extensions: +# - default: false # all extension are banned by default +# - name: [PatternGuards, ViewPatterns] # only these listed extensions can be used +# - {name: CPP, within: CrossPlatform} # CPP can only be used in a given module +# +# - flags: +# - {name: -w, within: []} # -w is allowed nowhere +# +# - modules: +# - {name: [Data.Set, Data.HashSet], as: Set} # if you import Data.Set qualified, it must be as 'Set' +# - {name: Control.Arrow, within: []} # Certain modules are banned entirely +# +# - functions: +# - {name: unsafePerformIO, within: []} # unsafePerformIO can only appear in no modules + + +# Add custom hints for this project +# +# Will suggest replacing "wibbleMany [myvar]" with "wibbleOne myvar" +# - error: {lhs: "wibbleMany [x]", rhs: wibbleOne x} + + +# Turn on hints that are off by default +# +# Ban "module X(module X) where", to require a real export list +# - warn: {name: Use explicit module export list} +# +# Replace a $ b $ c with a . b $ c +# - group: {name: dollar, enabled: true} +# +# Generalise map to fmap, ++ to <> +# - group: {name: generalise, enabled: true} + + +# Ignore some builtin hints +# - ignore: {name: Use let} +# - ignore: {name: Use const, within: SpecialModule} # Only within certain modules + + +# Define some custom infix operators +# - fixity: infixr 3 ~^#^~ + + +# To generate a suitable file for HLint do: +# $ hlint --default > .hlint.yaml + +- ignore: {name: Eta reduce} +- ignore: {name: Redundant bracket} +- ignore: {name: Reduce duplication} +- ignore: {name: Use camelCase} diff --git a/bench/MainCriterion.hs b/bench/MainCriterion.hs index ffa8d67..69f3f67 100644 --- a/bench/MainCriterion.hs +++ b/bench/MainCriterion.hs @@ -150,8 +150,8 @@ addRemoveTriples lbl triples emptyGr populatedGr = addTriples :: Rdf a => (Triples,RDF a) -> RDF a addTriples (triples,emptyGr) = - foldr (\t g -> addTriple g t) emptyGr triples + foldr (flip addTriple) emptyGr triples removeTriples :: Rdf a => (Triples,RDF a) -> RDF a removeTriples (triples,populatedGr) = - foldr (\t g -> removeTriple g t) populatedGr triples + foldr (flip removeTriple) populatedGr triples diff --git a/rdf-tests b/rdf-tests index e24f243..b3136e9 160000 --- a/rdf-tests +++ b/rdf-tests @@ -1 +1 @@ -Subproject commit e24f243f79087a61a1b1aa72f5c7c27470155c33 +Subproject commit b3136e909c6f1bfa550290bfb6cc41a29f2dc40d diff --git a/src/Data/RDF/Graph/AdjHashMap.hs b/src/Data/RDF/Graph/AdjHashMap.hs index 4c93c9f..467e89e 100644 --- a/src/Data/RDF/Graph/AdjHashMap.hs +++ b/src/Data/RDF/Graph/AdjHashMap.hs @@ -105,7 +105,7 @@ instance Rdf AdjHashMap where -- where subjPredMaps = HashMap.toList spoMap -- in concatMap (\t -> show t ++ "\n") ts -showGraph' :: RDF AdjHashMap -> [Char] +showGraph' :: RDF AdjHashMap -> String showGraph' ((AdjHashMap ((spoMap, _), _, _))) = let ts = concatMap (uncurry tripsSubj) subjPredMaps where subjPredMaps = HashMap.toList spoMap diff --git a/src/Data/RDF/Graph/HashMapSP.hs b/src/Data/RDF/Graph/HashMapSP.hs index 4bc6a95..1c243e4 100644 --- a/src/Data/RDF/Graph/HashMapSP.hs +++ b/src/Data/RDF/Graph/HashMapSP.hs @@ -49,7 +49,7 @@ instance Rdf HashSP where -- let ts = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . HashMap.toList) tsMap -- in concatMap (\t -> show t ++ "\n") ts -showGraph' :: RDF HashSP -> [Char] +showGraph' :: RDF HashSP -> String showGraph' (HashSP (tsMap,_,_)) = let ts = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . HashMap.toList) tsMap in concatMap (\t -> show t ++ "\n") ts diff --git a/src/Data/RDF/Graph/TList.hs b/src/Data/RDF/Graph/TList.hs index 7c76435..632129e 100644 --- a/src/Data/RDF/Graph/TList.hs +++ b/src/Data/RDF/Graph/TList.hs @@ -70,7 +70,7 @@ instance Rdf TList where query = query' showGraph = showGraph' -showGraph' :: RDF TList -> [Char] +showGraph' :: RDF TList -> String showGraph' gr = concatMap (\t -> show t ++ "\n") (expandTriples gr) prefixMappings' :: RDF TList -> PrefixMappings diff --git a/src/Data/RDF/IRI.hs b/src/Data/RDF/IRI.hs index 28fdcc5..d901792 100644 --- a/src/Data/RDF/IRI.hs +++ b/src/Data/RDF/IRI.hs @@ -19,12 +19,12 @@ module Data.RDF.IRI ) where import Data.Semigroup (Semigroup(..)) -import Data.Maybe (fromMaybe, isJust) +import Data.Maybe (maybe, isJust) import Data.Functor import Data.List (intersperse) import Control.Applicative import Control.Monad (guard) -import Control.Arrow ((***), (&&&), (>>>)) +import Control.Arrow (first, (&&&), (>>>)) import Data.Char (isAlpha, isDigit, isAlphaNum, toUpper, toLower) import Data.Text (Text) import qualified Data.Text as T @@ -95,18 +95,18 @@ data SchemaError -- [TODO] use Builder serializeIRI :: IRIRef -> Text serializeIRI (IRIRef s a p q f) = mconcat - [ fromMaybe mempty (scheme <$> s) - , fromMaybe mempty (authority <$> a) + [ maybe mempty scheme s + , maybe mempty authority a , path p - , fromMaybe mempty (query <$> q) - , fromMaybe mempty (fragment <$> f)] + , maybe mempty query q + , maybe mempty fragment f ] where scheme (Scheme s') = s' <> ":" authority (Authority u (Host h) p') = mconcat [ "//" - , fromMaybe mempty (userInfo <$> u) + , maybe mempty userInfo u , h - , fromMaybe mempty (port <$> p') ] + , maybe mempty port p' ] userInfo (UserInfo u) = u <> "@" port (Port p') = (":" <>) . T.pack . show $ p' path (Path p') = p' @@ -123,7 +123,7 @@ parseRelIRI :: Text -> Either String IRIRef parseRelIRI = P.parseOnly $ irelativeRefParser <* (P.endOfInput "Unexpected characters at the end") validateIRI :: Text -> Either String Text -validateIRI t = const t <$> parseIRI t +validateIRI t = t <$ parseIRI t -- | IRI parsing and resolution according to algorithm 5.2 from RFC3986 -- See: http://www.ietf.org/rfc/rfc3986.txt @@ -270,7 +270,7 @@ ipathRootlessParser' = mconcat <$> sequence [isegmentNzParser, ipathAbEmptyParse -- ipath-empty = 0 ipathEmptyParser :: Parser (Maybe Authority, Path) -ipathEmptyParser = const (Nothing, mempty) <$> ipathEmptyParser' +ipathEmptyParser = (Nothing, mempty) <$ ipathEmptyParser' ipathEmptyParser' :: Parser Text ipathEmptyParser' = P.string mempty "Empty path" @@ -406,7 +406,7 @@ ipV6AddressParser = do h16 = parseBetween 1 4 (P.takeWhile isHexaDigit) ipNotElided (leading, lengthL) = guard (lengthL == 7 && isDecOctet (last leading)) *> partialIpV4 <|> - guard (lengthL == 8) *> pure mempty + (guard (lengthL == 8) $> mempty) ipElided (_, lengthL) = do guard $ lengthL <= 8 elision <- P.string "::" @@ -476,10 +476,10 @@ isSubDelims c = c `elem` ("!$&'()*+,;=" :: String) iauthWithPathParser :: Parser (Maybe Authority, Path) iauthWithPathParser = do void (P.string "//") - curry (Just *** id) <$> iauthorityParser <*> ipathAbEmptyParser + curry (first Just) <$> iauthorityParser <*> ipathAbEmptyParser isHexaDigit :: Char -> Bool -isHexaDigit c = (c >= '0' && c <= '9') || +isHexaDigit c = (isDigit c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') diff --git a/src/Data/RDF/Query.hs b/src/Data/RDF/Query.hs index 2cbe131..f7a8e1e 100644 --- a/src/Data/RDF/Query.hs +++ b/src/Data/RDF/Query.hs @@ -20,6 +20,7 @@ module Data.RDF.Query ( import Prelude hiding (pred) import Data.List +import Data.Maybe (fromMaybe) import Data.RDF.Types import qualified Data.RDF.Namespace as NS import Data.Text (Text) @@ -156,7 +157,7 @@ expandNode _ n = n -- Also expands "a" to "http://www.w3.org/1999/02/22-rdf-syntax-ns#type". expandURI :: PrefixMappings -> Text -> Text expandURI _ "a" = NS.mkUri NS.rdf "type" -expandURI pms iri = maybe iri id $ foldl' f Nothing (NS.toPMList pms) +expandURI pms iri = fromMaybe iri $ foldl' f Nothing (NS.toPMList pms) where f :: Maybe Text -> (Text, Text) -> Maybe Text f x (p, u) = x <|> (T.append u <$> T.stripPrefix (T.append p ":") iri) diff --git a/src/Data/RDF/Types.hs b/src/Data/RDF/Types.hs index 47b4cde..062ea6f 100644 --- a/src/Data/RDF/Types.hs +++ b/src/Data/RDF/Types.hs @@ -188,7 +188,7 @@ uriValidate = either (const Nothing) Just . isRdfURI -- |Same as 'uriValidate', but on 'String' rather than 'Text' uriValidateString :: String -> Maybe String -uriValidateString = liftA T.unpack . uriValidate . fromString +uriValidateString = fmap T.unpack . uriValidate . fromString isRdfURI :: Text -> Either ParseError Text isRdfURI t = parse (iriFragment <* eof) ("Invalid URI: " ++ T.unpack t) t @@ -624,12 +624,10 @@ canonicalizerTable = doubleUri = "http://www.w3.org/2001/XMLSchema#double" _integerStr, _decimalStr, _doubleStr :: Text -> Text -_integerStr t = - if T.length t == 1 - then t - else if T.head t == '0' - then _integerStr (T.tail t) - else t +_integerStr t + | T.length t == 1 = t + | T.head t == '0' = _integerStr (T.tail t) + | otherwise = t -- exponent: [eE] ('-' | '+')? [0-9]+ -- ('-' | '+') ? ( [0-9]+ '.' [0-9]* exponent | '.' ([0-9])+ exponent | ([0-9])+ exponent ) diff --git a/src/Text/RDF/RDF4H/NTriplesParser.hs b/src/Text/RDF/RDF4H/NTriplesParser.hs index 4f765bb..b7ad3ec 100644 --- a/src/Text/RDF/RDF4H/NTriplesParser.hs +++ b/src/Text/RDF/RDF4H/NTriplesParser.hs @@ -13,7 +13,7 @@ module Text.RDF.RDF4H.NTriplesParser import Prelude hiding (readFile) import Data.Semigroup ((<>)) -import Data.Char (isDigit, isLetter, isAlphaNum) +import Data.Char (isDigit, isLetter, isAlphaNum, isAsciiUpper, isAsciiLower) import Control.Applicative import Control.Monad (void) @@ -165,8 +165,8 @@ nt_blank_node_label = do -- [157s] PN_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] nt_pn_chars_base :: CharParsing m => m Char nt_pn_chars_base = try $ satisfy isBaseChar - where isBaseChar c = (c >= 'A' && c <= 'Z') - || (c >= 'a' && c <= 'z') + where isBaseChar c = (isAsciiUpper c) + || (isAsciiLower c) || (c >= '\x00C0' && c <= '\x00D6') || (c >= '\x00D8' && c <= '\x00F6') || (c >= '\x00F8' && c <= '\x02FF') diff --git a/src/Text/RDF/RDF4H/ParserUtils.hs b/src/Text/RDF/RDF4H/ParserUtils.hs index f17ad71..0c9cdd0 100644 --- a/src/Text/RDF/RDF4H/ParserUtils.hs +++ b/src/Text/RDF/RDF4H/ParserUtils.hs @@ -28,7 +28,7 @@ _parseURL parseFunc url = do case ex of (HttpExceptionRequest _req content) -> case content of - ConnectionTimeout -> do + ConnectionTimeout -> return $ errResult "Connection timed out" _ -> return $ errResult ("HttpExceptionRequest content: " ++ show content) (InvalidUrlException{}) -> diff --git a/src/Text/RDF/RDF4H/TurtleParser.hs b/src/Text/RDF/RDF4H/TurtleParser.hs index 08b1485..9d1559a 100644 --- a/src/Text/RDF/RDF4H/TurtleParser.hs +++ b/src/Text/RDF/RDF4H/TurtleParser.hs @@ -24,6 +24,7 @@ import Text.RDF.RDF4H.NTriplesParser import Text.Parsec (runParser, ParseError) import qualified Data.Text as T import Data.Sequence (Seq, (|>)) +import Data.Functor (($>)) import qualified Data.Foldable as F import Control.Monad import Text.Parser.Char @@ -176,7 +177,7 @@ t_sparql_base = do updateBaseUrl (Just $ Just newBaseIri) t_verb :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m () -t_verb = try t_predicate <|> (char 'a' *> pure rdfTypeNode) >>= setPredicate +t_verb = try t_predicate <|> (char 'a' $> rdfTypeNode) >>= setPredicate -- grammar rule: [11] predicate ::= iri t_predicate :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m Node @@ -199,7 +200,7 @@ t_pn_local = do xs <- option "" $ try $ do let recsve = (t_pn_chars_str <|> string ":" <|> t_plx) <|> (t_pn_chars_str <|> string ":" <|> t_plx <|> try (string "." <* lookAhead (try recsve))) <|> - (t_pn_chars_str <|> string ":" <|> t_plx <|> try (string "." *> notFollowedBy t_ws *> pure ".")) + (t_pn_chars_str <|> string ":" <|> t_plx <|> try (string "." *> notFollowedBy t_ws $> ".")) concat <$> many recsve pure (T.pack (x ++ xs)) where @@ -235,7 +236,7 @@ t_subject = iri <|> t_blankNode <|> t_collection >>= setSubject -- [137s] BlankNode ::= BLANK_NODE_LABEL | ANON t_blankNode :: (CharParsing m, MonadState ParseState m) => m Node t_blankNode = do - genID <- try t_blank_node_label <|> (t_anon *> pure mempty) + genID <- try t_blank_node_label <|> (t_anon $> mempty) mp <- currGenIdLookup maybe (newBN genID) getExistingBN (Map.lookup genID mp) where @@ -297,7 +298,7 @@ t_collection = withConstantSubjectPredicate $ void (many t_ws) return root where - empty_list = lookAhead (char ')') *> return rdfNilNode + empty_list = lookAhead (char ')') $> rdfNilNode non_empty_list = do ns <- sepEndBy1 element (some t_ws) addTripleForObject rdfNilNode @@ -507,7 +508,7 @@ updateBaseUrl val = _modifyState val no no no no no -- combines get_current and increment into a single function nextIdCounter :: MonadState ParseState m => m Integer nextIdCounter = get >>= \(bUrl, dUrl, i, pms, s, p, ts, genMap) -> - put (bUrl, dUrl, i+1, pms, s, p, ts, genMap) *> pure i + put (bUrl, dUrl, i+1, pms, s, p, ts, genMap) $> i nextBlankNode :: MonadState ParseState m => m Node nextBlankNode = BNodeGen . fromIntegral <$> nextIdCounter diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index a371d42..b5e5761 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -102,7 +102,7 @@ instance Exception ParserException -- Returns either a @ParseFailure@ or a new RDF containing the parsed triples. parseFile' :: (Rdf a) => Maybe BaseUrl -> Maybe Text -> String -> IO (Either ParseFailure (RDF a)) parseFile' bUrl dUrl fpath = - TIO.readFile fpath >>= return . parseXmlRDF bUrl dUrl + parseXmlRDF bUrl dUrl <$> TIO.readFile fpath -- |Parse the document at the given location URL as an XML document, using an optional @BaseUrl@ -- as the base URI, and using the given document URL as the URI of the XML document itself. @@ -438,7 +438,7 @@ my_expandURI -- |Make a UNode from an absolute string mkUNode :: forall a. (ArrowIf a) => a String Node mkUNode = choiceA [ (arr (isJust . unodeValidate . T.pack)) :-> (arr (unode . T.pack)) - , arr (\_ -> True) :-> arr (\uri -> throw (ParserException ("Invalid URI: " ++ uri))) + , arr (const True) :-> arr (\uri -> throw (ParserException ("Invalid URI: " ++ uri))) ] -- |Make a UNode from a rdf:ID element, expanding relative URIs diff --git a/testsuite/tests/Data/RDF/PropertyTests.hs b/testsuite/tests/Data/RDF/PropertyTests.hs index f4417d9..a5a0fee 100644 --- a/testsuite/tests/Data/RDF/PropertyTests.hs +++ b/testsuite/tests/Data/RDF/PropertyTests.hs @@ -131,7 +131,7 @@ arbitraryPrefixMappings = p_empty :: Rdf rdf => RDF rdf -> Bool -p_empty empty = triplesOf empty == [] +p_empty empty = null (triplesOf empty) -- triplesOf any RDF should return unique triples used to create it p_mkRdf_triplesOf @@ -445,7 +445,7 @@ p_remove_triple_from_singleton_graph_query_s :: (Rdf rdf) => RDF rdf -> SingletonGraph rdf -> Bool p_remove_triple_from_singleton_graph_query_s _unused singletonGraph = - query newGr (Just s) Nothing Nothing == [] + null (query newGr (Just s) Nothing Nothing) where tripleInGraph@(Triple s _p _o) = head (triplesOf (rdfGraph singletonGraph)) newGr = removeTriple (rdfGraph singletonGraph) tripleInGraph @@ -457,7 +457,7 @@ p_remove_triple_from_singleton_graph_query_p :: (Rdf rdf) => RDF rdf -> SingletonGraph rdf -> Bool p_remove_triple_from_singleton_graph_query_p _unused singletonGraph = - query newGr Nothing (Just p) Nothing == [] + null (query newGr Nothing (Just p) Nothing) where tripleInGraph@(Triple _s p _o) = head (triplesOf (rdfGraph singletonGraph)) newGr = removeTriple (rdfGraph singletonGraph) tripleInGraph @@ -469,7 +469,7 @@ p_remove_triple_from_singleton_graph_query_o :: (Rdf rdf) => RDF rdf -> SingletonGraph rdf -> Bool p_remove_triple_from_singleton_graph_query_o _unused singletonGraph = - query newGr Nothing Nothing (Just o) == [] + null (query newGr Nothing Nothing (Just o)) where tripleInGraph@(Triple _s _p o) = head (triplesOf (rdfGraph singletonGraph)) newGr = removeTriple (rdfGraph singletonGraph) tripleInGraph @@ -482,10 +482,10 @@ p_add_then_remove_triples p_add_then_remove_triples _empty genTriples = let emptyGraph = _empty populatedGraph = - foldr (\t gr -> addTriple gr t) emptyGraph genTriples + foldr (flip addTriple) emptyGraph genTriples emptiedGraph = - foldr (\t gr -> removeTriple gr t) populatedGraph genTriples - in triplesOf emptiedGraph == [] + foldr (flip removeTriple) populatedGraph genTriples + in null (triplesOf emptiedGraph) equivNode :: (Node -> Node -> Bool) -> (Triple -> Node) @@ -601,8 +601,7 @@ instance Arbitrary Triple where arbitrary = do s <- arbitraryS p <- arbitraryP - o <- arbitraryO - return (triple s p o) + triple s p <$> arbitraryO instance Arbitrary Node where arbitrary = oneof $ map return unodes diff --git a/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs b/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs index 8d8f979..e29554d 100644 --- a/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs +++ b/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs @@ -9,7 +9,6 @@ module Text.RDF.RDF4H.XmlParser_Test -- Testing imports import Test.Tasty -import Test.Tasty.HUnit import Test.Tasty.HUnit as TU -- Import common libraries to facilitate tests @@ -70,8 +69,8 @@ loadExpectedGraph1 fname = do loadInputGraph1 :: String -> String -> IO (Either ParseFailure (RDF TList)) loadInputGraph1 dir fname = - TIO.readFile (printf "%s/%s.rdf" dir fname :: String) >>= - return . parseString (XmlParser Nothing (mkDocUrl1 testBaseUri fname)) >>= return . handleLoad + (parseString (XmlParser Nothing (mkDocUrl1 testBaseUri fname)) <$> + TIO.readFile (printf "%s/%s.rdf" dir fname :: String)) doGoodConformanceTest :: IO (Either ParseFailure (RDF TList)) -> IO (Either ParseFailure (RDF TList)) -> diff --git a/testsuite/tests/W3C/Manifest.hs b/testsuite/tests/W3C/Manifest.hs index 736db43..ec9196e 100644 --- a/testsuite/tests/W3C/Manifest.hs +++ b/testsuite/tests/W3C/Manifest.hs @@ -125,8 +125,8 @@ mfUnrecognizedDatatypes = unode "http://www.w3.org/2001/sw/DataAccess/tests/test -- | Load the manifest from the given file; -- apply the given namespace as the base IRI of the manifest. loadManifest :: T.Text -> T.Text -> IO Manifest -loadManifest manifestPath baseIRI = do - parseFile testParser (T.unpack manifestPath) >>= return . rdfToManifest . fromEither +loadManifest manifestPath baseIRI = + (rdfToManifest . fromEither) <$> parseFile testParser (T.unpack manifestPath) where testParser = TurtleParser (Just $ BaseUrl baseIRI) Nothing rdfToManifest :: RDF TList -> Manifest diff --git a/testsuite/tests/W3C/RdfXmlTest.hs b/testsuite/tests/W3C/RdfXmlTest.hs index 6da83dd..0c20cf5 100644 --- a/testsuite/tests/W3C/RdfXmlTest.hs +++ b/testsuite/tests/W3C/RdfXmlTest.hs @@ -30,8 +30,8 @@ mfEntryToTest :: TestEntry -> TestTree mfEntryToTest (TestXMLEval nm _ _ act' res') = let act = (UNode . fromJust . fileSchemeToFilePath) act' res = (UNode . fromJust . fileSchemeToFilePath) res' - parsedRDF = parseFile testParser (nodeURI act) >>= return . fromEither :: IO (RDF TList) - expectedRDF = parseFile NTriplesParser (nodeURI res) >>= return . fromEither :: IO (RDF TList) + parsedRDF = (fromEither <$> parseFile testParser (nodeURI act)) :: IO (RDF TList) + expectedRDF = (fromEither <$> parseFile NTriplesParser (nodeURI res)) :: IO (RDF TList) in TU.testCase (T.unpack nm) $ assertIsIsomorphic parsedRDF expectedRDF mfEntryToTest (TestXMLNegativeSyntax nm _ _ act') = let act = (UNode . fromJust . fileSchemeToFilePath) act' diff --git a/testsuite/tests/W3C/TurtleTest.hs b/testsuite/tests/W3C/TurtleTest.hs index 8e30ede..4122ff8 100644 --- a/testsuite/tests/W3C/TurtleTest.hs +++ b/testsuite/tests/W3C/TurtleTest.hs @@ -31,8 +31,8 @@ mfEntryToTest :: TurtleParserCustom -> TestEntry -> TestTree mfEntryToTest parser (TestTurtleEval nm _ _ act' res') = let act = (UNode . fromJust . fileSchemeToFilePath) act' res = (UNode . fromJust . fileSchemeToFilePath) res' - parsedRDF = parseFile parser (nodeURI act) >>= return . fromEither :: IO (RDF TList) - expectedRDF = parseFile NTriplesParser (nodeURI res) >>= return . fromEither :: IO (RDF TList) + parsedRDF = (fromEither <$> parseFile parser (nodeURI act)) :: IO (RDF TList) + expectedRDF = (fromEither <$> parseFile NTriplesParser (nodeURI res)) :: IO (RDF TList) in TU.testCase (T.unpack nm) $ assertIsIsomorphic parsedRDF expectedRDF mfEntryToTest parser (TestTurtleNegativeEval nm _ _ act') = let act = (UNode . fromJust . fileSchemeToFilePath) act' From 287b0179e79a59798e60e6cd883d2bf625c7bda3 Mon Sep 17 00:00:00 2001 From: John Pavel Date: Wed, 23 Jan 2019 21:16:51 +0000 Subject: [PATCH 02/39] replace some data with newtype --- src/Text/RDF/RDF4H/NTriplesParser.hs | 2 +- src/Text/RDF/RDF4H/XmlParser.hs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Text/RDF/RDF4H/NTriplesParser.hs b/src/Text/RDF/RDF4H/NTriplesParser.hs index b7ad3ec..df060c1 100644 --- a/src/Text/RDF/RDF4H/NTriplesParser.hs +++ b/src/Text/RDF/RDF4H/NTriplesParser.hs @@ -39,7 +39,7 @@ import System.IO (IOMode(..), withFile, hSetNewlineMode, noNewlineTranslation, h -- class. data NTriplesParser = NTriplesParser -data NTriplesParserCustom = NTriplesParserCustom Parser +newtype NTriplesParserCustom = NTriplesParserCustom Parser -- |'NTriplesParser' is an instance of 'RdfParser' using parsec based parsers. instance RdfParser NTriplesParser where diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index b5e5761..eb834e3 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -80,7 +80,7 @@ instance RdfParser XmlParser where -- |Global state for the parser -data GParseState = GParseState { stateGenId :: Int +newtype GParseState = GParseState { stateGenId :: Int } deriving(Show) @@ -91,7 +91,7 @@ data LParseState = LParseState { stateBaseUrl :: BaseUrl } deriving(Show) -data ParserException = ParserException String +newtype ParserException = ParserException String deriving (Show,Typeable) instance Exception ParserException From 6dc637980b4d02daf56e3f973dcc3477d86fbcf8 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Wed, 22 May 2019 14:08:37 +0200 Subject: [PATCH 03/39] Add support for Algebraic Graphs. Fix #59 --- bench/MainCriterion.hs | 59 ++++++++------ rdf4h.cabal | 5 ++ src/Data/RDF.hs | 2 + src/Data/RDF/Graph/AlgebraicGraph.hs | 113 +++++++++++++++++++++++++++ testsuite/tests/Test.hs | 7 +- 5 files changed, 159 insertions(+), 27 deletions(-) create mode 100644 src/Data/RDF/Graph/AlgebraicGraph.hs diff --git a/bench/MainCriterion.hs b/bench/MainCriterion.hs index 69f3f67..d5ccb73 100644 --- a/bench/MainCriterion.hs +++ b/bench/MainCriterion.hs @@ -4,6 +4,7 @@ module Main where import Prelude hiding (readFile) +import Data.Semigroup (Semigroup(..)) import Criterion import Criterion.Types import Criterion.Main @@ -52,11 +53,13 @@ main = defaultMainWith fawltyContentNTriples <- readFile "data/nt/all-fawlty-towers.nt" rdf1' <- parseFile (XmlParser Nothing Nothing) xmlFile rdf2' <- parseFile (XmlParser Nothing Nothing) xmlFile + rdf3' <- parseFile (XmlParser Nothing Nothing) xmlFile let rdf1 = either (error . show) id rdf1' :: RDF TList rdf2 = either (error . show) id rdf2' :: RDF AdjHashMap + rdf3 = either (error . show) id rdf3' :: RDF AlgebraicGraph triples = triplesOf rdf1 - return (rdf1, rdf2, triples, fawltyContentNTriples, fawltyContentTurtle)) $ - \ ~(triplesList, adjMap, triples, fawltyContentNTriples, fawltyContentTurtle) -> + return (rdf1, rdf2, rdf3, triples, fawltyContentNTriples, fawltyContentTurtle)) $ + \ ~(triplesList, adjMap, algGraph, triples, fawltyContentNTriples, fawltyContentTurtle) -> bgroup "rdf4h" [ bgroup @@ -85,38 +88,42 @@ main = defaultMainWith , bgroup "query" - (queryBench "TList" triplesList ++ - queryBench "AdjHashMap" adjMap - -- queryBench "SP" mapSP ++ queryBench "HashSP" hashMapSP + (queryBench "TList" triplesList <> + queryBench "AdjHashMap" adjMap <> + queryBench "AlgebraicGraph" algGraph + -- queryBench "SP" mapSP <> queryBench "HashSP" hashMapSP ) , bgroup "select" - (selectBench "TList" triplesList ++ - selectBench "AdjHashMap" adjMap - -- selectBench "SP" mapSP ++ selectBench "HashSP" hashMapSP + (selectBench "TList" triplesList <> + selectBench "AdjHashMap" adjMap <> + selectBench "AlgebraicGraph" algGraph + -- selectBench "SP" mapSP <> selectBench "HashSP" hashMapSP ) , bgroup "add-remove-triples" - (addRemoveTriples "TList" triples (empty :: RDF TList) triplesList - ++ addRemoveTriples "AdjHashMap" triples (empty :: RDF AdjHashMap) adjMap + (addRemoveTriples "TList" triples (empty :: RDF TList) triplesList <> + addRemoveTriples "AdjHashMap" triples (empty :: RDF AdjHashMap) adjMap <> + addRemoveTriples "AlgebraicGraph" triples (empty :: RDF AlgebraicGraph) algGraph ) , bgroup "count_triples" [ bench "TList" (nf (length . triplesOf) triplesList) , bench "AdjHashMap" (nf (length . triplesOf) adjMap) + , bench "AlgebraicGraph" (nf (length . triplesOf) algGraph) ] ] ] selectBench :: Rdf a => String -> RDF a -> [Benchmark] selectBench label gr = - [ bench (label ++ " SPO") $ nf selectGr (subjSelect,predSelect,objSelect,gr) - , bench (label ++ " SP") $ nf selectGr (subjSelect,predSelect,selectNothing,gr) - , bench (label ++ " S") $ nf selectGr (subjSelect,selectNothing,selectNothing,gr) - , bench (label ++ " PO") $ nf selectGr (selectNothing,predSelect,objSelect,gr) - , bench (label ++ " SO") $ nf selectGr (subjSelect,selectNothing,objSelect,gr) - , bench (label ++ " P") $ nf selectGr (selectNothing,predSelect,selectNothing,gr) - , bench (label ++ " O") $ nf selectGr (selectNothing,selectNothing,objSelect,gr) + [ bench (label <> " SPO") $ nf selectGr (subjSelect,predSelect,objSelect,gr) + , bench (label <> " SP") $ nf selectGr (subjSelect,predSelect,selectNothing,gr) + , bench (label <> " S") $ nf selectGr (subjSelect,selectNothing,selectNothing,gr) + , bench (label <> " PO") $ nf selectGr (selectNothing,predSelect,objSelect,gr) + , bench (label <> " SO") $ nf selectGr (subjSelect,selectNothing,objSelect,gr) + , bench (label <> " P") $ nf selectGr (selectNothing,predSelect,selectNothing,gr) + , bench (label <> " O") $ nf selectGr (selectNothing,selectNothing,objSelect,gr) ] subjSelect, predSelect, objSelect, selectNothing :: Maybe (Node -> Bool) @@ -133,19 +140,19 @@ queryNothing = Nothing queryBench :: Rdf a => String -> RDF a -> [Benchmark] queryBench label gr = - [ bench (label ++ " SPO") $ nf queryGr (subjQuery,predQuery,objQuery,gr) - , bench (label ++ " SP") $ nf queryGr (subjQuery,predQuery,queryNothing,gr) - , bench (label ++ " S") $ nf queryGr (subjQuery,queryNothing,queryNothing,gr) - , bench (label ++ " PO") $ nf queryGr (queryNothing,predQuery,objQuery,gr) - , bench (label ++ " SO") $ nf queryGr (subjQuery,queryNothing,objQuery,gr) - , bench (label ++ " P") $ nf queryGr (queryNothing,predQuery,queryNothing,gr) - , bench (label ++ " O") $ nf queryGr (queryNothing,queryNothing,objQuery,gr) + [ bench (label <> " SPO") $ nf queryGr (subjQuery,predQuery,objQuery,gr) + , bench (label <> " SP") $ nf queryGr (subjQuery,predQuery,queryNothing,gr) + , bench (label <> " S") $ nf queryGr (subjQuery,queryNothing,queryNothing,gr) + , bench (label <> " PO") $ nf queryGr (queryNothing,predQuery,objQuery,gr) + , bench (label <> " SO") $ nf queryGr (subjQuery,queryNothing,objQuery,gr) + , bench (label <> " P") $ nf queryGr (queryNothing,predQuery,queryNothing,gr) + , bench (label <> " O") $ nf queryGr (queryNothing,queryNothing,objQuery,gr) ] addRemoveTriples :: (NFData a,NFData (RDF a), Rdf a) => String -> Triples -> RDF a -> RDF a -> [Benchmark] addRemoveTriples lbl triples emptyGr populatedGr = - [ bench (lbl ++ "-add-triples") $ nf addTriples (triples,emptyGr) - , bench (lbl ++ "-remove-triples") $ nf removeTriples (triples,populatedGr) + [ bench (lbl <> "-add-triples") $ nf addTriples (triples,emptyGr) + , bench (lbl <> "-remove-triples") $ nf removeTriples (triples,populatedGr) ] addTriples :: Rdf a => (Triples,RDF a) -> RDF a diff --git a/rdf4h.cabal b/rdf4h.cabal index fceecaa..c2e9350 100644 --- a/rdf4h.cabal +++ b/rdf4h.cabal @@ -31,6 +31,7 @@ library , Data.RDF.Types , Data.RDF.Query , Data.RDF.Graph.AdjHashMap + , Data.RDF.Graph.AlgebraicGraph , Data.RDF.Graph.TList , Text.RDF.RDF4H.TurtleParser , Text.RDF.RDF4H.TurtleSerializer @@ -47,6 +48,7 @@ library , HTTP >= 4000.0.0 , hxt >= 9.3.1.2 , text >= 1.2.1.0 + , algebraic-graphs >= 0.4 && < 5 , unordered-containers , hashable , deepseq @@ -120,6 +122,9 @@ benchmark rdf4h-bench text >= 1.2.1.0 ghc-options: -Wall + if !impl(ghc >= 8.0) + build-depends: semigroups == 0.18.* + source-repository head type: git location: https://github.com/robstewart57/rdf4h.git diff --git a/src/Data/RDF.hs b/src/Data/RDF.hs index 2f20b4b..593e3b7 100644 --- a/src/Data/RDF.hs +++ b/src/Data/RDF.hs @@ -15,6 +15,7 @@ module Data.RDF ( -- * RDF type class instances module Data.RDF.Graph.TList, module Data.RDF.Graph.AdjHashMap, + module Data.RDF.Graph.AlgebraicGraph, -- module Data.RDF.Graph.HashMapSP, -- module Data.RDF.Graph.MapSP, @@ -29,6 +30,7 @@ module Data.RDF ( import Data.RDF.Namespace import Data.RDF.Graph.TList import Data.RDF.Graph.AdjHashMap +import Data.RDF.Graph.AlgebraicGraph -- import Data.RDF.Graph.HashMapSP -- import Data.RDF.Graph.MapSP import Text.RDF.RDF4H.NTriplesSerializer diff --git a/src/Data/RDF/Graph/AlgebraicGraph.hs b/src/Data/RDF/Graph/AlgebraicGraph.hs new file mode 100644 index 0000000..d376f28 --- /dev/null +++ b/src/Data/RDF/Graph/AlgebraicGraph.hs @@ -0,0 +1,113 @@ +{-# OPTIONS_GHC -fno-warn-orphans #-} -- [TODO] Remove when the missing NFData instance is added to Alga. +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE EmptyDataDecls #-} + + +module Data.RDF.Graph.AlgebraicGraph + ( AlgebraicGraph + ) where + + +import Data.Semigroup (Semigroup(..)) +import Control.DeepSeq (NFData(..)) +import Data.Binary +import Data.RDF.Namespace +import Data.RDF.Query +import Data.RDF.Types (RDF, Rdf(..), BaseUrl, Triples, Triple(..), Node, Subject, Predicate, Object, NodeSelector) +import qualified Algebra.Graph.Labelled as G +import Data.HashSet (HashSet) +import qualified Data.HashSet as HS +import GHC.Generics + + +data AlgebraicGraph deriving (Generic) + +instance Binary AlgebraicGraph +instance NFData AlgebraicGraph + +data instance RDF AlgebraicGraph = AlgebraicGraph + { _graph :: G.Graph (HashSet Node) Node + , _baseUrl :: Maybe BaseUrl + , _prefixMappings :: PrefixMappings + } deriving (Generic, NFData) + +-- [TODO] Remove this orphan instance when the missing NFData instance is added to Alga. +instance (NFData e, NFData a) => NFData (G.Graph e a) where + rnf G.Empty = () + rnf (G.Vertex x ) = rnf x + rnf (G.Connect e x y) = e `seq` rnf x `seq` rnf y + +instance Rdf AlgebraicGraph where + baseUrl = _baseUrl + prefixMappings = _prefixMappings + addPrefixMappings = addPrefixMappings' + empty = empty' + mkRdf = mkRdf' + addTriple = addTriple' + removeTriple = removeTriple' + triplesOf = triplesOf' + uniqTriplesOf = triplesOf' + select = select' + query = query' + showGraph = showGraph' + +toEdge :: Triple -> (HashSet Predicate, Subject, Object) +toEdge (Triple s p o) = (HS.singleton p, s, o) + +toTriples :: (HashSet Predicate, Subject, Object) -> Triples +toTriples (ps, s, o) = [Triple s p o | p <- HS.toList ps] + +showGraph' :: RDF AlgebraicGraph -> String +showGraph' r = concatMap (\t -> show t ++ "\n") (expandTriples r) + +addPrefixMappings' :: RDF AlgebraicGraph -> PrefixMappings -> Bool -> RDF AlgebraicGraph +addPrefixMappings' (AlgebraicGraph g baseURL pms) pms' replace = + let merge = if replace then flip mergePrefixMappings else mergePrefixMappings + in AlgebraicGraph g baseURL (merge pms pms') + +empty' :: RDF AlgebraicGraph +empty' = AlgebraicGraph G.empty mempty (PrefixMappings mempty) + +mkRdf' :: Triples -> Maybe BaseUrl -> PrefixMappings -> RDF AlgebraicGraph +mkRdf' ts baseURL pms = + let g = G.edges . fmap toEdge $ ts + in AlgebraicGraph g baseURL pms + +addTriple' :: RDF AlgebraicGraph -> Triple -> RDF AlgebraicGraph +addTriple' (AlgebraicGraph g baseURL pms) (Triple s p o) = + let g' = G.edge (HS.singleton p) s o + in AlgebraicGraph (G.overlay g g') baseURL pms + +removeTriple' :: RDF AlgebraicGraph -> Triple -> RDF AlgebraicGraph +removeTriple' (AlgebraicGraph g baseURL pms) (Triple s p o) = + let ps = G.edgeLabel s o g + g' + | HS.null ps = g + | elem p ps = G.replaceEdge (HS.delete p ps) s o g + | otherwise = g + in AlgebraicGraph g' baseURL pms + +triplesOf' :: RDF AlgebraicGraph -> Triples +triplesOf' (AlgebraicGraph g _ _) = mconcat $ toTriples <$> G.edgeList g + +select' :: RDF AlgebraicGraph -> NodeSelector -> NodeSelector -> NodeSelector -> Triples +select' r Nothing Nothing Nothing = triplesOf r +select' (AlgebraicGraph g _ _) s p o = let (res, _, _) = G.foldg e v c g in res + where + e = (mempty, mempty, mempty) + v x = (mempty, s ?? x, o ?? x) + (??) f x' = let xs = HS.singleton x' in maybe xs (`HS.filter` xs) f + c ps (ts1, ss1, os1) (ts2, ss2, os2) = (ts3, ss3, os3) + where + ss3 = ss1 <> ss2 + os3 = os1 <> os2 + ts3 + | HS.null ps' = ts1 <> ts2 + | otherwise = ts1 <> ts2 <> [Triple s' p' o' | s' <- HS.toList ss3, p' <- HS.toList ps', o' <- HS.toList os3] + ps' = maybe ps (`HS.filter` ps) p + +query' :: RDF AlgebraicGraph -> Maybe Subject -> Maybe Predicate -> Maybe Object -> Triples +query' r Nothing Nothing Nothing = triplesOf r +query' r s p o = select r ((==) <$> s) ((==) <$> p) ((==) <$> o) diff --git a/testsuite/tests/Test.hs b/testsuite/tests/Test.hs index 9fb0053..62dd0af 100644 --- a/testsuite/tests/Test.hs +++ b/testsuite/tests/Test.hs @@ -61,7 +61,12 @@ main = do (graphTests "AdjHashMap" (empty :: RDF AdjHashMap) - (mkRdf :: Triples -> Maybe BaseUrl -> PrefixMappings -> RDF AdjHashMap))] + (mkRdf :: Triples -> Maybe BaseUrl -> PrefixMappings -> RDF AdjHashMap)) + , + (graphTests + "AlgebraicGraph" + (empty :: RDF AlgebraicGraph) + (mkRdf :: Triples -> Maybe BaseUrl -> PrefixMappings -> RDF AlgebraicGraph))] , testGroup "graph-impl-unit-tests" From 79aa1f415fbc66f6aacfb2942ad3258882e9408f Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Wed, 22 May 2019 16:22:19 +0200 Subject: [PATCH 04/39] hlint --- rdf4h.cabal | 2 ++ src/Data/RDF/Graph/AdjHashMap.hs | 1 - src/Data/RDF/Graph/HashMapSP.hs | 3 +-- src/Data/RDF/Graph/MapSP.hs | 2 -- src/Data/RDF/Graph/TList.hs | 1 - src/Data/RDF/Graph/TPatriciaTree.hs | 2 +- src/Data/RDF/IRI.hs | 12 ++++++------ src/Text/RDF/RDF4H/XmlParser.hs | 8 ++++---- testsuite/tests/Data/RDF/IRITests.hs | 3 --- testsuite/tests/Data/RDF/PropertyTests.hs | 16 +++++++++------- 10 files changed, 23 insertions(+), 27 deletions(-) diff --git a/rdf4h.cabal b/rdf4h.cabal index fceecaa..8f35731 100644 --- a/rdf4h.cabal +++ b/rdf4h.cabal @@ -105,6 +105,8 @@ test-suite test-rdf4h if impl(ghc < 7.6) build-depends: ghc-prim + if !impl(ghc >= 8.0) + build-depends: semigroups == 0.18.* other-modules: W3C.TurtleTest hs-source-dirs: testsuite/tests diff --git a/src/Data/RDF/Graph/AdjHashMap.hs b/src/Data/RDF/Graph/AdjHashMap.hs index 467e89e..0690cae 100644 --- a/src/Data/RDF/Graph/AdjHashMap.hs +++ b/src/Data/RDF/Graph/AdjHashMap.hs @@ -3,7 +3,6 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE TupleSections, GeneralizedNewtypeDeriving #-} {-# LANGUAGE EmptyDataDecls #-} {-# LANGUAGE ScopedTypeVariables #-} diff --git a/src/Data/RDF/Graph/HashMapSP.hs b/src/Data/RDF/Graph/HashMapSP.hs index 1c243e4..09b5d54 100644 --- a/src/Data/RDF/Graph/HashMapSP.hs +++ b/src/Data/RDF/Graph/HashMapSP.hs @@ -3,7 +3,6 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE TupleSections, GeneralizedNewtypeDeriving #-} {-# LANGUAGE EmptyDataDecls #-} -- |A graph implementation mapping (S,P) pairs to O, backed by 'Data.Map'. @@ -53,7 +52,7 @@ showGraph' :: RDF HashSP -> String showGraph' (HashSP (tsMap,_,_)) = let ts = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . HashMap.toList) tsMap in concatMap (\t -> show t ++ "\n") ts - + -- instance Show (HashSP) where -- show gr = concatMap (\t -> show t ++ "\n") (triplesOf gr) diff --git a/src/Data/RDF/Graph/MapSP.hs b/src/Data/RDF/Graph/MapSP.hs index 9b4f58b..7ff0920 100644 --- a/src/Data/RDF/Graph/MapSP.hs +++ b/src/Data/RDF/Graph/MapSP.hs @@ -3,8 +3,6 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE EmptyDataDecls #-} diff --git a/src/Data/RDF/Graph/TList.hs b/src/Data/RDF/Graph/TList.hs index 632129e..e5d3b39 100644 --- a/src/Data/RDF/Graph/TList.hs +++ b/src/Data/RDF/Graph/TList.hs @@ -2,7 +2,6 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE EmptyDataDecls #-} diff --git a/src/Data/RDF/Graph/TPatriciaTree.hs b/src/Data/RDF/Graph/TPatriciaTree.hs index 09e3276..892b23f 100644 --- a/src/Data/RDF/Graph/TPatriciaTree.hs +++ b/src/Data/RDF/Graph/TPatriciaTree.hs @@ -2,7 +2,7 @@ {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DataKinds #-} {-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE GeneralizedNewtypeDeriving, FlexibleInstances, BangPatterns #-} +{-# LANGUAGE FlexibleInstances, BangPatterns #-} {-# LANGUAGE EmptyDataDecls #-} module Data.RDF.Graph.TPatriciaTree (TPatriciaTree) where diff --git a/src/Data/RDF/IRI.hs b/src/Data/RDF/IRI.hs index d901792..6477f9e 100644 --- a/src/Data/RDF/IRI.hs +++ b/src/Data/RDF/IRI.hs @@ -169,8 +169,8 @@ iriParser = do scheme <- Just <$> schemeParser _ <- P.string ":" "Missing colon after scheme" (authority, path) <- ihierPartParser - query <- P.option Nothing (Just <$> iqueryParser) - fragment <- P.option Nothing (Just <$> ifragmentParser) + query <- optional iqueryParser + fragment <- optional ifragmentParser return (IRIRef scheme authority path query fragment) -- ihier-part = "//" iauthority ipath-abempty @@ -194,8 +194,8 @@ ihierPartParser = irelativeRefParser :: Parser IRIRef irelativeRefParser = do (authority, path) <- irelativePartParser - query <- P.option Nothing (Just <$> iqueryParser) - fragment <- P.option Nothing (Just <$> ifragmentParser) + query <- optional iqueryParser + fragment <- optional ifragmentParser return (IRIRef Nothing authority path query fragment) -- irelative-part = "//" iauthority ipath-abempty @@ -212,9 +212,9 @@ irelativePartParser = -- iauthority = [ iuserinfo "@" ] ihost [ ":" port ] iauthorityParser :: Parser Authority iauthorityParser = - Authority <$> P.option Nothing (Just <$> (iuserInfoParser <* P.string "@")) + Authority <$> optional (iuserInfoParser <* P.string "@") <*> ihostParser - <*> P.option Nothing (Just <$> (P.string ":" *> portParser)) + <*> optional (P.string ":" *> portParser) "Authority" -- iuserinfo = *( iunreserved / pct-encoded / sub-delims / ":" ) diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index eb834e3..07c3c10 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -221,7 +221,7 @@ isMetaAttr = isA (== "rdf:about") -- -- And that specifically: -- --- +-- -- foo -- -- @@ -279,7 +279,7 @@ parsePredicatesFromChildren = updateState , second hasPredicateAttr :-> (defaultA <+> (mkBlankNode &&& arr id >>> arr2A parsePredicateAttr)) , this :-> defaultA ] - + -- See: Issue http://www.w3.org/2000/03/rdf-tracking/#rdfms-rdf-names-use -- section: Illegal or unusual use of names from the RDF namespace -- @@ -318,7 +318,7 @@ validPropElementName = proc (state,predXml) -> do parseObjectsFromChildren :: forall a. (ArrowIf a, ArrowXml a, ArrowState GParseState a) => LParseState -> Predicate -> a XmlTree Triple parseObjectsFromChildren s p = - choiceA + choiceA [ isText :-> (neg( isWhiteSpace) >>> getText >>> arr (Triple (stateSubject s) p . mkLiteralNode s)) , isElem :-> (parseObjectDescription) ] @@ -443,7 +443,7 @@ mkUNode = choiceA [ (arr (isJust . unodeValidate . T.pack)) :-> (arr (unode . T. -- |Make a UNode from a rdf:ID element, expanding relative URIs mkRelativeNode :: forall a. (ArrowXml a) => LParseState -> a XmlTree Node -mkRelativeNode s = (getAttrValue "rdf:ID" >>> (arrL (maybeToList . xmlName)) >>> arr (\x -> '#':x)) &&& baseUrl +mkRelativeNode s = (getAttrValue "rdf:ID" >>> (arrL (maybeToList . xmlName)) >>> arr ('#':)) &&& baseUrl >>> expandURI >>> arr (unode . T.pack) where baseUrl = constA (case stateBaseUrl s of BaseUrl b -> T.unpack b) diff --git a/testsuite/tests/Data/RDF/IRITests.hs b/testsuite/tests/Data/RDF/IRITests.hs index 09d0664..3b5197a 100644 --- a/testsuite/tests/Data/RDF/IRITests.hs +++ b/testsuite/tests/Data/RDF/IRITests.hs @@ -1,6 +1,3 @@ ---{-# LANGUAGE DeriveGeneric #-} ---{-# LANGUAGE TypeFamilies #-} ---{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} module Data.RDF.IRITests diff --git a/testsuite/tests/Data/RDF/PropertyTests.hs b/testsuite/tests/Data/RDF/PropertyTests.hs index a5a0fee..75fd8d4 100644 --- a/testsuite/tests/Data/RDF/PropertyTests.hs +++ b/testsuite/tests/Data/RDF/PropertyTests.hs @@ -11,6 +11,7 @@ import Data.RDF.Namespace hiding (rdf) import qualified Data.Text as T import Test.QuickCheck import Data.List +import Data.Semigroup ((<>)) import qualified Data.Set as Set import qualified Data.Map as Map import Control.Monad @@ -563,9 +564,10 @@ datatypes :: [T.Text] datatypes = map (mkUri xsd . T.pack) ["string", "int", "token"] uris :: [T.Text] -uris = - map (mkUri ex) [T.pack n `T.append` T.pack (show (i::Int)) | n <- ["foo", "bar", "quz", "zak"], i <- [0..2]] - ++ [T.pack "ex:" `T.append` T.pack n `T.append` T.pack (show (i::Int)) | n <- ["s", "p", "o"], i <- [1..3]] +uris = [mkUri ex (n <> T.pack (show (i :: Int))) + | n <- ["foo", "bar", "quz", "zak"], i <- [0 .. 2]] + <> ["ex:" <> n <> T.pack (show (i::Int)) + | n <- ["s", "p", "o"], i <- [1..3]] plainliterals :: [LValue] plainliterals = [plainLL lit lang | lit <- litvalues, lang <- languages] @@ -580,10 +582,10 @@ unodes :: [Node] unodes = map UNode uris bnodes :: [ Node] -bnodes = map (BNode . \i -> T.pack ":_genid" `T.append` T.pack (show (i::Int))) [1..5] +bnodes = map (BNode . \i -> T.pack ":_genid" <> T.pack (show (i::Int))) [1..5] lnodes :: [Node] -lnodes = [LNode lit | lit <- plainliterals ++ typedliterals] +lnodes = [LNode lit | lit <- plainliterals <> typedliterals] -- maximum number of triples maxN :: Int @@ -612,9 +614,9 @@ arbitraryTs = do sequence [arbitrary | _ <- [1 .. n]] arbitraryS, arbitraryP, arbitraryO :: Gen Node -arbitraryS = oneof $ map return $ unodes ++ bnodes +arbitraryS = oneof $ map return $ unodes <> bnodes arbitraryP = oneof $ map return unodes -arbitraryO = oneof $ map return $ unodes ++ bnodes ++ lnodes +arbitraryO = oneof $ map return $ unodes <> bnodes <> lnodes ---------------------------------------------------- -- Unit test cases -- From c8f61f89d0ee4004721eca62e7a79060b556053e Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Wed, 22 May 2019 16:27:33 +0200 Subject: [PATCH 05/39] Replace `++` with `<>` --- .hlint.yaml | 2 +- bench/MainCriterion.hs | 43 ++++++++++--------- src/Data/RDF/Graph/AdjHashMap.hs | 7 +-- src/Data/RDF/Graph/HashMapSP.hs | 35 +++++++-------- src/Data/RDF/Graph/MapSP.hs | 32 +++++++------- src/Data/RDF/Graph/TList.hs | 3 +- src/Data/RDF/Graph/TPatriciaTree.hs | 30 ++++++------- src/Data/RDF/Query.hs | 3 +- src/Data/RDF/Types.hs | 12 +++--- src/Rdf4hParseMain.hs | 39 +++++++++-------- src/Rdf4hQueryMain.hs | 16 +++---- src/Text/RDF/RDF4H/NTriplesParser.hs | 12 +++--- src/Text/RDF/RDF4H/ParserUtils.hs | 3 +- src/Text/RDF/RDF4H/TurtleParser.hs | 17 ++++---- src/Text/RDF/RDF4H/XmlParser.hs | 11 ++--- .../RDF/RDF4H/TurtleParser_ConformanceTest.hs | 17 ++++---- .../tests/Text/RDF/RDF4H/XmlParser_Test.hs | 19 ++++---- testsuite/tests/W3C/Manifest.hs | 7 +-- testsuite/tests/W3C/NTripleTest.hs | 4 +- testsuite/tests/W3C/RdfXmlTest.hs | 3 +- testsuite/tests/W3C/TurtleTest.hs | 3 +- testsuite/tests/W3C/W3CAssertions.hs | 9 ++-- 22 files changed, 171 insertions(+), 156 deletions(-) diff --git a/.hlint.yaml b/.hlint.yaml index 3f5ae3b..4057568 100644 --- a/.hlint.yaml +++ b/.hlint.yaml @@ -44,7 +44,7 @@ # - group: {name: dollar, enabled: true} # # Generalise map to fmap, ++ to <> -# - group: {name: generalise, enabled: true} +- group: {name: generalise, enabled: true} # Ignore some builtin hints diff --git a/bench/MainCriterion.hs b/bench/MainCriterion.hs index 69f3f67..ce3c24e 100644 --- a/bench/MainCriterion.hs +++ b/bench/MainCriterion.hs @@ -4,6 +4,7 @@ module Main where import Prelude hiding (readFile) +import Data.Semigroup ((<>)) import Criterion import Criterion.Types import Criterion.Main @@ -85,20 +86,20 @@ main = defaultMainWith , bgroup "query" - (queryBench "TList" triplesList ++ + (queryBench "TList" triplesList <> queryBench "AdjHashMap" adjMap - -- queryBench "SP" mapSP ++ queryBench "HashSP" hashMapSP + -- queryBench "SP" mapSP <> queryBench "HashSP" hashMapSP ) , bgroup "select" - (selectBench "TList" triplesList ++ + (selectBench "TList" triplesList <> selectBench "AdjHashMap" adjMap - -- selectBench "SP" mapSP ++ selectBench "HashSP" hashMapSP + -- selectBench "SP" mapSP <> selectBench "HashSP" hashMapSP ) , bgroup "add-remove-triples" (addRemoveTriples "TList" triples (empty :: RDF TList) triplesList - ++ addRemoveTriples "AdjHashMap" triples (empty :: RDF AdjHashMap) adjMap + <> addRemoveTriples "AdjHashMap" triples (empty :: RDF AdjHashMap) adjMap ) , bgroup "count_triples" @@ -110,13 +111,13 @@ main = defaultMainWith selectBench :: Rdf a => String -> RDF a -> [Benchmark] selectBench label gr = - [ bench (label ++ " SPO") $ nf selectGr (subjSelect,predSelect,objSelect,gr) - , bench (label ++ " SP") $ nf selectGr (subjSelect,predSelect,selectNothing,gr) - , bench (label ++ " S") $ nf selectGr (subjSelect,selectNothing,selectNothing,gr) - , bench (label ++ " PO") $ nf selectGr (selectNothing,predSelect,objSelect,gr) - , bench (label ++ " SO") $ nf selectGr (subjSelect,selectNothing,objSelect,gr) - , bench (label ++ " P") $ nf selectGr (selectNothing,predSelect,selectNothing,gr) - , bench (label ++ " O") $ nf selectGr (selectNothing,selectNothing,objSelect,gr) + [ bench (label <> " SPO") $ nf selectGr (subjSelect,predSelect,objSelect,gr) + , bench (label <> " SP") $ nf selectGr (subjSelect,predSelect,selectNothing,gr) + , bench (label <> " S") $ nf selectGr (subjSelect,selectNothing,selectNothing,gr) + , bench (label <> " PO") $ nf selectGr (selectNothing,predSelect,objSelect,gr) + , bench (label <> " SO") $ nf selectGr (subjSelect,selectNothing,objSelect,gr) + , bench (label <> " P") $ nf selectGr (selectNothing,predSelect,selectNothing,gr) + , bench (label <> " O") $ nf selectGr (selectNothing,selectNothing,objSelect,gr) ] subjSelect, predSelect, objSelect, selectNothing :: Maybe (Node -> Bool) @@ -133,19 +134,19 @@ queryNothing = Nothing queryBench :: Rdf a => String -> RDF a -> [Benchmark] queryBench label gr = - [ bench (label ++ " SPO") $ nf queryGr (subjQuery,predQuery,objQuery,gr) - , bench (label ++ " SP") $ nf queryGr (subjQuery,predQuery,queryNothing,gr) - , bench (label ++ " S") $ nf queryGr (subjQuery,queryNothing,queryNothing,gr) - , bench (label ++ " PO") $ nf queryGr (queryNothing,predQuery,objQuery,gr) - , bench (label ++ " SO") $ nf queryGr (subjQuery,queryNothing,objQuery,gr) - , bench (label ++ " P") $ nf queryGr (queryNothing,predQuery,queryNothing,gr) - , bench (label ++ " O") $ nf queryGr (queryNothing,queryNothing,objQuery,gr) + [ bench (label <> " SPO") $ nf queryGr (subjQuery,predQuery,objQuery,gr) + , bench (label <> " SP") $ nf queryGr (subjQuery,predQuery,queryNothing,gr) + , bench (label <> " S") $ nf queryGr (subjQuery,queryNothing,queryNothing,gr) + , bench (label <> " PO") $ nf queryGr (queryNothing,predQuery,objQuery,gr) + , bench (label <> " SO") $ nf queryGr (subjQuery,queryNothing,objQuery,gr) + , bench (label <> " P") $ nf queryGr (queryNothing,predQuery,queryNothing,gr) + , bench (label <> " O") $ nf queryGr (queryNothing,queryNothing,objQuery,gr) ] addRemoveTriples :: (NFData a,NFData (RDF a), Rdf a) => String -> Triples -> RDF a -> RDF a -> [Benchmark] addRemoveTriples lbl triples emptyGr populatedGr = - [ bench (lbl ++ "-add-triples") $ nf addTriples (triples,emptyGr) - , bench (lbl ++ "-remove-triples") $ nf removeTriples (triples,populatedGr) + [ bench (lbl <> "-add-triples") $ nf addTriples (triples,emptyGr) + , bench (lbl <> "-remove-triples") $ nf removeTriples (triples,populatedGr) ] addTriples :: Rdf a => (Triples,RDF a) -> RDF a diff --git a/src/Data/RDF/Graph/AdjHashMap.hs b/src/Data/RDF/Graph/AdjHashMap.hs index 0690cae..5d9d545 100644 --- a/src/Data/RDF/Graph/AdjHashMap.hs +++ b/src/Data/RDF/Graph/AdjHashMap.hs @@ -12,6 +12,7 @@ module Data.RDF.Graph.AdjHashMap (AdjHashMap) where import Prelude hiding (pred) +import Data.Semigroup ((<>)) import Data.List import Data.Binary (Binary) import Data.RDF.Types @@ -102,16 +103,16 @@ instance Rdf AdjHashMap where -- show (AdjHashMap ((spoMap, _), _, _)) = -- let ts = concatMap (uncurry tripsSubj) subjPredMaps -- where subjPredMaps = HashMap.toList spoMap --- in concatMap (\t -> show t ++ "\n") ts +-- in concatMap (\t -> show t <> "\n") ts showGraph' :: RDF AdjHashMap -> String showGraph' ((AdjHashMap ((spoMap, _), _, _))) = let ts = concatMap (uncurry tripsSubj) subjPredMaps where subjPredMaps = HashMap.toList spoMap - in concatMap (\t -> show t ++ "\n") ts + in concatMap (\t -> show t <> "\n") ts -- instance Show (RDF AdjHashMap) where --- show gr = concatMap (\t -> show t ++ "\n") (triplesOf gr) +-- show gr = concatMap (\t -> show t <> "\n") (triplesOf gr) -- some convenience type alias for readability diff --git a/src/Data/RDF/Graph/HashMapSP.hs b/src/Data/RDF/Graph/HashMapSP.hs index 09b5d54..22f2dab 100644 --- a/src/Data/RDF/Graph/HashMapSP.hs +++ b/src/Data/RDF/Graph/HashMapSP.hs @@ -10,6 +10,7 @@ module Data.RDF.Graph.HashMapSP (HashSP) where import Prelude hiding (pred) +import Data.Semigroup ((<>)) import Control.DeepSeq (NFData) import Data.RDF.Types import Data.RDF.Query @@ -46,15 +47,15 @@ instance Rdf HashSP where -- instance Show (HashSP) where -- show (HashSP (tsMap,_,_)) = -- let ts = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . HashMap.toList) tsMap --- in concatMap (\t -> show t ++ "\n") ts +-- in concatMap (\t -> show t <> "\n") ts showGraph' :: RDF HashSP -> String showGraph' (HashSP (tsMap,_,_)) = let ts = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . HashMap.toList) tsMap - in concatMap (\t -> show t ++ "\n") ts + in concatMap (\t -> show t <> "\n") ts -- instance Show (HashSP) where --- show gr = concatMap (\t -> show t ++ "\n") (triplesOf gr) +-- show gr = concatMap (\t -> show t <> "\n") (triplesOf gr) type SPMap = HashMap (Subject,Predicate) [Object] @@ -76,7 +77,7 @@ mkRdf' :: Triples -> Maybe BaseUrl -> PrefixMappings -> RDF HashSP mkRdf' triples baseURL pms = HashSP (tsMap, baseURL, pms) where tsMap = sortAndGroup triples - sortAndGroup xs = HashMap.fromListWith (++) [((s,p), [o]) | Triple s p o <- xs] + sortAndGroup xs = HashMap.fromListWith (<>) [((s,p), [o]) | Triple s p o <- xs] triplesOf' :: RDF HashSP -> Triples triplesOf' (HashSP (tsMap,_,_)) = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . HashMap.toList) tsMap @@ -92,47 +93,47 @@ select' (HashSP (tsMap,_,_)) Nothing (Just pSelector) Nothing = HashMap.foldrWithKey findTripleWithP [] tsMap where findTripleWithP (s,p) oList ts = if pSelector p - then map (Triple s p) oList ++ ts + then map (Triple s p) oList <> ts else ts select' (HashSP (tsMap,_,_)) Nothing Nothing (Just oSelector) = HashMap.foldrWithKey findTripleWithS [] tsMap where - findTripleWithS (s,p) oList ts = map (Triple s p) (filter oSelector oList) ++ ts + findTripleWithS (s,p) oList ts = map (Triple s p) (filter oSelector oList) <> ts select' (HashSP (tsMap,_,_)) Nothing (Just pSelector) (Just oSelector) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if pSelector p - then map (Triple s p) (filter oSelector oList) ++ ts + then map (Triple s p) (filter oSelector oList) <> ts else ts select' (HashSP (tsMap,_,_)) (Just sSelector) Nothing Nothing = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s - then map (Triple s p) oList ++ ts + then map (Triple s p) oList <> ts else ts select' (HashSP (tsMap,_,_)) (Just sSelector) (Just pSelector) Nothing = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s && pSelector p - then map (Triple s p) oList ++ ts + then map (Triple s p) oList <> ts else ts select' (HashSP (tsMap,_,_)) (Just sSelector) Nothing (Just oSelector) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s - then map (Triple s p) (filter oSelector oList) ++ ts + then map (Triple s p) (filter oSelector oList) <> ts else ts select' (HashSP (tsMap,_,_)) (Just sSelector) (Just pSelector) (Just oSelector) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s && pSelector p - then map (Triple s p) (filter oSelector oList) ++ ts + then map (Triple s p) (filter oSelector oList) <> ts else ts query' :: RDF HashSP -> Maybe Subject -> Maybe Predicate -> Maybe Object -> Triples @@ -143,26 +144,26 @@ query' (HashSP (tsMap,_,_)) Nothing (Just p) Nothing = HashMap.foldrWithKey findTripleWithP [] tsMap where findTripleWithP (s,p') oList ts = if p == p' - then map (Triple s p) oList ++ ts + then map (Triple s p) oList <> ts else ts query' (HashSP (tsMap,_,_)) Nothing Nothing (Just o) = HashMap.foldrWithKey findTripleWithS [] tsMap where - findTripleWithS (s,p) oList ts = map (Triple s p) (filter (== o) oList) ++ ts + findTripleWithS (s,p) oList ts = map (Triple s p) (filter (== o) oList) <> ts query' (HashSP (tsMap,_,_)) Nothing (Just p) (Just o) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p') oList ts = if p == p' - then map (Triple s p) (filter (== o) oList) ++ ts + then map (Triple s p) (filter (== o) oList) <> ts else ts query' (HashSP (tsMap,_,_)) (Just s) Nothing Nothing = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p) oList ts = if s == s' - then map (Triple s p) oList ++ ts + then map (Triple s p) oList <> ts else ts -- optimal pattern for this RDF HashSP instance @@ -173,12 +174,12 @@ query' (HashSP (tsMap,_,_)) (Just s) Nothing (Just o) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p) oList ts = if s == s' - then map (Triple s p) (filter (== o) oList) ++ ts + then map (Triple s p) (filter (== o) oList) <> ts else ts query' (HashSP (tsMap,_,_)) (Just s) (Just p) (Just o) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p') oList ts = if s == s' && p == p' - then map (Triple s p) (filter (== o) oList) ++ ts + then map (Triple s p) (filter (== o) oList) <> ts else ts diff --git a/src/Data/RDF/Graph/MapSP.hs b/src/Data/RDF/Graph/MapSP.hs index 7ff0920..0825653 100644 --- a/src/Data/RDF/Graph/MapSP.hs +++ b/src/Data/RDF/Graph/MapSP.hs @@ -71,12 +71,12 @@ instance Rdf SP where -- instance Show SP where -- show (SP (tsMap,_,_)) = -- let ts = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . Map.toList) tsMap --- in concatMap (\t -> show t ++ "\n") ts +-- in concatMap (\t -> show t <> "\n") ts showGraph' :: RDF SP -> String showGraph' (SP (tsMap,_,_)) = let ts = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . Map.toList) tsMap - in concatMap (\t -> show t ++ "\n") ts + in concatMap (\t -> show t <> "\n") ts type SPMap = Map (Subject,Predicate) [Object] @@ -98,7 +98,7 @@ mkRdf' :: Triples -> Maybe BaseUrl -> PrefixMappings -> RDF SP mkRdf' triples baseURL pms = SP (tsMap, baseURL, pms) where tsMap = sortAndGroup triples - sortAndGroup xs = Map.fromListWith (++) [((s,p), [o]) | Triple s p o <- xs] + sortAndGroup xs = Map.fromListWith (<>) [((s,p), [o]) | Triple s p o <- xs] triplesOf' :: RDF SP -> Triples triplesOf' (SP (tsMap,_,_)) = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . Map.toList) tsMap @@ -114,47 +114,47 @@ select' (SP (tsMap,_,_)) Nothing (Just pSelector) Nothing = Map.foldrWithKey findTripleWithP [] tsMap where findTripleWithP (s,p) oList ts = if pSelector p - then map (Triple s p) oList ++ ts + then map (Triple s p) oList <> ts else ts select' (SP (tsMap,_,_)) Nothing Nothing (Just oSelector) = Map.foldrWithKey findTripleWithS [] tsMap where - findTripleWithS (s,p) oList ts = map (Triple s p) (filter oSelector oList) ++ ts + findTripleWithS (s,p) oList ts = map (Triple s p) (filter oSelector oList) <> ts select' (SP (tsMap,_,_)) Nothing (Just pSelector) (Just oSelector) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if pSelector p - then map (Triple s p) (filter oSelector oList) ++ ts + then map (Triple s p) (filter oSelector oList) <> ts else ts select' (SP (tsMap,_,_)) (Just sSelector) Nothing Nothing = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s - then map (Triple s p) oList ++ ts + then map (Triple s p) oList <> ts else ts select' (SP (tsMap,_,_)) (Just sSelector) (Just pSelector) Nothing = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s && pSelector p - then map (Triple s p) oList ++ ts + then map (Triple s p) oList <> ts else ts select' (SP (tsMap,_,_)) (Just sSelector) Nothing (Just oSelector) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s - then map (Triple s p) (filter oSelector oList) ++ ts + then map (Triple s p) (filter oSelector oList) <> ts else ts select' (SP (tsMap,_,_)) (Just sSelector) (Just pSelector) (Just oSelector) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s && pSelector p - then map (Triple s p) (filter oSelector oList) ++ ts + then map (Triple s p) (filter oSelector oList) <> ts else ts query' :: RDF SP -> Maybe Subject -> Maybe Predicate -> Maybe Object -> Triples @@ -165,26 +165,26 @@ query' (SP (tsMap,_,_)) Nothing (Just p) Nothing = Map.foldrWithKey findTripleWithP [] tsMap where findTripleWithP (s,p') oList ts = if p == p' - then map (Triple s p) oList ++ ts + then map (Triple s p) oList <> ts else ts query' (SP (tsMap,_,_)) Nothing Nothing (Just o) = Map.foldrWithKey findTripleWithS [] tsMap where - findTripleWithS (s,p) oList ts = map (Triple s p) (filter (== o) oList) ++ ts + findTripleWithS (s,p) oList ts = map (Triple s p) (filter (== o) oList) <> ts query' (SP (tsMap,_,_)) Nothing (Just p) (Just o) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p') oList ts = if p == p' - then map (Triple s p) (filter (== o) oList) ++ ts + then map (Triple s p) (filter (== o) oList) <> ts else ts query' (SP (tsMap,_,_)) (Just s) Nothing Nothing = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p) oList ts = if s == s' - then map (Triple s p) oList ++ ts + then map (Triple s p) oList <> ts else ts -- optimal pattern for this SP instance @@ -195,12 +195,12 @@ query' (SP (tsMap,_,_)) (Just s) Nothing (Just o) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p) oList ts = if s == s' - then map (Triple s p) (filter (== o) oList) ++ ts + then map (Triple s p) (filter (== o) oList) <> ts else ts query' (SP (tsMap,_,_)) (Just s) (Just p) (Just o) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p') oList ts = if s == s' && p == p' - then map (Triple s p) (filter (== o) oList) ++ ts + then map (Triple s p) (filter (== o) oList) <> ts else ts diff --git a/src/Data/RDF/Graph/TList.hs b/src/Data/RDF/Graph/TList.hs index e5d3b39..6578eb1 100644 --- a/src/Data/RDF/Graph/TList.hs +++ b/src/Data/RDF/Graph/TList.hs @@ -19,6 +19,7 @@ module Data.RDF.Graph.TList (TList) where import Prelude +import Data.Semigroup ((<>)) import Control.DeepSeq (NFData) import Data.Binary import Data.RDF.Namespace @@ -70,7 +71,7 @@ instance Rdf TList where showGraph = showGraph' showGraph' :: RDF TList -> String -showGraph' gr = concatMap (\t -> show t ++ "\n") (expandTriples gr) +showGraph' gr = concatMap (\t -> show t <> "\n") (expandTriples gr) prefixMappings' :: RDF TList -> PrefixMappings prefixMappings' (TListC(_, _, pms)) = pms diff --git a/src/Data/RDF/Graph/TPatriciaTree.hs b/src/Data/RDF/Graph/TPatriciaTree.hs index 892b23f..3108d17 100644 --- a/src/Data/RDF/Graph/TPatriciaTree.hs +++ b/src/Data/RDF/Graph/TPatriciaTree.hs @@ -122,7 +122,7 @@ mkTriples idxLookup thisNode adjsIn adjsOut = let o = fromJust (IntMap.lookup objIdx idxLookup) in Triple thisNode predNode o ) adjsOut - in ts1 ++ ts2 + in ts1 <> ts2 select' :: RDF TPatriciaTree -> NodeSelector -> NodeSelector -> NodeSelector -> Triples select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel = @@ -137,7 +137,7 @@ select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel ts2 = if fromJust maybeSubjSel thisNode then mkTriples idxLookup thisNode [] adjsOut else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isNothing maybeSubjSel && isJust maybePredSel && isNothing maybeObjSel = let adjsIn' = filter (\(p,_idxSubj) -> fromJust maybePredSel p ) adjsIn adjsOut' = filter (\(p,_idxObj) -> fromJust maybePredSel p ) adjsOut @@ -147,7 +147,7 @@ select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel ts2 = if not (null adjsOut') then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isNothing maybeSubjSel && isNothing maybePredSel && isJust maybeObjSel = let adjsOut' = filter (\(_p,idxObj) -> fromJust maybeObjSel (fromJust (IntMap.lookup idxObj idxLookup)) ) adjsOut @@ -155,7 +155,7 @@ select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel ts2 = if fromJust maybeObjSel thisNode then mkTriples idxLookup thisNode adjsIn [] else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isJust maybeSubjSel && isJust maybePredSel && isNothing maybeObjSel = let adjsIn' = filter (\(p,idxSubj) -> fromJust maybeSubjSel (fromJust (IntMap.lookup idxSubj idxLookup)) @@ -165,7 +165,7 @@ select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel ts2 = if fromJust maybeSubjSel thisNode then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isJust maybeSubjSel && isNothing maybePredSel && isJust maybeObjSel = let adjsIn' = filter (\(_p,idxSubj) -> fromJust maybeSubjSel (fromJust (IntMap.lookup idxSubj idxLookup)) ) adjsIn @@ -176,7 +176,7 @@ select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel ts2 = if fromJust maybeSubjSel thisNode then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isNothing maybeSubjSel && isJust maybePredSel && isJust maybeObjSel = let adjsIn' = filter (\(p,_idxSubj) -> fromJust maybePredSel p ) adjsIn @@ -186,7 +186,7 @@ select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel then mkTriples idxLookup thisNode adjsIn' [] else [] ts2 = mkTriples idxLookup thisNode [] adjsOut' - in ts1 ++ ts2 + in ts1 <> ts2 | isJust maybeSubjSel && isJust maybePredSel && isJust maybeObjSel = let adjsIn' = filter (\(p,idxSubj) -> fromJust maybeSubjSel (fromJust (IntMap.lookup idxSubj idxLookup)) @@ -199,7 +199,7 @@ select' (TPatriciaTree (g,idxLookup,_,_)) maybeSubjSel maybePredSel maybeObjSel ts2 = if fromJust maybeSubjSel thisNode then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 cfun ( _ , _ , _ , _) = undefined -- not sure why this pattern is needed to exhaust cfun arg patterns @@ -218,7 +218,7 @@ query' (TPatriciaTree (g,idxLookup,_,_)) maybeSubj maybePred maybeObj = ts2 = if thisNode == fromJust maybeSubj then mkTriples idxLookup thisNode [] adjsOut else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isNothing maybeSubj && isJust maybePred && isNothing maybeObj = let adjsIn' = filter (\(p,_idxSubj) -> p == fromJust maybePred ) adjsIn @@ -229,7 +229,7 @@ query' (TPatriciaTree (g,idxLookup,_,_)) maybeSubj maybePred maybeObj = ts2 = if not (null adjsOut') then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isNothing maybeSubj && isNothing maybePred && isJust maybeObj = let adjsOut' = filter (\(_p,idxObj) -> fromJust (IntMap.lookup idxObj idxLookup) == fromJust maybeObj ) adjsOut @@ -237,7 +237,7 @@ query' (TPatriciaTree (g,idxLookup,_,_)) maybeSubj maybePred maybeObj = ts2 = if thisNode == fromJust maybeObj then mkTriples idxLookup thisNode adjsIn [] else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isJust maybeSubj && isJust maybePred && isNothing maybeObj = let adjsIn' = filter (\(p,idxSubj) -> fromJust (IntMap.lookup idxSubj idxLookup) == fromJust maybeSubj @@ -247,7 +247,7 @@ query' (TPatriciaTree (g,idxLookup,_,_)) maybeSubj maybePred maybeObj = ts2 = if thisNode == fromJust maybeSubj then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isJust maybeSubj && isNothing maybePred && isJust maybeObj = let adjsIn' = filter (\(_p,idxSubj) -> fromJust (IntMap.lookup idxSubj idxLookup) == fromJust maybeSubj ) adjsIn @@ -258,7 +258,7 @@ query' (TPatriciaTree (g,idxLookup,_,_)) maybeSubj maybePred maybeObj = ts2 = if thisNode == fromJust maybeSubj then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 | isNothing maybeSubj && isJust maybePred && isJust maybeObj = let adjsIn' = filter (\(p,_idxSubj) -> p == fromJust maybePred ) adjsIn @@ -268,7 +268,7 @@ query' (TPatriciaTree (g,idxLookup,_,_)) maybeSubj maybePred maybeObj = then mkTriples idxLookup thisNode adjsIn' [] else [] ts2 = mkTriples idxLookup thisNode [] adjsOut' - in ts1 ++ ts2 + in ts1 <> ts2 | isJust maybeSubj && isJust maybePred && isJust maybeObj = let adjsIn' = filter (\(p,idxSubj) -> fromJust (IntMap.lookup idxSubj idxLookup) == fromJust maybeSubj @@ -281,7 +281,7 @@ query' (TPatriciaTree (g,idxLookup,_,_)) maybeSubj maybePred maybeObj = ts2 = if thisNode == fromJust maybeSubj then mkTriples idxLookup thisNode [] adjsOut' else [] - in ts1 ++ ts2 + in ts1 <> ts2 cfun ( _ , _ , _ , _ ) = undefined -- not sure why this pattern is needed to exhaust cfun arg patterns diff --git a/src/Data/RDF/Query.hs b/src/Data/RDF/Query.hs index f7a8e1e..8a34757 100644 --- a/src/Data/RDF/Query.hs +++ b/src/Data/RDF/Query.hs @@ -21,6 +21,7 @@ module Data.RDF.Query ( import Prelude hiding (pred) import Data.List import Data.Maybe (fromMaybe) +import Data.Semigroup ((<>)) import Data.RDF.Types import qualified Data.RDF.Namespace as NS import Data.Text (Text) @@ -133,7 +134,7 @@ isGraphIsomorphic g1 g2 = Automorphism.isIsomorphic g1' g2' where triples = expandTriples g triplesHashMap :: HashMap (Subject,Predicate) [Object] - triplesHashMap = HashMap.fromListWith (++) [((s,p), [o]) | Triple s p o <- triples] + triplesHashMap = HashMap.fromListWith (<>) [((s,p), [o]) | Triple s p o <- triples] triplesGrouped :: [((Subject,Predicate),[Object])] triplesGrouped = HashMap.toList triplesHashMap (dataGraph,_,_) = (graphFromEdges . fmap (\((s,p),os) -> (s,p,os))) triplesGrouped diff --git a/src/Data/RDF/Types.hs b/src/Data/RDF/Types.hs index 062ea6f..25f8a54 100644 --- a/src/Data/RDF/Types.hs +++ b/src/Data/RDF/Types.hs @@ -191,7 +191,7 @@ uriValidateString :: String -> Maybe String uriValidateString = fmap T.unpack . uriValidate . fromString isRdfURI :: Text -> Either ParseError Text -isRdfURI t = parse (iriFragment <* eof) ("Invalid URI: " ++ T.unpack t) t +isRdfURI t = parse (iriFragment <* eof) ("Invalid URI: " <> T.unpack t) t -- IRIREF from NTriples spec (without <> enclosing) -- [8] IRIREF ::= '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>' @@ -274,9 +274,9 @@ type Triples = [Triple] -- /subj/ must be a 'UNode' or 'BNode', and /pred/ must be a 'UNode'. triple :: Subject -> Predicate -> Object -> Triple triple s p o - | isLNode s = error $ "subject must be UNode or BNode: " ++ show s - | isLNode p = error $ "predicate must be UNode, not LNode: " ++ show p - | isBNode p = error $ "predicate must be UNode, not BNode: " ++ show p + | isLNode s = error $ "subject must be UNode or BNode: " <> show s + | isLNode p = error $ "predicate must be UNode, not LNode: " <> show p + | isBNode p = error $ "predicate must be UNode, not BNode: " <> show p | otherwise = Triple s p o -- |Answer if given node is a URI Ref node. @@ -652,7 +652,7 @@ fileSchemeToFilePath (UNode fileScheme) fixPrefix p@(p':p'') | p' == FP.pathSeparator = Just (FP.normalise p) -- Posix path | p' == '/' = Just (FP.normalise p'') -- Windows classic Path - | otherwise = Just ("\\\\" ++ FP.normalise p) -- Windows UNC Path + | otherwise = Just ("\\\\" <> FP.normalise p) -- Windows UNC Path fileSchemeToFilePath _ = Nothing -- | Converts a file path to a URI with "file:" scheme @@ -661,7 +661,7 @@ filePathToUri p | FP.isRelative p = Nothing | otherwise = Just . fromString . as_uri . FP.normalise $ p where - as_uri = ("file://" ++) . escapeURIString isAllowedInURI . as_posix . fix_prefix + as_uri = ("file://" <>) . escapeURIString isAllowedInURI . as_posix . fix_prefix fix_prefix p' = case (FP.takeDrive p') of "/" -> p' '\\':'\\':_ -> drop 2 p' diff --git a/src/Rdf4hParseMain.hs b/src/Rdf4hParseMain.hs index efd159a..26992a8 100644 --- a/src/Rdf4hParseMain.hs +++ b/src/Rdf4hParseMain.hs @@ -7,6 +7,7 @@ module Main where import Data.RDF +import Data.Semigroup ((<>)) import qualified Data.Text as T import qualified Data.Text.IO as TIO @@ -32,7 +33,7 @@ main = when (null args) (ioError (userError - ("\n\n" ++ "INPUT-URI required\n\n" ++ usageInfo header options))) + ("\n\n" <> "INPUT-URI required\n\n" <> usageInfo header options))) let debug = Debug `elem` opts inputUri = head args inputFormat = getWithDefault (InputFormat "turtle") opts @@ -41,8 +42,8 @@ main = outputBaseUri = getWithDefault (OutputBaseUri inputBaseUri) opts unless (outputFormat == "ntriples" || outputFormat == "turtle") (hPrintf stderr - ("'" ++ - outputFormat ++ + ("'" <> + outputFormat <> "' is not a valid output format. Supported output formats are: ntriples, turtle\n") >> exitWith (ExitFailure 1)) when debug @@ -88,7 +89,7 @@ main = >>= \ (res :: Either ParseFailure (RDF TList)) -> write outputFormat docUri emptyPms res - (str, _) -> putStrLn ("Invalid format: " ++ str) >> exitFailure + (str, _) -> putStrLn ("Invalid format: " <> str) >> exitFailure write :: (Rdf a) => String -> Maybe T.Text -> PrefixMappings -> Either ParseFailure (RDF a) -> IO () write format docUri pms res = case res of @@ -98,7 +99,7 @@ write format docUri pms res = case res of doWriteRdf rdf = case format of "turtle" -> writeRdf (TurtleSerializer docUri pms) rdf "ntriples" -> writeRdf NTriplesSerializer rdf - unknown -> error $ "Unknown output format: " ++ unknown + unknown -> error $ "Unknown output format: " <> unknown -- Get the input base URI from the argument list or flags, using the -- first string arg as the default if not found in string args (as @@ -135,7 +136,7 @@ strValue (InputFormat s) = s strValue (InputBaseUri s) = s strValue (OutputFormat s) = s strValue (OutputBaseUri s) = s -strValue flag = error $ "No string value for flag: " ++ show flag +strValue flag = error $ "No string value for flag: " <> show flag -- The commandline arguments we accept. None are required. data Flag @@ -160,28 +161,28 @@ instance Eq Flag where -- The top part of the usage output. header :: String header = - "\nrdf4h_parse: an RDF parser and serializer\n\n" ++ - "\nUsage: rdf4h_parse [OPTION...] INPUT-URI [INPUT-BASE-URI]\n\n" ++ - " INPUT-URI a filename, URI or '-' for standard input (stdin).\n" ++ - " INPUT-BASE-URI the input/parser base URI or '-' for none.\n" ++ - " Default is INPUT-URI\n" ++ + "\nrdf4h_parse: an RDF parser and serializer\n\n" <> + "\nUsage: rdf4h_parse [OPTION...] INPUT-URI [INPUT-BASE-URI]\n\n" <> + " INPUT-URI a filename, URI or '-' for standard input (stdin).\n" <> + " INPUT-BASE-URI the input/parser base URI or '-' for none.\n" <> + " Default is INPUT-URI\n" <> " Equivalent to -I INPUT-BASE-URI, --input-base-uri INPUT-BASE-URI\n\n" options :: [OptDescr Flag] options = [ Option "h" ["help"] (NoArg Help) "Display this help, then exit" , Option "d" ["debug"] (NoArg Debug) "Print debug info (like INPUT-BASE-URI used, etc.)" - , Option "i" ["input"] (ReqArg InputFormat "FORMAT") $ "Set input format/parser to one of:\n" ++ - " turtle Turtle (default)\n" ++ - " ntriples N-Triples\n" ++ + , Option "i" ["input"] (ReqArg InputFormat "FORMAT") $ "Set input format/parser to one of:\n" <> + " turtle Turtle (default)\n" <> + " ntriples N-Triples\n" <> " xml RDF/XML" - , Option "I" ["input-base-uri"] (ReqArg InputBaseUri "URI") $ "Set the input/parser base URI. '-' for none.\n" ++ + , Option "I" ["input-base-uri"] (ReqArg InputBaseUri "URI") $ "Set the input/parser base URI. '-' for none.\n" <> " Default is INPUT-BASE-URI argument value.\n\n" - , Option "o" ["output"] (ReqArg OutputFormat "FORMAT") $ "Set output format/serializer to one of:\n" ++ - " ntriples N-Triples (default)\n" ++ + , Option "o" ["output"] (ReqArg OutputFormat "FORMAT") $ "Set output format/serializer to one of:\n" <> + " ntriples N-Triples (default)\n" <> " turtle Turtle" - , Option "O" ["output-base-uri"] (ReqArg OutputBaseUri "URI") $ "Set the output format/serializer base URI. '-' for none.\n" ++ + , Option "O" ["output-base-uri"] (ReqArg OutputBaseUri "URI") $ "Set the output format/serializer base URI. '-' for none.\n" <> " Default is input/parser base URI." ] @@ -189,4 +190,4 @@ compilerOpts :: [String] -> IO ([Flag], [String]) compilerOpts argv = case getOpt Permute options argv of (o,n,[] ) -> return (o,n) - (_,_,errs) -> ioError (userError ("\n\n" ++ concat errs ++ usageInfo header options)) + (_,_,errs) -> ioError (userError ("\n\n" <> concat errs <> usageInfo header options)) diff --git a/src/Rdf4hQueryMain.hs b/src/Rdf4hQueryMain.hs index ed07bfd..7ebe05d 100644 --- a/src/Rdf4hQueryMain.hs +++ b/src/Rdf4hQueryMain.hs @@ -12,21 +12,21 @@ data Flag header :: String header = - "\nrdf4h_query: utility for querying RDF data\n\n" ++ - "\nUsage: rdf4h_query [OPTION...] INPUT-URI [INPUT-BASE-URI]\n\n" ++ - " INPUT-URI a filename, URI or '-' for standard input (stdin).\n" ++ - " INPUT-BASE-URI the input/parser base URI or '-' for none.\n" ++ - " Default is INPUT-URI\n" ++ + "\nrdf4h_query: utility for querying RDF data\n\n" <> + "\nUsage: rdf4h_query [OPTION...] INPUT-URI [INPUT-BASE-URI]\n\n" <> + " INPUT-URI a filename, URI or '-' for standard input (stdin).\n" <> + " INPUT-BASE-URI the input/parser base URI or '-' for none.\n" <> + " Default is INPUT-URI\n" <> " Equivalent to -I INPUT-BASE-URI, --input-base-uri INPUT-BASE-URI\n\n" options :: [OptDescr Flag] options = [ Option "h" ["help"] (NoArg Help) "Display this help, then exit" , Option "v" ["verbose"] (NoArg Verbose) "Display extra information messages to stderr" - , Option "i" ["input"] (ReqArg InputFormat "FORMAT") $ "Set input format/parser to one of:\n" ++ - " turtle Turtle (default)\n" ++ + , Option "i" ["input"] (ReqArg InputFormat "FORMAT") $ "Set input format/parser to one of:\n" <> + " turtle Turtle (default)\n" <> " ntriples N-Triples" - , Option "I" ["input-base-uri"] (ReqArg InputBaseUri "URI") $ "Set the input/parser base URI. '-' for none.\n" ++ + , Option "I" ["input-base-uri"] (ReqArg InputBaseUri "URI") $ "Set the input/parser base URI. '-' for none.\n" <> " Default is INPUT-BASE-URI argument value.\n\n" ] diff --git a/src/Text/RDF/RDF4H/NTriplesParser.hs b/src/Text/RDF/RDF4H/NTriplesParser.hs index df060c1..6dedde3 100644 --- a/src/Text/RDF/RDF4H/NTriplesParser.hs +++ b/src/Text/RDF/RDF4H/NTriplesParser.hs @@ -109,13 +109,13 @@ nt_langtag :: (CharParsing m, Monad m) => m T.Text nt_langtag = do ss <- char '@' *> some (satisfy isLetter) rest <- concat <$> many (char '-' *> some (satisfy isAlphaNum) >>= \lang_str -> pure ('-':lang_str)) - pure (T.pack (ss ++ rest)) + pure (T.pack (ss <> rest)) -- [8] IRIREF nt_iriref :: (CharParsing m, Monad m) => m T.Text nt_iriref = between (char '<') (char '>') $ do raw_iri <- iriFragment - either (const empty) pure (validateIRI raw_iri) "Only absolute IRIs allowed in NTriples format, which this isn't: " ++ show raw_iri + either (const empty) pure (validateIRI raw_iri) "Only absolute IRIs allowed in NTriples format, which this isn't: " <> show raw_iri -- [153s] ECHAR nt_echar :: (CharParsing m, Monad m) => m Char @@ -251,11 +251,11 @@ handleAttoparsec :: (Rdf a) => T.Text -> Either ParseFailure (RDF a) handleAttoparsec bs = handleResult $ parse nt_ntripleDoc (T.encodeUtf8 bs) where handleResult res = case res of - Fail _i _contexts err -> Left $ ParseFailure $ "Parse failure: \n" ++ show err + Fail _i _contexts err -> Left $ ParseFailure $ "Parse failure: \n" <> show err -- error $ - -- "\nnot consumed: " ++ show i - -- ++ "\ncontexts: " ++ show contexts - -- ++ "\nerror: " ++ show err + -- "\nnot consumed: " <> show i + -- <> "\ncontexts: " <> show contexts + -- <> "\nerror: " <> show err Partial f -> handleResult (f (T.encodeUtf8 mempty)) Done _ ts -> Right $ mkRdf ts Nothing (PrefixMappings mempty) diff --git a/src/Text/RDF/RDF4H/ParserUtils.hs b/src/Text/RDF/RDF4H/ParserUtils.hs index 0c9cdd0..e73d1ed 100644 --- a/src/Text/RDF/RDF4H/ParserUtils.hs +++ b/src/Text/RDF/RDF4H/ParserUtils.hs @@ -10,6 +10,7 @@ import Data.RDF.Types import Control.Exception.Lifted import Network.HTTP.Conduit import Data.Text.Encoding (decodeUtf8) +import Data.Semigroup ((<>)) import qualified Data.ByteString.Lazy as BS import qualified Data.Text as T @@ -30,7 +31,7 @@ _parseURL parseFunc url = do case content of ConnectionTimeout -> return $ errResult "Connection timed out" - _ -> return $ errResult ("HttpExceptionRequest content: " ++ show content) + _ -> return $ errResult ("HttpExceptionRequest content: " <> show content) (InvalidUrlException{}) -> return $ errResult "Invalid URL exception" Right bs -> do diff --git a/src/Text/RDF/RDF4H/TurtleParser.hs b/src/Text/RDF/RDF4H/TurtleParser.hs index 9d1559a..43d5dd2 100644 --- a/src/Text/RDF/RDF4H/TurtleParser.hs +++ b/src/Text/RDF/RDF4H/TurtleParser.hs @@ -16,6 +16,7 @@ import Data.Char (toLower, toUpper, isDigit, isHexDigit) import qualified Data.Map as Map import Data.Map (Map) import Data.Maybe +import Data.Semigroup ((<>)) import Data.RDF.Types import Data.RDF.IRI import Data.RDF.Namespace @@ -190,7 +191,7 @@ t_pname_ns = do (_, _, _, pms, _, _, _, _) <- get case resolveQName pre pms of Just n -> pure n - Nothing -> unexpected ("Cannot resolve QName prefix: " ++ T.unpack pre) + Nothing -> unexpected ("Cannot resolve QName prefix: " <> T.unpack pre) -- grammar rules: [168s] PN_LOCAL -- [168s] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))? @@ -202,7 +203,7 @@ t_pn_local = do (t_pn_chars_str <|> string ":" <|> t_plx <|> try (string "." <* lookAhead (try recsve))) <|> (t_pn_chars_str <|> string ":" <|> t_plx <|> try (string "." *> notFollowedBy t_ws $> ".")) concat <$> many recsve - pure (T.pack (x ++ xs)) + pure (T.pack (x <> xs)) where satisfy_str = pure <$> satisfy isDigit t_pn_chars_str = pure <$> t_pn_chars @@ -571,8 +572,8 @@ addTripleForObject obj = do t <- getTriple s p put (bUrl, dUrl, i, pms, s, p, ts |> t, genMap) where - getTriple Nothing _ = unexpected $ "No Subject with which to create triple for: " ++ show obj - getTriple _ Nothing = unexpected $ "No Predicate with which to create triple for: " ++ show obj + getTriple Nothing _ = unexpected $ "No Subject with which to create triple for: " <> show obj + getTriple _ Nothing = unexpected $ "No Predicate with which to create triple for: " <> show obj getTriple (Just s') (Just p') = pure $ Triple s' p' obj @@ -632,7 +633,7 @@ parseStringAttoparsec bUrl docUrl t = handleResult' $ parse (evalStateT t_turtle where handleResult' res = case res of Fail _ _ err -> -- error err - Left $ ParseFailure $ "Parse failure: \n" ++ show err + Left $ ParseFailure $ "Parse failure: \n" <> show err Partial f -> handleResult' (f mempty) Done _ (ts,pms) -> Right $! mkRdf (F.toList ts) bUrl pms @@ -656,7 +657,7 @@ initialState bUrl docUrl = (bUrl, docUrl, 1, PrefixMappings mempty, Nothing, Not handleResult :: Rdf a => Maybe BaseUrl -> Either ParseError (Seq Triple, PrefixMappings) -> Either ParseFailure (RDF a) handleResult bUrl result = case result of - (Left err) -> Left (ParseFailure $ "Parse failure: \n" ++ show err) + (Left err) -> Left (ParseFailure $ "Parse failure: \n" <> show err) (Right (ts, pms)) -> Right $! mkRdf (F.toList ts) bUrl pms @@ -669,7 +670,7 @@ caseInsensitiveChar c = char (toLower c) <|> char (toUpper c) -- Match the string 's', accepting either lowercase or uppercase form of each character caseInsensitiveString :: (CharParsing m, Monad m) => String -> m String -caseInsensitiveString s = try (mapM caseInsensitiveChar s) "\"" ++ s ++ "\"" +caseInsensitiveString s = try (mapM caseInsensitiveChar s) "\"" <> s <> "\"" tryIriResolution :: (CharParsing m, Monad m) => Maybe BaseUrl -> Maybe T.Text -> T.Text -> m T.Text tryIriResolution mbUrl mdUrl iriFrag = tryIriResolution' mbUrl mdUrl @@ -677,4 +678,4 @@ tryIriResolution mbUrl mdUrl iriFrag = tryIriResolution' mbUrl mdUrl tryIriResolution' (Just (BaseUrl bIri)) _ = either err pure (resolveIRI bIri iriFrag) tryIriResolution' _ (Just dIri) = either err pure (resolveIRI dIri iriFrag) tryIriResolution' _ _ = either err pure (resolveIRI mempty iriFrag) - err m = unexpected $ "Cannot resolve IRI: " ++ m ++ " " ++ show (mbUrl, mdUrl, iriFrag) + err m = unexpected $ "Cannot resolve IRI: " <> m <> " " <> show (mbUrl, mdUrl, iriFrag) diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index 07c3c10..e56a2ef 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -21,6 +21,7 @@ import Data.Char import Data.List (isPrefixOf) import qualified Data.Map as Map (fromList) import Data.Maybe +import Data.Semigroup ((<>)) import Data.Typeable import Text.RDF.RDF4H.ParserUtils import Data.RDF.IRI @@ -135,7 +136,7 @@ addMetaData :: (ArrowXml a) => Maybe BaseUrl -> Maybe Text -> a XmlTree XmlTree addMetaData bUrlM dUrlM = mkelem "/" ( [ sattr "transfer-Message" "OK" , sattr "transfer-MimeType" "text/rdf" - ] ++ mkSource dUrlM ++ mkBase bUrlM + ] <> mkSource dUrlM <> mkBase bUrlM ) [ arr id ] where mkSource (Just dUrl) = [ sattr "source" (T.unpack dUrl) ] @@ -173,7 +174,7 @@ parseDescription = updateState where readTypeTriple :: (ArrowXml a) => LParseState -> a XmlTree Triple readTypeTriple state = getName >>> arr (Triple (stateSubject state) rdfType . unode . T.pack) replaceLiElems acc n (Triple s p o : rest) | p == (unode . T.pack) "rdf:li" = - replaceLiElems (Triple s ((unode . T.pack) ("rdf:_" ++ show n)) o : acc) (n + 1) rest + replaceLiElems (Triple s ((unode . T.pack) ("rdf:_" <> show n)) o : acc) (n + 1) rest replaceLiElems acc n (Triple s p o : rest) = replaceLiElems (Triple s p o : acc) n rest replaceLiElems acc _ [] = acc @@ -438,7 +439,7 @@ my_expandURI -- |Make a UNode from an absolute string mkUNode :: forall a. (ArrowIf a) => a String Node mkUNode = choiceA [ (arr (isJust . unodeValidate . T.pack)) :-> (arr (unode . T.pack)) - , arr (const True) :-> arr (\uri -> throw (ParserException ("Invalid URI: " ++ uri))) + , arr (const True) :-> arr (\uri -> throw (ParserException ("Invalid URI: " <> uri))) ] -- |Make a UNode from a rdf:ID element, expanding relative URIs @@ -459,11 +460,11 @@ xmlName str = go [] str go accum [] = Just accum go accum [s] = if isValid s - then go (accum++[s]) [] + then go (accum<>[s]) [] else Nothing go accum (s:ss) = if isValid s - then go (accum++[s]) ss + then go (accum<>[s]) ss else Nothing isValid c = isAlphaNum c || '_' == c diff --git a/testsuite/tests/Text/RDF/RDF4H/TurtleParser_ConformanceTest.hs b/testsuite/tests/Text/RDF/RDF4H/TurtleParser_ConformanceTest.hs index 62cdc02..418ce40 100644 --- a/testsuite/tests/Text/RDF/RDF4H/TurtleParser_ConformanceTest.hs +++ b/testsuite/tests/Text/RDF/RDF4H/TurtleParser_ConformanceTest.hs @@ -4,6 +4,7 @@ module Text.RDF.RDF4H.TurtleParser_ConformanceTest ( tests ) where +import Data.Semigroup ((<>)) -- Testing imports import Test.Tasty import Test.Tasty.HUnit as TU @@ -48,7 +49,7 @@ fpath :: String -> Int -> String -> String fpath name i ext = printf "data/ttl/conformance/%s-%02d.%s" name i ext :: String tests :: [TestTree] -tests = ts1 ++ ts2 ++ ts3 +tests = ts1 <> ts2 <> ts3 where ts1 = fmap checkGoodConformanceTest [0..29] ts2 = fmap checkBadConformanceTest [0..15] ts3 = fmap (uncurry checkGoodOtherTest) otherTestFiles @@ -82,21 +83,21 @@ checkBadConformanceTest i = -- Determines if graphs are equivalent, returning Nothing if so or else a diagnostic message. -- First graph is expected graph, second graph is actual. equivalent :: Rdf a => Either ParseFailure (RDF a) -> Either ParseFailure (RDF a) -> Maybe String -equivalent (Left e) _ = Just $ "Parse failure of the expected graph: " ++ show e -equivalent _ (Left e) = Just $ "Parse failure of the input graph: " ++ show e +equivalent (Left e) _ = Just $ "Parse failure of the expected graph: " <> show e +equivalent _ (Left e) = Just $ "Parse failure of the input graph: " <> show e equivalent (Right gr1) (Right gr2) = checkSize <|> (test $! zip gr1ts gr2ts) where gr1ts = uordered $ triplesOf gr1 gr2ts = uordered $ triplesOf gr2 length1 = length gr1ts length2 = length gr2ts - checkSize = if (length1 == length2) then Nothing else (Just $ "Size different. Expected: " ++ (show length1) ++ ", got: " ++ (show length2)) + checkSize = if (length1 == length2) then Nothing else (Just $ "Size different. Expected: " <> (show length1) <> ", got: " <> (show length2)) test [] = Nothing test ((t1,t2):ts) = maybe (test ts) pure (compareTriple t1 t2) compareTriple t1@(Triple s1 p1 o1) t2@(Triple s2 p2 o2) = if equalNodes s1 s2 && equalNodes p1 p2 && equalNodes o1 o2 then Nothing - else Just ("Expected:\n " ++ show t1 ++ "\nFound:\n " ++ show t2 ++ "\n") + else Just ("Expected:\n " <> show t1 <> "\nFound:\n " <> show t2 <> "\n") -- I'm not sure it's right to compare blank nodes with generated -- blank nodes. This is because parsing an already generated blank @@ -140,14 +141,14 @@ assertLoadSuccess, assertLoadFailure :: String -> IO (Either ParseFailure (RDF T assertLoadSuccess idStr exprGr = do g <- exprGr case g of - Left (ParseFailure err) -> TU.assertFailure $ idStr ++ err + Left (ParseFailure err) -> TU.assertFailure $ idStr <> err Right _ -> return () assertLoadFailure idStr exprGr = do g <- exprGr case g of Left _ -> return () - Right _ -> TU.assertFailure $ "Bad test " ++ idStr ++ " loaded successfully." + Right _ -> TU.assertFailure $ "Bad test " <> idStr <> " loaded successfully." assertEquivalent :: Rdf a => String -> IO (Either ParseFailure (RDF a)) -> IO (Either ParseFailure (RDF a)) -> TU.Assertion assertEquivalent testname r1 r2 = do @@ -155,7 +156,7 @@ assertEquivalent testname r1 r2 = do gr2 <- r2 case equivalent gr1 gr2 of Nothing -> return () - (Just msg) -> fail $ "Graph " ++ testname ++ " not equivalent to expected:\n" ++ msg + (Just msg) -> fail $ "Graph " <> testname <> " not equivalent to expected:\n" <> msg mkDocUrl :: Text -> String -> Int -> Maybe Text mkDocUrl baseDocUrl fname testNum = Just . fromString $ printf "%s%s-%02d.ttl" baseDocUrl fname testNum diff --git a/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs b/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs index e29554d..50cb702 100644 --- a/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs +++ b/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs @@ -7,6 +7,7 @@ module Text.RDF.RDF4H.XmlParser_Test -- todo: QuickCheck tests +import Data.Semigroup ((<>)) -- Testing imports import Test.Tasty import Test.Tasty.HUnit as TU @@ -33,7 +34,7 @@ tests = , testCase "NML2" test_parseXmlRDF_NML2 , testCase "NML3" test_parseXmlRDF_NML3 ] - ++ + <> map (uncurry checkGoodOtherTest) otherTestFiles otherTestFiles :: [(String, String)] @@ -89,7 +90,7 @@ testParse exRDF ex = case parsed of Right result -> assertBool - ("expected: " ++ show ex ++ "but got: " ++ show result) + ("expected: " <> show ex <> "but got: " <> show result) (isIsomorphic (result :: RDF TList) (ex :: RDF TList)) Left (ParseFailure err) -> assertFailure err @@ -352,7 +353,7 @@ assertEquivalent testname r1 r2 = do gr2 <- r2 case equivalent gr1 gr2 of Nothing -> return () - (Just msg) -> fail $ "Graph " ++ testname ++ " not equivalent to expected:\n" ++ msg + (Just msg) -> fail $ "Graph " <> testname <> " not equivalent to expected:\n" <> msg -- Determines if graphs are equivalent, returning Nothing if so or else a diagnostic message. -- First graph is expected graph, second graph is actual. @@ -371,13 +372,13 @@ equivalent (Right gr1) (Right gr2) = test $! zip gr1ts gr2ts compareTriple t1 t2 = if equalNodes s1 s2 && equalNodes p1 p2 && equalNodes o1 o2 then Nothing - else Just ("Expected:\n " ++ show t1 ++ "\nFound:\n " ++ show t2 ++ "\n") + else Just ("Expected:\n " <> show t1 <> "\nFound:\n " <> show t2 <> "\n") where (s1, p1, o1) = f t1 (s2, p2, o2) = f t2 f t = (subjectOf t, predicateOf t, objectOf t) - -- equalNodes (BNode fs1) (BNodeGen i) = T.reverse fs1 == T.pack ("_:genid" ++ show i) - -- equalNodes (BNode fs1) (BNodeGen i) = fs1 == T.pack ("_:genid" ++ show i) + -- equalNodes (BNode fs1) (BNodeGen i) = T.reverse fs1 == T.pack ("_:genid" <> show i) + -- equalNodes (BNode fs1) (BNodeGen i) = fs1 == T.pack ("_:genid" <> show i) -- I'm not sure it's right to compare blank nodes with generated -- blank nodes. This is because parsing an already generated blank @@ -402,14 +403,14 @@ assertLoadSuccess :: String -> IO (Either ParseFailure (RDF TList)) -> TU.Assert assertLoadSuccess idStr exprGr = do g <- exprGr case g of - Left (ParseFailure err) -> TU.assertFailure $ idStr ++ err + Left (ParseFailure err) -> TU.assertFailure $ idStr <> err Right _ -> return () -- assertLoadFailure idStr exprGr = do -- g <- exprGr -- case g of -- Left _ -> return () --- Right _ -> TU.assertFailure $ "Bad test " ++ idStr ++ " loaded successfully." +-- Right _ -> TU.assertFailure $ "Bad test " <> idStr <> " loaded successfully." handleLoad :: Either ParseFailure (RDF TList) -> Either ParseFailure (RDF TList) handleLoad res = @@ -423,7 +424,7 @@ normalize t = let s' = normalizeN $ subjectOf t o' = normalizeN $ objectOf t in triple s' p' o' normalizeN :: Node -> Node -normalizeN (BNodeGen i) = BNode (T.pack $ "_:genid" ++ show i) +normalizeN (BNodeGen i) = BNode (T.pack $ "_:genid" <> show i) normalizeN n = n -- The Base URI to be used for all conformance tests: diff --git a/testsuite/tests/W3C/Manifest.hs b/testsuite/tests/W3C/Manifest.hs index ec9196e..9efda6b 100644 --- a/testsuite/tests/W3C/Manifest.hs +++ b/testsuite/tests/W3C/Manifest.hs @@ -7,6 +7,7 @@ module W3C.Manifest ( TestEntry(..) ) where +import Data.Semigroup ((<>)) import Data.RDF.Graph.TList import Data.RDF.Query import Data.RDF.Types @@ -131,10 +132,10 @@ loadManifest manifestPath baseIRI = rdfToManifest :: RDF TList -> Manifest rdfToManifest rdf = Manifest desc tpls - where desc = lnodeText $ objectOf $ headDef (error ("query empty: subject mf:node & predicate mf:name in:\n\n" ++ show (triplesOf rdf))) descNode + where desc = lnodeText $ objectOf $ headDef (error ("query empty: subject mf:node & predicate mf:name in:\n\n" <> show (triplesOf rdf))) descNode -- FIXME: Inconsistent use of nodes for describing the manifest (W3C bug) descNode = query rdf (Just manifestNode) (Just rdfsLabel) Nothing - ++ query rdf (Just manifestNode) (Just mfName) Nothing + <> query rdf (Just manifestNode) (Just mfName) Nothing -- descNode = query rdf (Just manifestNode) (Just mfName) Nothing tpls = map (rdfToTestEntry rdf) $ rdfCollectionToList rdf collectionHead collectionHead = objectOf $ headDef (error "query: mf:node & mf:entries") $ query rdf (Just manifestNode) (Just mfEntries) Nothing @@ -156,7 +157,7 @@ triplesToTestEntry rdf ts = (UNode "http://www.w3.org/ns/rdftest#TestXMLNegativeSyntax") -> mkTestXMLNegativeSyntax ts (UNode "http://www.w3.org/ns/rdftest#TestNTriplesPositiveSyntax") -> mkTestNTriplesPositiveSyntax ts (UNode "http://www.w3.org/ns/rdftest#TestNTriplesNegativeSyntax") -> mkTestNTriplesNegativeSyntax ts - n -> error ("Unknown test case: " ++ show n) + n -> error ("Unknown test case: " <> show n) mkTestTurtleEval :: Triples -> TestEntry mkTestTurtleEval ts = TestTurtleEval { diff --git a/testsuite/tests/W3C/NTripleTest.hs b/testsuite/tests/W3C/NTripleTest.hs index e4ed915..3126545 100644 --- a/testsuite/tests/W3C/NTripleTest.hs +++ b/testsuite/tests/W3C/NTripleTest.hs @@ -3,6 +3,7 @@ module W3C.NTripleTest , testsAttoparsec ) where +import Data.Semigroup ((<>)) import Data.Maybe (fromJust) import Test.Tasty import qualified Test.Tasty.HUnit as TU @@ -36,11 +37,10 @@ mfEntryToTest testParser (TestNTriplesNegativeSyntax nm _ _ act') = let act = (UNode . fromJust . fileSchemeToFilePath) act' rdf = parseFile testParser (nodeURI act) :: IO (Either ParseFailure (RDF TList)) in TU.testCase (T.unpack nm) $ assertIsNotParsed rdf -mfEntryToTest _ x = error $ "unknown TestEntry pattern in mfEntryToTest: " ++ show x +mfEntryToTest _ x = error $ "unknown TestEntry pattern in mfEntryToTest: " <> show x testParserParsec :: NTriplesParserCustom testParserParsec = NTriplesParserCustom Parsec testParserAttoparsec :: NTriplesParserCustom testParserAttoparsec = NTriplesParserCustom Attoparsec - diff --git a/testsuite/tests/W3C/RdfXmlTest.hs b/testsuite/tests/W3C/RdfXmlTest.hs index 0c20cf5..0249663 100644 --- a/testsuite/tests/W3C/RdfXmlTest.hs +++ b/testsuite/tests/W3C/RdfXmlTest.hs @@ -4,6 +4,7 @@ module W3C.RdfXmlTest ( tests ) where +import Data.Semigroup ((<>)) import Data.Maybe (fromJust) import Test.Tasty import qualified Test.Tasty.HUnit as TU @@ -37,7 +38,7 @@ mfEntryToTest (TestXMLNegativeSyntax nm _ _ act') = let act = (UNode . fromJust . fileSchemeToFilePath) act' rdf = parseFile testParser (nodeURI act) :: IO (Either ParseFailure (RDF TList)) in TU.testCase (T.unpack nm) $ assertIsNotParsed rdf -mfEntryToTest x = error $ "unknown TestEntry pattern in mfEntryToTest: " ++ show x +mfEntryToTest x = error $ "unknown TestEntry pattern in mfEntryToTest: " <> show x mfBaseURIXml :: BaseUrl mfBaseURIXml = BaseUrl "http://www.w3.org/2013/RDFXMLTests/" diff --git a/testsuite/tests/W3C/TurtleTest.hs b/testsuite/tests/W3C/TurtleTest.hs index 4122ff8..49af93e 100644 --- a/testsuite/tests/W3C/TurtleTest.hs +++ b/testsuite/tests/W3C/TurtleTest.hs @@ -8,6 +8,7 @@ module W3C.TurtleTest import Test.Tasty import qualified Test.Tasty.HUnit as TU +import Data.Semigroup ((<>)) import Data.Maybe (fromJust) import qualified Data.Text as T @@ -46,7 +47,7 @@ mfEntryToTest parser (TestTurtleNegativeSyntax nm _ _ act') = let act = (UNode . fromJust . fileSchemeToFilePath) act' rdf = parseFile parser (nodeURI act) :: IO (Either ParseFailure (RDF TList)) in TU.testCase (T.unpack nm) $ assertIsNotParsed rdf -mfEntryToTest _ x = error $ "unknown TestEntry pattern in mfEntryToTest: " ++ show x +mfEntryToTest _ x = error $ "unknown TestEntry pattern in mfEntryToTest: " <> show x mfBaseURITurtle :: BaseUrl mfBaseURITurtle = BaseUrl "http://www.w3.org/2013/TurtleTests/" diff --git a/testsuite/tests/W3C/W3CAssertions.hs b/testsuite/tests/W3C/W3CAssertions.hs index 8cf6a88..11e6011 100644 --- a/testsuite/tests/W3C/W3CAssertions.hs +++ b/testsuite/tests/W3C/W3CAssertions.hs @@ -6,6 +6,7 @@ module W3C.W3CAssertions , nodeURI ) where +import Data.Semigroup ((<>)) import qualified Data.Text as T import Data.RDF import qualified Test.HUnit as TU @@ -20,7 +21,7 @@ assertIsIsomorphic :: IO (RDF TList) -> IO (RDF TList) -> IO () assertIsIsomorphic r1 r2 = do gr1 <- r1 gr2 <- r2 - TU.assertBool ("not isomorphic: " ++ show gr1 ++ " compared with " ++ show gr2) (isSame gr1 gr2) -- (isGraphIsomorphic gr1 gr2) + TU.assertBool ("not isomorphic: " <> show gr1 <> " compared with " <> show gr2) (isSame gr1 gr2) -- (isGraphIsomorphic gr1 gr2) where noBlankNodes g = (all noBlanks . expandTriples) g noBlanks (Triple s p o) = not (blankNode s) @@ -41,12 +42,12 @@ assertIsIsomorphic r1 r2 = do assertIsParsed :: IO (Either ParseFailure (RDF TList)) -> TU.Assertion assertIsParsed r1 = do gr1 <- r1 - TU.assertBool ("unable to parse, reason:\n" ++ show gr1) (isParsed gr1) + TU.assertBool ("unable to parse, reason:\n" <> show gr1) (isParsed gr1) assertIsNotParsed :: IO (Either ParseFailure (RDF TList)) -> TU.Assertion assertIsNotParsed r1 = do gr1 <- r1 - TU.assertBool ("parsed unexpectantly:\n" ++ show gr1) (not (isParsed gr1)) + TU.assertBool ("parsed unexpectantly:\n" <> show gr1) (not (isParsed gr1)) isParsed :: Either a b -> Bool isParsed (Left _) = False @@ -54,4 +55,4 @@ isParsed (Right _) = True nodeURI :: Node -> String nodeURI (UNode u) = T.unpack u -nodeURI node = error $ "W3CAssertions: unexpected node in `nodeURI`: " ++ show node +nodeURI node = error $ "W3CAssertions: unexpected node in `nodeURI`: " <> show node From a19f16429cb7f0ed1b3adc83464045c6b99c77be Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Wed, 22 May 2019 17:35:41 +0200 Subject: [PATCH 06/39] hlint fmap --- examples/ESWC.hs | 6 ++-- examples/ParseURLs.hs | 2 +- src/Data/RDF/Graph/HashMapSP.hs | 32 +++++++++---------- src/Data/RDF/Graph/MapSP.hs | 32 +++++++++---------- src/Data/RDF/Graph/TPatriciaTree.hs | 12 +++---- src/Data/RDF/Namespace.hs | 6 ++-- src/Data/RDF/Types.hs | 2 +- src/Text/RDF/RDF4H/TurtleSerializer.hs | 14 ++++---- src/Text/RDF/RDF4H/XmlParser.hs | 2 +- testsuite/tests/Data/RDF/PropertyTests.hs | 20 ++++++------ .../RDF/RDF4H/TurtleParser_ConformanceTest.hs | 2 +- .../tests/Text/RDF/RDF4H/XmlParser_Test.hs | 6 ++-- testsuite/tests/W3C/Manifest.hs | 4 +-- testsuite/tests/W3C/W3CAssertions.hs | 2 +- 14 files changed, 71 insertions(+), 71 deletions(-) diff --git a/examples/ESWC.hs b/examples/ESWC.hs index b0b8f39..2ef8cc7 100644 --- a/examples/ESWC.hs +++ b/examples/ESWC.hs @@ -13,8 +13,8 @@ heldByProp = "swc:heldBy" eswcCommitteeMembers :: RDF TList -> [T.Text] eswcCommitteeMembers graph = let triples = query graph (Just (unode eswcCommitteeURI)) (Just (unode heldByProp)) Nothing - memberURIs = map objectOf triples - in map + memberURIs = fmap objectOf triples + in fmap (\memberURI -> let (LNode (PlainL (firstName::T.Text))) = objectOf $ head $ query graph (Just memberURI) (Just (unode "foaf:firstName")) Nothing @@ -22,7 +22,7 @@ eswcCommitteeMembers graph = objectOf $ head $ query graph (Just memberURI) (Just (unode "foaf:lastName")) Nothing in (T.append firstName (T.append (T.pack " ") lastName))) memberURIs - + main :: IO () main = do result <- parseURL diff --git a/examples/ParseURLs.hs b/examples/ParseURLs.hs index bbfc286..8986ebe 100644 --- a/examples/ParseURLs.hs +++ b/examples/ParseURLs.hs @@ -11,7 +11,7 @@ timBernersLee :: IO () timBernersLee = do Right (rdf::RDF TList) <- parseURL (XmlParser Nothing Nothing) "http://www.w3.org/People/Berners-Lee/card.rdf" let ts = query rdf (Just (UNode "http://www.w3.org/2011/Talks/0331-hyderabad-tbl/data#talk")) (Just (UNode "dct:title")) Nothing - let talks = map (\(Triple _ _ (LNode (PlainL s))) -> s) ts + let talks = fmap (\(Triple _ _ (LNode (PlainL s))) -> s) ts print talks main :: IO () diff --git a/src/Data/RDF/Graph/HashMapSP.hs b/src/Data/RDF/Graph/HashMapSP.hs index 22f2dab..d21102d 100644 --- a/src/Data/RDF/Graph/HashMapSP.hs +++ b/src/Data/RDF/Graph/HashMapSP.hs @@ -51,7 +51,7 @@ instance Rdf HashSP where showGraph' :: RDF HashSP -> String showGraph' (HashSP (tsMap,_,_)) = - let ts = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . HashMap.toList) tsMap + let ts = (concatMap (\((s,p),oList) -> fmap (Triple s p) oList) . HashMap.toList) tsMap in concatMap (\t -> show t <> "\n") ts -- instance Show (HashSP) where @@ -80,7 +80,7 @@ mkRdf' triples baseURL pms = HashSP (tsMap, baseURL, pms) sortAndGroup xs = HashMap.fromListWith (<>) [((s,p), [o]) | Triple s p o <- xs] triplesOf' :: RDF HashSP -> Triples -triplesOf' (HashSP (tsMap,_,_)) = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . HashMap.toList) tsMap +triplesOf' (HashSP (tsMap,_,_)) = (concatMap (\((s,p),oList) -> fmap (Triple s p) oList) . HashMap.toList) tsMap uniqTriplesOf' :: RDF HashSP -> Triples uniqTriplesOf' = nub . expandTriples @@ -93,47 +93,47 @@ select' (HashSP (tsMap,_,_)) Nothing (Just pSelector) Nothing = HashMap.foldrWithKey findTripleWithP [] tsMap where findTripleWithP (s,p) oList ts = if pSelector p - then map (Triple s p) oList <> ts + then fmap (Triple s p) oList <> ts else ts select' (HashSP (tsMap,_,_)) Nothing Nothing (Just oSelector) = HashMap.foldrWithKey findTripleWithS [] tsMap where - findTripleWithS (s,p) oList ts = map (Triple s p) (filter oSelector oList) <> ts + findTripleWithS (s,p) oList ts = fmap (Triple s p) (filter oSelector oList) <> ts select' (HashSP (tsMap,_,_)) Nothing (Just pSelector) (Just oSelector) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if pSelector p - then map (Triple s p) (filter oSelector oList) <> ts + then fmap (Triple s p) (filter oSelector oList) <> ts else ts select' (HashSP (tsMap,_,_)) (Just sSelector) Nothing Nothing = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s - then map (Triple s p) oList <> ts + then fmap (Triple s p) oList <> ts else ts select' (HashSP (tsMap,_,_)) (Just sSelector) (Just pSelector) Nothing = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s && pSelector p - then map (Triple s p) oList <> ts + then fmap (Triple s p) oList <> ts else ts select' (HashSP (tsMap,_,_)) (Just sSelector) Nothing (Just oSelector) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s - then map (Triple s p) (filter oSelector oList) <> ts + then fmap (Triple s p) (filter oSelector oList) <> ts else ts select' (HashSP (tsMap,_,_)) (Just sSelector) (Just pSelector) (Just oSelector) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s && pSelector p - then map (Triple s p) (filter oSelector oList) <> ts + then fmap (Triple s p) (filter oSelector oList) <> ts else ts query' :: RDF HashSP -> Maybe Subject -> Maybe Predicate -> Maybe Object -> Triples @@ -144,42 +144,42 @@ query' (HashSP (tsMap,_,_)) Nothing (Just p) Nothing = HashMap.foldrWithKey findTripleWithP [] tsMap where findTripleWithP (s,p') oList ts = if p == p' - then map (Triple s p) oList <> ts + then fmap (Triple s p) oList <> ts else ts query' (HashSP (tsMap,_,_)) Nothing Nothing (Just o) = HashMap.foldrWithKey findTripleWithS [] tsMap where - findTripleWithS (s,p) oList ts = map (Triple s p) (filter (== o) oList) <> ts + findTripleWithS (s,p) oList ts = fmap (Triple s p) (filter (== o) oList) <> ts query' (HashSP (tsMap,_,_)) Nothing (Just p) (Just o) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p') oList ts = if p == p' - then map (Triple s p) (filter (== o) oList) <> ts + then fmap (Triple s p) (filter (== o) oList) <> ts else ts query' (HashSP (tsMap,_,_)) (Just s) Nothing Nothing = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p) oList ts = if s == s' - then map (Triple s p) oList <> ts + then fmap (Triple s p) oList <> ts else ts -- optimal pattern for this RDF HashSP instance query' (HashSP (tsMap,_,_)) (Just s) (Just p) Nothing = - (map (Triple s p) . HashMap.lookupDefault [] (s,p)) tsMap + (fmap (Triple s p) . HashMap.lookupDefault [] (s,p)) tsMap query' (HashSP (tsMap,_,_)) (Just s) Nothing (Just o) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p) oList ts = if s == s' - then map (Triple s p) (filter (== o) oList) <> ts + then fmap (Triple s p) (filter (== o) oList) <> ts else ts query' (HashSP (tsMap,_,_)) (Just s) (Just p) (Just o) = HashMap.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p') oList ts = if s == s' && p == p' - then map (Triple s p) (filter (== o) oList) <> ts + then fmap (Triple s p) (filter (== o) oList) <> ts else ts diff --git a/src/Data/RDF/Graph/MapSP.hs b/src/Data/RDF/Graph/MapSP.hs index 0825653..036047e 100644 --- a/src/Data/RDF/Graph/MapSP.hs +++ b/src/Data/RDF/Graph/MapSP.hs @@ -75,7 +75,7 @@ instance Rdf SP where showGraph' :: RDF SP -> String showGraph' (SP (tsMap,_,_)) = - let ts = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . Map.toList) tsMap + let ts = (concatMap (\((s,p),oList) -> fmap (Triple s p) oList) . Map.toList) tsMap in concatMap (\t -> show t <> "\n") ts type SPMap = Map (Subject,Predicate) [Object] @@ -101,7 +101,7 @@ mkRdf' triples baseURL pms = SP (tsMap, baseURL, pms) sortAndGroup xs = Map.fromListWith (<>) [((s,p), [o]) | Triple s p o <- xs] triplesOf' :: RDF SP -> Triples -triplesOf' (SP (tsMap,_,_)) = (concatMap (\((s,p),oList) -> map (Triple s p) oList) . Map.toList) tsMap +triplesOf' (SP (tsMap,_,_)) = (concatMap (\((s,p),oList) -> fmap (Triple s p) oList) . Map.toList) tsMap uniqTriplesOf' :: RDF SP -> Triples uniqTriplesOf' = nub . expandTriples @@ -114,47 +114,47 @@ select' (SP (tsMap,_,_)) Nothing (Just pSelector) Nothing = Map.foldrWithKey findTripleWithP [] tsMap where findTripleWithP (s,p) oList ts = if pSelector p - then map (Triple s p) oList <> ts + then fmap (Triple s p) oList <> ts else ts select' (SP (tsMap,_,_)) Nothing Nothing (Just oSelector) = Map.foldrWithKey findTripleWithS [] tsMap where - findTripleWithS (s,p) oList ts = map (Triple s p) (filter oSelector oList) <> ts + findTripleWithS (s,p) oList ts = fmap (Triple s p) (filter oSelector oList) <> ts select' (SP (tsMap,_,_)) Nothing (Just pSelector) (Just oSelector) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if pSelector p - then map (Triple s p) (filter oSelector oList) <> ts + then fmap (Triple s p) (filter oSelector oList) <> ts else ts select' (SP (tsMap,_,_)) (Just sSelector) Nothing Nothing = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s - then map (Triple s p) oList <> ts + then fmap (Triple s p) oList <> ts else ts select' (SP (tsMap,_,_)) (Just sSelector) (Just pSelector) Nothing = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s && pSelector p - then map (Triple s p) oList <> ts + then fmap (Triple s p) oList <> ts else ts select' (SP (tsMap,_,_)) (Just sSelector) Nothing (Just oSelector) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s - then map (Triple s p) (filter oSelector oList) <> ts + then fmap (Triple s p) (filter oSelector oList) <> ts else ts select' (SP (tsMap,_,_)) (Just sSelector) (Just pSelector) (Just oSelector) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p) oList ts = if sSelector s && pSelector p - then map (Triple s p) (filter oSelector oList) <> ts + then fmap (Triple s p) (filter oSelector oList) <> ts else ts query' :: RDF SP -> Maybe Subject -> Maybe Predicate -> Maybe Object -> Triples @@ -165,42 +165,42 @@ query' (SP (tsMap,_,_)) Nothing (Just p) Nothing = Map.foldrWithKey findTripleWithP [] tsMap where findTripleWithP (s,p') oList ts = if p == p' - then map (Triple s p) oList <> ts + then fmap (Triple s p) oList <> ts else ts query' (SP (tsMap,_,_)) Nothing Nothing (Just o) = Map.foldrWithKey findTripleWithS [] tsMap where - findTripleWithS (s,p) oList ts = map (Triple s p) (filter (== o) oList) <> ts + findTripleWithS (s,p) oList ts = fmap (Triple s p) (filter (== o) oList) <> ts query' (SP (tsMap,_,_)) Nothing (Just p) (Just o) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s,p') oList ts = if p == p' - then map (Triple s p) (filter (== o) oList) <> ts + then fmap (Triple s p) (filter (== o) oList) <> ts else ts query' (SP (tsMap,_,_)) (Just s) Nothing Nothing = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p) oList ts = if s == s' - then map (Triple s p) oList <> ts + then fmap (Triple s p) oList <> ts else ts -- optimal pattern for this SP instance query' (SP (tsMap,_,_)) (Just s) (Just p) Nothing = - (map (Triple s p) . Map.findWithDefault [] (s,p)) tsMap + (fmap (Triple s p) . Map.findWithDefault [] (s,p)) tsMap query' (SP (tsMap,_,_)) (Just s) Nothing (Just o) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p) oList ts = if s == s' - then map (Triple s p) (filter (== o) oList) <> ts + then fmap (Triple s p) (filter (== o) oList) <> ts else ts query' (SP (tsMap,_,_)) (Just s) (Just p) (Just o) = Map.foldrWithKey findTripleWithS [] tsMap where findTripleWithS (s',p') oList ts = if s == s' && p == p' - then map (Triple s p) (filter (== o) oList) <> ts + then fmap (Triple s p) (filter (== o) oList) <> ts else ts diff --git a/src/Data/RDF/Graph/TPatriciaTree.hs b/src/Data/RDF/Graph/TPatriciaTree.hs index 3108d17..6ab0997 100644 --- a/src/Data/RDF/Graph/TPatriciaTree.hs +++ b/src/Data/RDF/Graph/TPatriciaTree.hs @@ -101,24 +101,24 @@ mkRdf' ts base' pms' = triplesOf' :: RDF TPatriciaTree -> Triples triplesOf' (TPatriciaTree (g,idxLookup,_,_)) = - map (\(sIdx,oIdx,p) -> - let [s,o] = map (\idx -> fromJust $ IntMap.lookup idx idxLookup) [sIdx,oIdx] + fmap (\(sIdx,oIdx,p) -> + let [s,o] = fmap (\idx -> fromJust $ IntMap.lookup idx idxLookup) [sIdx,oIdx] in Triple s p o) (G.labEdges g) uniqTriplesOf' :: RDF TPatriciaTree -> Triples uniqTriplesOf' ptG@(TPatriciaTree (g,idxLookup,_,_)) = - nub $ map (\(sIdx,oIdx,p) -> - let [s,o] = map (\idx -> fromJust $ IntMap.lookup idx idxLookup) [sIdx,oIdx] + nub $ fmap (\(sIdx,oIdx,p) -> + let [s,o] = fmap (\idx -> fromJust $ IntMap.lookup idx idxLookup) [sIdx,oIdx] in expandTriple (prefixMappings ptG) (Triple s p o)) (G.labEdges g) mkTriples :: IntMap.IntMap Node -> Node -> [(Node, IntMap.Key)] -> [(Node, IntMap.Key)] -> [Triple] mkTriples idxLookup thisNode adjsIn adjsOut = - let ts1 = map (\(predNode,subjIdx) -> + let ts1 = fmap (\(predNode,subjIdx) -> let s = fromJust (IntMap.lookup subjIdx idxLookup) in Triple s predNode thisNode ) adjsIn - ts2 = map (\(predNode,objIdx) -> + ts2 = fmap (\(predNode,objIdx) -> let o = fromJust (IntMap.lookup objIdx idxLookup) in Triple thisNode predNode o ) adjsOut diff --git a/src/Data/RDF/Namespace.hs b/src/Data/RDF/Namespace.hs index afc3eb2..e0d88ee 100644 --- a/src/Data/RDF/Namespace.hs +++ b/src/Data/RDF/Namespace.hs @@ -27,8 +27,8 @@ standard_ns_mappings = ns_mappings standard_namespaces -- |Takes a list of 'Namespace's and returns 'PrefixMappings'. ns_mappings :: [Namespace] -> PrefixMappings -ns_mappings ns = PrefixMappings $ Map.fromList $ - map (\(PrefixedNS pre uri) -> (pre, uri)) ns +ns_mappings ns = PrefixMappings $ Map.fromList $ + fmap (\(PrefixedNS pre uri) -> (pre, uri)) ns -- |The RDF namespace. rdf :: Namespace @@ -73,7 +73,7 @@ ex2 = mkPrefixedNS' "ex2" "http://www2.example.org/" -- |Perform a left-biased merge of the two sets of prefix mappings. mergePrefixMappings :: PrefixMappings -> PrefixMappings -> PrefixMappings -mergePrefixMappings (PrefixMappings p1s) (PrefixMappings p2s) = +mergePrefixMappings (PrefixMappings p1s) (PrefixMappings p2s) = PrefixMappings $ Map.union p1s p2s -- |View the prefix mappings as a list of key-value pairs. The PM in diff --git a/src/Data/RDF/Types.hs b/src/Data/RDF/Types.hs index 25f8a54..0fc27be 100644 --- a/src/Data/RDF/Types.hs +++ b/src/Data/RDF/Types.hs @@ -579,7 +579,7 @@ instance Show PrefixMappings where -- worth optimizing yet. show (PrefixMappings pmap) = printf "PrefixMappings [%s]" mappingsStr where showPM = show . PrefixMapping - mappingsStr = List.intercalate ", " (map showPM (Map.toList pmap)) + mappingsStr = List.intercalate ", " (fmap showPM (Map.toList pmap)) -- |A mapping of a prefix to the URI for that prefix. newtype PrefixMapping = PrefixMapping (Text, Text) diff --git a/src/Text/RDF/RDF4H/TurtleSerializer.hs b/src/Text/RDF/RDF4H/TurtleSerializer.hs index ace29b1..81d183d 100644 --- a/src/Text/RDF/RDF4H/TurtleSerializer.hs +++ b/src/Text/RDF/RDF4H/TurtleSerializer.hs @@ -1,4 +1,4 @@ --- |An RDF serializer for Turtle +-- |An RDF serializer for Turtle -- . module Text.RDF.RDF4H.TurtleSerializer( @@ -31,7 +31,7 @@ instance RdfSerializer TurtleSerializer where hWriteT (TurtleSerializer docUrl pms) h = writeTriple h docUrl pms writeT s = hWriteT s stdout hWriteN (TurtleSerializer docUrl (PrefixMappings pms)) h n = writeNode h docUrl n pms - writeN s = hWriteN s stdout + writeN s = hWriteN s stdout -- TODO: writeRdf currently merges standard namespace prefix mappings with -- the ones that the RDF already contains, so that if the RDF has none @@ -72,10 +72,10 @@ writeTriples :: Handle -> Maybe T.Text -> PrefixMappings -> Triples -> IO () writeTriples h mdUrl (PrefixMappings pms) ts = mapM_ (writeSubjGroup h mdUrl revPms) (groupBy equalSubjects ts) where - revPms = Map.fromList $ map (\(k,v) -> (v,k)) $ Map.toList pms + revPms = Map.fromList $ (\(k,v) -> (v,k)) <$> Map.toList pms writeTriple :: Handle -> Maybe T.Text -> PrefixMappings -> Triple -> IO () -writeTriple h mdUrl (PrefixMappings pms) t = +writeTriple h mdUrl (PrefixMappings pms) t = w subjectOf >> space >> w predicateOf >> space >> w objectOf where w :: (Triple -> Node) -> IO () @@ -100,9 +100,9 @@ writeSubjGroup h dUrl pms ts@(t:_) = writePredGroup :: Handle -> Maybe T.Text -> Map T.Text T.Text -> Triples -> IO () writePredGroup _ _ _ [] = return () writePredGroup h docUrl pms (t:ts) = - -- The doesn't rule out <> in either the predicate or object (as well as subject), + -- The doesn't rule out <> in either the predicate or object (as well as subject), -- so we pass the docUrl through to writeNode in all cases. - writeNode h docUrl (predicateOf t) pms >> hPutChar h ' ' >> + writeNode h docUrl (predicateOf t) pms >> hPutChar h ' ' >> writeNode h docUrl (objectOf t) pms >> mapM_ (\t' -> hPutStr h ", " >> writeNode h docUrl (objectOf t') pms) ts @@ -131,7 +131,7 @@ _debugPMs pms = mapM_ (\(k, v) -> T.putStr k >> putStr "__" >> T.putStrLn v) (M -- Expects a map from uri to prefix, and returns the (prefix, uri_expansion) -- from the mappings such that uri_expansion is a prefix of uri, or Nothing if --- there is no such mapping. This function does a linear-time search over the +-- there is no such mapping. This function does a linear-time search over the -- map, but the prefix mappings should always be very small, so it's okay for now. findMapping :: Map T.Text T.Text -> T.Text -> Maybe (T.Text, T.Text) findMapping pms uri = diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index e56a2ef..08beb71 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -154,7 +154,7 @@ getRDF = proc xml -> do triples <- parseDescription' >. id -< (bUrl,rdf) returnA -< mkRdf triples (Just bUrl) prefixMap where toAttrMap = (getAttrl >>> (getName &&& (getChildren >>> getText))) >. id - toPrefixMap = PrefixMappings . Map.fromList . map (\(n, m) -> (T.pack (drop 6 n), T.pack m)) . filter (isPrefixOf "xmlns:" . fst) + toPrefixMap = PrefixMappings . Map.fromList . fmap (\(n, m) -> (T.pack (drop 6 n), T.pack m)) . filter (isPrefixOf "xmlns:" . fst) -- |Read the initial state from an rdf element parseDescription' :: forall a. (ArrowXml a, ArrowState GParseState a) => a (BaseUrl, XmlTree) Triple diff --git a/testsuite/tests/Data/RDF/PropertyTests.hs b/testsuite/tests/Data/RDF/PropertyTests.hs index 75fd8d4..9205d59 100644 --- a/testsuite/tests/Data/RDF/PropertyTests.hs +++ b/testsuite/tests/Data/RDF/PropertyTests.hs @@ -106,7 +106,7 @@ instance Arbitrary PrefixMappings where arbitraryBaseUrl :: Gen BaseUrl arbitraryBaseUrl = oneof $ - map + fmap (return . BaseUrl . T.pack) ["http://example.org/", "http://example.com/a", "http://asdf.org/b", "http://asdf.org/c"] @@ -547,7 +547,7 @@ tripleFromGen tripleFromGen _triplesOf rdf = if null ts then return Nothing - else oneof $ map (return . Just) ts + else oneof $ fmap (return . Just) ts where ts = _triplesOf rdf @@ -561,7 +561,7 @@ languages :: [T.Text] languages = [T.pack "fr", T.pack "en"] datatypes :: [T.Text] -datatypes = map (mkUri xsd . T.pack) ["string", "int", "token"] +datatypes = fmap (mkUri xsd . T.pack) ["string", "int", "token"] uris :: [T.Text] uris = [mkUri ex (n <> T.pack (show (i :: Int))) @@ -576,13 +576,13 @@ typedliterals :: [LValue] typedliterals = [typedL lit dtype | lit <- litvalues, dtype <- datatypes] litvalues :: [T.Text] -litvalues = map T.pack ["hello", "world", "peace", "earth", "", "haskell"] +litvalues = fmap T.pack ["hello", "world", "peace", "earth", "", "haskell"] unodes :: [Node] -unodes = map UNode uris +unodes = fmap UNode uris bnodes :: [ Node] -bnodes = map (BNode . \i -> T.pack ":_genid" <> T.pack (show (i::Int))) [1..5] +bnodes = fmap (BNode . \i -> T.pack ":_genid" <> T.pack (show (i::Int))) [1..5] lnodes :: [Node] lnodes = [LNode lit | lit <- plainliterals <> typedliterals] @@ -606,7 +606,7 @@ instance Arbitrary Triple where triple s p <$> arbitraryO instance Arbitrary Node where - arbitrary = oneof $ map return unodes + arbitrary = oneof $ fmap return unodes arbitraryTs :: Gen Triples arbitraryTs = do @@ -614,9 +614,9 @@ arbitraryTs = do sequence [arbitrary | _ <- [1 .. n]] arbitraryS, arbitraryP, arbitraryO :: Gen Node -arbitraryS = oneof $ map return $ unodes <> bnodes -arbitraryP = oneof $ map return unodes -arbitraryO = oneof $ map return $ unodes <> bnodes <> lnodes +arbitraryS = oneof $ fmap return $ unodes <> bnodes +arbitraryP = oneof $ fmap return unodes +arbitraryO = oneof $ fmap return $ unodes <> bnodes <> lnodes ---------------------------------------------------- -- Unit test cases -- diff --git a/testsuite/tests/Text/RDF/RDF4H/TurtleParser_ConformanceTest.hs b/testsuite/tests/Text/RDF/RDF4H/TurtleParser_ConformanceTest.hs index 418ce40..382c039 100644 --- a/testsuite/tests/Text/RDF/RDF4H/TurtleParser_ConformanceTest.hs +++ b/testsuite/tests/Text/RDF/RDF4H/TurtleParser_ConformanceTest.hs @@ -73,7 +73,7 @@ doGoodConformanceTest expGr inGr testname = let t1 = assertLoadSuccess (printf "expected (%s): " testname) expGr t2 = assertLoadSuccess (printf " input (%s): " testname) inGr t3 = assertEquivalent testname expGr inGr - in testGroup (printf "conformance-%s" testname) $ map (uncurry testCase) [("loading-expected-graph-data", t1), ("loading-input-graph-data", t2), ("comparing-graphs", t3)] + in testGroup (printf "conformance-%s" testname) $ fmap (uncurry testCase) [("loading-expected-graph-data", t1), ("loading-input-graph-data", t2), ("comparing-graphs", t3)] checkBadConformanceTest :: Int -> TestTree checkBadConformanceTest i = diff --git a/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs b/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs index 50cb702..9f569d3 100644 --- a/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs +++ b/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs @@ -35,7 +35,7 @@ tests = , testCase "NML3" test_parseXmlRDF_NML3 ] <> - map (uncurry checkGoodOtherTest) otherTestFiles + fmap (uncurry checkGoodOtherTest) otherTestFiles otherTestFiles :: [(String, String)] otherTestFiles = [ ("data/xml", "example07") @@ -80,7 +80,7 @@ doGoodConformanceTest expGr inGr testname = let t1 = assertLoadSuccess (printf "expected (%s): " testname) expGr t2 = assertLoadSuccess (printf " input (%s): " testname) inGr t3 = assertEquivalent testname expGr inGr - in testGroup (printf "conformance-%s" testname) $ map (uncurry testCase) [("loading-expected-graph-data", t1), ("loading-input-graph-data", t2), ("comparing-graphs", t3)] + in testGroup (printf "conformance-%s" testname) $ fmap (uncurry testCase) [("loading-expected-graph-data", t1), ("loading-input-graph-data", t2), ("comparing-graphs", t3)] mkTextNode :: T.Text -> Node mkTextNode = lnode . plainL @@ -416,7 +416,7 @@ handleLoad :: Either ParseFailure (RDF TList) -> Either ParseFailure (RDF TList) handleLoad res = case res of l@(Left _) -> l - (Right gr) -> Right $ mkRdf (map normalize (triplesOf gr)) (baseUrl gr) (prefixMappings gr) + (Right gr) -> Right $ mkRdf (fmap normalize (triplesOf gr)) (baseUrl gr) (prefixMappings gr) normalize :: Triple -> Triple normalize t = let s' = normalizeN $ subjectOf t diff --git a/testsuite/tests/W3C/Manifest.hs b/testsuite/tests/W3C/Manifest.hs index 9efda6b..cd46877 100644 --- a/testsuite/tests/W3C/Manifest.hs +++ b/testsuite/tests/W3C/Manifest.hs @@ -137,7 +137,7 @@ rdfToManifest rdf = Manifest desc tpls descNode = query rdf (Just manifestNode) (Just rdfsLabel) Nothing <> query rdf (Just manifestNode) (Just mfName) Nothing -- descNode = query rdf (Just manifestNode) (Just mfName) Nothing - tpls = map (rdfToTestEntry rdf) $ rdfCollectionToList rdf collectionHead + tpls = (rdfToTestEntry rdf) <$> rdfCollectionToList rdf collectionHead collectionHead = objectOf $ headDef (error "query: mf:node & mf:entries") $ query rdf (Just manifestNode) (Just mfEntries) Nothing manifestNode = headDef (error "manifestSubjectNodes yielding empty list") $ manifestSubjectNodes rdf @@ -274,7 +274,7 @@ manifestSubjectNodes :: RDF TList -> [Subject] manifestSubjectNodes rdf = subjectNodes rdf [mfManifest] subjectNodes :: RDF TList -> [Object] -> [Subject] -subjectNodes rdf = (map subjectOf) . concatMap queryType +subjectNodes rdf = (fmap subjectOf) . concatMap queryType where queryType n = query rdf Nothing (Just rdfType) (Just n) -- | Text of the literal node. diff --git a/testsuite/tests/W3C/W3CAssertions.hs b/testsuite/tests/W3C/W3CAssertions.hs index 11e6011..2ee45c9 100644 --- a/testsuite/tests/W3C/W3CAssertions.hs +++ b/testsuite/tests/W3C/W3CAssertions.hs @@ -15,7 +15,7 @@ import W3C.Manifest runManifestTests :: (TestEntry -> TestTree) -> Manifest -> TestTree runManifestTests mfEntryToTest manifest = - testGroup (T.unpack $ description manifest) $ map mfEntryToTest $ entries manifest + testGroup (T.unpack $ description manifest) $ mfEntryToTest <$> entries manifest assertIsIsomorphic :: IO (RDF TList) -> IO (RDF TList) -> IO () assertIsIsomorphic r1 r2 = do From cd46e94d323d3a6b64598e65058fda4bb6a6327e Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Wed, 22 May 2019 17:44:37 +0200 Subject: [PATCH 07/39] Fix algebraic-graphs version's bound --- rdf4h.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rdf4h.cabal b/rdf4h.cabal index c2e9350..6209b47 100644 --- a/rdf4h.cabal +++ b/rdf4h.cabal @@ -48,7 +48,7 @@ library , HTTP >= 4000.0.0 , hxt >= 9.3.1.2 , text >= 1.2.1.0 - , algebraic-graphs >= 0.4 && < 5 + , algebraic-graphs >= 0.3 && < 0.5 , unordered-containers , hashable , deepseq From 8ab7a1d72c626ab6dbcac98e64a88d3faef60f64 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Wed, 22 May 2019 17:56:00 +0200 Subject: [PATCH 08/39] Travis: add lts-13 and drop lts-7 --- .travis.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 500904b..6d809b6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -81,10 +81,6 @@ matrix: compiler: ": #stack 7.10.3" addons: {apt: {packages: [libgmp-dev]}} - - env: BUILD=stack ARGS="--resolver lts-7" - compiler: ": #stack 8.0.1" - addons: {apt: {packages: [libgmp-dev]}} - - env: BUILD=stack ARGS="--resolver lts-9" compiler: ": #stack 8.0.2" addons: {apt: {packages: [libgmp-dev]}} @@ -97,6 +93,10 @@ matrix: compiler: ": #stack 8.4.3" addons: {apt: {packages: [libgmp-dev]}} + - env: BUILD=stack ARGS="--resolver lts-13" + compiler: ": #stack 8.6.5" + addons: {apt: {packages: [libgmp-dev]}} + # Nightly builds are allowed to fail # - env: BUILD=stack ARGS="--resolver nightly" # compiler: ": #stack nightly" From b447e4b5aa2de108aa614e21ea9c734adfad7dea Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Wed, 22 May 2019 18:38:20 +0200 Subject: [PATCH 09/39] Fix issue with Haddock --- src/Text/RDF/RDF4H/XmlParser.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index eb834e3..064dcd6 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -221,7 +221,7 @@ isMetaAttr = isA (== "rdf:about") -- -- And that specifically: -- --- +-- -- foo -- -- @@ -279,7 +279,7 @@ parsePredicatesFromChildren = updateState , second hasPredicateAttr :-> (defaultA <+> (mkBlankNode &&& arr id >>> arr2A parsePredicateAttr)) , this :-> defaultA ] - + -- See: Issue http://www.w3.org/2000/03/rdf-tracking/#rdfms-rdf-names-use -- section: Illegal or unusual use of names from the RDF namespace -- @@ -318,7 +318,7 @@ validPropElementName = proc (state,predXml) -> do parseObjectsFromChildren :: forall a. (ArrowIf a, ArrowXml a, ArrowState GParseState a) => LParseState -> Predicate -> a XmlTree Triple parseObjectsFromChildren s p = - choiceA + choiceA [ isText :-> (neg( isWhiteSpace) >>> getText >>> arr (Triple (stateSubject s) p . mkLiteralNode s)) , isElem :-> (parseObjectDescription) ] @@ -467,7 +467,7 @@ xmlName str = go [] str else Nothing isValid c = isAlphaNum c || '_' == c - -- || '-' == c + -- '-' == c || '.' == c || ':' == c From fbf84319c9cfcbb3aeb4b175eac6729e9353fbf6 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Wed, 22 May 2019 18:38:20 +0200 Subject: [PATCH 10/39] Fix issue with Haddock --- src/Text/RDF/RDF4H/XmlParser.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index 08beb71..20fe72b 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -468,7 +468,7 @@ xmlName str = go [] str else Nothing isValid c = isAlphaNum c || '_' == c - -- || '-' == c + -- '-' == c || '.' == c || ':' == c From d32650ab4a24aebf631f7ab2ff65190651d488e1 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Thu, 23 May 2019 10:22:46 +0200 Subject: [PATCH 11/39] Fix tests when path contains characters that must be escaped. --- src/Data/RDF/Types.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/RDF/Types.hs b/src/Data/RDF/Types.hs index 47b4cde..91e58ba 100644 --- a/src/Data/RDF/Types.hs +++ b/src/Data/RDF/Types.hs @@ -649,7 +649,7 @@ fileSchemeToFilePath (UNode fileScheme) | otherwise = Nothing where textToFilePath = pure . fromString <=< stringToFilePath . T.unpack - stringToFilePath = fixPrefix <=< pure . Network.uriPath <=< Network.parseURI + stringToFilePath = fixPrefix <=< pure . unEscapeString . Network.uriPath <=< Network.parseURI fixPrefix "" = Nothing fixPrefix p@(p':p'') | p' == FP.pathSeparator = Just (FP.normalise p) -- Posix path From bf7112c74d2d274da637aac98a522bcff3f2fce0 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Thu, 23 May 2019 10:22:54 +0200 Subject: [PATCH 12/39] Fix Xeno --- src/Text/RDF/RDF4H/XmlParser.hs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index ebd143d..4441d62 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -37,7 +37,7 @@ import qualified Data.Text.Encoding as T import Xmlbf hiding (Node) import qualified Xmlbf (Node) import qualified Xmlbf.Xeno as Xeno - + data XmlParser = XmlParser (Maybe BaseUrl) (Maybe Text) instance RdfParser XmlParser where @@ -81,7 +81,7 @@ data ParserException = ParserException String instance Exception ParserException testXeno :: Text -> Either String [Xmlbf.Node] -testXeno = Xeno.nodes . T.encodeUtf8 +testXeno = Xeno.fromRawXml . T.encodeUtf8 -- |Parse a xml Text to an RDF representation parseXmlRDF :: (Rdf a) @@ -90,7 +90,7 @@ parseXmlRDF :: (Rdf a) -> Text -- ^ The contents to parse -> Either ParseFailure (RDF a) -- ^ The RDF representation of the triples or ParseFailure parseXmlRDF bUrl dUrl xmlStr = - case Xeno.nodes (T.encodeUtf8 xmlStr) of + case Xeno.fromRawXml (T.encodeUtf8 xmlStr) of Left xmlParseError -> Left (ParseFailure xmlParseError) Right nodes -> -- error (show nodes) case runParser (rdfParser bUrl dUrl) nodes of @@ -127,7 +127,7 @@ newline = do else anyUsefulChars (T.tail t) newlines :: Parser () -newlines = void (many newline) +newlines = void (many newline) pNodeNot :: Text -> Parser () pNodeNot t = do @@ -317,7 +317,7 @@ test1 = triplesOf got == expected (UNode "si:author") (LNode (PlainL "Jan Egil Refsnes")) ] - + -- missing in Xmlbf -- | @'pElement'' p@ runs a 'Parser' @p@ inside a element node and From 51a013dddd97ae9f76377e3fd2c378f25e4ede0e Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Thu, 23 May 2019 15:26:40 +0200 Subject: [PATCH 13/39] Tidying --- src/Text/RDF/RDF4H/XmlParser.hs | 234 ++++++++++++++------------------ 1 file changed, 105 insertions(+), 129 deletions(-) diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index 4441d62..868ecab 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -1,7 +1,6 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE DoAndIfThenElse #-} -{-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE FlexibleContexts #-} @@ -9,21 +8,22 @@ -- . module Text.RDF.RDF4H.XmlParser - -- ( - -- XmlParser'(XmlParser') - -- , xmlEg - -- ) -where + ( XmlParser(..) + , xmlEg + ) where import Text.RDF.RDF4H.ParserUtils (parseFromURL) import Debug.Trace -import qualified Control.Applicative as Applicative +import Control.Applicative import Control.Exception import Control.Monad +import Data.Semigroup ((<>)) import qualified Data.HashMap.Strict as HashMap import qualified Data.Map as Map import Data.Maybe +import Data.Bifunctor +import Data.Foldable import Data.RDF.IRI import Data.RDF.Types import Data.RDF.Graph.TList @@ -41,9 +41,9 @@ import qualified Xmlbf.Xeno as Xeno data XmlParser = XmlParser (Maybe BaseUrl) (Maybe Text) instance RdfParser XmlParser where - parseString (XmlParser bUrl dUrl) = parseXmlRDF bUrl dUrl - parseFile (XmlParser bUrl dUrl) = parseFile' bUrl dUrl - parseURL (XmlParser bUrl dUrl) = parseURL' bUrl dUrl + parseString (XmlParser bUrl dUrl) = parseXmlRDF bUrl dUrl + parseFile (XmlParser bUrl dUrl) = parseFile' bUrl dUrl + parseURL (XmlParser bUrl dUrl) = parseURL' bUrl dUrl parseFile' :: (Rdf a) @@ -51,8 +51,7 @@ parseFile' :: -> Maybe Text -> String -> IO (Either ParseFailure (RDF a)) -parseFile' bUrl dUrl fpath = - TIO.readFile fpath >>= return . parseXmlRDF bUrl dUrl +parseFile' bUrl dUrl fpath = parseXmlRDF bUrl dUrl <$> TIO.readFile fpath parseURL' :: (Rdf a) @@ -69,12 +68,12 @@ parseURL' bUrl docUrl = parseFromURL (parseXmlRDF bUrl docUrl) -- } deriving (Show) -- |Local state for the parser (dependant on the parent xml elements) -data ParseState = ParseState { stateBaseUrl :: Maybe BaseUrl - , stateLang :: Maybe Text - , stateSubject :: Subject - , stateGenId :: Int - } - deriving(Show) +data ParseState = ParseState + { stateBaseUrl :: Maybe BaseUrl + , stateLang :: Maybe Text + , stateSubject :: Subject + , stateGenId :: Int + } deriving(Show) data ParserException = ParserException String deriving (Show) @@ -89,23 +88,21 @@ parseXmlRDF :: (Rdf a) -> Maybe Text -- ^ DocUrl: The request URL for the RDF if available -> Text -- ^ The contents to parse -> Either ParseFailure (RDF a) -- ^ The RDF representation of the triples or ParseFailure -parseXmlRDF bUrl dUrl xmlStr = - case Xeno.fromRawXml (T.encodeUtf8 xmlStr) of - Left xmlParseError -> Left (ParseFailure xmlParseError) - Right nodes -> -- error (show nodes) - case runParser (rdfParser bUrl dUrl) nodes of - Left rdfParseError -> Left (ParseFailure rdfParseError) - Right rdf -> Right rdf +parseXmlRDF bUrl dUrl = parseRdf . parseXml + where + parseXml = Xeno.fromRawXml . T.encodeUtf8 + parseRdf = first ParseFailure . join . second parseRdf' + parseRdf' = runParser (rdfParser bUrl dUrl) -- TODO: use bUrl and dUrl rdfParser :: Rdf a => Maybe BaseUrl -> Maybe Text -> Parser (RDF a) rdfParser bUrl dUrl = do - let initState = ParseState bUrl Nothing undefined 0 - rdf <- rdfDescription initState + let initState = ParseState bUrl mempty undefined 0 + rdf <- pRdfDescription initState newlines -- tree <- showTree -- error (show tree) - void pEndOfInput + pEndOfInput return rdf -- Text "\n" @@ -117,14 +114,7 @@ newline = do then pure () else pFail "not a newline text node" where - anyUsefulChars t = - if T.length t == 0 - then False - else - let c = T.head t - in if (c /= '\n' && c /= '\r' && c /= ' ') - then True - else anyUsefulChars (T.tail t) + anyUsefulChars = T.any (\c -> c /= '\n' && c /= '\r' && c /= ' ') newlines :: Parser () newlines = void (many newline) @@ -132,19 +122,21 @@ newlines = void (many newline) pNodeNot :: Text -> Parser () pNodeNot t = do n <- pName - if (n /= t) + if n /= t then pure () - else pFail ("forbidden element name: " ++ show t) + else pFail ("forbidden element name: " <> show t) {- [ ("xmlns:si","https://www.w3schools.com/rdf/") , ("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#") ] -} -prefixes :: Parser [(Text,Text)] -prefixes = do - xs <- HashMap.toList <$> pAttrs - pure (map (\(k,v) -> (fromJust (T.stripPrefix "xmlns:" k),v)) xs) +pPrefixMappings :: Parser PrefixMappings +pPrefixMappings = PrefixMappings <$> pm + where + pm = Map.fromList . HashMap.foldlWithKey' getPrefixes mempty <$> pAttrs + getPrefixes ps k v = maybe ps (\k' -> (k', v):ps) (T.stripPrefix "xmlns:" k) + oneAttr :: Parser (Text,Text) oneAttr = do @@ -153,85 +145,84 @@ oneAttr = do 1 -> pure $ head (HashMap.toList xs) _ -> pFail "not one attr" -rdfTriplesP :: ParseState -> Parser (Triples,ParseState) +rdfTriplesP :: ParseState -> Parser (Triples, ParseState) rdfTriplesP st = do newlines pElement "rdf:Description" $ do newlines - ((subj, reifiedTriples), st') <- subjP st - (ts) <- concat <$> many (predObjP st') + ((subj, reifiedTriples), st') <- pSubject st + ts <- concat <$> many (pPredicateObject st') newlines -- tree <- showTree -- error (show tree) - void pEndOfInput - pure (ts ++ reifiedTriples,st') - -- pure $ ((map (\(p, o) -> triple subj p o) predObjs ++ reifiedTriples), st') + pEndOfInput + pure (ts <> reifiedTriples, st') + -- pure $ ((fmap (\(p, o) -> triple subj p o) predObjs <> reifiedTriples), st') {- NOTE: remember to use `showTree` in the fork of xmlbf when pEndOfInput needs debugging. -} -subjP :: ParseState -> Parser ((Node,Triples),ParseState) -subjP st = do - -- void (pNodeNot "rdf:RDF") -- rdfms-rdf-names-use-error-001 - (do +pSubject :: ParseState -> Parser ((Node, Triples), ParseState) +pSubject st = unodeP <|> bnodeP + where + unodeP = do s <- unode <$> pAttr "rdf:about" - pure ((s,[]),st { stateSubject = s } ) - <|> do - -- theId <- pAttr "rdf:ID" - let theBnode = BNodeGen (stateGenId st) - st' = st { stateGenId = stateGenId st + 1} - pure ((theBnode,[]),st')) - --- predObjP :: Parser ((Node,Node)) -predObjP :: ParseState -> Parser Triples -predObjP st = do + pure ((s, []), st { stateSubject = s } ) + bnodeP = do + -- theId <- pAttr "rdf:ID" + let s = BNodeGen (stateGenId st) + st' = st { stateGenId = stateGenId st + 1, stateSubject = s} + pure ((s, []), st') + +-- pPredicateObject :: Parser ((Node,Node)) +pPredicateObject :: ParseState -> Parser Triples +pPredicateObject st = do void newlines - (do pAnyElement $ do - void (pNodeNot "rdf:Description") -- rdfms-rdf-names-use-error-011 - p <- unode <$> pName - (ts) <- - (do - -- typed literal - theType <- pAttr "rdf:datatype" - theText <- pText - pure [triple (stateSubject st) p ((lnode (typedL (TL.toStrict theText) theType)))]) - <|> - -- blank node - (do (p1,o1) <- oneAttr - -- TODO: increment stateGenId - let bnode = BNodeGen (stateGenId st) - t1 = triple (stateSubject st) p bnode - a = case stateBaseUrl st of - Nothing -> T.pack "" - Just (BaseUrl uri) -> uri - Right txt = resolveIRI a p1 - p2 = unode txt - -- TODO: typed and lang literals - t2 = triple bnode p2 (lnode (plainL o1)) - pure [t1,t2]) - <|> - (do - -- plain literal - theText <- pText - newlines - pure [triple - (stateSubject st) - p - (lnode (plainL (TL.toStrict theText)))]) - newlines - pure ts - <|> - pFail "unable to parse predicate/object pair" - ) + do pAnyElement $ do + void (pNodeNot "rdf:Description") -- rdfms-rdf-names-use-error-011 + p <- unode <$> pName + (ts) <- + (do + -- typed literal + theType <- pAttr "rdf:datatype" + theText <- pText + pure [triple (stateSubject st) p ((lnode (typedL (TL.toStrict theText) theType)))]) + <|> + -- blank node + (do (p1,o1) <- oneAttr + -- TODO: increment stateGenId + let bnode = BNodeGen (stateGenId st) + t1 = triple (stateSubject st) p bnode + a = case stateBaseUrl st of + Nothing -> T.pack "" + Just (BaseUrl uri) -> uri + Right txt = resolveIRI a p1 + p2 = unode txt + -- TODO: typed and lang literals + t2 = triple bnode p2 (lnode (plainL o1)) + pure [t1,t2]) + <|> + (do + -- plain literal + theText <- pText + newlines + pure [triple + (stateSubject st) + p + (lnode (plainL (TL.toStrict theText)))]) + newlines + pure ts + <|> + pFail "unable to parse predicate/object pair" -- TODO: reify triple -- TODO: unodes, and all different kinds of plain text nodes -- objP :: Parser (Node) -- objP {- st -} = do -- -- unode --- -- xs <- head <$> prefixes +-- -- xs <- head <$> pPrefixMappings -- -- error (show xs) -- -- TODO for: -- -- Element "eg:Creator" @@ -249,19 +240,16 @@ predObjP st = do -- pure (lnode (plainL (TL.toStrict theText))) - -rdfDescription' :: ParseState -> Parser (PrefixMappings,Maybe BaseUrl,Triples) -rdfDescription' st = do +pRdfDescription' :: Rdf a => ParseState -> Parser (RDF a) +pRdfDescription' st = do newlines - pfixes <- prefixes - (_,(triples,st')) <- pElement' (rdfTriplesP st) + pm <- pPrefixMappings + (_, (triples, st')) <- pElement' (rdfTriplesP st) newlines - pure (PrefixMappings (Map.fromList pfixes), Nothing, triples) + pure $ mkRdf triples Nothing pm -rdfDescription :: Rdf a => ParseState -> Parser (RDF a) -rdfDescription st = do - (pfixes,bUrl,triples) <- pElement "rdf:RDF" (rdfDescription' st) - pure $ mkRdf triples bUrl pfixes +pRdfDescription :: Rdf a => ParseState -> Parser (RDF a) +pRdfDescription st = pElement "rdf:RDF" (pRdfDescription' st) {- [ Text "\n" @@ -329,19 +317,16 @@ test1 = triplesOf got == expected -- some reason, capture it using 'pText' before using 'pElement''. -- -- Consumes the element from the parser state. -pElement' :: Parser a -> Parser (T.Text,a) -pElement' p = do - res <- p - name <- pName - return (name,res) +pElement' :: Parser a -> Parser (T.Text, a) +pElement' = liftA2 (,) pName pText' :: TL.Text -> Parser TL.Text pText' t = do - let pTextFail = pFail ("Missing text node " ++ show t) - (do t' <- pText - if t == t' then pure t - else pTextFail - <|> pTextFail) + let pTextFail = pFail ("Missing text node " <> show t) + do t' <- pText + if t == t' then pure t + else pTextFail + <|> pTextFail -- parser combinators missing in Xmlbf @@ -351,15 +336,6 @@ between open close thing = open *> thing <* close manyTill :: Parser a -> Parser end -> Parser [a] manyTill thing z = many thing <* z -(<|>) :: Parser a -> Parser a -> Parser a -(<|>) a b = a Applicative.<|> b - -some :: Parser a -> Parser [a] -some = Applicative.some - -many :: Parser a -> Parser [a] -many = Applicative.many - -- pElem :: Text -> Parser Text -- oneOf :: Parser [a] -> Parser a -- noneOf :: Parser [a] -> Parser a From 721a1ff7bfc42598621412400b9cd8aefb287958 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Thu, 23 May 2019 15:44:29 +0200 Subject: [PATCH 14/39] Make `pPredicateObject` easier to read --- src/Text/RDF/RDF4H/XmlParser.hs | 70 ++++++++++++++++----------------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index 868ecab..c69447b 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -181,42 +181,40 @@ pPredicateObject :: ParseState -> Parser Triples pPredicateObject st = do void newlines do pAnyElement $ do - void (pNodeNot "rdf:Description") -- rdfms-rdf-names-use-error-011 - p <- unode <$> pName - (ts) <- - (do - -- typed literal - theType <- pAttr "rdf:datatype" - theText <- pText - pure [triple (stateSubject st) p ((lnode (typedL (TL.toStrict theText) theType)))]) - <|> - -- blank node - (do (p1,o1) <- oneAttr - -- TODO: increment stateGenId - let bnode = BNodeGen (stateGenId st) - t1 = triple (stateSubject st) p bnode - a = case stateBaseUrl st of - Nothing -> T.pack "" - Just (BaseUrl uri) -> uri - Right txt = resolveIRI a p1 - p2 = unode txt - -- TODO: typed and lang literals - t2 = triple bnode p2 (lnode (plainL o1)) - pure [t1,t2]) - <|> - (do - -- plain literal - theText <- pText - newlines - pure [triple - (stateSubject st) - p - (lnode (plainL (TL.toStrict theText)))]) - newlines - pure ts - <|> - pFail "unable to parse predicate/object pair" - -- TODO: reify triple + void (pNodeNot "rdf:Description") -- rdfms-rdf-names-use-error-011 + p <- unode <$> pName + ts <- pTypedLiteral p + <|> pBNode p + <|> pPlainLiteral p + <|> pFail "unable to parse predicate/object pair" + newlines + pure ts + -- TODO: reify triple + where + pTypedLiteral p = do + theType <- pAttr "rdf:datatype" + theText <- pText + pure [triple (stateSubject st) p ((lnode (typedL (TL.toStrict theText) theType)))] + pBNode p = do + (p1, o1) <- oneAttr + -- TODO: increment stateGenId + let bnode = BNodeGen (stateGenId st) + t1 = triple (stateSubject st) p bnode + a = case stateBaseUrl st of + Nothing -> T.pack "" + Just (BaseUrl uri) -> uri + Right txt = resolveIRI a p1 + p2 = unode txt + -- TODO: typed and lang literals + t2 = triple bnode p2 (lnode (plainL o1)) + pure [t1, t2] + pPlainLiteral p = do + theText <- pText + newlines + pure [triple + (stateSubject st) + p + (lnode (plainL (TL.toStrict theText)))] -- TODO: unodes, and all different kinds of plain text nodes -- objP :: Parser (Node) From 4d312a6c80da1e9d37b012151ab3d4e09c3c95bd Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Fri, 24 May 2019 11:49:09 +0200 Subject: [PATCH 15/39] Tidying --- src/Text/RDF/RDF4H/XmlParser.hs | 68 ++++++++++++++------------------- 1 file changed, 29 insertions(+), 39 deletions(-) diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index c69447b..8b24f5e 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -98,26 +98,19 @@ parseXmlRDF bUrl dUrl = parseRdf . parseXml rdfParser :: Rdf a => Maybe BaseUrl -> Maybe Text -> Parser (RDF a) rdfParser bUrl dUrl = do let initState = ParseState bUrl mempty undefined 0 - rdf <- pRdfDescription initState - newlines + rdf <- pRdf initState + pWs -- tree <- showTree -- error (show tree) pEndOfInput return rdf --- Text "\n" --- TODO: check that all that follows from \n is zero or more ' ' characters. -newline :: Parser () -newline = do - t <- pText - if not $ anyUsefulChars (TL.toStrict t) - then pure () - else pFail "not a newline text node" +-- |White spaces parser +pWs :: Parser () +pWs = (T.all ws . TL.toStrict <$> pText) >>= guard where - anyUsefulChars = T.any (\c -> c /= '\n' && c /= '\r' && c /= ' ') - -newlines :: Parser () -newlines = void (many newline) + -- See: https://www.w3.org/TR/2000/REC-xml-20001006#NT-S + ws c = c == '\x20' || c == '\x09' || c == '\x0d' || c == '\x0a' pNodeNot :: Text -> Parser () pNodeNot t = do @@ -145,20 +138,6 @@ oneAttr = do 1 -> pure $ head (HashMap.toList xs) _ -> pFail "not one attr" -rdfTriplesP :: ParseState -> Parser (Triples, ParseState) -rdfTriplesP st = do - newlines - pElement "rdf:Description" $ do - newlines - ((subj, reifiedTriples), st') <- pSubject st - ts <- concat <$> many (pPredicateObject st') - newlines - -- tree <- showTree - -- error (show tree) - pEndOfInput - pure (ts <> reifiedTriples, st') - -- pure $ ((fmap (\(p, o) -> triple subj p o) predObjs <> reifiedTriples), st') - {- NOTE: remember to use `showTree` in the fork of xmlbf when pEndOfInput needs debugging. @@ -179,7 +158,7 @@ pSubject st = unodeP <|> bnodeP -- pPredicateObject :: Parser ((Node,Node)) pPredicateObject :: ParseState -> Parser Triples pPredicateObject st = do - void newlines + void pWs do pAnyElement $ do void (pNodeNot "rdf:Description") -- rdfms-rdf-names-use-error-011 p <- unode <$> pName @@ -187,7 +166,7 @@ pPredicateObject st = do <|> pBNode p <|> pPlainLiteral p <|> pFail "unable to parse predicate/object pair" - newlines + pWs pure ts -- TODO: reify triple where @@ -210,7 +189,7 @@ pPredicateObject st = do pure [t1, t2] pPlainLiteral p = do theText <- pText - newlines + pWs pure [triple (stateSubject st) p @@ -237,18 +216,29 @@ pPredicateObject st = do -- theText <- pText -- pure (lnode (plainL (TL.toStrict theText))) - -pRdfDescription' :: Rdf a => ParseState -> Parser (RDF a) -pRdfDescription' st = do - newlines +pRdf :: Rdf a => ParseState -> Parser (RDF a) +pRdf st = pElement "rdf:RDF" $ do pm <- pPrefixMappings - (_, (triples, st')) <- pElement' (rdfTriplesP st) - newlines + -- [TODO] Ensure no attributes + triples <- pNodeElementList st pure $ mkRdf triples Nothing pm -pRdfDescription :: Rdf a => ParseState -> Parser (RDF a) -pRdfDescription st = pElement "rdf:RDF" (pRdfDescription' st) +pNodeElementList :: ParseState -> Parser Triples +pNodeElementList st = do + pWs + snd <$> pElement' (pRdfTriples st) +pRdfTriples :: ParseState -> Parser Triples +pRdfTriples st = do + pElement "rdf:Description" $ do + pWs + ((subj, reifiedTriples), st') <- pSubject st + ts <- concat <$> many (pPredicateObject st') + pWs + -- tree <- showTree + -- error (show tree) + pEndOfInput + pure (ts <> reifiedTriples) {- [ Text "\n" , Element From 8fccd8741f84b1d257e5ef87edd8d7f2ccf88966 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Fri, 24 May 2019 17:10:52 +0200 Subject: [PATCH 16/39] Use a monad transformer to manage the state --- .gitignore | 1 + src/Text/RDF/RDF4H/XmlParser.hs | 53 +++++++++++++++++++-------------- 2 files changed, 31 insertions(+), 23 deletions(-) diff --git a/.gitignore b/.gitignore index 346cc90..42b31db 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ TAGS *.backup /.cabal-sandbox cabal.sandbox.config +cabal.project.local countries.ttl *.prof bench/MainCriterion diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index 8b24f5e..94c26df 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -34,9 +34,10 @@ import qualified Data.Text as T import qualified Data.Text.IO as TIO import qualified Data.Text.Lazy as TL import qualified Data.Text.Encoding as T -import Xmlbf hiding (Node) +import Xmlbf hiding (Node, Parser) import qualified Xmlbf (Node) import qualified Xmlbf.Xeno as Xeno +import Control.Monad.State.Strict data XmlParser = XmlParser (Maybe BaseUrl) (Maybe Text) @@ -67,6 +68,9 @@ parseURL' bUrl docUrl = parseFromURL (parseXmlRDF bUrl docUrl) -- { stateGenId :: Int -- } deriving (Show) + +type Parser = ParserT (State ParseState) + -- |Local state for the parser (dependant on the parent xml elements) data ParseState = ParseState { stateBaseUrl :: Maybe BaseUrl @@ -92,16 +96,14 @@ parseXmlRDF bUrl dUrl = parseRdf . parseXml where parseXml = Xeno.fromRawXml . T.encodeUtf8 parseRdf = first ParseFailure . join . second parseRdf' - parseRdf' = runParser (rdfParser bUrl dUrl) + parseRdf' ns = evalState (runParserT (rdfParser bUrl dUrl) ns) initState + initState = ParseState bUrl mempty undefined 0 -- TODO: use bUrl and dUrl rdfParser :: Rdf a => Maybe BaseUrl -> Maybe Text -> Parser (RDF a) rdfParser bUrl dUrl = do - let initState = ParseState bUrl mempty undefined 0 - rdf <- pRdf initState + rdf <- pRdf pWs - -- tree <- showTree - -- error (show tree) pEndOfInput return rdf @@ -143,21 +145,23 @@ oneAttr = do debugging. -} -pSubject :: ParseState -> Parser ((Node, Triples), ParseState) -pSubject st = unodeP <|> bnodeP +pSubject :: Parser (Node, Triples) +pSubject = unodeP <|> bnodeP where unodeP = do s <- unode <$> pAttr "rdf:about" - pure ((s, []), st { stateSubject = s } ) + modify $ \st -> st { stateSubject = s } + pure (s, []) bnodeP = do -- theId <- pAttr "rdf:ID" + st <- get let s = BNodeGen (stateGenId st) - st' = st { stateGenId = stateGenId st + 1, stateSubject = s} - pure ((s, []), st') + modify $ \st -> st { stateGenId = stateGenId st + 1, stateSubject = s } + pure (s, []) -- pPredicateObject :: Parser ((Node,Node)) -pPredicateObject :: ParseState -> Parser Triples -pPredicateObject st = do +pPredicateObject :: Parser Triples +pPredicateObject = do void pWs do pAnyElement $ do void (pNodeNot "rdf:Description") -- rdfms-rdf-names-use-error-011 @@ -173,10 +177,12 @@ pPredicateObject st = do pTypedLiteral p = do theType <- pAttr "rdf:datatype" theText <- pText + st <- get pure [triple (stateSubject st) p ((lnode (typedL (TL.toStrict theText) theType)))] pBNode p = do (p1, o1) <- oneAttr -- TODO: increment stateGenId + st <- get let bnode = BNodeGen (stateGenId st) t1 = triple (stateSubject st) p bnode a = case stateBaseUrl st of @@ -190,6 +196,7 @@ pPredicateObject st = do pPlainLiteral p = do theText <- pText pWs + st <- get pure [triple (stateSubject st) p @@ -216,24 +223,24 @@ pPredicateObject st = do -- theText <- pText -- pure (lnode (plainL (TL.toStrict theText))) -pRdf :: Rdf a => ParseState -> Parser (RDF a) -pRdf st = pElement "rdf:RDF" $ do +pRdf :: Rdf a => Parser (RDF a) +pRdf = pElement "rdf:RDF" $ do pm <- pPrefixMappings -- [TODO] Ensure no attributes - triples <- pNodeElementList st + triples <- pNodeElementList pure $ mkRdf triples Nothing pm -pNodeElementList :: ParseState -> Parser Triples -pNodeElementList st = do +pNodeElementList :: Parser Triples +pNodeElementList = do pWs - snd <$> pElement' (pRdfTriples st) + snd <$> pElement' pRdfTriples -pRdfTriples :: ParseState -> Parser Triples -pRdfTriples st = do +pRdfTriples :: Parser Triples +pRdfTriples = pElement "rdf:Description" $ do pWs - ((subj, reifiedTriples), st') <- pSubject st - ts <- concat <$> many (pPredicateObject st') + (subj, reifiedTriples) <- pSubject + ts <- concat <$> many pPredicateObject pWs -- tree <- showTree -- error (show tree) From acefeead31d8781f3e6c661e4281e9b1109c748a Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Fri, 24 May 2019 17:49:04 +0200 Subject: [PATCH 17/39] Improvements --- src/Text/RDF/RDF4H/XmlParser.hs | 235 ++++++++++++++------------------ 1 file changed, 102 insertions(+), 133 deletions(-) diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index 94c26df..d69f2aa 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -107,31 +107,11 @@ rdfParser bUrl dUrl = do pEndOfInput return rdf --- |White spaces parser -pWs :: Parser () -pWs = (T.all ws . TL.toStrict <$> pText) >>= guard - where - -- See: https://www.w3.org/TR/2000/REC-xml-20001006#NT-S - ws c = c == '\x20' || c == '\x09' || c == '\x0d' || c == '\x0a' - -pNodeNot :: Text -> Parser () -pNodeNot t = do - n <- pName - if n /= t - then pure () - else pFail ("forbidden element name: " <> show t) - {- [ ("xmlns:si","https://www.w3schools.com/rdf/") , ("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#") ] -} -pPrefixMappings :: Parser PrefixMappings -pPrefixMappings = PrefixMappings <$> pm - where - pm = Map.fromList . HashMap.foldlWithKey' getPrefixes mempty <$> pAttrs - getPrefixes ps k v = maybe ps (\k' -> (k', v):ps) (T.stripPrefix "xmlns:" k) - oneAttr :: Parser (Text,Text) oneAttr = do @@ -145,6 +125,37 @@ oneAttr = do debugging. -} +pRdf :: Rdf a => Parser (RDF a) +pRdf = pElement "rdf:RDF" $ do + pm <- pPrefixMappings + -- [TODO] Ensure no attributes + triples <- pNodeElementList + pEndOfInput + pure $ mkRdf triples Nothing pm + +pPrefixMappings :: Parser PrefixMappings +pPrefixMappings = PrefixMappings <$> pm + where + pm = Map.fromList . HashMap.foldlWithKey' getPrefixes mempty <$> pAttrs + getPrefixes ps k v = maybe ps (\k' -> (k', v):ps) (T.stripPrefix "xmlns:" k) + +pNodeElementList :: Parser Triples +pNodeElementList = pWs *> (mconcat <$> many pNodeElement) + +-- |White spaces parser +pWs :: Parser () +pWs = maybe True (T.all ws . TL.toStrict) <$> optional pText >>= guard + where + -- See: https://www.w3.org/TR/2000/REC-xml-20001006#NT-S + ws c = c == '\x20' || c == '\x09' || c == '\x0d' || c == '\x0a' + +pNodeElement :: Parser Triples +pNodeElement = pAnyElement $ do + name <- pName + guard (name == "rdf:Description") -- [FIXME] + (_subj, reifiedTriples) <- pSubject + fmap (reifiedTriples <>) pPropertyEltList + pSubject :: Parser (Node, Triples) pSubject = unodeP <|> bnodeP where @@ -159,121 +170,79 @@ pSubject = unodeP <|> bnodeP modify $ \st -> st { stateGenId = stateGenId st + 1, stateSubject = s } pure (s, []) --- pPredicateObject :: Parser ((Node,Node)) -pPredicateObject :: Parser Triples -pPredicateObject = do - void pWs - do pAnyElement $ do - void (pNodeNot "rdf:Description") -- rdfms-rdf-names-use-error-011 - p <- unode <$> pName - ts <- pTypedLiteral p - <|> pBNode p - <|> pPlainLiteral p - <|> pFail "unable to parse predicate/object pair" - pWs - pure ts - -- TODO: reify triple - where - pTypedLiteral p = do - theType <- pAttr "rdf:datatype" - theText <- pText - st <- get - pure [triple (stateSubject st) p ((lnode (typedL (TL.toStrict theText) theType)))] - pBNode p = do - (p1, o1) <- oneAttr - -- TODO: increment stateGenId - st <- get - let bnode = BNodeGen (stateGenId st) - t1 = triple (stateSubject st) p bnode - a = case stateBaseUrl st of - Nothing -> T.pack "" - Just (BaseUrl uri) -> uri - Right txt = resolveIRI a p1 - p2 = unode txt - -- TODO: typed and lang literals - t2 = triple bnode p2 (lnode (plainL o1)) - pure [t1, t2] - pPlainLiteral p = do - theText <- pText - pWs - st <- get - pure [triple - (stateSubject st) - p - (lnode (plainL (TL.toStrict theText)))] - --- TODO: unodes, and all different kinds of plain text nodes --- objP :: Parser (Node) --- objP {- st -} = do --- -- unode --- -- xs <- head <$> pPrefixMappings --- -- error (show xs) --- -- TODO for: --- -- Element "eg:Creator" --- -- [("eg:named","D\252rst")] --- -- [] --- pure (unode "http://www.example.com") --- <|> do --- -- typed literal --- theType <- pAttr "rdf:datatype" --- theText <- pText --- pure ((lnode (typedL (TL.toStrict theText) theType))) --- <|> do --- -- plain literal --- theText <- pText --- pure (lnode (plainL (TL.toStrict theText))) +pPropertyEltList :: Parser Triples +pPropertyEltList = pWs *> fmap mconcat (many (pPropertyElt <* pWs)) -pRdf :: Rdf a => Parser (RDF a) -pRdf = pElement "rdf:RDF" $ do - pm <- pPrefixMappings - -- [TODO] Ensure no attributes - triples <- pNodeElementList - pure $ mkRdf triples Nothing pm - -pNodeElementList :: Parser Triples -pNodeElementList = do +pResourcePropertyElt :: Node -> Parser Triples +pResourcePropertyElt p = do + -- [TODO] idAttr pWs - snd <$> pElement' pRdfTriples - -pRdfTriples :: Parser Triples -pRdfTriples = - pElement "rdf:Description" $ do - pWs - (subj, reifiedTriples) <- pSubject - ts <- concat <$> many pPredicateObject - pWs - -- tree <- showTree - -- error (show tree) - pEndOfInput - pure (ts <> reifiedTriples) -{- -[ Text "\n" -, Element - "rdf:RDF" - [ ("xmlns:si","https://www.w3schools.com/rdf/") - , ("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#") - ] - [ Text "\n" - , Element - "rdf:Description" - [ ("rdf:about","https://www.w3schools.com") ] - [ Text "\n" - , Element - "si:title" - [] - [ Text "W3Schools" ] - , Text "\n" - , Element - "si:author" - [] - [ Text "Jan Egil Refsnes" ] - , Text "\n" - ] - ,Text "\n" - ] -,Text "\n" -] --} + guard False + pure mempty -- [TODO] + +pLiteralPropertyElt :: Node -> Parser Triples +pLiteralPropertyElt p = do + -- [TODO] idAttr + mdt <- optional (pAttr "rdf:datatype") + t <- pText + st <- get + let literal = maybe (plainL (TL.toStrict t)) (typedL (TL.toStrict t)) mdt + pure [triple (stateSubject st) p (lnode literal)] + +pParseTypeLiteralPropertyElt :: Node -> Parser Triples +pParseTypeLiteralPropertyElt p = do + -- [TODO] idAttr + pt <- pAttr "rdf:parseType" + guard (pt == "Literal") + guard False *> pure mempty -- [TODO] + +pParseTypeResourcePropertyElt :: Node -> Parser Triples +pParseTypeResourcePropertyElt p = do + -- [TODO] idAttr + pt <- pAttr "rdf:parseType" + guard (pt == "Resource") + -- pPropertyEltList + guard False *> pure mempty -- [TODO] + +pParseTypeCollectionPropertyElt :: Node -> Parser Triples +pParseTypeCollectionPropertyElt p = do + -- [TODO] idAttr + pt <- pAttr "rdf:parseType" + guard (pt == "Collection") + -- pNodeElementList + guard False *> pure mempty -- [TODO] + +pParseTypeOtherPropertyElt :: Node -> Parser Triples +pParseTypeOtherPropertyElt p = do + -- [TODO] idAttr + pt <- pAttr "rdf:parseType" + guard (pt /= "Resource" && pt /= "Literal" && pt /= "Collection") + guard False *> pure mempty -- [TODO] + +pEmptyPropertyElt :: Node -> Parser Triples +pEmptyPropertyElt p = do + -- [TODO] idAttr + m <- resourceAttr <|> nodeIdAttr <|> datatypeAttr + ps <- pPropertyAttr + guard False *> pure mempty -- [TODO] + where + resourceAttr = guard False -- [TODO] + nodeIdAttr = guard False -- [TODO] + datatypeAttr = guard False -- [TODO] + pPropertyAttr = guard False -- [TODO] + +pPropertyElt :: Parser Triples +pPropertyElt = pAnyElement $ do + p <- unode <$> pName + -- [TODO] check URI + pResourcePropertyElt p + <|> pLiteralPropertyElt p + <|> pParseTypeLiteralPropertyElt p + <|> pParseTypeResourcePropertyElt p + <|> pParseTypeCollectionPropertyElt p + <|> pParseTypeOtherPropertyElt p + <|> pEmptyPropertyElt p + xmlEg = T.pack $ unlines [ "" , " Date: Sat, 25 May 2019 06:42:10 +0200 Subject: [PATCH 18/39] Move some definitions to ParserUtils --- src/Text/RDF/RDF4H/ParserUtils.hs | 29 +++++++++++++++++++++++------ src/Text/RDF/RDF4H/TurtleParser.hs | 13 ------------- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/src/Text/RDF/RDF4H/ParserUtils.hs b/src/Text/RDF/RDF4H/ParserUtils.hs index 90e7f98..5390ec9 100644 --- a/src/Text/RDF/RDF4H/ParserUtils.hs +++ b/src/Text/RDF/RDF4H/ParserUtils.hs @@ -1,17 +1,22 @@ +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} -module Text.RDF.RDF4H.ParserUtils( - parseFromURL, - Parser(..) -) where +module Text.RDF.RDF4H.ParserUtils + ( Parser(..) + , parseFromURL + , rdfTypeNode, rdfNilNode, rdfFirstNode, rdfRestNode + , xsdIntUri, xsdDoubleUri, xsdDecimalUri, xsdBooleanUri + ) where import Data.RDF.Types +import Data.RDF.Namespace import Control.Exception.Lifted import Network.HTTP.Conduit import Data.Text.Encoding (decodeUtf8) import Data.Semigroup ((<>)) import qualified Data.ByteString.Lazy as BS +import Data.Text (Text) import qualified Data.Text as T data Parser = Parsec | Attoparsec @@ -25,8 +30,8 @@ parseFromURL :: (Rdf rdfImpl) => (T.Text -> Either ParseFailure (RDF rdfImpl)) - parseFromURL parseFunc url = do result <- Control.Exception.Lifted.try $ simpleHttp url case result of - Left (ex::HttpException) -> - case ex of + Left (err :: HttpException) -> + case err of (HttpExceptionRequest _req content) -> case content of ConnectionTimeout -> @@ -37,3 +42,15 @@ parseFromURL parseFunc url = do Right bs -> do let s = decodeUtf8 $ BS.toStrict bs return (parseFunc s) + +rdfTypeNode, rdfNilNode, rdfFirstNode, rdfRestNode :: Node +rdfTypeNode = UNode $ mkUri rdf "type" +rdfNilNode = UNode $ mkUri rdf "nil" +rdfFirstNode = UNode $ mkUri rdf "first" +rdfRestNode = UNode $ mkUri rdf "rest" + +xsdIntUri, xsdDoubleUri, xsdDecimalUri, xsdBooleanUri :: Text +xsdIntUri = mkUri xsd "integer" +xsdDoubleUri = mkUri xsd "double" +xsdDecimalUri = mkUri xsd "decimal" +xsdBooleanUri = mkUri xsd "boolean" diff --git a/src/Text/RDF/RDF4H/TurtleParser.hs b/src/Text/RDF/RDF4H/TurtleParser.hs index cfa3b8e..b87c5ca 100644 --- a/src/Text/RDF/RDF4H/TurtleParser.hs +++ b/src/Text/RDF/RDF4H/TurtleParser.hs @@ -19,7 +19,6 @@ import Data.Maybe import Data.Semigroup ((<>)) import Data.RDF.Types import Data.RDF.IRI -import Data.RDF.Namespace import Text.RDF.RDF4H.ParserUtils import Text.RDF.RDF4H.NTriplesParser import Text.Parsec (runParser, ParseError) @@ -315,18 +314,6 @@ t_collection = withConstantSubjectPredicate $ return bn getSubject = get >>= \(_, _, _, _, s, _, _, _) -> pure s -rdfTypeNode, rdfNilNode, rdfFirstNode, rdfRestNode :: Node -rdfTypeNode = UNode $ mkUri rdf "type" -rdfNilNode = UNode $ mkUri rdf "nil" -rdfFirstNode = UNode $ mkUri rdf "first" -rdfRestNode = UNode $ mkUri rdf "rest" - -xsdIntUri, xsdDoubleUri, xsdDecimalUri, xsdBooleanUri :: T.Text -xsdIntUri = mkUri xsd "integer" -xsdDoubleUri = mkUri xsd "double" -xsdDecimalUri = mkUri xsd "decimal" -xsdBooleanUri = mkUri xsd "boolean" - t_literal :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m Node t_literal = LNode <$> try t_rdf_literal <|> From 7d318d3c6e65fb9f6ec7ff6c9565fcfabff65053 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Sat, 25 May 2019 08:18:09 +0200 Subject: [PATCH 19/39] Improvements --- src/Text/RDF/RDF4H/XmlParser.hs | 245 ++++++++++++++++++++++++-------- 1 file changed, 188 insertions(+), 57 deletions(-) diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index d69f2aa..5693531 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -10,23 +10,25 @@ module Text.RDF.RDF4H.XmlParser ( XmlParser(..) , xmlEg + , example11 + , example12 ) where -import Text.RDF.RDF4H.ParserUtils (parseFromURL) +import Text.RDF.RDF4H.ParserUtils hiding (Parser) +import Data.RDF.IRI +import Data.RDF.Types hiding (empty) +import Data.RDF.Graph.TList import Debug.Trace import Control.Applicative import Control.Exception import Control.Monad +import Control.Monad.State.Strict import Data.Semigroup ((<>)) -import qualified Data.HashMap.Strict as HashMap import qualified Data.Map as Map import Data.Maybe import Data.Bifunctor import Data.Foldable -import Data.RDF.IRI -import Data.RDF.Types -import Data.RDF.Graph.TList import qualified Data.HashMap.Strict as HM import Data.Text (Text) import Data.Text.Encoding @@ -37,7 +39,6 @@ import qualified Data.Text.Encoding as T import Xmlbf hiding (Node, Parser) import qualified Xmlbf (Node) import qualified Xmlbf.Xeno as Xeno -import Control.Monad.State.Strict data XmlParser = XmlParser (Maybe BaseUrl) (Maybe Text) @@ -75,7 +76,7 @@ type Parser = ParserT (State ParseState) data ParseState = ParseState { stateBaseUrl :: Maybe BaseUrl , stateLang :: Maybe Text - , stateSubject :: Subject + , stateSubject :: Maybe Subject , stateGenId :: Int } deriving(Show) @@ -97,7 +98,7 @@ parseXmlRDF bUrl dUrl = parseRdf . parseXml parseXml = Xeno.fromRawXml . T.encodeUtf8 parseRdf = first ParseFailure . join . second parseRdf' parseRdf' ns = evalState (runParserT (rdfParser bUrl dUrl) ns) initState - initState = ParseState bUrl mempty undefined 0 + initState = ParseState bUrl empty empty 0 -- TODO: use bUrl and dUrl rdfParser :: Rdf a => Maybe BaseUrl -> Maybe Text -> Parser (RDF a) @@ -116,8 +117,8 @@ rdfParser bUrl dUrl = do oneAttr :: Parser (Text,Text) oneAttr = do xs <- pAttrs - case length (HashMap.toList xs) of - 1 -> pure $ head (HashMap.toList xs) + case length (HM.toList xs) of + 1 -> pure $ head (HM.toList xs) _ -> pFail "not one attr" {- NOTE: @@ -130,13 +131,12 @@ pRdf = pElement "rdf:RDF" $ do pm <- pPrefixMappings -- [TODO] Ensure no attributes triples <- pNodeElementList - pEndOfInput pure $ mkRdf triples Nothing pm pPrefixMappings :: Parser PrefixMappings pPrefixMappings = PrefixMappings <$> pm where - pm = Map.fromList . HashMap.foldlWithKey' getPrefixes mempty <$> pAttrs + pm = Map.fromList . HM.foldlWithKey' getPrefixes mempty <$> pAttrs getPrefixes ps k v = maybe ps (\k' -> (k', v):ps) (T.stripPrefix "xmlns:" k) pNodeElementList :: Parser Triples @@ -151,58 +151,111 @@ pWs = maybe True (T.all ws . TL.toStrict) <$> optional pText >>= guard pNodeElement :: Parser Triples pNodeElement = pAnyElement $ do - name <- pName - guard (name == "rdf:Description") -- [FIXME] - (_subj, reifiedTriples) <- pSubject - fmap (reifiedTriples <>) pPropertyEltList + (s, ts1) <- pSubject + ts2 <- pPropertyAttr + ts3 <- pPropertyEltList + setSubject (Just s) + pure $ mconcat [ts1, ts2, ts3] pSubject :: Parser (Node, Triples) -pSubject = unodeP <|> bnodeP +pSubject = do + n <- pUnodeId <|> pUnode <|> pBnode <|> pBnodeGen + uri <- pName + pLang >>= setLang + mtype <- optional (pType1 n uri) + ts <- handlePropertyAttr n uri + pure (n, (maybe ts (:ts) mtype)) where - unodeP = do - s <- unode <$> pAttr "rdf:about" - modify $ \st -> st { stateSubject = s } - pure (s, []) - bnodeP = do - -- theId <- pAttr "rdf:ID" - st <- get - let s = BNodeGen (stateGenId st) - modify $ \st -> st { stateGenId = stateGenId st + 1, stateSubject = s } - pure (s, []) + pUnodeId = do + _ <- pIdAttr + pFail "[TODO] rdf:ID" + pUnode = do + s <- unode <$> pAboutAttr + setSubject (Just s) + pure s + pBnode = do + bn <- pNodeIdAttr + let s = BNode bn + setSubject (Just s) + pure s + pBnodeGen = do + s <- newBNode + setSubject (Just s) + pure s + pType1 n uri = + if uri /= "rdf:Description" + then pure $ triple n rdfTypeNode (unode uri) + else empty + handlePropertyAttr n uri = do + attrs <- pAttrs + HM.elems <$> HM.traverseWithKey f attrs + where + -- [TODO] resolve IRIs + f attr value = pure $ if attr == "rdf:type" + then triple n rdfTypeNode (unode value) + else triple n (unode attr) (lnode (plainL value)) + +pLang :: Parser (Maybe Text) +pLang = optional (pAttr "xml:lang") pPropertyEltList :: Parser Triples pPropertyEltList = pWs *> fmap mconcat (many (pPropertyElt <* pWs)) +pPropertyElt :: Parser Triples +pPropertyElt = pAnyElement $ do + p <- unode <$> pName + -- [TODO] check URI + pResourcePropertyElt p + <|> pLiteralPropertyElt p + <|> pParseTypeLiteralPropertyElt p + <|> pParseTypeResourcePropertyElt p + <|> pParseTypeCollectionPropertyElt p + <|> pParseTypeOtherPropertyElt p + <|> pEmptyPropertyElt p + pResourcePropertyElt :: Node -> Parser Triples pResourcePropertyElt p = do -- [TODO] idAttr + -- [TODO] rdf:ID + pWs + uri <- pName + let p = unode uri + s <- currentSubject + ts <- pNodeElement + o <- currentSubject pWs - guard False - pure mempty -- [TODO] + let mt = flip triple p <$> s <*> o + pure $ maybe ts (:ts) mt pLiteralPropertyElt :: Node -> Parser Triples pLiteralPropertyElt p = do -- [TODO] idAttr - mdt <- optional (pAttr "rdf:datatype") + dt <- optional (pAttr "rdf:datatype") t <- pText - st <- get - let literal = maybe (plainL (TL.toStrict t)) (typedL (TL.toStrict t)) mdt - pure [triple (stateSubject st) p (lnode literal)] + s <- currentSubject + lang <- liftA2 (<|>) pLang currentLang + let t' = TL.toStrict t + let literal = maybe (plainL t') id $ (typedL t' <$> dt) <|> (plainLL t' <$> lang) + pure $ maybe mempty (\s' -> [triple s' p (lnode literal)]) s pParseTypeLiteralPropertyElt :: Node -> Parser Triples pParseTypeLiteralPropertyElt p = do -- [TODO] idAttr pt <- pAttr "rdf:parseType" guard (pt == "Literal") - guard False *> pure mempty -- [TODO] + pFail "TODO" -- [TODO] pParseTypeResourcePropertyElt :: Node -> Parser Triples pParseTypeResourcePropertyElt p = do -- [TODO] idAttr pt <- pAttr "rdf:parseType" guard (pt == "Resource") - -- pPropertyEltList - guard False *> pure mempty -- [TODO] + o <- newBNode + s <- currentSubject + let ts = maybe mempty (\s' -> [triple s' p o]) s + setSubject (Just o) + -- pPropertyEltList [TODO] + pure ts pParseTypeCollectionPropertyElt :: Node -> Parser Triples pParseTypeCollectionPropertyElt p = do @@ -210,38 +263,116 @@ pParseTypeCollectionPropertyElt p = do pt <- pAttr "rdf:parseType" guard (pt == "Collection") -- pNodeElementList - guard False *> pure mempty -- [TODO] + pFail "TODO" -- [TODO] pParseTypeOtherPropertyElt :: Node -> Parser Triples pParseTypeOtherPropertyElt p = do -- [TODO] idAttr pt <- pAttr "rdf:parseType" guard (pt /= "Resource" && pt /= "Literal" && pt /= "Collection") - guard False *> pure mempty -- [TODO] + pFail "TODO" -- [TODO] pEmptyPropertyElt :: Node -> Parser Triples pEmptyPropertyElt p = do -- [TODO] idAttr - m <- resourceAttr <|> nodeIdAttr <|> datatypeAttr - ps <- pPropertyAttr - guard False *> pure mempty -- [TODO] + s <- currentSubject + l <- currentLang + ts <- optional (pResourceAttr' s <|> pNodeIdAttr' s l <|> pDatatypeAttr' s l) + pure $ maybe mempty id ts + -- ps <- pPropertyAttr -- [TODO] where - resourceAttr = guard False -- [TODO] - nodeIdAttr = guard False -- [TODO] - datatypeAttr = guard False -- [TODO] - pPropertyAttr = guard False -- [TODO] + pResourceAttr' s = do + o <- unode <$> pResourceAttr + pure $ maybe mempty (\s' -> [triple s' p o]) s + pNodeIdAttr' s l = do + bn <- pNodeIdAttr + let o = BNode bn + pure $ maybe mempty (\s' -> [triple s' p o]) s + pDatatypeAttr' s l = do + pFail "TODO" -- [TODO] + +pIdAttr :: Parser Text +pIdAttr = pAttr "rdf:ID" -- [TODO] Check + +pNodeIdAttr :: Parser Text +pNodeIdAttr = pAttr "rdf:nodeID" -- [TODO] Check + +pAboutAttr :: Parser Text +pAboutAttr = pAttr "rdf:about" >>= checkIRI "rdf:about" + +pResourceAttr :: Parser Text +pResourceAttr = pAttr "rdf:resource" >>= checkIRI "rdf:resource" + +pDatatypeAttr :: Parser Text +pDatatypeAttr = pAttr "rdf:datatype" >>= checkIRI "rdf:datatype" + +-- [TODO] +pPropertyAttr :: Parser Triples +pPropertyAttr = do + attrs <- HM.filterWithKey (\iri _ -> iri /= "rdf:type") <$> pAttrs + s <- currentSubject + lang <- currentLang + let mkLiteral = lnode . maybe plainL (flip plainLL) lang + pure $ maybe + mempty + (\s' -> HM.elems $ HM.mapWithKey (mkTriple s' mkLiteral) attrs) + s + where + mkTriple s mkLiteral iri value = Triple s (unode iri) (mkLiteral value) -pPropertyElt :: Parser Triples -pPropertyElt = pAnyElement $ do - p <- unode <$> pName - -- [TODO] check URI - pResourcePropertyElt p - <|> pLiteralPropertyElt p - <|> pParseTypeLiteralPropertyElt p - <|> pParseTypeResourcePropertyElt p - <|> pParseTypeCollectionPropertyElt p - <|> pParseTypeOtherPropertyElt p - <|> pEmptyPropertyElt p +checkIRI :: String -> Text -> Parser Text +checkIRI msg = maybe (pFail ("Malformed IRI: " <> msg)) pure . uriValidate + +newBNode :: Parser Node +newBNode = do + st <- get + modify $ \st -> st { stateGenId = stateGenId st + 1 } + pure $ BNodeGen (stateGenId st) + +currentSubject :: Parser (Maybe Subject) +currentSubject = stateSubject <$> get + +setSubject :: (Maybe Subject) -> Parser () +setSubject s = modify (\st -> st { stateSubject = s }) + +currentLang :: Parser (Maybe Text) +currentLang = stateLang <$> get + +setLang :: (Maybe Text) -> Parser () +setLang lang = modify (\st -> st { stateLang = lang }) + +example11 :: Text +example11 = T.pack $ unlines + [ "" + , "" + , " " + , " " + , " " + , " " + , "" + , "" + , "" + ] + +example12 :: Text +example12 = T.pack $ unlines + [ "" + , "" + , " " + , " " + , " Dave Beckett" + , " " + , " " + , " " + , "" + ] xmlEg = T.pack $ unlines [ "" @@ -281,7 +412,7 @@ test1 = triplesOf got == expected -- some reason, capture it using 'pText' before using 'pElement''. -- -- Consumes the element from the parser state. -pElement' :: Parser a -> Parser (T.Text, a) +pElement' :: Parser a -> Parser (Text, a) pElement' = liftA2 (,) pName pText' :: TL.Text -> Parser TL.Text From bce4300cbd5696885cccefbd5561190ad2bdc552 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Sat, 25 May 2019 17:25:50 +0200 Subject: [PATCH 20/39] Improvements --- src/Text/RDF/RDF4H/XmlParser.hs | 192 ++++++++++++++++---------------- 1 file changed, 93 insertions(+), 99 deletions(-) diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index 5693531..012daed 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -3,6 +3,7 @@ {-# LANGUAGE DoAndIfThenElse #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE LambdaCase #-} -- |An parser for the RDF/XML format -- . @@ -15,29 +16,29 @@ module Text.RDF.RDF4H.XmlParser ) where import Text.RDF.RDF4H.ParserUtils hiding (Parser) -import Data.RDF.IRI +--import Data.RDF.IRI import Data.RDF.Types hiding (empty) -import Data.RDF.Graph.TList +--import Data.RDF.Graph.TList -import Debug.Trace +--import Debug.Trace import Control.Applicative import Control.Exception import Control.Monad import Control.Monad.State.Strict import Data.Semigroup ((<>)) import qualified Data.Map as Map -import Data.Maybe +--import Data.Maybe import Data.Bifunctor -import Data.Foldable +--import Data.Foldable import qualified Data.HashMap.Strict as HM import Data.Text (Text) -import Data.Text.Encoding +--import Data.Text.Encoding import qualified Data.Text as T import qualified Data.Text.IO as TIO import qualified Data.Text.Lazy as TL import qualified Data.Text.Encoding as T import Xmlbf hiding (Node, Parser) -import qualified Xmlbf (Node) +--import qualified Xmlbf (Node) import qualified Xmlbf.Xeno as Xeno data XmlParser = XmlParser (Maybe BaseUrl) (Maybe Text) @@ -74,7 +75,7 @@ type Parser = ParserT (State ParseState) -- |Local state for the parser (dependant on the parent xml elements) data ParseState = ParseState - { stateBaseUrl :: Maybe BaseUrl + { stateBaseUri :: Maybe BaseUrl , stateLang :: Maybe Text , stateSubject :: Maybe Subject , stateGenId :: Int @@ -84,8 +85,8 @@ data ParserException = ParserException String deriving (Show) instance Exception ParserException -testXeno :: Text -> Either String [Xmlbf.Node] -testXeno = Xeno.fromRawXml . T.encodeUtf8 +-- testXeno :: Text -> Either String [Xmlbf.Node] +-- testXeno = Xeno.fromRawXml . T.encodeUtf8 -- |Parse a xml Text to an RDF representation parseXmlRDF :: (Rdf a) @@ -102,25 +103,12 @@ parseXmlRDF bUrl dUrl = parseRdf . parseXml -- TODO: use bUrl and dUrl rdfParser :: Rdf a => Maybe BaseUrl -> Maybe Text -> Parser (RDF a) -rdfParser bUrl dUrl = do +rdfParser _bUrl _dUrl = do rdf <- pRdf pWs pEndOfInput return rdf -{- -[ ("xmlns:si","https://www.w3schools.com/rdf/") -, ("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#") -] --} - -oneAttr :: Parser (Text,Text) -oneAttr = do - xs <- pAttrs - case length (HM.toList xs) of - 1 -> pure $ head (HM.toList xs) - _ -> pFail "not one attr" - {- NOTE: remember to use `showTree` in the fork of xmlbf when pEndOfInput needs debugging. @@ -128,6 +116,8 @@ oneAttr = do pRdf :: Rdf a => Parser (RDF a) pRdf = pElement "rdf:RDF" $ do + bUri <- optional pBaseUri + setBaseUri bUri pm <- pPrefixMappings -- [TODO] Ensure no attributes triples <- pNodeElementList @@ -139,6 +129,9 @@ pPrefixMappings = PrefixMappings <$> pm pm = Map.fromList . HM.foldlWithKey' getPrefixes mempty <$> pAttrs getPrefixes ps k v = maybe ps (\k' -> (k', v):ps) (T.stripPrefix "xmlns:" k) +pBaseUri :: Parser BaseUrl +pBaseUri = BaseUrl <$> pAttr "xml:base" + pNodeElementList :: Parser Triples pNodeElementList = pWs *> (mconcat <$> many pNodeElement) @@ -159,41 +152,48 @@ pNodeElement = pAnyElement $ do pSubject :: Parser (Node, Triples) pSubject = do - n <- pUnodeId <|> pUnode <|> pBnode <|> pBnodeGen + s <- pUnodeId <|> pBnode <|> pUnode <|> pBnodeGen uri <- pName pLang >>= setLang - mtype <- optional (pType1 n uri) - ts <- handlePropertyAttr n uri - pure (n, (maybe ts (:ts) mtype)) + mtype <- optional (pType1 s uri) + ts <- pPropertyAttrs s + pure (s, (maybe ts (:ts) mtype)) where pUnodeId = do - _ <- pIdAttr - pFail "[TODO] rdf:ID" - pUnode = do - s <- unode <$> pAboutAttr - setSubject (Just s) - pure s + nid <- pIdAttr + -- [FIXME] undefined + mkUNodeID nid >>= maybe undefined pure pBnode = do bn <- pNodeIdAttr let s = BNode bn setSubject (Just s) pure s + -- Default subject: a new blank node + pUnode = do + s <- unode <$> pAboutAttr + setSubject (Just s) + pure s pBnodeGen = do s <- newBNode setSubject (Just s) pure s pType1 n uri = if uri /= "rdf:Description" - then pure $ triple n rdfTypeNode (unode uri) + then pure $ Triple n rdfTypeNode (unode uri) else empty - handlePropertyAttr n uri = do - attrs <- pAttrs - HM.elems <$> HM.traverseWithKey f attrs - where - -- [TODO] resolve IRIs - f attr value = pure $ if attr == "rdf:type" - then triple n rdfTypeNode (unode value) - else triple n (unode attr) (lnode (plainL value)) + +pPropertyAttrs :: Node -> Parser Triples +pPropertyAttrs s = do + attrs <- pAttrs + HM.elems <$> HM.traverseWithKey f attrs + where + -- [TODO] resolve IRIs + f attr value = if attr == "rdf:type" + then pure $ Triple s rdfTypeNode (unode value) + else do + lang <- currentLang + pure $ let mkLiteral = maybe plainL (flip plainLL) lang + in Triple s (unode attr) (lnode (mkLiteral value)) pLang :: Parser (Maybe Text) pLang = optional (pAttr "xml:lang") @@ -205,12 +205,14 @@ pPropertyElt :: Parser Triples pPropertyElt = pAnyElement $ do p <- unode <$> pName -- [TODO] check URI - pResourcePropertyElt p - <|> pLiteralPropertyElt p - <|> pParseTypeLiteralPropertyElt p + pParseTypeLiteralPropertyElt p + -- <|> pLiteralPropertyElt p + -- <|> pParseTypeLiteralPropertyElt p <|> pParseTypeResourcePropertyElt p <|> pParseTypeCollectionPropertyElt p <|> pParseTypeOtherPropertyElt p + <|> pResourcePropertyElt p + <|> pLiteralPropertyElt p <|> pEmptyPropertyElt p pResourcePropertyElt :: Node -> Parser Triples @@ -218,13 +220,11 @@ pResourcePropertyElt p = do -- [TODO] idAttr -- [TODO] rdf:ID pWs - uri <- pName - let p = unode uri s <- currentSubject ts <- pNodeElement o <- currentSubject pWs - let mt = flip triple p <$> s <*> o + let mt = flip Triple p <$> s <*> o pure $ maybe ts (:ts) mt pLiteralPropertyElt :: Node -> Parser Triples @@ -236,10 +236,10 @@ pLiteralPropertyElt p = do lang <- liftA2 (<|>) pLang currentLang let t' = TL.toStrict t let literal = maybe (plainL t') id $ (typedL t' <$> dt) <|> (plainLL t' <$> lang) - pure $ maybe mempty (\s' -> [triple s' p (lnode literal)]) s + pure $ maybe mempty (\s' -> [Triple s' p (lnode literal)]) s pParseTypeLiteralPropertyElt :: Node -> Parser Triples -pParseTypeLiteralPropertyElt p = do +pParseTypeLiteralPropertyElt _p = do -- [TODO] idAttr pt <- pAttr "rdf:parseType" guard (pt == "Literal") @@ -250,23 +250,24 @@ pParseTypeResourcePropertyElt p = do -- [TODO] idAttr pt <- pAttr "rdf:parseType" guard (pt == "Resource") - o <- newBNode s <- currentSubject - let ts = maybe mempty (\s' -> [triple s' p o]) s - setSubject (Just o) + o <- newBNode + let ts = maybe mempty (\s' -> [Triple s' p o]) s + -- setSubject (Just o) -- pPropertyEltList [TODO] pure ts pParseTypeCollectionPropertyElt :: Node -> Parser Triples -pParseTypeCollectionPropertyElt p = do +pParseTypeCollectionPropertyElt _p = do -- [TODO] idAttr pt <- pAttr "rdf:parseType" guard (pt == "Collection") -- pNodeElementList + -- pWs *> (mconcat <$> many pNodeElement) pFail "TODO" -- [TODO] pParseTypeOtherPropertyElt :: Node -> Parser Triples -pParseTypeOtherPropertyElt p = do +pParseTypeOtherPropertyElt _p = do -- [TODO] idAttr pt <- pAttr "rdf:parseType" guard (pt /= "Resource" && pt /= "Literal" && pt /= "Collection") @@ -274,22 +275,17 @@ pParseTypeOtherPropertyElt p = do pEmptyPropertyElt :: Node -> Parser Triples pEmptyPropertyElt p = do - -- [TODO] idAttr + -- [TODO] idAttr, rdf:ID s <- currentSubject - l <- currentLang - ts <- optional (pResourceAttr' s <|> pNodeIdAttr' s l <|> pDatatypeAttr' s l) - pure $ maybe mempty id ts - -- ps <- pPropertyAttr -- [TODO] + case s of + Nothing -> pure mempty + Just s' -> do + o <- pResourceAttr' <|> pNodeIdAttr' <|> newBNode + ts <- pPropertyAttrs o + pure (Triple s' p o : ts) where - pResourceAttr' s = do - o <- unode <$> pResourceAttr - pure $ maybe mempty (\s' -> [triple s' p o]) s - pNodeIdAttr' s l = do - bn <- pNodeIdAttr - let o = BNode bn - pure $ maybe mempty (\s' -> [triple s' p o]) s - pDatatypeAttr' s l = do - pFail "TODO" -- [TODO] + pResourceAttr' = unode <$> pResourceAttr + pNodeIdAttr' = BNode <$> pNodeIdAttr pIdAttr :: Parser Text pIdAttr = pAttr "rdf:ID" -- [TODO] Check @@ -325,10 +321,21 @@ checkIRI msg = maybe (pFail ("Malformed IRI: " <> msg)) pure . uriValidate newBNode :: Parser Node newBNode = do - st <- get modify $ \st -> st { stateGenId = stateGenId st + 1 } + st <- get pure $ BNodeGen (stateGenId st) +currentBaseUri :: Parser (Maybe BaseUrl) +currentBaseUri = stateBaseUri <$> get + +setBaseUri :: (Maybe BaseUrl) -> Parser () +setBaseUri u = modify (\st -> st { stateBaseUri = u }) + +mkUNodeID :: Text -> Parser (Maybe Node) +mkUNodeID t = currentBaseUri >>= \case + Nothing -> pure Nothing + Just (BaseUrl u) -> pure . Just . unode $ mconcat [u, "#", t] + currentSubject :: Parser (Maybe Subject) currentSubject = stateSubject <$> get @@ -374,6 +381,7 @@ example12 = T.pack $ unlines , "" ] +xmlEg :: Text xmlEg = T.pack $ unlines [ "" , "" ] -test1 :: Bool -test1 = triplesOf got == expected - where - Right (got::RDF TList) = parseXmlRDF Nothing Nothing xmlEg - expected = - [ Triple - (UNode "https://www.w3schools.com") - (UNode "si:title") - (LNode (PlainL "W3Schools")) - , Triple - (UNode "https://www.w3schools.com") - (UNode "si:author") - (LNode (PlainL "Jan Egil Refsnes")) - ] -- missing in Xmlbf @@ -412,24 +406,24 @@ test1 = triplesOf got == expected -- some reason, capture it using 'pText' before using 'pElement''. -- -- Consumes the element from the parser state. -pElement' :: Parser a -> Parser (Text, a) -pElement' = liftA2 (,) pName +-- pElement' :: Parser a -> Parser (Text, a) +-- pElement' = liftA2 (,) pName -pText' :: TL.Text -> Parser TL.Text -pText' t = do - let pTextFail = pFail ("Missing text node " <> show t) - do t' <- pText - if t == t' then pure t - else pTextFail - <|> pTextFail +-- pText' :: TL.Text -> Parser TL.Text +-- pText' t = do +-- let pTextFail = pFail ("Missing text node " <> show t) +-- do t' <- pText +-- if t == t' then pure t +-- else pTextFail +-- <|> pTextFail -- parser combinators missing in Xmlbf -between :: Parser a -> Parser b -> Parser c -> Parser c -between open close thing = open *> thing <* close - -manyTill :: Parser a -> Parser end -> Parser [a] -manyTill thing z = many thing <* z +-- between :: Parser a -> Parser b -> Parser c -> Parser c +-- between open close thing = open *> thing <* close +-- +-- manyTill :: Parser a -> Parser end -> Parser [a] +-- manyTill thing z = many thing <* z -- pElem :: Text -> Parser Text -- oneOf :: Parser [a] -> Parser a From d3f3c7378da198c7502c4d56d3ec322b6f2e75a0 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Sat, 25 May 2019 18:43:55 +0200 Subject: [PATCH 21/39] Collection --- src/Text/RDF/RDF4H/ParserUtils.hs | 5 +- src/Text/RDF/RDF4H/XmlParser.hs | 236 +++++++----------------------- 2 files changed, 55 insertions(+), 186 deletions(-) diff --git a/src/Text/RDF/RDF4H/ParserUtils.hs b/src/Text/RDF/RDF4H/ParserUtils.hs index 5390ec9..f4797d3 100644 --- a/src/Text/RDF/RDF4H/ParserUtils.hs +++ b/src/Text/RDF/RDF4H/ParserUtils.hs @@ -4,7 +4,7 @@ module Text.RDF.RDF4H.ParserUtils ( Parser(..) , parseFromURL - , rdfTypeNode, rdfNilNode, rdfFirstNode, rdfRestNode + , rdfTypeNode, rdfNilNode, rdfFirstNode, rdfRestNode, rdfListIndex , xsdIntUri, xsdDoubleUri, xsdDecimalUri, xsdBooleanUri ) where @@ -49,6 +49,9 @@ rdfNilNode = UNode $ mkUri rdf "nil" rdfFirstNode = UNode $ mkUri rdf "first" rdfRestNode = UNode $ mkUri rdf "rest" +rdfListIndex :: Text +rdfListIndex = mkUri rdf "_" + xsdIntUri, xsdDoubleUri, xsdDecimalUri, xsdBooleanUri :: Text xsdIntUri = mkUri xsd "integer" xsdDoubleUri = mkUri xsd "double" diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index 012daed..ab0e635 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -79,6 +79,7 @@ data ParseState = ParseState , stateLang :: Maybe Text , stateSubject :: Maybe Subject , stateGenId :: Int + , stateListIndex :: Int } deriving(Show) data ParserException = ParserException String @@ -99,7 +100,7 @@ parseXmlRDF bUrl dUrl = parseRdf . parseXml parseXml = Xeno.fromRawXml . T.encodeUtf8 parseRdf = first ParseFailure . join . second parseRdf' parseRdf' ns = evalState (runParserT (rdfParser bUrl dUrl) ns) initState - initState = ParseState bUrl empty empty 0 + initState = ParseState bUrl empty empty 0 0 -- TODO: use bUrl and dUrl rdfParser :: Rdf a => Maybe BaseUrl -> Maybe Text -> Parser (RDF a) @@ -168,11 +169,11 @@ pSubject = do let s = BNode bn setSubject (Just s) pure s - -- Default subject: a new blank node pUnode = do s <- unode <$> pAboutAttr setSubject (Just s) pure s + -- Default subject: a new blank node pBnodeGen = do s <- newBNode setSubject (Just s) @@ -199,21 +200,24 @@ pLang :: Parser (Maybe Text) pLang = optional (pAttr "xml:lang") pPropertyEltList :: Parser Triples -pPropertyEltList = pWs *> fmap mconcat (many (pPropertyElt <* pWs)) +pPropertyEltList = pWs + *> resetListIndex + *> fmap mconcat (many (pPropertyElt <* pWs)) pPropertyElt :: Parser Triples pPropertyElt = pAnyElement $ do - p <- unode <$> pName - -- [TODO] check URI - pParseTypeLiteralPropertyElt p - -- <|> pLiteralPropertyElt p - -- <|> pParseTypeLiteralPropertyElt p - <|> pParseTypeResourcePropertyElt p - <|> pParseTypeCollectionPropertyElt p - <|> pParseTypeOtherPropertyElt p - <|> pResourcePropertyElt p - <|> pLiteralPropertyElt p - <|> pEmptyPropertyElt p + p <- unode <$> (pName >>= listExpansion) + -- [TODO] check URI + pParseTypeLiteralPropertyElt p + <|> pParseTypeResourcePropertyElt p + <|> pParseTypeCollectionPropertyElt p + <|> pParseTypeOtherPropertyElt p + <|> pResourcePropertyElt p + <|> pLiteralPropertyElt p + <|> pEmptyPropertyElt p + where + listExpansion "rdf:li" = nextListIndex + listExpansion u = pure u pResourcePropertyElt :: Node -> Parser Triples pResourcePropertyElt p = do @@ -258,13 +262,34 @@ pParseTypeResourcePropertyElt p = do pure ts pParseTypeCollectionPropertyElt :: Node -> Parser Triples -pParseTypeCollectionPropertyElt _p = do +pParseTypeCollectionPropertyElt p = do -- [TODO] idAttr pt <- pAttr "rdf:parseType" guard (pt == "Collection") - -- pNodeElementList - -- pWs *> (mconcat <$> many pNodeElement) - pFail "TODO" -- [TODO] + s <- currentSubject + case s of + Nothing -> pure mempty + Just s' -> do + r <- optional pNodeElement + case r of + Nothing -> pure [Triple s' p rdfNilNode] + Just ts1 -> do + s'' <- currentSubject + n <- newBNode + let ts2 = maybe mempty (\s''' -> [Triple s' p n, Triple n rdfFirstNode s''']) s'' + ts3 <- go n + pure $ mconcat [ts1, ts2, ts3] + where + go s = do + r <- optional pNodeElement + case r of + Nothing -> pure $ [Triple s rdfRestNode rdfNilNode] + Just ts1 -> do + s' <- currentSubject + n <- newBNode + let ts2 = maybe mempty (\s'' -> [Triple s rdfRestNode n, Triple n rdfFirstNode s'']) s' + ts3 <- go n + pure $ mconcat [ts1, ts2, ts3] pParseTypeOtherPropertyElt :: Node -> Parser Triples pParseTypeOtherPropertyElt _p = do @@ -325,6 +350,14 @@ newBNode = do st <- get pure $ BNodeGen (stateGenId st) +nextListIndex :: Parser Text +nextListIndex = do + modify $ \st -> st { stateListIndex = stateListIndex st + 1 } + (rdfListIndex <>) . T.pack . show . stateListIndex <$> get + +resetListIndex :: Parser () +resetListIndex = modify $ \st -> st { stateListIndex = 0 } + currentBaseUri :: Parser (Maybe BaseUrl) currentBaseUri = stateBaseUri <$> get @@ -428,170 +461,3 @@ xmlEg = T.pack $ unlines -- pElem :: Text -> Parser Text -- oneOf :: Parser [a] -> Parser a -- noneOf :: Parser [a] -> Parser a - - ---------------------------- --- Example trees from xeno - --- data/xml/example07.rdf -{- -[ Text "\n" -, Element "rdf:RDF" - [ ("xmlns:dc","http://purl.org/dc/elements/1.1/") - , ("xmlns:ex","http://example.org/stuff/1.0/") - , ("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#") - ] - [ Text "\n " - , Element "rdf:Description" - [ ("rdf:about","http://www.w3.org/TR/rdf-syntax-grammar") - , ("dc:title","RDF/XML Syntax Specification (Revised)") - ] - [ Text "\n " - , Element "ex:editor" - [] - [ Text "\n " - , Element "rdf:Description" - [ ("ex:fullName","Dave Beckett") - ] - [ Text "\n " - , Element "ex:homePage" - [ ("rdf:resource","http://purl.org/net/dajobe/") - ] - [] - , Text "\n " - ] - , Text "\n " - ] - , Text "\n " - ] - , Text "\n" - ] -, Text "\n" -] --} - - -{- rdf-tests/rdf-xml/amp-in-url/test001.rdf - -[ Text "\n\n" -, Element "rdf:RDF" - [("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#") - ] - [ Text "\n\n " - , Element "rdf:Description" - [("rdf:about","http://example/q?abc=1&def=2") - ] - [ Text "\n " - , Element "rdf:value" - [] - [Text "xxx"] - , Text "\n " - ] - , Text "\n\n" - ] -, Text "\n" -] --} - -{- "rdf-tests/rdf-xml/rdfms-rdf-names-use/error-001.rdf" - -[ Text "\n\n\n" -, Element "rdf:RDF" - [("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#")] - [ Text "\n " - , Element "rdf:RDF" - [] - [] - , Text "\n" - ] -, Text "\n" -] - --} - - - -{- "rdf-tests/rdf-xml/rdfms-rdf-names-use/error-011.rdf" -Description is forbidden as a property element name. - -[ Text "\n\n\n" -, Element "rdf:RDF" - [("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#")] - [ Text "\n " - , Element "rdf:Description" - [("rdf:about","http://example.org/node1")] - [ Text "\n " - , Element "rdf:Description" - [("rdf:resource","http://example.org/node2")] - [] - , Text "\n " - ] - , Text "\n" - ] -, Text "\n" -] - --} - -{- "rdf-tests/rdf-xml/rdf-charmod-literals/test001.rdf" - -[ Text "\n\n\n" -, Element "rdf:RDF" - [("xmlns:eg","http://example.org/"),("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#")] - [ Text "\n \n\n " - , Element "rdf:Description" - [("rdf:about","http://www.w3.org/TR/2002/WD-charmod-20020220")] - [ Text "\n\n \n " - , Element "eg:Creator" - [("eg:named","D\252rst")] - [] - , Text "\n \n\n " - ] - , Text "\n" - ] -, Text "\n" -] - --} - -{- rdf-tests/rdf-xml/rdf-charmod-uris/test001.rdf - -[ Text "\r\n\r\n\r\n" -, Element "rdf:RDF" - [("xmlns:eg","http://example.org/#"),("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#")] - [ Text "\r\n\r\n \r\n\r\n " - , Element "rdf:Description" - [("rdf:about","http://example.org/#Andr\233")] - [ Text "\r\n " - , Element "eg:owes" - [] - [Text "2000"] - , Text "\r\n " - ] - , Text "\r\n" - ] -, Text "\r\n" -] - --} - -{- "rdf-tests/rdf-xml/rdf-charmod-uris/test002.rdf" - -[ Text "\r\n\r\n" -, Element "rdf:RDF" - [("xmlns:eg","http://example.org/#"),("xmlns:rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#")] - [ Text "\r\n \r\n \r\n\r\n " - , Element "rdf:Description" - [("rdf:about","http://example.org/#Andr%C3%A9")] - [ Text "\r\n " - , Element "eg:owes" - [] - [Text "2000"] - , Text "\r\n " - ] - , Text "\r\n" - ] -, Text " \r\n" -] - --} From a34801019787768e2171fb16c4d4dffcf633efb6 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Mon, 27 May 2019 18:51:54 +0200 Subject: [PATCH 22/39] Fix typo in XML --- testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs b/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs index 9f569d3..ddf36d5 100644 --- a/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs +++ b/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs @@ -174,7 +174,7 @@ test_parseXmlRDF_vCardPersonal :: Assertion test_parseXmlRDF_vCardPersonal = testParse "\ - \\ + \\ \Corky Crystal\ \Corks\ \\ From aa23978492fc9228325470a2b88587e6a03b0360 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Mon, 27 May 2019 18:57:48 +0200 Subject: [PATCH 23/39] Improvements --- src/Data/RDF/IRI.hs | 2 +- src/Data/RDF/Types.hs | 4 +- src/Text/RDF/RDF4H/ParserUtils.hs | 18 +++-- src/Text/RDF/RDF4H/XmlParser.hs | 110 ++++++++++++++++++++--------- src/Text/RDF/RDF4H/XmlParserHXT.hs | 8 +-- 5 files changed, 97 insertions(+), 45 deletions(-) diff --git a/src/Data/RDF/IRI.hs b/src/Data/RDF/IRI.hs index 6477f9e..fbd302b 100644 --- a/src/Data/RDF/IRI.hs +++ b/src/Data/RDF/IRI.hs @@ -127,7 +127,7 @@ validateIRI t = t <$ parseIRI t -- | IRI parsing and resolution according to algorithm 5.2 from RFC3986 -- See: http://www.ietf.org/rfc/rfc3986.txt --- [FIXME] Currently, this is a correct but naive implemenation. +-- [FIXME] Currently, this is a correct but naive implementation. resolveIRI :: Text -> Text -> Either String Text resolveIRI baseIri iri = serializeIRI <$> resolvedIRI where diff --git a/src/Data/RDF/Types.hs b/src/Data/RDF/Types.hs index 9159529..d48dff4 100644 --- a/src/Data/RDF/Types.hs +++ b/src/Data/RDF/Types.hs @@ -40,7 +40,7 @@ module Data.RDF.Types ( PrefixMappings(PrefixMappings),PrefixMapping(PrefixMapping), -- * Supporting types - BaseUrl(BaseUrl), NodeSelector, ParseFailure(ParseFailure) + BaseUrl(..), NodeSelector, ParseFailure(ParseFailure) ) where @@ -462,7 +462,7 @@ class RdfSerializer s where -- |The base URL of an RDF. -newtype BaseUrl = BaseUrl Text +newtype BaseUrl = BaseUrl { unBaseUrl :: Text } deriving (Eq, Ord, Show, NFData, Semigroup, Generic) instance Binary BaseUrl diff --git a/src/Text/RDF/RDF4H/ParserUtils.hs b/src/Text/RDF/RDF4H/ParserUtils.hs index f4797d3..f4b2d86 100644 --- a/src/Text/RDF/RDF4H/ParserUtils.hs +++ b/src/Text/RDF/RDF4H/ParserUtils.hs @@ -5,6 +5,7 @@ module Text.RDF.RDF4H.ParserUtils ( Parser(..) , parseFromURL , rdfTypeNode, rdfNilNode, rdfFirstNode, rdfRestNode, rdfListIndex + , rdfSubjectNode, rdfPredicateNode, rdfObjectNode, rdfStatementNode, rdfXmlLiteral , xsdIntUri, xsdDoubleUri, xsdDecimalUri, xsdBooleanUri ) where @@ -44,13 +45,20 @@ parseFromURL parseFunc url = do return (parseFunc s) rdfTypeNode, rdfNilNode, rdfFirstNode, rdfRestNode :: Node -rdfTypeNode = UNode $ mkUri rdf "type" -rdfNilNode = UNode $ mkUri rdf "nil" -rdfFirstNode = UNode $ mkUri rdf "first" -rdfRestNode = UNode $ mkUri rdf "rest" +rdfTypeNode = UNode $ mkUri rdf "type" +rdfNilNode = UNode $ mkUri rdf "nil" +rdfFirstNode = UNode $ mkUri rdf "first" +rdfRestNode = UNode $ mkUri rdf "rest" -rdfListIndex :: Text +rdfSubjectNode, rdfPredicateNode, rdfObjectNode, rdfStatementNode :: Node +rdfSubjectNode = UNode $ mkUri rdf "subject" +rdfPredicateNode = UNode $ mkUri rdf "predicate" +rdfObjectNode = UNode $ mkUri rdf "object" +rdfStatementNode = UNode $ mkUri rdf "Statement" + +rdfListIndex, rdfXmlLiteral :: Text rdfListIndex = mkUri rdf "_" +rdfXmlLiteral = mkUri rdf "XMLLiteral" xsdIntUri, xsdDoubleUri, xsdDecimalUri, xsdBooleanUri :: Text xsdIntUri = mkUri xsd "integer" diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index ab0e635..ef2e041 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -1,9 +1,10 @@ {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE DoAndIfThenElse #-} -{-# LANGUAGE RankNTypes #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE DoAndIfThenElse #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE TupleSections #-} -- |An parser for the RDF/XML format -- . @@ -16,7 +17,7 @@ module Text.RDF.RDF4H.XmlParser ) where import Text.RDF.RDF4H.ParserUtils hiding (Parser) ---import Data.RDF.IRI +import Data.RDF.IRI import Data.RDF.Types hiding (empty) --import Data.RDF.Graph.TList @@ -134,7 +135,7 @@ pBaseUri :: Parser BaseUrl pBaseUri = BaseUrl <$> pAttr "xml:base" pNodeElementList :: Parser Triples -pNodeElementList = pWs *> (mconcat <$> many pNodeElement) +pNodeElementList = pWs *> (mconcat <$> some pNodeElement) -- |White spaces parser pWs :: Parser () @@ -155,15 +156,14 @@ pSubject :: Parser (Node, Triples) pSubject = do s <- pUnodeId <|> pBnode <|> pUnode <|> pBnodeGen uri <- pName + when (not (checkNodeUri uri)) (pFail $ "URI not allowed: " <> T.unpack uri) pLang >>= setLang mtype <- optional (pType1 s uri) ts <- pPropertyAttrs s pure (s, (maybe ts (:ts) mtype)) where - pUnodeId = do - nid <- pIdAttr - -- [FIXME] undefined - mkUNodeID nid >>= maybe undefined pure + checkNodeUri uri = isNotCoreSyntaxTerm uri && uri /= "rdf:li" && isNotOldTerm uri + pUnodeId = pIdAttr >>= mkUNodeID pBnode = do bn <- pNodeIdAttr let s = BNode bn @@ -189,9 +189,15 @@ pPropertyAttrs s = do HM.elems <$> HM.traverseWithKey f attrs where -- [TODO] resolve IRIs - f attr value = if attr == "rdf:type" - then pure $ Triple s rdfTypeNode (unode value) - else do + -- https://www.w3.org/TR/rdf-syntax-grammar/#propertyAttributeURIs + isPropertyAttrURI uri = isNotCoreSyntaxTerm uri + && uri /= "rdf:Description" + && uri /= "rdf:li" + && isNotOldTerm uri + f attr value + | not (isPropertyAttrURI attr) = pFail $ "URI not allowed for attribute: " <> T.unpack attr + | attr == "rdf:type" = pure $ Triple s rdfTypeNode (unode value) + | otherwise = do lang <- currentLang pure $ let mkLiteral = maybe plainL (flip plainLL) lang in Triple s (unode attr) (lnode (mkLiteral value)) @@ -221,33 +227,42 @@ pPropertyElt = pAnyElement $ do pResourcePropertyElt :: Node -> Parser Triples pResourcePropertyElt p = do - -- [TODO] idAttr - -- [TODO] rdf:ID pWs + mi <- optional pIdAttr s <- currentSubject - ts <- pNodeElement + ts1 <- pNodeElement o <- currentSubject + setSubject s pWs let mt = flip Triple p <$> s <*> o - pure $ maybe ts (:ts) mt + ts2 <- maybe (pure mempty) (uncurry reifyTriple) (liftA2 (,) mi mt) + pure $ maybe (ts1 <> ts2) (:(ts1 <> ts2)) mt pLiteralPropertyElt :: Node -> Parser Triples pLiteralPropertyElt p = do - -- [TODO] idAttr + mi <- optional pIdAttr dt <- optional (pAttr "rdf:datatype") - t <- pText + l <- pText s <- currentSubject lang <- liftA2 (<|>) pLang currentLang - let t' = TL.toStrict t - let literal = maybe (plainL t') id $ (typedL t' <$> dt) <|> (plainLL t' <$> lang) - pure $ maybe mempty (\s' -> [Triple s' p (lnode literal)]) s + let l' = TL.toStrict l + o = lnode $ maybe (plainL l') id $ (typedL l' <$> dt) <|> (plainLL l' <$> lang) + mt = (\s' -> Triple s' p o) <$> s + ts <- maybe (pure mempty) (uncurry reifyTriple) (liftA2 (,) mi mt) + pure $ maybe ts (:ts) mt pParseTypeLiteralPropertyElt :: Node -> Parser Triples -pParseTypeLiteralPropertyElt _p = do - -- [TODO] idAttr +pParseTypeLiteralPropertyElt p = do + mi <- optional pIdAttr pt <- pAttr "rdf:parseType" guard (pt == "Literal") - pFail "TODO" -- [TODO] + l <- pText -- [FIXME] + s <- currentSubject + let l' = TL.toStrict l + o = lnode (typedL l' rdfXmlLiteral) + mt = (\s' -> Triple s' p o) <$> s + ts <- maybe (pure mempty) (uncurry reifyTriple) (liftA2 (,) mi mt) + pure $ maybe ts (:ts) mt pParseTypeResourcePropertyElt :: Node -> Parser Triples pParseTypeResourcePropertyElt p = do @@ -313,7 +328,7 @@ pEmptyPropertyElt p = do pNodeIdAttr' = BNode <$> pNodeIdAttr pIdAttr :: Parser Text -pIdAttr = pAttr "rdf:ID" -- [TODO] Check +pIdAttr = pAttr "rdf:ID" -- [TODO] Check ID pNodeIdAttr :: Parser Text pNodeIdAttr = pAttr "rdf:nodeID" -- [TODO] Check @@ -341,8 +356,38 @@ pPropertyAttr = do where mkTriple s mkLiteral iri value = Triple s (unode iri) (mkLiteral value) +pNoMoreChildren :: Parser () +pNoMoreChildren = pChildren >>= \case + [] -> pure () + ns -> pFail $ "Unexpected remaining children: " <> show ns + checkIRI :: String -> Text -> Parser Text -checkIRI msg = maybe (pFail ("Malformed IRI: " <> msg)) pure . uriValidate +checkIRI msg iri = do + bUri <- maybe mempty unBaseUrl <$> currentBaseUri + case uriValidate iri of + Nothing -> pFail ("Malformed IRI: " <> msg) + Just iri' -> either pFail pure (resolveIRI bUri iri') + +-- https://www.w3.org/TR/rdf-syntax-grammar/#coreSyntaxTerms +isNotCoreSyntaxTerm :: Text -> Bool +isNotCoreSyntaxTerm uri + = uri /= "rdf:RDF" && uri /= "rdf:ID" && uri /= "rdf:about" + && uri /= "rdf:parseType" && uri /= "rdf:resource" + && uri /= "rdf:nodeID" && uri /= "rdf:datatype" + +-- https://www.w3.org/TR/rdf-syntax-grammar/#oldTerms +isNotOldTerm :: Text -> Bool +isNotOldTerm uri = uri /= "rdf:aboutEach" + && uri /= "rdf:aboutEachPrefix" + && uri /= "rdf:bagID" + +reifyTriple :: Text -> Triple -> Parser Triples +reifyTriple i (Triple s p' o) = do + n <- mkUNodeID i + pure [ Triple n rdfSubjectNode s + , Triple n rdfPredicateNode p' + , Triple n rdfObjectNode o + , Triple n rdfTypeNode rdfStatementNode ] newBNode :: Parser Node newBNode = do @@ -350,6 +395,7 @@ newBNode = do st <- get pure $ BNodeGen (stateGenId st) +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#section-List-Expand nextListIndex :: Parser Text nextListIndex = do modify $ \st -> st { stateListIndex = stateListIndex st + 1 } @@ -364,10 +410,10 @@ currentBaseUri = stateBaseUri <$> get setBaseUri :: (Maybe BaseUrl) -> Parser () setBaseUri u = modify (\st -> st { stateBaseUri = u }) -mkUNodeID :: Text -> Parser (Maybe Node) -mkUNodeID t = currentBaseUri >>= \case - Nothing -> pure Nothing - Just (BaseUrl u) -> pure . Just . unode $ mconcat [u, "#", t] +mkUNodeID :: Text -> Parser Node +mkUNodeID t = currentBaseUri >>= pure . unode . \case + Nothing -> t + Just (BaseUrl u) -> mconcat [u, "#", t] currentSubject :: Parser (Maybe Subject) currentSubject = stateSubject <$> get diff --git a/src/Text/RDF/RDF4H/XmlParserHXT.hs b/src/Text/RDF/RDF4H/XmlParserHXT.hs index 843ff78..8157bb0 100644 --- a/src/Text/RDF/RDF4H/XmlParserHXT.hs +++ b/src/Text/RDF/RDF4H/XmlParserHXT.hs @@ -220,7 +220,7 @@ isMetaAttr = isA (== "rdf:about") -- -- And that specifically: -- --- +-- -- foo -- -- @@ -278,7 +278,7 @@ parsePredicatesFromChildren = updateState , second hasPredicateAttr :-> (defaultA <+> (mkBlankNode &&& arr id >>> arr2A parsePredicateAttr)) , this :-> defaultA ] - + -- See: Issue http://www.w3.org/2000/03/rdf-tracking/#rdfms-rdf-names-use -- section: Illegal or unusual use of names from the RDF namespace -- @@ -317,7 +317,7 @@ validPropElementName = proc (state,predXml) -> do parseObjectsFromChildren :: forall a. (ArrowIf a, ArrowXml a, ArrowState GParseState a) => LParseState -> Predicate -> a XmlTree Triple parseObjectsFromChildren s p = - choiceA + choiceA [ isText :-> (neg( isWhiteSpace) >>> getText >>> arr (Triple (stateSubject s) p . mkLiteralNode s)) , isElem :-> (parseObjectDescription) ] @@ -405,10 +405,8 @@ validNodeElementName = neg (hasName "rdf:RDF") >>> neg (hasName "rdf:aboutEach") >>> neg (hasName "rdf:aboutEachPrefix") -rdfXmlLiteral :: Text rdfFirst,rdfRest,rdfNil,rdfType,rdfStatement,rdfSubject,rdfPredicate,rdfObject :: Node -rdfXmlLiteral = T.pack "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral" rdfFirst = (unode . T.pack) "rdf:first" rdfRest = (unode . T.pack) "rdf:rest" rdfNil = (unode . T.pack) "rdf:nil" From 84af8a907ec7900c5fd46530cd7aa76e38502618 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Tue, 28 May 2019 00:45:37 +0200 Subject: [PATCH 24/39] Fix doc uri --- testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs b/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs index ddf36d5..530dfdc 100644 --- a/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs +++ b/testsuite/tests/Text/RDF/RDF4H/XmlParser_Test.hs @@ -70,7 +70,7 @@ loadExpectedGraph1 fname = do loadInputGraph1 :: String -> String -> IO (Either ParseFailure (RDF TList)) loadInputGraph1 dir fname = - (parseString (XmlParser Nothing (mkDocUrl1 testBaseUri fname)) <$> + (parseString (XmlParser Nothing (mkDocUrl1 testBaseUri dir fname)) <$> TIO.readFile (printf "%s/%s.rdf" dir fname :: String)) doGoodConformanceTest :: IO (Either ParseFailure (RDF TList)) -> @@ -431,5 +431,5 @@ normalizeN n = n testBaseUri :: String testBaseUri = "http://www.w3.org/2001/sw/DataAccess/df1/tests/" -mkDocUrl1 :: String -> String -> Maybe T.Text -mkDocUrl1 baseDocUrl fname = Just $ T.pack $ printf "%s%s.rdf" baseDocUrl fname +mkDocUrl1 :: String -> String -> String -> Maybe T.Text +mkDocUrl1 baseDocUrl dir fname = Just . T.pack $ printf "%s/%s/%s.rdf" baseDocUrl dir fname From eb2159fbd93244c581142eb0ad338f6346ec970f Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Tue, 28 May 2019 02:15:44 +0200 Subject: [PATCH 25/39] Improvements --- rdf4h.cabal | 1 + src/Text/RDF/RDF4H/XmlParser.hs | 83 ++++++++++++++++----------- src/Text/RDF/RDF4H/XmlParser/Utils.hs | 36 ++++++++++++ 3 files changed, 88 insertions(+), 32 deletions(-) create mode 100644 src/Text/RDF/RDF4H/XmlParser/Utils.hs diff --git a/rdf4h.cabal b/rdf4h.cabal index dbd036f..b7a00f0 100644 --- a/rdf4h.cabal +++ b/rdf4h.cabal @@ -39,6 +39,7 @@ library , Text.RDF.RDF4H.NTriplesSerializer , Text.RDF.RDF4H.XmlParser , Text.RDF.RDF4H.XmlParserHXT + , Text.RDF.RDF4H.XmlParser.Utils , Text.RDF.RDF4H.ParserUtils build-depends: attoparsec , base >= 4.8.0.0 diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index ef2e041..e0d4c1a 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -17,6 +17,7 @@ module Text.RDF.RDF4H.XmlParser ) where import Text.RDF.RDF4H.ParserUtils hiding (Parser) +import Text.RDF.RDF4H.XmlParser.Utils import Data.RDF.IRI import Data.RDF.Types hiding (empty) --import Data.RDF.Graph.TList @@ -100,26 +101,21 @@ parseXmlRDF bUrl dUrl = parseRdf . parseXml where parseXml = Xeno.fromRawXml . T.encodeUtf8 parseRdf = first ParseFailure . join . second parseRdf' - parseRdf' ns = evalState (runParserT (rdfParser bUrl dUrl) ns) initState + parseRdf' ns = evalState (runParserT (rdfParser (BaseUrl <$> dUrl)) ns) initState initState = ParseState bUrl empty empty 0 0 --- TODO: use bUrl and dUrl -rdfParser :: Rdf a => Maybe BaseUrl -> Maybe Text -> Parser (RDF a) -rdfParser _bUrl _dUrl = do - rdf <- pRdf +rdfParser :: Rdf a => Maybe BaseUrl -> Parser (RDF a) +rdfParser dUrl = do + rdf <- pRdf dUrl pWs pEndOfInput return rdf -{- NOTE: - remember to use `showTree` in the fork of xmlbf when pEndOfInput needs - debugging. --} - -pRdf :: Rdf a => Parser (RDF a) -pRdf = pElement "rdf:RDF" $ do - bUri <- optional pBaseUri - setBaseUri bUri +pRdf :: Rdf a => Maybe BaseUrl -> Parser (RDF a) +pRdf dUrl = pElement "rdf:RDF" $ do + bUri <- currentBaseUri + bUri' <- optional pBaseUri + setBaseUri (bUri' <|> bUri <|> dUrl) pm <- pPrefixMappings -- [TODO] Ensure no attributes triples <- pNodeElementList @@ -266,19 +262,21 @@ pParseTypeLiteralPropertyElt p = do pParseTypeResourcePropertyElt :: Node -> Parser Triples pParseTypeResourcePropertyElt p = do - -- [TODO] idAttr + mi <- optional pIdAttr pt <- pAttr "rdf:parseType" guard (pt == "Resource") s <- currentSubject o <- newBNode - let ts = maybe mempty (\s' -> [Triple s' p o]) s - -- setSubject (Just o) - -- pPropertyEltList [TODO] - pure ts + let mt = (\s' -> Triple s' p o) <$> s + ts1 <- maybe (pure mempty) (uncurry reifyTriple) (liftA2 (,) mi mt) + setSubject (Just o) + ts2 <- keepListIndex pPropertyEltList + setSubject s + pure $ maybe (ts1 <> ts2) ((<> ts2) . (:ts1)) mt pParseTypeCollectionPropertyElt :: Node -> Parser Triples pParseTypeCollectionPropertyElt p = do - -- [TODO] idAttr + mi <- optional pIdAttr pt <- pAttr "rdf:parseType" guard (pt == "Collection") s <- currentSubject @@ -287,13 +285,17 @@ pParseTypeCollectionPropertyElt p = do Just s' -> do r <- optional pNodeElement case r of - Nothing -> pure [Triple s' p rdfNilNode] + Nothing -> + let t = Triple s' p rdfNilNode + in ([t] <>) <$> maybe (pure mempty) (`reifyTriple` t) mi Just ts1 -> do s'' <- currentSubject n <- newBNode - let ts2 = maybe mempty (\s''' -> [Triple s' p n, Triple n rdfFirstNode s''']) s'' + let t = Triple s' p n + ts2 = maybe mempty (\s''' -> [t, Triple n rdfFirstNode s''']) s'' ts3 <- go n - pure $ mconcat [ts1, ts2, ts3] + ts4 <- maybe (pure mempty) (`reifyTriple` t) mi + pure $ mconcat [ts1, ts2, ts3, ts4] where go s = do r <- optional pNodeElement @@ -308,30 +310,36 @@ pParseTypeCollectionPropertyElt p = do pParseTypeOtherPropertyElt :: Node -> Parser Triples pParseTypeOtherPropertyElt _p = do - -- [TODO] idAttr + _mi <- optional pIdAttr pt <- pAttr "rdf:parseType" guard (pt /= "Resource" && pt /= "Literal" && pt /= "Collection") pFail "TODO" -- [TODO] pEmptyPropertyElt :: Node -> Parser Triples pEmptyPropertyElt p = do - -- [TODO] idAttr, rdf:ID + mi <- optional pIdAttr s <- currentSubject case s of Nothing -> pure mempty Just s' -> do o <- pResourceAttr' <|> pNodeIdAttr' <|> newBNode - ts <- pPropertyAttrs o - pure (Triple s' p o : ts) + let t = Triple s' p o + ts1 <- maybe (pure mempty) (`reifyTriple` t) mi + ts2 <- pPropertyAttrs o + pure (t:ts1 <> ts2) where pResourceAttr' = unode <$> pResourceAttr pNodeIdAttr' = BNode <$> pNodeIdAttr pIdAttr :: Parser Text -pIdAttr = pAttr "rdf:ID" -- [TODO] Check ID +pIdAttr = do + i <- pAttr "rdf:ID" + either pFail pure (validateID i) pNodeIdAttr :: Parser Text -pNodeIdAttr = pAttr "rdf:nodeID" -- [TODO] Check +pNodeIdAttr = do + i <- pAttr "rdf:nodeID" + either pFail pure (validateID i) pAboutAttr :: Parser Text pAboutAttr = pAttr "rdf:about" >>= checkIRI "rdf:about" @@ -384,10 +392,10 @@ isNotOldTerm uri = uri /= "rdf:aboutEach" reifyTriple :: Text -> Triple -> Parser Triples reifyTriple i (Triple s p' o) = do n <- mkUNodeID i - pure [ Triple n rdfSubjectNode s + pure [ Triple n rdfTypeNode rdfStatementNode + , Triple n rdfSubjectNode s , Triple n rdfPredicateNode p' - , Triple n rdfObjectNode o - , Triple n rdfTypeNode rdfStatementNode ] + , Triple n rdfObjectNode o ] newBNode :: Parser Node newBNode = do @@ -395,6 +403,17 @@ newBNode = do st <- get pure $ BNodeGen (stateGenId st) +currentListIndex :: Parser Int +currentListIndex = stateListIndex <$> get + +setListIndex :: Int -> Parser () +setListIndex i = modify (\st -> st { stateListIndex = i }) + +keepListIndex :: Parser a -> Parser a +keepListIndex p = do + i <- currentListIndex + p <* setListIndex i + -- See: https://www.w3.org/TR/rdf-syntax-grammar/#section-List-Expand nextListIndex :: Parser Text nextListIndex = do diff --git a/src/Text/RDF/RDF4H/XmlParser/Utils.hs b/src/Text/RDF/RDF4H/XmlParser/Utils.hs new file mode 100644 index 0000000..32bd4fa --- /dev/null +++ b/src/Text/RDF/RDF4H/XmlParser/Utils.hs @@ -0,0 +1,36 @@ +module Text.RDF.RDF4H.XmlParser.Utils + ( validateID + ) where + + +import Data.Functor ((<$)) +import Control.Applicative (liftA2) +import Data.Text (Text) +import qualified Data.Text as T +import Data.Attoparsec.Text (Parser, ()) +import qualified Data.Attoparsec.Text as P + + +validateID :: Text -> Either String Text +validateID t = t <$ parseId t + +parseId :: Text -> Either String Text +parseId = P.parseOnly $ idParser <* (P.endOfInput "Unexpected characters at the end") + +-- http://www.w3.org/TR/REC-xml-names/#NT-NCName +idParser :: Parser Text +idParser = liftA2 T.cons pNameStartChar pNameRest + where + pNameStartChar = P.satisfy isValidFirstCharId + pNameRest = P.takeWhile isValidRestCharId + isValidFirstCharId c = + ('A' <= c && c <= 'Z') || c == '_' || ('a' <= c && c <= 'z') + || ('\xC0' <= c && c <= '\xD6') || ('\xD8' <= c && c <= '\xF6') + || ('\xF8' <= c && c <= '\x2FF') || ('\x370' <= c && c <= '\x37D') + || ('\x37F' <= c && c <= '\x1FFF') || ('\x200C' <= c && c <= '\x200D') + || ('\x2070' <= c && c <= '\x218F') || ('\x2C00' <= c && c <= '\x2FEF') + || ('\x3001' <= c && c <= '\xD7FF') || ('\xF900' <= c && c <= '\xFDCF') + || ('\xFDF0' <= c && c <= '\xFFFD') || ('\x10000' <= c && c <= '\xEFFFF') + isValidRestCharId c = isValidFirstCharId c + || c == '-' || c == '.' || ('0' <= c && c <= '9') + || ('\x0300' <= c && c <= '\x036F') || ('\x203F' <= c && c <= '\x2040') From 7af4f69a804a996e8115921cf1ff2f84b386709a Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Tue, 28 May 2019 11:18:26 +0200 Subject: [PATCH 26/39] Add Semigroup and Monoid instances to PrefixMappings --- src/Data/RDF/Graph/AdjHashMap.hs | 3 +- src/Data/RDF/Graph/AlgebraicGraph.hs | 2 +- src/Data/RDF/Graph/HashMapSP.hs | 2 +- src/Data/RDF/Graph/TList.hs | 2 +- src/Data/RDF/Graph/TPatriciaTree.hs | 2 +- src/Data/RDF/Namespace.hs | 64 ++++++++++++-------------- src/Data/RDF/Types.hs | 4 +- src/Text/RDF/RDF4H/TurtleParser.hs | 2 +- src/Text/RDF/RDF4H/TurtleSerializer.hs | 2 +- 9 files changed, 38 insertions(+), 45 deletions(-) diff --git a/src/Data/RDF/Graph/AdjHashMap.hs b/src/Data/RDF/Graph/AdjHashMap.hs index 5d9d545..1148725 100644 --- a/src/Data/RDF/Graph/AdjHashMap.hs +++ b/src/Data/RDF/Graph/AdjHashMap.hs @@ -17,7 +17,6 @@ import Data.List import Data.Binary (Binary) import Data.RDF.Types import Data.RDF.Query -import Data.RDF.Namespace import Data.Hashable () import Data.HashMap.Strict (HashMap) import qualified Data.HashMap.Strict as HashMap @@ -132,7 +131,7 @@ prefixMappings' (AdjHashMap (_, _, pms)) = pms addPrefixMappings' :: RDF AdjHashMap -> PrefixMappings -> Bool -> RDF AdjHashMap addPrefixMappings' (AdjHashMap (ts, baseURL, pms)) pms' replace = - let merge = if replace then flip mergePrefixMappings else mergePrefixMappings + let merge = if replace then flip (<>) else (<>) in AdjHashMap (ts, baseURL, merge pms pms') empty' :: RDF AdjHashMap diff --git a/src/Data/RDF/Graph/AlgebraicGraph.hs b/src/Data/RDF/Graph/AlgebraicGraph.hs index d376f28..9d2c16f 100644 --- a/src/Data/RDF/Graph/AlgebraicGraph.hs +++ b/src/Data/RDF/Graph/AlgebraicGraph.hs @@ -64,7 +64,7 @@ showGraph' r = concatMap (\t -> show t ++ "\n") (expandTriples r) addPrefixMappings' :: RDF AlgebraicGraph -> PrefixMappings -> Bool -> RDF AlgebraicGraph addPrefixMappings' (AlgebraicGraph g baseURL pms) pms' replace = - let merge = if replace then flip mergePrefixMappings else mergePrefixMappings + let merge = if replace then flip (<>) else (<>) in AlgebraicGraph g baseURL (merge pms pms') empty' :: RDF AlgebraicGraph diff --git a/src/Data/RDF/Graph/HashMapSP.hs b/src/Data/RDF/Graph/HashMapSP.hs index d21102d..2893fc7 100644 --- a/src/Data/RDF/Graph/HashMapSP.hs +++ b/src/Data/RDF/Graph/HashMapSP.hs @@ -67,7 +67,7 @@ prefixMappings' (HashSP (_, _, pms)) = pms addPrefixMappings' :: RDF HashSP -> PrefixMappings -> Bool -> RDF HashSP addPrefixMappings' (HashSP (tsMap, baseURL, pms)) pms' replace = - let merge = if replace then flip mergePrefixMappings else mergePrefixMappings + let merge = if replace then flip (<>) else (<>) in HashSP (tsMap, baseURL, merge pms pms') empty' :: RDF HashSP diff --git a/src/Data/RDF/Graph/TList.hs b/src/Data/RDF/Graph/TList.hs index 6578eb1..bffa82a 100644 --- a/src/Data/RDF/Graph/TList.hs +++ b/src/Data/RDF/Graph/TList.hs @@ -78,7 +78,7 @@ prefixMappings' (TListC(_, _, pms)) = pms addPrefixMappings' :: RDF TList -> PrefixMappings -> Bool -> RDF TList addPrefixMappings' (TListC(ts, baseURL, pms)) pms' replace = - let merge = if replace then flip mergePrefixMappings else mergePrefixMappings + let merge = if replace then flip (<>) else (<>) in TListC(ts, baseURL, merge pms pms') baseUrl' :: RDF TList -> Maybe BaseUrl diff --git a/src/Data/RDF/Graph/TPatriciaTree.hs b/src/Data/RDF/Graph/TPatriciaTree.hs index 6ab0997..eae2b4a 100644 --- a/src/Data/RDF/Graph/TPatriciaTree.hs +++ b/src/Data/RDF/Graph/TPatriciaTree.hs @@ -49,7 +49,7 @@ prefixMappings' (TPatriciaTree (_,_,_,pms')) = pms' addPrefixMappings' :: RDF TPatriciaTree -> PrefixMappings -> Bool -> RDF TPatriciaTree addPrefixMappings' (TPatriciaTree (g, idxLookup, baseURL, pms)) pms' replace = - let merge = if replace then flip mergePrefixMappings else mergePrefixMappings + let merge = if replace then flip (<>) else (<>) in TPatriciaTree (g, idxLookup, baseURL, merge pms pms') baseUrl' :: RDF TPatriciaTree -> Maybe BaseUrl diff --git a/src/Data/RDF/Namespace.hs b/src/Data/RDF/Namespace.hs index e0d88ee..02e16df 100644 --- a/src/Data/RDF/Namespace.hs +++ b/src/Data/RDF/Namespace.hs @@ -6,7 +6,6 @@ module Data.RDF.Namespace( -- * Namespace types and functions Namespace(..), mkPlainNS, mkPrefixedNS, mkPrefixedNS', PrefixMapping(PrefixMapping), PrefixMappings(PrefixMappings), toPMList, - mergePrefixMappings, mkUri, prefixOf, uriOf, -- * Predefined namespace values @@ -17,12 +16,13 @@ module Data.RDF.Namespace( import qualified Data.Text as T import Data.RDF.Types import qualified Data.Map as Map +import Data.Semigroup ((<>)) standard_namespaces :: [Namespace] standard_namespaces = [rdf, rdfs, dc, dct, owl, xsd, skos, foaf, ex, ex2] -- |The set of common predefined namespaces as a 'PrefixMappings' value. -standard_ns_mappings :: PrefixMappings +standard_ns_mappings :: PrefixMappings standard_ns_mappings = ns_mappings standard_namespaces -- |Takes a list of 'Namespace's and returns 'PrefixMappings'. @@ -31,50 +31,44 @@ ns_mappings ns = PrefixMappings $ Map.fromList $ fmap (\(PrefixedNS pre uri) -> (pre, uri)) ns -- |The RDF namespace. -rdf :: Namespace -rdf = mkPrefixedNS' "rdf" "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +rdf :: Namespace +rdf = mkPrefixedNS' "rdf" "http://www.w3.org/1999/02/22-rdf-syntax-ns#" -- |The RDF Schema namespace. rdfs :: Namespace -rdfs = mkPrefixedNS' "rdfs" "http://www.w3.org/2000/01/rdf-schema#" +rdfs = mkPrefixedNS' "rdfs" "http://www.w3.org/2000/01/rdf-schema#" -- |The Dublin Core namespace. -dc :: Namespace -dc = mkPrefixedNS' "dc" "http://purl.org/dc/elements/1.1/" +dc :: Namespace +dc = mkPrefixedNS' "dc" "http://purl.org/dc/elements/1.1/" -- |The Dublin Core terms namespace. -dct :: Namespace -dct = mkPrefixedNS' "dct" "http://purl.org/dc/terms/" +dct :: Namespace +dct = mkPrefixedNS' "dct" "http://purl.org/dc/terms/" -- |The OWL namespace. -owl :: Namespace -owl = mkPrefixedNS' "owl" "http://www.w3.org/2002/07/owl#" +owl :: Namespace +owl = mkPrefixedNS' "owl" "http://www.w3.org/2002/07/owl#" -- |The XML Schema namespace. -xsd :: Namespace -xsd = mkPrefixedNS' "xsd" "http://www.w3.org/2001/XMLSchema#" +xsd :: Namespace +xsd = mkPrefixedNS' "xsd" "http://www.w3.org/2001/XMLSchema#" -- |The SKOS namespace. skos :: Namespace -skos = mkPrefixedNS' "skos" "http://www.w3.org/2004/02/skos/core#" +skos = mkPrefixedNS' "skos" "http://www.w3.org/2004/02/skos/core#" -- |The friend of a friend namespace. foaf :: Namespace -foaf = mkPrefixedNS' "foaf" "http://xmlns.com/foaf/0.1/" +foaf = mkPrefixedNS' "foaf" "http://xmlns.com/foaf/0.1/" -- |Example namespace #1. -ex :: Namespace -ex = mkPrefixedNS' "ex" "http://www.example.org/" +ex :: Namespace +ex = mkPrefixedNS' "ex" "http://www.example.org/" -- |Example namespace #2. -ex2 :: Namespace -ex2 = mkPrefixedNS' "ex2" "http://www2.example.org/" - - --- |Perform a left-biased merge of the two sets of prefix mappings. -mergePrefixMappings :: PrefixMappings -> PrefixMappings -> PrefixMappings -mergePrefixMappings (PrefixMappings p1s) (PrefixMappings p2s) = - PrefixMappings $ Map.union p1s p2s +ex2 :: Namespace +ex2 = mkPrefixedNS' "ex2" "http://www2.example.org/" -- |View the prefix mappings as a list of key-value pairs. The PM in -- in the name is to reduce name clashes if used without qualifying. @@ -87,13 +81,13 @@ mkUri ns local = uriOf ns `T.append` local -- |Make a namespace for the given URI reference. -mkPlainNS :: T.Text -> Namespace -mkPlainNS = PlainNS +mkPlainNS :: T.Text -> Namespace +mkPlainNS = PlainNS -- |Make a namespace having the given prefix for the given URI reference, -- respectively. -mkPrefixedNS :: T.Text -> T.Text -> Namespace -mkPrefixedNS = PrefixedNS +mkPrefixedNS :: T.Text -> T.Text -> Namespace +mkPrefixedNS = PrefixedNS -- |Make a namespace having the given prefix for the given URI reference, -- respectively, using strings which will be converted to bytestrings @@ -102,11 +96,11 @@ mkPrefixedNS' :: String -> String -> Namespace mkPrefixedNS' s1 s2 = mkPrefixedNS (T.pack s1) (T.pack s2) -- |Determine the URI of the given namespace. -uriOf :: Namespace -> T.Text -uriOf (PlainNS uri) = uri -uriOf (PrefixedNS _ uri) = uri +uriOf :: Namespace -> T.Text +uriOf (PlainNS uri) = uri +uriOf (PrefixedNS _ uri) = uri -- |Determine the prefix of the given namespace, if it has one. -prefixOf :: Namespace -> Maybe T.Text -prefixOf (PlainNS _) = Nothing -prefixOf (PrefixedNS p _) = Just p +prefixOf :: Namespace -> Maybe T.Text +prefixOf (PlainNS _) = Nothing +prefixOf (PrefixedNS p _) = Just p diff --git a/src/Data/RDF/Types.hs b/src/Data/RDF/Types.hs index d48dff4..128ca85 100644 --- a/src/Data/RDF/Types.hs +++ b/src/Data/RDF/Types.hs @@ -569,8 +569,8 @@ instance Show Namespace where show (PrefixedNS prefix uri) = printf "(PrefixNS %s %s)" (T.unpack prefix) (T.unpack uri) -- |An alias for a map from prefix to namespace URI. -newtype PrefixMappings = PrefixMappings (Map Text Text) - deriving (Eq, Ord,NFData, Generic) +newtype PrefixMappings = PrefixMappings (Map Text Text) + deriving (Eq, Ord, NFData, Semigroup, Monoid, Generic) instance Binary PrefixMappings diff --git a/src/Text/RDF/RDF4H/TurtleParser.hs b/src/Text/RDF/RDF4H/TurtleParser.hs index b87c5ca..f5c8b0a 100644 --- a/src/Text/RDF/RDF4H/TurtleParser.hs +++ b/src/Text/RDF/RDF4H/TurtleParser.hs @@ -665,4 +665,4 @@ tryIriResolution mbUrl mdUrl iriFrag = tryIriResolution' mbUrl mdUrl tryIriResolution' (Just (BaseUrl bIri)) _ = either err pure (resolveIRI bIri iriFrag) tryIriResolution' _ (Just dIri) = either err pure (resolveIRI dIri iriFrag) tryIriResolution' _ _ = either err pure (resolveIRI mempty iriFrag) - err m = unexpected $ "Cannot resolve IRI: " <> m <> " " <> show (mbUrl, mdUrl, iriFrag) + err m = unexpected $ mconcat ["Cannot resolve IRI: ", m, " ", show (mbUrl, mdUrl, iriFrag)] diff --git a/src/Text/RDF/RDF4H/TurtleSerializer.hs b/src/Text/RDF/RDF4H/TurtleSerializer.hs index 81d183d..46ffcd7 100644 --- a/src/Text/RDF/RDF4H/TurtleSerializer.hs +++ b/src/Text/RDF/RDF4H/TurtleSerializer.hs @@ -23,7 +23,7 @@ data TurtleSerializer = TurtleSerializer (Maybe T.Text) PrefixMappings instance RdfSerializer TurtleSerializer where hWriteRdf (TurtleSerializer docUrl pms) h rdf = _writeRdf h docUrl (addPrefixMappings rdf pms False) writeRdf s = hWriteRdf s stdout - hWriteH (TurtleSerializer _ pms) h rdf = writeHeader h (baseUrl rdf) (mergePrefixMappings (prefixMappings rdf) pms) + hWriteH (TurtleSerializer _ pms) h rdf = writeHeader h (baseUrl rdf) (prefixMappings rdf <> pms) writeH s = hWriteRdf s stdout -- TODO: should use mdUrl to render <> where appropriate hWriteTs (TurtleSerializer docUrl pms) h = writeTriples h docUrl pms From 137d0fe89f59957733c34893364282fae8cf69ba Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Tue, 28 May 2019 12:32:11 +0200 Subject: [PATCH 27/39] Improvements --- src/Data/RDF/IRI.hs | 5 +- src/Data/RDF/Namespace.hs | 6 +- src/Data/RDF/Types.hs | 2 +- src/Text/RDF/RDF4H/ParserUtils.hs | 38 ++- src/Text/RDF/RDF4H/XmlParser.hs | 355 ++++++++++++++++++-------- src/Text/RDF/RDF4H/XmlParser/Utils.hs | 50 +++- src/Text/RDF/RDF4H/XmlParserHXT.hs | 2 +- testsuite/tests/Test.hs | 4 +- testsuite/tests/W3C/RdfXmlTest.hs | 34 ++- 9 files changed, 356 insertions(+), 140 deletions(-) diff --git a/src/Data/RDF/IRI.hs b/src/Data/RDF/IRI.hs index fbd302b..4535828 100644 --- a/src/Data/RDF/IRI.hs +++ b/src/Data/RDF/IRI.hs @@ -128,7 +128,10 @@ validateIRI t = t <$ parseIRI t -- | IRI parsing and resolution according to algorithm 5.2 from RFC3986 -- See: http://www.ietf.org/rfc/rfc3986.txt -- [FIXME] Currently, this is a correct but naive implementation. -resolveIRI :: Text -> Text -> Either String Text +resolveIRI + :: Text -- ^ Base URI + -> Text -- ^ URI to resolve + -> Either String Text resolveIRI baseIri iri = serializeIRI <$> resolvedIRI where resolvedIRI = either (const resolvedRelativeIRI) resolveAbsoluteIRI (parseIRI iri) diff --git a/src/Data/RDF/Namespace.hs b/src/Data/RDF/Namespace.hs index 02e16df..8c48674 100644 --- a/src/Data/RDF/Namespace.hs +++ b/src/Data/RDF/Namespace.hs @@ -9,7 +9,7 @@ module Data.RDF.Namespace( mkUri, prefixOf, uriOf, -- * Predefined namespace values - rdf, rdfs, dc, dct, owl, xsd, skos, foaf, ex, ex2, + rdf, rdfs, dc, dct, owl, xml, xsd, skos, foaf, ex, ex2, standard_ns_mappings, ns_mappings ) where @@ -50,6 +50,10 @@ dct = mkPrefixedNS' "dct" "http://purl.org/dc/terms/" owl :: Namespace owl = mkPrefixedNS' "owl" "http://www.w3.org/2002/07/owl#" +-- |The XML Schema namespace. +xml :: Namespace +xml = mkPrefixedNS' "xml" "http://www.w3.org/XML/1998/namespace" + -- |The XML Schema namespace. xsd :: Namespace xsd = mkPrefixedNS' "xsd" "http://www.w3.org/2001/XMLSchema#" diff --git a/src/Data/RDF/Types.hs b/src/Data/RDF/Types.hs index 128ca85..066ce15 100644 --- a/src/Data/RDF/Types.hs +++ b/src/Data/RDF/Types.hs @@ -592,7 +592,7 @@ instance Show PrefixMapping where -- | Resolve a prefix using the given prefix mappings. resolveQName :: Text -> PrefixMappings -> Maybe Text -resolveQName prefix (PrefixMappings pms') = Map.lookup prefix pms' +resolveQName prefix (PrefixMappings pms) = Map.lookup prefix pms {-# INLINE mkAbsoluteUrl #-} {-# DEPRECATED mkAbsoluteUrl "Use resolveIRI instead, because mkAbsoluteUrl is a partial function" #-} diff --git a/src/Text/RDF/RDF4H/ParserUtils.hs b/src/Text/RDF/RDF4H/ParserUtils.hs index f4b2d86..34ee00a 100644 --- a/src/Text/RDF/RDF4H/ParserUtils.hs +++ b/src/Text/RDF/RDF4H/ParserUtils.hs @@ -4,8 +4,16 @@ module Text.RDF.RDF4H.ParserUtils ( Parser(..) , parseFromURL - , rdfTypeNode, rdfNilNode, rdfFirstNode, rdfRestNode, rdfListIndex - , rdfSubjectNode, rdfPredicateNode, rdfObjectNode, rdfStatementNode, rdfXmlLiteral + -- RDF + , rdfTypeNode, rdfNilNode, rdfFirstNode, rdfRestNode + , rdfSubjectNode, rdfPredicateNode, rdfObjectNode, rdfStatementNode + , rdfTag, rdfID, rdfAbout, rdfParseType, rdfResource, rdfNodeID, rdfDatatype + , rdfType, rdfLi, rdfListIndex + , rdfDescription, rdfXmlLiteral + , rdfAboutEach, rdfAboutEachPrefix, rdfBagID + -- XML + , xmlLang + -- XSD , xsdIntUri, xsdDoubleUri, xsdDecimalUri, xsdBooleanUri ) where @@ -56,9 +64,33 @@ rdfPredicateNode = UNode $ mkUri rdf "predicate" rdfObjectNode = UNode $ mkUri rdf "object" rdfStatementNode = UNode $ mkUri rdf "Statement" -rdfListIndex, rdfXmlLiteral :: Text +-- Core terms +rdfTag, rdfID, rdfAbout, rdfParseType, rdfResource, rdfNodeID, rdfDatatype :: Text +rdfTag = mkUri rdf "RDF" +rdfID = mkUri rdf "ID" +rdfAbout = mkUri rdf "about" +rdfParseType = mkUri rdf "parseType" +rdfResource = mkUri rdf "resource" +rdfNodeID = mkUri rdf "nodeID" +rdfDatatype = mkUri rdf "datatype" + +rdfType, rdfLi, rdfListIndex :: Text +rdfType = mkUri rdf "type" +rdfLi = mkUri rdf "li" rdfListIndex = mkUri rdf "_" + +rdfXmlLiteral, rdfDescription :: Text rdfXmlLiteral = mkUri rdf "XMLLiteral" +rdfDescription = mkUri rdf "Description" + +-- Old terms +rdfAboutEach, rdfAboutEachPrefix, rdfBagID :: Text +rdfAboutEach = mkUri rdf "rdf:aboutEach" +rdfAboutEachPrefix = mkUri rdf "rdf:aboutEachPrefix" +rdfBagID = mkUri rdf "rdf:bagID" + +xmlLang :: Text +xmlLang = mkUri xml "lang" xsdIntUri, xsdDoubleUri, xsdDecimalUri, xsdBooleanUri :: Text xsdIntUri = mkUri xsd "integer" diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index e0d4c1a..9864c98 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -11,6 +11,7 @@ module Text.RDF.RDF4H.XmlParser ( XmlParser(..) + , parseDebug -- [FIXME] , xmlEg , example11 , example12 @@ -19,19 +20,25 @@ module Text.RDF.RDF4H.XmlParser import Text.RDF.RDF4H.ParserUtils hiding (Parser) import Text.RDF.RDF4H.XmlParser.Utils import Data.RDF.IRI -import Data.RDF.Types hiding (empty) ---import Data.RDF.Graph.TList +import Data.RDF.Types hiding (empty, resolveQName) +import qualified Data.RDF.Types as RDF +import Data.RDF.Graph.TList ---import Debug.Trace +import Debug.Trace import Control.Applicative import Control.Exception import Control.Monad import Control.Monad.State.Strict import Data.Semigroup ((<>)) +import Data.Set (Set) +import qualified Data.Set as S +--import Data.Map (Map) import qualified Data.Map as Map --import Data.Maybe +import Data.Either import Data.Bifunctor --import Data.Foldable +import Data.HashMap.Strict (HashMap) import qualified Data.HashMap.Strict as HM import Data.Text (Text) --import Data.Text.Encoding @@ -39,8 +46,7 @@ import qualified Data.Text as T import qualified Data.Text.IO as TIO import qualified Data.Text.Lazy as TL import qualified Data.Text.Encoding as T -import Xmlbf hiding (Node, Parser) ---import qualified Xmlbf (Node) +import Xmlbf hiding (Node, Parser, State) import qualified Xmlbf.Xeno as Xeno data XmlParser = XmlParser (Maybe BaseUrl) (Maybe Text) @@ -50,16 +56,14 @@ instance RdfParser XmlParser where parseFile (XmlParser bUrl dUrl) = parseFile' bUrl dUrl parseURL (XmlParser bUrl dUrl) = parseURL' bUrl dUrl -parseFile' :: - (Rdf a) +parseFile' :: (Rdf a) => Maybe BaseUrl -> Maybe Text -> String -> IO (Either ParseFailure (RDF a)) parseFile' bUrl dUrl fpath = parseXmlRDF bUrl dUrl <$> TIO.readFile fpath -parseURL' :: - (Rdf a) +parseURL' :: (Rdf a) => Maybe BaseUrl -- ^ The optional base URI of the document. -> Maybe Text -- ^ The document URI (i.e., the URI of the document itself); if Nothing, use location URI. -> String -- ^ The location URI from which to retrieve the XML document. @@ -67,71 +71,113 @@ parseURL' :: -- corresponding to the XML document. parseURL' bUrl docUrl = parseFromURL (parseXmlRDF bUrl docUrl) --- -- |Global state for the parser --- data GParseState = GParseState --- { stateGenId :: Int --- } deriving (Show) - - type Parser = ParserT (State ParseState) -- |Local state for the parser (dependant on the parent xml elements) data ParseState = ParseState { stateBaseUri :: Maybe BaseUrl + , stateIdSet :: Set Text -- ^ set of rdf:ID found in the scope of the current base URI. + , statePrefixMapping :: PrefixMappings , stateLang :: Maybe Text + , stateNodeAttrs :: HashMap Text Text -- ^ Current node RDF attributes , stateSubject :: Maybe Subject - , stateGenId :: Int , stateListIndex :: Int + , stateGenId :: Int } deriving(Show) data ParserException = ParserException String deriving (Show) instance Exception ParserException --- testXeno :: Text -> Either String [Xmlbf.Node] --- testXeno = Xeno.fromRawXml . T.encodeUtf8 - -- |Parse a xml Text to an RDF representation parseXmlRDF :: (Rdf a) - => Maybe BaseUrl -- ^ The base URL for the RDF if required - -> Maybe Text -- ^ DocUrl: The request URL for the RDF if available - -> Text -- ^ The contents to parse - -> Either ParseFailure (RDF a) -- ^ The RDF representation of the triples or ParseFailure + => Maybe BaseUrl -- ^ The base URL for the RDF if required + -> Maybe Text -- ^ DocUrl: The request URL for the RDF if available + -> Text -- ^ The contents to parse + -> Either ParseFailure (RDF a) -- ^ The RDF representation of the triples or ParseFailure parseXmlRDF bUrl dUrl = parseRdf . parseXml where + bUrl' = BaseUrl <$> dUrl <|> bUrl parseXml = Xeno.fromRawXml . T.encodeUtf8 parseRdf = first ParseFailure . join . second parseRdf' - parseRdf' ns = evalState (runParserT (rdfParser (BaseUrl <$> dUrl)) ns) initState - initState = ParseState bUrl empty empty 0 0 + parseRdf' ns = evalState (runParserT rdfParser ns) initState + initState = ParseState bUrl' mempty mempty empty mempty empty 0 0 -rdfParser :: Rdf a => Maybe BaseUrl -> Parser (RDF a) -rdfParser dUrl = do - rdf <- pRdf dUrl - pWs - pEndOfInput - return rdf +parseDebug :: String -> IO (RDF TList) +parseDebug f = fromRight RDF.empty <$> parseFile (XmlParser (Just . BaseUrl $ "http://plouf/") (Just "plouf")) f -pRdf :: Rdf a => Maybe BaseUrl -> Parser (RDF a) -pRdf dUrl = pElement "rdf:RDF" $ do +rdfParser :: Rdf a => Parser (RDF a) +rdfParser = do + bUri <- currentBaseUri + triples <- (pRdf <* pWs) <||> pNodeElementList + pEndOfInput + pm <- currentPrefixMappings + pure $ mkRdf triples bUri pm + +pRdf :: Parser Triples +pRdf = pAnyElement $ do + attrs <- pRDFAttrs + uri <- pName >>= pQName + guard (uri == rdfTag) + when (not $ HM.null attrs) $ pFail "rdf:RDF: The set of attributes should be empty." + pNodeElementList + +pQName :: Text -> Parser Text +pQName qn = do + pm <- currentPrefixMappings + let qn' = resolveQName pm qn >>= validateIRI + either pFail pure qn' + +-- |Process the attributes of a node +pRDFAttrs :: Parser (HashMap Text Text) +pRDFAttrs = do + -- Language (xml:lang) + liftA2 (<|>) pLang currentLang >>= setLang + -- Base URI + -- [TODO] resolve base uri in context + liftA2 (<|>) pBase currentBaseUri >>= setBaseUri bUri <- currentBaseUri - bUri' <- optional pBaseUri - setBaseUri (bUri' <|> bUri <|> dUrl) - pm <- pPrefixMappings - -- [TODO] Ensure no attributes - triples <- pNodeElementList - pure $ mkRdf triples Nothing pm - -pPrefixMappings :: Parser PrefixMappings -pPrefixMappings = PrefixMappings <$> pm + -- Process the rest of the attributes + attrs <- pAttrs + -- Get the namespace definitions (xmlns:) + pm <- updatePrefixMappings (PrefixMappings $ HM.foldlWithKey' mkNameSpaces mempty attrs) + -- Filter and resolve RDF attributes + let as = HM.foldlWithKey' (mkRdfAttribute pm bUri) mempty attrs + setNodeAttrs as + pure as where - pm = Map.fromList . HM.foldlWithKey' getPrefixes mempty <$> pAttrs - getPrefixes ps k v = maybe ps (\k' -> (k', v):ps) (T.stripPrefix "xmlns:" k) - -pBaseUri :: Parser BaseUrl -pBaseUri = BaseUrl <$> pAttr "xml:base" + mkNameSpaces ns qn iri = + -- [TODO] resolve IRI + -- [TODO] check malformed identifiers & IRI + let qn' = parseQName qn + ns' = f <$> qn' <*> validateIRI iri + f (Nothing , "xmlns") iri' = Map.insert mempty iri' ns + f (Just "xmlns", prefix ) iri' = Map.insert prefix iri' ns + f _ _ = ns + in either (const ns) id ns' + mkRdfAttribute pm bUri as qn v = + let as' = parseQName qn >>= f + f (Nothing, "xmlns") = Right as + f (Just "xmlns", _) = Right as + f qn'@(Just _, _) = (\a -> HM.insert a v as) <$> resolveQName' pm qn' + f (Nothing, uri) = case bUri of + Nothing -> Right as -- [FIXME] manage missing base URI + Just (BaseUrl bUri') -> (\a -> HM.insert a v as) <$> resolveIRI bUri' uri + in either (const as) id as' + +pRDFAttr :: Text -> Parser Text +pRDFAttr a = do + as <- currentNodeAttrs + maybe + (pFail $ mconcat ["Attribute \"", T.unpack a, "\" not found."]) + pure + (HM.lookup a as) + +pMatchAndRemoveAttr :: Text -> Parser Text +pMatchAndRemoveAttr a = pRDFAttr a <* removeNodeAttr a pNodeElementList :: Parser Triples -pNodeElementList = pWs *> (mconcat <$> some pNodeElement) +pNodeElementList = pWs *> (mconcat <$> some (keepState pNodeElement <* pWs)) -- |White spaces parser pWs :: Parser () @@ -140,33 +186,46 @@ pWs = maybe True (T.all ws . TL.toStrict) <$> optional pText >>= guard -- See: https://www.w3.org/TR/2000/REC-xml-20001006#NT-S ws c = c == '\x20' || c == '\x09' || c == '\x0d' || c == '\x0a' +-- https://www.w3.org/TR/rdf-syntax-grammar/#nodeElement pNodeElement :: Parser Triples pNodeElement = pAnyElement $ do - (s, ts1) <- pSubject - ts2 <- pPropertyAttr - ts3 <- pPropertyEltList + -- Process attributes + void pRDFAttrs + -- Process subject + (s, mt) <- pSubject + ts1 <- pPropertyAttrs s + -- Process propertyEltList + ts2 <- keepState pPropertyEltList setSubject (Just s) - pure $ mconcat [ts1, ts2, ts3] + let ts = ts1 <> ts2 + pure $ maybe ts (:ts) mt -pSubject :: Parser (Node, Triples) +--pSubject :: Parser (Node, Triples) +pSubject :: Parser (Node, Maybe Triple) pSubject = do + mi <- optional pIdAttr + traverse checkIdIsUnique mi + -- Create the subject s <- pUnodeId <|> pBnode <|> pUnode <|> pBnodeGen - uri <- pName + -- traceM (show s) + -- Resolve URI + uri <- pName >>= pQName + --currentBaseUri >>= traceM . show + -- Check that the URI is allowed when (not (checkNodeUri uri)) (pFail $ "URI not allowed: " <> T.unpack uri) - pLang >>= setLang + -- Optional rdf:type triple mtype <- optional (pType1 s uri) - ts <- pPropertyAttrs s - pure (s, (maybe ts (:ts) mtype)) + pure (s, mtype) where - checkNodeUri uri = isNotCoreSyntaxTerm uri && uri /= "rdf:li" && isNotOldTerm uri - pUnodeId = pIdAttr >>= mkUNodeID + checkNodeUri uri = isNotCoreSyntaxTerm uri && uri /= rdfLi && isNotOldTerm uri + pUnodeId = (pIdAttr >>= mkUNodeID) <* removeNodeAttr rdfID pBnode = do - bn <- pNodeIdAttr + bn <- pNodeIdAttr <* removeNodeAttr rdfNodeID let s = BNode bn setSubject (Just s) pure s pUnode = do - s <- unode <$> pAboutAttr + s <- unode <$> pAboutAttr <* removeNodeAttr rdfAbout setSubject (Just s) pure s -- Default subject: a new blank node @@ -175,32 +234,34 @@ pSubject = do setSubject (Just s) pure s pType1 n uri = - if uri /= "rdf:Description" + if uri /= rdfDescription then pure $ Triple n rdfTypeNode (unode uri) else empty pPropertyAttrs :: Node -> Parser Triples pPropertyAttrs s = do - attrs <- pAttrs + attrs <- currentNodeAttrs HM.elems <$> HM.traverseWithKey f attrs where - -- [TODO] resolve IRIs -- https://www.w3.org/TR/rdf-syntax-grammar/#propertyAttributeURIs isPropertyAttrURI uri = isNotCoreSyntaxTerm uri - && uri /= "rdf:Description" - && uri /= "rdf:li" + && uri /= rdfDescription + && uri /= rdfLi && isNotOldTerm uri f attr value | not (isPropertyAttrURI attr) = pFail $ "URI not allowed for attribute: " <> T.unpack attr - | attr == "rdf:type" = pure $ Triple s rdfTypeNode (unode value) + | attr == rdfType = pure $ Triple s rdfTypeNode (unode value) | otherwise = do - lang <- currentLang - pure $ let mkLiteral = maybe plainL (flip plainLL) lang - in Triple s (unode attr) (lnode (mkLiteral value)) + lang <- currentLang + pure $ let mkLiteral = maybe plainL (flip plainLL) lang + in Triple s (unode attr) (lnode (mkLiteral value)) pLang :: Parser (Maybe Text) pLang = optional (pAttr "xml:lang") +pBase :: Parser (Maybe BaseUrl) +pBase = optional (BaseUrl <$> pAttr "xml:base") + pPropertyEltList :: Parser Triples pPropertyEltList = pWs *> resetListIndex @@ -208,39 +269,44 @@ pPropertyEltList = pWs pPropertyElt :: Parser Triples pPropertyElt = pAnyElement $ do - p <- unode <$> (pName >>= listExpansion) + -- Process attributes + void pRDFAttrs + --attrs1 <- currentNodeAttrs + --traceM ("pPropertyElt1 " <> show attrs1) + p <- unode <$> (pName >>= pQName >>= listExpansion) -- [TODO] check URI pParseTypeLiteralPropertyElt p - <|> pParseTypeResourcePropertyElt p - <|> pParseTypeCollectionPropertyElt p - <|> pParseTypeOtherPropertyElt p - <|> pResourcePropertyElt p - <|> pLiteralPropertyElt p - <|> pEmptyPropertyElt p + <||> pParseTypeResourcePropertyElt p + <||> pParseTypeCollectionPropertyElt p + <||> pParseTypeOtherPropertyElt p + <||> pResourcePropertyElt p + <||> pLiteralPropertyElt p + <||> pEmptyPropertyElt p where - listExpansion "rdf:li" = nextListIndex - listExpansion u = pure u + listExpansion u + | u == rdfLi = nextListIndex + | otherwise = pure u pResourcePropertyElt :: Node -> Parser Triples pResourcePropertyElt p = do pWs - mi <- optional pIdAttr - s <- currentSubject - ts1 <- pNodeElement - o <- currentSubject - setSubject s + (ts1, o) <- keepState $ liftA2 (,) pNodeElement currentSubject pWs + mi <- optional pIdAttr <* removeNodeAttr rdfID + traverse checkIdIsUnique mi + s <- currentSubject let mt = flip Triple p <$> s <*> o ts2 <- maybe (pure mempty) (uncurry reifyTriple) (liftA2 (,) mi mt) pure $ maybe (ts1 <> ts2) (:(ts1 <> ts2)) mt pLiteralPropertyElt :: Node -> Parser Triples pLiteralPropertyElt p = do - mi <- optional pIdAttr - dt <- optional (pAttr "rdf:datatype") + mi <- optional pIdAttr <* removeNodeAttr rdfID + traverse checkIdIsUnique mi + dt <- optional pDatatypeAttr l <- pText s <- currentSubject - lang <- liftA2 (<|>) pLang currentLang + lang <- currentLang let l' = TL.toStrict l o = lnode $ maybe (plainL l') id $ (typedL l' <$> dt) <|> (plainLL l' <$> lang) mt = (\s' -> Triple s' p o) <$> s @@ -249,10 +315,11 @@ pLiteralPropertyElt p = do pParseTypeLiteralPropertyElt :: Node -> Parser Triples pParseTypeLiteralPropertyElt p = do - mi <- optional pIdAttr - pt <- pAttr "rdf:parseType" + pt <- pRDFAttr rdfParseType guard (pt == "Literal") - l <- pText -- [FIXME] + mi <- optional pIdAttr <* removeNodeAttr rdfID + traverse checkIdIsUnique mi + l <- pText -- [FIXME] XML literal s <- currentSubject let l' = TL.toStrict l o = lnode (typedL l' rdfXmlLiteral) @@ -262,9 +329,10 @@ pParseTypeLiteralPropertyElt p = do pParseTypeResourcePropertyElt :: Node -> Parser Triples pParseTypeResourcePropertyElt p = do - mi <- optional pIdAttr - pt <- pAttr "rdf:parseType" + pt <- pRDFAttr rdfParseType guard (pt == "Resource") + mi <- optional pIdAttr <* removeNodeAttr rdfID + traverse checkIdIsUnique mi s <- currentSubject o <- newBNode let mt = (\s' -> Triple s' p o) <$> s @@ -276,9 +344,10 @@ pParseTypeResourcePropertyElt p = do pParseTypeCollectionPropertyElt :: Node -> Parser Triples pParseTypeCollectionPropertyElt p = do - mi <- optional pIdAttr - pt <- pAttr "rdf:parseType" + pt <- pRDFAttr rdfParseType guard (pt == "Collection") + mi <- optional pIdAttr <* removeNodeAttr rdfID + traverse checkIdIsUnique mi s <- currentSubject case s of Nothing -> pure mempty @@ -310,50 +379,60 @@ pParseTypeCollectionPropertyElt p = do pParseTypeOtherPropertyElt :: Node -> Parser Triples pParseTypeOtherPropertyElt _p = do - _mi <- optional pIdAttr - pt <- pAttr "rdf:parseType" + pt <- pRDFAttr rdfParseType guard (pt /= "Resource" && pt /= "Literal" && pt /= "Collection") + mi <- optional pIdAttr <* removeNodeAttr rdfID + traverse checkIdIsUnique mi pFail "TODO" -- [TODO] pEmptyPropertyElt :: Node -> Parser Triples pEmptyPropertyElt p = do - mi <- optional pIdAttr s <- currentSubject case s of Nothing -> pure mempty Just s' -> do + mi <- optional pIdAttr <* removeNodeAttr rdfID + traverse checkIdIsUnique mi o <- pResourceAttr' <|> pNodeIdAttr' <|> newBNode let t = Triple s' p o ts1 <- maybe (pure mempty) (`reifyTriple` t) mi ts2 <- pPropertyAttrs o pure (t:ts1 <> ts2) where - pResourceAttr' = unode <$> pResourceAttr - pNodeIdAttr' = BNode <$> pNodeIdAttr + pResourceAttr' = unode <$> pResourceAttr <* removeNodeAttr rdfResource + pNodeIdAttr' = BNode <$> pNodeIdAttr <* removeNodeAttr rdfNodeID pIdAttr :: Parser Text pIdAttr = do - i <- pAttr "rdf:ID" + i <- pRDFAttr rdfID either pFail pure (validateID i) +checkIdIsUnique :: Text -> Parser () +checkIdIsUnique i = do + notUnique <- S.member i <$> currentIdSet + when notUnique (pFail $ "rdf:ID already used in this context: " <> T.unpack i) + updateIdSet i + pNodeIdAttr :: Parser Text pNodeIdAttr = do - i <- pAttr "rdf:nodeID" + i <- pRDFAttr rdfNodeID either pFail pure (validateID i) pAboutAttr :: Parser Text -pAboutAttr = pAttr "rdf:about" >>= checkIRI "rdf:about" +pAboutAttr = pRDFAttr rdfAbout >>= checkIRI "rdf:about" pResourceAttr :: Parser Text -pResourceAttr = pAttr "rdf:resource" >>= checkIRI "rdf:resource" +pResourceAttr = pRDFAttr rdfResource >>= checkIRI "rdf:resource" pDatatypeAttr :: Parser Text -pDatatypeAttr = pAttr "rdf:datatype" >>= checkIRI "rdf:datatype" +pDatatypeAttr = pRDFAttr rdfDatatype >>= checkIRI "rdf:datatype" -- [TODO] pPropertyAttr :: Parser Triples pPropertyAttr = do - attrs <- HM.filterWithKey (\iri _ -> iri /= "rdf:type") <$> pAttrs + -- [FIXME] filter + -- attrs <- HM.filterWithKey (\iri _ -> iri /= "rdf:type") <$> pAttrs + attrs <- currentNodeAttrs s <- currentSubject lang <- currentLang let mkLiteral = lnode . maybe plainL (flip plainLL) lang @@ -369,25 +448,31 @@ pNoMoreChildren = pChildren >>= \case [] -> pure () ns -> pFail $ "Unexpected remaining children: " <> show ns +-- | Try the first parser, if it fails restore the state and try the second parser. +(<||>) :: Parser a -> Parser a -> Parser a +(<||>) p1 p2 = do + st <- get + p1 <|> (put st *> p2) + checkIRI :: String -> Text -> Parser Text checkIRI msg iri = do bUri <- maybe mempty unBaseUrl <$> currentBaseUri case uriValidate iri of - Nothing -> pFail ("Malformed IRI: " <> msg) + Nothing -> pFail $ mconcat ["Malformed IRI for \"", msg, "\": ", T.unpack iri] Just iri' -> either pFail pure (resolveIRI bUri iri') -- https://www.w3.org/TR/rdf-syntax-grammar/#coreSyntaxTerms isNotCoreSyntaxTerm :: Text -> Bool isNotCoreSyntaxTerm uri - = uri /= "rdf:RDF" && uri /= "rdf:ID" && uri /= "rdf:about" - && uri /= "rdf:parseType" && uri /= "rdf:resource" - && uri /= "rdf:nodeID" && uri /= "rdf:datatype" + = uri /= rdfTag && uri /= rdfID && uri /= rdfAbout + && uri /= rdfParseType && uri /= rdfResource + && uri /= rdfNodeID && uri /= rdfDatatype -- https://www.w3.org/TR/rdf-syntax-grammar/#oldTerms isNotOldTerm :: Text -> Bool -isNotOldTerm uri = uri /= "rdf:aboutEach" - && uri /= "rdf:aboutEachPrefix" - && uri /= "rdf:bagID" +isNotOldTerm uri = uri /= rdfAboutEach + && uri /= rdfAboutEachPrefix + && uri /= rdfBagID reifyTriple :: Text -> Triple -> Parser Triples reifyTriple i (Triple s p' o) = do @@ -403,6 +488,52 @@ newBNode = do st <- get pure $ BNodeGen (stateGenId st) +-- Parser's state utils +currentGenID :: Parser Int +currentGenID = stateGenId <$> get + +-- |Process a parser, restoring the state except for stateGenId and stateIdSet +keepState :: Parser a -> Parser a +keepState p = do + st <- get + let bUri = stateBaseUri st + is = stateIdSet st + p <* do + st' <- get + let i = stateGenId st' + bUri' = stateBaseUri st' + is' = stateIdSet st' + -- Update the set of ID if necessary + if bUri /= bUri' + then put (st { stateGenId = i }) + else put (st { stateGenId = i, stateIdSet = is <> is' }) + +currentIdSet :: Parser (Set Text) +currentIdSet = stateIdSet <$> get + +updateIdSet :: Text -> Parser () +updateIdSet i = do + is <- currentIdSet + modify (\st -> st { stateIdSet = S.insert i is }) + +currentNodeAttrs :: Parser (HashMap Text Text) +currentNodeAttrs = stateNodeAttrs <$> get + +setNodeAttrs :: HashMap Text Text -> Parser () +setNodeAttrs as = modify (\st -> st { stateNodeAttrs = as }) + +removeNodeAttr :: Text -> Parser () +removeNodeAttr a = HM.delete a <$> currentNodeAttrs >>= setNodeAttrs + +currentPrefixMappings :: Parser PrefixMappings +currentPrefixMappings = statePrefixMapping <$> get + +updatePrefixMappings :: PrefixMappings -> Parser PrefixMappings +updatePrefixMappings pm = do + pm' <- (<> pm) <$> currentPrefixMappings + modify (\st -> st { statePrefixMapping = pm' }) + pure pm' + currentListIndex :: Parser Int currentListIndex = stateListIndex <$> get diff --git a/src/Text/RDF/RDF4H/XmlParser/Utils.hs b/src/Text/RDF/RDF4H/XmlParser/Utils.hs index 32bd4fa..1ed22c8 100644 --- a/src/Text/RDF/RDF4H/XmlParser/Utils.hs +++ b/src/Text/RDF/RDF4H/XmlParser/Utils.hs @@ -1,25 +1,65 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TupleSections #-} + module Text.RDF.RDF4H.XmlParser.Utils - ( validateID + ( + -- Validation + validateID + , resolveQName, resolveQName' + , parseQName ) where import Data.Functor ((<$)) -import Control.Applicative (liftA2) +import Control.Applicative (liftA2, Alternative(..)) import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Map as Map import Data.Attoparsec.Text (Parser, ()) import qualified Data.Attoparsec.Text as P +import Data.RDF.Namespace +-- IRI processing validateID :: Text -> Either String Text validateID t = t <$ parseId t parseId :: Text -> Either String Text -parseId = P.parseOnly $ idParser <* (P.endOfInput "Unexpected characters at the end") +parseId = P.parseOnly $ pNCName <* (P.endOfInput "Unexpected characters at the end") + +resolveQName :: PrefixMappings -> Text -> Either String Text +resolveQName pm qn = parseQName qn >>= resolveQName' pm + +resolveQName' :: PrefixMappings -> (Maybe Text, Text) -> Either String Text +resolveQName' (PrefixMappings pm) (Nothing, name) = + case Map.lookup mempty pm of + Nothing -> Left "Cannot resolve QName: no default namespace defined." + Just iri -> Right $ iri <> name +resolveQName' (PrefixMappings pm) (Just prefix, name) = + case Map.lookup prefix pm of + Nothing -> Left $ mconcat ["Cannot resolve QName: prefix \"", T.unpack prefix, "\" not defined"] + Just iri -> Right $ iri <> name + +parseQName :: Text -> Either String (Maybe Text, Text) +parseQName = P.parseOnly $ pQName <* (P.endOfInput "Unexpected characters at the end of a QName") + +-- https://www.w3.org/TR/xml-names/#ns-qualnames +pQName :: Parser (Maybe Text, Text) +pQName = pPrefixedName <|> pUnprefixedNamed + where pUnprefixedNamed = (empty,) <$> pLocalPart + +pPrefixedName :: Parser (Maybe Text, Text) +pPrefixedName = do + prefix <- pLocalPart <* P.char ':' + localPart <- pLocalPart + pure (Just prefix, localPart) + +pLocalPart :: Parser Text +pLocalPart = pNCName -- http://www.w3.org/TR/REC-xml-names/#NT-NCName -idParser :: Parser Text -idParser = liftA2 T.cons pNameStartChar pNameRest +pNCName :: Parser Text +pNCName = liftA2 T.cons pNameStartChar pNameRest where pNameStartChar = P.satisfy isValidFirstCharId pNameRest = P.takeWhile isValidRestCharId diff --git a/src/Text/RDF/RDF4H/XmlParserHXT.hs b/src/Text/RDF/RDF4H/XmlParserHXT.hs index 8157bb0..6c6dcab 100644 --- a/src/Text/RDF/RDF4H/XmlParserHXT.hs +++ b/src/Text/RDF/RDF4H/XmlParserHXT.hs @@ -21,7 +21,7 @@ import Data.List (isPrefixOf) import qualified Data.Map as Map (fromList) import Data.Maybe import Data.Typeable -import Text.RDF.RDF4H.ParserUtils +import Text.RDF.RDF4H.ParserUtils hiding (rdfType) import Data.RDF.IRI import Data.RDF.Types (Rdf,RDF,RdfParser(..),Node(BNodeGen),BaseUrl(..),Triple(..),Triples,Subject,Predicate,Object,PrefixMappings(..),ParseFailure(ParseFailure),mkRdf,lnode,plainL,plainLL,typedL,unode,bnode,unodeValidate) import Data.Text (Text) diff --git a/testsuite/tests/Test.hs b/testsuite/tests/Test.hs index 62dd0af..91ecebc 100644 --- a/testsuite/tests/Test.hs +++ b/testsuite/tests/Test.hs @@ -43,7 +43,7 @@ main = do fromJust . filePathToUri $ (dir T.unpack suitesDir) turtleManifest <- loadManifest mfPathTurtle (fileSchemeUri suiteFilesDirTurtle) - xmlManifest <- loadManifest mfPathXml (fileSchemeUri suiteFilesDirXml) + xmlManifest <- loadManifest mfPathXml (unBaseUrl mfBaseURIXml) nTriplesManifest <- loadManifest mfPathNTriples (fileSchemeUri suiteFilesDirNTriples) -- run tests @@ -108,7 +108,7 @@ main = do , testGroup "parser-w3c-tests-xml" - [ W3CRdfXmlTest.tests xmlManifest + [ W3CRdfXmlTest.tests (dir T.unpack suiteFilesDirXml) xmlManifest ] ] ) diff --git a/testsuite/tests/W3C/RdfXmlTest.hs b/testsuite/tests/W3C/RdfXmlTest.hs index 0249663..0bce804 100644 --- a/testsuite/tests/W3C/RdfXmlTest.hs +++ b/testsuite/tests/W3C/RdfXmlTest.hs @@ -2,6 +2,7 @@ module W3C.RdfXmlTest ( tests + , mfBaseURIXml ) where import Data.Semigroup ((<>)) @@ -19,29 +20,34 @@ import Text.RDF.RDF4H.XmlParser import Text.RDF.RDF4H.NTriplesParser import Data.RDF.Graph.TList -tests :: Manifest -> TestTree -tests = runManifestTests mfEntryToTest +tests :: String -> Manifest -> TestTree +tests = runManifestTests . mfEntryToTest -- Functions to map manifest test entries to unit tests. -- They are defined here to avoid cluttering W3C.Manifest -- with functions that may not be needed to those who -- just want to parse Manifest files. -- TODO: They should probably be moved to W3C.Manifest after all. -mfEntryToTest :: TestEntry -> TestTree -mfEntryToTest (TestXMLEval nm _ _ act' res') = - let act = (UNode . fromJust . fileSchemeToFilePath) act' - res = (UNode . fromJust . fileSchemeToFilePath) res' - parsedRDF = (fromEither <$> parseFile testParser (nodeURI act)) :: IO (RDF TList) - expectedRDF = (fromEither <$> parseFile NTriplesParser (nodeURI res)) :: IO (RDF TList) +mfEntryToTest :: String -> TestEntry -> TestTree +mfEntryToTest dir (TestXMLEval nm _ _ act res) = + let pathExpected = getFilePath dir res + pathAction = getFilePath dir act + parsedRDF = (fromEither <$> parseFile (testParser (nodeURI act)) pathAction) :: IO (RDF TList) + expectedRDF = (fromEither <$> parseFile NTriplesParser pathExpected) :: IO (RDF TList) in TU.testCase (T.unpack nm) $ assertIsIsomorphic parsedRDF expectedRDF -mfEntryToTest (TestXMLNegativeSyntax nm _ _ act') = - let act = (UNode . fromJust . fileSchemeToFilePath) act' - rdf = parseFile testParser (nodeURI act) :: IO (Either ParseFailure (RDF TList)) +mfEntryToTest dir (TestXMLNegativeSyntax nm _ _ act) = + let pathAction = getFilePath dir act + rdf = parseFile (testParser (nodeURI act)) pathAction :: IO (Either ParseFailure (RDF TList)) in TU.testCase (T.unpack nm) $ assertIsNotParsed rdf -mfEntryToTest x = error $ "unknown TestEntry pattern in mfEntryToTest: " <> show x +mfEntryToTest _ x = error $ "unknown TestEntry pattern in mfEntryToTest: " <> show x + +getFilePath :: String -> Node -> String +getFilePath dir (UNode iri) = fixFilePath' iri + where fixFilePath' = (dir <>) . T.unpack . fromJust . T.stripPrefix (unBaseUrl mfBaseURIXml) +getFilePath _ _ = error "Unexpected node" mfBaseURIXml :: BaseUrl mfBaseURIXml = BaseUrl "http://www.w3.org/2013/RDFXMLTests/" -testParser :: XmlParser -testParser = XmlParser (Just mfBaseURIXml) Nothing +testParser :: String -> XmlParser +testParser dUri = XmlParser (Just mfBaseURIXml) (Just . T.pack $ dUri) From 93a39c2c94ca850937012373ca0150e99c8e6dae Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Thu, 30 May 2019 07:29:07 +0200 Subject: [PATCH 28/39] Improvements --- rdf4h.cabal | 2 +- src/Data/RDF/IRI.hs | 4 + src/Text/RDF/RDF4H/ParserUtils.hs | 6 +- src/Text/RDF/RDF4H/XmlParser.hs | 213 +++++++++--------- .../XmlParser/{Utils.hs => Identifiers.hs} | 4 +- 5 files changed, 115 insertions(+), 114 deletions(-) rename src/Text/RDF/RDF4H/XmlParser/{Utils.hs => Identifiers.hs} (94%) diff --git a/rdf4h.cabal b/rdf4h.cabal index b7a00f0..8b85377 100644 --- a/rdf4h.cabal +++ b/rdf4h.cabal @@ -39,7 +39,7 @@ library , Text.RDF.RDF4H.NTriplesSerializer , Text.RDF.RDF4H.XmlParser , Text.RDF.RDF4H.XmlParserHXT - , Text.RDF.RDF4H.XmlParser.Utils + , Text.RDF.RDF4H.XmlParser.Identifiers , Text.RDF.RDF4H.ParserUtils build-depends: attoparsec , base >= 4.8.0.0 diff --git a/src/Data/RDF/IRI.hs b/src/Data/RDF/IRI.hs index 4535828..0cdd7ce 100644 --- a/src/Data/RDF/IRI.hs +++ b/src/Data/RDF/IRI.hs @@ -16,6 +16,7 @@ module Data.RDF.IRI , serializeIRI , parseIRI, parseRelIRI , validateIRI, resolveIRI + , removeIRIFragment ) where import Data.Semigroup (Semigroup(..)) @@ -92,6 +93,9 @@ data SchemaError | MissingColon -- ^ Schemas must be followed by a colon deriving (Show, Eq) +removeIRIFragment :: IRIRef -> IRIRef +removeIRIFragment (IRIRef s a p q _) = IRIRef s a p q Nothing + -- [TODO] use Builder serializeIRI :: IRIRef -> Text serializeIRI (IRIRef s a p q f) = mconcat diff --git a/src/Text/RDF/RDF4H/ParserUtils.hs b/src/Text/RDF/RDF4H/ParserUtils.hs index 34ee00a..dda1d22 100644 --- a/src/Text/RDF/RDF4H/ParserUtils.hs +++ b/src/Text/RDF/RDF4H/ParserUtils.hs @@ -85,9 +85,9 @@ rdfDescription = mkUri rdf "Description" -- Old terms rdfAboutEach, rdfAboutEachPrefix, rdfBagID :: Text -rdfAboutEach = mkUri rdf "rdf:aboutEach" -rdfAboutEachPrefix = mkUri rdf "rdf:aboutEachPrefix" -rdfBagID = mkUri rdf "rdf:bagID" +rdfAboutEach = mkUri rdf "aboutEach" +rdfAboutEachPrefix = mkUri rdf "aboutEachPrefix" +rdfBagID = mkUri rdf "bagID" xmlLang :: Text xmlLang = mkUri xml "lang" diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index 9864c98..e02c311 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -1,10 +1,10 @@ {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE OverloadedLists #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE DoAndIfThenElse #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE LambdaCase #-} -{-# LANGUAGE TupleSections #-} -- |An parser for the RDF/XML format -- . @@ -18,34 +18,35 @@ module Text.RDF.RDF4H.XmlParser ) where import Text.RDF.RDF4H.ParserUtils hiding (Parser) -import Text.RDF.RDF4H.XmlParser.Utils +import Text.RDF.RDF4H.XmlParser.Identifiers import Data.RDF.IRI import Data.RDF.Types hiding (empty, resolveQName) import qualified Data.RDF.Types as RDF import Data.RDF.Graph.TList -import Debug.Trace +--import Debug.Trace import Control.Applicative -import Control.Exception import Control.Monad +import Control.Monad.Except import Control.Monad.State.Strict import Data.Semigroup ((<>)) import Data.Set (Set) import qualified Data.Set as S ---import Data.Map (Map) import qualified Data.Map as Map ---import Data.Maybe +import Data.Maybe import Data.Either import Data.Bifunctor ---import Data.Foldable +import Data.HashSet (HashSet) +import qualified Data.HashSet as HS import Data.HashMap.Strict (HashMap) import qualified Data.HashMap.Strict as HM import Data.Text (Text) ---import Data.Text.Encoding import qualified Data.Text as T import qualified Data.Text.IO as TIO import qualified Data.Text.Lazy as TL import qualified Data.Text.Encoding as T +import qualified Data.ByteString.Lazy as BL +import qualified Data.ByteString.Builder as BB import Xmlbf hiding (Node, Parser, State) import qualified Xmlbf.Xeno as Xeno @@ -71,7 +72,7 @@ parseURL' :: (Rdf a) -- corresponding to the XML document. parseURL' bUrl docUrl = parseFromURL (parseXmlRDF bUrl docUrl) -type Parser = ParserT (State ParseState) +type Parser = ParserT (ExceptT String (State ParseState)) -- |Local state for the parser (dependant on the parent xml elements) data ParseState = ParseState @@ -85,10 +86,6 @@ data ParseState = ParseState , stateGenId :: Int } deriving(Show) -data ParserException = ParserException String - deriving (Show) -instance Exception ParserException - -- |Parse a xml Text to an RDF representation parseXmlRDF :: (Rdf a) => Maybe BaseUrl -- ^ The base URL for the RDF if required @@ -100,33 +97,32 @@ parseXmlRDF bUrl dUrl = parseRdf . parseXml bUrl' = BaseUrl <$> dUrl <|> bUrl parseXml = Xeno.fromRawXml . T.encodeUtf8 parseRdf = first ParseFailure . join . second parseRdf' - parseRdf' ns = evalState (runParserT rdfParser ns) initState + parseRdf' ns = join $ evalState (runExceptT (runParserT rdfParser ns)) initState initState = ParseState bUrl' mempty mempty empty mempty empty 0 0 parseDebug :: String -> IO (RDF TList) -parseDebug f = fromRight RDF.empty <$> parseFile (XmlParser (Just . BaseUrl $ "http://plouf/") (Just "plouf")) f +parseDebug f = fromRight RDF.empty <$> parseFile (XmlParser (Just . BaseUrl $ "http://base-url.com/") (Just "http://doc-url.com/")) f rdfParser :: Rdf a => Parser (RDF a) rdfParser = do bUri <- currentBaseUri triples <- (pRdf <* pWs) <||> pNodeElementList pEndOfInput - pm <- currentPrefixMappings - pure $ mkRdf triples bUri pm + mkRdf triples bUri <$> currentPrefixMappings pRdf :: Parser Triples pRdf = pAnyElement $ do attrs <- pRDFAttrs uri <- pName >>= pQName guard (uri == rdfTag) - when (not $ HM.null attrs) $ pFail "rdf:RDF: The set of attributes should be empty." + unless (null attrs) $ throwError "rdf:RDF: The set of attributes should be empty." pNodeElementList pQName :: Text -> Parser Text pQName qn = do pm <- currentPrefixMappings let qn' = resolveQName pm qn >>= validateIRI - either pFail pure qn' + either throwError pure qn' -- |Process the attributes of a node pRDFAttrs :: Parser (HashMap Text Text) @@ -157,12 +153,15 @@ pRDFAttrs = do in either (const ns) id ns' mkRdfAttribute pm bUri as qn v = let as' = parseQName qn >>= f - f (Nothing, "xmlns") = Right as - f (Just "xmlns", _) = Right as - f qn'@(Just _, _) = (\a -> HM.insert a v as) <$> resolveQName' pm qn' - f (Nothing, uri) = case bUri of - Nothing -> Right as -- [FIXME] manage missing base URI - Just (BaseUrl bUri') -> (\a -> HM.insert a v as) <$> resolveIRI bUri' uri + -- [NOTE] Ignore xml reserved names + f (Nothing, n) + | T.isPrefixOf "xml" n = Right as + | otherwise = case bUri of + Nothing -> Right as -- [FIXME] manage missing base URI + Just (BaseUrl bUri') -> (\a -> HM.insert a v as) <$> resolveIRI bUri' n + f qn'@(Just prefix, _) + | T.isPrefixOf "xml" prefix = Right as + | otherwise = (\a -> HM.insert a v as) <$> resolveQName' pm qn' in either (const as) id as' pRDFAttr :: Text -> Parser Text @@ -203,36 +202,24 @@ pNodeElement = pAnyElement $ do --pSubject :: Parser (Node, Triples) pSubject :: Parser (Node, Maybe Triple) pSubject = do - mi <- optional pIdAttr - traverse checkIdIsUnique mi -- Create the subject + -- [TODO] check the attributes that only one of the following may work s <- pUnodeId <|> pBnode <|> pUnode <|> pBnodeGen - -- traceM (show s) + setSubject (Just s) -- Resolve URI uri <- pName >>= pQName - --currentBaseUri >>= traceM . show -- Check that the URI is allowed - when (not (checkNodeUri uri)) (pFail $ "URI not allowed: " <> T.unpack uri) + unless (checkNodeUri uri) (throwError $ "URI not allowed: " <> T.unpack uri) -- Optional rdf:type triple mtype <- optional (pType1 s uri) pure (s, mtype) where checkNodeUri uri = isNotCoreSyntaxTerm uri && uri /= rdfLi && isNotOldTerm uri pUnodeId = (pIdAttr >>= mkUNodeID) <* removeNodeAttr rdfID - pBnode = do - bn <- pNodeIdAttr <* removeNodeAttr rdfNodeID - let s = BNode bn - setSubject (Just s) - pure s - pUnode = do - s <- unode <$> pAboutAttr <* removeNodeAttr rdfAbout - setSubject (Just s) - pure s + pBnode = (BNode <$> pNodeIdAttr) <* removeNodeAttr rdfNodeID + pUnode = (unode <$> pAboutAttr) <* removeNodeAttr rdfAbout -- Default subject: a new blank node - pBnodeGen = do - s <- newBNode - setSubject (Just s) - pure s + pBnodeGen = newBNode pType1 n uri = if uri /= rdfDescription then pure $ Triple n rdfTypeNode (unode uri) @@ -243,13 +230,8 @@ pPropertyAttrs s = do attrs <- currentNodeAttrs HM.elems <$> HM.traverseWithKey f attrs where - -- https://www.w3.org/TR/rdf-syntax-grammar/#propertyAttributeURIs - isPropertyAttrURI uri = isNotCoreSyntaxTerm uri - && uri /= rdfDescription - && uri /= rdfLi - && isNotOldTerm uri f attr value - | not (isPropertyAttrURI attr) = pFail $ "URI not allowed for attribute: " <> T.unpack attr + | not (isPropertyAttrURI attr) = throwError $ "URI not allowed for attribute: " <> T.unpack attr | attr == rdfType = pure $ Triple s rdfTypeNode (unode value) | otherwise = do lang <- currentLang @@ -260,7 +242,13 @@ pLang :: Parser (Maybe Text) pLang = optional (pAttr "xml:lang") pBase :: Parser (Maybe BaseUrl) -pBase = optional (BaseUrl <$> pAttr "xml:base") +pBase = optional $ do + uri <- pAttr "xml:base" + -- Parse and remove fragment + BaseUrl <$> either + throwError + (pure . serializeIRI . removeIRIFragment) + (parseIRI uri) pPropertyEltList :: Parser Triples pPropertyEltList = pWs @@ -271,10 +259,11 @@ pPropertyElt :: Parser Triples pPropertyElt = pAnyElement $ do -- Process attributes void pRDFAttrs - --attrs1 <- currentNodeAttrs - --traceM ("pPropertyElt1 " <> show attrs1) - p <- unode <$> (pName >>= pQName >>= listExpansion) - -- [TODO] check URI + -- Process URI + uri <- pName >>= pQName >>= listExpansion + unless (isPropertyAttrURI uri) (throwError $ "URI not allowed for propertyElt: " <> T.unpack uri) + let p = unode uri + -- Process 'propertyElt' pParseTypeLiteralPropertyElt p <||> pParseTypeResourcePropertyElt p <||> pParseTypeCollectionPropertyElt p @@ -293,7 +282,7 @@ pResourcePropertyElt p = do (ts1, o) <- keepState $ liftA2 (,) pNodeElement currentSubject pWs mi <- optional pIdAttr <* removeNodeAttr rdfID - traverse checkIdIsUnique mi + checkAllowedAttributes [] s <- currentSubject let mt = flip Triple p <$> s <*> o ts2 <- maybe (pure mempty) (uncurry reifyTriple) (liftA2 (,) mi mt) @@ -301,14 +290,16 @@ pResourcePropertyElt p = do pLiteralPropertyElt :: Node -> Parser Triples pLiteralPropertyElt p = do + l <- pText + -- No children + pChildren >>= guard . null mi <- optional pIdAttr <* removeNodeAttr rdfID - traverse checkIdIsUnique mi + checkAllowedAttributes [rdfDatatype] dt <- optional pDatatypeAttr - l <- pText s <- currentSubject lang <- currentLang let l' = TL.toStrict l - o = lnode $ maybe (plainL l') id $ (typedL l' <$> dt) <|> (plainLL l' <$> lang) + o = lnode . fromMaybe (plainL l') $ (typedL l' <$> dt) <|> (plainLL l' <$> lang) mt = (\s' -> Triple s' p o) <$> s ts <- maybe (pure mempty) (uncurry reifyTriple) (liftA2 (,) mi mt) pure $ maybe ts (:ts) mt @@ -318,11 +309,10 @@ pParseTypeLiteralPropertyElt p = do pt <- pRDFAttr rdfParseType guard (pt == "Literal") mi <- optional pIdAttr <* removeNodeAttr rdfID - traverse checkIdIsUnique mi - l <- pText -- [FIXME] XML literal + checkAllowedAttributes [rdfParseType] + l <- pXMLLiteral s <- currentSubject - let l' = TL.toStrict l - o = lnode (typedL l' rdfXmlLiteral) + let o = lnode (typedL l rdfXmlLiteral) mt = (\s' -> Triple s' p o) <$> s ts <- maybe (pure mempty) (uncurry reifyTriple) (liftA2 (,) mi mt) pure $ maybe ts (:ts) mt @@ -332,7 +322,7 @@ pParseTypeResourcePropertyElt p = do pt <- pRDFAttr rdfParseType guard (pt == "Resource") mi <- optional pIdAttr <* removeNodeAttr rdfID - traverse checkIdIsUnique mi + checkAllowedAttributes [rdfParseType] s <- currentSubject o <- newBNode let mt = (\s' -> Triple s' p o) <$> s @@ -347,7 +337,7 @@ pParseTypeCollectionPropertyElt p = do pt <- pRDFAttr rdfParseType guard (pt == "Collection") mi <- optional pIdAttr <* removeNodeAttr rdfID - traverse checkIdIsUnique mi + checkAllowedAttributes [rdfParseType] s <- currentSubject case s of Nothing -> pure mempty @@ -369,7 +359,7 @@ pParseTypeCollectionPropertyElt p = do go s = do r <- optional pNodeElement case r of - Nothing -> pure $ [Triple s rdfRestNode rdfNilNode] + Nothing -> pure [Triple s rdfRestNode rdfNilNode] Just ts1 -> do s' <- currentSubject n <- newBNode @@ -381,9 +371,9 @@ pParseTypeOtherPropertyElt :: Node -> Parser Triples pParseTypeOtherPropertyElt _p = do pt <- pRDFAttr rdfParseType guard (pt /= "Resource" && pt /= "Literal" && pt /= "Collection") - mi <- optional pIdAttr <* removeNodeAttr rdfID - traverse checkIdIsUnique mi - pFail "TODO" -- [TODO] + checkAllowedAttributes [rdfParseType] + _mi <- optional pIdAttr <* removeNodeAttr rdfID + throwError "[TODO] pParseTypeOtherPropertyElt" pEmptyPropertyElt :: Node -> Parser Triples pEmptyPropertyElt p = do @@ -392,7 +382,6 @@ pEmptyPropertyElt p = do Nothing -> pure mempty Just s' -> do mi <- optional pIdAttr <* removeNodeAttr rdfID - traverse checkIdIsUnique mi o <- pResourceAttr' <|> pNodeIdAttr' <|> newBNode let t = Triple s' p o ts1 <- maybe (pure mempty) (`reifyTriple` t) mi @@ -402,21 +391,33 @@ pEmptyPropertyElt p = do pResourceAttr' = unode <$> pResourceAttr <* removeNodeAttr rdfResource pNodeIdAttr' = BNode <$> pNodeIdAttr <* removeNodeAttr rdfNodeID +checkAllowedAttributes :: HashSet Text -> Parser () +checkAllowedAttributes as = do + attrs <- currentNodeAttrs + let diff = HS.difference (HM.keysSet attrs) as + unless (null diff) (throwError $ "Attributes not allowed: " <> show diff) + +pXMLLiteral :: Parser Text +pXMLLiteral = + T.decodeUtf8 . BL.toStrict . BB.toLazyByteString . encode <$> pChildren + pIdAttr :: Parser Text pIdAttr = do i <- pRDFAttr rdfID - either pFail pure (validateID i) + i' <- either throwError pure (validateID i) + checkIdIsUnique i' + pure i' checkIdIsUnique :: Text -> Parser () checkIdIsUnique i = do notUnique <- S.member i <$> currentIdSet - when notUnique (pFail $ "rdf:ID already used in this context: " <> T.unpack i) + when notUnique (throwError $ "rdf:ID already used in this context: " <> T.unpack i) updateIdSet i pNodeIdAttr :: Parser Text pNodeIdAttr = do i <- pRDFAttr rdfNodeID - either pFail pure (validateID i) + either throwError pure (validateID i) pAboutAttr :: Parser Text pAboutAttr = pRDFAttr rdfAbout >>= checkIRI "rdf:about" @@ -427,26 +428,21 @@ pResourceAttr = pRDFAttr rdfResource >>= checkIRI "rdf:resource" pDatatypeAttr :: Parser Text pDatatypeAttr = pRDFAttr rdfDatatype >>= checkIRI "rdf:datatype" --- [TODO] -pPropertyAttr :: Parser Triples -pPropertyAttr = do - -- [FIXME] filter - -- attrs <- HM.filterWithKey (\iri _ -> iri /= "rdf:type") <$> pAttrs - attrs <- currentNodeAttrs - s <- currentSubject - lang <- currentLang - let mkLiteral = lnode . maybe plainL (flip plainLL) lang - pure $ maybe - mempty - (\s' -> HM.elems $ HM.mapWithKey (mkTriple s' mkLiteral) attrs) - s - where - mkTriple s mkLiteral iri value = Triple s (unode iri) (mkLiteral value) - pNoMoreChildren :: Parser () pNoMoreChildren = pChildren >>= \case [] -> pure () - ns -> pFail $ "Unexpected remaining children: " <> show ns + ns -> throwError $ "Unexpected remaining children: " <> show ns + +reifyTriple :: Text -> Triple -> Parser Triples +reifyTriple i (Triple s p' o) = do + n <- mkUNodeID i + pure [ Triple n rdfTypeNode rdfStatementNode + , Triple n rdfSubjectNode s + , Triple n rdfPredicateNode p' + , Triple n rdfObjectNode o ] + + +-- Parser utils -- | Try the first parser, if it fails restore the state and try the second parser. (<||>) :: Parser a -> Parser a -> Parser a @@ -454,12 +450,22 @@ pNoMoreChildren = pChildren >>= \case st <- get p1 <|> (put st *> p2) +-- URI checks + checkIRI :: String -> Text -> Parser Text checkIRI msg iri = do bUri <- maybe mempty unBaseUrl <$> currentBaseUri case uriValidate iri of - Nothing -> pFail $ mconcat ["Malformed IRI for \"", msg, "\": ", T.unpack iri] - Just iri' -> either pFail pure (resolveIRI bUri iri') + Nothing -> throwError $ mconcat ["Malformed IRI for \"", msg, "\": ", T.unpack iri] + Just iri' -> either throwError pure (resolveIRI bUri iri') + +-- https://www.w3.org/TR/rdf-syntax-grammar/#propertyAttributeURIs +isPropertyAttrURI :: Text -> Bool +isPropertyAttrURI uri + = isNotCoreSyntaxTerm uri + && uri /= rdfDescription + && uri /= rdfLi + && isNotOldTerm uri -- https://www.w3.org/TR/rdf-syntax-grammar/#coreSyntaxTerms isNotCoreSyntaxTerm :: Text -> Bool @@ -474,23 +480,12 @@ isNotOldTerm uri = uri /= rdfAboutEach && uri /= rdfAboutEachPrefix && uri /= rdfBagID -reifyTriple :: Text -> Triple -> Parser Triples -reifyTriple i (Triple s p' o) = do - n <- mkUNodeID i - pure [ Triple n rdfTypeNode rdfStatementNode - , Triple n rdfSubjectNode s - , Triple n rdfPredicateNode p' - , Triple n rdfObjectNode o ] - +-- Parser's state utils +-- |Create a new unique blank node newBNode :: Parser Node newBNode = do modify $ \st -> st { stateGenId = stateGenId st + 1 } - st <- get - pure $ BNodeGen (stateGenId st) - --- Parser's state utils -currentGenID :: Parser Int -currentGenID = stateGenId <$> get + BNodeGen . stateGenId <$> get -- |Process a parser, restoring the state except for stateGenId and stateIdSet keepState :: Parser a -> Parser a @@ -561,9 +556,11 @@ setBaseUri :: (Maybe BaseUrl) -> Parser () setBaseUri u = modify (\st -> st { stateBaseUri = u }) mkUNodeID :: Text -> Parser Node -mkUNodeID t = currentBaseUri >>= pure . unode . \case - Nothing -> t - Just (BaseUrl u) -> mconcat [u, "#", t] +mkUNodeID t = mkUnode <$> currentBaseUri + where + mkUnode = unode . \case + Nothing -> t + Just (BaseUrl u) -> mconcat [u, "#", t] currentSubject :: Parser (Maybe Subject) currentSubject = stateSubject <$> get diff --git a/src/Text/RDF/RDF4H/XmlParser/Utils.hs b/src/Text/RDF/RDF4H/XmlParser/Identifiers.hs similarity index 94% rename from src/Text/RDF/RDF4H/XmlParser/Utils.hs rename to src/Text/RDF/RDF4H/XmlParser/Identifiers.hs index 1ed22c8..a64df23 100644 --- a/src/Text/RDF/RDF4H/XmlParser/Utils.hs +++ b/src/Text/RDF/RDF4H/XmlParser/Identifiers.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TupleSections #-} -module Text.RDF.RDF4H.XmlParser.Utils +module Text.RDF.RDF4H.XmlParser.Identifiers ( -- Validation validateID @@ -33,7 +33,7 @@ resolveQName pm qn = parseQName qn >>= resolveQName' pm resolveQName' :: PrefixMappings -> (Maybe Text, Text) -> Either String Text resolveQName' (PrefixMappings pm) (Nothing, name) = case Map.lookup mempty pm of - Nothing -> Left "Cannot resolve QName: no default namespace defined." + Nothing -> Left $ mconcat ["Cannot resolve QName \"", T.unpack name, "\": no default namespace defined."] Just iri -> Right $ iri <> name resolveQName' (PrefixMappings pm) (Just prefix, name) = case Map.lookup prefix pm of From c04d8e4c744e5939c614ba7cac8737467accd301 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Thu, 30 May 2019 10:49:50 +0200 Subject: [PATCH 29/39] Update W3C repository --- rdf-tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rdf-tests b/rdf-tests index b3136e9..280e9de 160000 --- a/rdf-tests +++ b/rdf-tests @@ -1 +1 @@ -Subproject commit b3136e909c6f1bfa550290bfb6cc41a29f2dc40d +Subproject commit 280e9de3aaefa6b292a151bd455204d49a0c09db From f6664cd0c854867eae0bf310dede4dff7311ba71 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Thu, 30 May 2019 10:53:46 +0200 Subject: [PATCH 30/39] Update stack to use the proper version of Xmlbf. --- stack.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stack.yaml b/stack.yaml index 666abb9..7965a0e 100644 --- a/stack.yaml +++ b/stack.yaml @@ -6,8 +6,8 @@ packages: # - /home/rob/code/haskell/xmlbf/xmlbf-xeno extra-deps: - hgal-2.0.0.2 -- git: git@gitlab.com:k0001/xmlbf.git - commit: f46b96a401ac2ef6f30f8939c9f7bf92f38df383 +- git: git@gitlab.com:Wismill/xmlbf.git + commit: 0142bd5352316030c97bdcd9fe75d4afc20bf552 subdirs: - xmlbf - - xmlbf-xeno \ No newline at end of file + - xmlbf-xeno From d5ab65577b0bf0f5bc80c59dd4990314fcb95178 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Thu, 30 May 2019 11:09:17 +0200 Subject: [PATCH 31/39] Some more cleaning --- src/Text/RDF/RDF4H/XmlParser.hs | 92 --------------------------------- 1 file changed, 92 deletions(-) diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index e02c311..2d377ec 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -12,9 +12,6 @@ module Text.RDF.RDF4H.XmlParser ( XmlParser(..) , parseDebug -- [FIXME] - , xmlEg - , example11 - , example12 ) where import Text.RDF.RDF4H.ParserUtils hiding (Parser) @@ -172,9 +169,6 @@ pRDFAttr a = do pure (HM.lookup a as) -pMatchAndRemoveAttr :: Text -> Parser Text -pMatchAndRemoveAttr a = pRDFAttr a <* removeNodeAttr a - pNodeElementList :: Parser Triples pNodeElementList = pWs *> (mconcat <$> some (keepState pNodeElement <* pWs)) @@ -428,11 +422,6 @@ pResourceAttr = pRDFAttr rdfResource >>= checkIRI "rdf:resource" pDatatypeAttr :: Parser Text pDatatypeAttr = pRDFAttr rdfDatatype >>= checkIRI "rdf:datatype" -pNoMoreChildren :: Parser () -pNoMoreChildren = pChildren >>= \case - [] -> pure () - ns -> throwError $ "Unexpected remaining children: " <> show ns - reifyTriple :: Text -> Triple -> Parser Triples reifyTriple i (Triple s p' o) = do n <- mkUNodeID i @@ -573,84 +562,3 @@ currentLang = stateLang <$> get setLang :: (Maybe Text) -> Parser () setLang lang = modify (\st -> st { stateLang = lang }) - -example11 :: Text -example11 = T.pack $ unlines - [ "" - , "" - , " " - , " " - , " " - , " " - , "" - , "" - , "" - ] - -example12 :: Text -example12 = T.pack $ unlines - [ "" - , "" - , " " - , " " - , " Dave Beckett" - , " " - , " " - , " " - , "" - ] - -xmlEg :: Text -xmlEg = T.pack $ unlines - [ "" - , "" - , "" - , "W3Schools" - , "Jan Egil Refsnes" - , "" - , "" - ] - - --- missing in Xmlbf - --- | @'pElement'' p@ runs a 'Parser' @p@ inside a element node and --- returns a pair with the name of the parsed element and result of --- @p@. This fails if such element does not exist at the current --- position. --- --- Leading whitespace is ignored. If you need to preserve that whitespace for --- some reason, capture it using 'pText' before using 'pElement''. --- --- Consumes the element from the parser state. --- pElement' :: Parser a -> Parser (Text, a) --- pElement' = liftA2 (,) pName - --- pText' :: TL.Text -> Parser TL.Text --- pText' t = do --- let pTextFail = pFail ("Missing text node " <> show t) --- do t' <- pText --- if t == t' then pure t --- else pTextFail --- <|> pTextFail - - --- parser combinators missing in Xmlbf --- between :: Parser a -> Parser b -> Parser c -> Parser c --- between open close thing = open *> thing <* close --- --- manyTill :: Parser a -> Parser end -> Parser [a] --- manyTill thing z = many thing <* z - --- pElem :: Text -> Parser Text --- oneOf :: Parser [a] -> Parser a --- noneOf :: Parser [a] -> Parser a From 1f8bdb866a8b324e465c2ad9ca2b6a9abb71dac8 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Thu, 30 May 2019 12:54:07 +0200 Subject: [PATCH 32/39] Fix test suite for Turtle --- src/Text/RDF/RDF4H/TurtleParser.hs | 10 ++++-- src/Text/RDF/RDF4H/XmlParser.hs | 6 ++-- testsuite/tests/Test.hs | 17 +++++---- testsuite/tests/W3C/TurtleTest.hs | 57 +++++++++++++++++------------- 4 files changed, 51 insertions(+), 39 deletions(-) diff --git a/src/Text/RDF/RDF4H/TurtleParser.hs b/src/Text/RDF/RDF4H/TurtleParser.hs index f5c8b0a..ec4e8d6 100644 --- a/src/Text/RDF/RDF4H/TurtleParser.hs +++ b/src/Text/RDF/RDF4H/TurtleParser.hs @@ -8,6 +8,7 @@ module Text.RDF.RDF4H.TurtleParser ( TurtleParser(TurtleParser) , TurtleParserCustom(TurtleParserCustom) + , parseTurtleDebug ) where import Prelude hiding (readFile) @@ -16,9 +17,11 @@ import Data.Char (toLower, toUpper, isDigit, isHexDigit) import qualified Data.Map as Map import Data.Map (Map) import Data.Maybe +import Data.Either import Data.Semigroup ((<>)) import Data.RDF.Types import Data.RDF.IRI +import Data.RDF.Graph.TList import Text.RDF.RDF4H.ParserUtils import Text.RDF.RDF4H.NTriplesParser import Text.Parsec (runParser, ParseError) @@ -30,7 +33,7 @@ import Control.Monad import Text.Parser.Char import Text.Parser.Combinators import Text.Parser.LookAhead -import Control.Applicative +import Control.Applicative hiding (empty) import Control.Monad.State.Class import Control.Monad.State.Strict @@ -74,6 +77,9 @@ type ParseState = , Seq Triple -- the triples encountered while parsing; always added to on the right side , Map String Integer ) -- map blank node names to generated id. +parseTurtleDebug :: String -> IO (RDF TList) +parseTurtleDebug f = fromRight empty <$> parseFile (TurtleParserCustom (Just . BaseUrl $ "http://base-url.com/") (Just "http://doc-url.com/") Attoparsec) f + -- grammar rule: [1] turtleDoc t_turtleDoc :: (MonadState ParseState m, CharParsing m, LookAheadParsing m) => m (Seq Triple, PrefixMappings) t_turtleDoc = @@ -639,7 +645,7 @@ parseURLAttoparsec bUrl docUrl = parseFromURL (parseStringAttoparsec bUrl docUrl --------------------------------- initialState :: Maybe BaseUrl -> Maybe T.Text -> ParseState -initialState bUrl docUrl = (bUrl, docUrl, 1, PrefixMappings mempty, Nothing, Nothing, mempty, mempty) +initialState bUrl docUrl = (BaseUrl <$> docUrl <|> bUrl, docUrl, 1, PrefixMappings mempty, Nothing, Nothing, mempty, mempty) handleResult :: Rdf a => Maybe BaseUrl -> Either ParseError (Seq Triple, PrefixMappings) -> Either ParseFailure (RDF a) diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index 2d377ec..8894c8b 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -11,7 +11,7 @@ module Text.RDF.RDF4H.XmlParser ( XmlParser(..) - , parseDebug -- [FIXME] + , parseXmlDebug -- [FIXME] ) where import Text.RDF.RDF4H.ParserUtils hiding (Parser) @@ -97,8 +97,8 @@ parseXmlRDF bUrl dUrl = parseRdf . parseXml parseRdf' ns = join $ evalState (runExceptT (runParserT rdfParser ns)) initState initState = ParseState bUrl' mempty mempty empty mempty empty 0 0 -parseDebug :: String -> IO (RDF TList) -parseDebug f = fromRight RDF.empty <$> parseFile (XmlParser (Just . BaseUrl $ "http://base-url.com/") (Just "http://doc-url.com/")) f +parseXmlDebug :: String -> IO (RDF TList) +parseXmlDebug f = fromRight RDF.empty <$> parseFile (XmlParser (Just . BaseUrl $ "http://base-url.com/") (Just "http://doc-url.com/")) f rdfParser :: Rdf a => Parser (RDF a) rdfParser = do diff --git a/testsuite/tests/Test.hs b/testsuite/tests/Test.hs index 91ecebc..96c4c52 100644 --- a/testsuite/tests/Test.hs +++ b/testsuite/tests/Test.hs @@ -25,13 +25,13 @@ suiteFilesDirXml = "rdf-tests/rdf-xml/" suiteFilesDirNTriples = "rdf-tests/ntriples/" mfPathTurtle,mfPathXml,mfPathNTriples :: T.Text -mfPathTurtle = T.concat [suiteFilesDirTurtle, "manifest.ttl"] -mfPathXml = T.concat [suiteFilesDirXml, "manifest.ttl"] -mfPathNTriples = T.concat [suiteFilesDirNTriples, "manifest.ttl"] +mfPathTurtle = mconcat [suiteFilesDirTurtle, "manifest.ttl"] +mfPathXml = mconcat [suiteFilesDirXml, "manifest.ttl"] +mfPathNTriples = mconcat [suiteFilesDirNTriples, "manifest.ttl"] mfBaseURITurtle,mfBaseURIXml,mfBaseURINTriples :: BaseUrl -mfBaseURITurtle = BaseUrl "http://www.w3.org/2013/TurtleTests/" -mfBaseURIXml = BaseUrl "http://www.w3.org/2013/RDFXMLTests/" +mfBaseURITurtle = W3CTurtleTest.mfBaseURITurtle +mfBaseURIXml = W3CRdfXmlTest.mfBaseURIXml mfBaseURINTriples = BaseUrl "http://www.w3.org/2013/N-TriplesTests/" main :: IO () @@ -41,8 +41,7 @@ main = do dir <- getCurrentDirectory let fileSchemeUri suitesDir = fromJust . filePathToUri $ (dir T.unpack suitesDir) - turtleManifest <- - loadManifest mfPathTurtle (fileSchemeUri suiteFilesDirTurtle) + turtleManifest <- loadManifest mfPathTurtle (unBaseUrl mfBaseURITurtle) xmlManifest <- loadManifest mfPathXml (unBaseUrl mfBaseURIXml) nTriplesManifest <- loadManifest mfPathNTriples (fileSchemeUri suiteFilesDirNTriples) @@ -100,10 +99,10 @@ main = do "parser-w3c-tests-turtle" [ testGroup "parser-w3c-tests-turtle-parsec" - [W3CTurtleTest.testsParsec turtleManifest] + [W3CTurtleTest.testsParsec (dir T.unpack suiteFilesDirTurtle) turtleManifest] , testGroup "parser-w3c-tests-turtle-attoparsec" - [W3CTurtleTest.testsAttoparsec turtleManifest] + [W3CTurtleTest.testsAttoparsec (dir T.unpack suiteFilesDirTurtle) turtleManifest] ] , testGroup diff --git a/testsuite/tests/W3C/TurtleTest.hs b/testsuite/tests/W3C/TurtleTest.hs index 49af93e..68c19c2 100644 --- a/testsuite/tests/W3C/TurtleTest.hs +++ b/testsuite/tests/W3C/TurtleTest.hs @@ -3,6 +3,7 @@ module W3C.TurtleTest ( testsParsec , testsAttoparsec + , mfBaseURITurtle ) where import Test.Tasty @@ -22,41 +23,47 @@ import Text.RDF.RDF4H.NTriplesParser import Text.RDF.RDF4H.ParserUtils import Data.RDF.Graph.TList -testsParsec :: Manifest -> TestTree -testsParsec = runManifestTests (mfEntryToTest testParserParsec) +testsParsec :: String -> Manifest -> TestTree +testsParsec = runManifestTests . (`mfEntryToTest` testParserParsec) -testsAttoparsec :: Manifest -> TestTree -testsAttoparsec = runManifestTests (mfEntryToTest testParserAttoparsec) +testsAttoparsec :: String -> Manifest -> TestTree +testsAttoparsec = runManifestTests . (`mfEntryToTest` testParserAttoparsec) -mfEntryToTest :: TurtleParserCustom -> TestEntry -> TestTree -mfEntryToTest parser (TestTurtleEval nm _ _ act' res') = - let act = (UNode . fromJust . fileSchemeToFilePath) act' - res = (UNode . fromJust . fileSchemeToFilePath) res' - parsedRDF = (fromEither <$> parseFile parser (nodeURI act)) :: IO (RDF TList) - expectedRDF = (fromEither <$> parseFile NTriplesParser (nodeURI res)) :: IO (RDF TList) +mfEntryToTest :: String -> (String -> TurtleParserCustom) -> TestEntry -> TestTree +mfEntryToTest dir parser (TestTurtleEval nm _ _ act res) = + let pathExpected = getFilePath dir res + pathAction = getFilePath dir act + parsedRDF = (fromEither <$> parseFile (parser (nodeURI act)) pathAction) :: IO (RDF TList) + expectedRDF = (fromEither <$> parseFile NTriplesParser pathExpected) :: IO (RDF TList) in TU.testCase (T.unpack nm) $ assertIsIsomorphic parsedRDF expectedRDF -mfEntryToTest parser (TestTurtleNegativeEval nm _ _ act') = - let act = (UNode . fromJust . fileSchemeToFilePath) act' - rdf = parseFile parser (nodeURI act) :: IO (Either ParseFailure (RDF TList)) +mfEntryToTest dir parser (TestTurtleNegativeEval nm _ _ act) = + let pathAction = getFilePath dir act + rdf = parseFile (parser (nodeURI act)) pathAction :: IO (Either ParseFailure (RDF TList)) in TU.testCase (T.unpack nm) $ assertIsNotParsed rdf -mfEntryToTest parser (TestTurtlePositiveSyntax nm _ _ act') = - let act = (UNode . fromJust . fileSchemeToFilePath) act' - rdf = parseFile parser (nodeURI act) :: IO (Either ParseFailure (RDF TList)) +mfEntryToTest dir parser (TestTurtlePositiveSyntax nm _ _ act) = + let pathAction = getFilePath dir act + rdf = parseFile (parser (nodeURI act)) pathAction :: IO (Either ParseFailure (RDF TList)) in TU.testCase (T.unpack nm) $ assertIsParsed rdf -mfEntryToTest parser (TestTurtleNegativeSyntax nm _ _ act') = - let act = (UNode . fromJust . fileSchemeToFilePath) act' - rdf = parseFile parser (nodeURI act) :: IO (Either ParseFailure (RDF TList)) +mfEntryToTest dir parser (TestTurtleNegativeSyntax nm _ _ act) = + let pathAction = getFilePath dir act + rdf = parseFile (parser (nodeURI act)) pathAction :: IO (Either ParseFailure (RDF TList)) in TU.testCase (T.unpack nm) $ assertIsNotParsed rdf -mfEntryToTest _ x = error $ "unknown TestEntry pattern in mfEntryToTest: " <> show x +mfEntryToTest _ _ x = error $ "unknown TestEntry pattern in mfEntryToTest: " <> show x +-- [NOTE] Was previously: http://www.w3.org/2013/TurtleTests/ mfBaseURITurtle :: BaseUrl -mfBaseURITurtle = BaseUrl "http://www.w3.org/2013/TurtleTests/" +mfBaseURITurtle = BaseUrl "http://w3c.github.io/rdf-tests/turtle/" -- testParser :: TurtleParser -- testParser = TurtleParser (Just mfBaseURITurtle) Nothing -testParserParsec :: TurtleParserCustom -testParserParsec = TurtleParserCustom (Just mfBaseURITurtle) Nothing Parsec +testParserParsec :: String -> TurtleParserCustom +testParserParsec dUrl = TurtleParserCustom (Just mfBaseURITurtle) (Just . T.pack $ dUrl) Parsec -testParserAttoparsec :: TurtleParserCustom -testParserAttoparsec = TurtleParserCustom (Just mfBaseURITurtle) Nothing Attoparsec +testParserAttoparsec :: String -> TurtleParserCustom +testParserAttoparsec dUrl = TurtleParserCustom (Just mfBaseURITurtle) (Just . T.pack $ dUrl) Attoparsec + +getFilePath :: String -> Node -> String +getFilePath dir (UNode iri) = fixFilePath' iri + where fixFilePath' = (dir <>) . T.unpack . fromJust . T.stripPrefix (unBaseUrl mfBaseURITurtle) +getFilePath _ _ = error "Unexpected node" From b22f89ca2886d8183d32653508aa8fc9e8d98659 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Thu, 30 May 2019 15:41:50 +0200 Subject: [PATCH 33/39] Fix test-00.ttl --- data/ttl/conformance/test-00.out | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/ttl/conformance/test-00.out b/data/ttl/conformance/test-00.out index 1209817..7f8b717 100644 --- a/data/ttl/conformance/test-00.out +++ b/data/ttl/conformance/test-00.out @@ -1 +1 @@ -_:genid1 . +_:genid1 . From 4052d1aca76a4bc247c20817b4067d766e30cdd1 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Sat, 1 Jun 2019 01:51:42 +0200 Subject: [PATCH 34/39] Update latest changes of Xmlbf --- src/Text/RDF/RDF4H/XmlParser.hs | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index 8894c8b..a2e261a 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -44,7 +44,7 @@ import qualified Data.Text.Lazy as TL import qualified Data.Text.Encoding as T import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Builder as BB -import Xmlbf hiding (Node, Parser, State) +import Xmlbf hiding (Node, State) import qualified Xmlbf.Xeno as Xeno data XmlParser = XmlParser (Maybe BaseUrl) (Maybe Text) @@ -94,7 +94,7 @@ parseXmlRDF bUrl dUrl = parseRdf . parseXml bUrl' = BaseUrl <$> dUrl <|> bUrl parseXml = Xeno.fromRawXml . T.encodeUtf8 parseRdf = first ParseFailure . join . second parseRdf' - parseRdf' ns = join $ evalState (runExceptT (runParserT rdfParser ns)) initState + parseRdf' ns = join $ evalState (runExceptT (parseM rdfParser ns)) initState initState = ParseState bUrl' mempty mempty empty mempty empty 0 0 parseXmlDebug :: String -> IO (RDF TList) @@ -103,7 +103,7 @@ parseXmlDebug f = fromRight RDF.empty <$> parseFile (XmlParser (Just . BaseUrl $ rdfParser :: Rdf a => Parser (RDF a) rdfParser = do bUri <- currentBaseUri - triples <- (pRdf <* pWs) <||> pNodeElementList + triples <- (pRdf <* pWs) <|> pNodeElementList pEndOfInput mkRdf triples bUri <$> currentPrefixMappings @@ -165,7 +165,7 @@ pRDFAttr :: Text -> Parser Text pRDFAttr a = do as <- currentNodeAttrs maybe - (pFail $ mconcat ["Attribute \"", T.unpack a, "\" not found."]) + (fail $ mconcat ["Attribute \"", T.unpack a, "\" not found."]) pure (HM.lookup a as) @@ -259,12 +259,12 @@ pPropertyElt = pAnyElement $ do let p = unode uri -- Process 'propertyElt' pParseTypeLiteralPropertyElt p - <||> pParseTypeResourcePropertyElt p - <||> pParseTypeCollectionPropertyElt p - <||> pParseTypeOtherPropertyElt p - <||> pResourcePropertyElt p - <||> pLiteralPropertyElt p - <||> pEmptyPropertyElt p + <|> pParseTypeResourcePropertyElt p + <|> pParseTypeCollectionPropertyElt p + <|> pParseTypeOtherPropertyElt p + <|> pResourcePropertyElt p + <|> pLiteralPropertyElt p + <|> pEmptyPropertyElt p where listExpansion u | u == rdfLi = nextListIndex @@ -430,15 +430,6 @@ reifyTriple i (Triple s p' o) = do , Triple n rdfPredicateNode p' , Triple n rdfObjectNode o ] - --- Parser utils - --- | Try the first parser, if it fails restore the state and try the second parser. -(<||>) :: Parser a -> Parser a -> Parser a -(<||>) p1 p2 = do - st <- get - p1 <|> (put st *> p2) - -- URI checks checkIRI :: String -> Text -> Parser Text From f1a04e92e271b353188b431e72be3f51636b4d21 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Sat, 1 Jun 2019 02:16:04 +0200 Subject: [PATCH 35/39] Improve the Cabal file --- rdf4h.cabal | 50 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 11 deletions(-) diff --git a/rdf4h.cabal b/rdf4h.cabal index 8b85377..c5dc581 100644 --- a/rdf4h.cabal +++ b/rdf4h.cabal @@ -19,12 +19,25 @@ cabal-version: >= 1.8 build-type: Simple category: RDF stability: Experimental -tested-with: GHC==7.10.2, GHC==8.0.2 +tested-with: GHC==7.10.3, GHC==8.0.2, GHC==8.8.2, GHC==8.4.3, GHC==8.6.5 extra-tmp-files: test extra-source-files: examples/ParseURLs.hs , examples/ESWC.hs + +source-repository head + type: git + location: https://github.com/robstewart57/rdf4h.git + + +flag dev + description: Developer build + manual: True + default: False + + library + hs-source-dirs: src exposed-modules: Data.RDF , Data.RDF.IRI , Data.RDF.Namespace @@ -68,8 +81,12 @@ library if !impl(ghc >= 8.0) build-depends: semigroups == 0.18.* - hs-source-dirs: src - ghc-options: -Wall -funbox-strict-fields + ghc-options: -Wall -Wcompat -Wincomplete-record-updates -Wincomplete-uni-patterns -Wredundant-constraints + if flag(dev) + ghc-options: -O0 + else + ghc-options: -O2 -funbox-strict-fields + executable rdf4h main-is: src/Rdf4hParseMain.hs @@ -81,11 +98,17 @@ executable rdf4h if impl(ghc < 7.6) build-depends: ghc-prim - ghc-options: -Wall -funbox-strict-fields + ghc-options: -Wall -Wcompat -Wincomplete-record-updates -Wincomplete-uni-patterns -Wredundant-constraints + if flag(dev) + ghc-options: -O0 + else + ghc-options: -O2 -funbox-strict-fields + test-suite test-rdf4h type: exitcode-stdio-1.0 main-is: Test.hs + hs-source-dirs: testsuite/tests other-modules: Data.RDF.PropertyTests Data.RDF.GraphImplTests Data.RDF.IRITests @@ -93,9 +116,9 @@ test-suite test-rdf4h Text.RDF.RDF4H.XmlParser_Test W3C.Manifest W3C.NTripleTest + W3C.TurtleTest W3C.RdfXmlTest W3C.W3CAssertions - ghc-options: -Wall -fno-warn-orphans -funbox-strict-fields build-depends: base >= 4.8.0.0 && < 6 , rdf4h , tasty @@ -114,8 +137,12 @@ test-suite test-rdf4h if !impl(ghc >= 8.0) build-depends: semigroups == 0.18.* - other-modules: W3C.TurtleTest - hs-source-dirs: testsuite/tests + ghc-options: -Wall -Wcompat -Wincomplete-record-updates -Wincomplete-uni-patterns -Wredundant-constraints + if flag(dev) + ghc-options: -O0 + else + ghc-options: -O2 -funbox-strict-fields + benchmark rdf4h-bench type: exitcode-stdio-1.0 @@ -126,11 +153,12 @@ benchmark rdf4h-bench criterion, rdf4h, text >= 1.2.1.0 - ghc-options: -Wall if !impl(ghc >= 8.0) build-depends: semigroups == 0.18.* -source-repository head - type: git - location: https://github.com/robstewart57/rdf4h.git + ghc-options: -Wall -Wcompat -Wincomplete-record-updates -Wincomplete-uni-patterns -Wredundant-constraints + if flag(dev) + ghc-options: -O0 + else + ghc-options: -O2 -funbox-strict-fields From 1ad58fc59ed044961090f13c37a0f6eaef4818b2 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Mon, 3 Jun 2019 08:01:02 +0200 Subject: [PATCH 36/39] Documentation --- src/Text/RDF/RDF4H/XmlParser.hs | 232 ++++++++++++++------ src/Text/RDF/RDF4H/XmlParser/Identifiers.hs | 51 ++++- src/Text/RDF/RDF4H/XmlParserHXT.hs | 2 +- 3 files changed, 207 insertions(+), 78 deletions(-) diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index a2e261a..6cc4bfa 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -11,84 +11,105 @@ module Text.RDF.RDF4H.XmlParser ( XmlParser(..) - , parseXmlDebug -- [FIXME] + , parseXmlDebug ) where -import Text.RDF.RDF4H.ParserUtils hiding (Parser) -import Text.RDF.RDF4H.XmlParser.Identifiers -import Data.RDF.IRI -import Data.RDF.Types hiding (empty, resolveQName) +import Data.RDF.Types hiding (empty, resolveQName) import qualified Data.RDF.Types as RDF -import Data.RDF.Graph.TList - ---import Debug.Trace -import Control.Applicative -import Control.Monad -import Control.Monad.Except -import Control.Monad.State.Strict -import Data.Semigroup ((<>)) +import Data.RDF.IRI +import Data.RDF.Graph.TList +import Text.RDF.RDF4H.ParserUtils hiding (Parser) +import Text.RDF.RDF4H.XmlParser.Identifiers + +import Control.Applicative +import Control.Monad +import Control.Monad.Except +import Control.Monad.State.Strict +import Data.Semigroup ((<>)) import Data.Set (Set) import qualified Data.Set as S import qualified Data.Map as Map -import Data.Maybe -import Data.Either -import Data.Bifunctor +import Data.Maybe +import Data.Either +import Data.Bifunctor import Data.HashSet (HashSet) import qualified Data.HashSet as HS import Data.HashMap.Strict (HashMap) import qualified Data.HashMap.Strict as HM -import Data.Text (Text) +import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.IO as TIO import qualified Data.Text.Lazy as TL import qualified Data.Text.Encoding as T import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Builder as BB -import Xmlbf hiding (Node, State) +import Xmlbf hiding (Node, State) import qualified Xmlbf.Xeno as Xeno -data XmlParser = XmlParser (Maybe BaseUrl) (Maybe Text) instance RdfParser XmlParser where parseString (XmlParser bUrl dUrl) = parseXmlRDF bUrl dUrl parseFile (XmlParser bUrl dUrl) = parseFile' bUrl dUrl parseURL (XmlParser bUrl dUrl) = parseURL' bUrl dUrl +-- |Configuration for the XML parser +data XmlParser = XmlParser + (Maybe BaseUrl) + -- ^ The base URI to parse the document. + (Maybe Text) + -- ^ Location URI from which to retrieve the XML document. + parseFile' :: (Rdf a) => Maybe BaseUrl -> Maybe Text - -> String + -> FilePath -> IO (Either ParseFailure (RDF a)) parseFile' bUrl dUrl fpath = parseXmlRDF bUrl dUrl <$> TIO.readFile fpath parseURL' :: (Rdf a) - => Maybe BaseUrl -- ^ The optional base URI of the document. - -> Maybe Text -- ^ The document URI (i.e., the URI of the document itself); if Nothing, use location URI. - -> String -- ^ The location URI from which to retrieve the XML document. - -> IO (Either ParseFailure (RDF a)) -- ^ The parse result, which is either a @ParseFailure@ or the RDF - -- corresponding to the XML document. + => Maybe BaseUrl + -- ^ The optional base URI of the document. + -> Maybe Text + -- ^ The document URI (i.e., the URI of the document itself); if Nothing, use location URI. + -> String + -- ^ The location URI from which to retrieve the XML document. + -> IO (Either ParseFailure (RDF a)) + -- ^ The parse result, which is either a @ParseFailure@ or the RDF + -- corresponding to the XML document. parseURL' bUrl docUrl = parseFromURL (parseXmlRDF bUrl docUrl) +-- |The parser monad. type Parser = ParserT (ExceptT String (State ParseState)) -- |Local state for the parser (dependant on the parent xml elements) data ParseState = ParseState { stateBaseUri :: Maybe BaseUrl - , stateIdSet :: Set Text -- ^ set of rdf:ID found in the scope of the current base URI. + -- ^ The local base URI. + , stateIdSet :: Set Text + -- ^ The set of @rdf:ID@ found in the scope of the current base URI. , statePrefixMapping :: PrefixMappings + -- ^ The namespace mapping. , stateLang :: Maybe Text - , stateNodeAttrs :: HashMap Text Text -- ^ Current node RDF attributes + -- ^ The local @xml:lang@ + , stateNodeAttrs :: HashMap Text Text + -- ^ Current node RDF attributes. , stateSubject :: Maybe Subject - , stateListIndex :: Int + -- ^ Current subject for triple construction. + , stateCollectionIndex :: Int + -- ^ Current collection index. , stateGenId :: Int } deriving(Show) -- |Parse a xml Text to an RDF representation parseXmlRDF :: (Rdf a) - => Maybe BaseUrl -- ^ The base URL for the RDF if required - -> Maybe Text -- ^ DocUrl: The request URL for the RDF if available - -> Text -- ^ The contents to parse - -> Either ParseFailure (RDF a) -- ^ The RDF representation of the triples or ParseFailure + => Maybe BaseUrl + -- ^ The base URI for the RDF if required + -> Maybe Text + -- ^ The request URI for the document to if available + -> Text + -- ^ The contents to parse + -> Either ParseFailure (RDF a) + -- ^ The RDF representation of the triples or ParseFailure parseXmlRDF bUrl dUrl = parseRdf . parseXml where bUrl' = BaseUrl <$> dUrl <|> bUrl @@ -97,9 +118,14 @@ parseXmlRDF bUrl dUrl = parseRdf . parseXml parseRdf' ns = join $ evalState (runExceptT (parseM rdfParser ns)) initState initState = ParseState bUrl' mempty mempty empty mempty empty 0 0 -parseXmlDebug :: String -> IO (RDF TList) +-- |A parser for debugging purposes. +parseXmlDebug + :: FilePath + -- ^ Path of the file to parse. + -> IO (RDF TList) parseXmlDebug f = fromRight RDF.empty <$> parseFile (XmlParser (Just . BaseUrl $ "http://base-url.com/") (Just "http://doc-url.com/")) f +-- |Document parser rdfParser :: Rdf a => Parser (RDF a) rdfParser = do bUri <- currentBaseUri @@ -107,6 +133,8 @@ rdfParser = do pEndOfInput mkRdf triples bUri <$> currentPrefixMappings +-- |Parser for @rdf:RDF@, if present. +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#RDF pRdf :: Parser Triples pRdf = pAnyElement $ do attrs <- pRDFAttrs @@ -115,64 +143,95 @@ pRdf = pAnyElement $ do unless (null attrs) $ throwError "rdf:RDF: The set of attributes should be empty." pNodeElementList +-- |Parser for XML QName: resolve the namespace with the mapping in context. +-- +-- Throws an error if the namespace is not defined. pQName :: Text -> Parser Text pQName qn = do pm <- currentPrefixMappings let qn' = resolveQName pm qn >>= validateIRI either throwError pure qn' --- |Process the attributes of a node +-- |Process the attributes of a node. +-- +-- To be called __once__ per XML element. pRDFAttrs :: Parser (HashMap Text Text) pRDFAttrs = do -- Language (xml:lang) liftA2 (<|>) pLang currentLang >>= setLang - -- Base URI - -- [TODO] resolve base uri in context + -- Base URI (xml:base) liftA2 (<|>) pBase currentBaseUri >>= setBaseUri bUri <- currentBaseUri -- Process the rest of the attributes attrs <- pAttrs -- Get the namespace definitions (xmlns:) - pm <- updatePrefixMappings (PrefixMappings $ HM.foldlWithKey' mkNameSpaces mempty attrs) + pm <- updatePrefixMappings (PrefixMappings $ HM.foldlWithKey' mkNameSpace mempty attrs) -- Filter and resolve RDF attributes let as = HM.foldlWithKey' (mkRdfAttribute pm bUri) mempty attrs setNodeAttrs as pure as where - mkNameSpaces ns qn iri = - -- [TODO] resolve IRI - -- [TODO] check malformed identifiers & IRI + -- |Check if an XML attribute is a namespace definition + -- and if so add it to the mapping. + mkNameSpace + :: Map.Map Text Text + -- ^ Current namespace mapping + -> Text + -- ^ XML attribute to process + -> Text + -- ^ Value of the attribute + -> Map.Map Text Text + mkNameSpace ns qn iri = let qn' = parseQName qn ns' = f <$> qn' <*> validateIRI iri f (Nothing , "xmlns") iri' = Map.insert mempty iri' ns f (Just "xmlns", prefix ) iri' = Map.insert prefix iri' ns f _ _ = ns in either (const ns) id ns' + -- |Check if an XML attribute is an RDF attribute + -- and if so resolve its URI and keep it. + mkRdfAttribute + :: PrefixMappings + -- ^ Namespace mapping + -> Maybe BaseUrl + -- ^ Base URI + -> HM.HashMap Text Text + -- ^ Current set of RDF attributes + -> Text + -- ^ XML attribute to process + -> Text + -- ^ Value of the attribute + -> HM.HashMap Text Text mkRdfAttribute pm bUri as qn v = let as' = parseQName qn >>= f - -- [NOTE] Ignore xml reserved names + -- [NOTE] Ignore XML reserved names f (Nothing, n) | T.isPrefixOf "xml" n = Right as | otherwise = case bUri of - Nothing -> Right as -- [FIXME] manage missing base URI - Just (BaseUrl bUri') -> (\a -> HM.insert a v as) <$> resolveIRI bUri' n + Nothing -> Right as -- [FIXME] manage missing base URI + Just (BaseUrl bUri') -> (\a -> HM.insert a v as) <$> resolveIRI bUri' n f qn'@(Just prefix, _) | T.isPrefixOf "xml" prefix = Right as | otherwise = (\a -> HM.insert a v as) <$> resolveQName' pm qn' in either (const as) id as' +-- |Return the value of the requested RDF attribute using its URI. +-- +-- Fails if the attribute is not defined. pRDFAttr :: Text -> Parser Text pRDFAttr a = do as <- currentNodeAttrs maybe - (fail $ mconcat ["Attribute \"", T.unpack a, "\" not found."]) + (fail . mconcat $ ["Attribute \"", T.unpack a, "\" not found."]) pure (HM.lookup a as) +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#nodeElementList pNodeElementList :: Parser Triples pNodeElementList = pWs *> (mconcat <$> some (keepState pNodeElement <* pWs)) -- |White spaces parser +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#ws pWs :: Parser () pWs = maybe True (T.all ws . TL.toStrict) <$> optional pText >>= guard where @@ -184,7 +243,7 @@ pNodeElement :: Parser Triples pNodeElement = pAnyElement $ do -- Process attributes void pRDFAttrs - -- Process subject + -- Process URI, subject and @rdf:type@. (s, mt) <- pSubject ts1 <- pPropertyAttrs s -- Process propertyEltList @@ -193,7 +252,8 @@ pNodeElement = pAnyElement $ do let ts = ts1 <> ts2 pure $ maybe ts (:ts) mt ---pSubject :: Parser (Node, Triples) +-- |Process the following parts of a @nodeElement@: URI, subject and @rdf:type@. +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#nodeElement pSubject :: Parser (Node, Maybe Triple) pSubject = do -- Create the subject @@ -219,6 +279,7 @@ pSubject = do then pure $ Triple n rdfTypeNode (unode uri) else empty +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#propertyAttr pPropertyAttrs :: Node -> Parser Triples pPropertyAttrs s = do attrs <- currentNodeAttrs @@ -235,6 +296,7 @@ pPropertyAttrs s = do pLang :: Parser (Maybe Text) pLang = optional (pAttr "xml:lang") +-- [TODO] resolve base uri in context pBase :: Parser (Maybe BaseUrl) pBase = optional $ do uri <- pAttr "xml:base" @@ -244,16 +306,18 @@ pBase = optional $ do (pure . serializeIRI . removeIRIFragment) (parseIRI uri) +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#propertyEltList pPropertyEltList :: Parser Triples pPropertyEltList = pWs - *> resetListIndex + *> resetCollectionIndex *> fmap mconcat (many (pPropertyElt <* pWs)) +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#propertyElt pPropertyElt :: Parser Triples pPropertyElt = pAnyElement $ do -- Process attributes void pRDFAttrs - -- Process URI + -- Process the predicate from the URI uri <- pName >>= pQName >>= listExpansion unless (isPropertyAttrURI uri) (throwError $ "URI not allowed for propertyElt: " <> T.unpack uri) let p = unode uri @@ -267,21 +331,27 @@ pPropertyElt = pAnyElement $ do <|> pEmptyPropertyElt p where listExpansion u - | u == rdfLi = nextListIndex + | u == rdfLi = nextCollectionIndex | otherwise = pure u +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#resourcePropertyElt pResourcePropertyElt :: Node -> Parser Triples pResourcePropertyElt p = do pWs + -- [NOTE] We need to restore part of the state after exploring the element' children. (ts1, o) <- keepState $ liftA2 (,) pNodeElement currentSubject pWs mi <- optional pIdAttr <* removeNodeAttr rdfID + -- No other attribute is allowed. checkAllowedAttributes [] + -- Generated triple s <- currentSubject let mt = flip Triple p <$> s <*> o + -- Reify the triple ts2 <- maybe (pure mempty) (uncurry reifyTriple) (liftA2 (,) mi mt) pure $ maybe (ts1 <> ts2) (:(ts1 <> ts2)) mt +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#literalPropertyElt pLiteralPropertyElt :: Node -> Parser Triples pLiteralPropertyElt p = do l <- pText @@ -292,12 +362,15 @@ pLiteralPropertyElt p = do dt <- optional pDatatypeAttr s <- currentSubject lang <- currentLang + -- Generated triple let l' = TL.toStrict l o = lnode . fromMaybe (plainL l') $ (typedL l' <$> dt) <|> (plainLL l' <$> lang) mt = (\s' -> Triple s' p o) <$> s + -- Reify the triple ts <- maybe (pure mempty) (uncurry reifyTriple) (liftA2 (,) mi mt) pure $ maybe ts (:ts) mt +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#parseTypeLiteralPropertyElt pParseTypeLiteralPropertyElt :: Node -> Parser Triples pParseTypeLiteralPropertyElt p = do pt <- pRDFAttr rdfParseType @@ -305,27 +378,34 @@ pParseTypeLiteralPropertyElt p = do mi <- optional pIdAttr <* removeNodeAttr rdfID checkAllowedAttributes [rdfParseType] l <- pXMLLiteral + -- Generated triple s <- currentSubject let o = lnode (typedL l rdfXmlLiteral) mt = (\s' -> Triple s' p o) <$> s + -- Reify the triple ts <- maybe (pure mempty) (uncurry reifyTriple) (liftA2 (,) mi mt) pure $ maybe ts (:ts) mt +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#parseTypeResourcePropertyElt pParseTypeResourcePropertyElt :: Node -> Parser Triples pParseTypeResourcePropertyElt p = do pt <- pRDFAttr rdfParseType guard (pt == "Resource") mi <- optional pIdAttr <* removeNodeAttr rdfID checkAllowedAttributes [rdfParseType] + -- Generated triple s <- currentSubject o <- newBNode let mt = (\s' -> Triple s' p o) <$> s + -- Reify the triple ts1 <- maybe (pure mempty) (uncurry reifyTriple) (liftA2 (,) mi mt) setSubject (Just o) - ts2 <- keepListIndex pPropertyEltList - setSubject s + -- Explore children + ts2 <- keepCollectionIndex pPropertyEltList + --setSubject s pure $ maybe (ts1 <> ts2) ((<> ts2) . (:ts1)) mt +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#parseTypeCollectionPropertyElt pParseTypeCollectionPropertyElt :: Node -> Parser Triples pParseTypeCollectionPropertyElt p = do pt <- pRDFAttr rdfParseType @@ -339,28 +419,38 @@ pParseTypeCollectionPropertyElt p = do r <- optional pNodeElement case r of Nothing -> + -- Empty collection let t = Triple s' p rdfNilNode in ([t] <>) <$> maybe (pure mempty) (`reifyTriple` t) mi Just ts1 -> do + -- Non empty collection s'' <- currentSubject n <- newBNode + -- Triples corresping to the first item let t = Triple s' p n ts2 = maybe mempty (\s''' -> [t, Triple n rdfFirstNode s''']) s'' + -- Process next item ts3 <- go n + -- Reify triple ts4 <- maybe (pure mempty) (`reifyTriple` t) mi pure $ mconcat [ts1, ts2, ts3, ts4] where go s = do + -- Generate the triples of the current item. r <- optional pNodeElement case r of + -- End of the collection Nothing -> pure [Triple s rdfRestNode rdfNilNode] + -- Add the item to the collection and process the next item Just ts1 -> do s' <- currentSubject n <- newBNode let ts2 = maybe mempty (\s'' -> [Triple s rdfRestNode n, Triple n rdfFirstNode s'']) s' + -- Next item ts3 <- go n pure $ mconcat [ts1, ts2, ts3] +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#parseTypeOtherPropertyElt pParseTypeOtherPropertyElt :: Node -> Parser Triples pParseTypeOtherPropertyElt _p = do pt <- pRDFAttr rdfParseType @@ -369,6 +459,7 @@ pParseTypeOtherPropertyElt _p = do _mi <- optional pIdAttr <* removeNodeAttr rdfID throwError "[TODO] pParseTypeOtherPropertyElt" +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#emptyPropertyElt pEmptyPropertyElt :: Node -> Parser Triples pEmptyPropertyElt p = do s <- currentSubject @@ -378,6 +469,7 @@ pEmptyPropertyElt p = do mi <- optional pIdAttr <* removeNodeAttr rdfID o <- pResourceAttr' <|> pNodeIdAttr' <|> newBNode let t = Triple s' p o + -- Reify triple ts1 <- maybe (pure mempty) (`reifyTriple` t) mi ts2 <- pPropertyAttrs o pure (t:ts1 <> ts2) @@ -391,6 +483,8 @@ checkAllowedAttributes as = do let diff = HS.difference (HM.keysSet attrs) as unless (null diff) (throwError $ "Attributes not allowed: " <> show diff) +-- See: https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-xmlliteral, +-- https://www.w3.org/TR/rdf-syntax-grammar/#literal pXMLLiteral :: Parser Text pXMLLiteral = T.decodeUtf8 . BL.toStrict . BB.toLazyByteString . encode <$> pChildren @@ -398,7 +492,8 @@ pXMLLiteral = pIdAttr :: Parser Text pIdAttr = do i <- pRDFAttr rdfID - i' <- either throwError pure (validateID i) + i' <- either throwError pure (checkRdfId i) + -- Check the uniqueness of the ID in the context of the current base URI. checkIdIsUnique i' pure i' @@ -411,7 +506,7 @@ checkIdIsUnique i = do pNodeIdAttr :: Parser Text pNodeIdAttr = do i <- pRDFAttr rdfNodeID - either throwError pure (validateID i) + either throwError pure (checkRdfId i) pAboutAttr :: Parser Text pAboutAttr = pRDFAttr rdfAbout >>= checkIRI "rdf:about" @@ -430,6 +525,7 @@ reifyTriple i (Triple s p' o) = do , Triple n rdfPredicateNode p' , Triple n rdfObjectNode o ] +-------------------------------------------------------------------------------- -- URI checks checkIRI :: String -> Text -> Parser Text @@ -460,7 +556,9 @@ isNotOldTerm uri = uri /= rdfAboutEach && uri /= rdfAboutEachPrefix && uri /= rdfBagID +-------------------------------------------------------------------------------- -- Parser's state utils + -- |Create a new unique blank node newBNode :: Parser Node newBNode = do @@ -509,25 +607,25 @@ updatePrefixMappings pm = do modify (\st -> st { statePrefixMapping = pm' }) pure pm' -currentListIndex :: Parser Int -currentListIndex = stateListIndex <$> get +currentCollectionIndex :: Parser Int +currentCollectionIndex = stateCollectionIndex <$> get -setListIndex :: Int -> Parser () -setListIndex i = modify (\st -> st { stateListIndex = i }) +setCollectionIndex :: Int -> Parser () +setCollectionIndex i = modify (\st -> st { stateCollectionIndex = i }) -keepListIndex :: Parser a -> Parser a -keepListIndex p = do - i <- currentListIndex - p <* setListIndex i +keepCollectionIndex :: Parser a -> Parser a +keepCollectionIndex p = do + i <- currentCollectionIndex + p <* setCollectionIndex i -- See: https://www.w3.org/TR/rdf-syntax-grammar/#section-List-Expand -nextListIndex :: Parser Text -nextListIndex = do - modify $ \st -> st { stateListIndex = stateListIndex st + 1 } - (rdfListIndex <>) . T.pack . show . stateListIndex <$> get +nextCollectionIndex :: Parser Text +nextCollectionIndex = do + modify $ \st -> st { stateCollectionIndex = stateCollectionIndex st + 1 } + (rdfListIndex <>) . T.pack . show . stateCollectionIndex <$> get -resetListIndex :: Parser () -resetListIndex = modify $ \st -> st { stateListIndex = 0 } +resetCollectionIndex :: Parser () +resetCollectionIndex = modify $ \st -> st { stateCollectionIndex = 0 } currentBaseUri :: Parser (Maybe BaseUrl) currentBaseUri = stateBaseUri <$> get diff --git a/src/Text/RDF/RDF4H/XmlParser/Identifiers.hs b/src/Text/RDF/RDF4H/XmlParser/Identifiers.hs index a64df23..6592525 100644 --- a/src/Text/RDF/RDF4H/XmlParser/Identifiers.hs +++ b/src/Text/RDF/RDF4H/XmlParser/Identifiers.hs @@ -2,9 +2,9 @@ {-# LANGUAGE TupleSections #-} module Text.RDF.RDF4H.XmlParser.Identifiers - ( - -- Validation - validateID + ( -- rdf:ID validation + checkRdfId + -- Qualified names , resolveQName, resolveQName' , parseQName ) where @@ -20,17 +20,42 @@ import qualified Data.Attoparsec.Text as P import Data.RDF.Namespace --- IRI processing -validateID :: Text -> Either String Text -validateID t = t <$ parseId t +-------------------------------------------------------------------------------- +-- rdf:ID + +-- |Validate the value of @rdf:ID@. +-- +-- See: https://www.w3.org/TR/rdf-syntax-grammar/#rdf-id +checkRdfId + :: Text + -- ^ Value of a @rdf:ID@ attribute to validate. + -> Either String Text +checkRdfId t = t <$ parseId t parseId :: Text -> Either String Text parseId = P.parseOnly $ pNCName <* (P.endOfInput "Unexpected characters at the end") -resolveQName :: PrefixMappings -> Text -> Either String Text +-------------------------------------------------------------------------------- +-- Qualified names + +-- |Parse and resolve a qualified name. +-- +-- See: https://www.w3.org/TR/xml-names/#ns-qualnames +resolveQName + :: PrefixMappings + -- ^ Namespace mapping to resolve q qualified name. + -> Text + -- ^ Raw qualified name to process. + -> Either String Text resolveQName pm qn = parseQName qn >>= resolveQName' pm -resolveQName' :: PrefixMappings -> (Maybe Text, Text) -> Either String Text +-- |Resolve a qualified name. +resolveQName' + :: PrefixMappings + -- ^ Namespace mapping to resolve q qualified name. + -> (Maybe Text, Text) + -- ^ (namespace, local name) + -> Either String Text resolveQName' (PrefixMappings pm) (Nothing, name) = case Map.lookup mempty pm of Nothing -> Left $ mconcat ["Cannot resolve QName \"", T.unpack name, "\": no default namespace defined."] @@ -40,20 +65,26 @@ resolveQName' (PrefixMappings pm) (Just prefix, name) = Nothing -> Left $ mconcat ["Cannot resolve QName: prefix \"", T.unpack prefix, "\" not defined"] Just iri -> Right $ iri <> name +-- |Parse a qualified name. +-- +-- See: https://www.w3.org/TR/xml-names/#ns-qualnames parseQName :: Text -> Either String (Maybe Text, Text) parseQName = P.parseOnly $ pQName <* (P.endOfInput "Unexpected characters at the end of a QName") -- https://www.w3.org/TR/xml-names/#ns-qualnames +-- https://www.w3.org/TR/xml-names/#NT-QName pQName :: Parser (Maybe Text, Text) pQName = pPrefixedName <|> pUnprefixedNamed where pUnprefixedNamed = (empty,) <$> pLocalPart +-- https://www.w3.org/TR/xml-names/#NT-PrefixedName pPrefixedName :: Parser (Maybe Text, Text) pPrefixedName = do prefix <- pLocalPart <* P.char ':' localPart <- pLocalPart pure (Just prefix, localPart) +-- https://www.w3.org/TR/xml-names/#NT-LocalPart pLocalPart :: Parser Text pLocalPart = pNCName @@ -63,8 +94,8 @@ pNCName = liftA2 T.cons pNameStartChar pNameRest where pNameStartChar = P.satisfy isValidFirstCharId pNameRest = P.takeWhile isValidRestCharId - isValidFirstCharId c = - ('A' <= c && c <= 'Z') || c == '_' || ('a' <= c && c <= 'z') + isValidFirstCharId c + = ('A' <= c && c <= 'Z') || c == '_' || ('a' <= c && c <= 'z') || ('\xC0' <= c && c <= '\xD6') || ('\xD8' <= c && c <= '\xF6') || ('\xF8' <= c && c <= '\x2FF') || ('\x370' <= c && c <= '\x37D') || ('\x37F' <= c && c <= '\x1FFF') || ('\x200C' <= c && c <= '\x200D') diff --git a/src/Text/RDF/RDF4H/XmlParserHXT.hs b/src/Text/RDF/RDF4H/XmlParserHXT.hs index 6c6dcab..8bec792 100644 --- a/src/Text/RDF/RDF4H/XmlParserHXT.hs +++ b/src/Text/RDF/RDF4H/XmlParserHXT.hs @@ -464,7 +464,7 @@ xmlName str = go [] str else Nothing isValid c = isAlphaNum c || '_' == c - -- || '-' == c + -- '-' == c || '.' == c || ':' == c From 3ed61db7dd9d58be701d8f250ab198424c0da5cf Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Mon, 3 Jun 2019 08:26:22 +0200 Subject: [PATCH 37/39] Add benchmark for XML parsers --- bench/MainCriterion.hs | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/bench/MainCriterion.hs b/bench/MainCriterion.hs index d5ccb73..6077c0c 100644 --- a/bench/MainCriterion.hs +++ b/bench/MainCriterion.hs @@ -19,21 +19,15 @@ import Control.DeepSeq (NFData) -- $ gzip -d bills.099.actions.rdf.gz parseXmlRDF :: Rdf a => T.Text -> RDF a -parseXmlRDF s = - let (Right rdf) = parseString (XmlParser Nothing Nothing) s - in rdf +parseXmlRDF = either (error . show) id . parseString (XmlParser Nothing Nothing) {-# INLINE parseXmlRDF #-} parseNtRDF :: Rdf a => T.Text -> RDF a -parseNtRDF s = - let (Right rdf) = parseString NTriplesParser s - in rdf +parseNtRDF = either (error . show) id . parseString NTriplesParser {-# INLINE parseNtRDF #-} parseTtlRDF :: Rdf a => T.Text -> RDF a -parseTtlRDF s = - let (Right rdf) = parseString (TurtleParser Nothing Nothing) s - in rdf +parseTtlRDF = either (error . show) id . parseString (TurtleParser Nothing Nothing) {-# INLINE parseTtlRDF #-} queryGr :: Rdf a => (Maybe Node,Maybe Node,Maybe Node,RDF a) -> [Triple] @@ -49,17 +43,19 @@ main :: IO () main = defaultMainWith (defaultConfig {resamples = 100}) [ env + -- [FIXME] Do not rely on system's defaults to read files. (do fawltyContentTurtle <- readFile "data/ttl/fawlty1.ttl" fawltyContentNTriples <- readFile "data/nt/all-fawlty-towers.nt" - rdf1' <- parseFile (XmlParser Nothing Nothing) xmlFile - rdf2' <- parseFile (XmlParser Nothing Nothing) xmlFile - rdf3' <- parseFile (XmlParser Nothing Nothing) xmlFile - let rdf1 = either (error . show) id rdf1' :: RDF TList + xmlContent <- readFile xmlFile + let rdf1' = parseString (XmlParser Nothing Nothing) xmlContent + rdf2' = parseString (XmlParser Nothing Nothing) xmlContent + rdf3' =parseString (XmlParser Nothing Nothing) xmlContent + rdf1 = either (error . show) id rdf1' :: RDF TList rdf2 = either (error . show) id rdf2' :: RDF AdjHashMap rdf3 = either (error . show) id rdf3' :: RDF AlgebraicGraph triples = triplesOf rdf1 - return (rdf1, rdf2, rdf3, triples, fawltyContentNTriples, fawltyContentTurtle)) $ - \ ~(triplesList, adjMap, algGraph, triples, fawltyContentNTriples, fawltyContentTurtle) -> + return (rdf1, rdf2, rdf3, triples, fawltyContentNTriples, fawltyContentTurtle, xmlContent)) $ + \ ~(triplesList, adjMap, algGraph, triples, fawltyContentNTriples, fawltyContentTurtle, xmlContent) -> bgroup "rdf4h" [ bgroup @@ -84,6 +80,16 @@ main = defaultMainWith let res = parseString (TurtleParserCustom Nothing Nothing Attoparsec) t :: Either ParseFailure (RDF TList) in either (error . show) id res ) fawltyContentTurtle + , bench "xml-xmlbf" $ + nf (\t -> + let res = parseString (XmlParser Nothing Nothing) t :: Either ParseFailure (RDF TList) + in either (error . show) id res + ) xmlContent + , bench "xml-xht" $ + nf (\t -> + let res = parseString (XmlParserHXT Nothing Nothing) t :: Either ParseFailure (RDF TList) + in either (error . show) id res + ) xmlContent ] , bgroup @@ -149,7 +155,7 @@ queryBench label gr = , bench (label <> " O") $ nf queryGr (queryNothing,queryNothing,objQuery,gr) ] -addRemoveTriples :: (NFData a,NFData (RDF a), Rdf a) => String -> Triples -> RDF a -> RDF a -> [Benchmark] +addRemoveTriples :: (NFData (RDF a), Rdf a) => String -> Triples -> RDF a -> RDF a -> [Benchmark] addRemoveTriples lbl triples emptyGr populatedGr = [ bench (lbl <> "-add-triples") $ nf addTriples (triples,emptyGr) , bench (lbl <> "-remove-triples") $ nf removeTriples (triples,populatedGr) From 6485b27715fc068c74abf6041f70485811634276 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Mon, 3 Jun 2019 08:28:38 +0200 Subject: [PATCH 38/39] Update stack.yaml to the latest commit of the official repo --- stack.yaml | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/stack.yaml b/stack.yaml index 7965a0e..d755ed7 100644 --- a/stack.yaml +++ b/stack.yaml @@ -2,12 +2,10 @@ resolver: lts-12.1 compiler-check: newer-minor packages: - '.' -# - /home/rob/code/haskell/xmlbf/xmlbf -# - /home/rob/code/haskell/xmlbf/xmlbf-xeno extra-deps: - hgal-2.0.0.2 -- git: git@gitlab.com:Wismill/xmlbf.git - commit: 0142bd5352316030c97bdcd9fe75d4afc20bf552 - subdirs: - - xmlbf - - xmlbf-xeno + - git: git@gitlab.com:k0001/xmlbf.git + commit: ce65be6366c0aaafcc282268d767c52380e459e6 + subdirs: + - xmlbf + - xmlbf-xeno From 8bf9f1f6344cf898982dc320ece0c861ec4ec32f Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Mon, 3 Jun 2019 08:54:07 +0200 Subject: [PATCH 39/39] Documentation --- src/Text/RDF/RDF4H/XmlParser.hs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Text/RDF/RDF4H/XmlParser.hs b/src/Text/RDF/RDF4H/XmlParser.hs index 6cc4bfa..9958fed 100644 --- a/src/Text/RDF/RDF4H/XmlParser.hs +++ b/src/Text/RDF/RDF4H/XmlParser.hs @@ -55,9 +55,9 @@ instance RdfParser XmlParser where -- |Configuration for the XML parser data XmlParser = XmlParser (Maybe BaseUrl) - -- ^ The base URI to parse the document. + -- ^ The /default/ base URI to parse the document. (Maybe Text) - -- ^ Location URI from which to retrieve the XML document. + -- ^ The /retrieval URI/ of the XML document. parseFile' :: (Rdf a) => Maybe BaseUrl @@ -152,7 +152,7 @@ pQName qn = do let qn' = resolveQName pm qn >>= validateIRI either throwError pure qn' --- |Process the attributes of a node. +-- |Process the attributes of an XML element. -- -- To be called __once__ per XML element. pRDFAttrs :: Parser (HashMap Text Text) @@ -457,7 +457,8 @@ pParseTypeOtherPropertyElt _p = do guard (pt /= "Resource" && pt /= "Literal" && pt /= "Collection") checkAllowedAttributes [rdfParseType] _mi <- optional pIdAttr <* removeNodeAttr rdfID - throwError "[TODO] pParseTypeOtherPropertyElt" + -- [FIXME] Implement 'parseTypeOtherPropertyElt' + throwError "Not implemented: rdf:parseType = other" -- See: https://www.w3.org/TR/rdf-syntax-grammar/#emptyPropertyElt pEmptyPropertyElt :: Node -> Parser Triples