%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% The following collection of Xcerpt rules     %%
%% parses and normalizes RDF/XML files into     %%
%% straight triples                             %%
%% -------------------------------------------- %%
%% For more details see Oliver Bolzer, "Towards
%% Data-Integration on the Semantic Web: Querying
%% RDF with Xcerpt". Diploma Thesis, 2004
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% Intermediate terms built by the rules below:
%%   RDFXML-SUBJECT[ ORIGIN, ELEMENT ]    - an element treated as a node element
%%   RDFXML-NODE[ ORIGIN, ELEMENT, NAME ] - that element paired with its
%%                                          uri{} / blank{} name
%%   RDF-TRIPLE[ attributes{ origin{ORIGIN} }, SUBJECT, PREDICATE, OBJECT ]
%% Input is read from terms of the form RDF-FILE[ ORIGIN, rdf:RDF{{ ... }} ].


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Part 1: collecting node elements
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Top-Level Subjects
% Every direct child of rdf:RDF becomes an RDFXML-SUBJECT.
CONSTRUCT
  RDFXML-SUBJECT[ var ORIGIN, var SUB ]
FROM
  ns-prefix rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  RDF-FILE[ var ORIGIN, rdf:RDF{{ var SUB }} ]
END

% Recursion, subject-predicate-object nesting
% An element nested inside a property element of an already known subject
% is itself a subject, unless the property element carries
% rdf:parseType="Literal" or rdf:parseType="Resource". The first conjunct
% binds IDENT/OBJECT to a descendant of rdf:RDF; the second requires that
% very same node (by identity) to sit below a property of a known subject.
CONSTRUCT
  RDFXML-SUBJECT[ var ORIGIN, var OBJECT ]
FROM
  ns-prefix rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  and[
    RDF-FILE[ var ORIGIN,
      rdf:RDF{{
        desc identity var IDENT var OBJECT as /.*/:/.*/{{}}
      }}
    ],
    RDFXML-SUBJECT[ var ORIGIN,
      /.*/:/.*/{{                % subject
        /.*/:/.*/{{              % predicate with child as object
          without attributes{{ rdf:parseType{ "Literal" } }},
          without attributes{{ rdf:parseType{ "Resource"} }},
          identity var IDENT var OBJECT
        }}
      }}
    ]
  ]
END


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Part 2: naming node elements
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% regular URI reference
% rdf:about="URI"  ->  URI:uri{}
CONSTRUCT
  RDFXML-NODE[ var ORIGIN, var NODE, var URI:uri{} ]
FROM
  ns-prefix rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  RDFXML-SUBJECT[ var ORIGIN,
    var NODE as /.*/:/.*/{{
      attributes{{ rdf:about{ var URI } }}
    }}
  ]
END

% relative URI reference
% rdf:ID="ID"  ->  <ORIGIN>#<ID> as uri{}
CONSTRUCT
  RDFXML-NODE[ var ORIGIN, var NODE, &join( var ORIGIN, "#", var ID):uri{} ]
FROM
  ns-prefix rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  RDFXML-SUBJECT[ var ORIGIN,
    var NODE as /.*/:/.*/{{
      attributes{{ rdf:ID{ var ID } }}
    }}
  ]
END

% blank node with explicit identifier
% rdf:nodeID="NODEID"  ->  NODEID:blank{}
% (The attribute is rdf:nodeID, matching the exclusion list of the
%  following rule; the former "rdf:blankID" matched nothing that rule
%  excludes, so such nodes were never named.)
CONSTRUCT
  RDFXML-NODE[ var ORIGIN, var NODE, var NODEID:blank{} ]
FROM
  ns-prefix rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  RDFXML-SUBJECT[ var ORIGIN,
    var NODE as /.*/:/.*/{{
      attributes{{ rdf:nodeID{ var NODEID } }}
    }}
  ]
END

% blank node without explicit identifier
% No rdf:about, rdf:nodeID or rdf:ID: the node identity itself serves as
% the blank node name.
CONSTRUCT
  RDFXML-NODE[ var ORIGIN, var NODE, var NIDENT:blank{} ]
FROM
  ns-prefix rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  RDFXML-SUBJECT[ var ORIGIN,
    identity var NIDENT var NODE as /.*/:/.*/{{
      attributes{{
        without rdf:about{{}},
        without rdf:nodeID{{}},
        without rdf:ID{{}}
      }}
    }}
  ]
END


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Part 3: triple extraction
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% property element with a nested node element as object
%% Subject and object names are looked up in RDFXML-NODE by node identity.
CONSTRUCT
  RDF-TRIPLE[
    attributes{ origin{var ORIGIN} },
    var SUBJECT,
    &join( var P_NS, var P_LN):uri{},
    var OBJECT
  ]
FROM
  ns-prefix rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  and[
    RDFXML-SUBJECT[ var ORIGIN,
      identity var SIDENT var SNODE as /.*/:/.*/{{
        var P_NS:var P_LN{
          without attributes{{ rdf:parseType{{}} }},
          identity var OIDENT var ONODE as /.*/:/.*/{{}}
        }
      }}
    ],
    RDFXML-NODE[ var ORIGIN, identity var SIDENT var SNODE, var SUBJECT ],
    RDFXML-NODE[ var ORIGIN, identity var OIDENT var ONODE, var OBJECT ]
  ]
END

%% property element with text content: literal object
%% xml:lang and rdf:datatype are copied onto the literal when present.
CONSTRUCT
  RDF-TRIPLE[
    attributes{ origin{var ORIGIN} },
    var SUBJECT,
    &join( var P_NS, var P_LN):uri{},
    literal{
      attributes{{
        optional "http://www.w3.org/XML/1998/namespacelang":uri{ var LANG },
        optional "http://www.w3.org/1999/02/22-rdf-syntax-ns#datatype":uri{ var TYPE }
      }},
      var O_TEXT
    }
  ]
FROM
  ns-prefix rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  ns-prefix xml = "http://www.w3.org/XML/1998/namespace"
  and[
    RDFXML-SUBJECT[ var ORIGIN,
      identity var SIDENT var SNODE as /.*/:/.*/{{
        var P_NS:var P_LN{
          var O_TEXT as /.*/,
          attributes{{
            optional xml:lang{ var LANG },
            optional rdf:datatype{ var TYPE }
          }}
        }
      }}
    ],
    RDFXML-NODE[ var ORIGIN, identity var SIDENT var SNODE, var SUBJECT ]
  ]
END

%% empty property element with rdf:resource: URI object
CONSTRUCT
  RDF-TRIPLE[
    attributes{ origin{var ORIGIN} },
    var SUBJECT,
    &join( var P_NS, var P_LN):uri{},
    var O_URI:uri{}
  ]
FROM
  ns-prefix rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  and[
    RDFXML-SUBJECT[ var ORIGIN,
      identity var SIDENT var SNODE as /.*/:/.*/{{
        var P_NS:var P_LN{
          attributes{{ rdf:resource{ var O_URI } }}
        }
      }}
    ],
    RDFXML-NODE[ var ORIGIN, identity var SIDENT var SNODE, var SUBJECT ]
  ]
END

%% property attributes on a node element: literal object
%% Every attribute of the node element outside the rdf and xml namespaces
%% yields a triple whose object is the attribute value; xml:lang of the
%% node element is copied onto the literal when present.
CONSTRUCT
  RDF-TRIPLE[
    attributes{ origin{var ORIGIN} },
    var SUBJECT,
    &join( var P_NS, var P_LN):uri{},
    literal{
      optional attributes{{
        "http://www.w3.org/XML/1998/namespacelang":uri{ var LANG }
      }},
      var O_TEXT
    }
  ]
FROM
  ns-prefix rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  ns-prefix xml = "http://www.w3.org/XML/1998/namespace"
  and[
    RDFXML-SUBJECT[ var ORIGIN,
      identity var SIDENT var SNODE as /.*/:/.*/{{
        attributes{{
          var P_NS:var P_LN{ var O_TEXT },
          optional xml:lang{ var LANG }
        }}
        where {
          var P_NS != "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
          and var P_NS != "http://www.w3.org/XML/1998/namespace"
        }
      }}
    ],
    RDFXML-NODE[ var ORIGIN, identity var SIDENT var SNODE, var SUBJECT ]
  ]
END

%% property element with rdf:parseType="Literal": XML literal object
%% The child element is embedded as the literal value, typed rdf:XMLLiteral.
CONSTRUCT
  RDF-TRIPLE[
    attributes{ origin{var ORIGIN} },
    var SUBJECT,
    &join( var P_NS, var P_LN):uri{},
    literal{
      attributes{
        "http://www.w3.org/1999/02/22-rdf-syntax-ns#datatype":uri{
          "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral"
        }
      },
      var O_XML
    }
  ]
FROM
  ns-prefix rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  and[
    RDFXML-SUBJECT[ var ORIGIN,
      identity var SIDENT var SNODE as /.*/:/.*/{{
        var P_NS:var P_LN{{
          attributes{{ rdf:parseType{ "Literal" } }},
          var O_XML as /.*/:/.*/{{}}
        }}
      }}
    ],
    RDFXML-NODE[ var ORIGIN, identity var SIDENT var SNODE, var SUBJECT ]
  ]
END

%% property element with rdf:parseType="Resource": blank node object
%% The identity of the property element is used as the blank node name.
CONSTRUCT
  RDF-TRIPLE[
    attributes{ origin{var ORIGIN} },
    var SUBJECT,
    &join( var P_NS, var P_LN):uri{},
    var O_IDENT:blank{}
  ]
FROM
  ns-prefix rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  and[
    RDFXML-SUBJECT[ var ORIGIN,
      identity var SIDENT var SNODE as /.*/:/.*/{{
        identity var O_IDENT var P_NS:var P_LN{{
          attributes{{ rdf:parseType{ "Resource" } }}
        }}
      }}
    ],
    RDFXML-NODE[ var ORIGIN, identity var SIDENT var SNODE, var SUBJECT ]
  ]
END

%% recursion
%% A property element with rdf:parseType="Resource" below a known subject
%% is itself treated as a subject, so that its children are processed as
%% properties. The source document is accessed through RDF-FILE, in the
%% same way as in the subject-predicate-object recursion rule, which also
%% ties ORIGIN to the document the element comes from.
CONSTRUCT
  RDFXML-SUBJECT[ var ORIGIN, var BLANK ]
FROM
  ns-prefix rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  and[
    RDF-FILE[ var ORIGIN,
      rdf:RDF{{
        desc identity var IDENT var BLANK as /.*/:/.*/{{}}
      }}
    ],
    RDFXML-SUBJECT[ var ORIGIN,
      /.*/:/.*/{{
        var BLANK as /.*/:/.*/{{
          attributes{{ rdf:parseType{ "Resource" } }}
        }}
      }}
    ]
  ]
END

%% subject with object as empty property element
%% A property element without rdf:resource / rdf:parseType that carries at
%% least one other attribute denotes a blank node object, named by the
%% identity of the property element.
CONSTRUCT
  RDF-TRIPLE[
    attributes{ origin{var ORIGIN} },
    var SUBJECT,
    &join( var P_NS, var P_LN):uri{},
    var O_IDENT:blank{}
  ]
FROM
  ns-prefix rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  and[
    RDFXML-SUBJECT[ var ORIGIN,
      identity var SIDENT var SNODE as /.*/:/.*/{{
        identity var O_IDENT var P_NS:var P_LN{
          attributes{{
            without rdf:resource{{}},
            without rdf:parseType{{}},
            var A_NS:/.*/{{}}     % any other attribute
          }}
        }
        where {
          var P_NS != "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
          and var P_NS != "http://www.w3.org/XML/1998/namespace"
        }
      }}
    ],
    RDFXML-NODE[ var ORIGIN, identity var SIDENT var SNODE, var SUBJECT ]
  ]
END

%% empty property element as subject to its attributes
%% Each non-rdf attribute of such a property element becomes a triple whose
%% subject is the blank node named by the property element's identity.
%% NOTE(review): the object is built as link{ ... } whereas the other
%% literal-valued rules build literal{ ... } - confirm this is intended.
CONSTRUCT
  RDF-TRIPLE[
    attributes{ origin{var ORIGIN} },
    var S_IDENT:blank{},
    &join( var P_NS, var P_LN):uri{},
    link{ var O_TEXT }
  ]
FROM
  ns-prefix rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  RDFXML-SUBJECT[ var ORIGIN,
    /.*/:/.*/{{
      identity var S_IDENT /.*/:/.*/{
        attributes{{
          without rdf:resource{{}},
          without rdf:parseType{{}},
          var P_NS:var P_LN{ var O_TEXT }     % any other attribute
        }}
        where { var P_NS != "http://www.w3.org/1999/02/22-rdf-syntax-ns#" }
      }
    }}
  ]
END

%% case 1, identified subject
%% Typed node element: the qualified name of the node element becomes the
%% object of an rdf:type triple for that node.
%% NOTE(review): the where-clause joins both inequalities with "and", so it
%% also skips every other element in the rdf namespace and any element whose
%% local name is "Description" in another namespace - confirm whether only
%% rdf:Description was meant to be excluded.
CONSTRUCT
  RDF-TRIPLE[
    attributes{ origin{var ORIGIN} },
    var SUBJECT,
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#type":uri{},
    &join( var S_CLASS_NS, var S_CLASS_LN ):uri{}
  ]
FROM
  ns-prefix rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  and[
    RDFXML-SUBJECT[ var ORIGIN,
      identity var SIDENT var SNODE as var S_CLASS_NS:var S_CLASS_LN {{
        without attributes{{ rdf:parseType{ "Resource" } }}
      }}
      where {
        var S_CLASS_NS != "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
        and var S_CLASS_LN != "Description"
      }
    ],
    RDFXML-NODE[ var ORIGIN, identity var SIDENT var SNODE, var SUBJECT ]
  ]
END