[redland-dev] Entities in RDFa
Richard Smith
richard at ex-parrot.com
Fri Jan 6 05:54:50 EST 2012
Richard Smith wrote:
> I think fixing this (at least for builds using libxml2) is as simple as adding
> the XML_PARSE_DTDLOAD flag to libxml_options in raptor_grddl.c and
> raptor_sax2.c. Probably it should be done by way of a new raptor option that
> by default is disabled, much like RAPTOR_OPTION_NO_NET is.
>
> Does this seem a worthwhile change? And would it help if I knocked up a
> patch for it?
The attached patch implements this via a new raptor parse
option called 'loadDTD'. The default behaviour is
unchanged.
Could someone more familiar with the code confirm that I
have got the correct raptor_option_area values for the new
option in raptor_option.c? I'm not really sure I properly
understand what RAPTOR_OPTION_AREA_PARSER is for.
(If this list strips attachments, there won't be a lot of
point to this message...)
Richard
-------------- next part --------------
diff -ur raptor2-2.0.6/configure.ac raptor2-2.0.6+patch/configure.ac
--- raptor2-2.0.6/configure.ac 2011-11-24 07:15:15.000000000 +0000
+++ raptor2-2.0.6+patch/configure.ac 2012-01-06 01:51:56.815669830 +0000
@@ -700,6 +700,16 @@
AC_CHECK_FUNCS(xmlSAX2InternalSubset xmlCtxtUseOptions)
+ AC_MSG_CHECKING(if libxml has parser option XML_PARSE_DTDLOAD)
+ AC_TRY_LINK([
+#ifdef HAVE_LIBXML_PARSER_H
+#include <libxml/parser.h>
+#endif
+], [xmlParserOption foo; foo = XML_PARSE_DTDLOAD],
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(RAPTOR_LIBXML_XML_PARSE_DTDLOAD, 1, [does libxml have XML_PARSE_DTDLOAD]),
+ AC_MSG_RESULT(no))
+
AC_MSG_CHECKING(if libxml has parser option XML_PARSE_NONET)
AC_TRY_LINK([
#ifdef HAVE_LIBXML_PARSER_H
diff -ur raptor2-2.0.6/librdfa/rdfa.c raptor2-2.0.6+patch/librdfa/rdfa.c
--- raptor2-2.0.6/librdfa/rdfa.c 2011-08-22 07:05:56.000000000 +0100
+++ raptor2-2.0.6+patch/librdfa/rdfa.c 2012-01-06 09:59:25.158089322 +0000
@@ -1218,6 +1218,18 @@
rdfa_init_context(context);
#ifdef LIBRDFA_IN_RAPTOR
+ /* Optionally forbid network requests in the XML parser */
+ raptor_sax2_set_option(context->sax2,
+ RAPTOR_OPTION_NO_NET, NULL,
+ RAPTOR_OPTIONS_GET_NUMERIC(context, RAPTOR_OPTION_NO_NET));
+
+ /* Optionally force DTD loads in the XML parser */
+ raptor_sax2_set_option(context->sax2,
+ RAPTOR_OPTION_LOAD_DTD, NULL,
+ RAPTOR_OPTIONS_GET_NUMERIC(context, RAPTOR_OPTION_LOAD_DTD));
+#endif
+
+#ifdef LIBRDFA_IN_RAPTOR
context->base_uri=raptor_new_uri(context->sax2->world, (const unsigned char*)context->base);
raptor_sax2_parse_start(context->sax2, context->base_uri);
#endif
diff -ur raptor2-2.0.6/librdfa/rdfa.h raptor2-2.0.6+patch/librdfa/rdfa.h
--- raptor2-2.0.6/librdfa/rdfa.h 2011-04-26 19:16:35.000000000 +0100
+++ raptor2-2.0.6+patch/librdfa/rdfa.h 2012-01-06 10:03:37.046101513 +0000
@@ -233,6 +233,8 @@
raptor_sax2* sax2;
raptor_namespace_handler namespace_handler;
void* namespace_handler_user_data;
+ raptor_object_options options;
+
#else
XML_Parser parser;
#endif
diff -ur raptor2-2.0.6/src/raptor2.h.in raptor2-2.0.6+patch/src/raptor2.h.in
--- raptor2-2.0.6/src/raptor2.h.in 2011-11-27 17:36:30.000000000 +0000
+++ raptor2-2.0.6+patch/src/raptor2.h.in 2012-01-06 02:04:21.895705896 +0000
@@ -494,6 +494,7 @@
* @RAPTOR_OPTION_WRITER_XML_VERSION: Integer XML version XML 1.0 (10) or XML 1.1 (11)
* @RAPTOR_OPTION_WRITER_XML_DECLARATION: Write XML 1.0 or 1.1 declaration.
* @RAPTOR_OPTION_NO_NET: Deny network requests.
+ * @RAPTOR_OPTION_LOAD_DTD: Load document DTDs.
* @RAPTOR_OPTION_RESOURCE_BORDER: Border color of resource
* nodes for GraphViz DOT serializer.
* @RAPTOR_OPTION_LITERAL_BORDER: Border color of literal nodes
@@ -568,7 +569,8 @@
RAPTOR_OPTION_WWW_CERT_FILENAME,
RAPTOR_OPTION_WWW_CERT_TYPE,
RAPTOR_OPTION_WWW_CERT_PASSPHRASE,
- RAPTOR_OPTION_LAST = RAPTOR_OPTION_WWW_CERT_PASSPHRASE
+ RAPTOR_OPTION_LOAD_DTD,
+ RAPTOR_OPTION_LAST = RAPTOR_OPTION_LOAD_DTD
} raptor_option;
diff -ur raptor2-2.0.6/src/raptor_config.h.in raptor2-2.0.6+patch/src/raptor_config.h.in
--- raptor2-2.0.6/src/raptor_config.h.in 2011-11-24 07:15:46.000000000 +0000
+++ raptor2-2.0.6+patch/src/raptor_config.h.in 2012-01-06 01:55:06.359679001 +0000
@@ -196,6 +196,9 @@
/* does libxml xmlSAXHandler have initialized field */
#undef RAPTOR_LIBXML_XMLSAXHANDLER_INITIALIZED
+/* does libxml have XML_PARSE_DTDLOAD */
+#undef RAPTOR_LIBXML_XML_PARSE_DTDLOAD
+
/* does libxml have XML_PARSE_NONET */
#undef RAPTOR_LIBXML_XML_PARSE_NONET
diff -ur raptor2-2.0.6/src/raptor_grddl.c raptor2-2.0.6+patch/src/raptor_grddl.c
--- raptor2-2.0.6/src/raptor_grddl.c 2011-08-31 20:53:24.000000000 +0100
+++ raptor2-2.0.6+patch/src/raptor_grddl.c 2012-01-06 02:07:42.351715591 +0000
@@ -878,6 +878,10 @@
if(RAPTOR_OPTIONS_GET_NUMERIC(xpbc->rdf_parser, RAPTOR_OPTION_NO_NET))
libxml_options |= XML_PARSE_NONET;
#endif
+#ifdef RAPTOR_LIBXML_XML_PARSE_DTDLOAD
+ if(RAPTOR_OPTIONS_GET_NUMERIC(xpbc->rdf_parser, RAPTOR_OPTION_LOAD_DTD))
+ libxml_options |= XML_PARSE_DTDLOAD;
+#endif
#ifdef HAVE_XMLCTXTUSEOPTIONS
xmlCtxtUseOptions(xc, libxml_options);
#endif
@@ -1439,6 +1443,10 @@
if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_NET))
libxml_options |= XML_PARSE_NONET;
#endif
+#ifdef RAPTOR_LIBXML_XML_PARSE_DTDLOAD
+ if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_LOAD_DTD))
+ libxml_options |= XML_PARSE_DTDLOAD;
+#endif
#ifdef HAVE_XMLCTXTUSEOPTIONS
xmlCtxtUseOptions(grddl_parser->xml_ctxt, libxml_options);
#endif
@@ -1488,6 +1496,10 @@
if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_NET))
options |= HTML_PARSE_NONET;
#endif
+#ifdef RAPTOR_LIBXML_XML_PARSE_DTDLOAD
+ if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_LOAD_DTD))
+ options |= XML_PARSE_DTDLOAD;
+#endif
htmlCtxtUseOptions(grddl_parser->html_ctxt, options);
diff -ur raptor2-2.0.6/src/raptor_librdfa.c raptor2-2.0.6+patch/src/raptor_librdfa.c
--- raptor2-2.0.6/src/raptor_librdfa.c 2011-10-21 21:41:16.000000000 +0100
+++ raptor2-2.0.6+patch/src/raptor_librdfa.c 2012-01-06 10:05:44.150107663 +0000
@@ -267,6 +267,8 @@
/* returns RDFa Processing Graph error triples - not used by raptor */
rdfa_set_processor_graph_triple_handler(librdfa_parser->context, NULL);
+ librdfa_parser->context->options = rdf_parser->options;
+
rc = rdfa_parse_start(librdfa_parser->context);
if(rc != RDFA_PARSE_SUCCESS)
return 1;
diff -ur raptor2-2.0.6/src/raptor_option.c raptor2-2.0.6+patch/src/raptor_option.c
--- raptor2-2.0.6/src/raptor_option.c 2011-08-01 03:02:22.000000000 +0100
+++ raptor2-2.0.6+patch/src/raptor_option.c 2012-01-06 09:40:28.342034303 +0000
@@ -277,6 +277,12 @@
RAPTOR_OPTION_VALUE_TYPE_STRING,
"wwwCertPassphrase",
"SSL client certificate passphrase"
+ },
+ { RAPTOR_OPTION_LOAD_DTD,
+ (raptor_option_area)(RAPTOR_OPTION_AREA_PARSER | RAPTOR_OPTION_AREA_SAX2),
+ RAPTOR_OPTION_VALUE_TYPE_BOOL,
+ "loadDTD",
+ "Parsers and SAX2 XML Parser should load DTDs."
}
};
diff -ur raptor2-2.0.6/src/raptor_rdfxml.c raptor2-2.0.6+patch/src/raptor_rdfxml.c
--- raptor2-2.0.6/src/raptor_rdfxml.c 2011-10-21 21:41:16.000000000 +0100
+++ raptor2-2.0.6+patch/src/raptor_rdfxml.c 2012-01-06 02:09:14.807720071 +0000
@@ -1001,6 +1001,11 @@
raptor_sax2_set_option(rdf_xml_parser->sax2,
RAPTOR_OPTION_NO_NET, NULL,
RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_NET));
+
+ /* Optionally force DTD loads in the XML parser */
+ raptor_sax2_set_option(rdf_xml_parser->sax2,
+ RAPTOR_OPTION_LOAD_DTD, NULL,
+ RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_LOAD_DTD));
raptor_sax2_parse_start(rdf_xml_parser->sax2, uri);
diff -ur raptor2-2.0.6/src/raptor_rss.c raptor2-2.0.6+patch/src/raptor_rss.c
--- raptor2-2.0.6/src/raptor_rss.c 2011-08-31 20:53:24.000000000 +0100
+++ raptor2-2.0.6+patch/src/raptor_rss.c 2012-01-06 02:11:18.495726048 +0000
@@ -249,6 +249,11 @@
raptor_sax2_set_option(rss_parser->sax2,
RAPTOR_OPTION_NO_NET, NULL,
RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_NET));
+
+ /* Optionally force DTD loads in the XML parser */
+ raptor_sax2_set_option(rss_parser->sax2,
+ RAPTOR_OPTION_LOAD_DTD, NULL,
+ RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_LOAD_DTD));
raptor_sax2_parse_start(rss_parser->sax2, uri);
diff -ur raptor2-2.0.6/src/raptor_sax2.c raptor2-2.0.6+patch/src/raptor_sax2.c
--- raptor2-2.0.6/src/raptor_sax2.c 2011-11-27 17:36:30.000000000 +0000
+++ raptor2-2.0.6+patch/src/raptor_sax2.c 2012-01-06 10:06:33.994110079 +0000
@@ -518,6 +518,10 @@
if(RAPTOR_OPTIONS_GET_NUMERIC(sax2, RAPTOR_OPTION_NO_NET))
libxml_options |= XML_PARSE_NONET;
#endif
+#ifdef RAPTOR_LIBXML_XML_PARSE_DTDLOAD
+ if(RAPTOR_OPTIONS_GET_NUMERIC(sax2, RAPTOR_OPTION_LOAD_DTD))
+ libxml_options |= XML_PARSE_DTDLOAD;
+#endif
#ifdef HAVE_XMLCTXTUSEOPTIONS
xmlCtxtUseOptions(xc, libxml_options);
#endif
diff -ur raptor2-2.0.6/src/raptor_turtle_writer.c raptor2-2.0.6+patch/src/raptor_turtle_writer.c
--- raptor2-2.0.6/src/raptor_turtle_writer.c 2011-11-12 21:18:03.000000000 +0000
+++ raptor2-2.0.6+patch/src/raptor_turtle_writer.c 2012-01-06 02:11:56.555727893 +0000
@@ -704,6 +704,7 @@
/* Shared */
case RAPTOR_OPTION_NO_NET:
+ case RAPTOR_OPTION_LOAD_DTD:
/* XML writer options */
case RAPTOR_OPTION_RELATIVE_URIS:
More information about the redland-dev
mailing list