[redland-dev] Entities in RDFa

Richard Smith richard at ex-parrot.com
Fri Jan 6 05:54:50 EST 2012


Richard Smith wrote:

> I think fixing this (at least for builds using libxml2) is as simple as adding 
> the XML_PARSE_DTDLOAD flag to libxml_options in raptor_grddl.c and 
> raptor_sax2.c. Probably it should be done by way of a new raptor option that 
> by default is disabled, much like RAPTOR_OPTION_NO_NET is.
>
> Does this seem a worthwhile change?  And would it help if I knocked up a 
> patch for it?

The attached patch implements this via a new raptor parse 
option called 'loadDTD'.  The default behaviour is 
unchanged.

Could someone more familiar with the code confirm that I 
have got the correct raptor_option_area values for the new 
option in raptor_option.c?  I'm not really sure I properly 
understand what RAPTOR_OPTION_AREA_PARSER is for.

(If this list strips attachments, there won't be a lot of 
point to this message...)

Richard
-------------- next part --------------
diff -ur raptor2-2.0.6/configure.ac raptor2-2.0.6+patch/configure.ac
--- raptor2-2.0.6/configure.ac	2011-11-24 07:15:15.000000000 +0000
+++ raptor2-2.0.6+patch/configure.ac	2012-01-06 01:51:56.815669830 +0000
@@ -700,6 +700,16 @@
 
     AC_CHECK_FUNCS(xmlSAX2InternalSubset xmlCtxtUseOptions)
 
+    AC_MSG_CHECKING(if libxml has parser option XML_PARSE_DTDLOAD)
+    AC_TRY_LINK([
+#ifdef HAVE_LIBXML_PARSER_H
+#include <libxml/parser.h>
+#endif
+], [xmlParserOption foo; foo = XML_PARSE_DTDLOAD],
+                AC_MSG_RESULT(yes)
+		AC_DEFINE(RAPTOR_LIBXML_XML_PARSE_DTDLOAD, 1, [does libxml have XML_PARSE_DTDLOAD]),
+		AC_MSG_RESULT(no))
+
     AC_MSG_CHECKING(if libxml has parser option XML_PARSE_NONET)
     AC_TRY_LINK([
 #ifdef HAVE_LIBXML_PARSER_H
diff -ur raptor2-2.0.6/librdfa/rdfa.c raptor2-2.0.6+patch/librdfa/rdfa.c
--- raptor2-2.0.6/librdfa/rdfa.c	2011-08-22 07:05:56.000000000 +0100
+++ raptor2-2.0.6+patch/librdfa/rdfa.c	2012-01-06 09:59:25.158089322 +0000
@@ -1218,6 +1218,18 @@
    rdfa_init_context(context);
 
 #ifdef LIBRDFA_IN_RAPTOR
+  /* Optionally forbid network requests in the XML parser */
+  raptor_sax2_set_option(context->sax2, 
+                         RAPTOR_OPTION_NO_NET, NULL,
+                         RAPTOR_OPTIONS_GET_NUMERIC(context, RAPTOR_OPTION_NO_NET));
+
+  /* Optionally force DTD loads in the XML parser */
+  raptor_sax2_set_option(context->sax2, 
+                         RAPTOR_OPTION_LOAD_DTD, NULL,
+                         RAPTOR_OPTIONS_GET_NUMERIC(context, RAPTOR_OPTION_LOAD_DTD));
+#endif
+
+#ifdef LIBRDFA_IN_RAPTOR
    context->base_uri=raptor_new_uri(context->sax2->world, (const unsigned char*)context->base);
    raptor_sax2_parse_start(context->sax2, context->base_uri);
 #endif
diff -ur raptor2-2.0.6/librdfa/rdfa.h raptor2-2.0.6+patch/librdfa/rdfa.h
--- raptor2-2.0.6/librdfa/rdfa.h	2011-04-26 19:16:35.000000000 +0100
+++ raptor2-2.0.6+patch/librdfa/rdfa.h	2012-01-06 10:03:37.046101513 +0000
@@ -233,6 +233,8 @@
    raptor_sax2* sax2;
    raptor_namespace_handler namespace_handler;
    void* namespace_handler_user_data;
+  raptor_object_options options;
+   
 #else
    XML_Parser parser;
 #endif
diff -ur raptor2-2.0.6/src/raptor2.h.in raptor2-2.0.6+patch/src/raptor2.h.in
--- raptor2-2.0.6/src/raptor2.h.in	2011-11-27 17:36:30.000000000 +0000
+++ raptor2-2.0.6+patch/src/raptor2.h.in	2012-01-06 02:04:21.895705896 +0000
@@ -494,6 +494,7 @@
  * @RAPTOR_OPTION_WRITER_XML_VERSION: Integer XML version XML 1.0 (10) or XML 1.1 (11)
  * @RAPTOR_OPTION_WRITER_XML_DECLARATION: Write XML 1.0 or 1.1 declaration.
  * @RAPTOR_OPTION_NO_NET: Deny network requests.
+ * @RAPTOR_OPTION_LOAD_DTD: Load document DTDs.
  * @RAPTOR_OPTION_RESOURCE_BORDER: Border color of resource
  *   nodes for GraphViz DOT serializer.
  * @RAPTOR_OPTION_LITERAL_BORDER: Border color of literal nodes
@@ -568,7 +569,8 @@
   RAPTOR_OPTION_WWW_CERT_FILENAME,
   RAPTOR_OPTION_WWW_CERT_TYPE,
   RAPTOR_OPTION_WWW_CERT_PASSPHRASE,
-  RAPTOR_OPTION_LAST = RAPTOR_OPTION_WWW_CERT_PASSPHRASE
+  RAPTOR_OPTION_LOAD_DTD,
+  RAPTOR_OPTION_LAST = RAPTOR_OPTION_LOAD_DTD
 } raptor_option;
 
 
diff -ur raptor2-2.0.6/src/raptor_config.h.in raptor2-2.0.6+patch/src/raptor_config.h.in
--- raptor2-2.0.6/src/raptor_config.h.in	2011-11-24 07:15:46.000000000 +0000
+++ raptor2-2.0.6+patch/src/raptor_config.h.in	2012-01-06 01:55:06.359679001 +0000
@@ -196,6 +196,9 @@
 /* does libxml xmlSAXHandler have initialized field */
 #undef RAPTOR_LIBXML_XMLSAXHANDLER_INITIALIZED
 
+/* does libxml have XML_PARSE_DTDLOAD */
+#undef RAPTOR_LIBXML_XML_PARSE_DTDLOAD
+
 /* does libxml have XML_PARSE_NONET */
 #undef RAPTOR_LIBXML_XML_PARSE_NONET
 
diff -ur raptor2-2.0.6/src/raptor_grddl.c raptor2-2.0.6+patch/src/raptor_grddl.c
--- raptor2-2.0.6/src/raptor_grddl.c	2011-08-31 20:53:24.000000000 +0100
+++ raptor2-2.0.6+patch/src/raptor_grddl.c	2012-01-06 02:07:42.351715591 +0000
@@ -878,6 +878,10 @@
       if(RAPTOR_OPTIONS_GET_NUMERIC(xpbc->rdf_parser, RAPTOR_OPTION_NO_NET))
         libxml_options |= XML_PARSE_NONET;
 #endif
+#ifdef RAPTOR_LIBXML_XML_PARSE_DTDLOAD
+      if(RAPTOR_OPTIONS_GET_NUMERIC(xpbc->rdf_parser, RAPTOR_OPTION_LOAD_DTD))
+        libxml_options |= XML_PARSE_DTDLOAD;
+#endif
 #ifdef HAVE_XMLCTXTUSEOPTIONS
       xmlCtxtUseOptions(xc, libxml_options);
 #endif
@@ -1439,6 +1443,10 @@
       if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_NET))
         libxml_options |= XML_PARSE_NONET;
 #endif
+#ifdef RAPTOR_LIBXML_XML_PARSE_DTDLOAD
+      if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_LOAD_DTD))
+        libxml_options |= XML_PARSE_DTDLOAD;
+#endif
 #ifdef HAVE_XMLCTXTUSEOPTIONS
       xmlCtxtUseOptions(grddl_parser->xml_ctxt, libxml_options);
 #endif
@@ -1488,6 +1496,10 @@
         if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_NET))
           options |= HTML_PARSE_NONET;
 #endif
+#ifdef RAPTOR_LIBXML_XML_PARSE_DTDLOAD
+        if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_LOAD_DTD))
+          options |= XML_PARSE_DTDLOAD;
+#endif
 
         htmlCtxtUseOptions(grddl_parser->html_ctxt, options);
  
diff -ur raptor2-2.0.6/src/raptor_librdfa.c raptor2-2.0.6+patch/src/raptor_librdfa.c
--- raptor2-2.0.6/src/raptor_librdfa.c	2011-10-21 21:41:16.000000000 +0100
+++ raptor2-2.0.6+patch/src/raptor_librdfa.c	2012-01-06 10:05:44.150107663 +0000
@@ -267,6 +267,8 @@
   /* returns RDFa Processing Graph error triples - not used by raptor */
   rdfa_set_processor_graph_triple_handler(librdfa_parser->context, NULL);
 
+  librdfa_parser->context->options = rdf_parser->options;
+
   rc = rdfa_parse_start(librdfa_parser->context);
   if(rc != RDFA_PARSE_SUCCESS)
     return 1;
diff -ur raptor2-2.0.6/src/raptor_option.c raptor2-2.0.6+patch/src/raptor_option.c
--- raptor2-2.0.6/src/raptor_option.c	2011-08-01 03:02:22.000000000 +0100
+++ raptor2-2.0.6+patch/src/raptor_option.c	2012-01-06 09:40:28.342034303 +0000
@@ -277,6 +277,12 @@
     RAPTOR_OPTION_VALUE_TYPE_STRING,
     "wwwCertPassphrase",
     "SSL client certificate passphrase"
+  },
+  { RAPTOR_OPTION_LOAD_DTD,
+    (raptor_option_area)(RAPTOR_OPTION_AREA_PARSER | RAPTOR_OPTION_AREA_SAX2),
+    RAPTOR_OPTION_VALUE_TYPE_BOOL,
+    "loadDTD",
+    "Parsers and SAX2 XML Parser should load DTDs."
   }
 };
 
diff -ur raptor2-2.0.6/src/raptor_rdfxml.c raptor2-2.0.6+patch/src/raptor_rdfxml.c
--- raptor2-2.0.6/src/raptor_rdfxml.c	2011-10-21 21:41:16.000000000 +0100
+++ raptor2-2.0.6+patch/src/raptor_rdfxml.c	2012-01-06 02:09:14.807720071 +0000
@@ -1001,6 +1001,11 @@
   raptor_sax2_set_option(rdf_xml_parser->sax2, 
                          RAPTOR_OPTION_NO_NET, NULL,
                          RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_NET));
+
+  /* Optionally force DTD loads in the XML parser */
+  raptor_sax2_set_option(rdf_xml_parser->sax2, 
+                         RAPTOR_OPTION_LOAD_DTD, NULL,
+                         RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_LOAD_DTD));
   
   raptor_sax2_parse_start(rdf_xml_parser->sax2, uri);
 
diff -ur raptor2-2.0.6/src/raptor_rss.c raptor2-2.0.6+patch/src/raptor_rss.c
--- raptor2-2.0.6/src/raptor_rss.c	2011-08-31 20:53:24.000000000 +0100
+++ raptor2-2.0.6+patch/src/raptor_rss.c	2012-01-06 02:11:18.495726048 +0000
@@ -249,6 +249,11 @@
   raptor_sax2_set_option(rss_parser->sax2, 
                          RAPTOR_OPTION_NO_NET, NULL,
                          RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_NET));
+
+  /* Optionally force DTD loads in the XML parser */
+  raptor_sax2_set_option(rss_parser->sax2, 
+                         RAPTOR_OPTION_LOAD_DTD, NULL,
+                         RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_LOAD_DTD));
   
   raptor_sax2_parse_start(rss_parser->sax2, uri);
 
diff -ur raptor2-2.0.6/src/raptor_sax2.c raptor2-2.0.6+patch/src/raptor_sax2.c
--- raptor2-2.0.6/src/raptor_sax2.c	2011-11-27 17:36:30.000000000 +0000
+++ raptor2-2.0.6+patch/src/raptor_sax2.c	2012-01-06 10:06:33.994110079 +0000
@@ -518,6 +518,10 @@
     if(RAPTOR_OPTIONS_GET_NUMERIC(sax2, RAPTOR_OPTION_NO_NET))
       libxml_options |= XML_PARSE_NONET;
 #endif
+#ifdef RAPTOR_LIBXML_XML_PARSE_DTDLOAD
+    if(RAPTOR_OPTIONS_GET_NUMERIC(sax2, RAPTOR_OPTION_LOAD_DTD))
+      libxml_options |= XML_PARSE_DTDLOAD;
+#endif
 #ifdef HAVE_XMLCTXTUSEOPTIONS
     xmlCtxtUseOptions(xc, libxml_options);
 #endif
diff -ur raptor2-2.0.6/src/raptor_turtle_writer.c raptor2-2.0.6+patch/src/raptor_turtle_writer.c
--- raptor2-2.0.6/src/raptor_turtle_writer.c	2011-11-12 21:18:03.000000000 +0000
+++ raptor2-2.0.6+patch/src/raptor_turtle_writer.c	2012-01-06 02:11:56.555727893 +0000
@@ -704,6 +704,7 @@
       
     /* Shared */
     case RAPTOR_OPTION_NO_NET:
+    case RAPTOR_OPTION_LOAD_DTD:
 
     /* XML writer options */
     case RAPTOR_OPTION_RELATIVE_URIS:


More information about the redland-dev mailing list