class LibXML::XML::Parser::Context
The XML::Parser::Context class provides in-depth control over how a document is parsed.
Public Class Methods
Creates a new parser context based on the specified document.
Parameters:
document - An XML::Document instance; options - An or'ed-together list of LibXML::XML::Parser::Options values (optional)
/*
 * Creates a new parser context from an existing document.
 *
 * The document is serialized to memory using its own encoding and a parser
 * context is created over the serialized text.
 *
 * Parameters:
 *   document - an XML::Document instance
 *   options  - optional; or'ed together XML::Parser::Options values
 *
 * Raises TypeError when document is not an XML::Document. When libxml2
 * cannot create the context, the last libxml2 error is passed to rxml_raise.
 */
static VALUE rxml_parser_context_document(int argc, VALUE* argv, VALUE klass)
{
  VALUE document, options;
  rb_scan_args(argc, argv, "11", &document, &options);

  if (rb_obj_is_kind_of(document, cXMLDocument) == Qfalse)
    rb_raise(rb_eTypeError, "Must pass an LibXML::XML::Document object");

  /* Convert the options before anything is allocated: NUM2INT raises for
     non-numeric values, and raising once the buffer or the context exist
     would leak them. */
  int xoptions = NIL_P(options) ? 0 : NUM2INT(options);

  xmlDocPtr xdoc;
  xmlChar *buffer = NULL;
  int length = 0;
  Data_Get_Struct(document, xmlDoc, xdoc);
  xmlDocDumpFormatMemoryEnc(xdoc, &buffer, &length, (const char*)xdoc->encoding, 0);

  xmlParserCtxtPtr ctxt = xmlCreateDocParserCtxt(buffer);

  /* The dumped text was allocated for us and must be released by us.
     NOTE(review): this relies on xmlCreateDocParserCtxt copying its input,
     confirm against the libxml2 versions that are supported. It is released
     before any raise below because raising does not return here. */
  if (buffer)
    xmlFree(buffer);

  if (!ctxt)
    rxml_raise(xmlGetLastError());

  /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
     xmlCtxtUseOptionsInternal (called below) initialize slightly different
     context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
     sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
  xmlCtxtUseOptions(ctxt, xoptions);

  return rxml_parser_context_wrap(ctxt);
}
Creates a new parser context based on the specified file or uri.
Parameters:
file - A filename or uri; options - An or'ed-together list of LibXML::XML::Parser::Options values (optional)
/*
 * Creates a new parser context for the given file name or uri.
 *
 * Parameters:
 *   file    - a string holding a filename or uri
 *   options - optional; or'ed together XML::Parser::Options values
 *
 * Raises when file cannot be used as a C string (not a string, or it
 * contains a null byte). When libxml2 cannot create the context, the last
 * libxml2 error is passed to rxml_raise.
 */
static VALUE rxml_parser_context_file(int argc, VALUE* argv, VALUE klass)
{
  VALUE file, options;
  rb_scan_args(argc, argv, "11", &file, &options);

  /* Validate both arguments before the context is allocated so that a
     conversion error cannot leak it. The file is checked first to keep the
     original order in which argument errors are reported. */
  StringValue(file);
  int xoptions = NIL_P(options) ? 0 : NUM2INT(options);

  /* StringValueCStr guarantees a NUL-terminated string without embedded
     null bytes, which is what a path handed to libxml2 needs. */
  xmlParserCtxtPtr ctxt = xmlCreateURLParserCtxt(StringValueCStr(file), 0);

  if (!ctxt)
    rxml_raise(xmlGetLastError());

  /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
     xmlCtxtUseOptionsInternal (called below) initialize slightly different
     context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
     sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
  xmlCtxtUseOptions(ctxt, xoptions);

  return rxml_parser_context_wrap(ctxt);
}
Creates a new parser context based on the specified io object.
Parameters:
io - A Ruby IO object; options - An or'ed-together list of LibXML::XML::Parser::Options values (optional)
/*
 * Creates a new parser context that reads from a Ruby io object.
 *
 * The io object is read through rxml_read_callback and is stored on the
 * returned context (IO_ATTR) so that it stays referenced.
 *
 * Parameters:
 *   io      - the object to read from; must not be nil
 *   options - optional; or'ed together XML::Parser::Options values
 *
 * Raises TypeError when io is nil. When libxml2 cannot create the input
 * buffer, the context or the input stream, the last libxml2 error is
 * passed to rxml_raise.
 */
static VALUE rxml_parser_context_io(int argc, VALUE* argv, VALUE klass)
{
  VALUE io, options;
  rb_scan_args(argc, argv, "11", &io, &options);

  if (NIL_P(io))
    rb_raise(rb_eTypeError, "Must pass in an IO object");

  /* Convert the options before any libxml2 object is allocated: NUM2INT
     raises for non-numeric values, which would otherwise leak both the
     input buffer and the context. */
  int xoptions = NIL_P(options) ? 0 : NUM2INT(options);

  xmlParserInputBufferPtr input = xmlParserInputBufferCreateIO((xmlInputReadCallback) rxml_read_callback, NULL,
                                                               (void*)io, XML_CHAR_ENCODING_NONE);

  /* Do not hand a NULL buffer to xmlNewIOInputStream below. */
  if (!input)
    rxml_raise(xmlGetLastError());

  xmlParserCtxtPtr ctxt = xmlNewParserCtxt();

  if (!ctxt)
  {
    xmlFreeParserInputBuffer(input);
    rxml_raise(xmlGetLastError());
  }

  /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
     xmlCtxtUseOptionsInternal (called below) initialize slightly different
     context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
     sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
  xmlCtxtUseOptions(ctxt, xoptions);

  xmlParserInputPtr stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);

  if (!stream)
  {
    xmlFreeParserInputBuffer(input);
    xmlFreeParserCtxt(ctxt);
    rxml_raise(xmlGetLastError());
  }
  inputPush(ctxt, stream);
  VALUE result = rxml_parser_context_wrap(ctxt);

  /* Attach io object to parser so it won't get freed.*/
  rb_ivar_set(result, IO_ATTR, io);

  return result;
}
Creates a new parser context based on the specified string.
Parameters:
string - A string that contains the data to parse; options - An or'ed-together list of LibXML::XML::Parser::Options values (optional)
/*
 * Creates a new parser context over the contents of a Ruby string.
 *
 * Parameters:
 *   string  - a non-empty String holding the data to parse
 *   options - optional; or'ed together XML::Parser::Options values
 *
 * Raises TypeError when string is not a String and ArgumentError when it
 * is empty. When libxml2 cannot create the context, the last libxml2
 * error is passed to rxml_raise.
 */
static VALUE rxml_parser_context_string(int argc, VALUE* argv, VALUE klass)
{
  VALUE string, options;
  rb_scan_args(argc, argv, "11", &string, &options);

  Check_Type(string, T_STRING);

  if (RSTRING_LEN(string) == 0)
    rb_raise(rb_eArgError, "Must specify a string with one or more characters");

  /* Convert the options before the context is allocated: NUM2INT raises
     for non-numeric values, which would otherwise leak the context. */
  int xoptions = NIL_P(options) ? 0 : NUM2INT(options);

  xmlParserCtxtPtr ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(string), (int)RSTRING_LEN(string));

  if (!ctxt)
    rxml_raise(xmlGetLastError());

  /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
     xmlCtxtUseOptionsInternal (called below) initialize slightly different
     context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
     sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
  xmlCtxtUseOptions(ctxt, xoptions);

  return rxml_parser_context_wrap(ctxt);
}
Public Instance Methods
Obtain the base url for this parser context.
/*
 * Returns the filename recorded on the context's current input as a Ruby
 * string (built with rxml_new_cstr and the context's encoding), or nil
 * when there is no current input or it has no filename.
 */
static VALUE rxml_parser_context_base_uri_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  /* Guard clause: nothing to report without an input that has a filename. */
  if (!xctxt->input || !xctxt->input->filename)
    return Qnil;

  return rxml_new_cstr((const xmlChar*)xctxt->input->filename, xctxt->encoding);
}
Sets the base url for this parser context.
/*
 * Records url as the filename of the context's current input.
 *
 * The filename is only written when the context has a current input whose
 * filename is not set yet; in every other case the call leaves the context
 * untouched. The stored value is a copy made with xmlStrdup.
 *
 * Raises TypeError when url is not a String. Returns self.
 */
static VALUE rxml_parser_context_base_uri_set(VALUE self, VALUE url)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  Check_Type(url, T_STRING);

  /* Leave the context alone when there is no input or a filename exists. */
  if (xctxt->input == NULL || xctxt->input->filename != NULL)
    return self;

  xctxt->input->filename = (const char*)xmlStrdup((const xmlChar*)StringValuePtr(url));
  return self;
}
Closes the underlying input streams. This is useful when parsing a large number of files and you want to close the files without relying on Ruby’s garbage collector to run.
/*
 * Pops every input stream off the context and frees each one with
 * xmlFreeInputStream. Returns nil.
 */
static VALUE rxml_parser_context_close(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  xmlParserInputPtr popped;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  /* inputPop returns NULL once the input stack is empty. */
  for (popped = inputPop(xctxt); popped != NULL; popped = inputPop(xctxt))
    xmlFreeInputStream(popped);

  return Qnil;
}
Obtain the data directory associated with this context.
/*
 * Returns the context's directory field as a Ruby string (built with
 * rxml_new_cstr and the context's encoding), or nil when it is NULL.
 */
static VALUE rxml_parser_context_data_directory_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  if (xctxt->directory != NULL)
    return rxml_new_cstr((const xmlChar*)xctxt->directory, xctxt->encoding);

  return Qnil;
}
Obtain the depth of this context.
/* Returns the context's depth field as a Ruby integer. */
static VALUE rxml_parser_context_depth_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  int depth;

  Data_Get_Struct(self, xmlParserCtxt, xctxt);
  depth = xctxt->depth;

  return INT2NUM(depth);
}
Control whether CDATA nodes will be created in this context.
/*
 * Controls whether the context's SAX handler reports CDATA blocks.
 *
 * A truthy value clears the handler's cdataBlock callback; false or nil
 * restores it to xmlSAX2CDataBlock.
 *
 * Raises RuntimeError when the context has no SAX handler.
 * Returns the value that was passed in.
 */
static VALUE rxml_parser_context_disable_cdata_set(VALUE self, VALUE value)
{
  xmlParserCtxtPtr ctxt;
  Data_Get_Struct(self, xmlParserCtxt, ctxt);

  if (ctxt->sax == NULL)
    rb_raise(rb_eRuntimeError, "Sax handler is not yet set");

  /* LibXML controls this internally with the default SAX handler.
     RTEST applies Ruby truthiness: testing the raw VALUE treated nil as
     true because only Qfalse is represented as zero. */
  if (RTEST(value))
    ctxt->sax->cdataBlock = NULL;
  else
    ctxt->sax->cdataBlock = xmlSAX2CDataBlock;

  return value;
}
Determine whether CDATA nodes will be created in this context.
/*
 * Returns false when the context has a SAX handler with a cdataBlock
 * callback installed, and true otherwise.
 */
static VALUE rxml_parser_context_disable_cdata_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  /* LibXML controls this internally with the default SAX handler. */
  if (xctxt->sax == NULL || xctxt->sax->cdataBlock == NULL)
    return Qtrue;

  return Qfalse;
}
Determine whether SAX-based processing is disabled in this context.
/* Returns true when the context's disableSAX field is non-zero. */
static VALUE rxml_parser_context_disable_sax_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return xctxt->disableSAX ? Qtrue : Qfalse;
}
Determine whether this is a docbook context.
/* Returns true when the context's html field equals 2. */
static VALUE rxml_parser_context_docbook_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  // TODO check this
  return (xctxt->html == 2) ? Qtrue : Qfalse;
}
Obtain the character encoding identifier used in this context.
/*
 * Returns the numeric encoding identifier that xmlParseCharEncoding
 * yields for the context's encoding name, as a Ruby integer.
 */
static VALUE rxml_parser_context_encoding_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  const char *enc_name;

  Data_Get_Struct(self, xmlParserCtxt, xctxt);
  enc_name = (const char*)xctxt->encoding;

  return INT2NUM(xmlParseCharEncoding(enc_name));
}
Sets the character encoding for this context.
/*
 * Switches the context to the encoding identified by the given numeric
 * encoding constant and records the encoding name on the context.
 *
 * Raises ArgumentError when no encoding handler is found. When
 * xmlSwitchToEncoding fails, the last libxml2 error is passed to
 * rxml_raise. Returns self.
 */
static VALUE rxml_parser_context_encoding_set(VALUE self, VALUE encoding)
{
  xmlParserCtxtPtr xctxt;
  int enc_id = NUM2INT(encoding);
  const char *enc_name = xmlGetCharEncodingName((xmlCharEncoding)enc_id);
  xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(enc_name);

  if (handler == NULL)
    rb_raise(rb_eArgError, "Unknown encoding: %i", enc_id);

  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  if (xmlSwitchToEncoding(xctxt, handler) != 0)
    rxml_raise(xmlGetLastError());

  /* Replace the previously recorded encoding name with a fresh copy. */
  if (xctxt->encoding != NULL)
    xmlFree((xmlChar *) xctxt->encoding);

  xctxt->encoding = xmlStrdup((const xmlChar *) enc_name);
  return self;
}
Obtain the last-error number in this context.
/* Returns the context's errNo field as a Ruby integer. */
static VALUE rxml_parser_context_errno_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  int err_no;

  Data_Get_Struct(self, xmlParserCtxt, xctxt);
  err_no = xctxt->errNo;

  return INT2NUM(err_no);
}
Determine whether this is an html context.
/* Returns true when the context's html field equals 1. */
static VALUE rxml_parser_context_html_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return (xctxt->html == 1) ? Qtrue : Qfalse;
}
Obtain the limit on the number of IO streams opened in this context.
/* Returns the context's inputMax field as a Ruby integer. */
static VALUE rxml_parser_context_io_max_num_streams_get(VALUE self)
{
  // TODO alias to max_streams and dep this?
  xmlParserCtxtPtr xctxt;
  int input_max;

  Data_Get_Struct(self, xmlParserCtxt, xctxt);
  input_max = xctxt->inputMax;

  return INT2NUM(input_max);
}
Obtain the actual number of IO streams in this context.
/* Returns the context's inputNr field as a Ruby integer. */
static VALUE rxml_parser_context_io_num_streams_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  int input_count;

  Data_Get_Struct(self, xmlParserCtxt, xctxt);
  input_count = xctxt->inputNr;

  return INT2NUM(input_count);
}
Determine whether parsers in this context retain whitespace.
/* Returns true when the context's keepBlanks field is non-zero. */
static VALUE rxml_parser_context_keep_blanks_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return xctxt->keepBlanks ? Qtrue : Qfalse;
}
Obtain the name depth for this context.
/* Returns the context's nameNr field as a Ruby integer. */
static VALUE rxml_parser_context_name_depth_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  int name_count;

  Data_Get_Struct(self, xmlParserCtxt, xctxt);
  name_count = xctxt->nameNr;

  return INT2NUM(name_count);
}
Obtain the maximum name depth for this context.
/* Returns the context's nameMax field as a Ruby integer. */
static VALUE rxml_parser_context_name_depth_max_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  int name_max;

  Data_Get_Struct(self, xmlParserCtxt, xctxt);
  name_max = xctxt->nameMax;

  return INT2NUM(name_max);
}
Obtain the name node for this context.
/*
 * Returns the context's name field as a Ruby string (built with
 * rxml_new_cstr and the context's encoding), or nil when it is NULL.
 */
static VALUE rxml_parser_context_name_node_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  if (xctxt->name != NULL)
    return rxml_new_cstr(xctxt->name, xctxt->encoding);

  return Qnil;
}
Obtain the name table for this context.
/*
 * Returns the entries of the context's name table as a Ruby array of
 * strings, or nil when the context has no name table.
 *
 * Entries are visited from index nameNr - 1 down to 0, so the array
 * starts with the highest-indexed entry. NULL entries are skipped.
 */
static VALUE rxml_parser_context_name_tab_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  VALUE names;
  int idx;

  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  if (xctxt->nameTab == NULL)
    return Qnil;

  names = rb_ary_new();

  idx = xctxt->nameNr;
  while (idx-- > 0)
  {
    if (xctxt->nameTab[idx] != NULL)
      rb_ary_push(names, rxml_new_cstr(xctxt->nameTab[idx], xctxt->encoding));
  }

  return names;
}
Obtain the root node of this context.
/*
 * Returns the context's node field wrapped with rxml_node_wrap, or nil
 * when the field is NULL.
 */
static VALUE rxml_parser_context_node_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  if (xctxt->node != NULL)
    return rxml_node_wrap(xctxt->node);

  return Qnil;
}
Obtain the node depth for this context.
/* Returns the context's nodeNr field as a Ruby integer. */
static VALUE rxml_parser_context_node_depth_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  int node_count;

  Data_Get_Struct(self, xmlParserCtxt, xctxt);
  node_count = xctxt->nodeNr;

  return INT2NUM(node_count);
}
Obtain the maximum node depth for this context.
/* Returns the context's nodeMax field as a Ruby integer. */
static VALUE rxml_parser_context_node_depth_max_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  int node_max;

  Data_Get_Struct(self, xmlParserCtxt, xctxt);
  node_max = xctxt->nodeMax;

  return INT2NUM(node_max);
}
Obtain the number of characters in this context.
/* Returns the context's nbChars field as a Ruby integer (via LONG2NUM). */
static VALUE rxml_parser_context_num_chars_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  long char_count;

  Data_Get_Struct(self, xmlParserCtxt, xctxt);
  char_count = xctxt->nbChars;

  return LONG2NUM(char_count);
}
Returns the parser options for this context. Multiple options can be combined by using Bitwise OR (|).
/* Returns the context's options field (a bit mask) as a Ruby integer. */
static VALUE rxml_parser_context_options_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  int option_bits;

  Data_Get_Struct(self, xmlParserCtxt, xctxt);
  option_bits = xctxt->options;

  return INT2NUM(option_bits);
}
Provides control over the execution of a parser. Valid values are the constants defined on XML::Parser::Options. Multiple options can be combined by using Bitwise OR (|).
/*
 * Applies the given option bit mask to the context via xmlCtxtUseOptions.
 *
 * Raises TypeError when options is not a Fixnum. The result of
 * xmlCtxtUseOptions is not inspected. Returns self.
 */
static VALUE rxml_parser_context_options_set(VALUE self, VALUE options)
{
  xmlParserCtxtPtr xctxt;

  /* Validate the argument before the context is touched. */
  Check_Type(options, T_FIXNUM);

  Data_Get_Struct(self, xmlParserCtxt, xctxt);
  xmlCtxtUseOptions(xctxt, NUM2INT(options));

  return self;
}
Control whether recovery mode is enabled in this context.
/*
 * Enables or disables recovery mode on the context.
 *
 * A truthy value sets the context's recovery field to 1; false or nil
 * sets it to 0. Returns true or false to mirror the stored state.
 */
static VALUE rxml_parser_context_recovery_set(VALUE self, VALUE value)
{
  xmlParserCtxtPtr ctxt;
  Data_Get_Struct(self, xmlParserCtxt, ctxt);

  /* RTEST applies Ruby truthiness; comparing against Qfalse alone made
     nil switch recovery mode on. */
  if (RTEST(value))
  {
    ctxt->recovery = 1;
    return Qtrue;
  }

  ctxt->recovery = 0;
  return Qfalse;
}
Determine whether recovery mode is enabled in this context.
/* Returns true when the context's recovery field is non-zero. */
static VALUE rxml_parser_context_recovery_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return xctxt->recovery ? Qtrue : Qfalse;
}
Control whether external entity replacement is enabled in this context.
/*
 * Enables or disables entity replacement on the context.
 *
 * A truthy value sets the context's replaceEntities field to 1; false or
 * nil sets it to 0. Returns true or false to mirror the stored state.
 */
static VALUE rxml_parser_context_replace_entities_set(VALUE self, VALUE value)
{
  xmlParserCtxtPtr ctxt;
  Data_Get_Struct(self, xmlParserCtxt, ctxt);

  /* RTEST applies Ruby truthiness; comparing against Qfalse alone made
     nil switch entity replacement on. */
  if (RTEST(value))
  {
    ctxt->replaceEntities = 1;
    return Qtrue;
  }

  ctxt->replaceEntities = 0;
  return Qfalse;
}
Determine whether external entity replacement is enabled in this context.
/* Returns true when the context's replaceEntities field is non-zero. */
static VALUE rxml_parser_context_replace_entities_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return xctxt->replaceEntities ? Qtrue : Qfalse;
}
Obtain the space depth for this context.
/* Returns the context's spaceNr field as a Ruby integer. */
static VALUE rxml_parser_context_space_depth_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  int space_count;

  Data_Get_Struct(self, xmlParserCtxt, xctxt);
  space_count = xctxt->spaceNr;

  return INT2NUM(space_count);
}
Obtain the maximum space depth for this context.
/* Returns the context's spaceMax field as a Ruby integer. */
static VALUE rxml_parser_context_space_depth_max_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  int space_max;

  Data_Get_Struct(self, xmlParserCtxt, xctxt);
  space_max = xctxt->spaceMax;

  return INT2NUM(space_max);
}
Determine whether this is a standalone context.
/*
 * Returns true when the context's standalone field is non-zero (any
 * non-zero value counts, including negative ones) and false when it is 0.
 */
static VALUE rxml_parser_context_standalone_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return xctxt->standalone ? Qtrue : Qfalse;
}
Determine whether this context maintains statistics.
/* Returns true when the context's record_info field is non-zero. */
static VALUE rxml_parser_context_stats_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return xctxt->record_info ? Qtrue : Qfalse;
}
Determine whether this context is a subset of an external context.
/* Returns true when the context's inSubset field equals 2. */
static VALUE rxml_parser_context_subset_external_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return (xctxt->inSubset == 2) ? Qtrue : Qfalse;
}
Obtain this context’s external subset system identifier. (valid only if either of subset_external? or subset_internal? is true).
/*
 * Returns the context's extSubSystem field as a Ruby string (built with
 * rxml_new_cstr and the context's encoding), or nil when it is NULL.
 */
static VALUE rxml_parser_context_subset_external_system_id_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  if (xctxt->extSubSystem != NULL)
    return rxml_new_cstr(xctxt->extSubSystem, xctxt->encoding);

  return Qnil;
}
Obtain this context’s external subset URI. (valid only if either of subset_external? or subset_internal? is true).
/*
 * Returns the context's extSubURI field as a Ruby string (built with
 * rxml_new_cstr and the context's encoding), or nil when it is NULL.
 */
static VALUE rxml_parser_context_subset_external_uri_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  if (xctxt->extSubURI != NULL)
    return rxml_new_cstr(xctxt->extSubURI, xctxt->encoding);

  return Qnil;
}
Determine whether this context is a subset of an internal context.
/* Returns true when the context's inSubset field equals 1. */
static VALUE rxml_parser_context_subset_internal_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return (xctxt->inSubset == 1) ? Qtrue : Qfalse;
}
Obtain this context’s subset name (valid only if either of subset_external? or subset_internal? is true).
/*
 * Returns the context's intSubName field as a Ruby string (built with
 * rxml_new_cstr and the context's encoding), or nil when it is NULL.
 */
static VALUE rxml_parser_context_subset_name_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  if (xctxt->intSubName != NULL)
    return rxml_new_cstr(xctxt->intSubName, xctxt->encoding);

  return Qnil;
}
Determine whether this context is valid.
/* Returns true when the context's valid field is non-zero. */
static VALUE rxml_parser_context_valid_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return xctxt->valid ? Qtrue : Qfalse;
}
Determine whether validation is enabled in this context.
/* Returns true when the context's validate field is non-zero. */
static VALUE rxml_parser_context_validate_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return xctxt->validate ? Qtrue : Qfalse;
}
Obtain this context’s version identifier.
/*
 * Returns the context's version field as a Ruby string (built with
 * rxml_new_cstr and the context's encoding), or nil when it is NULL.
 */
static VALUE rxml_parser_context_version_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  if (xctxt->version != NULL)
    return rxml_new_cstr(xctxt->version, xctxt->encoding);

  return Qnil;
}
Determine whether this context contains well-formed XML.
/* Returns true when the context's wellFormed field is non-zero. */
static VALUE rxml_parser_context_well_formed_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return xctxt->wellFormed ? Qtrue : Qfalse;
}