class LibXML::XML::Parser::Context

The XML::Parser::Context class provides in-depth control over how a document is parsed.

Public Class Methods

XML::Parser::Context.document(document, options = nil) → XML::Parser::Context click to toggle source

Creates a new parser context based on the specified document.

Parameters:

document - An XML::Document instance
options - An or'ed-together list of LibXML::XML::Parser::Options values (optional)
/*
 * Creates a parser context from an existing XML::Document by serializing the
 * document to memory and building a libxml2 parser context over that text.
 *
 * argv[0] - an XML::Document instance (TypeError is raised otherwise)
 * argv[1] - optional integer of or'ed XML::Parser::Options values; nil or
 *           omitted means 0
 *
 * Returns the parser context wrapped by rxml_parser_context_wrap().
 */
static VALUE rxml_parser_context_document(int argc, VALUE* argv, VALUE klass)
{
  VALUE document, options;
  xmlDocPtr xdoc;
  xmlChar *buffer = NULL;
  int length = 0;
  int xoptions;
  xmlParserCtxtPtr ctxt;

  rb_scan_args(argc, argv, "11", &document, &options);

  if (rb_obj_is_kind_of(document, cXMLDocument) == Qfalse)
    rb_raise(rb_eTypeError, "Must pass an LibXML::XML::Document object");

  /* Convert the options up front: NUM2INT raises on a bad argument, and doing
     it before any libxml2 allocation means nothing is leaked when it does. */
  xoptions = NIL_P(options) ? 0 : NUM2INT(options);

  Data_Get_Struct(document, xmlDoc, xdoc);
  xmlDocDumpFormatMemoryEnc(xdoc, &buffer, &length, (const char*)xdoc->encoding, 0);

  ctxt = xmlCreateDocParserCtxt(buffer);

  /* The serialized text is owned by us (allocated by the dump call) and the
     parser context keeps its own copy of the input, so release it on both the
     success and the failure path. */
  if (buffer != NULL)
    xmlFree(buffer);

  if (!ctxt)
    rxml_raise(xmlGetLastError());

  /* xmlInitParserCtxt (called indirectly above) and xmlCtxtUseOptionsInternal
     (called below) initialize slightly different context options, in
     particular XML_PARSE_NODICT which xmlInitParserCtxt sets to 0 and
     xmlCtxtUseOptionsInternal sets to 1.  So we have to call both. */
  xmlCtxtUseOptions(ctxt, xoptions);

  return rxml_parser_context_wrap(ctxt);
}
XML::Parser::Context.file(file, options = nil) → XML::Parser::Context click to toggle source

Creates a new parser context based on the specified file or uri.

Parameters:

file - A filename or uri
options - An or'ed-together list of LibXML::XML::Parser::Options values (optional)
/*
 * Creates a parser context that reads from a file name or URI.
 *
 * argv[0] - a string naming the file or URI
 * argv[1] - optional integer of or'ed XML::Parser::Options values; nil or
 *           omitted means 0
 *
 * Returns the parser context wrapped by rxml_parser_context_wrap(); raises
 * via rxml_raise() when libxml2 cannot create the context.
 */
static VALUE rxml_parser_context_file(int argc, VALUE* argv, VALUE klass)
{
  VALUE file, options;
  int xoptions;
  xmlParserCtxtPtr ctxt;

  rb_scan_args(argc, argv, "11", &file, &options);

  /* Validate both arguments before touching libxml2: each conversion can
     raise, and raising after the context exists would leak it. */
  StringValue(file);
  xoptions = NIL_P(options) ? 0 : NUM2INT(options);

  ctxt = xmlCreateURLParserCtxt(StringValuePtr(file), 0);

  if (!ctxt)
    rxml_raise(xmlGetLastError());

  /* xmlInitParserCtxt (called indirectly above) and xmlCtxtUseOptionsInternal
     (called below) initialize slightly different context options, in
     particular XML_PARSE_NODICT which xmlInitParserCtxt sets to 0 and
     xmlCtxtUseOptionsInternal sets to 1.  So we have to call both. */
  xmlCtxtUseOptions(ctxt, xoptions);

  return rxml_parser_context_wrap(ctxt);
}
XML::Parser::Context.io(io, options = nil) → XML::Parser::Context click to toggle source

Creates a new parser context based on the specified io object.

Parameters:

io - A ruby IO object
options - An or'ed-together list of LibXML::XML::Parser::Options values (optional)
/*
 * Creates a parser context that pulls its data from a Ruby IO object through
 * rxml_read_callback.
 *
 * argv[0] - the IO object (TypeError is raised when it is nil)
 * argv[1] - optional integer of or'ed XML::Parser::Options values; nil or
 *           omitted means 0
 *
 * Returns the wrapped parser context, with the IO object stored on it under
 * IO_ATTR.
 */
static VALUE rxml_parser_context_io(int argc, VALUE* argv, VALUE klass)
{
  VALUE io, options, result;
  int xoptions;
  xmlParserInputBufferPtr input;
  xmlParserCtxtPtr ctxt;
  xmlParserInputPtr stream;

  rb_scan_args(argc, argv, "11", &io, &options);

  if (NIL_P(io))
    rb_raise(rb_eTypeError, "Must pass in an IO object");

  /* Convert the options before allocating anything: NUM2INT raises on a bad
     argument, which would otherwise leak the input buffer and the context. */
  xoptions = NIL_P(options) ? 0 : NUM2INT(options);

  input = xmlParserInputBufferCreateIO((xmlInputReadCallback) rxml_read_callback, NULL,
                                       (void*)io, XML_CHAR_ENCODING_NONE);

  ctxt = xmlNewParserCtxt();

  if (!ctxt)
  {
    xmlFreeParserInputBuffer(input);
    rxml_raise(xmlGetLastError());
  }

  /* xmlInitParserCtxt (called indirectly above) and xmlCtxtUseOptionsInternal
     (called below) initialize slightly different context options, in
     particular XML_PARSE_NODICT which xmlInitParserCtxt sets to 0 and
     xmlCtxtUseOptionsInternal sets to 1.  So we have to call both. */
  xmlCtxtUseOptions(ctxt, xoptions);

  stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);

  if (!stream)
  {
    xmlFreeParserInputBuffer(input);
    xmlFreeParserCtxt(ctxt);
    rxml_raise(xmlGetLastError());
  }
  inputPush(ctxt, stream);
  result = rxml_parser_context_wrap(ctxt);

  /* Attach io object to parser so it won't get freed.*/
  rb_ivar_set(result, IO_ATTR, io);

  return result;
}
XML::Parser::Context.string(string, options = nil) → XML::Parser::Context click to toggle source

Creates a new parser context based on the specified string.

Parameters:

string - A string that contains the data to parse
options - An or'ed-together list of LibXML::XML::Parser::Options values (optional)
/*
 * Creates a parser context over the bytes of a Ruby string.
 *
 * argv[0] - a non-empty String (TypeError for other types, ArgumentError when
 *           empty)
 * argv[1] - optional integer of or'ed XML::Parser::Options values; nil or
 *           omitted means 0
 *
 * Returns the parser context wrapped by rxml_parser_context_wrap().
 */
static VALUE rxml_parser_context_string(int argc, VALUE* argv, VALUE klass)
{
  VALUE string, options;
  int xoptions;
  xmlParserCtxtPtr ctxt;

  rb_scan_args(argc, argv, "11", &string, &options);

  Check_Type(string, T_STRING);

  if (RSTRING_LEN(string) == 0)
    rb_raise(rb_eArgError, "Must specify a string with one or more characters");

  /* Convert the options before creating the context: NUM2INT raises on a bad
     argument, and raising afterwards would leak the context. */
  xoptions = NIL_P(options) ? 0 : NUM2INT(options);

  ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(string), (int)RSTRING_LEN(string));

  if (!ctxt)
    rxml_raise(xmlGetLastError());

  /* xmlInitParserCtxt (called indirectly above) and xmlCtxtUseOptionsInternal
     (called below) initialize slightly different context options, in
     particular XML_PARSE_NODICT which xmlInitParserCtxt sets to 0 and
     xmlCtxtUseOptionsInternal sets to 1.  So we have to call both. */
  xmlCtxtUseOptions(ctxt, xoptions);

  return rxml_parser_context_wrap(ctxt);
}

Public Instance Methods

base_uri → "http://libxml.org" click to toggle source

Obtain the base url for this parser context.

/*
 * Returns the filename recorded on the context's current input as a Ruby
 * string, or nil when there is no input or the input has no filename.
 */
static VALUE rxml_parser_context_base_uri_get(VALUE self)
{
  xmlParserCtxtPtr context;
  Data_Get_Struct(self, xmlParserCtxt, context);

  if (context->input == NULL || context->input->filename == NULL)
    return Qnil;

  return rxml_new_cstr((const xmlChar*)context->input->filename, context->encoding);
}
base_uri = "http://libxml.org" click to toggle source

Sets the base url for this parser context.

/*
 * Stores a copy of the given string as the current input's filename.
 * The assignment only happens when the context has an input whose filename
 * is not set yet; otherwise the call leaves the context untouched.
 *
 * url - must be a String (checked with Check_Type)
 *
 * Returns self.
 */
static VALUE rxml_parser_context_base_uri_set(VALUE self, VALUE url)
{
  xmlParserCtxtPtr context;
  Data_Get_Struct(self, xmlParserCtxt, context);

  Check_Type(url, T_STRING);

  /* Nothing to do without an input, or when a filename is already present. */
  if (context->input == NULL || context->input->filename != NULL)
    return self;

  context->input->filename = (const char*)xmlStrdup((const xmlChar*)StringValuePtr(url));
  return self;
}
close → nil click to toggle source

Closes the underlying input streams. This is useful when parsing a large amount of files and you want to close the files without relying on Ruby’s garbage collector to run.

/*
 * Pops every input stream off the context and frees each one in turn.
 * Always returns nil.
 */
static VALUE rxml_parser_context_close(VALUE self)
{
  xmlParserCtxtPtr context;
  xmlParserInputPtr stream;
  Data_Get_Struct(self, xmlParserCtxt, context);

  for (stream = inputPop(context); stream != NULL; stream = inputPop(context))
    xmlFreeInputStream(stream);

  return Qnil;
}
data_directory → "dir" click to toggle source

Obtain the data directory associated with this context.

/*
 * Returns the context's `directory` field as a Ruby string, or nil when the
 * field is NULL.
 */
static VALUE rxml_parser_context_data_directory_get(VALUE self)
{
  xmlParserCtxtPtr context;
  Data_Get_Struct(self, xmlParserCtxt, context);

  if (context->directory != NULL)
    return rxml_new_cstr((const xmlChar*)context->directory, context->encoding);

  return Qnil;
}
depth → num click to toggle source

Obtain the depth of this context.

/* Returns the context's `depth` field as a Ruby integer. */
static VALUE rxml_parser_context_depth_get(VALUE self)
{
  xmlParserCtxtPtr context;
  int depth;

  Data_Get_Struct(self, xmlParserCtxt, context);
  depth = context->depth;

  return INT2NUM(depth);
}
disable_cdata = (true|false) click to toggle source

Control whether CDATA nodes will be created in this context.

/*
 * Enables or disables creation of CDATA nodes by installing or removing the
 * cdataBlock callback on the context's SAX handler.
 *
 * value - Ruby truthiness decides: a truthy value removes the callback
 *         (CDATA disabled); false or nil installs xmlSAX2CDataBlock.
 *
 * Raises RuntimeError when the context has no SAX handler yet.
 * Returns the value that was passed in.
 */
static VALUE rxml_parser_context_disable_cdata_set(VALUE self, VALUE value)
{
  xmlParserCtxtPtr ctxt;
  Data_Get_Struct(self, xmlParserCtxt, ctxt);

  if (ctxt->sax == NULL)
    rb_raise(rb_eRuntimeError, "Sax handler is not yet set");

  /* LibXML controls this internally with the default SAX handler.
     RTEST applies Ruby truthiness; testing the raw VALUE with C truthiness
     depends on the interpreter's internal encoding and treats nil as true. */
  if (RTEST(value))
    ctxt->sax->cdataBlock = NULL;
  else
    ctxt->sax->cdataBlock = xmlSAX2CDataBlock;

  return value;
}
disable_cdata? → (true|false) click to toggle source

Determine whether CDATA nodes will be created in this context.

/*
 * Reports whether CDATA node creation is disabled: true when the context has
 * no SAX handler or the handler has no cdataBlock callback, false otherwise.
 */
static VALUE rxml_parser_context_disable_cdata_q(VALUE self)
{
  xmlParserCtxtPtr context;
  Data_Get_Struct(self, xmlParserCtxt, context);

  /* LibXML controls this internally with the default SAX handler. */
  if (context->sax == NULL || context->sax->cdataBlock == NULL)
    return Qtrue;

  return Qfalse;
}
disable_sax? → (true|false) click to toggle source

Determine whether SAX-based processing is disabled in this context.

/* Returns true when the context's `disableSAX` field is non-zero. */
static VALUE rxml_parser_context_disable_sax_q(VALUE self)
{
  xmlParserCtxtPtr context;
  Data_Get_Struct(self, xmlParserCtxt, context);

  return context->disableSAX ? Qtrue : Qfalse;
}
docbook? → (true|false) click to toggle source

Determine whether this is a docbook context.

/* Returns true when the context's `html` field equals 2. */
static VALUE rxml_parser_context_docbook_q(VALUE self)
{
  xmlParserCtxtPtr context;
  Data_Get_Struct(self, xmlParserCtxt, context);

  // TODO check this
  return (context->html == 2) ? Qtrue : Qfalse;
}
encoding → XML::Encoding::UTF_8 click to toggle source

Obtain the character encoding identifier used in this context.

/*
 * Returns the numeric encoding identifier obtained by passing the context's
 * encoding name to xmlParseCharEncoding.
 */
static VALUE rxml_parser_context_encoding_get(VALUE self)
{
  xmlParserCtxtPtr context;
  const char *encoding_name;

  Data_Get_Struct(self, xmlParserCtxt, context);
  encoding_name = (const char*)context->encoding;

  return INT2NUM(xmlParseCharEncoding(encoding_name));
}
encoding = XML::Encoding::UTF_8 click to toggle source

Sets the character encoding for this context.

/*
 * Switches the context to the encoding identified by the given numeric
 * encoding constant and records the encoding's name on the context.
 *
 * encoding - integer encoding identifier
 *
 * Raises ArgumentError when no handler exists for the encoding, and raises
 * via rxml_raise() when xmlSwitchToEncoding reports failure.
 * Returns self.
 */
static VALUE rxml_parser_context_encoding_set(VALUE self, VALUE encoding)
{
  xmlParserCtxtPtr context;
  int code = NUM2INT(encoding);
  const char* name = xmlGetCharEncodingName((xmlCharEncoding)code);
  xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(name);

  if (handler == NULL)
    rb_raise(rb_eArgError, "Unknown encoding: %i", code);

  Data_Get_Struct(self, xmlParserCtxt, context);

  if (xmlSwitchToEncoding(context, handler) != 0)
    rxml_raise(xmlGetLastError());

  /* Replace any previously stored encoding name with a fresh copy. */
  if (context->encoding != NULL)
    xmlFree((xmlChar *) context->encoding);

  context->encoding = xmlStrdup((const xmlChar *) name);
  return self;
}
errno → num click to toggle source

Obtain the last-error number in this context.

/* Returns the context's `errNo` field as a Ruby integer. */
static VALUE rxml_parser_context_errno_get(VALUE self)
{
  xmlParserCtxtPtr context;
  int error_number;

  Data_Get_Struct(self, xmlParserCtxt, context);
  error_number = context->errNo;

  return INT2NUM(error_number);
}
html? → (true|false) click to toggle source

Determine whether this is an html context.

/* Returns true when the context's `html` field equals 1. */
static VALUE rxml_parser_context_html_q(VALUE self)
{
  xmlParserCtxtPtr context;
  Data_Get_Struct(self, xmlParserCtxt, context);

  return (context->html == 1) ? Qtrue : Qfalse;
}
max_num_streams → num click to toggle source

Obtain the limit on the number of IO streams opened in this context.

/* Returns the context's `inputMax` field as a Ruby integer. */
static VALUE rxml_parser_context_io_max_num_streams_get(VALUE self)
{
  // TODO alias to max_streams and dep this?
  xmlParserCtxtPtr context;
  int max_streams;

  Data_Get_Struct(self, xmlParserCtxt, context);
  max_streams = context->inputMax;

  return INT2NUM(max_streams);
}
num_streams → num click to toggle source

Obtain the actual number of IO streams in this context.

/* Returns the context's `inputNr` field as a Ruby integer. */
static VALUE rxml_parser_context_io_num_streams_get(VALUE self)
{
  xmlParserCtxtPtr context;
  int stream_count;

  Data_Get_Struct(self, xmlParserCtxt, context);
  stream_count = context->inputNr;

  return INT2NUM(stream_count);
}
keep_blanks? → (true|false) click to toggle source

Determine whether parsers in this context retain whitespace.

/* Returns true when the context's `keepBlanks` field is non-zero. */
static VALUE rxml_parser_context_keep_blanks_q(VALUE self)
{
  xmlParserCtxtPtr context;
  Data_Get_Struct(self, xmlParserCtxt, context);

  return context->keepBlanks ? Qtrue : Qfalse;
}
name_depth → num click to toggle source

Obtain the name depth for this context.

/* Returns the context's `nameNr` field as a Ruby integer. */
static VALUE rxml_parser_context_name_depth_get(VALUE self)
{
  xmlParserCtxtPtr context;
  int name_count;

  Data_Get_Struct(self, xmlParserCtxt, context);
  name_count = context->nameNr;

  return INT2NUM(name_count);
}
name_depth_max → num click to toggle source

Obtain the maximum name depth for this context.

/* Returns the context's `nameMax` field as a Ruby integer. */
static VALUE rxml_parser_context_name_depth_max_get(VALUE self)
{
  xmlParserCtxtPtr context;
  int name_max;

  Data_Get_Struct(self, xmlParserCtxt, context);
  name_max = context->nameMax;

  return INT2NUM(name_max);
}
name_node → "name" click to toggle source

Obtain the name node for this context.

/*
 * Returns the context's `name` field as a Ruby string, or nil when the field
 * is NULL.
 */
static VALUE rxml_parser_context_name_node_get(VALUE self)
{
  xmlParserCtxtPtr context;
  Data_Get_Struct(self, xmlParserCtxt, context);

  if (context->name != NULL)
    return rxml_new_cstr(context->name, context->encoding);

  return Qnil;
}
name_tab → ["name", ..., "name"] click to toggle source

Obtain the name table for this context.

/*
 * Returns a Ruby array holding the non-NULL entries of the context's name
 * table, walked from the last used slot (nameNr - 1) down to slot 0.
 * Returns nil when the context has no name table.
 */
static VALUE rxml_parser_context_name_tab_get(VALUE self)
{
  xmlParserCtxtPtr context;
  VALUE names;
  int idx;

  Data_Get_Struct(self, xmlParserCtxt, context);

  if (context->nameTab == NULL)
    return Qnil;

  names = rb_ary_new();

  /* Count down so the most recently pushed name comes first. */
  idx = context->nameNr;
  while (idx-- > 0)
  {
    if (context->nameTab[idx] != NULL)
      rb_ary_push(names, rxml_new_cstr(context->nameTab[idx], context->encoding));
  }

  return names;
}
node → node click to toggle source

Obtain the root node of this context.

/*
 * Returns the context's `node` field wrapped as a Ruby node object, or nil
 * when the field is NULL.
 */
static VALUE rxml_parser_context_node_get(VALUE self)
{
  xmlParserCtxtPtr context;
  Data_Get_Struct(self, xmlParserCtxt, context);

  if (context->node != NULL)
    return rxml_node_wrap(context->node);

  return Qnil;
}
node_depth → num click to toggle source

Obtain the node depth for this context.

/* Returns the context's `nodeNr` field as a Ruby integer. */
static VALUE rxml_parser_context_node_depth_get(VALUE self)
{
  xmlParserCtxtPtr context;
  int node_count;

  Data_Get_Struct(self, xmlParserCtxt, context);
  node_count = context->nodeNr;

  return INT2NUM(node_count);
}
node_depth_max → num click to toggle source

Obtain the maximum node depth for this context.

/* Returns the context's `nodeMax` field as a Ruby integer. */
static VALUE rxml_parser_context_node_depth_max_get(VALUE self)
{
  xmlParserCtxtPtr context;
  int node_max;

  Data_Get_Struct(self, xmlParserCtxt, context);
  node_max = context->nodeMax;

  return INT2NUM(node_max);
}
num_chars → num click to toggle source

Obtain the number of characters in this context.

/* Returns the context's `nbChars` field as a Ruby integer (via LONG2NUM). */
static VALUE rxml_parser_context_num_chars_get(VALUE self)
{
  xmlParserCtxtPtr context;

  Data_Get_Struct(self, xmlParserCtxt, context);

  return LONG2NUM(context->nbChars);
}
options → XML::Parser::Options::NOENT click to toggle source

Returns the parser options for this context. Multiple options can be combined by using Bitwise OR (|).

/* Returns the context's `options` bit field as a Ruby integer. */
static VALUE rxml_parser_context_options_get(VALUE self)
{
  xmlParserCtxtPtr context;
  int current_options;

  Data_Get_Struct(self, xmlParserCtxt, context);
  current_options = context->options;

  return INT2NUM(current_options);
}
options = XML::Parser::Options::NOENT | click to toggle source
XML::Parser::Options::NOCDATA

Provides control over the execution of a parser. Valid values are the constants defined on XML::Parser::Options. Multiple options can be combined by using Bitwise OR (|).

/*
 * Applies the given parser options to the context through xmlCtxtUseOptions.
 *
 * options - must be a Fixnum (checked with Check_Type)
 *
 * Returns self.
 */
static VALUE rxml_parser_context_options_set(VALUE self, VALUE options)
{
  xmlParserCtxtPtr context;
  int requested;

  Check_Type(options, T_FIXNUM);
  Data_Get_Struct(self, xmlParserCtxt, context);

  requested = NUM2INT(options);
  xmlCtxtUseOptions(context, requested);

  return self;
}
recovery = true|false click to toggle source

Control whether recovery mode is enabled in this context.

/*
 * Sets the context's `recovery` field: 0 when the argument is exactly false,
 * 1 for any other value. Returns false or true to match what was stored.
 */
static VALUE rxml_parser_context_recovery_set(VALUE self, VALUE value)
{
  xmlParserCtxtPtr context;
  int enabled = (value != Qfalse);

  Data_Get_Struct(self, xmlParserCtxt, context);
  context->recovery = enabled;

  return enabled ? Qtrue : Qfalse;
}
recovery? → (true|false) click to toggle source

Determine whether recovery mode is enabled in this context.

/* Returns true when the context's `recovery` field is non-zero. */
static VALUE rxml_parser_context_recovery_q(VALUE self)
{
  xmlParserCtxtPtr context;
  Data_Get_Struct(self, xmlParserCtxt, context);

  return context->recovery ? Qtrue : Qfalse;
}
replace_entities = true|false click to toggle source

Control whether external entity replacement is enabled in this context.

/*
 * Sets the context's `replaceEntities` field: 0 when the argument is exactly
 * false, 1 for any other value. Returns false or true to match what was
 * stored.
 */
static VALUE rxml_parser_context_replace_entities_set(VALUE self, VALUE value)
{
  xmlParserCtxtPtr context;
  int enabled = (value != Qfalse);

  Data_Get_Struct(self, xmlParserCtxt, context);
  context->replaceEntities = enabled;

  return enabled ? Qtrue : Qfalse;
}
replace_entities? → (true|false) click to toggle source

Determine whether external entity replacement is enabled in this context.

/* Returns true when the context's `replaceEntities` field is non-zero. */
static VALUE rxml_parser_context_replace_entities_q(VALUE self)
{
  xmlParserCtxtPtr context;
  Data_Get_Struct(self, xmlParserCtxt, context);

  return context->replaceEntities ? Qtrue : Qfalse;
}
space_depth → num click to toggle source

Obtain the space depth for this context.

/* Returns the context's `spaceNr` field as a Ruby integer. */
static VALUE rxml_parser_context_space_depth_get(VALUE self)
{
  xmlParserCtxtPtr context;
  int space_count;

  Data_Get_Struct(self, xmlParserCtxt, context);
  space_count = context->spaceNr;

  return INT2NUM(space_count);
}
space_depth_max → num click to toggle source

Obtain the maximum space depth for this context.

/* Returns the context's `spaceMax` field as a Ruby integer. */
static VALUE rxml_parser_context_space_depth_max_get(VALUE self)
{
  xmlParserCtxtPtr context;
  int space_max;

  Data_Get_Struct(self, xmlParserCtxt, context);
  space_max = context->spaceMax;

  return INT2NUM(space_max);
}
standalone? → (true|false) click to toggle source

Determine whether this is a standalone context.

/* Returns true when the context's `standalone` field is non-zero. */
static VALUE rxml_parser_context_standalone_q(VALUE self)
{
  xmlParserCtxtPtr context;
  Data_Get_Struct(self, xmlParserCtxt, context);

  return context->standalone ? Qtrue : Qfalse;
}
stats? → (true|false) click to toggle source

Determine whether this context maintains statistics.

/* Returns true when the context's `record_info` field is non-zero. */
static VALUE rxml_parser_context_stats_q(VALUE self)
{
  xmlParserCtxtPtr context;
  Data_Get_Struct(self, xmlParserCtxt, context);

  return context->record_info ? Qtrue : Qfalse;
}
subset_external? → (true|false) click to toggle source

Determine whether this context is a subset of an external context.

/* Returns true when the context's `inSubset` field equals 2. */
static VALUE rxml_parser_context_subset_external_q(VALUE self)
{
  xmlParserCtxtPtr context;
  Data_Get_Struct(self, xmlParserCtxt, context);

  return (context->inSubset == 2) ? Qtrue : Qfalse;
}
subset_external_system_id → "system_id" click to toggle source

Obtain this context’s external subset system identifier. (valid only if either of subset_external? or subset_internal? is true).

/*
 * Returns the context's `extSubSystem` field as a Ruby string, or nil when
 * the field is NULL.
 */
static VALUE rxml_parser_context_subset_external_system_id_get(VALUE self)
{
  xmlParserCtxtPtr context;
  Data_Get_Struct(self, xmlParserCtxt, context);

  if (context->extSubSystem != NULL)
    return rxml_new_cstr(context->extSubSystem, context->encoding);

  return Qnil;
}
subset_external_uri → "uri" click to toggle source

Obtain this context’s external subset URI. (valid only if either of subset_external? or subset_internal? is true).

/*
 * Returns the context's `extSubURI` field as a Ruby string, or nil when the
 * field is NULL.
 */
static VALUE rxml_parser_context_subset_external_uri_get(VALUE self)
{
  xmlParserCtxtPtr context;
  Data_Get_Struct(self, xmlParserCtxt, context);

  if (context->extSubURI != NULL)
    return rxml_new_cstr(context->extSubURI, context->encoding);

  return Qnil;
}
subset_internal? → (true|false) click to toggle source

Determine whether this context is a subset of an internal context.

/* Returns true when the context's `inSubset` field equals 1. */
static VALUE rxml_parser_context_subset_internal_q(VALUE self)
{
  xmlParserCtxtPtr context;
  Data_Get_Struct(self, xmlParserCtxt, context);

  return (context->inSubset == 1) ? Qtrue : Qfalse;
}
subset_internal_name → "name" click to toggle source

Obtain this context’s subset name (valid only if either of subset_external? or subset_internal? is true).

/*
 * Returns the context's `intSubName` field as a Ruby string, or nil when the
 * field is NULL.
 */
static VALUE rxml_parser_context_subset_name_get(VALUE self)
{
  xmlParserCtxtPtr context;
  Data_Get_Struct(self, xmlParserCtxt, context);

  if (context->intSubName != NULL)
    return rxml_new_cstr(context->intSubName, context->encoding);

  return Qnil;
}
valid? → (true|false) click to toggle source

Determine whether this context is valid.

/* Returns true when the context's `valid` field is non-zero. */
static VALUE rxml_parser_context_valid_q(VALUE self)
{
  xmlParserCtxtPtr context;
  Data_Get_Struct(self, xmlParserCtxt, context);

  return context->valid ? Qtrue : Qfalse;
}
validate? → (true|false) click to toggle source

Determine whether validation is enabled in this context.

/* Returns true when the context's `validate` field is non-zero. */
static VALUE rxml_parser_context_validate_q(VALUE self)
{
  xmlParserCtxtPtr context;
  Data_Get_Struct(self, xmlParserCtxt, context);

  return context->validate ? Qtrue : Qfalse;
}
version → "version" click to toggle source

Obtain this context’s version identifier.

/*
 * Returns the context's `version` field as a Ruby string, or nil when the
 * field is NULL.
 */
static VALUE rxml_parser_context_version_get(VALUE self)
{
  xmlParserCtxtPtr context;
  Data_Get_Struct(self, xmlParserCtxt, context);

  if (context->version != NULL)
    return rxml_new_cstr(context->version, context->encoding);

  return Qnil;
}
well_formed? → (true|false) click to toggle source

Determine whether this context contains well-formed XML.

/* Returns true when the context's `wellFormed` field is non-zero. */
static VALUE rxml_parser_context_well_formed_q(VALUE self)
{
  xmlParserCtxtPtr context;
  Data_Get_Struct(self, xmlParserCtxt, context);

  return context->wellFormed ? Qtrue : Qfalse;
}