class LibXML::XML::Parser::Context
The XML::Parser::Context class provides in-depth control over how a document is parsed.
Public Class Methods
Creates a new parser context based on the specified document.
Parameters:
document - An XML::Document instance; options - An or'ed together list of LibXML::XML::Parser::Options values
/*
 * Creates a new parser context based on the specified document.
 *
 * The document is serialized with xmlDocDumpFormatMemoryEnc (using the
 * document's own encoding, unformatted) and a parser context is created over
 * the serialized text.
 *
 * Parameters:
 *  document - An XML::Document instance; anything else raises TypeError.
 *  options  - Optional or'ed together list of LibXML::XML::Parser::Options
 *             values. Omitted or nil is treated as 0.
 *
 * Returns the wrapped parser context. Raises via rxml_raise when libxml
 * cannot create the context.
 */
static VALUE rxml_parser_context_document(int argc, VALUE* argv, VALUE klass)
{
  VALUE document, options;
  rb_scan_args(argc, argv, "11", &document, &options);

  if (rb_obj_is_kind_of(document, cXMLDocument) == Qfalse)
    rb_raise(rb_eTypeError, "Must pass an LibXML::XML::Document object");

  /* Convert the options before allocating anything: NUM2INT raises on a
     non-numeric argument, and raising once the context exists would leak it. */
  int xoptions = NIL_P(options) ? 0 : NUM2INT(options);

  xmlDocPtr xdoc;
  xmlChar *buffer = NULL;
  int length = 0;
  Data_Get_Struct(document, xmlDoc, xdoc);
  xmlDocDumpFormatMemoryEnc(xdoc, &buffer, &length, (const char*)xdoc->encoding, 0);

  xmlParserCtxtPtr ctxt = xmlCreateDocParserCtxt(buffer);

  /* The dump buffer belongs to the caller and must be released with xmlFree.
     NOTE(review): this relies on libxml2 copying the text into the parser
     input when the context is created - confirm against the oldest libxml2
     version this extension supports. */
  if (buffer)
    xmlFree(buffer);

  if (!ctxt)
    rxml_raise(xmlGetLastError());

  /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
     xmlCtxtUseOptionsInternal (called below) initialize slightly different
     context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
     sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
  xmlCtxtUseOptions(ctxt, xoptions);

  return rxml_parser_context_wrap(ctxt);
}
Creates a new parser context based on the specified file or uri.
Parameters:
file - A filename or uri; options - An or'ed together list of LibXML::XML::Parser::Options values
/*
 * Creates a new parser context based on the specified file or uri.
 *
 * Parameters:
 *  file    - A filename or uri (must be convertible to a String without
 *            embedded NUL bytes).
 *  options - Optional or'ed together list of LibXML::XML::Parser::Options
 *            values. Omitted or nil is treated as 0.
 *
 * Returns the wrapped parser context. Raises via rxml_raise when libxml
 * cannot create the context.
 */
static VALUE rxml_parser_context_file(int argc, VALUE* argv, VALUE klass)
{
  VALUE file, options;
  rb_scan_args(argc, argv, "11", &file, &options);

  /* StringValueCStr guarantees a NUL-terminated C string and rejects embedded
     NUL bytes instead of silently truncating the path handed to libxml. */
  const char *xfile = StringValueCStr(file);

  /* Convert the options before creating the context: NUM2INT raises on a
     non-numeric argument, and raising afterwards would leak the context. */
  int xoptions = NIL_P(options) ? 0 : NUM2INT(options);

  xmlParserCtxtPtr ctxt = xmlCreateURLParserCtxt(xfile, 0);
  if (!ctxt)
    rxml_raise(xmlGetLastError());

  /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
     xmlCtxtUseOptionsInternal (called below) initialize slightly different
     context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
     sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
  xmlCtxtUseOptions(ctxt, xoptions);

  return rxml_parser_context_wrap(ctxt);
}
Creates a new parser context based on the specified io object.
Parameters:
io - A ruby IO object; options - An or'ed together list of LibXML::XML::Parser::Options values
/*
 * Creates a new parser context based on the specified io object.
 *
 * Parameters:
 *  io      - A ruby IO object; nil raises TypeError. It is read through
 *            rxml_read_callback.
 *  options - Optional or'ed together list of LibXML::XML::Parser::Options
 *            values. Omitted or nil is treated as 0.
 *
 * Returns the wrapped parser context with the io object stored in its
 * IO_ATTR instance variable. Raises via rxml_raise when libxml cannot create
 * the input buffer, the context, or the input stream.
 */
static VALUE rxml_parser_context_io(int argc, VALUE* argv, VALUE klass)
{
  VALUE io, options;
  rb_scan_args(argc, argv, "11", &io, &options);

  if (NIL_P(io))
    rb_raise(rb_eTypeError, "Must pass in an IO object");

  /* Convert the options before allocating anything: NUM2INT raises on a
     non-numeric argument, and raising later would leak both the input buffer
     and the context. */
  int xoptions = NIL_P(options) ? 0 : NUM2INT(options);

  xmlParserInputBufferPtr input = xmlParserInputBufferCreateIO((xmlInputReadCallback) rxml_read_callback, NULL,
                                                               (void*)io, XML_CHAR_ENCODING_NONE);
  if (!input)
    rxml_raise(xmlGetLastError());

  xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
  if (!ctxt)
  {
    xmlFreeParserInputBuffer(input);
    rxml_raise(xmlGetLastError());
  }

  /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
     xmlCtxtUseOptionsInternal (called below) initialize slightly different
     context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
     sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
  xmlCtxtUseOptions(ctxt, xoptions);

  xmlParserInputPtr stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
  if (!stream)
  {
    xmlFreeParserInputBuffer(input);
    xmlFreeParserCtxt(ctxt);
    rxml_raise(xmlGetLastError());
  }
  inputPush(ctxt, stream);

  VALUE result = rxml_parser_context_wrap(ctxt);

  /* Attach io object to parser so it won't get freed.*/
  rb_ivar_set(result, IO_ATTR, io);

  return result;
}
Creates a new parser context based on the specified string.
Parameters:
string - A string that contains the data to parse; options - An or'ed together list of LibXML::XML::Parser::Options values
/*
 * Creates a new parser context based on the specified string.
 *
 * Parameters:
 *  string  - A String that contains the data to parse. A non-String raises
 *            TypeError; an empty String raises ArgumentError.
 *  options - Optional or'ed together list of LibXML::XML::Parser::Options
 *            values. Omitted or nil is treated as 0.
 *
 * Returns the wrapped parser context. Raises via rxml_raise when libxml
 * cannot create the context.
 */
static VALUE rxml_parser_context_string(int argc, VALUE* argv, VALUE klass)
{
  VALUE string, options;
  rb_scan_args(argc, argv, "11", &string, &options);

  Check_Type(string, T_STRING);

  if (RSTRING_LEN(string) == 0)
    rb_raise(rb_eArgError, "Must specify a string with one or more characters");

  /* Convert the options before creating the context: NUM2INT raises on a
     non-numeric argument, and raising afterwards would leak the context. */
  int xoptions = NIL_P(options) ? 0 : NUM2INT(options);

  xmlParserCtxtPtr ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(string), (int)RSTRING_LEN(string));
  if (!ctxt)
    rxml_raise(xmlGetLastError());

  /* This is annoying, but xmlInitParserCtxt (called indirectly above) and
     xmlCtxtUseOptionsInternal (called below) initialize slightly different
     context options, in particular XML_PARSE_NODICT which xmlInitParserCtxt
     sets to 0 and xmlCtxtUseOptionsInternal sets to 1. So we have to call both. */
  xmlCtxtUseOptions(ctxt, xoptions);

  return rxml_parser_context_wrap(ctxt);
}
Public Instance Methods
Obtain the base url for this parser context.
/*
 * Obtain the base url for this parser context.
 *
 * Returns the filename stored on the context's input (ctxt->input), converted
 * by rxml_new_cstr with the context's encoding, or nil when the context has
 * no input or the input has no filename.
 */
static VALUE rxml_parser_context_base_uri_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  if (!xctxt->input || !xctxt->input->filename)
    return Qnil;

  return rxml_new_cstr((const xmlChar*)xctxt->input->filename, xctxt->encoding);
}
Sets the base url for this parser context.
/*
 * Sets the base url for this parser context.
 *
 * url must be a String (TypeError otherwise). A copy of it is stored as the
 * filename of the context's input, but only when the context has an input
 * and that input does not already have a filename; otherwise nothing changes.
 *
 * Returns self.
 */
static VALUE rxml_parser_context_base_uri_set(VALUE self, VALUE url)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  Check_Type(url, T_STRING);

  /* Nothing to do without an input, or when a filename is already present. */
  if (!xctxt->input || xctxt->input->filename)
    return self;

  xctxt->input->filename = (const char*)xmlStrdup((const xmlChar*)StringValuePtr(url));
  return self;
}
Closes the underlying input streams. This is useful when parsing a large amount of files and you want to close the files without relying on Ruby’s garbage collector to run.
/*
 * Closes the underlying input streams.
 *
 * Pops every input off the context with inputPop and frees each one with
 * xmlFreeInputStream until no inputs remain. Always returns nil.
 */
static VALUE rxml_parser_context_close(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  for (xmlParserInputPtr popped = inputPop(xctxt); popped != NULL; popped = inputPop(xctxt))
    xmlFreeInputStream(popped);

  return Qnil;
}
Obtain the data directory associated with this context.
/*
 * Obtain the data directory associated with this context.
 *
 * Returns ctxt->directory converted by rxml_new_cstr with the context's
 * encoding, or nil when no directory is set.
 */
static VALUE rxml_parser_context_data_directory_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return xctxt->directory != NULL
           ? rxml_new_cstr((const xmlChar*)xctxt->directory, xctxt->encoding)
           : Qnil;
}
Obtain the depth of this context.
/*
 * Obtain the depth of this context.
 *
 * Returns ctxt->depth as a Ruby integer.
 */
static VALUE rxml_parser_context_depth_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  VALUE depth = INT2NUM(xctxt->depth);
  return depth;
}
Control whether CDATA nodes will be created in this context.
/*
 * Control whether CDATA nodes will be created in this context.
 *
 * When value is set, the SAX handler's cdataBlock callback is cleared;
 * otherwise it is pointed back at xmlSAX2CDataBlock. Raises RuntimeError
 * when the context has no SAX handler. Returns value unchanged.
 *
 * NOTE(review): value is tested as a raw C value rather than with RTEST, so
 * presumably only `false` counts as unset and nil counts as set - confirm
 * this is intended.
 */
static VALUE rxml_parser_context_disable_cdata_set(VALUE self, VALUE value)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  if (xctxt->sax == NULL)
    rb_raise(rb_eRuntimeError, "Sax handler is not yet set");

  /* LibXML controls this internally with the default SAX handler. */
  xctxt->sax->cdataBlock = value ? NULL : xmlSAX2CDataBlock;

  return value;
}
Determine whether CDATA nodes will be created in this context.
/*
 * Determine whether CDATA nodes will be created in this context.
 *
 * Returns true when the context has no SAX handler or the handler has no
 * cdataBlock callback, and false otherwise.
 */
static VALUE rxml_parser_context_disable_cdata_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  /* LibXML controls this internally with the default SAX handler. */
  if (!xctxt->sax || !xctxt->sax->cdataBlock)
    return Qtrue;

  return Qfalse;
}
Determine whether SAX-based processing is disabled in this context.
/*
 * Determine whether SAX-based processing is disabled in this context.
 *
 * Returns true when ctxt->disableSAX is non-zero, false otherwise.
 */
static VALUE rxml_parser_context_disable_sax_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return xctxt->disableSAX ? Qtrue : Qfalse;
}
Determine whether this is a docbook context.
/*
 * Determine whether this is a docbook context.
 *
 * Returns true when ctxt->html equals 2, false otherwise.
 */
static VALUE rxml_parser_context_docbook_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  /* TODO check this: 2 is taken to be the docbook marker. */
  return (xctxt->html == 2) ? Qtrue : Qfalse;
}
Obtain the character encoding identifier used in this context.
/*
 * Obtain the character encoding identifier used in this context.
 *
 * Passes the context's encoding name to xmlParseCharEncoding and returns the
 * resulting identifier as a Ruby integer.
 */
static VALUE rxml_parser_context_encoding_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  const char *encoding_name = (const char*)xctxt->encoding;
  return INT2NUM(xmlParseCharEncoding(encoding_name));
}
Sets the character encoding for this context.
/*
 * Sets the character encoding for this context.
 *
 * Parameters:
 *  encoding - Integer encoding identifier, converted with NUM2INT and
 *             interpreted as an xmlCharEncoding.
 *
 * Raises ArgumentError when the identifier has no encoding name or no
 * handler can be found for it, and raises via rxml_raise when
 * xmlSwitchToEncoding fails. On success the context's stored encoding name
 * is replaced with a copy of the new name. Returns self.
 */
static VALUE rxml_parser_context_encoding_set(VALUE self, VALUE encoding)
{
  xmlParserCtxtPtr ctxt;
  int result;
  int xenc = NUM2INT(encoding);
  const char* xencoding = xmlGetCharEncodingName((xmlCharEncoding)xenc);

  /* xmlGetCharEncodingName yields NULL for identifiers it has no name for.
     Never look up a handler by NULL name: that could select a default handler
     and then store a NULL encoding name on the context. */
  xmlCharEncodingHandlerPtr hdlr = xencoding ? xmlFindCharEncodingHandler(xencoding) : NULL;

  if (!hdlr)
    rb_raise(rb_eArgError, "Unknown encoding: %i", xenc);

  Data_Get_Struct(self, xmlParserCtxt, ctxt);
  result = xmlSwitchToEncoding(ctxt, hdlr);

  if (result != 0)
    rxml_raise(xmlGetLastError());

  /* Replace the previously stored encoding name with a copy of the new one. */
  if (ctxt->encoding != NULL)
    xmlFree((xmlChar *) ctxt->encoding);

  ctxt->encoding = xmlStrdup((const xmlChar *) xencoding);
  return self;
}
Obtain the last-error number in this context.
/*
 * Obtain the last-error number in this context.
 *
 * Returns ctxt->errNo as a Ruby integer.
 */
static VALUE rxml_parser_context_errno_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  VALUE error_number = INT2NUM(xctxt->errNo);
  return error_number;
}
Determine whether this is an html context.
/*
 * Determine whether this is an html context.
 *
 * Returns true when ctxt->html equals 1, false otherwise.
 */
static VALUE rxml_parser_context_html_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return (xctxt->html == 1) ? Qtrue : Qfalse;
}
Obtain the limit on the number of IO streams opened in this context.
/*
 * Obtain the limit on the number of IO streams opened in this context.
 *
 * Returns ctxt->inputMax as a Ruby integer.
 */
static VALUE rxml_parser_context_io_max_num_streams_get(VALUE self)
{
  /* TODO alias to max_streams and dep this? */
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  VALUE max_streams = INT2NUM(xctxt->inputMax);
  return max_streams;
}
Obtain the actual number of IO streams in this context.
/*
 * Obtain the actual number of IO streams in this context.
 *
 * Returns ctxt->inputNr as a Ruby integer.
 */
static VALUE rxml_parser_context_io_num_streams_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  VALUE num_streams = INT2NUM(xctxt->inputNr);
  return num_streams;
}
Determine whether parsers in this context retain whitespace.
/*
 * Determine whether parsers in this context retain whitespace.
 *
 * Returns true when ctxt->keepBlanks is non-zero, false otherwise.
 */
static VALUE rxml_parser_context_keep_blanks_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return xctxt->keepBlanks ? Qtrue : Qfalse;
}
Obtain the name depth for this context.
/*
 * Obtain the name depth for this context.
 *
 * Returns ctxt->nameNr as a Ruby integer.
 */
static VALUE rxml_parser_context_name_depth_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  VALUE name_depth = INT2NUM(xctxt->nameNr);
  return name_depth;
}
Obtain the maximum name depth for this context.
/*
 * Obtain the maximum name depth for this context.
 *
 * Returns ctxt->nameMax as a Ruby integer.
 */
static VALUE rxml_parser_context_name_depth_max_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  VALUE name_depth_max = INT2NUM(xctxt->nameMax);
  return name_depth_max;
}
Obtain the name node for this context.
/*
 * Obtain the name node for this context.
 *
 * Returns ctxt->name converted by rxml_new_cstr with the context's encoding,
 * or nil when no name is set.
 */
static VALUE rxml_parser_context_name_node_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  if (xctxt->name != NULL)
    return rxml_new_cstr(xctxt->name, xctxt->encoding);

  return Qnil;
}
Obtain the name table for this context.
/*
 * Obtain the name table for this context.
 *
 * Returns nil when the context has no name table. Otherwise returns an array
 * built by walking ctxt->nameTab from index nameNr - 1 down to 0, converting
 * each non-NULL entry with rxml_new_cstr; NULL entries are skipped.
 */
static VALUE rxml_parser_context_name_tab_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  if (xctxt->nameTab == NULL)
    return Qnil;

  VALUE names = rb_ary_new();

  /* Walk from the last used slot back to the first. */
  int idx = xctxt->nameNr;
  while (--idx >= 0)
  {
    if (xctxt->nameTab[idx] != NULL)
      rb_ary_push(names, rxml_new_cstr(xctxt->nameTab[idx], xctxt->encoding));
  }

  return names;
}
Obtain the root node of this context.
/*
 * Obtain the node of this context.
 *
 * Returns ctxt->node wrapped by rxml_node_wrap, or nil when the context has
 * no node.
 */
static VALUE rxml_parser_context_node_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return xctxt->node != NULL ? rxml_node_wrap(xctxt->node) : Qnil;
}
Obtain the node depth for this context.
/*
 * Obtain the node depth for this context.
 *
 * Returns ctxt->nodeNr as a Ruby integer.
 */
static VALUE rxml_parser_context_node_depth_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  VALUE node_depth = INT2NUM(xctxt->nodeNr);
  return node_depth;
}
Obtain the maximum node depth for this context.
/*
 * Obtain the maximum node depth for this context.
 *
 * Returns ctxt->nodeMax as a Ruby integer.
 */
static VALUE rxml_parser_context_node_depth_max_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  VALUE node_depth_max = INT2NUM(xctxt->nodeMax);
  return node_depth_max;
}
Obtain the number of characters in this context.
/*
 * Obtain the number of characters in this context.
 *
 * Returns ctxt->nbChars as a Ruby integer (converted with LONG2NUM).
 */
static VALUE rxml_parser_context_num_chars_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  VALUE num_chars = LONG2NUM(xctxt->nbChars);
  return num_chars;
}
Returns the parser options for this context. Multiple options can be combined by using Bitwise OR (|).
/*
 * Returns the parser options for this context.
 *
 * The value is ctxt->options as a Ruby integer; multiple options are
 * combined with bitwise OR (|).
 */
static VALUE rxml_parser_context_options_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  VALUE current_options = INT2NUM(xctxt->options);
  return current_options;
}
Provides control over the execution of a parser. Valid values are the constants defined on XML::Parser::Options. Multiple options can be combined by using Bitwise OR (|).
/*
 * Provides control over the execution of a parser.
 *
 * options must be a Fixnum (TypeError otherwise) holding one or more option
 * values combined with bitwise OR (|). The value is applied to the context
 * with xmlCtxtUseOptions. Returns self.
 */
static VALUE rxml_parser_context_options_set(VALUE self, VALUE options)
{
  xmlParserCtxtPtr xctxt;

  Check_Type(options, T_FIXNUM);
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  int xoptions = NUM2INT(options);
  xmlCtxtUseOptions(xctxt, xoptions);

  return self;
}
Control whether recovery mode is enabled in this context.
/*
 * Control whether recovery mode is enabled in this context.
 *
 * Passing false clears ctxt->recovery and returns false; any other value
 * sets it to 1 and returns true.
 */
static VALUE rxml_parser_context_recovery_set(VALUE self, VALUE value)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  int enable = (value == Qfalse) ? 0 : 1;
  xctxt->recovery = enable;

  return enable ? Qtrue : Qfalse;
}
Determine whether recovery mode is enabled in this context.
/*
 * Determine whether recovery mode is enabled in this context.
 *
 * Returns true when ctxt->recovery is non-zero, false otherwise.
 */
static VALUE rxml_parser_context_recovery_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return xctxt->recovery ? Qtrue : Qfalse;
}
Control whether external entity replacement is enabled in this context.
/*
 * Control whether external entity replacement is enabled in this context.
 *
 * Passing false clears ctxt->replaceEntities and returns false; any other
 * value sets it to 1 and returns true.
 */
static VALUE rxml_parser_context_replace_entities_set(VALUE self, VALUE value)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  int enable = (value == Qfalse) ? 0 : 1;
  xctxt->replaceEntities = enable;

  return enable ? Qtrue : Qfalse;
}
Determine whether external entity replacement is enabled in this context.
/*
 * Determine whether external entity replacement is enabled in this context.
 *
 * Returns true when ctxt->replaceEntities is non-zero, false otherwise.
 */
static VALUE rxml_parser_context_replace_entities_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return xctxt->replaceEntities ? Qtrue : Qfalse;
}
Obtain the space depth for this context.
/*
 * Obtain the space depth for this context.
 *
 * Returns ctxt->spaceNr as a Ruby integer.
 */
static VALUE rxml_parser_context_space_depth_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  VALUE space_depth = INT2NUM(xctxt->spaceNr);
  return space_depth;
}
Obtain the maximum space depth for this context.
/*
 * Obtain the maximum space depth for this context.
 *
 * Returns ctxt->spaceMax as a Ruby integer.
 */
static VALUE rxml_parser_context_space_depth_max_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  VALUE space_depth_max = INT2NUM(xctxt->spaceMax);
  return space_depth_max;
}
Determine whether this is a standalone context.
/*
 * Determine whether this is a standalone context.
 *
 * Returns true when ctxt->standalone is non-zero, false otherwise.
 */
static VALUE rxml_parser_context_standalone_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return xctxt->standalone ? Qtrue : Qfalse;
}
Determine whether this context maintains statistics.
/*
 * Determine whether this context maintains statistics.
 *
 * Returns true when ctxt->record_info is non-zero, false otherwise.
 */
static VALUE rxml_parser_context_stats_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return xctxt->record_info ? Qtrue : Qfalse;
}
Determine whether this context is a subset of an external context.
/*
 * Determine whether this context is a subset of an external context.
 *
 * Returns true when ctxt->inSubset equals 2, false otherwise.
 */
static VALUE rxml_parser_context_subset_external_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return (xctxt->inSubset == 2) ? Qtrue : Qfalse;
}
Obtain this context’s external subset system identifier. (valid only if either of subset_external? or subset_internal? is true).
/*
 * Obtain this context's external subset system identifier.
 *
 * Returns ctxt->extSubSystem converted by rxml_new_cstr with the context's
 * encoding, or nil when it is not set.
 */
static VALUE rxml_parser_context_subset_external_system_id_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  if (xctxt->extSubSystem != NULL)
    return rxml_new_cstr(xctxt->extSubSystem, xctxt->encoding);

  return Qnil;
}
Obtain this context’s external subset URI. (valid only if either of subset_external? or subset_internal? is true).
/*
 * Obtain this context's external subset URI.
 *
 * Returns ctxt->extSubURI converted by rxml_new_cstr with the context's
 * encoding, or nil when it is not set.
 */
static VALUE rxml_parser_context_subset_external_uri_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  if (xctxt->extSubURI != NULL)
    return rxml_new_cstr(xctxt->extSubURI, xctxt->encoding);

  return Qnil;
}
Determine whether this context is a subset of an internal context.
/*
 * Determine whether this context is a subset of an internal context.
 *
 * Returns true when ctxt->inSubset equals 1, false otherwise.
 */
static VALUE rxml_parser_context_subset_internal_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return (xctxt->inSubset == 1) ? Qtrue : Qfalse;
}
Obtain this context’s subset name (valid only if either of subset_external? or subset_internal? is true).
/*
 * Obtain this context's subset name.
 *
 * Returns ctxt->intSubName converted by rxml_new_cstr with the context's
 * encoding, or nil when it is not set.
 */
static VALUE rxml_parser_context_subset_name_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  if (xctxt->intSubName != NULL)
    return rxml_new_cstr(xctxt->intSubName, xctxt->encoding);

  return Qnil;
}
Determine whether this context is valid.
/*
 * Determine whether this context is valid.
 *
 * Returns true when ctxt->valid is non-zero, false otherwise.
 */
static VALUE rxml_parser_context_valid_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return xctxt->valid ? Qtrue : Qfalse;
}
Determine whether validation is enabled in this context.
/*
 * Determine whether validation is enabled in this context.
 *
 * Returns true when ctxt->validate is non-zero, false otherwise.
 */
static VALUE rxml_parser_context_validate_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return xctxt->validate ? Qtrue : Qfalse;
}
Obtain this context’s version identifier.
/*
 * Obtain this context's version identifier.
 *
 * Returns ctxt->version converted by rxml_new_cstr with the context's
 * encoding, or nil when no version is set.
 */
static VALUE rxml_parser_context_version_get(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  if (xctxt->version != NULL)
    return rxml_new_cstr(xctxt->version, xctxt->encoding);

  return Qnil;
}
Determine whether this context contains well-formed XML.
/*
 * Determine whether this context contains well-formed XML.
 *
 * Returns true when ctxt->wellFormed is non-zero, false otherwise.
 */
static VALUE rxml_parser_context_well_formed_q(VALUE self)
{
  xmlParserCtxtPtr xctxt;
  Data_Get_Struct(self, xmlParserCtxt, xctxt);

  return xctxt->wellFormed ? Qtrue : Qfalse;
}