Php xml stream parser

thefish / php_stream_xml_parser.php

This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters

/**
 * Stream XML parser, with closure and callbacks.
 *
 * Feeds XML (a string or a stream resource) through PHP's SAX-style XML
 * parser and, for every element whose path matches a registered path,
 * rebuilds that element as a SimpleXMLElement and hands it to the callbacks.
 *
 * Warning - tags in path are case sensitive (case folding is disabled).
 *
 * @author thefish
 */
class Parser
{
    /**
     * @var array Registered callbacks, keyed by normalised path (always
     *            ending in "/"); each value is a list of callables
     */
    private $callbacks = array();

    /**
     * @var string The current node path being investigated
     */
    private $currentPath = '/';

    /**
     * @var array Serialised XML collected so far for each open path that has
     *            at least one callback registered
     */
    private $pathData = array();

    /**
     * @var boolean Whether or not the object is currently parsing
     */
    private $parse = false;

    /**
     * @var array Namespace prefixes seen so far, mapped to their (already
     *            XML-escaped) URIs
     */
    private $namespaces = array();

    /**
     * Parses the XML provided using streaming and callbacks
     *
     * @param mixed $data      Either a stream resource or string containing XML
     * @param int   $chunkSize The size of data to read in at a time. Only
     *                         relevant if $data is a stream
     *
     * @return Parser
     * @throws \Exception If the arguments are invalid or the XML is malformed
     */
    public function parse($data, $chunkSize = 1024)
    {
        //Ensure that the $data var is of the right type
        if (!is_string($data)
            && (!is_resource($data) || get_resource_type($data) !== 'stream')
        ) {
            throw new \Exception('Data must be a string or a stream resource');
        }

        //Ensure $chunkSize is the right type
        if (!is_int($chunkSize)) {
            throw new \Exception('Chunk size must be an integer');
        }

        //fread() cannot read zero or a negative number of bytes
        if ($chunkSize < 1) {
            throw new \Exception('Chunk size must be greater than zero');
        }

        //Initialise the object
        $this->init();

        //Create the parser and set the parsing flag
        $this->parse = true;
        $parser      = xml_parser_create();

        //Get real tag names (no upper-casing)
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);

        //Set the parser up, ready to stream through the XML
        xml_set_object($parser, $this);

        //start() and end() deal with the start and end tags respectively
        xml_set_element_handler($parser, 'start', 'end');

        //addCdata() receives all character data
        xml_set_character_data_handler($parser, 'addCdata');

        //For everything else, use addData()
        xml_set_default_handler($parser, 'addData');

        try {
            if (is_resource($data)) {
                //Not all resources support fseek. For those that don't,
                //suppress the error
                @fseek($data, 0);

                $isFinal   = false;
                $parsedAny = false;
                while ($this->parse && !$isFinal) {
                    $chunk = fread($data, $chunkSize);

                    //Compare explicitly: a chunk consisting of "0" is valid
                    //data even though it is falsy
                    if ($chunk === false || $chunk === '') {
                        break;
                    }

                    $isFinal   = feof($data);
                    $parsedAny = true;
                    $this->parseString($parser, $chunk, $isFinal);
                }

                //EOF is sometimes only detected by the read *after* the last
                //byte. Tell the parser the document is complete so that
                //truncated XML is reported instead of silently accepted
                if ($this->parse && $parsedAny && !$isFinal) {
                    $this->parseString($parser, '', true);
                }
            } else {
                $this->parseString($parser, $data, true);
            }
        } catch (\Exception $e) {
            //Do not leak the parser when parsing or a callback fails
            xml_parser_free($parser);
            throw $e;
        }

        //Free up the parser
        xml_parser_free($parser);

        return $this;
    }

    /**
     * Registers a single callback for a specified XML path
     *
     * @param string   $path     The path that the callback is for
     * @param callable $callback The callback mechanism to use; it is called
     *                           with this Parser and a SimpleXMLElement
     *
     * @return Parser
     * @throws \Exception
     */
    public function registerCallback($path, $callback)
    {
        //Ensure the path is a string
        if (!is_string($path)) {
            throw new \Exception('Path must be a string');
        }

        //Ensure that the callback is callable
        if (!is_callable($callback)) {
            throw new \Exception('Callback must be callable');
        }

        //Paths are stored with a trailing slash so they can be compared
        //directly against the current path
        if (substr($path, -1, 1) !== '/') {
            $path .= '/';
        }

        //If this is the first callback for this path, initialise the variable
        if (!isset($this->callbacks[$path])) {
            $this->callbacks[$path] = array();
        }

        //Add the callback
        $this->callbacks[$path][] = $callback;

        return $this;
    }

    /**
     * Registers multiple callbacks for the specified paths, for example
     *
     * $parser->registerCallbacks(array(
     *     array('/path/to/element', 'callback'),
     *     array('/path/to/another/element', array($this, 'callback')),
     * ));
     *
     * @param array $pathCallbacks An array of paths and callbacks
     *
     * @return Parser
     * @throws \Exception
     */
    public function registerCallbacks(array $pathCallbacks)
    {
        foreach ($pathCallbacks as $row) {
            //count() on a non-array is a TypeError on PHP 8, so check first
            if (!is_array($row) || count($row) != 2) {
                throw new \Exception(
                    'Each array element in $pathCallbacks must be an array of'
                    . ' 2 elements (the path and the callback)'
                );
            }

            list($path, $callback) = $row;
            $this->registerCallback($path, $callback);
        }

        return $this;
    }

    /**
     * Stops the parser from parsing any more. Because of the nature of
     * streaming there may be more data to read. If this is the case then no
     * further callbacks will be called.
     *
     * @return Parser
     */
    public function stopParsing()
    {
        $this->parse = false;

        return $this;
    }

    /**
     * Initialise the object variables
     *
     * @return NULL
     */
    private function init()
    {
        $this->namespaces  = array();
        $this->currentPath = '/';
        $this->pathData    = array();
        $this->parse       = false;
    }

    /**
     * Parse data using xml_parse
     *
     * @param resource $parser  The XML parser
     * @param string   $data    The data to parse
     * @param boolean  $isFinal Whether or not this is the final part to parse
     *
     * @return NULL
     * @throws \Exception With the parser's error string and line number
     */
    protected function parseString($parser, $data, $isFinal)
    {
        if (!xml_parse($parser, $data, $isFinal)) {
            throw new \Exception(
                xml_error_string(xml_get_error_code($parser))
                . ' At line: '
                . xml_get_current_line_number($parser)
            );
        }
    }

    /**
     * Parses the start tag
     *
     * @param resource $parser     The XML parser
     * @param string   $tag        The tag that's being started
     * @param array    $attributes The attributes on this tag
     *
     * @return NULL
     */
    protected function start($parser, $tag, $attributes)
    {
        //Update the current path
        $this->currentPath .= $tag . '/';

        //Start collecting data if a callback is registered for this path
        foreach ($this->callbacks as $path => $callbacks) {
            if ($path === $this->currentPath) {
                $this->pathData[$this->currentPath] = '';
            }
        }

        //The parser hands over decoded attribute values, so they must be
        //escaped again before being written back into markup
        $options = ENT_QUOTES;
        if (defined('ENT_XML1')) {
            $options |= ENT_XML1;
        }

        //Generate the tag, with attributes
        $data = '<' . $tag;
        foreach ($attributes as $key => $val) {
            $val   = htmlspecialchars($val, $options, 'UTF-8');
            $data .= ' ' . $key . '="' . $val . '"';

            //Remember namespace declarations so they can be re-declared on
            //fragments that are cut out below the declaring element
            if (stripos($key, 'xmlns:') !== false) {
                $key = str_replace('xmlns:', '', $key);
                $this->namespaces[$key] = $val;
            }
        }
        $data .= '>';

        //Add the data to the path data required
        $this->addData($parser, $data);
    }

    /**
     * Adds character data, wrapped in a CDATA section, to any paths that
     * require it
     *
     * @param resource $parser
     * @param string   $data
     *
     * @return NULL
     */
    protected function addCdata($parser, $data)
    {
        //A literal "]]>" would end the CDATA section early, so split it
        //across two adjacent sections
        $data = str_replace(']]>', ']]]]><![CDATA[>', $data);

        $this->addData($parser, '<![CDATA[' . $data . ']]>');
    }

    /**
     * Adds data to any paths that require it
     *
     * @param resource $parser
     * @param string   $data
     *
     * @return NULL
     */
    protected function addData($parser, $data)
    {
        //Having a path data entry means at least 1 callback is interested in
        //the data. Loop through each path here and, if inside that path, add
        //the data
        foreach ($this->pathData as $key => $val) {
            if (strpos($this->currentPath, $key) !== false) {
                $this->pathData[$key] .= $data;
            }
        }
    }

    /**
     * Parses the end of a tag
     *
     * @param resource $parser
     * @param string   $tag
     *
     * @return NULL
     */
    protected function end($parser, $tag)
    {
        //Add the closing tag to the paths that require it
        $data = '</' . $tag . '>';
        $this->addData($parser, $data);

        //Loop through each callback and see if the path matches the
        //current path
        foreach ($this->callbacks as $path => $callbacks) {
            //If parsing should continue, and the paths match, then a callback
            //needs to be made
            if ($this->parse && $this->currentPath === $path) {
                if (!$this->fireCallbacks($path, $callbacks)) {
                    break;
                }
            }
        }

        //Unset the path data for this path, as it's no longer needed
        unset($this->pathData[$this->currentPath]);

        //Strip "<tag>/" from the end of the path (effectively moving up a
        //directory)
        $this->currentPath = substr(
            $this->currentPath,
            0,
            strlen($this->currentPath) - (strlen($tag) + 1)
        );
    }

    /**
     * Generates a SimpleXMLElement and passes it to each of the callbacks
     *
     * @param string $path      The path to create the SimpleXMLElement from
     * @param array  $callbacks An array of callbacks to be fired.
     *
     * @return boolean FALSE if a callback stopped the parsing, TRUE otherwise
     */
    protected function fireCallbacks($path, array $callbacks)
    {
        //Nothing was collected for this path (e.g. the callback was
        //registered after the element had already been opened)
        if (!isset($this->pathData[$path])) {
            return true;
        }

        $namespaceStr = '';
        $namespaces   = $this->namespaces;
        $matches      = array();
        $pathData     = $this->pathData[$path];
        $regex        = '/xmlns:(?P<namespace>[^=]+)="[^\"]+"/sm';

        // Make sure any namespaces already defined in this element are not
        // defined again
        if (preg_match_all($regex, $pathData, $matches)) {
            foreach ($matches['namespace'] as $key => $value) {
                unset($namespaces[$value]);
            }
        }

        // Define all remaining namespaces on the root element
        foreach ($namespaces as $key => $val) {
            $namespaceStr .= ' xmlns:' . $key . '="' . $val . '"';
        }

        //Build the SimpleXMLElement object. As this is a partial XML
        //document suppress any warnings or errors that might arise
        //from invalid namespaces. The namespace string is escaped so that
        //"$" or "\" in a URI is not read as a backreference.
        $data = new \SimpleXMLElement(
            preg_replace(
                '/^(<[^\s>]+)/',
                '$1' . addcslashes($namespaceStr, '\\$'),
                $pathData
            ),
            LIBXML_COMPACT | LIBXML_NOERROR | LIBXML_NOWARNING | LIBXML_NOCDATA
        );

        //Loop through each callback. If one of them stops the parsing
        //then cease operation immediately
        foreach ($callbacks as $callback) {
            call_user_func_array($callback, array($this, $data));

            if (!$this->parse) {
                return false;
            }
        }

        return true;
    }
}

Источник

Saved searches

Use saved searches to filter your results more quickly

You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session.

PHP SAX XML Stream Reader

License

hobnob/xmlStreamReader

This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?

Sign In Required

Please sign in to use Codespaces.

Launching GitHub Desktop

If nothing happens, download GitHub Desktop and try again.

Launching GitHub Desktop

If nothing happens, download GitHub Desktop and try again.

Launching Xcode

If nothing happens, download Xcode and try again.

Launching Visual Studio Code

Your codespace will open once ready.

There was a problem preparing your codespace, please try again.

Latest commit

Git stats

Files

Failed to load latest commit information.

README.md

Build Status Dependency Status Latest Stable Version Monthly Downloads

Code Coverage Scrutinizer Quality Score SensioLabsInsight

Reads XML from either a string or a stream, allowing the registration of callbacks that are invoked when an element is found that matches a given path.

Installation with Composer

Declare xmlStreamReader as a dependency in your project's composer.json file:

{ "require": { "hobnob/xml-stream-reader": "1.0.*" } }
$xmlParser = new \Hobnob\XmlStreamReader\Parser();
$xmlParser->registerCallback(
    '/xml/node/path',
    function( \Hobnob\XmlStreamReader\Parser $parser, \SimpleXMLElement $node ) {
        // do stuff with $node
    }
);
$xmlParser->registerCallback(
    '/xml/node/@attr',
    function( \Hobnob\XmlStreamReader\Parser $parser, $attrValue ) {
        // do stuff with $attrValue
    }
);
$xmlParser->parse(fopen('file.xml', 'r'));

Источник

Читайте также:  Python создать пустую таблицу
Оцените статью