2016-05-11 00:26:44 +00:00
< ? php
namespace Sabre\Xml ;
use XMLReader ;
/**
 * The Reader class expands upon PHP's built-in XMLReader.
 *
 * The intended usage is to assign certain XML elements to PHP classes. These
 * need to be registered using the $elementMap public property.
 *
 * After this is done, a single call to parse() will parse the entire document,
 * and delegate sub-sections of the document to element classes.
 *
 * @copyright Copyright (C) 2009-2015 fruux GmbH (https://fruux.com/).
 * @author Evert Pot (http://evertpot.com/)
 * @license http://sabre.io/license/ Modified BSD License
 */
class Reader extends XMLReader {
use ContextStackTrait ;
/**
 * Builds the clark-notation name of the node the cursor is currently on.
 *
 * The result has the form "{namespace-uri}local-name", for example
 * "{http://www.w3.org/2005/Atom}feed". When the node has no namespace the
 * result is "{}local-name".
 *
 * @return string|null Null when the current node has no (truthy) local name,
 *                     which is the case when the cursor is not on an element.
 */
function getClark() {

    $localName = $this->localName;

    return $localName
        ? '{' . $this->namespaceURI . '}' . $localName
        : null;

}
/**
 * Reads the entire document and returns the parsed root element.
 *
 * The returned array has the following three keys:
 *   * name       - The root element name, in clark-notation.
 *   * value      - The value produced for the root element.
 *   * attributes - An array of attributes.
 *
 * While parsing, libxml's internal error handling is switched on, so that
 * problems are collected and thrown as an exception instead of surfacing as
 * PHP errors. The previous libxml settings are restored before this method
 * returns or throws.
 *
 * @throws LibXMLException If libxml reported errors while reading.
 * @return array
 */
function parse() {

    // libxml_disable_entity_loader() is deprecated as of PHP 8.0, and is
    // unnecessary from libxml 2.9.0 onwards because external entity loading
    // is disabled by default there. Only flip the global switch on older
    // libxml builds, and only restore it if we actually changed it.
    $toggleEntityLoader = \LIBXML_VERSION < 20900;
    $previousEntityState = null;
    if ($toggleEntityLoader) {
        $previousEntityState = libxml_disable_entity_loader(true);
    }
    $previousSetting = libxml_use_internal_errors(true);

    try {

        // Really sorry about the silence operator, seems like I have no
        // choice. See:
        //
        // https://bugs.php.net/bug.php?id=64230
        while ($this->nodeType !== self::ELEMENT && @$this->read()) {
            // noop: just skipping ahead to the first element.
        }
        $result = $this->parseCurrentElement();

        // Anything libxml collected along the way is turned into an
        // exception. The buffer is always cleared so errors don't leak into
        // a later parse.
        $errors = libxml_get_errors();
        libxml_clear_errors();
        if ($errors) {
            throw new LibXMLException($errors);
        }

    } finally {
        libxml_use_internal_errors($previousSetting);
        if ($toggleEntityLoader) {
            libxml_disable_entity_loader($previousEntityState);
        }
    }

    return $result;

}
/**
 * Parses everything in the current sub-tree and returns it as an array of
 * elements.
 *
 * This is a thin wrapper around parseInnerTree() that guarantees an array
 * result: whenever parseInnerTree() yields something that is not an array
 * (for instance text, or nothing at all), an empty array is returned and
 * that value is discarded.
 *
 * If the $elementMap argument is specified, it is handed to
 * parseInnerTree(), which uses it in place of the existing elementMap for
 * the duration of the sub-tree.
 *
 * @param array $elementMap
 * @return array
 */
function parseGetElements(array $elementMap = null) {

    $tree = $this->parseInnerTree($elementMap);

    return is_array($tree) ? $tree : [];

}
/**
 * Parses all nodes below the current element.
 *
 * The result is:
 *   * an array of parsed child elements, if there was at least one;
 *   * otherwise the concatenated text / CDATA content as a string;
 *   * otherwise null (empty element, or no text and no child elements).
 *
 * If there's both text and sub-elements, the text is discarded.
 *
 * If the $elementMap argument is specified, the current context is pushed,
 * $elementMap replaces the elementMap while the sub-tree is parsed, and the
 * context is popped again afterwards, including when an exception is thrown.
 *
 * @param array $elementMap
 * @throws LibXMLException If libxml reported errors while reading.
 * @throws ParseException If the reader could not advance, or the document
 *                        ended before the current element was closed.
 * @return array|string|null
 */
function parseInnerTree(array $elementMap = null) {

    // A self-closing element has nothing inside it: step over it and stop.
    if ($this->nodeType === self::ELEMENT && $this->isEmptyElement) {
        $this->next();
        return null;
    }

    $collectedText = null;
    $children = [];

    $hasCustomMap = !is_null($elementMap);
    if ($hasCustomMap) {
        $this->pushContext();
        $this->elementMap = $elementMap;
    }

    try {

        // Really sorry about the silence operator, seems like I have no
        // choice. See:
        //
        // https://bugs.php.net/bug.php?id=64230
        $advanced = @$this->read();
        if (!$advanced) {
            $errors = libxml_get_errors();
            libxml_clear_errors();
            if ($errors) {
                throw new LibXMLException($errors);
            }
            throw new ParseException('This should never happen (famous last words)');
        }

        $insideElement = true;
        while ($insideElement) {

            if (!$this->isValid()) {
                $errors = libxml_get_errors();
                if ($errors) {
                    libxml_clear_errors();
                    throw new LibXMLException($errors);
                }
            }

            $currentType = $this->nodeType;

            if ($currentType === self::ELEMENT) {
                // The element's deserializer is responsible for moving the
                // cursor past the element, so no read() here.
                $children[] = $this->parseCurrentElement();
            } elseif ($currentType === self::TEXT || $currentType === self::CDATA) {
                $collectedText .= $this->value;
                $this->read();
            } elseif ($currentType === self::END_ELEMENT) {
                // Ensuring we are moving the cursor after the end element,
                // then leave the loop.
                $this->read();
                $insideElement = false;
            } elseif ($currentType === self::NONE) {
                throw new ParseException('We hit the end of the document prematurely. This likely means that some parser "eats" too many elements. Do not attempt to continue parsing.');
            } else {
                // Any other node type is skipped: advance to the next node.
                $this->read();
            }

        }

    } finally {
        if ($hasCustomMap) {
            $this->popContext();
        }
    }

    if ($children) {
        return $children;
    }

    return $collectedText;

}
/**
 * Collects the text found below the current node and returns it as a single
 * string.
 *
 * The reader is advanced node by node until either reading fails or a node
 * at the starting depth is reached. The values of text, CDATA and whitespace
 * nodes encountered on the way are concatenated; every other node type is
 * ignored.
 *
 * @return string
 */
function readText() {

    $startDepth = $this->depth;
    $buffer = '';

    while (true) {

        if (!$this->read()) {
            break;
        }
        // Back at the depth we started from: the sub-tree is finished.
        if ($this->depth == $startDepth) {
            break;
        }

        switch ($this->nodeType) {
            case XMLReader::TEXT:
            case XMLReader::CDATA:
            case XMLReader::WHITESPACE:
                $buffer .= $this->value;
                break;
        }

    }

    return $buffer;

}
/**
 * Parses the XML element the cursor is currently on.
 *
 * The element's attributes are read first. The deserializer registered for
 * the element name is then looked up and invoked with this reader as its
 * only argument.
 *
 * The returned array has 3 keys:
 *   * name       - The clark-notation XML element name.
 *   * value      - Whatever the deserializer returned.
 *   * attributes - A key-value list of attributes.
 *
 * @return array
 */
function parseCurrentElement() {

    $clarkName = $this->getClark();

    // Attributes must be collected before the deserializer runs.
    $attributes = $this->hasAttributes ? $this->parseAttributes() : [];

    $deserializer = $this->getDeserializerForElementName($clarkName);
    $parsedValue = call_user_func($deserializer, $this);

    return [
        'name'       => $clarkName,
        'value'      => $parsedValue,
        'attributes' => $attributes,
    ];

}
/**
 * Collects all attributes of the current element into a key-value array.
 *
 * Attributes without a namespace are keyed by their local name. Attributes
 * that carry a namespace are keyed by their clark-notation name. Namespace
 * declarations (attributes in the 'http://www.w3.org/2000/xmlns/'
 * namespace) are skipped.
 *
 * When done, the cursor is moved back to the element owning the attributes.
 *
 * @return array
 */
function parseAttributes() {

    $collected = [];

    while ($this->moveToNextAttribute()) {

        $namespace = $this->namespaceURI;

        // Ignoring 'xmlns', it doesn't make any sense.
        if ($namespace === 'http://www.w3.org/2000/xmlns/') {
            continue;
        }

        $key = $namespace ? $this->getClark() : $this->localName;
        $collected[$key] = $this->value;

    }

    $this->moveToElement();

    return $collected;

}
/**
 * Returns the callable that should be used to parse the element identified
 * by its clark-notation name.
 *
 * Lookup order:
 *   1. $name as a key of the elementMap.
 *   2. If $name starts with '{}' (no namespace), $name without that prefix.
 *   3. Otherwise the 'xmlDeserialize' method of Sabre\Xml\Element\Base.
 *
 * A mapped value that is a subclass of Sabre\Xml\XmlDeserializable is
 * turned into a [value, 'xmlDeserialize'] callable; any other callable is
 * returned as-is.
 *
 * @param string $name
 * @throws \LogicException If the mapped value is neither of the above.
 * @return callable
 */
function getDeserializerForElementName($name) {

    if (!array_key_exists($name, $this->elementMap)) {

        $hasEmptyNamespace = substr($name, 0, 2) === '{}';

        // The second check only runs for '{}'-prefixed names.
        if (!$hasEmptyNamespace || !array_key_exists(substr($name, 2), $this->elementMap)) {
            return ['Sabre\\Xml\\Element\\Base', 'xmlDeserialize'];
        }

        // Registered under its bare local name; continue with that key.
        $name = substr($name, 2);

    }

    $candidate = $this->elementMap[$name];

    if (is_subclass_of($candidate, 'Sabre\\Xml\\XmlDeserializable')) {
        return [$candidate, 'xmlDeserialize'];
    }

    if (is_callable($candidate)) {
        return $candidate;
    }

    // Nothing usable: describe the offending value in the error message.
    $type = gettype($candidate);
    $detail = '';
    if ($type === 'string') {
        $detail = ' (' . $candidate . ')';
    } elseif ($type === 'object') {
        $detail = ' (' . get_class($candidate) . ')';
    }

    throw new \LogicException('Could not use this type as a deserializer: ' . $type . $detail . ' for element: ' . $name);

}
}