Transformer.php 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. <?php declare(strict_types=1);
  2. namespace TheNorthMemory\Xml;
  3. use const LIBXML_VERSION;
  4. use const LIBXML_NONET;
  5. use const LIBXML_COMPACT;
  6. use const LIBXML_NOCDATA;
  7. use const LIBXML_NOBLANKS;
  8. use function array_walk;
  9. use function is_array;
  10. use function is_object;
  11. use function is_string;
  12. use function preg_replace;
  13. use function strpos;
  14. use function preg_match;
  15. use function sprintf;
  16. use function trigger_error;
  17. use function libxml_clear_errors;
  18. use function libxml_disable_entity_loader;
  19. use function libxml_get_last_error;
  20. use function libxml_use_internal_errors;
  21. use function simplexml_load_string;
  22. use SimpleXMLElement;
  23. use Traversable;
  24. use XMLWriter;
  25. /**
  26. * Transform the `XML` to `Array` or `Array` to `XML`.
  27. *
  28. * @template TKey of array-key
  29. * @template TValue of \Stringable
  30. */
  31. class Transformer
  32. {
  33. /**
  34. * Convert the $xml string to array.
  35. *
  36. * Always issue the `additional Libxml parameters` asof `LIBXML_NONET`
  37. * | `LIBXML_COMPACT`
  38. * | `LIBXML_NOCDATA`
  39. * | `LIBXML_NOBLANKS`
  40. *
  41. * @param string $xml - The xml string, default is `<xml/>` string
  42. *
  43. * @return array<TKey,TValue>
  44. */
  45. public static function toArray(string $xml = '<xml/>'): array
  46. {
  47. LIBXML_VERSION < 20900 && $previous = libxml_disable_entity_loader(true);
  48. libxml_use_internal_errors(true);
  49. $el = simplexml_load_string(static::sanitize($xml), SimpleXMLElement::class, LIBXML_NONET | LIBXML_COMPACT | LIBXML_NOCDATA | LIBXML_NOBLANKS);
  50. LIBXML_VERSION < 20900 && isset($previous) && libxml_disable_entity_loader($previous);
  51. if (false === $el) {
  52. // while parsing failed, let's clean the internal buffer and
  53. // only leave the last error message which still can be fetched by the `error_get_last()` function.
  54. if (false !== ($err = libxml_get_last_error())) {
  55. libxml_clear_errors();
  56. @trigger_error(sprintf(
  57. 'Parsing the $xml failed with the last error(level=%d,code=%d,message=%s).',
  58. $err->level, $err->code, $err->message
  59. ));
  60. }
  61. return [];
  62. }
  63. return static::cast($el);
  64. }
  65. /**
  66. * Recursive cast the $thing as array data structure.
  67. *
  68. * @param array<TKey,SimpleXMLElement|TValue>|SimpleXMLElement $thing - The thing
  69. *
  70. * @return array<TKey,TValue>
  71. */
  72. protected static function cast($thing): array
  73. {
  74. $data = (array) $thing;
  75. array_walk($data, static function(&$value) { static::value($value); });
  76. return $data;
  77. }
  78. /**
  79. * Cast the value $thing, specially doing the `array`, `SimpleXMLElement` to `array`
  80. *
  81. * @param array<TKey,TValue>|SimpleXMLElement $thing - The value thing reference
  82. */
  83. protected static function value(&$thing): void
  84. {
  85. is_array($thing) && $thing = static::cast($thing);
  86. if (is_object($thing) && $thing instanceof SimpleXMLElement) {
  87. $thing = $thing->count() ? static::cast($thing) : (string) $thing;
  88. }
  89. }
  90. /**
  91. * Trim invalid characters from the $xml string
  92. *
  93. * @see https://github.com/w7corp/easywechat/pull/1419
  94. * @license https://github.com/w7corp/easywechat/blob/4.x/LICENSE
  95. *
  96. * @param string $xml - The xml string
  97. */
  98. public static function sanitize(string $xml): string
  99. {
  100. return preg_replace('#[^\x{9}\x{A}\x{D}\x{20}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]+#u', '', $xml) ?? '';
  101. }
  102. /**
  103. * Transform the given $data array as of an XML string.
  104. *
  105. * @param array<TKey,TValue|LabeledArrayIterator<TKey,TValue>> $data - The data array
  106. * @param boolean $headless - The headless flag, default `true` means without the `<?xml version="1.0" encoding="UTF-8" ?>` doctype
  107. * @param boolean $indent - Toggle indentation on/off, default is `false` off
  108. * @param string $root - The root node label, default is `xml` string
  109. * @param string $item - The nest array identify text, default is `item` string
  110. *
  111. * @return string - The xml string
  112. */
  113. public static function toXml(array $data, bool $headless = true, bool $indent = false, string $root = 'xml', string $item = 'item'): string
  114. {
  115. $writer = new XMLWriter();
  116. $writer->openMemory();
  117. $writer->setIndent($indent);
  118. $headless || $writer->startDocument('1.0', 'utf-8');
  119. $writer->startElement($root);
  120. static::walk($writer, $data, $item);
  121. $writer->endElement();
  122. $headless || $writer->endDocument();
  123. $xml = $writer->outputMemory();
  124. $writer = null;
  125. return $xml;
  126. }
  127. /**
  128. * Wrap the native `Array` data with spicial `label` and mark it whether or nor is wrapped by this `label`.
  129. *
  130. * @param array<TKey,TValue> $data - The data
  131. * @param boolean $wrapped - the wrapping flag, default is `false`
  132. * @param string $label - The label, default is `item`
  133. *
  134. * @return LabeledArrayIterator<TKey,TValue>
  135. */
  136. public static function wrap(array $data, bool $wrapped = false, string $label = 'item'): LabeledArrayIterator
  137. {
  138. return (new LabeledArrayIterator($data))->wrapped($wrapped)->withLabel($label);
  139. }
  140. /**
  141. * Walk the given data array by the `XMLWriter` instance.
  142. *
  143. * @param \XMLWriter $writer - The `XMLWriter` instance reference
  144. * @param array<TKey,TValue|array<TKey,TValue>|LabeledArrayIterator<TKey,TValue>> $data - The data array
  145. * @param string $item - The nest array identify tag text
  146. */
  147. protected static function walk(XMLWriter &$writer, array $data, string $item): void
  148. {
  149. foreach ($data as $key => $value) {
  150. $tag = is_string($key) && static::isElementNameValid($key) ? $key : $item;
  151. $withoutParentElement = false;
  152. if ($value instanceof LabeledArrayIterator && ($withoutParentElement = $value->isWrapped())) {
  153. $tag = $value->getLabel();
  154. }
  155. $withoutParentElement || $writer->startElement($tag);
  156. if (is_array($value) || (is_object($value) && $value instanceof Traversable)) {
  157. static::walk($writer, (array) $value, $withoutParentElement ? $tag : $item);
  158. } else {
  159. static::content($writer, (string) $value);
  160. }
  161. $withoutParentElement || $writer->endElement();
  162. }
  163. }
  164. /**
  165. * Write content text.
  166. *
  167. * The content text includes the characters `<`, `>`, `&` and `"` are written as CDATA references.
  168. * All others including `'` are written literally.
  169. *
  170. * @param \XMLWriter $writer - The `XMLWriter` instance reference
  171. * @param string $thing - The content text
  172. */
  173. protected static function content(XMLWriter &$writer, string $thing = ''): void
  174. {
  175. static::needsCdataWrapping($thing) && $writer->writeCdata($thing) || $writer->text($thing);
  176. }
  177. /**
  178. * Checks the name is a valid xml element name.
  179. *
  180. * @see \Symfony\Component\Serializer\Encoder\XmlEncoder::isElementNameValid
  181. * @license https://github.com/symfony/serializer/blob/5.3/LICENSE
  182. *
  183. * @param string $name - The name
  184. *
  185. * @return boolean - True means valid
  186. */
  187. protected static function isElementNameValid(string $name = ''): bool
  188. {
  189. return $name && false === strpos($name, ' ') && preg_match('#^[\pL_][\pL0-9._:-]*$#ui', $name);
  190. }
  191. /**
  192. * Checks if a value contains any characters which would require CDATA wrapping.
  193. *
  194. * Notes here: the `XMLWriter` shall been wrapped the `"` string as `&quot;` symbol string,
  195. * it's strictly following the `XMLWriter` specification here.
  196. *
  197. * @see \Symfony\Component\Serializer\Encoder\XmlEncoder::needsCdataWrapping
  198. * @license https://github.com/symfony/serializer/blob/5.3/LICENSE
  199. *
  200. * @param string $value - The value
  201. *
  202. * @return boolean - True means need
  203. */
  204. protected static function needsCdataWrapping(string $value = ''): bool
  205. {
  206. return $value && 0 < preg_match('#[>&"<]#', $value);
  207. }
  208. }