Text.php 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. <?php
  2. /**
  3. * This file is part of PHPWord - A pure PHP library for reading and writing
  4. * word processing documents.
  5. *
  6. * PHPWord is free software distributed under the terms of the GNU Lesser
  7. * General Public License version 3 as published by the Free Software Foundation.
  8. *
  9. * For the full copyright and license information, please read the LICENSE
  10. * file that was distributed with this source code. For the full list of
  11. * contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
  12. *
  13. * @see https://github.com/PHPOffice/PHPWord
  14. * @copyright 2010-2018 PHPWord contributors
  15. * @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
  16. */
  17. namespace PhpOffice\PhpWord\Shared;
  18. /**
  19. * Text
  20. */
  21. class Text
  22. {
  23. /**
  24. * Control characters array
  25. *
  26. * @var string[]
  27. */
  28. private static $controlCharacters = array();
  29. /**
  30. * Build control characters array
  31. */
  32. private static function buildControlCharacters()
  33. {
  34. for ($i = 0; $i <= 19; ++$i) {
  35. if ($i != 9 && $i != 10 && $i != 13) {
  36. $find = '_x' . sprintf('%04s', strtoupper(dechex($i))) . '_';
  37. $replace = chr($i);
  38. self::$controlCharacters[$find] = $replace;
  39. }
  40. }
  41. }
  42. /**
  43. * Convert from PHP control character to OpenXML escaped control character
  44. *
  45. * Excel 2007 team:
  46. * ----------------
  47. * That's correct, control characters are stored directly in the shared-strings table.
  48. * We do encode characters that cannot be represented in XML using the following escape sequence:
  49. * _xHHHH_ where H represents a hexadecimal character in the character's value...
  50. * So you could end up with something like _x0008_ in a string (either in a cell value (<v>)
  51. * element or in the shared string <t> element.
  52. *
  53. * @param string $value Value to escape
  54. * @return string
  55. */
  56. public static function controlCharacterPHP2OOXML($value = '')
  57. {
  58. if (empty(self::$controlCharacters)) {
  59. self::buildControlCharacters();
  60. }
  61. return str_replace(array_values(self::$controlCharacters), array_keys(self::$controlCharacters), $value);
  62. }
  63. /**
  64. * Return a number formatted for being integrated in xml files
  65. * @param float $number
  66. * @param int $decimals
  67. * @return string
  68. */
  69. public static function numberFormat($number, $decimals)
  70. {
  71. return number_format($number, $decimals, '.', '');
  72. }
  73. /**
  74. * @param int $dec
  75. * @see http://stackoverflow.com/a/7153133/2235790
  76. * @author velcrow
  77. * @return string
  78. */
  79. public static function chr($dec)
  80. {
  81. if ($dec <= 0x7F) {
  82. return chr($dec);
  83. }
  84. if ($dec <= 0x7FF) {
  85. return chr(($dec >> 6) + 192) . chr(($dec & 63) + 128);
  86. }
  87. if ($dec <= 0xFFFF) {
  88. return chr(($dec >> 12) + 224) . chr((($dec >> 6) & 63) + 128) . chr(($dec & 63) + 128);
  89. }
  90. if ($dec <= 0x1FFFFF) {
  91. return chr(($dec >> 18) + 240) . chr((($dec >> 12) & 63) + 128) . chr((($dec >> 6) & 63) + 128) . chr(($dec & 63) + 128);
  92. }
  93. return '';
  94. }
  95. /**
  96. * Convert from OpenXML escaped control character to PHP control character
  97. *
  98. * @param string $value Value to unescape
  99. * @return string
  100. */
  101. public static function controlCharacterOOXML2PHP($value = '')
  102. {
  103. if (empty(self::$controlCharacters)) {
  104. self::buildControlCharacters();
  105. }
  106. return str_replace(array_keys(self::$controlCharacters), array_values(self::$controlCharacters), $value);
  107. }
  108. /**
  109. * Check if a string contains UTF-8 data
  110. *
  111. * @param string $value
  112. * @return bool
  113. */
  114. public static function isUTF8($value = '')
  115. {
  116. return is_string($value) && ($value === '' || preg_match('/^./su', $value) == 1);
  117. }
  118. /**
  119. * Return UTF8 encoded value
  120. *
  121. * @param string $value
  122. * @return string
  123. */
  124. public static function toUTF8($value = '')
  125. {
  126. if (!is_null($value) && !self::isUTF8($value)) {
  127. $value = utf8_encode($value);
  128. }
  129. return $value;
  130. }
  131. /**
  132. * Returns unicode from UTF8 text
  133. *
  134. * The function is splitted to reduce cyclomatic complexity
  135. *
  136. * @param string $text UTF8 text
  137. * @return string Unicode text
  138. * @since 0.11.0
  139. */
  140. public static function toUnicode($text)
  141. {
  142. return self::unicodeToEntities(self::utf8ToUnicode($text));
  143. }
  144. /**
  145. * Returns unicode array from UTF8 text
  146. *
  147. * @param string $text UTF8 text
  148. * @return array
  149. * @since 0.11.0
  150. * @see http://www.randomchaos.com/documents/?source=php_and_unicode
  151. */
  152. public static function utf8ToUnicode($text)
  153. {
  154. $unicode = array();
  155. $values = array();
  156. $lookingFor = 1;
  157. // Gets unicode for each character
  158. for ($i = 0; $i < strlen($text); $i++) {
  159. $thisValue = ord($text[$i]);
  160. if ($thisValue < 128) {
  161. $unicode[] = $thisValue;
  162. } else {
  163. if (count($values) == 0) {
  164. $lookingFor = $thisValue < 224 ? 2 : 3;
  165. }
  166. $values[] = $thisValue;
  167. if (count($values) == $lookingFor) {
  168. if ($lookingFor == 3) {
  169. $number = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
  170. } else {
  171. $number = (($values[0] % 32) * 64) + ($values[1] % 64);
  172. }
  173. $unicode[] = $number;
  174. $values = array();
  175. $lookingFor = 1;
  176. }
  177. }
  178. }
  179. return $unicode;
  180. }
  181. /**
  182. * Returns entites from unicode array
  183. *
  184. * @param array $unicode
  185. * @return string
  186. * @since 0.11.0
  187. * @see http://www.randomchaos.com/documents/?source=php_and_unicode
  188. */
  189. private static function unicodeToEntities($unicode)
  190. {
  191. $entities = '';
  192. foreach ($unicode as $value) {
  193. if ($value != 65279) {
  194. $entities .= $value > 127 ? '\uc0{\u' . $value . '}' : chr($value);
  195. }
  196. }
  197. return $entities;
  198. }
  199. /**
  200. * Return name without underscore for < 0.10.0 variable name compatibility
  201. *
  202. * @param string $value
  203. * @return string
  204. */
  205. public static function removeUnderscorePrefix($value)
  206. {
  207. if (!is_null($value)) {
  208. if (substr($value, 0, 1) == '_') {
  209. $value = substr($value, 1);
  210. }
  211. }
  212. return $value;
  213. }
  214. }