Iri.php 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101
  1. <?php
  2. /**
  3. * IRI parser/serialiser/normaliser
  4. *
  5. * @package Requests\Utilities
  6. */
  7. namespace WpOrg\Requests;
  8. use WpOrg\Requests\Exception;
  9. use WpOrg\Requests\Exception\InvalidArgument;
  10. use WpOrg\Requests\Ipv6;
  11. use WpOrg\Requests\Port;
  12. use WpOrg\Requests\Utility\InputValidator;
  13. /**
  14. * IRI parser/serialiser/normaliser
  15. *
  16. * Copyright (c) 2007-2010, Geoffrey Sneddon and Steve Minutillo.
  17. * All rights reserved.
  18. *
  19. * Redistribution and use in source and binary forms, with or without
  20. * modification, are permitted provided that the following conditions are met:
  21. *
  22. * * Redistributions of source code must retain the above copyright notice,
  23. * this list of conditions and the following disclaimer.
  24. *
  25. * * Redistributions in binary form must reproduce the above copyright notice,
  26. * this list of conditions and the following disclaimer in the documentation
  27. * and/or other materials provided with the distribution.
  28. *
  29. * * Neither the name of the SimplePie Team nor the names of its contributors
  30. * may be used to endorse or promote products derived from this software
  31. * without specific prior written permission.
  32. *
  33. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  34. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  35. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  36. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE
  37. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  38. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  39. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  40. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  41. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  42. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  43. * POSSIBILITY OF SUCH DAMAGE.
  44. *
  45. * @package Requests\Utilities
  46. * @author Geoffrey Sneddon
  47. * @author Steve Minutillo
  48. * @copyright 2007-2009 Geoffrey Sneddon and Steve Minutillo
  49. * @license https://opensource.org/licenses/bsd-license.php
  50. * @link http://hg.gsnedders.com/iri/
  51. *
  52. * @property string $iri IRI we're working with
  53. * @property-read string $uri IRI in URI form, {@see \WpOrg\Requests\Iri::to_uri()}
  54. * @property string $scheme Scheme part of the IRI
  55. * @property string $authority Authority part, formatted for a URI (userinfo + host + port)
  56. * @property string $iauthority Authority part of the IRI (userinfo + host + port)
  57. * @property string $userinfo Userinfo part, formatted for a URI (after '://' and before '@')
  58. * @property string $iuserinfo Userinfo part of the IRI (after '://' and before '@')
  59. * @property string $host Host part, formatted for a URI
  60. * @property string $ihost Host part of the IRI
  61. * @property string $port Port part of the IRI (after ':')
  62. * @property string $path Path part, formatted for a URI (after first '/')
  63. * @property string $ipath Path part of the IRI (after first '/')
  64. * @property string $query Query part, formatted for a URI (after '?')
  65. * @property string $iquery Query part of the IRI (after '?')
  66. * @property string $fragment Fragment, formatted for a URI (after '#')
  67. * @property string $ifragment Fragment part of the IRI (after '#')
  68. */
  69. class Iri {
  /**
   * Scheme
   *
   * Stored lowercased by set_scheme(); null when the IRI has no scheme.
   *
   * @var string|null
   */
  protected $scheme = null;
  /**
   * User Information
   *
   * Null when the authority carries no userinfo.
   *
   * @var string|null
   */
  protected $iuserinfo = null;
  /**
   * ihost
   *
   * Null when the IRI has no host.
   *
   * @var string|null
   */
  protected $ihost = null;
  /**
   * Port
   *
   * set_port() casts accepted values to an integer; null when no port is set.
   *
   * @var int|null
   */
  protected $port = null;
  /**
   * ipath
   *
   * Always a string; the empty string means "no path".
   *
   * @var string
   */
  protected $ipath = '';
  /**
   * iquery
   *
   * Null when the IRI has no query.
   *
   * @var string|null
   */
  protected $iquery = null;
  /**
   * ifragment
   *
   * Null when the IRI has no fragment.
   *
   * @var string|null
   */
  protected $ifragment = null;
  /**
   * Normalization database
   *
   * Each key is the scheme, each value is an array with each key as the IRI
   * part and value as the default value for that part. A part whose current
   * value equals its default is dropped by scheme_normalization(), and
   * __get() reports the default for a part that is currently null.
   *
   * @var array
   */
  protected $normalization = array(
      'acap' => array(
          'port' => Port::ACAP,
      ),
      'dict' => array(
          'port' => Port::DICT,
      ),
      'file' => array(
          'ihost' => 'localhost',
      ),
      'http' => array(
          'port' => Port::HTTP,
      ),
      'https' => array(
          'port' => Port::HTTPS,
      ),
  );
  /**
   * String conversion: yields whatever get_iri() produces for this object.
   *
   * @return string
   */
  public function __toString() {
      $iri = $this->get_iri();

      return $iri;
  }
  /**
   * Route property writes to the matching set_*() method.
   *
   * A name with its own setter (e.g. "scheme" -> set_scheme()) is used as-is.
   * The "i"-prefixed aliases listed below have no setter of their own, so the
   * leading "i" is dropped first (e.g. "ihost" -> set_host()). Any other name
   * is silently ignored.
   *
   * @param string $name Property name
   * @param mixed $value Property value
   */
  public function __set($name, $value) {
      $setter = 'set_' . $name;
      if (method_exists($this, $setter)) {
          $this->$setter($value);
          return;
      }

      $aliases = array('iauthority', 'iuserinfo', 'ihost', 'ipath', 'iquery', 'ifragment');
      if (in_array($name, $aliases, true)) {
          $setter = 'set_' . substr($name, 1);
          $this->$setter($value);
      }
  }
  /**
   * Route property reads to a getter or to the stored parts.
   *
   * Lookup order:
   *  1. "iri", "uri", "iauthority" and "authority" call the matching get_*().
   *  2. A name that is a declared property is read directly.
   *  3. The name with an "i" prepended (host -> ihost).
   *  4. The name with its first character removed (ischeme -> scheme).
   *  5. Otherwise an E_USER_NOTICE is raised and null is used.
   *
   * When the value found is null and the normalization table holds a default
   * for the resolved part under the current scheme, that default is returned.
   *
   * @param string $name Property name
   * @return mixed
   */
  public function __get($name) {
      // get_object_vars() + array_key_exists() rather than isset(): a part
      // that is currently null must still count as an existing property.
      $fields = get_object_vars($this);
      $computed = array('iri', 'uri', 'iauthority', 'authority');

      if (in_array($name, $computed, true)) {
          $getter = 'get_' . $name;
          $value = $this->$getter();
      }
      elseif (array_key_exists($name, $fields)) {
          $value = $this->$name;
      }
      else {
          $prefixed = 'i' . $name;
          $stripped = substr($name, 1);

          if (array_key_exists($prefixed, $fields)) {
              // host -> ihost
              $name = $prefixed;
              $value = $this->$prefixed;
          }
          elseif ($stripped && array_key_exists($stripped, $fields)) {
              // ischeme -> scheme
              $name = $stripped;
              $value = $this->$stripped;
          }
          else {
              trigger_error('Undefined property: ' . get_class($this) . '::' . $name, E_USER_NOTICE);
              $value = null;
          }
      }

      if ($value === null && isset($this->normalization[$this->scheme][$name])) {
          return $this->normalization[$this->scheme][$name];
      }

      return $value;
  }
  /**
   * Answer isset() for overloaded properties.
   *
   * A name counts as set when a get_<name>() method exists, or when the
   * property of that name holds a non-null value.
   *
   * @param string $name Property name
   * @return bool
   */
  public function __isset($name) {
      if (method_exists($this, 'get_' . $name)) {
          return true;
      }

      return isset($this->$name);
  }
  /**
   * Handle unset() on overloaded properties.
   *
   * When a set_<name>() method exists it is called with an empty string;
   * names without a setter are ignored.
   *
   * @param string $name Property name
   */
  public function __unset($name) {
      $setter = 'set_' . $name;
      if (!method_exists($this, $setter)) {
          return;
      }

      $this->$setter('');
  }
  /**
   * Build an IRI object, optionally initialised from a string.
   *
   * The argument is validated first and then handed to set_iri(); passing
   * null leaves every part at its default.
   *
   * @param string|Stringable|null $iri
   *
   * @throws \WpOrg\Requests\Exception\InvalidArgument When the passed $iri argument is not a string, Stringable or null.
   */
  public function __construct($iri = null) {
      if ($iri !== null) {
          if (InputValidator::is_string_or_stringable($iri) === false) {
              throw InvalidArgument::create(1, '$iri', 'string|Stringable|null', gettype($iri));
          }
      }

      $this->set_iri($iri);
  }
  /**
   * Resolve a relative IRI against an absolute base IRI.
   *
   * Plain strings are wrapped in IRI objects first. A relative IRI that
   * already has a scheme is returned as a clone without consulting the base.
   *
   * @param \WpOrg\Requests\Iri|string $base (Absolute) Base IRI
   * @param \WpOrg\Requests\Iri|string $relative Relative IRI
   * @return \WpOrg\Requests\Iri|false The resolved IRI, or false when
   *                                   $relative is invalid or $base is
   *                                   invalid or has no scheme.
   */
  public static function absolutize($base, $relative) {
      if (!($relative instanceof self)) {
          $relative = new self($relative);
      }
      if (!$relative->is_valid()) {
          return false;
      }
      // A reference carrying its own scheme needs no base.
      if ($relative->scheme !== null) {
          return clone $relative;
      }

      if (!($base instanceof self)) {
          $base = new self($base);
      }
      if ($base->scheme === null || !$base->is_valid()) {
          return false;
      }

      if ($relative->get_iri() === '') {
          // Empty reference: the base itself, without its fragment.
          $target = clone $base;
          $target->ifragment = null;
      }
      elseif ($relative->iuserinfo !== null || $relative->ihost !== null || $relative->port !== null) {
          // The reference has its own authority: only the scheme is inherited.
          $target = clone $relative;
          $target->scheme = $base->scheme;
      }
      else {
          // Scheme and authority both come from the base.
          $target = new self;
          $target->scheme = $base->scheme;
          $target->iuserinfo = $base->iuserinfo;
          $target->ihost = $base->ihost;
          $target->port = $base->port;

          if ($relative->ipath === '') {
              // No path in the reference: keep the base path, and the base
              // query too unless the reference supplies one.
              $target->ipath = $base->ipath;
              $target->iquery = ($relative->iquery !== null) ? $relative->iquery : $base->iquery;
          }
          else {
              $base_has_authority = $base->iuserinfo !== null || $base->ihost !== null || $base->port !== null;

              if ($relative->ipath[0] === '/') {
                  $merged = $relative->ipath;
              }
              elseif ($base_has_authority && $base->ipath === '') {
                  $merged = '/' . $relative->ipath;
              }
              else {
                  // Replace everything after the last "/" of the base path.
                  $last_slash = strrpos($base->ipath, '/');
                  $merged = ($last_slash !== false)
                      ? substr($base->ipath, 0, $last_slash + 1) . $relative->ipath
                      : $relative->ipath;
              }

              $target->ipath = $target->remove_dot_segments($merged);
              $target->iquery = $relative->iquery;
          }

          $target->ifragment = $relative->ifragment;
      }

      $target->scheme_normalization();

      return $target;
  }
  /**
   * Split an IRI into scheme/authority/path/query/fragment segments.
   *
   * Surrounding space, tab, LF, FF and CR characters are trimmed first. The
   * returned array is the raw preg_match() result (numeric and named groups)
   * in which the named entries of components that are missing are set to
   * null; "path" is always a string.
   *
   * @param string $iri
   * @return array
   *
   * @throws \WpOrg\Requests\Exception When the string does not match the IRI pattern.
   */
  protected function parse_iri($iri) {
      $iri = trim($iri, "\x20\x09\x0A\x0C\x0D");
      $pattern = '/^((?P<scheme>[^:\/?#]+):)?(\/\/(?P<authority>[^\/?#]*))?(?P<path>[^?#]*)(\?(?P<query>[^#]*))?(#(?P<fragment>.*))?$/';

      $parts = array();
      if (!preg_match($pattern, $iri, $parts)) {
          throw new Exception('Cannot parse supplied IRI', 'iri.cannot_parse', $iri);
      }

      // Wrapper group number => named component. The wrapper includes the
      // delimiter, so "?" with nothing after it still counts as a (empty) query.
      $optional = array(
          1 => 'scheme',
          3 => 'authority',
          6 => 'query',
          8 => 'fragment',
      );
      foreach ($optional as $group => $component) {
          if (!isset($parts[$group]) || $parts[$group] === '') {
              $parts[$component] = null;
          }
      }

      if (!isset($parts[5])) {
          $parts['path'] = '';
      }

      return $parts;
  }
  /**
   * Collapse "." and ".." segments in a path.
   *
   * Segments are moved one at a time from $input onto an output buffer; the
   * loop stops as soon as $input contains no dot segment any more, and the
   * untouched remainder is appended.
   *
   * @param string $input
   * @return string
   */
  protected function remove_dot_segments($input) {
      $resolved = '';

      while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..') {
          // A: a leading "../" or "./" is simply dropped.
          if (strncmp($input, '../', 3) === 0) {
              $input = substr($input, 3);
              continue;
          }
          if (strncmp($input, './', 2) === 0) {
              $input = substr($input, 2);
              continue;
          }

          // B: a leading "/./" (or a lone "/.") becomes "/".
          if (strncmp($input, '/./', 3) === 0) {
              $input = substr($input, 2);
              continue;
          }
          if ($input === '/.') {
              $input = '/';
              continue;
          }

          // C: a leading "/../" (or a lone "/..") becomes "/" and the last
          // segment already written to the output is discarded. When the
          // output holds no "/", strrpos() yields false, which becomes offset
          // 0 and empties the buffer.
          if (strncmp($input, '/../', 4) === 0 || $input === '/..') {
              $input = ($input === '/..') ? '/' : substr($input, 3);
              $resolved = substr_replace($resolved, '', (int) strrpos($resolved, '/'));
              continue;
          }

          // D: nothing but "." or ".." left.
          if ($input === '.' || $input === '..') {
              $input = '';
              continue;
          }

          // E: move the first segment (with its leading "/", if any) across.
          $next_slash = strpos($input, '/', 1);
          if ($next_slash === false) {
              $resolved .= $input;
              $input = '';
              continue;
          }
          $resolved .= substr($input, 0, $next_slash);
          $input = substr($input, $next_slash);
      }

      return $resolved . $input;
  }
  /**
   * Replace invalid character with percent encoding
   *
   * Works in three passes: existing percent-escapes are normalised through
   * remove_iunreserved_percent_encoded(), every "%" not followed by two hex
   * digits becomes "%25", and finally each byte sequence that is neither in
   * the allowed ASCII set nor an acceptable UTF-8 encoded character is
   * percent-encoded byte by byte.
   *
   * @param string $text Input string
   * @param string $extra_chars Valid characters not in iunreserved or
   * iprivate (this is ASCII-only)
   * @param bool $iprivate Allow iprivate
   * @return string
   */
  protected function replace_invalid_with_pct_encoding($text, $extra_chars, $iprivate = false) {
      // Normalize as many pct-encoded sections as possible
      $text = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array($this, 'remove_iunreserved_percent_encoded'), $text);
      // Replace invalid percent characters
      $text = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $text);
      // Add unreserved and % to $extra_chars (the latter is safe because all
      // pct-encoded sections are now valid).
      $extra_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';
      // Now replace any bytes that aren't allowed with their pct-encoded versions
      $position = 0;
      $strlen = strlen($text);
      // strspn() skips the run of allowed ASCII characters, so each iteration
      // starts on the first byte that is not in $extra_chars.
      while (($position += strspn($text, $extra_chars, $position)) < $strlen) {
          $value = ord($text[$position]);
          // Start position
          $start = $position;
          // By default we are valid
          $valid = true;
          // No one byte sequences are valid due to the while.
          // Two byte sequence:
          if (($value & 0xE0) === 0xC0) {
              $character = ($value & 0x1F) << 6;
              $length = 2;
              $remaining = 1;
          }
          // Three byte sequence:
          elseif (($value & 0xF0) === 0xE0) {
              $character = ($value & 0x0F) << 12;
              $length = 3;
              $remaining = 2;
          }
          // Four byte sequence:
          elseif (($value & 0xF8) === 0xF0) {
              $character = ($value & 0x07) << 18;
              $length = 4;
              $remaining = 3;
          }
          // Invalid byte (this is also where a disallowed ASCII byte ends up):
          else {
              $valid = false;
              $length = 1;
              $remaining = 0;
          }
          if ($remaining) {
              // Only decode when the whole sequence fits inside the string.
              if ($position + $length <= $strlen) {
                  for ($position++; $remaining; $position++) {
                      $value = ord($text[$position]);
                      // Check that the byte is valid, then add it to the character:
                      if (($value & 0xC0) === 0x80) {
                          $character |= ($value & 0x3F) << (--$remaining * 6);
                      }
                      // If it is invalid, count the sequence as invalid and reprocess the current byte:
                      else {
                          $valid = false;
                          $position--;
                          break;
                      }
                  }
              }
              // Truncated sequence: everything up to the last byte is invalid.
              else {
                  $position = $strlen - 1;
                  $valid = false;
              }
          }
          // Percent encode anything invalid or not in ucschar.
          // $valid is tested first, so $character is only read when one of the
          // multi-byte branches above has assigned it.
          if (
              // Invalid sequences
              !$valid
              // Non-shortest form sequences are invalid
              || $length > 1 && $character <= 0x7F
              || $length > 2 && $character <= 0x7FF
              || $length > 3 && $character <= 0xFFFF
              // Outside of range of ucschar codepoints
              // Noncharacters
              || ($character & 0xFFFE) === 0xFFFE
              || $character >= 0xFDD0 && $character <= 0xFDEF
              || (
                  // Everything else not in ucschar
                  $character > 0xD7FF && $character < 0xF900
                  || $character < 0xA0
                  || $character > 0xEFFFD
              )
              && (
                  // Everything not in iprivate, if it applies
                  !$iprivate
                  || $character < 0xE000
                  || $character > 0x10FFFD
              )
          ) {
              // If we were a character, pretend we weren't, but rather an error.
              // (After a complete sequence $position is one past its last byte.)
              if ($valid) {
                  $position--;
              }
              // Each encoded byte grows the string by two characters, so the
              // loop index, the scan position and the cached length all move
              // forward by two.
              for ($j = $start; $j <= $position; $j++) {
                  $text = substr_replace($text, sprintf('%%%02X', ord($text[$j])), $j, 1);
                  $j += 2;
                  $position += 2;
                  $strlen += 2;
              }
          }
      }
      return $text;
  }
  /**
   * Callback function for preg_replace_callback.
   *
   * Removes sequences of percent encoded bytes that represent UTF-8
   * encoded characters in iunreserved
   *
   * The match is a run of one or more "%XX" escapes. Escapes that decode to
   * a character accepted by the checks below are replaced by the raw bytes;
   * all others are kept percent-encoded with uppercase hex digits.
   *
   * @param array $regex_match PCRE match
   * @return string Replacement
   */
  protected function remove_iunreserved_percent_encoded($regex_match) {
      // As we just have valid percent encoded sequences we can just explode
      // and ignore the first member of the returned array (an empty string).
      $bytes = explode('%', $regex_match[0]);
      // Initialize the new string (this is what will be returned) and that
      // there are no bytes remaining in the current sequence (unsurprising
      // at the first byte!).
      $string = '';
      $remaining = 0;
      // Loop over each and every byte, and set $value to its value
      for ($i = 1, $len = count($bytes); $i < $len; $i++) {
          $value = hexdec($bytes[$i]);
          // If we're the first byte of sequence:
          if (!$remaining) {
              // Start position
              $start = $i;
              // By default we are valid
              $valid = true;
              // One byte sequence:
              if ($value <= 0x7F) {
                  $character = $value;
                  $length = 1;
              }
              // Two byte sequence:
              elseif (($value & 0xE0) === 0xC0) {
                  $character = ($value & 0x1F) << 6;
                  $length = 2;
                  $remaining = 1;
              }
              // Three byte sequence:
              elseif (($value & 0xF0) === 0xE0) {
                  $character = ($value & 0x0F) << 12;
                  $length = 3;
                  $remaining = 2;
              }
              // Four byte sequence:
              elseif (($value & 0xF8) === 0xF0) {
                  $character = ($value & 0x07) << 18;
                  $length = 4;
                  $remaining = 3;
              }
              // Invalid byte:
              else {
                  $valid = false;
                  $remaining = 0;
              }
          }
          // Continuation byte:
          else {
              // Check that the byte is valid, then add it to the character:
              if (($value & 0xC0) === 0x80) {
                  $remaining--;
                  $character |= ($value & 0x3F) << ($remaining * 6);
              }
              // If it is invalid, count the sequence as invalid and reprocess the current byte as the start of a sequence:
              else {
                  $valid = false;
                  $remaining = 0;
                  $i--;
              }
          }
          // If we've reached the end of the current byte sequence, append it to $string
          if (!$remaining) {
              // Percent encode anything invalid or not in iunreserved.
              // $valid is tested first, so $length and $character are only
              // read when the lead byte assigned them.
              if (
                  // Invalid sequences
                  !$valid
                  // Non-shortest form sequences are invalid
                  || $length > 1 && $character <= 0x7F
                  || $length > 2 && $character <= 0x7FF
                  || $length > 3 && $character <= 0xFFFF
                  // Outside of range of iunreserved codepoints
                  || $character < 0x2D
                  || $character > 0xEFFFD
                  // Noncharacters
                  || ($character & 0xFFFE) === 0xFFFE
                  || $character >= 0xFDD0 && $character <= 0xFDEF
                  // Everything else not in iunreserved (this is all BMP)
                  || $character === 0x2F
                  || $character > 0x39 && $character < 0x41
                  || $character > 0x5A && $character < 0x61
                  || $character > 0x7A && $character < 0x7E
                  || $character > 0x7E && $character < 0xA0
                  || $character > 0xD7FF && $character < 0xF900
              ) {
                  // Keep the escapes, normalising the hex digits to uppercase.
                  for ($j = $start; $j <= $i; $j++) {
                      $string .= '%' . strtoupper($bytes[$j]);
                  }
              }
              else {
                  // Emit the decoded bytes themselves.
                  for ($j = $start; $j <= $i; $j++) {
                      $string .= chr(hexdec($bytes[$j]));
                  }
              }
          }
      }
      // If we have any bytes left over they are invalid (i.e., we are
      // mid-way through a multi-byte sequence)
      if ($remaining) {
          for ($j = $start; $j < $len; $j++) {
              $string .= '%' . strtoupper($bytes[$j]);
          }
      }
      return $string;
  }
  /**
   * Apply scheme-based normalization to the stored parts.
   *
   * Every part whose value is identical to the default registered for the
   * current scheme in $normalization is cleared (null, or '' for the path).
   * Afterwards an IRI that has a host but an empty path gets the path "/".
   *
   * Only a genuinely empty path is replaced: the path "0" is left alone, so
   * it is not silently turned into "/".
   *
   * @return void
   */
  protected function scheme_normalization() {
      $defaults = array();
      if ($this->scheme !== null && isset($this->normalization[$this->scheme])) {
          $defaults = $this->normalization[$this->scheme];
      }

      $parts = array('iuserinfo', 'ihost', 'port', 'ipath', 'iquery', 'ifragment');
      foreach ($parts as $part) {
          if (isset($defaults[$part]) && $this->$part === $defaults[$part]) {
              // The path is the only part whose "absent" value is '' rather than null.
              $this->$part = ($part === 'ipath') ? '' : null;
          }
      }

      // Runs after the loop so that a host or path just cleared above is
      // already taken into account.
      if ($this->ihost !== null && ($this->ipath === '' || $this->ipath === null)) {
          $this->ipath = '/';
      }
  }
  /**
   * Check whether the stored parts form a valid IRI.
   *
   * This is evaluated on every call because the rules for the path depend on
   * the other parts:
   *  - with an authority (userinfo, host or port), a non-empty path must
   *    start with "/";
   *  - with neither scheme nor authority, the first path segment must not
   *    contain a ":".
   *
   * @return bool
   */
  public function is_valid() {
      if ($this->ipath === '') {
          return true;
      }

      $has_authority = $this->iuserinfo !== null || $this->ihost !== null || $this->port !== null;
      if ($has_authority) {
          return $this->ipath[0] === '/';
      }

      if ($this->scheme !== null) {
          return true;
      }

      $colon = strpos($this->ipath, ':');
      if ($colon === false) {
          return true;
      }

      $slash = strpos($this->ipath, '/');
      if ($slash === false) {
          // Single segment containing a colon.
          return false;
      }

      // Valid only when the first colon comes after the first slash.
      return !($colon < $slash);
  }
  /**
   * Set the entire IRI. Returns true on success, false on failure (if there
   * are any invalid characters).
   *
   * Passing null is a no-op that reports success. Results are memoised per
   * input string in a function-level static, so parsing the same string
   * again restores the stored parts without re-running the setters.
   *
   * @param string|null $iri
   * @return bool
   */
  protected function set_iri($iri) {
      static $memo = array();

      if ($iri === null) {
          return true;
      }

      $iri = (string) $iri;

      if (isset($memo[$iri])) {
          $hit = $memo[$iri];
          $this->scheme = $hit[0];
          $this->iuserinfo = $hit[1];
          $this->ihost = $hit[2];
          $this->port = $hit[3];
          $this->ipath = $hit[4];
          $this->iquery = $hit[5];
          $this->ifragment = $hit[6];

          return $hit[7];
      }

      $parts = $this->parse_iri($iri);

      // The chain short-circuits: once a setter fails the later ones are skipped.
      $ok = $this->set_scheme($parts['scheme'])
          && $this->set_authority($parts['authority'])
          && $this->set_path($parts['path'])
          && $this->set_query($parts['query'])
          && $this->set_fragment($parts['fragment']);

      $memo[$iri] = array(
          $this->scheme,
          $this->iuserinfo,
          $this->ihost,
          $this->port,
          $this->ipath,
          $this->iquery,
          $this->ifragment,
          $ok,
      );

      return $ok;
  }
  /**
   * Set the scheme. Returns true on success, false on failure (if there are
   * any invalid characters).
   *
   * A valid scheme starts with an ASCII letter followed by letters, digits,
   * "+", "-" or "."; it is stored lowercased. Null clears the scheme. On
   * failure the scheme is cleared as well.
   *
   * @param string|null $scheme
   * @return bool
   */
  protected function set_scheme($scheme) {
      if ($scheme === null) {
          $this->scheme = null;
          return true;
      }

      if (preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*$/', $scheme)) {
          $this->scheme = strtolower($scheme);
          return true;
      }

      $this->scheme = null;
      return false;
  }
  /**
   * Set the authority. Returns true on success, false on failure (if there are
   * any invalid characters).
   *
   * The authority is split into userinfo (everything before the last "@"),
   * host, and port (after the first ":" that follows any "]" closing an
   * IPv6 literal); each piece is handed to its own setter.
   *
   * Results are memoised in a function-level static. The setters apply
   * scheme-based normalization, so the cached parts depend on the scheme as
   * well as on the authority string. The cache is therefore keyed on both;
   * keying on the authority alone would, for example, hand the host-less
   * result of "file://localhost" to a later "http://localhost".
   *
   * @param string|null $authority
   * @return bool
   */
  protected function set_authority($authority) {
      static $cache;
      if (!$cache) {
          $cache = array();
      }

      if ($authority === null) {
          $this->iuserinfo = null;
          $this->ihost = null;
          $this->port = null;
          return true;
      }

      // A scheme never contains ":", so "<scheme>:<authority>" is unambiguous
      // (an absent scheme contributes an empty prefix).
      $cache_key = $this->scheme . ':' . $authority;

      if (isset($cache[$cache_key])) {
          list($this->iuserinfo,
              $this->ihost,
              $this->port,
              $return) = $cache[$cache_key];
          // Same follow-up the setters perform on a cache miss, so that the
          // path fix-up for "host but empty path" also happens here.
          $this->scheme_normalization();
          return $return;
      }

      $remaining = $authority;

      $iuserinfo = null;
      $iuserinfo_end = strrpos($remaining, '@');
      if ($iuserinfo_end !== false) {
          $iuserinfo = substr($remaining, 0, $iuserinfo_end);
          $remaining = substr($remaining, $iuserinfo_end + 1);
      }

      // Start looking for the port separator after the "]" of an IPv6
      // literal, if there is one; otherwise from the beginning.
      $bracket_end = strpos($remaining, ']');
      $port_start = strpos($remaining, ':', ($bracket_end === false) ? 0 : $bracket_end);

      $port = null;
      if ($port_start !== false) {
          $port = substr($remaining, $port_start + 1);
          // "host:" carries an empty port, which is treated as no port.
          if ($port === false || $port === '') {
              $port = null;
          }
          $remaining = substr($remaining, 0, $port_start);
      }

      $return = $this->set_userinfo($iuserinfo) &&
          $this->set_host($remaining) &&
          $this->set_port($port);

      $cache[$cache_key] = array($this->iuserinfo,
          $this->ihost,
          $this->port,
          $return);

      return $return;
  }
  /**
   * Set the iuserinfo.
   *
   * Null clears the part. Any other value is percent-encoded where needed
   * and scheme normalization is re-applied. Always reports success.
   *
   * @param string|null $iuserinfo
   * @return bool
   */
  protected function set_userinfo($iuserinfo) {
      if ($iuserinfo !== null) {
          $encoded = $this->replace_invalid_with_pct_encoding($iuserinfo, '!$&\'()*+,;=:');
          $this->iuserinfo = $encoded;
          $this->scheme_normalization();

          return true;
      }

      $this->iuserinfo = null;

      return true;
  }
  /**
   * Set the ihost. Returns true on success, false on failure (if there are
   * any invalid characters).
   *
   * A value wrapped in "[" and "]" is treated as an IPv6 literal: it must
   * pass Ipv6::check_ipv6() and is stored in the form produced by
   * Ipv6::compress(). Any other value is percent-encoded where needed and
   * lowercased, leaving percent-escapes untouched.
   *
   * @param string|null $ihost
   * @return bool
   */
  protected function set_host($ihost) {
      if ($ihost === null) {
          $this->ihost = null;
          return true;
      }

      $bracketed = substr($ihost, 0, 1) === '[' && substr($ihost, -1) === ']';

      if ($bracketed) {
          $address = substr($ihost, 1, -1);
          if (!Ipv6::check_ipv6($address)) {
              $this->ihost = null;
              return false;
          }
          $this->ihost = '[' . Ipv6::compress($address) . ']';
      }
      else {
          $ihost = $this->replace_invalid_with_pct_encoding($ihost, '!$&\'()*+,;=');

          // Lowercasing has to come after the encoding step (which can add
          // unescaped characters) and must skip the "%XX" escapes, whose hex
          // digits stay uppercase.
          $stop_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%';
          $length = strlen($ihost);
          $offset = 0;
          while (($offset += strcspn($ihost, $stop_chars, $offset)) < $length) {
              if ($ihost[$offset] === '%') {
                  // Jump over the whole escape.
                  $offset += 3;
                  continue;
              }
              $ihost[$offset] = strtolower($ihost[$offset]);
              $offset++;
          }

          $this->ihost = $ihost;
      }

      $this->scheme_normalization();

      return true;
  }
  /**
   * Set the port. Returns true on success, false on failure (if there are
   * any invalid characters).
   *
   * Null and the empty string both mean "no port". The empty string is
   * handled explicitly because it would otherwise pass the digits-only check
   * and be stored as port 0, which is what unset($iri->port) used to produce
   * (__unset() calls the setter with '').
   *
   * A value consisting only of ASCII digits is stored as an integer and
   * scheme normalization is re-applied; anything else clears the port and
   * reports failure.
   *
   * @param string|int|null $port
   * @return bool
   */
  protected function set_port($port) {
      if ($port === null || $port === '') {
          $this->port = null;
          return true;
      }

      if (strspn($port, '0123456789') === strlen($port)) {
          $this->port = (int) $port;
          $this->scheme_normalization();
          return true;
      }

      $this->port = null;
      return false;
  }
  /**
   * Store the ipath component.
   *
   * The input is cast to string. For each distinct input string two forms
   * are computed once and memoised in a function-static array: the result of
   * replace_invalid_with_pct_encoding(), and that result after
   * remove_dot_segments(). The dot-segment-free form is used when a scheme
   * is currently set, the merely encoded form otherwise.
   * scheme_normalization() is invoked afterwards.
   *
   * @param string $ipath Raw path.
   * @return bool Always true.
   */
  protected function set_path($ipath) {
      // Maps raw path => array(0 => encoded, 1 => encoded with dot segments removed).
      static $cache = array();

      $raw = (string) $ipath;

      if (!isset($cache[$raw])) {
          $encoded     = $this->replace_invalid_with_pct_encoding($raw, '!$&\'()*+,;=@:/');
          $cache[$raw] = array($encoded, $this->remove_dot_segments($encoded));
      }

      // Index 1 when a scheme is present, index 0 otherwise.
      $variant     = ($this->scheme !== null) ? 1 : 0;
      $this->ipath = $cache[$raw][$variant];

      $this->scheme_normalization();

      return true;
  }
  /**
   * Store the iquery component.
   *
   * null clears the query and skips normalization. Any other value is
   * passed through replace_invalid_with_pct_encoding() and stored, then
   * scheme_normalization() runs.
   *
   * @param string|null $iquery Raw query (without the leading "?"), or null to unset it.
   * @return bool Always true.
   */
  protected function set_query($iquery) {
      if ($iquery === null) {
          $this->iquery = null;

          return true;
      }

      // NOTE(review): the third argument (true) is specific to the query
      // component; its meaning is defined by the helper, not visible here.
      $this->iquery = $this->replace_invalid_with_pct_encoding(
          $iquery,
          '!$&\'()*+,;=:@/?',
          true
      );
      $this->scheme_normalization();

      return true;
  }
  /**
   * Store the ifragment component.
   *
   * null clears the fragment and skips normalization. Any other value is
   * passed through replace_invalid_with_pct_encoding() and stored, then
   * scheme_normalization() runs.
   *
   * @param string|null $ifragment Raw fragment (without the leading "#"), or null to unset it.
   * @return bool Always true.
   */
  protected function set_fragment($ifragment) {
      $has_fragment = ($ifragment !== null);

      if (!$has_fragment) {
          $this->ifragment = null;
      }

      if ($has_fragment) {
          $this->ifragment = $this->replace_invalid_with_pct_encoding($ifragment, '!$&\'()*+,;=:@/?');
          $this->scheme_normalization();
      }

      return true;
  }
  /**
   * Convert an IRI to a URI (or parts thereof)
   *
   * Every byte in the range 0x80-0xFF is replaced by its uppercase
   * percent-encoded form ("%XX"); all other bytes, including existing "%"
   * characters, are copied through unchanged.
   *
   * @param string|bool $iri IRI to convert (or false from {@see \WpOrg\Requests\Iri::get_iri()})
   * @return string|false URI if $iri is a string, false otherwise.
   */
  protected function to_uri($iri) {
      if (!is_string($iri)) {
          return false;
      }

      // Byte => "%XX" lookup for 0x80-0xFF, built on first use and reused
      // by later calls.
      static $escapes = null;
      if ($escapes === null) {
          $escapes = array();
          for ($byte = 0x80; $byte <= 0xFF; $byte++) {
              $escapes[chr($byte)] = sprintf('%%%02X', $byte);
          }
      }

      // strtr() walks the input once and never rescans replaced text, so
      // the cost stays linear instead of rebuilding the whole string for
      // every non-ASCII byte.
      return strtr($iri, $escapes);
  }
  /**
   * Get the complete IRI
   *
   * Assembles the stored components in order: "scheme:" (if a scheme is
   * set), "//" plus the iauthority (if get_iauthority() is not null), the
   * ipath, "?" plus the iquery (if set) and "#" plus the ifragment (if set).
   *
   * @return string|false The assembled IRI, or false when is_valid() fails.
   */
  protected function get_iri() {
      if (!$this->is_valid()) {
          return false;
      }

      $pieces = array();

      if ($this->scheme !== null) {
          $pieces[] = $this->scheme . ':';
      }

      $iauthority = $this->get_iauthority();
      if ($iauthority !== null) {
          $pieces[] = '//' . $iauthority;
      }

      // The path is always emitted, even when it is empty.
      $pieces[] = $this->ipath;

      if ($this->iquery !== null) {
          $pieces[] = '?' . $this->iquery;
      }

      if ($this->ifragment !== null) {
          $pieces[] = '#' . $this->ifragment;
      }

      return implode('', $pieces);
  }
  /**
   * Get the complete URI
   *
   * This is the value of get_iri() passed through to_uri(). When get_iri()
   * yields false, to_uri() passes that false through.
   *
   * @return string|false
   */
  protected function get_uri() {
      $iri = $this->get_iri();

      return $this->to_uri($iri);
  }
  /**
   * Get the complete iauthority
   *
   * Built as "[iuserinfo@][ihost][:port]", where each bracketed part is
   * included only when the corresponding property is not null.
   *
   * @return string|null The iauthority, or null when userinfo, host and port are all null.
   */
  protected function get_iauthority() {
      $has_userinfo = ($this->iuserinfo !== null);
      $has_host     = ($this->ihost !== null);
      $has_port     = ($this->port !== null);

      if (!($has_userinfo || $has_host || $has_port)) {
          return null;
      }

      $userinfo_part = $has_userinfo ? $this->iuserinfo . '@' : '';
      $host_part     = $has_host ? $this->ihost : '';
      $port_part     = $has_port ? ':' . $this->port : '';

      return $userinfo_part . $host_part . $port_part;
  }
  /**
   * Get the complete authority
   *
   * Returns the iauthority converted with to_uri() when it is a string;
   * otherwise the value from get_iauthority() (null when no authority
   * component is set) is returned unchanged.
   *
   * @return string|null
   */
  protected function get_authority() {
      $iauthority = $this->get_iauthority();

      return is_string($iauthority) ? $this->to_uri($iauthority) : $iauthority;
  }
  1010. }