<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Symfony\Component\Yaml;

use Symfony\Component\Yaml\Exception\ParseException;

  /**
   * Unescaper encapsulates unescaping rules for single and double-quoted
   * YAML strings.
   *
   * @author Matthew Lewinski <matthew@lewinski.org>
   *
   * @internal
   */
  class Unescaper
  {
      /**
       * Regex fragment that matches an escaped character in a double quoted string.
       *
       * It matches a backslash followed by a hexadecimal escape ("xHH",
       * "uHHHH" or "UHHHHHHHH") or, failing that, by whatever single
       * character "." matches. The fragment has no delimiters or modifiers;
       * the caller adds them.
       */
      public const REGEX_ESCAPED_CHARACTER = '\\\\(x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}|.)';

      /**
       * Unescapes a single quoted string.
       *
       * The only escape handled here is a doubled single quote (''), which
       * is replaced by one single quote.
       *
       * @param string $value A single quoted string
       */
      public function unescapeSingleQuotedString(string $value): string
      {
          return str_replace('\'\'', '\'', $value);
      }

      /**
       * Unescapes a double quoted string.
       *
       * Every sequence matched by REGEX_ESCAPED_CHARACTER is replaced by the
       * character it denotes.
       *
       * @param string $value A double quoted string
       *
       * @throws ParseException When an unknown escape sequence is found or
       *                        when the PCRE replacement itself fails
       */
      public function unescapeDoubleQuotedString(string $value): string
      {
          $callback = function ($match) {
              return $this->unescapeCharacter($match[0]);
          };

          // evaluate the string
          $unescaped = preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value);

          // preg_replace_callback() returns null on failure (for instance when
          // $value is not valid UTF-8, which the "u" modifier requires).
          // Returning that null would violate the string return type and raise
          // a TypeError, so report the failure as a ParseException instead.
          if (null === $unescaped) {
              throw new ParseException(sprintf('Unable to unescape the double quoted string (PCRE error code %d).', preg_last_error()));
          }

          return $unescaped;
      }

      /**
       * Unescapes a character that was found in a double-quoted string.
       *
       * @param string $value An escaped character: the backslash followed by
       *                      the rest of the escape sequence
       *
       * @throws ParseException When the character after the backslash is not
       *                        a known escape
       */
      private function unescapeCharacter(string $value): string
      {
          // $value[0] is the backslash; the byte after it selects the escape.
          switch ($value[1]) {
              case '0':
                  return "\x0";
              case 'a':
                  return "\x7";
              case 'b':
                  return "\x8";
              case 't':
              case "\t":
                  // both "\t" and a backslash followed by a literal tab yield a tab
                  return "\t";
              case 'n':
                  return "\n";
              case 'v':
                  return "\xB";
              case 'f':
                  return "\xC";
              case 'r':
                  return "\r";
              case 'e':
                  return "\x1B";
              case ' ':
                  return ' ';
              case '"':
                  return '"';
              case '/':
                  return '/';
              case '\\':
                  return '\\';
              case 'N':
                  // U+0085 NEXT LINE
                  return "\xC2\x85";
              case '_':
                  // U+00A0 NO-BREAK SPACE
                  return "\xC2\xA0";
              case 'L':
                  // U+2028 LINE SEPARATOR
                  return "\xE2\x80\xA8";
              case 'P':
                  // U+2029 PARAGRAPH SEPARATOR
                  return "\xE2\x80\xA9";
              case 'x':
                  // two hexadecimal digits follow the "x"
                  return self::utf8chr(hexdec(substr($value, 2, 2)));
              case 'u':
                  // four hexadecimal digits follow the "u"
                  return self::utf8chr(hexdec(substr($value, 2, 4)));
              case 'U':
                  // eight hexadecimal digits follow the "U"
                  return self::utf8chr(hexdec(substr($value, 2, 8)));
              default:
                  throw new ParseException(sprintf('Found unknown escape character "%s".', $value));
          }
      }

      /**
       * Get the UTF-8 character for the given code point.
       *
       * The code point is first reduced modulo 0x200000, then encoded on one
       * to four bytes depending on its magnitude.
       */
      private static function utf8chr(int $c): string
      {
          $c %= 0x200000;

          if (0x80 > $c) {
              // single byte
              return \chr($c);
          }
          if (0x800 > $c) {
              // two bytes: 110xxxxx 10xxxxxx
              return \chr(0xC0 | $c >> 6).\chr(0x80 | $c & 0x3F);
          }
          if (0x10000 > $c) {
              // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
              return \chr(0xE0 | $c >> 12).\chr(0x80 | $c >> 6 & 0x3F).\chr(0x80 | $c & 0x3F);
          }

          // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
          return \chr(0xF0 | $c >> 18).\chr(0x80 | $c >> 12 & 0x3F).\chr(0x80 | $c >> 6 & 0x3F).\chr(0x80 | $c & 0x3F);
      }
  }