1: <?php
2:
3: /*
4: * This file is part of the Symfony package.
5: *
6: * (c) Fabien Potencier <fabien@symfony.com>
7: *
8: * For the full copyright and license information, please view the LICENSE
9: * file that was distributed with this source code.
10: */
11:
12: namespace Symfony\Component\Yaml;
13:
14: /**
15: * Unescaper encapsulates unescaping rules for single and double-quoted
16: * YAML strings.
17: *
18: * @author Matthew Lewinski <matthew@lewinski.org>
19: */
class Unescaper
{
    // Parser and Inline assume UTF-8 encoding, so escaped Unicode characters
    // must be converted to that encoding.
    // @deprecated since 2.5, to be removed in 3.0
    const ENCODING = 'UTF-8';

    // Regex fragment that matches an escaped character in a double quoted
    // string: a backslash followed by either a single-character escape, or by
    // "x", "u" or "U" and exactly 2, 4 or 8 hexadecimal digits respectively.
    const REGEX_ESCAPED_CHARACTER = "\\\\([0abt\tnvfre \\\"\\/\\\\N_LP]|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})";

    /**
     * Unescapes a single quoted string.
     *
     * The only escape handled here is the doubled single quote (''), which
     * is collapsed into one single quote.
     *
     * @param string $value A single quoted string.
     *
     * @return string The unescaped string.
     */
    public function unescapeSingleQuotedString($value)
    {
        return str_replace('\'\'', '\'', $value);
    }

    /**
     * Unescapes a double quoted string.
     *
     * Every sequence matched by REGEX_ESCAPED_CHARACTER is replaced by the
     * result of unescapeCharacter(); all other text is left untouched.
     *
     * @param string $value A double quoted string.
     *
     * @return string The unescaped string.
     */
    public function unescapeDoubleQuotedString($value)
    {
        // Alias $this so the closure does not depend on automatic $this
        // binding, which is not available on PHP 5.3.
        $self = $this;
        $callback = function ($match) use ($self) {
            // $match[0] is the whole escape sequence, backslash included.
            return $self->unescapeCharacter($match[0]);
        };

        // evaluate the string
        return preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value);
    }

    /**
     * Unescapes a character that was found in a double-quoted string.
     *
     * @param string $value An escaped character (the leading backslash
     *                      followed by the escape code)
     *
     * @return string|null The unescaped character, or null when the escape
     *                     code is not one of the recognized ones
     */
    public function unescapeCharacter($value)
    {
        // Square brackets are used for the string offset: the curly brace
        // form ($value{1}) is deprecated as of PHP 7.4 and is a parse error
        // as of PHP 8.0.
        switch ($value[1]) {
            case '0':
                return "\x0";
            case 'a':
                return "\x7";
            case 'b':
                return "\x8";
            case 't':
            case "\t":
                // Both "\t" spelled out and a backslash followed by a
                // literal tab yield a tab.
                return "\t";
            case 'n':
                return "\n";
            case 'v':
                return "\xB";
            case 'f':
                return "\xC";
            case 'r':
                return "\r";
            case 'e':
                return "\x1B";
            case ' ':
                return ' ';
            case '"':
                return '"';
            case '/':
                return '/';
            case '\\':
                return '\\';
            case 'N':
                // U+0085 NEXT LINE
                return "\xC2\x85";
            case '_':
                // U+00A0 NO-BREAK SPACE
                return "\xC2\xA0";
            case 'L':
                // U+2028 LINE SEPARATOR
                return "\xE2\x80\xA8";
            case 'P':
                // U+2029 PARAGRAPH SEPARATOR
                return "\xE2\x80\xA9";
            case 'x':
                // Two hex digits follow the "\x" prefix.
                return self::utf8chr(hexdec(substr($value, 2, 2)));
            case 'u':
                // Four hex digits follow the "\u" prefix.
                return self::utf8chr(hexdec(substr($value, 2, 4)));
            case 'U':
                // Eight hex digits follow the "\U" prefix.
                return self::utf8chr(hexdec(substr($value, 2, 8)));
            default:
                // Unknown escape code: nothing to return.
                return null;
        }
    }

    /**
     * Get the UTF-8 character for the given code point.
     *
     * The code point is first reduced modulo 0x200000 (21 bits), then encoded
     * on one to four bytes depending on its magnitude.
     *
     * @param int $c The unicode code point
     *
     * @return string The corresponding UTF-8 character
     */
    private static function utf8chr($c)
    {
        // Keep only the 21 bits that a four-byte sequence can carry.
        $c %= 0x200000;

        if ($c < 0x80) {
            // One byte: 0xxxxxxx
            return chr($c);
        }
        if ($c < 0x800) {
            // Two bytes: 110xxxxx 10xxxxxx
            return chr(0xC0 | ($c >> 6)).chr(0x80 | ($c & 0x3F));
        }
        if ($c < 0x10000) {
            // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
            return chr(0xE0 | ($c >> 12)).chr(0x80 | (($c >> 6) & 0x3F)).chr(0x80 | ($c & 0x3F));
        }

        // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        return chr(0xF0 | ($c >> 18)).chr(0x80 | (($c >> 12) & 0x3F)).chr(0x80 | (($c >> 6) & 0x3F)).chr(0x80 | ($c & 0x3F));
    }
}
142: