1: <?php
2:
3: 4: 5: 6: 7: 8: 9: 10:
11:
12: namespace Symfony\Component\Yaml;
13:
14: use Symfony\Component\Yaml\Exception\ParseException;
15:
16: 17: 18: 19: 20:
21: class Parser
22: {
23: const FOLDED_SCALAR_PATTERN = '(?P<separator>\||>)(?P<modifiers>\+|\-|\d+|\+\d+|\-\d+|\d+\+|\d+\-)?(?P<comments> +#.*)?';
24:
25: private $offset = 0;
26: private $lines = array();
27: private $currentLineNb = -1;
28: private $currentLine = '';
29: private $refs = array();
30:
31: 32: 33: 34: 35:
/**
 * Creates a parser.
 *
 * @param int $offset Number of lines preceding the parsed text; it is added to the
 *                    local line index when line numbers are reported in errors
 */
public function __construct($offset = 0)
{
    $this->offset = $offset;
}
40:
41: 42: 43: 44: 45: 46: 47: 48: 49: 50: 51: 52:
/**
 * Parses a YAML string to a PHP value.
 *
 * The three flags are forwarded unchanged to Inline::parse() and to the nested
 * parsers created for embedded blocks; their exact meaning is defined by Inline.
 *
 * @param string $value                  A YAML string
 * @param bool   $exceptionOnInvalidType Forwarded to Inline::parse()
 * @param bool   $objectSupport          Forwarded to Inline::parse()
 * @param bool   $objectForMap           Forwarded to Inline::parse()
 *
 * @return mixed The parsed value: an array for sequences/mappings, the inline value for
 *               a single-line document, or null when nothing was parsed
 *
 * @throws ParseException If the YAML is not valid
 */
public function parse($value, $exceptionOnInvalidType = false, $objectSupport = false, $objectForMap = false)
{
    // Validate first: cleanup() runs /u regexes and mutates $this->offset, so it
    // must never see a byte sequence that is not valid UTF-8.
    if (!preg_match('//u', $value)) {
        throw new ParseException('The YAML value does not appear to be valid UTF-8.');
    }

    $this->currentLineNb = -1;
    $this->currentLine = '';
    $this->lines = explode("\n", $this->cleanup($value));

    $mbEncoding = null;
    if (function_exists('mb_internal_encoding') && ((int) ini_get('mbstring.func_overload')) & 2) {
        $mbEncoding = mb_internal_encoding();
        mb_internal_encoding('UTF-8');
    }

    $data = null;
    $failure = null;
    try {
        $data = $this->doParse($exceptionOnInvalidType, $objectSupport, $objectForMap);
    } catch (\Exception $failure) {
        // Rethrown below, once the caller's encoding has been restored. "finally"
        // is avoided to stay within the pre-5.5 syntax the rest of the file uses.
    }

    if (null !== $mbEncoding) {
        mb_internal_encoding($mbEncoding);
    }

    if (null !== $failure) {
        throw $failure;
    }

    return $data;
}

/**
 * Walks $this->lines and builds the PHP value; called by parse() only.
 *
 * @param bool $exceptionOnInvalidType Forwarded to Inline::parse() and nested parsers
 * @param bool $objectSupport          Forwarded to Inline::parse() and nested parsers
 * @param bool $objectForMap           Forwarded to Inline::parse() and nested parsers
 *
 * @return mixed See parse()
 *
 * @throws ParseException If the YAML is not valid
 */
private function doParse($exceptionOnInvalidType, $objectSupport, $objectForMap)
{
    $data = array();
    $context = null;
    $allowOverwrite = false;

    while ($this->moveToNextLine()) {
        if ($this->isCurrentLineEmpty()) {
            continue;
        }

        // a non-empty line always has a first character; tabs are not valid indentation
        if ("\t" === $this->currentLine[0]) {
            throw new ParseException('A YAML file cannot contain tabs as indentation.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
        }

        $isRef = $mergeNode = false;
        if (preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+?))?\s*$#u', $this->currentLine, $values)) {
            // sequence item
            if ('mapping' === $context) {
                throw new ParseException('You cannot define a sequence item when in a mapping', $this->getRealCurrentLineNb() + 1, $this->currentLine);
            }
            $context = 'sequence';

            // "&name" anchor in front of the item value
            if (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
                $isRef = $matches['ref'];
                $values['value'] = $matches['value'];
            }

            if (!isset($values['value']) || '' === trim($values['value'], ' ') || 0 === strpos(ltrim($values['value'], ' '), '#')) {
                // no inline value: the item is the block that follows
                $c = $this->getRealCurrentLineNb() + 1;
                $parser = new Parser($c);
                $parser->refs = & $this->refs;
                $data[] = $parser->parse($this->getNextEmbedBlock(null, true), $exceptionOnInvalidType, $objectSupport, $objectForMap);
            } else {
                if (isset($values['leadspaces'])
                    && preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $values['value'], $matches)
                ) {
                    // compact notation ("- key: value"): the rest of the line plus the
                    // following indented lines are parsed together as one block
                    $c = $this->getRealCurrentLineNb();
                    $parser = new Parser($c);
                    $parser->refs = & $this->refs;

                    $block = $values['value'];
                    if ($this->isNextLineIndented()) {
                        $block .= "\n".$this->getNextEmbedBlock($this->getCurrentLineIndentation() + strlen($values['leadspaces']) + 1);
                    }

                    $data[] = $parser->parse($block, $exceptionOnInvalidType, $objectSupport, $objectForMap);
                } else {
                    $data[] = $this->parseValue($values['value'], $exceptionOnInvalidType, $objectSupport, $objectForMap);
                }
            }
        } elseif (preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\[\{].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $this->currentLine, $values) && (false === strpos($values['key'], ' #') || in_array($values['key'][0], array('"', "'"), true))) {
            // mapping item
            if ('sequence' === $context) {
                throw new ParseException('You cannot define a mapping item when in a sequence', $this->getRealCurrentLineNb() + 1, $this->currentLine);
            }
            $context = 'mapping';

            // NOTE(review): this call with a null value presumably primes Inline's
            // settings before parseScalar() is used - confirm against Inline.
            Inline::parse(null, $exceptionOnInvalidType, $objectSupport, $objectForMap, $this->refs);
            try {
                $key = Inline::parseScalar($values['key']);
            } catch (ParseException $e) {
                $e->setParsedLine($this->getRealCurrentLineNb() + 1);
                $e->setSnippet($this->currentLine);

                throw $e;
            }

            if ('<<' === $key) {
                // merge key: copy entries into $data without replacing existing ones
                $mergeNode = true;
                $allowOverwrite = true;
                if (isset($values['value']) && 0 === strpos($values['value'], '*')) {
                    $refName = substr($values['value'], 1);
                    if (!array_key_exists($refName, $this->refs)) {
                        throw new ParseException(sprintf('Reference "%s" does not exist.', $refName), $this->getRealCurrentLineNb() + 1, $this->currentLine);
                    }

                    $refValue = $this->refs[$refName];

                    if (!is_array($refValue)) {
                        throw new ParseException('YAML merge keys used with a scalar value instead of an array.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
                    }

                    foreach ($refValue as $mergedKey => $mergedValue) {
                        if (!isset($data[$mergedKey])) {
                            $data[$mergedKey] = $mergedValue;
                        }
                    }
                } else {
                    if (isset($values['value']) && $values['value'] !== '') {
                        $value = $values['value'];
                    } else {
                        $value = $this->getNextEmbedBlock();
                    }
                    $c = $this->getRealCurrentLineNb() + 1;
                    $parser = new Parser($c);
                    $parser->refs = & $this->refs;
                    $parsed = $parser->parse($value, $exceptionOnInvalidType, $objectSupport, $objectForMap);

                    if (!is_array($parsed)) {
                        throw new ParseException('YAML merge keys used with a scalar value instead of an array.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
                    }

                    if (isset($parsed[0])) {
                        // a list of mappings: merge each one in order, earlier entries win
                        foreach ($parsed as $parsedItem) {
                            if (!is_array($parsedItem)) {
                                throw new ParseException('Merge items must be arrays.', $this->getRealCurrentLineNb() + 1, $parsedItem);
                            }

                            foreach ($parsedItem as $mergedKey => $mergedValue) {
                                if (!isset($data[$mergedKey])) {
                                    $data[$mergedKey] = $mergedValue;
                                }
                            }
                        }
                    } else {
                        // a single mapping
                        foreach ($parsed as $mergedKey => $mergedValue) {
                            if (!isset($data[$mergedKey])) {
                                $data[$mergedKey] = $mergedValue;
                            }
                        }
                    }
                }
            } elseif (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
                $isRef = $matches['ref'];
                $values['value'] = $matches['value'];
            }

            if ($mergeNode) {
                // the merge has already been applied above
            } elseif (!isset($values['value']) || '' === trim($values['value'], ' ') || 0 === strpos(ltrim($values['value'], ' '), '#')) {
                // no inline value: either a null entry or a nested block
                if (!$this->isNextLineIndented() && !$this->isNextLineUnIndentedCollection()) {
                    // an existing key is only replaced once a merge key has been seen
                    if ($allowOverwrite || !isset($data[$key])) {
                        $data[$key] = null;
                    }
                } else {
                    $c = $this->getRealCurrentLineNb() + 1;
                    $parser = new Parser($c);
                    $parser->refs = & $this->refs;
                    $value = $parser->parse($this->getNextEmbedBlock(), $exceptionOnInvalidType, $objectSupport, $objectForMap);

                    if ($allowOverwrite || !isset($data[$key])) {
                        $data[$key] = $value;
                    }
                }
            } else {
                $value = $this->parseValue($values['value'], $exceptionOnInvalidType, $objectSupport, $objectForMap);

                if ($allowOverwrite || !isset($data[$key])) {
                    $data[$key] = $value;
                }
            }
        } else {
            // neither a sequence item nor a mapping item
            if ('---' === $this->currentLine) {
                throw new ParseException('Multiple documents are not supported.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
            }

            // a document made of one line (optionally followed by one empty line)
            // is handed to the inline parser as a whole
            $lineCount = count($this->lines);
            if (1 === $lineCount || (2 === $lineCount && empty($this->lines[1]))) {
                try {
                    $value = Inline::parse($this->lines[0], $exceptionOnInvalidType, $objectSupport, $objectForMap, $this->refs);
                } catch (ParseException $e) {
                    $e->setParsedLine($this->getRealCurrentLineNb() + 1);
                    $e->setSnippet($this->currentLine);

                    throw $e;
                }

                if (is_array($value)) {
                    $first = reset($value);
                    if (is_string($first) && 0 === strpos($first, '*')) {
                        // the first entry looks like "*name": replace every entry
                        // with the stored reference of that name
                        $data = array();
                        foreach ($value as $alias) {
                            $data[] = $this->refs[substr($alias, 1)];
                        }
                        $value = $data;
                    }
                }

                return $value;
            }

            switch (preg_last_error()) {
                case PREG_INTERNAL_ERROR:
                    $error = 'Internal PCRE error.';
                    break;
                case PREG_BACKTRACK_LIMIT_ERROR:
                    $error = 'pcre.backtrack_limit reached.';
                    break;
                case PREG_RECURSION_LIMIT_ERROR:
                    $error = 'pcre.recursion_limit reached.';
                    break;
                case PREG_BAD_UTF8_ERROR:
                    $error = 'Malformed UTF-8 data.';
                    break;
                case PREG_BAD_UTF8_OFFSET_ERROR:
                    $error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.';
                    break;
                default:
                    $error = 'Unable to parse.';
            }

            throw new ParseException($error, $this->getRealCurrentLineNb() + 1, $this->currentLine);
        }

        // $isRef is false or the anchor name; a strict test keeps an anchor named "0"
        if (false !== $isRef) {
            $this->refs[$isRef] = end($data);
        }
    }

    return empty($data) ? null : $data;
}
300:
301: 302: 303: 304: 305:
/**
 * Returns the current line number shifted by the parser offset.
 *
 * @return int The local line index plus $this->offset
 */
private function getRealCurrentLineNb()
{
    $localLineNb = $this->currentLineNb;

    return $localLineNb + $this->offset;
}
310:
311: 312: 313: 314: 315:
/**
 * Returns the indentation of the current line.
 *
 * @return int The number of leading space characters (tabs are not counted)
 */
private function getCurrentLineIndentation()
{
    // length of the leading run made of spaces only
    return strspn($this->currentLine, ' ');
}
320:
321: 322: 323: 324: 325: 326: 327: 328: 329: 330:
/**
 * Returns the block of lines embedded under the current line.
 *
 * The cursor is advanced past the lines that were consumed. Every returned line
 * has its first $newIndent characters removed.
 *
 * @param int|null $indentation Column at which the block starts; when null it is taken
 *                              from the indentation of the next line
 * @param bool     $inSequence  True when the current line is a sequence item; a following
 *                              dash at the same indentation is then left for the caller
 *
 * @return string|null The block joined with "\n", or null when there is no next line,
 *                     the next line is indented less than the block, or (in a sequence)
 *                     the next line is a sibling item
 *
 * @throws ParseException When the block is malformed
 */
private function getNextEmbedBlock($indentation = null, $inSequence = false)
{
    $oldLineIndentation = $this->getCurrentLineIndentation();

    if (!$this->moveToNextLine()) {
        return null;
    }

    if (null === $indentation) {
        $newIndent = $this->getCurrentLineIndentation();

        $unindentedEmbedBlock = $this->isStringUnIndentedCollectionItem($this->currentLine);

        // an unindented, non-empty line can only start a block if it is a "- " item
        if (!$this->isCurrentLineEmpty() && 0 === $newIndent && !$unindentedEmbedBlock) {
            throw new ParseException('Indentation problem.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
        }
    } else {
        $newIndent = $indentation;
    }

    if ($this->getCurrentLineIndentation() < $newIndent) {
        // the next line does not belong to the block
        $this->moveToPreviousLine();

        return null;
    }

    $data = array(substr($this->currentLine, $newIndent));

    // The first block line may be empty (a blank line right after the dash), so its
    // first character must be checked for existence before being compared.
    if ($inSequence && $oldLineIndentation === $newIndent && isset($data[0][0]) && '-' === $data[0][0]) {
        // a dash at the indentation of the previous item is the next sequence item,
        // not an embedded block
        $this->moveToPreviousLine();

        return null;
    }

    $isItUnindentedCollection = $this->isStringUnIndentedCollectionItem($this->currentLine);

    // Comment lines are dropped unless the latest line at block indentation ends with
    // a block scalar header ("|" or ">"), in which case they may be scalar content.
    $removeCommentsPattern = '~'.self::FOLDED_SCALAR_PATTERN.'$~';
    $removeComments = !preg_match($removeCommentsPattern, $this->currentLine);

    while ($this->moveToNextLine()) {
        $indent = $this->getCurrentLineIndentation();

        if ($indent === $newIndent) {
            $removeComments = !preg_match($removeCommentsPattern, $this->currentLine);
        }

        // an unindented collection ends at the first same-level line that is not an item
        if ($isItUnindentedCollection && !$this->isStringUnIndentedCollectionItem($this->currentLine) && $newIndent === $indent) {
            $this->moveToPreviousLine();
            break;
        }

        if ($this->isCurrentLineBlank()) {
            $data[] = substr($this->currentLine, $newIndent);
            continue;
        }

        if ($removeComments && $this->isCurrentLineComment()) {
            continue;
        }

        if ($indent >= $newIndent) {
            $data[] = substr($this->currentLine, $newIndent);
        } elseif (0 === $indent) {
            // back at the top level: the block is finished
            $this->moveToPreviousLine();

            break;
        } else {
            throw new ParseException('Indentation problem.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
        }
    }

    return implode("\n", $data);
}
408:
409: 410: 411: 412: 413:
/**
 * Moves the cursor to the next line.
 *
 * @return bool False when the cursor is already on the last line (nothing changes),
 *              true once the cursor and the current line have been advanced
 */
private function moveToNextLine()
{
    $nextLineNb = $this->currentLineNb + 1;

    if ($nextLineNb >= count($this->lines)) {
        return false;
    }

    $this->currentLineNb = $nextLineNb;
    $this->currentLine = $this->lines[$nextLineNb];

    return true;
}
424:
425: 426: 427:
/**
 * Moves the cursor one line back and reloads the current line.
 *
 * No bounds check is performed; callers only use it after a successful forward move.
 */
private function moveToPreviousLine()
{
    $this->currentLineNb -= 1;
    $this->currentLine = $this->lines[$this->currentLineNb];
}
432:
433: 434: 435: 436: 437: 438: 439: 440: 441: 442: 443: 444:
/**
 * Parses the value part of a sequence or mapping item.
 *
 * Handles three forms: an alias ("*name", optionally followed by " # comment"),
 * a block scalar header ("|" or ">" with optional modifiers), and anything else,
 * which is delegated to Inline::parse().
 *
 * @param string $value                  The raw value text
 * @param bool   $exceptionOnInvalidType Forwarded to Inline::parse()
 * @param bool   $objectSupport          Forwarded to Inline::parse()
 * @param bool   $objectForMap           Forwarded to Inline::parse()
 *
 * @return mixed The referenced value, the block scalar text, or the inline value
 *
 * @throws ParseException When an alias is unknown or the inline value is invalid
 */
private function parseValue($value, $exceptionOnInvalidType, $objectSupport, $objectForMap)
{
    if (0 === strpos($value, '*')) {
        // Only " #" starts a trailing comment; "#" inside the name is part of it
        // (anchor names are matched with [^ ]+ when they are declared).
        $commentPos = strpos($value, ' #');
        if (false !== $commentPos) {
            $refName = rtrim(substr($value, 1, $commentPos - 1), ' ');
        } else {
            $refName = substr($value, 1);
        }

        if (!array_key_exists($refName, $this->refs)) {
            throw new ParseException(sprintf('Reference "%s" does not exist.', $refName), $this->getRealCurrentLineNb() + 1, $this->currentLine);
        }

        return $this->refs[$refName];
    }

    if (preg_match('/^'.self::FOLDED_SCALAR_PATTERN.'$/', $value, $matches)) {
        $modifiers = isset($matches['modifiers']) ? $matches['modifiers'] : '';

        // The modifiers mix a chomping sign and an indentation width ("+", "2-", "-3").
        // Cast before abs(): a bare sign or an empty string is not numeric.
        $indicator = preg_replace('#\d+#', '', $modifiers);
        $indentation = abs((int) $modifiers);

        return $this->parseFoldedScalar($matches['separator'], $indicator, $indentation);
    }

    try {
        return Inline::parse($value, $exceptionOnInvalidType, $objectSupport, $objectForMap, $this->refs);
    } catch (ParseException $e) {
        // attach the position before letting the error propagate
        $e->setParsedLine($this->getRealCurrentLineNb() + 1);
        $e->setSnippet($this->currentLine);

        throw $e;
    }
}
476:
477: 478: 479: 480: 481: 482: 483: 484: 485:
/**
 * Reads the text of a block scalar that starts on the line after the current one.
 *
 * @param string $separator   "|" keeps line breaks; ">" folds single line breaks into spaces
 * @param string $indicator   Chomping indicator: "" collapses trailing newlines to one,
 *                            "-" removes them, any other value leaves them untouched
 * @param int    $indentation Width of the block indentation; 0 means "use the leading
 *                            spaces of the first non-blank line"
 *
 * @return string The scalar text
 */
private function parseFoldedScalar($separator, $indicator = '', $indentation = 0)
{
    $hasLine = $this->moveToNextLine();
    if (!$hasLine) {
        return '';
    }

    $lineIsBlank = $this->isCurrentLineBlank();
    $buffer = '';

    // blank lines before the first content line each contribute a newline
    while ($hasLine && $lineIsBlank) {
        $hasLine = $this->moveToNextLine();
        if ($hasLine) {
            $buffer .= "\n";
            $lineIsBlank = $this->isCurrentLineBlank();
        }
    }

    // no explicit width: take it from the current line
    if (0 === $indentation && preg_match('/^ +/', $this->currentLine, $matches)) {
        $indentation = strlen($matches[0]);
    }

    if ($indentation > 0) {
        $linePattern = sprintf('/^ {%d}(.*)$/', $indentation);

        while ($hasLine) {
            if ($lineIsBlank) {
                $buffer .= substr($this->currentLine, $indentation);
            } elseif (preg_match($linePattern, $this->currentLine, $matches)) {
                $buffer .= $matches[1];
            } else {
                // first line that is not indented enough: the scalar is over
                break;
            }

            $hasLine = $this->moveToNextLine();
            if ($hasLine) {
                $buffer .= "\n";
                $lineIsBlank = $this->isCurrentLineBlank();
            }
        }
    } elseif ($hasLine) {
        $buffer .= "\n";
    }

    // the line that ended the scalar has to be seen again by the caller
    if ($hasLine) {
        $this->moveToPreviousLine();
    }

    if ('>' === $separator) {
        // fold isolated line breaks, keeping the trailing newlines as they were
        preg_match('/(\n*)$/', $buffer, $matches);
        $buffer = preg_replace('/(?<!\n)\n(?!\n)/', ' ', rtrim($buffer, "\n")).$matches[1];
    }

    if ('' === $indicator) {
        return preg_replace('/\n+$/s', "\n", $buffer);
    }

    if ('-' === $indicator) {
        return preg_replace('/\n+$/s', '', $buffer);
    }

    return $buffer;
}
557:
558: 559: 560: 561: 562:
/**
 * Tells whether the next non-empty line is indented deeper than the current one.
 *
 * Cursor side effect: when a line is found, the cursor is moved back by exactly one
 * line, so after skipping empty lines it rests on the line just before the one that
 * was inspected. When the end is reached, the cursor is not moved back at all.
 *
 * @return bool True if the next non-empty line has a greater indentation
 */
private function isNextLineIndented()
{
    $referenceIndent = $this->getCurrentLineIndentation();

    // advance to the first line that is neither blank nor a comment
    do {
        if (!$this->moveToNextLine()) {
            return false;
        }
    } while ($this->isCurrentLineEmpty());

    $isDeeper = $this->getCurrentLineIndentation() > $referenceIndent;

    $this->moveToPreviousLine();

    return $isDeeper;
}
585:
586: 587: 588: 589: 590:
/**
 * Tells whether the current line carries no content.
 *
 * @return bool True if the current line is blank or is a comment line
 */
private function isCurrentLineEmpty()
{
    if ($this->isCurrentLineBlank()) {
        return true;
    }

    return $this->isCurrentLineComment();
}
595:
596: 597: 598: 599: 600:
/**
 * Tells whether the current line is blank.
 *
 * @return bool True if the current line is empty or made of space characters only
 */
private function isCurrentLineBlank()
{
    // blank means the leading run of spaces covers the whole line
    return strspn($this->currentLine, ' ') === strlen($this->currentLine);
}
605:
606: 607: 608: 609: 610:
/**
 * Tells whether the current line is a comment line.
 *
 * @return bool True if the first character after the leading spaces is "#";
 *              false otherwise, including for an empty or all-space line
 */
private function isCurrentLineComment()
{
    // leading spaces only; a tab in front of "#" does not make a comment line here
    $ltrimmedLine = ltrim($this->currentLine, ' ');

    // guard the offset read: an empty remainder has no first character
    return '' !== $ltrimmedLine && '#' === $ltrimmedLine[0];
}
618:
619: 620: 621: 622: 623: 624: 625:
/**
 * Normalizes a YAML source before it is split into lines.
 *
 * Line endings become "\n"; a leading %YAML directive, leading comment lines and a
 * leading "---" marker are removed, and $this->offset grows by the number of lines
 * removed. A trailing "..." marker is removed only when a "---" marker was found.
 *
 * @param string $value The input YAML string
 *
 * @return string The cleaned up YAML string
 */
private function cleanup($value)
{
    $value = str_replace(array("\r\n", "\r"), "\n", $value);

    // %YAML directive line
    $replacements = 0;
    $value = preg_replace('#^\%YAML[: ][\d\.]+.*\n#u', '', $value, -1, $replacements);
    $this->offset += $replacements;

    // comment lines at the very top
    $withoutComments = preg_replace('#^(\#.*?\n)+#s', '', $value, -1, $replacements);
    if (1 == $replacements) {
        // count the removed lines through the difference in newlines
        $this->offset += substr_count($value, "\n") - substr_count($withoutComments, "\n");
        $value = $withoutComments;
    }

    // document start marker
    $withoutMarker = preg_replace('#^\-\-\-.*?\n#s', '', $value, -1, $replacements);
    if (1 != $replacements) {
        return $value;
    }

    $this->offset += substr_count($value, "\n") - substr_count($withoutMarker, "\n");

    // document end marker, only looked for when a start marker was present
    return preg_replace('#\.\.\.\s*$#s', '', $withoutMarker);
}
656:
657: 658: 659: 660: 661:
/**
 * Tells whether the next non-empty line is a "- " item at the current indentation.
 *
 * Cursor side effect: when a line is found, the cursor is moved back by exactly one
 * line, so after skipping empty lines it rests on the line just before the one that
 * was inspected. When the end is reached, the cursor is not moved back at all.
 *
 * @return bool True if the next non-empty line is an unindented collection item
 */
private function isNextLineUnIndentedCollection()
{
    $referenceIndent = $this->getCurrentLineIndentation();

    // advance to the first line that is neither blank nor a comment
    do {
        if (!$this->moveToNextLine()) {
            return false;
        }
    } while ($this->isCurrentLineEmpty());

    $isCollectionItem = $this->getCurrentLineIndentation() === $referenceIndent
        && $this->isStringUnIndentedCollectionItem($this->currentLine);

    $this->moveToPreviousLine();

    return $isCollectionItem;
}
688:
689: 690: 691: 692: 693:
/**
 * Tells whether the current line starts with "- ".
 *
 * The method declares no parameter: callers pass the current line as an argument,
 * but the check always reads $this->currentLine.
 *
 * @return bool True if the current line begins with a dash followed by a space
 */
private function isStringUnIndentedCollectionItem()
{
    return 0 === strncmp($this->currentLine, '- ', 2);
}
698: }
699: