summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael J. Spencer <bigcheesegs@gmail.com>2012-04-03 23:09:22 +0000
committerMichael J. Spencer <bigcheesegs@gmail.com>2012-04-03 23:09:22 +0000
commit93210e847a1496b24cef881723e57c489082dcfe (patch)
tree83d1f8828d8b6835a6511d28cf3c63fad8b06aef
parent2ce63c73520cd6e715f9114589f802938b5db01f (diff)
downloadllvm-93210e847a1496b24cef881723e57c489082dcfe.tar.gz
llvm-93210e847a1496b24cef881723e57c489082dcfe.tar.bz2
llvm-93210e847a1496b24cef881723e57c489082dcfe.tar.xz
Add YAML parser to Support.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@153977 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--CMakeLists.txt1
-rw-r--r--LICENSE.TXT1
-rw-r--r--include/llvm/Support/YAMLParser.h564
-rw-r--r--lib/Support/CMakeLists.txt1
-rw-r--r--lib/Support/YAMLParser.cpp2115
-rw-r--r--test/YAMLParser/LICENSE.txt19
-rw-r--r--test/YAMLParser/bool.data6
-rw-r--r--test/YAMLParser/construct-bool.data11
-rw-r--r--test/YAMLParser/construct-custom.data28
-rw-r--r--test/YAMLParser/construct-float.data8
-rw-r--r--test/YAMLParser/construct-int.data8
-rw-r--r--test/YAMLParser/construct-map.data8
-rw-r--r--test/YAMLParser/construct-merge.data29
-rw-r--r--test/YAMLParser/construct-null.data20
-rw-r--r--test/YAMLParser/construct-omap.data10
-rw-r--r--test/YAMLParser/construct-pairs.data9
-rw-r--r--test/YAMLParser/construct-seq.data17
-rw-r--r--test/YAMLParser/construct-set.data9
-rw-r--r--test/YAMLParser/construct-str-ascii.data3
-rw-r--r--test/YAMLParser/construct-str.data3
-rw-r--r--test/YAMLParser/construct-timestamp.data7
-rw-r--r--test/YAMLParser/construct-value.data12
-rw-r--r--test/YAMLParser/duplicate-key.former-loader-error.data5
-rw-r--r--test/YAMLParser/duplicate-mapping-key.former-loader-error.data8
-rw-r--r--test/YAMLParser/duplicate-merge-key.former-loader-error.data6
-rw-r--r--test/YAMLParser/duplicate-value-key.former-loader-error.data6
-rw-r--r--test/YAMLParser/emit-block-scalar-in-simple-key-context-bug.data6
-rw-r--r--test/YAMLParser/empty-document-bug.data2
-rw-r--r--test/YAMLParser/float.data8
-rw-r--r--test/YAMLParser/int.data8
-rw-r--r--test/YAMLParser/invalid-single-quote-bug.data4
-rw-r--r--test/YAMLParser/merge.data3
-rw-r--r--test/YAMLParser/more-floats.data3
-rw-r--r--test/YAMLParser/negative-float-bug.data3
-rw-r--r--test/YAMLParser/null.data5
-rw-r--r--test/YAMLParser/resolver.data32
-rw-r--r--test/YAMLParser/run-parser-crash-bug.data10
-rw-r--r--test/YAMLParser/scan-document-end-bug.data5
-rw-r--r--test/YAMLParser/scan-line-break-bug.data5
-rw-r--r--test/YAMLParser/single-dot-is-not-float-bug.data3
-rw-r--r--test/YAMLParser/sloppy-indentation.data19
-rw-r--r--test/YAMLParser/spec-02-01.data5
-rw-r--r--test/YAMLParser/spec-02-02.data5
-rw-r--r--test/YAMLParser/spec-02-03.data10
-rw-r--r--test/YAMLParser/spec-02-04.data10
-rw-r--r--test/YAMLParser/spec-02-05.data5
-rw-r--r--test/YAMLParser/spec-02-06.data7
-rw-r--r--test/YAMLParser/spec-02-07.data12
-rw-r--r--test/YAMLParser/spec-02-08.data12
-rw-r--r--test/YAMLParser/spec-02-09.data10
-rw-r--r--test/YAMLParser/spec-02-10.data10
-rw-r--r--test/YAMLParser/spec-02-11.data11
-rw-r--r--test/YAMLParser/spec-02-12.data10
-rw-r--r--test/YAMLParser/spec-02-13.data6
-rw-r--r--test/YAMLParser/spec-02-14.data6
-rw-r--r--test/YAMLParser/spec-02-15.data10
-rw-r--r--test/YAMLParser/spec-02-16.data9
-rw-r--r--test/YAMLParser/spec-02-17.data16
-rw-r--r--test/YAMLParser/spec-02-18.data8
-rw-r--r--test/YAMLParser/spec-02-19.data7
-rw-r--r--test/YAMLParser/spec-02-20.data8
-rw-r--r--test/YAMLParser/spec-02-21.data6
-rw-r--r--test/YAMLParser/spec-02-22.data6
-rw-r--r--test/YAMLParser/spec-02-23.data15
-rw-r--r--test/YAMLParser/spec-02-24.data16
-rw-r--r--test/YAMLParser/spec-02-25.data9
-rw-r--r--test/YAMLParser/spec-02-26.data9
-rw-r--r--test/YAMLParser/spec-02-27.data31
-rw-r--r--test/YAMLParser/spec-02-28.data28
-rw-r--r--test/YAMLParser/spec-05-01-utf8.data3
-rw-r--r--test/YAMLParser/spec-05-02-utf8.data7
-rw-r--r--test/YAMLParser/spec-05-03.data9
-rw-r--r--test/YAMLParser/spec-05-04.data4
-rw-r--r--test/YAMLParser/spec-05-05.data3
-rw-r--r--test/YAMLParser/spec-05-06.data4
-rw-r--r--test/YAMLParser/spec-05-07.data6
-rw-r--r--test/YAMLParser/spec-05-08.data4
-rw-r--r--test/YAMLParser/spec-05-09.data4
-rw-r--r--test/YAMLParser/spec-05-10.data6
-rw-r--r--test/YAMLParser/spec-05-11.data5
-rw-r--r--test/YAMLParser/spec-05-12.data16
-rw-r--r--test/YAMLParser/spec-05-13.data5
-rw-r--r--test/YAMLParser/spec-05-14.data9
-rw-r--r--test/YAMLParser/spec-05-15.data7
-rw-r--r--test/YAMLParser/spec-06-01.data16
-rw-r--r--test/YAMLParser/spec-06-02.data5
-rw-r--r--test/YAMLParser/spec-06-03.data4
-rw-r--r--test/YAMLParser/spec-06-04.data6
-rw-r--r--test/YAMLParser/spec-06-05.data8
-rw-r--r--test/YAMLParser/spec-06-06.data9
-rw-r--r--test/YAMLParser/spec-06-07.data10
-rw-r--r--test/YAMLParser/spec-06-08.data4
-rw-r--r--test/YAMLParser/spec-07-01.data5
-rw-r--r--test/YAMLParser/spec-07-02.data6
-rw-r--r--test/YAMLParser/spec-07-03.data7
-rw-r--r--test/YAMLParser/spec-07-04.data5
-rw-r--r--test/YAMLParser/spec-07-05.data10
-rw-r--r--test/YAMLParser/spec-07-06.data7
-rw-r--r--test/YAMLParser/spec-07-07a.data4
-rw-r--r--test/YAMLParser/spec-07-07b.data6
-rw-r--r--test/YAMLParser/spec-07-08.data11
-rw-r--r--test/YAMLParser/spec-07-09.data13
-rw-r--r--test/YAMLParser/spec-07-10.data13
-rw-r--r--test/YAMLParser/spec-07-11.data4
-rw-r--r--test/YAMLParser/spec-07-12a.data5
-rw-r--r--test/YAMLParser/spec-07-12b.data6
-rw-r--r--test/YAMLParser/spec-07-13.data11
-rw-r--r--test/YAMLParser/spec-08-01.data4
-rw-r--r--test/YAMLParser/spec-08-02.data4
-rw-r--r--test/YAMLParser/spec-08-03.data4
-rw-r--r--test/YAMLParser/spec-08-04.data9
-rw-r--r--test/YAMLParser/spec-08-05.data7
-rw-r--r--test/YAMLParser/spec-08-06.data12
-rw-r--r--test/YAMLParser/spec-08-07.data6
-rw-r--r--test/YAMLParser/spec-08-08.data15
-rw-r--r--test/YAMLParser/spec-08-09.data13
-rw-r--r--test/YAMLParser/spec-08-10.data17
-rw-r--r--test/YAMLParser/spec-08-11.data4
-rw-r--r--test/YAMLParser/spec-08-12.data10
-rw-r--r--test/YAMLParser/spec-08-13.data6
-rw-r--r--test/YAMLParser/spec-08-14.data7
-rw-r--r--test/YAMLParser/spec-08-15.data7
-rw-r--r--test/YAMLParser/spec-09-01.data8
-rw-r--r--test/YAMLParser/spec-09-02.data14
-rw-r--r--test/YAMLParser/spec-09-03.data8
-rw-r--r--test/YAMLParser/spec-09-04.data6
-rw-r--r--test/YAMLParser/spec-09-05.data10
-rw-r--r--test/YAMLParser/spec-09-06.data3
-rw-r--r--test/YAMLParser/spec-09-07.data8
-rw-r--r--test/YAMLParser/spec-09-08.data3
-rw-r--r--test/YAMLParser/spec-09-09.data8
-rw-r--r--test/YAMLParser/spec-09-10.data5
-rw-r--r--test/YAMLParser/spec-09-11.data7
-rw-r--r--test/YAMLParser/spec-09-12.data10
-rw-r--r--test/YAMLParser/spec-09-13.data8
-rw-r--r--test/YAMLParser/spec-09-14.data21
-rw-r--r--test/YAMLParser/spec-09-15.data15
-rw-r--r--test/YAMLParser/spec-09-16.data5
-rw-r--r--test/YAMLParser/spec-09-17.data5
-rw-r--r--test/YAMLParser/spec-09-18.data11
-rw-r--r--test/YAMLParser/spec-09-19.data6
-rw-r--r--test/YAMLParser/spec-09-20.data13
-rw-r--r--test/YAMLParser/spec-09-21.data12
-rw-r--r--test/YAMLParser/spec-09-22.data6
-rw-r--r--test/YAMLParser/spec-09-23.data13
-rw-r--r--test/YAMLParser/spec-09-24.data8
-rw-r--r--test/YAMLParser/spec-09-25.data5
-rw-r--r--test/YAMLParser/spec-09-26.data10
-rw-r--r--test/YAMLParser/spec-09-27.data10
-rw-r--r--test/YAMLParser/spec-09-28.data10
-rw-r--r--test/YAMLParser/spec-09-29.data6
-rw-r--r--test/YAMLParser/spec-09-30.data16
-rw-r--r--test/YAMLParser/spec-09-31.data16
-rw-r--r--test/YAMLParser/spec-09-32.data16
-rw-r--r--test/YAMLParser/spec-09-33.data16
-rw-r--r--test/YAMLParser/spec-10-01.data4
-rw-r--r--test/YAMLParser/spec-10-02.data10
-rw-r--r--test/YAMLParser/spec-10-03.data6
-rw-r--r--test/YAMLParser/spec-10-04.data6
-rw-r--r--test/YAMLParser/spec-10-05.data9
-rw-r--r--test/YAMLParser/spec-10-06.data4
-rw-r--r--test/YAMLParser/spec-10-07.data9
-rw-r--r--test/YAMLParser/spec-10-08.data13
-rw-r--r--test/YAMLParser/spec-10-09.data6
-rw-r--r--test/YAMLParser/spec-10-10.data10
-rw-r--r--test/YAMLParser/spec-10-11.data9
-rw-r--r--test/YAMLParser/spec-10-12.data5
-rw-r--r--test/YAMLParser/spec-10-13.data7
-rw-r--r--test/YAMLParser/spec-10-14.data6
-rw-r--r--test/YAMLParser/spec-10-15.data5
-rw-r--r--test/YAMLParser/str.data3
-rw-r--r--test/YAMLParser/timestamp-bugs.data8
-rw-r--r--test/YAMLParser/timestamp.data7
-rw-r--r--test/YAMLParser/utf8-implicit.data3
-rw-r--r--test/YAMLParser/utf8.data3
-rw-r--r--test/YAMLParser/value.data3
-rw-r--r--test/YAMLParser/yaml.data5
-rw-r--r--unittests/CMakeLists.txt1
-rw-r--r--unittests/Support/YAMLParserTest.cpp179
-rw-r--r--utils/yaml-bench/CMakeLists.txt5
-rw-r--r--utils/yaml-bench/Makefile20
-rw-r--r--utils/yaml-bench/YAMLBench.cpp203
182 files changed, 4586 insertions, 0 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 33dd12314d..8336bc975e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -397,6 +397,7 @@ add_subdirectory(utils/count)
add_subdirectory(utils/not)
add_subdirectory(utils/llvm-lit)
add_subdirectory(utils/json-bench)
+add_subdirectory(utils/yaml-bench)
add_subdirectory(projects)
diff --git a/LICENSE.TXT b/LICENSE.TXT
index 837688e76a..00cf601169 100644
--- a/LICENSE.TXT
+++ b/LICENSE.TXT
@@ -67,3 +67,4 @@ Autoconf llvm/autoconf
CellSPU backend llvm/lib/Target/CellSPU/README.txt
Google Test llvm/utils/unittest/googletest
OpenBSD regex llvm/lib/Support/{reg*, COPYRIGHT.regex}
+pyyaml tests llvm/test/YAMLParser/{*.data, LICENSE.TXT}
diff --git a/include/llvm/Support/YAMLParser.h b/include/llvm/Support/YAMLParser.h
new file mode 100644
index 0000000000..27d039164c
--- /dev/null
+++ b/include/llvm/Support/YAMLParser.h
@@ -0,0 +1,564 @@
+//===--- YAMLParser.h - Simple YAML parser --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a YAML 1.2 parser.
+//
+// See http://www.yaml.org/spec/1.2/spec.html for the full standard.
+//
+// This currently does not implement the following:
+// * Multi-line literal folding.
+// * Tag resolution.
+// * UTF-16.
+// * BOMs anywhere other than the first Unicode scalar value in the file.
+//
+// The most important class here is Stream. This represents a YAML stream with
+// 0, 1, or many documents.
+//
+// SourceMgr sm;
+// StringRef input = getInput();
+// yaml::Stream stream(input, sm);
+//
+// for (yaml::document_iterator di = stream.begin(), de = stream.end();
+// di != de; ++di) {
+// yaml::Node *n = di->getRoot();
+// if (n) {
+// // Do something with n...
+// } else
+// break;
+// }
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_YAML_PARSER_H
+#define LLVM_SUPPORT_YAML_PARSER_H
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/SMLoc.h"
+
+#include <limits>
+#include <utility>
+
+namespace llvm {
+class MemoryBuffer;
+class SourceMgr;
+class raw_ostream;
+class Twine;
+
+namespace yaml {
+
+class document_iterator;
+class Document;
+class Node;
+class Scanner;
+struct Token;
+
+/// @brief Dump all the tokens in this stream to OS.
+/// @returns true if there was an error, false otherwise.
+bool dumpTokens(StringRef Input, raw_ostream &);
+
+/// @brief Scans all tokens in input without outputting anything. This is used
+/// for benchmarking the tokenizer.
+/// @returns true if there was an error, false otherwise.
+bool scanTokens(StringRef Input);
+
+/// @brief Escape \a Input for a double quoted scalar.
+std::string escape(StringRef Input);
+
+/// @brief This class represents a YAML stream potentially containing multiple
+/// documents. Walk the documents with begin() / end().
+class Stream {
+public:
+ Stream(StringRef Input, SourceMgr &); // Input is the YAML text to read.
+
+ document_iterator begin(); // Iterator over the documents of this stream.
+ document_iterator end(); // Past-the-end counterpart of begin().
+ void skip(); // Defined out of line; presumably consumes the rest of the stream - confirm.
+ bool failed(); // Defined out of line; presumably true once an error was recorded - confirm.
+ bool validate() { // Convenience: run skip(), then report whether it went through.
+ skip();
+ return !failed(); // true means failed() reported no error after skipping.
+ }
+
+ void printError(Node *N, const Twine &Msg); // Emit Msg as an error associated with node N.
+
+private:
+ OwningPtr<Scanner> scanner; // Tokenizer, owned by the stream.
+ OwningPtr<Document> CurrentDoc; // Document slot, owned by the stream.
+
+ friend class Document;
+
+ /// @brief Validate a %YAML x.x directive.
+ void handleYAMLDirective(const Token &);
+};
+
+/// @brief Abstract base class for all Nodes.
+class Node {
+public:
+ enum NodeKind { // Stored in TypeID; each subclass's classof() tests for one value.
+ NK_Null,
+ NK_Scalar,
+ NK_KeyValue,
+ NK_Mapping,
+ NK_Sequence,
+ NK_Alias
+ };
+
+ Node(unsigned int Type, OwningPtr<Document>&, StringRef Anchor); // Type is a NodeKind value.
+ virtual ~Node();
+
+ /// @brief Get the value of the anchor attached to this node. If it does not
+ /// have one, getAnchor().size() will be 0.
+ StringRef getAnchor() const { return Anchor; }
+
+ SMRange getSourceRange() const { return SourceRange; } // Range last set via setSourceRange() or by a subclass.
+ void setSourceRange(SMRange SR) { SourceRange = SR; }
+
+ // These functions forward to Document and Scanner.
+ Token &peekNext();
+ Token getNext();
+ Node *parseBlockNode();
+ BumpPtrAllocator &getAllocator();
+ void setError(const Twine &Message, Token &Location) const;
+ bool failed() const;
+
+ virtual void skip() {}; // Default: nothing to skip. KeyValue, Mapping and Sequence nodes override it.
+
+ unsigned int getType() const { return TypeID; } // The NodeKind given at construction.
+ static inline bool classof(const Node *) { return true; } // Every Node is a Node.
+
+ void *operator new ( size_t Size // Nodes are allocated from a BumpPtrAllocator...
+ , BumpPtrAllocator &Alloc
+ , size_t Alignment = 16) throw() { // ...with 16-byte alignment unless told otherwise.
+ return Alloc.Allocate(Size, Alignment);
+ }
+
+ void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t) throw() { // Placement-delete counterpart of the operator new above.
+ Alloc.Deallocate(Ptr);
+ }
+
+protected:
+ OwningPtr<Document> &Doc; // Reference to the caller's owning pointer, not a copy of it.
+ SMRange SourceRange;
+
+private:
+ unsigned int TypeID;
+ StringRef Anchor;
+};
+
+/// @brief A null value. Carries no data and no anchor of its own.
+///
+/// Example:
+/// !!null null
+class NullNode : public Node {
+public:
+ NullNode(OwningPtr<Document> &D) : Node(NK_Null, D, StringRef()) {} // Always built with an empty anchor.
+
+ static inline bool classof(const NullNode *) { return true; } // Statically known to be a NullNode.
+ static inline bool classof(const Node *N) { // Dynamic check via the stored kind.
+ return N->getType() == NK_Null;
+ }
+};
+
+/// @brief A scalar node is an opaque datum that can be presented as a
+/// series of zero or more Unicode scalar values.
+///
+/// Example:
+/// Adena
+class ScalarNode : public Node {
+public:
+ ScalarNode(OwningPtr<Document> &D, StringRef Anchor, StringRef Val)
+ : Node(NK_Scalar, D, Anchor)
+ , Value(Val) {
+ SMLoc Start = SMLoc::getFromPointer(Val.begin()); // First byte of the raw value.
+ SMLoc End = SMLoc::getFromPointer(Val.end() - 1); // Last byte of the raw value. NOTE(review): one before Val.begin() if Val is empty - confirm callers never pass an empty value.
+ SourceRange = SMRange(Start, End);
+ }
+
+ // Return Value without any escaping or folding or other fun YAML stuff. These
+ // are the exact bytes that are contained in the file (after conversion to
+ // utf8).
+ StringRef getRawValue() const { return Value; }
+
+ /// @brief Gets the value of this node as a StringRef.
+ ///
+ /// @param Storage is used to store the content of the returned StringRef iff
+ /// it requires any modification from how it appeared in the source.
+ /// This happens with escaped characters and multi-line literals.
+ StringRef getValue(SmallVectorImpl<char> &Storage) const;
+
+ static inline bool classof(const ScalarNode *) { return true; } // Statically known to be a ScalarNode.
+ static inline bool classof(const Node *N) { // Dynamic check via the stored kind.
+ return N->getType() == NK_Scalar;
+ }
+
+private:
+ StringRef Value; // The raw text handed to the constructor.
+
+ StringRef unescapeDoubleQuoted( StringRef UnquotedValue // Helper defined out of line; presumably used by getValue() - confirm.
+ , StringRef::size_type Start
+ , SmallVectorImpl<char> &Storage) const;
+};
+
+/// @brief Interpret a scalar as a boolean.
+///
+/// Only the exact spellings "true" and "false" are recognised.
+///
+/// This is declared inline because it is defined in a header: a plain static
+/// function would be emitted, and reported as unused, in every translation
+/// unit that includes the header without calling it.
+///
+/// @param SN The scalar to read. Must not be null.
+/// @param Result Receives the parsed value on success; untouched on failure.
+/// @returns true if \a SN held "true" or "false", false otherwise.
+static inline bool getAs(const ScalarNode *SN, bool &Result) {
+  SmallString<4> Storage;
+  StringRef Value = SN->getValue(Storage);
+  if (Value == "true")
+    Result = true;
+  else if (Value == "false")
+    Result = false;
+  else
+    return false;
+  return true;
+}
+
+/// @brief Interpret a scalar as an integer of type \a T.
+///
+/// Only participates in overload resolution for integer types.
+///
+/// @param SN The scalar to read. Must not be null.
+/// @param Result Receives the parsed value.
+/// @returns true if the scalar's text parsed as an integer, false otherwise.
+template<class T>
+typename enable_if_c<std::numeric_limits<T>::is_integer, bool>::type
+getAs(const ScalarNode *SN, T &Result) {
+  SmallString<4> Storage;
+  StringRef Text = SN->getValue(Storage);
+  // getAsInteger() signals failure with true, so the result is inverted.
+  bool ParseFailed = Text.getAsInteger(0, Result);
+  return !ParseFailed;
+}
+
+/// @brief A key and value pair. While not technically a Node under the YAML
+/// representation graph, it is easier to treat them this way.
+///
+/// TODO: Consider making this not a child of Node.
+///
+/// Example:
+///   Section: .text
+class KeyValueNode : public Node {
+public:
+  KeyValueNode(OwningPtr<Document> &D)
+    : Node(NK_KeyValue, D, StringRef())
+    , Key(0)
+    , Value(0)
+  {}
+
+  /// @brief Parse and return the key.
+  ///
+  /// This may be called multiple times.
+  ///
+  /// @returns The key, or nullptr if failed() == true.
+  Node *getKey();
+
+  /// @brief Parse and return the value.
+  ///
+  /// This may be called multiple times.
+  ///
+  /// @returns The value, or nullptr if failed() == true.
+  Node *getValue();
+
+  /// @brief Skip over both halves of the pair.
+  ///
+  /// getKey() and getValue() are documented to return null once parsing has
+  /// failed, so each result is checked before it is dereferenced. The value is
+  /// only requested when a key was actually obtained.
+  virtual void skip() {
+    if (Node *K = getKey()) {
+      K->skip();
+      if (Node *V = getValue())
+        V->skip();
+    }
+  }
+
+  static inline bool classof(const KeyValueNode *) { return true; }
+  static inline bool classof(const Node *N) {
+    return N->getType() == NK_KeyValue;
+  }
+
+private:
+  Node *Key;   ///< Starts out null (see the constructor).
+  Node *Value; ///< Starts out null (see the constructor).
+};
+
+/// @brief This is an iterator abstraction over YAML collections shared by both
+/// sequences and maps.
+///
+/// BaseT must have a ValueT* member named CurrentEntry and a member function
+/// increment() which must set CurrentEntry to 0 to create an end iterator.
+template <class BaseT, class ValueT>
+class basic_collection_iterator
+ : public std::iterator<std::forward_iterator_tag, ValueT> {
+public:
+ basic_collection_iterator() : Base(0) {} // A null Base is the end iterator.
+ basic_collection_iterator(BaseT *B) : Base(B) {} // Iterator bound to collection B.
+
+ ValueT *operator ->() const {
+ assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
+ return Base->CurrentEntry; // The entry lives in the collection, not in the iterator.
+ }
+
+ ValueT &operator *() const {
+ assert(Base && Base->CurrentEntry &&
+ "Attempted to dereference end iterator!");
+ return *Base->CurrentEntry;
+ }
+
+ operator ValueT*() const { // Implicit conversion to the current entry pointer.
+ assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
+ return Base->CurrentEntry;
+ }
+
+ bool operator !=(const basic_collection_iterator &Other) const {
+ if(Base != Other.Base) // Different collections, or exactly one side is the end iterator.
+ return true;
+ return (Base && Other.Base) && Base->CurrentEntry // Same Base on both sides: compare the shared entry.
+ != Other.Base->CurrentEntry;
+ }
+
+ basic_collection_iterator &operator++() {
+ assert(Base && "Attempted to advance iterator past end!");
+ Base->increment(); // The collection itself moves on to its next entry.
+ // Create an end iterator.
+ if (Base->CurrentEntry == 0)
+ Base = 0;
+ return *this;
+ }
+
+private:
+ BaseT *Base; // Collection being iterated; 0 marks the end iterator.
+};
+
+// The following two templates are used for both MappingNode and Sequence Node.
+template <class CollectionType>
+typename CollectionType::iterator begin(CollectionType &C) { // Shared body of MappingNode::begin() and SequenceNode::begin().
+ assert(C.IsAtBeginning && "You may only iterate over a collection once!");
+ C.IsAtBeginning = false; // Marks the collection as started so a second begin() asserts.
+ typename CollectionType::iterator ret(&C);
+ ++ret; // Advance once so ret is on the first entry, or becomes the end iterator if there is none.
+ return ret;
+}
+
+/// @brief Skip every entry of \a C if iteration has not started yet.
+///
+/// Shared by MappingNode::skip() and SequenceNode::skip(). A collection that
+/// has already been fully iterated is left alone; skipping one that is part
+/// way through iteration is not supported and asserts.
+template <class CollectionType>
+void skip(CollectionType &C) {
+  // TODO: support skipping from the middle of a parsed collection ;/
+  assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!");
+  // Already consumed: nothing is left to skip.
+  if (!C.IsAtBeginning)
+    return;
+  typename CollectionType::iterator Cursor = begin(C);
+  typename CollectionType::iterator Last = C.end();
+  while (Cursor != Last) {
+    Cursor->skip();
+    ++Cursor;
+  }
+}
+
+/// @brief Represents a YAML map created from either a block map or a flow map.
+///
+/// This parses the YAML stream as increment() is called.
+///
+/// Example:
+/// Name: _main
+/// Scope: Global
+class MappingNode : public Node {
+public:
+ enum MappingType {
+ MT_Block,
+ MT_Flow,
+ MT_Inline ///< An inline mapping node is used for "[key: value]".
+ };
+
+ MappingNode(OwningPtr<Document> &D, StringRef Anchor, MappingType MT)
+ : Node(NK_Mapping, D, Anchor)
+ , Type(MT)
+ , IsAtBeginning(true) // Iteration has not started yet.
+ , IsAtEnd(false)
+ , CurrentEntry(0) // No entry has been parsed yet.
+ {}
+
+ friend class basic_collection_iterator<MappingNode, KeyValueNode>;
+ typedef basic_collection_iterator<MappingNode, KeyValueNode> iterator;
+ template <class T> friend typename T::iterator yaml::begin(T &);
+ template <class T> friend void yaml::skip(T &);
+
+ iterator begin() { // May only be called once; yaml::begin() asserts on a second call.
+ return yaml::begin(*this);
+ }
+
+ iterator end() { return iterator(); } // A default-constructed iterator is the end iterator.
+
+ virtual void skip() { // Delegates to the shared yaml::skip() template.
+ yaml::skip(*this);
+ }
+
+ static inline bool classof(const MappingNode *) { return true; }
+ static inline bool classof(const Node *N) {
+ return N->getType() == NK_Mapping;
+ }
+
+private:
+ MappingType Type;
+ bool IsAtBeginning; // True until yaml::begin() is called on this node.
+ bool IsAtEnd; // Checked by yaml::skip(); presumably set by increment() - confirm in the .cpp.
+ KeyValueNode *CurrentEntry; // Entry the iterator exposes; 0 means end.
+
+ void increment(); // Defined out of line; advances CurrentEntry.
+};
+
+/// @brief Represents a YAML sequence created from either a block sequence or a
+/// flow sequence.
+///
+/// This parses the YAML stream as increment() is called.
+///
+/// Example:
+/// - Hello
+/// - World
+class SequenceNode : public Node {
+public:
+ enum SequenceType {
+ ST_Block,
+ ST_Flow,
+ // Use for:
+ //
+ // key:
+ // - val1
+ // - val2
+ //
+ // As a BlockMappingEntry and BlockEnd are not created in this case.
+ ST_Indentless
+ };
+
+ SequenceNode(OwningPtr<Document> &D, StringRef Anchor, SequenceType ST)
+ : Node(NK_Sequence, D, Anchor)
+ , SeqType(ST)
+ , IsAtBeginning(true) // Iteration has not started yet.
+ , IsAtEnd(false)
+ , WasPreviousTokenFlowEntry(true) // Start with an imaginary ','.
+ , CurrentEntry(0) // No entry has been parsed yet.
+ {}
+
+ friend class basic_collection_iterator<SequenceNode, Node>;
+ typedef basic_collection_iterator<SequenceNode, Node> iterator;
+ template <class T> friend typename T::iterator yaml::begin(T &);
+ template <class T> friend void yaml::skip(T &);
+
+ void increment(); // Defined out of line; advances CurrentEntry. Public here, unlike in MappingNode.
+
+ iterator begin() { // May only be called once; yaml::begin() asserts on a second call.
+ return yaml::begin(*this);
+ }
+
+ iterator end() { return iterator(); } // A default-constructed iterator is the end iterator.
+
+ virtual void skip() { // Delegates to the shared yaml::skip() template.
+ yaml::skip(*this);
+ }
+
+ static inline bool classof(const SequenceNode *) { return true; }
+ static inline bool classof(const Node *N) {
+ return N->getType() == NK_Sequence;
+ }
+
+private:
+ SequenceType SeqType;
+ bool IsAtBeginning; // True until yaml::begin() is called on this node.
+ bool IsAtEnd; // Checked by yaml::skip(); presumably set by increment() - confirm in the .cpp.
+ bool WasPreviousTokenFlowEntry;
+ Node *CurrentEntry; // Entry the iterator exposes; 0 means end.
+};
+
+/// @brief Represents an alias to a Node with an anchor.
+///
+/// Example:
+///   *AnchorName
+class AliasNode : public Node {
+public:
+  /// @param D   Document owner reference passed on to Node.
+  /// @param Val The alias name.
+  AliasNode(OwningPtr<Document> &D, StringRef Val)
+    : Node(NK_Alias, D, StringRef()), Name(Val) {}
+
+  /// @brief The alias name this node was constructed with.
+  StringRef getName() const { return Name; }
+  Node *getTarget();
+
+  // The static overload must take an AliasNode. Taking a ScalarNode here made
+  // classof() report every ScalarNode as an AliasNode.
+  static inline bool classof(const AliasNode *) { return true; }
+  static inline bool classof(const Node *N) {
+    return N->getType() == NK_Alias;
+  }
+
+private:
+  StringRef Name;
+};
+
+/// @brief A YAML Stream is a sequence of Documents. A document contains a root
+/// node.
+class Document {
+public:
+ /// @brief Root for parsing a node. Returns a single node.
+ Node *parseBlockNode();
+
+ Document(Stream &ParentStream); // ParentStream is presumably what the stream member refers to - confirm in the .cpp.
+
+ /// @brief Finish parsing the current document and return true if there are
+ /// more. Return false otherwise.
+ bool skip();
+
+ /// @brief Parse and return the root level node. The result is cached in
+ /// Root, so later calls return the same node without parsing again.
+ Node *getRoot() {
+ if (Root)
+ return Root;
+ return Root = parseBlockNode(); // A null result is not cached; the next call parses again.
+ }
+
+private:
+ friend class Node;
+ friend class document_iterator;
+
+ /// @brief Stream to read tokens from.
+ Stream &stream;
+
+ /// @brief Used to allocate nodes to. All are destroyed without calling their
+ /// destructor when the document is destroyed.
+ BumpPtrAllocator NodeAllocator;
+
+ /// @brief The root node. Used to support skipping a partially parsed
+ /// document.
+ Node *Root;
+
+ Token &peekNext(); // Same names as the Node members that are documented to forward here.
+ Token getNext();
+ void setError(const Twine &Message, Token &Location) const;
+ bool failed() const;
+
+ void handleTagDirective(const Token &Tag) { // Currently a no-op: the tag is ignored.
+ // TODO: Track tags.
+ }
+
+ /// @brief Parse %BLAH directives and return true if any were encountered.
+ bool parseDirectives();
+
+ /// @brief Consume the next token and error if it is not \a TK.
+ bool expectToken(int TK);
+};
+
+/// @brief Iterator abstraction for Documents over a Stream. It holds a
+/// reference to an OwningPtr<Document> rather than a document of its own.
+class document_iterator {
+public:
+ document_iterator() : Doc(NullDoc) {} // Binds to the shared static NullDoc.
+ document_iterator(OwningPtr<Document> &D) : Doc(D) {} // Binds to the caller's owning pointer.
+
+ bool operator !=(const document_iterator &Other) {
+ return Doc != Other.Doc;
+ }
+
+ document_iterator operator ++() { // Finishes the current document, then replaces or clears it.
+ if (!Doc->skip()) { // skip() returned false: no further documents.
+ Doc.reset(0);
+ } else {
+ Stream &S = Doc->stream; // Read the stream first: reset() below destroys the old document.
+ Doc.reset(new Document(S));
+ }
+ return *this;
+ }
+
+ Document &operator *() {
+ return *Doc;
+ }
+
+ OwningPtr<Document> &operator ->() { // Returns the owning pointer itself, hence di->getRoot() in the file header example.
+ return Doc;
+ }
+
+private:
+ static OwningPtr<Document> NullDoc; // Target of default-constructed iterators.
+ OwningPtr<Document> &Doc; // Not owned by the iterator; operator++ resets it in place.
+};
+
+}
+}
+
+#endif
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index 0b69238274..9b3b6c801d 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -54,6 +54,7 @@ add_llvm_library(LLVMSupport
ToolOutputFile.cpp
Triple.cpp
Twine.cpp
+ YAMLParser.cpp
raw_os_ostream.cpp
raw_ostream.cpp
regcomp.c
diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp
new file mode 100644
index 0000000000..3e302d0eb1
--- /dev/null
+++ b/lib/Support/YAMLParser.cpp
@@ -0,0 +1,2115 @@
+//===--- YAMLParser.cpp - Simple YAML parser ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a YAML parser.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/YAMLParser.h"
+
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SourceMgr.h"
+
+using namespace llvm;
+using namespace yaml;
+
+enum UnicodeEncodingForm {
+ UEF_UTF32_LE, ///< UTF-32 Little Endian
+ UEF_UTF32_BE, ///< UTF-32 Big Endian
+ UEF_UTF16_LE, ///< UTF-16 Little Endian
+ UEF_UTF16_BE, ///< UTF-16 Big Endian
+ UEF_UTF8, ///< UTF-8 or ascii.
+ UEF_Unknown ///< Not a valid Unicode encoding.
+};
+
+/// EncodingInfo - Holds the encoding type and length of the byte order mark if
+/// it exists. Length is in {0, 2, 3, 4}.
+typedef std::pair<UnicodeEncodingForm, unsigned> EncodingInfo;
+
+/// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode
+/// encoding form of \a Input.
+///
+/// @param Input A string of length 0 or more.
+/// @returns An EncodingInfo indicating the Unicode encoding form of the input
+/// and how long the byte order mark is if one exists.
+static EncodingInfo getUnicodeEncoding(StringRef Input) {
+ if (Input.size() == 0)
+ return std::make_pair(UEF_Unknown, 0); // Empty input has no detectable encoding.
+
+ switch (uint8_t(Input[0])) { // Dispatch on the first byte.
+ case 0x00:
+ if (Input.size() >= 4) {
+ if ( Input[1] == 0 // 00 00 FE FF: UTF-32 BE byte order mark.
+ && uint8_t(Input[2]) == 0xFE
+ && uint8_t(Input[3]) == 0xFF)
+ return std::make_pair(UEF_UTF32_BE, 4);
+ if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0) // 00 00 00 xx: UTF-32 BE, no mark.
+ return std::make_pair(UEF_UTF32_BE, 0);
+ }
+
+ if (Input.size() >= 2 && Input[1] != 0) // 00 xx: UTF-16 BE, no mark.
+ return std::make_pair(UEF_UTF16_BE, 0);
+ return std::make_pair(UEF_Unknown, 0);
+ case 0xFF:
+ if ( Input.size() >= 4 // FF FE 00 00: UTF-32 LE mark; tested before the shorter UTF-16 LE mark.
+ && uint8_t(Input[1]) == 0xFE
+ && Input[2] == 0
+ && Input[3] == 0)
+ return std::make_pair(UEF_UTF32_LE, 4);
+
+ if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE) // FF FE: UTF-16 LE mark.
+ return std::make_pair(UEF_UTF16_LE, 2);
+ return std::make_pair(UEF_Unknown, 0);
+ case 0xFE:
+ if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF) // FE FF: UTF-16 BE mark.
+ return std::make_pair(UEF_UTF16_BE, 2);
+ return std::make_pair(UEF_Unknown, 0);
+ case 0xEF:
+ if ( Input.size() >= 3 // EF BB BF: UTF-8 mark.
+ && uint8_t(Input[1]) == 0xBB
+ && uint8_t(Input[2]) == 0xBF)
+ return std::make_pair(UEF_UTF8, 3);
+ return std::make_pair(UEF_Unknown, 0);
+ }
+
+ // It could still be utf-32 or utf-16.
+ if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0) // xx 00 00 00: UTF-32 LE, no mark.
+ return std::make_pair(UEF_UTF32_LE, 0);
+
+ if (Input.size() >= 2 && Input[1] == 0) // xx 00: UTF-16 LE, no mark.
+ return std::make_pair(UEF_UTF16_LE, 0);
+
+ return std::make_pair(UEF_UTF8, 0); // Anything else is treated as UTF-8 without a mark.
+}
+
+namespace llvm {
+namespace yaml {
+/// Token - A single YAML token. Tokens are ilist nodes (see TokenQueueT).
+struct Token : ilist_node<Token> {
+ enum TokenKind {
+ TK_Error, // Uninitialized token.
+ TK_StreamStart,
+ TK_StreamEnd,
+ TK_VersionDirective,
+ TK_TagDirective,
+ TK_DocumentStart,
+ TK_DocumentEnd,
+ TK_BlockEntry,
+ TK_BlockEnd,
+ TK_BlockSequenceStart,
+ TK_BlockMappingStart,
+ TK_FlowEntry,
+ TK_FlowSequenceStart,
+ TK_FlowSequenceEnd,
+ TK_FlowMappingStart,
+ TK_FlowMappingEnd,
+ TK_Key,
+ TK_Value,
+ TK_Scalar,
+ TK_Alias,
+ TK_Anchor,
+ TK_Tag
+ } Kind; // Kind of this token; TK_Error until something else is assigned.
+
+ /// A string of length 0 or more whose begin() points to the logical location
+ /// of the token in the input.
+ StringRef Range;
+
+ Token() : Kind(TK_Error) {} // Default-constructed tokens are error tokens with an empty Range.
+};
+}
+}
+
+template<>
+struct ilist_sentinel_traits<Token> { // Sentinel policy for token lists: the sentinel is a member, not a heap node.
+ Token *createSentinel() const {
+ return &Sentinel; // Always the same embedded object.
+ }
+ static void destroySentinel(Token*) {} // Nothing to free: the sentinel is a member.
+
+ Token *provideInitialHead() const { return createSentinel(); }
+ Token *ensureHead(Token*) const { return createSentinel(); }
+ static void noteHead(Token*, Token*) {}
+
+private:
+ mutable Token Sentinel; // mutable so the const accessors above can return a non-const pointer to it.
+};
+
+template<>
+struct ilist_node_traits<Token> { // Node policy for token lists: nodes come from a BumpPtrAllocator.
+ Token *createNode(const Token &V) {
+ return new (Alloc.Allocate<Token>()) Token(V); // Copy-construct V into storage obtained from Alloc.
+ }
+ static void deleteNode(Token *V) {} // No per-node release.
+
+ void addNodeToList(Token *) {}
+ void removeNodeFromList(Token *) {}
+ void transferNodesFromList(ilist_node_traits & /*SrcTraits*/,
+ ilist_iterator<Token> /*first*/,
+ ilist_iterator<Token> /*last*/) {}
+
+ BumpPtrAllocator Alloc; // Backing storage for every node created through createNode().
+};
+
+typedef ilist<Token> TokenQueueT;
+
+namespace {
+/// @brief This struct is used to track simple keys.
+///
+/// Simple keys are handled by creating an entry in SimpleKeys for each Token
+/// which could legally be the start of a simple key. When peekNext is called,
+/// if the Token to be returned is referenced by a SimpleKey, we continue
+/// tokenizing until that potential simple key has either been found to not be
+/// a simple key (we moved on to the next line or went further than 1024
+/// chars), or we run into a Value. In the latter case a Key token (and
+/// possibly others) is inserted before the SimpleKey's Tok.
+struct SimpleKey {
+  TokenQueueT::iterator Tok; ///< Token that may start the simple key.
+  unsigned Column;
+  unsigned Line;
+  unsigned FlowLevel;
+  bool IsRequired;
+
+  /// @brief Two entries are equal when they refer to the same token; the
+  /// other fields do not take part in the comparison.
+  ///
+  /// Const-qualified so it can also be applied to const SimpleKey objects.
+  bool operator ==(const SimpleKey &Other) const {
+    return Tok == Other.Tok;
+  }
+};
+}
+
+/// @brief The Unicode scalar value of a UTF-8 minimal well-formed code unit
+/// subsequence and the subsequence's length in code units (uint8_t).
+/// A length of 0 represents an error.
+typedef std::pair<uint32_t, unsigned> UTF8Decoded;
+
+/// @brief Decode the minimal well-formed UTF-8 code unit subsequence at the
+/// start of \a Range.
+///
+/// @param Range The bytes to decode. It may be empty or end part-way through
+///        a multi-byte sequence; no byte outside \a Range is read.
+/// @returns The Unicode scalar value and its length in code units, or (0, 0)
+///          when \a Range is empty or does not begin with a well-formed,
+///          shortest-form sequence.
+static UTF8Decoded decodeUTF8(StringRef Range) {
+  StringRef::iterator Position = Range.begin();
+  // Number of bytes that may be inspected. Checking this count, rather than
+  // testing Position + N != End, rejects truncated sequences before any
+  // continuation byte is dereferenced.
+  const StringRef::size_type Available = Range.size();
+  if (Available == 0)
+    return std::make_pair(0, 0);
+  // 1 byte: [0x00, 0x7f]
+  // Bit pattern: 0xxxxxxx
+  if ((*Position & 0x80) == 0) {
+    return std::make_pair(*Position, 1);
+  }
+  // 2 bytes: [0x80, 0x7ff]
+  // Bit pattern: 110xxxxx 10xxxxxx
+  if (Available >= 2 &&
+      ((*Position & 0xE0) == 0xC0) &&
+      ((*(Position + 1) & 0xC0) == 0x80)) {
+    uint32_t codepoint = ((*Position & 0x1F) << 6) |
+                          (*(Position + 1) & 0x3F);
+    // Anything below 0x80 here is an overlong encoding.
+    if (codepoint >= 0x80)
+      return std::make_pair(codepoint, 2);
+  }
+  // 3 bytes: [0x800, 0xffff]
+  // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx
+  if (Available >= 3 &&
+      ((*Position & 0xF0) == 0xE0) &&
+      ((*(Position + 1) & 0xC0) == 0x80) &&
+      ((*(Position + 2) & 0xC0) == 0x80)) {
+    uint32_t codepoint = ((*Position & 0x0F) << 12) |
+                         ((*(Position + 1) & 0x3F) << 6) |
+                          (*(Position + 2) & 0x3F);
+    // Codepoints between 0xD800 and 0xDFFF are invalid, as
+    // they are high / low surrogate halves used by UTF-16.
+    if (codepoint >= 0x800 &&
+        (codepoint < 0xD800 || codepoint > 0xDFFF))
+      return std::make_pair(codepoint, 3);
+  }
+  // 4 bytes: [0x10000, 0x10FFFF]
+  // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+  if (Available >= 4 &&
+      ((*Position & 0xF8) == 0xF0) &&
+      ((*(Position + 1) & 0xC0) == 0x80) &&
+      ((*(Position + 2) & 0xC0) == 0x80) &&
+      ((*(Position + 3) & 0xC0) == 0x80)) {
+    uint32_t codepoint = ((*Position & 0x07) << 18) |
+                         ((*(Position + 1) & 0x3F) << 12) |
+                         ((*(Position + 2) & 0x3F) << 6) |
+                          (*(Position + 3) & 0x3F);
+    if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
+      return std::make_pair(codepoint, 4);
+  }
+  // Not a well-formed sequence, or truncated.
+  return std::make_pair(0, 0);
+}
+
+namespace llvm {
+namespace yaml {
+/// @brief Scans YAML tokens from a MemoryBuffer.
+class Scanner {
+public:
+ Scanner(const StringRef Input, SourceMgr &SM);
+
+ /// @brief Parse the next token and return it without popping it.
+ Token &peekNext();
+
+ /// @brief Parse the next token and pop it from the queue.
+ Token getNext();
+
+ void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+ SM.PrintMessage(Loc, Kind, Message, Ranges);
+ }
+
+ void setError(const Twine &Message, StringRef::iterator Position) {
+ if (Current >= End)
+ Current = End - 1;
+
+ // Don't print out more errors after the first one we encounter. The rest
+ // are just the result of the first, and have no meaning.
+ if (!Failed)
+ printError(SMLoc::getFromPointer(Current), SourceMgr::DK_Error, Message);
+ Failed = true;
+ }
+
+ void setError(const Twine &Message) {
+ setError(Message, Current);
+ }
+
+ /// @brief Returns true if an error occurred while parsing.
+ bool failed() {
+ return Failed;
+ }
+
+private:
+ StringRef currentInput() {
+ return StringRef(Current, End - Current);
+ }
+
+ /// @brief Decode a UTF-8 minimal well-formed code unit subsequence starting
+ /// at \a Position.
+ ///
+ /// If the UTF-8 code units starting at Position do not form a well-formed
+ /// code unit subsequence, then the Unicode scalar value is 0, and the length
+ /// is 0.
+ UTF8Decoded decodeUTF8(StringRef::iterator Position) {
+ return ::decodeUTF8(StringRef(Position, End - Position));
+ }
+
+ // The following functions are based on the gramar rules in the YAML spec. The
+ // style of the function names it meant to closely match how they are written
+ // in the spec. The number within the [] is the number of the grammar rule in
+ // the spec.
+ //
+ // See 4.2 [Production Naming Conventions] for the meaning of the prefixes.
+ //
+ // c-
+ // A production starting and ending with a special character.
+ // b-
+ // A production matching a single line break.
+ // nb-
+ // A production starting and ending with a non-break character.
+ // s-
+ // A production starting and ending with a white space character.
+ // ns-
+ // A production starting and ending with a non-space character.
+ // l-
+ // A production matching complete line(s).
+
+ /// @brief Skip a single nb-char[27] starting at Position.
+ ///
+ /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE]
+ /// | [0xFF00-0xFFFD] | [0x10000-0x10FFFF]
+ ///
+ /// @returns The code unit after the nb-char, or Position if it's not an
+ /// nb-char.
+ StringRef::iterator skip_nb_char(StringRef::iterator Position);
+
+ /// @brief Skip a single b-break[28] starting at Position.
+ ///
+ /// A b-break is 0xD 0xA | 0xD | 0xA
+ ///
+ /// @returns The code unit after the b-break, or Position if it's not a
+ /// b-break.
+ StringRef::iterator skip_b_break(StringRef::iterator Position);
+
+ /// @brief Skip a single s-white[33] starting at Position.
+ ///
+ /// A s-white is 0x20 | 0x9
+ ///
+ /// @returns The code unit after the s-white, or Position if it's not a
+ /// s-white.
+ StringRef::iterator skip_s_white(StringRef::iterator Position);
+
+ /// @brief Skip a single ns-char[34] starting at Position.
+ ///
+ /// A ns-char is nb-char - s-white
+ ///
+ /// @returns The code unit after the ns-char, or Position if it's not a
+ /// ns-char.
+ StringRef::iterator skip_ns_char(StringRef::iterator Position);
+
+ typedef StringRef::iterator (Scanner::*SkipWhileFunc)(StringRef::iterator);
+ /// @brief Skip minimal well-formed code unit subsequences until Func
+ /// returns its input.
+ ///
+ /// @returns The code unit after the last minimal well-formed code unit
+ /// subsequence that Func accepted.
+ StringRef::iterator skip_while( SkipWhileFunc Func
+ , StringRef::iterator Position);
+
+ /// @brief Scan ns-uri-char[39]s starting at Cur.
+ ///
+ /// This updates Cur and Column while scanning.
+ ///
+ /// @returns A StringRef starting at Cur which covers the longest contiguous
+ /// sequence of ns-uri-char.
+ StringRef scan_ns_uri_char();
+
+ /// @brief Scan ns-plain-one-line[133] starting at \a Cur.
+ StringRef scan_ns_plain_one_line();
+
+ /// @brief Consume a minimal well-formed code unit subsequence starting at
+ /// \a Cur. Return false if it is not the same Unicode scalar value as
+ /// \a Expected. This updates \a Column.
+ bool consume(uint32_t Expected);
+
+ /// @brief Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column.
+ void skip(uint32_t Distance);
+
+ /// @brief Return true if the minimal well-formed code unit subsequence at
+ /// Pos is whitespace or a new line
+ bool isBlankOrBreak(StringRef::iterator Position);
+
+ /// @brief If IsSimpleKeyAllowed, create and push_back a new SimpleKey.
+ void saveSimpleKeyCandidate( TokenQueueT::iterator Tok
+ , unsigned AtColumn
+ , bool IsRequired);
+
+ /// @brief Remove simple keys that can no longer be valid simple keys.
+ ///
+ /// Invalid simple keys are not on the current line or are further than 1024
+ /// columns back.
+ void removeStaleSimpleKeyCandidates();
+
+ /// @brief Remove all simple keys on FlowLevel \a Level.
+ void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level);
+
+ /// @brief Unroll indentation in \a Indents back to \a Col. Creates BlockEnd
+ /// tokens if needed.
+ bool unrollIndent(int ToColumn);
+
+ /// @brief Increase indent to \a Col. Creates \a Kind token at \a InsertPoint
+ /// if needed.
+ bool rollIndent( int ToColumn
+ , Token::TokenKind Kind
+ , TokenQueueT::iterator InsertPoint);
+
+ /// @brief Skip whitespace and comments until the start of the next token.
+ void scanToNextToken();
+
+ /// @brief Must be the first token generated.
+ bool scanStreamStart();
+
+ /// @brief Generate tokens needed to close out the stream.
+ bool scanStreamEnd();
+
+ /// @brief Scan a %BLAH directive.
+ bool scanDirective();
+
+ /// @brief Scan a ... or ---.
+ bool scanDocumentIndicator(bool IsStart);
+
+ /// @brief Scan a [ or { and generate the proper flow collection start token.
+ bool scanFlowCollectionStart(bool IsSequence);
+
+ /// @brief Scan a ] or } and generate the proper flow collection end token.
+ bool scanFlowCollectionEnd(bool IsSequence);
+
+ /// @brief Scan the , that separates entries in a flow collection.
+ bool scanFlowEntry();
+
+ /// @brief Scan the - that starts block sequence entries.
+ bool scanBlockEntry();
+
+ /// @brief Scan an explicit ? indicating a key.
+ bool scanKey();
+
+ /// @brief Scan an explicit : indicating a value.
+ bool scanValue();
+
+ /// @brief Scan a quoted scalar.
+ bool scanFlowScalar(bool IsDoubleQuoted);
+
+ /// @brief Scan an unquoted scalar.
+ bool scanPlainScalar();
+
+ /// @brief Scan an Alias or Anchor starting with * or &.
+ bool scanAliasOrAnchor(bool IsAlias);
+
+ /// @brief Scan a block scalar starting with | or >.
+ bool scanBlockScalar(bool IsLiteral);
+
+ /// @brief Scan a tag of the form !stuff.
+ bool scanTag();
+
+ /// @brief Dispatch to the next scanning function based on \a *Cur.
+ bool fetchMoreTokens();
+
+ /// @brief The SourceMgr used for diagnostics and buffer management.
+ SourceMgr &SM;
+
+ /// @brief The original input.
+ MemoryBuffer *InputBuffer;
+
+ /// @brief The current position of the scanner.
+ StringRef::iterator Current;
+
+ /// @brief The end of the input (one past the last character).
+ StringRef::iterator End;
+
+ /// @brief Current YAML indentation level in spaces.
+ int Indent;
+
+ /// @brief Current column number in Unicode code points.
+ unsigned Column;
+
+ /// @brief Current line number.
+ unsigned Line;
+
+ /// @brief How deep we are in flow style containers. 0 Means at block level.
+ unsigned FlowLevel;
+
+ /// @brief Are we at the start of the stream?
+ bool IsStartOfStream;
+
+ /// @brief Can the next token be the start of a simple key?
+ bool IsSimpleKeyAllowed;
+
+ /// @brief Is the next token required to start a simple key?
+ bool IsSimpleKeyRequired;
+
+ /// @brief True if an error has occurred.
+ bool Failed;
+
+ /// @brief Queue of tokens. This is required to queue up tokens while looking
+ /// for the end of a simple key. And for cases where a single character
+ /// can produce multiple tokens (e.g. BlockEnd).
+ TokenQueueT TokenQueue;
+
+ /// @brief Indentation levels.
+ SmallVector<int, 4> Indents;
+
+ /// @brief Potential simple keys.
+ SmallVector<SimpleKey, 4> SimpleKeys;
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+/// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result.
+static void encodeUTF8( uint32_t UnicodeScalarValue
+ , SmallVectorImpl<char> &Result) {
+ if (UnicodeScalarValue <= 0x7F) {
+ Result.push_back(UnicodeScalarValue & 0x7F);
+ } else if (UnicodeScalarValue <= 0x7FF) {
+ uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
+ uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
+ Result.push_back(FirstByte);
+ Result.push_back(SecondByte);
+ } else if (UnicodeScalarValue <= 0xFFFF) {
+ uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
+ uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
+ uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
+ Result.push_back(FirstByte);
+ Result.push_back(SecondByte);
+ Result.push_back(ThirdByte);
+ } else if (UnicodeScalarValue <= 0x10FFFF) {
+ uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
+ uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
+ uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
+ uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
+ Result.push_back(FirstByte);
+ Result.push_back(SecondByte);
+ Result.push_back(ThirdByte);
+ Result.push_back(FourthByte);
+ }
+}
+
+bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) {
+ SourceMgr SM;
+ Scanner scanner(Input, SM);
+ while (true) {
+ Token T = scanner.getNext();
+ switch (T.Kind) {
+ case Token::TK_StreamStart:
+ OS << "Stream-Start: ";
+ break;
+ case Token::TK_StreamEnd:
+ OS << "Stream-End: ";
+ break;
+ case Token::TK_VersionDirective:
+ OS << "Version-Directive: ";
+ break;
+ case Token::TK_TagDirective:
+ OS << "Tag-Directive: ";
+ break;
+ case Token::TK_DocumentStart:
+ OS << "Document-Start: ";
+ break;
+ case Token::TK_DocumentEnd:
+ OS << "Document-End: ";
+ break;
+ case Token::TK_BlockEntry:
+ OS << "Block-Entry: ";
+ break;
+ case Token::TK_BlockEnd:
+ OS << "Block-End: ";
+ break;
+ case Token::TK_BlockSequenceStart:
+ OS << "Block-Sequence-Start: ";
+ break;
+ case Token::TK_BlockMappingStart:
+ OS << "Block-Mapping-Start: ";
+ break;
+ case Token::TK_FlowEntry:
+ OS << "Flow-Entry: ";
+ break;
+ case Token::TK_FlowSequenceStart:
+ OS << "Flow-Sequence-Start: ";
+ break;
+ case Token::TK_FlowSequenceEnd:
+ OS << "Flow-Sequence-End: ";
+ break;
+ case Token::TK_FlowMappingStart:
+ OS << "Flow-Mapping-Start: ";
+ break;
+ case Token::TK_FlowMappingEnd:
+ OS << "Flow-Mapping-End: ";
+ break;
+ case Token::TK_Key:
+ OS << "Key: ";
+ break;
+ case Token::TK_Value:
+ OS << "Value: ";
+ break;
+ case Token::TK_Scalar:
+ OS << "Scalar: ";
+ break;
+ case Token::TK_Alias:
+ OS << "Alias: ";
+ break;
+ case Token::TK_Anchor:
+ OS << "Anchor: ";
+ break;
+ case Token::TK_Tag:
+ OS << "Tag: ";
+ break;
+ case Token::TK_Error:
+ break;
+ }
+ OS << T.Range << "\n";
+ if (T.Kind == Token::TK_StreamEnd)
+ break;
+ else if (T.Kind == Token::TK_Error)
+ return false;
+ }
+ return true;
+}
+
+bool yaml::scanTokens(StringRef Input) {
+ llvm::SourceMgr SM;
+ llvm::yaml::Scanner scanner(Input, SM);
+ for (;;) {
+ llvm::yaml::Token T = scanner.getNext();
+ if (T.Kind == Token::TK_StreamEnd)
+ break;
+ else if (T.Kind == Token::TK_Error)
+ return false;
+ }
+ return true;
+}
+
+std::string yaml::escape(StringRef Input) {
+ std::string EscapedInput;
+ for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) {
+ if (*i == '\\')
+ EscapedInput += "\\\\";
+ else if (*i == '"')
+ EscapedInput += "\\\"";
+ else if (*i == 0)
+ EscapedInput += "\\0";
+ else if (*i == 0x07)
+ EscapedInput += "\\a";
+ else if (*i == 0x08)
+ EscapedInput += "\\b";
+ else if (*i == 0x09)
+ EscapedInput += "\\t";
+ else if (*i == 0x0A)
+ EscapedInput += "\\n";
+ else if (*i == 0x0B)
+ EscapedInput += "\\v";
+ else if (*i == 0x0C)
+ EscapedInput += "\\f";
+ else if (*i == 0x0D)
+ EscapedInput += "\\r";
+ else if (*i == 0x1B)
+ EscapedInput += "\\e";
+ else if (*i >= 0 && *i < 0x20) { // Control characters not handled above.
+ std::string HexStr = utohexstr(*i);
+ EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
+ } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence.
+ UTF8Decoded UnicodeScalarValue
+ = decodeUTF8(StringRef(i, Input.end() - i));
+ if (UnicodeScalarValue.second == 0) {
+ // Found invalid char.
+ SmallString<4> Val;
+ encodeUTF8(0xFFFD, Val);
+ EscapedInput.insert(EscapedInput.end(), Val.begin(), Val.end());
+ // FIXME: Error reporting.
+ return EscapedInput;
+ }
+ if (UnicodeScalarValue.first == 0x85)
+ EscapedInput += "\\N";
+ else if (UnicodeScalarValue.first == 0xA0)
+ EscapedInput += "\\_";
+ else if (UnicodeScalarValue.first == 0x2028)
+ EscapedInput += "\\L";
+ else if (UnicodeScalarValue.first == 0x2029)
+ EscapedInput += "\\P";
+ else {
+ std::string HexStr = utohexstr(UnicodeScalarValue.first);
+ if (HexStr.size() <= 2)
+ EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
+ else if (HexStr.size() <= 4)
+ EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr;
+ else if (HexStr.size() <= 8)
+ EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr;
+ }
+ i += UnicodeScalarValue.second - 1;
+ } else
+ EscapedInput.push_back(*i);
+ }
+ return EscapedInput;
+}
+
+Scanner::Scanner(StringRef Input, SourceMgr &sm)
+ : SM(sm)
+ , Indent(-1)
+ , Column(0)
+ , Line(0)
+ , FlowLevel(0)
+ , IsStartOfStream(true)
+ , IsSimpleKeyAllowed(true)
+ , IsSimpleKeyRequired(false)
+ , Failed(false) {
+ InputBuffer = MemoryBuffer::getMemBuffer(Input, "YAML");
+ SM.AddNewSourceBuffer(InputBuffer, SMLoc());
+ Current = InputBuffer->getBufferStart();
+ End = InputBuffer->getBufferEnd();
+}
+
+Token &Scanner::peekNext() {
+ // If the current token is a possible simple key, keep parsing until we
+ // can confirm.
+ bool NeedMore = false;
+ while (true) {
+ if (TokenQueue.empty() || NeedMore) {
+ if (!fetchMoreTokens()) {
+ TokenQueue.clear();
+ TokenQueue.push_back(Token());
+ return TokenQueue.front();
+ }
+ }
+ assert(!TokenQueue.empty() &&
+ "fetchMoreTokens lied about getting tokens!");
+
+ removeStaleSimpleKeyCandidates();
+ SimpleKey SK;
+ SK.Tok = TokenQueue.front();
+ if (std::find(SimpleKeys.begin(), SimpleKeys.end(), SK)
+ == SimpleKeys.end())
+ break;
+ else
+ NeedMore = true;
+ }
+ return TokenQueue.front();
+}
+
+Token Scanner::getNext() {
+ Token Ret = peekNext();
+ // TokenQueue can be empty if there was an error getting the next token.
+ if (!TokenQueue.empty())
+ TokenQueue.pop_front();
+
+ // There cannot be any referenced Token's if the TokenQueue is empty. So do a
+ // quick deallocation of them all.
+ if (TokenQueue.empty()) {
+ TokenQueue.Alloc.Reset();
+ }
+
+ return Ret;
+}
+
+StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) {
+ // Check 7 bit c-printable - b-char.
+ if ( *Position == 0x09
+ || (*Position >= 0x20 && *Position <= 0x7E))
+ return Position + 1;
+
+ // Check for valid UTF-8.
+ if (uint8_t(*Position) & 0x80) {
+ UTF8Decoded u8d = decodeUTF8(Position);
+ if ( u8d.second != 0
+ && u8d.first != 0xFEFF
+ && ( u8d.first == 0x85
+ || ( u8d.first >= 0xA0
+ && u8d.first <= 0xD7FF)
+ || ( u8d.first >= 0xE000
+ && u8d.first <= 0xFFFD)
+ || ( u8d.first >= 0x10000
+ && u8d.first <= 0x10FFFF)))
+ return Position + u8d.second;
+ }
+ return Position;
+}
+
+StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
+ if (*Position == 0x0D) {
+ if (Position + 1 != End && *(Position + 1) == 0x0A)
+ return Position + 2;
+ return Position + 1;
+ }
+
+ if (*Position == 0x0A)
+ return Position + 1;
+ return Position;
+}
+
+
+StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) {
+ if (Position == End)
+ return Position;
+ if (*Position == ' ' || *Position == '\t')
+ return Position + 1;
+ return Position;
+}
+
+StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) {
+ if (Position == End)
+ return Position;
+ if (*Position == ' ' || *Position == '\t')
+ return Position;
+ return skip_nb_char(Position);
+}
+
+StringRef::iterator Scanner::skip_while( SkipWhileFunc Func
+ , StringRef::iterator Position) {
+ while (true) {
+ StringRef::iterator i = (this->*Func)(Position);
+ if (i == Position)
+ break;
+ Position = i;
+ }
+ return Position;
+}
+
/// Return true if \a C is a hexadecimal digit: 0-9, a-f or A-F.
static bool is_ns_hex_digit(const char C) {
  return    (C >= '0' && C <= '9')
         || (C >= 'a' && C <= 'f')
         || (C >= 'A' && C <= 'F');
}
+
/// Return true if \a C is a word character: a decimal digit, an ASCII letter
/// or '-'.
static bool is_ns_word_char(const char C) {
  return    C == '-'
         || (C >= '0' && C <= '9')
         || (C >= 'a' && C <= 'z')
         || (C >= 'A' && C <= 'Z');
}
+
+StringRef Scanner::scan_ns_uri_char() {
+ StringRef::iterator Start = Current;
+ while (true) {
+ if (Current == End)
+ break;
+ if (( *Current == '%'
+ && Current + 2 < End
+ && is_ns_hex_digit(*(Current + 1))
+ && is_ns_hex_digit(*(Current + 2)))
+ || is_ns_word_char(*Current)
+ || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]")
+ != StringRef::npos) {
+ ++Current;
+ ++Column;
+ } else
+ break;
+ }
+ return StringRef(Start, Current - Start);
+}
+
+StringRef Scanner::scan_ns_plain_one_line() {
+ StringRef::iterator start = Current;
+ // The first character must already be verified.
+ ++Current;
+ while (true) {
+ if (Current == End) {
+ break;
+ } else if (*Current == ':') {
+ // Check if the next character is a ns-char.
+ if (Current + 1 == End)
+ break;
+ StringRef::iterator i = skip_ns_char(Current + 1);
+ if (Current + 1 != i) {
+ Current = i;
+ Column += 2; // Consume both the ':' and ns-char.
+ } else
+ break;
+ } else if (*Current == '#') {
+ // Check if the previous character was a ns-char.
+ // The & 0x80 check is to check for the trailing byte of a utf-8
+ if (*(Current - 1) & 0x80 || skip_ns_char(Current - 1) == Current) {
+ ++Current;
+ ++Column;
+ } else
+ break;
+ } else {
+ StringRef::iterator i = skip_nb_char(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ ++Column;
+ }
+ }
+ return StringRef(start, Current - start);
+}
+
+bool Scanner::consume(uint32_t Expected) {
+ if (Expected >= 0x80)
+ report_fatal_error("Not dealing with this yet");
+ if (Current == End)
+ return false;
+ if (uint8_t(*Current) >= 0x80)
+ report_fatal_error("Not dealing with this yet");
+ if (uint8_t(*Current) == Expected) {
+ ++Current;
+ ++Column;
+ return true;
+ }
+ return false;
+}
+
+void Scanner::skip(uint32_t Distance) {
+ Current += Distance;
+ Column += Distance;
+}
+
+bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
+ if (Position == End)
+ return false;
+ if ( *Position == ' ' || *Position == '\t'
+ || *Position == '\r' || *Position == '\n')
+ return true;
+ return false;
+}
+
+void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok
+ , unsigned AtColumn
+ , bool IsRequired) {
+ if (IsSimpleKeyAllowed) {
+ SimpleKey SK;
+ SK.Tok = Tok;
+ SK.Line = Line;
+ SK.Column = AtColumn;
+ SK.IsRequired = IsRequired;
+ SK.FlowLevel = FlowLevel;
+ SimpleKeys.push_back(SK);
+ }
+}
+
+void Scanner::removeStaleSimpleKeyCandidates() {
+ for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin();
+ i != SimpleKeys.end();) {
+ if (i->Line != Line || i->Column + 1024 < Column) {
+ if (i->IsRequired)
+ setError( "Could not find expected : for simple key"
+ , i->Tok->Range.begin());
+ i = SimpleKeys.erase(i);
+ } else
+ ++i;
+ }
+}
+
+void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) {
+ if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level)
+ SimpleKeys.pop_back();
+}
+
+bool Scanner::unrollIndent(int ToColumn) {
+ Token T;
+ // Indentation is ignored in flow.
+ if (FlowLevel != 0)
+ return true;
+
+ while (Indent > ToColumn) {
+ T.Kind = Token::TK_BlockEnd;
+ T.Range = StringRef(Current, 1);
+ TokenQueue.push_back(T);
+ Indent = Indents.pop_back_val();
+ }
+
+ return true;
+}
+
+bool Scanner::rollIndent( int ToColumn
+ , Token::TokenKind Kind
+ , TokenQueueT::iterator InsertPoint) {
+ if (FlowLevel)
+ return true;
+ if (Indent < ToColumn) {
+ Indents.push_back(Indent);
+ Indent = ToColumn;
+
+ Token T;
+ T.Kind = Kind;
+ T.Range = StringRef(Current, 0);
+ TokenQueue.insert(InsertPoint, T);
+ }
+ return true;
+}
+
+void Scanner::scanToNextToken() {
+ while (true) {
+ while (*Current == ' ' || *Current == '\t') {
+ skip(1);
+ }
+
+ // Skip comment.
+ if (*Current == '#') {
+ while (true) {
+ // This may skip more than one byte, thus Column is only incremented
+ // for code points.
+ StringRef::iterator i = skip_nb_char(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ ++Column;
+ }
+ }
+
+ // Skip EOL.
+ StringRef::iterator i = skip_b_break(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ ++Line;
+ Column = 0;
+ // New lines may start a simple key.
+ if (!FlowLevel)
+ IsSimpleKeyAllowed = true;
+ }
+}
+
+bool Scanner::scanStreamStart() {
+ IsStartOfStream = false;
+
+ EncodingInfo EI = getUnicodeEncoding(currentInput());
+
+ Token T;
+ T.Kind = Token::TK_StreamStart;
+ T.Range = StringRef(Current, EI.second);
+ TokenQueue.push_back(T);
+ Current += EI.second;
+ return true;
+}
+
+bool Scanner::scanStreamEnd() {
+ // Force an ending new line if one isn't present.
+ if (Column != 0) {
+ Column = 0;
+ ++Line;
+ }
+
+ unrollIndent(-1);
+ SimpleKeys.clear();
+ IsSimpleKeyAllowed = false;
+
+ Token T;
+ T.Kind = Token::TK_StreamEnd;
+ T.Range = StringRef(Current, 0);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanDirective() {
+ // Reset the indentation level.
+ unrollIndent(-1);
+ SimpleKeys.clear();
+ IsSimpleKeyAllowed = false;
+
+ StringRef::iterator Start = Current;
+ consume('%');
+ StringRef::iterator NameStart = Current;
+ Current = skip_while(&Scanner::skip_ns_char, Current);
+ StringRef Name(NameStart, Current - NameStart);
+ Current = skip_while(&Scanner::skip_s_white, Current);
+
+ if (Name == "YAML") {
+ Current = skip_while(&Scanner::skip_ns_char, Current);
+ Token T;
+ T.Kind = Token::TK_VersionDirective;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+ return true;
+ }
+ return false;
+}
+
+bool Scanner::scanDocumentIndicator(bool IsStart) {
+ unrollIndent(-1);
+ SimpleKeys.clear();
+ IsSimpleKeyAllowed = false;
+
+ Token T;
+ T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd;
+ T.Range = StringRef(Current, 3);
+ skip(3);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanFlowCollectionStart(bool IsSequence) {
+ Token T;
+ T.Kind = IsSequence ? Token::TK_FlowSequenceStart
+ : Token::TK_FlowMappingStart;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+
+ // [ and { may begin a simple key.
+ saveSimpleKeyCandidate(TokenQueue.back(), Column - 1, false);
+
+ // And may also be followed by a simple key.
+ IsSimpleKeyAllowed = true;
+ ++FlowLevel;
+ return true;
+}
+
+bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
+ removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
+ IsSimpleKeyAllowed = false;
+ Token T;
+ T.Kind = IsSequence ? Token::TK_FlowSequenceEnd
+ : Token::TK_FlowMappingEnd;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+ if (FlowLevel)
+ --FlowLevel;
+ return true;
+}
+
+bool Scanner::scanFlowEntry() {
+ removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
+ IsSimpleKeyAllowed = true;
+ Token T;
+ T.Kind = Token::TK_FlowEntry;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanBlockEntry() {
+ rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end());
+ removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
+ IsSimpleKeyAllowed = true;
+ Token T;
+ T.Kind = Token::TK_BlockEntry;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanKey() {
+ if (!FlowLevel)
+ rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
+
+ removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
+ IsSimpleKeyAllowed = !FlowLevel;
+
+ Token T;
+ T.Kind = Token::TK_Key;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanValue() {
+ // If the previous token could have been a simple key, insert the key token
+ // into the token queue.
+ if (!SimpleKeys.empty()) {
+ SimpleKey SK = SimpleKeys.pop_back_val();
+ Token T;
+ T.Kind = Token::TK_Key;
+ T.Range = SK.Tok->Range;
+ TokenQueueT::iterator i, e;
+ for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) {
+ if (i == SK.Tok)
+ break;
+ }
+ assert(i != e && "SimpleKey not in token queue!");
+ i = TokenQueue.insert(i, T);
+
+ // We may also need to add a Block-Mapping-Start token.
+ rollIndent(SK.Column, Token::TK_BlockMappingStart, i);
+
+ IsSimpleKeyAllowed = false;
+ } else {
+ if (!FlowLevel)
+ rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
+ IsSimpleKeyAllowed = !FlowLevel;
+ }
+
+ Token T;
+ T.Kind = Token::TK_Value;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+// Forbidding inlining improves performance by roughly 20%.
+// FIXME: Remove once llvm optimizes this to the faster version without hints.
+LLVM_ATTRIBUTE_NOINLINE static bool
+wasEscaped(StringRef::iterator First, StringRef::iterator Position);
+
+// Returns whether a character at 'Position' was escaped with a leading '\'.
+// 'First' specifies the position of the first character in the string.
+static bool wasEscaped(StringRef::iterator First,
+ StringRef::iterator Position) {
+ assert(Position - 1 >= First);
+ StringRef::iterator I = Position - 1;
+ // We calculate the number of consecutive '\'s before the current position
+ // by iterating backwards through our string.
+ while (I >= First && *I == '\\') --I;
+ // (Position - 1 - I) now contains the number of '\'s before the current
+ // position. If it is odd, the character at 'Position' was escaped.
+ return (Position - 1 - I) % 2 == 1;
+}
+
+bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
+ StringRef::iterator Start = Current;
+ unsigned ColStart = Column;
+ if (IsDoubleQuoted) {
+ do {
+ ++Current;
+ while (Current != End && *Current != '"')
+ ++Current;
+ // Repeat until the previous character was not a '\' or was an escaped
+ // backslash.
+ } while (*(Current - 1) == '\\' && wasEscaped(Start + 1, Current));
+ } else {
+ skip(1);
+ while (true) {
+ // Skip a ' followed by another '.
+ if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') {
+ skip(2);
+ continue;
+ } else if (*Current == '\'')
+ break;
+ StringRef::iterator i = skip_nb_char(Current);
+ if (i == Current) {
+ i = skip_b_break(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ Column = 0;
+ ++Line;
+ } else {
+ if (i == End)
+ break;
+ Current = i;
+ ++Column;
+ }
+ }
+ }
+ skip(1); // Skip ending quote.
+ Token T;
+ T.Kind = Token::TK_Scalar;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+
+ saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+
+ IsSimpleKeyAllowed = false;
+
+ return true;
+}
+
+bool Scanner::scanPlainScalar() {
+ StringRef::iterator Start = Current;
+ unsigned ColStart = Column;
+ unsigned LeadingBlanks = 0;
+ assert(Indent >= -1 && "Indent must be >= -1 !");
+ unsigned indent = static_cast<unsigned>(Indent + 1);
+ while (true) {
+ if (*Current == '#')
+ break;
+
+ while (!isBlankOrBreak(Current)) {
+ if ( FlowLevel && *Current == ':'
+ && !(isBlankOrBreak(Current + 1) || *(Current + 1) == ',')) {
+ setError("Found unexpected ':' while scanning a plain scalar", Current);
+ return false;
+ }
+
+ // Check for the end of the plain scalar.
+ if ( (*Current == ':' && isBlankOrBreak(Current + 1))
+ || ( FlowLevel
+ && (StringRef(Current, 1).find_first_of(",:?[]{}")
+ != StringRef::npos)))
+ break;
+
+ StringRef::iterator i = skip_nb_char(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ ++Column;
+ }
+
+ // Are we at the end?
+ if (!isBlankOrBreak(Current))
+ break;
+
+ // Eat blanks.
+ StringRef::iterator Tmp = Current;
+ while (isBlankOrBreak(Tmp)) {
+ StringRef::iterator i = skip_s_white(Tmp);
+ if (i != Tmp) {
+ if (LeadingBlanks && (Column < indent) && *Tmp == '\t') {
+ setError("Found invalid tab character in indentation", Tmp);
+ return false;
+ }
+ Tmp = i;
+ ++Column;
+ } else {
+ i = skip_b_break(Tmp);
+ if (!LeadingBlanks)
+ LeadingBlanks = 1;
+ Tmp = i;
+ Column = 0;
+ ++Line;
+ }
+ }
+
+ if (!FlowLevel && Column < indent)
+ break;
+
+ Current = Tmp;
+ }
+ if (Start == Current) {
+ setError("Got empty plain scalar", Start);
+ return false;
+ }
+ Token T;
+ T.Kind = Token::TK_Scalar;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+
+ // Plain scalars can be simple keys.
+ saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+
+ IsSimpleKeyAllowed = false;
+
+ return true;
+}
+
+bool Scanner::scanAliasOrAnchor(bool IsAlias) {
+ StringRef::iterator Start = Current;
+ unsigned ColStart = Column;
+ skip(1);
+ while(true) {
+ if ( *Current == '[' || *Current == ']'
+ || *Current == '{' || *Current == '}'
+ || *Current == ','
+ || *Current == ':')
+ break;
+ StringRef::iterator i = skip_ns_char(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ ++Column;
+ }
+
+ if (Start == Current) {
+ setError("Got empty alias or anchor", Start);
+ return false;
+ }
+
+ Token T;
+ T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+
+ // Alias and anchors can be simple keys.
+ saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+
+ IsSimpleKeyAllowed = false;
+
+ return true;
+}
+
+bool Scanner::scanBlockScalar(bool IsLiteral) {
+ StringRef::iterator Start = Current;
+ skip(1); // Eat | or >
+ while(true) {
+ StringRef::iterator i = skip_nb_char(Current);
+ if (i == Current) {
+ if (Column == 0)
+ break;
+ i = skip_b_break(Current);
+ if (i != Current) {
+ // We got a line break.
+ Column = 0;
+ ++Line;
+ Current = i;
+ continue;
+ } else {
+ // There was an error, which should already have been printed out.
+ return false;
+ }
+ }
+ Current = i;
+ ++Column;
+ }
+
+ if (Start == Current) {
+ setError("Got empty block scalar", Start);
+ return false;
+ }
+
+ Token T;
+ T.Kind = Token::TK_Scalar;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanTag() {
+ StringRef::iterator Start = Current;
+ unsigned ColStart = Column;
+ skip(1); // Eat !.
+ if (Current == End || isBlankOrBreak(Current)); // An empty tag.
+ else if (*Current == '<') {
+ skip(1);
+ scan_ns_uri_char();
+ if (!consume('>'))
+ return false;
+ } else {
+ // FIXME: Actually parse the c-ns-shorthand-tag rule.
+ Current = skip_while(&Scanner::skip_ns_char, Current);
+ }
+
+ Token T;
+ T.Kind = Token::TK_Tag;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+
+ // Tags can be simple keys.
+ saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+
+ IsSimpleKeyAllowed = false;
+
+ return true;
+}
+
+/// Selects and runs the scan routine that matches the input at Current.
+///
+/// The stream start is handled first; otherwise scanToNextToken() is run,
+/// stale simple key candidates are removed and the indentation is unrolled
+/// to the current column before dispatching on the character under Current.
+///
+/// \returns the result of the selected scan routine, or false after
+/// reporting "Unrecognized character while tokenizing." when no rule matches.
+bool Scanner::fetchMoreTokens() {
+  if (IsStartOfStream)
+    return scanStreamStart();
+
+  scanToNextToken();
+
+  if (Current == End)
+    return scanStreamEnd();
+
+  removeStaleSimpleKeyCandidates();
+
+  unrollIndent(Column);
+
+  if (Column == 0 && *Current == '%')
+    return scanDirective();
+
+  // A document indicator is "---" or "..." at column 0 followed by a blank,
+  // a line break or the end of the input. Only three characters have to be
+  // left for that; demanding a fourth one would make the end-of-input
+  // alternative below impossible to satisfy.
+  if (Column == 0 && End - Current >= 3
+      && *Current == '-'
+      && *(Current + 1) == '-'
+      && *(Current + 2) == '-'
+      && (Current + 3 == End || isBlankOrBreak(Current + 3)))
+    return scanDocumentIndicator(true);
+
+  if (Column == 0 && End - Current >= 3
+      && *Current == '.'
+      && *(Current + 1) == '.'
+      && *(Current + 2) == '.'
+      && (Current + 3 == End || isBlankOrBreak(Current + 3)))
+    return scanDocumentIndicator(false);
+
+  // Flow collection delimiters and the flow entry separator.
+  if (*Current == '[')
+    return scanFlowCollectionStart(true);
+
+  if (*Current == '{')
+    return scanFlowCollectionStart(false);
+
+  if (*Current == ']')
+    return scanFlowCollectionEnd(true);
+
+  if (*Current == '}')
+    return scanFlowCollectionEnd(false);
+
+  if (*Current == ',')
+    return scanFlowEntry();
+
+  // '-' only introduces a block entry when a blank or break follows it.
+  if (*Current == '-' && isBlankOrBreak(Current + 1))
+    return scanBlockEntry();
+
+  // '?' and ':' are indicators inside a flow collection, or anywhere when
+  // followed by a blank or break.
+  if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1)))
+    return scanKey();
+
+  if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1)))
+    return scanValue();
+
+  if (*Current == '*')
+    return scanAliasOrAnchor(true);
+
+  if (*Current == '&')
+    return scanAliasOrAnchor(false);
+
+  if (*Current == '!')
+    return scanTag();
+
+  // Block scalar headers are only recognised outside of flow collections.
+  if (*Current == '|' && !FlowLevel)
+    return scanBlockScalar(true);
+
+  if (*Current == '>' && !FlowLevel)
+    return scanBlockScalar(false);
+
+  if (*Current == '\'')
+    return scanFlowScalar(false);
+
+  if (*Current == '"')
+    return scanFlowScalar(true);
+
+  // Get a plain scalar.
+  StringRef FirstChar(Current, 1);
+  if (!(isBlankOrBreak(Current)
+        || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos)
+      || (*Current == '-' && !isBlankOrBreak(Current + 1))
+      || (!FlowLevel && (*Current == '?' || *Current == ':')
+          && isBlankOrBreak(Current + 1))
+      || (!FlowLevel && *Current == ':'
+          && Current + 2 < End
+          && *(Current + 1) == ':'
+          && !isBlankOrBreak(Current + 2)))
+    return scanPlainScalar();
+
+  setError("Unrecognized character while tokenizing.");
+  return false;
+}
+
+/// Creates the Scanner for \p Input (reporting through \p SM) and starts
+/// without a current document.
+Stream::Stream(StringRef Input, SourceMgr &SM)
+  : scanner(new Scanner(Input, SM)),
+    CurrentDoc(0) {
+}
+
+/// Forwards to the scanner's failed() query.
+bool Stream::failed() {
+  const bool ScannerFailed = scanner->failed();
+  return ScannerFailed;
+}
+
+/// Prints \p Msg as an error located at the start of \p N's source range,
+/// passing that range along as the only range of the diagnostic.
+void Stream::printError(Node *N, const Twine &Msg) {
+  SmallVector<SMRange, 1> Highlighted;
+  Highlighted.push_back(N->getSourceRange());
+
+  const SMLoc Where = N->getSourceRange().Start;
+  scanner->printError(Where, SourceMgr::DK_Error, Msg, Highlighted);
+}
+
+/// Receives a version directive token from Document::parseDirectives().
+/// Currently a no-op: \p t is not inspected.
+void Stream::handleYAMLDirective(const Token &t) {
+ // TODO: Ensure version is 1.x.
+}
+
+/// Starts iteration: consumes the first scanner token, creates the first
+/// Document and returns an iterator bound to CurrentDoc.
+///
+/// Aborts via report_fatal_error() if CurrentDoc is already set.
+document_iterator Stream::begin() {
+  // An already set CurrentDoc is treated as proof that iteration has been
+  // started before.
+  if (CurrentDoc)
+    report_fatal_error("Can only iterate over the stream once");
+
+  scanner->getNext(); // Discard Stream-Start.
+
+  Document *FirstDoc = new Document(*this);
+  CurrentDoc.reset(FirstDoc);
+  return document_iterator(CurrentDoc);
+}
+
+/// Returns the end marker for document iteration: a value-initialized
+/// document_iterator.
+document_iterator Stream::end() {
+  document_iterator PastTheEnd = document_iterator();
+  return PastTheEnd;
+}
+
+/// Walks over every document from begin() to end() and calls skip() on it.
+void Stream::skip() {
+  document_iterator DocIt = begin();
+  document_iterator DocEnd = end();
+  while (DocIt != DocEnd) {
+    DocIt->skip();
+    ++DocIt;
+  }
+}
+
+/// Stores the owning document, the type id and the anchor, and sets the
+/// source range to an empty range at the beginning of the next token.
+Node::Node(unsigned int Type, OwningPtr<Document> &D, StringRef A)
+  : Doc(D), TypeID(Type), Anchor(A) {
+  const SMLoc Here = SMLoc::getFromPointer(peekNext().Range.begin());
+  SourceRange = SMRange(Here, Here);
+}
+
+/// Out-of-line destructor with an empty body.
+Node::~Node() {}
+
+/// Forwards to the owning document's peekNext().
+Token &Node::peekNext() {
+  Token &Upcoming = Doc->peekNext();
+  return Upcoming;
+}
+
+/// Forwards to the owning document's getNext().
+Token Node::getNext() {
+  Token Consumed = Doc->getNext();
+  return Consumed;
+}
+
+/// Forwards to the owning document's parseBlockNode().
+Node *Node::parseBlockNode() {
+  Node *Parsed = Doc->parseBlockNode();
+  return Parsed;
+}
+
+/// Gives access to the owning document's NodeAllocator.
+BumpPtrAllocator &Node::getAllocator() {
+  BumpPtrAllocator &Arena = Doc->NodeAllocator;
+  return Arena;
+}
+
+/// Forwards \p Msg and \p Tok to the owning document's setError().
+void Node::setError(const Twine &Msg, Token &Tok) const {
+  Document &Owner = *Doc;
+  Owner.setError(Msg, Tok);
+}
+
+/// Forwards to the owning document's failed() query.
+bool Node::failed() const {
+  const bool DocFailed = Doc->failed();
+  return DocFailed;
+}
+
+
+
+/// Returns the content of this scalar with its quoting resolved.
+///
+/// - Double quoted: the quotes are removed; if the content holds a backslash
+///   or a line break it is rebuilt by unescapeDoubleQuoted().
+/// - Single quoted: the quotes are removed and every quote found in the
+///   content is emitted once, skipping the character that follows it.
+/// - Anything else (plain or block): trailing spaces are removed.
+///
+/// \param Storage scratch buffer. It is only cleared and written when the
+///        value has to be rebuilt; the result then refers to \p Storage,
+///        otherwise it refers to the original text.
+StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const {
+  // Nothing to classify or trim; also keeps Value[0] below in bounds.
+  if (Value.empty())
+    return Value;
+
+  // TODO: Handle newlines properly. We need to remove leading whitespace.
+  if (Value[0] == '"') { // Double quoted.
+    // Pull off the leading and trailing "s.
+    StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
+    // Search for characters that would require unescaping the value.
+    StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n");
+    if (i != StringRef::npos)
+      return unescapeDoubleQuoted(UnquotedValue, i, Storage);
+    return UnquotedValue;
+  } else if (Value[0] == '\'') { // Single quoted.
+    // Pull off the leading and trailing 's.
+    StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
+    StringRef::size_type i = UnquotedValue.find('\'');
+    if (i != StringRef::npos) {
+      // We're going to need Storage.
+      Storage.clear();
+      Storage.reserve(UnquotedValue.size());
+      for (; i != StringRef::npos; i = UnquotedValue.find('\'')) {
+        StringRef Valid(UnquotedValue.begin(), i);
+        Storage.insert(Storage.end(), Valid.begin(), Valid.end());
+        Storage.push_back('\'');
+        // Skip the quote and the character after it (its doubled twin).
+        UnquotedValue = UnquotedValue.substr(i + 2);
+      }
+      Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
+      return StringRef(Storage.begin(), Storage.size());
+    }
+    return UnquotedValue;
+  }
+
+  // Plain or block: strip trailing spaces only. Cutting at the last space
+  // found anywhere in the value would truncate a multi-word scalar such as
+  // "Mark McGwire" to "Mark".
+  StringRef::size_type Length = Value.size();
+  while (Length != 0 && Value[Length - 1] == ' ')
+    --Length;
+  return Value.substr(0, Length);
+}
+
+/// Rebuilds the content of a double quoted scalar in \p Storage, resolving
+/// escape sequences and line breaks.
+///
+/// \param UnquotedValue the scalar text without the surrounding quotes.
+/// \param i offset in \p UnquotedValue of the first backslash, '\r' or '\n'.
+/// \param Storage cleared first, then filled with the resulting value.
+/// \returns a StringRef over \p Storage, or "" after calling setError() when
+///          an escape sequence is not recognized.
+StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
+                                          , StringRef::size_type i
+                                          , SmallVectorImpl<char> &Storage)
+                                          const {
+  // Use Storage to build proper value.
+  Storage.clear();
+  Storage.reserve(UnquotedValue.size());
+  for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) {
+    // Insert all previous chars into Storage.
+    StringRef Valid(UnquotedValue.begin(), i);
+    Storage.insert(Storage.end(), Valid.begin(), Valid.end());
+    // Chop off inserted chars.
+    UnquotedValue = UnquotedValue.substr(i);
+
+    assert(!UnquotedValue.empty() && "Can't be empty!");
+
+    // Parse escape or line break.
+    switch (UnquotedValue[0]) {
+    case '\r':
+    case '\n':
+      // An unescaped break becomes a single '\n'; a second break character
+      // directly after it is consumed together with the first.
+      Storage.push_back('\n');
+      if (   UnquotedValue.size() > 1
+          && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
+        UnquotedValue = UnquotedValue.substr(1);
+      UnquotedValue = UnquotedValue.substr(1);
+      break;
+    default:
+      if (UnquotedValue.size() == 1) {
+        // A lone trailing backslash has nothing to escape. Report it and
+        // stop: leaving it in place would make the search at the top of the
+        // loop find the same backslash again and never terminate.
+        Token T;
+        T.Range = StringRef(UnquotedValue.begin(), 1);
+        setError("Unrecognized escape code!", T);
+        return "";
+      }
+      // Drop the backslash; UnquotedValue[0] is now the escape character.
+      UnquotedValue = UnquotedValue.substr(1);
+      switch (UnquotedValue[0]) {
+      default: {
+          Token T;
+          T.Range = StringRef(UnquotedValue.begin(), 1);
+          setError("Unrecognized escape code!", T);
+          return "";
+        }
+      case '\r':
+      case '\n':
+        // Remove the new line.
+        if (   UnquotedValue.size() > 1
+            && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
+          UnquotedValue = UnquotedValue.substr(1);
+        // If this was just a single byte newline, it will get skipped
+        // below.
+        break;
+      case '0':
+        Storage.push_back(0x00);
+        break;
+      case 'a':
+        Storage.push_back(0x07);
+        break;
+      case 'b':
+        Storage.push_back(0x08);
+        break;
+      case 't':
+      case 0x09:
+        Storage.push_back(0x09);
+        break;
+      case 'n':
+        Storage.push_back(0x0A);
+        break;
+      case 'v':
+        Storage.push_back(0x0B);
+        break;
+      case 'f':
+        Storage.push_back(0x0C);
+        break;
+      case 'r':
+        Storage.push_back(0x0D);
+        break;
+      case 'e':
+        Storage.push_back(0x1B);
+        break;
+      case ' ':
+        Storage.push_back(0x20);
+        break;
+      case '"':
+        Storage.push_back(0x22);
+        break;
+      case '/':
+        Storage.push_back(0x2F);
+        break;
+      case '\\':
+        Storage.push_back(0x5C);
+        break;
+      case 'N':
+        encodeUTF8(0x85, Storage);
+        break;
+      case '_':
+        encodeUTF8(0xA0, Storage);
+        break;
+      case 'L':
+        encodeUTF8(0x2028, Storage);
+        break;
+      case 'P':
+        encodeUTF8(0x2029, Storage);
+        break;
+      case 'x': {
+          // Two hex digits follow the 'x'.
+          if (UnquotedValue.size() < 3)
+            // TODO: Report error.
+            break;
+          unsigned int UnicodeScalarValue;
+          // getAsInteger() returns true on failure and then leaves the
+          // result unset; substitute U+FFFD instead of encoding garbage.
+          if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue))
+            // TODO: Report error.
+            UnicodeScalarValue = 0xFFFD;
+          encodeUTF8(UnicodeScalarValue, Storage);
+          UnquotedValue = UnquotedValue.substr(2);
+          break;
+        }
+      case 'u': {
+          // Four hex digits follow the 'u'.
+          if (UnquotedValue.size() < 5)
+            // TODO: Report error.
+            break;
+          unsigned int UnicodeScalarValue;
+          if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue))
+            // TODO: Report error.
+            UnicodeScalarValue = 0xFFFD;
+          encodeUTF8(UnicodeScalarValue, Storage);
+          UnquotedValue = UnquotedValue.substr(4);
+          break;
+        }
+      case 'U': {
+          // Eight hex digits follow the 'U'.
+          if (UnquotedValue.size() < 9)
+            // TODO: Report error.
+            break;
+          unsigned int UnicodeScalarValue;
+          if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue))
+            // TODO: Report error.
+            UnicodeScalarValue = 0xFFFD;
+          encodeUTF8(UnicodeScalarValue, Storage);
+          UnquotedValue = UnquotedValue.substr(8);
+          break;
+        }
+      }
+      // Skip the escape character itself (or the last digit/break byte that
+      // the cases above left in front).
+      UnquotedValue = UnquotedValue.substr(1);
+    }
+  }
+  // Whatever is left contains nothing that needs processing.
+  Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
+  return StringRef(Storage.begin(), Storage.size());
+}
+
+/// Returns the key node of this entry, parsing it on first use and caching
+/// it in Key. A NullNode is created when no key is present.
+Node *KeyValueNode::getKey() {
+  if (Key)
+    return Key;
+
+  // Implicit null key: the entry starts with a token that cannot begin a
+  // key. An explicit TK_Key marker is consumed here.
+  switch (peekNext().Kind) {
+  case Token::TK_BlockEnd:
+  case Token::TK_Value:
+  case Token::TK_Error:
+    Key = new (getAllocator()) NullNode(Doc);
+    return Key;
+  case Token::TK_Key:
+    getNext(); // skip TK_Key.
+    break;
+  default:
+    break;
+  }
+
+  // Explicit null key: the marker is directly followed by the value
+  // indicator or the end of the block.
+  Token &Next = peekNext();
+  const bool IsNullKey =    Next.Kind == Token::TK_BlockEnd
+                         || Next.Kind == Token::TK_Value;
+  if (IsNullKey)
+    Key = new (getAllocator()) NullNode(Doc);
+  else
+    Key = parseBlockNode(); // We've got a normal key.
+  return Key;
+}
+
+/// Returns the value node of this entry, parsing it on first use and caching
+/// it in Value. The key is skipped first; a NullNode is created when parsing
+/// has failed, when no value is present, or when an unexpected token is
+/// found (which is also reported through setError()).
+Node *KeyValueNode::getValue() {
+  if (Value)
+    return Value;
+
+  getKey()->skip();
+  if (failed()) {
+    Value = new (getAllocator()) NullNode(Doc);
+    return Value;
+  }
+
+  // Implicit null value: the key is followed by something that ends the
+  // entry. Otherwise a TK_Value marker is required and gets consumed.
+  Token &Separator = peekNext();
+  switch (Separator.Kind) {
+  case Token::TK_BlockEnd:
+  case Token::TK_FlowMappingEnd:
+  case Token::TK_Key:
+  case Token::TK_FlowEntry:
+  case Token::TK_Error:
+    Value = new (getAllocator()) NullNode(Doc);
+    return Value;
+  case Token::TK_Value:
+    getNext(); // skip TK_Value.
+    break;
+  default:
+    setError("Unexpected token in Key Value.", Separator);
+    Value = new (getAllocator()) NullNode(Doc);
+    return Value;
+  }
+
+  // Explicit null value: the marker is directly followed by the next key or
+  // the end of the block.
+  Token &Next = peekNext();
+  const bool IsNullValue =    Next.Kind == Token::TK_BlockEnd
+                           || Next.Kind == Token::TK_Key;
+  if (IsNullValue)
+    Value = new (getAllocator()) NullNode(Doc);
+  else
+    Value = parseBlockNode(); // We got a normal value.
+  return Value;
+}
+
+/// Advances to the next key/value entry of this mapping.
+///
+/// On success CurrentEntry points at a new KeyValueNode. When the mapping is
+/// finished, or on any error, IsAtEnd is set and CurrentEntry is cleared.
+void MappingNode::increment() {
+  if (failed()) {
+    IsAtEnd = true;
+    CurrentEntry = 0;
+    return;
+  }
+
+  if (CurrentEntry) {
+    CurrentEntry->skip();
+    // An inline mapping ends once its current entry has been skipped.
+    if (Type == MT_Inline) {
+      IsAtEnd = true;
+      CurrentEntry = 0;
+      return;
+    }
+  }
+
+  Token T = peekNext();
+  if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) {
+    // KeyValueNode eats the TK_Key. That way it can detect null keys.
+    CurrentEntry = new (getAllocator()) KeyValueNode(Doc);
+    return;
+  }
+
+  if (Type == MT_Block) {
+    // Every remaining token ends the block mapping: a Block End is consumed,
+    // an error token is left alone, anything else is reported.
+    if (T.Kind == Token::TK_BlockEnd)
+      getNext();
+    else if (T.Kind != Token::TK_Error)
+      setError("Unexpected token. Expected Key or Block End", T);
+    IsAtEnd = true;
+    CurrentEntry = 0;
+    return;
+  }
+
+  if (T.Kind == Token::TK_FlowEntry) {
+    // Eat the flow entry and recurse.
+    getNext();
+    increment();
+    return;
+  }
+
+  // Every remaining token ends the mapping: the closing token is consumed,
+  // an error token is left alone, anything else is reported.
+  if (T.Kind == Token::TK_FlowMappingEnd)
+    getNext();
+  else if (T.Kind != Token::TK_Error)
+    setError( "Unexpected token. Expected Key, Flow Entry, "
+              "or Flow Mapping End."
+            , T);
+  IsAtEnd = true;
+  CurrentEntry = 0;
+}
+
+/// Advances to the next entry of this sequence.
+///
+/// On success CurrentEntry points at the parsed entry. When the sequence is
+/// finished, or on any error, IsAtEnd is set and CurrentEntry is cleared.
+/// The accepted tokens depend on SeqType (block, indentless or flow).
+void SequenceNode::increment() {
+  if (failed()) {
+    IsAtEnd = true;
+    CurrentEntry = 0;
+    return;
+  }
+
+  if (CurrentEntry)
+    CurrentEntry->skip();
+
+  Token T = peekNext();
+
+  if (SeqType == ST_Block) {
+    if (T.Kind == Token::TK_BlockEntry) {
+      getNext();
+      CurrentEntry = parseBlockNode();
+      if (!CurrentEntry) // An error occurred.
+        IsAtEnd = true;
+      return;
+    }
+    // Everything else ends the sequence: Block End is consumed, an error
+    // token is left alone, any other token is reported.
+    if (T.Kind == Token::TK_BlockEnd)
+      getNext();
+    else if (T.Kind != Token::TK_Error)
+      setError( "Unexpected token. Expected Block Entry or Block End."
+              , T);
+    IsAtEnd = true;
+    CurrentEntry = 0;
+    return;
+  }
+
+  if (SeqType == ST_Indentless) {
+    if (T.Kind == Token::TK_BlockEntry) {
+      getNext();
+      CurrentEntry = parseBlockNode();
+      if (!CurrentEntry) // An error occurred.
+        IsAtEnd = true;
+      return;
+    }
+    // Any other token ends the sequence without being consumed or reported.
+    IsAtEnd = true;
+    CurrentEntry = 0;
+    return;
+  }
+
+  if (SeqType != ST_Flow)
+    return;
+
+  switch (T.Kind) {
+  case Token::TK_FlowEntry:
+    // Eat the flow entry and recurse.
+    getNext();
+    WasPreviousTokenFlowEntry = true;
+    increment();
+    return;
+  case Token::TK_FlowSequenceEnd:
+    getNext();
+    break;
+  case Token::TK_Error:
+    break;
+  case Token::TK_StreamEnd:
+  case Token::TK_DocumentEnd:
+  case Token::TK_DocumentStart:
+    setError("Could not find closing ]!", T);
+    break;
+  default:
+    if (WasPreviousTokenFlowEntry) {
+      // A separator was seen, so this token starts the next entry.
+      CurrentEntry = parseBlockNode();
+      if (!CurrentEntry)
+        IsAtEnd = true;
+      WasPreviousTokenFlowEntry = false;
+      return;
+    }
+    setError("Expected , between entries!", T);
+    break;
+  }
+
+  // Set this to end iterator.
+  IsAtEnd = true;
+  CurrentEntry = 0;
+}
+
+/// Binds the document to \p S and consumes its prologue: any directives,
+/// which must then be followed by a Document Start token, and an optional
+/// Document Start token.
+Document::Document(Stream &S) : stream(S), Root(0) {
+  const bool SawDirectives = parseDirectives();
+  if (SawDirectives)
+    expectToken(Token::TK_DocumentStart);
+
+  if (peekNext().Kind == Token::TK_DocumentStart)
+    getNext();
+}
+
+/// Skips the content of this document.
+///
+/// \returns false if the scanner has failed or the next token is the end of
+/// the stream; true if something other than Document End or Stream End
+/// follows. A Document End token is consumed and the check is repeated.
+bool Document::skip() {
+  if (stream.scanner->failed())
+    return false;
+
+  if (Root == 0)
+    getRoot();
+  Root->skip();
+
+  switch (peekNext().Kind) {
+  case Token::TK_StreamEnd:
+    return false;
+  case Token::TK_DocumentEnd:
+    getNext();
+    return skip();
+  default:
+    return true;
+  }
+}
+
+/// Forwards to the stream scanner's peekNext().
+Token &Document::peekNext() {
+  Token &Upcoming = stream.scanner->peekNext();
+  return Upcoming;
+}
+
+/// Forwards to the stream scanner's getNext().
+Token Document::getNext() {
+  Token Consumed = stream.scanner->getNext();
+  return Consumed;
+}
+
+/// Reports \p Message through the scanner, located at the first character
+/// of \p Location's range.
+void Document::setError(const Twine &Message, Token &Location) const {
+  StringRef::iterator Where = Location.Range.begin();
+  stream.scanner->setError(Message, Where);
+}
+
+/// Forwards to the stream scanner's failed() query.
+bool Document::failed() const {
+  const bool ScannerFailed = stream.scanner->failed();
+  return ScannerFailed;
+}
+
+/// Creates the node that starts at the next token.
+///
+/// Leading properties are handled first: an alias yields an AliasNode right
+/// away, one anchor is remembered for the node, tags are skipped. The token
+/// after the properties then selects the node class.
+///
+/// \returns the new node, allocated from NodeAllocator, or 0 if a second
+/// anchor is found or the deciding token is an error token.
+Node *Document::parseBlockNode() {
+  Token T = peekNext();
+
+  // Handle properties.
+  Token AnchorInfo;
+  for (;;) {
+    if (T.Kind == Token::TK_Alias) {
+      getNext();
+      return new (NodeAllocator)
+        AliasNode(stream.CurrentDoc, T.Range.substr(1));
+    }
+    if (T.Kind == Token::TK_Anchor) {
+      if (AnchorInfo.Kind == Token::TK_Anchor) {
+        setError("Already encountered an anchor for this node!", T);
+        return 0;
+      }
+      AnchorInfo = getNext(); // Consume TK_Anchor.
+      T = peekNext();
+      continue;
+    }
+    if (T.Kind == Token::TK_Tag) {
+      getNext(); // Skip TK_Tag.
+      T = peekNext();
+      continue;
+    }
+    break;
+  }
+
+  // The anchor token's text minus its first character.
+  const StringRef AnchorName = AnchorInfo.Range.substr(1);
+
+  switch (T.Kind) {
+  case Token::TK_Error:
+    return 0;
+
+  // These two tokens are left in the queue for the node to consume.
+  case Token::TK_BlockEntry:
+    // We got an unindented BlockEntry sequence. This is not terminated with
+    // a BlockEnd.
+    // Don't eat the TK_BlockEntry, SequenceNode needs it.
+    return new (NodeAllocator)
+      SequenceNode(stream.CurrentDoc, AnchorName, SequenceNode::ST_Indentless);
+  case Token::TK_Key:
+    // Don't eat the TK_Key, KeyValueNode expects it.
+    return new (NodeAllocator)
+      MappingNode(stream.CurrentDoc, AnchorName, MappingNode::MT_Inline);
+
+  // For the remaining node kinds the deciding token is consumed first.
+  case Token::TK_BlockSequenceStart:
+    getNext();
+    return new (NodeAllocator)
+      SequenceNode(stream.CurrentDoc, AnchorName, SequenceNode::ST_Block);
+  case Token::TK_FlowSequenceStart:
+    getNext();
+    return new (NodeAllocator)
+      SequenceNode(stream.CurrentDoc, AnchorName, SequenceNode::ST_Flow);
+  case Token::TK_BlockMappingStart:
+    getNext();
+    return new (NodeAllocator)
+      MappingNode(stream.CurrentDoc, AnchorName, MappingNode::MT_Block);
+  case Token::TK_FlowMappingStart:
+    getNext();
+    return new (NodeAllocator)
+      MappingNode(stream.CurrentDoc, AnchorName, MappingNode::MT_Flow);
+  case Token::TK_Scalar:
+    getNext();
+    return new (NodeAllocator)
+      ScalarNode(stream.CurrentDoc, AnchorName, T.Range);
+
+  default:
+    // Document Start, Document End, Stream End and every other token give a
+    // null node without consuming anything.
+    // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not
+    //       !!null null.
+    return new (NodeAllocator) NullNode(stream.CurrentDoc);
+  }
+  llvm_unreachable("Control flow shouldn't reach here.");
+  return 0;
+}
+
+/// Consumes all leading tag and version directive tokens, passing each one
+/// to its handler.
+///
+/// \returns true if at least one directive was consumed.
+bool Document::parseDirectives() {
+  bool SawDirective = false;
+  for (;;) {
+    Token T = peekNext();
+    switch (T.Kind) {
+    case Token::TK_TagDirective:
+      handleTagDirective(getNext());
+      break;
+    case Token::TK_VersionDirective:
+      stream.handleYAMLDirective(getNext());
+      break;
+    default:
+      // First token that is not a directive: leave it in the queue.
+      return SawDirective;
+    }
+    SawDirective = true;
+  }
+}
+
+/// Consumes the next token and checks that its kind is \p TK.
+///
+/// \returns true on a match; otherwise reports "Unexpected token" at the
+/// consumed token and returns false.
+bool Document::expectToken(int TK) {
+  Token Got = getNext();
+  if (Got.Kind == TK)
+    return true;
+
+  setError("Unexpected token", Got);
+  return false;
+}
+
+/// Definition of the static data member document_iterator::NullDoc.
+OwningPtr<Document> document_iterator::NullDoc;
diff --git a/test/YAMLParser/LICENSE.txt b/test/YAMLParser/LICENSE.txt
new file mode 100644
index 0000000000..050ced23f6
--- /dev/null
+++ b/test/YAMLParser/LICENSE.txt
@@ -0,0 +1,19 @@
+Copyright (c) 2006 Kirill Simonov
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/test/YAMLParser/bool.data b/test/YAMLParser/bool.data
new file mode 100644
index 0000000000..e987a0ec1e
--- /dev/null
+++ b/test/YAMLParser/bool.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+- yes
+- NO
+- True
+- on
diff --git a/test/YAMLParser/construct-bool.data b/test/YAMLParser/construct-bool.data
new file mode 100644
index 0000000000..035ec0c858
--- /dev/null
+++ b/test/YAMLParser/construct-bool.data
@@ -0,0 +1,11 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: yes
+answer: NO
+logical: True
+option: on
+
+
+but:
+ y: is a string
+ n: is a string
diff --git a/test/YAMLParser/construct-custom.data b/test/YAMLParser/construct-custom.data
new file mode 100644
index 0000000000..cac95e0a5f
--- /dev/null
+++ b/test/YAMLParser/construct-custom.data
@@ -0,0 +1,28 @@
+# RUN: yaml-bench -canonical %s
+
+---
+- !tag1
+ x: 1
+- !tag1
+ x: 1
+ 'y': 2
+ z: 3
+- !tag2
+ 10
+- !tag2
+ =: 10
+ 'y': 20
+ z: 30
+- !tag3
+ x: 1
+- !tag3
+ x: 1
+ 'y': 2
+ z: 3
+- !tag3
+ =: 1
+ 'y': 2
+ z: 3
+- !foo
+ my-parameter: foo
+ my-another-parameter: [1,2,3]
diff --git a/test/YAMLParser/construct-float.data b/test/YAMLParser/construct-float.data
new file mode 100644
index 0000000000..07c51bdd83
--- /dev/null
+++ b/test/YAMLParser/construct-float.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: 6.8523015e+5
+exponential: 685.230_15e+03
+fixed: 685_230.15
+sexagesimal: 190:20:30.15
+negative infinity: -.inf
+not a number: .NaN
diff --git a/test/YAMLParser/construct-int.data b/test/YAMLParser/construct-int.data
new file mode 100644
index 0000000000..b14c37f788
--- /dev/null
+++ b/test/YAMLParser/construct-int.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: 685230
+decimal: +685_230
+octal: 02472256
+hexadecimal: 0x_0A_74_AE
+binary: 0b1010_0111_0100_1010_1110
+sexagesimal: 190:20:30
diff --git a/test/YAMLParser/construct-map.data b/test/YAMLParser/construct-map.data
new file mode 100644
index 0000000000..1b681206d1
--- /dev/null
+++ b/test/YAMLParser/construct-map.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+# Unordered set of key: value pairs.
+Block style: !!map
+ Clark : Evans
+ Brian : Ingerson
+ Oren : Ben-Kiki
+Flow style: !!map { Clark: Evans, Brian: Ingerson, Oren: Ben-Kiki }
diff --git a/test/YAMLParser/construct-merge.data b/test/YAMLParser/construct-merge.data
new file mode 100644
index 0000000000..0ebc9f612d
--- /dev/null
+++ b/test/YAMLParser/construct-merge.data
@@ -0,0 +1,29 @@
+# RUN: yaml-bench -canonical %s
+
+---
+- &CENTER { x: 1, 'y': 2 }
+- &LEFT { x: 0, 'y': 2 }
+- &BIG { r: 10 }
+- &SMALL { r: 1 }
+
+# All the following maps are equal:
+
+- # Explicit keys
+ x: 1
+ 'y': 2
+ r: 10
+ label: center/big
+
+- # Merge one map
+ << : *CENTER
+ r: 10
+ label: center/big
+
+- # Merge multiple maps
+ << : [ *CENTER, *BIG ]
+ label: center/big
+
+- # Override
+ << : [ *BIG, *LEFT, *SMALL ]
+ x: 1
+ label: center/big
diff --git a/test/YAMLParser/construct-null.data b/test/YAMLParser/construct-null.data
new file mode 100644
index 0000000000..51f8b61e24
--- /dev/null
+++ b/test/YAMLParser/construct-null.data
@@ -0,0 +1,20 @@
+# RUN: yaml-bench -canonical %s
+
+# A document may be null.
+---
+---
+# This mapping has four keys,
+# one has a value.
+empty:
+canonical: ~
+english: null
+~: null key
+---
+# This sequence has five
+# entries, two have values.
+sparse:
+ - ~
+ - 2nd entry
+ -
+ - 4th entry
+ - Null
diff --git a/test/YAMLParser/construct-omap.data b/test/YAMLParser/construct-omap.data
new file mode 100644
index 0000000000..b96d6799c7
--- /dev/null
+++ b/test/YAMLParser/construct-omap.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+# Explicitly typed ordered map (dictionary).
+Bestiary: !!omap
+ - aardvark: African pig-like ant eater. Ugly.
+ - anteater: South-American ant eater. Two species.
+ - anaconda: South-American constrictor snake. Scaly.
+ # Etc.
+# Flow style
+Numbers: !!omap [ one: 1, two: 2, three : 3 ]
diff --git a/test/YAMLParser/construct-pairs.data b/test/YAMLParser/construct-pairs.data
new file mode 100644
index 0000000000..40f288d1d7
--- /dev/null
+++ b/test/YAMLParser/construct-pairs.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+# Explicitly typed pairs.
+Block tasks: !!pairs
+ - meeting: with team.
+ - meeting: with boss.
+ - break: lunch.
+ - meeting: with client.
+Flow tasks: !!pairs [ meeting: with team, meeting: with boss ]
diff --git a/test/YAMLParser/construct-seq.data b/test/YAMLParser/construct-seq.data
new file mode 100644
index 0000000000..f43fd39f8e
--- /dev/null
+++ b/test/YAMLParser/construct-seq.data
@@ -0,0 +1,17 @@
+# RUN: yaml-bench -canonical %s
+
+# Ordered sequence of nodes
+Block style: !!seq
+- Mercury # Rotates - no light/dark sides.
+- Venus # Deadliest. Aptly named.
+- Earth # Mostly dirt.
+- Mars # Seems empty.
+- Jupiter # The king.
+- Saturn # Pretty.
+- Uranus # Where the sun hardly shines.
+- Neptune # Boring. No rings.
+- Pluto # You call this a planet?
+Flow style: !!seq [ Mercury, Venus, Earth, Mars, # Rocks
+ Jupiter, Saturn, Uranus, Neptune, # Gas
+ Pluto ] # Overrated
+
diff --git a/test/YAMLParser/construct-set.data b/test/YAMLParser/construct-set.data
new file mode 100644
index 0000000000..3e9d095e71
--- /dev/null
+++ b/test/YAMLParser/construct-set.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+# Explicitly typed set.
+baseball players: !!set
+ ? Mark McGwire
+ ? Sammy Sosa
+ ? Ken Griffey
+# Flow style
+baseball teams: !!set { Boston Red Sox, Detroit Tigers, New York Yankees }
diff --git a/test/YAMLParser/construct-str-ascii.data b/test/YAMLParser/construct-str-ascii.data
new file mode 100644
index 0000000000..24290ae8a9
--- /dev/null
+++ b/test/YAMLParser/construct-str-ascii.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+--- !!str "ascii string"
diff --git a/test/YAMLParser/construct-str.data b/test/YAMLParser/construct-str.data
new file mode 100644
index 0000000000..dc1ce825cd
--- /dev/null
+++ b/test/YAMLParser/construct-str.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+string: abcd
diff --git a/test/YAMLParser/construct-timestamp.data b/test/YAMLParser/construct-timestamp.data
new file mode 100644
index 0000000000..f262c2d02c
--- /dev/null
+++ b/test/YAMLParser/construct-timestamp.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: 2001-12-15T02:59:43.1Z
+valid iso8601: 2001-12-14t21:59:43.10-05:00
+space separated: 2001-12-14 21:59:43.10 -5
+no time zone (Z): 2001-12-15 2:59:43.10
+date (00:00:00Z): 2002-12-14
diff --git a/test/YAMLParser/construct-value.data b/test/YAMLParser/construct-value.data
new file mode 100644
index 0000000000..fe01a0dc90
--- /dev/null
+++ b/test/YAMLParser/construct-value.data
@@ -0,0 +1,12 @@
+# RUN: yaml-bench -canonical %s
+
+--- # Old schema
+link with:
+ - library1.dll
+ - library2.dll
+--- # New schema
+link with:
+ - = : library1.dll
+ version: 1.2
+ - = : library2.dll
+ version: 2.3
diff --git a/test/YAMLParser/duplicate-key.former-loader-error.data b/test/YAMLParser/duplicate-key.former-loader-error.data
new file mode 100644
index 0000000000..9272103fe6
--- /dev/null
+++ b/test/YAMLParser/duplicate-key.former-loader-error.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+---
+foo: bar
+foo: baz
diff --git a/test/YAMLParser/duplicate-mapping-key.former-loader-error.data b/test/YAMLParser/duplicate-mapping-key.former-loader-error.data
new file mode 100644
index 0000000000..96d175d2ac
--- /dev/null
+++ b/test/YAMLParser/duplicate-mapping-key.former-loader-error.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+---
+&anchor foo:
+ foo: bar
+ *anchor: duplicate key
+ baz: bat
+ *anchor: duplicate key
diff --git a/test/YAMLParser/duplicate-merge-key.former-loader-error.data b/test/YAMLParser/duplicate-merge-key.former-loader-error.data
new file mode 100644
index 0000000000..6b1276436a
--- /dev/null
+++ b/test/YAMLParser/duplicate-merge-key.former-loader-error.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+---
+<<: {x: 1, y: 2}
+foo: bar
+<<: {z: 3, t: 4}
diff --git a/test/YAMLParser/duplicate-value-key.former-loader-error.data b/test/YAMLParser/duplicate-value-key.former-loader-error.data
new file mode 100644
index 0000000000..dc20e0b275
--- /dev/null
+++ b/test/YAMLParser/duplicate-value-key.former-loader-error.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+---
+=: 1
+foo: bar
+=: 2
diff --git a/test/YAMLParser/emit-block-scalar-in-simple-key-context-bug.data b/test/YAMLParser/emit-block-scalar-in-simple-key-context-bug.data
new file mode 100644
index 0000000000..f5adedb135
--- /dev/null
+++ b/test/YAMLParser/emit-block-scalar-in-simple-key-context-bug.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+? |-
+ foo
+: |-
+ bar
diff --git a/test/YAMLParser/empty-document-bug.data b/test/YAMLParser/empty-document-bug.data
new file mode 100644
index 0000000000..fa131fe78e
--- /dev/null
+++ b/test/YAMLParser/empty-document-bug.data
@@ -0,0 +1,2 @@
+# RUN: yaml-bench -canonical %s
+
diff --git a/test/YAMLParser/float.data b/test/YAMLParser/float.data
new file mode 100644
index 0000000000..c4de97037c
--- /dev/null
+++ b/test/YAMLParser/float.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+- 6.8523015e+5
+- 685.230_15e+03
+- 685_230.15
+- 190:20:30.15
+- -.inf
+- .NaN
diff --git a/test/YAMLParser/int.data b/test/YAMLParser/int.data
new file mode 100644
index 0000000000..2651d096ff
--- /dev/null
+++ b/test/YAMLParser/int.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+- 685230
+- +685_230
+- 02472256
+- 0x_0A_74_AE
+- 0b1010_0111_0100_1010_1110
+- 190:20:30
diff --git a/test/YAMLParser/invalid-single-quote-bug.data b/test/YAMLParser/invalid-single-quote-bug.data
new file mode 100644
index 0000000000..3722a003df
--- /dev/null
+++ b/test/YAMLParser/invalid-single-quote-bug.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+- "foo 'bar'"
+- "foo\n'bar'"
diff --git a/test/YAMLParser/merge.data b/test/YAMLParser/merge.data
new file mode 100644
index 0000000000..86313596e1
--- /dev/null
+++ b/test/YAMLParser/merge.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+- <<
diff --git a/test/YAMLParser/more-floats.data b/test/YAMLParser/more-floats.data
new file mode 100644
index 0000000000..668b31cd13
--- /dev/null
+++ b/test/YAMLParser/more-floats.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+[0.0, +1.0, -1.0, +.inf, -.inf, .nan, .nan]
diff --git a/test/YAMLParser/negative-float-bug.data b/test/YAMLParser/negative-float-bug.data
new file mode 100644
index 0000000000..0ba0ffee30
--- /dev/null
+++ b/test/YAMLParser/negative-float-bug.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+-1.0
diff --git a/test/YAMLParser/null.data b/test/YAMLParser/null.data
new file mode 100644
index 0000000000..a38d7fa6c5
--- /dev/null
+++ b/test/YAMLParser/null.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+-
+- ~
+- null
diff --git a/test/YAMLParser/resolver.data b/test/YAMLParser/resolver.data
new file mode 100644
index 0000000000..8cbba6328b
--- /dev/null
+++ b/test/YAMLParser/resolver.data
@@ -0,0 +1,32 @@
+# RUN: yaml-bench -canonical %s
+
+---
+"this scalar should be selected"
+---
+key11: !foo
+ key12:
+ is: [selected]
+ key22:
+ key13: [not, selected]
+ key23: [not, selected]
+ key32:
+ key31: [not, selected]
+ key32: [not, selected]
+ key33: {not: selected}
+key21: !bar
+ - not selected
+ - selected
+ - not selected
+key31: !baz
+ key12:
+ key13:
+ key14: {selected}
+ key23:
+ key14: [not, selected]
+ key33:
+ key14: {selected}
+ key24: {not: selected}
+ key22:
+ - key14: {selected}
+ key24: {not: selected}
+ - key14: {selected}
diff --git a/test/YAMLParser/run-parser-crash-bug.data b/test/YAMLParser/run-parser-crash-bug.data
new file mode 100644
index 0000000000..3ec910ce04
--- /dev/null
+++ b/test/YAMLParser/run-parser-crash-bug.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+---
+- Harry Potter and the Prisoner of Azkaban
+- Harry Potter and the Goblet of Fire
+- Harry Potter and the Order of the Phoenix
+---
+- Memoirs Found in a Bathtub
+- Snow Crash
+- Ghost World
diff --git a/test/YAMLParser/scan-document-end-bug.data b/test/YAMLParser/scan-document-end-bug.data
new file mode 100644
index 0000000000..7354caf8cd
--- /dev/null
+++ b/test/YAMLParser/scan-document-end-bug.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+# Ticket #4
+---
+...
diff --git a/test/YAMLParser/scan-line-break-bug.data b/test/YAMLParser/scan-line-break-bug.data
new file mode 100644
index 0000000000..792973d3f5
--- /dev/null
+++ b/test/YAMLParser/scan-line-break-bug.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+foo:
+ bar
+ baz
diff --git a/test/YAMLParser/single-dot-is-not-float-bug.data b/test/YAMLParser/single-dot-is-not-float-bug.data
new file mode 100644
index 0000000000..810a5936a8
--- /dev/null
+++ b/test/YAMLParser/single-dot-is-not-float-bug.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+.
diff --git a/test/YAMLParser/sloppy-indentation.data b/test/YAMLParser/sloppy-indentation.data
new file mode 100644
index 0000000000..2b2b62b14a
--- /dev/null
+++ b/test/YAMLParser/sloppy-indentation.data
@@ -0,0 +1,19 @@
+# RUN: yaml-bench -canonical %s
+
+---
+in the block context:
+ indentation should be kept: {
+ but in the flow context: [
+it may be violated]
+}
+---
+the parser does not require scalars
+to be indented with at least one space
+...
+---
+"the parser does not require scalars
+to be indented with at least one space"
+---
+foo:
+ bar: 'quoted scalars
+may not adhere indentation'
diff --git a/test/YAMLParser/spec-02-01.data b/test/YAMLParser/spec-02-01.data
new file mode 100644
index 0000000000..dd15b2bc26
--- /dev/null
+++ b/test/YAMLParser/spec-02-01.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+- Mark McGwire
+- Sammy Sosa
+- Ken Griffey
diff --git a/test/YAMLParser/spec-02-02.data b/test/YAMLParser/spec-02-02.data
new file mode 100644
index 0000000000..a5695d5c27
--- /dev/null
+++ b/test/YAMLParser/spec-02-02.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+hr: 65 # Home runs
+avg: 0.278 # Batting average
+rbi: 147 # Runs Batted In
diff --git a/test/YAMLParser/spec-02-03.data b/test/YAMLParser/spec-02-03.data
new file mode 100644
index 0000000000..81f8d991f7
--- /dev/null
+++ b/test/YAMLParser/spec-02-03.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+american:
+ - Boston Red Sox
+ - Detroit Tigers
+ - New York Yankees
+national:
+ - New York Mets
+ - Chicago Cubs
+ - Atlanta Braves
diff --git a/test/YAMLParser/spec-02-04.data b/test/YAMLParser/spec-02-04.data
new file mode 100644
index 0000000000..44a218d592
--- /dev/null
+++ b/test/YAMLParser/spec-02-04.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+-
+ name: Mark McGwire
+ hr: 65
+ avg: 0.278
+-
+ name: Sammy Sosa
+ hr: 63
+ avg: 0.288
diff --git a/test/YAMLParser/spec-02-05.data b/test/YAMLParser/spec-02-05.data
new file mode 100644
index 0000000000..c9a4a7572f
--- /dev/null
+++ b/test/YAMLParser/spec-02-05.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+- [name , hr, avg ]
+- [Mark McGwire, 65, 0.278]
+- [Sammy Sosa , 63, 0.288]
diff --git a/test/YAMLParser/spec-02-06.data b/test/YAMLParser/spec-02-06.data
new file mode 100644
index 0000000000..85c1e2bab8
--- /dev/null
+++ b/test/YAMLParser/spec-02-06.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+Mark McGwire: {hr: 65, avg: 0.278}
+Sammy Sosa: {
+ hr: 63,
+ avg: 0.288
+ }
diff --git a/test/YAMLParser/spec-02-07.data b/test/YAMLParser/spec-02-07.data
new file mode 100644
index 0000000000..c349662a98
--- /dev/null
+++ b/test/YAMLParser/spec-02-07.data
@@ -0,0 +1,12 @@
+# RUN: yaml-bench -canonical %s
+
+# Ranking of 1998 home runs
+---
+- Mark McGwire
+- Sammy Sosa
+- Ken Griffey
+
+# Team ranking
+---
+- Chicago Cubs
+- St Louis Cardinals
diff --git a/test/YAMLParser/spec-02-08.data b/test/YAMLParser/spec-02-08.data
new file mode 100644
index 0000000000..9746a43788
--- /dev/null
+++ b/test/YAMLParser/spec-02-08.data
@@ -0,0 +1,12 @@
+# RUN: yaml-bench -canonical %s
+
+---
+time: 20:03:20
+player: Sammy Sosa
+action: strike (miss)
+...
+---
+time: 20:03:47
+player: Sammy Sosa
+action: grand slam
+...
diff --git a/test/YAMLParser/spec-02-09.data b/test/YAMLParser/spec-02-09.data
new file mode 100644
index 0000000000..6aef933379
--- /dev/null
+++ b/test/YAMLParser/spec-02-09.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+---
+hr: # 1998 hr ranking
+ - Mark McGwire
+ - Sammy Sosa
+rbi:
+ # 1998 rbi ranking
+ - Sammy Sosa
+ - Ken Griffey
diff --git a/test/YAMLParser/spec-02-10.data b/test/YAMLParser/spec-02-10.data
new file mode 100644
index 0000000000..0302fa7500
--- /dev/null
+++ b/test/YAMLParser/spec-02-10.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+---
+hr:
+ - Mark McGwire
+ # Following node labeled SS
+ - &SS Sammy Sosa
+rbi:
+ - *SS # Subsequent occurrence
+ - Ken Griffey
diff --git a/test/YAMLParser/spec-02-11.data b/test/YAMLParser/spec-02-11.data
new file mode 100644
index 0000000000..d8cf863b2c
--- /dev/null
+++ b/test/YAMLParser/spec-02-11.data
@@ -0,0 +1,11 @@
+# RUN: yaml-bench -canonical %s
+
+? - Detroit Tigers
+ - Chicago cubs
+:
+ - 2001-07-23
+
+? [ New York Yankees,
+ Atlanta Braves ]
+: [ 2001-07-02, 2001-08-12,
+ 2001-08-14 ]
diff --git a/test/YAMLParser/spec-02-12.data b/test/YAMLParser/spec-02-12.data
new file mode 100644
index 0000000000..3b4d5370a9
--- /dev/null
+++ b/test/YAMLParser/spec-02-12.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+---
+# products purchased
+- item : Super Hoop
+ quantity: 1
+- item : Basketball
+ quantity: 4
+- item : Big Shoes
+ quantity: 1
diff --git a/test/YAMLParser/spec-02-13.data b/test/YAMLParser/spec-02-13.data
new file mode 100644
index 0000000000..2bbccbf5d7
--- /dev/null
+++ b/test/YAMLParser/spec-02-13.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+# ASCII Art
+--- |
+ \//||\/||
+ // || ||__
diff --git a/test/YAMLParser/spec-02-14.data b/test/YAMLParser/spec-02-14.data
new file mode 100644
index 0000000000..5a18ea213e
--- /dev/null
+++ b/test/YAMLParser/spec-02-14.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+---
+ Mark McGwire's
+ year was crippled
+ by a knee injury.
diff --git a/test/YAMLParser/spec-02-15.data b/test/YAMLParser/spec-02-15.data
new file mode 100644
index 0000000000..2a7fbe96ad
--- /dev/null
+++ b/test/YAMLParser/spec-02-15.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+>
+ Sammy Sosa completed another
+ fine season with great stats.
+
+ 63 Home Runs
+ 0.288 Batting Average
+
+ What a year!
diff --git a/test/YAMLParser/spec-02-16.data b/test/YAMLParser/spec-02-16.data
new file mode 100644
index 0000000000..3a5792c763
--- /dev/null
+++ b/test/YAMLParser/spec-02-16.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+name: Mark McGwire
+accomplishment: >
+ Mark set a major league
+ home run record in 1998.
+stats: |
+ 65 Home Runs
+ 0.278 Batting Average
diff --git a/test/YAMLParser/spec-02-17.data b/test/YAMLParser/spec-02-17.data
new file mode 100644
index 0000000000..2bcb60c8d9
--- /dev/null
+++ b/test/YAMLParser/spec-02-17.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+unicode: "Sosa did fine.\u263A"
+control: "\b1998\t1999\t2000\n"
+hexesc: "\x13\x10 is \r\n"
+
+single: '"Howdy!" he cried.'
+quoted: ' # not a ''comment''.'
+tie-fighter: '|\-*-/|'
+
+# CHECK: !!str "Sosa did fine.\u263A"
+# CHECK: !!str "\b1998\t1999\t2000\n"
+# CHECK: !!str "\x13\x10 is \r\n"
+# CHECK: !!str "\"Howdy!\" he cried."
+# CHECK: !!str " # not a 'comment'."
+# CHECK: !!str "|\\-*-/|"
diff --git a/test/YAMLParser/spec-02-18.data b/test/YAMLParser/spec-02-18.data
new file mode 100644
index 0000000000..625a4962e9
--- /dev/null
+++ b/test/YAMLParser/spec-02-18.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+plain:
+ This unquoted scalar
+ spans many lines.
+
+quoted: "So does this
+ quoted scalar.\n"
diff --git a/test/YAMLParser/spec-02-19.data b/test/YAMLParser/spec-02-19.data
new file mode 100644
index 0000000000..cb9df6dd1f
--- /dev/null
+++ b/test/YAMLParser/spec-02-19.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: 12345
+decimal: +12,345
+sexagesimal: 3:25:45
+octal: 014
+hexadecimal: 0xC
diff --git a/test/YAMLParser/spec-02-20.data b/test/YAMLParser/spec-02-20.data
new file mode 100644
index 0000000000..ed14798611
--- /dev/null
+++ b/test/YAMLParser/spec-02-20.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: 1.23015e+3
+exponential: 12.3015e+02
+sexagesimal: 20:30.15
+fixed: 1,230.15
+negative infinity: -.inf
+not a number: .NaN
diff --git a/test/YAMLParser/spec-02-21.data b/test/YAMLParser/spec-02-21.data
new file mode 100644
index 0000000000..ea979db065
--- /dev/null
+++ b/test/YAMLParser/spec-02-21.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+null: ~
+true: y
+false: n
+string: '12345'
diff --git a/test/YAMLParser/spec-02-22.data b/test/YAMLParser/spec-02-22.data
new file mode 100644
index 0000000000..77724f7106
--- /dev/null
+++ b/test/YAMLParser/spec-02-22.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: 2001-12-15T02:59:43.1Z
+iso8601: 2001-12-14t21:59:43.10-05:00
+spaced: 2001-12-14 21:59:43.10 -5
+date: 2002-12-14
diff --git a/test/YAMLParser/spec-02-23.data b/test/YAMLParser/spec-02-23.data
new file mode 100644
index 0000000000..d08dfa755c
--- /dev/null
+++ b/test/YAMLParser/spec-02-23.data
@@ -0,0 +1,15 @@
+# RUN: yaml-bench -canonical %s
+
+---
+not-date: !!str 2002-04-28
+
+picture: !!binary |
+ R0lGODlhDAAMAIQAAP//9/X
+ 17unp5WZmZgAAAOfn515eXv
+ Pz7Y6OjuDg4J+fn5OTk6enp
+ 56enmleECcgggoBADs=
+
+application specific tag: !something |
+ The semantics of the tag
+ above may be different for
+ different documents.
diff --git a/test/YAMLParser/spec-02-24.data b/test/YAMLParser/spec-02-24.data
new file mode 100644
index 0000000000..01ca7f5d12
--- /dev/null
+++ b/test/YAMLParser/spec-02-24.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+%TAG ! tag:clarkevans.com,2002:
+--- !shape
+ # Use the ! handle for presenting
+ # tag:clarkevans.com,2002:circle
+- !circle
+ center: &ORIGIN {x: 73, y: 129}
+ radius: 7
+- !line
+ start: *ORIGIN
+ finish: { x: 89, y: 102 }
+- !label
+ start: *ORIGIN
+ color: 0xFFEEBB
+ text: Pretty vector drawing.
diff --git a/test/YAMLParser/spec-02-25.data b/test/YAMLParser/spec-02-25.data
new file mode 100644
index 0000000000..fbadfda97e
--- /dev/null
+++ b/test/YAMLParser/spec-02-25.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+# sets are represented as a
+# mapping where each key is
+# associated with the empty string
+--- !!set
+? Mark McGwire
+? Sammy Sosa
+? Ken Griff
diff --git a/test/YAMLParser/spec-02-26.data b/test/YAMLParser/spec-02-26.data
new file mode 100644
index 0000000000..257108e7e0
--- /dev/null
+++ b/test/YAMLParser/spec-02-26.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+# ordered maps are represented as
+# a sequence of mappings, with
+# each mapping having one key
+--- !!omap
+- Mark McGwire: 65
+- Sammy Sosa: 63
+- Ken Griffy: 58
diff --git a/test/YAMLParser/spec-02-27.data b/test/YAMLParser/spec-02-27.data
new file mode 100644
index 0000000000..a190ff19db
--- /dev/null
+++ b/test/YAMLParser/spec-02-27.data
@@ -0,0 +1,31 @@
+# RUN: yaml-bench -canonical %s
+
+--- !<tag:clarkevans.com,2002:invoice>
+invoice: 34843
+date : 2001-01-23
+bill-to: &id001
+ given : Chris
+ family : Dumars
+ address:
+ lines: |
+ 458 Walkman Dr.
+ Suite #292
+ city : Royal Oak
+ state : MI
+ postal : 48046
+ship-to: *id001
+product:
+ - sku : BL394D
+ quantity : 4
+ description : Basketball
+ price : 450.00
+ - sku : BL4438H
+ quantity : 1
+ description : Super Hoop
+ price : 2392.00
+tax : 251.42
+total: 4443.52
+comments:
+ Late afternoon is best.
+ Backup contact is Nancy
+ Billsmer @ 338-4338.
diff --git a/test/YAMLParser/spec-02-28.data b/test/YAMLParser/spec-02-28.data
new file mode 100644
index 0000000000..695c27f5d5
--- /dev/null
+++ b/test/YAMLParser/spec-02-28.data
@@ -0,0 +1,28 @@
+# RUN: yaml-bench -canonical %s
+
+---
+Time: 2001-11-23 15:01:42 -5
+User: ed
+Warning:
+ This is an error message
+ for the log file
+---
+Time: 2001-11-23 15:02:31 -5
+User: ed
+Warning:
+ A slightly different error
+ message.
+---
+Date: 2001-11-23 15:03:17 -5
+User: ed
+Fatal:
+ Unknown variable "bar"
+Stack:
+ - file: TopClass.py
+ line: 23
+ code: |
+ x = MoreObject("345\n")
+ - file: MoreClass.py
+ line: 58
+ code: |-
+ foo = bar
diff --git a/test/YAMLParser/spec-05-01-utf8.data b/test/YAMLParser/spec-05-01-utf8.data
new file mode 100644
index 0000000000..349da06fab
--- /dev/null
+++ b/test/YAMLParser/spec-05-01-utf8.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+# Comment only.
diff --git a/test/YAMLParser/spec-05-02-utf8.data b/test/YAMLParser/spec-05-02-utf8.data
new file mode 100644
index 0000000000..b306bdb719
--- /dev/null
+++ b/test/YAMLParser/spec-05-02-utf8.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+
+# Invalid use of BOM
+# inside a
+# document.
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-05-03.data b/test/YAMLParser/spec-05-03.data
new file mode 100644
index 0000000000..461e98d2c2
--- /dev/null
+++ b/test/YAMLParser/spec-05-03.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+sequence:
+- one
+- two
+mapping:
+ ? sky
+ : blue
+ ? sea : green
diff --git a/test/YAMLParser/spec-05-04.data b/test/YAMLParser/spec-05-04.data
new file mode 100644
index 0000000000..52850f435b
--- /dev/null
+++ b/test/YAMLParser/spec-05-04.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+sequence: [ one, two, ]
+mapping: { sky: blue, sea: green }
diff --git a/test/YAMLParser/spec-05-05.data b/test/YAMLParser/spec-05-05.data
new file mode 100644
index 0000000000..499ee8ffb8
--- /dev/null
+++ b/test/YAMLParser/spec-05-05.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+# Comment only.
diff --git a/test/YAMLParser/spec-05-06.data b/test/YAMLParser/spec-05-06.data
new file mode 100644
index 0000000000..729141acf4
--- /dev/null
+++ b/test/YAMLParser/spec-05-06.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+anchored: !local &anchor value
+alias: *anchor
diff --git a/test/YAMLParser/spec-05-07.data b/test/YAMLParser/spec-05-07.data
new file mode 100644
index 0000000000..fc80a0d415
--- /dev/null
+++ b/test/YAMLParser/spec-05-07.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+literal: |
+ text
+folded: >
+ text
diff --git a/test/YAMLParser/spec-05-08.data b/test/YAMLParser/spec-05-08.data
new file mode 100644
index 0000000000..9f2b7ece53
--- /dev/null
+++ b/test/YAMLParser/spec-05-08.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+single: 'text'
+double: "text"
diff --git a/test/YAMLParser/spec-05-09.data b/test/YAMLParser/spec-05-09.data
new file mode 100644
index 0000000000..fc061fb298
--- /dev/null
+++ b/test/YAMLParser/spec-05-09.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+%YAML 1.1
+--- text
diff --git a/test/YAMLParser/spec-05-10.data b/test/YAMLParser/spec-05-10.data
new file mode 100644
index 0000000000..6788f0bfc3
--- /dev/null
+++ b/test/YAMLParser/spec-05-10.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+
+commercial-at: @text
+grave-accent: `text
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-05-11.data b/test/YAMLParser/spec-05-11.data
new file mode 100644
index 0000000000..7cba5562d5
--- /dev/null
+++ b/test/YAMLParser/spec-05-11.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+|
+ Generic line break (no glyph)
+ Generic line break (glyphed)… Line separator
 Paragraph separator

diff --git a/test/YAMLParser/spec-05-12.data b/test/YAMLParser/spec-05-12.data
new file mode 100644
index 0000000000..7dadff76f8
--- /dev/null
+++ b/test/YAMLParser/spec-05-12.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# We don't currently reject tabs as indentation.
+# XFAIL: *
+
+# Tabs do's and don'ts:
+# comment:
+quoted: "Quoted "
+block: |
+ void main() {
+ printf("Hello, world!\n");
+ }
+elsewhere: # separation
+ indentation, in plain scalar
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-05-13.data b/test/YAMLParser/spec-05-13.data
new file mode 100644
index 0000000000..db62e866a7
--- /dev/null
+++ b/test/YAMLParser/spec-05-13.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+ "Text containing
+ both space and
+ tab characters"
diff --git a/test/YAMLParser/spec-05-14.data b/test/YAMLParser/spec-05-14.data
new file mode 100644
index 0000000000..65451651b6
--- /dev/null
+++ b/test/YAMLParser/spec-05-14.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+"Fun with \\
+\" \a \b \e \f \
+\n \r \t \v \0 \
+\ \_ \N \L \P \
+\x41 \u0041 \U00000041"
+
+# CHECK: !!str "Fun with \\\n\" \a \b \e \f \n \r \t \v \0 \_ \N \L \P A A A"
diff --git a/test/YAMLParser/spec-05-15.data b/test/YAMLParser/spec-05-15.data
new file mode 100644
index 0000000000..cd8421ad27
--- /dev/null
+++ b/test/YAMLParser/spec-05-15.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+
+Bad escapes:
+ "\c
+ \xq-"
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-06-01.data b/test/YAMLParser/spec-06-01.data
new file mode 100644
index 0000000000..95b26bdb38
--- /dev/null
+++ b/test/YAMLParser/spec-06-01.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+ # Leading comment line spaces are
+ # neither content nor indentation.
+
+Not indented:
+ By one space: |
+ By four
+ spaces
+ Flow style: [ # Leading spaces
+ By two, # in flow style
+ Also by two, # are neither
+# Tabs are not allowed:
+# Still by two # content nor
+ Still by two # content nor
+ ] # indentation.
diff --git a/test/YAMLParser/spec-06-02.data b/test/YAMLParser/spec-06-02.data
new file mode 100644
index 0000000000..40a15c9f3e
--- /dev/null
+++ b/test/YAMLParser/spec-06-02.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+ # Comment
+
+
diff --git a/test/YAMLParser/spec-06-03.data b/test/YAMLParser/spec-06-03.data
new file mode 100644
index 0000000000..c1893ef083
--- /dev/null
+++ b/test/YAMLParser/spec-06-03.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+key: # Comment
+ value
diff --git a/test/YAMLParser/spec-06-04.data b/test/YAMLParser/spec-06-04.data
new file mode 100644
index 0000000000..b61bcc6b95
--- /dev/null
+++ b/test/YAMLParser/spec-06-04.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+key: # Comment
+ # lines
+ value
+
diff --git a/test/YAMLParser/spec-06-05.data b/test/YAMLParser/spec-06-05.data
new file mode 100644
index 0000000000..4bcaa5a818
--- /dev/null
+++ b/test/YAMLParser/spec-06-05.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+{ first: Sammy, last: Sosa }:
+# Statistics:
+ hr: # Home runs
+ 65
+ avg: # Average
+ 0.278
diff --git a/test/YAMLParser/spec-06-06.data b/test/YAMLParser/spec-06-06.data
new file mode 100644
index 0000000000..67e39ddf89
--- /dev/null
+++ b/test/YAMLParser/spec-06-06.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+plain: text
+ lines
+quoted: "text
+ lines"
+block: |
+ text
+ lines
diff --git a/test/YAMLParser/spec-06-07.data b/test/YAMLParser/spec-06-07.data
new file mode 100644
index 0000000000..451bd349e3
--- /dev/null
+++ b/test/YAMLParser/spec-06-07.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+- foo
+
+ bar
+- |-
+ foo
+
+ bar
+
diff --git a/test/YAMLParser/spec-06-08.data b/test/YAMLParser/spec-06-08.data
new file mode 100644
index 0000000000..aa06f847ea
--- /dev/null
+++ b/test/YAMLParser/spec-06-08.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+>-
+ specific
 trimmed… … …… as… space
diff --git a/test/YAMLParser/spec-07-01.data b/test/YAMLParser/spec-07-01.data
new file mode 100644
index 0000000000..21bc5e59d5
--- /dev/null
+++ b/test/YAMLParser/spec-07-01.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+%FOO bar baz # Should be ignored
+ # with a warning.
+--- "foo"
diff --git a/test/YAMLParser/spec-07-02.data b/test/YAMLParser/spec-07-02.data
new file mode 100644
index 0000000000..bf0e758c88
--- /dev/null
+++ b/test/YAMLParser/spec-07-02.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+%YAML 1.2 # Attempt parsing
+ # with a warning
+---
+"foo"
diff --git a/test/YAMLParser/spec-07-03.data b/test/YAMLParser/spec-07-03.data
new file mode 100644
index 0000000000..7ca9483016
--- /dev/null
+++ b/test/YAMLParser/spec-07-03.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+
+%YAML 1.1
+%YAML 1.1
+foo
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-07-04.data b/test/YAMLParser/spec-07-04.data
new file mode 100644
index 0000000000..beba7d06ec
--- /dev/null
+++ b/test/YAMLParser/spec-07-04.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+%TAG !yaml! tag:yaml.org,2002:
+---
+!yaml!str "foo"
diff --git a/test/YAMLParser/spec-07-05.data b/test/YAMLParser/spec-07-05.data
new file mode 100644
index 0000000000..279b54afa1
--- /dev/null
+++ b/test/YAMLParser/spec-07-05.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# We don't currently parse TAG directives.
+# XFAIL: *
+
+%TAG ! !foo
+%TAG ! !foo
+bar
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-07-06.data b/test/YAMLParser/spec-07-06.data
new file mode 100644
index 0000000000..9f27f91f31
--- /dev/null
+++ b/test/YAMLParser/spec-07-06.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+%TAG ! !foo
+%TAG !yaml! tag:yaml.org,2002:
+---
+- !bar "baz"
+- !yaml!str "string"
diff --git a/test/YAMLParser/spec-07-07a.data b/test/YAMLParser/spec-07-07a.data
new file mode 100644
index 0000000000..e51f8f7d69
--- /dev/null
+++ b/test/YAMLParser/spec-07-07a.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+# Private application:
+!foo "bar"
diff --git a/test/YAMLParser/spec-07-07b.data b/test/YAMLParser/spec-07-07b.data
new file mode 100644
index 0000000000..003d575572
--- /dev/null
+++ b/test/YAMLParser/spec-07-07b.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+# Migrated to global:
+%TAG ! tag:ben-kiki.org,2000:app/
+---
+!foo "bar"
diff --git a/test/YAMLParser/spec-07-08.data b/test/YAMLParser/spec-07-08.data
new file mode 100644
index 0000000000..7197404b38
--- /dev/null
+++ b/test/YAMLParser/spec-07-08.data
@@ -0,0 +1,11 @@
+# RUN: yaml-bench -canonical %s
+
+# Explicitly specify default settings:
+%TAG ! !
+%TAG !! tag:yaml.org,2002:
+# Named handles have no default:
+%TAG !o! tag:ben-kiki.org,2000:
+---
+- !foo "bar"
+- !!str "string"
+- !o!type "baz"
diff --git a/test/YAMLParser/spec-07-09.data b/test/YAMLParser/spec-07-09.data
new file mode 100644
index 0000000000..1f98ba0414
--- /dev/null
+++ b/test/YAMLParser/spec-07-09.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s
+
+---
+foo
+...
+# Repeated end marker.
+...
+---
+bar
+# No end marker.
+---
+baz
+...
diff --git a/test/YAMLParser/spec-07-10.data b/test/YAMLParser/spec-07-10.data
new file mode 100644
index 0000000000..a176683478
--- /dev/null
+++ b/test/YAMLParser/spec-07-10.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s
+
+"Root flow
+ scalar"
+--- !!str >
+ Root block
+ scalar
+---
+# Root collection:
+foo : bar
+... # Is optional.
+---
+# Explicit document may be empty.
diff --git a/test/YAMLParser/spec-07-11.data b/test/YAMLParser/spec-07-11.data
new file mode 100644
index 0000000000..ce14b7ebe4
--- /dev/null
+++ b/test/YAMLParser/spec-07-11.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+# A stream may contain
+# no documents.
diff --git a/test/YAMLParser/spec-07-12a.data b/test/YAMLParser/spec-07-12a.data
new file mode 100644
index 0000000000..7327f8188e
--- /dev/null
+++ b/test/YAMLParser/spec-07-12a.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+# Implicit document. Root
+# collection (mapping) node.
+foo : bar
diff --git a/test/YAMLParser/spec-07-12b.data b/test/YAMLParser/spec-07-12b.data
new file mode 100644
index 0000000000..d759abea7d
--- /dev/null
+++ b/test/YAMLParser/spec-07-12b.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+# Explicit document. Root
+# scalar (literal) node.
+--- |
+ Text content
diff --git a/test/YAMLParser/spec-07-13.data b/test/YAMLParser/spec-07-13.data
new file mode 100644
index 0000000000..ab74df1018
--- /dev/null
+++ b/test/YAMLParser/spec-07-13.data
@@ -0,0 +1,11 @@
+# RUN: yaml-bench -canonical %s
+
+! "First document"
+---
+!foo "No directives"
+%TAG ! !foo
+---
+!bar "With directives"
+%YAML 1.1
+---
+!baz "Reset settings"
diff --git a/test/YAMLParser/spec-08-01.data b/test/YAMLParser/spec-08-01.data
new file mode 100644
index 0000000000..5abbfa8094
--- /dev/null
+++ b/test/YAMLParser/spec-08-01.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+!!str &a1 "foo" : !!str bar
+&a2 baz : *a1
diff --git a/test/YAMLParser/spec-08-02.data b/test/YAMLParser/spec-08-02.data
new file mode 100644
index 0000000000..8a75783a70
--- /dev/null
+++ b/test/YAMLParser/spec-08-02.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+First occurrence: &anchor Value
+Second occurrence: *anchor
diff --git a/test/YAMLParser/spec-08-03.data b/test/YAMLParser/spec-08-03.data
new file mode 100644
index 0000000000..8c715305a8
--- /dev/null
+++ b/test/YAMLParser/spec-08-03.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+!<tag:yaml.org,2002:str> foo :
+ !<!bar> baz
diff --git a/test/YAMLParser/spec-08-04.data b/test/YAMLParser/spec-08-04.data
new file mode 100644
index 0000000000..f13538bc87
--- /dev/null
+++ b/test/YAMLParser/spec-08-04.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# We don't currently look at the content of literal tags.
+# XFAIL: *
+
+- !<!> foo
+- !<$:?> bar
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-08-05.data b/test/YAMLParser/spec-08-05.data
new file mode 100644
index 0000000000..0613446c89
--- /dev/null
+++ b/test/YAMLParser/spec-08-05.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+%TAG !o! tag:ben-kiki.org,2000:
+---
+- !local foo
+- !!str bar
+- !o!type baz
diff --git a/test/YAMLParser/spec-08-06.data b/test/YAMLParser/spec-08-06.data
new file mode 100644
index 0000000000..a811bfdefe
--- /dev/null
+++ b/test/YAMLParser/spec-08-06.data
@@ -0,0 +1,12 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# We don't currently validate tags.
+# XFAIL: *
+
+%TAG !o! tag:ben-kiki.org,2000:
+---
+- !$a!b foo
+- !o! bar
+- !h!type baz
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-08-07.data b/test/YAMLParser/spec-08-07.data
new file mode 100644
index 0000000000..fc3f2df7f0
--- /dev/null
+++ b/test/YAMLParser/spec-08-07.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+# Assuming conventional resolution:
+- "12"
+- 12
+- ! 12
diff --git a/test/YAMLParser/spec-08-08.data b/test/YAMLParser/spec-08-08.data
new file mode 100644
index 0000000000..460029f6ac
--- /dev/null
+++ b/test/YAMLParser/spec-08-08.data
@@ -0,0 +1,15 @@
+# RUN: yaml-bench -canonical %s
+
+---
+foo:
+ "bar
+ baz"
+---
+"foo
+ bar"
+---
+foo
+ bar
+--- |
+ foo
+...
diff --git a/test/YAMLParser/spec-08-09.data b/test/YAMLParser/spec-08-09.data
new file mode 100644
index 0000000000..1c82585943
--- /dev/null
+++ b/test/YAMLParser/spec-08-09.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s
+
+---
+scalars:
+ plain: !!str some text
+ quoted:
+ single: 'some text'
+ double: "some text"
+collections:
+ sequence: !!seq [ !!str entry,
+ # Mapping entry:
+ key: value ]
+ mapping: { key: value }
diff --git a/test/YAMLParser/spec-08-10.data b/test/YAMLParser/spec-08-10.data
new file mode 100644
index 0000000000..74054eb088
--- /dev/null
+++ b/test/YAMLParser/spec-08-10.data
@@ -0,0 +1,17 @@
+# RUN: yaml-bench -canonical %s
+
+block styles:
+ scalars:
+ literal: !!str |
+ #!/usr/bin/perl
+ print "Hello, world!\n";
+ folded: >
+ This sentence
+ is false.
+ collections: !!map
+ sequence: !!seq # Entry:
+ - entry # Plain
+ # Mapping entry:
+ - key: value
+ mapping:
+ key: value
diff --git a/test/YAMLParser/spec-08-11.data b/test/YAMLParser/spec-08-11.data
new file mode 100644
index 0000000000..8a75783a70
--- /dev/null
+++ b/test/YAMLParser/spec-08-11.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+First occurrence: &anchor Value
+Second occurrence: *anchor
diff --git a/test/YAMLParser/spec-08-12.data b/test/YAMLParser/spec-08-12.data
new file mode 100644
index 0000000000..69e78b42d2
--- /dev/null
+++ b/test/YAMLParser/spec-08-12.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+[
+ Without properties,
+ &anchor "Anchored",
+ !!str 'Tagged',
+ *anchor, # Alias node
+ !!str , # Empty plain scalar
+ '', # Empty plain scalar
+]
diff --git a/test/YAMLParser/spec-08-13.data b/test/YAMLParser/spec-08-13.data
new file mode 100644
index 0000000000..931d56a0cf
--- /dev/null
+++ b/test/YAMLParser/spec-08-13.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+{
+ ? foo :,
+ ? : bar,
+}
diff --git a/test/YAMLParser/spec-08-14.data b/test/YAMLParser/spec-08-14.data
new file mode 100644
index 0000000000..61c448351a
--- /dev/null
+++ b/test/YAMLParser/spec-08-14.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+- "flow in block"
+- >
+ Block scalar
+- !!map # Block collection
+ foo : bar
diff --git a/test/YAMLParser/spec-08-15.data b/test/YAMLParser/spec-08-15.data
new file mode 100644
index 0000000000..f21e84a431
--- /dev/null
+++ b/test/YAMLParser/spec-08-15.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+- # Empty plain scalar
+- ? foo
+ :
+ ?
+ : bar
diff --git a/test/YAMLParser/spec-09-01.data b/test/YAMLParser/spec-09-01.data
new file mode 100644
index 0000000000..8999b49616
--- /dev/null
+++ b/test/YAMLParser/spec-09-01.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+"simple key" : {
+ "also simple" : value,
+ ? "not a
+ simple key" : "any
+ value"
+}
diff --git a/test/YAMLParser/spec-09-02.data b/test/YAMLParser/spec-09-02.data
new file mode 100644
index 0000000000..f69037820e
--- /dev/null
+++ b/test/YAMLParser/spec-09-02.data
@@ -0,0 +1,14 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# Indent trimming is not yet implemented.
+# XFAIL: *
+
+ "as space
+ trimmed
+
+ specific
+
+ escaped \
+ none"
+
+# CHECK: !!str "as space trimmed\nspecific\nescaped\tnone"
diff --git a/test/YAMLParser/spec-09-03.data b/test/YAMLParser/spec-09-03.data
new file mode 100644
index 0000000000..3fb0d8b184
--- /dev/null
+++ b/test/YAMLParser/spec-09-03.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+- "
+ last"
+- "
+ last"
+- " first
+ last"
diff --git a/test/YAMLParser/spec-09-04.data b/test/YAMLParser/spec-09-04.data
new file mode 100644
index 0000000000..4178ec6bef
--- /dev/null
+++ b/test/YAMLParser/spec-09-04.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+ "first
+ inner 1
+ \ inner 2 \
+ last"
diff --git a/test/YAMLParser/spec-09-05.data b/test/YAMLParser/spec-09-05.data
new file mode 100644
index 0000000000..e482d53662
--- /dev/null
+++ b/test/YAMLParser/spec-09-05.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+- "first
+ "
+- "first
+
+ last"
+- "first
+ inner
+ \ last"
diff --git a/test/YAMLParser/spec-09-06.data b/test/YAMLParser/spec-09-06.data
new file mode 100644
index 0000000000..edc0cbba90
--- /dev/null
+++ b/test/YAMLParser/spec-09-06.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+ 'here''s to "quotes"'
diff --git a/test/YAMLParser/spec-09-07.data b/test/YAMLParser/spec-09-07.data
new file mode 100644
index 0000000000..3c010ca5b9
--- /dev/null
+++ b/test/YAMLParser/spec-09-07.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+'simple key' : {
+ 'also simple' : value,
+ ? 'not a
+ simple key' : 'any
+ value'
+}
diff --git a/test/YAMLParser/spec-09-08.data b/test/YAMLParser/spec-09-08.data
new file mode 100644
index 0000000000..d114e58fca
--- /dev/null
+++ b/test/YAMLParser/spec-09-08.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+ 'as space … trimmed …… specific
… none'
diff --git a/test/YAMLParser/spec-09-09.data b/test/YAMLParser/spec-09-09.data
new file mode 100644
index 0000000000..2fec1b536e
--- /dev/null
+++ b/test/YAMLParser/spec-09-09.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+- '
+ last'
+- '
+ last'
+- ' first
+ last'
diff --git a/test/YAMLParser/spec-09-10.data b/test/YAMLParser/spec-09-10.data
new file mode 100644
index 0000000000..faabfb06b5
--- /dev/null
+++ b/test/YAMLParser/spec-09-10.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+ 'first
+ inner
+ last'
diff --git a/test/YAMLParser/spec-09-11.data b/test/YAMLParser/spec-09-11.data
new file mode 100644
index 0000000000..3f487ad6b0
--- /dev/null
+++ b/test/YAMLParser/spec-09-11.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+- 'first
+ '
+- 'first
+
+ last'
diff --git a/test/YAMLParser/spec-09-12.data b/test/YAMLParser/spec-09-12.data
new file mode 100644
index 0000000000..d992c589cd
--- /dev/null
+++ b/test/YAMLParser/spec-09-12.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+# Outside flow collection:
+- ::std::vector
+- Up, up, and away!
+- -123
+# Inside flow collection:
+- [ '::std::vector',
+ "Up, up, and away!",
+ -123 ]
diff --git a/test/YAMLParser/spec-09-13.data b/test/YAMLParser/spec-09-13.data
new file mode 100644
index 0000000000..d48f2d2c47
--- /dev/null
+++ b/test/YAMLParser/spec-09-13.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+simple key : {
+ also simple : value,
+ ? not a
+ simple key : any
+ value
+}
diff --git a/test/YAMLParser/spec-09-14.data b/test/YAMLParser/spec-09-14.data
new file mode 100644
index 0000000000..890f6bf2e7
--- /dev/null
+++ b/test/YAMLParser/spec-09-14.data
@@ -0,0 +1,21 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# Not quite sure why this doesn't fail.
+# XFAIL: *
+
+---
+--- ||| : foo
+... >>>: bar
+---
+[
+---
+,
+... ,
+{
+--- :
+... # Nested
+}
+]
+...
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-09-15.data b/test/YAMLParser/spec-09-15.data
new file mode 100644
index 0000000000..4111d1ba2c
--- /dev/null
+++ b/test/YAMLParser/spec-09-15.data
@@ -0,0 +1,15 @@
+# RUN: yaml-bench -canonical %s
+
+---
+"---" : foo
+...: bar
+---
+[
+---,
+...,
+{
+? ---
+: ...
+}
+]
+...
diff --git a/test/YAMLParser/spec-09-16.data b/test/YAMLParser/spec-09-16.data
new file mode 100644
index 0000000000..e595f47bec
--- /dev/null
+++ b/test/YAMLParser/spec-09-16.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+# Tabs are confusing:
+# as space/trimmed/specific/none
+ as space … trimmed …… specific
… none
diff --git a/test/YAMLParser/spec-09-17.data b/test/YAMLParser/spec-09-17.data
new file mode 100644
index 0000000000..1bacf4d68b
--- /dev/null
+++ b/test/YAMLParser/spec-09-17.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+ first line
+
+ more line
diff --git a/test/YAMLParser/spec-09-18.data b/test/YAMLParser/spec-09-18.data
new file mode 100644
index 0000000000..ac623f9973
--- /dev/null
+++ b/test/YAMLParser/spec-09-18.data
@@ -0,0 +1,11 @@
+# RUN: yaml-bench -canonical %s
+
+- | # Just the style
+ literal
+- >1 # Indentation indicator
+ folded
+- |+ # Chomping indicator
+ keep
+
+- >-1 # Both indicators
+ strip
diff --git a/test/YAMLParser/spec-09-19.data b/test/YAMLParser/spec-09-19.data
new file mode 100644
index 0000000000..52aa157137
--- /dev/null
+++ b/test/YAMLParser/spec-09-19.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+- |
+ literal
+- >
+ folded
diff --git a/test/YAMLParser/spec-09-20.data b/test/YAMLParser/spec-09-20.data
new file mode 100644
index 0000000000..86fc7ab9a2
--- /dev/null
+++ b/test/YAMLParser/spec-09-20.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s
+
+- |
+ detected
+- >
+
+
+ # detected
+- |1
+ explicit
+- >
+
+ detected
diff --git a/test/YAMLParser/spec-09-21.data b/test/YAMLParser/spec-09-21.data
new file mode 100644
index 0000000000..2bcc28337f
--- /dev/null
+++ b/test/YAMLParser/spec-09-21.data
@@ -0,0 +1,12 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+
+- |
+
+ text
+- >
+ text
+ text
+- |1
+ text
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-09-22.data b/test/YAMLParser/spec-09-22.data
new file mode 100644
index 0000000000..b95faa50b5
--- /dev/null
+++ b/test/YAMLParser/spec-09-22.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+strip: |-
+ text
clip: |
+ text…keep: |+
+ text

diff --git a/test/YAMLParser/spec-09-23.data b/test/YAMLParser/spec-09-23.data
new file mode 100644
index 0000000000..94f839818b
--- /dev/null
+++ b/test/YAMLParser/spec-09-23.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s
+
+ # Strip
+ # Comments:
+strip: |-
+ # text
 
 # Clip
+ # comments:
+…clip: |
+ # text… 
 # Keep
+ # comments:
+…keep: |+
+ # text
… # Trail
+ # comments.
diff --git a/test/YAMLParser/spec-09-24.data b/test/YAMLParser/spec-09-24.data
new file mode 100644
index 0000000000..f08eae6a80
--- /dev/null
+++ b/test/YAMLParser/spec-09-24.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+strip: >-
+
+clip: >
+
+keep: |+
+
diff --git a/test/YAMLParser/spec-09-25.data b/test/YAMLParser/spec-09-25.data
new file mode 100644
index 0000000000..b15edb523d
--- /dev/null
+++ b/test/YAMLParser/spec-09-25.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+| # Simple block scalar
+ literal
+ text
diff --git a/test/YAMLParser/spec-09-26.data b/test/YAMLParser/spec-09-26.data
new file mode 100644
index 0000000000..286740ed39
--- /dev/null
+++ b/test/YAMLParser/spec-09-26.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+|
+
+
+ literal
+
+ text
+
+ # Comment
diff --git a/test/YAMLParser/spec-09-27.data b/test/YAMLParser/spec-09-27.data
new file mode 100644
index 0000000000..286740ed39
--- /dev/null
+++ b/test/YAMLParser/spec-09-27.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+|
+
+
+ literal
+
+ text
+
+ # Comment
diff --git a/test/YAMLParser/spec-09-28.data b/test/YAMLParser/spec-09-28.data
new file mode 100644
index 0000000000..286740ed39
--- /dev/null
+++ b/test/YAMLParser/spec-09-28.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+|
+
+
+ literal
+
+ text
+
+ # Comment
diff --git a/test/YAMLParser/spec-09-29.data b/test/YAMLParser/spec-09-29.data
new file mode 100644
index 0000000000..e8906ff64a
--- /dev/null
+++ b/test/YAMLParser/spec-09-29.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+> # Simple folded scalar
+ folded
+ text
+ lines
diff --git a/test/YAMLParser/spec-09-30.data b/test/YAMLParser/spec-09-30.data
new file mode 100644
index 0000000000..a2d8bf4950
--- /dev/null
+++ b/test/YAMLParser/spec-09-30.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+>
+ folded
+ line
+
+ next
+ line
+
+ * bullet
+ * list
+
+ last
+ line
+
+# Comment
diff --git a/test/YAMLParser/spec-09-31.data b/test/YAMLParser/spec-09-31.data
new file mode 100644
index 0000000000..a2d8bf4950
--- /dev/null
+++ b/test/YAMLParser/spec-09-31.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+>
+ folded
+ line
+
+ next
+ line
+
+ * bullet
+ * list
+
+ last
+ line
+
+# Comment
diff --git a/test/YAMLParser/spec-09-32.data b/test/YAMLParser/spec-09-32.data
new file mode 100644
index 0000000000..a2d8bf4950
--- /dev/null
+++ b/test/YAMLParser/spec-09-32.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+>
+ folded
+ line
+
+ next
+ line
+
+ * bullet
+ * list
+
+ last
+ line
+
+# Comment
diff --git a/test/YAMLParser/spec-09-33.data b/test/YAMLParser/spec-09-33.data
new file mode 100644
index 0000000000..a2d8bf4950
--- /dev/null
+++ b/test/YAMLParser/spec-09-33.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+>
+ folded
+ line
+
+ next
+ line
+
+ * bullet
+ * list
+
+ last
+ line
+
+# Comment
diff --git a/test/YAMLParser/spec-10-01.data b/test/YAMLParser/spec-10-01.data
new file mode 100644
index 0000000000..549a54db42
--- /dev/null
+++ b/test/YAMLParser/spec-10-01.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+- [ inner, inner, ]
+- [inner,last]
diff --git a/test/YAMLParser/spec-10-02.data b/test/YAMLParser/spec-10-02.data
new file mode 100644
index 0000000000..662427a0c0
--- /dev/null
+++ b/test/YAMLParser/spec-10-02.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+[
+"double
+ quoted", 'single
+ quoted',
+plain
+ text, [ nested ],
+single: pair ,
+]
diff --git a/test/YAMLParser/spec-10-03.data b/test/YAMLParser/spec-10-03.data
new file mode 100644
index 0000000000..43f300e40c
--- /dev/null
+++ b/test/YAMLParser/spec-10-03.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+block: # Block
+ # sequence
+- one
+- two : three
diff --git a/test/YAMLParser/spec-10-04.data b/test/YAMLParser/spec-10-04.data
new file mode 100644
index 0000000000..733a570efe
--- /dev/null
+++ b/test/YAMLParser/spec-10-04.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+block:
+- one
+-
+ - two
diff --git a/test/YAMLParser/spec-10-05.data b/test/YAMLParser/spec-10-05.data
new file mode 100644
index 0000000000..3848b2a200
--- /dev/null
+++ b/test/YAMLParser/spec-10-05.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+- # Empty
+- |
+ block node
+- - one # in-line
+ - two # sequence
+- one: two # in-line
+ # mapping
diff --git a/test/YAMLParser/spec-10-06.data b/test/YAMLParser/spec-10-06.data
new file mode 100644
index 0000000000..40efb2b916
--- /dev/null
+++ b/test/YAMLParser/spec-10-06.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+- { inner : entry , also: inner , }
+- {inner: entry,last : entry}
diff --git a/test/YAMLParser/spec-10-07.data b/test/YAMLParser/spec-10-07.data
new file mode 100644
index 0000000000..7aa350e40b
--- /dev/null
+++ b/test/YAMLParser/spec-10-07.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+{
+? : value, # Empty key
+? explicit
+ key: value,
+simple key : value,
+[ collection, simple, key ]: value
+}
diff --git a/test/YAMLParser/spec-10-08.data b/test/YAMLParser/spec-10-08.data
new file mode 100644
index 0000000000..5b981e9833
--- /dev/null
+++ b/test/YAMLParser/spec-10-08.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# This fails because even without a key token, some contexts (in this case flow
+# maps) allow implicit null keys, which mix with this in weird ways.
+# XFAIL: *
+
+{
+multi-line
+ simple key : value,
+very long ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................(>1KB)............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
....................................................................... key: value
+}
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-10-09.data b/test/YAMLParser/spec-10-09.data
new file mode 100644
index 0000000000..a6b1fd00dd
--- /dev/null
+++ b/test/YAMLParser/spec-10-09.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+{
+key : value,
+empty: # empty value↓
+}
diff --git a/test/YAMLParser/spec-10-10.data b/test/YAMLParser/spec-10-10.data
new file mode 100644
index 0000000000..c97901ddfb
--- /dev/null
+++ b/test/YAMLParser/spec-10-10.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+{
+? explicit key1 : explicit value,
+? explicit key2 : , # Explicit empty
+? explicit key3, # Empty value
+simple key1 : explicit value,
+simple key2 : , # Explicit empty
+simple key3, # Empty value
+}
diff --git a/test/YAMLParser/spec-10-11.data b/test/YAMLParser/spec-10-11.data
new file mode 100644
index 0000000000..51bd06f020
--- /dev/null
+++ b/test/YAMLParser/spec-10-11.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+[
+? explicit key1 : explicit value,
+? explicit key2 : , # Explicit empty
+? explicit key3, # Implicit empty
+simple key1 : explicit value,
+simple key2 : , # Explicit empty
+]
diff --git a/test/YAMLParser/spec-10-12.data b/test/YAMLParser/spec-10-12.data
new file mode 100644
index 0000000000..65a90b3f2c
--- /dev/null
+++ b/test/YAMLParser/spec-10-12.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+block: # Block
+ # mapping
+ key: value
diff --git a/test/YAMLParser/spec-10-13.data b/test/YAMLParser/spec-10-13.data
new file mode 100644
index 0000000000..ccadeb1e7d
--- /dev/null
+++ b/test/YAMLParser/spec-10-13.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+? explicit key # implicit value
+? |
+ block key
+: - one # explicit in-line
+ - two # block value
diff --git a/test/YAMLParser/spec-10-14.data b/test/YAMLParser/spec-10-14.data
new file mode 100644
index 0000000000..866ec1f7b2
--- /dev/null
+++ b/test/YAMLParser/spec-10-14.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+plain key: # empty value
+"quoted key":
+- one # explicit next-line
+- two # block value
diff --git a/test/YAMLParser/spec-10-15.data b/test/YAMLParser/spec-10-15.data
new file mode 100644
index 0000000000..7d061bddd1
--- /dev/null
+++ b/test/YAMLParser/spec-10-15.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+- sun: yellow
+- ? earth: blue
+ : moon: white
diff --git a/test/YAMLParser/str.data b/test/YAMLParser/str.data
new file mode 100644
index 0000000000..bf013b6f52
--- /dev/null
+++ b/test/YAMLParser/str.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+- abcd
diff --git a/test/YAMLParser/timestamp-bugs.data b/test/YAMLParser/timestamp-bugs.data
new file mode 100644
index 0000000000..bf41a21b22
--- /dev/null
+++ b/test/YAMLParser/timestamp-bugs.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+- 2001-12-14 21:59:43.10 -5:30
+- 2001-12-14 21:59:43.10 +5:30
+- 2001-12-14 21:59:43.00101
+- 2001-12-14 21:59:43+1
+- 2001-12-14 21:59:43-1:30
+- 2005-07-08 17:35:04.517600
diff --git a/test/YAMLParser/timestamp.data b/test/YAMLParser/timestamp.data
new file mode 100644
index 0000000000..79945451b5
--- /dev/null
+++ b/test/YAMLParser/timestamp.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+- 2001-12-15T02:59:43.1Z
+- 2001-12-14t21:59:43.10-05:00
+- 2001-12-14 21:59:43.10 -5
+- 2001-12-15 2:59:43.10
+- 2002-12-14
diff --git a/test/YAMLParser/utf8-implicit.data b/test/YAMLParser/utf8-implicit.data
new file mode 100644
index 0000000000..ee2791fb06
--- /dev/null
+++ b/test/YAMLParser/utf8-implicit.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+--- implicit UTF-8
diff --git a/test/YAMLParser/utf8.data b/test/YAMLParser/utf8.data
new file mode 100644
index 0000000000..3935e9d121
--- /dev/null
+++ b/test/YAMLParser/utf8.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+--- UTF-8
diff --git a/test/YAMLParser/value.data b/test/YAMLParser/value.data
new file mode 100644
index 0000000000..311ccd4f22
--- /dev/null
+++ b/test/YAMLParser/value.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+- =
diff --git a/test/YAMLParser/yaml.data b/test/YAMLParser/yaml.data
new file mode 100644
index 0000000000..3ce5e4b73e
--- /dev/null
+++ b/test/YAMLParser/yaml.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+- !!yaml '!'
+- !!yaml '&'
+- !!yaml '*'
diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt
index ce0f5cd822..5d691728d8 100644
--- a/unittests/CMakeLists.txt
+++ b/unittests/CMakeLists.txt
@@ -175,4 +175,5 @@ add_llvm_unittest(Support
Support/TimeValue.cpp
Support/TypeBuilderTest.cpp
Support/ValueHandleTest.cpp
+ Support/YAMLParserTest.cpp
)
diff --git a/unittests/Support/YAMLParserTest.cpp b/unittests/Support/YAMLParserTest.cpp
new file mode 100644
index 0000000000..e88427ac09
--- /dev/null
+++ b/unittests/Support/YAMLParserTest.cpp
@@ -0,0 +1,179 @@
+//===- unittest/Support/YAMLParserTest ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/YAMLParser.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+
+// Checks that the given input gives a parse error: validate() must return
+// false and the stream must report failed(). Message and Input are attached
+// to any resulting test failure.
+static void ExpectParseError(StringRef Message, StringRef Input) {
+  SourceMgr SM;
+  yaml::Stream Stream(Input, SM);
+  EXPECT_FALSE(Stream.validate()) << Message << ": " << Input;
+  EXPECT_TRUE(Stream.failed()) << Message << ": " << Input;
+}
+
+// Checks that the given input can be parsed without error, i.e. that
+// validate() returns true.
+static void ExpectParseSuccess(StringRef Message, StringRef Input) {
+  SourceMgr SM;
+  yaml::Stream Stream(Input, SM);
+  EXPECT_TRUE(Stream.validate()) << Message << ": " << Input;
+}
+
+TEST(YAMLParser, ParsesEmptyArray) {
+  ExpectParseSuccess("Empty array", "[]");
+}
+
+TEST(YAMLParser, FailsIfNotClosingArray) {
+  ExpectParseError("Not closing array", "[");
+  ExpectParseError("Not closing array", " [ ");
+  ExpectParseError("Not closing array", " [x");
+}
+
+TEST(YAMLParser, ParsesEmptyArrayWithWhitespace) {
+  ExpectParseSuccess("Array with spaces", " [ ] ");
+  ExpectParseSuccess("All whitespaces", "\t\r\n[\t\n \t\r ]\t\r \n\n");
+}
+
+TEST(YAMLParser, ParsesEmptyObject) {
+  ExpectParseSuccess("Empty object", "[{}]");
+}
+
+TEST(YAMLParser, ParsesObject) {
+  ExpectParseSuccess("Object with an entry", "[{\"a\":\"/b\"}]");
+}
+
+TEST(YAMLParser, ParsesMultipleKeyValuePairsInObject) {
+  ExpectParseSuccess("Multiple key, value pairs",
+                     "[{\"a\":\"/b\",\"c\":\"d\",\"e\":\"f\"}]");
+}
+
+TEST(YAMLParser, FailsIfNotClosingObject) {
+  ExpectParseError("Missing close on empty", "[{]");
+  ExpectParseError("Missing close after pair", "[{\"a\":\"b\"]");
+}
+
+TEST(YAMLParser, FailsIfMissingColon) {
+  ExpectParseError("Missing colon between key and value", "[{\"a\"\"/b\"}]");
+  ExpectParseError("Missing colon between key and value", "[{\"a\" \"b\"}]");
+}
+
+TEST(YAMLParser, FailsOnMissingQuote) {
+  ExpectParseError("Missing open quote", "[{a\":\"b\"}]");
+  ExpectParseError("Missing closing quote", "[{\"a\":\"b}]");
+}
+
+TEST(YAMLParser, ParsesEscapedQuotes) {
+  ExpectParseSuccess("Parses escaped string in key and value",
+                     "[{\"a\":\"\\\"b\\\" \\\" \\\"\"}]");
+}
+
+TEST(YAMLParser, ParsesEmptyString) {
+  ExpectParseSuccess("Parses empty string in value", "[{\"a\":\"\"}]");
+}
+
+TEST(YAMLParser, ParsesMultipleObjects) {
+  ExpectParseSuccess(
+    "Multiple objects in array",
+    "["
+    " { \"a\" : \"b\" },"
+    " { \"a\" : \"b\" },"
+    " { \"a\" : \"b\" }"
+    "]");
+}
+
+TEST(YAMLParser, FailsOnMissingComma) {
+  ExpectParseError(
+    "Missing comma",
+    "["
+    " { \"a\" : \"b\" }"
+    " { \"a\" : \"b\" }"
+    "]");
+}
+
+TEST(YAMLParser, ParsesSpacesInBetweenTokens) {
+  ExpectParseSuccess(
+    "Various whitespace between tokens",
+    " \t \n\n \r [ \t \n\n \r"
+    " \t \n\n \r { \t \n\n \r\"a\"\t \n\n \r :"
+    " \t \n\n \r \"b\"\t \n\n \r } \t \n\n \r,\t \n\n \r"
+    " \t \n\n \r { \t \n\n \r\"a\"\t \n\n \r :"
+    " \t \n\n \r \"b\"\t \n\n \r } \t \n\n \r]\t \n\n \r");
+}
+
+TEST(YAMLParser, ParsesArrayOfArrays) {
+  ExpectParseSuccess("Array of arrays", "[[]]");
+}
+
+TEST(YAMLParser, HandlesEndOfFileGracefully) {
+  ExpectParseError("In string starting with EOF", "[\"");
+  ExpectParseError("In string hitting EOF", "[\" ");
+  ExpectParseError("In string escaping EOF", "[\" \\");
+  ExpectParseError("In array starting with EOF", "[");
+  ExpectParseError("In array element starting with EOF", "[[], ");
+  ExpectParseError("In array hitting EOF", "[[] ");
+  ExpectParseError("In array hitting EOF", "[[]");
+  ExpectParseError("In object hitting EOF", "{\"\"");
+}
+
+// Checks that the given string can be parsed into an identical string inside
+// of an array. The string is wrapped as ["<String>"], parsed, and the raw
+// value of the first element (with its surrounding quotes stripped) is
+// compared against String.
+static void ExpectCanParseString(StringRef String) {
+  std::string StringInArray = (llvm::Twine("[\"") + String + "\"]").str();
+  SourceMgr SM;
+  yaml::Stream Stream(StringInArray, SM);
+  // Fail this test (rather than crash the whole binary) if the root is
+  // missing or is not a sequence.
+  yaml::SequenceNode *ParsedSequence
+    = dyn_cast_or_null<yaml::SequenceNode>(Stream.begin()->getRoot());
+  ASSERT_TRUE(ParsedSequence != 0)
+    << "Root is not a sequence: " << StringInArray;
+  // begin() is called exactly once, as in the original code.
+  yaml::ScalarNode *ParsedScalar
+    = dyn_cast_or_null<yaml::ScalarNode>(
+        static_cast<yaml::Node*>(ParsedSequence->begin()));
+  ASSERT_TRUE(ParsedScalar != 0)
+    << "First element is not a scalar: " << StringInArray;
+  StringRef ParsedString = ParsedScalar->getRawValue();
+  // Drop the opening and closing double quote of the raw token.
+  ParsedString = ParsedString.substr(1, ParsedString.size() - 2);
+  EXPECT_EQ(String, ParsedString.str());
+}
+
+// Checks that parsing the given string inside an array fails.
+static void ExpectCannotParseString(StringRef String) {
+  std::string StringInArray = (llvm::Twine("[\"") + String + "\"]").str();
+  ExpectParseError((Twine("When parsing string \"") + String + "\"").str(),
+                   StringInArray);
+}
+
+TEST(YAMLParser, ParsesStrings) {
+  ExpectCanParseString("");
+  ExpectCannotParseString("\\");
+  ExpectCannotParseString("\"");
+  ExpectCanParseString(" ");
+  ExpectCanParseString("\\ ");
+  ExpectCanParseString("\\\"");
+  ExpectCannotParseString("\"\\");
+  ExpectCannotParseString(" \\");
+  ExpectCanParseString("\\\\");
+  ExpectCannotParseString("\\\\\\");
+  ExpectCanParseString("\\\\\\\\");
+  ExpectCanParseString("\\\" ");
+  ExpectCannotParseString("\\\\\" ");
+  ExpectCanParseString("\\\\\\\" ");
+  ExpectCanParseString(" \\\\ \\\" \\\\\\\" ");
+}
+
+TEST(YAMLParser, WorksWithIteratorAlgorithms) {
+  SourceMgr SM;
+  yaml::Stream Stream("[\"1\", \"2\", \"3\", \"4\", \"5\", \"6\"]", SM);
+  // Stop here with a test failure if the root is missing or not a sequence,
+  // instead of dereferencing a null pointer below.
+  yaml::SequenceNode *Array
+    = dyn_cast_or_null<yaml::SequenceNode>(Stream.begin()->getRoot());
+  ASSERT_TRUE(Array != 0) << "Root is not a sequence";
+  EXPECT_EQ(6, std::distance(Array->begin(), Array->end()));
+}
+
+} // end namespace llvm
diff --git a/utils/yaml-bench/CMakeLists.txt b/utils/yaml-bench/CMakeLists.txt
new file mode 100644
index 0000000000..403182ceee
--- /dev/null
+++ b/utils/yaml-bench/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_llvm_utility(yaml-bench
+ YAMLBench.cpp
+ )
+
+target_link_libraries(yaml-bench LLVMSupport)
diff --git a/utils/yaml-bench/Makefile b/utils/yaml-bench/Makefile
new file mode 100644
index 0000000000..07e91226c7
--- /dev/null
+++ b/utils/yaml-bench/Makefile
@@ -0,0 +1,20 @@
+##===- utils/yaml-bench/Makefile ---------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+TOOLNAME = yaml-bench
+USEDLIBS = LLVMSupport.a
+
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS = 1
+
+# Don't install this utility
+NO_INSTALL = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/utils/yaml-bench/YAMLBench.cpp b/utils/yaml-bench/YAMLBench.cpp
new file mode 100644
index 0000000000..e5ee52a16d
--- /dev/null
+++ b/utils/yaml-bench/YAMLBench.cpp
@@ -0,0 +1,203 @@
+//===- YAMLBench - Benchmark the YAMLParser implementation ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This program executes the YAMLParser on differently sized YAML texts and
+// outputs the run time.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/YAMLParser.h"
+
+using namespace llvm;
+
+// Command-line interface of yaml-bench.
+
+// -tokens (off by default).
+static cl::opt<bool>
+DumpTokens("tokens",
+           cl::desc("Print the tokenization of the file."),
+           cl::init(false));
+
+// -canonical (off by default).
+static cl::opt<bool>
+DumpCanonical("canonical",
+              cl::desc("Print the canonical YAML for this file."),
+              cl::init(false));
+
+// Positional <input> argument.
+static cl::opt<std::string>
+Input(cl::Positional, cl::desc("<input>"));
+
+// -verify (off by default).
+static cl::opt<bool>
+Verify("verify",
+       cl::desc("Run a quick verification useful for regression testing"),
+       cl::init(false));
+
+// -memory-limit, in megabytes (defaults to 1000).
+static cl::opt<unsigned>
+MemoryLimitMB("memory-limit",
+              cl::desc("Do not use more megabytes of memory"),
+              cl::init(1000));
+
+// Small value type that carries an indentation depth so it can be streamed
+// with operator<<.
+struct indent {
+  unsigned distance;
+
+  indent(unsigned depth)
+    : distance(depth) {
+  }
+};
+
+// Writes the indentation unit in.distance times to os and returns os so the
+// call can be chained.
+static raw_ostream &operator <<(raw_ostream &os, const indent &in) {
+  for (unsigned left = in.distance; left != 0; --left) {
+    os << " ";
+  }
+  return os;
+}
+
+// Recursively prints the node n to outs() in a tagged, explicit notation
+// (!!str, !!seq, !!map, aliases and !!null).
+//
+// n                   - node to print; a null pointer prints nothing.
+// Indent              - current nesting depth, used for the leading
+//                       indentation and for closing brackets.
+// SuppressFirstIndent - when true, the leading indentation is skipped (used
+//                       after the "? " / ": " markers of a mapping entry).
+static void dumpNode(yaml::Node *n,
+                     unsigned Indent = 0,
+                     bool SuppressFirstIndent = false) {
+  if (!n)
+    return;
+
+  raw_ostream &OS = outs();
+  if (!SuppressFirstIndent)
+    OS << indent(Indent);
+
+  // An anchor, if present, precedes the node itself.
+  StringRef Anchor = n->getAnchor();
+  if (!Anchor.empty())
+    OS << "&" << Anchor << " ";
+
+  if (yaml::ScalarNode *Scalar = dyn_cast<yaml::ScalarNode>(n)) {
+    SmallString<32> Storage;
+    StringRef Val = Scalar->getValue(Storage);
+    OS << "!!str \"" << yaml::escape(Val) << "\"";
+    return;
+  }
+
+  if (yaml::SequenceNode *Seq = dyn_cast<yaml::SequenceNode>(n)) {
+    OS << "!!seq [\n";
+    const unsigned Inner = Indent + 1;
+    yaml::SequenceNode::iterator I = Seq->begin(), E = Seq->end();
+    while (I != E) {
+      dumpNode(I, Inner);
+      OS << ",\n";
+      ++I;
+    }
+    OS << indent(Indent) << "]";
+    return;
+  }
+
+  if (yaml::MappingNode *Map = dyn_cast<yaml::MappingNode>(n)) {
+    OS << "!!map {\n";
+    const unsigned Inner = Indent + 1;
+    yaml::MappingNode::iterator I = Map->begin(), E = Map->end();
+    while (I != E) {
+      // Key and value follow their markers on the same line, hence the
+      // suppressed first indent.
+      OS << indent(Inner) << "? ";
+      dumpNode(I->getKey(), Inner, true);
+      OS << "\n";
+      OS << indent(Inner) << ": ";
+      dumpNode(I->getValue(), Inner, true);
+      OS << ",\n";
+      ++I;
+    }
+    OS << indent(Indent) << "}";
+    return;
+  }
+
+  if (yaml::AliasNode *Alias = dyn_cast<yaml::AliasNode>(n)) {
+    OS << "*" << Alias->getName();
+    return;
+  }
+
+  if (isa<yaml::NullNode>(n))
+    OS << "!!null null";
+}
+
+// Prints every document of stream to outs(). Each document starts with a
+// "%YAML 1.2" / "---" header and ends with "...". Printing stops at the
+// first document whose root is null (after its header has been written).
+static void dumpStream(yaml::Stream &stream) {
+  yaml::document_iterator Doc = stream.begin();
+  const yaml::document_iterator DocEnd = stream.end();
+  while (Doc != DocEnd) {
+    outs() << "%YAML 1.2\n"
+           << "---\n";
+    yaml::Node *Root = Doc->getRoot();
+    if (!Root)
+      break;
+    dumpNode(Root);
+    outs() << "\n...\n";
+    ++Doc;
+  }
+}
+
+// Times three passes over JSONText and records them in Group as
+// "<Name>: Loop", "<Name>: Tokenizing" and "<Name>: Parsing".
+static void benchmark(llvm::TimerGroup &Group,
+                      llvm::StringRef Name,
+                      llvm::StringRef JSONText) {
+  // Baseline: a plain loop that touches every character once.
+  llvm::Timer BaseLine((Name + ": Loop").str(), Group);
+  BaseLine.startTimer();
+  char Sum = 0;
+  for (size_t Pos = 0, Len = JSONText.size(); Pos != Len; ++Pos)
+    Sum += JSONText[Pos];
+  BaseLine.stopTimer();
+  // Route the sum through a volatile so the loop above is not optimized out.
+  volatile char DontOptimizeOut = Sum;
+  (void)DontOptimizeOut;
+
+  // Scanner only.
+  llvm::Timer Tokenizing((Name + ": Tokenizing").str(), Group);
+  Tokenizing.startTimer();
+  {
+    yaml::scanTokens(JSONText);
+  }
+  Tokenizing.stopTimer();
+
+  // Full parse; the stream and its SourceMgr are destroyed inside the timed
+  // region, as before.
+  llvm::Timer Parsing((Name + ": Parsing").str(), Group);
+  Parsing.startTimer();
+  {
+    llvm::SourceMgr SM;
+    llvm::yaml::Stream stream(JSONText, SM);
+    stream.skip();
+  }
+  Parsing.stopTimer();
+}
+
+// Builds a JSON array of three-key objects as benchmark input.
+//
+// MemoryMB  - objects are appended until the text is at least this many
+//             megabytes long; 0 yields an empty array ("[\n]\n").
+// ValueSize - number of '*' characters in each of the three values.
+//
+// Returns the generated text. A ',' separator is written only when another
+// object really follows, so the array never ends in a trailing comma.
+static std::string createJSONText(size_t MemoryMB, unsigned ValueSize) {
+  const size_t MemoryBytes = MemoryMB * 1024 * 1024;
+  // The value is identical for every key, so build it once.
+  const std::string Value(ValueSize, '*');
+
+  std::string JSONText("[\n");
+  bool MoreFollows = JSONText.size() < MemoryBytes;
+  while (MoreFollows) {
+    JSONText += " {\n";
+    JSONText += " \"key1\": \"";
+    JSONText += Value;
+    JSONText += "\",\n";
+    JSONText += " \"key2\": \"";
+    JSONText += Value;
+    JSONText += "\",\n";
+    JSONText += " \"key3\": \"";
+    JSONText += Value;
+    JSONText += "\"\n";
+    JSONText += " }";
+    // One decision drives both the separator and the loop, so they cannot
+    // disagree.
+    MoreFollows = JSONText.size() < MemoryBytes;
+    JSONText += MoreFollows ? ",\n" : "\n";
+  }
+  JSONText += "]\n";
+  return JSONText;
+}
+
+// Entry point.
+//
+// With an <input> file: optionally dumps its tokens (-tokens) and/or its
+// canonical form (-canonical). Returns 1, after printing a diagnostic, if
+// the input cannot be read.
+//
+// Then: with -verify a single small benchmark is run; otherwise, unless one
+// of the dump options was given, the three full-size benchmarks are run.
+int main(int argc, char **argv) {
+  llvm::cl::ParseCommandLineOptions(argc, argv);
+  if (Input.getNumOccurrences()) {
+    OwningPtr<MemoryBuffer> Buf;
+    // Say why the input could not be read instead of exiting silently.
+    if (error_code EC = MemoryBuffer::getFileOrSTDIN(Input, Buf)) {
+      errs() << argv[0] << ": error reading '" << Input << "': "
+             << EC.message() << "\n";
+      return 1;
+    }
+
+    if (DumpTokens) {
+      yaml::dumpTokens(Buf->getBuffer(), outs());
+    }
+
+    if (DumpCanonical) {
+      // The SourceMgr is only needed by the parser, so keep it local.
+      llvm::SourceMgr sm;
+      yaml::Stream stream(Buf->getBuffer(), sm);
+      dumpStream(stream);
+    }
+  }
+
+  if (Verify) {
+    llvm::TimerGroup Group("YAML parser benchmark");
+    benchmark(Group, "Fast", createJSONText(10, 500));
+  } else if (!DumpCanonical && !DumpTokens) {
+    llvm::TimerGroup Group("YAML parser benchmark");
+    benchmark(Group, "Small Values", createJSONText(MemoryLimitMB, 5));
+    benchmark(Group, "Medium Values", createJSONText(MemoryLimitMB, 500));
+    benchmark(Group, "Large Values", createJSONText(MemoryLimitMB, 50000));
+  }
+
+  return 0;
+}