Add YAML parser to Support.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@153977 91177308-0d34-0410-b5e6-96231b3b80d8
author: Michael J. Spencer <bigcheesegs@gmail.com> 2012-04-03 23:09:22 +0000
committer: Michael J. Spencer <bigcheesegs@gmail.com> 2012-04-03 23:09:22 +0000
commit: 93210e847a1496b24cef881723e57c489082dcfe (patch)
tree: 83d1f8828d8b6835a6511d28cf3c63fad8b06aef
parent: 2ce63c73520cd6e715f9114589f802938b5db01f (diff)
download: llvm-93210e847a1496b24cef881723e57c489082dcfe.tar.gz
llvm-93210e847a1496b24cef881723e57c489082dcfe.tar.bz2
llvm-93210e847a1496b24cef881723e57c489082dcfe.tar.xz
182 files changed, 4586 insertions, 0 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 33dd12314d..8336bc975e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -397,6 +397,7 @@ add_subdirectory(utils/count)
 add_subdirectory(utils/not)
 add_subdirectory(utils/llvm-lit)
 add_subdirectory(utils/json-bench)
+add_subdirectory(utils/yaml-bench)
 
 add_subdirectory(projects)
 
diff --git a/LICENSE.TXT b/LICENSE.TXT
index 837688e76a..00cf601169 100644
--- a/LICENSE.TXT
+++ b/LICENSE.TXT
@@ -67,3 +67,4 @@ Autoconf            llvm/autoconf
 CellSPU backend     llvm/lib/Target/CellSPU/README.txt
 Google Test         llvm/utils/unittest/googletest
 OpenBSD regex       llvm/lib/Support/{reg*, COPYRIGHT.regex}
+pyyaml tests        llvm/test/YAMLParser/{*.data, LICENSE.TXT}
diff --git a/include/llvm/Support/YAMLParser.h b/include/llvm/Support/YAMLParser.h
new file mode 100644
index 0000000000..27d039164c
--- /dev/null
+++ b/include/llvm/Support/YAMLParser.h
@@ -0,0 +1,564 @@
+//===--- YAMLParser.h - Simple YAML parser --------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This is a YAML 1.2 parser.
+//
+//  See http://www.yaml.org/spec/1.2/spec.html for the full standard.
+//
+//  This currently does not implement the following:
+//    * Multi-line literal folding.
+//    * Tag resolution.
+//    * UTF-16.
+//    * BOMs anywhere other than the first Unicode scalar value in the file.
+//
+//  The most important class here is Stream. This represents a YAML stream with
+//  0, 1, or many documents.
+//
+//  SourceMgr sm;
+//  StringRef input = getInput();
+//  yaml::Stream stream(input, sm);
+//
+//  for (yaml::document_iterator di = stream.begin(), de = stream.end();
+//       di != de; ++di) {
+//    yaml::Node *n = di->getRoot();
+//    if (n) {
+//      // Do something with n...
+//    } else
+//      break;
+//  }
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_YAML_PARSER_H
+#define LLVM_SUPPORT_YAML_PARSER_H
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/SMLoc.h"
+
+#include <limits>
+#include <utility>
+
+namespace llvm {
+class MemoryBuffer;
+class SourceMgr;
+class raw_ostream;
+class Twine;
+
+namespace yaml {
+
+class document_iterator;
+class Document;
+class Node;
+class Scanner;
+struct Token;
+
+/// @brief Dump all the tokens in this stream to OS.
+/// @returns true if there was an error, false otherwise.
+bool dumpTokens(StringRef Input, raw_ostream &);
+
+/// @brief Scans all tokens in input without outputting anything. This is used
+///        for benchmarking the tokenizer.
+/// @returns true if there was an error, false otherwise.
+bool scanTokens(StringRef Input);
+
+/// @brief Escape \a Input for a double quoted scalar.
+std::string escape(StringRef Input);
+
+/// @brief This class represents a YAML stream potentially containing multiple
+///        documents.
+///
+/// See the example at the top of this file for the intended iteration pattern
+/// (begin()/end() plus Document::getRoot()).
+class Stream {
+public:
+  /// @brief Create a stream reading from \a Input.
+  Stream(StringRef Input, SourceMgr &);
+
+  document_iterator begin();
+  document_iterator end();
+  void skip();
+  bool failed();
+
+  /// @brief Run skip() and report the outcome.
+  /// @returns true if failed() is false after skipping, false otherwise.
+  bool validate() {
+    skip();
+    return !failed();
+  }
+
+  void printError(Node *N, const Twine &Msg);
+
+private:
+  // Members are destroyed in reverse declaration order, so CurrentDoc goes
+  // away before scanner does.
+  OwningPtr<Scanner> scanner;
+  OwningPtr<Document> CurrentDoc;
+
+  // Document is granted access to the private members of the stream.
+  friend class Document;
+
+  /// @brief Validate a %YAML x.x directive.
+  void handleYAMLDirective(const Token &);
+};
+
+/// @brief Common base class for all Nodes.
+///
+/// Every node records its kind (see NodeKind), an anchor (possibly empty), the
+/// source range it covers, and a reference to the OwningPtr that holds its
+/// Document.
+class Node {
+public:
+  /// @brief Discriminator returned by getType() and tested by the classof()
+  ///        functions of the subclasses.
+  enum NodeKind {
+    NK_Null,
+    NK_Scalar,
+    NK_KeyValue,
+    NK_Mapping,
+    NK_Sequence,
+    NK_Alias
+  };
+
+  /// @param Type   One of the NodeKind values.
+  /// @param Anchor The anchor attached to this node, or an empty StringRef.
+  Node(unsigned int Type, OwningPtr<Document>&, StringRef Anchor);
+  virtual ~Node();
+
+  /// @brief Get the value of the anchor attached to this node. If it does not
+  ///        have one, getAnchor().size() will be 0.
+  StringRef getAnchor() const { return Anchor; }
+
+  SMRange getSourceRange() const { return SourceRange; }
+  void setSourceRange(SMRange SR) { SourceRange = SR; }
+
+  // These functions forward to Document and Scanner.
+  Token &peekNext();
+  Token getNext();
+  Node *parseBlockNode();
+  BumpPtrAllocator &getAllocator();
+  void setError(const Twine &Message, Token &Location) const;
+  bool failed() const;
+
+  /// @brief Hook overridden by the key/value and collection nodes in this
+  ///        header. The base implementation does nothing.
+  virtual void skip() {}
+
+  /// @returns The NodeKind value this node was constructed with.
+  unsigned int getType() const { return TypeID; }
+  static inline bool classof(const Node *) { return true; }
+
+  /// @brief Allocate storage for a node from \a Alloc.
+  ///
+  /// @param Size      Number of bytes requested by the new-expression.
+  /// @param Alloc     Allocator that provides the storage.
+  /// @param Alignment Requested alignment; defaults to 16.
+  void *operator new ( size_t Size
+                     , BumpPtrAllocator &Alloc
+                     , size_t Alignment = 16) throw() {
+    return Alloc.Allocate(Size, Alignment);
+  }
+
+  /// @brief Placement delete matching the operator new above; hands \a Ptr
+  ///        back to \a Alloc.
+  void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t) throw() {
+    Alloc.Deallocate(Ptr);
+  }
+
+protected:
+  /// Reference to the OwningPtr holding the Document this node belongs to.
+  OwningPtr<Document> &Doc;
+  /// Source range reported by getSourceRange().
+  SMRange SourceRange;
+
+private:
+  unsigned int TypeID;
+  StringRef Anchor;
+};
+
+/// @brief The YAML null value.
+///
+/// Example:
+///   !!null null
+class NullNode : public Node {
+public:
+  /// @brief Build a null node attached to \a D. It is always constructed with
+  ///        an empty anchor.
+  NullNode(OwningPtr<Document> &D)
+    : Node(NK_Null, D, StringRef())
+  {}
+
+  // classof() hooks: a Node is a NullNode exactly when its kind is NK_Null.
+  static inline bool classof(const Node *N) {
+    return NK_Null == N->getType();
+  }
+  static inline bool classof(const NullNode *) { return true; }
+};
+
+/// @brief A scalar node is an opaque datum that can be presented as a
+///        series of zero or more Unicode scalar values.
+///
+/// Example:
+///   Adena
+class ScalarNode : public Node {
+public:
+  /// @brief Create a scalar whose raw text is \a Val.
+  ///
+  /// The source range is derived from \a Val: it starts at the first byte and
+  /// ends at the last byte (not one past it).
+  ScalarNode(OwningPtr<Document> &D, StringRef Anchor, StringRef Val)
+    : Node(NK_Scalar, D, Anchor)
+    , Value(Val) {
+    // NOTE(review): for an empty Val the end location is one byte before
+    // Val.begin(); confirm that empty scalars never reach this constructor.
+    SourceRange = SMRange(SMLoc::getFromPointer(Val.begin()),
+                          SMLoc::getFromPointer(Val.end() - 1));
+  }
+
+  // Return Value without any escaping or folding or other fun YAML stuff. This
+  // is the exact bytes that are contained in the file (after conversion to
+  // utf8).
+  StringRef getRawValue() const { return Value; }
+
+  /// @brief Gets the value of this node as a StringRef.
+  ///
+  /// @param Storage is used to store the content of the returned StringRef iff
+  ///        it requires any modification from how it appeared in the source.
+  ///        This happens with escaped characters and multi-line literals.
+  StringRef getValue(SmallVectorImpl<char> &Storage) const;
+
+  // classof() hooks: a Node is a ScalarNode exactly when its kind is
+  // NK_Scalar.
+  static inline bool classof(const Node *N) {
+    return NK_Scalar == N->getType();
+  }
+  static inline bool classof(const ScalarNode *) { return true; }
+
+private:
+  /// The raw text handed to the constructor.
+  StringRef Value;
+
+  StringRef unescapeDoubleQuoted(StringRef UnquotedValue,
+                                 StringRef::size_type Start,
+                                 SmallVectorImpl<char> &Storage) const;
+};
+
+/// @brief Interpret \a SN as a boolean.
+///
+/// Only the exact spellings "true" and "false" are accepted.
+///
+/// Declared inline rather than static: this definition lives in a header, and
+/// a static function would give every including translation unit its own
+/// private copy (and an unused-function warning where it is never called).
+///
+/// @param SN     The scalar to convert.
+/// @param Result Receives the parsed value on success; left untouched on
+///               failure.
+/// @returns true if \a SN was converted, false otherwise.
+inline bool getAs(const ScalarNode *SN, bool &Result) {
+  SmallString<4> Storage;
+  StringRef Value = SN->getValue(Storage);
+  if (Value == "true")
+    Result = true;
+  else if (Value == "false")
+    Result = false;
+  else
+    return false;
+  return true;
+}
+
+/// @brief Interpret \a SN as an integer of type \a T.
+///
+/// Only participates in overload resolution when
+/// std::numeric_limits<T>::is_integer is true.
+///
+/// @param SN     The scalar to convert.
+/// @param Result Passed to StringRef::getAsInteger() to receive the value.
+/// @returns true if the conversion succeeded, false otherwise.
+template<class T>
+typename enable_if_c<std::numeric_limits<T>::is_integer, bool>::type
+getAs(const ScalarNode *SN, T &Result) {
+  SmallString<4> Scratch;
+  const StringRef Text = SN->getValue(Scratch);
+  // getAsInteger() reports failure with true, so the result is inverted to
+  // make true mean success.
+  const bool HadError = Text.getAsInteger(0, Result);
+  return !HadError;
+}
+
+/// @brief A key and value pair. While not technically a Node under the YAML
+///        representation graph, it is easier to treat them this way.
+///
+/// TODO: Consider making this not a child of Node.
+///
+/// Example:
+///   Section: .text
+class KeyValueNode : public Node {
+public:
+  /// @brief Create an entry with neither key nor value set; it is constructed
+  ///        with an empty anchor.
+  KeyValueNode(OwningPtr<Document> &D)
+    : Node(NK_KeyValue, D, StringRef())
+    , Key(0)
+    , Value(0)
+  {}
+
+  /// @brief Parse and return the key.
+  ///
+  /// This may be called multiple times.
+  ///
+  /// @returns The key, or nullptr if failed() == true.
+  Node *getKey();
+
+  /// @brief Parse and return the value.
+  ///
+  /// This may be called multiple times.
+  ///
+  /// @returns The value, or nullptr if failed() == true.
+  Node *getValue();
+
+  /// @brief Skip the key and then the value.
+  ///
+  /// getKey() and getValue() return null once parsing has failed, so each
+  /// result is checked before use. The value is not requested at all when the
+  /// key could not be obtained.
+  virtual void skip() {
+    if (Node *K = getKey()) {
+      K->skip();
+      if (Node *V = getValue())
+        V->skip();
+    }
+  }
+
+  static inline bool classof(const KeyValueNode *) { return true; }
+  static inline bool classof(const Node *N) {
+    return N->getType() == NK_KeyValue;
+  }
+
+private:
+  Node *Key;
+  Node *Value;
+};
+
+/// @brief Forward iterator shared by the YAML collection nodes (sequences and
+///        maps).
+///
+/// Requirements on BaseT: it exposes a ValueT* member called CurrentEntry and
+/// a member function increment(). increment() must set CurrentEntry to 0 once
+/// the collection is exhausted, which turns this object into an end iterator.
+template <class BaseT, class ValueT>
+class basic_collection_iterator
+  : public std::iterator<std::forward_iterator_tag, ValueT> {
+public:
+  /// @brief Construct an end iterator.
+  basic_collection_iterator() : Base(0) {}
+  /// @brief Construct an iterator over the collection \a B.
+  basic_collection_iterator(BaseT *B) : Base(B) {}
+
+  /// @returns The collection's current entry.
+  ValueT *operator ->() const {
+    assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
+    return Base->CurrentEntry;
+  }
+
+  /// @returns A reference to the collection's current entry.
+  ValueT &operator *() const {
+    assert(Base && Base->CurrentEntry &&
+           "Attempted to dereference end iterator!");
+    return *Base->CurrentEntry;
+  }
+
+  /// @brief Implicit conversion to a pointer to the current entry.
+  operator ValueT*() const {
+    assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
+    return Base->CurrentEntry;
+  }
+
+  /// @brief Iterators over different collections (or an end iterator versus a
+  ///        live one) always differ; otherwise the current entries decide.
+  bool operator !=(const basic_collection_iterator &Other) const {
+    if (Base == Other.Base) {
+      // Two end iterators are never different.
+      if (!Base)
+        return false;
+      return Base->CurrentEntry != Other.Base->CurrentEntry;
+    }
+    return true;
+  }
+
+  /// @brief Advance the underlying collection by one entry.
+  basic_collection_iterator &operator++() {
+    assert(Base && "Attempted to advance iterator past end!");
+    Base->increment();
+    // The collection signals exhaustion with a null CurrentEntry; drop the
+    // collection pointer so this object compares equal to an end iterator.
+    if (!Base->CurrentEntry)
+      Base = 0;
+    return *this;
+  }
+
+private:
+  /// The collection being iterated, or 0 for an end iterator.
+  BaseT *Base;
+};
+
+// The following two templates are used for both MappingNode and Sequence Node.
+
+/// @brief Start iterating over \a C.
+///
+/// Asserts that \a C has not been iterated before, clears its IsAtBeginning
+/// flag, and returns an iterator that has already been advanced once.
+template <class CollectionType>
+typename CollectionType::iterator begin(CollectionType &C) {
+  assert(C.IsAtBeginning && "You may only iterate over a collection once!");
+  C.IsAtBeginning = false;
+  typename CollectionType::iterator First(&C);
+  // The first increment moves the iterator onto the first entry.
+  ++First;
+  return First;
+}
+
+/// @brief Skip every entry of \a C.
+///
+/// Does nothing unless \a C is still at its beginning; in that case each entry
+/// is visited in turn and asked to skip itself.
+template <class CollectionType>
+void skip(CollectionType &C) {
+  // TODO: support skipping from the middle of a parsed collection ;/
+  assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!");
+  if (!C.IsAtBeginning)
+    return;
+
+  typedef typename CollectionType::iterator IterT;
+  for (IterT I = begin(C), E = C.end(); I != E; ++I)
+    I->skip();
+}
+
+/// @brief Represents a YAML map created from either a block map or a flow map.
+///
+/// This parses the YAML stream as increment() is called.
+///
+/// Example:
+///   Name: _main
+///   Scope: Global
+class MappingNode : public Node {
+public:
+  enum MappingType {
+    MT_Block,
+    MT_Flow,
+    MT_Inline ///< An inline mapping node is used for "[key: value]".
+  };
+
+  /// @brief Create a mapping of kind \a MT that has not been iterated yet.
+  MappingNode(OwningPtr<Document> &D, StringRef Anchor, MappingType MT)
+    : Node(NK_Mapping, D, Anchor)
+    , Type(MT)
+    , IsAtBeginning(true)
+    , IsAtEnd(false)
+    , CurrentEntry(0)
+  {}
+
+  // The iterator and the shared begin()/skip() templates read and update the
+  // private iteration state below.
+  friend class basic_collection_iterator<MappingNode, KeyValueNode>;
+  typedef basic_collection_iterator<MappingNode, KeyValueNode> iterator;
+  template <class T> friend typename T::iterator yaml::begin(T &);
+  template <class T> friend void yaml::skip(T &);
+
+  /// @brief Start iterating. yaml::begin() asserts that this is only done
+  ///        once per node.
+  iterator begin() {
+    return yaml::begin(*this);
+  }
+
+  /// @returns A default-constructed (end) iterator.
+  iterator end() { return iterator(); }
+
+  /// @brief Skip all entries; forwards to the shared yaml::skip() template.
+  virtual void skip() {
+    yaml::skip(*this);
+  }
+
+  static inline bool classof(const MappingNode *) { return true; }
+  static inline bool classof(const Node *N) {
+    return N->getType() == NK_Mapping;
+  }
+
+private:
+  MappingType Type;
+  /// True until yaml::begin() has been called on this node.
+  bool IsAtBeginning;
+  bool IsAtEnd;
+  /// The entry the iterator currently refers to; 0 marks the end.
+  KeyValueNode *CurrentEntry;
+
+  void increment();
+};
+
+/// @brief Represents a YAML sequence created from either a block sequence or a
+///        flow sequence.
+///
+/// This parses the YAML stream as increment() is called.
+///
+/// Example:
+///   - Hello
+///   - World
+class SequenceNode : public Node {
+public:
+  enum SequenceType {
+    ST_Block,
+    ST_Flow,
+    // Use for:
+    //
+    // key:
+    // - val1
+    // - val2
+    //
+    // As a BlockMappingEntry and BlockEnd are not created in this case.
+    ST_Indentless
+  };
+
+  /// @brief Create a sequence of kind \a ST that has not been iterated yet.
+  SequenceNode(OwningPtr<Document> &D, StringRef Anchor, SequenceType ST)
+    : Node(NK_Sequence, D, Anchor)
+    , SeqType(ST)
+    , IsAtBeginning(true)
+    , IsAtEnd(false)
+    , WasPreviousTokenFlowEntry(true) // Start with an imaginary ','.
+    , CurrentEntry(0)
+  {}
+
+  // The iterator and the shared begin()/skip() templates read and update the
+  // private iteration state below.
+  friend class basic_collection_iterator<SequenceNode, Node>;
+  typedef basic_collection_iterator<SequenceNode, Node> iterator;
+  template <class T> friend typename T::iterator yaml::begin(T &);
+  template <class T> friend void yaml::skip(T &);
+
+  void increment();
+
+  /// @brief Start iterating. yaml::begin() asserts that this is only done
+  ///        once per node.
+  iterator begin() {
+    return yaml::begin(*this);
+  }
+
+  /// @returns A default-constructed (end) iterator.
+  iterator end() { return iterator(); }
+
+  /// @brief Skip all entries; forwards to the shared yaml::skip() template.
+  virtual void skip() {
+    yaml::skip(*this);
+  }
+
+  static inline bool classof(const SequenceNode *) { return true; }
+  static inline bool classof(const Node *N) {
+    return N->getType() == NK_Sequence;
+  }
+
+private:
+  SequenceType SeqType;
+  /// True until yaml::begin() has been called on this node.
+  bool IsAtBeginning;
+  bool IsAtEnd;
+  bool WasPreviousTokenFlowEntry;
+  /// The entry the iterator currently refers to; 0 marks the end.
+  Node *CurrentEntry;
+};
+
+/// @brief Represents an alias to a Node with an anchor.
+///
+/// Example:
+///   *AnchorName
+class AliasNode : public Node {
+public:
+  /// @brief Create an alias with the name \a Val. The alias node itself is
+  ///        constructed with an empty anchor.
+  AliasNode(OwningPtr<Document> &D, StringRef Val)
+    : Node(NK_Alias, D, StringRef()), Name(Val) {}
+
+  /// @returns The name this alias was created with.
+  StringRef getName() const { return Name; }
+  Node *getTarget();
+
+  // The trivially-true overload must take an AliasNode. Taking a ScalarNode
+  // here would make every ScalarNode pass as an AliasNode without consulting
+  // its kind.
+  static inline bool classof(const AliasNode *) { return true; }
+  static inline bool classof(const Node *N) {
+    return N->getType() == NK_Alias;
+  }
+
+private:
+  StringRef Name;
+};
+
+/// @brief A YAML Stream is a sequence of Documents. A document contains a root
+///        node.
+class Document {
+public:
+  /// @brief Root for parsing a node. Returns a single node.
+  Node *parseBlockNode();
+
+  /// @brief Create a document that reads from \a ParentStream.
+  Document(Stream &ParentStream);
+
+  /// @brief Finish parsing the current document and return true if there are
+  ///        more. Return false otherwise.
+  bool skip();
+
+  /// @brief Parse and return the root level node.
+  ///
+  /// The result of the first successful parse is cached in Root and returned
+  /// by later calls. While Root is still null, every call invokes
+  /// parseBlockNode() again.
+  Node *getRoot() {
+    if (Root)
+      return Root;
+    return Root = parseBlockNode();
+  }
+
+private:
+  // Node and document_iterator use the private members of this class.
+  friend class Node;
+  friend class document_iterator;
+
+  /// @brief Stream to read tokens from.
+  Stream &stream;
+
+  /// @brief Used to allocate nodes to. All are destroyed without calling their
+  ///        destructor when the document is destroyed.
+  BumpPtrAllocator NodeAllocator;
+
+  /// @brief The root node. Used to support skipping a partially parsed
+  ///        document.
+  Node *Root;
+
+  Token &peekNext();
+  Token getNext();
+  void setError(const Twine &Message, Token &Location) const;
+  bool failed() const;
+
+  /// @brief Placeholder for %TAG handling; currently ignores \a Tag.
+  void handleTagDirective(const Token &Tag) {
+    // TODO: Track tags.
+  }
+
+  /// @brief Parse %BLAH directives and return true if any were encountered.
+  bool parseDirectives();
+
+  /// @brief Consume the next token and error if it is not \a TK.
+  bool expectToken(int TK);
+};
+
+/// @brief Iterator abstraction for Documents over a Stream.
+///
+/// The iterator does not own a document. It holds a reference to an
+/// OwningPtr<Document>, and a default-constructed iterator refers to the
+/// static NullDoc.
+class document_iterator {
+public:
+  /// @brief Construct an iterator bound to NullDoc.
+  document_iterator() : Doc(NullDoc) {}
+  /// @brief Construct an iterator bound to \a D.
+  document_iterator(OwningPtr<Document> &D) : Doc(D) {}
+
+  /// @brief Compare the referenced document pointers.
+  ///
+  /// Const-qualified so that const iterators can be compared as well.
+  bool operator !=(const document_iterator &Other) const {
+    return Doc != Other.Doc;
+  }
+
+  /// @brief Finish the current document and move on to the next one.
+  ///
+  /// If Document::skip() reports that no documents remain, the referenced
+  /// pointer is reset to null. Otherwise it is replaced by a new Document
+  /// reading from the same stream.
+  document_iterator operator ++() {
+    if (!Doc->skip()) {
+      Doc.reset(0);
+    } else {
+      // Fetch the stream before reset() replaces the current document.
+      Stream &S = Doc->stream;
+      Doc.reset(new Document(S));
+    }
+    return *this;
+  }
+
+  Document &operator *() {
+    return *Doc;
+  }
+
+  OwningPtr<Document> &operator ->() {
+    return Doc;
+  }
+
+private:
+  static OwningPtr<Document> NullDoc;
+  OwningPtr<Document> &Doc;
+};
+
+}
+}
+
+#endif
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index 0b69238274..9b3b6c801d 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -54,6 +54,7 @@ add_llvm_library(LLVMSupport
   ToolOutputFile.cpp
   Triple.cpp
   Twine.cpp
+  YAMLParser.cpp
   raw_os_ostream.cpp
   raw_ostream.cpp
   regcomp.c
diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp
new file mode 100644
index 0000000000..3e302d0eb1
--- /dev/null
+++ b/lib/Support/YAMLParser.cpp
@@ -0,0 +1,2115 @@
+//===--- YAMLParser.cpp - Simple YAML parser ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements a YAML parser.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/YAMLParser.h"
+
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SourceMgr.h"
+
+using namespace llvm;
+using namespace yaml;
+
+/// UnicodeEncodingForm - The encoding forms getUnicodeEncoding() can report.
+enum UnicodeEncodingForm {
+  UEF_UTF32_LE, ///< UTF-32 Little Endian
+  UEF_UTF32_BE, ///< UTF-32 Big Endian
+  UEF_UTF16_LE, ///< UTF-16 Little Endian
+  UEF_UTF16_BE, ///< UTF-16 Big Endian
+  UEF_UTF8,     ///< UTF-8 or ascii.
+  UEF_Unknown   ///< Not a valid Unicode encoding.
+};
+
+/// EncodingInfo - Holds the encoding type and length of the byte order mark if
+///                it exists. Length is in {0, 2, 3, 4}.
+typedef std::pair<UnicodeEncodingForm, unsigned> EncodingInfo;
+
+/// getUnicodeEncoding - Inspect at most the first 4 bytes of \a Input and
+///                      decide which Unicode encoding form it uses.
+///
+/// A byte order mark is recognised when present. Without one, the position of
+/// the zero bytes around the first character is used to tell UTF-32 and UTF-16
+/// apart, and everything else is treated as UTF-8.
+///
+/// @param Input A string of length 0 or more.
+/// @returns An EncodingInfo holding the encoding form and the length of the
+///          byte order mark (0 if there is none). Empty input yields
+///          UEF_Unknown.
+static EncodingInfo getUnicodeEncoding(StringRef Input) {
+  const size_t Size = Input.size();
+  if (Size == 0)
+    return std::make_pair(UEF_Unknown, 0);
+
+  const uint8_t First = uint8_t(Input[0]);
+
+  // Leading 0x00: big endian UTF-32 (with or without BOM) or BOM-less
+  // big endian UTF-16.
+  if (First == 0x00) {
+    if (Size >= 4 && Input[1] == 0) {
+      if (uint8_t(Input[2]) == 0xFE && uint8_t(Input[3]) == 0xFF)
+        return std::make_pair(UEF_UTF32_BE, 4);
+      if (Input[2] == 0 && Input[3] != 0)
+        return std::make_pair(UEF_UTF32_BE, 0);
+    }
+
+    if (Size >= 2 && Input[1] != 0)
+      return std::make_pair(UEF_UTF16_BE, 0);
+    return std::make_pair(UEF_Unknown, 0);
+  }
+
+  // Leading 0xFF 0xFE: little endian BOM, UTF-32 if two zero bytes follow.
+  if (First == 0xFF) {
+    if (Size >= 2 && uint8_t(Input[1]) == 0xFE) {
+      if (Size >= 4 && Input[2] == 0 && Input[3] == 0)
+        return std::make_pair(UEF_UTF32_LE, 4);
+      return std::make_pair(UEF_UTF16_LE, 2);
+    }
+    return std::make_pair(UEF_Unknown, 0);
+  }
+
+  // Leading 0xFE 0xFF: big endian UTF-16 BOM.
+  if (First == 0xFE) {
+    if (Size >= 2 && uint8_t(Input[1]) == 0xFF)
+      return std::make_pair(UEF_UTF16_BE, 2);
+    return std::make_pair(UEF_Unknown, 0);
+  }
+
+  // Leading 0xEF 0xBB 0xBF: UTF-8 BOM.
+  if (First == 0xEF) {
+    if (Size >= 3 && uint8_t(Input[1]) == 0xBB && uint8_t(Input[2]) == 0xBF)
+      return std::make_pair(UEF_UTF8, 3);
+    return std::make_pair(UEF_Unknown, 0);
+  }
+
+  // It could still be utf-32 or utf-16.
+  if (Size >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0)
+    return std::make_pair(UEF_UTF32_LE, 0);
+
+  if (Size >= 2 && Input[1] == 0)
+    return std::make_pair(UEF_UTF16_LE, 0);
+
+  return std::make_pair(UEF_UTF8, 0);
+}
+
+namespace llvm {
+namespace yaml {
+/// Token - A single YAML token.
+///
+/// Tokens derive from ilist_node so they can be linked into an ilist<Token>.
+struct Token : ilist_node<Token> {
+  /// The kind of this token. A default-constructed token has kind TK_Error.
+  enum TokenKind {
+    TK_Error, // Uninitialized token.
+    TK_StreamStart,
+    TK_StreamEnd,
+    TK_VersionDirective,
+    TK_TagDirective,
+    TK_DocumentStart,
+    TK_DocumentEnd,
+    TK_BlockEntry,
+    TK_BlockEnd,
+    TK_BlockSequenceStart,
+    TK_BlockMappingStart,
+    TK_FlowEntry,
+    TK_FlowSequenceStart,
+    TK_FlowSequenceEnd,
+    TK_FlowMappingStart,
+    TK_FlowMappingEnd,
+    TK_Key,
+    TK_Value,
+    TK_Scalar,
+    TK_Alias,
+    TK_Anchor,
+    TK_Tag
+  } Kind;
+
+  /// A string of length 0 or more whose begin() points to the logical location
+  /// of the token in the input.
+  StringRef Range;
+
+  /// Construct a token of kind TK_Error with a default-constructed Range.
+  Token() : Kind(TK_Error) {}
+};
+}
+}
+
+/// Sentinel traits for ilist<Token>: the sentinel is a Token embedded in the
+/// traits object itself, so it is never allocated or freed separately.
+template<>
+struct ilist_sentinel_traits<Token> {
+  /// Return the address of the embedded sentinel.
+  Token *createSentinel() const {
+    return &Sentinel;
+  }
+  /// Nothing to release; the sentinel is a member of this object.
+  static void destroySentinel(Token*) {}
+
+  Token *provideInitialHead() const { return createSentinel(); }
+  Token *ensureHead(Token*) const { return createSentinel(); }
+  static void noteHead(Token*, Token*) {}
+
+private:
+  // mutable so the const member functions above can hand out a non-const
+  // pointer to it.
+  mutable Token Sentinel;
+};
+
+/// Node traits for ilist<Token>: tokens are copy-constructed into storage
+/// taken from the BumpPtrAllocator owned by this traits object.
+template<>
+struct ilist_node_traits<Token> {
+  /// Copy-construct \a V into storage obtained from Alloc.
+  Token *createNode(const Token &V) {
+    return new (Alloc.Allocate<Token>()) Token(V);
+  }
+  /// Intentionally empty: neither the destructor nor a deallocation is run
+  /// for an individual token.
+  static void deleteNode(Token *V) {}
+
+  void addNodeToList(Token *) {}
+  void removeNodeFromList(Token *) {}
+  void transferNodesFromList(ilist_node_traits &    /*SrcTraits*/,
+                             ilist_iterator<Token> /*first*/,
+                             ilist_iterator<Token> /*last*/) {}
+
+  /// Backing storage for every token created through createNode().
+  BumpPtrAllocator Alloc;
+};
+
+typedef ilist<Token> TokenQueueT;
+
+namespace {
+/// @brief This struct is used to track simple keys.
+///
+/// Simple keys are handled by creating an entry in SimpleKeys for each Token
+/// which could legally be the start of a simple key. When peekNext is called,
+/// if the Token to be returned is referenced by a SimpleKey, we continue
+/// tokenizing until that potential simple key has either been found to not be
+/// a simple key (we moved on to the next line or went further than 1024
+/// chars), or until we run into a Value, and then insert a Key token (and
+/// possibly others) before the SimpleKey's Tok.
+struct SimpleKey {
+  /// The queued token that may turn out to start a simple key.
+  TokenQueueT::iterator Tok;
+  unsigned Column;
+  unsigned Line;
+  unsigned FlowLevel;
+  bool IsRequired;
+
+  /// @brief Two candidates are equal when they refer to the same token; the
+  ///        other fields are not compared.
+  ///
+  /// Const-qualified so that const SimpleKey objects can be compared too.
+  bool operator ==(const SimpleKey &Other) const {
+    return Tok == Other.Tok;
+  }
+};
+}
+
+/// @brief The Unicode scalar value of a UTF-8 minimal well-formed code unit
+///        subsequence and the subsequence's length in code units (uint8_t).
+///        A length of 0 represents an error.
+typedef std::pair<uint32_t, unsigned> UTF8Decoded;
+
+/// @brief Decode the UTF-8 minimal well-formed code unit subsequence at the
+///        start of \a Range.
+///
+/// Overlong encodings, UTF-16 surrogate halves and values above 0x10FFFF are
+/// rejected. No byte outside \a Range is read, so an empty range or a sequence
+/// cut short by the end of the range is reported as an error.
+///
+/// @param Range The code units to decode from; may be empty.
+/// @returns The decoded scalar value and the number of code units it used, or
+///          (0, 0) if \a Range does not start with a well-formed sequence.
+static UTF8Decoded decodeUTF8(StringRef Range) {
+  StringRef::iterator Position = Range.begin();
+  // Number of code units that may be read. Each multi-byte case below checks
+  // this first, so continuation bytes are only inspected when they exist.
+  const size_t Available = Range.size();
+
+  // Nothing to decode.
+  if (Available == 0)
+    return std::make_pair(0, 0);
+  // 1 byte: [0x00, 0x7f]
+  // Bit pattern: 0xxxxxxx
+  if ((*Position & 0x80) == 0) {
+     return std::make_pair(*Position, 1);
+  }
+  // 2 bytes: [0x80, 0x7ff]
+  // Bit pattern: 110xxxxx 10xxxxxx
+  if (Available >= 2 &&
+      ((*Position & 0xE0) == 0xC0) &&
+      ((*(Position + 1) & 0xC0) == 0x80)) {
+    uint32_t codepoint = ((*Position & 0x1F) << 6) |
+                          (*(Position + 1) & 0x3F);
+    // Reject overlong encodings of values below 0x80.
+    if (codepoint >= 0x80)
+      return std::make_pair(codepoint, 2);
+  }
+  // 3 bytes: [0x800, 0xffff]
+  // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx
+  if (Available >= 3 &&
+      ((*Position & 0xF0) == 0xE0) &&
+      ((*(Position + 1) & 0xC0) == 0x80) &&
+      ((*(Position + 2) & 0xC0) == 0x80)) {
+    uint32_t codepoint = ((*Position & 0x0F) << 12) |
+                         ((*(Position + 1) & 0x3F) << 6) |
+                          (*(Position + 2) & 0x3F);
+    // Codepoints between 0xD800 and 0xDFFF are invalid, as
+    // they are high / low surrogate halves used by UTF-16.
+    if (codepoint >= 0x800 &&
+        (codepoint < 0xD800 || codepoint > 0xDFFF))
+      return std::make_pair(codepoint, 3);
+  }
+  // 4 bytes: [0x10000, 0x10FFFF]
+  // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+  if (Available >= 4 &&
+      ((*Position & 0xF8) == 0xF0) &&
+      ((*(Position + 1) & 0xC0) == 0x80) &&
+      ((*(Position + 2) & 0xC0) == 0x80) &&
+      ((*(Position + 3) & 0xC0) == 0x80)) {
+    uint32_t codepoint = ((*Position & 0x07) << 18) |
+                         ((*(Position + 1) & 0x3F) << 12) |
+                         ((*(Position + 2) & 0x3F) << 6) |
+                          (*(Position + 3) & 0x3F);
+    if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
+      return std::make_pair(codepoint, 4);
+  }
+  return std::make_pair(0, 0);
+}
+
+namespace llvm {
+namespace yaml {
+/// @brief Scans YAML tokens from a MemoryBuffer.
+class Scanner {
+public:
+  Scanner(const StringRef Input, SourceMgr &SM);
+
+  /// @brief Parse the next token and return it without popping it.
+  Token &peekNext();
+
+  /// @brief Parse the next token and pop it from the queue.
+  Token getNext();
+
+  /// @brief Forward a diagnostic to the SourceMgr.
+  ///
+  /// All four arguments are passed unchanged to SM.PrintMessage(); \a Ranges
+  /// defaults to an empty list.
+  void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
+                  ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+    SM.PrintMessage(Loc, Kind, Message, Ranges);
+  }
+
+  /// @brief Record a failure and, for the first failure only, print
+  ///        \a Message as an error diagnostic located at \a Position.
+  ///
+  /// @param Message  Text of the diagnostic.
+  /// @param Position Where to report the error. A position at or beyond End
+  ///                 is moved back to the last byte of the input.
+  void setError(const Twine &Message, StringRef::iterator Position) {
+    // This clamp of the scan position predates the use of Position below and
+    // is kept unchanged.
+    // NOTE(review): confirm whether scanning code still relies on it.
+    if (Current >= End)
+      Current = End - 1;
+
+    // Report at the location the caller asked for, not at the scan position.
+    if (Position >= End)
+      Position = End - 1;
+
+    // Don't print out more errors after the first one we encounter. The rest
+    // are just the result of the first, and have no meaning.
+    if (!Failed)
+      printError(SMLoc::getFromPointer(Position), SourceMgr::DK_Error,
+                 Message);
+    Failed = true;
+  }
+
+  /// @brief Record a failure located at the current scan position.
+  void setError(const Twine &Message) {
+    setError(Message, Current);
+  }
+
+  /// @brief Returns true if an error occurred while parsing.
+  bool failed() {
+    return Failed;
+  }
+
+private:
+  /// @brief Return the input from Current up to (not including) End.
+  StringRef currentInput() {
+    return StringRef(Current, End - Current);
+  }
+
+  /// @brief Decode a UTF-8 minimal well-formed code unit subsequence starting
+  ///        at \a Position.
+  ///
+  /// If the UTF-8 code units starting at Position do not form a well-formed
+  /// code unit subsequence, then the Unicode scalar value is 0, and the length
+  /// is 0.
+  UTF8Decoded decodeUTF8(StringRef::iterator Position) {
+    return ::decodeUTF8(StringRef(Position, End - Position));
+  }
+
+  // The following functions are based on the grammar rules in the YAML spec.
+  // The style of the function names is meant to closely match how they are
+  // written in the spec. The number within the [] is the number of the grammar
+  // rule in the spec.
+  //
+  // See 4.2 [Production Naming Conventions] for the meaning of the prefixes.
+  //
+  // c-
+  //   A production starting and ending with a special character.
+  // b-
+  //   A production matching a single line break.
+  // nb-
+  //   A production starting and ending with a non-break character.
+  // s-
+  //   A production starting and ending with a white space character.
+  // ns-
+  //   A production starting and ending with a non-space character.
+  // l-
+  //   A production matching complete line(s).
+
+  /// @brief Skip a single nb-char[27] starting at Position.
+  ///
+  /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE]
+  ///                  | [0xFF00-0xFFFD] | [0x10000-0x10FFFF]
+  ///
+  /// @returns The code unit after the nb-char, or Position if it's not an
+  ///          nb-char.
+  StringRef::iterator skip_nb_char(StringRef::iterator Position);
+
+  /// @brief Skip a single b-break[28] starting at Position.
+  ///
+  /// A b-break is 0xD 0xA | 0xD | 0xA
+  ///
+  /// @returns The code unit after the b-break, or Position if it's not a
+  ///          b-break.
+  StringRef::iterator skip_b_break(StringRef::iterator Position);
+
+  /// @brief Skip a single s-white[33] starting at Position.
+  ///
+  /// A s-white is 0x20 | 0x9
+  ///
+  /// @returns The code unit after the s-white, or Position if it's not a
+  ///          s-white.
+  StringRef::iterator skip_s_white(StringRef::iterator Position);
+
+  /// @brief Skip a single ns-char[34] starting at Position.
+  ///
+  /// A ns-char is nb-char - s-white
+  ///
+  /// @returns The code unit after the ns-char, or Position if it's not a
+  ///          ns-char.
+  StringRef::iterator skip_ns_char(StringRef::iterator Position);
+
+  typedef StringRef::iterator (Scanner::*SkipWhileFunc)(StringRef::iterator);
+  /// @brief Skip minimal well-formed code unit subsequences until Func
+  ///        returns its input.
+  ///
+  /// @returns The code unit after the last minimal well-formed code unit
+  ///          subsequence that Func accepted.
+  StringRef::iterator skip_while( SkipWhileFunc Func
+                                , StringRef::iterator Position);
+
+  /// @brief Scan ns-uri-char[39]s starting at Cur.
+  ///
+  /// This updates Cur and Column while scanning.
+  ///
+  /// @returns A StringRef starting at Cur which covers the longest contiguous
+  ///          sequence of ns-uri-char.
+  StringRef scan_ns_uri_char();
+
+  /// @brief Scan ns-plain-one-line[133] starting at \a Cur.
+  StringRef scan_ns_plain_one_line();
+
+  /// @brief Consume a minimal well-formed code unit subsequence starting at
+  ///        \a Cur. Return false if it is not the same Unicode scalar value as
+  ///        \a Expected. This updates \a Column.
+  bool consume(uint32_t Expected);
+
+  /// @brief Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column.
+  void skip(uint32_t Distance);
+
+  /// @brief Return true if the minimal well-formed code unit subsequence at
+  ///        \a Position is whitespace or a new line.
+  bool isBlankOrBreak(StringRef::iterator Position);
+
+  /// @brief If IsSimpleKeyAllowed, create and push_back a new SimpleKey.
+  void saveSimpleKeyCandidate( TokenQueueT::iterator Tok
+                             , unsigned AtColumn
+                             , bool IsRequired);
+
+  /// @brief Remove simple keys that can no longer be valid simple keys.
+  ///
+  /// Invalid simple keys are not on the current line or are further than 1024
+  /// columns back.
+  void removeStaleSimpleKeyCandidates();
+
+  /// @brief Remove all simple keys on FlowLevel \a Level.
+  void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level);
+
+  /// @brief Unroll indentation in \a Indents back to \a Col. Creates BlockEnd
+  ///        tokens if needed.
+  bool unrollIndent(int ToColumn);
+
+  /// @brief Increase indent to \a Col. Creates \a Kind token at \a InsertPoint
+  ///        if needed.
+  bool rollIndent( int ToColumn
+                 , Token::TokenKind Kind
+                 , TokenQueueT::iterator InsertPoint);
+
+  /// @brief Skip whitespace and comments until the start of the next token.
+  void scanToNextToken();
+
+  /// @brief Must be the first token generated.
+  bool scanStreamStart();
+
+  /// @brief Generate tokens needed to close out the stream.
+  bool scanStreamEnd();
+
+  /// @brief Scan a %BLAH directive.
+  bool scanDirective();
+
+  /// @brief Scan a ... or ---.
+  bool scanDocumentIndicator(bool IsStart);
+
+  /// @brief Scan a [ or { and generate the proper flow collection start token.
+  bool scanFlowCollectionStart(bool IsSequence);
+
+  /// @brief Scan a ] or } and generate the proper flow collection end token.
+  bool scanFlowCollectionEnd(bool IsSequence);
+
+  /// @brief Scan the , that separates entries in a flow collection.
+  bool scanFlowEntry();
+
+  /// @brief Scan the - that starts block sequence entries.
+  bool scanBlockEntry();
+
+  /// @brief Scan an explicit ? indicating a key.
+  bool scanKey();
+
+  /// @brief Scan an explicit : indicating a value.
+  bool scanValue();
+
+  /// @brief Scan a quoted scalar.
+  bool scanFlowScalar(bool IsDoubleQuoted);
+
+  /// @brief Scan an unquoted scalar.
+  bool scanPlainScalar();
+
+  /// @brief Scan an Alias or Anchor starting with * or &.
+  bool scanAliasOrAnchor(bool IsAlias);
+
+  /// @brief Scan a block scalar starting with | or >.
+  bool scanBlockScalar(bool IsLiteral);
+
+  /// @brief Scan a tag of the form !stuff.
+  bool scanTag();
+
+  /// @brief Dispatch to the next scanning function based on \a *Current.
+  bool fetchMoreTokens();
+
+  /// @brief The SourceMgr used for diagnostics and buffer management.
+  SourceMgr &SM;
+
+  /// @brief The original input.
+  MemoryBuffer *InputBuffer;
+
+  /// @brief The current position of the scanner.
+  StringRef::iterator Current;
+
+  /// @brief The end of the input (one past the last character).
+  StringRef::iterator End;
+
+  /// @brief Current YAML indentation level in spaces.
+  int Indent;
+
+  /// @brief Current column number in Unicode code points.
+  unsigned Column;
+
+  /// @brief Current line number.
+  unsigned Line;
+
+  /// @brief How deep we are in flow style containers. 0 Means at block level.
+  unsigned FlowLevel;
+
+  /// @brief Are we at the start of the stream?
+  bool IsStartOfStream;
+
+  /// @brief Can the next token be the start of a simple key?
+  bool IsSimpleKeyAllowed;
+
+  /// @brief Is the next token required to start a simple key?
+  bool IsSimpleKeyRequired;
+
+  /// @brief True if an error has occurred.
+  bool Failed;
+
+  /// @brief Queue of tokens. This is required to queue up tokens while looking
+  ///        for the end of a simple key. And for cases where a single character
+  ///        can produce multiple tokens (e.g. BlockEnd).
+  TokenQueueT TokenQueue;
+
+  /// @brief Indentation levels.
+  SmallVector<int, 4> Indents;
+
+  /// @brief Potential simple keys.
+  SmallVector<SimpleKey, 4> SimpleKeys;
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+/// encodeUTF8 - Append the UTF-8 encoding of \a UnicodeScalarValue to
+/// \a Result. Values above 0x10FFFF append nothing.
+static void encodeUTF8( uint32_t UnicodeScalarValue
+                      , SmallVectorImpl<char> &Result) {
+  const uint32_t CP = UnicodeScalarValue;
+
+  // Out of range: nothing is appended.
+  if (CP > 0x10FFFF)
+    return;
+
+  // One byte: 0xxxxxxx.
+  if (CP <= 0x7F) {
+    Result.push_back(CP & 0x7F);
+    return;
+  }
+
+  // Two bytes: 110xxxxx 10xxxxxx.
+  if (CP <= 0x7FF) {
+    Result.push_back(uint8_t(0xC0 | (CP >> 6)));
+    Result.push_back(uint8_t(0x80 | (CP & 0x3F)));
+    return;
+  }
+
+  // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
+  if (CP <= 0xFFFF) {
+    Result.push_back(uint8_t(0xE0 | (CP >> 12)));
+    Result.push_back(uint8_t(0x80 | ((CP >> 6) & 0x3F)));
+    Result.push_back(uint8_t(0x80 | (CP & 0x3F)));
+    return;
+  }
+
+  // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
+  Result.push_back(uint8_t(0xF0 | (CP >> 18)));
+  Result.push_back(uint8_t(0x80 | ((CP >> 12) & 0x3F)));
+  Result.push_back(uint8_t(0x80 | ((CP >> 6) & 0x3F)));
+  Result.push_back(uint8_t(0x80 | (CP & 0x3F)));
+}
+
+/// Return the label dumpTokens prints in front of a token of kind \a Kind.
+/// TK_Error has no label, so the empty string is returned for it.
+static const char *tokenKindLabel(Token::TokenKind Kind) {
+  switch (Kind) {
+  case Token::TK_StreamStart:        return "Stream-Start: ";
+  case Token::TK_StreamEnd:          return "Stream-End: ";
+  case Token::TK_VersionDirective:   return "Version-Directive: ";
+  case Token::TK_TagDirective:       return "Tag-Directive: ";
+  case Token::TK_DocumentStart:      return "Document-Start: ";
+  case Token::TK_DocumentEnd:        return "Document-End: ";
+  case Token::TK_BlockEntry:         return "Block-Entry: ";
+  case Token::TK_BlockEnd:           return "Block-End: ";
+  case Token::TK_BlockSequenceStart: return "Block-Sequence-Start: ";
+  case Token::TK_BlockMappingStart:  return "Block-Mapping-Start: ";
+  case Token::TK_FlowEntry:          return "Flow-Entry: ";
+  case Token::TK_FlowSequenceStart:  return "Flow-Sequence-Start: ";
+  case Token::TK_FlowSequenceEnd:    return "Flow-Sequence-End: ";
+  case Token::TK_FlowMappingStart:   return "Flow-Mapping-Start: ";
+  case Token::TK_FlowMappingEnd:     return "Flow-Mapping-End: ";
+  case Token::TK_Key:                return "Key: ";
+  case Token::TK_Value:              return "Value: ";
+  case Token::TK_Scalar:             return "Scalar: ";
+  case Token::TK_Alias:              return "Alias: ";
+  case Token::TK_Anchor:             return "Anchor: ";
+  case Token::TK_Tag:                return "Tag: ";
+  case Token::TK_Error:              break;
+  }
+  return "";
+}
+
+/// Scan \a Input and write one line per token to \a OS: the token's label
+/// followed by its source range. Returns true once the Stream-End token has
+/// been printed, or false as soon as an error token has been printed.
+bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) {
+  SourceMgr SM;
+  Scanner scanner(Input, SM);
+  for (;;) {
+    const Token Tok = scanner.getNext();
+    OS << tokenKindLabel(Tok.Kind);
+    OS << Tok.Range << "\n";
+    if (Tok.Kind == Token::TK_Error)
+      return false;
+    if (Tok.Kind == Token::TK_StreamEnd)
+      return true;
+  }
+}
+
+/// Run the scanner over all of \a Input, discarding the tokens. Returns true
+/// if Stream-End is reached and false if an error token is produced first.
+bool yaml::scanTokens(StringRef Input) {
+  llvm::SourceMgr SM;
+  llvm::yaml::Scanner scanner(Input, SM);
+  while (true) {
+    const llvm::yaml::Token Tok = scanner.getNext();
+    if (Tok.Kind == Token::TK_Error)
+      return false;
+    if (Tok.Kind == Token::TK_StreamEnd)
+      return true;
+  }
+}
+
+/// Return \a Input with backslash, double quote, control characters and
+/// multi-byte UTF-8 sequences replaced by backslash escapes. If an invalid
+/// UTF-8 sequence is found, U+FFFD is appended and the text escaped so far is
+/// returned without processing the rest of \a Input.
+std::string yaml::escape(StringRef Input) {
+  std::string Escaped;
+  for (StringRef::iterator I = Input.begin(), E = Input.end(); I != E; ++I) {
+    const char C = *I;
+
+    // Characters that have a dedicated short escape.
+    const char *Short = 0;
+    switch (C) {
+    case '\\': Short = "\\\\"; break;
+    case '"':  Short = "\\\""; break;
+    case 0:    Short = "\\0";  break;
+    case 0x07: Short = "\\a";  break;
+    case 0x08: Short = "\\b";  break;
+    case 0x09: Short = "\\t";  break;
+    case 0x0A: Short = "\\n";  break;
+    case 0x0B: Short = "\\v";  break;
+    case 0x0C: Short = "\\f";  break;
+    case 0x0D: Short = "\\r";  break;
+    case 0x1B: Short = "\\e";  break;
+    default:   break;
+    }
+    if (Short) {
+      Escaped += Short;
+      continue;
+    }
+
+    // Control characters not handled above.
+    if (C >= 0 && C < 0x20) {
+      std::string Hex = utohexstr(C);
+      Escaped += "\\x" + std::string(2 - Hex.size(), '0') + Hex;
+      continue;
+    }
+
+    // Remaining 7-bit characters are copied through unchanged.
+    if (!(C & 0x80)) {
+      Escaped.push_back(C);
+      continue;
+    }
+
+    // UTF-8 multiple code unit subsequence.
+    UTF8Decoded Decoded = decodeUTF8(StringRef(I, Input.end() - I));
+    if (Decoded.second == 0) {
+      // Found invalid char.
+      SmallString<4> Replacement;
+      encodeUTF8(0xFFFD, Replacement);
+      Escaped.insert(Escaped.end(), Replacement.begin(), Replacement.end());
+      // FIXME: Error reporting.
+      return Escaped;
+    }
+
+    switch (Decoded.first) {
+    case 0x85:
+      Escaped += "\\N";
+      break;
+    case 0xA0:
+      Escaped += "\\_";
+      break;
+    case 0x2028:
+      Escaped += "\\L";
+      break;
+    case 0x2029:
+      Escaped += "\\P";
+      break;
+    default: {
+      // Use the shortest of \x, \u and \U that can hold the value.
+      std::string Hex = utohexstr(Decoded.first);
+      if (Hex.size() <= 2)
+        Escaped += "\\x" + std::string(2 - Hex.size(), '0') + Hex;
+      else if (Hex.size() <= 4)
+        Escaped += "\\u" + std::string(4 - Hex.size(), '0') + Hex;
+      else if (Hex.size() <= 8)
+        Escaped += "\\U" + std::string(8 - Hex.size(), '0') + Hex;
+      break;
+    }
+    }
+
+    // The loop increment steps over the final byte of the sequence.
+    I += Decoded.second - 1;
+  }
+  return Escaped;
+}
+
+/// Construct a scanner over \a Input. The input is wrapped in a MemoryBuffer
+/// named "YAML" which is registered with \a sm; scanning starts at the
+/// beginning of that buffer.
+Scanner::Scanner(StringRef Input, SourceMgr &sm)
+  : SM(sm)
+  , Indent(-1)
+  , Column(0)
+  , Line(0)
+  , FlowLevel(0)
+  , IsStartOfStream(true)
+  , IsSimpleKeyAllowed(true)
+  , IsSimpleKeyRequired(false)
+  , Failed(false) {
+  InputBuffer = MemoryBuffer::getMemBuffer(Input, "YAML");
+
+  // Scan the buffer's contents from its first to one past its last byte.
+  Current = InputBuffer->getBufferStart();
+  End = InputBuffer->getBufferEnd();
+
+  // Register the buffer with the SourceMgr.
+  SM.AddNewSourceBuffer(InputBuffer, SMLoc());
+}
+
+/// Return a reference to the token at the front of the queue, fetching more
+/// tokens first when the queue is empty or while the front token is still
+/// recorded as a simple key candidate. If fetching fails, the queue is
+/// replaced by a single default-constructed Token, which is returned.
+Token &Scanner::peekNext() {
+  // Fetch on the first pass only if there is nothing queued; every later pass
+  // exists because the front token is still an unresolved candidate.
+  bool MustFetch = TokenQueue.empty();
+  for (;;) {
+    if (MustFetch && !fetchMoreTokens()) {
+      TokenQueue.clear();
+      TokenQueue.push_back(Token());
+      return TokenQueue.front();
+    }
+    assert(!TokenQueue.empty() &&
+            "fetchMoreTokens lied about getting tokens!");
+
+    removeStaleSimpleKeyCandidates();
+    SimpleKey Candidate;
+    Candidate.Tok = TokenQueue.front();
+    if (std::find(SimpleKeys.begin(), SimpleKeys.end(), Candidate)
+        == SimpleKeys.end())
+      return TokenQueue.front();
+
+    MustFetch = true;
+  }
+}
+
+/// Return a copy of the next token and remove it from the queue.
+Token Scanner::getNext() {
+  Token Result = peekNext();
+
+  // The queue can be empty here if fetching the next token failed.
+  if (!TokenQueue.empty())
+    TokenQueue.pop_front();
+
+  // With no tokens left in the queue nothing can still refer to one, so the
+  // token allocator is reset in one go.
+  if (TokenQueue.empty())
+    TokenQueue.Alloc.Reset();
+
+  return Result;
+}
+
+/// Skip a single nb-char (a printable character that is not a line break or
+/// the byte order mark) starting at \a Position.
+///
+/// Returns the position just past the character, or \a Position itself when
+/// \a Position is at \a End or does not start an nb-char.
+StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) {
+  // Never look at the byte at End; skip_s_white and skip_ns_char guard the
+  // same way.
+  if (Position == End)
+    return Position;
+
+  // Check 7 bit c-printable - b-char.
+  if (   *Position == 0x09
+      || (*Position >= 0x20 && *Position <= 0x7E))
+    return Position + 1;
+
+  // Check for valid UTF-8.
+  if (uint8_t(*Position) & 0x80) {
+    UTF8Decoded u8d = decodeUTF8(Position);
+    // second == 0 is treated as a failed decode; 0xFEFF (the BOM) is
+    // excluded from nb-char.
+    if (   u8d.second != 0
+        && u8d.first != 0xFEFF
+        && ( u8d.first == 0x85
+          || ( u8d.first >= 0xA0
+            && u8d.first <= 0xD7FF)
+          || ( u8d.first >= 0xE000
+            && u8d.first <= 0xFFFD)
+          || ( u8d.first >= 0x10000
+            && u8d.first <= 0x10FFFF)))
+      return Position + u8d.second;
+  }
+  return Position;
+}
+
+/// Skip a single line break (CR LF, CR or LF) starting at \a Position.
+///
+/// Returns the position just past the break, or \a Position itself when
+/// \a Position is at \a End or does not start a line break.
+StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
+  // Never look at the byte at End; skip_s_white and skip_ns_char guard the
+  // same way.
+  if (Position == End)
+    return Position;
+
+  if (*Position == 0x0D) {
+    // CR LF counts as one break.
+    if (Position + 1 != End && *(Position + 1) == 0x0A)
+      return Position + 2;
+    return Position + 1;
+  }
+
+  if (*Position == 0x0A)
+    return Position + 1;
+  return Position;
+}
+
+
+/// Skip one space or tab at \a Position. Returns the position just past it,
+/// or \a Position itself at \a End or on any other character.
+StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) {
+  const bool IsWhite =    Position != End
+                       && (*Position == ' ' || *Position == '\t');
+  return IsWhite ? Position + 1 : Position;
+}
+
+/// Skip one ns-char (an nb-char that is neither space nor tab) at
+/// \a Position. Returns \a Position itself at \a End, on a space or tab, or
+/// when skip_nb_char does not advance.
+StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) {
+  if (Position != End && *Position != ' ' && *Position != '\t')
+    return skip_nb_char(Position);
+  return Position;
+}
+
+/// Apply the skip function \a Func repeatedly, starting at \a Position, until
+/// it stops advancing. Returns the last position reached.
+StringRef::iterator Scanner::skip_while( SkipWhileFunc Func
+                                       , StringRef::iterator Position) {
+  for (StringRef::iterator Next = (this->*Func)(Position);
+       Next != Position;
+       Next = (this->*Func)(Position))
+    Position = Next;
+  return Position;
+}
+
+/// Return true if \a C is a hexadecimal digit: 0-9, a-f or A-F.
+///
+/// The upper bound of the letter ranges is 'f'/'F'; letters after that are
+/// not hexadecimal digits and must not be accepted in %XX escapes.
+static bool is_ns_hex_digit(const char C) {
+  return    (C >= '0' && C <= '9')
+         || (C >= 'a' && C <= 'f')
+         || (C >= 'A' && C <= 'F');
+}
+
+/// Return true if \a C is a word character: a decimal digit, an ASCII letter
+/// or '-'.
+static bool is_ns_word_char(const char C) {
+  return    C == '-'
+         || (C >= '0' && C <= '9')
+         || (C >= 'a' && C <= 'z')
+         || (C >= 'A' && C <= 'Z');
+}
+
+/// Consume the longest run of URI characters starting at \a Current and
+/// return it. \a Current and \a Column are advanced past the run.
+///
+/// A URI character is a %XX escape (two hexadecimal digits), a word character
+/// or one of #;/?:@&=+$,_.!~*'()[]. A %XX escape is consumed as a whole so
+/// that its digits are never re-examined as ordinary characters.
+StringRef Scanner::scan_ns_uri_char() {
+  StringRef::iterator Start = Current;
+  while (Current != End) {
+    if (*Current == '%') {
+      // '%' is only valid as the start of a complete %XX escape.
+      if (   Current + 2 < End
+          && is_ns_hex_digit(*(Current + 1))
+          && is_ns_hex_digit(*(Current + 2))) {
+        Current += 3;
+        Column += 3;
+        continue;
+      }
+      break;
+    }
+
+    if (   is_ns_word_char(*Current)
+        || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]")
+          != StringRef::npos) {
+      ++Current;
+      ++Column;
+      continue;
+    }
+    break;
+  }
+  return StringRef(Start, Current - Start);
+}
+
+/// Scan forward from \a Current and return the text covered, stopping at
+/// \a End, at a ':' that is not followed by an ns-char, at a '#' that is not
+/// preceded by an ns-char, or at anything skip_nb_char does not accept.
+///
+/// The character at \a Current on entry is consumed without inspection.
+/// NOTE(review): that first character advances \a Current but not \a Column,
+/// unlike every later character - confirm whether this is intended.
+StringRef Scanner::scan_ns_plain_one_line() {
+  StringRef::iterator start = Current;
+  // The first character must already be verified.
+  ++Current;
+  while (true) {
+    if (Current == End) {
+      break;
+    } else if (*Current == ':') {
+      // Check if the next character is a ns-char.
+      if (Current + 1 == End)
+        break;
+      StringRef::iterator i = skip_ns_char(Current + 1);
+      if (Current + 1 != i) {
+        Current = i;
+        Column += 2; // Consume both the ':' and ns-char.
+      } else
+        break;
+    } else if (*Current == '#') {
+      // Check if the previous character was a ns-char.
+      // The & 0x80 check is to check for the trailing byte of a utf-8
+      if (*(Current - 1) & 0x80 || skip_ns_char(Current - 1) == Current) {
+        ++Current;
+        ++Column;
+      } else
+        break;
+    } else {
+      // Any other nb-char, including blanks; may span several bytes but
+      // counts as one column.
+      StringRef::iterator i = skip_nb_char(Current);
+      if (i == Current)
+        break;
+      Current = i;
+      ++Column;
+    }
+  }
+  return StringRef(start, Current - start);
+}
+
+/// If the byte at \a Current equals \a Expected, step over it (advancing
+/// \a Current and \a Column) and return true; otherwise return false and
+/// leave the position alone. Only 7-bit values are supported: a non-ASCII
+/// \a Expected or a non-ASCII current byte is a fatal error.
+bool Scanner::consume(uint32_t Expected) {
+  if (Expected >= 0x80)
+    report_fatal_error("Not dealing with this yet");
+  if (Current == End)
+    return false;
+
+  const uint8_t Unit = uint8_t(*Current);
+  if (Unit >= 0x80)
+    report_fatal_error("Not dealing with this yet");
+  if (Unit != Expected)
+    return false;
+
+  ++Current;
+  ++Column;
+  return true;
+}
+
+/// Advance both \a Column and \a Current by \a Distance. No bounds check is
+/// made against \a End.
+void Scanner::skip(uint32_t Distance) {
+  Column += Distance;
+  Current += Distance;
+}
+
+/// Return true if \a Position is before \a End and points at a space, tab,
+/// carriage return or line feed.
+bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
+  if (Position == End)
+    return false;
+  switch (*Position) {
+  case ' ':
+  case '\t':
+  case '\r':
+  case '\n':
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// Record \a Tok as a possible simple key starting at column \a AtColumn on
+/// the current line and flow level. Does nothing unless IsSimpleKeyAllowed is
+/// set.
+void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok
+                                    , unsigned AtColumn
+                                    , bool IsRequired) {
+  if (!IsSimpleKeyAllowed)
+    return;
+
+  SimpleKey Candidate;
+  Candidate.Tok = Tok;
+  Candidate.Line = Line;
+  Candidate.Column = AtColumn;
+  Candidate.IsRequired = IsRequired;
+  Candidate.FlowLevel = FlowLevel;
+  SimpleKeys.push_back(Candidate);
+}
+
+/// Drop every simple key candidate that is not on the current line or that
+/// started more than 1024 columns before the current column. Dropping a
+/// required candidate reports an error at that candidate's token.
+void Scanner::removeStaleSimpleKeyCandidates() {
+  SmallVectorImpl<SimpleKey>::iterator I = SimpleKeys.begin();
+  while (I != SimpleKeys.end()) {
+    const bool OnCurrentLine = I->Line == Line;
+    const bool WithinReach = I->Column + 1024 >= Column;
+    if (OnCurrentLine && WithinReach) {
+      ++I;
+      continue;
+    }
+
+    if (I->IsRequired)
+      setError( "Could not find expected : for simple key"
+              , I->Tok->Range.begin());
+    I = SimpleKeys.erase(I);
+  }
+}
+
+/// If the most recently saved simple key candidate is on flow level
+/// \a Level, remove it. At most one candidate is removed per call.
+void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) {
+  if (SimpleKeys.empty())
+    return;
+  if (SimpleKeys.back().FlowLevel == Level)
+    SimpleKeys.pop_back();
+}
+
+/// Pop saved indentation levels until \a Indent is no greater than
+/// \a ToColumn, queueing one TK_BlockEnd token per level popped. Does nothing
+/// inside flow context. Always returns true.
+bool Scanner::unrollIndent(int ToColumn) {
+  // Indentation is ignored in flow.
+  if (FlowLevel == 0) {
+    while (Indent > ToColumn) {
+      Token BlockEnd;
+      BlockEnd.Kind = Token::TK_BlockEnd;
+      BlockEnd.Range = StringRef(Current, 1);
+      TokenQueue.push_back(BlockEnd);
+      Indent = Indents.pop_back_val();
+    }
+  }
+  return true;
+}
+
+/// If \a ToColumn is deeper than the current \a Indent, save the current
+/// indent, make \a ToColumn the new one and insert an empty-range token of
+/// kind \a Kind before \a InsertPoint. Does nothing inside flow context.
+/// Always returns true.
+bool Scanner::rollIndent( int ToColumn
+                        , Token::TokenKind Kind
+                        , TokenQueueT::iterator InsertPoint) {
+  if (FlowLevel || Indent >= ToColumn)
+    return true;
+
+  Indents.push_back(Indent);
+  Indent = ToColumn;
+
+  Token Start;
+  Start.Kind = Kind;
+  Start.Range = StringRef(Current, 0);
+  TokenQueue.insert(InsertPoint, Start);
+  return true;
+}
+
+/// Advance \a Current past spaces, tabs, '#' comments and line breaks until
+/// it rests on a character that is none of those.
+///
+/// Each line break consumed increments \a Line and resets \a Column to 0.
+/// Outside flow context (FlowLevel == 0) a consumed line break also sets
+/// IsSimpleKeyAllowed.
+void Scanner::scanToNextToken() {
+  while (true) {
+    // NOTE(review): *Current is read here and below without comparing
+    // against End; presumably the byte at End is readable - confirm.
+    while (*Current == ' ' || *Current == '\t') {
+      skip(1);
+    }
+
+    // Skip comment.
+    if (*Current == '#') {
+      while (true) {
+        // This may skip more than one byte, thus Column is only incremented
+        // for code points.
+        StringRef::iterator i = skip_nb_char(Current);
+        if (i == Current)
+          break;
+        Current = i;
+        ++Column;
+      }
+    }
+
+    // Skip EOL.
+    StringRef::iterator i = skip_b_break(Current);
+    // No line break here means the next token starts at Current.
+    if (i == Current)
+      break;
+    Current = i;
+    ++Line;
+    Column = 0;
+    // New lines may start a simple key.
+    if (!FlowLevel)
+      IsSimpleKeyAllowed = true;
+  }
+}
+
+/// Queue the TK_StreamStart token. Its range covers the EI.second bytes
+/// reported by getUnicodeEncoding for the current input, and \a Current is
+/// advanced past them; \a Column is left unchanged. Always returns true.
+bool Scanner::scanStreamStart() {
+  IsStartOfStream = false;
+
+  const EncodingInfo Encoding = getUnicodeEncoding(currentInput());
+
+  Token StreamStart;
+  StreamStart.Kind = Token::TK_StreamStart;
+  StreamStart.Range = StringRef(Current, Encoding.second);
+  TokenQueue.push_back(StreamStart);
+
+  Current += Encoding.second;
+  return true;
+}
+
+/// Close out the stream: act as if the input ended with a line break, unroll
+/// all indentation, drop pending simple key candidates and queue an
+/// empty-range TK_StreamEnd token. Always returns true.
+bool Scanner::scanStreamEnd() {
+  // Force an ending new line if one isn't present.
+  if (Column != 0) {
+    ++Line;
+    Column = 0;
+  }
+
+  unrollIndent(-1);
+  SimpleKeys.clear();
+  IsSimpleKeyAllowed = false;
+
+  Token StreamEnd;
+  StreamEnd.Kind = Token::TK_StreamEnd;
+  StreamEnd.Range = StringRef(Current, 0);
+  TokenQueue.push_back(StreamEnd);
+  return true;
+}
+
+/// Scan a directive starting at '%'. Only the YAML directive is recognised:
+/// for it a TK_VersionDirective token covering the whole directive is queued
+/// and true is returned. Any other directive name returns false without
+/// queueing a token.
+bool Scanner::scanDirective() {
+  // Reset the indentation level.
+  unrollIndent(-1);
+  SimpleKeys.clear();
+  IsSimpleKeyAllowed = false;
+
+  StringRef::iterator DirectiveStart = Current;
+  consume('%');
+
+  // The name runs up to the next blank; then skip the blanks after it.
+  StringRef::iterator NameStart = Current;
+  Current = skip_while(&Scanner::skip_ns_char, Current);
+  StringRef Name(NameStart, Current - NameStart);
+  Current = skip_while(&Scanner::skip_s_white, Current);
+
+  if (!Name.equals("YAML"))
+    return false;
+
+  // Skip the run of non-blank characters after the name.
+  Current = skip_while(&Scanner::skip_ns_char, Current);
+
+  Token Directive;
+  Directive.Kind = Token::TK_VersionDirective;
+  Directive.Range = StringRef(DirectiveStart, Current - DirectiveStart);
+  TokenQueue.push_back(Directive);
+  return true;
+}
+
+/// Consume a three character document indicator at \a Current and queue a
+/// TK_DocumentStart (\a IsStart true) or TK_DocumentEnd token for it.
+/// Indentation is fully unrolled and pending simple key candidates are
+/// dropped first. Always returns true.
+bool Scanner::scanDocumentIndicator(bool IsStart) {
+  unrollIndent(-1);
+  SimpleKeys.clear();
+  IsSimpleKeyAllowed = false;
+
+  Token Indicator;
+  if (IsStart)
+    Indicator.Kind = Token::TK_DocumentStart;
+  else
+    Indicator.Kind = Token::TK_DocumentEnd;
+  Indicator.Range = StringRef(Current, 3);
+  skip(3);
+  TokenQueue.push_back(Indicator);
+  return true;
+}
+
+/// Consume a '[' (\a IsSequence true) or '{' and queue the matching flow
+/// collection start token. The token is saved as a simple key candidate,
+/// simple keys are allowed afterwards and \a FlowLevel is incremented.
+/// Always returns true.
+bool Scanner::scanFlowCollectionStart(bool IsSequence) {
+  Token Open;
+  if (IsSequence)
+    Open.Kind = Token::TK_FlowSequenceStart;
+  else
+    Open.Kind = Token::TK_FlowMappingStart;
+  Open.Range = StringRef(Current, 1);
+  skip(1);
+  TokenQueue.push_back(Open);
+
+  // [ and { may begin a simple key. This must happen before FlowLevel and
+  // IsSimpleKeyAllowed change, because the candidate records their old state.
+  saveSimpleKeyCandidate(TokenQueue.back(), Column - 1, false);
+
+  // And may also be followed by a simple key.
+  IsSimpleKeyAllowed = true;
+  ++FlowLevel;
+  return true;
+}
+
+/// Consume a ']' (\a IsSequence true) or '}' and queue the matching flow
+/// collection end token. \a FlowLevel is decremented unless it is already 0.
+/// Always returns true.
+bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
+  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
+  IsSimpleKeyAllowed = false;
+
+  Token Close;
+  if (IsSequence)
+    Close.Kind = Token::TK_FlowSequenceEnd;
+  else
+    Close.Kind = Token::TK_FlowMappingEnd;
+  Close.Range = StringRef(Current, 1);
+  skip(1);
+  TokenQueue.push_back(Close);
+
+  if (FlowLevel != 0)
+    --FlowLevel;
+  return true;
+}
+
+/// Consume one character at \a Current and queue a TK_FlowEntry token for
+/// it. A simple key may follow. Always returns true.
+bool Scanner::scanFlowEntry() {
+  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
+  IsSimpleKeyAllowed = true;
+
+  Token Entry;
+  Entry.Kind = Token::TK_FlowEntry;
+  Entry.Range = StringRef(Current, 1);
+  skip(1);
+  TokenQueue.push_back(Entry);
+  return true;
+}
+
+/// Consume one character at \a Current and queue a TK_BlockEntry token for
+/// it, first queueing a TK_BlockSequenceStart token if the current column is
+/// deeper than the current indent. Always returns true.
+bool Scanner::scanBlockEntry() {
+  rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end());
+  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
+  IsSimpleKeyAllowed = true;
+
+  Token Entry;
+  Entry.Kind = Token::TK_BlockEntry;
+  Entry.Range = StringRef(Current, 1);
+  skip(1);
+  TokenQueue.push_back(Entry);
+  return true;
+}
+
+/// Consume one character at \a Current and queue a TK_Key token for it. In
+/// block context a TK_BlockMappingStart token is queued first if the current
+/// column is deeper than the current indent, and a simple key may follow.
+/// Always returns true.
+bool Scanner::scanKey() {
+  const bool InBlockContext = !FlowLevel;
+  if (InBlockContext)
+    rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
+
+  removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
+  IsSimpleKeyAllowed = InBlockContext;
+
+  Token Key;
+  Key.Kind = Token::TK_Key;
+  Key.Range = StringRef(Current, 1);
+  skip(1);
+  TokenQueue.push_back(Key);
+  return true;
+}
+
+/// Consume one character at \a Current and queue a TK_Value token for it.
+///
+/// If a simple key candidate is pending, a TK_Key token is inserted in front
+/// of the candidate's token (preceded by a TK_BlockMappingStart token if the
+/// candidate's column is deeper than the current indent). Always returns
+/// true.
+bool Scanner::scanValue() {
+  if (SimpleKeys.empty()) {
+    // No pending candidate.
+    if (!FlowLevel)
+      rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
+    IsSimpleKeyAllowed = !FlowLevel;
+  } else {
+    // The previous token could have been a simple key, so insert the key
+    // token into the token queue.
+    SimpleKey Candidate = SimpleKeys.pop_back_val();
+    Token Key;
+    Key.Kind = Token::TK_Key;
+    Key.Range = Candidate.Tok->Range;
+
+    // Locate the candidate's token in the queue.
+    TokenQueueT::iterator Pos = TokenQueue.begin();
+    TokenQueueT::iterator QueueEnd = TokenQueue.end();
+    while (Pos != QueueEnd && !(Pos == Candidate.Tok))
+      ++Pos;
+    assert(Pos != QueueEnd && "SimpleKey not in token queue!");
+    Pos = TokenQueue.insert(Pos, Key);
+
+    // We may also need to add a Block-Mapping-Start token.
+    rollIndent(Candidate.Column, Token::TK_BlockMappingStart, Pos);
+
+    IsSimpleKeyAllowed = false;
+  }
+
+  Token Value;
+  Value.Kind = Token::TK_Value;
+  Value.Range = StringRef(Current, 1);
+  skip(1);
+  TokenQueue.push_back(Value);
+  return true;
+}
+
+// Forbidding inlining improves performance by roughly 20%.
+// FIXME: Remove once llvm optimizes this to the faster version without hints.
+LLVM_ATTRIBUTE_NOINLINE static bool
+wasEscaped(StringRef::iterator First, StringRef::iterator Position);
+
+// Returns true if the character at 'Position' is preceded by an odd number
+// of consecutive '\' characters, i.e. it was escaped. 'First' is the first
+// character of the string; the backwards scan never reads before it.
+static bool wasEscaped(StringRef::iterator First,
+                       StringRef::iterator Position) {
+  assert(Position - 1 >= First);
+  // Count the run of backslashes that ends directly before 'Position'.
+  unsigned Backslashes = 0;
+  for (StringRef::iterator I = Position - 1; I >= First && *I == '\\'; --I)
+    ++Backslashes;
+  return Backslashes % 2 == 1;
+}
+
+/// Scan a quoted scalar starting at the opening quote at \a Current.
+///
+/// On success a TK_Scalar token covering the scalar, including both quotes,
+/// is queued and saved as a simple key candidate, and true is returned. If
+/// the closing quote is missing, an error is reported and false is returned;
+/// \a Current is never moved past \a End.
+///
+/// Only the single quoted path maintains \a Column and \a Line while
+/// scanning; the double quoted path advances \a Current alone until the
+/// closing quote.
+bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
+  StringRef::iterator Start = Current;
+  unsigned ColStart = Column;
+  if (IsDoubleQuoted) {
+    do {
+      ++Current;
+      while (Current != End && *Current != '"')
+        ++Current;
+      // Repeat until the previous character was not a '\' or was an escaped
+      // backslash. Stop at End so an unterminated scalar cannot run off the
+      // buffer.
+    } while (   Current != End
+             && *(Current - 1) == '\\'
+             && wasEscaped(Start + 1, Current));
+  } else {
+    skip(1);
+    while (Current != End) {
+      // Skip a ' followed by another '.
+      if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') {
+        skip(2);
+        continue;
+      } else if (*Current == '\'')
+        break;
+      StringRef::iterator i = skip_nb_char(Current);
+      if (i == Current) {
+        // Not an nb-char: only a line break may appear here.
+        i = skip_b_break(Current);
+        if (i == Current)
+          break;
+        Current = i;
+        Column = 0;
+        ++Line;
+      } else {
+        Current = i;
+        ++Column;
+      }
+    }
+  }
+
+  // Both loops can stop without having found the closing quote (end of input
+  // or, for single quoted scalars, a character that is neither an nb-char nor
+  // a line break). Report that instead of consuming an arbitrary character.
+  const char Quote = IsDoubleQuoted ? '"' : '\'';
+  if (Current == End || *Current != Quote) {
+    setError("Expected quote at end of scalar", Current);
+    return false;
+  }
+
+  skip(1); // Skip ending quote.
+  Token T;
+  T.Kind = Token::TK_Scalar;
+  T.Range = StringRef(Start, Current - Start);
+  TokenQueue.push_back(T);
+
+  // Quoted scalars can be simple keys.
+  saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+
+  IsSimpleKeyAllowed = false;
+
+  return true;
+}
+
+/// Scan an unquoted scalar starting at \a Current.
+///
+/// The scalar may continue over several lines. On success a TK_Scalar token
+/// is queued and saved as a simple key candidate. Returns false after
+/// reporting an error for a ':' not followed by a blank, break or ',' in flow
+/// context, for a tab inside the indentation of a continuation line, or for
+/// an empty scalar.
+bool Scanner::scanPlainScalar() {
+  StringRef::iterator Start = Current;
+  unsigned ColStart = Column;
+  unsigned LeadingBlanks = 0;
+  assert(Indent >= -1 && "Indent must be >= -1 !");
+  // In block context a line whose column is below this ends the scalar.
+  unsigned indent = static_cast<unsigned>(Indent + 1);
+  while (true) {
+    // A '#' at the start of a chunk ends the scalar.
+    if (*Current == '#')
+      break;
+
+    // Consume one run of non-blank characters.
+    while (!isBlankOrBreak(Current)) {
+      if (  FlowLevel && *Current == ':'
+          && !(isBlankOrBreak(Current + 1) || *(Current + 1) == ',')) {
+        setError("Found unexpected ':' while scanning a plain scalar", Current);
+        return false;
+      }
+
+      // Check for the end of the plain scalar.
+      if (  (*Current == ':' && isBlankOrBreak(Current + 1))
+          || (  FlowLevel
+          && (StringRef(Current, 1).find_first_of(",:?[]{}")
+              != StringRef::npos)))
+        break;
+
+      StringRef::iterator i = skip_nb_char(Current);
+      if (i == Current)
+        break;
+      Current = i;
+      ++Column;
+    }
+
+    // Are we at the end?
+    if (!isBlankOrBreak(Current))
+      break;
+
+    // Eat blanks.
+    // Tmp looks ahead; Current only moves past the blanks if the scalar
+    // turns out to continue after them.
+    StringRef::iterator Tmp = Current;
+    while (isBlankOrBreak(Tmp)) {
+      StringRef::iterator i = skip_s_white(Tmp);
+      if (i != Tmp) {
+        if (LeadingBlanks && (Column < indent) && *Tmp == '\t') {
+          setError("Found invalid tab character in indentation", Tmp);
+          return false;
+        }
+        Tmp = i;
+        ++Column;
+      } else {
+        i = skip_b_break(Tmp);
+        // Remember that a line break has been seen.
+        if (!LeadingBlanks)
+          LeadingBlanks = 1;
+        Tmp = i;
+        Column = 0;
+        ++Line;
+      }
+    }
+
+    // In block context the scalar ends if the next text is indented less
+    // than required.
+    if (!FlowLevel && Column < indent)
+      break;
+
+    Current = Tmp;
+  }
+  if (Start == Current) {
+    setError("Got empty plain scalar", Start);
+    return false;
+  }
+  Token T;
+  T.Kind = Token::TK_Scalar;
+  T.Range = StringRef(Start, Current - Start);
+  TokenQueue.push_back(T);
+
+  // Plain scalars can be simple keys.
+  saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+
+  IsSimpleKeyAllowed = false;
+
+  return true;
+}
+
+/// Scan an alias (\a IsAlias true) or an anchor starting at the indicator
+/// character at \a Current.
+///
+/// The name runs until a flow indicator, a ':', a character that is not an
+/// ns-char, or the end of input. An indicator with no name after it is an
+/// error. On success a TK_Alias or TK_Anchor token covering the indicator and
+/// the name is queued and saved as a simple key candidate.
+bool Scanner::scanAliasOrAnchor(bool IsAlias) {
+  StringRef::iterator Start = Current;
+  unsigned ColStart = Column;
+  skip(1); // Eat the indicator.
+  while (Current != End) {
+    if (   *Current == '[' || *Current == ']'
+        || *Current == '{' || *Current == '}'
+        || *Current == ','
+        || *Current == ':')
+      break;
+    StringRef::iterator i = skip_ns_char(Current);
+    if (i == Current)
+      break;
+    Current = i;
+    ++Column;
+  }
+
+  // Current is already one past Start because of the indicator, so the name
+  // is empty exactly when nothing beyond the indicator was consumed.
+  if (Start + 1 == Current) {
+    setError("Got empty alias or anchor", Start);
+    return false;
+  }
+
+  Token T;
+  T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor;
+  T.Range = StringRef(Start, Current - Start);
+  TokenQueue.push_back(T);
+
+  // Alias and anchors can be simple keys.
+  saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+
+  IsSimpleKeyAllowed = false;
+
+  return true;
+}
+
+/// Scan a block scalar starting at the indicator character at \a Current and
+/// queue a TK_Scalar token covering everything consumed.
+///
+/// Characters and line breaks are consumed until a position at column 0 that
+/// does not start an nb-char is reached. \a IsLiteral is not used by this
+/// implementation.
+bool Scanner::scanBlockScalar(bool IsLiteral) {
+  StringRef::iterator Start = Current;
+  skip(1); // Eat | or >
+  while(true) {
+    StringRef::iterator i = skip_nb_char(Current);
+    if (i == Current) {
+      // Not an nb-char. At the start of a line this ends the scalar.
+      if (Column == 0)
+        break;
+      i = skip_b_break(Current);
+      if (i != Current) {
+        // We got a line break.
+        Column = 0;
+        ++Line;
+        Current = i;
+        continue;
+      } else {
+        // There was an error, which should already have been printed out.
+        // NOTE(review): nothing in this function reports an error before
+        // returning here - confirm that a caller or helper does.
+        return false;
+      }
+    }
+    Current = i;
+    ++Column;
+  }
+
+  // NOTE(review): skip(1) above already moved Current past Start, so this
+  // comparison looks like it can never be true - confirm.
+  if (Start == Current) {
+    setError("Got empty block scalar", Start);
+    return false;
+  }
+
+  Token T;
+  T.Kind = Token::TK_Scalar;
+  T.Range = StringRef(Start, Current - Start);
+  TokenQueue.push_back(T);
+  return true;
+}
+
+/// Scan a tag starting at '!' and queue a TK_Tag token for it, saved as a
+/// simple key candidate. Three shapes are handled: a bare '!', a '!<' form
+/// whose URI characters must be followed by '>' (false is returned if it is
+/// not), and anything else up to the next blank.
+bool Scanner::scanTag() {
+  StringRef::iterator TagStart = Current;
+  unsigned ColStart = Column;
+  skip(1); // Eat !.
+
+  // An empty tag is '!' followed by a blank, a break or the end of input.
+  const bool IsEmptyTag = Current == End || isBlankOrBreak(Current);
+  if (!IsEmptyTag) {
+    if (*Current == '<') {
+      skip(1);
+      scan_ns_uri_char();
+      if (!consume('>'))
+        return false;
+    } else {
+      // FIXME: Actually parse the c-ns-shorthand-tag rule.
+      Current = skip_while(&Scanner::skip_ns_char, Current);
+    }
+  }
+
+  Token Tag;
+  Tag.Kind = Token::TK_Tag;
+  Tag.Range = StringRef(TagStart, Current - TagStart);
+  TokenQueue.push_back(Tag);
+
+  // Tags can be simple keys.
+  saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+
+  IsSimpleKeyAllowed = false;
+
+  return true;
+}
+
+/// Scan the next token or tokens into the token queue.
+///
+/// The first call produces the stream start token. Later calls skip to the
+/// next token, unroll indentation to the current column and dispatch on the
+/// character at \a Current. Returns false if scanning failed.
+bool Scanner::fetchMoreTokens() {
+  if (IsStartOfStream)
+    return scanStreamStart();
+
+  scanToNextToken();
+
+  if (Current == End)
+    return scanStreamEnd();
+
+  removeStaleSimpleKeyCandidates();
+
+  unrollIndent(Column);
+
+  if (Column == 0 && *Current == '%')
+    return scanDirective();
+
+  // A document indicator is three characters at column 0 followed by a
+  // blank, a break or the end of input, so only three bytes need to be
+  // available for the end-of-input case to be reachable.
+  if (Column == 0 && Current + 3 <= End
+      && *Current == '-'
+      && *(Current + 1) == '-'
+      && *(Current + 2) == '-'
+      && (Current + 3 == End || isBlankOrBreak(Current + 3)))
+    return scanDocumentIndicator(true);
+
+  if (Column == 0 && Current + 3 <= End
+      && *Current == '.'
+      && *(Current + 1) == '.'
+      && *(Current + 2) == '.'
+      && (Current + 3 == End || isBlankOrBreak(Current + 3)))
+    return scanDocumentIndicator(false);
+
+  if (*Current == '[')
+    return scanFlowCollectionStart(true);
+
+  if (*Current == '{')
+    return scanFlowCollectionStart(false);
+
+  if (*Current == ']')
+    return scanFlowCollectionEnd(true);
+
+  if (*Current == '}')
+    return scanFlowCollectionEnd(false);
+
+  if (*Current == ',')
+    return scanFlowEntry();
+
+  // '-', '?' and ':' are only indicators when followed by a blank or break
+  // ('?' and ':' also anywhere in flow context).
+  if (*Current == '-' && isBlankOrBreak(Current + 1))
+    return scanBlockEntry();
+
+  if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1)))
+    return scanKey();
+
+  if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1)))
+    return scanValue();
+
+  if (*Current == '*')
+    return scanAliasOrAnchor(true);
+
+  if (*Current == '&')
+    return scanAliasOrAnchor(false);
+
+  if (*Current == '!')
+    return scanTag();
+
+  // Block scalars only exist in block context.
+  if (*Current == '|' && !FlowLevel)
+    return scanBlockScalar(true);
+
+  if (*Current == '>' && !FlowLevel)
+    return scanBlockScalar(false);
+
+  if (*Current == '\'')
+    return scanFlowScalar(false);
+
+  if (*Current == '"')
+    return scanFlowScalar(true);
+
+  // Get a plain scalar.
+  StringRef FirstChar(Current, 1);
+  if (!(isBlankOrBreak(Current)
+        || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos)
+      || (*Current == '-' && !isBlankOrBreak(Current + 1))
+      || (!FlowLevel && (*Current == '?' || *Current == ':')
+          && isBlankOrBreak(Current + 1))
+      || (!FlowLevel && *Current == ':'
+                      && Current + 2 < End
+                      && *(Current + 1) == ':'
+                      && !isBlankOrBreak(Current + 2)))
+    return scanPlainScalar();
+
+  setError("Unrecognized character while tokenizing.");
+  return false;
+}
+
+/// Create a stream over \a Input with its own Scanner reporting through
+/// \a SM. No document is current until begin() is called.
+Stream::Stream(StringRef Input, SourceMgr &SM)
+  : scanner(new Scanner(Input, SM))
+  , CurrentDoc(0) {
+}
+
+/// Forward to the scanner's failed().
+bool Stream::failed() {
+  return scanner->failed();
+}
+
+/// Print \a Msg as an error located at the start of node \a N's source
+/// range, with that range attached to the diagnostic.
+void Stream::printError(Node *N, const Twine &Msg) {
+  const SMRange NodeRange = N->getSourceRange();
+  SmallVector<SMRange, 1> Ranges;
+  Ranges.push_back(NodeRange);
+  scanner->printError(NodeRange.Start, SourceMgr::DK_Error, Msg, Ranges);
+}
+
+/// Handle a YAML directive token. Currently does nothing with \a t.
+void Stream::handleYAMLDirective(const Token &t) {
+  // TODO: Ensure version is 1.x.
+}
+
+/// Start iterating over the documents of the stream. The stream start token
+/// is consumed and the first Document is created. Calling this again once a
+/// document exists is a fatal error.
+document_iterator Stream::begin() {
+  // A document already exists, so iteration has been started before.
+  if (CurrentDoc)
+    report_fatal_error("Can only iterate over the stream once");
+
+  // Skip Stream-Start.
+  scanner->getNext();
+
+  CurrentDoc.reset(new Document(*this));
+  return document_iterator(CurrentDoc);
+}
+
+/// Return the end iterator: a default-constructed document_iterator.
+document_iterator Stream::end() {
+  document_iterator Sentinel;
+  return Sentinel;
+}
+
+/// Iterate over every document of the stream, calling skip() on each.
+void Stream::skip() {
+  document_iterator Doc = begin();
+  document_iterator DocEnd = end();
+  while (Doc != DocEnd) {
+    Doc->skip();
+    ++Doc;
+  }
+}
+
+/// Construct a node of kind \a Type belonging to document \a D with anchor
+/// \a A. The source range starts out empty, positioned at the beginning of
+/// the next token.
+Node::Node(unsigned int Type, OwningPtr<Document> &D, StringRef A)
+  : Doc(D)
+  , TypeID(Type)
+  , Anchor(A) {
+  const SMLoc Here = SMLoc::getFromPointer(peekNext().Range.begin());
+  SourceRange = SMRange(Here, Here);
+}
+
+// Out-of-line destructor with an empty body.
+Node::~Node() {}
+
+/// Forward to the owning document's peekNext().
+Token &Node::peekNext() {
+  Document &Owner = *Doc;
+  return Owner.peekNext();
+}
+
+/// Forward to the owning document's getNext().
+Token Node::getNext() {
+  Document &Owner = *Doc;
+  return Owner.getNext();
+}
+
+/// Forward to the owning document's parseBlockNode().
+Node *Node::parseBlockNode() {
+  Document &Owner = *Doc;
+  return Owner.parseBlockNode();
+}
+
+/// Return the owning document's NodeAllocator.
+BumpPtrAllocator &Node::getAllocator() {
+  Document &Owner = *Doc;
+  return Owner.NodeAllocator;
+}
+
+/// Forward \a Msg and \a Tok to the owning document's setError().
+void Node::setError(const Twine &Msg, Token &Tok) const {
+  Document &Owner = *Doc;
+  Owner.setError(Msg, Tok);
+}
+
+/// Forward to the owning document's failed().
+bool Node::failed() const {
+  Document &Owner = *Doc;
+  return Owner.failed();
+}
+
+
+
+/// Return the value of this scalar with quoting removed.
+///
+/// Double quoted scalars lose their quotes and are unescaped if they contain
+/// a backslash or line break; single quoted scalars lose their quotes and
+/// have each doubled '' replaced by a single '. All other scalars have
+/// trailing spaces removed.
+///
+/// \param Storage scratch space; used only when the value cannot be returned
+///        as a slice of the original text.
+/// \returns either a slice of the original text or a reference into
+///          \a Storage, so \a Storage must stay alive while the result is in
+///          use.
+StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const {
+  // TODO: Handle newlines properly. We need to remove leading whitespace.
+  // Guard the Value[0] accesses below.
+  if (Value.empty())
+    return Value;
+
+  if (Value[0] == '"') { // Double quoted.
+    // Pull off the leading and trailing "s.
+    StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
+    // Search for characters that would require unescaping the value.
+    StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n");
+    if (i != StringRef::npos)
+      return unescapeDoubleQuoted(UnquotedValue, i, Storage);
+    return UnquotedValue;
+  } else if (Value[0] == '\'') { // Single quoted.
+    // Pull off the leading and trailing 's.
+    StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
+    StringRef::size_type i = UnquotedValue.find('\'');
+    if (i != StringRef::npos) {
+      // We're going to need Storage.
+      Storage.clear();
+      Storage.reserve(UnquotedValue.size());
+      for (; i != StringRef::npos; i = UnquotedValue.find('\'')) {
+        // Copy the text before the quote, emit one ', then continue after
+        // the second quote of the pair.
+        StringRef Valid(UnquotedValue.begin(), i);
+        Storage.insert(Storage.end(), Valid.begin(), Valid.end());
+        Storage.push_back('\'');
+        UnquotedValue = UnquotedValue.substr(i + 2);
+      }
+      Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
+      return StringRef(Storage.begin(), Storage.size());
+    }
+    return UnquotedValue;
+  }
+  // Plain or block. Only trailing spaces are dropped; spaces inside the
+  // value are part of it.
+  StringRef::size_type Length = Value.size();
+  while (Length != 0 && Value[Length - 1] == ' ')
+    --Length;
+  return Value.substr(0, Length);
+}
+
+/// Build the value of a double quoted scalar that contains escapes or line
+/// breaks.
+///
+/// \param UnquotedValue the scalar text without the surrounding quotes.
+/// \param i index in \p UnquotedValue of the first backslash or line break.
+/// \param Storage buffer that receives the unescaped text; it is cleared
+///        first.
+/// \returns a view of \p Storage, or an empty string after reporting an error
+///          for an unrecognized or unterminated escape.
+StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
+                                          , StringRef::size_type i
+                                          , SmallVectorImpl<char> &Storage)
+                                          const {
+  // Use Storage to build proper value.
+  Storage.clear();
+  Storage.reserve(UnquotedValue.size());
+  for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) {
+    // Insert all previous chars into Storage.
+    StringRef Valid(UnquotedValue.begin(), i);
+    Storage.insert(Storage.end(), Valid.begin(), Valid.end());
+    // Chop off inserted chars.
+    UnquotedValue = UnquotedValue.substr(i);
+
+    assert(!UnquotedValue.empty() && "Can't be empty!");
+
+    // Parse escape or line break.
+    switch (UnquotedValue[0]) {
+    case '\r':
+    case '\n':
+      Storage.push_back('\n');
+      // A second line break character directly after the first is treated as
+      // part of the same break.
+      if (   UnquotedValue.size() > 1
+          && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
+        UnquotedValue = UnquotedValue.substr(1);
+      UnquotedValue = UnquotedValue.substr(1);
+      break;
+    default:
+      if (UnquotedValue.size() == 1) {
+        // A lone backslash at the very end. Nothing would advance
+        // UnquotedValue on this path, so the loop would find the same
+        // backslash forever; report it as an error instead.
+        Token T;
+        T.Range = StringRef(UnquotedValue.begin(), 1);
+        setError("Unrecognized escape code!", T);
+        return "";
+      }
+      UnquotedValue = UnquotedValue.substr(1);
+      switch (UnquotedValue[0]) {
+      default: {
+          Token T;
+          T.Range = StringRef(UnquotedValue.begin(), 1);
+          setError("Unrecognized escape code!", T);
+          return "";
+        }
+      case '\r':
+      case '\n':
+        // Remove the new line.
+        if (   UnquotedValue.size() > 1
+            && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
+          UnquotedValue = UnquotedValue.substr(1);
+        // If this was just a single byte newline, it will get skipped
+        // below.
+        break;
+      case '0':
+        Storage.push_back(0x00);
+        break;
+      case 'a':
+        Storage.push_back(0x07);
+        break;
+      case 'b':
+        Storage.push_back(0x08);
+        break;
+      case 't':
+      case 0x09:
+        Storage.push_back(0x09);
+        break;
+      case 'n':
+        Storage.push_back(0x0A);
+        break;
+      case 'v':
+        Storage.push_back(0x0B);
+        break;
+      case 'f':
+        Storage.push_back(0x0C);
+        break;
+      case 'r':
+        Storage.push_back(0x0D);
+        break;
+      case 'e':
+        Storage.push_back(0x1B);
+        break;
+      case ' ':
+        Storage.push_back(0x20);
+        break;
+      case '"':
+        Storage.push_back(0x22);
+        break;
+      case '/':
+        Storage.push_back(0x2F);
+        break;
+      case '\\':
+        Storage.push_back(0x5C);
+        break;
+      case 'N':
+        encodeUTF8(0x85, Storage);
+        break;
+      case '_':
+        encodeUTF8(0xA0, Storage);
+        break;
+      case 'L':
+        encodeUTF8(0x2028, Storage);
+        break;
+      case 'P':
+        encodeUTF8(0x2029, Storage);
+        break;
+      case 'x': {
+          if (UnquotedValue.size() < 3)
+            // TODO: Report error.
+            break;
+          // getAsInteger returns true on failure and then leaves the output
+          // untouched; fall back to U+FFFD rather than encoding garbage.
+          unsigned int UnicodeScalarValue;
+          if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue))
+            // TODO: Report error.
+            UnicodeScalarValue = 0xFFFD;
+          encodeUTF8(UnicodeScalarValue, Storage);
+          UnquotedValue = UnquotedValue.substr(2);
+          break;
+        }
+      case 'u': {
+          if (UnquotedValue.size() < 5)
+            // TODO: Report error.
+            break;
+          unsigned int UnicodeScalarValue;
+          if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue))
+            // TODO: Report error.
+            UnicodeScalarValue = 0xFFFD;
+          encodeUTF8(UnicodeScalarValue, Storage);
+          UnquotedValue = UnquotedValue.substr(4);
+          break;
+        }
+      case 'U': {
+          if (UnquotedValue.size() < 9)
+            // TODO: Report error.
+            break;
+          unsigned int UnicodeScalarValue;
+          if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue))
+            // TODO: Report error.
+            UnicodeScalarValue = 0xFFFD;
+          encodeUTF8(UnicodeScalarValue, Storage);
+          UnquotedValue = UnquotedValue.substr(8);
+          break;
+        }
+      }
+      // Step over the escape character itself (or the last consumed digit).
+      UnquotedValue = UnquotedValue.substr(1);
+    }
+  }
+  Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
+  return StringRef(Storage.begin(), Storage.size());
+}
+
+/// Return the key node of this entry, parsing it on first use.
+///
+/// A NullNode is produced when no key is present, either implicitly (the next
+/// token already ends the key) or explicitly (a key indicator with nothing
+/// after it). Otherwise the result of parseBlockNode() is cached and
+/// returned.
+Node *KeyValueNode::getKey() {
+  if (Key)
+    return Key;
+
+  // Handle implicit null keys.
+  switch (peekNext().Kind) {
+  case Token::TK_BlockEnd:
+  case Token::TK_Value:
+  case Token::TK_Error:
+    return Key = new (getAllocator()) NullNode(Doc);
+  case Token::TK_Key:
+    getNext(); // skip TK_Key.
+    break;
+  default:
+    break;
+  }
+
+  // Handle explicit null keys; anything else is a normal key.
+  switch (peekNext().Kind) {
+  case Token::TK_BlockEnd:
+  case Token::TK_Value:
+    return Key = new (getAllocator()) NullNode(Doc);
+  default:
+    return Key = parseBlockNode();
+  }
+}
+
+/// Return the value node of this entry, parsing it on first use.
+///
+/// The key is skipped first so that the next token belongs to the value. A
+/// NullNode is returned when the value is absent, when the key could not be
+/// parsed, or when an unexpected token follows the key. The result of parsing
+/// a present value is cached as is and may be 0 if parseBlockNode() fails.
+Node *KeyValueNode::getValue() {
+  if (Value)
+    return Value;
+
+  // getKey() can hand back 0, because parseBlockNode() returns 0 when it
+  // runs into an error. Never call through a null key.
+  if (Node *KeyNode = getKey())
+    KeyNode->skip();
+  else {
+    setError("Null key in Key Value.", peekNext());
+    return Value = new (getAllocator()) NullNode(Doc);
+  }
+  if (failed())
+    return Value = new (getAllocator()) NullNode(Doc);
+
+  // Handle implicit null values.
+  {
+    Token &t = peekNext();
+    if (   t.Kind == Token::TK_BlockEnd
+        || t.Kind == Token::TK_FlowMappingEnd
+        || t.Kind == Token::TK_Key
+        || t.Kind == Token::TK_FlowEntry
+        || t.Kind == Token::TK_Error) {
+      return Value = new (getAllocator()) NullNode(Doc);
+    }
+
+    if (t.Kind != Token::TK_Value) {
+      setError("Unexpected token in Key Value.", t);
+      return Value = new (getAllocator()) NullNode(Doc);
+    }
+    getNext(); // skip TK_Value.
+  }
+
+  // Handle explicit null values.
+  Token &t = peekNext();
+  if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) {
+    return Value = new (getAllocator()) NullNode(Doc);
+  }
+
+  // We got a normal value.
+  return Value = parseBlockNode();
+}
+
+/// Advance the mapping to its next key/value entry.
+///
+/// The current entry, if any, is skipped first. An inline mapping (MT_Inline)
+/// ends after its single entry. Otherwise the next token decides whether a
+/// new entry starts, the mapping ends, or an error is reported.
+void MappingNode::increment() {
+  if (failed()) {
+    IsAtEnd = true;
+    CurrentEntry = 0;
+    return;
+  }
+
+  if (CurrentEntry) {
+    CurrentEntry->skip();
+    if (Type == MT_Inline) {
+      IsAtEnd = true;
+      CurrentEntry = 0;
+      return;
+    }
+  }
+
+  Token T = peekNext();
+  if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) {
+    // KeyValueNode eats the TK_Key. That way it can detect null keys.
+    CurrentEntry = new (getAllocator()) KeyValueNode(Doc);
+    return;
+  }
+
+  if (Type == MT_Block) {
+    // A block mapping ends at Block End; an error token ends it silently and
+    // anything else is reported first.
+    if (T.Kind == Token::TK_BlockEnd)
+      getNext();
+    else if (T.Kind != Token::TK_Error)
+      setError("Unexpected token. Expected Key or Block End", T);
+  } else {
+    if (T.Kind == Token::TK_FlowEntry) {
+      // Eat the flow entry and recurse.
+      getNext();
+      return increment();
+    }
+    if (T.Kind == Token::TK_FlowMappingEnd)
+      getNext();
+    else if (T.Kind != Token::TK_Error)
+      setError( "Unexpected token. Expected Key, Flow Entry, or Flow "
+                "Mapping End."
+              , T);
+  }
+
+  // Set this to end iterator.
+  IsAtEnd = true;
+  CurrentEntry = 0;
+}
+
+/// Advance the sequence to its next entry.
+///
+/// The current entry, if any, is skipped first. How the next token is
+/// interpreted depends on the sequence type: block and indentless sequences
+/// expect a Block Entry token before each element, flow sequences expect
+/// elements separated by Flow Entry tokens and closed by Flow Sequence End.
+void SequenceNode::increment() {
+  if (failed()) {
+    IsAtEnd = true;
+    CurrentEntry = 0;
+    return;
+  }
+  if (CurrentEntry)
+    CurrentEntry->skip();
+
+  Token T = peekNext();
+
+  if (SeqType == ST_Block || SeqType == ST_Indentless) {
+    if (T.Kind == Token::TK_BlockEntry) {
+      getNext();
+      CurrentEntry = parseBlockNode();
+      if (!CurrentEntry) // An error occurred.
+        IsAtEnd = true;
+      return;
+    }
+    // Only a block sequence has an explicit terminator and complains about
+    // other tokens; an indentless one simply ends without consuming anything.
+    if (SeqType == ST_Block) {
+      if (T.Kind == Token::TK_BlockEnd)
+        getNext();
+      else if (T.Kind != Token::TK_Error)
+        setError( "Unexpected token. Expected Block Entry or Block End."
+                , T);
+    }
+    IsAtEnd = true;
+    CurrentEntry = 0;
+    return;
+  }
+
+  if (SeqType != ST_Flow)
+    return;
+
+  switch (T.Kind) {
+  case Token::TK_FlowEntry:
+    // Eat the flow entry and recurse.
+    getNext();
+    WasPreviousTokenFlowEntry = true;
+    return increment();
+  case Token::TK_FlowSequenceEnd:
+    getNext();
+    break;
+  case Token::TK_Error:
+    break;
+  case Token::TK_StreamEnd:
+  case Token::TK_DocumentEnd:
+  case Token::TK_DocumentStart:
+    setError("Could not find closing ]!", T);
+    break;
+  default:
+    if (WasPreviousTokenFlowEntry) {
+      // The token starts the next element of the flow sequence.
+      CurrentEntry = parseBlockNode();
+      if (!CurrentEntry)
+        IsAtEnd = true;
+      WasPreviousTokenFlowEntry = false;
+      return;
+    }
+    setError("Expected , between entries!", T);
+    break;
+  }
+
+  // Set this to end iterator.
+  IsAtEnd = true;
+  CurrentEntry = 0;
+}
+
+/// Begin a new document on stream \p S.
+///
+/// Leading directives are processed; if there were any, the next token is
+/// expected to be a document start marker. A document start marker that is
+/// next afterwards is consumed.
+Document::Document(Stream &S) : stream(S), Root(0) {
+  const bool SawDirectives = parseDirectives();
+  if (SawDirectives)
+    expectToken(Token::TK_DocumentStart);
+
+  if (peekNext().Kind == Token::TK_DocumentStart)
+    getNext();
+}
+
+/// Finish parsing this document and move past it.
+///
+/// \returns true if the token after the document is neither a stream end nor
+///          a document end; false once the scanner has failed, the root node
+///          could not be parsed, or the stream has ended.
+bool Document::skip()  {
+  if (stream.scanner->failed())
+    return false;
+  if (!Root)
+    getRoot();
+  // parseBlockNode() returns 0 when it meets an error token, in which case
+  // there is no root to skip. Bail out instead of dereferencing null.
+  if (!Root)
+    return false;
+  Root->skip();
+  Token &T = peekNext();
+  if (T.Kind == Token::TK_StreamEnd)
+    return false;
+  if (T.Kind == Token::TK_DocumentEnd) {
+    // Consume the document end marker and look at what follows it.
+    getNext();
+    return skip();
+  }
+  return true;
+}
+
+Token &Document::peekNext() { // Forwards to the stream's scanner.
+  return stream.scanner->peekNext();
+}
+
+Token Document::getNext() { // Forwards to the stream's scanner.
+  return stream.scanner->getNext();
+}
+
+void Document::setError(const Twine &Message, Token &Location) const { // Passes Message and the start of Location's range to the scanner.
+  stream.scanner->setError(Message, Location.Range.begin());
+}
+
+bool Document::failed() const { // Reports the scanner's failure state.
+  return stream.scanner->failed();
+}
+
+/// Parse the node that starts at the next token.
+///
+/// Leading anchor and tag properties are consumed first (tags are currently
+/// skipped); an alias ends parsing immediately. The kind of the following
+/// token then selects which node type is allocated.
+///
+/// \returns the newly allocated node, or 0 if an error token is encountered
+///          or a second anchor is found for the same node.
+Node *Document::parseBlockNode() {
+  Token T = peekNext();
+  Token AnchorInfo;
+
+  // Handle properties.
+  while (true) {
+    if (T.Kind == Token::TK_Alias) {
+      getNext();
+      return new (NodeAllocator)
+        AliasNode(stream.CurrentDoc, T.Range.substr(1));
+    }
+    if (T.Kind == Token::TK_Anchor) {
+      if (AnchorInfo.Kind == Token::TK_Anchor) {
+        setError("Already encountered an anchor for this node!", T);
+        return 0;
+      }
+      AnchorInfo = getNext(); // Consume TK_Anchor.
+    } else if (T.Kind == Token::TK_Tag) {
+      getNext(); // Skip TK_Tag.
+    } else {
+      break;
+    }
+    T = peekNext();
+  }
+
+  // The anchor name is the anchor token's text without its first character.
+  const StringRef AnchorName = AnchorInfo.Range.substr(1);
+
+  switch (T.Kind) {
+  case Token::TK_Error:
+    return 0;
+  case Token::TK_BlockEntry:
+    // We got an unindented BlockEntry sequence. This is not terminated with
+    // a BlockEnd.
+    // Don't eat the TK_BlockEntry, SequenceNode needs it.
+    return new (NodeAllocator)
+      SequenceNode(stream.CurrentDoc, AnchorName, SequenceNode::ST_Indentless);
+  case Token::TK_BlockSequenceStart:
+    getNext();
+    return new (NodeAllocator)
+      SequenceNode(stream.CurrentDoc, AnchorName, SequenceNode::ST_Block);
+  case Token::TK_BlockMappingStart:
+    getNext();
+    return new (NodeAllocator)
+      MappingNode(stream.CurrentDoc, AnchorName, MappingNode::MT_Block);
+  case Token::TK_FlowSequenceStart:
+    getNext();
+    return new (NodeAllocator)
+      SequenceNode(stream.CurrentDoc, AnchorName, SequenceNode::ST_Flow);
+  case Token::TK_FlowMappingStart:
+    getNext();
+    return new (NodeAllocator)
+      MappingNode(stream.CurrentDoc, AnchorName, MappingNode::MT_Flow);
+  case Token::TK_Scalar:
+    getNext();
+    return new (NodeAllocator)
+      ScalarNode(stream.CurrentDoc, AnchorName, T.Range);
+  case Token::TK_Key:
+    // Don't eat the TK_Key, KeyValueNode expects it.
+    return new (NodeAllocator)
+      MappingNode(stream.CurrentDoc, AnchorName, MappingNode::MT_Inline);
+  case Token::TK_DocumentStart:
+  case Token::TK_DocumentEnd:
+  case Token::TK_StreamEnd:
+  default:
+    // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not
+    //       !!null null.
+    return new (NodeAllocator) NullNode(stream.CurrentDoc);
+  }
+  llvm_unreachable("Control flow shouldn't reach here.");
+  return 0;
+}
+
+/// Consume all tag and version directive tokens at the current position,
+/// handing each one to its handler.
+///
+/// \returns true if at least one directive was consumed.
+bool Document::parseDirectives() {
+  bool SawDirective = false;
+  for (;;) {
+    switch (peekNext().Kind) {
+    case Token::TK_TagDirective:
+      handleTagDirective(getNext());
+      break;
+    case Token::TK_VersionDirective:
+      stream.handleYAMLDirective(getNext());
+      break;
+    default:
+      // First token that is not a directive: stop without consuming it.
+      return SawDirective;
+    }
+    SawDirective = true;
+  }
+}
+
+/// Consume the next token and check that it is of kind \p TK.
+///
+/// \returns true on a match; otherwise an error is set at the consumed token
+///          and false is returned.
+bool Document::expectToken(int TK) {
+  Token Next = getNext();
+  if (Next.Kind == TK)
+    return true;
+  setError("Unexpected token", Next);
+  return false;
+}
+
+OwningPtr<Document> document_iterator::NullDoc; // Out-of-line definition of the static NullDoc member.
diff --git a/test/YAMLParser/LICENSE.txt b/test/YAMLParser/LICENSE.txt
new file mode 100644
index 0000000000..050ced23f6
--- /dev/null
+++ b/test/YAMLParser/LICENSE.txt
@@ -0,0 +1,19 @@
+Copyright (c) 2006 Kirill Simonov
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/test/YAMLParser/bool.data b/test/YAMLParser/bool.data
new file mode 100644
index 0000000000..e987a0ec1e
--- /dev/null
+++ b/test/YAMLParser/bool.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+- yes
+- NO
+- True
+- on
diff --git a/test/YAMLParser/construct-bool.data b/test/YAMLParser/construct-bool.data
new file mode 100644
index 0000000000..035ec0c858
--- /dev/null
+++ b/test/YAMLParser/construct-bool.data
@@ -0,0 +1,11 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: yes
+answer: NO
+logical: True
+option: on
+
+
+but:
+    y: is a string
+    n: is a string
diff --git a/test/YAMLParser/construct-custom.data b/test/YAMLParser/construct-custom.data
new file mode 100644
index 0000000000..cac95e0a5f
--- /dev/null
+++ b/test/YAMLParser/construct-custom.data
@@ -0,0 +1,28 @@
+# RUN: yaml-bench -canonical %s
+
+---
+- !tag1
+  x: 1
+- !tag1
+  x: 1
+  'y': 2
+  z: 3
+- !tag2
+  10
+- !tag2
+  =: 10
+  'y': 20
+  z: 30
+- !tag3
+  x: 1
+- !tag3
+  x: 1
+  'y': 2
+  z: 3
+- !tag3
+  =: 1
+  'y': 2
+  z: 3
+- !foo
+  my-parameter: foo
+  my-another-parameter: [1,2,3]
diff --git a/test/YAMLParser/construct-float.data b/test/YAMLParser/construct-float.data
new file mode 100644
index 0000000000..07c51bdd83
--- /dev/null
+++ b/test/YAMLParser/construct-float.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: 6.8523015e+5
+exponential: 685.230_15e+03
+fixed: 685_230.15
+sexagesimal: 190:20:30.15
+negative infinity: -.inf
+not a number: .NaN
diff --git a/test/YAMLParser/construct-int.data b/test/YAMLParser/construct-int.data
new file mode 100644
index 0000000000..b14c37f788
--- /dev/null
+++ b/test/YAMLParser/construct-int.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: 685230
+decimal: +685_230
+octal: 02472256
+hexadecimal: 0x_0A_74_AE
+binary: 0b1010_0111_0100_1010_1110
+sexagesimal: 190:20:30
diff --git a/test/YAMLParser/construct-map.data b/test/YAMLParser/construct-map.data
new file mode 100644
index 0000000000..1b681206d1
--- /dev/null
+++ b/test/YAMLParser/construct-map.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+# Unordered set of key: value pairs.
+Block style: !!map
+  Clark : Evans
+  Brian : Ingerson
+  Oren  : Ben-Kiki
+Flow style: !!map { Clark: Evans, Brian: Ingerson, Oren: Ben-Kiki }
diff --git a/test/YAMLParser/construct-merge.data b/test/YAMLParser/construct-merge.data
new file mode 100644
index 0000000000..0ebc9f612d
--- /dev/null
+++ b/test/YAMLParser/construct-merge.data
@@ -0,0 +1,29 @@
+# RUN: yaml-bench -canonical %s
+
+---
+- &CENTER { x: 1, 'y': 2 }
+- &LEFT { x: 0, 'y': 2 }
+- &BIG { r: 10 }
+- &SMALL { r: 1 }
+
+# All the following maps are equal:
+
+- # Explicit keys
+  x: 1
+  'y': 2
+  r: 10
+  label: center/big
+
+- # Merge one map
+  << : *CENTER
+  r: 10
+  label: center/big
+
+- # Merge multiple maps
+  << : [ *CENTER, *BIG ]
+  label: center/big
+
+- # Override
+  << : [ *BIG, *LEFT, *SMALL ]
+  x: 1
+  label: center/big
diff --git a/test/YAMLParser/construct-null.data b/test/YAMLParser/construct-null.data
new file mode 100644
index 0000000000..51f8b61e24
--- /dev/null
+++ b/test/YAMLParser/construct-null.data
@@ -0,0 +1,20 @@
+# RUN: yaml-bench -canonical %s
+
+# A document may be null.
+---
+---
+# This mapping has four keys,
+# one has a value.
+empty:
+canonical: ~
+english: null
+~: null key
+---
+# This sequence has five
+# entries, two have values.
+sparse:
+  - ~
+  - 2nd entry
+  -
+  - 4th entry
+  - Null
diff --git a/test/YAMLParser/construct-omap.data b/test/YAMLParser/construct-omap.data
new file mode 100644
index 0000000000..b96d6799c7
--- /dev/null
+++ b/test/YAMLParser/construct-omap.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+# Explicitly typed ordered map (dictionary).
+Bestiary: !!omap
+  - aardvark: African pig-like ant eater. Ugly.
+  - anteater: South-American ant eater. Two species.
+  - anaconda: South-American constrictor snake. Scaly.
+  # Etc.
+# Flow style
+Numbers: !!omap [ one: 1, two: 2, three : 3 ]
diff --git a/test/YAMLParser/construct-pairs.data b/test/YAMLParser/construct-pairs.data
new file mode 100644
index 0000000000..40f288d1d7
--- /dev/null
+++ b/test/YAMLParser/construct-pairs.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+# Explicitly typed pairs.
+Block tasks: !!pairs
+  - meeting: with team.
+  - meeting: with boss.
+  - break: lunch.
+  - meeting: with client.
+Flow tasks: !!pairs [ meeting: with team, meeting: with boss ]
diff --git a/test/YAMLParser/construct-seq.data b/test/YAMLParser/construct-seq.data
new file mode 100644
index 0000000000..f43fd39f8e
--- /dev/null
+++ b/test/YAMLParser/construct-seq.data
@@ -0,0 +1,17 @@
+# RUN: yaml-bench -canonical %s
+
+# Ordered sequence of nodes
+Block style: !!seq
+- Mercury   # Rotates - no light/dark sides.
+- Venus     # Deadliest. Aptly named.
+- Earth     # Mostly dirt.
+- Mars      # Seems empty.
+- Jupiter   # The king.
+- Saturn    # Pretty.
+- Uranus    # Where the sun hardly shines.
+- Neptune   # Boring. No rings.
+- Pluto     # You call this a planet?
+Flow style: !!seq [ Mercury, Venus, Earth, Mars,      # Rocks
+                    Jupiter, Saturn, Uranus, Neptune, # Gas
+                    Pluto ]                           # Overrated
+
diff --git a/test/YAMLParser/construct-set.data b/test/YAMLParser/construct-set.data
new file mode 100644
index 0000000000..3e9d095e71
--- /dev/null
+++ b/test/YAMLParser/construct-set.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+# Explicitly typed set.
+baseball players: !!set
+  ? Mark McGwire
+  ? Sammy Sosa
+  ? Ken Griffey
+# Flow style
+baseball teams: !!set { Boston Red Sox, Detroit Tigers, New York Yankees }
diff --git a/test/YAMLParser/construct-str-ascii.data b/test/YAMLParser/construct-str-ascii.data
new file mode 100644
index 0000000000..24290ae8a9
--- /dev/null
+++ b/test/YAMLParser/construct-str-ascii.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+--- !!str "ascii string"
diff --git a/test/YAMLParser/construct-str.data b/test/YAMLParser/construct-str.data
new file mode 100644
index 0000000000..dc1ce825cd
--- /dev/null
+++ b/test/YAMLParser/construct-str.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+string: abcd
diff --git a/test/YAMLParser/construct-timestamp.data b/test/YAMLParser/construct-timestamp.data
new file mode 100644
index 0000000000..f262c2d02c
--- /dev/null
+++ b/test/YAMLParser/construct-timestamp.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+canonical:        2001-12-15T02:59:43.1Z
+valid iso8601:    2001-12-14t21:59:43.10-05:00
+space separated:  2001-12-14 21:59:43.10 -5
+no time zone (Z): 2001-12-15 2:59:43.10
+date (00:00:00Z): 2002-12-14
diff --git a/test/YAMLParser/construct-value.data b/test/YAMLParser/construct-value.data
new file mode 100644
index 0000000000..fe01a0dc90
--- /dev/null
+++ b/test/YAMLParser/construct-value.data
@@ -0,0 +1,12 @@
+# RUN: yaml-bench -canonical %s
+
+---     # Old schema
+link with:
+  - library1.dll
+  - library2.dll
+---     # New schema
+link with:
+  - = : library1.dll
+    version: 1.2
+  - = : library2.dll
+    version: 2.3
diff --git a/test/YAMLParser/duplicate-key.former-loader-error.data b/test/YAMLParser/duplicate-key.former-loader-error.data
new file mode 100644
index 0000000000..9272103fe6
--- /dev/null
+++ b/test/YAMLParser/duplicate-key.former-loader-error.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+---
+foo: bar
+foo: baz
diff --git a/test/YAMLParser/duplicate-mapping-key.former-loader-error.data b/test/YAMLParser/duplicate-mapping-key.former-loader-error.data
new file mode 100644
index 0000000000..96d175d2ac
--- /dev/null
+++ b/test/YAMLParser/duplicate-mapping-key.former-loader-error.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+---
+&anchor foo:
+    foo: bar
+    *anchor: duplicate key
+    baz: bat
+    *anchor: duplicate key
diff --git a/test/YAMLParser/duplicate-merge-key.former-loader-error.data b/test/YAMLParser/duplicate-merge-key.former-loader-error.data
new file mode 100644
index 0000000000..6b1276436a
--- /dev/null
+++ b/test/YAMLParser/duplicate-merge-key.former-loader-error.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+---
+<<: {x: 1, y: 2}
+foo: bar
+<<: {z: 3, t: 4}
diff --git a/test/YAMLParser/duplicate-value-key.former-loader-error.data b/test/YAMLParser/duplicate-value-key.former-loader-error.data
new file mode 100644
index 0000000000..dc20e0b275
--- /dev/null
+++ b/test/YAMLParser/duplicate-value-key.former-loader-error.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+---
+=: 1
+foo: bar
+=: 2
diff --git a/test/YAMLParser/emit-block-scalar-in-simple-key-context-bug.data b/test/YAMLParser/emit-block-scalar-in-simple-key-context-bug.data
new file mode 100644
index 0000000000..f5adedb135
--- /dev/null
+++ b/test/YAMLParser/emit-block-scalar-in-simple-key-context-bug.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+? |-
+  foo
+: |-
+  bar
diff --git a/test/YAMLParser/empty-document-bug.data b/test/YAMLParser/empty-document-bug.data
new file mode 100644
index 0000000000..fa131fe78e
--- /dev/null
+++ b/test/YAMLParser/empty-document-bug.data
@@ -0,0 +1,2 @@
+# RUN: yaml-bench -canonical %s
+
diff --git a/test/YAMLParser/float.data b/test/YAMLParser/float.data
new file mode 100644
index 0000000000..c4de97037c
--- /dev/null
+++ b/test/YAMLParser/float.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+- 6.8523015e+5
+- 685.230_15e+03
+- 685_230.15
+- 190:20:30.15
+- -.inf
+- .NaN
diff --git a/test/YAMLParser/int.data b/test/YAMLParser/int.data
new file mode 100644
index 0000000000..2651d096ff
--- /dev/null
+++ b/test/YAMLParser/int.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+- 685230
+- +685_230
+- 02472256
+- 0x_0A_74_AE
+- 0b1010_0111_0100_1010_1110
+- 190:20:30
diff --git a/test/YAMLParser/invalid-single-quote-bug.data b/test/YAMLParser/invalid-single-quote-bug.data
new file mode 100644
index 0000000000..3722a003df
--- /dev/null
+++ b/test/YAMLParser/invalid-single-quote-bug.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+- "foo 'bar'"
+- "foo\n'bar'"
diff --git a/test/YAMLParser/merge.data b/test/YAMLParser/merge.data
new file mode 100644
index 0000000000..86313596e1
--- /dev/null
+++ b/test/YAMLParser/merge.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+- <<
diff --git a/test/YAMLParser/more-floats.data b/test/YAMLParser/more-floats.data
new file mode 100644
index 0000000000..668b31cd13
--- /dev/null
+++ b/test/YAMLParser/more-floats.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+[0.0, +1.0, -1.0, +.inf, -.inf, .nan, .nan]
diff --git a/test/YAMLParser/negative-float-bug.data b/test/YAMLParser/negative-float-bug.data
new file mode 100644
index 0000000000..0ba0ffee30
--- /dev/null
+++ b/test/YAMLParser/negative-float-bug.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+-1.0
diff --git a/test/YAMLParser/null.data b/test/YAMLParser/null.data
new file mode 100644
index 0000000000..a38d7fa6c5
--- /dev/null
+++ b/test/YAMLParser/null.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+-
+- ~
+- null
diff --git a/test/YAMLParser/resolver.data b/test/YAMLParser/resolver.data
new file mode 100644
index 0000000000..8cbba6328b
--- /dev/null
+++ b/test/YAMLParser/resolver.data
@@ -0,0 +1,32 @@
+# RUN: yaml-bench -canonical %s
+
+---
+"this scalar should be selected"
+---
+key11: !foo
+    key12:
+        is: [selected]
+    key22:
+        key13: [not, selected]
+        key23: [not, selected]
+    key32:
+        key31: [not, selected]
+        key32: [not, selected]
+        key33: {not: selected}
+key21: !bar
+    - not selected
+    - selected
+    - not selected
+key31: !baz
+    key12:
+        key13:
+            key14: {selected}
+        key23:
+            key14: [not, selected]
+        key33:
+            key14: {selected}
+            key24: {not: selected}
+    key22:
+        -   key14: {selected}
+            key24: {not: selected}
+        -   key14: {selected}
diff --git a/test/YAMLParser/run-parser-crash-bug.data b/test/YAMLParser/run-parser-crash-bug.data
new file mode 100644
index 0000000000..3ec910ce04
--- /dev/null
+++ b/test/YAMLParser/run-parser-crash-bug.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+---
+- Harry Potter and the Prisoner of Azkaban
+- Harry Potter and the Goblet of Fire
+- Harry Potter and the Order of the Phoenix
+---
+- Memoirs Found in a Bathtub
+- Snow Crash
+- Ghost World
diff --git a/test/YAMLParser/scan-document-end-bug.data b/test/YAMLParser/scan-document-end-bug.data
new file mode 100644
index 0000000000..7354caf8cd
--- /dev/null
+++ b/test/YAMLParser/scan-document-end-bug.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+# Ticket #4
+---
+...
diff --git a/test/YAMLParser/scan-line-break-bug.data b/test/YAMLParser/scan-line-break-bug.data
new file mode 100644
index 0000000000..792973d3f5
--- /dev/null
+++ b/test/YAMLParser/scan-line-break-bug.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+foo:
+    bar
+    baz
diff --git a/test/YAMLParser/single-dot-is-not-float-bug.data b/test/YAMLParser/single-dot-is-not-float-bug.data
new file mode 100644
index 0000000000..810a5936a8
--- /dev/null
+++ b/test/YAMLParser/single-dot-is-not-float-bug.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+.
diff --git a/test/YAMLParser/sloppy-indentation.data b/test/YAMLParser/sloppy-indentation.data
new file mode 100644
index 0000000000..2b2b62b14a
--- /dev/null
+++ b/test/YAMLParser/sloppy-indentation.data
@@ -0,0 +1,19 @@
+# RUN: yaml-bench -canonical %s
+
+---
+in the block context:
+    indentation should be kept: { 
+    but in the flow context: [
+it may be violated]
+}
+---
+the parser does not require scalars
+to be indented with at least one space
+...
+---
+"the parser does not require scalars
+to be indented with at least one space"
+---
+foo:
+    bar: 'quoted scalars
+may not adhere indentation'
diff --git a/test/YAMLParser/spec-02-01.data b/test/YAMLParser/spec-02-01.data
new file mode 100644
index 0000000000..dd15b2bc26
--- /dev/null
+++ b/test/YAMLParser/spec-02-01.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+- Mark McGwire
+- Sammy Sosa
+- Ken Griffey
diff --git a/test/YAMLParser/spec-02-02.data b/test/YAMLParser/spec-02-02.data
new file mode 100644
index 0000000000..a5695d5c27
--- /dev/null
+++ b/test/YAMLParser/spec-02-02.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+hr:  65    # Home runs
+avg: 0.278 # Batting average
+rbi: 147   # Runs Batted In
diff --git a/test/YAMLParser/spec-02-03.data b/test/YAMLParser/spec-02-03.data
new file mode 100644
index 0000000000..81f8d991f7
--- /dev/null
+++ b/test/YAMLParser/spec-02-03.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+american:
+  - Boston Red Sox
+  - Detroit Tigers
+  - New York Yankees
+national:
+  - New York Mets
+  - Chicago Cubs
+  - Atlanta Braves
diff --git a/test/YAMLParser/spec-02-04.data b/test/YAMLParser/spec-02-04.data
new file mode 100644
index 0000000000..44a218d592
--- /dev/null
+++ b/test/YAMLParser/spec-02-04.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+-
+  name: Mark McGwire
+  hr:   65
+  avg:  0.278
+-
+  name: Sammy Sosa
+  hr:   63
+  avg:  0.288
diff --git a/test/YAMLParser/spec-02-05.data b/test/YAMLParser/spec-02-05.data
new file mode 100644
index 0000000000..c9a4a7572f
--- /dev/null
+++ b/test/YAMLParser/spec-02-05.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+- [name        , hr, avg  ]
+- [Mark McGwire, 65, 0.278]
+- [Sammy Sosa  , 63, 0.288]
diff --git a/test/YAMLParser/spec-02-06.data b/test/YAMLParser/spec-02-06.data
new file mode 100644
index 0000000000..85c1e2bab8
--- /dev/null
+++ b/test/YAMLParser/spec-02-06.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+Mark McGwire: {hr: 65, avg: 0.278}
+Sammy Sosa: {
+    hr: 63,
+    avg: 0.288
+  }
diff --git a/test/YAMLParser/spec-02-07.data b/test/YAMLParser/spec-02-07.data
new file mode 100644
index 0000000000..c349662a98
--- /dev/null
+++ b/test/YAMLParser/spec-02-07.data
@@ -0,0 +1,12 @@
+# RUN: yaml-bench -canonical %s
+
+# Ranking of 1998 home runs
+---
+- Mark McGwire
+- Sammy Sosa
+- Ken Griffey
+
+# Team ranking
+---
+- Chicago Cubs
+- St Louis Cardinals
diff --git a/test/YAMLParser/spec-02-08.data b/test/YAMLParser/spec-02-08.data
new file mode 100644
index 0000000000..9746a43788
--- /dev/null
+++ b/test/YAMLParser/spec-02-08.data
@@ -0,0 +1,12 @@
+# RUN: yaml-bench -canonical %s
+
+---
+time: 20:03:20
+player: Sammy Sosa
+action: strike (miss)
+...
+---
+time: 20:03:47
+player: Sammy Sosa
+action: grand slam
+...
diff --git a/test/YAMLParser/spec-02-09.data b/test/YAMLParser/spec-02-09.data
new file mode 100644
index 0000000000..6aef933379
--- /dev/null
+++ b/test/YAMLParser/spec-02-09.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+---
+hr: # 1998 hr ranking
+  - Mark McGwire
+  - Sammy Sosa
+rbi:
+  # 1998 rbi ranking
+  - Sammy Sosa
+  - Ken Griffey
diff --git a/test/YAMLParser/spec-02-10.data b/test/YAMLParser/spec-02-10.data
new file mode 100644
index 0000000000..0302fa7500
--- /dev/null
+++ b/test/YAMLParser/spec-02-10.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+---
+hr:
+  - Mark McGwire
+  # Following node labeled SS
+  - &SS Sammy Sosa
+rbi:
+  - *SS # Subsequent occurrence
+  - Ken Griffey
diff --git a/test/YAMLParser/spec-02-11.data b/test/YAMLParser/spec-02-11.data
new file mode 100644
index 0000000000..d8cf863b2c
--- /dev/null
+++ b/test/YAMLParser/spec-02-11.data
@@ -0,0 +1,11 @@
+# RUN: yaml-bench -canonical %s
+
+? - Detroit Tigers
+  - Chicago cubs
+:
+  - 2001-07-23
+
+? [ New York Yankees,
+    Atlanta Braves ]
+: [ 2001-07-02, 2001-08-12,
+    2001-08-14 ]
diff --git a/test/YAMLParser/spec-02-12.data b/test/YAMLParser/spec-02-12.data
new file mode 100644
index 0000000000..3b4d5370a9
--- /dev/null
+++ b/test/YAMLParser/spec-02-12.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+---
+# products purchased
+- item    : Super Hoop
+  quantity: 1
+- item    : Basketball
+  quantity: 4
+- item    : Big Shoes
+  quantity: 1
diff --git a/test/YAMLParser/spec-02-13.data b/test/YAMLParser/spec-02-13.data
new file mode 100644
index 0000000000..2bbccbf5d7
--- /dev/null
+++ b/test/YAMLParser/spec-02-13.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+# ASCII Art
+--- |
+  \//||\/||
+  // ||  ||__
diff --git a/test/YAMLParser/spec-02-14.data b/test/YAMLParser/spec-02-14.data
new file mode 100644
index 0000000000..5a18ea213e
--- /dev/null
+++ b/test/YAMLParser/spec-02-14.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+---
+  Mark McGwire's
+  year was crippled
+  by a knee injury.
diff --git a/test/YAMLParser/spec-02-15.data b/test/YAMLParser/spec-02-15.data
new file mode 100644
index 0000000000..2a7fbe96ad
--- /dev/null
+++ b/test/YAMLParser/spec-02-15.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+>
+ Sammy Sosa completed another
+ fine season with great stats.
+
+   63 Home Runs
+   0.288 Batting Average
+
+ What a year!
diff --git a/test/YAMLParser/spec-02-16.data b/test/YAMLParser/spec-02-16.data
new file mode 100644
index 0000000000..3a5792c763
--- /dev/null
+++ b/test/YAMLParser/spec-02-16.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+name: Mark McGwire
+accomplishment: >
+  Mark set a major league
+  home run record in 1998.
+stats: |
+  65 Home Runs
+  0.278 Batting Average
diff --git a/test/YAMLParser/spec-02-17.data b/test/YAMLParser/spec-02-17.data
new file mode 100644
index 0000000000..2bcb60c8d9
--- /dev/null
+++ b/test/YAMLParser/spec-02-17.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+unicode: "Sosa did fine.\u263A"
+control: "\b1998\t1999\t2000\n"
+hexesc:  "\x13\x10 is \r\n"
+
+single: '"Howdy!" he cried.'
+quoted: ' # not a ''comment''.'
+tie-fighter: '|\-*-/|'
+
+# CHECK: !!str "Sosa did fine.\u263A"
+# CHECK: !!str "\b1998\t1999\t2000\n"
+# CHECK: !!str "\x13\x10 is \r\n"
+# CHECK: !!str "\"Howdy!\" he cried."
+# CHECK: !!str " # not a 'comment'."
+# CHECK: !!str "|\\-*-/|"
diff --git a/test/YAMLParser/spec-02-18.data b/test/YAMLParser/spec-02-18.data
new file mode 100644
index 0000000000..625a4962e9
--- /dev/null
+++ b/test/YAMLParser/spec-02-18.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+plain:
+  This unquoted scalar
+  spans many lines.
+
+quoted: "So does this
+  quoted scalar.\n"
diff --git a/test/YAMLParser/spec-02-19.data b/test/YAMLParser/spec-02-19.data
new file mode 100644
index 0000000000..cb9df6dd1f
--- /dev/null
+++ b/test/YAMLParser/spec-02-19.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: 12345
+decimal: +12,345
+sexagesimal: 3:25:45
+octal: 014
+hexadecimal: 0xC
diff --git a/test/YAMLParser/spec-02-20.data b/test/YAMLParser/spec-02-20.data
new file mode 100644
index 0000000000..ed14798611
--- /dev/null
+++ b/test/YAMLParser/spec-02-20.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: 1.23015e+3
+exponential: 12.3015e+02
+sexagesimal: 20:30.15
+fixed: 1,230.15
+negative infinity: -.inf
+not a number: .NaN
diff --git a/test/YAMLParser/spec-02-21.data b/test/YAMLParser/spec-02-21.data
new file mode 100644
index 0000000000..ea979db065
--- /dev/null
+++ b/test/YAMLParser/spec-02-21.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+null: ~
+true: y
+false: n
+string: '12345'
diff --git a/test/YAMLParser/spec-02-22.data b/test/YAMLParser/spec-02-22.data
new file mode 100644
index 0000000000..77724f7106
--- /dev/null
+++ b/test/YAMLParser/spec-02-22.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: 2001-12-15T02:59:43.1Z
+iso8601: 2001-12-14t21:59:43.10-05:00
+spaced: 2001-12-14 21:59:43.10 -5
+date: 2002-12-14
diff --git a/test/YAMLParser/spec-02-23.data b/test/YAMLParser/spec-02-23.data
new file mode 100644
index 0000000000..d08dfa755c
--- /dev/null
+++ b/test/YAMLParser/spec-02-23.data
@@ -0,0 +1,15 @@
+# RUN: yaml-bench -canonical %s
+
+---
+not-date: !!str 2002-04-28
+
+picture: !!binary |
+ R0lGODlhDAAMAIQAAP//9/X
+ 17unp5WZmZgAAAOfn515eXv
+ Pz7Y6OjuDg4J+fn5OTk6enp
+ 56enmleECcgggoBADs=
+
+application specific tag: !something |
+ The semantics of the tag
+ above may be different for
+ different documents.
diff --git a/test/YAMLParser/spec-02-24.data b/test/YAMLParser/spec-02-24.data
new file mode 100644
index 0000000000..01ca7f5d12
--- /dev/null
+++ b/test/YAMLParser/spec-02-24.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+%TAG ! tag:clarkevans.com,2002:
+--- !shape
+  # Use the ! handle for presenting
+  # tag:clarkevans.com,2002:circle
+- !circle
+  center: &ORIGIN {x: 73, y: 129}
+  radius: 7
+- !line
+  start: *ORIGIN
+  finish: { x: 89, y: 102 }
+- !label
+  start: *ORIGIN
+  color: 0xFFEEBB
+  text: Pretty vector drawing.
diff --git a/test/YAMLParser/spec-02-25.data b/test/YAMLParser/spec-02-25.data
new file mode 100644
index 0000000000..fbadfda97e
--- /dev/null
+++ b/test/YAMLParser/spec-02-25.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+# sets are represented as a
+# mapping where each key is
+# associated with the empty string
+--- !!set
+? Mark McGwire
+? Sammy Sosa
+? Ken Griff
diff --git a/test/YAMLParser/spec-02-26.data b/test/YAMLParser/spec-02-26.data
new file mode 100644
index 0000000000..257108e7e0
--- /dev/null
+++ b/test/YAMLParser/spec-02-26.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+# ordered maps are represented as
+# a sequence of mappings, with
+# each mapping having one key
+--- !!omap
+- Mark McGwire: 65
+- Sammy Sosa: 63
+- Ken Griffy: 58
diff --git a/test/YAMLParser/spec-02-27.data b/test/YAMLParser/spec-02-27.data
new file mode 100644
index 0000000000..a190ff19db
--- /dev/null
+++ b/test/YAMLParser/spec-02-27.data
@@ -0,0 +1,31 @@
+# RUN: yaml-bench -canonical %s
+
+--- !<tag:clarkevans.com,2002:invoice>
+invoice: 34843
+date   : 2001-01-23
+bill-to: &id001
+    given  : Chris
+    family : Dumars
+    address:
+        lines: |
+            458 Walkman Dr.
+            Suite #292
+        city    : Royal Oak
+        state   : MI
+        postal  : 48046
+ship-to: *id001
+product:
+    - sku         : BL394D
+      quantity    : 4
+      description : Basketball
+      price       : 450.00
+    - sku         : BL4438H
+      quantity    : 1
+      description : Super Hoop
+      price       : 2392.00
+tax  : 251.42
+total: 4443.52
+comments:
+    Late afternoon is best.
+    Backup contact is Nancy
+    Billsmer @ 338-4338.
diff --git a/test/YAMLParser/spec-02-28.data b/test/YAMLParser/spec-02-28.data
new file mode 100644
index 0000000000..695c27f5d5
--- /dev/null
+++ b/test/YAMLParser/spec-02-28.data
@@ -0,0 +1,28 @@
+# RUN: yaml-bench -canonical %s
+
+---
+Time: 2001-11-23 15:01:42 -5
+User: ed
+Warning:
+  This is an error message
+  for the log file
+---
+Time: 2001-11-23 15:02:31 -5
+User: ed
+Warning:
+  A slightly different error
+  message.
+---
+Date: 2001-11-23 15:03:17 -5
+User: ed
+Fatal:
+  Unknown variable "bar"
+Stack:
+  - file: TopClass.py
+    line: 23
+    code: |
+      x = MoreObject("345\n")
+  - file: MoreClass.py
+    line: 58
+    code: |-
+      foo = bar
diff --git a/test/YAMLParser/spec-05-01-utf8.data b/test/YAMLParser/spec-05-01-utf8.data
new file mode 100644
index 0000000000..349da06fab
--- /dev/null
+++ b/test/YAMLParser/spec-05-01-utf8.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+# Comment only.
diff --git a/test/YAMLParser/spec-05-02-utf8.data b/test/YAMLParser/spec-05-02-utf8.data
new file mode 100644
index 0000000000..b306bdb719
--- /dev/null
+++ b/test/YAMLParser/spec-05-02-utf8.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+
+# Invalid use of BOM
+# inside a
+# document.
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-05-03.data b/test/YAMLParser/spec-05-03.data
new file mode 100644
index 0000000000..461e98d2c2
--- /dev/null
+++ b/test/YAMLParser/spec-05-03.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+sequence:
+- one
+- two
+mapping:
+  ? sky
+  : blue
+  ? sea : green
diff --git a/test/YAMLParser/spec-05-04.data b/test/YAMLParser/spec-05-04.data
new file mode 100644
index 0000000000..52850f435b
--- /dev/null
+++ b/test/YAMLParser/spec-05-04.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+sequence: [ one, two, ]
+mapping: { sky: blue, sea: green }
diff --git a/test/YAMLParser/spec-05-05.data b/test/YAMLParser/spec-05-05.data
new file mode 100644
index 0000000000..499ee8ffb8
--- /dev/null
+++ b/test/YAMLParser/spec-05-05.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+# Comment only.
diff --git a/test/YAMLParser/spec-05-06.data b/test/YAMLParser/spec-05-06.data
new file mode 100644
index 0000000000..729141acf4
--- /dev/null
+++ b/test/YAMLParser/spec-05-06.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+anchored: !local &anchor value
+alias: *anchor
diff --git a/test/YAMLParser/spec-05-07.data b/test/YAMLParser/spec-05-07.data
new file mode 100644
index 0000000000..fc80a0d415
--- /dev/null
+++ b/test/YAMLParser/spec-05-07.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+literal: |
+  text
+folded: >
+  text
diff --git a/test/YAMLParser/spec-05-08.data b/test/YAMLParser/spec-05-08.data
new file mode 100644
index 0000000000..9f2b7ece53
--- /dev/null
+++ b/test/YAMLParser/spec-05-08.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+single: 'text'
+double: "text"
diff --git a/test/YAMLParser/spec-05-09.data b/test/YAMLParser/spec-05-09.data
new file mode 100644
index 0000000000..fc061fb298
--- /dev/null
+++ b/test/YAMLParser/spec-05-09.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+%YAML 1.1
+--- text
diff --git a/test/YAMLParser/spec-05-10.data b/test/YAMLParser/spec-05-10.data
new file mode 100644
index 0000000000..6788f0bfc3
--- /dev/null
+++ b/test/YAMLParser/spec-05-10.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+
+commercial-at: @text
+grave-accent: `text
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-05-11.data b/test/YAMLParser/spec-05-11.data
new file mode 100644
index 0000000000..7cba5562d5
--- /dev/null
+++ b/test/YAMLParser/spec-05-11.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+|
+  Generic line break (no glyph)
+  Generic line break (glyphed)  Line separator   Paragraph separator 
diff --git a/test/YAMLParser/spec-05-12.data b/test/YAMLParser/spec-05-12.data
new file mode 100644
index 0000000000..7dadff76f8
--- /dev/null
+++ b/test/YAMLParser/spec-05-12.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# We don't currently reject tabs as indentation.
+# XFAIL: *
+
+# Tabs do's and don'ts:
+# comment:
+quoted: "Quoted		"
+block: |
+  void main() {
+  	printf("Hello, world!\n");
+  }
+elsewhere:	# separation
+	indentation, in	plain scalar
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-05-13.data b/test/YAMLParser/spec-05-13.data
new file mode 100644
index 0000000000..db62e866a7
--- /dev/null
+++ b/test/YAMLParser/spec-05-13.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+  "Text containing   
+  both space and	
+  	tab	characters"
diff --git a/test/YAMLParser/spec-05-14.data b/test/YAMLParser/spec-05-14.data
new file mode 100644
index 0000000000..65451651b6
--- /dev/null
+++ b/test/YAMLParser/spec-05-14.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+"Fun with \\
+\" \a \b \e \f \
+\n \r \t \v \0 \
+\  \_ \N \L \P \
+\x41 \u0041 \U00000041"
+
+# CHECK: !!str "Fun with \\\n\" \a \b \e \f \n \r \t \v \0   \_ \N \L \P A A A"
diff --git a/test/YAMLParser/spec-05-15.data b/test/YAMLParser/spec-05-15.data
new file mode 100644
index 0000000000..cd8421ad27
--- /dev/null
+++ b/test/YAMLParser/spec-05-15.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+
+Bad escapes:
+  "\c
+  \xq-"
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-06-01.data b/test/YAMLParser/spec-06-01.data
new file mode 100644
index 0000000000..95b26bdb38
--- /dev/null
+++ b/test/YAMLParser/spec-06-01.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+  # Leading comment line spaces are
+   # neither content nor indentation.
+    
+Not indented:
+ By one space: |
+    By four
+      spaces
+ Flow style: [    # Leading spaces
+   By two,        # in flow style
+  Also by two,    # are neither
+# Tabs are not allowed:
+#  	Still by two   # content nor
+    Still by two   # content nor
+    ]             # indentation.
diff --git a/test/YAMLParser/spec-06-02.data b/test/YAMLParser/spec-06-02.data
new file mode 100644
index 0000000000..40a15c9f3e
--- /dev/null
+++ b/test/YAMLParser/spec-06-02.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+  # Comment
+   
+
diff --git a/test/YAMLParser/spec-06-03.data b/test/YAMLParser/spec-06-03.data
new file mode 100644
index 0000000000..c1893ef083
--- /dev/null
+++ b/test/YAMLParser/spec-06-03.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+key:    # Comment
+  value
diff --git a/test/YAMLParser/spec-06-04.data b/test/YAMLParser/spec-06-04.data
new file mode 100644
index 0000000000..b61bcc6b95
--- /dev/null
+++ b/test/YAMLParser/spec-06-04.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+key:    # Comment
+        # lines
+  value
+
diff --git a/test/YAMLParser/spec-06-05.data b/test/YAMLParser/spec-06-05.data
new file mode 100644
index 0000000000..4bcaa5a818
--- /dev/null
+++ b/test/YAMLParser/spec-06-05.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+{ first: Sammy, last: Sosa }:
+# Statistics:
+  hr:  # Home runs
+    65
+  avg: # Average
+    0.278
diff --git a/test/YAMLParser/spec-06-06.data b/test/YAMLParser/spec-06-06.data
new file mode 100644
index 0000000000..67e39ddf89
--- /dev/null
+++ b/test/YAMLParser/spec-06-06.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+plain: text
+  lines
+quoted: "text
+  	lines"
+block: |
+  text
+   	lines
diff --git a/test/YAMLParser/spec-06-07.data b/test/YAMLParser/spec-06-07.data
new file mode 100644
index 0000000000..451bd349e3
--- /dev/null
+++ b/test/YAMLParser/spec-06-07.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+- foo
+ 
+  bar
+- |-
+  foo
+ 
+  bar
+  
diff --git a/test/YAMLParser/spec-06-08.data b/test/YAMLParser/spec-06-08.data
new file mode 100644
index 0000000000..aa06f847ea
--- /dev/null
+++ b/test/YAMLParser/spec-06-08.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+>-
+  specific   trimmed     as  space
diff --git a/test/YAMLParser/spec-07-01.data b/test/YAMLParser/spec-07-01.data
new file mode 100644
index 0000000000..21bc5e59d5
--- /dev/null
+++ b/test/YAMLParser/spec-07-01.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+%FOO  bar baz # Should be ignored
+               # with a warning.
+--- "foo"
diff --git a/test/YAMLParser/spec-07-02.data b/test/YAMLParser/spec-07-02.data
new file mode 100644
index 0000000000..bf0e758c88
--- /dev/null
+++ b/test/YAMLParser/spec-07-02.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+%YAML 1.2 # Attempt parsing
+           # with a warning
+---
+"foo"
diff --git a/test/YAMLParser/spec-07-03.data b/test/YAMLParser/spec-07-03.data
new file mode 100644
index 0000000000..7ca9483016
--- /dev/null
+++ b/test/YAMLParser/spec-07-03.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+
+%YAML 1.1
+%YAML 1.1
+foo
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-07-04.data b/test/YAMLParser/spec-07-04.data
new file mode 100644
index 0000000000..beba7d06ec
--- /dev/null
+++ b/test/YAMLParser/spec-07-04.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+%TAG !yaml! tag:yaml.org,2002:
+---
+!yaml!str "foo"
diff --git a/test/YAMLParser/spec-07-05.data b/test/YAMLParser/spec-07-05.data
new file mode 100644
index 0000000000..279b54afa1
--- /dev/null
+++ b/test/YAMLParser/spec-07-05.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# We don't currently parse TAG directives.
+# XFAIL: *
+
+%TAG ! !foo
+%TAG ! !foo
+bar
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-07-06.data b/test/YAMLParser/spec-07-06.data
new file mode 100644
index 0000000000..9f27f91f31
--- /dev/null
+++ b/test/YAMLParser/spec-07-06.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+%TAG !      !foo
+%TAG !yaml! tag:yaml.org,2002:
+---
+- !bar "baz"
+- !yaml!str "string"
diff --git a/test/YAMLParser/spec-07-07a.data b/test/YAMLParser/spec-07-07a.data
new file mode 100644
index 0000000000..e51f8f7d69
--- /dev/null
+++ b/test/YAMLParser/spec-07-07a.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+# Private application:
+!foo "bar"
diff --git a/test/YAMLParser/spec-07-07b.data b/test/YAMLParser/spec-07-07b.data
new file mode 100644
index 0000000000..003d575572
--- /dev/null
+++ b/test/YAMLParser/spec-07-07b.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+# Migrated to global:
+%TAG ! tag:ben-kiki.org,2000:app/
+---
+!foo "bar"
diff --git a/test/YAMLParser/spec-07-08.data b/test/YAMLParser/spec-07-08.data
new file mode 100644
index 0000000000..7197404b38
--- /dev/null
+++ b/test/YAMLParser/spec-07-08.data
@@ -0,0 +1,11 @@
+# RUN: yaml-bench -canonical %s
+
+# Explicitly specify default settings:
+%TAG !     !
+%TAG !!    tag:yaml.org,2002:
+# Named handles have no default:
+%TAG !o! tag:ben-kiki.org,2000:
+---
+- !foo "bar"
+- !!str "string"
+- !o!type "baz"
diff --git a/test/YAMLParser/spec-07-09.data b/test/YAMLParser/spec-07-09.data
new file mode 100644
index 0000000000..1f98ba0414
--- /dev/null
+++ b/test/YAMLParser/spec-07-09.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s
+
+---
+foo
+...
+# Repeated end marker.
+...
+---
+bar
+# No end marker.
+---
+baz
+...
diff --git a/test/YAMLParser/spec-07-10.data b/test/YAMLParser/spec-07-10.data
new file mode 100644
index 0000000000..a176683478
--- /dev/null
+++ b/test/YAMLParser/spec-07-10.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s
+
+"Root flow
+ scalar"
+--- !!str >
+ Root block
+ scalar
+---
+# Root collection:
+foo : bar
+... # Is optional.
+---
+# Explicit document may be empty.
diff --git a/test/YAMLParser/spec-07-11.data b/test/YAMLParser/spec-07-11.data
new file mode 100644
index 0000000000..ce14b7ebe4
--- /dev/null
+++ b/test/YAMLParser/spec-07-11.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+# A stream may contain
+# no documents.
diff --git a/test/YAMLParser/spec-07-12a.data b/test/YAMLParser/spec-07-12a.data
new file mode 100644
index 0000000000..7327f8188e
--- /dev/null
+++ b/test/YAMLParser/spec-07-12a.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+# Implicit document. Root
+# collection (mapping) node.
+foo : bar
diff --git a/test/YAMLParser/spec-07-12b.data b/test/YAMLParser/spec-07-12b.data
new file mode 100644
index 0000000000..d759abea7d
--- /dev/null
+++ b/test/YAMLParser/spec-07-12b.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+# Explicit document. Root
+# scalar (literal) node.
+--- |
+ Text content
diff --git a/test/YAMLParser/spec-07-13.data b/test/YAMLParser/spec-07-13.data
new file mode 100644
index 0000000000..ab74df1018
--- /dev/null
+++ b/test/YAMLParser/spec-07-13.data
@@ -0,0 +1,11 @@
+# RUN: yaml-bench -canonical %s
+
+! "First document"
+---
+!foo "No directives"
+%TAG ! !foo
+---
+!bar "With directives"
+%YAML 1.1
+---
+!baz "Reset settings"
diff --git a/test/YAMLParser/spec-08-01.data b/test/YAMLParser/spec-08-01.data
new file mode 100644
index 0000000000..5abbfa8094
--- /dev/null
+++ b/test/YAMLParser/spec-08-01.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+!!str &a1 "foo" : !!str bar
+&a2 baz : *a1
diff --git a/test/YAMLParser/spec-08-02.data b/test/YAMLParser/spec-08-02.data
new file mode 100644
index 0000000000..8a75783a70
--- /dev/null
+++ b/test/YAMLParser/spec-08-02.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+First occurrence: &anchor Value
+Second occurrence: *anchor
diff --git a/test/YAMLParser/spec-08-03.data b/test/YAMLParser/spec-08-03.data
new file mode 100644
index 0000000000..8c715305a8
--- /dev/null
+++ b/test/YAMLParser/spec-08-03.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+!<tag:yaml.org,2002:str> foo :
+  !<!bar> baz
diff --git a/test/YAMLParser/spec-08-04.data b/test/YAMLParser/spec-08-04.data
new file mode 100644
index 0000000000..f13538bc87
--- /dev/null
+++ b/test/YAMLParser/spec-08-04.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# We don't currently look at the content of literal tags.
+# XFAIL: *
+
+- !<!> foo
+- !<$:?> bar
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-08-05.data b/test/YAMLParser/spec-08-05.data
new file mode 100644
index 0000000000..0613446c89
--- /dev/null
+++ b/test/YAMLParser/spec-08-05.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+%TAG !o! tag:ben-kiki.org,2000:
+---
+- !local foo
+- !!str bar
+- !o!type baz
diff --git a/test/YAMLParser/spec-08-06.data b/test/YAMLParser/spec-08-06.data
new file mode 100644
index 0000000000..a811bfdefe
--- /dev/null
+++ b/test/YAMLParser/spec-08-06.data
@@ -0,0 +1,12 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# We don't currently validate tags.
+# XFAIL: *
+
+%TAG !o! tag:ben-kiki.org,2000:
+---
+- !$a!b foo
+- !o! bar
+- !h!type baz
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-08-07.data b/test/YAMLParser/spec-08-07.data
new file mode 100644
index 0000000000..fc3f2df7f0
--- /dev/null
+++ b/test/YAMLParser/spec-08-07.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+# Assuming conventional resolution:
+- "12"
+- 12
+- ! 12
diff --git a/test/YAMLParser/spec-08-08.data b/test/YAMLParser/spec-08-08.data
new file mode 100644
index 0000000000..460029f6ac
--- /dev/null
+++ b/test/YAMLParser/spec-08-08.data
@@ -0,0 +1,15 @@
+# RUN: yaml-bench -canonical %s
+
+---
+foo:
+ "bar
+ baz"
+---
+"foo
+ bar"
+---
+foo
+ bar
+--- |
+ foo
+...
diff --git a/test/YAMLParser/spec-08-09.data b/test/YAMLParser/spec-08-09.data
new file mode 100644
index 0000000000..1c82585943
--- /dev/null
+++ b/test/YAMLParser/spec-08-09.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s
+
+---
+scalars:
+  plain: !!str some text
+  quoted:
+    single: 'some text'
+    double: "some text"
+collections:
+  sequence: !!seq [ !!str entry,
+    # Mapping entry:
+      key: value ]
+  mapping: { key: value }
diff --git a/test/YAMLParser/spec-08-10.data b/test/YAMLParser/spec-08-10.data
new file mode 100644
index 0000000000..74054eb088
--- /dev/null
+++ b/test/YAMLParser/spec-08-10.data
@@ -0,0 +1,17 @@
+# RUN: yaml-bench -canonical %s
+
+block styles:
+  scalars:
+    literal: !!str |
+      #!/usr/bin/perl
+      print "Hello, world!\n";
+    folded: >
+      This sentence
+      is false.
+  collections: !!map
+    sequence: !!seq # Entry:
+      - entry # Plain
+      # Mapping entry:
+      - key: value
+    mapping: 
+      key: value
diff --git a/test/YAMLParser/spec-08-11.data b/test/YAMLParser/spec-08-11.data
new file mode 100644
index 0000000000..8a75783a70
--- /dev/null
+++ b/test/YAMLParser/spec-08-11.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+First occurrence: &anchor Value
+Second occurrence: *anchor
diff --git a/test/YAMLParser/spec-08-12.data b/test/YAMLParser/spec-08-12.data
new file mode 100644
index 0000000000..69e78b42d2
--- /dev/null
+++ b/test/YAMLParser/spec-08-12.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+[
+  Without properties,
+  &anchor "Anchored",
+  !!str 'Tagged',
+  *anchor, # Alias node
+  !!str ,  # Empty plain scalar
+  '',   # Empty plain scalar
+]
diff --git a/test/YAMLParser/spec-08-13.data b/test/YAMLParser/spec-08-13.data
new file mode 100644
index 0000000000..931d56a0cf
--- /dev/null
+++ b/test/YAMLParser/spec-08-13.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+{
+  ? foo :,
+  ? : bar,
+}
diff --git a/test/YAMLParser/spec-08-14.data b/test/YAMLParser/spec-08-14.data
new file mode 100644
index 0000000000..61c448351a
--- /dev/null
+++ b/test/YAMLParser/spec-08-14.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+- "flow in block"
+- >
+ Block scalar
+- !!map # Block collection
+  foo : bar
diff --git a/test/YAMLParser/spec-08-15.data b/test/YAMLParser/spec-08-15.data
new file mode 100644
index 0000000000..f21e84a431
--- /dev/null
+++ b/test/YAMLParser/spec-08-15.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+- # Empty plain scalar
+- ? foo
+  :
+  ?
+  : bar
diff --git a/test/YAMLParser/spec-09-01.data b/test/YAMLParser/spec-09-01.data
new file mode 100644
index 0000000000..8999b49616
--- /dev/null
+++ b/test/YAMLParser/spec-09-01.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+"simple key" : {
+  "also simple" : value,
+  ? "not a
+  simple key" : "any
+  value"
+}
diff --git a/test/YAMLParser/spec-09-02.data b/test/YAMLParser/spec-09-02.data
new file mode 100644
index 0000000000..f69037820e
--- /dev/null
+++ b/test/YAMLParser/spec-09-02.data
@@ -0,0 +1,14 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# Indent trimming is not yet implemented.
+# XFAIL: *
+
+ "as space
+ trimmed
+
+ specific
+
+ escaped	\
+ none"
+
+# CHECK: !!str "as space trimmed\nspecific\nescaped\tnone"
diff --git a/test/YAMLParser/spec-09-03.data b/test/YAMLParser/spec-09-03.data
new file mode 100644
index 0000000000..3fb0d8b184
--- /dev/null
+++ b/test/YAMLParser/spec-09-03.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+- "
+  last"
+- " 	
+  last"
+- " 	first
+  last"
diff --git a/test/YAMLParser/spec-09-04.data b/test/YAMLParser/spec-09-04.data
new file mode 100644
index 0000000000..4178ec6bef
--- /dev/null
+++ b/test/YAMLParser/spec-09-04.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+ "first
+ 	inner 1	
+ \ inner 2 \
+ last"
diff --git a/test/YAMLParser/spec-09-05.data b/test/YAMLParser/spec-09-05.data
new file mode 100644
index 0000000000..e482d53662
--- /dev/null
+++ b/test/YAMLParser/spec-09-05.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+- "first
+  	"
+- "first
+
+  	last"
+- "first
+ inner
+ \ 	last"
diff --git a/test/YAMLParser/spec-09-06.data b/test/YAMLParser/spec-09-06.data
new file mode 100644
index 0000000000..edc0cbba90
--- /dev/null
+++ b/test/YAMLParser/spec-09-06.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+ 'here''s to "quotes"'
diff --git a/test/YAMLParser/spec-09-07.data b/test/YAMLParser/spec-09-07.data
new file mode 100644
index 0000000000..3c010ca5b9
--- /dev/null
+++ b/test/YAMLParser/spec-09-07.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+'simple key' : {
+  'also simple' : value,
+  ? 'not a
+  simple key' : 'any
+  value'
+}
diff --git a/test/YAMLParser/spec-09-08.data b/test/YAMLParser/spec-09-08.data
new file mode 100644
index 0000000000..d114e58fca
--- /dev/null
+++ b/test/YAMLParser/spec-09-08.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+ 'as space	 trimmed  specific  none'
diff --git a/test/YAMLParser/spec-09-09.data b/test/YAMLParser/spec-09-09.data
new file mode 100644
index 0000000000..2fec1b536e
--- /dev/null
+++ b/test/YAMLParser/spec-09-09.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+- '
+  last'
+- ' 	
+  last'
+- ' 	first
+  last'
diff --git a/test/YAMLParser/spec-09-10.data b/test/YAMLParser/spec-09-10.data
new file mode 100644
index 0000000000..faabfb06b5
--- /dev/null
+++ b/test/YAMLParser/spec-09-10.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+ 'first
+ 	inner	
+ last'
diff --git a/test/YAMLParser/spec-09-11.data b/test/YAMLParser/spec-09-11.data
new file mode 100644
index 0000000000..3f487ad6b0
--- /dev/null
+++ b/test/YAMLParser/spec-09-11.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+- 'first
+  	'
+- 'first
+
+  	last'
diff --git a/test/YAMLParser/spec-09-12.data b/test/YAMLParser/spec-09-12.data
new file mode 100644
index 0000000000..d992c589cd
--- /dev/null
+++ b/test/YAMLParser/spec-09-12.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+# Outside flow collection:
+- ::std::vector
+- Up, up, and away!
+- -123
+# Inside flow collection:
+- [ '::std::vector',
+  "Up, up, and away!",
+  -123 ]
diff --git a/test/YAMLParser/spec-09-13.data b/test/YAMLParser/spec-09-13.data
new file mode 100644
index 0000000000..d48f2d2c47
--- /dev/null
+++ b/test/YAMLParser/spec-09-13.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+simple key : {
+  also simple : value,
+  ? not a
+  simple key : any
+  value
+}
diff --git a/test/YAMLParser/spec-09-14.data b/test/YAMLParser/spec-09-14.data
new file mode 100644
index 0000000000..890f6bf2e7
--- /dev/null
+++ b/test/YAMLParser/spec-09-14.data
@@ -0,0 +1,21 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# Not quite sure why this doesn't fail.
+# XFAIL: *
+
+---
+--- ||| : foo
+... >>>: bar
+---
+[
+---
+,
+... ,
+{
+--- :
+... # Nested
+}
+]
+...
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-09-15.data b/test/YAMLParser/spec-09-15.data
new file mode 100644
index 0000000000..4111d1ba2c
--- /dev/null
+++ b/test/YAMLParser/spec-09-15.data
@@ -0,0 +1,15 @@
+# RUN: yaml-bench -canonical %s
+
+---
+"---" : foo
+...: bar
+---
+[
+---,
+...,
+{
+? ---
+: ...
+}
+]
+...
diff --git a/test/YAMLParser/spec-09-16.data b/test/YAMLParser/spec-09-16.data
new file mode 100644
index 0000000000..e595f47bec
--- /dev/null
+++ b/test/YAMLParser/spec-09-16.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+# Tabs are confusing:
+# as space/trimmed/specific/none
+ as space  trimmed  specific  none
diff --git a/test/YAMLParser/spec-09-17.data b/test/YAMLParser/spec-09-17.data
new file mode 100644
index 0000000000..1bacf4d68b
--- /dev/null
+++ b/test/YAMLParser/spec-09-17.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+ first line 
+   
+  more line
diff --git a/test/YAMLParser/spec-09-18.data b/test/YAMLParser/spec-09-18.data
new file mode 100644
index 0000000000..ac623f9973
--- /dev/null
+++ b/test/YAMLParser/spec-09-18.data
@@ -0,0 +1,11 @@
+# RUN: yaml-bench -canonical %s
+
+- | # Just the style
+ literal
+- >1 # Indentation indicator
+  folded
+- |+ # Chomping indicator
+ keep
+
+- >-1 # Both indicators
+  strip
diff --git a/test/YAMLParser/spec-09-19.data b/test/YAMLParser/spec-09-19.data
new file mode 100644
index 0000000000..52aa157137
--- /dev/null
+++ b/test/YAMLParser/spec-09-19.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+- |
+ literal
+- >
+ folded
diff --git a/test/YAMLParser/spec-09-20.data b/test/YAMLParser/spec-09-20.data
new file mode 100644
index 0000000000..86fc7ab9a2
--- /dev/null
+++ b/test/YAMLParser/spec-09-20.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s
+
+- |
+ detected
+- >
+ 
+  
+  # detected
+- |1
+  explicit
+- >
+ 	
+ detected
diff --git a/test/YAMLParser/spec-09-21.data b/test/YAMLParser/spec-09-21.data
new file mode 100644
index 0000000000..2bcc28337f
--- /dev/null
+++ b/test/YAMLParser/spec-09-21.data
@@ -0,0 +1,12 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+
+- |
+
+ text
+- >
+  text
+ text
+- |1
+ text
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-09-22.data b/test/YAMLParser/spec-09-22.data
new file mode 100644
index 0000000000..b95faa50b5
--- /dev/null
+++ b/test/YAMLParser/spec-09-22.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+strip: |-
+  text clip: |
+  textkeep: |+
+  text 
diff --git a/test/YAMLParser/spec-09-23.data b/test/YAMLParser/spec-09-23.data
new file mode 100644
index 0000000000..94f839818b
--- /dev/null
+++ b/test/YAMLParser/spec-09-23.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s
+
+ # Strip
+  # Comments:
+strip: |-
+  # text     # Clip
+  # comments:
+clip: |
+  # text   # Keep
+  # comments:
+keep: |+
+  # text  # Trail
+  # comments.
diff --git a/test/YAMLParser/spec-09-24.data b/test/YAMLParser/spec-09-24.data
new file mode 100644
index 0000000000..f08eae6a80
--- /dev/null
+++ b/test/YAMLParser/spec-09-24.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+strip: >-
+
+clip: >
+
+keep: |+
+
diff --git a/test/YAMLParser/spec-09-25.data b/test/YAMLParser/spec-09-25.data
new file mode 100644
index 0000000000..b15edb523d
--- /dev/null
+++ b/test/YAMLParser/spec-09-25.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+| # Simple block scalar
+ literal
+ 	text
diff --git a/test/YAMLParser/spec-09-26.data b/test/YAMLParser/spec-09-26.data
new file mode 100644
index 0000000000..286740ed39
--- /dev/null
+++ b/test/YAMLParser/spec-09-26.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+|
+ 
+  
+  literal
+ 
+  text
+
+ # Comment
diff --git a/test/YAMLParser/spec-09-27.data b/test/YAMLParser/spec-09-27.data
new file mode 100644
index 0000000000..286740ed39
--- /dev/null
+++ b/test/YAMLParser/spec-09-27.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+|
+ 
+  
+  literal
+ 
+  text
+
+ # Comment
diff --git a/test/YAMLParser/spec-09-28.data b/test/YAMLParser/spec-09-28.data
new file mode 100644
index 0000000000..286740ed39
--- /dev/null
+++ b/test/YAMLParser/spec-09-28.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+|
+ 
+  
+  literal
+ 
+  text
+
+ # Comment
diff --git a/test/YAMLParser/spec-09-29.data b/test/YAMLParser/spec-09-29.data
new file mode 100644
index 0000000000..e8906ff64a
--- /dev/null
+++ b/test/YAMLParser/spec-09-29.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+> # Simple folded scalar
+ folded
+ text
+ 	lines
diff --git a/test/YAMLParser/spec-09-30.data b/test/YAMLParser/spec-09-30.data
new file mode 100644
index 0000000000..a2d8bf4950
--- /dev/null
+++ b/test/YAMLParser/spec-09-30.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+>
+ folded
+ line
+
+ next
+ line
+
+   * bullet
+   * list
+
+ last
+ line
+
+# Comment
diff --git a/test/YAMLParser/spec-09-31.data b/test/YAMLParser/spec-09-31.data
new file mode 100644
index 0000000000..a2d8bf4950
--- /dev/null
+++ b/test/YAMLParser/spec-09-31.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+>
+ folded
+ line
+
+ next
+ line
+
+   * bullet
+   * list
+
+ last
+ line
+
+# Comment
diff --git a/test/YAMLParser/spec-09-32.data b/test/YAMLParser/spec-09-32.data
new file mode 100644
index 0000000000..a2d8bf4950
--- /dev/null
+++ b/test/YAMLParser/spec-09-32.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+>
+ folded
+ line
+
+ next
+ line
+
+   * bullet
+   * list
+
+ last
+ line
+
+# Comment
diff --git a/test/YAMLParser/spec-09-33.data b/test/YAMLParser/spec-09-33.data
new file mode 100644
index 0000000000..a2d8bf4950
--- /dev/null
+++ b/test/YAMLParser/spec-09-33.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+>
+ folded
+ line
+
+ next
+ line
+
+   * bullet
+   * list
+
+ last
+ line
+
+# Comment
diff --git a/test/YAMLParser/spec-10-01.data b/test/YAMLParser/spec-10-01.data
new file mode 100644
index 0000000000..549a54db42
--- /dev/null
+++ b/test/YAMLParser/spec-10-01.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+- [ inner, inner, ]
+- [inner,last]
diff --git a/test/YAMLParser/spec-10-02.data b/test/YAMLParser/spec-10-02.data
new file mode 100644
index 0000000000..662427a0c0
--- /dev/null
+++ b/test/YAMLParser/spec-10-02.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+[
+"double
+ quoted", 'single
+           quoted',
+plain
+ text, [ nested ],
+single: pair ,
+]
diff --git a/test/YAMLParser/spec-10-03.data b/test/YAMLParser/spec-10-03.data
new file mode 100644
index 0000000000..43f300e40c
--- /dev/null
+++ b/test/YAMLParser/spec-10-03.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+block: # Block
+       # sequence
+- one
+- two : three
diff --git a/test/YAMLParser/spec-10-04.data b/test/YAMLParser/spec-10-04.data
new file mode 100644
index 0000000000..733a570efe
--- /dev/null
+++ b/test/YAMLParser/spec-10-04.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+block:
+- one
+-
+ - two
diff --git a/test/YAMLParser/spec-10-05.data b/test/YAMLParser/spec-10-05.data
new file mode 100644
index 0000000000..3848b2a200
--- /dev/null
+++ b/test/YAMLParser/spec-10-05.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+- # Empty
+- |
+ block node
+- - one # in-line
+  - two # sequence
+- one: two # in-line
+           # mapping
diff --git a/test/YAMLParser/spec-10-06.data b/test/YAMLParser/spec-10-06.data
new file mode 100644
index 0000000000..40efb2b916
--- /dev/null
+++ b/test/YAMLParser/spec-10-06.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+- { inner : entry , also: inner , }
+- {inner: entry,last : entry}
diff --git a/test/YAMLParser/spec-10-07.data b/test/YAMLParser/spec-10-07.data
new file mode 100644
index 0000000000..7aa350e40b
--- /dev/null
+++ b/test/YAMLParser/spec-10-07.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+{
+? : value, # Empty key
+? explicit
+ key: value,
+simple key : value,
+[ collection, simple, key ]: value
+}
diff --git a/test/YAMLParser/spec-10-08.data b/test/YAMLParser/spec-10-08.data
new file mode 100644
index 0000000000..5b981e9833
--- /dev/null
+++ b/test/YAMLParser/spec-10-08.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# This fails because even without a key token, some contexts (in this case flow
+# maps) allow implicit null keys, which mix with this in weird ways.
+# XFAIL: *
+
+{
+multi-line
+ simple key : value,
+very long ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................(>1KB)............................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
....................................................................... key: value
+}
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-10-09.data b/test/YAMLParser/spec-10-09.data
new file mode 100644
index 0000000000..a6b1fd00dd
--- /dev/null
+++ b/test/YAMLParser/spec-10-09.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+{
+key : value,
+empty: # empty value↓
+}
diff --git a/test/YAMLParser/spec-10-10.data b/test/YAMLParser/spec-10-10.data
new file mode 100644
index 0000000000..c97901ddfb
--- /dev/null
+++ b/test/YAMLParser/spec-10-10.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+{
+? explicit key1 : explicit value,
+? explicit key2 : , # Explicit empty
+? explicit key3,     # Empty value
+simple key1 : explicit value,
+simple key2 : ,     # Explicit empty
+simple key3,         # Empty value
+}
diff --git a/test/YAMLParser/spec-10-11.data b/test/YAMLParser/spec-10-11.data
new file mode 100644
index 0000000000..51bd06f020
--- /dev/null
+++ b/test/YAMLParser/spec-10-11.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+[
+? explicit key1 : explicit value,
+? explicit key2 : , # Explicit empty
+? explicit key3,     # Implicit empty
+simple key1 : explicit value,
+simple key2 : ,     # Explicit empty
+]
diff --git a/test/YAMLParser/spec-10-12.data b/test/YAMLParser/spec-10-12.data
new file mode 100644
index 0000000000..65a90b3f2c
--- /dev/null
+++ b/test/YAMLParser/spec-10-12.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+block: # Block
+    # mapping
+ key: value
diff --git a/test/YAMLParser/spec-10-13.data b/test/YAMLParser/spec-10-13.data
new file mode 100644
index 0000000000..ccadeb1e7d
--- /dev/null
+++ b/test/YAMLParser/spec-10-13.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+? explicit key # implicit value
+? |
+  block key
+: - one # explicit in-line
+  - two # block value
diff --git a/test/YAMLParser/spec-10-14.data b/test/YAMLParser/spec-10-14.data
new file mode 100644
index 0000000000..866ec1f7b2
--- /dev/null
+++ b/test/YAMLParser/spec-10-14.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+plain key: # empty value
+"quoted key":
+- one # explicit next-line
+- two # block value
diff --git a/test/YAMLParser/spec-10-15.data b/test/YAMLParser/spec-10-15.data
new file mode 100644
index 0000000000..7d061bddd1
--- /dev/null
+++ b/test/YAMLParser/spec-10-15.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+- sun: yellow
+- ? earth: blue
+  : moon: white
diff --git a/test/YAMLParser/str.data b/test/YAMLParser/str.data
new file mode 100644
index 0000000000..bf013b6f52
--- /dev/null
+++ b/test/YAMLParser/str.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+- abcd
diff --git a/test/YAMLParser/timestamp-bugs.data b/test/YAMLParser/timestamp-bugs.data
new file mode 100644
index 0000000000..bf41a21b22
--- /dev/null
+++ b/test/YAMLParser/timestamp-bugs.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+- 2001-12-14 21:59:43.10 -5:30
+- 2001-12-14 21:59:43.10 +5:30
+- 2001-12-14 21:59:43.00101
+- 2001-12-14 21:59:43+1
+- 2001-12-14 21:59:43-1:30
+- 2005-07-08 17:35:04.517600
diff --git a/test/YAMLParser/timestamp.data b/test/YAMLParser/timestamp.data
new file mode 100644
index 0000000000..79945451b5
--- /dev/null
+++ b/test/YAMLParser/timestamp.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+- 2001-12-15T02:59:43.1Z
+- 2001-12-14t21:59:43.10-05:00
+- 2001-12-14 21:59:43.10 -5
+- 2001-12-15 2:59:43.10
+- 2002-12-14
diff --git a/test/YAMLParser/utf8-implicit.data b/test/YAMLParser/utf8-implicit.data
new file mode 100644
index 0000000000..ee2791fb06
--- /dev/null
+++ b/test/YAMLParser/utf8-implicit.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+--- implicit UTF-8
diff --git a/test/YAMLParser/utf8.data b/test/YAMLParser/utf8.data
new file mode 100644
index 0000000000..3935e9d121
--- /dev/null
+++ b/test/YAMLParser/utf8.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+--- UTF-8
diff --git a/test/YAMLParser/value.data b/test/YAMLParser/value.data
new file mode 100644
index 0000000000..311ccd4f22
--- /dev/null
+++ b/test/YAMLParser/value.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+- =
diff --git a/test/YAMLParser/yaml.data b/test/YAMLParser/yaml.data
new file mode 100644
index 0000000000..3ce5e4b73e
--- /dev/null
+++ b/test/YAMLParser/yaml.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+- !!yaml '!'
+- !!yaml '&'
+- !!yaml '*'
diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt
index ce0f5cd822..5d691728d8 100644
--- a/unittests/CMakeLists.txt
+++ b/unittests/CMakeLists.txt
@@ -175,4 +175,5 @@ add_llvm_unittest(Support
   Support/TimeValue.cpp
   Support/TypeBuilderTest.cpp
   Support/ValueHandleTest.cpp
+  Support/YAMLParserTest.cpp
   )
diff --git a/unittests/Support/YAMLParserTest.cpp b/unittests/Support/YAMLParserTest.cpp
new file mode 100644
index 0000000000..e88427ac09
--- /dev/null
+++ b/unittests/Support/YAMLParserTest.cpp
@@ -0,0 +1,179 @@
+//===- unittest/Support/YAMLParserTest ------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/YAMLParser.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+
+// Checks that the given input gives a parse error: validate() must return
+// false and failed() must return true. Message and Input label any failure.
+static void ExpectParseError(StringRef Message, StringRef Input) {
+  SourceMgr SM;
+  yaml::Stream Stream(Input, SM);
+  EXPECT_FALSE(Stream.validate()) << Message << ": " << Input;
+  EXPECT_TRUE(Stream.failed()) << Message << ": " << Input;
+}
+
+// Checks that validate() accepts the input; Message and Input label a failure.
+static void ExpectParseSuccess(StringRef Message, StringRef Input) {
+  SourceMgr SM;
+  yaml::Stream Stream(Input, SM);
+  EXPECT_TRUE(Stream.validate()) << Message << ": " << Input;
+}
+
+TEST(YAMLParser, ParsesEmptyArray) {
+  ExpectParseSuccess("Empty array", "[]");
+}
+
+TEST(YAMLParser, FailsIfNotClosingArray) {
+  ExpectParseError("Not closing array", "[");
+  ExpectParseError("Not closing array", "  [  ");
+  ExpectParseError("Not closing array", "  [x");
+}
+
+TEST(YAMLParser, ParsesEmptyArrayWithWhitespace) {
+  ExpectParseSuccess("Array with spaces", "  [  ]  ");
+  ExpectParseSuccess("All whitespaces", "\t\r\n[\t\n \t\r ]\t\r \n\n");
+}
+
+TEST(YAMLParser, ParsesEmptyObject) {
+  ExpectParseSuccess("Empty object", "[{}]");
+}
+
+TEST(YAMLParser, ParsesObject) {
+  ExpectParseSuccess("Object with an entry", "[{\"a\":\"/b\"}]");
+}
+
+TEST(YAMLParser, ParsesMultipleKeyValuePairsInObject) {
+  ExpectParseSuccess("Multiple key, value pairs",
+                     "[{\"a\":\"/b\",\"c\":\"d\",\"e\":\"f\"}]");
+}
+
+TEST(YAMLParser, FailsIfNotClosingObject) {
+  ExpectParseError("Missing close on empty", "[{]");
+  ExpectParseError("Missing close after pair", "[{\"a\":\"b\"]");
+}
+
+TEST(YAMLParser, FailsIfMissingColon) {
+  ExpectParseError("Missing colon between key and value", "[{\"a\"\"/b\"}]");
+  ExpectParseError("Missing colon between key and value", "[{\"a\" \"b\"}]");
+}
+
+TEST(YAMLParser, FailsOnMissingQuote) {
+  ExpectParseError("Missing open quote", "[{a\":\"b\"}]");
+  ExpectParseError("Missing closing quote", "[{\"a\":\"b}]");
+}
+
+TEST(YAMLParser, ParsesEscapedQuotes) {
+  ExpectParseSuccess("Parses escaped string in key and value",
+                     "[{\"a\":\"\\\"b\\\"  \\\" \\\"\"}]");
+}
+
+TEST(YAMLParser, ParsesEmptyString) {
+  ExpectParseSuccess("Parses empty string in value", "[{\"a\":\"\"}]");
+}
+
+TEST(YAMLParser, ParsesMultipleObjects) {
+  ExpectParseSuccess(
+      "Multiple objects in array",
+      "["
+      " { \"a\" : \"b\" },"
+      " { \"a\" : \"b\" },"
+      " { \"a\" : \"b\" }"
+      "]");
+}
+
+TEST(YAMLParser, FailsOnMissingComma) {
+  ExpectParseError(
+      "Missing comma",
+      "["
+      " { \"a\" : \"b\" }"
+      " { \"a\" : \"b\" }"
+      "]");
+}
+
+TEST(YAMLParser, ParsesSpacesInBetweenTokens) {
+  ExpectParseSuccess(
+      "Various whitespace between tokens",
+      " \t \n\n \r [ \t \n\n \r"
+      " \t \n\n \r { \t \n\n \r\"a\"\t \n\n \r :"
+      " \t \n\n \r \"b\"\t \n\n \r } \t \n\n \r,\t \n\n \r"
+      " \t \n\n \r { \t \n\n \r\"a\"\t \n\n \r :"
+      " \t \n\n \r \"b\"\t \n\n \r } \t \n\n \r]\t \n\n \r");
+}
+
+TEST(YAMLParser, ParsesArrayOfArrays) {
+  ExpectParseSuccess("Array of arrays", "[[]]");
+}
+
+TEST(YAMLParser, HandlesEndOfFileGracefully) {
+  ExpectParseError("In string starting with EOF", "[\"");
+  ExpectParseError("In string hitting EOF", "[\"   ");
+  ExpectParseError("In string escaping EOF", "[\"  \\");
+  ExpectParseError("In array starting with EOF", "[");
+  ExpectParseError("In array element starting with EOF", "[[], ");
+  ExpectParseError("In array hitting EOF", "[[] ");
+  ExpectParseError("In array hitting EOF", "[[]");
+  ExpectParseError("In object hitting EOF", "{\"\"");
+}
+
+// Checks that the given string can be parsed into an identical string inside
+// of an array. A root or element of the wrong kind fails the test instead of
+// being dereferenced through an unchecked dyn_cast<>.
+static void ExpectCanParseString(StringRef String) {
+  std::string StringInArray = (llvm::Twine("[\"") + String + "\"]").str();
+  SourceMgr SM;
+  yaml::Stream Stream(StringInArray, SM);
+  yaml::Node *Root = Stream.begin()->getRoot();
+  ASSERT_TRUE(Root && isa<yaml::SequenceNode>(Root)) << "Not a sequence";
+  yaml::Node *First = cast<yaml::SequenceNode>(Root)->begin();
+  ASSERT_TRUE(First && isa<yaml::ScalarNode>(First)) << "Not a scalar";
+  StringRef Raw = cast<yaml::ScalarNode>(First)->getRawValue();
+  EXPECT_EQ(String, Raw.substr(1, Raw.size() - 2).str());
+}
+
+// Wraps String as the sole quoted element of an array and expects a failure.
+static void ExpectCannotParseString(StringRef String) {
+  std::string Msg = (Twine("When parsing string \"") + String + "\"").str();
+  std::string StringInArray = (Twine("[\"") + String + "\"]").str();
+  ExpectParseError(Msg, StringInArray);
+}
+
+TEST(YAMLParser, ParsesStrings) {
+  ExpectCanParseString("");
+  ExpectCannotParseString("\\");
+  ExpectCannotParseString("\"");
+  ExpectCanParseString(" ");
+  ExpectCanParseString("\\ ");
+  ExpectCanParseString("\\\"");
+  ExpectCannotParseString("\"\\");
+  ExpectCannotParseString(" \\");
+  ExpectCanParseString("\\\\");
+  ExpectCannotParseString("\\\\\\");
+  ExpectCanParseString("\\\\\\\\");
+  ExpectCanParseString("\\\" ");
+  ExpectCannotParseString("\\\\\" ");
+  ExpectCanParseString("\\\\\\\" ");
+  ExpectCanParseString("    \\\\  \\\"  \\\\\\\"   ");
+}
+
+TEST(YAMLParser, WorksWithIteratorAlgorithms) {
+  SourceMgr SM;
+  yaml::Stream Stream("[\"1\", \"2\", \"3\", \"4\", \"5\", \"6\"]", SM);
+  yaml::SequenceNode *Array
+    = dyn_cast_or_null<yaml::SequenceNode>(Stream.begin()->getRoot());
+  ASSERT_TRUE(Array != 0) << "Root node is not a sequence";
+  EXPECT_EQ(6, std::distance(Array->begin(), Array->end()));
+}
+} // end namespace llvm
diff --git a/utils/yaml-bench/CMakeLists.txt b/utils/yaml-bench/CMakeLists.txt
new file mode 100644
index 0000000000..403182ceee
--- /dev/null
+++ b/utils/yaml-bench/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_llvm_utility(yaml-bench
+  YAMLBench.cpp
+  )
+
+target_link_libraries(yaml-bench LLVMSupport)
diff --git a/utils/yaml-bench/Makefile b/utils/yaml-bench/Makefile
new file mode 100644
index 0000000000..07e91226c7
--- /dev/null
+++ b/utils/yaml-bench/Makefile
@@ -0,0 +1,20 @@
+##===- utils/yaml-bench/Makefile ---------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+TOOLNAME = yaml-bench
+USEDLIBS = LLVMSupport.a
+
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS = 1
+
+# Don't install this utility
+NO_INSTALL = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/utils/yaml-bench/YAMLBench.cpp b/utils/yaml-bench/YAMLBench.cpp
new file mode 100644
index 0000000000..e5ee52a16d
--- /dev/null
+++ b/utils/yaml-bench/YAMLBench.cpp
@@ -0,0 +1,203 @@
+//===- YAMLBench - Benchmark the YAMLParser implementation ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This program executes the YAMLParser on differently sized YAML texts and
+// outputs the run time.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/YAMLParser.h"
+
+using namespace llvm;
+
+// -tokens: dump the token stream produced for the input.
+static cl::opt<bool>
+  DumpTokens("tokens",
+             cl::desc("Print the tokenization of the file."),
+             cl::init(false));
+
+// -canonical: re-emit the input through dumpStream.
+static cl::opt<bool>
+  DumpCanonical("canonical",
+                cl::desc("Print the canonical YAML for this file."),
+                cl::init(false));
+
+// Positional argument naming the input (handed to getFileOrSTDIN).
+static cl::opt<std::string> Input(cl::Positional, cl::desc("<input>"));
+
+// -verify: run only the single short benchmark pass.
+static cl::opt<bool>
+  Verify("verify",
+         cl::desc("Run a quick verification useful for regression testing"),
+         cl::init(false));
+
+// -memory-limit: size in megabytes of each generated benchmark text.
+static cl::opt<unsigned>
+  MemoryLimitMB("memory-limit",
+                cl::desc("Do not use more megabytes of memory"),
+                cl::init(1000));
+
+/// Stream manipulator: inserting it emits two spaces per level of distance.
+struct indent {
+  unsigned distance;
+  indent(unsigned d) : distance(d) {}
+};
+static raw_ostream &operator <<(raw_ostream &os, const indent &in) {
+  for (unsigned Remaining = in.distance; Remaining != 0; --Remaining)
+    os << "  ";
+  return os;
+}
+
+/// Print \p n to stdout as canonical YAML, indented by \p Indent levels.
+static void dumpNode(yaml::Node *n, unsigned Indent = 0,
+                     bool SuppressFirstIndent = false) {
+  if (!n)
+    return;
+  if (!SuppressFirstIndent)
+    outs() << indent(Indent);
+  StringRef Anchor = n->getAnchor();
+  if (!Anchor.empty())
+    outs() << "&" << Anchor << " ";
+  if (yaml::ScalarNode *Scalar = dyn_cast<yaml::ScalarNode>(n)) {
+    SmallString<32> Storage;
+    StringRef Val = Scalar->getValue(Storage);
+    outs() << "!!str \"" << yaml::escape(Val) << "\"";
+  } else if (yaml::SequenceNode *Seq = dyn_cast<yaml::SequenceNode>(n)) {
+    outs() << "!!seq [\n";
+    ++Indent;
+    for (yaml::SequenceNode::iterator i = Seq->begin(), e = Seq->end();
+                                      i != e; ++i) {
+      dumpNode(i, Indent);
+      outs() << ",\n";
+    }
+    --Indent;
+    outs() << indent(Indent) << "]";
+  } else if (yaml::MappingNode *Map = dyn_cast<yaml::MappingNode>(n)) {
+    outs() << "!!map {\n";
+    ++Indent;
+    for (yaml::MappingNode::iterator i = Map->begin(), e = Map->end();
+                                     i != e; ++i) {
+      outs() << indent(Indent) << "? ";
+      dumpNode(i->getKey(), Indent, true);
+      outs() << "\n" << indent(Indent) << ": ";
+      dumpNode(i->getValue(), Indent, true);
+      outs() << ",\n";
+    }
+    --Indent;
+    outs() << indent(Indent) << "}";
+  } else if (yaml::AliasNode *Alias = dyn_cast<yaml::AliasNode>(n)) {
+    outs() << "*" << Alias->getName();
+  } else if (isa<yaml::NullNode>(n)) {
+    // Pure type test: isa<> instead of a dyn_cast<> whose result is dropped.
+    outs() << "!!null null";
+  }
+}
+
+/// Write every document in \p stream to stdout, each introduced by a
+/// "%YAML 1.2" directive; stops at the first document without a root node.
+static void dumpStream(yaml::Stream &stream) {
+  yaml::document_iterator Doc = stream.begin(), End = stream.end();
+  for (; Doc != End; ++Doc) {
+    outs() << "%YAML 1.2\n" << "---\n";
+    yaml::Node *Root = Doc->getRoot();
+    if (!Root)
+      break;
+    dumpNode(Root);
+    outs() << "\n...\n";
+  }
+}
+
+/// Time three passes over \p JSONText in \p Group, naming each timer after
+/// \p Name: a byte-summing baseline loop, yaml::scanTokens and Stream::skip.
+static void benchmark(llvm::TimerGroup &Group, llvm::StringRef Name,
+                      llvm::StringRef JSONText) {
+  llvm::Timer BaseLine((Name + ": Loop").str(), Group);
+  BaseLine.startTimer();
+  char Sum = 0;
+  llvm::StringRef::iterator Cur = JSONText.begin(), End = JSONText.end();
+  while (Cur != End)
+    Sum += *Cur++;
+  BaseLine.stopTimer();
+  volatile char DontOptimizeOut = Sum; (void)DontOptimizeOut;
+
+  llvm::Timer Tokenizing((Name + ": Tokenizing").str(), Group);
+  Tokenizing.startTimer();
+  yaml::scanTokens(JSONText);
+  Tokenizing.stopTimer();
+
+  llvm::Timer Parsing((Name + ": Parsing").str(), Group);
+  Parsing.startTimer();
+  {
+    // Scoped so SourceMgr/Stream destruction happens before the timer stops.
+    llvm::SourceMgr SM;
+    llvm::yaml::Stream stream(JSONText, SM);
+    stream.skip();
+  }
+  Parsing.stopTimer();
+}
+
+/// Build a JSON array of 3-key objects until it reaches \p MemoryMB megabytes.
+static std::string createJSONText(size_t MemoryMB, unsigned ValueSize) {
+  const size_t MemoryBytes = MemoryMB * 1024 * 1024;
+  const std::string Value(ValueSize, '*');
+  std::string JSONText;
+  llvm::raw_string_ostream Stream(JSONText);
+  Stream << "[\n";
+  while (JSONText.size() < MemoryBytes) {
+    Stream << " {\n"
+           << "  \"key1\": \"" << Value << "\",\n"
+           << "  \"key2\": \"" << Value << "\",\n"
+           << "  \"key3\": \"" << Value << "\"\n"
+           << " }";
+    Stream.flush();
+    Stream << (JSONText.size() < MemoryBytes ? ",\n" : "\n");
+  }
+  Stream << "]\n";
+  return Stream.str();
+}
+
+/// Entry point: optionally dump <input>, then run the selected benchmarks.
+int main(int argc, char **argv) {
+  llvm::cl::ParseCommandLineOptions(argc, argv);
+  if (Input.getNumOccurrences()) {
+    OwningPtr<MemoryBuffer> Buf;
+    if (error_code ec = MemoryBuffer::getFileOrSTDIN(Input, Buf)) {
+      errs() << argv[0] << ": " << Input << ": " << ec.message() << "\n";
+      return 1;
+    }
+    llvm::SourceMgr sm;
+    if (DumpTokens)
+      yaml::dumpTokens(Buf->getBuffer(), outs());
+    if (DumpCanonical) {
+      yaml::Stream stream(Buf->getBuffer(), sm);
+      dumpStream(stream);
+    }
+  }
+
+  if (Verify) {
+    llvm::TimerGroup Group("YAML parser benchmark");
+    benchmark(Group, "Fast", createJSONText(10, 500));
+  } else if (!DumpCanonical && !DumpTokens) {
+    llvm::TimerGroup Group("YAML parser benchmark");
+    benchmark(Group, "Small Values", createJSONText(MemoryLimitMB, 5));
+    benchmark(Group, "Medium Values", createJSONText(MemoryLimitMB, 500));
+    benchmark(Group, "Large Values", createJSONText(MemoryLimitMB, 50000));
+  }
+
+  return 0;
+}
author	Michael J. Spencer <bigcheesegs@gmail.com>	2012-04-03 23:09:22 +0000
committer	Michael J. Spencer <bigcheesegs@gmail.com>	2012-04-03 23:09:22 +0000
commit	93210e847a1496b24cef881723e57c489082dcfe (patch)
tree	83d1f8828d8b6835a6511d28cf3c63fad8b06aef
parent	2ce63c73520cd6e715f9114589f802938b5db01f (diff)
download	llvm-93210e847a1496b24cef881723e57c489082dcfe.tar.gz llvm-93210e847a1496b24cef881723e57c489082dcfe.tar.bz2 llvm-93210e847a1496b24cef881723e57c489082dcfe.tar.xz