//===--- YAMLParser.h - Simple YAML parser --------------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This is a YAML 1.2 parser. // // See http://www.yaml.org/spec/1.2/spec.html for the full standard. // // This currently does not implement the following: // * Multi-line literal folding. // * Tag resolution. // * UTF-16. // * BOMs anywhere other than the first Unicode scalar value in the file. // // The most important class here is Stream. This represents a YAML stream with // 0, 1, or many documents. // // SourceMgr sm; // StringRef input = getInput(); // yaml::Stream stream(input, sm); // // for (yaml::document_iterator di = stream.begin(), de = stream.end(); // di != de; ++di) { // yaml::Node *n = di->getRoot(); // if (n) { // // Do something with n... // } else // break; // } // //===----------------------------------------------------------------------===// #ifndef LLVM_SUPPORT_YAMLPARSER_H #define LLVM_SUPPORT_YAMLPARSER_H #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/SMLoc.h" #include #include namespace llvm { class MemoryBuffer; class SourceMgr; class raw_ostream; class Twine; namespace yaml { class document_iterator; class Document; class Node; class Scanner; struct Token; /// @brief Dump all the tokens in this stream to OS. /// @returns true if there was an error, false otherwise. bool dumpTokens(StringRef Input, raw_ostream &); /// @brief Scans all tokens in input without outputting anything. This is used /// for benchmarking the tokenizer. /// @returns true if there was an error, false otherwise. bool scanTokens(StringRef Input); /// @brief Escape \a Input for a double quoted scalar. std::string escape(StringRef Input); /// @brief This class represents a YAML stream potentially containing multiple /// documents. class Stream { public: /// @brief This keeps a reference to the string referenced by \p Input. Stream(StringRef Input, SourceMgr &); /// @brief This takes ownership of \p InputBuffer. Stream(MemoryBuffer *InputBuffer, SourceMgr &); ~Stream(); document_iterator begin(); document_iterator end(); void skip(); bool failed(); bool validate() { skip(); return !failed(); } void printError(Node *N, const Twine &Msg); private: OwningPtr scanner; OwningPtr CurrentDoc; friend class Document; /// @brief Validate a %YAML x.x directive. void handleYAMLDirective(const Token &); }; /// @brief Abstract base class for all Nodes. class Node { public: enum NodeKind { NK_Null, NK_Scalar, NK_KeyValue, NK_Mapping, NK_Sequence, NK_Alias }; Node(unsigned int Type, OwningPtr&, StringRef Anchor); /// @brief Get the value of the anchor attached to this node. If it does not /// have one, getAnchor().size() will be 0. StringRef getAnchor() const { return Anchor; } SMRange getSourceRange() const { return SourceRange; } void setSourceRange(SMRange SR) { SourceRange = SR; } // These functions forward to Document and Scanner. Token &peekNext(); Token getNext(); Node *parseBlockNode(); BumpPtrAllocator &getAllocator(); void setError(const Twine &Message, Token &Location) const; bool failed() const; virtual void skip() {} unsigned int getType() const { return TypeID; } void *operator new ( size_t Size , BumpPtrAllocator &Alloc , size_t Alignment = 16) throw() { return Alloc.Allocate(Size, Alignment); } void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t) throw() { Alloc.Deallocate(Ptr); } protected: OwningPtr &Doc; SMRange SourceRange; void operator delete(void *) throw() {} virtual ~Node() {} private: unsigned int TypeID; StringRef Anchor; }; /// @brief A null value. /// /// Example: /// !!null null class NullNode : public Node { public: NullNode(OwningPtr &D) : Node(NK_Null, D, StringRef()) {} static inline bool classof(const Node *N) { return N->getType() == NK_Null; } }; /// @brief A scalar node is an opaque datum that can be presented as a /// series of zero or more Unicode scalar values. /// /// Example: /// Adena class ScalarNode : public Node { public: ScalarNode(OwningPtr &D, StringRef Anchor, StringRef Val) : Node(NK_Scalar, D, Anchor) , Value(Val) { SMLoc Start = SMLoc::getFromPointer(Val.begin()); SMLoc End = SMLoc::getFromPointer(Val.end()); SourceRange = SMRange(Start, End); } // Return Value without any escaping or folding or other fun YAML stuff. This // is the exact bytes that are contained in the file (after conversion to // utf8). StringRef getRawValue() const { return Value; } /// @brief Gets the value of this node as a StringRef. /// /// @param Storage is used to store the content of the returned StringRef iff /// it requires any modification from how it appeared in the source. /// This happens with escaped characters and multi-line literals. StringRef getValue(SmallVectorImpl &Storage) const; static inline bool classof(const Node *N) { return N->getType() == NK_Scalar; } private: StringRef Value; StringRef unescapeDoubleQuoted( StringRef UnquotedValue , StringRef::size_type Start , SmallVectorImpl &Storage) const; }; /// @brief A key and value pair. While not technically a Node under the YAML /// representation graph, it is easier to treat them this way. /// /// TODO: Consider making this not a child of Node. /// /// Example: /// Section: .text class KeyValueNode : public Node { public: KeyValueNode(OwningPtr &D) : Node(NK_KeyValue, D, StringRef()) , Key(0) , Value(0) {} /// @brief Parse and return the key. /// /// This may be called multiple times. /// /// @returns The key, or nullptr if failed() == true. Node *getKey(); /// @brief Parse and return the value. /// /// This may be called multiple times. /// /// @returns The value, or nullptr if failed() == true. Node *getValue(); virtual void skip() LLVM_OVERRIDE { getKey()->skip(); getValue()->skip(); } static inline bool classof(const Node *N) { return N->getType() == NK_KeyValue; } private: Node *Key; Node *Value; }; /// @brief This is an iterator abstraction over YAML collections shared by both /// sequences and maps. /// /// BaseT must have a ValueT* member named CurrentEntry and a member function /// increment() which must set CurrentEntry to 0 to create an end iterator. template class basic_collection_iterator : public std::iterator { public: basic_collection_iterator() : Base(0) {} basic_collection_iterator(BaseT *B) : Base(B) {} ValueT *operator ->() const { assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); return Base->CurrentEntry; } ValueT &operator *() const { assert(Base && Base->CurrentEntry && "Attempted to dereference end iterator!"); return *Base->CurrentEntry; } operator ValueT*() const { assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); return Base->CurrentEntry; } bool operator !=(const basic_collection_iterator &Other) const { if(Base != Other.Base) return true; return (Base && Other.Base) && Base->CurrentEntry != Other.Base->CurrentEntry; } basic_collection_iterator &operator++() { assert(Base && "Attempted to advance iterator past end!"); Base->increment(); // Create an end iterator. if (Base->CurrentEntry == 0) Base = 0; return *this; } private: BaseT *Base; }; // The following two templates are used for both MappingNode and Sequence Node. template typename CollectionType::iterator begin(CollectionType &C) { assert(C.IsAtBeginning && "You may only iterate over a collection once!"); C.IsAtBeginning = false; typename CollectionType::iterator ret(&C); ++ret; return ret; } template void skip(CollectionType &C) { // TODO: support skipping from the middle of a parsed collection ;/ assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!"); if (C.IsAtBeginning) for (typename CollectionType::iterator i = begin(C), e = C.end(); i != e; ++i) i->skip(); } /// @brief Represents a YAML map created from either a block map for a flow map. /// /// This parses the YAML stream as increment() is called. /// /// Example: /// Name: _main /// Scope: Global class MappingNode : public Node { public: enum MappingType { MT_Block, MT_Flow, MT_Inline ///< An inline mapping node is used for "[key: value]". }; MappingNode(OwningPtr &D, StringRef Anchor, MappingType MT) : Node(NK_Mapping, D, Anchor) , Type(MT) , IsAtBeginning(true) , IsAtEnd(false) , CurrentEntry(0) {} friend class basic_collection_iterator; typedef basic_collection_iterator iterator; template friend typename T::iterator yaml::begin(T &); template friend void yaml::skip(T &); iterator begin() { return yaml::begin(*this); } iterator end() { return iterator(); } virtual void skip() LLVM_OVERRIDE { yaml::skip(*this); } static inline bool classof(const Node *N) { return N->getType() == NK_Mapping; } private: MappingType Type; bool IsAtBeginning; bool IsAtEnd; KeyValueNode *CurrentEntry; void increment(); }; /// @brief Represents a YAML sequence created from either a block sequence for a /// flow sequence. /// /// This parses the YAML stream as increment() is called. /// /// Example: /// - Hello /// - World class SequenceNode : public Node { public: enum SequenceType { ST_Block, ST_Flow, // Use for: // // key: // - val1 // - val2 // // As a BlockMappingEntry and BlockEnd are not created in this case. ST_Indentless }; SequenceNode(OwningPtr &D, StringRef Anchor, SequenceType ST) : Node(NK_Sequence, D, Anchor) , SeqType(ST) , IsAtBeginning(true) , IsAtEnd(false) , WasPreviousTokenFlowEntry(true) // Start with an imaginary ','. , CurrentEntry(0) {} friend class basic_collection_iterator; typedef basic_collection_iterator iterator; template friend typename T::iterator yaml::begin(T &); template friend void yaml::skip(T &); void increment(); iterator begin() { return yaml::begin(*this); } iterator end() { return iterator(); } virtual void skip() LLVM_OVERRIDE { yaml::skip(*this); } static inline bool classof(const Node *N) { return N->getType() == NK_Sequence; } private: SequenceType SeqType; bool IsAtBeginning; bool IsAtEnd; bool WasPreviousTokenFlowEntry; Node *CurrentEntry; }; /// @brief Represents an alias to a Node with an anchor. /// /// Example: /// *AnchorName class AliasNode : public Node { public: AliasNode(OwningPtr &D, StringRef Val) : Node(NK_Alias, D, StringRef()), Name(Val) {} StringRef getName() const { return Name; } Node *getTarget(); static inline bool classof(const Node *N) { return N->getType() == NK_Alias; } private: StringRef Name; }; /// @brief A YAML Stream is a sequence of Documents. A document contains a root /// node. class Document { public: /// @brief Root for parsing a node. Returns a single node. Node *parseBlockNode(); Document(Stream &ParentStream); /// @brief Finish parsing the current document and return true if there are /// more. Return false otherwise. bool skip(); /// @brief Parse and return the root level node. Node *getRoot() { if (Root) return Root; return Root = parseBlockNode(); } private: friend class Node; friend class document_iterator; /// @brief Stream to read tokens from. Stream &stream; /// @brief Used to allocate nodes to. All are destroyed without calling their /// destructor when the document is destroyed. BumpPtrAllocator NodeAllocator; /// @brief The root node. Used to support skipping a partially parsed /// document. Node *Root; Token &peekNext(); Token getNext(); void setError(const Twine &Message, Token &Location) const; bool failed() const; void handleTagDirective(const Token &Tag) { // TODO: Track tags. } /// @brief Parse %BLAH directives and return true if any were encountered. bool parseDirectives(); /// @brief Consume the next token and error if it is not \a TK. bool expectToken(int TK); }; /// @brief Iterator abstraction for Documents over a Stream. class document_iterator { public: document_iterator() : Doc(0) {} document_iterator(OwningPtr &D) : Doc(&D) {} bool operator ==(const document_iterator &Other) { if (isAtEnd() || Other.isAtEnd()) return isAtEnd() && Other.isAtEnd(); return Doc == Other.Doc; } bool operator !=(const document_iterator &Other) { return !(*this == Other); } document_iterator operator ++() { assert(Doc != 0 && "incrementing iterator past the end."); if (!(*Doc)->skip()) { Doc->reset(0); } else { Stream &S = (*Doc)->stream; Doc->reset(new Document(S)); } return *this; } Document &operator *() { return *Doc->get(); } OwningPtr &operator ->() { return *Doc; } private: bool isAtEnd() const { return !Doc || !*Doc; } OwningPtr *Doc; }; } } #endif