Added quoted scalars (with escaping).

Refactored some common whitespace-parsing code in scanning both scalars.
Implemented the flow collection tokens.
This commit is contained in:
Jesse Beder 2008-06-27 23:11:46 +00:00
parent a224c7818b
commit 49a75b2d78
9 changed files with 307 additions and 54 deletions

View file

@ -12,4 +12,21 @@ namespace YAML
class IllegalMapValue: public Exception {}; class IllegalMapValue: public Exception {};
class IllegalScalar: public Exception {}; class IllegalScalar: public Exception {};
class IllegalTabInScalar: public Exception {}; class IllegalTabInScalar: public Exception {};
// Thrown by the quoted-scalar scanner when a document start/end indicator
// is encountered before the closing quote.
class DocIndicatorInQuote: public Exception {};
// Thrown by the quoted-scalar scanner when the input ends while a quoted
// scalar is still open.
class EOFInQuote: public Exception {};
// Thrown by Exp::Escape when the character following the backslash is not
// one of the escape codes it knows about.
class UnknownEscapeSequence: public Exception {
public:
	// the character that followed the backslash
	char ch;

	UnknownEscapeSequence(char escapeChar): ch(escapeChar) {}
};
// Thrown by Exp::ParseHex when it meets a character that is not a
// hexadecimal digit.
class NonHexNumber: public Exception {
public:
	// the offending (non-hex) character
	char ch;

	NonHexNumber(char badChar): ch(badChar) {}
};
// Thrown by the hex-escape decoder when the parsed value is not a legal
// Unicode code point (a surrogate, or larger than 0x10FFFF).
class InvalidUnicode: public Exception {
public:
	// the rejected numeric value
	unsigned value;

	InvalidUnicode(unsigned codePoint): value(codePoint) {}
};
} }

106
exp.cpp Normal file
View file

@ -0,0 +1,106 @@
#include "exp.h"
#include "exceptions.h"
namespace YAML
{
namespace Exp
{
// ParseHex
// . Interprets 'str' as a hexadecimal number (upper or lower case digits)
//   and returns its value; an empty string yields 0.
// . Throws NonHexNumber (carrying the character) on the first character
//   that is not a hex digit.
unsigned ParseHex(std::string str)
{
	unsigned result = 0;

	for(std::string::const_iterator it = str.begin(); it != str.end(); ++it) {
		const char c = *it;

		// value of this hex digit (0..15)
		int nibble = 0;
		if(c >= '0' && c <= '9')
			nibble = c - '0';
		else if(c >= 'a' && c <= 'f')
			nibble = 10 + (c - 'a');
		else if(c >= 'A' && c <= 'F')
			nibble = 10 + (c - 'A');
		else
			throw NonHexNumber(c);

		// make room for the new digit and append it
		result = result * 16 + nibble;
	}

	return result;
}
// Str
// . Wraps a single character in a one-character string.
std::string Str(char ch)
{
	return std::string(1, ch);
}
// Escape
// . Translates the next 'codeLength' characters into a hex number and returns the result.
// . Throws if it's not actually hex.
std::string Escape(std::istream& in, int& length, int codeLength)
{
// grab string
length += codeLength;
std::string str;
for(int i=0;i<codeLength;i++)
str += in.get();
// get the value
unsigned value = ParseHex(str);
// legal unicode?
if((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF)
throw InvalidUnicode(value);
// now break it up into chars
if(value <= 0x7F)
return Str(value);
else if(value <= 0x7FF)
return Str(0xC0 + (value >> 6)) + Str(0x80 + (value & 0x3F));
else if(value <= 0xFFFF)
return Str(0xE0 + (value >> 12)) + Str(0x80 + ((value >> 6) & 0x3F)) + Str(0x80 + (value & 0x3F));
else
return Str(0xF0 + (value >> 18)) + Str(0x80 + ((value >> 12) & 0x3F)) +
Str(0x80 + ((value >> 6) & 0x3F)) + Str(0x80 + (value & 0x3F));
}
// Escape
// . Escapes the sequence starting 'in' (it must begin with a '\')
//   and returns the text it stands for.
// . Fills 'length' with how many characters we ate: two for the slash and
//   the escape character, plus the hex digits for \x, \u and \U.
// . Throws UnknownEscapeSequence if it's an unknown escape character; the
//   hex forms may additionally throw from the hex decoder.
std::string Escape(std::istream& in, int& length)
{
	// slash + character
	length = 2;

	// eat slash
	in.get();

	// switch on escape character
	char ch = in.get();
	switch(ch) {
		// note: must be built with an explicit count; constructing a string
		// from the literal "\0" would stop at the NUL and give an empty string
		case '0': return std::string(1, '\0');
		case 'a': return "\x07";
		case 'b': return "\x08";
		case 't':
		case '\t': return "\x09";
		case 'n': return "\x0A";
		case 'v': return "\x0B";
		case 'f': return "\x0C";
		case 'r': return "\x0D";
		case 'e': return "\x1B";
		case ' ': return "\x20";
		case '\"': return "\"";
		case '\'': return "\'";
		case '\\': return "\\";
		case 'N': return "\xC2\x85"; // NEL (#x85)
		case '_': return "\xC2\xA0"; // #xA0
		case 'L': return "\xE2\x80\xA8"; // LS (#x2028)
		case 'P': return "\xE2\x80\xA9"; // PS (#x2029)
		case 'x': return Escape(in, length, 2);
		case 'u': return Escape(in, length, 4);
		case 'U': return Escape(in, length, 8);
	}

	throw UnknownEscapeSequence(ch);
}
}
}

14
exp.h
View file

@ -1,6 +1,8 @@
#pragma once #pragma once
#include "regex.h" #include "regex.h"
#include <string>
#include <ios>
namespace YAML namespace YAML
{ {
@ -13,6 +15,8 @@ namespace YAML
const RegEx Blank = RegEx(' ') || RegEx('\t'); const RegEx Blank = RegEx(' ') || RegEx('\t');
const RegEx Break = RegEx('\n'); const RegEx Break = RegEx('\n');
const RegEx BlankOrBreak = Blank || Break; const RegEx BlankOrBreak = Blank || Break;
// a single decimal digit
// NOTE(review): presumably RegEx(a, z) matches the inclusive character range a..z - confirm in regex.h
const RegEx Digit = RegEx('0', '9');
// a single hexadecimal digit, in either case
const RegEx Hex = Digit || RegEx('A', 'F') || RegEx('a', 'f');
// actual tags // actual tags
@ -30,11 +34,17 @@ namespace YAML
// . Can never start with any of , [ ] { } # & * ! | > \' \" % @ ` // . Can never start with any of , [ ] { } # & * ! | > \' \" % @ `
// . In the block context - ? : must be not be followed with a space. // . In the block context - ? : must be not be followed with a space.
// . In the flow context ? : are illegal and - must not be followed with a space. // . In the flow context ? : are illegal and - must not be followed with a space.
const RegEx PlainScalar = !(BlankOrBreak || RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-?:") + Blank)), const RegEx PlainScalar = !(BlankOrBreak || RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-?:", REGEX_OR) + Blank)),
PlainScalarInFlow = !(BlankOrBreak || RegEx("?:,[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx('-') + Blank)); PlainScalarInFlow = !(BlankOrBreak || RegEx("?:,[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx('-') + Blank));
const RegEx IllegalColonInScalar = RegEx(':') + !BlankOrBreak; const RegEx IllegalColonInScalar = RegEx(':') + !BlankOrBreak;
const RegEx EndScalar = RegEx(':') + BlankOrBreak, const RegEx EndScalar = RegEx(':') + BlankOrBreak,
EndScalarInFlow = (RegEx(':') + BlankOrBreak) || RegEx(",:?[]{}"); EndScalarInFlow = (RegEx(':') + BlankOrBreak) || RegEx(",:?[]{}", REGEX_OR);
// two single quotes in a row; the quoted-scalar scanner checks for this inside single-quoted scalars
// NOTE(review): presumably the string constructor matches the characters in sequence by default - confirm in regex.h
const RegEx EscSingleQuote = RegEx("\'\'");
// a backslash immediately followed by a line break
const RegEx EscBreak = RegEx('\\') + Break;
// and some functions
// . Escape: decodes the backslash escape sequence at the front of 'in' and
//   returns the text it stands for; 'length' receives the number of
//   characters consumed.
std::string Escape(std::istream& in, int& length);
} }
namespace Keys namespace Keys

View file

@ -35,7 +35,7 @@ namespace YAML
RegEx::RegEx(const std::string& str, REGEX_OP op): m_op(op), m_pOp(0) RegEx::RegEx(const std::string& str, REGEX_OP op): m_op(op), m_pOp(0)
{ {
for(unsigned i=0;i<str.size();i++) for(unsigned i=0;i<str.size();i++)
m_params.push_back(RegEx(str[0])); m_params.push_back(RegEx(str[i]));
SetOp(); SetOp();
} }

View file

@ -286,6 +286,22 @@ namespace YAML
} }
} }
// IncreaseFlowLevel
// . Records that we have entered one more level of flow collection.
void Scanner::IncreaseFlowLevel()
{
	// TODO: Push simple key
	++m_flowLevel;
}
// DecreaseFlowLevel
// . Records that we have left one level of flow collection.
// . Does nothing if we are not inside any flow collection.
void Scanner::DecreaseFlowLevel()
{
	// nothing to leave
	if(m_flowLevel <= 0)
		return;

	--m_flowLevel;
	// TODO: Pop simple key
}
// temporary function for testing // temporary function for testing
void Scanner::Scan() void Scanner::Scan()
{ {

View file

@ -20,6 +20,8 @@ namespace YAML
void ScanToNextToken(); void ScanToNextToken();
void PushIndentTo(int column, bool sequence); void PushIndentTo(int column, bool sequence);
void PopIndentTo(int column); void PopIndentTo(int column);
void IncreaseFlowLevel();
void DecreaseFlowLevel();
void Scan(); void Scan();
@ -37,6 +39,16 @@ namespace YAML
bool IsValue(); bool IsValue();
bool IsPlainScalar(); bool IsPlainScalar();
// WhitespaceInfo
// . Collects the blanks and line breaks found between the non-blank runs
//   of a scalar, so they can be joined into the scalar later.
struct WhitespaceInfo {
	// set once a line break has been added, cleared again by Join()
	bool leadingBlanks;
	// blanks seen before any line break
	std::string whitespace;
	// the first line break seen
	std::string leadingBreaks;
	// any further line breaks after the first
	std::string trailingBreaks;

	WhitespaceInfo();

	void AddBlank(char ch);
	void AddBreak(const std::string& line);
	std::string Join();
};
template <typename T> void ScanAndEnqueue(T *pToken); template <typename T> void ScanAndEnqueue(T *pToken);
template <typename T> T *ScanToken(T *pToken); template <typename T> T *ScanToken(T *pToken);

View file

@ -64,8 +64,8 @@ namespace YAML
template <> FlowSeqStartToken *Scanner::ScanToken(FlowSeqStartToken *pToken) template <> FlowSeqStartToken *Scanner::ScanToken(FlowSeqStartToken *pToken)
{ {
// TODO: "save simple key" // TODO: "save simple key"
// TODO: increase flow level
IncreaseFlowLevel();
m_simpleKeyAllowed = true; m_simpleKeyAllowed = true;
// eat // eat
@ -77,8 +77,8 @@ namespace YAML
template <> FlowMapStartToken *Scanner::ScanToken(FlowMapStartToken *pToken) template <> FlowMapStartToken *Scanner::ScanToken(FlowMapStartToken *pToken)
{ {
// TODO: "save simple key" // TODO: "save simple key"
// TODO: increase flow level
IncreaseFlowLevel();
m_simpleKeyAllowed = true; m_simpleKeyAllowed = true;
// eat // eat
@ -90,8 +90,8 @@ namespace YAML
template <> FlowSeqEndToken *Scanner::ScanToken(FlowSeqEndToken *pToken) template <> FlowSeqEndToken *Scanner::ScanToken(FlowSeqEndToken *pToken)
{ {
// TODO: "remove simple key" // TODO: "remove simple key"
// TODO: decrease flow level
DecreaseFlowLevel();
m_simpleKeyAllowed = false; m_simpleKeyAllowed = false;
// eat // eat
@ -103,8 +103,8 @@ namespace YAML
template <> FlowMapEndToken *Scanner::ScanToken(FlowMapEndToken *pToken) template <> FlowMapEndToken *Scanner::ScanToken(FlowMapEndToken *pToken)
{ {
// TODO: "remove simple key" // TODO: "remove simple key"
// TODO: decrease flow level
DecreaseFlowLevel();
m_simpleKeyAllowed = false; m_simpleKeyAllowed = false;
// eat // eat
@ -210,8 +210,8 @@ namespace YAML
m_simpleKeyAllowed = false; m_simpleKeyAllowed = false;
// now eat and store the scalar // now eat and store the scalar
std::string scalar, whitespace, leadingBreaks, trailingBreaks; std::string scalar;
bool leadingBlanks = false; WhitespaceInfo info;
while(INPUT) { while(INPUT) {
// doc start/end tokens // doc start/end tokens
@ -234,26 +234,6 @@ namespace YAML
if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT)) if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT))
break; break;
// join whitespace
if(leadingBlanks) {
if(Exp::Break.Matches(leadingBreaks)) {
// fold line break?
if(trailingBreaks.empty())
scalar += ' ';
else
scalar += trailingBreaks;
} else {
scalar += leadingBreaks + trailingBreaks;
}
leadingBlanks = false;
leadingBreaks = "";
trailingBreaks = "";
} else if(!whitespace.empty()) {
scalar += whitespace;
whitespace = "";
}
// finally, read the character! // finally, read the character!
scalar += GetChar(); scalar += GetChar();
} }
@ -266,37 +246,29 @@ namespace YAML
while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) { while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) {
if(Exp::Blank.Matches(INPUT)) { if(Exp::Blank.Matches(INPUT)) {
// can't use tabs as indentation! only spaces! // can't use tabs as indentation! only spaces!
if(INPUT.peek() == '\t' && leadingBlanks && m_column <= m_indents.top()) if(INPUT.peek() == '\t' && info.leadingBlanks && m_column <= m_indents.top())
throw IllegalTabInScalar(); throw IllegalTabInScalar();
// maybe store this character info.AddBlank(GetChar());
if(!leadingBlanks) } else {
whitespace += GetChar();
else
Eat(1);
} else {
// we know it's a line break; see how many characters to read // we know it's a line break; see how many characters to read
int n = Exp::Break.Match(INPUT); int n = Exp::Break.Match(INPUT);
std::string line = GetChar(n); std::string line = GetChar(n);
info.AddBreak(line);
// where to store this character?
if(!leadingBlanks) {
leadingBlanks = true;
whitespace = "";
leadingBreaks += line;
} else
trailingBreaks += line;
} }
} }
// and finally break if we're below the indentation level // break if we're below the indentation level
if(m_flowLevel == 0 && m_column <= m_indents.top()) if(m_flowLevel == 0 && m_column <= m_indents.top())
break; break;
// finally join whitespace
scalar += info.Join();
} }
// now modify our token // now modify our token
pToken->value = scalar; pToken->value = scalar;
if(leadingBlanks) if(info.leadingBlanks)
m_simpleKeyAllowed = true; m_simpleKeyAllowed = true;
return pToken; return pToken;
@ -305,6 +277,128 @@ namespace YAML
// QuotedScalarToken // QuotedScalarToken
// . Scans a single- or double-quoted scalar and stores its contents (with
//   escapes resolved and whitespace joined) in the token's value.
// . In a single-quoted scalar the only escape is '' (one literal quote);
//   backslash escapes and escaped line breaks apply to double-quoted
//   scalars only.
// . Throws DocIndicatorInQuote if a document indicator shows up inside the
//   quote, and EOFInQuote if the input ends before the closing quote.
template <> QuotedScalarToken *Scanner::ScanToken(QuotedScalarToken *pToken)
{
	// TODO: "save simple key"

	m_simpleKeyAllowed = false;

	// eat single or double quote
	char quote = GetChar();
	bool single = (quote == '\'');
	const char endQuote = (single ? '\'' : '\"');

	// now eat and store the scalar
	std::string scalar;
	WhitespaceInfo info;

	while(INPUT) {
		if(IsDocumentStart() || IsDocumentEnd())
			throw DocIndicatorInQuote();

		if(INPUT.peek() == EOF)
			throw EOFInQuote();

		// first eat non-blanks
		while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) {
			// the input must not end in the middle of a run of non-blanks
			// either; otherwise an unterminated quote would go unreported
			if(INPUT.peek() == EOF)
				throw EOFInQuote();

			// escaped single quote? ('' stands for ONE literal quote)
			if(single && Exp::EscSingleQuote.Matches(INPUT)) {
				int n = Exp::EscSingleQuote.Match(INPUT);
				GetChar(n);
				scalar += '\'';
				continue;
			}

			// is the quote ending?
			if(INPUT.peek() == endQuote)
				break;

			// escaped newline? (double-quoted only)
			// . The backslash and the break are dropped, as is the leading
			//   whitespace of the continuation line. They must be consumed
			//   here, or the scanner would never move past them.
			if(!single && Exp::EscBreak.Matches(INPUT)) {
				int n = Exp::EscBreak.Match(INPUT);
				GetChar(n);
				while(INPUT && Exp::Blank.Matches(INPUT))
					GetChar();
				continue;
			}

			// other escape sequence (double-quoted only; a backslash is an
			// ordinary character inside single quotes)
			if(!single && INPUT.peek() == '\\') {
				int length = 0;
				scalar += Exp::Escape(INPUT, length);
				// Escape reads straight from the stream, so account for
				// the consumed characters ourselves
				m_column += length;
				continue;
			}

			// and finally, just add the damn character
			scalar += GetChar();
		}

		// is the quote ending?
		if(INPUT.peek() == endQuote) {
			// eat and go
			GetChar();
			break;
		}

		// now we eat blanks
		while(Exp::BlankOrBreak.Matches(INPUT)) {
			if(Exp::Blank.Matches(INPUT)) {
				info.AddBlank(GetChar());
			} else {
				// we know it's a line break; see how many characters to read
				int n = Exp::Break.Match(INPUT);
				std::string line = GetChar(n);
				info.AddBreak(line);
			}
		}

		// and finally join the whitespace
		scalar += info.Join();
	}

	pToken->value = scalar;

	return pToken;
}
//////////////////////////////////////////////////////////
// WhitespaceInfo stuff
// starts out with no line break seen (the strings are default-constructed)
Scanner::WhitespaceInfo::WhitespaceInfo()
	: leadingBlanks(false)
{
}
// AddBlank
// . Remembers a blank character, unless a line break has already been
//   added since the last Join (in which case the blank is dropped).
void Scanner::WhitespaceInfo::AddBlank(char ch)
{
	if(leadingBlanks)
		return;

	whitespace.push_back(ch);
}
// AddBreak
// . Remembers a line break.
// . The first break since the last Join discards the blanks collected so
//   far and goes to 'leadingBreaks'; later ones go to 'trailingBreaks'.
void Scanner::WhitespaceInfo::AddBreak(const std::string& line)
{
	// not the first break: just pile it up
	if(leadingBlanks) {
		trailingBreaks += line;
		return;
	}

	// first break: blanks before it don't count
	leadingBlanks = true;
	whitespace = "";
	leadingBreaks += line;
}
// Join
// . Returns the text that the collected whitespace contributes to the
//   scalar, and resets the collected state.
// . With no line break seen, that's simply the collected blanks.
// . Otherwise, if 'leadingBreaks' matches Exp::Break it is folded: it
//   becomes a single space when no further breaks followed, or is replaced
//   by the trailing breaks when some did. If it doesn't match, both sets
//   of breaks are kept as they are.
std::string Scanner::WhitespaceInfo::Join()
{
	// only blanks (possibly none) were collected
	if(!leadingBlanks) {
		std::string blanks;
		blanks.swap(whitespace);
		return blanks;
	}

	std::string joined;
	if(!Exp::Break.Matches(leadingBreaks))
		joined = leadingBreaks + trailingBreaks;
	else if(trailingBreaks.empty())
		joined = " "; // fold line break
	else
		joined = trailingBreaks;

	// start afresh for the next run of whitespace
	leadingBlanks = false;
	leadingBreaks.clear();
	trailingBreaks.clear();

	return joined;
}
} }

View file

@ -1,10 +1,4 @@
--- ---
- green - milk and eggs
eggs, - [cheddar, american, swiss]
and
ham!
- eggs # this is really important!
- - cheddar cheese
- american cheese
- bread
... ...

View file

@ -169,6 +169,10 @@
RelativePath=".\document.cpp" RelativePath=".\document.cpp"
> >
</File> </File>
<File
RelativePath=".\exp.cpp"
>
</File>
<File <File
RelativePath=".\main.cpp" RelativePath=".\main.cpp"
> >