mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-28 13:28:01 +03:00
Added quoted scalars (with escaping).
Refactored some common whitespace-parsing code in scanning both scalars. Implemented the flow collection tokens.
This commit is contained in:
parent
a224c7818b
commit
49a75b2d78
9 changed files with 307 additions and 54 deletions
17
exceptions.h
17
exceptions.h
|
@ -12,4 +12,21 @@ namespace YAML
|
|||
// Presumably signals a map value in an illegal position. NOTE(review): no throw site is visible in this diff - confirm.
class IllegalMapValue: public Exception {};
|
||||
// Presumably signals a malformed scalar. NOTE(review): no throw site is visible in this diff - confirm.
class IllegalScalar: public Exception {};
|
||||
// Thrown while scanning a plain scalar when a tab is found after a line break at or below the current indentation column.
class IllegalTabInScalar: public Exception {};
|
||||
// Thrown while scanning a quoted scalar when a document start or end indicator is found before the closing quote.
class DocIndicatorInQuote: public Exception {};
|
||||
// Thrown while scanning a quoted scalar when the input ends before the closing quote.
class EOFInQuote: public Exception {};
|
||||
// UnknownEscapeSequence
// . Thrown by Exp::Escape when the character after the backslash isn't
//   one of the escape codes it knows.
class UnknownEscapeSequence: public Exception {
public:
	UnknownEscapeSequence(char escapeChar)
		: ch(escapeChar)
	{
	}

	char ch;  // the character that followed the backslash
};
|
||||
// NonHexNumber
// . Thrown by Exp::ParseHex when it meets a character that isn't a
//   hexadecimal digit.
class NonHexNumber: public Exception {
public:
	NonHexNumber(char badDigit)
		: ch(badDigit)
	{
	}

	char ch;  // the character that wasn't a hex digit
};
|
||||
// InvalidUnicode
// . Thrown by the hex form of Exp::Escape when the parsed value is a
//   surrogate (0xD800-0xDFFF) or lies beyond 0x10FFFF.
class InvalidUnicode: public Exception {
public:
	InvalidUnicode(unsigned codePoint)
		: value(codePoint)
	{
	}

	unsigned value;  // the rejected code point
};
|
||||
}
|
||||
|
|
106
exp.cpp
Normal file
106
exp.cpp
Normal file
|
@ -0,0 +1,106 @@
|
|||
#include "exp.h"
|
||||
#include "exceptions.h"
|
||||
|
||||
namespace YAML
|
||||
{
|
||||
namespace Exp
|
||||
{
|
||||
unsigned ParseHex(std::string str)
|
||||
{
|
||||
unsigned value = 0;
|
||||
for(unsigned i=0;i<str.size();i++) {
|
||||
char ch = str[i];
|
||||
int digit = 0;
|
||||
if('a' <= ch && ch <= 'f')
|
||||
digit = ch - 'a' + 10;
|
||||
else if('A' <= ch && ch <= 'F')
|
||||
digit = ch - 'A' + 10;
|
||||
else if('0' <= ch && ch <= '9')
|
||||
digit = ch - '0';
|
||||
else
|
||||
throw NonHexNumber(ch);
|
||||
|
||||
value = (value << 4) + digit;
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
// Str
// . Wraps a single character (a NUL included) in a one-character string.
std::string Str(char ch)
{
	return std::string(1, ch);
}
|
||||
|
||||
// Escape
|
||||
// . Translates the next 'codeLength' characters into a hex number and returns the result.
|
||||
// . Throws if it's not actually hex.
|
||||
std::string Escape(std::istream& in, int& length, int codeLength)
|
||||
{
|
||||
// grab string
|
||||
length += codeLength;
|
||||
std::string str;
|
||||
for(int i=0;i<codeLength;i++)
|
||||
str += in.get();
|
||||
|
||||
// get the value
|
||||
unsigned value = ParseHex(str);
|
||||
|
||||
// legal unicode?
|
||||
if((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF)
|
||||
throw InvalidUnicode(value);
|
||||
|
||||
// now break it up into chars
|
||||
if(value <= 0x7F)
|
||||
return Str(value);
|
||||
else if(value <= 0x7FF)
|
||||
return Str(0xC0 + (value >> 6)) + Str(0x80 + (value & 0x3F));
|
||||
else if(value <= 0xFFFF)
|
||||
return Str(0xE0 + (value >> 12)) + Str(0x80 + ((value >> 6) & 0x3F)) + Str(0x80 + (value & 0x3F));
|
||||
else
|
||||
return Str(0xF0 + (value >> 18)) + Str(0x80 + ((value >> 12) & 0x3F)) +
|
||||
Str(0x80 + ((value >> 6) & 0x3F)) + Str(0x80 + (value & 0x3F));
|
||||
}
|
||||
|
||||
// Escape
|
||||
// . Escapes the sequence starting 'in' (it must begin with a '\')
|
||||
// and returns the result.
|
||||
// . Fills 'length' with how many characters we ate.
|
||||
// . Throws if it's an unknown escape character.
|
||||
std::string Escape(std::istream& in, int& length)
|
||||
{
|
||||
// slash + character
|
||||
length = 2;
|
||||
|
||||
// eat slash
|
||||
in.get();
|
||||
|
||||
// switch on escape character
|
||||
char ch = in.get();
|
||||
switch(ch) {
|
||||
case '0': return "\0";
|
||||
case 'a': return "\x07";
|
||||
case 'b': return "\x08";
|
||||
case 't':
|
||||
case '\t': return "\x09";
|
||||
case 'n': return "\x0A";
|
||||
case 'v': return "\x0B";
|
||||
case 'f': return "\x0C";
|
||||
case 'r': return "\x0D";
|
||||
case 'e': return "\x1B";
|
||||
case ' ': return "\x20";
|
||||
case '\"': return "\"";
|
||||
case '\'': return "\'";
|
||||
case '\\': return "\\";
|
||||
case 'N': return "\xC2\x85"; // NEL (#x85)
|
||||
case '_': return "\xC2\xA0"; // #xA0
|
||||
case 'L': return "\xE2\x80\xA8"; // LS (#x2028)
|
||||
case 'P': return "\xE2\x80\xA9"; // PS (#x2029)
|
||||
case 'x': return Escape(in, length, 2);
|
||||
case 'u': return Escape(in, length, 4);
|
||||
case 'U': return Escape(in, length, 8);
|
||||
}
|
||||
|
||||
throw UnknownEscapeSequence(ch);
|
||||
}
|
||||
}
|
||||
}
|
14
exp.h
14
exp.h
|
@ -1,6 +1,8 @@
|
|||
#pragma once
|
||||
|
||||
#include "regex.h"
|
||||
#include <string>
|
||||
#include <ios>
|
||||
|
||||
namespace YAML
|
||||
{
|
||||
|
@ -13,6 +15,8 @@ namespace YAML
|
|||
// A space or a tab (reading RegEx's || as alternation - confirm in regex.h).
const RegEx Blank = RegEx(' ') || RegEx('\t');
|
||||
// A line break; only '\n' is recognized here.
const RegEx Break = RegEx('\n');
|
||||
// Either of the two above: a blank or a line break.
const RegEx BlankOrBreak = Blank || Break;
|
||||
// A decimal digit (assuming the two-character constructor denotes an inclusive range - confirm in regex.h).
const RegEx Digit = RegEx('0', '9');
|
||||
// A hexadecimal digit in either case (same range assumption as Digit).
const RegEx Hex = Digit || RegEx('A', 'F') || RegEx('a', 'f');
|
||||
|
||||
// actual tags
|
||||
|
||||
|
@ -30,11 +34,17 @@ namespace YAML
|
|||
// . Can never start with any of , [ ] { } # & * ! | > \' \" % @ `
|
||||
// . In the block context - ? : must not be followed with a space.
|
||||
// . In the flow context ? : are illegal and - must not be followed with a space.
|
||||
const RegEx PlainScalar = !(BlankOrBreak || RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-?:") + Blank)),
|
||||
const RegEx PlainScalar = !(BlankOrBreak || RegEx(",[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx("-?:", REGEX_OR) + Blank)),
|
||||
PlainScalarInFlow = !(BlankOrBreak || RegEx("?:,[]{}#&*!|>\'\"%@`", REGEX_OR) || (RegEx('-') + Blank));
|
||||
// A ':' followed by something other than a blank or line break (reading + as sequence and ! as negation - confirm in regex.h).
const RegEx IllegalColonInScalar = RegEx(':') + !BlankOrBreak;
|
||||
const RegEx EndScalar = RegEx(':') + BlankOrBreak,
|
||||
EndScalarInFlow = (RegEx(':') + BlankOrBreak) || RegEx(",:?[]{}");
|
||||
EndScalarInFlow = (RegEx(':') + BlankOrBreak) || RegEx(",:?[]{}", REGEX_OR);
|
||||
|
||||
// The escaped quote tested for inside single-quoted scalars: presumably two single quotes in a row (the default REGEX_OP for string patterns is not visible here).
const RegEx EscSingleQuote = RegEx("\'\'");
|
||||
// A backslash followed by a line break; the quoted-scalar scanner tests this as an "escaped newline".
const RegEx EscBreak = RegEx('\\') + Break;
|
||||
|
||||
// and some functions
|
||||
// Decodes the backslash escape at the front of 'in' and returns the resulting text; 'length' receives the number of characters consumed. Throws on unknown or malformed escapes.
std::string Escape(std::istream& in, int& length);
|
||||
}
|
||||
|
||||
namespace Keys
|
||||
|
|
|
@ -35,7 +35,7 @@ namespace YAML
|
|||
// RegEx (string form)
// . Stores 'op' and adds one single-character RegEx per character of 'str'
//   to the parameter list, in order.
// . NOTE(review): SetOp() is defined elsewhere; presumably it installs the
//   operation object that matches 'op' - confirm.
RegEx::RegEx(const std::string& str, REGEX_OP op): m_op(op), m_pOp(0)
{
	// each character becomes its own parameter (str[i], not str[0] every time)
	for(unsigned i=0;i<str.size();i++)
		m_params.push_back(RegEx(str[i]));

	SetOp();
}
|
||||
|
|
16
scanner.cpp
16
scanner.cpp
|
@ -286,6 +286,22 @@ namespace YAML
|
|||
}
|
||||
}
|
||||
|
||||
// IncreaseFlowLevel
|
||||
void Scanner::IncreaseFlowLevel()
|
||||
{
|
||||
// TODO: Push simple key
|
||||
m_flowLevel++;
|
||||
}
|
||||
|
||||
// DecreaseFlowLevel
|
||||
void Scanner::DecreaseFlowLevel()
|
||||
{
|
||||
if(m_flowLevel > 0) {
|
||||
m_flowLevel--;
|
||||
// TODO: Pop simple key
|
||||
}
|
||||
}
|
||||
|
||||
// temporary function for testing
|
||||
void Scanner::Scan()
|
||||
{
|
||||
|
|
12
scanner.h
12
scanner.h
|
@ -20,6 +20,8 @@ namespace YAML
|
|||
void ScanToNextToken();
|
||||
void PushIndentTo(int column, bool sequence);
|
||||
void PopIndentTo(int column);
|
||||
// Increments m_flowLevel; called when a flow sequence/map start token is scanned.
void IncreaseFlowLevel();
|
||||
// Decrements m_flowLevel if it is above zero; called when a flow sequence/map end token is scanned.
void DecreaseFlowLevel();
|
||||
|
||||
void Scan();
|
||||
|
||||
|
@ -37,6 +39,16 @@ namespace YAML
|
|||
bool IsValue();
|
||||
bool IsPlainScalar();
|
||||
|
||||
// WhitespaceInfo
// . Buffers the blanks and line breaks seen between pieces of scalar text
//   until Join() turns them into the text to append.
struct WhitespaceInfo {
	WhitespaceInfo();

	// buffers a blank, unless a line break is already pending
	void AddBlank(char ch);

	// buffers a line break (the first one separately from the rest)
	void AddBreak(const std::string& line);

	// returns the text the buffered whitespace stands for and resets the buffers
	std::string Join();

	bool leadingBlanks;          // true from the first buffered break until Join()
	std::string whitespace;      // blanks buffered while no break is pending
	std::string leadingBreaks;   // the first buffered line break
	std::string trailingBreaks;  // line breaks buffered after the first
};
|
||||
|
||||
template <typename T> void ScanAndEnqueue(T *pToken);
|
||||
template <typename T> T *ScanToken(T *pToken);
|
||||
|
||||
|
|
180
scantoken.cpp
180
scantoken.cpp
|
@ -64,8 +64,8 @@ namespace YAML
|
|||
template <> FlowSeqStartToken *Scanner::ScanToken(FlowSeqStartToken *pToken)
|
||||
{
|
||||
// TODO: "save simple key"
|
||||
// TODO: increase flow level
|
||||
|
||||
IncreaseFlowLevel();
|
||||
m_simpleKeyAllowed = true;
|
||||
|
||||
// eat
|
||||
|
@ -77,8 +77,8 @@ namespace YAML
|
|||
template <> FlowMapStartToken *Scanner::ScanToken(FlowMapStartToken *pToken)
|
||||
{
|
||||
// TODO: "save simple key"
|
||||
// TODO: increase flow level
|
||||
|
||||
IncreaseFlowLevel();
|
||||
m_simpleKeyAllowed = true;
|
||||
|
||||
// eat
|
||||
|
@ -90,8 +90,8 @@ namespace YAML
|
|||
template <> FlowSeqEndToken *Scanner::ScanToken(FlowSeqEndToken *pToken)
|
||||
{
|
||||
// TODO: "remove simple key"
|
||||
// TODO: decrease flow level
|
||||
|
||||
DecreaseFlowLevel();
|
||||
m_simpleKeyAllowed = false;
|
||||
|
||||
// eat
|
||||
|
@ -103,8 +103,8 @@ namespace YAML
|
|||
template <> FlowMapEndToken *Scanner::ScanToken(FlowMapEndToken *pToken)
|
||||
{
|
||||
// TODO: "remove simple key"
|
||||
// TODO: decrease flow level
|
||||
|
||||
DecreaseFlowLevel();
|
||||
m_simpleKeyAllowed = false;
|
||||
|
||||
// eat
|
||||
|
@ -210,8 +210,8 @@ namespace YAML
|
|||
m_simpleKeyAllowed = false;
|
||||
|
||||
// now eat and store the scalar
|
||||
std::string scalar, whitespace, leadingBreaks, trailingBreaks;
|
||||
bool leadingBlanks = false;
|
||||
std::string scalar;
|
||||
WhitespaceInfo info;
|
||||
|
||||
while(INPUT) {
|
||||
// doc start/end tokens
|
||||
|
@ -234,26 +234,6 @@ namespace YAML
|
|||
if(m_flowLevel == 0 && Exp::EndScalar.Matches(INPUT))
|
||||
break;
|
||||
|
||||
// join whitespace
|
||||
if(leadingBlanks) {
|
||||
if(Exp::Break.Matches(leadingBreaks)) {
|
||||
// fold line break?
|
||||
if(trailingBreaks.empty())
|
||||
scalar += ' ';
|
||||
else
|
||||
scalar += trailingBreaks;
|
||||
} else {
|
||||
scalar += leadingBreaks + trailingBreaks;
|
||||
}
|
||||
|
||||
leadingBlanks = false;
|
||||
leadingBreaks = "";
|
||||
trailingBreaks = "";
|
||||
} else if(!whitespace.empty()) {
|
||||
scalar += whitespace;
|
||||
whitespace = "";
|
||||
}
|
||||
|
||||
// finally, read the character!
|
||||
scalar += GetChar();
|
||||
}
|
||||
|
@ -266,37 +246,29 @@ namespace YAML
|
|||
while(INPUT && Exp::BlankOrBreak.Matches(INPUT)) {
|
||||
if(Exp::Blank.Matches(INPUT)) {
|
||||
// can't use tabs as indentation! only spaces!
|
||||
if(INPUT.peek() == '\t' && leadingBlanks && m_column <= m_indents.top())
|
||||
if(INPUT.peek() == '\t' && info.leadingBlanks && m_column <= m_indents.top())
|
||||
throw IllegalTabInScalar();
|
||||
|
||||
// maybe store this character
|
||||
if(!leadingBlanks)
|
||||
whitespace += GetChar();
|
||||
else
|
||||
Eat(1);
|
||||
} else {
|
||||
info.AddBlank(GetChar());
|
||||
} else {
|
||||
// we know it's a line break; see how many characters to read
|
||||
int n = Exp::Break.Match(INPUT);
|
||||
std::string line = GetChar(n);
|
||||
|
||||
// where to store this character?
|
||||
if(!leadingBlanks) {
|
||||
leadingBlanks = true;
|
||||
whitespace = "";
|
||||
leadingBreaks += line;
|
||||
} else
|
||||
trailingBreaks += line;
|
||||
info.AddBreak(line);
|
||||
}
|
||||
}
|
||||
|
||||
// and finally break if we're below the indentation level
|
||||
// break if we're below the indentation level
|
||||
if(m_flowLevel == 0 && m_column <= m_indents.top())
|
||||
break;
|
||||
|
||||
// finally join whitespace
|
||||
scalar += info.Join();
|
||||
}
|
||||
|
||||
// now modify our token
|
||||
pToken->value = scalar;
|
||||
if(leadingBlanks)
|
||||
if(info.leadingBlanks)
|
||||
m_simpleKeyAllowed = true;
|
||||
|
||||
return pToken;
|
||||
|
@ -305,6 +277,128 @@ namespace YAML
|
|||
// QuotedScalarToken
|
||||
template <> QuotedScalarToken *Scanner::ScanToken(QuotedScalarToken *pToken)
|
||||
{
|
||||
// TODO: "save simple key"
|
||||
|
||||
m_simpleKeyAllowed = false;
|
||||
|
||||
// eat single or double quote
|
||||
char quote = GetChar();
|
||||
bool single = (quote == '\'');
|
||||
|
||||
// now eat and store the scalar
|
||||
std::string scalar;
|
||||
WhitespaceInfo info;
|
||||
|
||||
while(INPUT) {
|
||||
if(IsDocumentStart() || IsDocumentEnd())
|
||||
throw DocIndicatorInQuote();
|
||||
|
||||
if(INPUT.peek() == EOF)
|
||||
throw EOFInQuote();
|
||||
|
||||
// first eat non-blanks
|
||||
while(INPUT && !Exp::BlankOrBreak.Matches(INPUT)) {
|
||||
// escaped single quote?
|
||||
if(single && Exp::EscSingleQuote.Matches(INPUT)) {
|
||||
int n = Exp::EscSingleQuote.Match(INPUT);
|
||||
scalar += GetChar(n);
|
||||
continue;
|
||||
}
|
||||
|
||||
// is the quote ending?
|
||||
if(INPUT.peek() == (single ? '\'' : '\"'))
|
||||
break;
|
||||
|
||||
// escaped newline?
|
||||
if(Exp::EscBreak.Matches(INPUT))
|
||||
break;
|
||||
|
||||
// other escape sequence
|
||||
if(INPUT.peek() == '\\') {
|
||||
int length = 0;
|
||||
scalar += Exp::Escape(INPUT, length);
|
||||
m_column += length;
|
||||
continue;
|
||||
}
|
||||
|
||||
// and finally, just add the damn character
|
||||
scalar += GetChar();
|
||||
}
|
||||
|
||||
// is the quote ending?
|
||||
if(INPUT.peek() == (single ? '\'' : '\"')) {
|
||||
// eat and go
|
||||
GetChar();
|
||||
break;
|
||||
}
|
||||
|
||||
// now we eat blanks
|
||||
while(Exp::BlankOrBreak.Matches(INPUT)) {
|
||||
if(Exp::Blank.Matches(INPUT)) {
|
||||
info.AddBlank(GetChar());
|
||||
} else {
|
||||
// we know it's a line break; see how many characters to read
|
||||
int n = Exp::Break.Match(INPUT);
|
||||
std::string line = GetChar(n);
|
||||
info.AddBreak(line);
|
||||
}
|
||||
}
|
||||
|
||||
// and finally join the whitespace
|
||||
scalar += info.Join();
|
||||
}
|
||||
|
||||
pToken->value = scalar;
|
||||
return pToken;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// WhitespaceInfo stuff
|
||||
|
||||
// WhitespaceInfo
// . Starts with empty buffers and no line break pending.
Scanner::WhitespaceInfo::WhitespaceInfo()
	: leadingBlanks(false)
{
}
|
||||
|
||||
void Scanner::WhitespaceInfo::AddBlank(char ch)
|
||||
{
|
||||
if(!leadingBlanks)
|
||||
whitespace += ch;
|
||||
}
|
||||
|
||||
void Scanner::WhitespaceInfo::AddBreak(const std::string& line)
|
||||
{
|
||||
// where to store this character?
|
||||
if(!leadingBlanks) {
|
||||
leadingBlanks = true;
|
||||
whitespace = "";
|
||||
leadingBreaks += line;
|
||||
} else
|
||||
trailingBreaks += line;
|
||||
}
|
||||
|
||||
std::string Scanner::WhitespaceInfo::Join()
|
||||
{
|
||||
std::string ret;
|
||||
|
||||
if(leadingBlanks) {
|
||||
if(Exp::Break.Matches(leadingBreaks)) {
|
||||
// fold line break?
|
||||
if(trailingBreaks.empty())
|
||||
ret = " ";
|
||||
else
|
||||
ret = trailingBreaks;
|
||||
} else {
|
||||
ret = leadingBreaks + trailingBreaks;
|
||||
}
|
||||
|
||||
leadingBlanks = false;
|
||||
leadingBreaks = "";
|
||||
trailingBreaks = "";
|
||||
} else if(!whitespace.empty()) {
|
||||
ret = whitespace;
|
||||
whitespace = "";
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
|
10
test.yaml
10
test.yaml
|
@ -1,10 +1,4 @@
|
|||
---
|
||||
- green
|
||||
eggs,
|
||||
and
|
||||
ham!
|
||||
- eggs # this is really important!
|
||||
- - cheddar cheese
|
||||
- american cheese
|
||||
- bread
|
||||
- milk and eggs
|
||||
- [cheddar, american, swiss]
|
||||
...
|
|
@ -169,6 +169,10 @@
|
|||
RelativePath=".\document.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\exp.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\main.cpp"
|
||||
>
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue