-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathpegdebug.cpp
More file actions
215 lines (182 loc) · 6.13 KB
/
pegdebug.cpp
File metadata and controls
215 lines (182 loc) · 6.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
#include "peglib.h"
#include <iostream>
#include <string>
#include <sstream>
#include <fstream>
#include <streambuf>
using namespace peg;
using namespace std;
// Helper for repeated output: bundles a C string with the number of times
// it should be written when the object is streamed.
struct repeat{
    const char* s;    // text to emit
    std::size_t num;  // number of copies of s to emit

    repeat(const char* s_, std::size_t num_)
        : s{s_}, num{num_}
    {}
};
// Writes rep.s to the stream rep.num times in a row, then hands the stream
// back so the call can be chained.
inline std::ostream& operator <<(std::ostream& stream, const repeat& rep) {
    std::size_t remaining = rep.num;
    while(remaining > 0){
        stream << rep.s;
        --remaining;
    }
    return stream;
}
// Reads the complete file at `path` into `txt`.
// Returns true on success; returns false and leaves `txt` untouched when the
// file cannot be opened.
// https://stackoverflow.com/a/2602060/3825996
bool readFile(const string &path, string &txt){
    ifstream input(path.c_str());
    if(!input.is_open()){
        return false;
    }
    // a default-constructed istreambuf_iterator marks end-of-stream
    std::istreambuf_iterator<char> first(input);
    const std::istreambuf_iterator<char> last;
    txt.assign(first, last);
    return true;
}
// Writes `txt` to the file at `path`, replacing any previous content.
// Returns true only if the file could be opened AND all data reached it;
// returns false when opening, writing or the final flush on close fails
// (e.g. missing directory, full disk).
bool writeFile(const string &path, const string &txt){
    ofstream ofs(path);
    if(!ofs.is_open()){
        return false;
    }
    ofs << txt;
    // close() flushes the buffer and sets failbit if that flush fails, so the
    // stream state has to be inspected after closing, not before
    ofs.close();
    return !ofs.fail();
}
// Replaces the first occurrence of `from` in `str` with `to`.
// Returns true if an occurrence was found and replaced, false if `str` was
// left unchanged.
// https://stackoverflow.com/a/3418285/3825996
bool replace(string& str, const string& from, const string& to) {
    const size_t hit = str.find(from);
    const bool found = (hit != string::npos);
    if(found){
        str.replace(hit, from.length(), to);
    }
    return found;
}
// Replaces every occurrence of `from` in `str` with `to`, scanning from left
// to right. An empty `from` leaves `str` unchanged.
void replaceAll(std::string& str, const std::string& from, const std::string& to) {
    if(from.empty()){
        return;
    }
    // the search resumes behind the inserted text, so a `to` that contains
    // `from` (like replacing 'x' with 'yx') is not replaced again
    for(std::size_t cursor = str.find(from);
        cursor != std::string::npos;
        cursor = str.find(from, cursor + to.length())){
        str.replace(cursor, from.length(), to);
    }
}
// Runs the tool: loads a PEG grammar and a text file, parses the text with
// tracing callbacks attached to every grammar rule and writes an HTML report
// built from the template provided by html.inc.
//
// args: [0] program name, [1] grammar file, [2] text file to parse,
//       [3] path of the HTML file to create.
// Returns EXIT_SUCCESS once the report has been written (also when the text
// itself did not parse), EXIT_FAILURE on wrong usage, an unreadable input
// file, a grammar that cannot be loaded or an unwritable output file.
int Main(const vector<string> &args) {
    if(args.size() != 4){
        // args may be empty, so the program name must not be indexed blindly
        const string program = args.empty() ? string("pegdebug") : args[0];
        cerr << "usage: " << program << " mygrammar.peg mytext.txt myoutput.html\n";
        return EXIT_FAILURE;
    }

    #include "html.inc" // creates std::string html holding the html source

    bool ok = false;

    // read grammar file
    string grammar;
    ok = readFile(args[1], grammar);
    if(!ok){
        cerr << "error reading file " << args[1] << "\n";
        return EXIT_FAILURE;
    }
    replaceAll(grammar, "\r\n", "\n"); // in case we open a windows text file on linux

    // read text file to parse
    string text;
    ok = readFile(args[2], text);
    if(!ok){
        cerr << "error reading file " << args[2] << "\n";
        return EXIT_FAILURE;
    }
    replaceAll(text, "\r\n", "\n"); // in case we open a windows text file on linux

    // create parser
    parser parser;
    parser.set_logger([](size_t line, size_t col, const string& msg) {
        cerr << line << ":" << col << ": " << msg << "\n";
    });

    // load grammar
    ok = parser.load_grammar(grammar.c_str());
    if(!ok){
        cerr << "error loading grammar\n";
        return EXIT_FAILURE;
    }

    // pointer to text start (will be used to find current position in text)
    const char* pStart = text.c_str();

    // list all reduction rules
    std::vector<std::string> rules;
    rules.reserve(parser.get_grammar().size());
    for (auto const& r : parser.get_grammar()) {
        rules.emplace_back(r.first);
    }

    // output string for tree view
    stringstream tree;

    // semantic value produced by every rule: the annotated replacement for
    // the stretch of input the rule matched
    struct substitution{
        const char* start;  // where the substitution starts in the original text
        size_t len=0;       // how long the substituted text is
        string insert="";   // the text that has to be inserted
    };

    // assign callbacks for enter, match and leave of each rule (html wrapping of the parsee)
    for(auto& rule:rules){
        // enter: open a <div> for the attempt and indent one level deeper
        parser[rule.c_str()].enter = [rule, &pStart, &tree](const Context &c, const char* s, size_t n, any& dt) {
            auto indent = std::any_cast<int*>(dt);
            tree << repeat(" ", *indent) << "<div title=\"" << rule << "\" data-pos=" << (s-pStart) << ">" << "\n";
            (*indent)++;
        };
        // match: splice the children's annotated text into this rule's matched
        // text and wrap the result in placeholder markers
        parser[rule.c_str()] = [rule](const SemanticValues& sv, any&) {
            string result(sv.sv());
            const char* start = sv.sv().data();
            // walk the children back to front so the offsets of the ones
            // still to be processed stay valid
            for(size_t i = sv.size(); i-- > 0;){
                auto sub = std::any_cast<substitution>(sv[i]);
                result.replace(sub.start-start, sub.len, sub.insert);
            }
            result = "\x01A_1" + rule + "\x01A_2" + to_string(start - sv.ss) + "\x01A_3" + result + "\x01A_4"; // use ascii SUB for later substitution
            return substitution{start, sv.sv().size(), result};
        };
        // leave: record the match length (-1 if the rule failed) and close the <div>
        parser[rule.c_str()].leave = [&tree](const Context &c, const char* s, size_t n, size_t matchlen, any& value, any& dt) {
            auto indent = std::any_cast<int*>(dt);
            const int match = success(matchlen) ? static_cast<int>(matchlen) : -1;
            tree << repeat(" ", *indent) << "<span data-match=" << match << "></span>" << "\n";
            (*indent)--;
            tree << repeat(" ", *indent) << "</div>\n";
        };
    }

    // from here on messages go to stdout instead of stderr
    parser.set_logger([](size_t ln, size_t col, const string& msg) {
        cout << "(" << ln << ":" << col << ") " << msg << "\n";
    });

    //parser.enable_packrat_parsing(); // enable packrat parsing

    // parse
    int indent = 0;
    any dt = &indent; // the callbacks share the indentation level through this pointer
    substitution result;
    ok = parser.parse(text.c_str(), dt, result);
    if(ok){
        cout << "parsing successful\n";
    }
    else{
        cout << "parsing not successful\n";
    }

    // encode the input text as a double-quoted javascript string literal
    replaceAll(text, "\\", "\\\\"); // must come first, the other escapes add backslashes
    replaceAll(text, "\"", "\\\"");
    replaceAll(text, "\n", "\\n");
    replaceAll(text, "\t", "\\t");
    replaceAll(text, "\r", "\\r"); // a raw carriage return would end the literal
    text = "\"" + text + "\"";

    // encode source text for html display
    string source = result.insert;
    replaceAll(source, "&", "&amp;"); // must come first, the other entities add '&'
    replaceAll(source, "<", "&lt;");
    replaceAll(source, ">", "&gt;");
    replaceAll(source, " ", "<i>␣</i>");
    replaceAll(source, "\t", "<i>→</i> ");
    replaceAll(source, "\n", "<i>↲</i><br>\n");

    // now do substitutions that had to be immune to the substitutions before
    replaceAll(source, "\x01A_1", "<div title=\"");
    replaceAll(source, "\x01A_2", "\" data-pos=");
    replaceAll(source, "\x01A_3", ">");
    replaceAll(source, "\x01A_4", "</div>");

    // place generated texts in html
    // All placeholders are located in the untouched template before anything
    // is inserted; otherwise a rule name or input text that happens to contain
    // "SOURCE" or "TEXT" could be mistaken for a placeholder by a later lookup.
    const string treeHtml = tree.str();
    struct slot{
        size_t pos;           // offset of the placeholder in html (npos if absent)
        size_t len;           // length of the placeholder keyword
        const string* value;  // text that takes the placeholder's place
    };
    slot slots[] = {
        {html.find("TREE"),   4, &treeHtml},
        {html.find("SOURCE"), 6, &source},
        {html.find("TEXT"),   4, &text},
    };
    const size_t slotCount = sizeof(slots) / sizeof(slots[0]);
    for(size_t i = 0; i < slotCount; ++i){
        if(slots[i].pos == string::npos){
            continue;
        }
        html.replace(slots[i].pos, slots[i].len, *slots[i].value);
        // placeholders behind the one just filled moved by the size difference
        for(size_t j = i + 1; j < slotCount; ++j){
            if(slots[j].pos != string::npos && slots[j].pos > slots[i].pos){
                slots[j].pos = slots[j].pos - slots[i].len + slots[i].value->size();
            }
        }
    }

    ok = writeFile(args[3], html);
    if(!ok){
        cerr << "error writing file " << args[3] << "\n";
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}
// Program entry point: packs the command line into a vector of strings and
// delegates to Main, whose result becomes the exit code.
int main(int argc, char** argv){
    const vector<string> arguments(argv, argv + argc);
    return Main(arguments);
}