Browse Source

support text nodes, support comments, slightly better error messages

Patrick Brosi 6 years ago
parent
commit
5081d32879
2 changed files with 97 additions and 13 deletions
  1. +83 -12
      File.cpp
  2. +14 -1
      File.h

+ 83 - 12
File.cpp

@@ -42,7 +42,8 @@ void File::reset() {
42 42
 
43 43
   if (_file) close(_file);
44 44
   _file = open(_path.c_str(), O_RDONLY);
45
-  if (_file < 0) throw XmlFileException(std::string("could not open ") + _path);
45
+  if (_file < 0)
46
+    throw XmlFileException(std::string("could not open file"), _path, 0, 0, 0);
46 47
 #ifdef __unix__
47 48
   posix_fadvise(_file, 0, 0, POSIX_FADV_SEQUENTIAL);
48 49
 #endif
@@ -93,6 +94,7 @@ bool File::next() {
93 94
 
94 95
   if (_s.hanging) _s.hanging--;
95 96
   _ret.name = 0;
97
+  _ret.text = emptyStr;
96 98
   _ret.attrs.clear();
97 99
   void* i;
98 100
   while (_lastBytes) {
@@ -107,10 +109,65 @@ bool File::next() {
107 109
             continue;
108 110
           } else {
109 111
             _s.s = IN_TEXT;
112
+            _ret.name = emptyStr;
113
+            _tmp = _c;
110 114
             continue;
111 115
           }
112 116
         case IN_TEXT:
113
-          throw XmlFileException("text nodes not yet supported");
117
+          i = memchr(_c, '<', _lastBytes - (_c - _buffer[_which]));
118
+          if (!i) {
119
+            _c = _buffer[_which] + _lastBytes;
120
+            continue;
121
+          } else {
122
+            _c = (char*)i;
123
+            *_c = 0;
124
+            _ret.text = _tmp;
125
+            _s.s = IN_TAG_TENTATIVE;
126
+            _c++;
127
+            return true;
128
+          }
129
+        case IN_COMMENT_TENTATIVE:
130
+          if (c == '-') {
131
+            _s.s = IN_COMMENT_TENTATIVE2;
132
+            continue;
133
+          }
134
+          throw XmlFileException("Expected comment", _path, _c, _buffer[_which],
135
+                                 _prevs.off);
136
+
137
+        case IN_COMMENT_TENTATIVE2:
138
+          if (c == '-') {
139
+            _s.s = IN_COMMENT;
140
+            continue;
141
+          }
142
+          throw XmlFileException("Expected comment", _path, _c, _buffer[_which],
143
+                                 _prevs.off);
144
+
145
+        case IN_COMMENT_CL_TENTATIVE:
146
+          if (c == '-') {
147
+            _s.s = IN_COMMENT_CL_TENTATIVE2;
148
+            continue;
149
+          }
150
+          _s.s = IN_COMMENT;
151
+          continue;
152
+
153
+        case IN_COMMENT_CL_TENTATIVE2:
154
+          if (c == '>') {
155
+            _s.s = NONE;
156
+            continue;
157
+          }
158
+          _s.s = IN_COMMENT;
159
+        // fall through, we are still in comment
160
+
161
+        case IN_COMMENT:
162
+          i = memchr(_c, '-', _lastBytes - (_c - _buffer[_which]));
163
+          if (!i) {
164
+            _c = _buffer[_which] + _lastBytes;
165
+            continue;
166
+          } else {
167
+            _c = (char*)i;
168
+            _s.s = IN_COMMENT_CL_TENTATIVE;
169
+            continue;
170
+          }
114 171
         case IN_TAG_TENTATIVE:
115 172
           if (c == '/') {
116 173
             _s.s = IN_TAG_NAME_CLOSE;
@@ -119,6 +176,9 @@ bool File::next() {
119 176
           } else if (c == '?') {
120 177
             _s.s = IN_TAG_NAME_META;
121 178
             continue;
179
+          } else if (c == '!') {
180
+            _s.s = IN_COMMENT_TENTATIVE;
181
+            continue;
122 182
           } else if (std::isalnum(c) || c == '-' || c == '_' || c == '.') {
123 183
             _s.s = IN_TAG_NAME;
124 184
             _ret.name = _c;
@@ -141,7 +201,8 @@ bool File::next() {
141 201
             _s.s = WS_SKIP;
142 202
             continue;
143 203
           } else {
144
-            throw XmlFileException("expected valid tag");
204
+            throw XmlFileException("Expected valid tag", _path, _c,
205
+                                   _buffer[_which], _prevs.off);
145 206
           }
146 207
 
147 208
         case IN_ATTRVAL_SQ:
@@ -182,7 +243,8 @@ bool File::next() {
182 243
             _tmp2 = _c + 1;
183 244
             continue;
184 245
           } else {
185
-            throw XmlFileException("expected attribute value");
246
+            throw XmlFileException("Expected attribute value", _path, _c,
247
+                                   _buffer[_which], _prevs.off);
186 248
           }
187 249
 
188 250
         case IN_ATTRKEY:
@@ -198,7 +260,8 @@ bool File::next() {
198 260
             continue;
199 261
           }
200 262
 
201
-          throw XmlFileException("expected attribute key char or =");
263
+          throw XmlFileException("Expected attribute key char or =", _path, _c,
264
+                                 _buffer[_which], _prevs.off);
202 265
 
203 266
         case AFTER_ATTRKEY:
204 267
           if (std::isspace(c))
@@ -207,8 +270,9 @@ bool File::next() {
207 270
             _s.s = AW_IN_ATTRVAL;
208 271
             continue;
209 272
           } else {
210
-            // TODO: error
211
-            continue;
273
+            throw XmlFileException(
274
+                std::string("Expected attribute value for '") + _tmp + "'.",
275
+                _path, _c, _buffer[_which], _prevs.off);
212 276
           }
213 277
 
214 278
         case IN_TAG_NAME:
@@ -249,7 +313,10 @@ bool File::next() {
249 313
           } else if (c == '>') {
250 314
             *_c = 0;
251 315
             if (_tmp != _s.tagStack.top()) {
252
-              throw XmlFileException("closing wrong tag");
316
+              throw XmlFileException(std::string("Closing wrong tag '<") + _tmp +
317
+                                         ">', expected close of '<" +
318
+                                         _s.tagStack.top() + ">'.",
319
+                                     _path, _c, _buffer[_which], _prevs.off);
253 320
             }
254 321
             _s.tagStack.pop();
255 322
             _s.s = NONE;
@@ -261,13 +328,17 @@ bool File::next() {
261 328
             continue;
262 329
           else if (c == '>') {
263 330
             if (_tmp != _s.tagStack.top()) {
264
-              throw XmlFileException("closing wrong tag");
331
+              throw XmlFileException(std::string("Closing wrong tag '<") + _tmp +
332
+                                         ">', expected close of '<" +
333
+                                         _s.tagStack.top() + ">'.",
334
+                                     _path, _c, _buffer[_which], _prevs.off);
265 335
             }
266 336
             _s.tagStack.pop();
267 337
             _s.s = NONE;
268 338
             continue;
269 339
           } else {
270
-            throw XmlFileException("expected '>'");
340
+            throw XmlFileException("Expected '>'", _path, _c, _buffer[_which],
341
+                                   _prevs.off);
271 342
           }
272 343
 
273 344
         case AW_CLOSING:
@@ -316,8 +387,8 @@ bool File::next() {
316 387
 
317 388
   if (_s.tagStack.size()) {
318 389
     if (_s.tagStack.top() != "[root]") {
319
-      // TODO error
320
-      throw XmlFileException("XML tree not complete");
390
+      throw XmlFileException("XML tree not complete", _path, _c,
391
+                             _buffer[_which], _prevs.off);
321 392
     } else {
322 393
       _s.tagStack.pop();
323 394
     }

+ 14 - 1
File.h

@@ -7,6 +7,7 @@
7 7
 #include <cstring>
8 8
 #include <fstream>
9 9
 #include <map>
10
+#include <sstream>
10 11
 #include <stack>
11 12
 #include <string>
12 13
 
@@ -16,7 +17,12 @@ const static size_t BUFFER_S = 16 * 1024;
16 17
 
17 18
 class XmlFileException : public std::exception {
18 19
  public:
19
-  XmlFileException(std::string msg) : _msg(msg) {}
20
+  XmlFileException(std::string msg, std::string file, const char* p, char* buff,  // msg: error text; file: name printed at the start of the message; p, buff: pointers whose difference (p - buff) enters the position
21
+                   size_t offset) {  // offset: added to (p - buff) to form the reported position
22
+    std::stringstream ss;
23
+    ss << file << " at position " << (offset + (p - buff)) << ": " << msg;  // what() text has the form "<file> at position <offset + (p - buff)>: <msg>"
24
+    _msg = ss.str();
25
+  }  // NOTE(review): the parser call sites pass (_c, _buffer[_which], _prevs.off) -- presumably cursor, start of the active buffer and file offset of that buffer; confirm. The open-failure call site passes (0, 0, 0), so it reports position 0.
20 26
   ~XmlFileException() throw() {}
21 27
 
22 28
   virtual const char* what() const throw() { return _msg.c_str(); };
@@ -39,6 +45,11 @@ enum State {
39 45
   IN_ATTRVAL_SQ,
40 46
   IN_ATTRVAL_DQ,
41 47
   IN_TEXT,
48
+  IN_COMMENT_TENTATIVE,
49
+  IN_COMMENT_TENTATIVE2,
50
+  IN_COMMENT,
51
+  IN_COMMENT_CL_TENTATIVE,
52
+  IN_COMMENT_CL_TENTATIVE2,
42 53
   AW_CLOSING,
43 54
   WS_SKIP
44 55
 };
@@ -61,6 +72,7 @@ typedef std::map<const char*, const char*, AttrCmp> AttrMap;
61 72
 
62 73
 struct Tag {  // Result record filled by File::next() (stored in the File member _ret). NOTE(review): name/text are assigned from pointers into the parser's read buffer -- presumably only valid until the buffer is reused; confirm.
63 74
   const char* name;  // Tag name; File::next() sets it to 0 on entry, to the start of the tag name for a tag, and to the empty string when it switches to reading text.
75
+  const char* text;  // Text content; reset to the empty string at the start of File::next() and assigned when a text node is returned.
64 76
   AttrMap attrs;  // Attributes of the tag: a std::map from const char* key to const char* value using the AttrCmp comparator; cleared at the start of File::next().
65 77
 };
66 78
 
@@ -99,6 +111,7 @@ class File {
99 111
   Tag _ret;
100 112
 
101 113
   static size_t utf8(size_t cp, char* out);
114
+  const char* emptyStr = "";
102 115
 };
103 116
 }
104 117