1
2
3
4
5
6
7
8
9
10
11
12
13
14 package eu.fbk.rdfpro.tql;
15
16 import java.io.IOException;
17 import java.io.InputStream;
18 import java.io.InputStreamReader;
19 import java.io.Reader;
20 import java.nio.charset.Charset;
21
22 import org.openrdf.model.Resource;
23 import org.openrdf.model.Statement;
24 import org.openrdf.model.URI;
25 import org.openrdf.model.Value;
26 import org.openrdf.model.ValueFactory;
27 import org.openrdf.model.impl.ValueFactoryImpl;
28 import org.openrdf.model.vocabulary.SESAME;
29 import org.openrdf.rio.RDFFormat;
30 import org.openrdf.rio.RDFHandlerException;
31 import org.openrdf.rio.RDFParseException;
32 import org.openrdf.rio.helpers.NTriplesParserSettings;
33 import org.openrdf.rio.helpers.RDFParserBase;
34
35
36
37
38
39
40 public class TQLParser extends RDFParserBase {
41
42 private static final int EOF = -1;
43
44 private Reader reader;
45
46 private int lineNo;
47
48 private StringBuilder builder;
49
50 private Value value;
51
52
53
54
55
56 public TQLParser() {
57 super();
58 }
59
60
61
62
63
64
65
66
67 public TQLParser(final ValueFactory valueFactory) {
68 super(valueFactory);
69 }
70
71 @Override
72 public RDFFormat getRDFFormat() {
73 return TQL.FORMAT;
74 }
75
76 @Override
77 public void parse(final InputStream stream, final String baseURI) throws IOException,
78 RDFParseException, RDFHandlerException {
79 parse(new InputStreamReader(stream, Charset.forName("UTF-8")), baseURI);
80 }
81
82 @Override
83 public void parse(final Reader reader, final String baseURI) throws IOException,
84 RDFParseException, RDFHandlerException {
85
86 if (reader == null) {
87 throw new NullPointerException("Null reader");
88 }
89
90 if (this.rdfHandler != null) {
91 this.rdfHandler.startRDF();
92 }
93
94 this.reader = reader;
95 this.lineNo = 1;
96 this.builder = new StringBuilder(1024);
97 this.value = null;
98
99 reportLocation(this.lineNo, 1);
100
101 try {
102 int c = read();
103 c = skipWhitespace(c);
104 while (c != EOF) {
105 if (c == '#') {
106 c = skipLine(c);
107 } else if (c == '\r' || c == '\n') {
108 c = skipLine(c);
109 } else {
110 c = parseQuad(c);
111 }
112 c = skipWhitespace(c);
113 }
114 } finally {
115 clear();
116 this.reader = null;
117 this.builder = null;
118 this.value = null;
119 }
120
121 if (this.rdfHandler != null) {
122 this.rdfHandler.endRDF();
123 }
124 }
125
126 private int skipLine(final int ch) throws IOException {
127 int c = ch;
128 while (c != EOF && c != '\r' && c != '\n') {
129 c = read();
130 }
131 if (c == '\n') {
132 c = read();
133 this.lineNo++;
134 reportLocation(this.lineNo, 1);
135 } else if (c == '\r') {
136 c = read();
137 if (c == '\n') {
138 c = read();
139 }
140 this.lineNo++;
141 reportLocation(this.lineNo, 1);
142 }
143 return c;
144 }
145
146 private int skipWhitespace(final int ch) throws IOException {
147 int c = ch;
148 while (c == ' ' || c == '\t') {
149 c = read();
150 }
151 return c;
152 }
153
154 private int parseQuad(final int ch) throws IOException, RDFParseException, RDFHandlerException {
155
156 int c = ch;
157 try {
158 c = parseResource(c);
159 boolean periodConsumed = (c & 0x80000000) != 0;
160 final Resource subject = (Resource) this.value;
161 if (periodConsumed) {
162 throwParseException("Found unexpected '.' " + (char) c);
163 }
164
165 c = skipWhitespace(c);
166 c = parseURI(c);
167 periodConsumed = (c & 0x80000000) != 0;
168 final URI predicate = (URI) this.value;
169 if (periodConsumed) {
170 throwParseException("Found unexpected '.' " + (char) c);
171 }
172
173 c = skipWhitespace(c);
174 c = parseValue(c);
175 periodConsumed = (c & 0x80000000) != 0;
176 final Value object = this.value;
177
178 Resource context = null;
179 if (!periodConsumed) {
180 c = skipWhitespace(c);
181 if (c != '.') {
182 c = parseResource(c);
183 periodConsumed = (c & 0x80000000) != 0;
184 context = (Resource) this.value;
185 if (!periodConsumed) {
186 c = skipWhitespace(c);
187 }
188 }
189 }
190
191 if (c == EOF) {
192 throwEOFException();
193 } else if (c != '.' && !periodConsumed) {
194 throwParseException("Expected '.', found: " + (char) c);
195 }
196
197 c = periodConsumed ? c & 0x7FFFFFFF : read();
198 c = skipWhitespace(c);
199 if (c != EOF && c != '\r' && c != '\n') {
200 throwParseException("Content after '.' is not allowed");
201 }
202
203 if (this.rdfHandler != null) {
204 final Statement statement;
205 if (context == null || context.equals(SESAME.NIL)) {
206 statement = createStatement(subject, predicate, object);
207 } else {
208 statement = createStatement(subject, predicate, object, context);
209 }
210 this.rdfHandler.handleStatement(statement);
211 }
212
213 } catch (final RDFParseException ex) {
214 if (getParserConfig().isNonFatalError(
215 NTriplesParserSettings.FAIL_ON_NTRIPLES_INVALID_LINES)) {
216 reportError(ex, this.lineNo, -1,
217 NTriplesParserSettings.FAIL_ON_NTRIPLES_INVALID_LINES);
218 } else {
219 throw ex;
220 }
221 }
222
223 c = skipLine(c);
224 return c;
225 }
226
227 private int parseValue(final int ch) throws IOException, RDFParseException {
228 int c = ch;
229 if (c == '<') {
230 c = parseURI(c);
231 } else if (c == '_') {
232 c = parseBNode(c);
233 } else if (c == '"' || c == '\'') {
234 c = parseLiteral(c);
235 } else if (c == EOF) {
236 throwEOFException();
237 } else {
238 throwParseException("Expected '<', '_' or '\"', found: " + (char) c + "");
239 }
240 return c;
241 }
242
243 private int parseResource(final int ch) throws IOException, RDFParseException {
244 int c = ch;
245 if (c == '<') {
246 c = parseURI(c);
247 } else if (c == '_') {
248 c = parseBNode(c);
249 } else if (c == EOF) {
250 throwEOFException();
251 } else {
252 throwParseException("Expected '<' or '_', found: " + (char) c);
253 }
254 return c;
255 }
256
257 private int parseURI(final int ch) throws IOException, RDFParseException {
258 int c = ch;
259 if (c != '<') {
260 throwParseException("Supplied char should be a '<', it is: " + c);
261 }
262 this.builder.setLength(0);
263 c = read();
264 while (c != '>') {
265 switch (c) {
266 case EOF:
267 throwEOFException();
268 break;
269 case '\\':
270 c = read();
271 if (c == EOF) {
272 throwEOFException();
273 } else if (c == 'u' || c == 'U') {
274 parseUChar(c);
275 } else {
276 this.builder.append((char) c);
277 }
278 break;
279 default:
280 if (c < 32) {
281
282 throwParseException("Expected valid IRI char, found: " + (char) c);
283 }
284 this.builder.append((char) c);
285 break;
286 }
287 c = read();
288 }
289 this.value = createURI(this.builder.toString());
290 c = read();
291 return c;
292 }
293
294 private int parseBNode(final int ch) throws IOException, RDFParseException {
295 int c = ch;
296 if (c != '_') {
297 throwParseException("Expected '_', found: " + c);
298 }
299 c = read();
300 if (c == EOF) {
301 throwEOFException();
302 } else if (c != ':') {
303 throwParseException("Expected ':', found: " + (char) c);
304 }
305 c = read();
306 if (c == EOF) {
307 throwEOFException();
308 } else if (!TQL.isPN_CHARS_U(c) && !TQL.isNumber(c)) {
309 throwParseException("Invalid bnode character: " + (char) c);
310 }
311 this.builder.setLength(0);
312 this.builder.append((char) c);
313 c = read();
314 while (c != EOF && TQL.isPN_CHARS(c)) {
315 this.builder.append((char) c);
316 c = read();
317 }
318 final int last = this.builder.length() - 1;
319 if (this.builder.charAt(last) == '.') {
320 this.builder.setLength(last);
321 c = c | 0x80000000;
322 }
323 this.value = createBNode(this.builder.toString());
324 return c;
325 }
326
327 private int parseLiteral(final int ch) throws IOException, RDFParseException {
328 int c = ch;
329 if (c != '"' && c != '\'') {
330 throwParseException("Expected '\"' or '\'', found: " + c);
331 }
332 final int delim = c;
333 this.builder.setLength(0);
334 c = read();
335 while (c != delim) {
336 if (c == EOF) {
337 throwEOFException();
338 } else if (c == '\\') {
339 c = read();
340 switch (c) {
341 case EOF:
342 throwEOFException();
343 break;
344 case 'b':
345 this.builder.append('\b');
346 break;
347 case 'f':
348 this.builder.append('\f');
349 break;
350 case 'n':
351 this.builder.append('\n');
352 break;
353 case 'r':
354 this.builder.append('\r');
355 break;
356 case 't':
357 this.builder.append('\t');
358 break;
359 case 'u':
360 case 'U':
361 parseUChar(c);
362 break;
363 default:
364 this.builder.append((char) c);
365 break;
366 }
367 } else {
368 this.builder.append((char) c);
369 }
370 c = read();
371 }
372 c = read();
373 final String label = this.builder.toString();
374 if (c == '@') {
375 this.builder.setLength(0);
376 c = read();
377 boolean minusFound = false;
378 while (true) {
379 if (c == '-' && this.builder.length() > 0) {
380 minusFound = true;
381 } else if (!TQL.isLetter(c) && !(TQL.isNumber(c) && minusFound)) {
382 break;
383 }
384 this.builder.append((char) c);
385 c = read();
386 }
387 if (this.builder.charAt(this.builder.length() - 1) == '-') {
388 throwParseException("Invalid lang tag: " + this.builder.toString());
389 }
390 final String language = this.builder.toString();
391 this.value = createLiteral(label, language, null, this.lineNo, -1);
392 } else if (c == '^') {
393 c = read();
394 if (c == EOF) {
395 throwEOFException();
396 } else if (c != '^') {
397 throwParseException("Expected '^', found: " + (char) c);
398 }
399 c = read();
400 if (c == EOF) {
401 throwEOFException();
402 } else if (c != '<') {
403 throwParseException("Expected '<', found: " + (char) c);
404 }
405 c = parseURI(c);
406 final URI datatype = (URI) this.value;
407 this.value = createLiteral(label, null, datatype, this.lineNo, -1);
408 } else {
409 this.value = createLiteral(label, null, null, this.lineNo, -1);
410 }
411 return c;
412 }
413
414 private void parseUChar(final int ch) throws IOException, RDFParseException {
415 int c = ch;
416 int count = 0;
417 if (c == 'u') {
418 count = 4;
419 } else if (c == 'U') {
420 count = 8;
421 } else {
422 throwParseException("Expected 'u' or 'U', found: " + c);
423 }
424 int code = 0;
425 for (int i = 0; i < count; ++i) {
426 c = read();
427 if (c == EOF) {
428 throwEOFException();
429 } else {
430 final int digit = Character.digit(c, 16);
431 if (digit < 0) {
432 throwParseException("Expected hex digit, found: " + (char) c);
433 }
434 code = code * 16 + digit;
435 }
436 }
437 this.builder.append((char) code);
438 }
439
440 private int read() throws IOException {
441 return this.reader.read();
442 }
443
444 private void throwEOFException() throws RDFParseException {
445 throw new RDFParseException("Unexpected end of file", this.lineNo, -1);
446 }
447
448 private void throwParseException(final String message) throws RDFParseException {
449 throw new RDFParseException(message, this.lineNo, -1);
450 }
451
452 }