1   /*
2    * RDFpro - An extensible tool for building stream-oriented RDF processing libraries.
3    * 
4    * Written in 2014 by Francesco Corcoglioniti with support by Marco Amadori, Michele Mostarda,
5    * Alessio Palmero Aprosio and Marco Rospocher. Contact info on http://rdfpro.fbk.eu/
6    * 
7    * To the extent possible under law, the authors have dedicated all copyright and related and
8    * neighboring rights to this software to the public domain worldwide. This software is
9    * distributed without any warranty.
10   * 
11   * You should have received a copy of the CC0 Public Domain Dedication along with this software.
12   * If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
13   */
14  package eu.fbk.rdfpro.tql;
15  
16  import java.io.IOException;
17  import java.io.OutputStream;
18  import java.io.OutputStreamWriter;
19  import java.io.Writer;
20  import java.nio.charset.Charset;
21  
22  import org.openrdf.model.BNode;
23  import org.openrdf.model.Literal;
24  import org.openrdf.model.Resource;
25  import org.openrdf.model.Statement;
26  import org.openrdf.model.URI;
27  import org.openrdf.model.Value;
28  import org.openrdf.model.vocabulary.XMLSchema;
29  import org.openrdf.rio.RDFFormat;
30  import org.openrdf.rio.RDFHandlerException;
31  import org.openrdf.rio.helpers.RDFWriterBase;
32  
33  /**
34   * An implementation of the RDFWriter interface that writes RDF documents in the Turtle Quads
35   * (TQL) format. TQL is N-Quads with the more permissive (and efficient!) Turtle encoding. TQL is
36   * used in DBpedia exports and is supported in input by the Virtuoso triple store.
37   */
38  public class TQLWriter extends RDFWriterBase {
39  
40      private final Writer writer;
41  
42      /**
43       * Creates a new TQLWriter that will write to the supplied OutputStream. The UTF-8 character
44       * encoding is used.
45       *
46       * @param stream
47       *            the OutputStream to write to
48       */
49      public TQLWriter(final OutputStream stream) {
50          this(new OutputStreamWriter(stream, Charset.forName("UTF-8")));
51      }
52  
53      /**
54       * Creates a new TurtleWriter that will write to the supplied Writer.
55       *
56       * @param writer
57       *            the Writer to write to
58       */
59      public TQLWriter(final Writer writer) {
60          if (writer == null) {
61              throw new NullPointerException("Null writer");
62          }
63          this.writer = writer;
64      }
65  
66      @Override
67      public RDFFormat getRDFFormat() {
68          return TQL.FORMAT;
69      }
70  
71      @Override
72      public void startRDF() throws RDFHandlerException {
73          // nothing to do
74      }
75  
76      @Override
77      public void handleComment(final String comment) throws RDFHandlerException {
78          // nothing to do
79      }
80  
81      @Override
82      public void handleNamespace(final String prefix, final String uri) throws RDFHandlerException {
83          // nothing to do
84      }
85  
86      @Override
87      public void handleStatement(final Statement statement) throws RDFHandlerException {
88          try {
89              emitResource(statement.getSubject());
90              this.writer.write(' ');
91              emitURI(statement.getPredicate());
92              this.writer.write(' ');
93              emitValue(statement.getObject());
94              final Resource ctx = statement.getContext();
95              if (ctx != null) {
96                  this.writer.write(' ');
97                  emitResource(statement.getContext());
98              }
99              this.writer.write(' ');
100             this.writer.write('.');
101             this.writer.write('\n');
102         } catch (final IOException ex) {
103             throw new RDFHandlerException(ex);
104         }
105     }
106 
107     @Override
108     public void endRDF() throws RDFHandlerException {
109         try {
110             this.writer.flush();
111             this.writer.flush();
112         } catch (final IOException ex) {
113             throw new RDFHandlerException(ex);
114         }
115     }
116 
117     private void emitValue(final Value value) throws IOException, RDFHandlerException {
118         if (value instanceof URI) {
119             emitURI((URI) value);
120         } else if (value instanceof BNode) {
121             emitBNode((BNode) value);
122         } else if (value instanceof Literal) {
123             emitLiteral((Literal) value);
124         }
125     }
126 
127     private void emitResource(final Resource resource) throws IOException, RDFHandlerException {
128         if (resource instanceof URI) {
129             emitURI((URI) resource);
130         } else if (resource instanceof BNode) {
131             emitBNode((BNode) resource);
132         }
133     }
134 
135     private void emitURI(final URI uri) throws IOException, RDFHandlerException {
136         final String string = uri.stringValue();
137         final int length = string.length();
138         this.writer.write('<');
139         for (int i = 0; i < length; ++i) {
140             final char ch = string.charAt(i);
141             switch (ch) {
142             case 0x22: // "
143                 this.writer.write("\\u0022");
144                 break;
145             case 0x3C: // <
146                 this.writer.write("\\u003C");
147                 break;
148             case 0x3E: // >
149                 this.writer.write("\\u003E");
150                 break;
151             case 0x5C: // \
152                 this.writer.write("\\u005C");
153                 break;
154             case 0x5E: // ^
155                 this.writer.write("\\u005E");
156                 break;
157             case 0x60: // `
158                 this.writer.write("\\u0060");
159                 break;
160             case 0x7B: // {
161                 this.writer.write("\\u007B");
162                 break;
163             case 0x7C: // |
164                 this.writer.write("\\u007C");
165                 break;
166             case 0x7D: // }
167                 this.writer.write("\\u007D");
168                 break;
169             case 0x7F: // delete control char (not strictly necessary)
170                 this.writer.write("\\u007F");
171                 break;
172             default:
173                 if (ch <= 32) { // control char and ' '
174                     this.writer.write("\\u00");
175                     this.writer.write(Character.forDigit(ch / 16, 16));
176                     this.writer.write(Character.forDigit(ch % 16, 16));
177                 } else {
178                     this.writer.write(ch);
179                 }
180             }
181         }
182         this.writer.write('>');
183     }
184 
185     private void emitBNode(final BNode bnode) throws IOException, RDFHandlerException {
186         final String id = bnode.getID();
187         final int last = id.length() - 1;
188         this.writer.write('_');
189         this.writer.write(':');
190         if (last < 0) {
191             this.writer.write("genid-hash-");
192             this.writer.write(Integer.toHexString(System.identityHashCode(bnode)));
193         } else {
194             char ch = id.charAt(0);
195             if (!TQL.isPN_CHARS_U(ch) && !TQL.isNumber(ch)) {
196                 this.writer.write("genid-start-");
197                 this.writer.write(ch);
198             } else {
199                 this.writer.write(ch);
200             }
201             if (last > 0) {
202                 for (int i = 1; i < last; ++i) {
203                     ch = id.charAt(i);
204                     if (TQL.isPN_CHARS(ch) || ch == '.') {
205                         this.writer.write(ch);
206                     } else {
207                         this.writer.write(Integer.toHexString(ch));
208                     }
209                 }
210                 ch = id.charAt(last);
211                 if (TQL.isPN_CHARS(ch)) {
212                     this.writer.write(ch);
213                 } else {
214                     this.writer.write(Integer.toHexString(ch));
215                 }
216             }
217         }
218     }
219 
220     private void emitLiteral(final Literal literal) throws IOException, RDFHandlerException {
221         final String label = literal.getLabel();
222         final int length = label.length();
223         this.writer.write('"');
224         for (int i = 0; i < length; ++i) {
225             final char ch = label.charAt(i);
226             switch (ch) {
227             case 0x08: // \b
228                 this.writer.write('\\');
229                 this.writer.write('b');
230                 break;
231             case 0x09: // \t
232                 this.writer.write('\\');
233                 this.writer.write('t');
234                 break;
235             case 0x0A: // \n
236                 this.writer.write('\\');
237                 this.writer.write('n');
238                 break;
239             case 0x0C: // \f
240                 this.writer.write('\\');
241                 this.writer.write('f');
242                 break;
243             case 0x0D: // \r
244                 this.writer.write('\\');
245                 this.writer.write('r');
246                 break;
247             case 0x22: // "
248                 this.writer.write('\\');
249                 this.writer.write('"');
250                 break;
251             case 0x5C: // \
252                 this.writer.write('\\');
253                 this.writer.write('\\');
254                 break;
255             case 0x7F: // delete control char
256                 this.writer.write("\\u007F");
257                 break;
258             default:
259                 if (ch < 32) { // other control char (not strictly necessary)
260                     this.writer.write("\\u00");
261                     this.writer.write(Character.forDigit(ch / 16, 16));
262                     this.writer.write(Character.forDigit(ch % 16, 16));
263                 } else {
264                     this.writer.write(ch);
265                 }
266             }
267         }
268         this.writer.write('"');
269         final String language = literal.getLanguage();
270         if (language != null) {
271             this.writer.write('@');
272             final int len = language.length();
273             boolean minusFound = false;
274             for (int i = 0; i < len; ++i) {
275                 final char ch = language.charAt(i);
276                 boolean valid = true;
277                 if (ch == '-') {
278                     minusFound = true;
279                     if (i == 0) {
280                         valid = false;
281                     } else {
282                         final char prev = language.charAt(i - 1);
283                         valid = TQL.isLetter(prev) || TQL.isNumber(prev);
284                     }
285                 } else if (TQL.isNumber(ch)) {
286                     valid = minusFound;
287                 } else {
288                     valid = TQL.isLetter(ch);
289                 }
290                 if (!valid) {
291                     throw new RDFHandlerException("Cannot serialize language tag '" + language
292                             + "' in TQL: invalid char '" + ch + "' (see Turtle specs)");
293                 }
294                 this.writer.write(ch);
295             }
296             if (language.charAt(len - 1) == '-') {
297                 throw new RDFHandlerException("Cannot serialize language tag '" + language
298                         + "' in TQL: invalid final char '-' (see Turtle specs)");
299             }
300         } else {
301             final URI datatype = literal.getDatatype();
302             if (datatype != null && !XMLSchema.STRING.equals(datatype)) {
303                 this.writer.write('^');
304                 this.writer.write('^');
305                 emitURI(datatype);
306             }
307         }
308     }
309 
310 }