1   /*
2    * RDFpro - An extensible tool for building stream-oriented RDF processing libraries.
3    * 
4    * Written in 2014 by Francesco Corcoglioniti with support by Marco Amadori, Michele Mostarda,
5    * Alessio Palmero Aprosio and Marco Rospocher. Contact info on http://rdfpro.fbk.eu/
6    * 
7    * To the extent possible under law, the authors have dedicated all copyright and related and
8    * neighboring rights to this software to the public domain worldwide. This software is
9    * distributed without any warranty.
10   * 
11   * You should have received a copy of the CC0 Public Domain Dedication along with this software.
12   * If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
13   */
14  package eu.fbk.rdfpro.jsonld;
15  
16  import java.io.IOException;
17  import java.io.OutputStream;
18  import java.io.OutputStreamWriter;
19  import java.io.Writer;
20  import java.nio.charset.Charset;
21  import java.util.ArrayList;
22  import java.util.Arrays;
23  import java.util.Collections;
24  import java.util.Comparator;
25  import java.util.HashMap;
26  import java.util.LinkedHashMap;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.Objects;
30  import java.util.Set;
31  
32  import org.openrdf.model.BNode;
33  import org.openrdf.model.Literal;
34  import org.openrdf.model.Resource;
35  import org.openrdf.model.Statement;
36  import org.openrdf.model.URI;
37  import org.openrdf.model.Value;
38  import org.openrdf.model.vocabulary.RDF;
39  import org.openrdf.model.vocabulary.XMLSchema;
40  import org.openrdf.rio.RDFFormat;
41  import org.openrdf.rio.RDFHandlerException;
42  import org.openrdf.rio.helpers.RDFWriterBase;
43  
44  /**
45   * An implementation of the RDFWriter interface that writes RDF documents in the JSON-LD format.
46   *
47   * <p>
48   * JSON-LD is a JSON-based format for serializing data in (a superset of) RDF as JSON and
49   * interpreting JSON contents as RDF. See http://www.w3.org/TR/json-ld/ for the format
50   * specification.
51   * </p>
52   * <p>
53   * Similarly to RDF/XML, JSON-LD provides for several ways to encode the same triples. This
54   * RDFWriter adopts a simple and consistent strategy whose output is a JSON-LD document consisting
 * of a sequence of resource blocks, one for each RDF resource of the configured root type (see
 * {@link JSONLD#ROOT_TYPES}).
57   * </p>
58   */
59  public class JSONLDWriter extends RDFWriterBase {
60  
    // Size of the buffering window, in statements: a non-forced flush emits the nodes at the head
    // of the LRS list whose last statement is at least WINDOW statements old
    private static final int WINDOW = 32 * 1024;

    private final Writer writer; // destination of the generated JSON-LD text

    private final Map<String, String> prefixes; // namespace-to-prefix map

    // context-to-id-to-node map holding the nodes buffered so far
    private final Map<Resource, Map<Resource, JSONLDWriter.Node>> nodes;

    private JSONLDWriter.Node lrsHead; // head of least recently seen (LRS) linked list

    private JSONLDWriter.Node lrsTail; // tail of least recently seen (LRS) linked list

    private long counter; // statement counter, incremented for each handled statement

    private int indent; // current indentation level

    private Resource emitContext; // context being currently emitted

    // id-to-node map for the context being emitted; null until emission to the writer has started
    private Map<Resource, JSONLDWriter.Node> emitContextNodes;

    // root types read from the writer configuration in startRDF(); a node having one of them as
    // rdf:type is flagged as root and is never nested inside another node
    private Set<URI> rootTypes;
83  
    /**
     * Creates a new JSONLDWriter that will write to the supplied OutputStream. The stream is
     * wrapped in an {@link OutputStreamWriter} using the UTF-8 character encoding and then passed
     * to {@link #JSONLDWriter(Writer)}.
     *
     * @param stream
     *            the OutputStream to write to
     */
    public JSONLDWriter(final OutputStream stream) {
        this(new OutputStreamWriter(stream, Charset.forName("UTF-8")));
    }
94  
95      /**
96       * Creates a new JSONLDWriter that will write to the supplied Writer.
97       *
98       * @param writer
99       *            the Writer to write to
100      */
101     public JSONLDWriter(final Writer writer) {
102         if (writer == null) {
103             throw new NullPointerException("Null writer");
104         }
105         this.writer = writer;
106         this.prefixes = new LinkedHashMap<String, String>();
107         this.nodes = new HashMap<Resource, Map<Resource, JSONLDWriter.Node>>();
108         this.lrsHead = null;
109         this.lrsTail = null;
110         this.counter = 0;
111         this.indent = 1;
112         this.rootTypes = null;
113     }
114 
    /**
     * Returns the RDF format produced by this writer.
     *
     * @return the constant {@link RDFFormat#JSONLD}
     */
    @Override
    public RDFFormat getRDFFormat() {
        return RDFFormat.JSONLD;
    }
119 
    /**
     * Starts the RDF stream by reading the {@link JSONLD#ROOT_TYPES} setting from the writer
     * configuration. Nothing is written to the underlying writer at this point.
     */
    @Override
    public void startRDF() throws RDFHandlerException {
        this.rootTypes = getWriterConfig().get(JSONLD.ROOT_TYPES);
    }
124 
    /**
     * Handles a comment by forcing the emission of all the nodes buffered so far. The comment
     * text itself is discarded.
     *
     * @param comment
     *            the comment, ignored
     * @throws RDFHandlerException
     *             wrapping any IOException raised while writing
     */
    @Override
    public void handleComment(final String comment) throws RDFHandlerException {
        try {
            // comments cannot be emitted in JSONLD, but still we use them to flush output
            flush(true);
        } catch (final IOException ex) {
            throw new RDFHandlerException(ex);
        }
    }
134 
135     @Override
136     public void handleNamespace(final String prefix, final String uri) throws RDFHandlerException {
137 
138         // add only if emission to writer not started yet
139         if (this.emitContextNodes == null) {
140             this.prefixes.put(uri, prefix);
141         }
142     }
143 
144     @Override
145     public void handleStatement(final Statement statement) throws RDFHandlerException {
146 
147         // retrieve or create a node map for the statement context
148         final Resource context = statement.getContext();
149         Map<Resource, JSONLDWriter.Node> nodes = this.nodes.get(context);
150         if (nodes == null) {
151             nodes = new HashMap<Resource, JSONLDWriter.Node>();
152             this.nodes.put(context, nodes);
153         }
154 
155         // retrieve or create a node for the statement subject in the statement context
156         final Resource subject = statement.getSubject();
157         JSONLDWriter.Node node = nodes.get(subject);
158         if (node != null) {
159             detach(node);
160         } else {
161             node = new Node(subject, context);
162             nodes.put(subject, node);
163         }
164         attach(node, this.lrsTail); // move node at the end of LRS list
165         node.counter = this.counter++; // update LRS statement counter
166         node.statements.add(statement);
167         if (statement.getPredicate().equals(RDF.TYPE)
168                 && this.rootTypes.contains(statement.getObject())) {
169             node.root = true;
170         }
171 
172         try {
173             flush(false); // emit nodes not seen in last WINDOW statement
174         } catch (final IOException ex) {
175             throw new RDFHandlerException(ex);
176         }
177     }
178 
    /**
     * Ends the RDF stream: forces the emission of all the buffered nodes, closes the top-level
     * {@code @graph} array and the enclosing JSON object, and flushes the underlying writer. The
     * writer is not closed.
     *
     * @throws RDFHandlerException
     *             wrapping any IOException raised while writing
     */
    @Override
    public void endRDF() throws RDFHandlerException {
        try {
            flush(true);
            this.writer.append("]\n}");
            this.writer.flush();
        } catch (final IOException ex) {
            throw new RDFHandlerException(ex);
        }
    }
189 
190     private void flush(final boolean force) throws IOException {
191 
192         // Emit preamble of JSONLD document if necessary and select context
193         if (this.emitContextNodes == null
194                 && (force || this.counter - this.lrsHead.counter >= WINDOW)) {
195             this.writer.append("{\n\t\"@context\": {");
196             if (!this.prefixes.isEmpty()) {
197                 String separator = "\n\t\t";
198                 final String[] sortedNamespaces = new String[this.prefixes.size()];
199                 this.prefixes.keySet().toArray(sortedNamespaces);
200                 Arrays.sort(sortedNamespaces);
201                 for (final String namespace : sortedNamespaces) {
202                     final String prefix = this.prefixes.get(namespace);
203                     this.writer.append(separator);
204                     this.writer.append('\"');
205                     emitString(prefix);
206                     this.writer.append("\": \"");
207                     emitString(namespace);
208                     this.writer.append('\"');
209                     separator = ",\n\t\t";
210                 }
211             }
212             this.writer.append("},\n\t\"@graph\": [");
213         }
214 
215         // Emit all the nodes if force=true, otherwise limit to old nodes
216         while (this.lrsHead != null && (force || this.counter - this.lrsHead.counter >= WINDOW)) {
217 
218             // detect change of context
219             final boolean sameContext = Objects.equals(this.lrsHead.context, this.emitContext);
220 
221             // otherwise, close old context if necessary, and add required comma
222             if (this.emitContextNodes == null) {
223                 this.emitContextNodes = this.nodes.get(this.lrsHead.context);
224             } else {
225                 if (!sameContext && this.emitContext != null) {
226                     this.writer.append("]\n\t}");
227                     --this.indent;
228                 }
229                 this.writer.append(',');
230                 this.writer.append(' ');
231             }
232 
233             // open new context if necessary
234             if (!sameContext) {
235                 if (this.lrsHead.context != null) {
236                     this.writer.append("{\n\t\t\"@id\": ");
237                     emit(this.lrsHead.context, false);
238                     this.writer.append(",\n\t\t\"@graph\": [");
239                     ++this.indent;
240                 }
241                 this.emitContext = this.lrsHead.context;
242                 this.emitContextNodes = this.nodes.get(this.lrsHead.context);
243             }
244 
245             // emit the node
246             emitNode(this.emitContextNodes.get(this.lrsHead.id));
247         }
248 
249         // if force=true, close the context if necessary
250         if (force && this.emitContext != null) {
251             this.writer.append("]\n\t}");
252             --this.indent;
253             this.emitContext = null;
254         }
255     }
256 
257     private void emit(final Value value, final boolean expand) throws IOException {
258 
259         if (value instanceof Literal) {
260             emitLiteral((Literal) value);
261         } else {
262             final JSONLDWriter.Node node = expand ? this.emitContextNodes.get(value) : null;
263             if (node != null && !node.root) {
264                 emitNode(node);
265             } else {
266                 if (expand) {
267                     this.writer.append("{\"@id\": ");
268                 }
269                 if (value instanceof BNode) {
270                     emitBNode((BNode) value);
271                 } else if (value instanceof URI) {
272                     emitURI((URI) value);
273                 }
274                 if (expand) {
275                     this.writer.append('}');
276                 }
277             }
278         }
279     }
280 
281     private void emitNode(final JSONLDWriter.Node node) throws IOException {
282 
283         this.emitContextNodes.remove(node.id);
284         detach(node);
285 
286         ++this.indent;
287         this.writer.append('{');
288         emitNewline();
289         this.writer.append("\"@id\": ");
290         emit(node.id, false);
291 
292         boolean startProperty = true;
293         boolean isTypeProperty = true;
294         boolean insideArray = false;
295 
296         Collections.sort(node.statements, StatementComparator.INSTANCE);
297         final int statementCount = node.statements.size();
298         for (int i = 0; i < statementCount; ++i) {
299 
300             final Statement statement = node.statements.get(i);
301             final URI property = statement.getPredicate();
302             final boolean last = i == statementCount - 1
303                     || !property.equals(node.statements.get(i + 1).getPredicate());
304 
305             if (startProperty) {
306                 this.writer.append(',');
307                 emitNewline();
308                 isTypeProperty = property.equals(RDF.TYPE);
309                 if (isTypeProperty) {
310                     this.writer.append("\"@type\"");
311                 } else {
312                     emit(property, false);
313                 }
314                 this.writer.append(": ");
315                 insideArray = !last;
316                 if (insideArray) {
317                     this.writer.append('[');
318                 }
319             } else {
320                 this.writer.append(", ");
321             }
322 
323             emit(statement.getObject(), !isTypeProperty);
324 
325             startProperty = last;
326             if (startProperty && insideArray) {
327                 this.writer.append(']');
328             }
329         }
330 
331         --this.indent;
332         emitNewline();
333         this.writer.append('}');
334     }
335 
336     private void emitBNode(final BNode bnode) throws IOException {
337         this.writer.append("\"_:");
338         emitString(bnode.getID());
339         this.writer.append('\"');
340     }
341 
342     private void emitURI(final URI uri) throws IOException {
343         final String prefix = this.prefixes.get(uri.getNamespace());
344         this.writer.append('\"');
345         if (prefix != null) {
346             emitString(prefix);
347             this.writer.append(':');
348             emitString(uri.getLocalName());
349         } else {
350             emitString(uri.stringValue());
351         }
352         this.writer.append('\"');
353     }
354 
355     private void emitLiteral(final Literal literal) throws IOException {
356         final String language = literal.getLanguage();
357         if (language != null) {
358             this.writer.append("{\"@language\": \"");
359             emitString(language);
360             this.writer.append("\", \"@value\": \"");
361         } else {
362             final URI datatype = literal.getDatatype();
363             if (datatype != null && !XMLSchema.STRING.equals(datatype)) {
364                 this.writer.append("{\"@type\": ");
365                 emit(datatype, false);
366                 this.writer.append(", \"@value\": \"");
367             } else {
368                 this.writer.append("{\"@value\": \"");
369             }
370         }
371         emitString(literal.getLabel());
372         this.writer.append("\"}");
373     }
374 
375     private void emitString(final String string) throws IOException {
376         final int length = string.length();
377         for (int i = 0; i < length; ++i) {
378             final char ch = string.charAt(i);
379             if (ch == '\"' || ch == '\\') {
380                 this.writer.append('\\').append(ch);
381             } else if (Character.isISOControl(ch)) {
382                 if (ch == '\n') {
383                     this.writer.append('\\').append('n');
384                 } else if (ch == '\r') {
385                     this.writer.append('\\').append('r');
386                 } else if (ch == '\t') {
387                     this.writer.append('\\').append('t');
388                 } else if (ch == '\b') {
389                     this.writer.append('\\').append('b');
390                 } else if (ch == '\f') {
391                     this.writer.append('\\').append('f');
392                 } else {
393                     this.writer.append(String.format("\\u%04x", (int) ch));
394                 }
395             } else {
396                 this.writer.append(ch);
397             }
398         }
399     }
400 
401     private void emitNewline() throws IOException {
402         this.writer.append('\n');
403         for (int i = 0; i < this.indent; ++i) {
404             this.writer.append('\t');
405         }
406     }
407 
408     private void detach(final JSONLDWriter.Node node) {
409         final JSONLDWriter.Node prev = node.lrsPrev;
410         final JSONLDWriter.Node next = node.lrsNext;
411         if (prev != null) {
412             prev.lrsNext = next;
413         } else {
414             this.lrsHead = next;
415         }
416         if (next != null) {
417             next.lrsPrev = prev;
418         } else {
419             this.lrsTail = prev;
420         }
421     }
422 
423     private void attach(final JSONLDWriter.Node node, final JSONLDWriter.Node prev) {
424         JSONLDWriter.Node next;
425         if (prev == null) {
426             next = this.lrsHead;
427             this.lrsHead = node;
428         } else {
429             next = prev.lrsNext;
430             prev.lrsNext = node;
431         }
432         if (next == null) {
433             this.lrsTail = node;
434         } else {
435             next.lrsPrev = node;
436         }
437         node.lrsPrev = prev;
438         node.lrsNext = next;
439     }
440 
441     private static final class Node {
442 
443         final Resource id; // node identifier (statement subject)
444 
445         final Resource context; // node context
446 
447         final List<Statement> statements; // node statements
448 
449         long counter; // last recently seen (LRS) counter
450 
451         JSONLDWriter.Node lrsPrev; // pointer to prev node in LRS linked list
452 
453         JSONLDWriter.Node lrsNext; // pointer to next node in LRS linked list
454 
455         boolean root;
456 
457         Node(final Resource id, final Resource context) {
458             this.id = id;
459             this.context = context;
460             this.statements = new ArrayList<Statement>();
461         }
462 
463     }
464 
465     private static final class StatementComparator implements Comparator<Statement> {
466 
467         static final StatementComparator INSTANCE = new StatementComparator();
468 
469         @Override
470         public int compare(final Statement first, final Statement second) {
471             int result = compare(first.getPredicate(), second.getPredicate());
472             if (result == 0) {
473                 result = compare(first.getObject(), second.getObject());
474             }
475             return result;
476         }
477 
478         private int compare(final Value first, final Value second) {
479 
480             if (first instanceof Literal) {
481                 if (second instanceof Literal) {
482                     int result = first.stringValue().compareTo(second.stringValue());
483                     if (result == 0) {
484                         final Literal firstLit = (Literal) first;
485                         final Literal secondLit = (Literal) second;
486                         final String firstLang = firstLit.getLanguage();
487                         final String secondLang = secondLit.getLanguage();
488                         result = firstLang == null ? secondLang == null ? 0 : -1
489                                 : secondLang == null ? 1 : firstLang.compareTo(secondLang);
490                         if (result == 0) {
491                             final URI firstDt = firstLit.getDatatype();
492                             final URI secondDt = secondLit.getDatatype();
493                             result = firstDt == null ? secondDt == null ? 0 : -1
494                                     : secondDt == null ? 1 : firstDt.stringValue().compareTo(
495                                             secondDt.stringValue());
496                         }
497                     }
498                     return result;
499                 } else {
500                     return -1;
501                 }
502 
503             } else if (first instanceof URI) {
504                 if (second instanceof URI) {
505                     int result = first.stringValue().compareTo(second.stringValue());
506                     if (result != 0) {
507                         if (first.equals(RDF.TYPE)) { // rdf:type always first
508                             result = -1;
509                         } else if (second.equals(RDF.TYPE)) {
510                             result = 1;
511                         }
512                     }
513                     return result;
514                 } else if (second instanceof Literal) {
515                     return 1;
516                 } else {
517                     return -1;
518                 }
519 
520             } else if (first instanceof BNode) {
521                 if (second instanceof BNode) {
522                     return first.stringValue().compareTo(second.stringValue());
523                 } else {
524                     return 1;
525                 }
526             }
527 
528             throw new IllegalArgumentException("Invalid arguments: " + first + ", " + second);
529         }
530     }
531 
532 }