1
2
3
4
5
6
7
8
9
10
11
12
13
14 package eu.fbk.rdfpro;
15
16 import java.util.ArrayList;
17 import java.util.Arrays;
18 import java.util.Collections;
19 import java.util.Comparator;
20 import java.util.HashMap;
21 import java.util.HashSet;
22 import java.util.IdentityHashMap;
23 import java.util.Iterator;
24 import java.util.List;
25 import java.util.Map;
26 import java.util.Objects;
27 import java.util.Set;
28 import java.util.concurrent.ConcurrentHashMap;
29 import java.util.function.Consumer;
30
31 import javax.annotation.Nullable;
32
33 import org.openrdf.model.BNode;
34 import org.openrdf.model.Literal;
35 import org.openrdf.model.Resource;
36 import org.openrdf.model.Statement;
37 import org.openrdf.model.URI;
38 import org.openrdf.model.Value;
39 import org.openrdf.model.impl.StatementImpl;
40 import org.openrdf.model.vocabulary.OWL;
41 import org.openrdf.model.vocabulary.RDF;
42 import org.openrdf.model.vocabulary.RDFS;
43 import org.openrdf.model.vocabulary.SESAME;
44 import org.openrdf.model.vocabulary.XMLSchema;
45 import org.openrdf.rio.RDFHandler;
46 import org.openrdf.rio.RDFHandlerException;
47
48 import eu.fbk.rdfpro.util.Statements;
49
50 final class ProcessorRDFS implements RDFProcessor {
51
/**
 * Identity-keyed index of the RDF, RDFS, OWL and XML Schema vocabulary constants. Each
 * constant is mapped to itself, and a lookup succeeds only for these exact instances.
 */
private static final Map<URI, URI> VOC;

static {
    final URI[] terms = {
        // RDF
        RDF.TYPE, RDF.PROPERTY, RDF.XMLLITERAL, RDF.SUBJECT, RDF.PREDICATE, RDF.OBJECT,
        RDF.STATEMENT, RDF.BAG, RDF.ALT, RDF.SEQ, RDF.VALUE, RDF.LI, RDF.LIST, RDF.FIRST,
        RDF.REST, RDF.NIL, RDF.LANGSTRING,
        // RDFS
        RDFS.RESOURCE, RDFS.LITERAL, RDFS.CLASS, RDFS.SUBCLASSOF, RDFS.SUBPROPERTYOF,
        RDFS.DOMAIN, RDFS.RANGE, RDFS.COMMENT, RDFS.LABEL, RDFS.DATATYPE, RDFS.CONTAINER,
        RDFS.MEMBER, RDFS.ISDEFINEDBY, RDFS.SEEALSO, RDFS.CONTAINERMEMBERSHIPPROPERTY,
        // OWL
        OWL.CLASS, OWL.INDIVIDUAL, OWL.THING, OWL.NOTHING, OWL.EQUIVALENTCLASS,
        OWL.EQUIVALENTPROPERTY, OWL.SAMEAS, OWL.DIFFERENTFROM, OWL.ALLDIFFERENT,
        OWL.DISTINCTMEMBERS, OWL.OBJECTPROPERTY, OWL.DATATYPEPROPERTY, OWL.INVERSEOF,
        OWL.TRANSITIVEPROPERTY, OWL.SYMMETRICPROPERTY, OWL.FUNCTIONALPROPERTY,
        OWL.INVERSEFUNCTIONALPROPERTY, OWL.RESTRICTION, OWL.ONPROPERTY, OWL.ALLVALUESFROM,
        OWL.SOMEVALUESFROM, OWL.MINCARDINALITY, OWL.MAXCARDINALITY, OWL.CARDINALITY,
        OWL.ONTOLOGY, OWL.IMPORTS, OWL.INTERSECTIONOF, OWL.VERSIONINFO, OWL.VERSIONIRI,
        OWL.PRIORVERSION, OWL.BACKWARDCOMPATIBLEWITH, OWL.INCOMPATIBLEWITH,
        OWL.DEPRECATEDCLASS, OWL.DEPRECATEDPROPERTY, OWL.ANNOTATIONPROPERTY,
        OWL.ONTOLOGYPROPERTY, OWL.ONEOF, OWL.HASVALUE, OWL.DISJOINTWITH, OWL.UNIONOF,
        OWL.COMPLEMENTOF,
        // XML Schema datatypes
        XMLSchema.DURATION, XMLSchema.DATETIME, XMLSchema.DAYTIMEDURATION, XMLSchema.TIME,
        XMLSchema.DATE, XMLSchema.GYEARMONTH, XMLSchema.GYEAR, XMLSchema.GMONTHDAY,
        XMLSchema.GDAY, XMLSchema.GMONTH, XMLSchema.STRING, XMLSchema.BOOLEAN,
        XMLSchema.BASE64BINARY, XMLSchema.HEXBINARY, XMLSchema.FLOAT, XMLSchema.DECIMAL,
        XMLSchema.DOUBLE, XMLSchema.ANYURI, XMLSchema.QNAME, XMLSchema.NOTATION,
        XMLSchema.NORMALIZEDSTRING, XMLSchema.TOKEN, XMLSchema.LANGUAGE, XMLSchema.NMTOKEN,
        XMLSchema.NMTOKENS, XMLSchema.NAME, XMLSchema.NCNAME, XMLSchema.ID, XMLSchema.IDREF,
        XMLSchema.IDREFS, XMLSchema.ENTITY, XMLSchema.ENTITIES, XMLSchema.INTEGER,
        XMLSchema.LONG, XMLSchema.INT, XMLSchema.SHORT, XMLSchema.BYTE,
        XMLSchema.NON_POSITIVE_INTEGER, XMLSchema.NEGATIVE_INTEGER,
        XMLSchema.NON_NEGATIVE_INTEGER, XMLSchema.POSITIVE_INTEGER, XMLSchema.UNSIGNED_LONG,
        XMLSchema.UNSIGNED_INT, XMLSchema.UNSIGNED_SHORT, XMLSchema.UNSIGNED_BYTE
    };

    // Build the index in a local first so that the final field is assigned exactly once.
    final Map<URI, URI> index = new IdentityHashMap<URI, URI>();
    for (int i = 0; i < terms.length; ++i) {
        index.put(terms[i], terms[i]);
    }
    VOC = index;
}
88
/** Rules enabled for both TBox closure and ABox inference. */
private final Ruleset ruleset;

/** Closed TBox, indexed for per-statement ABox inference. */
private final TBox tbox;

/** Whether inferred {@code rdf:type} statements whose object is a BNode are dropped. */
private final boolean dropBNodeTypes;

/** Whether the closed TBox is emitted at the end of each processed stream. */
private final boolean emitTBox;

/**
 * Loads the TBox from the supplied source, computes its closure and indexes it.
 *
 * @param tbox
 *            the source of TBox statements
 * @param tboxContext
 *            the context assigned to TBox statements; {@code null} disables emission of the
 *            TBox, {@code sesame:nil} causes TBox statements to carry no context
 * @param decomposeOWLAxioms
 *            forwarded to the TBox inferencer to enable decomposition of OWL axioms
 * @param dropBNodeTypes
 *            whether {@code rdf:type} statements with a BNode object are dropped from the output
 * @param excludedRules
 *            names of rules to disable; {@code null} or empty selects the default ruleset
 */
ProcessorRDFS(final RDFSource tbox, @Nullable final Resource tboxContext,
        final boolean decomposeOWLAxioms, final boolean dropBNodeTypes,
        final String... excludedRules) {

    // Seed the interner with the vocabulary constants, so that TBox values equal to a
    // constant are replaced by the constant itself (later code compares by identity).
    final Map<Value, Value> interner = new HashMap<Value, Value>();
    for (final URI term : VOC.keySet()) {
        interner.put(term, term);
    }

    final Database database = new Database();
    final Consumer<Statement> loader = statement -> {
        final Resource subj = internTBoxValue(interner, statement.getSubject());
        final URI pred = internTBoxValue(interner, statement.getPredicate());
        final Value obj = internTBoxValue(interner, statement.getObject());
        database.add(subj, pred, obj);
    };
    tbox.forEach(loader);
    database.commit();

    final Ruleset rules;
    if (excludedRules == null || excludedRules.length == 0) {
        rules = Ruleset.DEFAULT;
    } else {
        rules = new Ruleset(excludedRules);
    }

    new TBoxInferencer(decomposeOWLAxioms, rules, database).infer();

    final Resource effectiveContext = SESAME.NIL.equals(tboxContext) ? null : tboxContext;
    this.tbox = new TBox(database, effectiveContext);
    this.dropBNodeTypes = dropBNodeTypes;
    this.emitTBox = tboxContext != null;
    this.ruleset = rules;
}

/**
 * Returns the canonical instance of a TBox value, registering the value as canonical on
 * its first occurrence. A literal whose datatype has a different canonical instance is
 * re-created with that canonical datatype before being registered.
 *
 * @param interner
 *            the map from values to their canonical instances, updated by this call
 * @param value
 *            the value to canonicalize, possibly {@code null}
 * @return the canonical instance
 */
@SuppressWarnings("unchecked")
@Nullable
private static <T extends Value> T internTBoxValue(final Map<Value, Value> interner,
        @Nullable final T value) {
    final Value known = interner.get(value);
    if (known != null) {
        return (T) known;
    }
    T canonical = value;
    if (value instanceof Literal) {
        final Literal literal = (Literal) value;
        final URI datatype = literal.getDatatype();
        if (datatype != null) {
            final URI internedDatatype = internTBoxValue(interner, datatype);
            if (internedDatatype != datatype) {
                canonical = (T) Statements.VALUE_FACTORY.createLiteral(literal.getLabel(),
                        internedDatatype);
            }
        }
    }
    interner.put(canonical, canonical);
    return canonical;
}
150
151 @Override
152 public RDFHandler wrap(final RDFHandler handler) {
153 return new Handler(Objects.requireNonNull(handler));
154 }
155
156 private final class Handler extends AbstractRDFHandlerWrapper {
157
158 private final Deduplicator deduplicator;
159
160 private ThreadLocal<ABoxInferencer> inferencer;
161
162 Handler(final RDFHandler handler) {
163
164 super(handler);
165
166 this.deduplicator = new Deduplicator();
167 this.inferencer = new ThreadLocal<ABoxInferencer>() {
168
169 @Override
170 protected ABoxInferencer initialValue() {
171 return new ABoxInferencer(Handler.this.handler, ProcessorRDFS.this.ruleset,
172 ProcessorRDFS.this.tbox, Handler.this.deduplicator,
173 ProcessorRDFS.this.dropBNodeTypes);
174 }
175
176 };
177 }
178
179 @Override
180 public void handleStatement(final Statement statement) throws RDFHandlerException {
181 this.inferencer.get().handleStatement(statement);
182 }
183
184 @Override
185 public void endRDF() throws RDFHandlerException {
186 if (ProcessorRDFS.this.emitTBox) {
187 for (final Statement statement : ProcessorRDFS.this.tbox.statements) {
188 super.handleStatement(statement);
189 }
190 }
191 this.handler.endRDF();
192 }
193
194 }
195
196 private static final class TBox {
197
198 private static final Resource[] EMPTY = new Resource[0];
199
200 final List<Statement> statements;
201
202 final Map<Resource, Resource> resources;
203
204 final Map<Resource, Type> types;
205
206 final Map<Resource, Property> properties;
207
208 TBox(final Database database, @Nullable final Resource context) {
209
210 final List<Statement> attributes = new ArrayList<Statement>();
211
212 final Map<Resource, Resource> resources = new HashMap<Resource, Resource>();
213 final Map<Resource, Type> types = new HashMap<Resource, Type>();
214 final Map<Resource, Property> properties = new HashMap<Resource, Property>();
215
216 final Statement[] statementArray = new Statement[database.size()];
217
218 int index = 0;
219 for (final Statement statement : database) {
220
221 final Resource s = statement.getSubject();
222 final URI p = statement.getPredicate();
223 final Value o = statement.getObject();
224
225 statementArray[index++] = Objects.equals(context, statement.getContext()) ? statement
226 : context == null ? Statements.VALUE_FACTORY.createStatement(s, p, o)
227 : Statements.VALUE_FACTORY.createStatement(s, p, o, context);
228
229 resources.put(s, s);
230 resources.put(p, p);
231 if (o instanceof Resource) {
232 resources.put((Resource) o, (Resource) o);
233 }
234
235 if (o instanceof Resource
236 && (p.equals(RDFS.SUBCLASSOF) || p.equals(RDFS.DOMAIN)
237 || p.equals(RDFS.RANGE) || p.equals(RDFS.SUBPROPERTYOF)
238 && o instanceof URI)) {
239 attributes.add(statement);
240 }
241 }
242
243 Collections.sort(attributes, Sorter.INSTANCE);
244
245 final int length = attributes.size();
246 Resource subject = null;
247 Resource[] parents = EMPTY;
248 Resource[] domain = EMPTY;
249 Resource[] range = EMPTY;
250 boolean property = false;
251
252 int i = 0;
253 while (i < length) {
254 final Statement t = attributes.get(i);
255 final Resource s = t.getSubject();
256 final URI p = t.getPredicate();
257
258 if (s != subject) {
259 if (subject != null) {
260 if (property) {
261 properties.put(subject, new Property(parents, domain, range));
262 } else {
263 types.put(subject, new Type(parents));
264 }
265 }
266 subject = s;
267 parents = EMPTY;
268 domain = EMPTY;
269 range = EMPTY;
270 }
271
272 final int start = i;
273 for (++i; i < length; ++i) {
274 final Statement t2 = attributes.get(i);
275 if (t2.getSubject() != s || t2.getPredicate() != p) {
276 break;
277 }
278 }
279
280 final Resource[] array = new Resource[i - start];
281 for (int j = start; j < i; ++j) {
282 array[j - start] = (Resource) attributes.get(j).getObject();
283 }
284
285 property = p != RDFS.SUBCLASSOF;
286 if (p == RDFS.SUBCLASSOF || p == RDFS.SUBPROPERTYOF) {
287 parents = array;
288 } else if (p == RDFS.DOMAIN) {
289 domain = array;
290 } else if (p == RDFS.RANGE) {
291 range = array;
292 }
293 }
294
295 this.statements = Arrays.asList(statementArray);
296 this.resources = resources;
297 this.types = types;
298 this.properties = properties;
299 }
300
301 static final class Type {
302
303 final Resource[] parents;
304
305 Type(final Resource[] parents) {
306 this.parents = parents;
307 }
308
309 }
310
311 static final class Property {
312
313 final Resource[] parents;
314
315 final Resource[] domain;
316
317 final Resource[] range;
318
319 Property(final Resource[] parents, final Resource[] domain, final Resource[] range) {
320 this.parents = parents;
321 this.domain = domain;
322 this.range = range;
323 }
324
325 }
326
327 private static final class Sorter implements Comparator<Statement> {
328
329 static Sorter INSTANCE = new Sorter();
330
331 @Override
332 public int compare(final Statement t1, final Statement t2) {
333 int result = 0;
334 if (t1 != t2) {
335 result = System.identityHashCode(t1.getSubject())
336 - System.identityHashCode(t2.getSubject());
337 if (result == 0) {
338 result = System.identityHashCode(t1.getPredicate())
339 - System.identityHashCode(t2.getPredicate());
340 }
341 }
342 return result;
343 }
344
345 }
346
347 }
348
349 private static final class TBoxInferencer {
350
351 private final boolean decomposeOWLAxioms;
352
353 private final Ruleset ruleset;
354
355 private final Database db;
356
357 private Iterable<Statement> delta;
358
359 TBoxInferencer(final boolean decomposeOWLAxioms, final Ruleset ruleset,
360 final Database database) {
361 this.decomposeOWLAxioms = decomposeOWLAxioms;
362 this.ruleset = ruleset;
363 this.db = database;
364 }
365
366 void infer() {
367
368 addAxioms();
369 if (this.decomposeOWLAxioms) {
370 decomposeOWLAxioms();
371 }
372 this.db.commit();
373
374 this.delta = this.db;
375 while (true) {
376 evalRules();
377 final List<Statement> added = this.db.commit();
378 if (added.isEmpty()) {
379 break;
380 }
381 this.delta = added;
382 }
383 }
384
385 private void addAxioms() {
386
387 emit(RDF.TYPE, RDFS.DOMAIN, RDFS.RESOURCE);
388 emit(RDFS.DOMAIN, RDFS.DOMAIN, RDF.PROPERTY);
389 emit(RDFS.RANGE, RDFS.DOMAIN, RDF.PROPERTY);
390 emit(RDFS.SUBPROPERTYOF, RDFS.DOMAIN, RDF.PROPERTY);
391 emit(RDFS.SUBCLASSOF, RDFS.DOMAIN, RDFS.CLASS);
392 emit(RDF.SUBJECT, RDFS.DOMAIN, RDF.STATEMENT);
393 emit(RDF.PREDICATE, RDFS.DOMAIN, RDF.STATEMENT);
394 emit(RDF.OBJECT, RDFS.DOMAIN, RDF.STATEMENT);
395 emit(RDFS.MEMBER, RDFS.DOMAIN, RDFS.RESOURCE);
396 emit(RDF.FIRST, RDFS.DOMAIN, RDF.LIST);
397 emit(RDF.REST, RDFS.DOMAIN, RDF.LIST);
398 emit(RDFS.SEEALSO, RDFS.DOMAIN, RDFS.RESOURCE);
399 emit(RDFS.ISDEFINEDBY, RDFS.DOMAIN, RDFS.RESOURCE);
400 emit(RDFS.COMMENT, RDFS.DOMAIN, RDFS.RESOURCE);
401 emit(RDFS.LABEL, RDFS.DOMAIN, RDFS.RESOURCE);
402 emit(RDF.VALUE, RDFS.DOMAIN, RDFS.RESOURCE);
403
404 emit(RDF.TYPE, RDFS.RANGE, RDFS.CLASS);
405 emit(RDFS.DOMAIN, RDFS.RANGE, RDFS.CLASS);
406 emit(RDFS.RANGE, RDFS.RANGE, RDFS.CLASS);
407 emit(RDFS.SUBPROPERTYOF, RDFS.RANGE, RDF.PROPERTY);
408 emit(RDFS.SUBCLASSOF, RDFS.RANGE, RDFS.CLASS);
409 emit(RDF.SUBJECT, RDFS.RANGE, RDFS.RESOURCE);
410 emit(RDF.PREDICATE, RDFS.RANGE, RDFS.RESOURCE);
411 emit(RDF.OBJECT, RDFS.RANGE, RDFS.RESOURCE);
412 emit(RDFS.MEMBER, RDFS.RANGE, RDFS.RESOURCE);
413 emit(RDF.FIRST, RDFS.RANGE, RDFS.RESOURCE);
414 emit(RDF.REST, RDFS.RANGE, RDF.LIST);
415 emit(RDFS.SEEALSO, RDFS.RANGE, RDFS.RESOURCE);
416 emit(RDFS.ISDEFINEDBY, RDFS.RANGE, RDFS.RESOURCE);
417 emit(RDFS.COMMENT, RDFS.RANGE, RDFS.LITERAL);
418 emit(RDFS.LABEL, RDFS.RANGE, RDFS.LITERAL);
419 emit(RDF.VALUE, RDFS.RANGE, RDFS.RESOURCE);
420
421 emit(RDF.ALT, RDFS.SUBCLASSOF, RDFS.CONTAINER);
422 emit(RDF.BAG, RDFS.SUBCLASSOF, RDFS.CONTAINER);
423 emit(RDF.SEQ, RDFS.SUBCLASSOF, RDFS.CONTAINER);
424 emit(RDFS.CONTAINERMEMBERSHIPPROPERTY, RDFS.SUBCLASSOF, RDF.PROPERTY);
425
426 emit(RDFS.ISDEFINEDBY, RDFS.SUBPROPERTYOF, RDFS.SEEALSO);
427 emit(RDFS.DATATYPE, RDFS.SUBCLASSOF, RDFS.CLASS);
428 }
429
430 private void decomposeOWLAxioms() {
431
432 final Map<URI, List<Resource>> subprops = new HashMap<URI, List<Resource>>();
433 final Map<URI, List<Resource>> domains = new HashMap<URI, List<Resource>>();
434 final Map<URI, List<Resource>> ranges = new HashMap<URI, List<Resource>>();
435 final List<URI[]> inverses = new ArrayList<URI[]>();
436
437 final Map<Resource, Resource[]> nodes = new HashMap<Resource, Resource[]>();
438 final Map<Resource, Resource> intersections = new HashMap<Resource, Resource>();
439 final Map<Resource, Resource> unions = new HashMap<Resource, Resource>();
440
441 for (final Statement t : this.db) {
442
443 final Resource s = t.getSubject();
444 final URI p = t.getPredicate();
445 final Value o = t.getObject();
446
447 if (p == RDF.TYPE) {
448 if (o == OWL.CLASS || o == OWL.RESTRICTION) {
449 emit(s, RDF.TYPE, RDFS.CLASS);
450 } else if (o == OWL.ANNOTATIONPROPERTY || o == OWL.DATATYPEPROPERTY
451 || o == OWL.OBJECTPROPERTY) {
452 emit(s, RDF.TYPE, RDF.PROPERTY);
453 }
454
455 } else if (p == OWL.EQUIVALENTCLASS) {
456 if (o instanceof Resource) {
457 emit(s, RDFS.SUBCLASSOF, o);
458 emit((Resource) o, RDFS.SUBCLASSOF, s);
459 }
460
461 } else if (p == OWL.EQUIVALENTPROPERTY) {
462 if (s instanceof URI && o instanceof URI && !s.equals(o)) {
463 for (final URI prop : new URI[] { (URI) s, (URI) o }) {
464 final URI other = prop == s ? (URI) o : (URI) s;
465 emit(prop, RDFS.SUBPROPERTYOF, other);
466 List<Resource> list = subprops.get(prop);
467 if (list == null) {
468 list = new ArrayList<>();
469 subprops.put(prop, list);
470 }
471 if (!list.contains(other)) {
472 list.add(other);
473 }
474 }
475 }
476
477 } else if (p == RDFS.DOMAIN || p == RDFS.RANGE || p == RDFS.SUBPROPERTYOF) {
478 if (s instanceof URI
479 && (o instanceof URI || o instanceof Resource
480 && p != RDFS.SUBPROPERTYOF)) {
481 final Map<URI, List<Resource>> map = p == RDFS.DOMAIN ? domains
482 : p == RDFS.RANGE ? ranges : subprops;
483 List<Resource> list = map.get(s);
484 if (list == null) {
485 list = new ArrayList<>();
486 map.put((URI) s, list);
487 }
488 list.add((Resource) o);
489 }
490
491 } else if (p == OWL.INVERSEOF) {
492 if (s instanceof URI && o instanceof URI) {
493 inverses.add(new URI[] { (URI) s, (URI) o });
494 }
495
496 } else if (p == OWL.INTERSECTIONOF) {
497 if (o instanceof Resource) {
498 intersections.put(s, (Resource) o);
499 }
500
501 } else if (p == OWL.UNIONOF) {
502 if (o instanceof Resource) {
503 unions.put(s, (Resource) o);
504 }
505
506 } else if (p == RDF.FIRST || p == RDF.REST) {
507 if (o instanceof Resource) {
508 Resource[] node = nodes.get(s);
509 if (node == null) {
510 node = new Resource[2];
511 nodes.put(s, node);
512 }
513 node[p == RDF.FIRST ? 0 : 1] = (Resource) o;
514 }
515 }
516 }
517
518
519
520 for (final URI[] pair : inverses) {
521 for (int i = 0; i < 2; ++i) {
522 final URI property = pair[i];
523 final Set<URI> others = new HashSet<URI>();
524 others.add(pair[1 - i]);
525 boolean changed;
526 do {
527 changed = false;
528 for (final URI other : others.toArray(new URI[others.size()])) {
529 final List<Resource> parents = subprops.get(other);
530 if (parents != null) {
531 for (final Resource parent : parents) {
532 if (others.add((URI) parent)) {
533 changed = true;
534 }
535 }
536 }
537 }
538 } while (changed);
539 for (final URI other : others) {
540 final List<Resource> otherDomains = domains.get(other);
541 if (otherDomains != null) {
542 for (final Resource domain : otherDomains) {
543 emit(property, RDFS.RANGE, domain);
544 }
545 }
546 final List<Resource> otherRanges = ranges.get(other);
547 if (otherRanges != null) {
548 for (final Resource range : otherRanges) {
549 emit(property, RDFS.DOMAIN, range);
550 }
551 }
552 }
553 }
554 }
555
556
557 for (final Map.Entry<Resource, Resource> entry : unions.entrySet()) {
558 final Resource unionClass = entry.getKey();
559 for (Resource[] node = nodes.get(entry.getValue()); node != null
560 && node[0] != null; node = nodes.get(node[1])) {
561 emit(node[0], RDFS.SUBCLASSOF, unionClass);
562 }
563 }
564
565
566 for (final Map.Entry<Resource, Resource> entry : intersections.entrySet()) {
567 final Resource intersectionClass = entry.getKey();
568 for (Resource[] node = nodes.get(entry.getValue()); node != null
569 && node[0] != null; node = nodes.get(node[1])) {
570 emit(intersectionClass, RDFS.SUBCLASSOF, node[0]);
571 }
572 }
573 }
574
575 private void evalRules() {
576
577 final Map<Resource, List<Resource>> superClasses
578 = new HashMap<Resource, List<Resource>>();
579 final Map<Resource, List<Resource>> subClasses
580 = new HashMap<Resource, List<Resource>>();
581
582 final Map<URI, List<URI>> superProperties = new HashMap<URI, List<URI>>();
583 final Map<URI, List<URI>> subProperties = new HashMap<URI, List<URI>>();
584
585 for (final Statement t : this.delta) {
586
587 final Resource s = t.getSubject();
588 final URI p = t.getPredicate();
589 final Value o = t.getObject();
590
591
592 if (this.ruleset.rdfs1 && o instanceof Literal) {
593 final Literal l = (Literal) o;
594 final URI dt = l.getDatatype();
595 if (dt != null) {
596 emit(dt, RDF.TYPE, RDFS.DATATYPE);
597 }
598 }
599
600
601 if (this.ruleset.rdfs4a) {
602 emit(s, RDF.TYPE, RDFS.RESOURCE);
603 }
604
605
606 if (this.ruleset.rdfs4b && o instanceof Resource) {
607 emit((Resource) o, RDF.TYPE, RDFS.RESOURCE);
608 }
609
610
611 if (this.ruleset.rdfD2) {
612 emit(p, RDF.TYPE, RDF.PROPERTY);
613 }
614
615 if (p == RDF.TYPE) {
616 if (o == RDFS.CLASS) {
617
618 if (this.ruleset.rdfs8) {
619 emit(s, RDFS.SUBCLASSOF, RDFS.RESOURCE);
620 }
621
622 if (this.ruleset.rdfs10) {
623 emit(s, RDFS.SUBCLASSOF, s);
624 }
625 } else if (o == RDF.PROPERTY) {
626
627 if (this.ruleset.rdfs6) {
628 emit(s, RDFS.SUBPROPERTYOF, s);
629 }
630 } else if (o == RDFS.DATATYPE) {
631
632 if (this.ruleset.rdfs13) {
633 emit(s, RDFS.SUBCLASSOF, RDFS.LITERAL);
634 }
635 } else if (o == RDFS.CONTAINERMEMBERSHIPPROPERTY) {
636
637 if (this.ruleset.rdfs12) {
638 emit(s, RDFS.SUBPROPERTYOF, RDFS.MEMBER);
639 }
640 }
641 }
642
643
644 if (this.ruleset.rdfs2) {
645 if (p == RDFS.DOMAIN && s instanceof URI && o instanceof Resource) {
646 for (final Statement t2 : this.db.filter(null, (URI) s, null)) {
647 emit(t2.getSubject(), RDF.TYPE, o);
648 }
649 }
650 for (final Statement t2 : this.db.filter(p, RDFS.DOMAIN, null)) {
651 if (t2.getObject() instanceof Resource) {
652 emit(s, RDF.TYPE, t2.getObject());
653 }
654 }
655 }
656
657
658 if (this.ruleset.rdfs3) {
659 if (p == RDFS.RANGE && s instanceof URI && o instanceof Resource) {
660 for (final Statement t2 : this.db.filter(null, (URI) s, null)) {
661 if (t2.getObject() instanceof Resource) {
662 emit((Resource) t2.getObject(), RDF.TYPE, o);
663 }
664 }
665 }
666 if (o instanceof Resource) {
667 for (final Statement t2 : this.db.filter(p, RDFS.RANGE, null)) {
668 if (t2.getObject() instanceof Resource) {
669 emit((Resource) o, RDF.TYPE, t2.getObject());
670 }
671 }
672 }
673 }
674
675
676
677
678 if (p == RDFS.SUBCLASSOF && o instanceof Resource) {
679 final Resource c1 = s;
680 final Resource c2 = (Resource) o;
681 if (this.ruleset.rdfs11) {
682 for (final Resource c0 : match(subClasses, null, RDFS.SUBCLASSOF, c1,
683 Resource.class)) {
684 emit(c0, RDFS.SUBCLASSOF, c2);
685 }
686 for (final Resource c3 : match(superClasses, c2, RDFS.SUBCLASSOF, null,
687 Resource.class)) {
688 emit(c1, RDFS.SUBCLASSOF, c3);
689 }
690 }
691 if (this.ruleset.rdfs9) {
692 for (final Statement t2 : this.db.filter(null, RDF.TYPE, c1)) {
693 emit(t2.getSubject(), RDF.TYPE, c2);
694 }
695 }
696 }
697 if (this.ruleset.rdfs9 && p == RDF.TYPE && o instanceof Resource) {
698 for (final Statement t2 : this.db.filter((Resource) o, RDFS.SUBCLASSOF, null)) {
699 if (t2.getObject() instanceof Resource) {
700 emit(s, RDF.TYPE, t2.getObject());
701 }
702 }
703 }
704
705
706
707
708 if (p == RDFS.SUBPROPERTYOF && s instanceof URI && o instanceof URI) {
709 final URI p1 = (URI) s;
710 final URI p2 = (URI) o;
711 if (this.ruleset.rdfs5) {
712 for (final URI p0 : match(subProperties, null, RDFS.SUBPROPERTYOF, p1,
713 URI.class)) {
714 emit(p0, RDFS.SUBPROPERTYOF, p2);
715 }
716 for (final URI p3 : match(superProperties, p2, RDFS.SUBPROPERTYOF, null,
717 URI.class)) {
718 emit(p1, RDFS.SUBPROPERTYOF, p3);
719 }
720 }
721 if (this.ruleset.rdfs7) {
722 for (final Statement t2 : this.db.filter(null, p1, null)) {
723 emit(t2.getSubject(), p2, t2.getObject());
724 }
725 }
726 }
727 if (this.ruleset.rdfs7) {
728 for (final Statement t2 : this.db.filter(p, RDFS.SUBPROPERTYOF, null)) {
729 if (t2.getObject() instanceof URI) {
730 emit(s, (URI) t2.getObject(), o);
731 }
732 }
733 }
734 }
735 }
736
737 private <T> List<T> match(final Map<T, List<T>> map, @Nullable final Resource subject,
738 @Nullable final URI predicate, @Nullable final Value object, final Class<T> clazz) {
739
740 final T key = clazz.cast(subject != null ? subject : object);
741 List<T> list = map.get(key);
742 if (list == null) {
743 list = new ArrayList<T>();
744 for (final Statement t : this.db.filter(subject, predicate, object)) {
745 if (subject == null && clazz.isInstance(t.getSubject())) {
746 list.add(clazz.cast(t.getSubject()));
747 }
748 if (object == null && clazz.isInstance(t.getObject())) {
749 list.add(clazz.cast(t.getObject()));
750 }
751 }
752 map.put(key, list);
753 }
754 return list;
755 }
756
757 private void emit(final Resource subject, final URI predicate, final Value object) {
758 this.db.add(subject, predicate, object);
759 }
760
761 }
762
763 private static final class ABoxInferencer {
764
765 private static final int STATEMENTS_PER_BUCKET = 4;
766
767 private final RDFHandler handler;
768
769 private final Ruleset ruleset;
770
771 private final TBox tbox;
772
773 private final Deduplicator deduplicator;
774
775 private final boolean dropBNodeTypes;
776
777 private Resource context;
778
779 private long bitmask;
780
781 private final Statement[] matrix;
782
783 private final Set<Statement> set;
784
785 private final List<Statement> emitted;
786
787 ABoxInferencer(final RDFHandler handler, final Ruleset ruleset, final TBox tbox,
788 final Deduplicator deduplicator, final boolean dropBNodesTypes) {
789 this.handler = handler;
790 this.ruleset = ruleset;
791 this.tbox = tbox;
792 this.deduplicator = deduplicator;
793 this.dropBNodeTypes = dropBNodesTypes;
794 this.matrix = new Statement[64 * STATEMENTS_PER_BUCKET];
795 this.set = new HashSet<Statement>();
796 this.emitted = new ArrayList<Statement>();
797 }
798
799 void handleStatement(final Statement statement) throws RDFHandlerException {
800
801 this.bitmask = 0L;
802 this.context = statement.getContext();
803 this.emitted.clear();
804 if (!this.set.isEmpty()) {
805 this.set.clear();
806 }
807
808 final Resource s = statement.getSubject();
809 final URI p = statement.getPredicate();
810 final Value o = statement.getObject();
811
812 Resource s2 = this.tbox.resources.get(s);
813 if (s2 == null) {
814 s2 = s;
815 }
816
817 URI p2 = (URI) this.tbox.resources.get(p);
818 if (p2 == null) {
819 p2 = s2 == s && p.equals(s) ? (URI) s : p;
820 }
821
822 Value o2 = this.tbox.resources.get(o);
823 if (o2 == null) {
824 o2 = s2 == s && o.equals(s) ? s : p2 == p && o.equals(p) ? p : o;
825 }
826
827 int index = 0;
828 emit(s2, p2, o2, false);
829 while (index < this.emitted.size()) {
830 final Statement t = this.emitted.get(index);
831 infer(t.getSubject(), t.getPredicate(), t.getObject());
832 ++index;
833 }
834
835 for (final Statement t : this.emitted) {
836 final boolean emit = !this.dropBNodeTypes || t.getPredicate() != RDF.TYPE
837 || !(t.getObject() instanceof BNode);
838 if (emit) {
839 this.handler.handleStatement(t);
840 }
841 }
842 }
843
844 private void infer(final Resource subject, final URI predicate, final Value object) {
845
846 if (this.ruleset.rdfs1 && object instanceof Literal) {
847 final Literal l = (Literal) object;
848 final URI dt = l.getDatatype();
849 if (dt != null) {
850 emit(dt, RDF.TYPE, RDFS.DATATYPE, true);
851 }
852 }
853
854 if (this.ruleset.rdfs4a) {
855 emit(subject, RDF.TYPE, RDFS.RESOURCE, false);
856 }
857 if (this.ruleset.rdfs4b && object instanceof Resource) {
858 emit((Resource) object, RDF.TYPE, RDFS.RESOURCE, predicate == RDF.TYPE);
859 }
860 if (this.ruleset.rdfD2) {
861 emit(predicate, RDF.TYPE, RDF.PROPERTY, true);
862 }
863
864 if (this.ruleset.rdfs2 || this.ruleset.rdfs3 || this.ruleset.rdfs7) {
865 final TBox.Property p = this.tbox.properties.get(predicate);
866 if (p != null) {
867 if (this.ruleset.rdfs2) {
868 for (final Resource c : p.domain) {
869 emit(subject, RDF.TYPE, c, false);
870 }
871 }
872 if (this.ruleset.rdfs3 && object instanceof Resource) {
873 for (final Resource c : p.range) {
874 emit((Resource) object, RDF.TYPE, c, false);
875 }
876 }
877 if (this.ruleset.rdfs7) {
878 for (final Resource q : p.parents) {
879 emit(subject, (URI) q, object, false);
880 }
881 }
882 }
883 }
884
885 if (predicate == RDF.TYPE) {
886 if (object == RDFS.CLASS) {
887 if (this.ruleset.rdfs8) {
888 emit(subject, RDFS.SUBCLASSOF, RDFS.RESOURCE, true);
889 }
890 if (this.ruleset.rdfs10) {
891 emit(subject, RDFS.SUBCLASSOF, subject, true);
892 }
893 } else if (object == RDF.PROPERTY) {
894 if (this.ruleset.rdfs6) {
895 emit(subject, RDFS.SUBPROPERTYOF, subject, true);
896 }
897 } else if (object == RDFS.DATATYPE) {
898 if (this.ruleset.rdfs13) {
899 emit(subject, RDFS.SUBCLASSOF, RDFS.LITERAL, true);
900 }
901 } else if (object == RDFS.CONTAINERMEMBERSHIPPROPERTY) {
902 if (this.ruleset.rdfs12) {
903 emit(subject, RDFS.SUBPROPERTYOF, RDFS.MEMBER, true);
904 }
905 }
906
907 if (this.ruleset.rdfs9) {
908 final TBox.Type t = this.tbox.types.get(object);
909 if (t != null) {
910 for (final Resource c : t.parents) {
911 emit(subject, RDF.TYPE, c, false);
912 }
913 }
914 }
915 }
916 }
917
918 private void emit(final Resource subject, final URI predicate, final Value object,
919 final boolean buffer) {
920
921 final int hash = System.identityHashCode(subject) * 3323
922 + System.identityHashCode(predicate) * 661 + System.identityHashCode(object);
923
924 final int index = hash & 0x3F;
925 final long mask = 1L << index;
926 final int offset = index * STATEMENTS_PER_BUCKET;
927
928 Statement statement = null;
929
930 if ((this.bitmask & mask) == 0L) {
931 statement = create(subject, predicate, object);
932 this.bitmask = this.bitmask | mask;
933 this.matrix[offset] = statement;
934 this.matrix[offset + 1] = null;
935 } else {
936 final int last = offset + STATEMENTS_PER_BUCKET;
937 for (int i = offset; i < last; ++i) {
938 final Statement s = this.matrix[i];
939 if (s == null) {
940 statement = create(subject, predicate, object);
941 this.matrix[i] = statement;
942 final int next = i + 1;
943 if (next < last) {
944 this.matrix[next] = null;
945 }
946 break;
947 } else if (s.getSubject() == subject && s.getPredicate() == predicate
948 && s.getObject() == object) {
949 return;
950 }
951 }
952 if (statement == null) {
953 final Statement s = create(subject, predicate, object);
954 if (this.set.add(s)) {
955 statement = s;
956 }
957 }
958 }
959
960 if (statement != null && !this.deduplicator.add(statement, buffer)) {
961 this.emitted.add(statement);
962 }
963 }
964
965 private Statement create(final Resource subject, final URI predicate, final Value object) {
966 return this.context == null ? Statements.VALUE_FACTORY.createStatement(subject,
967 predicate, object) : Statements.VALUE_FACTORY.createStatement(subject,
968 predicate, object, this.context);
969 }
970
971 }
972
973 private static final class Deduplicator {
974
975 private static final int RECENT_BUFFER_SIZE = 4 * 1024;
976
977 private static final int LOCK_COUNT = 32;
978
979 private final Map<Statement, Statement> mainBuffer;
980
981 private final Statement[] recentBuffer;
982
983 private final Object[] locks;
984
985 Deduplicator() {
986 this.mainBuffer = new ConcurrentHashMap<Statement, Statement>();
987 this.recentBuffer = new Statement[RECENT_BUFFER_SIZE];
988 this.locks = new Object[LOCK_COUNT];
989 for (int i = 0; i < LOCK_COUNT; ++i) {
990 this.locks[i] = new Object();
991 }
992 }
993
994
995
996 boolean add(final Statement statement, final boolean buffer) {
997
998 if (buffer || VOC.containsKey(statement.getPredicate())
999 && VOC.containsKey(statement.getObject())
1000 && VOC.containsKey(statement.getSubject())) {
1001 if (this.mainBuffer.put(statement, statement) != null) {
1002 return true;
1003 }
1004 }
1005
1006 final int hash = statement.hashCode() & 0x7FFFFFFF;
1007 final int index = hash % RECENT_BUFFER_SIZE;
1008 final Object lock = this.locks[hash % LOCK_COUNT];
1009 synchronized (lock) {
1010 final Statement old = this.recentBuffer[index];
1011 if (old != null && old.equals(statement)) {
1012 return true;
1013 }
1014 this.recentBuffer[index] = statement;
1015 return false;
1016 }
1017 }
1018
1019 }
1020
1021 private static final class Ruleset {
1022
1023 static final Ruleset DEFAULT = new Ruleset();
1024
1025 final boolean rdfD2;
1026
1027 final boolean rdfs1;
1028
1029 final boolean rdfs2;
1030
1031 final boolean rdfs3;
1032
1033 final boolean rdfs4a;
1034
1035 final boolean rdfs4b;
1036
1037 final boolean rdfs5;
1038
1039 final boolean rdfs6;
1040
1041 final boolean rdfs7;
1042
1043 final boolean rdfs8;
1044
1045 final boolean rdfs9;
1046
1047 final boolean rdfs10;
1048
1049 final boolean rdfs11;
1050
1051 final boolean rdfs12;
1052
1053 final boolean rdfs13;
1054
1055 Ruleset(final String... excludedRules) {
1056
1057 final Set<String> set = new HashSet<String>();
1058 for (final String rule : excludedRules) {
1059 set.add(rule.trim().toLowerCase());
1060 }
1061
1062 this.rdfD2 = !set.remove("rdfd2");
1063 this.rdfs1 = !set.remove("rdfs1");
1064 this.rdfs2 = !set.remove("rdfs2");
1065 this.rdfs3 = !set.remove("rdfs3");
1066 this.rdfs4a = !set.remove("rdfs4a");
1067 this.rdfs4b = !set.remove("rdfs4b");
1068 this.rdfs5 = !set.remove("rdfs5");
1069 this.rdfs6 = !set.remove("rdfs6");
1070 this.rdfs7 = !set.remove("rdfs7");
1071 this.rdfs8 = !set.remove("rdfs8");
1072 this.rdfs9 = !set.remove("rdfs9");
1073 this.rdfs10 = !set.remove("rdfs10");
1074 this.rdfs11 = !set.remove("rdfs11");
1075 this.rdfs12 = !set.remove("rdfs12");
1076 this.rdfs13 = !set.remove("rdfs13");
1077
1078 if (!set.isEmpty()) {
1079 throw new IllegalArgumentException("Unknown rule(s): " + String.join(", ", set));
1080 }
1081 }
1082
1083 }
1084
1085 private static final class Database implements Iterable<Statement> {
1086
1087 private final Map<Value, Node> nodes;
1088
1089 private final Set<Triple> triples;
1090
1091 private final Set<Triple> pending;
1092
1093 Database() {
1094 this.nodes = new IdentityHashMap<Value, Node>();
1095 this.triples = new HashSet<Triple>();
1096 this.pending = new HashSet<Triple>();
1097 }
1098
1099 public void add(final Resource subj, final URI pred, final Value obj) {
1100 final Triple triple = new Triple(subj, pred, obj);
1101 if (!this.triples.contains(triple)) {
1102 this.pending.add(triple);
1103 }
1104 }
1105
1106 public List<Statement> commit() {
1107 final List<Statement> result = new ArrayList<Statement>(this.pending.size());
1108 for (final Triple triple : this.pending) {
1109 result.add(triple.getStatement());
1110 this.triples.add(triple);
1111 final Node subjNode = nodeFor(triple.subj, true);
1112 triple.nextBySubj = subjNode.nextBySubj;
1113 subjNode.nextBySubj = triple;
1114 ++subjNode.numSubj;
1115 final Node predNode = nodeFor(triple.pred, true);
1116 triple.nextByPred = predNode.nextByPred;
1117 predNode.nextByPred = triple;
1118 ++predNode.numPred;
1119 final Node objNode = nodeFor(triple.obj, true);
1120 triple.nextByObj = objNode.nextByObj;
1121 objNode.nextByObj = triple;
1122 ++objNode.numObj;
1123 }
1124 this.pending.clear();
1125 return result;
1126 }
1127
1128 public Iterable<Statement> filter(@Nullable final Resource subj, @Nullable final URI pred,
1129 @Nullable final Value obj) {
1130
1131 Node node = null;
1132 Triple triple = null;
1133 int field = -1;
1134 int num = Integer.MAX_VALUE;
1135
1136 if (subj != null) {
1137 final Node n = nodeFor(subj, false);
1138 if (n == null) {
1139 return Collections.emptyList();
1140 }
1141 if (n.numSubj < num) {
1142 node = n;
1143 triple = n.nextBySubj;
1144 field = 0;
1145 num = n.numSubj;
1146 }
1147 }
1148
1149 if (pred != null) {
1150 final Node n = nodeFor(pred, false);
1151 if (n == null) {
1152 return Collections.emptyList();
1153 }
1154 if (n.numPred < num) {
1155 node = n;
1156 triple = n.nextByPred;
1157 field = 1;
1158 num = n.numPred;
1159 }
1160 }
1161
1162 if (obj != null) {
1163 final Node n = nodeFor(obj, false);
1164 if (n == null) {
1165 return Collections.emptyList();
1166 }
1167 if (n.numObj < num) {
1168 node = n;
1169 triple = n.nextByObj;
1170 field = 2;
1171 num = n.numObj;
1172 }
1173 }
1174
1175 if (node == null) {
1176 return this;
1177 }
1178
1179 final Triple t = triple;
1180 final int f = field;
1181
1182 return new Iterable<Statement>() {
1183
1184 @Override
1185 public Iterator<Statement> iterator() {
1186 return new Iterator<Statement>() {
1187
1188 private Triple triple = t;
1189
1190 {
1191 advance(false);
1192 }
1193
1194 @Override
1195 public boolean hasNext() {
1196 return this.triple != null;
1197 }
1198
1199 @Override
1200 public Statement next() {
1201 final Statement result = this.triple.getStatement();
1202 advance(true);
1203 return result;
1204 }
1205
1206 @Override
1207 public void remove() {
1208 throw new UnsupportedOperationException();
1209 }
1210
1211 private void advance(boolean skipCurrent) {
1212 while (this.triple != null) {
1213 if (!skipCurrent && (subj == null || subj == this.triple.subj)
1214 && (pred == null || pred == this.triple.pred)
1215 && (obj == null || obj == this.triple.obj)) {
1216 return;
1217 }
1218 skipCurrent = false;
1219 if (f == 0) {
1220 this.triple = this.triple.nextBySubj;
1221 } else if (f == 1) {
1222 this.triple = this.triple.nextByPred;
1223 } else {
1224 this.triple = this.triple.nextByObj;
1225 }
1226 }
1227 }
1228
1229 };
1230 }
1231
1232 };
1233 }
1234
1235 @Override
1236 public Iterator<Statement> iterator() {
1237 final Iterator<Triple> iterator = this.triples.iterator();
1238 return new Iterator<Statement>() {
1239
1240 @Override
1241 public boolean hasNext() {
1242 return iterator.hasNext();
1243 }
1244
1245 @Override
1246 public Statement next() {
1247 return iterator.next().getStatement();
1248 }
1249
1250 @Override
1251 public void remove() {
1252 throw new UnsupportedOperationException();
1253 }
1254 };
1255 }
1256
1257 public int size() {
1258 return this.triples.size();
1259 }
1260
1261 private Node nodeFor(final Value value, final boolean canCreate) {
1262 Node node = this.nodes.get(value);
1263 if (node == null && canCreate) {
1264 node = new Node();
1265 this.nodes.put(value, node);
1266 }
1267 return node;
1268 }
1269
1270 private static final class Node {
1271
1272 @Nullable
1273 Triple nextBySubj;
1274
1275 @Nullable
1276 Triple nextByPred;
1277
1278 @Nullable
1279 Triple nextByObj;
1280
1281 int numSubj;
1282
1283 int numPred;
1284
1285 int numObj;
1286
1287 }
1288
1289 private static final class Triple {
1290
1291 Resource subj;
1292
1293 URI pred;
1294
1295 Value obj;
1296
1297 @Nullable
1298 Statement statement;
1299
1300 @Nullable
1301 Triple nextBySubj;
1302
1303 @Nullable
1304 Triple nextByPred;
1305
1306 @Nullable
1307 Triple nextByObj;
1308
1309 Triple(final Resource subj, final URI pred, final Value obj) {
1310 this.subj = subj;
1311 this.pred = pred;
1312 this.obj = obj;
1313 }
1314
1315 public Statement getStatement() {
1316 if (this.statement == null) {
1317 this.statement = new StatementImpl(this.subj, this.pred, this.obj);
1318 }
1319 return this.statement;
1320 }
1321
1322 @Override
1323 public boolean equals(final Object object) {
1324 if (object == this) {
1325 return true;
1326 }
1327 if (!(object instanceof Triple)) {
1328 return false;
1329 }
1330 final Triple other = (Triple) object;
1331 return this.subj == other.subj && this.pred == other.pred && this.obj == other.obj;
1332 }
1333
1334 @Override
1335 public int hashCode() {
1336 return System.identityHashCode(this.subj) * 757
1337 + System.identityHashCode(this.pred) * 37
1338 + System.identityHashCode(this.obj);
1339 }
1340
1341 }
1342
1343 }
1344
1345 }