1
2
3
4
5
6
7
8
9
10
11
12
13
14 package eu.fbk.rdfpro.util;
15
16 import java.util.ArrayList;
17 import java.util.Arrays;
18 import java.util.Collections;
19 import java.util.HashMap;
20 import java.util.HashSet;
21 import java.util.List;
22 import java.util.Map;
23 import java.util.Objects;
24 import java.util.Set;
25 import java.util.function.Function;
26
27 import javax.annotation.Nullable;
28
29 import com.google.common.base.Preconditions;
30 import com.google.common.collect.HashBasedTable;
31 import com.google.common.collect.ImmutableList;
32 import com.google.common.collect.Table;
33
34 import org.openrdf.model.Literal;
35 import org.openrdf.model.Resource;
36 import org.openrdf.model.Statement;
37 import org.openrdf.model.URI;
38 import org.openrdf.model.Value;
39 import org.openrdf.model.vocabulary.SESAME;
40 import org.openrdf.query.BindingSet;
41 import org.openrdf.query.algebra.And;
42 import org.openrdf.query.algebra.Compare;
43 import org.openrdf.query.algebra.Compare.CompareOp;
44 import org.openrdf.query.algebra.QueryModelNode;
45 import org.openrdf.query.algebra.StatementPattern;
46 import org.openrdf.query.algebra.TupleExpr;
47 import org.openrdf.query.algebra.ValueConstant;
48 import org.openrdf.query.algebra.ValueExpr;
49 import org.openrdf.query.algebra.Var;
50 import org.openrdf.query.impl.ListBindingSet;
51
52 public final class StatementMatcher {
53
54 private static final Object NORMALIZED_MARKER = new Object();
55
56 private static final Object UNNORMALIZED_MARKER = new Object();
57
58 private static final List<String> VAR_NAMES = ImmutableList.of("s", "p", "o", "c");
59
60 @Nullable
61 private final Function<Value, Value> normalizer;
62
63 private final byte[] masks;
64
65 private final int[][] tables;
66
67 private final Object[] values;
68
69 private final Object[] normalizedValues;
70
71 private final URI nil;
72
73 private final int numPatterns;
74
75 private final int numValues;
76
77 private final boolean matchAll;
78
79 private StatementMatcher(@Nullable final Function<Value, Value> normalizer,
80 final byte[] masks, final int[][] tables, @Nullable final Object[] values,
81 final int numPatterns, final int numValues, final boolean matchAll) {
82
83
84 this.normalizer = normalizer;
85 this.masks = masks;
86 this.tables = tables;
87 this.values = values;
88 this.normalizedValues = normalizer == null ? values : values.clone();
89 this.nil = normalizer == null ? SESAME.NIL : (URI) normalizer.apply(SESAME.NIL);
90 this.numPatterns = numPatterns;
91 this.numValues = numValues;
92 this.matchAll = matchAll;
93 }
94
95 public StatementMatcher normalize(@Nullable final Function<Value, Value> normalizer) {
96 return normalizer == null ? this : new StatementMatcher(normalizer, this.masks,
97 this.tables, this.values, this.numPatterns, this.numValues, this.matchAll);
98 }
99
100 public boolean matchAll() {
101 return this.matchAll;
102 }
103
104 public boolean match(final Statement stmt) {
105 return match(stmt.getSubject(), stmt.getPredicate(), stmt.getObject(), stmt.getContext());
106 }
107
108 public boolean match(final Resource subj, final URI pred, final Value obj,
109 @Nullable Resource ctx) {
110
111 if (this.matchAll) {
112 return true;
113 }
114
115 if (ctx == null) {
116 ctx = this.nil;
117 }
118
119 List<Filter> filters = null;
120
121 for (int i = 0; i < this.masks.length; ++i) {
122 final byte mask = this.masks[i];
123 final int[] table = this.tables[i];
124 final int hash = hash(subj, pred, obj, ctx, mask);
125 for (int slot = (hash & 0x7FFFFFFF) % table.length; table[slot] != 0; slot = next(
126 slot, table.length)) {
127 final int token = table[slot];
128 int offset = match(subj, pred, obj, ctx, mask, hash, token);
129 if (offset != 0) {
130 if (tokenToUnfiltered(token)) {
131 return true;
132 }
133 while (true) {
134 final Object value = this.normalizedValues[offset++];
135 if (value == NORMALIZED_MARKER || value == UNNORMALIZED_MARKER) {
136 break;
137 } else if (value instanceof Filter) {
138 if (filters == null) {
139 filters = new ArrayList<>();
140 }
141 filters.add((Filter) value);
142 }
143 }
144 break;
145 }
146 }
147 }
148
149 if (filters != null) {
150 for (final Filter filter : filters) {
151 if (filter.eval(subj, pred, obj, ctx)) {
152 return true;
153 }
154 }
155 }
156
157 return false;
158 }
159
160 public <T> List<T> map(final Resource subj, final URI pred, final Value obj,
161 @Nullable final Resource ctx, final Class<T> clazz) {
162 return map(subj, pred, obj, ctx, clazz, null);
163 }
164
165 @SuppressWarnings("unchecked")
166 public <T> List<T> map(final Resource subj, final URI pred, final Value obj,
167 @Nullable Resource ctx, final Class<T> clazz, @Nullable final List<T> list) {
168
169 if (ctx == null) {
170 ctx = this.nil;
171 }
172
173 List<T> result = list;
174
175 outer: for (int i = 0; i < this.masks.length; ++i) {
176
177 final byte mask = this.masks[i];
178
179 int offset = 0;
180 if (mask == 0) {
181 normalizeIfNecessary(1);
182 offset = 1;
183 } else {
184 final int[] table = this.tables[i];
185 final int hash = hash(subj, pred, obj, ctx, mask);
186 for (int slot = (hash & 0x7FFFFFFF) % table.length;; slot = next(slot,
187 table.length)) {
188 final int token = table[slot];
189 if (token == 0) {
190 continue outer;
191 }
192 offset = match(subj, pred, obj, ctx, mask, hash, token);
193 if (offset != 0) {
194 break;
195 }
196 }
197 }
198
199 boolean add = true;
200 while (true) {
201 final Object value = this.normalizedValues[offset++];
202 if (value == NORMALIZED_MARKER || value == UNNORMALIZED_MARKER) {
203 break;
204 } else if (value instanceof Filter) {
205 add = ((Filter) value).eval(subj, pred, obj, ctx);
206 } else if (add && clazz.isInstance(value)) {
207 result = result != null ? result : new ArrayList<>(16);
208 result.add((T) value);
209 }
210 }
211 }
212
213 return result != null ? result : Collections.emptyList();
214 }
215
216 @Override
217 public String toString() {
218 return this.numPatterns + " patterns, " + this.numValues + " values";
219 }
220
221 private int match(final Resource subj, final URI pred, final Value obj, final Resource ctx,
222 final byte mask, final int hash, final int token) {
223
224
225 if (!tokenMatchHash(hash, token)) {
226 return 0;
227 }
228
229
230 final int offset = tokenToOffset(token);
231
232
233 normalizeIfNecessary(offset);
234
235
236 int index = offset;
237 if ((mask & 0x01) != 0 && !this.normalizedValues[index++].equals(subj)) {
238 return 0;
239 }
240 if ((mask & 0x02) != 0 && !this.normalizedValues[index++].equals(pred)) {
241 return 0;
242 }
243 if ((mask & 0x04) != 0 && !this.normalizedValues[index++].equals(obj)) {
244 return 0;
245 }
246 if ((mask & 0x08) != 0 && !this.normalizedValues[index].equals(ctx)) {
247 return 0;
248 }
249 return index;
250 }
251
252 private void normalizeIfNecessary(final int offset) {
253 if (this.normalizer != null && this.normalizedValues[offset - 1] == UNNORMALIZED_MARKER) {
254 for (int i = offset;; ++i) {
255 Object value = this.normalizedValues[i];
256 if (value == UNNORMALIZED_MARKER || value == NORMALIZED_MARKER) {
257 break;
258 } else if (value instanceof Filter) {
259 value = ((Filter) value).normalize(this.normalizer);
260 } else if (value instanceof Value) {
261 value = this.normalizer.apply((Value) value);
262 } else if (value instanceof StatementMatcher) {
263 value = ((StatementMatcher) value).normalize(this.normalizer);
264 } else if (value instanceof StatementTemplate) {
265 value = ((StatementTemplate) value).normalize(this.normalizer);
266 }
267 this.normalizedValues[i] = value;
268 }
269 this.normalizedValues[offset - 1] = NORMALIZED_MARKER;
270 }
271 }
272
273 private static int next(final int slot, final int numSlots) {
274 final int result = slot + 1;
275 return result < numSlots ? result : 0;
276 }
277
278 private static int hash(final Resource subj, final URI pred, final Value obj,
279 final Resource ctx, final byte mask) {
280
281 int hash = 1;
282 if ((mask & 0x01) != 0) {
283 hash += 6661 * subj.hashCode();
284 }
285 if ((mask & 0x02) != 0) {
286 hash += 961 * pred.hashCode();
287 }
288 if ((mask & 0x04) != 0) {
289 hash += 31 * obj.hashCode();
290 }
291 if ((mask & 0x08) != 0) {
292 hash += ctx.hashCode();
293 }
294 return hash;
295 }
296
297 private static byte mask(@Nullable final Resource subj, @Nullable final URI pred,
298 @Nullable final Value obj, @Nullable final Resource ctx) {
299
300 byte mask = 0;
301 if (subj != null) {
302 mask |= 0x01;
303 }
304 if (pred != null) {
305 mask |= 0x02;
306 }
307 if (obj != null) {
308 mask |= 0x04;
309 }
310 if (ctx != null) {
311 mask |= 0x08;
312 }
313 return mask;
314 }
315
316 @Nullable
317 private static Value variableValue(final Var var) {
318 return var == null ? null : var.getValue();
319 }
320
321 private static void variableReplacement(final Var from, final String to,
322 final Map<String, Var> map) {
323 if (from != null && from.getValue() == null) {
324 map.put(from.getName(), new Var(to));
325 }
326 }
327
328 private static int tokenEncode(final int hash, final int offset, final boolean unfiltered) {
329 assert offset < 0x1000000;
330 return hash & 0x7FF00000 | offset & 0xFFFFF | (unfiltered ? 0x80000000 : 0);
331 }
332
333 private static boolean tokenToUnfiltered(final int token) {
334 return (token & 0x80000000) != 0;
335 }
336
337 private static int tokenToOffset(final int token) {
338 return token & 0xFFFFF;
339 }
340
341 private static boolean tokenMatchHash(final int hash, final int token) {
342 return ((hash ^ token) & 0x7FF00000) == 0;
343 }
344
345 public static Builder builder() {
346 return new Builder();
347 }
348
349 public static final class Builder {
350
351 private static final Object EMPTY_FILTER = new Object();
352
353 private final Table<List<Value>, Object, Set<Object>>[] maskData;
354
355 private int numValues;
356
357 private int numMasks;
358
359 private int numFilters;
360
361 @SuppressWarnings("unchecked")
362 Builder() {
363
364 this.maskData = new Table[16];
365 this.numValues = 0;
366 this.numMasks = 0;
367 }
368
369 public Builder addExpr(final TupleExpr expr, final Object... mappedValues) {
370
371
372 final List<StatementPattern> patterns = Algebra.extractNodes(expr,
373 StatementPattern.class, null, null);
374 Preconditions.checkArgument(patterns.size() == 1);
375 final StatementPattern pattern = patterns.get(0);
376
377
378 ValueExpr filter = null;
379 for (QueryModelNode node = pattern.getParentNode(); node != null; node = node
380 .getParentNode()) {
381 if (node instanceof org.openrdf.query.algebra.Filter) {
382 final ValueExpr f = ((org.openrdf.query.algebra.Filter) node).getCondition();
383 if (filter == null) {
384 filter = f;
385 } else {
386 filter = new And(filter, f);
387 }
388 } else {
389 throw new IllegalArgumentException("Invalid expression: " + expr);
390 }
391 }
392
393
394 return addPattern(pattern, filter, mappedValues);
395 }
396
397 public Builder addPattern(final StatementPattern pattern, @Nullable ValueExpr filter,
398 final Object... mappedValues) {
399
400
401 final Resource subj = (Resource) variableValue(pattern.getSubjectVar());
402 final URI pred = (URI) variableValue(pattern.getPredicateVar());
403 final Value obj = variableValue(pattern.getObjectVar());
404 final Resource ctx = (Resource) variableValue(pattern.getContextVar());
405
406
407 if (filter != null) {
408 final Map<String, Var> replacements = new HashMap<>();
409 variableReplacement(pattern.getSubjectVar(), "s", replacements);
410 variableReplacement(pattern.getPredicateVar(), "p", replacements);
411 variableReplacement(pattern.getObjectVar(), "o", replacements);
412 variableReplacement(pattern.getContextVar(), "c", replacements);
413 filter = Algebra.rewrite(filter, replacements);
414 }
415
416
417 return addValues(subj, pred, obj, ctx, filter, mappedValues);
418 }
419
420 public Builder addValues(@Nullable final Resource subj, @Nullable final URI pred,
421 @Nullable final Value obj, @Nullable final Resource ctx,
422 @Nullable final ValueExpr filter, final Object... mappedValues) {
423
424
425 final Object filterKey = filter == null ? EMPTY_FILTER : filter;
426
427
428 final byte mask = mask(subj, pred, obj, ctx);
429 final List<Value> pattern = Arrays.asList(subj, pred, obj, ctx);
430
431
432 Table<List<Value>, Object, Set<Object>> sets = this.maskData[mask];
433 if (sets == null) {
434 sets = HashBasedTable.create();
435 this.maskData[mask] = sets;
436 this.numMasks++;
437 }
438
439
440 Set<Object> set = sets.get(pattern, filterKey);
441 if (set == null) {
442 set = new HashSet<>();
443 sets.put(pattern, filterKey, set);
444 if (filterKey != EMPTY_FILTER) {
445 this.numFilters++;
446 }
447 }
448
449
450 this.numValues -= set.size();
451 set.addAll(Arrays.asList(mappedValues));
452 this.numValues += set.size();
453
454
455 return this;
456 }
457
458 public StatementMatcher build(@Nullable final Function<Value, Value> normalizer) {
459
460
461 int valuesSize = this.numFilters + this.numValues + 1;
462 for (int mask = 0; mask < this.maskData.length; ++mask) {
463 final Table<List<Value>, Object, Set<Object>> table = this.maskData[mask];
464 if (table != null) {
465 valuesSize += table.rowKeySet().size() * (Integer.bitCount(mask) + 1);
466 }
467 }
468
469
470 final byte[] masks = new byte[this.numMasks];
471 final int[][] tables = new int[this.numMasks][];
472 final Object[] values = new Object[valuesSize * 4];
473
474
475 int numPatterns = 0;
476
477
478 int maskIndex = 0;
479 int valueIndex = 0;
480
481
482 values[valueIndex++] = UNNORMALIZED_MARKER;
483
484
485 boolean matchAll = false;
486 for (final byte mask : new byte[] { 0, 8, 2, 4, 1, 10, 12, 9, 6, 3, 5, 14, 11, 13, 7,
487 15 }) {
488
489
490 final Table<List<Value>, Object, Set<Object>> sets = this.maskData[mask];
491 if (sets == null) {
492 continue;
493 }
494
495
496 final int maskPatterns = sets.rowKeySet().size();
497 final int[] table = new int[maskPatterns + Math.max(1, maskPatterns >> 1)];
498
499
500 numPatterns += maskPatterns;
501 for (final List<Value> pattern : sets.rowKeySet()) {
502
503
504 final int hash = hash((Resource) pattern.get(0), (URI) pattern.get(1),
505 pattern.get(2), (Resource) pattern.get(3), mask);
506
507
508 final Map<Object, Set<Object>> map = sets.rowMap().get(pattern);
509 final boolean unfiltered = map.containsKey(EMPTY_FILTER);
510 if (unfiltered && mask == 0) {
511 matchAll = true;
512 }
513
514
515 int slot = (hash & 0x7FFFFFFF) % table.length;
516 while (table[slot] != 0) {
517 slot = next(slot, table.length);
518 }
519 table[slot] = tokenEncode(hash, valueIndex, unfiltered);
520
521
522 for (final Value component : pattern) {
523 if (component != null) {
524 values[valueIndex++] = component;
525 }
526 }
527
528
529 for (final Map.Entry<Object, Set<Object>> entry : map.entrySet()) {
530 if (entry.getKey() != EMPTY_FILTER) {
531 values[valueIndex++] = Filter.create((ValueExpr) entry.getKey());
532 }
533 for (final Object value : entry.getValue()) {
534 values[valueIndex++] = value;
535 }
536 }
537
538
539 values[valueIndex++] = UNNORMALIZED_MARKER;
540 }
541
542
543 masks[maskIndex] = mask;
544 tables[maskIndex] = table;
545 ++maskIndex;
546 }
547
548
549 return new StatementMatcher(normalizer, masks, tables, values, numPatterns,
550 this.numValues, matchAll);
551 }
552
553 }
554
555 private static abstract class Filter {
556
557 static Filter create(final ValueExpr expr) {
558 if (expr instanceof And) {
559 final And and = (And) expr;
560 return new AndFilter(create(and.getLeftArg()), create(and.getRightArg()));
561 } else if (expr instanceof Compare) {
562 final Compare cmp = (Compare) expr;
563 if (cmp.getOperator() == CompareOp.EQ || cmp.getOperator() == CompareOp.NE) {
564 ValueExpr left = cmp.getLeftArg();
565 ValueExpr right = cmp.getRightArg();
566 if (left instanceof ValueConstant) {
567 left = new Var("l", ((ValueConstant) left).getValue());
568 }
569 if (right instanceof ValueConstant) {
570 right = new Var("r", ((ValueConstant) right).getValue());
571 }
572 if (left instanceof Var && right instanceof Var) {
573 return new CompareFilter((Var) left, (Var) right,
574 cmp.getOperator() == CompareOp.EQ);
575 }
576 }
577 }
578 return new ValueExprFilter(expr);
579 }
580
581 Filter normalize(final Function<Value, Value> normalizer) {
582 return this;
583 }
584
585 abstract boolean eval(final Resource subj, final URI pred, final Value obj,
586 final Resource ctx);
587
588 private static final class ValueExprFilter extends Filter {
589
590 private final ValueExpr expr;
591
592 ValueExprFilter(final ValueExpr expr) {
593 this.expr = expr;
594 }
595
596 @Override
597 Filter normalize(final Function<Value, Value> normalizer) {
598 return new ValueExprFilter(Algebra.normalize(this.expr, normalizer));
599 }
600
601 @Override
602 boolean eval(final Resource subj, final URI pred, final Value obj, final Resource ctx) {
603 final BindingSet bindings = new ListBindingSet(VAR_NAMES, subj, pred, obj, ctx);
604 return ((Literal) Algebra.evaluateValueExpr(this.expr, bindings)).booleanValue();
605 }
606
607 }
608
609 private static final class CompareFilter extends Filter {
610
611 private final Value leftValue;
612
613 private final Value rightValue;
614
615 private final char left;
616
617 private final char right;
618
619 private final boolean negate;
620
621 CompareFilter(final Var left, final Var right, final boolean equal) {
622 this.leftValue = left.getValue();
623 this.rightValue = right.getValue();
624 this.left = left.hasValue() ? 'l'
625 : Character.toLowerCase(left.getName().charAt(0));
626 this.right = right.hasValue() ? 'r'
627 : Character.toLowerCase(right.getName().charAt(0));
628 this.negate = !equal;
629 }
630
631 @Override
632 boolean eval(final Resource subj, final URI pred, final Value obj, final Resource ctx) {
633 final Value left = select(subj, pred, obj, ctx, this.left);
634 final Value right = select(subj, pred, obj, ctx, this.right);
635 final boolean equals = Objects.equals(left, right);
636 return equals ^ this.negate;
637 }
638
639 private Value select(final Resource subj, final URI pred, final Value obj,
640 final Resource ctx, final char c) {
641 switch (c) {
642 case 's':
643 return subj;
644 case 'p':
645 return pred;
646 case 'o':
647 return obj;
648 case 'c':
649 return ctx;
650 case 'l':
651 return this.leftValue;
652 case 'r':
653 return this.rightValue;
654 default:
655 throw new Error();
656 }
657 }
658
659 }
660
661 private static final class AndFilter extends Filter {
662
663 private final Filter left;
664
665 private final Filter right;
666
667 AndFilter(final Filter left, final Filter right) {
668 this.left = left;
669 this.right = right;
670 }
671
672 @Override
673 Filter normalize(final Function<Value, Value> normalizer) {
674 final Filter left = this.left.normalize(normalizer);
675 final Filter right = this.right.normalize(normalizer);
676 return left == this.left && right == this.right ? this
677 : new AndFilter(left, right);
678 }
679
680 @Override
681 boolean eval(final Resource subj, final URI pred, final Value obj, final Resource ctx) {
682 return this.left.eval(subj, pred, obj, ctx)
683 && this.right.eval(subj, pred, obj, ctx);
684 }
685
686 }
687
688 }
689
690 }