1   /*
2    * RDFpro - An extensible tool for building stream-oriented RDF processing libraries.
3    * 
4    * Written in 2015 by Francesco Corcoglioniti with support by Alessio Palmero Aprosio and Marco
5    * Rospocher. Contact info on http://rdfpro.fbk.eu/
6    * 
7    * To the extent possible under law, the authors have dedicated all copyright and related and
8    * neighboring rights to this software to the public domain worldwide. This software is
9    * distributed without any warranty.
10   * 
11   * You should have received a copy of the CC0 Public Domain Dedication along with this software.
12   * If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
13   */
14  package eu.fbk.rdfpro.util;
15  
16  import java.util.ArrayList;
17  import java.util.Arrays;
18  import java.util.Collections;
19  import java.util.HashMap;
20  import java.util.HashSet;
21  import java.util.List;
22  import java.util.Map;
23  import java.util.Objects;
24  import java.util.Set;
25  import java.util.function.Function;
26  
27  import javax.annotation.Nullable;
28  
29  import com.google.common.base.Preconditions;
30  import com.google.common.collect.HashBasedTable;
31  import com.google.common.collect.ImmutableList;
32  import com.google.common.collect.Table;
33  
34  import org.openrdf.model.Literal;
35  import org.openrdf.model.Resource;
36  import org.openrdf.model.Statement;
37  import org.openrdf.model.URI;
38  import org.openrdf.model.Value;
39  import org.openrdf.model.vocabulary.SESAME;
40  import org.openrdf.query.BindingSet;
41  import org.openrdf.query.algebra.And;
42  import org.openrdf.query.algebra.Compare;
43  import org.openrdf.query.algebra.Compare.CompareOp;
44  import org.openrdf.query.algebra.QueryModelNode;
45  import org.openrdf.query.algebra.StatementPattern;
46  import org.openrdf.query.algebra.TupleExpr;
47  import org.openrdf.query.algebra.ValueConstant;
48  import org.openrdf.query.algebra.ValueExpr;
49  import org.openrdf.query.algebra.Var;
50  import org.openrdf.query.impl.ListBindingSet;
51  
52  public final class StatementMatcher {
53  
54      private static final Object NORMALIZED_MARKER = new Object();
55  
56      private static final Object UNNORMALIZED_MARKER = new Object();
57  
58      private static final List<String> VAR_NAMES = ImmutableList.of("s", "p", "o", "c");
59  
60      @Nullable
61      private final Function<Value, Value> normalizer;
62  
63      private final byte[] masks;
64  
65      private final int[][] tables;
66  
67      private final Object[] values;
68  
69      private final Object[] normalizedValues; // modified during use
70  
71      private final URI nil;
72  
73      private final int numPatterns;
74  
75      private final int numValues;
76  
77      private final boolean matchAll;
78  
79      private StatementMatcher(@Nullable final Function<Value, Value> normalizer,
80              final byte[] masks, final int[][] tables, @Nullable final Object[] values,
81              final int numPatterns, final int numValues, final boolean matchAll) {
82  
83          // Initialize object state
84          this.normalizer = normalizer;
85          this.masks = masks;
86          this.tables = tables;
87          this.values = values;
88          this.normalizedValues = normalizer == null ? values : values.clone();
89          this.nil = normalizer == null ? SESAME.NIL : (URI) normalizer.apply(SESAME.NIL);
90          this.numPatterns = numPatterns;
91          this.numValues = numValues;
92          this.matchAll = matchAll;
93      }
94  
95      public StatementMatcher normalize(@Nullable final Function<Value, Value> normalizer) {
96          return normalizer == null ? this : new StatementMatcher(normalizer, this.masks,
97                  this.tables, this.values, this.numPatterns, this.numValues, this.matchAll);
98      }
99  
100     public boolean matchAll() {
101         return this.matchAll;
102     }
103 
104     public boolean match(final Statement stmt) {
105         return match(stmt.getSubject(), stmt.getPredicate(), stmt.getObject(), stmt.getContext());
106     }
107 
108     public boolean match(final Resource subj, final URI pred, final Value obj,
109             @Nullable Resource ctx) {
110 
111         if (this.matchAll) {
112             return true;
113         }
114 
115         if (ctx == null) {
116             ctx = this.nil;
117         }
118 
119         List<Filter> filters = null;
120 
121         for (int i = 0; i < this.masks.length; ++i) {
122             final byte mask = this.masks[i];
123             final int[] table = this.tables[i];
124             final int hash = hash(subj, pred, obj, ctx, mask);
125             for (int slot = (hash & 0x7FFFFFFF) % table.length; table[slot] != 0; slot = next(
126                     slot, table.length)) {
127                 final int token = table[slot];
128                 int offset = match(subj, pred, obj, ctx, mask, hash, token);
129                 if (offset != 0) {
130                     if (tokenToUnfiltered(token)) {
131                         return true;
132                     }
133                     while (true) {
134                         final Object value = this.normalizedValues[offset++];
135                         if (value == NORMALIZED_MARKER || value == UNNORMALIZED_MARKER) {
136                             break;
137                         } else if (value instanceof Filter) {
138                             if (filters == null) {
139                                 filters = new ArrayList<>();
140                             }
141                             filters.add((Filter) value);
142                         }
143                     }
144                     break;
145                 }
146             }
147         }
148 
149         if (filters != null) {
150             for (final Filter filter : filters) {
151                 if (filter.eval(subj, pred, obj, ctx)) {
152                     return true;
153                 }
154             }
155         }
156 
157         return false;
158     }
159 
160     public <T> List<T> map(final Resource subj, final URI pred, final Value obj,
161             @Nullable final Resource ctx, final Class<T> clazz) {
162         return map(subj, pred, obj, ctx, clazz, null);
163     }
164 
165     @SuppressWarnings("unchecked")
166     public <T> List<T> map(final Resource subj, final URI pred, final Value obj,
167             @Nullable Resource ctx, final Class<T> clazz, @Nullable final List<T> list) {
168 
169         if (ctx == null) {
170             ctx = this.nil;
171         }
172 
173         List<T> result = list;
174 
175         outer: for (int i = 0; i < this.masks.length; ++i) {
176 
177             final byte mask = this.masks[i];
178 
179             int offset = 0;
180             if (mask == 0) {
181                 normalizeIfNecessary(1);
182                 offset = 1;
183             } else {
184                 final int[] table = this.tables[i];
185                 final int hash = hash(subj, pred, obj, ctx, mask);
186                 for (int slot = (hash & 0x7FFFFFFF) % table.length;; slot = next(slot,
187                         table.length)) {
188                     final int token = table[slot];
189                     if (token == 0) {
190                         continue outer;
191                     }
192                     offset = match(subj, pred, obj, ctx, mask, hash, token);
193                     if (offset != 0) {
194                         break;
195                     }
196                 }
197             }
198 
199             boolean add = true;
200             while (true) {
201                 final Object value = this.normalizedValues[offset++];
202                 if (value == NORMALIZED_MARKER || value == UNNORMALIZED_MARKER) {
203                     break;
204                 } else if (value instanceof Filter) {
205                     add = ((Filter) value).eval(subj, pred, obj, ctx);
206                 } else if (add && clazz.isInstance(value)) {
207                     result = result != null ? result : new ArrayList<>(16);
208                     result.add((T) value);
209                 }
210             }
211         }
212 
213         return result != null ? result : Collections.emptyList();
214     }
215 
216     @Override
217     public String toString() {
218         return this.numPatterns + " patterns, " + this.numValues + " values";
219     }
220 
221     private int match(final Resource subj, final URI pred, final Value obj, final Resource ctx,
222             final byte mask, final int hash, final int token) {
223 
224         // Check that lower 12 bits of the hash match with lower 12 bits of cell
225         if (!tokenMatchHash(hash, token)) {
226             return 0;
227         }
228 
229         // Use the higher 20 bits of the cell as an index in the value array
230         final int offset = tokenToOffset(token);
231 
232         // Normalize if necessary (the lack of synchronization is deliberate)
233         normalizeIfNecessary(offset);
234 
235         // Check that the quad matches the constants in the value array
236         int index = offset;
237         if ((mask & 0x01) != 0 && !this.normalizedValues[index++].equals(subj)) {
238             return 0;
239         }
240         if ((mask & 0x02) != 0 && !this.normalizedValues[index++].equals(pred)) {
241             return 0;
242         }
243         if ((mask & 0x04) != 0 && !this.normalizedValues[index++].equals(obj)) {
244             return 0;
245         }
246         if ((mask & 0x08) != 0 && !this.normalizedValues[index].equals(ctx)) {
247             return 0;
248         }
249         return index;
250     }
251 
252     private void normalizeIfNecessary(final int offset) {
253         if (this.normalizer != null && this.normalizedValues[offset - 1] == UNNORMALIZED_MARKER) {
254             for (int i = offset;; ++i) {
255                 Object value = this.normalizedValues[i];
256                 if (value == UNNORMALIZED_MARKER || value == NORMALIZED_MARKER) {
257                     break;
258                 } else if (value instanceof Filter) {
259                     value = ((Filter) value).normalize(this.normalizer);
260                 } else if (value instanceof Value) {
261                     value = this.normalizer.apply((Value) value);
262                 } else if (value instanceof StatementMatcher) {
263                     value = ((StatementMatcher) value).normalize(this.normalizer);
264                 } else if (value instanceof StatementTemplate) {
265                     value = ((StatementTemplate) value).normalize(this.normalizer);
266                 }
267                 this.normalizedValues[i] = value;
268             }
269             this.normalizedValues[offset - 1] = NORMALIZED_MARKER;
270         }
271     }
272 
273     private static int next(final int slot, final int numSlots) {
274         final int result = slot + 1;
275         return result < numSlots ? result : 0;
276     }
277 
278     private static int hash(final Resource subj, final URI pred, final Value obj,
279             final Resource ctx, final byte mask) {
280 
281         int hash = 1;
282         if ((mask & 0x01) != 0) {
283             hash += 6661 * subj.hashCode();
284         }
285         if ((mask & 0x02) != 0) {
286             hash += 961 * pred.hashCode();
287         }
288         if ((mask & 0x04) != 0) {
289             hash += 31 * obj.hashCode();
290         }
291         if ((mask & 0x08) != 0) {
292             hash += ctx.hashCode();
293         }
294         return hash;
295     }
296 
297     private static byte mask(@Nullable final Resource subj, @Nullable final URI pred,
298             @Nullable final Value obj, @Nullable final Resource ctx) {
299 
300         byte mask = 0;
301         if (subj != null) {
302             mask |= 0x01;
303         }
304         if (pred != null) {
305             mask |= 0x02;
306         }
307         if (obj != null) {
308             mask |= 0x04;
309         }
310         if (ctx != null) {
311             mask |= 0x08;
312         }
313         return mask;
314     }
315 
316     @Nullable
317     private static Value variableValue(final Var var) {
318         return var == null ? null : var.getValue();
319     }
320 
321     private static void variableReplacement(final Var from, final String to,
322             final Map<String, Var> map) {
323         if (from != null && from.getValue() == null) {
324             map.put(from.getName(), new Var(to));
325         }
326     }
327 
328     private static int tokenEncode(final int hash, final int offset, final boolean unfiltered) {
329         assert offset < 0x1000000;
330         return hash & 0x7FF00000 | offset & 0xFFFFF | (unfiltered ? 0x80000000 : 0);
331     }
332 
333     private static boolean tokenToUnfiltered(final int token) {
334         return (token & 0x80000000) != 0;
335     }
336 
337     private static int tokenToOffset(final int token) {
338         return token & 0xFFFFF;
339     }
340 
341     private static boolean tokenMatchHash(final int hash, final int token) {
342         return ((hash ^ token) & 0x7FF00000) == 0;
343     }
344 
345     public static Builder builder() {
346         return new Builder();
347     }
348 
349     public static final class Builder {
350 
351         private static final Object EMPTY_FILTER = new Object();
352 
353         private final Table<List<Value>, Object, Set<Object>>[] maskData;
354 
355         private int numValues;
356 
357         private int numMasks;
358 
359         private int numFilters;
360 
361         @SuppressWarnings("unchecked")
362         Builder() {
363             // There are at most 16 masks to consider
364             this.maskData = new Table[16];
365             this.numValues = 0;
366             this.numMasks = 0;
367         }
368 
369         public Builder addExpr(final TupleExpr expr, final Object... mappedValues) {
370 
371             // Identify the statement pattern in the expression
372             final List<StatementPattern> patterns = Algebra.extractNodes(expr,
373                     StatementPattern.class, null, null);
374             Preconditions.checkArgument(patterns.size() == 1);
375             final StatementPattern pattern = patterns.get(0);
376 
377             // Identify the filter conditions in the expression
378             ValueExpr filter = null;
379             for (QueryModelNode node = pattern.getParentNode(); node != null; node = node
380                     .getParentNode()) {
381                 if (node instanceof org.openrdf.query.algebra.Filter) {
382                     final ValueExpr f = ((org.openrdf.query.algebra.Filter) node).getCondition();
383                     if (filter == null) {
384                         filter = f;
385                     } else {
386                         filter = new And(filter, f);
387                     }
388                 } else {
389                     throw new IllegalArgumentException("Invalid expression: " + expr);
390                 }
391             }
392 
393             // Delegate
394             return addPattern(pattern, filter, mappedValues);
395         }
396 
397         public Builder addPattern(final StatementPattern pattern, @Nullable ValueExpr filter,
398                 final Object... mappedValues) {
399 
400             // Extract components
401             final Resource subj = (Resource) variableValue(pattern.getSubjectVar());
402             final URI pred = (URI) variableValue(pattern.getPredicateVar());
403             final Value obj = variableValue(pattern.getObjectVar());
404             final Resource ctx = (Resource) variableValue(pattern.getContextVar());
405 
406             // Rewrite filter if necessary
407             if (filter != null) {
408                 final Map<String, Var> replacements = new HashMap<>();
409                 variableReplacement(pattern.getSubjectVar(), "s", replacements);
410                 variableReplacement(pattern.getPredicateVar(), "p", replacements);
411                 variableReplacement(pattern.getObjectVar(), "o", replacements);
412                 variableReplacement(pattern.getContextVar(), "c", replacements);
413                 filter = Algebra.rewrite(filter, replacements);
414             }
415 
416             // Delegate
417             return addValues(subj, pred, obj, ctx, filter, mappedValues);
418         }
419 
420         public Builder addValues(@Nullable final Resource subj, @Nullable final URI pred,
421                 @Nullable final Value obj, @Nullable final Resource ctx,
422                 @Nullable final ValueExpr filter, final Object... mappedValues) {
423 
424             // Map the filter to a non-null key
425             final Object filterKey = filter == null ? EMPTY_FILTER : filter;
426 
427             // Compute mask and pattern list
428             final byte mask = mask(subj, pred, obj, ctx);
429             final List<Value> pattern = Arrays.asList(subj, pred, obj, ctx);
430 
431             // Retrieve the table for the mask. Create it if necessary
432             Table<List<Value>, Object, Set<Object>> sets = this.maskData[mask];
433             if (sets == null) {
434                 sets = HashBasedTable.create();
435                 this.maskData[mask] = sets;
436                 this.numMasks++;
437             }
438 
439             // Retrieve the values set for the (pattern, filter) pair. Create it if necessary
440             Set<Object> set = sets.get(pattern, filterKey);
441             if (set == null) {
442                 set = new HashSet<>();
443                 sets.put(pattern, filterKey, set);
444                 if (filterKey != EMPTY_FILTER) {
445                     this.numFilters++;
446                 }
447             }
448 
449             // Add the mapped values to the set
450             this.numValues -= set.size();
451             set.addAll(Arrays.asList(mappedValues));
452             this.numValues += set.size();
453 
454             // Return this builder object for call chaining
455             return this;
456         }
457 
458         public StatementMatcher build(@Nullable final Function<Value, Value> normalizer) {
459 
460             // Compute the total size of the values array
461             int valuesSize = this.numFilters + this.numValues + 1;
462             for (int mask = 0; mask < this.maskData.length; ++mask) {
463                 final Table<List<Value>, Object, Set<Object>> table = this.maskData[mask];
464                 if (table != null) {
465                     valuesSize += table.rowKeySet().size() * (Integer.bitCount(mask) + 1);
466                 }
467             }
468 
469             // Allocate data structures
470             final byte[] masks = new byte[this.numMasks];
471             final int[][] tables = new int[this.numMasks][];
472             final Object[] values = new Object[valuesSize * 4];
473 
474             // Initialize counters
475             int numPatterns = 0;
476 
477             // Initialize mask and value indexes
478             int maskIndex = 0;
479             int valueIndex = 0;
480 
481             // Emit an initial marker
482             values[valueIndex++] = UNNORMALIZED_MARKER;
483 
484             // Consider the patterns for each mask, starting from least selective mask
485             boolean matchAll = false;
486             for (final byte mask : new byte[] { 0, 8, 2, 4, 1, 10, 12, 9, 6, 3, 5, 14, 11, 13, 7,
487                     15 }) {
488 
489                 // Retrieve the table for the current mask. Abort if undefined
490                 final Table<List<Value>, Object, Set<Object>> sets = this.maskData[mask];
491                 if (sets == null) {
492                     continue;
493                 }
494 
495                 // Allocate the hash table for the mask (load factor 0.66)
496                 final int maskPatterns = sets.rowKeySet().size();
497                 final int[] table = new int[maskPatterns + Math.max(1, maskPatterns >> 1)];
498 
499                 // Populate the hash table and the values array
500                 numPatterns += maskPatterns;
501                 for (final List<Value> pattern : sets.rowKeySet()) {
502 
503                     // Compute hash
504                     final int hash = hash((Resource) pattern.get(0), (URI) pattern.get(1),
505                             pattern.get(2), (Resource) pattern.get(3), mask);
506 
507                     // Identify whether the pattern is used unfiltered
508                     final Map<Object, Set<Object>> map = sets.rowMap().get(pattern);
509                     final boolean unfiltered = map.containsKey(EMPTY_FILTER);
510                     if (unfiltered && mask == 0) {
511                         matchAll = true;
512                     }
513 
514                     // Update hash map
515                     int slot = (hash & 0x7FFFFFFF) % table.length;
516                     while (table[slot] != 0) {
517                         slot = next(slot, table.length);
518                     }
519                     table[slot] = tokenEncode(hash, valueIndex, unfiltered);
520 
521                     // Append the constants used in the pattern
522                     for (final Value component : pattern) {
523                         if (component != null) {
524                             values[valueIndex++] = component;
525                         }
526                     }
527 
528                     // Append filters and mapped values
529                     for (final Map.Entry<Object, Set<Object>> entry : map.entrySet()) {
530                         if (entry.getKey() != EMPTY_FILTER) {
531                             values[valueIndex++] = Filter.create((ValueExpr) entry.getKey());
532                         }
533                         for (final Object value : entry.getValue()) {
534                             values[valueIndex++] = value;
535                         }
536                     }
537 
538                     // Append marker
539                     values[valueIndex++] = UNNORMALIZED_MARKER;
540                 }
541 
542                 // Update masks and tables structures
543                 masks[maskIndex] = mask;
544                 tables[maskIndex] = table;
545                 ++maskIndex;
546             }
547 
548             // Build a pattern matcher using the created data structures
549             return new StatementMatcher(normalizer, masks, tables, values, numPatterns,
550                     this.numValues, matchAll);
551         }
552 
553     }
554 
555     private static abstract class Filter {
556 
557         static Filter create(final ValueExpr expr) {
558             if (expr instanceof And) {
559                 final And and = (And) expr;
560                 return new AndFilter(create(and.getLeftArg()), create(and.getRightArg()));
561             } else if (expr instanceof Compare) {
562                 final Compare cmp = (Compare) expr;
563                 if (cmp.getOperator() == CompareOp.EQ || cmp.getOperator() == CompareOp.NE) {
564                     ValueExpr left = cmp.getLeftArg();
565                     ValueExpr right = cmp.getRightArg();
566                     if (left instanceof ValueConstant) {
567                         left = new Var("l", ((ValueConstant) left).getValue());
568                     }
569                     if (right instanceof ValueConstant) {
570                         right = new Var("r", ((ValueConstant) right).getValue());
571                     }
572                     if (left instanceof Var && right instanceof Var) {
573                         return new CompareFilter((Var) left, (Var) right,
574                                 cmp.getOperator() == CompareOp.EQ);
575                     }
576                 }
577             }
578             return new ValueExprFilter(expr);
579         }
580 
581         Filter normalize(final Function<Value, Value> normalizer) {
582             return this;
583         }
584 
585         abstract boolean eval(final Resource subj, final URI pred, final Value obj,
586                 final Resource ctx);
587 
588         private static final class ValueExprFilter extends Filter {
589 
590             private final ValueExpr expr;
591 
592             ValueExprFilter(final ValueExpr expr) {
593                 this.expr = expr;
594             }
595 
596             @Override
597             Filter normalize(final Function<Value, Value> normalizer) {
598                 return new ValueExprFilter(Algebra.normalize(this.expr, normalizer));
599             }
600 
601             @Override
602             boolean eval(final Resource subj, final URI pred, final Value obj, final Resource ctx) {
603                 final BindingSet bindings = new ListBindingSet(VAR_NAMES, subj, pred, obj, ctx);
604                 return ((Literal) Algebra.evaluateValueExpr(this.expr, bindings)).booleanValue();
605             }
606 
607         }
608 
609         private static final class CompareFilter extends Filter {
610 
611             private final Value leftValue;
612 
613             private final Value rightValue;
614 
615             private final char left;
616 
617             private final char right;
618 
619             private final boolean negate;
620 
621             CompareFilter(final Var left, final Var right, final boolean equal) {
622                 this.leftValue = left.getValue();
623                 this.rightValue = right.getValue();
624                 this.left = left.hasValue() ? 'l' //
625                         : Character.toLowerCase(left.getName().charAt(0));
626                 this.right = right.hasValue() ? 'r' //
627                         : Character.toLowerCase(right.getName().charAt(0));
628                 this.negate = !equal;
629             }
630 
631             @Override
632             boolean eval(final Resource subj, final URI pred, final Value obj, final Resource ctx) {
633                 final Value left = select(subj, pred, obj, ctx, this.left);
634                 final Value right = select(subj, pred, obj, ctx, this.right);
635                 final boolean equals = Objects.equals(left, right);
636                 return equals ^ this.negate;
637             }
638 
639             private Value select(final Resource subj, final URI pred, final Value obj,
640                     final Resource ctx, final char c) {
641                 switch (c) {
642                 case 's':
643                     return subj;
644                 case 'p':
645                     return pred;
646                 case 'o':
647                     return obj;
648                 case 'c':
649                     return ctx;
650                 case 'l':
651                     return this.leftValue;
652                 case 'r':
653                     return this.rightValue;
654                 default:
655                     throw new Error();
656                 }
657             }
658 
659         }
660 
661         private static final class AndFilter extends Filter {
662 
663             private final Filter left;
664 
665             private final Filter right;
666 
667             AndFilter(final Filter left, final Filter right) {
668                 this.left = left;
669                 this.right = right;
670             }
671 
672             @Override
673             Filter normalize(final Function<Value, Value> normalizer) {
674                 final Filter left = this.left.normalize(normalizer);
675                 final Filter right = this.right.normalize(normalizer);
676                 return left == this.left && right == this.right ? this
677                         : new AndFilter(left, right);
678             }
679 
680             @Override
681             boolean eval(final Resource subj, final URI pred, final Value obj, final Resource ctx) {
682                 return this.left.eval(subj, pred, obj, ctx)
683                         && this.right.eval(subj, pred, obj, ctx);
684             }
685 
686         }
687 
688     }
689 
690 }