1
2
3
4
5
6
7
8
9
10
11
12
13
14 package eu.fbk.rdfpro;
15
16 import java.util.ArrayList;
17 import java.util.Arrays;
18 import java.util.HashSet;
19 import java.util.List;
20 import java.util.Objects;
21 import java.util.Set;
22 import java.util.function.Function;
23 import java.util.function.Predicate;
24
25 import javax.annotation.Nullable;
26
27 import org.openrdf.model.BNode;
28 import org.openrdf.model.Literal;
29 import org.openrdf.model.Resource;
30 import org.openrdf.model.Statement;
31 import org.openrdf.model.URI;
32 import org.openrdf.model.Value;
33 import org.openrdf.model.ValueFactory;
34 import org.openrdf.model.vocabulary.SESAME;
35 import org.openrdf.model.vocabulary.XMLSchema;
36 import org.openrdf.rio.RDFHandler;
37 import org.openrdf.rio.RDFHandlerException;
38
39 import eu.fbk.rdfpro.util.Namespaces;
40 import eu.fbk.rdfpro.util.Scripting;
41 import eu.fbk.rdfpro.util.Statements;
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84 @FunctionalInterface
85 public interface Transformer {
86
87
88 static Transformer NIL = new Transformer() {
89
90 @Override
91 public void transform(final Statement statement, final RDFHandler handler) {
92 }
93
94 };
95
96
97 static Transformer IDENTITY = new Transformer() {
98
99 @Override
100 public void transform(final Statement statement, final RDFHandler handler)
101 throws RDFHandlerException {
102 handler.handleStatement(statement);
103 }
104
105 };
106
107
108
109
110
111
112
113
114
115 static Transformer filter(final Predicate<? super Statement> predicate) {
116 Objects.requireNonNull(predicate);
117 return new Transformer() {
118
119 @Override
120 public void transform(final Statement statement, final RDFHandler handler)
121 throws RDFHandlerException {
122 if (predicate.test(statement)) {
123 handler.handleStatement(statement);
124 }
125 }
126
127 };
128 }
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144 static Transformer filter(final String components, final Predicate<? super Value> predicate) {
145
146 Objects.requireNonNull(predicate);
147
148 final String comp = components.trim().toLowerCase();
149 final boolean[] flags = new boolean[4];
150 for (int i = 0; i < comp.length(); ++i) {
151 final char c = comp.charAt(i);
152 final int index = c == 's' ? 0 : c == 'p' ? 1 : c == 'o' ? 2
153 : c == 'c' || c == 'g' ? 3 : -1;
154 if (index < 0 || flags[index]) {
155 throw new IllegalArgumentException("Invalid components '" + components + "'");
156 }
157 flags[index] = true;
158 }
159
160 if (!flags[0] && !flags[1] && !flags[2] && !flags[3]) {
161 return IDENTITY;
162 }
163
164 return new Transformer() {
165
166 private final boolean skipSubj = !flags[0];
167
168 private final boolean skipPred = !flags[1];
169
170 private final boolean skipObj = !flags[2];
171
172 private final boolean skipCtx = !flags[3];
173
174 @Override
175 public void transform(final Statement statement, final RDFHandler handler)
176 throws RDFHandlerException {
177 if ((skipSubj || predicate.test(statement.getSubject()))
178 && (skipPred || predicate.test(statement.getPredicate()))
179 && (skipObj || predicate.test(statement.getObject()))
180 && (skipCtx || predicate.test(statement.getContext()))) {
181 handler.handleStatement(statement);
182 }
183 }
184
185 };
186 }
187
188
189
190
191
192
193
194
195
196 static Transformer map(final Function<? super Statement, ? extends Statement> function) {
197
198 Objects.requireNonNull(function);
199
200 return new Transformer() {
201
202 @Override
203 public void transform(final Statement statement, final RDFHandler handler)
204 throws RDFHandlerException {
205 final Statement transformed = function.apply(statement);
206 if (transformed != null) {
207 handler.handleStatement(transformed);
208 }
209 }
210
211 };
212 }
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228 static Transformer map(final String components,
229 final Function<? super Value, ? extends Value> function) {
230
231 Objects.requireNonNull(function);
232
233 final String comp = components.trim().toLowerCase();
234 final boolean[] flags = new boolean[4];
235 for (int i = 0; i < comp.length(); ++i) {
236 final char c = comp.charAt(i);
237 final int index = c == 's' ? 0 : c == 'p' ? 1 : c == 'o' ? 2
238 : c == 'c' || c == 'g' ? 3 : -1;
239 if (index < 0 || flags[index]) {
240 throw new IllegalArgumentException("Invalid components '" + components + "'");
241 }
242 flags[index] = true;
243 }
244
245 if (!flags[0] && !flags[1] && !flags[2] && !flags[3]) {
246 return IDENTITY;
247 }
248
249 return new Transformer() {
250
251 private final boolean mapSubj = flags[0];
252
253 private final boolean mapPred = flags[1];
254
255 private final boolean mapObj = flags[2];
256
257 private final boolean mapCtx = flags[3];
258
259 @Override
260 public void transform(final Statement statement, final RDFHandler handler)
261 throws RDFHandlerException {
262
263 Resource subj = statement.getSubject();
264 URI pred = statement.getPredicate();
265 Value obj = statement.getObject();
266 Resource ctx = statement.getContext();
267
268 boolean modified = false;
269
270 if (this.mapSubj) {
271 final Value v = function.apply(subj);
272 if (!(v instanceof Resource)) {
273 return;
274 }
275 modified |= v != subj;
276 subj = (Resource) v;
277 }
278
279 if (this.mapPred) {
280 final Value v = function.apply(pred);
281 if (!(v instanceof URI)) {
282 return;
283 }
284 modified |= v != pred;
285 pred = (URI) v;
286 }
287
288 if (this.mapObj) {
289 final Value v = function.apply(obj);
290 if (v == null) {
291 return;
292 }
293 modified |= v != obj;
294 obj = v;
295 }
296
297 if (this.mapCtx) {
298 final Value v = function.apply(ctx);
299 if (!(v instanceof Resource)) {
300 return;
301 }
302 modified |= v != ctx;
303 ctx = (Resource) v;
304 }
305
306 if (!modified) {
307 handler.handleStatement(statement);
308 } else if (ctx == null) {
309 handler.handleStatement(Statements.VALUE_FACTORY.createStatement(subj, pred,
310 obj));
311 } else {
312 handler.handleStatement(Statements.VALUE_FACTORY.createStatement(subj, pred,
313 obj, ctx));
314 }
315 }
316
317 };
318 }
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338 static Transformer set(final String components, final Value... values) {
339
340 final boolean[] flags = new boolean[4];
341 final Value[] vals = new Value[4];
342
343 final String comp = components.trim().toLowerCase();
344 for (int i = 0; i < comp.length(); ++i) {
345 final char c = comp.charAt(i);
346 final int index = c == 's' ? 0 : c == 'p' ? 1 : c == 'o' ? 2
347 : c == 'c' || c == 'g' ? 3 : -1;
348 if (index < 0 || flags[index]) {
349 throw new IllegalArgumentException("Invalid components '" + components + "'");
350 }
351 flags[index] = true;
352 vals[index] = values[Math.min(values.length - 1, i)];
353 }
354
355 if (!flags[0] && !flags[1] && !flags[2] && !flags[3]) {
356 return IDENTITY;
357
358 } else if (vals[0] != null && !(vals[0] instanceof Resource) || vals[1] != null
359 && !(vals[1] instanceof URI) || vals[3] != null && !(vals[3] instanceof Resource)) {
360 return NIL;
361
362 } else {
363 return new Transformer() {
364
365 private final boolean subjFlag = flags[0];
366
367 private final boolean predFlag = flags[1];
368
369 private final boolean objFlag = flags[2];
370
371 private final boolean ctxFlag = flags[3];
372
373 private final Resource subjVal = (Resource) vals[0];
374
375 private final URI predVal = (URI) vals[1];
376
377 private final Value objVal = vals[2];
378
379 private final Resource ctxVal = (Resource) vals[3];
380
381 @Override
382 public void transform(final Statement statement, final RDFHandler handler)
383 throws RDFHandlerException {
384
385 final Resource subj = this.subjFlag ? this.subjVal : statement.getSubject();
386 final URI pred = this.predFlag ? this.predVal : statement.getPredicate();
387 final Value obj = this.objFlag ? this.objVal : statement.getObject();
388 final Resource ctx = this.ctxFlag ? this.ctxVal : statement.getContext();
389
390 final ValueFactory vf = Statements.VALUE_FACTORY;
391 if (ctx == null) {
392 handler.handleStatement(vf.createStatement(subj, pred, obj));
393 } else {
394 handler.handleStatement(vf.createStatement(subj, pred, obj, ctx));
395 }
396 }
397
398 };
399 }
400 }
401
402
403
404
405
406
407
408
409
410
411
412 static Transformer sequence(final Transformer... transformers) {
413 if (Arrays.asList(transformers).contains(null)) {
414 throw new NullPointerException();
415 }
416 if (transformers.length == 0) {
417 return NIL;
418 } else if (transformers.length == 1) {
419 return transformers[0];
420 } else if (transformers.length == 2) {
421 final Transformer first = transformers[0];
422 final Transformer second = transformers[1];
423 return new Transformer() {
424
425 @Override
426 public void transform(final Statement statement, final RDFHandler handler)
427 throws RDFHandlerException {
428 final List<Statement> buffer = new ArrayList<>();
429 final RDFHandler collector = RDFHandlers.wrap(buffer);
430 first.transform(statement, collector);
431 for (final Statement intermediateStatement : buffer) {
432 second.transform(intermediateStatement, handler);
433 }
434 }
435
436 };
437 } else {
438 final Transformer[] newTransformers = new Transformer[transformers.length - 1];
439 System.arraycopy(transformers, 2, newTransformers, 1, transformers.length - 2);
440 newTransformers[0] = sequence(transformers[0], transformers[1]);
441 return sequence(newTransformers);
442 }
443 }
444
445
446
447
448
449
450
451
452
453 static Transformer parallel(final Transformer... transformers) {
454 if (Arrays.asList(transformers).contains(null)) {
455 throw new NullPointerException();
456 }
457 if (transformers.length == 0) {
458 return NIL;
459 } else if (transformers.length == 1) {
460 return transformers[0];
461 } else {
462 return new Transformer() {
463
464 @Override
465 public void transform(final Statement statement, final RDFHandler handler)
466 throws RDFHandlerException {
467 for (final Transformer transformer : transformers) {
468 transformer.transform(statement, handler);
469 }
470 }
471
472 };
473 }
474 }
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505 static Transformer rules(final String rules) {
506 return new RuleTransformer(rules);
507 }
508
509
510
511
512
513
514
515
516
517
518 @Nullable
519 static Transformer parse(@Nullable final String expression) {
520 if (expression == null) {
521 return null;
522 } else if (Scripting.isScript(expression)) {
523 return Scripting.compile(Transformer.class, expression, "q");
524 } else {
525 return rules(expression);
526 }
527 }
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543 void transform(Statement statement, RDFHandler handler) throws RDFHandlerException;
544
545 }
546
547 final class RuleTransformer implements Transformer {
548
549 @Nullable
550 private final ValueTransformer subjectTransformer;
551
552 @Nullable
553 private final ValueTransformer predicateTransformer;
554
555 @Nullable
556 private final ValueTransformer objectTransformer;
557
558 @Nullable
559 private final ValueTransformer contextTransformer;
560
561 @SuppressWarnings("unchecked")
562 public RuleTransformer(final String spec) {
563
564
565 final List<?>[] expressions = new List<?>[4];
566 final Value[] replacements = new Value[4];
567 final Boolean[] includes = new Boolean[4];
568 for (int i = 0; i < 4; ++i) {
569 expressions[i] = new ArrayList<String>();
570 }
571
572
573 char action = 0;
574 final List<Integer> components = new ArrayList<Integer>();
575 for (final String token : spec.split("\\s+")) {
576 final char ch0 = token.charAt(0);
577 if (ch0 == '+' || ch0 == '-' || ch0 == '=') {
578 action = ch0;
579 if (token.length() == 1) {
580 throw new IllegalArgumentException("No component(s) specified in '" + spec
581 + "'");
582 }
583 components.clear();
584 for (int i = 1; i < token.length(); ++i) {
585 final char ch1 = Character.toLowerCase(token.charAt(i));
586 final int component = ch1 == 's' ? 0 : ch1 == 'p' ? 1 : ch1 == 'o' ? 2
587 : ch1 == 'c' ? 3 : -1;
588 if (component < 0) {
589 throw new IllegalArgumentException("Invalid component '" + ch1 + "' in '"
590 + spec + "'");
591 }
592 components.add(component);
593 }
594 } else if (action == 0) {
595 throw new IllegalArgumentException("Missing selector in '" + spec + "'");
596 } else if (action == '=') {
597 for (final int component : components) {
598 replacements[component] = Statements.parseValue(token, Namespaces.DEFAULT);
599 }
600 } else {
601 for (final int component : components) {
602 ((List<String>) expressions[component]).add(token);
603 final Boolean include = action == '+' ? Boolean.TRUE : Boolean.FALSE;
604 if (includes[component] != null
605 && !Objects.equals(includes[component], include)) {
606 throw new IllegalArgumentException(
607 "Include (+) and exclude (-) rules both "
608 + "specified for same component in '" + spec + "'");
609 }
610 includes[component] = include;
611 }
612 }
613 }
614
615
616 final ValueTransformer[] transformers = new ValueTransformer[4];
617 for (int i = 0; i < 4; ++i) {
618 transformers[i] = expressions[i].isEmpty() && replacements[i] == null ? null
619 : new ValueTransformer((List<String>) expressions[i], replacements[i],
620 Boolean.TRUE.equals(includes[i]));
621 }
622 this.subjectTransformer = transformers[0];
623 this.predicateTransformer = transformers[1];
624 this.objectTransformer = transformers[2];
625 this.contextTransformer = transformers[3];
626 }
627
628 @Override
629 public void transform(final Statement statement, final RDFHandler handler)
630 throws RDFHandlerException {
631
632
633 final Resource oldSubj = statement.getSubject();
634 Resource newSubj = oldSubj;
635 if (this.subjectTransformer != null) {
636 newSubj = (Resource) this.subjectTransformer.transform(oldSubj);
637 if (newSubj == null) {
638 return;
639 }
640 }
641
642
643 final URI oldPred = statement.getPredicate();
644 URI newPred = oldPred;
645 if (this.predicateTransformer != null) {
646 newPred = (URI) this.predicateTransformer.transform(oldPred);
647 if (newPred == null) {
648 return;
649 }
650 }
651
652
653 final Value oldObj = statement.getObject();
654 Value newObj = oldObj;
655 if (this.objectTransformer != null) {
656 newObj = this.objectTransformer.transform(oldObj);
657 if (newObj == null) {
658 return;
659 }
660 }
661
662
663 Resource oldCtx = statement.getContext();
664 oldCtx = oldCtx != null ? oldCtx : SESAME.NIL;
665 Resource newCtx = oldCtx;
666 if (this.contextTransformer != null) {
667 newCtx = (Resource) this.contextTransformer.transform(oldCtx);
668 if (newCtx == null) {
669 return;
670 }
671 }
672
673
674 if (newSubj == oldSubj && newPred == oldPred && newObj == oldObj && newCtx == oldCtx) {
675 handler.handleStatement(statement);
676 } else if (newCtx.equals(SESAME.NIL)) {
677 handler.handleStatement(Statements.VALUE_FACTORY.createStatement(newSubj, newPred,
678 newObj));
679 } else {
680 handler.handleStatement(Statements.VALUE_FACTORY.createStatement(newSubj, newPred,
681 newObj, newCtx));
682 }
683 }
684
685 private static class ValueTransformer {
686
687 @Nullable
688 private final Value replacement;
689
690 private final boolean include;
691
692
693
694 private final boolean matchAnyURI;
695
696 private final Set<String> matchedURINamespaces;
697
698 private final Set<URI> matchedURIs;
699
700
701
702 private final boolean matchAnyBNode;
703
704 private final Set<BNode> matchedBNodes;
705
706
707
708 private final boolean matchAnyPlainLiteral;
709
710 private final boolean matchAnyLangLiteral;
711
712 private final boolean matchAnyTypedLiteral;
713
714 private final Set<String> matchedLanguages;
715
716 private final Set<URI> matchedDatatypeURIs;
717
718 private final Set<String> matchedDatatypeNamespaces;
719
720 private final Set<Literal> matchedLiterals;
721
722 ValueTransformer(final Iterable<String> matchExpressions,
723 @Nullable final Value replacement, final boolean include) {
724
725 this.replacement = replacement;
726 this.include = include;
727
728 this.matchedURINamespaces = new HashSet<>();
729 this.matchedURIs = new HashSet<>();
730 this.matchedBNodes = new HashSet<>();
731 this.matchedLanguages = new HashSet<>();
732 this.matchedDatatypeURIs = new HashSet<>();
733 this.matchedDatatypeNamespaces = new HashSet<>();
734 this.matchedLiterals = new HashSet<>();
735
736 boolean matchAnyURI = false;
737 boolean matchAnyBNode = false;
738 boolean matchAnyPlainLiteral = false;
739 boolean matchAnyLangLiteral = false;
740 boolean matchAnyTypedLiteral = false;
741
742 for (final String expression : matchExpressions) {
743 if ("<*>".equals(expression)) {
744 matchAnyURI = true;
745 } else if ("_:*".equals(expression)) {
746 matchAnyBNode = true;
747 } else if ("*".equals(expression)) {
748 matchAnyPlainLiteral = true;
749 } else if ("*@*".equals(expression)) {
750 matchAnyLangLiteral = true;
751 } else if ("*^^*".equals(expression)) {
752 matchAnyTypedLiteral = true;
753 } else if (expression.startsWith("*@")) {
754 this.matchedLanguages.add(expression.substring(2));
755 } else if (expression.startsWith("*^^")) {
756 if (expression.endsWith(":*")) {
757 this.matchedDatatypeNamespaces.add(Namespaces.DEFAULT.uriFor(expression
758 .substring(3, expression.length() - 2)));
759 } else {
760 this.matchedDatatypeURIs.add((URI) Statements.parseValue(
761 expression.substring(3), Namespaces.DEFAULT));
762 }
763 } else if (expression.endsWith(":*")) {
764 this.matchedURINamespaces.add(Namespaces.DEFAULT.uriFor(expression.substring(
765 0, expression.length() - 2)));
766
767 } else if (expression.endsWith("*>")) {
768 this.matchedURINamespaces
769 .add(expression.substring(1, expression.length() - 2));
770 } else {
771 final Value value = Statements.parseValue(expression, Namespaces.DEFAULT);
772 if (value instanceof URI) {
773 this.matchedURIs.add((URI) value);
774 } else if (value instanceof BNode) {
775 this.matchedBNodes.add((BNode) value);
776 } else if (value instanceof Literal) {
777 this.matchedLiterals.add((Literal) value);
778 }
779
780 }
781 }
782
783 this.matchAnyURI = matchAnyURI;
784 this.matchAnyBNode = matchAnyBNode;
785 this.matchAnyPlainLiteral = matchAnyPlainLiteral;
786 this.matchAnyLangLiteral = matchAnyLangLiteral;
787 this.matchAnyTypedLiteral = matchAnyTypedLiteral;
788 }
789
790 @Nullable
791 Value transform(final Value value) {
792 final boolean matched = match(value);
793 return this.include && !matched || !this.include && matched ? null
794 : this.replacement == null ? value : this.replacement;
795 }
796
797 private boolean match(final Value value) {
798 if (value instanceof URI) {
799 return this.matchAnyURI
800 || contains(this.matchedURIs, value)
801 || containsNs(this.matchedURINamespaces, (URI) value);
802 } else if (value instanceof Literal) {
803 final Literal lit = (Literal) value;
804 final String lang = lit.getLanguage();
805 final URI dt = lit.getDatatype();
806 return lang == null
807 && (dt == null || XMLSchema.STRING.equals(dt))
808 && this.matchAnyPlainLiteral
809 || lang != null
810 && (this.matchAnyLangLiteral || contains(this.matchedLanguages, lang))
811 || dt != null
812 && (this.matchAnyTypedLiteral || contains(this.matchedDatatypeURIs, dt) || containsNs(
813 this.matchedDatatypeNamespaces, dt))
814 || contains(this.matchedLiterals, lit);
815 } else {
816 return this.matchAnyBNode
817 || contains(this.matchedBNodes, value);
818 }
819 }
820
821 private static boolean contains(final Set<?> set, final Object value) {
822 return !set.isEmpty() && set.contains(value);
823 }
824
825 private static boolean containsNs(final Set<String> set, final URI uri) {
826 if (set.isEmpty()) {
827 return false;
828 }
829 if (set.contains(uri.getNamespace())) {
830 return true;
831 }
832 final String uriString = uri.stringValue();
833 for (final String elem : set) {
834 if (uriString.startsWith(elem)) {
835 return true;
836 }
837 }
838 return false;
839 }
840
841 }
842
843 }