1   /*
2    * RDFpro - An extensible tool for building stream-oriented RDF processing libraries.
3    * 
4    * Written in 2014 by Francesco Corcoglioniti <francesco.corcoglioniti@gmail.com> with support by
5    * Marco Rospocher, Marco Amadori and Michele Mostarda.
6    * 
7    * To the extent possible under law, the author has dedicated all copyright and related and
8    * neighboring rights to this software to the public domain worldwide. This software is
9    * distributed without any warranty.
10   * 
11   * You should have received a copy of the CC0 Public Domain Dedication along with this software.
12   * If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
13   */
14  package eu.fbk.rdfpro;
15  
16  import java.io.BufferedReader;
17  import java.io.ByteArrayInputStream;
18  import java.io.File;
19  import java.io.IOException;
20  import java.io.InputStream;
21  import java.io.InputStreamReader;
22  import java.net.MalformedURLException;
23  import java.net.URL;
24  import java.net.URLConnection;
25  import java.nio.charset.Charset;
26  import java.nio.file.Files;
27  import java.nio.file.Path;
28  import java.util.ArrayList;
29  import java.util.Arrays;
30  import java.util.Date;
31  import java.util.GregorianCalendar;
32  import java.util.HashMap;
33  import java.util.Iterator;
34  import java.util.List;
35  import java.util.Map;
36  import java.util.Objects;
37  import java.util.Set;
38  import java.util.TreeSet;
39  import java.util.concurrent.atomic.AtomicInteger;
40  
41  import javax.annotation.Nullable;
42  import javax.xml.datatype.DatatypeConfigurationException;
43  import javax.xml.datatype.DatatypeFactory;
44  import javax.xml.datatype.XMLGregorianCalendar;
45  
46  import org.codehaus.groovy.control.customizers.ImportCustomizer;
47  import org.openrdf.model.BNode;
48  import org.openrdf.model.Literal;
49  import org.openrdf.model.Resource;
50  import org.openrdf.model.Statement;
51  import org.openrdf.model.URI;
52  import org.openrdf.model.Value;
53  import org.openrdf.model.datatypes.XMLDatatypeUtil;
54  import org.openrdf.model.impl.BNodeImpl;
55  import org.openrdf.model.impl.LiteralImpl;
56  import org.openrdf.model.impl.URIImpl;
57  import org.openrdf.model.vocabulary.RDF;
58  import org.openrdf.model.vocabulary.XMLSchema;
59  import org.openrdf.rio.RDFHandler;
60  import org.openrdf.rio.RDFHandlerException;
61  import org.slf4j.Logger;
62  import org.slf4j.LoggerFactory;
63  
64  import groovy.lang.MissingMethodException;
65  import groovy.lang.Script;
66  import groovy.util.GroovyScriptEngine;
67  import groovy.util.ResourceConnector;
68  import groovy.util.ResourceException;
69  
70  import info.aduna.text.ASCIIUtil;
71  
72  import eu.fbk.rdfpro.util.Environment;
73  import eu.fbk.rdfpro.util.Hash;
74  import eu.fbk.rdfpro.util.Namespaces;
75  import eu.fbk.rdfpro.util.Statements;
76  
77  final class GroovyProcessor implements RDFProcessor {
78  
79      // TODO: optionally, we can provide for two execution modalities, one failing at first error
80      // and the other continuing
81  
    /** Logger used for the processor's own diagnostic messages. */
    private static final Logger LOGGER = LoggerFactory.getLogger(GroovyProcessor.class);

    /** Separate logger whose name is this class' name followed by ".script". */
    private static final Logger SCRIPT_LOGGER = LoggerFactory.getLogger(GroovyProcessor.class
            .getName() + ".script"); // can be improved

    /** Shared Groovy script engine, configured once in the static initializer. */
    private static final GroovyScriptEngine ENGINE;

    /** Shared XML datatype factory, created once in the static initializer. */
    private static final DatatypeFactory DATATYPE_FACTORY;

    /** Whether wrap() returns a PooledHandler (true) or a SingletonHandler (false). */
    private final boolean scriptPooling;

    /** Compiled script class, instantiated by newHandlerScript(). */
    private final Class<?> scriptClass;

    /** Arguments handed to every script instance through doInit(). */
    private final String[] scriptArgs;
96  
    // Class initialization: sets up the shared Groovy engine and the XML datatype factory. Any
    // failure in either step is rethrown as an Error.
    static {
        try {
            // Static members of SparqlFunctions are star-imported into every compiled script
            final ImportCustomizer customizer = new ImportCustomizer();
            customizer.addStaticStars("eu.fbk.rdfpro.SparqlFunctions");
            // Script roots are read from the rdfpro.groovy.classpath property (the second
            // argument is presumably the default used when the property is not set)
            final String classpath = Environment.getProperty("rdfpro.groovy.classpath", "");
            ENGINE = new GroovyScriptEngine(new Loader(classpath));
            // Every script is compiled as a subclass of HandlerScript
            ENGINE.getConfig().setScriptBaseClass(HandlerScript.class.getName());
            ENGINE.getConfig().addCompilationCustomizers(customizer);
        } catch (final Throwable ex) {
            throw new Error("Could not initialize Groovy: " + ex.getMessage(), ex);
        }
        try {
            DATATYPE_FACTORY = DatatypeFactory.newInstance();
        } catch (final DatatypeConfigurationException ex) {
            throw new Error("Could not instantiate javax.xml.datatype.DatatypeFactory", ex);
        }
    }
114 
115     static GroovyProcessor doCreate(final String name, final String... args) {
116         int index = 0;
117         boolean pooling = false;
118         if (args.length > 0 && args[0].equals("-p")) {
119             pooling = true;
120             ++index;
121         }
122         if (index >= args.length) {
123             throw new IllegalArgumentException(
124                     "Missing filter script expression or file reference");
125         }
126         final String groovyExpressionOrFile = args[index];
127         final String[] groovyArgs = Arrays.copyOfRange(args, index + 1, args.length);
128         return new GroovyProcessor(pooling, groovyExpressionOrFile, groovyArgs);
129     }
130 
131     GroovyProcessor(final boolean scriptPooling, final String scriptExprOrFile,
132             final String... scriptArgs) {
133 
134         Objects.requireNonNull(scriptExprOrFile);
135 
136         Class<?> scriptClass = null;
137         try {
138             try {
139                 scriptClass = ENGINE.loadScriptByName(scriptExprOrFile);
140             } catch (final ResourceException ex) {
141                 final Path path = Files.createTempFile("rdfpro-filter-", ".groovy");
142                 Files.write(path, scriptExprOrFile.getBytes(Charset.forName("UTF-8")));
143                 scriptClass = ENGINE.loadScriptByName(path.toUri().toString());
144             }
145         } catch (final Throwable ex) {
146             throw new Error("Could not compile Groovy script", ex);
147         }
148 
149         this.scriptPooling = scriptPooling;
150         this.scriptClass = scriptClass;
151         this.scriptArgs = scriptArgs.clone();
152     }
153 
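    // The following two methods only work with handlers created while pooling is disabled; for
    // any other handler (including pooled ones) they do nothing and return null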
155 
156     public Object getProperty(final RDFHandler handler, final String name) {
157         if (!(handler instanceof SingletonHandler)) {
158             return null;
159         }
160         final SingletonHandler h = (SingletonHandler) handler;
161         return h.getScript().getProperty(name);
162     }
163 
164     public Object setProperty(final RDFHandler handler, final String name, final Object value) {
165         if (!(handler instanceof SingletonHandler)) {
166             return null;
167         }
168         final SingletonHandler h = (SingletonHandler) handler;
169         final Object oldValue = h.getScript().getProperty(name);
170         h.getScript().setProperty(name, value);
171         return oldValue;
172     }
173 
174     @SuppressWarnings("resource")
175     @Override
176     public RDFHandler wrap(final RDFHandler handler) {
177         final RDFHandler sink = Objects.requireNonNull(handler);
178         return this.scriptPooling ? new PooledHandler(sink) : new SingletonHandler(sink);
179     }
180 
181     private HandlerScript newHandlerScript(final String name, final RDFHandler handler) {
182         try {
183             final HandlerScript script = (HandlerScript) GroovyProcessor.this.scriptClass
184                     .newInstance();
185             script.doInit(name, handler, GroovyProcessor.this.scriptArgs);
186             return script;
187         } catch (final Throwable ex) {
188             throw new Error("Could not instantiate script class", ex);
189         }
190     }
191 
192     private static boolean isPN_CHARS(final int c) {
193         return isPN_CHARS_U(c) || ASCIIUtil.isNumber(c) || c == 45 || c == 183 || c >= 768
194                 && c <= 879 || c >= 8255 && c <= 8256;
195     }
196 
197     private static boolean isPN_CHARS_U(final int c) {
198         return isPN_CHARS_BASE(c) || c == 95;
199     }
200 
201     private static boolean isPN_CHARS_BASE(final int c) {
202         return ASCIIUtil.isLetter(c) || c >= 192 && c <= 214 || c >= 216 && c <= 246 || c >= 248
203                 && c <= 767 || c >= 880 && c <= 893 || c >= 895 && c <= 8191 || c >= 8204
204                 && c <= 8205 || c >= 8304 && c <= 8591 || c >= 11264 && c <= 12271 || c >= 12289
205                 && c <= 55295 || c >= 63744 && c <= 64975 || c >= 65008 && c <= 65533
206                 || c >= 65536 && c <= 983039;
207     }
208 
209     private static String valueToHash(final Value v) {
210         final StringBuilder sb = new StringBuilder();
211         if (v instanceof URI) {
212             sb.append('u');
213             sb.append('#');
214             sb.append(v.stringValue());
215         } else if (v instanceof BNode) {
216             sb.append('b');
217             sb.append('#');
218             sb.append(v.stringValue());
219         } else if (v instanceof Literal) {
220             sb.append('l');
221             sb.append('#');
222             sb.append(Hash.murmur3(v.stringValue()).toString());
223         }
224         return Hash.murmur3(sb.toString()).toString();
225     }
226 
227     private static final class Loader implements ResourceConnector {
228 
229         private URL[] roots;
230 
231         // We cache connections as the Groovy engines access each file multiple times (3 times)
232         // for checking its existence and modification time. As we rewrite file content each time
233         // we cache the rewritten byte array for performance reasons
234         private Map<URL, URLConnection> connections;
235 
236         public Loader(final String classpath) {
237             try {
238                 final String[] paths = classpath.split("[;:]");
239                 this.roots = new URL[paths.length];
240                 for (int i = 0; i < paths.length; ++i) {
241                     final String path = paths[i];
242                     if (path.indexOf("://") != -1) {
243                         this.roots[i] = new URL(path);
244                     } else {
245                         this.roots[i] = new File(path).toURI().toURL();
246                     }
247                 }
248                 this.connections = new HashMap<URL, URLConnection>();
249 
250             } catch (final MalformedURLException ex) {
251                 throw new IllegalArgumentException(ex);
252             }
253         }
254 
255         @Override
256         public URLConnection getResourceConnection(final String name) throws ResourceException {
257 
258             URLConnection connection = null;
259             ResourceException exception = null;
260 
261             for (final URL root : this.roots) {
262                 URL scriptURL = null;
263                 try {
264                     scriptURL = new URL(root, name);
265                     connection = this.connections.get(scriptURL);
266                     if (connection == null) {
267                         connection = new Connection(scriptURL);
268                         connection.connect(); // load resource in advance
269                         this.connections.put(scriptURL, connection);
270                     }
271                     if (connection != null) {
272                         break;
273                     }
274 
275                 } catch (final MalformedURLException ex) {
276                     final String message = "Malformed URL: " + root + ", " + name;
277                     exception = exception == null ? new ResourceException(message)
278                             : new ResourceException(message, exception);
279 
280                 } catch (final IOException ex) {
281                     connection = null;
282                     final String message = "Cannot open URL: " + root + name;
283                     exception = exception == null ? new ResourceException(message)
284                             : new ResourceException(message, exception);
285                 }
286             }
287 
288             if (connection == null) {
289                 if (exception == null) {
290                     exception = new ResourceException("No resource for " + name + " was found");
291                 }
292                 throw exception;
293             }
294 
295             return connection;
296         }
297 
298         private static String filter(final AtomicInteger counter, final String string) {
299 
300             final StringBuilder builder = new StringBuilder();
301             final int length = string.length();
302             int i = 0;
303 
304             try {
305                 while (i < length) {
306                     char c = string.charAt(i);
307                     if (c == '<') {
308                         final int end = parseURI(string, i);
309                         if (end >= 0) {
310                             final URI u = (URI) Statements.parseValue(string.substring(i, end));
311                             builder.append("__iri(").append(counter.getAndIncrement())
312                                     .append(", \"").append(u.stringValue()).append("\")");
313                             i = end;
314                         } else {
315                             builder.append(c);
316                             ++i;
317                         }
318 
319                     } else if (isPN_CHARS_BASE(c)) {
320                         final int end = parseQName(string, i);
321                         if (end >= 0) {
322                             final URI u = (URI) Statements.parseValue(string.substring(i, end),
323                                     Namespaces.DEFAULT);
324                             builder.append("__iri(").append(counter.getAndIncrement())
325                                     .append(", \"").append(u.stringValue()).append("\")");
326                             i = end;
327                         } else {
328                             builder.append(c);
329                             while (++i < length) {
330                                 c = string.charAt(i);
331                                 if (!Character.isLetterOrDigit(c)) {
332                                     break;
333                                 }
334                                 builder.append(c);
335                             }
336                         }
337 
338                     } else if (c == '\'' || c == '\"') {
339                         final char d = c; // delimiter
340                         builder.append(d);
341                         do {
342                             c = string.charAt(++i);
343                             builder.append(c);
344                         } while (c != d || string.charAt(i - 1) == '\\');
345                         ++i;
346 
347                     } else {
348                         builder.append(c);
349                         ++i;
350                     }
351                 }
352             } catch (final Exception ex) {
353                 throw new IllegalArgumentException("Illegal URI escaping near offset " + i, ex);
354             }
355 
356             return builder.toString();
357         }
358 
359         private static int parseURI(final String string, int i) {
360 
361             final int len = string.length();
362 
363             if (string.charAt(i) != '<') {
364                 return -1;
365             }
366 
367             for (++i; i < len; ++i) {
368                 final char c = string.charAt(i);
369                 if (c == '<' || c == '\"' || c == '{' || c == '}' || c == '|' || c == '^'
370                         || c == '`' || c == '\\' || c == ' ') {
371                     return -1;
372                 }
373                 if (c == '>') {
374                     return i + 1;
375                 }
376             }
377 
378             return -1;
379         }
380 
381         private static int parseQName(final String string, int i) {
382 
383             final int len = string.length();
384             char c;
385 
386             if (!isPN_CHARS_BASE(string.charAt(i))) {
387                 return -1;
388             }
389 
390             for (; i < len; ++i) {
391                 c = string.charAt(i);
392                 if (!isPN_CHARS(c) && c != '.') {
393                     break;
394                 }
395             }
396 
397             if (i >= len - 1 || string.charAt(i - 1) == '.' || string.charAt(i) != ':') {
398                 return -1;
399             }
400 
401             c = string.charAt(++i);
402             if (!isPN_CHARS_U(c) && c != ':' && c != '%' && !Character.isDigit(c)) {
403                 return -1;
404             }
405 
406             for (; i < len; ++i) {
407                 c = string.charAt(i);
408                 if (!isPN_CHARS(c) && c != '.' && c != ':' && c != '%') {
409                     break;
410                 }
411             }
412 
413             if (string.charAt(i - 1) == '.') {
414                 return -1;
415             }
416 
417             return i;
418         }
419 
420         private class Connection extends URLConnection {
421 
422             private Map<String, List<String>> headers;
423 
424             private byte[] bytes;
425 
426             Connection(final URL url) {
427                 super(url);
428             }
429 
430             @Override
431             public void connect() throws IOException {
432 
433                 if (this.connected) {
434                     return;
435                 }
436 
437                 final URLConnection conn = this.url.openConnection();
438                 conn.setAllowUserInteraction(getAllowUserInteraction());
439                 conn.setConnectTimeout(getConnectTimeout());
440                 conn.setDefaultUseCaches(getDefaultUseCaches());
441                 conn.setDoInput(getDoInput());
442                 conn.setDoOutput(getDoOutput());
443                 conn.setIfModifiedSince(getIfModifiedSince());
444                 conn.setReadTimeout(getReadTimeout());
445                 conn.setUseCaches(getUseCaches());
446                 for (final Map.Entry<String, List<String>> entry : getRequestProperties()
447                         .entrySet()) {
448                     final String key = entry.getKey();
449                     for (final String value : entry.getValue()) {
450                         conn.setRequestProperty(key, value);
451                     }
452                 }
453                 conn.connect();
454 
455                 final String encoding = conn.getContentEncoding();
456                 final Charset charset = Charset.forName(encoding != null ? encoding : "UTF-8");
457 
458                 final StringBuilder builder = new StringBuilder();
459                 final InputStream stream = conn.getInputStream();
460                 final BufferedReader reader = new BufferedReader(new InputStreamReader(stream,
461                         charset));
462                 try {
463                     String line;
464                     final AtomicInteger counter = new AtomicInteger();
465                     while ((line = reader.readLine()) != null) {
466                         builder.append(filter(counter, line)).append("\n");
467                     }
468                 } finally {
469                     reader.close();
470                 }
471 
472                 LOGGER.debug("Filtered script is:\n{}", builder);
473 
474                 this.bytes = builder.toString().getBytes(charset);
475                 this.connected = true;
476 
477                 this.headers = new HashMap<String, List<String>>(conn.getHeaderFields());
478 
479                 LOGGER.debug("Loaded {}", getURL());
480             }
481 
482             @Override
483             public Map<String, List<String>> getHeaderFields() {
484                 return this.headers;
485             }
486 
487             @Override
488             public String getHeaderField(final String name) {
489                 final List<String> list = this.headers == null ? null : this.headers.get(name);
490                 return list == null ? null : list.get(list.size() - 1);
491             }
492 
493             @Override
494             public String getHeaderFieldKey(final int n) {
495                 final Iterator<String> iterator = this.headers.keySet().iterator();
496                 for (int i = 0; i < n; ++i) {
497                     if (iterator.hasNext()) {
498                         iterator.next();
499                     }
500                 }
501                 return iterator.hasNext() ? iterator.next() : null;
502             }
503 
504             @Override
505             public String getHeaderField(final int n) {
506                 return getHeaderField(getHeaderFieldKey(n));
507             }
508 
509             @Override
510             public InputStream getInputStream() throws IOException {
511                 return new ByteArrayInputStream(this.bytes);
512             }
513 
514         }
515 
516     }
517 
518     private final class PooledHandler extends AbstractRDFHandlerWrapper {
519 
520         private List<HandlerScript> allScripts;
521 
522         private ThreadLocal<HandlerScript> threadScript;
523 
524         private int pass;
525 
526         PooledHandler(final RDFHandler handler) {
527             super(handler);
528             this.allScripts = new ArrayList<HandlerScript>();
529             this.threadScript = new ThreadLocal<HandlerScript>() {
530 
531                 @Override
532                 protected HandlerScript initialValue() {
533                     synchronized (PooledHandler.this) {
534                         final HandlerScript script = newHandlerScript("script"
535                                 + PooledHandler.this.allScripts.size(), handler);
536                         try {
537                             script.doStart(PooledHandler.this.pass);
538                         } catch (final RDFHandlerException ex) {
539                             throw new RuntimeException(ex);
540                         }
541                         PooledHandler.this.allScripts.add(script);
542                         return script;
543                     }
544                 }
545 
546             };
547             this.pass = 0;
548         }
549 
550         @Override
551         public void startRDF() throws RDFHandlerException {
552             super.startRDF();
553             for (final HandlerScript script : this.allScripts) {
554                 script.doStart(this.pass);
555             }
556         }
557 
558         @Override
559         public void handleStatement(final Statement statement) throws RDFHandlerException {
560             this.threadScript.get().doHandle(statement);
561         }
562 
563         @Override
564         public void endRDF() throws RDFHandlerException {
565             for (final HandlerScript script : this.allScripts) {
566                 script.doEnd(this.pass);
567             }
568             ++this.pass;
569             super.endRDF();
570         }
571 
572     }
573 
574     private final class SingletonHandler extends AbstractRDFHandlerWrapper {
575 
576         private final HandlerScript script;
577 
578         private int pass;
579 
580         SingletonHandler(final RDFHandler handler) {
581             super(handler);
582             this.script = newHandlerScript("script", handler);
583             this.pass = 0;
584         }
585 
586         public HandlerScript getScript() {
587             return this.script;
588         }
589 
590         @Override
591         public synchronized void startRDF() throws RDFHandlerException {
592             super.startRDF();
593             this.script.doStart(this.pass);
594         }
595 
596         @Override
597         public synchronized void handleStatement(final Statement statement)
598                 throws RDFHandlerException {
599             this.script.doHandle(statement);
600         }
601 
602         @Override
603         public synchronized void endRDF() throws RDFHandlerException {
604             this.script.doEnd(this.pass++);
605             super.endRDF();
606         }
607 
608     }
609 
610     public static abstract class HandlerScript extends Script {
611 
        /** Script name, used in debug log messages. */
        private String name;

        /** Handler that emitted statements are forwarded to. */
        private RDFHandler handler;

        /** Result of the last attempt to invoke start(); once false, no more attempts. */
        private boolean startEnabled;

        /** Cleared when an attempt to invoke handle() fails; the script body is run instead. */
        private boolean handleEnabled;

        /** Result of the last attempt to invoke end(); once false, no more attempts. */
        private boolean endEnabled;

        /** True only while run() is executing; enables the single-letter script variables. */
        private boolean insideRun;

        /** Statement currently being processed, as returned by normalize(). */
        private GroovyStatement statement;

        /** URI constants cached by __iri(), indexed by the index passed to it. */
        private URI[] uriConsts;
627 
628         protected HandlerScript() {
629             this.startEnabled = true;
630             this.handleEnabled = true;
631             this.endEnabled = true;
632             this.uriConsts = new URI[0];
633         }
634 
635         @Override
636         public Object getProperty(final String property) {
637             // Directly matching variables this way is faster than storing them in binding object
638             if (this.insideRun && property.length() == 1) {
639                 if ("q".equals(property)) {
640                     return this.statement;
641                 } else if ("s".equals(property)) {
642                     return this.statement.s;
643                 } else if ("p".equals(property)) {
644                     return this.statement.p;
645                 } else if ("o".equals(property)) {
646                     return this.statement.o;
647                 } else if ("c".equals(property)) {
648                     return this.statement.c;
649                 } else if ("t".equals(property)) {
650                     return this.statement.p.equals(RDF.TYPE) ? this.statement.o : null;
651                 } else if ("l".equals(property)) {
652                     return this.statement.o instanceof Literal ? ((Literal) this.statement.o)
653                             .getLanguage() : null;
654                 } else if ("d".equals(property)) {
655                     return this.statement.o instanceof Literal ? ((Literal) this.statement.o)
656                             .getDatatype() : null;
657                 }
658             }
659             if ("__rdfpro__".equals(property)) {
660                 return true; // flag to signal the script is being called by RDFpro
661             }
662             return super.getProperty(property);
663         }
664 
665         @Override
666         public void setProperty(final String property, final Object value) {
667             // Directly matching variables this way is faster than storing them in binding object
668             if (this.insideRun && property.length() == 1) {
669                 if ("q".equals(property)) {
670                     this.statement = normalize((Statement) value);
671                 } else if ("s".equals(property)) {
672                     this.statement.s = (Resource) toRDF(value, false);
673                 } else if ("p".equals(property)) {
674                     this.statement.p = (URI) toRDF(value, false);
675                 } else if ("c".equals(property)) {
676                     this.statement.c = (Resource) toRDF(value, false);
677                 } else if ("t".equals(property)) {
678                     this.statement.o = toRDF(value, false);
679                     this.statement.p = RDF.TYPE;
680                 } else {
681                     // Following code serves to assemble literals starting from label, lang, dt
682                     boolean setLiteral = false;
683                     String newLabel = null;
684                     String newLang = null;
685                     URI newDatatype = null;
686                     if ("o".equals(property)) {
687                         if (value instanceof Value) {
688                             this.statement.o = (Value) value;
689                         } else {
690                             newLabel = value.toString();
691                             setLiteral = true;
692                         }
693                     } else if ("l".equals(property)) {
694                         newLang = value == null ? null : value.toString();
695                         setLiteral = true;
696                     } else if ("d".equals(property)) {
697                         newDatatype = value == null ? null : (URI) toRDF(value, false);
698                         setLiteral = true;
699                     }
700                     if (setLiteral) {
701                         if (this.statement.o instanceof Literal) {
702                             final Literal l = (Literal) this.statement.o;
703                             newLabel = newLabel != null ? newLabel : l.getLabel();
704                             newLang = newLang != null ? newLang : l.getLanguage();
705                             newDatatype = newDatatype != null ? newDatatype : l.getDatatype();
706                         }
707                         this.statement.o = newLang != null ? Statements.VALUE_FACTORY
708                                 .createLiteral(newLabel, newLang)
709                                 : newDatatype != null ? Statements.VALUE_FACTORY.createLiteral(
710                                         newLabel, newDatatype) : Statements.VALUE_FACTORY
711                                         .createLiteral(newLabel);
712                     }
713                 }
714             }
715             super.setProperty(property, value);
716         }
717 
718         final void doInit(final String name, final RDFHandler handler, final String[] args)
719                 throws RDFHandlerException {
720             this.name = name;
721             this.handler = handler;
722             final boolean called = tryInvokeMethod("init", Arrays.asList(args));
723             if (called && LOGGER.isDebugEnabled()) {
724                 LOGGER.debug("Called " + name + ".init() with " + Arrays.asList(args));
725             }
726         }
727 
728         final void doStart(final int pass) throws RDFHandlerException {
729             if (this.startEnabled) {
730                 this.startEnabled = tryInvokeMethod("start", pass);
731                 if (this.startEnabled && LOGGER.isDebugEnabled()) {
732                     LOGGER.debug("Called " + this.name + ".start() for pass " + pass);
733                 }
734             }
735         }
736 
737         final void doHandle(final Statement statement) throws RDFHandlerException {
738 
739             this.statement = normalize(statement);
740 
741             if (this.handleEnabled) {
742                 if (tryInvokeMethod("handle", this.statement)) {
743                     return;
744                 }
745                 this.handleEnabled = false;
746                 LOGGER.debug("Using script body for " + this.name + " (no handle() method)");
747             }
748 
749             this.insideRun = true;
750             try {
751                 this.run();
752             } finally {
753                 this.insideRun = false;
754             }
755         }
756 
757         final void doEnd(final int pass) throws RDFHandlerException {
758             if (this.endEnabled) {
759                 this.endEnabled = tryInvokeMethod("end", pass);
760                 if (this.endEnabled && LOGGER.isDebugEnabled()) {
761                     LOGGER.debug("Called " + this.name + ".end() for pass " + pass);
762                 }
763             }
764         }
765 
766         // INTERNAL FUNCTIONS
767 
768         protected final URI __iri(final int index, final Object arg) {
769             if (index >= this.uriConsts.length) {
770                 this.uriConsts = Arrays.copyOf(this.uriConsts, index + 1);
771             }
772             URI uri = this.uriConsts[index];
773             if (uri == null) {
774                 uri = arg instanceof URI ? (URI) arg : Statements.VALUE_FACTORY.createURI(arg
775                         .toString());
776                 this.uriConsts[index] = uri;
777             }
778             return uri;
779         }
780 
781         // QUAD CREATION AND EMISSION FUNCTIONS
782 
783         protected final Statement quad(final Object s, final Object p, final Object o,
784                 final Object c) {
785             final Resource sv = (Resource) toRDF(s, false);
786             final URI pv = (URI) toRDF(p, false);
787             final Value ov = toRDF(o, true);
788             final Resource cv = (Resource) toRDF(c, false);
789             return new GroovyStatement(sv, pv, ov, cv);
790         }
791 
792         protected final void emit() throws RDFHandlerException {
793             this.handler.handleStatement(this.statement);
794         }
795 
796         protected final boolean emitIf(@Nullable final Object condition)
797                 throws RDFHandlerException {
798             if (condition == Boolean.TRUE) {
799                 emit();
800                 return true;
801             }
802             return false;
803         }
804 
805         protected final boolean emitIfNot(@Nullable final Object condition)
806                 throws RDFHandlerException {
807             if (condition == Boolean.FALSE) {
808                 emit();
809                 return true;
810             }
811             return false;
812         }
813 
814         protected final boolean emit(@Nullable final Statement statement)
815                 throws RDFHandlerException {
816             if (!(statement instanceof GroovyStatement) || ((GroovyStatement) statement).isValid()) {
817                 this.handler.handleStatement(statement);
818                 return true;
819             }
820             return false;
821         }
822 
823         protected final boolean emit(@Nullable final Object s, @Nullable final Object p,
824                 @Nullable final Object o, @Nullable final Object c) throws RDFHandlerException {
825 
826             final Value sv = toRDF(s, false);
827             final Value pv = toRDF(p, false);
828             final Value ov = toRDF(o, true);
829             final Value cv = toRDF(c, false);
830 
831             if (sv instanceof Resource && pv instanceof URI && ov != null) {
832                 if (cv == null) {
833                     this.handler.handleStatement(Statements.VALUE_FACTORY.createStatement(
834                             (Resource) sv, (URI) pv, ov));
835                     return true;
836                 } else if (cv instanceof Resource) {
837                     this.handler.handleStatement(Statements.VALUE_FACTORY.createStatement(
838                             (Resource) sv, (URI) pv, ov, (Resource) cv));
839                     return true;
840                 }
841             }
842 
843             return false;
844         }
845 
846         // ERROR REPORTING AND LOGGING FUNCTIONS
847 
848         protected final void error(@Nullable final Object message) throws RDFHandlerException {
849             final String string = message == null ? "ERROR" : message.toString();
850             SCRIPT_LOGGER.error(string);
851             throw new RDFHandlerException(string);
852         }
853 
854         protected final void error(@Nullable final Object message, @Nullable final Throwable ex)
855                 throws RDFHandlerException {
856             final String string = message == null ? "ERROR" : message.toString();
857             if (ex != null) {
858                 SCRIPT_LOGGER.error(string, ex);
859                 throw new RDFHandlerException(string, ex);
860             } else {
861                 SCRIPT_LOGGER.error(string);
862                 throw new RDFHandlerException(string);
863             }
864         }
865 
866         protected final void log(final Object message) {
867             if (message != null) {
868                 SCRIPT_LOGGER.info(message.toString());
869             }
870         }
871 
872         // TODO consider caching of loaded file components (very optional)
873         protected final ValueSet loadSet(final Object file, final Object components) {
874             final File inputFile;
875             if (file instanceof File) {
876                 inputFile = (File) file;
877             } else {
878                 inputFile = new File(file.toString());
879             }
880             final String pattern = components.toString();
881             return new ValueSet(createHashSet(pattern, inputFile));
882         }
883 
884         // UTILITY FUNCTIONS
885 
886         private boolean tryInvokeMethod(final String method, final Object arg)
887                 throws RDFHandlerException {
888             try {
889                 invokeMethod(method, arg);
890                 return true;
891             } catch (final MissingMethodException ex) {
892                 return false;
893             }
894         }
895 
896         private static Set<String> createHashSet(final String pattern, final File file) {
897             final boolean matchSub = pattern.contains("s");
898             final boolean matchPre = pattern.contains("p");
899             final boolean matchObj = pattern.contains("o");
900             final boolean matchCtx = pattern.contains("c");
901             final Set<String> hashes = new TreeSet<>();
902             final RDFHandler handler = RDFProcessors.read(true, false, null, null,
903                     file.getAbsolutePath()).wrap(new RDFHandler() {
904 
905                 @Override
906                 public void startRDF() throws RDFHandlerException {
907                 }
908 
909                 @Override
910                 public void endRDF() throws RDFHandlerException {
911                 }
912 
913                 @Override
914                 public void handleNamespace(final String s, final String s2)
915                         throws RDFHandlerException {
916                 }
917 
918                 @Override
919                 public void handleStatement(final Statement statement) throws RDFHandlerException {
920                     if (matchSub) {
921                         hashes.add(valueToHash(statement.getSubject()));
922                     }
923                     if (matchPre) {
924                         hashes.add(valueToHash(statement.getPredicate()));
925                     }
926                     if (matchObj) {
927                         hashes.add(valueToHash(statement.getObject()));
928                     }
929                     if (matchCtx) {
930                         hashes.add(valueToHash(statement.getContext()));
931                     }
932                 }
933 
934                 @Override
935                 public void handleComment(final String s) throws RDFHandlerException {
936                 }
937             });
938 
939             try {
940                 handler.startRDF();
941                 handler.endRDF();
942             } catch (final RDFHandlerException e) {
943                 throw new IllegalArgumentException("Error while parsing pattern file.", e);
944             }
945             return hashes;
946         }
947 
948     }
949 
950     @Nullable
951     private static Value toRDF(final Object object, final boolean mayBeLiteral) {
952         if (object instanceof Value) {
953             return normalize((Value) object);
954         }
955         if (object == null) {
956             return null;
957         }
958         if (mayBeLiteral) {
959             if (object instanceof Long) {
960                 return new GroovyLiteral(object.toString(), XMLSchema.LONG);
961             } else if (object instanceof Integer) {
962                 return new GroovyLiteral(object.toString(), XMLSchema.INT);
963             } else if (object instanceof Short) {
964                 return new GroovyLiteral(object.toString(), XMLSchema.SHORT);
965             } else if (object instanceof Byte) {
966                 return new GroovyLiteral(object.toString(), XMLSchema.BYTE);
967             } else if (object instanceof Double) {
968                 return new GroovyLiteral(object.toString(), XMLSchema.DOUBLE);
969             } else if (object instanceof Float) {
970                 return new GroovyLiteral(object.toString(), XMLSchema.FLOAT);
971             } else if (object instanceof Boolean) {
972                 return new GroovyLiteral(object.toString(), XMLSchema.BOOLEAN);
973             } else if (object instanceof XMLGregorianCalendar) {
974                 final XMLGregorianCalendar c = (XMLGregorianCalendar) object;
975                 return new GroovyLiteral(c.toXMLFormat(), XMLDatatypeUtil.qnameToURI(c
976                         .getXMLSchemaType()));
977             } else if (object instanceof Date) {
978                 final GregorianCalendar c = new GregorianCalendar();
979                 c.setTime((Date) object);
980                 final XMLGregorianCalendar xc = DATATYPE_FACTORY.newXMLGregorianCalendar(c);
981                 return new GroovyLiteral(xc.toXMLFormat(), XMLDatatypeUtil.qnameToURI(xc
982                         .getXMLSchemaType()));
983             } else if (object instanceof CharSequence) {
984                 return new GroovyLiteral(object.toString(), XMLSchema.STRING);
985             } else {
986                 return new GroovyLiteral(object.toString());
987             }
988         }
989         return new GroovyURI(object.toString());
990     }
991 
992     @Nullable
993     private static Literal toLiteral(@Nullable final Object old, @Nullable final String label) {
994 
995         if (label == null) {
996             return null;
997         }
998 
999         String lang = null;
1000         URI dt = null;
1001 
1002         if (old instanceof Literal) {
1003             final Literal l = (Literal) old;
1004             lang = l.getLanguage();
1005             dt = l.getDatatype();
1006         }
1007 
1008         return lang != null ? new GroovyLiteral(label, lang) : dt != null ? new GroovyLiteral(
1009                 label, dt) : new GroovyLiteral(label);
1010     }
1011 
1012     @Nullable
1013     private static GroovyStatement normalize(@Nullable final Statement s) {
1014         if (s instanceof GroovyStatement) {
1015             return (GroovyStatement) s;
1016         } else if (s != null) {
1017             return new GroovyStatement((Resource) normalize(s.getSubject()),
1018                     (URI) normalize(s.getPredicate()), normalize(s.getObject()),
1019                     (Resource) normalize(s.getContext()));
1020         }
1021         return null;
1022     }
1023 
1024     @Nullable
1025     private static Value normalize(@Nullable final Value v) {
1026         if (v instanceof URI) {
1027             return v instanceof GroovyURI ? v : new GroovyURI(v.stringValue());
1028         } else if (v instanceof BNode) {
1029             return v instanceof GroovyBNode ? v : new GroovyBNode(v.stringValue());
1030         } else if (v instanceof Literal) {
1031             if (v instanceof GroovyLiteral) {
1032                 return v;
1033             }
1034             final Literal l = (Literal) v;
1035             if (l.getLanguage() != null) {
1036                 return new GroovyLiteral(l.getLabel(), l.getLanguage());
1037             } else if (l.getDatatype() != null) {
1038                 return new GroovyLiteral(l.getLabel(), l.getDatatype());
1039             } else {
1040                 return new GroovyLiteral(l.getLabel());
1041             }
1042         }
1043         return null;
1044     }
1045 
1046     static class GroovyStatement implements Statement {
1047 
1048         private static final long serialVersionUID = 1L;
1049 
1050         @Nullable
1051         public Resource s;
1052 
1053         @Nullable
1054         public URI p;
1055 
1056         @Nullable
1057         public Value o;
1058 
1059         @Nullable
1060         public Resource c;
1061 
1062         public GroovyStatement(final Resource s, final URI p, final Value o,
1063                 @Nullable final Resource c) {
1064             this.s = s;
1065             this.p = p;
1066             this.o = o;
1067             this.c = c;
1068         }
1069 
1070         public boolean isValid() {
1071             return this.s != null && this.p != null && this.o != null;
1072         }
1073 
1074         @Override
1075         public Resource getSubject() {
1076             return this.s;
1077         }
1078 
1079         @Override
1080         public URI getPredicate() {
1081             return this.p;
1082         }
1083 
1084         @Override
1085         public Value getObject() {
1086             return this.o;
1087         }
1088 
1089         @Override
1090         public Resource getContext() {
1091             return this.c;
1092         }
1093 
1094         @Override
1095         public boolean equals(final Object object) {
1096             if (this == object) {
1097                 return true;
1098             }
1099             if (object instanceof Statement) {
1100                 final Statement other = (Statement) object;
1101                 return this.o.equals(other.getObject()) && this.s.equals(other.getSubject())
1102                         && this.p.equals(other.getPredicate());
1103             }
1104             return false;
1105         }
1106 
1107         @Override
1108         public int hashCode() {
1109             return 961 * this.s.hashCode() + 31 * this.p.hashCode() + this.o.hashCode();
1110         }
1111 
1112         @Override
1113         public String toString() {
1114             final StringBuilder builder = new StringBuilder(256);
1115             builder.append('(');
1116             builder.append(this.s);
1117             builder.append(", ");
1118             builder.append(this.p);
1119             builder.append(", ");
1120             builder.append(this.o);
1121             if (this.c != null) {
1122                 builder.append(" [");
1123                 builder.append(this.c);
1124                 builder.append("]");
1125             }
1126             return builder.toString();
1127         }
1128 
1129     }
1130 
1131     static final class GroovyURI extends URIImpl implements Comparable<Value> {
1132 
1133         private static final long serialVersionUID = 1L;
1134 
1135         public GroovyURI(final String uriString) {
1136             super(uriString);
1137         }
1138 
1139         @Override
1140         public int compareTo(final Value other) {
1141             if (other instanceof URI) {
1142                 return stringValue().compareTo(other.stringValue());
1143             }
1144             return -1;
1145         }
1146 
1147     }
1148 
1149     static final class GroovyBNode extends BNodeImpl implements Comparable<Value> {
1150 
1151         private static final long serialVersionUID = 1L;
1152 
1153         public GroovyBNode(final String id) {
1154             super(id);
1155         }
1156 
1157         @Override
1158         public int compareTo(final Value other) {
1159             if (other instanceof BNode) {
1160                 return stringValue().compareTo(other.stringValue());
1161             } else if (other instanceof URI) {
1162                 return 1;
1163             } else {
1164                 return -1;
1165             }
1166         }
1167 
1168     }
1169 
1170     static final class GroovyLiteral extends LiteralImpl implements Comparable<Value> {
1171 
1172         private static final long serialVersionUID = 1L;
1173 
1174         GroovyLiteral(final String label, @Nullable final URI datatype) {
1175             super(label, datatype);
1176         }
1177 
1178         GroovyLiteral(final String label, @Nullable final String language) {
1179             super(label, language);
1180         }
1181 
1182         GroovyLiteral(final String label) {
1183             super(label);
1184         }
1185 
1186         @Override
1187         public int compareTo(final Value other) {
1188             if (other instanceof Literal) {
1189                 int result = 0;
1190                 if (other != this) {
1191                     final Literal l = (Literal) other;
1192                     result = getLabel().compareTo(l.getLabel());
1193                     if (result == 0) {
1194                         final String lang1 = getLanguage();
1195                         final String lang2 = l.getLanguage();
1196                         result = lang1 != null ? lang2 != null ? lang1.compareTo(lang2) : 1
1197                                 : lang2 != null ? -1 : 0;
1198                         if (result == 0) {
1199                             final URI dt1 = getDatatype();
1200                             final URI dt2 = l.getDatatype();
1201                             result = dt1 != null ? dt2 != null ? dt1.stringValue().compareTo(
1202                                     dt2.stringValue()) : 1 : dt2 != null ? -1 : 0;
1203                         }
1204                     }
1205                 }
1206                 return result;
1207             }
1208             return 1;
1209         }
1210 
1211     }
1212 
1213     static final class ValueSet {
1214 
1215         private final Set<String> hashSet;
1216 
1217         ValueSet(final Set<String> hashSet) {
1218             this.hashSet = hashSet;
1219         }
1220 
1221         public boolean match(final Object value) {
1222             if (value == null) {
1223                 throw new IllegalArgumentException("value cannot be null.");
1224             }
1225             final Value target = value instanceof Value ? (Value) value : toRDF(value, true);
1226             return this.hashSet.contains(valueToHash(target));
1227         }
1228 
1229         public boolean match(final Statement statement, final Object components) {
1230             final String parts = components.toString();
1231             if (parts.contains("s")) {
1232                 if (this.hashSet.contains(valueToHash(statement.getSubject()))) {
1233                     return true;
1234                 }
1235             }
1236             if (parts.contains("p")) {
1237                 if (this.hashSet.contains(valueToHash(statement.getPredicate()))) {
1238                     return true;
1239                 }
1240             }
1241             if (parts.contains("o")) {
1242                 if (this.hashSet.contains(valueToHash(statement.getObject()))) {
1243                     return true;
1244                 }
1245             }
1246             if (parts.contains("c")) {
1247                 final Value context = statement.getContext();
1248                 if (context != null && this.hashSet.contains(valueToHash(context))) {
1249                     return true;
1250                 }
1251             }
1252             return false;
1253         }
1254 
1255     }
1256 
1257 }