1   /*
2    * RDFpro - An extensible tool for building stream-oriented RDF processing libraries.
3    * 
4    * Written in 2014 by Francesco Corcoglioniti with support by Marco Amadori, Michele Mostarda,
5    * Alessio Palmero Aprosio and Marco Rospocher. Contact info on http://rdfpro.fbk.eu/
6    * 
7    * To the extent possible under law, the authors have dedicated all copyright and related and
8    * neighboring rights to this software to the public domain worldwide. This software is
9    * distributed without any warranty.
10   * 
11   * You should have received a copy of the CC0 Public Domain Dedication along with this software.
12   * If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
13   */
14  package eu.fbk.rdfpro;
15  
16  import java.util.ArrayList;
17  import java.util.Arrays;
18  import java.util.Collections;
19  import java.util.HashSet;
20  import java.util.List;
21  import java.util.Map;
22  import java.util.Objects;
23  import java.util.Set;
24  import java.util.concurrent.ConcurrentHashMap;
25  
26  import javax.annotation.Nullable;
27  
28  import org.openrdf.model.Resource;
29  import org.openrdf.model.Statement;
30  import org.openrdf.model.URI;
31  import org.openrdf.model.Value;
32  import org.openrdf.model.vocabulary.OWL;
33  import org.openrdf.model.vocabulary.RDF;
34  import org.openrdf.model.vocabulary.RDFS;
35  import org.openrdf.rio.RDFHandler;
36  import org.openrdf.rio.RDFHandlerException;
37  import org.slf4j.Logger;
38  import org.slf4j.LoggerFactory;
39  
40  import eu.fbk.rdfpro.util.Namespaces;
41  import eu.fbk.rdfpro.util.Statements;
42  
43  final class ProcessorTBox implements RDFProcessor {
44  
45      static final RDFProcessor INSTANCE = new ProcessorTBox();
46  
47      private static final Logger LOGGER = LoggerFactory.getLogger(ProcessorTBox.class);
48  
49      private ProcessorTBox() {
50      }
51  
52      @Override
53      public RDFHandler wrap(final RDFHandler handler) {
54          return new Handler(Objects.requireNonNull(handler));
55      }
56  
57      final static class Handler extends AbstractRDFHandlerWrapper {
58  
59          private static final int NUM_LOCKS = 128;
60  
61          private final Map<URI, Term> terms;
62  
63          private final Object[] locks;
64  
65          Handler(@Nullable final RDFHandler handler) {
66              super(handler);
67              this.terms = new ConcurrentHashMap<URI, Term>();
68              this.locks = new Object[NUM_LOCKS];
69              for (int i = 0; i < this.locks.length; ++i) {
70                  this.locks[i] = new Object();
71              }
72          }
73  
74          @Override
75          public void startRDF() throws RDFHandlerException {
76              super.startRDF();
77              this.terms.clear();
78              for (final URI type : Statements.TBOX_CLASSES) {
79                  this.terms.put(type, new Term(true, false, true, false));
80              }
81              for (final URI property : Statements.TBOX_PROPERTIES) {
82                  this.terms.put(property, new Term(true, true, true, false));
83              }
84          }
85  
86          @Override
87          public void handleComment(final String comment) throws RDFHandlerException {
88              // discarded
89          }
90  
91          @Override
92          public void handleStatement(final Statement statement) throws RDFHandlerException {
93  
94              final Resource s = statement.getSubject();
95              final URI p = statement.getPredicate();
96              final Value o = statement.getObject();
97  
98              boolean emit = false;
99  
100             if (!p.equals(RDF.TYPE)) {
101                 synchronized (getLock(p)) {
102                     Term term = this.terms.get(p);
103                     if (term == null) {
104                         term = new Term(false, true, false, true);
105                         this.terms.put(p, term);
106                     } else if (term.isLanguage) {
107                         term.isUsed = true;
108                         emit = true;
109                     }
110                 }
111             } else if (o instanceof URI) {
112                 synchronized (getLock(o)) {
113                     Term term = this.terms.get(o);
114                     if (term == null) {
115                         term = new Term(false, false, false, true);
116                         this.terms.put((URI) o, term);
117                     } else if (term.isLanguage) {
118                         term.isUsed = true;
119                         emit = true;
120                     }
121                 }
122                 if (s instanceof URI) {
123                     final boolean isType = o.equals(RDFS.CLASS) || o.equals(OWL.CLASS);
124                     final boolean isProperty = o.equals(RDF.PROPERTY)
125                             || o.equals(OWL.DATATYPEPROPERTY) || o.equals(OWL.OBJECTPROPERTY)
126                             || o.equals(OWL.ANNOTATIONPROPERTY);
127                     if (isType || isProperty) {
128                         synchronized (getLock(s)) {
129                             Term sterm = this.terms.get(s);
130                             if (sterm == null) {
131                                 sterm = new Term(false, isProperty, true, true);
132                                 this.terms.put((URI) s, sterm);
133                             } else {
134                                 sterm.isDefined = true;
135                             }
136                         }
137                     }
138                 }
139             }
140 
141             if (emit) {
142                 super.handleStatement(statement);
143             }
144         }
145 
146         @Override
147         public void endRDF() throws RDFHandlerException {
148 
149             if (LOGGER.isInfoEnabled()) {
150 
151                 int numTypes = 0;
152                 int numTypesDefined = 0;
153 
154                 int numProperties = 0;
155                 int numPropertiesDefined = 0;
156 
157                 final List<String> languageTypes = new ArrayList<String>();
158                 final List<String> languageProperties = new ArrayList<String>();
159                 final Set<String> undefinedVocabularies = new HashSet<String>();
160 
161                 for (final Map.Entry<URI, Term> entry : this.terms.entrySet()) {
162                     final URI uri = entry.getKey();
163                     final Term term = entry.getValue();
164                     if (term.isLanguage) {
165                         if (term.isUsed) {
166                             final String s = Statements.formatValue(uri, Namespaces.DEFAULT);
167                             if (term.isProperty) {
168                                 languageProperties.add(s);
169                             } else {
170                                 languageTypes.add(s);
171                             }
172                         }
173                     } else {
174                         if (term.isProperty) {
175                             ++numProperties;
176                             numPropertiesDefined += term.isDefined ? 1 : 0;
177                         } else {
178                             ++numTypes;
179                             numTypesDefined += term.isDefined ? 1 : 0;
180                         }
181                         if (!term.isDefined) {
182                             undefinedVocabularies.add(uri.getNamespace());
183                         }
184                     }
185                 }
186 
187                 Collections.sort(languageTypes);
188                 Collections.sort(languageProperties);
189 
190                 if (numTypes > 0) {
191                     LOGGER.info("Found " + numTypes + " classes (" + numTypesDefined + " defined)");
192                 }
193                 if (numProperties > 0) {
194                     LOGGER.info("Found " + numProperties + " properties (" + numPropertiesDefined
195                             + " defined)");
196                 }
197                 if (!languageTypes.isEmpty()) {
198                     LOGGER.info("Found language classes: " + String.join(" ", languageTypes));
199                 }
200                 if (!languageProperties.isEmpty()) {
201                     LOGGER.info("Found language properties: "
202                             + String.join(" ", languageProperties));
203                 }
204 
205                 if (!undefinedVocabularies.isEmpty()) {
206                     for (final String ns1 : new ArrayList<String>(undefinedVocabularies)) {
207                         for (final String ns2 : undefinedVocabularies) {
208                             if (ns1 != ns2 && ns1.startsWith(ns2)) {
209                                 undefinedVocabularies.remove(ns1);
210                                 break;
211                             }
212                         }
213                     }
214                     final StringBuilder builder = new StringBuilder(
215                             "Found undefined vocabularies:");
216 
217                     final String[] sortedVocabularies = new String[undefinedVocabularies.size()];
218                     undefinedVocabularies.toArray(sortedVocabularies);
219                     Arrays.sort(sortedVocabularies);
220 
221                     for (final String ns : sortedVocabularies) {
222                         builder.append("\n- ").append(ns);
223                     }
224                     LOGGER.info(builder.toString());
225                 }
226             }
227 
228             this.terms.clear();
229             super.endRDF();
230         }
231 
232         private Object getLock(final Value value) {
233             final String s = value.stringValue(); // assume URI with >= 3 chars
234             final int length = s.length();
235             final int index = s.charAt(length - 1) * 37 + s.charAt(length - 2);
236             return this.locks[(index & 0x7FFFFFFF) % NUM_LOCKS];
237         }
238 
239         private static class Term {
240 
241             boolean isLanguage;
242 
243             boolean isProperty;
244 
245             boolean isDefined;
246 
247             boolean isUsed;
248 
249             Term(final boolean isLanguage, final boolean isProperty, final boolean isDefined,
250                     final boolean isUsed) {
251                 this.isLanguage = isLanguage;
252                 this.isProperty = isProperty;
253                 this.isDefined = isDefined;
254                 this.isUsed = isUsed;
255             }
256 
257         }
258 
259     }
260 
261 }