1
2
3
4
5
6
7
8
9
10
11
12
13
14 package eu.fbk.rdfpro;
15
16 import java.util.ArrayList;
17 import java.util.Arrays;
18 import java.util.Collections;
19 import java.util.HashSet;
20 import java.util.List;
21 import java.util.Map;
22 import java.util.Objects;
23 import java.util.Set;
24 import java.util.concurrent.ConcurrentHashMap;
25
26 import javax.annotation.Nullable;
27
28 import org.openrdf.model.Resource;
29 import org.openrdf.model.Statement;
30 import org.openrdf.model.URI;
31 import org.openrdf.model.Value;
32 import org.openrdf.model.vocabulary.OWL;
33 import org.openrdf.model.vocabulary.RDF;
34 import org.openrdf.model.vocabulary.RDFS;
35 import org.openrdf.rio.RDFHandler;
36 import org.openrdf.rio.RDFHandlerException;
37 import org.slf4j.Logger;
38 import org.slf4j.LoggerFactory;
39
40 import eu.fbk.rdfpro.util.Namespaces;
41 import eu.fbk.rdfpro.util.Statements;
42
43 final class ProcessorTBox implements RDFProcessor {
44
45 static final RDFProcessor INSTANCE = new ProcessorTBox();
46
47 private static final Logger LOGGER = LoggerFactory.getLogger(ProcessorTBox.class);
48
49 private ProcessorTBox() {
50 }
51
52 @Override
53 public RDFHandler wrap(final RDFHandler handler) {
54 return new Handler(Objects.requireNonNull(handler));
55 }
56
57 final static class Handler extends AbstractRDFHandlerWrapper {
58
59 private static final int NUM_LOCKS = 128;
60
61 private final Map<URI, Term> terms;
62
63 private final Object[] locks;
64
65 Handler(@Nullable final RDFHandler handler) {
66 super(handler);
67 this.terms = new ConcurrentHashMap<URI, Term>();
68 this.locks = new Object[NUM_LOCKS];
69 for (int i = 0; i < this.locks.length; ++i) {
70 this.locks[i] = new Object();
71 }
72 }
73
74 @Override
75 public void startRDF() throws RDFHandlerException {
76 super.startRDF();
77 this.terms.clear();
78 for (final URI type : Statements.TBOX_CLASSES) {
79 this.terms.put(type, new Term(true, false, true, false));
80 }
81 for (final URI property : Statements.TBOX_PROPERTIES) {
82 this.terms.put(property, new Term(true, true, true, false));
83 }
84 }
85
86 @Override
87 public void handleComment(final String comment) throws RDFHandlerException {
88
89 }
90
91 @Override
92 public void handleStatement(final Statement statement) throws RDFHandlerException {
93
94 final Resource s = statement.getSubject();
95 final URI p = statement.getPredicate();
96 final Value o = statement.getObject();
97
98 boolean emit = false;
99
100 if (!p.equals(RDF.TYPE)) {
101 synchronized (getLock(p)) {
102 Term term = this.terms.get(p);
103 if (term == null) {
104 term = new Term(false, true, false, true);
105 this.terms.put(p, term);
106 } else if (term.isLanguage) {
107 term.isUsed = true;
108 emit = true;
109 }
110 }
111 } else if (o instanceof URI) {
112 synchronized (getLock(o)) {
113 Term term = this.terms.get(o);
114 if (term == null) {
115 term = new Term(false, false, false, true);
116 this.terms.put((URI) o, term);
117 } else if (term.isLanguage) {
118 term.isUsed = true;
119 emit = true;
120 }
121 }
122 if (s instanceof URI) {
123 final boolean isType = o.equals(RDFS.CLASS) || o.equals(OWL.CLASS);
124 final boolean isProperty = o.equals(RDF.PROPERTY)
125 || o.equals(OWL.DATATYPEPROPERTY) || o.equals(OWL.OBJECTPROPERTY)
126 || o.equals(OWL.ANNOTATIONPROPERTY);
127 if (isType || isProperty) {
128 synchronized (getLock(s)) {
129 Term sterm = this.terms.get(s);
130 if (sterm == null) {
131 sterm = new Term(false, isProperty, true, true);
132 this.terms.put((URI) s, sterm);
133 } else {
134 sterm.isDefined = true;
135 }
136 }
137 }
138 }
139 }
140
141 if (emit) {
142 super.handleStatement(statement);
143 }
144 }
145
146 @Override
147 public void endRDF() throws RDFHandlerException {
148
149 if (LOGGER.isInfoEnabled()) {
150
151 int numTypes = 0;
152 int numTypesDefined = 0;
153
154 int numProperties = 0;
155 int numPropertiesDefined = 0;
156
157 final List<String> languageTypes = new ArrayList<String>();
158 final List<String> languageProperties = new ArrayList<String>();
159 final Set<String> undefinedVocabularies = new HashSet<String>();
160
161 for (final Map.Entry<URI, Term> entry : this.terms.entrySet()) {
162 final URI uri = entry.getKey();
163 final Term term = entry.getValue();
164 if (term.isLanguage) {
165 if (term.isUsed) {
166 final String s = Statements.formatValue(uri, Namespaces.DEFAULT);
167 if (term.isProperty) {
168 languageProperties.add(s);
169 } else {
170 languageTypes.add(s);
171 }
172 }
173 } else {
174 if (term.isProperty) {
175 ++numProperties;
176 numPropertiesDefined += term.isDefined ? 1 : 0;
177 } else {
178 ++numTypes;
179 numTypesDefined += term.isDefined ? 1 : 0;
180 }
181 if (!term.isDefined) {
182 undefinedVocabularies.add(uri.getNamespace());
183 }
184 }
185 }
186
187 Collections.sort(languageTypes);
188 Collections.sort(languageProperties);
189
190 if (numTypes > 0) {
191 LOGGER.info("Found " + numTypes + " classes (" + numTypesDefined + " defined)");
192 }
193 if (numProperties > 0) {
194 LOGGER.info("Found " + numProperties + " properties (" + numPropertiesDefined
195 + " defined)");
196 }
197 if (!languageTypes.isEmpty()) {
198 LOGGER.info("Found language classes: " + String.join(" ", languageTypes));
199 }
200 if (!languageProperties.isEmpty()) {
201 LOGGER.info("Found language properties: "
202 + String.join(" ", languageProperties));
203 }
204
205 if (!undefinedVocabularies.isEmpty()) {
206 for (final String ns1 : new ArrayList<String>(undefinedVocabularies)) {
207 for (final String ns2 : undefinedVocabularies) {
208 if (ns1 != ns2 && ns1.startsWith(ns2)) {
209 undefinedVocabularies.remove(ns1);
210 break;
211 }
212 }
213 }
214 final StringBuilder builder = new StringBuilder(
215 "Found undefined vocabularies:");
216
217 final String[] sortedVocabularies = new String[undefinedVocabularies.size()];
218 undefinedVocabularies.toArray(sortedVocabularies);
219 Arrays.sort(sortedVocabularies);
220
221 for (final String ns : sortedVocabularies) {
222 builder.append("\n- ").append(ns);
223 }
224 LOGGER.info(builder.toString());
225 }
226 }
227
228 this.terms.clear();
229 super.endRDF();
230 }
231
232 private Object getLock(final Value value) {
233 final String s = value.stringValue();
234 final int length = s.length();
235 final int index = s.charAt(length - 1) * 37 + s.charAt(length - 2);
236 return this.locks[(index & 0x7FFFFFFF) % NUM_LOCKS];
237 }
238
239 private static class Term {
240
241 boolean isLanguage;
242
243 boolean isProperty;
244
245 boolean isDefined;
246
247 boolean isUsed;
248
249 Term(final boolean isLanguage, final boolean isProperty, final boolean isDefined,
250 final boolean isUsed) {
251 this.isLanguage = isLanguage;
252 this.isProperty = isProperty;
253 this.isDefined = isDefined;
254 this.isUsed = isUsed;
255 }
256
257 }
258
259 }
260
261 }