1   /*
2    * RDFpro - An extensible tool for building stream-oriented RDF processing libraries.
3    * 
4    * Written in 2014 by Francesco Corcoglioniti with support by Marco Amadori, Michele Mostarda,
5    * Alessio Palmero Aprosio and Marco Rospocher. Contact info on http://rdfpro.fbk.eu/
6    * 
7    * To the extent possible under law, the authors have dedicated all copyright and related and
8    * neighboring rights to this software to the public domain worldwide. This software is
9    * distributed without any warranty.
10   * 
11   * You should have received a copy of the CC0 Public Domain Dedication along with this software.
12   * If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
13   */
14  package eu.fbk.rdfpro.util;
15  
16  import java.io.BufferedReader;
17  import java.io.IOException;
18  import java.io.InputStreamReader;
19  import java.net.URL;
20  import java.nio.charset.Charset;
21  import java.util.AbstractMap;
22  import java.util.AbstractSet;
23  import java.util.ArrayList;
24  import java.util.Collection;
25  import java.util.Collections;
26  import java.util.Enumeration;
27  import java.util.HashSet;
28  import java.util.Iterator;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.Map.Entry;
32  import java.util.Objects;
33  import java.util.Set;
34  
35  import javax.annotation.Nullable;
36  
37  import org.openrdf.model.Namespace;
38  import org.openrdf.model.impl.NamespaceImpl;
39  
40  /**
41   * A specialized immutable {@code Namespace} set.
42   * <p>
43   * A {@code Namespaces} object is an immutable container of {@code prefix -> namespace URI}
44   * bindings, where multiple prefixes can map to the same namespace URI but there is at most a
45   * binding for any given prefix.
46   * </p>
47   * <p>
48   * Data in this class can be primarily accessed using lookup methods {@link #uriFor(String)},
49   * {@link #prefixFor(String)} and {@link #prefixesFor(String)}. In addition, the following views
50   * are offered:
51   * </p>
52   * <ul>
53   * <li>{@code set<Namespace object>} view, directly implemented by class instances;</li>
54   * <li>{@code prefix -> namespace URI} map view, produced by {@link #uriMap};</li>
55   * <li>{@code namespace URI -> prefix} map view, produced by {@link #prefixMap()};</li>
56   * <li>{@code set<prefix string>} view, produced by {@link #prefixes()};</li>
57   * <li>{@code set<URI string>} view, produced by {@link #uris()};</li>
58   * </ul>
59   * <p>
60   * Instances of this class can be created from:
61   * </p>
62   * <ul>
63   * <li>a {@code Namespace} iterable, by {@link #forIterable(Iterable, boolean)};</li>
64   * <li>a {@code prefix -> namespace URI} map, by {@link #forURIMap(Map)};</li>
65   * <li>a {@code namespace URI -> prefix} map, by {@link #forPrefixMap(Map, boolean)};</li>
66   * <li>a list of files identified by {@code URL}s, by {@link #load(Iterable, boolean)};</li>
67   * </ul>
68   * <p>
69   * This class is immutable and thus thread-safe.
70   * </p>
71   */
72  public final class Namespaces extends AbstractSet<Namespace> {
73  
74      public static final Namespaces DEFAULT;
75  
76      static {
77          try {
78              final List<URL> urls = new ArrayList<>();
79              final ClassLoader cl = Namespaces.class.getClassLoader();
80              for (final String p : new String[] { "META-INF/rdfpro.prefixes", "rdfpro.prefixes" }) {
81                  for (final Enumeration<URL> e = cl.getResources(p); e.hasMoreElements();) {
82                      urls.add(e.nextElement());
83                  }
84              }
85              DEFAULT = Namespaces.load(urls, true);
86          } catch (final IOException ex) {
87              throw new Error("Unexpected exception (!): " + ex.getMessage(), ex);
88          }
89      }
90  
91      private final EntryMap uriMap;
92  
93      private final EntryMap prefixMap;
94  
95      private final int[] uriTable;
96  
97      private final int[] prefixTable;
98  
99      private final String[] data; // uri, prefix pairs, sorted by uri, with uris interned
100 
101     private final int uriCount;
102 
103     private final int neighborhood;
104 
105     private Namespaces(final List<URIPrefixPair> pairs, final boolean resolveClashes) {
106 
107         List<URIPrefixPair> filteredPairs = pairs;
108         if (resolveClashes) {
109             final Set<String> prefixes = new HashSet<>();
110             filteredPairs = new ArrayList<URIPrefixPair>(pairs);
111             for (final Iterator<URIPrefixPair> i = filteredPairs.iterator(); i.hasNext();) {
112                 final URIPrefixPair pair = i.next();
113                 if (!prefixes.add(pair.prefix)) {
114                     i.remove(); // keep only first pair for given prefix
115                 }
116             }
117         }
118         Collections.sort(filteredPairs);
119 
120         final int size = filteredPairs.size();
121         final int tableSize = size + size / 2; // target fill factor = 0.66
122         int neighborhood = 4; // first attempt
123 
124         this.uriMap = new EntryMap(true);
125         this.prefixMap = new EntryMap(false);
126         this.prefixTable = new int[2 * tableSize];
127         this.uriTable = new int[2 * tableSize];
128         this.data = new String[size * 2];
129 
130         int pointer = 0;
131         String uri = null;
132         int uriCount = 0;
133         for (int i = 0; i < filteredPairs.size(); ++i) {
134             final URIPrefixPair pair = filteredPairs.get(i);
135             final String prefix = pair.prefix;
136             if (!pair.uri.equals(uri)) {
137                 uri = pair.uri;
138                 ++uriCount;
139             }
140             this.data[pointer++] = uri;
141             this.data[pointer] = prefix;
142             neighborhood = insert(this.uriTable, uri.hashCode(), pointer, neighborhood, null);
143             neighborhood = insert(this.prefixTable, prefix.hashCode(), pointer, neighborhood,
144                     prefix);
145             ++pointer;
146         }
147 
148         this.uriCount = uriCount;
149         this.neighborhood = neighborhood;
150     }
151 
152     private int insert(final int[] table, final int hash, final int pointer,
153             final int neighborhood, final String prefixToCheck) {
154 
155         // see http://en.wikipedia.org/wiki/Hopscotch_hashing
156 
157         final int buckets = table.length / 2;
158         int index = (hash & 0x7FFFFFFF) % buckets;
159         int offset = index * 2;
160         int distance = 0;
161         while (table[offset] != 0) {
162             if (prefixToCheck != null && table[offset + 1] == hash
163                     && prefixToCheck.equals(this.data[table[offset]])) {
164                 throw new IllegalArgumentException("Duplicate prefix " + prefixToCheck);
165             }
166             index = (index + 1) % buckets;
167             offset = index * 2;
168             ++distance;
169         }
170         if (distance < neighborhood) {
171             table[offset] = pointer;
172             table[offset + 1] = hash;
173             return neighborhood;
174         }
175         for (int i = 1; i < neighborhood; ++i) {
176             int oldIndex = index - i;
177             oldIndex = oldIndex >= 0 ? oldIndex : oldIndex + buckets;
178             final int oldOffset = oldIndex * 2;
179             int oldDistance = index - Math.abs(table[oldOffset + 1]) % buckets;
180             oldDistance = oldDistance >= 0 ? oldDistance : oldDistance + buckets;
181             if (oldDistance < neighborhood) {
182                 table[offset] = table[oldOffset];
183                 table[offset + 1] = table[oldOffset + 1];
184                 table[oldOffset] = 0;
185                 table[oldOffset + 1] = 0;
186                 return insert(table, hash, pointer, neighborhood, prefixToCheck);
187             }
188         }
189         return insert(table, hash, pointer, neighborhood + 1, prefixToCheck);
190     }
191 
192     private int lookup(final String string, final int[] table, final int field) {
193         final int hash = string.hashCode();
194         int offset = 2 * ((hash & 0x7FFFFFFF) % (table.length / 2));
195         for (int i = 0; i < this.neighborhood; ++i) {
196             final int prefixIndex = table[offset];
197             if (prefixIndex == 0) {
198                 return -1;
199             }
200             final int storedHash = table[offset + 1];
201             if (storedHash == hash) {
202                 final int stringIndex = prefixIndex - 1 + field;
203                 final String storedString = this.data[stringIndex];
204                 if (storedString.equals(string)) {
205                     return prefixIndex - 1;
206                 }
207             }
208             offset = (offset + 2) % table.length;
209         }
210         return -1;
211     }
212 
213     /**
214      * Creates a {@code Namespaces} set by loading the namespace declarations in the files at the
215      * URLs specified. The text files must be encoded in UTF-8. Each line has the format
216      * {@code namespace_URI prefix_1 ... prefix_N}, where components are separated by whitespaces.
217      *
218      * @param urls
219      *            the URLs identifying the files to load, not null
220      * @param resolveClashes
221      *            true in case a clash caused by the same prefix being mapped to different
222      *            namespace URIs should be silently resolved by keeping only the first mapping in
223      *            the supplied iterable
224      * @return the created {@code Namespaces} object
225      * @throws IOException
226      *             on IO error
227      * @throws IllegalArgumentException
228      *             in case {@code resolveClashes} is false and the {@code Iterable} associates the
229      *             same prefix to different namespace URIs
230      */
231     public static Namespaces load(final Iterable<URL> urls, final boolean resolveClashes)
232             throws IOException {
233 
234         final List<URIPrefixPair> pairs = new ArrayList<>();
235 
236         for (final URL url : urls) {
237             try (final BufferedReader reader = new BufferedReader(new InputStreamReader(
238                     url.openStream(), Charset.forName("UTF-8")))) {
239                 String line;
240                 while ((line = reader.readLine()) != null) {
241                     final String[] tokens = line.split("\\s+");
242                     if (tokens.length >= 2 && !tokens[0].startsWith("#")) {
243                         for (int i = 1; i < tokens.length; ++i) {
244                             pairs.add(new URIPrefixPair(tokens[0], tokens[i]));
245                         }
246                     }
247                 }
248             }
249         }
250 
251         return new Namespaces(pairs, resolveClashes);
252     }
253 
254     /**
255      * Returns a {@code Namespaces} set for the {@code Namespace} {@code Iterable} supplied. In
256      * case the {@code Iterable} is itself a {@code Namespaces} object, it will be directly
257      * returned.
258      *
259      * @param iterable
260      *            the {@code Iterable} where to load namespaces from, not null
261      * @param resolveClashes
262      *            true in case a clash caused by the same prefix being mapped to different
263      *            namespace URIs should be silently resolved by keeping only the first mapping in
264      *            the supplied iterable
265      * @return the corresponding {@code Namespaces} object
266      * @throws IllegalArgumentException
267      *             in case {@code resolveClashes} is false and the {@code Iterable} associates the
268      *             same prefix to different namespace URIs
269      */
270     public static Namespaces forIterable(final Iterable<? extends Namespace> iterable,
271             final boolean resolveClashes) throws IllegalArgumentException {
272         if (iterable instanceof Namespaces) {
273             return (Namespaces) iterable;
274         }
275         final int size = iterable instanceof Collection ? ((Collection<?>) iterable).size() : 1024;
276         final List<URIPrefixPair> pairs = new ArrayList<URIPrefixPair>(size);
277         for (final Namespace namespace : iterable) {
278             pairs.add(new URIPrefixPair(namespace.getName(), namespace.getPrefix()));
279         }
280         return new Namespaces(pairs, resolveClashes);
281     }
282 
283     /**
284      * Returns a {@code Namespaces} set for the {@code prefix -> namespace URI} map supplied. In
285      * case the map is the {@link #uriMap()} of another {@code Namespaces} object, that object
286      * will be directly returned.
287      *
288      * @param map
289      *            the {@code prefix -> namespace URI} map, not null
290      * @return the corresponding {@code Namespaces} object
291      */
292     public static Namespaces forURIMap(final Map<String, String> map) {
293 
294         if (map instanceof EntryMap) {
295             final EntryMap entryMap = (EntryMap) map;
296             if (entryMap.keyIsPrefix) {
297                 return entryMap.getNamespaces();
298             }
299         }
300 
301         final List<URIPrefixPair> pairs = new ArrayList<URIPrefixPair>(map.size());
302         for (final Map.Entry<String, String> entry : map.entrySet()) {
303             pairs.add(new URIPrefixPair(entry.getValue(), entry.getKey()));
304         }
305         return new Namespaces(pairs, false); // no clashes possible
306     }
307 
308     /**
309      * Returns a {@code Namespaces} set for the {@code namespace URI -> prefix} map supplied. In
310      * case the map is the {@link #prefixMap()} of another {@code Namespaces} object, that object
311      * will be directly returned.
312      *
313      * @param map
314      *            the {@code namespace URI -> prefix} map, not null
315      * @param resolveClashes
316      *            true in case a clash caused by the same prefix being mapped to different
317      *            namespace URIs should be silently resolved by keeping only the first mapping in
318      *            the supplied iterable
319      * @return the corresponding {@code Namespaces} object
320      * @throws IllegalArgumentException
321      *             in case {@code resolveClashes} is false and the {@code Iterable} associates the
322      *             same prefix to different namespace URIs
323      */
324     public static Namespaces forPrefixMap(final Map<String, String> map,
325             final boolean resolveClashes) throws IllegalArgumentException {
326 
327         if (map instanceof EntryMap) {
328             final EntryMap entryMap = (EntryMap) map;
329             if (!entryMap.keyIsPrefix) {
330                 return entryMap.getNamespaces();
331             }
332         }
333 
334         final List<URIPrefixPair> pairs = new ArrayList<URIPrefixPair>(map.size());
335         for (final Map.Entry<String, String> entry : map.entrySet()) {
336             pairs.add(new URIPrefixPair(entry.getKey(), entry.getValue()));
337         }
338         return new Namespaces(pairs, resolveClashes);
339     }
340 
341     @Override
342     public int size() {
343         return this.data.length / 2;
344     }
345 
346     @Override
347     public boolean isEmpty() {
348         return this.data.length > 0;
349     }
350 
351     @Override
352     public boolean contains(@Nullable final Object object) {
353         if (object instanceof Namespace) {
354             final Namespace ns = (Namespace) object;
355             return Objects.equals(ns.getName(), uriFor(ns.getPrefix()));
356         }
357         return false;
358     }
359 
360     /**
361      * Returns a {@code Set} view of the prefixes in this {@code Namespace} set.
362      *
363      * @return a {@code Set} view of prefixes
364      */
365     public Set<String> prefixes() {
366         return this.uriMap.keySet();
367     }
368 
369     /**
370      * Returns all the prefixes bound to the namespace URI specified.
371      *
372      * @param uri
373      *            the namespace URI, not null
374      * @return a non-null list with the prefixes bound to the give URI
375      */
376     public List<String> prefixesFor(final String uri) {
377         int index = lookup(uri, this.uriTable, 0);
378         if (index < 0) {
379             return null;
380         }
381         final String storedURI = this.data[index];
382         final List<String> result = new ArrayList<>();
383         while (this.data[index] == storedURI && index < this.data.length) {
384             result.add(this.data[index + 1]);
385             index += 2;
386         }
387         return result;
388     }
389 
390     /**
391      * Returns the first prefix bound to the namespace URI specified, or null if it does not
392      * exist.
393      *
394      * @param uri
395      *            the namespace URI, not null
396      * @return the first prefix bound to the namespace URI specified, if it exists, otherwise null
397      */
398     @Nullable
399     public String prefixFor(final String uri) {
400         final int index = lookup(uri, this.uriTable, 0);
401         return index < 0 ? null : this.data[index + 1];
402     }
403 
404     /**
405      * Returns a {@code namespace URI -> prefix} map view. Note that the map will report only the
406      * first prefix bound to a namespace URI, in case multiple prefixes are defined for that URI.
407      *
408      * @return a {@code namespace URI -> prefix} map view
409      */
410     public Map<String, String> prefixMap() {
411         return this.prefixMap;
412     }
413 
414     /**
415      * Returns a {@code Set} view of the namespace URIs in this {@code Namespace} set.
416      *
417      * @return a {@code Set} view of namespace URIs
418      */
419     public Set<String> uris() {
420         return this.prefixMap.keySet();
421     }
422 
423     /**
424      * Returns the namespace URI for the prefix specified, or null if it does not exist.
425      *
426      * @param prefix
427      *            the prefix, not null
428      * @return the namespace URI for the prefix specified, if it exists, otherwise null
429      */
430     @Nullable
431     public String uriFor(final String prefix) {
432         final int index = lookup(prefix, this.prefixTable, 1);
433         return index < 0 ? null : this.data[index];
434     }
435 
436     /**
437      * Returns a {@code prefix -> namespace URI} map view.
438      *
439      * @return a {@code prefix -> namespace URI} map view
440      */
441     public Map<String, String> uriMap() {
442         return this.uriMap;
443     }
444 
445     @Override
446     public Iterator<Namespace> iterator() {
447         return new NamespaceIterator();
448     }
449 
450     @Override
451     public Namespace[] toArray() {
452         return super.toArray(new Namespace[size()]);
453     }
454 
455     private class EntryMap extends AbstractMap<String, String> {
456 
457         final boolean keyIsPrefix;
458 
459         final EntrySet entries;
460 
461         EntryMap(final boolean keyIsPrefix) {
462             this.keyIsPrefix = keyIsPrefix;
463             this.entries = new EntrySet(keyIsPrefix);
464         }
465 
466         Namespaces getNamespaces() {
467             return Namespaces.this;
468         }
469 
470         @Override
471         public Set<Map.Entry<String, String>> entrySet() {
472             return this.entries;
473         }
474 
475         @Override
476         public String get(final Object key) {
477             if (key instanceof String) {
478                 final String s = (String) key;
479                 return this.keyIsPrefix ? uriFor(s) : prefixFor(s);
480             }
481             return null;
482         }
483 
484         @Override
485         public boolean containsKey(final Object key) {
486             if (key instanceof String) {
487                 final String s = (String) key;
488                 return this.keyIsPrefix ? uriFor(s) != null : prefixFor(s) != null;
489             }
490             return false;
491         }
492 
493         @Override
494         public boolean containsValue(final Object value) {
495             if (value instanceof String) {
496                 final String s = (String) value;
497                 return this.keyIsPrefix ? prefixFor(s) != null : uriFor(s) != null;
498             }
499             return false;
500         }
501 
502     }
503 
504     private class EntrySet extends AbstractSet<Map.Entry<String, String>> {
505 
506         final boolean keyIsPrefix;
507 
508         EntrySet(final boolean keyIsPrefix) {
509             this.keyIsPrefix = keyIsPrefix;
510         }
511 
512         @Override
513         public Iterator<Map.Entry<String, String>> iterator() {
514             return new EntryIterator(this.keyIsPrefix);
515         }
516 
517         @Override
518         public int size() {
519             return this.keyIsPrefix ? Namespaces.this.size() : Namespaces.this.uriCount;
520         }
521 
522         @Override
523         public boolean contains(final Object object) {
524             if (object instanceof Map.Entry<?, ?>) {
525                 final Map.Entry<?, ?> entry = (Map.Entry<?, ?>) object;
526                 if (entry.getKey() instanceof String && entry.getValue() instanceof String) {
527                     final String key = (String) entry.getKey();
528                     final String value = (String) entry.getValue();
529                     return this.keyIsPrefix ? Objects.equals(uriFor(key), value) : Objects.equals(
530                             prefixFor(key), value);
531                 }
532             }
533             return false;
534         }
535 
536     }
537 
538     private class EntryIterator implements Iterator<Map.Entry<String, String>> {
539 
540         final boolean prefixToUri;
541 
542         int offset;
543 
544         EntryIterator(final boolean prefixToUri) {
545             this.prefixToUri = prefixToUri;
546             this.offset = 0;
547         }
548 
549         @Override
550         public boolean hasNext() {
551             return this.offset < Namespaces.this.data.length;
552         }
553 
554         @Override
555         public Entry<String, String> next() {
556             final String uri = Namespaces.this.data[this.offset];
557             final String prefix = Namespaces.this.data[this.offset + 1];
558             this.offset += 2;
559             if (this.prefixToUri) {
560                 return new AbstractMap.SimpleImmutableEntry<String, String>(prefix, uri);
561             } else {
562                 while (this.offset < Namespaces.this.data.length
563                         && Namespaces.this.data[this.offset] == Namespaces.this.data[this.offset - 2]) {
564                     this.offset += 2;
565                 }
566                 return new AbstractMap.SimpleImmutableEntry<String, String>(uri, prefix);
567             }
568         }
569 
570     }
571 
572     private class NamespaceIterator implements Iterator<Namespace> {
573 
574         int offset;
575 
576         @Override
577         public boolean hasNext() {
578             return this.offset < Namespaces.this.data.length;
579         }
580 
581         @Override
582         public Namespace next() {
583             final String uri = Namespaces.this.data[this.offset];
584             final String prefix = Namespaces.this.data[this.offset + 1];
585             this.offset += 2;
586             return new NamespaceImpl(prefix, uri);
587         }
588 
589     }
590 
591     private static class URIPrefixPair implements Comparable<URIPrefixPair> {
592 
593         public final String uri;
594 
595         public final String prefix;
596 
597         public URIPrefixPair(final String uri, final String prefix) {
598             this.uri = Objects.requireNonNull(uri);
599             this.prefix = Objects.requireNonNull(prefix);
600         }
601 
602         @Override
603         public int compareTo(final URIPrefixPair other) {
604             return this.uri.compareTo(other.uri);
605         }
606 
607     }
608 
609 }