1
2
3
4
5
6
7
8
9
10
11
12
13
14 package eu.fbk.rdfpro.util;
15
16 import java.math.BigDecimal;
17 import java.math.BigInteger;
18 import java.util.Arrays;
19 import java.util.concurrent.atomic.AtomicLong;
20
21 import javax.annotation.Nullable;
22 import javax.xml.datatype.DatatypeConstants;
23 import javax.xml.datatype.XMLGregorianCalendar;
24
25 import org.openrdf.model.BNode;
26 import org.openrdf.model.Literal;
27 import org.openrdf.model.Resource;
28 import org.openrdf.model.Statement;
29 import org.openrdf.model.URI;
30 import org.openrdf.model.Value;
31 import org.openrdf.model.ValueFactory;
32 import org.openrdf.model.vocabulary.XMLSchema;
33 import org.openrdf.rio.RDFHandler;
34 import org.openrdf.rio.RDFHandlerException;
35 import org.slf4j.Logger;
36 import org.slf4j.LoggerFactory;
37
38 import eu.fbk.rdfpro.AbstractRDFHandler;
39
40
41
42
43
44
45 abstract class Dictionary implements AutoCloseable {
46
47 private static final Logger LOGGER = LoggerFactory.getLogger(Dictionary.class);
48
49 private static final int[] PACKING_TYPES;
50
51 private static final SequentialDictionary<URI> INITIAL_DATATYPE_INDEX;
52
53 private static final int XSD_STRING_INDEX;
54
55 static final URI XSD_STRING = Statements.normalize(XMLSchema.STRING);
56
57 static final int NULL_CODE = 0xE0000000;
58
59 static final int TYPE_URI_FULL = 0;
60
61 static final int TYPE_URI_NAME = 1;
62
63 static final int TYPE_BNODE = 2;
64
65 static final int TYPE_LITERAL_PLAIN = 4;
66
67 static final int TYPE_LITERAL_LANG = 5;
68
69 static final int TYPE_LITERAL_DT = 6;
70
71 final static int PACKING_BIGDECIMAL = 1;
72
73 final static int PACKING_BIGINTEGER = 2;
74
75 final static int PACKING_LONG = 3;
76
77 final static int PACKING_DOUBLE = 4;
78
79 final static int PACKING_BOOLEAN = 5;
80
81 final static int PACKING_DATETIME = 6;
82
83 static {
84 final int[] t = new int[26];
85 final SequentialDictionary<URI> i = new SequentialDictionary<>(64 * 1024 - 2);
86 t[i.encode(Statements.normalize(XMLSchema.DECIMAL))] = PACKING_BIGDECIMAL;
87 t[i.encode(Statements.normalize(XMLSchema.INTEGER))] = PACKING_BIGINTEGER;
88 t[i.encode(Statements.normalize(XMLSchema.NON_POSITIVE_INTEGER))] = PACKING_BIGINTEGER;
89 t[i.encode(Statements.normalize(XMLSchema.NEGATIVE_INTEGER))] = PACKING_BIGINTEGER;
90 t[i.encode(Statements.normalize(XMLSchema.NON_NEGATIVE_INTEGER))] = PACKING_BIGINTEGER;
91 t[i.encode(Statements.normalize(XMLSchema.POSITIVE_INTEGER))] = PACKING_BIGINTEGER;
92 t[i.encode(Statements.normalize(XMLSchema.LONG))] = PACKING_LONG;
93 t[i.encode(Statements.normalize(XMLSchema.INT))] = PACKING_LONG;
94 t[i.encode(Statements.normalize(XMLSchema.SHORT))] = PACKING_LONG;
95 t[i.encode(Statements.normalize(XMLSchema.BYTE))] = PACKING_LONG;
96 t[i.encode(Statements.normalize(XMLSchema.UNSIGNED_LONG))] = PACKING_LONG;
97 t[i.encode(Statements.normalize(XMLSchema.UNSIGNED_INT))] = PACKING_LONG;
98 t[i.encode(Statements.normalize(XMLSchema.UNSIGNED_SHORT))] = PACKING_LONG;
99 t[i.encode(Statements.normalize(XMLSchema.UNSIGNED_BYTE))] = PACKING_LONG;
100 t[i.encode(Statements.normalize(XMLSchema.DOUBLE))] = PACKING_DOUBLE;
101 t[i.encode(Statements.normalize(XMLSchema.FLOAT))] = PACKING_DOUBLE;
102 t[i.encode(Statements.normalize(XMLSchema.BOOLEAN))] = PACKING_BOOLEAN;
103 t[i.encode(Statements.normalize(XMLSchema.DATETIME))] = PACKING_DATETIME;
104 t[i.encode(Statements.normalize(XMLSchema.DATE))] = PACKING_DATETIME;
105 t[i.encode(Statements.normalize(XMLSchema.TIME))] = PACKING_DATETIME;
106 t[i.encode(Statements.normalize(XMLSchema.GYEARMONTH))] = PACKING_DATETIME;
107 t[i.encode(Statements.normalize(XMLSchema.GMONTHDAY))] = PACKING_DATETIME;
108 t[i.encode(Statements.normalize(XMLSchema.GYEAR))] = PACKING_DATETIME;
109 t[i.encode(Statements.normalize(XMLSchema.GMONTH))] = PACKING_DATETIME;
110 t[i.encode(Statements.normalize(XMLSchema.GDAY))] = PACKING_DATETIME;
111
112 PACKING_TYPES = t;
113 INITIAL_DATATYPE_INDEX = i;
114 XSD_STRING_INDEX = i.encode(Statements.normalize(XMLSchema.STRING));
115
116
117
118
119
120
121
122 }
123
124 private final SequentialDictionary<String> namespaceIndex;
125
126 private final SequentialDictionary<String> languageIndex;
127
128 private final SequentialDictionary<URI> datatypeIndex;
129
130 private final EncodeCacheEntry[] encodeCache;
131
132 private final DecodeCacheEntry[] decodeCache;
133
134 private final AtomicLong encodeEmbeddedCounter;
135
136 private final AtomicLong encodeCacheCounter;
137
138 private final AtomicLong encodeIndexedCounter;
139
140 private final AtomicLong decodeEmbeddedCounter;
141
142 private final AtomicLong decodeCacheCounter;
143
144 private final AtomicLong decodeIndexedCounter;
145
146 Dictionary() {
147 this.namespaceIndex = new SequentialDictionary<>(64 * 1024 - 2);
148 this.languageIndex = new SequentialDictionary<>(64 * 1024 - 2);
149 this.datatypeIndex = new SequentialDictionary<>(INITIAL_DATATYPE_INDEX);
150 this.encodeCache = new EncodeCacheEntry[1024 - 1];
151 this.decodeCache = new DecodeCacheEntry[1024 - 1];
152 this.encodeEmbeddedCounter = new AtomicLong();
153 this.encodeCacheCounter = new AtomicLong();
154 this.encodeIndexedCounter = new AtomicLong();
155 this.decodeEmbeddedCounter = new AtomicLong();
156 this.decodeCacheCounter = new AtomicLong();
157 this.decodeIndexedCounter = new AtomicLong();
158 }
159
160 abstract int doEncode(int type, int index, String string);
161
162 abstract Value doDecode(int code);
163
164 int doType(final int code) {
165 final Value value = decode(code);
166 if (value instanceof BNode) {
167 return TYPE_BNODE;
168 } else if (value instanceof URI) {
169 return ((URI) value).getLocalName().isEmpty() ? TYPE_URI_FULL : TYPE_URI_NAME;
170 } else {
171 final Literal lit = (Literal) value;
172 return lit.getLanguage() != null ? TYPE_LITERAL_LANG : lit.getDatatype() != null
173 && !lit.getDatatype().equals(XSD_STRING) ? TYPE_LITERAL_DT
174 : TYPE_LITERAL_PLAIN;
175 }
176 }
177
178 void doToString(final StringBuilder builder) {
179 }
180
181 void doClose() {
182 }
183
184 final Value valueFor(final int type, final int index, final String string) {
185
186 final ValueFactory vf = Statements.VALUE_FACTORY;
187 switch (type) {
188 case TYPE_URI_FULL:
189 return vf.createURI(string);
190 case TYPE_URI_NAME:
191 return vf.createURI(this.namespaceIndex.decode(index), string);
192 case TYPE_BNODE:
193 return vf.createBNode(string);
194 case TYPE_LITERAL_PLAIN:
195 return vf.createLiteral(string);
196 case TYPE_LITERAL_DT:
197 return vf.createLiteral(string, this.datatypeIndex.decode(index));
198 case TYPE_LITERAL_LANG:
199 return vf.createLiteral(string, this.languageIndex.decode(index));
200 default:
201 throw new Error();
202 }
203 }
204
205 public static Dictionary newMemoryDictionary() {
206 return new MemoryDictionary();
207 }
208
209 public final RDFHandler encode(final QuadHandler sink) {
210 return new AbstractRDFHandler() {
211
212 @Override
213 public void startRDF() throws RDFHandlerException {
214 sink.start();
215 }
216
217 @Override
218 public void handleStatement(final Statement stmt) throws RDFHandlerException {
219 final int subj = encode(stmt.getSubject());
220 final int pred = encode(stmt.getPredicate());
221 final int obj = encode(stmt.getObject());
222 final int ctx = encode(stmt.getContext());
223 sink.handle(subj, pred, obj, ctx);
224 }
225
226 @Override
227 public void endRDF() throws RDFHandlerException {
228 sink.end();
229 }
230
231 @Override
232 public void close() {
233 sink.close();
234 }
235
236 };
237 }
238
239 void encodeCachePut(@Nullable final Value value, final int code) {
240 final int hash = value.hashCode() & 0x7FFFFFFF;
241 final int slotIndex = hash % this.encodeCache.length;
242 final EncodeCacheEntry entry = new EncodeCacheEntry(code, hash, value);
243 this.encodeCache[slotIndex] = entry;
244 }
245
246 public final int encode(@Nullable final Value value) {
247
248
249 if (value == null) {
250 return NULL_CODE;
251 }
252
253
254 int index = 0;
255 if (value instanceof Literal) {
256 final Literal l = (Literal) value;
257 if (l.getLanguage() == null) {
258 index = this.datatypeIndex.encode(l.getDatatype());
259 if (index < PACKING_TYPES.length) {
260 final int v = pack(PACKING_TYPES[index], l);
261 if (v >= 0) {
262 this.encodeEmbeddedCounter.incrementAndGet();
263 return 0xE0000000 | index << 24 | v;
264 }
265 }
266 }
267 }
268
269
270 final int hash = value.hashCode();
271 final int slotIndex = (hash & 0x7FFFFFFF) % this.encodeCache.length;
272 final EncodeCacheEntry entry = this.encodeCache[slotIndex];
273 if (entry != null && entry.hash == hash && entry.value.equals(value)) {
274 this.encodeCacheCounter.incrementAndGet();
275 return entry.code;
276 }
277
278
279 int type;
280 String string;
281 if (value instanceof Literal) {
282 final Literal l = (Literal) value;
283 string = l.getLabel();
284 if (l.getLanguage() != null) {
285 type = TYPE_LITERAL_LANG;
286 index = this.languageIndex.encode(l.getLanguage());
287 } else if (index != XSD_STRING_INDEX) {
288 type = TYPE_LITERAL_DT;
289 } else {
290 type = TYPE_LITERAL_PLAIN;
291 index = 0;
292 }
293 } else if (value instanceof URI) {
294 final URI u = (URI) value;
295 index = this.namespaceIndex.encode(u.getNamespace(), 0);
296 if (index > 0) {
297 type = TYPE_URI_NAME;
298 string = u.getLocalName();
299 } else {
300 type = TYPE_URI_FULL;
301 string = u.stringValue();
302 }
303 } else if (value instanceof BNode) {
304 type = TYPE_BNODE;
305 string = ((BNode) value).getID();
306 } else {
307 throw new Error();
308 }
309
310
311 final int code = doEncode(type, index, string);
312 this.encodeCache[slotIndex] = new EncodeCacheEntry(code, hash, value);
313 this.encodeIndexedCounter.incrementAndGet();
314 return code;
315 }
316
317 public final QuadHandler decode(final RDFHandler sink) {
318 return new QuadHandler() {
319
320 @Override
321 public void start() throws RDFHandlerException {
322 sink.startRDF();
323 }
324
325 @Override
326 public void handle(final int subj, final int pred, final int obj, final int ctx)
327 throws RDFHandlerException {
328 final Resource s = (Resource) decode(subj);
329 final URI p = (URI) decode(pred);
330 final Value o = decode(obj);
331 final Resource c = (Resource) decode(ctx);
332 sink.handleStatement(Statements.VALUE_FACTORY.createStatement(s, p, o, c));
333 }
334
335 @Override
336 public void end() throws RDFHandlerException {
337 sink.endRDF();
338 }
339
340 @Override
341 public void close() {
342 IO.closeQuietly(sink);
343 }
344
345 };
346 }
347
348 @Nullable
349 public final Value decode(final int code) {
350
351
352 if (code == NULL_CODE) {
353 return null;
354 }
355
356
357 if (code >>> 29 == 0x7) {
358 final int index = (code & 0x1F000000) >>> 24;
359 final URI datatype = this.datatypeIndex.decode(index);
360 final String label = unpack(PACKING_TYPES[index], code & 0xFFFFFF);
361 final Value value = Statements.VALUE_FACTORY.createLiteral(label, datatype);
362 this.decodeEmbeddedCounter.incrementAndGet();
363 return value;
364 }
365
366
367 final int hash = hashInt(code);
368 final int slotIndex = (hash & 0x7FFFFFFF) % this.decodeCache.length;
369 final DecodeCacheEntry entry = this.decodeCache[slotIndex];
370 if (entry != null && entry.code == code) {
371 this.decodeCacheCounter.incrementAndGet();
372 return entry.value;
373 }
374
375
376 final Value value = doDecode(code);
377 this.decodeCache[slotIndex] = new DecodeCacheEntry(code, value);
378 this.decodeIndexedCounter.incrementAndGet();
379 return value;
380 }
381
382 public final boolean isNull(final int code) {
383 return code == NULL_CODE;
384 }
385
386 public final boolean isResource(final int code) {
387 return doType(code) <= TYPE_BNODE;
388 }
389
390 public final boolean isURI(final int code) {
391 return doType(code) <= TYPE_URI_NAME;
392 }
393
394 public final boolean isBNode(final int code) {
395 return doType(code) == TYPE_BNODE;
396 }
397
398 public final boolean isLiteral(final int code) {
399 return doType(code) >= TYPE_LITERAL_PLAIN;
400 }
401
402 @Override
403 public final void close() {
404 doClose();
405 }
406
407 @Override
408 public final String toString() {
409 final StringBuilder builder = new StringBuilder(getClass().getSimpleName()).append(": ");
410 final int oldLength = builder.length();
411 doToString(builder);
412 builder.append(builder.length() > oldLength ? ", " : "");
413 builder.append(this.namespaceIndex.size()).append(" namespaces, ")
414 .append(this.datatypeIndex.size()).append(" datatypes, ")
415 .append(this.languageIndex.size()).append(" languages, ");
416 toStringHelper(builder, "encode", this.encodeEmbeddedCounter.get(),
417 this.encodeCacheCounter.get(), this.encodeIndexedCounter.get());
418 builder.append(", ");
419 toStringHelper(builder, "decode", this.decodeEmbeddedCounter.get(),
420 this.decodeCacheCounter.get(), this.decodeIndexedCounter.get());
421 return builder.toString();
422 }
423
424 private void toStringHelper(final StringBuilder builder, final String type,
425 final long embedded, final long cached, final long indexed) {
426 final long total = embedded + cached + indexed;
427 builder.append(total).append(" ").append(type).append(" calls (").append(embedded)
428 .append(" embedded, ").append(cached).append(" cached, ").append(indexed)
429 .append(" indexed)");
430 }
431
432 private static int pack(final int packingType, final Literal literal) {
433
434 switch (packingType) {
435 case PACKING_BIGDECIMAL:
436 final BigDecimal bd = literal.decimalValue();
437 final int scale = bd.scale();
438 if (scale < 1 << 4 && scale > -(1 << 4)) {
439 final BigInteger unscaled = bd.unscaledValue();
440 if (unscaled.bitLength() <= 18) {
441 return scale + (1 << 4) << 19 | unscaled.intValue() + (1 << 18);
442 }
443 }
444 break;
445
446 case PACKING_BIGINTEGER:
447 final BigInteger bi = literal.integerValue();
448 if (bi.bitLength() <= 23) {
449 return bi.intValue() + (1 << 23);
450 }
451 break;
452
453 case PACKING_LONG:
454 final long l = literal.longValue();
455 if (l < 1 << 23 && l >= -(1 << 23)) {
456 return (int) l + (1 << 23);
457 }
458 break;
459
460 case PACKING_DOUBLE:
461 final long bits = Double.doubleToLongBits(literal.doubleValue());
462 if ((bits & 0xFFFFFFFFFFL) == 0L) {
463 return (int) (bits >>> 40);
464 }
465 break;
466
467 case PACKING_BOOLEAN:
468 return literal.booleanValue() ? 1 : 0;
469
470 case PACKING_DATETIME:
471 final XMLGregorianCalendar c = literal.calendarValue();
472 if (c.getTimezone() == DatatypeConstants.FIELD_UNDEFINED
473 && c.getMillisecond() == DatatypeConstants.FIELD_UNDEFINED
474 && c.getSecond() == DatatypeConstants.FIELD_UNDEFINED
475 && c.getMinute() == DatatypeConstants.FIELD_UNDEFINED
476 && c.getHour() == DatatypeConstants.FIELD_UNDEFINED) {
477 final int year = c.getYear();
478 final int month = c.getMonth();
479 final int day = c.getDay();
480 if (year == DatatypeConstants.FIELD_UNDEFINED || year < 1 << 14
481 && year > -(1 << 14)) {
482 return (day != DatatypeConstants.FIELD_UNDEFINED ? day : 0)
483 | (month != DatatypeConstants.FIELD_UNDEFINED ? month : 0) << 5
484 | (year != DatatypeConstants.FIELD_UNDEFINED ? year + (1 << 14) : 0) << 5 + 4;
485 }
486 }
487 break;
488 }
489
490
491 return -1;
492 }
493
494 private static String unpack(final int packingType, final int value) {
495
496 switch (packingType) {
497 case PACKING_BIGDECIMAL:
498 final int scale = (value >> 19 & 0x1F) - (1 << 4);
499 final int unscaled = (value & 0x7FFFF) - (1 << 18);
500 return new BigDecimal(BigInteger.valueOf(unscaled), scale).toString();
501
502 case PACKING_BIGINTEGER:
503 return Integer.toString((value & 0xFFFFFF) - (1 << 23));
504
505 case PACKING_LONG:
506 return Integer.toString((value & 0xFFFFFF) - (1 << 23));
507
508 case PACKING_DOUBLE:
509 return Double.toString(Double.longBitsToDouble((long) (value & 0xFFFFFF) << 40));
510
511 case PACKING_BOOLEAN:
512 return (value & 0x1) != 0 ? "true" : "false";
513
514 case PACKING_DATETIME:
515 final XMLGregorianCalendar c = Statements.DATATYPE_FACTORY.newXMLGregorianCalendar();
516 final int year = value >>> 5 + 4 & 0x7FFF;
517 final int month = value >>> 5 & 0x0F;
518 final int day = value & 0x1F;
519 c.setDay(day != 0 ? day : DatatypeConstants.FIELD_UNDEFINED);
520 c.setMonth(month != 0 ? month : DatatypeConstants.FIELD_UNDEFINED);
521 c.setYear(year != 0 ? year - (1 << 14) : DatatypeConstants.FIELD_UNDEFINED);
522 return c.toXMLFormat();
523
524 default:
525 throw new Error("Unexpected type " + packingType);
526 }
527 }
528
529 private static int hashInt(final int value) {
530 int x = value;
531 x = (x >>> 16 ^ x) * 0x45d9f3b;
532 x = (x >>> 16 ^ x) * 0x45d9f3b;
533 x = x >>> 16 ^ x;
534 return x;
535 }
536
537 public interface QuadHandler extends AutoCloseable {
538
539 final QuadHandler NIL = new QuadHandler() {
540
541 @Override
542 public void handle(final int subj, final int pred, final int obj, final int ctx) {
543 }
544
545 };
546
547 default void start() throws RDFHandlerException {
548 }
549
550 void handle(int subj, int pred, int obj, int ctx) throws RDFHandlerException;
551
552 default void end() throws RDFHandlerException {
553 }
554
555 @Override
556 default void close() {
557 }
558
559 }
560
561 private static final class EncodeCacheEntry {
562
563 final int code;
564
565 final int hash;
566
567 final Value value;
568
569 EncodeCacheEntry(final int code, final int hash, final Value value) {
570 this.code = code;
571 this.hash = hash;
572 this.value = value;
573 }
574
575 }
576
577 private static final class DecodeCacheEntry {
578
579 final int code;
580
581 final Value value;
582
583 DecodeCacheEntry(final int code, final Value value) {
584 this.code = code;
585 this.value = value;
586 }
587
588 }
589
590 private static final class SequentialDictionary<T> {
591
592 private final int capacity;
593
594 private Object[] array;
595
596 private long[] table;
597
598 private int size;
599
600 SequentialDictionary(final int capacity) {
601 this.capacity = capacity;
602 this.array = new Object[Math.min(capacity + 1, 8)];
603 this.table = new long[8];
604 this.size = 0;
605 }
606
607 SequentialDictionary(final SequentialDictionary<T> index) {
608 this.capacity = index.capacity;
609 this.array = index.array.clone();
610 this.table = index.table.clone();
611 this.size = index.size;
612 }
613
614 int size() {
615 return this.size;
616 }
617
618 int encode(final T element, final int resultIfFull) {
619 final long[] table = this.table;
620 Object[] array = this.array;
621 final int mask = table.length - 1;
622 final int hash = element.hashCode();
623 for (int slot = hash & mask;; slot = slot + 1 & mask) {
624 final long cell = table[slot];
625 if (cell == 0L) {
626 synchronized (this) {
627 if (table == this.table && table[slot] == 0L) {
628 array = this.array;
629 if (this.size == this.capacity) {
630 return resultIfFull;
631 }
632 if (this.size > table.length / 3 * 2) {
633 rehash();
634 } else {
635 final int index = this.size + 1;
636 if (index >= array.length) {
637 array = Arrays.copyOf(array, array.length << 1);
638 this.array = array;
639 }
640 array[index] = element;
641 table[slot] = (long) index << 32 | hash & 0xFFFFFFFFL;
642 ++this.size;
643 return index;
644 }
645 }
646 }
647 return encode(element, resultIfFull);
648 }
649 if ((int) cell == hash) {
650 final int index = (int) (cell >>> 32);
651 if (index >= array.length) {
652 return encode(element, resultIfFull);
653 }
654 if (element.equals(array[index])) {
655 return index;
656 }
657 }
658 }
659 }
660
661 int encode(final T element) {
662 final int index = encode(element, -1);
663 if (index < 0) {
664 throw new IllegalStateException("Full index");
665 }
666 return index;
667 }
668
669 @SuppressWarnings("unchecked")
670 T decode(final int index) {
671 return (T) this.array[index];
672 }
673
674 private void rehash() {
675 final long[] oldTable = this.table;
676 final long[] newTable = new long[oldTable.length << 1];
677 final int newMask = newTable.length - 1;
678 for (int oldSlot = 0; oldSlot < oldTable.length; ++oldSlot) {
679 final long cell = oldTable[oldSlot];
680 if (cell != 0L) {
681 final int hash = (int) cell;
682 int newSlot = hash & newMask;
683 while (newTable[newSlot] != 0L) {
684 newSlot = newSlot + 1 & newMask;
685 }
686 newTable[newSlot] = cell;
687 }
688 }
689 this.table = newTable;
690 }
691
692 }
693
694
695
696
697
698
699 private static final class MemoryDictionary extends Dictionary {
700
701 private final Buffer primaryBuffer;
702
703 private final Buffer secondaryBuffer;
704
705 private long primaryOffset;
706
707 private long secondaryOffset;
708
709 private long[] table;
710
711 private int size;
712
713 private final Object[] locks;
714
715 MemoryDictionary() {
716 this.primaryBuffer = Buffer.newResizableBuffer();
717 this.secondaryBuffer = Buffer.newResizableBuffer();
718 this.primaryOffset = 0L;
719 this.secondaryOffset = 0L;
720 this.table = new long[512];
721 this.size = 0;
722 this.locks = new Object[Environment.getCores() * 32];
723 for (int i = 0; i < this.locks.length; ++i) {
724 this.locks[i] = new Object();
725 }
726
727
728 this.primaryBuffer.write(0, (byte) 0);
729 }
730
731 int doEncode(final int type, final int index, final String string) {
732
733 final int hash = type * 6661 + index * 661 + string.hashCode() & 0x7FFFFFFF;
734
735 final long[] table = this.table;
736 final int mask = table.length - 1;
737
738 for (int slot = hash & mask;; slot = slot + 1 & mask) {
739 final long cell = table[slot];
740 if (cell == 0L) {
741 final Buffer buffer = Buffer.newFixedBuffer(new byte[string.length() * 3 + 6]);
742 final int bufferLength = buffer.writeString(4, string);
743 synchronized (this) {
744 if (table == this.table && table[slot] == 0L) {
745 if (this.size > this.table.length / 3 * 2) {
746 rehash();
747 } else {
748 final long offset = append(type, index, buffer, 4, bufferLength);
749 final int code = offsetToCode(offset);
750 table[slot] = (long) code << 32 | hash & 0xFFFFFFFFL;
751 ++this.size;
752 return code;
753 }
754 }
755 }
756 return doEncode(type, index, string);
757 }
758 if ((int) cell == hash) {
759 final long offset = codeToOffset((int) (cell >>> 32));
760 if (equals(offset, type, index, string)) {
761 return (int) (offset >>> 2);
762 }
763 }
764 }
765 }
766
767 int doEncode2(final int type, final int index, final String string) {
768
769 final int hash = type * 6661 + index * 661 + string.hashCode() & 0x7FFFFFFF;
770
771 long[] table = this.table;
772 int mask = table.length - 1;
773
774 for (int slot = hash & mask;; slot = slot + 1 & mask) {
775 final long cell = table[slot];
776 if (cell == 0L) {
777 final Buffer buffer = Buffer.newFixedBuffer(new byte[string.length() * 3]);
778 final int bufferLength = buffer.writeString(0, string);
779 boolean proceed = false;
780 synchronized (this.locks[hash & this.locks.length - 1]) {
781 synchronized (this) {
782 if (table == this.table && table[slot] == 0L) {
783 if (this.size > this.table.length / 3 * 2) {
784 rehash();
785 } else {
786 proceed = true;
787 }
788 }
789 }
790 if (proceed) {
791 final long offset = append2(type, index, buffer, bufferLength);
792 final int code = offsetToCode(offset);
793 synchronized (this) {
794 if (table != this.table || table[slot] != 0L) {
795 table = this.table;
796 mask = table.length - 1;
797 slot = hash & mask;
798 while (table[slot] != 0) {
799 slot = slot + 1 & mask;
800 }
801 }
802 table[slot] = (long) code << 32 | hash & 0xFFFFFFFFL;
803 ++this.size;
804 }
805 return code;
806 }
807 }
808 return doEncode(type, index, string);
809 }
810 if ((int) cell == hash) {
811 final long offset = codeToOffset((int) (cell >>> 32));
812 if (equals(offset, type, index, string)) {
813 return (int) (offset >>> 2);
814 }
815 }
816 }
817 }
818
819 @Override
820 Value doDecode(final int code) {
821
822
823 long offset = codeToOffset(code);
824
825
826 final byte header = this.primaryBuffer.read(offset);
827 assert (header & 0x7) == 0;
828
829
830 final int type = header >>> 5 & 0x07;
831
832 String string;
833 int len;
834 final boolean local = (header & 0x10) != 0;
835 if (local) {
836 final boolean fw = (header & 0x08) != 0;
837 len = this.primaryBuffer.read(offset + (fw ? 1 : -1)) & 0xFF;
838 offset += fw ? 2 : 1;
839 string = this.primaryBuffer.readString(offset, len);
840 offset += len;
841 } else {
842 final long off = this.primaryBuffer.readNumber(offset + 1, 5);
843 len = this.secondaryBuffer.readInt(off);
844 string = this.secondaryBuffer.readString(off + 4, len);
845 offset += 6;
846 }
847
848 int index = 0;
849 if (type == TYPE_URI_NAME || type == TYPE_LITERAL_LANG
850 || type == Dictionary.TYPE_LITERAL_DT) {
851 index = this.primaryBuffer.readShort(offset) & 0xFFFF;
852 }
853
854 return valueFor(type, index, string);
855 }
856
857 @Override
858 int doType(final int code) {
859 final long offset = (long) code << 2;
860 return this.primaryBuffer.read(offset) >>> 5 & 0x7;
861 }
862
863 @Override
864 void doToString(final StringBuilder builder) {
865 final long primary = this.primaryOffset;
866 final long secondary = this.secondaryOffset;
867 final long hash = this.table.length * 8;
868 final long total = primary + secondary + hash;
869 builder.append(this.size).append(" values, ").append(total).append(" bytes (")
870 .append(hash).append(" hash table, ").append(primary)
871 .append(" primary buffer, ").append(secondary).append(" secondary buffer)");
872 }
873
874 private void rehash() {
875
876
877
878
879 final long ts = System.currentTimeMillis();
880 final long[] oldTable = this.table;
881 final long[] newTable = new long[oldTable.length << 1];
882 final int newMask = newTable.length - 1;
883 for (int oldSlot = 0; oldSlot < oldTable.length; ++oldSlot) {
884 final long cell = oldTable[oldSlot];
885 if (cell != 0L) {
886 final int hash = (int) (cell & 0xFFFFFFFF);
887 int newSlot = hash & newMask;
888 while (newTable[newSlot] != 0L) {
889 newSlot = newSlot + 1 & newMask;
890 }
891 newTable[newSlot] = cell;
892 }
893 }
894 this.table = newTable;
895 LOGGER.debug("Rehashed from {} to {} entries in {} ms", oldTable.length,
896 newTable.length, System.currentTimeMillis() - ts);
897 }
898
899 private long append(final int type, final int index, final Buffer buffer, int bufferStart,
900 int bufferLength) {
901
902 final long offset = padOffset(this.primaryOffset);
903 final boolean local = bufferLength < 128 && offset <= 1L << 33;
904 final int header = type << 5 | (local ? 0x10 : 0);
905
906 if (local) {
907 final boolean fw = offset - this.primaryOffset == 0;
908 buffer.write(bufferStart - (fw ? 2 : 1), (byte) (header | (fw ? 0x08 : 0)));
909 buffer.write(bufferStart - (fw ? 1 : 2), (byte) bufferLength);
910 int bufferEnd = bufferStart + bufferLength;
911 if (index > 0) {
912 buffer.writeShort(bufferStart + bufferLength, (short) index);
913 bufferEnd += 2;
914 }
915 bufferStart -= 2;
916 final int writeLength = bufferEnd - bufferStart;
917 final long writeOffset = offset + (fw ? 0 : -1);
918 this.primaryBuffer.writeBuffer(writeOffset, buffer, bufferStart, writeLength);
919 this.primaryOffset = writeOffset + writeLength;
920
921 } else {
922 final long pointer = this.secondaryOffset;
923 buffer.writeInt(bufferStart - 4, bufferLength);
924 bufferStart -= 4;
925 bufferLength += 4;
926 this.secondaryBuffer.writeBuffer(pointer, buffer, bufferStart, bufferLength);
927 this.secondaryOffset += bufferLength;
928 if (index > 0) {
929 final long n = (long) header << 56 | pointer << 16 | index & 0xFFFFL;
930 this.primaryBuffer.writeLong(offset, n);
931 this.primaryOffset = offset + 8;
932 } else {
933 final long n = (long) header << 40 | pointer;
934 this.primaryBuffer.writeNumber(offset, 6, n);
935 this.primaryOffset = offset + 6;
936 }
937 }
938
939 return offset;
940 }
941
942 private long append2(final int type, final int index, final Buffer buffer,
943 final int bufferLength) {
944
945 final long offset;
946 final long lastOffset;
947 final long secondaryOffset;
948 final long indexOffset;
949 boolean local;
950
951 synchronized (this.primaryBuffer) {
952 lastOffset = this.primaryOffset;
953 offset = padOffset(lastOffset);
954 secondaryOffset = this.secondaryOffset;
955 local = bufferLength <= 128 && offset <= 1L << 33;
956 if (local) {
957 final boolean fw = offset - lastOffset == 0;
958 indexOffset = offset + (fw ? 2 : 1) + bufferLength;
959 } else {
960 indexOffset = offset + 6;
961 this.secondaryOffset += 4 + bufferLength;
962 }
963 this.primaryOffset = indexOffset + (index > 0 ? 2 : 0);
964 }
965
966 int header = type << 5 | (local ? 0x10 : 0);
967
968 if (local) {
969 final boolean fw = offset - lastOffset == 0;
970 final long lenOffset = offset + (fw ? +1 : -1);
971 final long stringOffset = offset + (fw ? 2 : 1);
972 this.primaryBuffer.write(lenOffset, (byte) bufferLength);
973 this.primaryBuffer.writeBuffer(stringOffset, buffer, 0L, bufferLength);
974 header |= fw ? 0x08 : 0;
975 } else {
976 this.secondaryBuffer.writeInt(secondaryOffset, bufferLength);
977 this.secondaryBuffer.writeBuffer(secondaryOffset + 4, buffer, 0L, bufferLength);
978 this.secondaryOffset = secondaryOffset + 4 + bufferLength;
979 this.primaryBuffer.writeNumber(offset + 1, 5, secondaryOffset);
980 }
981
982 this.primaryBuffer.write(offset, (byte) header);
983
984 if (index > 0) {
985 this.primaryBuffer.writeShort(indexOffset, (short) index);
986 }
987
988 return offset;
989 }
990
991 private boolean equals(long offset, final int type, final int index, final String string) {
992
993
994 final byte header = this.primaryBuffer.read(offset);
995 assert (header & 0x7) == 0;
996
997
998 if ((header >>> 5 & 0x07) != type) {
999 return false;
1000 }
1001
1002
1003
1004 int strLen;
1005 long strOffset;
1006 Buffer strBuffer;
1007 final boolean local = (header & 0x10) != 0;
1008 if (local) {
1009 final boolean fw = (header & 0x08) != 0;
1010 strLen = this.primaryBuffer.read(offset + (fw ? 1 : -1)) & 0xFF;
1011 strOffset = offset + (fw ? 2 : 1);
1012 strBuffer = this.primaryBuffer;
1013 offset = strOffset + strLen;
1014 } else {
1015 final long off = this.primaryBuffer.readNumber(offset + 1, 5);
1016 strLen = this.secondaryBuffer.readInt(off);
1017 strOffset = off + 4;
1018 strBuffer = this.secondaryBuffer;
1019 offset += 6;
1020 }
1021 assert strLen >= 0;
1022 assert strOffset >= 0;
1023
1024
1025 if (type == TYPE_URI_NAME || type == TYPE_LITERAL_LANG
1026 || type == Dictionary.TYPE_LITERAL_DT) {
1027 final int storedIndex = this.primaryBuffer.readShort(offset) & 0xFFFF;
1028 if (index != storedIndex) {
1029 return false;
1030 }
1031 }
1032
1033
1034 return strBuffer.equalString(strOffset, strLen, string);
1035 }
1036
1037 private static long codeToOffset(final int code) {
1038
1039 return (code & 0x80000000) == 0 ? (long) code << 2 : ((code & 0xFFFFFFFFL) << 3)
1040 - (1L << 33);
1041 }
1042
1043 private static int offsetToCode(final long offset) {
1044
1045 return (int) (offset < 1L << 33 ? offset >>> 2 : (1L << 30) + offset >>> 3);
1046 }
1047
1048 private static long padOffset(final long offset) {
1049 return offset < 1L << 33 ? offset + 3 & 0xFFFFFFFFFFFFFFFCL
1050 : offset + 7 & 0xFFFFFFFFFFFFFFF8L;
1051 }
1052
1053 }
1054
1055 }