1 /*
2 * RDFpro - An extensible tool for building stream-oriented RDF processing libraries.
3 *
4 * Written in 2014 by Francesco Corcoglioniti with support by Marco Amadori, Michele Mostarda,
5 * Alessio Palmero Aprosio and Marco Rospocher. Contact info on http://rdfpro.fbk.eu/
6 *
7 * To the extent possible under law, the authors have dedicated all copyright and related and
8 * neighboring rights to this software to the public domain worldwide. This software is
9 * distributed without any warranty.
10 *
11 * You should have received a copy of the CC0 Public Domain Dedication along with this software.
12 * If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
13 */
14 package eu.fbk.rdfpro;
15
16 import java.io.Serializable;
17 import java.util.Objects;
18
19 /**
20 * A set/multiset operator.
21 */
22 public abstract class SetOperator implements Serializable, Comparable<SetOperator> {
23
24 private static final long serialVersionUID = 1L;
25
26 /**
27 * Set union. This {@code SetOperator} ignores duplicates and returns a unique copy of an
28 * element only if it occurs at least in one of the arguments of the operation. The
29 * corresponding textual representation is "u".
30 */
31 public static final SetOperator UNION = new SetOperator("u") {
32
33 private static final long serialVersionUID = 1L;
34
35 @Override
36 public int apply(final int[] counts) {
37 for (int i = 0; i < counts.length; ++i) {
38 if (counts[i] > 0) {
39 return 1;
40 }
41 }
42 return 0;
43 }
44
45 };
46
47 /**
48 * Multiset union. This {@code SetOperator} returns an element if it occurs in at least one
49 * argument of the operation, with a multiplicity equal to the one of the argument it occurs
50 * the most. The corresponding textual representation is "U". See <a
51 * href="http://en.wikipedia.org/wiki/Multiset">this page</a> for more information.
52 */
53 public static final SetOperator UNION_MULTISET = new SetOperator("U") {
54
55 private static final long serialVersionUID = 1L;
56
57 @Override
58 public int apply(final int[] counts) {
59 int result = counts[0];
60 for (int i = 1; i < counts.length; ++i) {
61 if (counts[i] > result) {
62 result = counts[i];
63 }
64 }
65 return result;
66 }
67
68 };
69
70 /**
71 * Multiset sum (a.k.a. union all). This {@code SetOperator} returns an element if it occurs
72 * at least in one argument of the operation, with a multiplicity that is the sum of the
73 * multiplicities of the element in the multisets coming from the operation arguments. The
74 * corresponding textual representation is "a". See <a
75 * href="http://en.wikipedia.org/wiki/Multiset">this page</a> for more information.
76 */
77 public static final SetOperator SUM_MULTISET = new SetOperator("a") {
78
79 private static final long serialVersionUID = 1L;
80
81 @Override
82 public int apply(final int[] counts) {
83 int result = 0;
84 for (int i = 0; i < counts.length; ++i) {
85 result += counts[i];
86 }
87 return result;
88 }
89
90 };
91
92 /**
93 * Set intersection. This {@code SetOperator} ignores duplicates and returns an element only
94 * if it occurs in all the arguments of the operation. In that case, exactly one copy of the
95 * element is emitted. The corresponding textual representation is "i".
96 */
97 public static final SetOperator INTERSECTION = new SetOperator("i") {
98
99 private static final long serialVersionUID = 1L;
100
101 @Override
102 public int apply(final int[] counts) {
103 for (int i = 0; i < counts.length; ++i) {
104 if (counts[i] == 0) {
105 return 0;
106 }
107 }
108 return 1;
109 }
110
111 };
112
113 /**
114 * Multiset intersection. This {@code SetOperator} returns an element if it occurs in all the
115 * arguments of the operation, with a multiplicity equal to the one of the argument it occurs
116 * the least. The corresponding textual representation is "I". See <a
117 * href="http://en.wikipedia.org/wiki/Multiset">this page</a> for more information.
118 */
119 public static final SetOperator INTERSECTION_MULTISET = new SetOperator("I") {
120
121 private static final long serialVersionUID = 1L;
122
123 @Override
124 public int apply(final int[] counts) {
125 int result = counts[0];
126 for (int i = 1; i < counts.length; ++i) {
127 if (counts[i] < result) {
128 result = counts[i];
129 }
130 }
131 return result;
132 }
133
134 };
135
136 /**
137 * Set difference. This {@code SetOperator} ignores duplicates and returns the set difference
138 * between the first argument (index 0) on one side, and all the other arguments on the other
139 * side (i.e., {@code A0 \ A1 ... \An} or equivalently {@code A0 \ (A1 U ... U An)}). At most
140 * a copy of a given element is returned. The corresponding textual representation is "d".
141 */
142 public static final SetOperator DIFFERENCE = new SetOperator("d") {
143
144 private static final long serialVersionUID = 1L;
145
146 @Override
147 public int apply(final int[] counts) {
148 if (counts[0] == 0) {
149 return 0;
150 }
151 for (int i = 1; i < counts.length; ++i) {
152 if (counts[i] > 0) {
153 return 0;
154 }
155 }
156 return 1;
157 }
158
159 };
160
161 /**
162 * Multiset difference. This {@code SetOperator} returns the multiset difference between the
163 * first argument (index 0) on one side, and all the other arguments on the other side (i.e.,
164 * {@code A0 \ A1 ... \An} or equivalently {@code A0 \ (A1 + ... + An)} where {@code +}
165 * denotes multiset sum). The multiplicity of a returned element is equal to its multiplicity
166 * in the first argument minus the sum of its multiplicities in the other arguments (rounded
167 * to 0 if negative). The corresponding textual representation is "D". See <a
168 * href="http://en.wikipedia.org/wiki/Multiset">this page</a> for more information.
169 */
170 public static final SetOperator DIFFERENCE_MULTISET = new SetOperator("D") {
171
172 private static final long serialVersionUID = 1L;
173
174 @Override
175 public int apply(final int[] counts) {
176 int result = counts[0];
177 for (int i = 1; i < counts.length && result > 0; ++i) {
178 result -= counts[i];
179 }
180 return result > 0 ? result : 0;
181 }
182
183 };
184
185 /**
186 * Set symmetric difference. This {@code SetOperator} ignores duplicates and returns all the
187 * elements that are present in at least one argument of the operator but not in all of them.
188 * For two arguments, this corresponds to evaluating {@code (A0 \ A1) U (A1 \ A0)}. At most a
189 * copy of an element is returned. The corresponding textual representation is "s".
190 */
191 public static final SetOperator SYMMETRIC_DIFFERENCE = new SetOperator("s") {
192
193 private static final long serialVersionUID = 1L;
194
195 @Override
196 public int apply(final int[] counts) {
197 int result = 0;
198 for (int i = 0; i < counts.length; ++i) {
199 if (counts[i] > 0) {
200 ++result;
201 }
202 }
203 return result < counts.length ? 1 : 0;
204 }
205
206 };
207
208 /**
209 * Multiset symmetric difference. This {@code SetOperator} returns the elements that are
210 * present in at least one argument of the operator, with a multiplicity that is the
211 * difference between the multiplicity in the argument it occurs the most and the multiplicity
212 * in the argument it occurs the least. For two arguments, this corresponds to evaluating (the
213 * multiset version of) {@code (A0 \ A1) U (A1 \ A0)}. The corresponding textual
214 * representation is "S". See <a href="http://en.wikipedia.org/wiki/Multiset">this page</a>
215 * for more information.
216 */
217 public static final SetOperator SYMMETRIC_DIFFERENCE_MULTISET = new SetOperator("S") {
218
219 private static final long serialVersionUID = 1L;
220
221 @Override
222 public int apply(final int[] counts) {
223 int min = counts[0];
224 int max = min;
225 for (int i = 1; i < counts.length; ++i) {
226 final int n = counts[i];
227 if (n < min) {
228 min = n;
229 }
230 if (n > max) {
231 max = n;
232 }
233 }
234 return max - min;
235 }
236
237 };
238
239 /**
240 * At least N occurrences. Creates a parametric {@code SetOperator} that checks that the total
241 * number of occurrences of a certain element, summed over all the operation arguments and
242 * counting duplicates, is AT LEAST N. In that case a single copy of the element is emitted.
243 * The corresponding textual representation is "N+".
244 *
245 * @param n
246 * the threshold, greater or equal to 1
247 * @return the created {@code SetOperator} object
248 */
249 public static SetOperator atLeast(final int n) {
250 if (n < 1) {
251 throw new IllegalArgumentException("Invalid threshold " + n);
252 }
253 return new SetOperator(n + "+") {
254
255 private static final long serialVersionUID = 1L;
256
257 @Override
258 public int apply(final int[] counts) {
259 int result = 0;
260 for (int i = 0; i < counts.length; ++i) {
261 if (counts[i] > 0) {
262 ++result;
263 }
264 }
265 return result >= n ? 1 : 0;
266 }
267
268 };
269 }
270
271 /**
272 * At most N occurrences. Creates a parametric {@code SetOperator} that checks that the total
273 * number of occurrences of a certain element, summed over all the operation arguments and
274 * counting duplicates, is AT MOST N. In that case a single copy of the element is emitted.
275 * The corresponding textual representation is "N-".
276 *
277 * @param n
278 * the threshold, greater or equal to 1
279 * @return the created {@code SetOperator} object
280 */
281 public static SetOperator atMost(final int n) {
282 if (n < 1) {
283 throw new IllegalArgumentException("Invalid threshold " + n);
284 }
285 return new SetOperator(n + "-") {
286
287 private static final long serialVersionUID = 1L;
288
289 @Override
290 public int apply(final int[] counts) {
291 int result = 0;
292 for (int i = 0; i < counts.length; ++i) {
293 result += counts[i];
294 }
295 return result <= n ? 1 : 0;
296 }
297
298 };
299 }
300
301 private final String string;
302
303 private SetOperator(final String string) {
304 this.string = string;
305 }
306
307 private Object writeReplace() {
308 return new SerializedForm(this.string);
309 }
310
311 /**
312 * Applies the operator. The method is called for a given element, supplying it the
313 * multiplicities of the element in each argument (a set or a multiset) of the operation. The
314 * method returns the multiplicity of the element in the operation result.
315 *
316 * @param multiplicities
317 * the multiplicities of the element in the operation arguments (non-null,
318 * non-empty array)
319 * @return the resulting multiplicity
320 */
321 public abstract int apply(int[] multiplicities);
322
323 /**
324 * {@inheritDoc} Comparison is based on the textual representation of set operators.
325 */
326 @Override
327 public final int compareTo(final SetOperator other) {
328 return this.string.compareTo(other.string);
329 }
330
331 /**
332 * {@inheritDoc} Equality is implemented based on the textual representation of set operators.
333 */
334 @Override
335 public final boolean equals(final Object object) {
336 if (object == this) {
337 return true;
338 }
339 if (!(object instanceof SetOperator)) {
340 return false;
341 }
342 final SetOperator other = (SetOperator) object;
343 return this.string.equals(other.string);
344 }
345
346 /**
347 * {@inheritDoc} The hash code is computed based on the textual representation of the set
348 * operator.
349 */
350 @Override
351 public final int hashCode() {
352 return this.string.hashCode();
353 }
354
355 /**
356 * {@inheritDoc} Returns the textual representation of the set operator that uniquely
357 * identifies it.
358 */
359 @Override
360 public final String toString() {
361 return this.string;
362 }
363
364 /**
365 * Returns the {@code SetOperator} for the textual representation supplied.
366 *
367 * @param string
368 * the textual representation, not null
369 * @return the corresponding {@code SetOperator}, upon success
370 */
371 public static SetOperator valueOf(final String string) {
372 Objects.requireNonNull(string);
373 for (final SetOperator operation : new SetOperator[] { UNION, UNION_MULTISET,
374 SUM_MULTISET, INTERSECTION, INTERSECTION_MULTISET, DIFFERENCE,
375 DIFFERENCE_MULTISET, SYMMETRIC_DIFFERENCE, SYMMETRIC_DIFFERENCE_MULTISET }) {
376 if (operation.string.equals(string)) {
377 return operation;
378 }
379 }
380 if (string.endsWith("+")) {
381 final int n = Integer.parseInt(string.substring(0, string.length() - 1));
382 return atLeast(n);
383 }
384 if (string.endsWith("-")) {
385 final int n = Integer.parseInt(string.substring(0, string.length() - 1));
386 return atMost(n);
387 }
388 throw new IllegalArgumentException("Unknown set operator '" + string + "'");
389 }
390
391 static final class SerializedForm implements Serializable {
392
393 private static final long serialVersionUID = 1L;
394
395 private final String string;
396
397 public SerializedForm(final String string) {
398 this.string = string;
399 }
400
401 Object readResolve() {
402 return SetOperator.valueOf(this.string);
403 }
404
405 }
406
407 }