1
2
3
4
5
6
7
8
9
10
11
12
13
14 package eu.fbk.rdfpro.tool;
15
16 import java.io.BufferedReader;
17 import java.io.IOException;
18 import java.io.InputStream;
19 import java.io.InputStreamReader;
20 import java.io.Reader;
21 import java.io.StringReader;
22 import java.nio.charset.Charset;
23 import java.util.zip.ZipException;
24 import java.util.zip.ZipInputStream;
25
26 import org.openrdf.model.ValueFactory;
27 import org.openrdf.model.impl.ValueFactoryImpl;
28 import org.openrdf.rio.ParserConfig;
29 import org.openrdf.rio.RDFFormat;
30 import org.openrdf.rio.RDFHandler;
31 import org.openrdf.rio.RDFHandlerException;
32 import org.openrdf.rio.RDFParseException;
33 import org.openrdf.rio.RDFParser;
34 import org.openrdf.rio.RDFParserFactory;
35 import org.openrdf.rio.Rio;
36 import org.openrdf.rio.helpers.BasicParserSettings;
37 import org.openrdf.rio.helpers.RDFHandlerWrapper;
38 import org.openrdf.rio.helpers.RDFParserBase;
39 import org.openrdf.rio.helpers.XMLParserSettings;
40
41 public class GeonamesRDF implements RDFParserFactory {
42
43 public static final RDFFormat FORMAT = new RDFFormat("Geonames RDF",
44 "application/x-geonames-rdf", null, "geonames", true, true);
45
46 public static void init() {
47
48 }
49
50 @Override
51 public RDFFormat getRDFFormat() {
52 return FORMAT;
53 }
54
55 @Override
56 public RDFParser getParser() {
57 return new Parser();
58 }
59
60 public static class Parser extends RDFParserBase {
61
62 public Parser() {
63 super();
64 }
65
66 public Parser(final ValueFactory factory) {
67 super(factory);
68 }
69
70 @Override
71 public RDFFormat getRDFFormat() {
72 return FORMAT;
73 }
74
75 @Override
76 public void parse(final InputStream in, final String baseURI) throws IOException,
77 RDFParseException, RDFHandlerException {
78
79 final RDFHandler handler = new RDFHandlerWrapper(getRDFHandler()) {
80
81 @Override
82 public void startRDF() throws RDFHandlerException {
83 }
84
85 @Override
86 public void endRDF() throws RDFHandlerException {
87 }
88
89 };
90
91 getRDFHandler().startRDF();
92
93 final ZipInputStream stream = new ZipInputStream(in);
94 try {
95 while (stream.getNextEntry() != null) {
96 final BufferedReader reader = new BufferedReader(new InputStreamReader(stream,
97 Charset.forName("UTF-8")));
98 while (reader.readLine() != null) {
99 final String entry = reader.readLine();
100
101 final RDFParser parser = Rio.createParser(RDFFormat.RDFXML);
102 parser.setRDFHandler(handler);
103 parser.setValueFactory(ValueFactoryImpl.getInstance());
104
105 final ParserConfig config = parser.getParserConfig();
106 config.set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, false);
107 config.set(BasicParserSettings.FAIL_ON_UNKNOWN_LANGUAGES, false);
108 config.set(BasicParserSettings.VERIFY_DATATYPE_VALUES, false);
109 config.set(BasicParserSettings.VERIFY_LANGUAGE_TAGS, false);
110 config.set(BasicParserSettings.VERIFY_RELATIVE_URIS, false);
111 config.set(BasicParserSettings.NORMALIZE_DATATYPE_VALUES, true);
112 config.set(BasicParserSettings.NORMALIZE_LANGUAGE_TAGS, true);
113 config.set(BasicParserSettings.PRESERVE_BNODE_IDS, true);
114 config.set(XMLParserSettings.FAIL_ON_DUPLICATE_RDF_ID, false);
115 config.set(XMLParserSettings.FAIL_ON_INVALID_NCNAME, false);
116 config.set(XMLParserSettings.FAIL_ON_INVALID_QNAME, false);
117 config.set(XMLParserSettings.FAIL_ON_MISMATCHED_TAGS, false);
118 config.set(XMLParserSettings.FAIL_ON_NON_STANDARD_ATTRIBUTES, false);
119 config.set(XMLParserSettings.FAIL_ON_SAX_NON_FATAL_ERRORS, false);
120
121 parser.parse(new StringReader(entry), baseURI);
122 }
123 }
124 } catch (final ZipException ex) {
125 if (!ex.getMessage().contains("invalid entry size")) {
126 throw ex;
127 }
128
129 } finally {
130 stream.close();
131 }
132
133 getRDFHandler().endRDF();
134 }
135
136 @Override
137 public void parse(final Reader reader, final String baseURI) throws IOException,
138 RDFParseException, RDFHandlerException {
139 throw new UnsupportedOperationException("Binary data expected");
140 }
141
142 }
143
144 }