1   /*
2    * RDFpro - An extensible tool for building stream-oriented RDF processing libraries.
3    * 
4    * Written in 2014 by Francesco Corcoglioniti with support by Marco Amadori, Michele Mostarda,
5    * Alessio Palmero Aprosio and Marco Rospocher. Contact info on http://rdfpro.fbk.eu/
6    * 
7    * To the extent possible under law, the authors have dedicated all copyright and related and
8    * neighboring rights to this software to the public domain worldwide. This software is
9    * distributed without any warranty.
10   * 
11   * You should have received a copy of the CC0 Public Domain Dedication along with this software.
12   * If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
13   */
14  package eu.fbk.rdfpro.tool;
15  
16  import java.io.BufferedReader;
17  import java.io.IOException;
18  import java.io.InputStream;
19  import java.io.InputStreamReader;
20  import java.io.Reader;
21  import java.io.StringReader;
22  import java.nio.charset.Charset;
23  import java.util.zip.ZipException;
24  import java.util.zip.ZipInputStream;
25  
26  import org.openrdf.model.ValueFactory;
27  import org.openrdf.model.impl.ValueFactoryImpl;
28  import org.openrdf.rio.ParserConfig;
29  import org.openrdf.rio.RDFFormat;
30  import org.openrdf.rio.RDFHandler;
31  import org.openrdf.rio.RDFHandlerException;
32  import org.openrdf.rio.RDFParseException;
33  import org.openrdf.rio.RDFParser;
34  import org.openrdf.rio.RDFParserFactory;
35  import org.openrdf.rio.Rio;
36  import org.openrdf.rio.helpers.BasicParserSettings;
37  import org.openrdf.rio.helpers.RDFHandlerWrapper;
38  import org.openrdf.rio.helpers.RDFParserBase;
39  import org.openrdf.rio.helpers.XMLParserSettings;
40  
41  public class GeonamesRDF implements RDFParserFactory {
42  
43      public static final RDFFormat FORMAT = new RDFFormat("Geonames RDF",
44              "application/x-geonames-rdf", null, "geonames", true, true);
45  
46      public static void init() {
47          // calling this method will cause the static initializer to run once
48      }
49  
50      @Override
51      public RDFFormat getRDFFormat() {
52          return FORMAT;
53      }
54  
55      @Override
56      public RDFParser getParser() {
57          return new Parser();
58      }
59  
60      public static class Parser extends RDFParserBase {
61  
62          public Parser() {
63              super();
64          }
65  
66          public Parser(final ValueFactory factory) {
67              super(factory);
68          }
69  
70          @Override
71          public RDFFormat getRDFFormat() {
72              return FORMAT;
73          }
74  
75          @Override
76          public void parse(final InputStream in, final String baseURI) throws IOException,
77                  RDFParseException, RDFHandlerException {
78  
79              final RDFHandler handler = new RDFHandlerWrapper(getRDFHandler()) {
80  
81                  @Override
82                  public void startRDF() throws RDFHandlerException {
83                  }
84  
85                  @Override
86                  public void endRDF() throws RDFHandlerException {
87                  }
88  
89              };
90  
91              getRDFHandler().startRDF();
92  
93              final ZipInputStream stream = new ZipInputStream(in);
94              try {
95                  while (stream.getNextEntry() != null) {
96                      final BufferedReader reader = new BufferedReader(new InputStreamReader(stream,
97                              Charset.forName("UTF-8")));
98                      while (reader.readLine() != null) { // read and drop URI
99                          final String entry = reader.readLine();
100 
101                         final RDFParser parser = Rio.createParser(RDFFormat.RDFXML);
102                         parser.setRDFHandler(handler);
103                         parser.setValueFactory(ValueFactoryImpl.getInstance());
104 
105                         final ParserConfig config = parser.getParserConfig();
106                         config.set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, false);
107                         config.set(BasicParserSettings.FAIL_ON_UNKNOWN_LANGUAGES, false);
108                         config.set(BasicParserSettings.VERIFY_DATATYPE_VALUES, false);
109                         config.set(BasicParserSettings.VERIFY_LANGUAGE_TAGS, false);
110                         config.set(BasicParserSettings.VERIFY_RELATIVE_URIS, false);
111                         config.set(BasicParserSettings.NORMALIZE_DATATYPE_VALUES, true);
112                         config.set(BasicParserSettings.NORMALIZE_LANGUAGE_TAGS, true);
113                         config.set(BasicParserSettings.PRESERVE_BNODE_IDS, true);
114                         config.set(XMLParserSettings.FAIL_ON_DUPLICATE_RDF_ID, false);
115                         config.set(XMLParserSettings.FAIL_ON_INVALID_NCNAME, false);
116                         config.set(XMLParserSettings.FAIL_ON_INVALID_QNAME, false);
117                         config.set(XMLParserSettings.FAIL_ON_MISMATCHED_TAGS, false);
118                         config.set(XMLParserSettings.FAIL_ON_NON_STANDARD_ATTRIBUTES, false);
119                         config.set(XMLParserSettings.FAIL_ON_SAX_NON_FATAL_ERRORS, false);
120 
121                         parser.parse(new StringReader(entry), baseURI);
122                     }
123                 }
124             } catch (final ZipException ex) {
125                 if (!ex.getMessage().contains("invalid entry size")) {
126                     throw ex;
127                 }
128 
129             } finally {
130                 stream.close();
131             }
132 
133             getRDFHandler().endRDF();
134         }
135 
136         @Override
137         public void parse(final Reader reader, final String baseURI) throws IOException,
138                 RDFParseException, RDFHandlerException {
139             throw new UnsupportedOperationException("Binary data expected");
140         }
141 
142     }
143 
144 }