1use crate::cli::InputFormat;
2use anyhow::{Context, Result};
3use flate2::read::MultiGzDecoder;
4use oxrdf::{GraphName, NamedOrBlankNode, Quad, Term, Triple};
5use std::fs::File;
6use std::io::{BufRead, BufReader, Read};
7use std::path::Path;
8
9pub fn is_quad_format(f: InputFormat) -> bool {
11 matches!(f, InputFormat::Trig | InputFormat::Nq)
12}
13
14pub fn open_reader(path: &Path) -> Result<Box<dyn BufRead>> {
16 let file = File::open(path).with_context(|| format!("failed to open {}", path.display()))?;
17 let is_gz = path
18 .file_name()
19 .and_then(|s| s.to_str())
20 .map(|s| s.to_ascii_lowercase().ends_with(".gz"))
21 .unwrap_or(false);
22
23 let raw: Box<dyn Read> = if is_gz {
24 Box::new(MultiGzDecoder::new(file))
25 } else {
26 Box::new(file)
27 };
28 Ok(Box::new(BufReader::new(raw)))
29}
30
31fn has_blank_node(t: &Triple) -> bool {
33 matches!(t.subject, NamedOrBlankNode::BlankNode(_)) || matches!(t.object, Term::BlankNode(_))
34}
35
/// Counters and metadata reported by `parse_triples` and `parse_quads`.
#[derive(Debug, Default, Clone)]
pub struct ParseOutcome {
    /// Number of statements handed to the callback. `parse_triples` excludes
    /// the triples it skipped; `parse_quads` counts every parsed quad.
    pub total: u64,
    /// Number of triples `parse_triples` dropped because their subject or
    /// object is a blank node. `parse_quads` always reports 0.
    pub skipped: u64,
    /// Number of quads seen by `parse_quads` whose subject or object is a
    /// blank node. `parse_triples` always reports 0.
    pub bnode_count: u64,
    /// Prefix pairs reported by the Turtle or TriG parser's `prefixes()` after
    /// the input has been consumed; empty for every other format.
    pub prefixes: Vec<(String, String)>,
}
47
48pub fn parse_triples<R: BufRead, F: FnMut(Triple) -> Result<()>>(
55 reader: R,
56 format: InputFormat,
57 mut on_triple: F,
58) -> Result<ParseOutcome> {
59 let mut total: u64 = 0;
60 let mut skipped: u64 = 0;
61 let mut prefixes: Vec<(String, String)> = Vec::new();
62
63 macro_rules! handle_triple {
64 ($t:expr) => {{
65 let t: Triple = $t;
66 if has_blank_node(&t) {
67 skipped += 1;
68 } else {
69 total += 1;
70 on_triple(t)?;
71 }
72 }};
73 }
74
75 match format {
76 InputFormat::Nt => {
77 let parser = oxttl::NTriplesParser::new().for_reader(reader);
78 for tri in parser {
79 let t = tri.context("N-Triples parse error")?;
80 handle_triple!(t);
81 }
82 }
83 InputFormat::Ttl => {
84 let mut parser = oxttl::TurtleParser::new().for_reader(reader);
85 for tri in parser.by_ref() {
86 let t = tri.context("Turtle parse error")?;
87 handle_triple!(t);
88 }
89 prefixes.extend(
90 parser
91 .prefixes()
92 .map(|(k, v)| (k.to_string(), v.to_string())),
93 );
94 }
95 InputFormat::Rdf => {
96 let parser = oxrdfxml::RdfXmlParser::new().for_reader(reader);
97 for tri in parser {
98 let t = tri.context("RDF/XML parse error")?;
99 handle_triple!(t);
100 }
101 }
102 InputFormat::Trig => {
103 let mut parser = oxttl::TriGParser::new().for_reader(reader);
104 for q in parser.by_ref() {
105 let q = q.context("TriG parse error")?;
106 handle_triple!(Triple::new(q.subject, q.predicate, q.object));
107 }
108 prefixes.extend(
109 parser
110 .prefixes()
111 .map(|(k, v)| (k.to_string(), v.to_string())),
112 );
113 }
114 InputFormat::Nq => {
115 let parser = oxttl::NQuadsParser::new().for_reader(reader);
116 for q in parser {
117 let q = q.context("N-Quads parse error")?;
118 handle_triple!(Triple::new(q.subject, q.predicate, q.object));
119 }
120 }
121 }
122
123 Ok(ParseOutcome {
124 total,
125 skipped,
126 bnode_count: 0,
127 prefixes,
128 })
129}
130
131pub fn parse_quads<R: BufRead, F: FnMut(Quad) -> Result<()>>(
138 reader: R,
139 format: InputFormat,
140 mut on_quad: F,
141) -> Result<ParseOutcome> {
142 let mut total: u64 = 0;
143 let mut bnode_count: u64 = 0;
144 let mut prefixes: Vec<(String, String)> = Vec::new();
145
146 macro_rules! handle {
147 ($q:expr) => {{
148 let q: Quad = $q;
149 total += 1;
150 if matches!(q.subject, NamedOrBlankNode::BlankNode(_))
151 || matches!(q.object, Term::BlankNode(_))
152 {
153 bnode_count += 1;
154 }
155 on_quad(q)?;
156 }};
157 }
158
159 match format {
160 InputFormat::Nt => {
161 let parser = oxttl::NTriplesParser::new().for_reader(reader);
162 for tri in parser {
163 let t = tri.context("N-Triples parse error")?;
164 handle!(Quad {
165 subject: t.subject,
166 predicate: t.predicate,
167 object: t.object,
168 graph_name: GraphName::DefaultGraph,
169 });
170 }
171 }
172 InputFormat::Ttl => {
173 let mut parser = oxttl::TurtleParser::new().for_reader(reader);
174 for tri in parser.by_ref() {
175 let t = tri.context("Turtle parse error")?;
176 handle!(Quad {
177 subject: t.subject,
178 predicate: t.predicate,
179 object: t.object,
180 graph_name: GraphName::DefaultGraph,
181 });
182 }
183 prefixes.extend(
184 parser
185 .prefixes()
186 .map(|(k, v)| (k.to_string(), v.to_string())),
187 );
188 }
189 InputFormat::Rdf => {
190 let parser = oxrdfxml::RdfXmlParser::new().for_reader(reader);
191 for tri in parser {
192 let t = tri.context("RDF/XML parse error")?;
193 handle!(Quad {
194 subject: t.subject,
195 predicate: t.predicate,
196 object: t.object,
197 graph_name: GraphName::DefaultGraph,
198 });
199 }
200 }
201 InputFormat::Trig => {
202 let mut parser = oxttl::TriGParser::new().for_reader(reader);
203 for q in parser.by_ref() {
204 let q = q.context("TriG parse error")?;
205 handle!(q);
206 }
207 prefixes.extend(
208 parser
209 .prefixes()
210 .map(|(k, v)| (k.to_string(), v.to_string())),
211 );
212 }
213 InputFormat::Nq => {
214 let parser = oxttl::NQuadsParser::new().for_reader(reader);
215 for q in parser {
216 let q = q.context("N-Quads parse error")?;
217 handle!(q);
218 }
219 }
220 }
221
222 Ok(ParseOutcome {
223 total,
224 skipped: 0,
225 bnode_count,
226 prefixes,
227 })
228}
229
230pub fn quad_to_triple(q: &Quad) -> Triple {
232 Triple {
233 subject: q.subject.clone(),
234 predicate: q.predicate.clone(),
235 object: q.object.clone(),
236 }
237}