Skip to main content

rdf_compare/
cli.rs

1use anyhow::{Result, anyhow, bail};
2use clap::{Args as ClapArgs, Parser, Subcommand, ValueEnum};
3use std::path::{Path, PathBuf};
4
5/// Top-level CLI entry point. Supports the default diff invocation
6/// (`rdf-compare A B …`) and a `serve` subcommand for the web viewer.
7#[derive(Debug, Parser)]
8#[command(version, about, long_about = None)]
9pub struct Cli {
10    #[command(subcommand)]
11    pub command: Option<Command>,
12
13    #[command(flatten)]
14    pub diff: Args,
15}
16
17#[derive(Debug, Subcommand)]
18pub enum Command {
19    /// Start the local web viewer. Files can be preloaded via flags or
20    /// selected interactively in the browser.
21    Serve(ServeArgs),
22}
23
24/// Compare two RDF files and emit the diff as a quad dataset with two named graphs.
25#[derive(Debug, ClapArgs)]
26pub struct Args {
27    /// First (left) RDF file.
28    pub file_a: Option<PathBuf>,
29    /// Second (right) RDF file.
30    pub file_b: Option<PathBuf>,
31
32    /// Override input format for file A (auto-detected from extension by default).
33    #[arg(long = "format-a", value_enum)]
34    pub format_a: Option<InputFormat>,
35
36    /// Override input format for file B (auto-detected from extension by default).
37    #[arg(long = "format-b", value_enum)]
38    pub format_b: Option<InputFormat>,
39
40    /// Output file (defaults to stdout).
41    #[arg(short = 'o', long = "output")]
42    pub output: Option<PathBuf>,
43
44    /// Output serialization.
45    #[arg(long = "output-format", value_enum, default_value_t = OutputFormat::Trig)]
46    pub output_format: OutputFormat,
47
48    /// Override the named-graph IRI for triples only in file A.
49    #[arg(long = "graph-a")]
50    pub graph_a: Option<String>,
51
52    /// Override the named-graph IRI for triples only in file B.
53    #[arg(long = "graph-b")]
54    pub graph_b: Option<String>,
55
56    /// Suppress the summary line on stderr.
57    #[arg(long)]
58    pub quiet: bool,
59
60    /// Exit with code 1 if any differences are found (useful in CI).
61    #[arg(long)]
62    pub ci: bool,
63
64    /// Open the diff in the local web viewer after computing it.
65    #[arg(long)]
66    pub view: bool,
67
68    /// Do not auto-open the system browser when starting the viewer.
69    #[arg(long = "no-open")]
70    pub no_open: bool,
71
72    /// Bind address for the web viewer.
73    #[arg(long, default_value = "127.0.0.1:0")]
74    pub bind: String,
75
76    /// Skip blank-node-bearing triples instead of canonicalising them.
77    /// By default, when blank nodes are present, the W3C RDFC-1.0 algorithm is
78    /// used to assign stable labels before diffing.
79    #[arg(long = "ignore-blank-nodes")]
80    pub ignore_blank_nodes: bool,
81}
82
83/// Arguments accepted by `rdf-compare serve`.
84#[derive(Debug, ClapArgs)]
85pub struct ServeArgs {
86    /// First (left) RDF file to preload.
87    #[arg(long = "file-a", requires = "file_b")]
88    pub file_a: Option<PathBuf>,
89    /// Second (right) RDF file to preload.
90    #[arg(long = "file-b", requires = "file_a")]
91    pub file_b: Option<PathBuf>,
92    /// Override input format for file A.
93    #[arg(long = "format-a", value_enum)]
94    pub format_a: Option<InputFormat>,
95    /// Override input format for file B.
96    #[arg(long = "format-b", value_enum)]
97    pub format_b: Option<InputFormat>,
98    /// Pre-existing diff file (TriG or N-Quads) to load instead of recomputing.
99    #[arg(long, conflicts_with_all = ["file_a", "file_b"])]
100    pub diff: Option<PathBuf>,
101    /// Override the named-graph IRI for the A side.
102    #[arg(long = "graph-a")]
103    pub graph_a: Option<String>,
104    /// Override the named-graph IRI for the B side.
105    #[arg(long = "graph-b")]
106    pub graph_b: Option<String>,
107    /// Bind address for the web viewer.
108    #[arg(long, default_value = "127.0.0.1:0")]
109    pub bind: String,
110    /// Do not auto-open the system browser.
111    #[arg(long = "no-open")]
112    pub no_open: bool,
113    /// Skip blank-node-bearing triples instead of canonicalising them.
114    #[arg(long = "ignore-blank-nodes")]
115    pub ignore_blank_nodes: bool,
116}
117
118#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
119pub enum InputFormat {
120    /// N-Triples (.nt)
121    Nt,
122    /// Turtle (.ttl)
123    Ttl,
124    /// RDF/XML (.rdf, .owl, .xml)
125    #[value(alias = "xml")]
126    Rdf,
127    /// TriG (.trig) — graph component is dropped
128    Trig,
129    /// N-Quads (.nq) — graph component is dropped
130    Nq,
131}
132
133#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
134pub enum OutputFormat {
135    /// TriG (Turtle-based, named graphs)
136    Trig,
137    /// N-Quads (line-based)
138    Nq,
139}
140
141/// Detect input format from a path. Strips a trailing `.gz` first.
142pub fn detect_format(path: &Path) -> Result<InputFormat> {
143    let name = path
144        .file_name()
145        .and_then(|s| s.to_str())
146        .ok_or_else(|| anyhow!("path has no filename: {}", path.display()))?
147        .to_ascii_lowercase();
148
149    let stem = name.strip_suffix(".gz").unwrap_or(&name);
150    let ext = stem.rsplit_once('.').map(|(_, e)| e).unwrap_or("");
151
152    match ext {
153        "nt" | "ntriples" => Ok(InputFormat::Nt),
154        "ttl" | "turtle" => Ok(InputFormat::Ttl),
155        "rdf" | "owl" | "xml" => Ok(InputFormat::Rdf),
156        "trig" => Ok(InputFormat::Trig),
157        "nq" | "nquads" => Ok(InputFormat::Nq),
158        "" => bail!(
159            "could not detect RDF format for {} (no extension); use --format-a/--format-b",
160            path.display()
161        ),
162        other => bail!(
163            "unknown RDF extension '.{}' for {}; use --format-a/--format-b",
164            other,
165            path.display()
166        ),
167    }
168}
169
// Unit tests for `detect_format`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn detects_basic_extensions() {
        assert_eq!(detect_format(Path::new("a.ttl")).unwrap(), InputFormat::Ttl);
        assert_eq!(detect_format(Path::new("a.nt")).unwrap(), InputFormat::Nt);
        assert_eq!(detect_format(Path::new("a.rdf")).unwrap(), InputFormat::Rdf);
        assert_eq!(
            detect_format(Path::new("a.trig")).unwrap(),
            InputFormat::Trig
        );
        assert_eq!(detect_format(Path::new("a.nq")).unwrap(), InputFormat::Nq);
    }

    #[test]
    fn detects_gzipped_extensions() {
        assert_eq!(
            detect_format(Path::new("a.ttl.gz")).unwrap(),
            InputFormat::Ttl
        );
        assert_eq!(
            detect_format(Path::new("a.nt.gz")).unwrap(),
            InputFormat::Nt
        );
    }

    #[test]
    fn detects_aliases_and_ignores_case() {
        // The file name is lowercased before matching.
        assert_eq!(detect_format(Path::new("A.TTL")).unwrap(), InputFormat::Ttl);
        assert_eq!(detect_format(Path::new("a.owl")).unwrap(), InputFormat::Rdf);
        assert_eq!(
            detect_format(Path::new("a.xml.gz")).unwrap(),
            InputFormat::Rdf
        );
        assert_eq!(
            detect_format(Path::new("a.nquads")).unwrap(),
            InputFormat::Nq
        );
    }

    #[test]
    fn rejects_unknown_extension() {
        assert!(detect_format(Path::new("a.foo")).is_err());
        assert!(detect_format(Path::new("noext")).is_err());
    }

    #[test]
    fn rejects_gz_without_inner_extension() {
        // After `.gz` is stripped nothing is left to classify.
        assert!(detect_format(Path::new("a.gz")).is_err());
    }
}