1use anyhow::{Context, Result};
2use oxrdf::NamedNode;
3use percent_encoding::{AsciiSet, CONTROLS, utf8_percent_encode};
4use std::path::Path;
5
6const URN_NSS: &AsciiSet = &CONTROLS
10 .add(b' ')
11 .add(b'"')
12 .add(b'<')
13 .add(b'>')
14 .add(b'\\')
15 .add(b'^')
16 .add(b'`')
17 .add(b'{')
18 .add(b'|')
19 .add(b'}')
20 .add(b'#')
21 .add(b'%')
22 .add(b'/')
23 .add(b'?')
24 .add(b'&')
25 .add(b'=');
26
/// Fixed prefix of every graph IRI generated by `iri_for`; the percent-encoded
/// file stem is appended directly after the trailing `:`.
const PREFIX: &str = "urn:rdf-compare:source:";
28
/// Derives a short stem from the final component of `path`.
///
/// Steps:
/// 1. Take the file name; if the path has none (e.g. `..` or an empty path)
///    or it is not valid UTF-8, fall back to `"unnamed"`.
/// 2. Strip one trailing `.gz` (ASCII case-insensitive), but only when
///    something remains in front of it — a file literally named `.gz` keeps
///    its name instead of collapsing to an empty stem.
/// 3. Strip the last extension, unless doing so would leave an empty stem
///    (dot-files such as `.hidden` are returned unchanged).
///
/// Examples: `foo.ttl` -> `foo`, `foo.ttl.gz` -> `foo`, `a.b.nt` -> `a.b`,
/// `.hidden` -> `.hidden`, `.gz` -> `.gz`.
fn basename_stem(path: &Path) -> String {
    const GZ_SUFFIX: &[u8] = b".gz";

    let name = path
        .file_name()
        .and_then(|s| s.to_str())
        .unwrap_or("unnamed");

    // Compare the trailing bytes directly instead of allocating a lowercased
    // copy. `split > 0` guarantees a non-empty remainder, and because the
    // matched suffix is pure ASCII, `split` is always a char boundary.
    let bytes = name.as_bytes();
    let split = bytes.len().saturating_sub(GZ_SUFFIX.len());
    let trimmed = if split > 0 && bytes[split..].eq_ignore_ascii_case(GZ_SUFFIX) {
        &name[..split]
    } else {
        name
    };

    match trimmed.rsplit_once('.') {
        Some((stem, _ext)) if !stem.is_empty() => stem.to_string(),
        // No extension, or a leading-dot name: keep the whole thing.
        _ => trimmed.to_string(),
    }
}
46
47fn iri_for(stem: &str) -> Result<NamedNode> {
48 let encoded: String = utf8_percent_encode(stem, URN_NSS).collect();
49 let iri = format!("{}{}", PREFIX, encoded);
50 NamedNode::new(&iri).with_context(|| format!("invalid generated graph IRI: {}", iri))
51}
52
53pub fn resolve_graph_iris(
56 path_a: &Path,
57 path_b: &Path,
58 override_a: Option<&str>,
59 override_b: Option<&str>,
60) -> Result<(NamedNode, NamedNode)> {
61 let a = match override_a {
62 Some(s) => NamedNode::new(s).with_context(|| format!("invalid --graph-a IRI: {}", s))?,
63 None => iri_for(&basename_stem(path_a))?,
64 };
65 let b = match override_b {
66 Some(s) => NamedNode::new(s).with_context(|| format!("invalid --graph-b IRI: {}", s))?,
67 None => iri_for(&basename_stem(path_b))?,
68 };
69
70 if override_a.is_none() && override_b.is_none() && a == b {
72 let stem_a = basename_stem(path_a);
73 let stem_b = basename_stem(path_b);
74 let a2 = iri_for(&format!("{}:1", stem_a))?;
75 let b2 = iri_for(&format!("{}:2", stem_b))?;
76 return Ok((a2, b2));
77 }
78 Ok((a, b))
79}
80
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `resolve_graph_iris` on string paths and unwraps the result, so
    /// each test reads as "inputs -> expected IRIs".
    fn resolve(
        file_a: &str,
        file_b: &str,
        graph_a: Option<&str>,
        graph_b: Option<&str>,
    ) -> (NamedNode, NamedNode) {
        resolve_graph_iris(Path::new(file_a), Path::new(file_b), graph_a, graph_b).unwrap()
    }

    /// Distinct stems map straight onto the generated prefix.
    #[test]
    fn derives_basic_iri() {
        let (first, second) = resolve("foo.ttl", "bar.nt", None, None);
        assert_eq!(first.as_str(), "urn:rdf-compare:source:foo");
        assert_eq!(second.as_str(), "urn:rdf-compare:source:bar");
    }

    /// Same file name in different directories gets positional suffixes.
    #[test]
    fn collision_gets_suffix() {
        let (first, second) = resolve("dir1/data.ttl", "dir2/data.ttl", None, None);
        assert_ne!(first, second);
        assert!(first.as_str().ends_with(":1"));
        assert!(second.as_str().ends_with(":2"));
    }

    /// An explicit override is passed through untouched; the other side is derived.
    #[test]
    fn override_used_verbatim() {
        let (first, second) = resolve("a.ttl", "b.ttl", Some("https://example.com/A"), None);
        assert_eq!(first.as_str(), "https://example.com/A");
        assert_eq!(second.as_str(), "urn:rdf-compare:source:b");
    }

    /// A `.gz` suffix is removed before the real extension is stripped.
    #[test]
    fn handles_gz_double_ext() {
        let (first, _) = resolve("foo.ttl.gz", "b.ttl", None, None);
        assert_eq!(first.as_str(), "urn:rdf-compare:source:foo");
    }
}