Skip to content

Commit 64dad0c

Browse files
authored
Merge pull request #7144 from sylvestre/comm2
comm: implement the ordering check
2 parents 05c4ce0 + b50a012 commit 64dad0c

File tree

3 files changed

+291
-24
lines changed

3 files changed

+291
-24
lines changed

src/uu/comm/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ path = "src/comm.rs"
1818

1919
[dependencies]
2020
clap = { workspace = true }
21-
uucore = { workspace = true }
21+
uucore = { workspace = true, features = ["fs"] }
2222

2323
[[bin]]
2424
name = "comm"

src/uu/comm/src/comm.rs

Lines changed: 155 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
55

6-
// spell-checker:ignore (ToDO) delim mkdelim
6+
// spell-checker:ignore (ToDO) delim mkdelim pairable
77

88
use std::cmp::Ordering;
99
use std::fs::{metadata, File};
10-
use std::io::{self, stdin, BufRead, BufReader, Stdin};
10+
use std::io::{self, stdin, BufRead, BufReader, Read, Stdin};
1111
use uucore::error::{FromIo, UResult, USimpleError};
12+
use uucore::fs::paths_refer_to_same_file;
1213
use uucore::line_ending::LineEnding;
1314
use uucore::{format_usage, help_about, help_usage};
1415

@@ -27,6 +28,30 @@ mod options {
2728
pub const FILE_2: &str = "FILE2";
2829
pub const TOTAL: &str = "total";
2930
pub const ZERO_TERMINATED: &str = "zero-terminated";
31+
pub const CHECK_ORDER: &str = "check-order";
32+
pub const NO_CHECK_ORDER: &str = "nocheck-order";
33+
}
34+
35+
/// Identifies which of the two input files an ordering diagnostic refers to.
#[derive(Debug, Clone, Copy)]
enum FileNumber {
    /// The first input file.
    One,
    /// The second input file.
    Two,
}

impl FileNumber {
    /// Returns the file's ordinal as text (`"1"` or `"2"`), as it is
    /// interpolated into the "not in sorted order" diagnostic.
    fn as_str(&self) -> &'static str {
        match *self {
            Self::One => "1",
            Self::Two => "2",
        }
    }
}
49+
50+
struct OrderChecker {
51+
last_line: Vec<u8>,
52+
file_num: FileNumber,
53+
check_order: bool,
54+
has_error: bool,
3055
}
3156

3257
enum Input {
@@ -60,7 +85,74 @@ impl LineReader {
6085
}
6186
}
6287

63-
fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) {
88+
impl OrderChecker {
89+
fn new(file_num: FileNumber, check_order: bool) -> Self {
90+
Self {
91+
last_line: Vec::new(),
92+
file_num,
93+
check_order,
94+
has_error: false,
95+
}
96+
}
97+
98+
fn verify_order(&mut self, current_line: &[u8]) -> bool {
99+
if self.last_line.is_empty() {
100+
self.last_line = current_line.to_vec();
101+
return true;
102+
}
103+
104+
let is_ordered = current_line >= &self.last_line;
105+
if !is_ordered && !self.has_error {
106+
eprintln!(
107+
"comm: file {} is not in sorted order",
108+
self.file_num.as_str()
109+
);
110+
self.has_error = true;
111+
}
112+
113+
self.last_line = current_line.to_vec();
114+
is_ordered || !self.check_order
115+
}
116+
}
117+
118+
/// Reports whether the files at `path1` and `path2` have byte-identical
/// contents.
///
/// The lengths reported by the filesystem are compared first, so files of
/// different sizes are rejected without reading them. Otherwise both files
/// are read in 8 KiB chunks and compared chunk by chunk.
///
/// # Errors
///
/// Returns any I/O error raised while querying metadata, opening, or reading
/// either file.
pub fn are_files_identical(path1: &str, path2: &str) -> io::Result<bool> {
    /// Reads until `buf` is full or the reader reaches end of input and
    /// returns the number of bytes stored.
    ///
    /// A single `read` call may return fewer bytes than requested, so the
    /// two files must not be compared on raw `read` return values.
    fn fill_chunk(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
        let mut filled = 0;
        while filled < buf.len() {
            match reader.read(&mut buf[filled..]) {
                Ok(0) => break,
                Ok(count) => filled += count,
                // An interrupted read transferred nothing; just retry.
                Err(err) if err.kind() == io::ErrorKind::Interrupted => {}
                Err(err) => return Err(err),
            }
        }
        Ok(filled)
    }

    // Cheap rejection: differing sizes can never have identical contents.
    let metadata1 = std::fs::metadata(path1)?;
    let metadata2 = std::fs::metadata(path2)?;
    if metadata1.len() != metadata2.len() {
        return Ok(false);
    }

    let mut file1 = File::open(path1)?;
    let mut file2 = File::open(path2)?;

    let mut buffer1 = [0u8; 8192];
    let mut buffer2 = [0u8; 8192];

    loop {
        let len1 = fill_chunk(&mut file1, &mut buffer1)?;
        let len2 = fill_chunk(&mut file2, &mut buffer2)?;

        // Slices of different lengths compare unequal, which also covers one
        // file ending before the other.
        if buffer1[..len1] != buffer2[..len2] {
            return Ok(false);
        }

        // Both readers hit end of input together without a mismatch.
        if len1 == 0 {
            return Ok(true);
        }
    }
}
154+
155+
fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) -> UResult<()> {
64156
let width_col_1 = usize::from(!opts.get_flag(options::COLUMN_1));
65157
let width_col_2 = usize::from(!opts.get_flag(options::COLUMN_2));
66158

@@ -76,6 +168,26 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)
76168
let mut total_col_2 = 0;
77169
let mut total_col_3 = 0;
78170

171+
let check_order = opts.get_flag(options::CHECK_ORDER);
172+
let no_check_order = opts.get_flag(options::NO_CHECK_ORDER);
173+
174+
// Determine if we should perform order checking
175+
let should_check_order = !no_check_order
176+
&& (check_order
177+
|| if let (Some(file1), Some(file2)) = (
178+
opts.get_one::<String>(options::FILE_1),
179+
opts.get_one::<String>(options::FILE_2),
180+
) {
181+
!(paths_refer_to_same_file(file1, file2, true)
182+
|| are_files_identical(file1, file2).unwrap_or(false))
183+
} else {
184+
true
185+
});
186+
187+
let mut checker1 = OrderChecker::new(FileNumber::One, check_order);
188+
let mut checker2 = OrderChecker::new(FileNumber::Two, check_order);
189+
let mut input_error = false;
190+
79191
while na.is_ok() || nb.is_ok() {
80192
let ord = match (na.is_ok(), nb.is_ok()) {
81193
(false, true) => Ordering::Greater,
@@ -91,6 +203,9 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)
91203

92204
match ord {
93205
Ordering::Less => {
206+
if should_check_order && !checker1.verify_order(ra) {
207+
break;
208+
}
94209
if !opts.get_flag(options::COLUMN_1) {
95210
print!("{}", String::from_utf8_lossy(ra));
96211
}
@@ -99,6 +214,9 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)
99214
total_col_1 += 1;
100215
}
101216
Ordering::Greater => {
217+
if should_check_order && !checker2.verify_order(rb) {
218+
break;
219+
}
102220
if !opts.get_flag(options::COLUMN_2) {
103221
print!("{delim_col_2}{}", String::from_utf8_lossy(rb));
104222
}
@@ -107,6 +225,10 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)
107225
total_col_2 += 1;
108226
}
109227
Ordering::Equal => {
228+
if should_check_order && (!checker1.verify_order(ra) || !checker2.verify_order(rb))
229+
{
230+
break;
231+
}
110232
if !opts.get_flag(options::COLUMN_3) {
111233
print!("{delim_col_3}{}", String::from_utf8_lossy(ra));
112234
}
@@ -117,12 +239,27 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)
117239
total_col_3 += 1;
118240
}
119241
}
242+
243+
// Track if we've seen any order errors
244+
if (checker1.has_error || checker2.has_error) && !input_error && !check_order {
245+
input_error = true;
246+
}
120247
}
121248

122249
if opts.get_flag(options::TOTAL) {
123250
let line_ending = LineEnding::from_zero_flag(opts.get_flag(options::ZERO_TERMINATED));
124251
print!("{total_col_1}{delim}{total_col_2}{delim}{total_col_3}{delim}total{line_ending}");
125252
}
253+
254+
if should_check_order && (checker1.has_error || checker2.has_error) {
255+
// Print the input error message once at the end
256+
if input_error {
257+
eprintln!("comm: input is not in sorted order");
258+
}
259+
Err(USimpleError::new(1, ""))
260+
} else {
261+
Ok(())
262+
}
126263
}
127264

128265
fn open_file(name: &str, line_ending: LineEnding) -> io::Result<LineReader> {
@@ -170,8 +307,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
170307
"" => "\0",
171308
delim => delim,
172309
};
173-
comm(&mut f1, &mut f2, delim, &matches);
174-
Ok(())
310+
311+
comm(&mut f1, &mut f2, delim, &matches)
175312
}
176313

177314
pub fn uu_app() -> Command {
@@ -233,4 +370,17 @@ pub fn uu_app() -> Command {
233370
.help("output a summary")
234371
.action(ArgAction::SetTrue),
235372
)
373+
.arg(
374+
Arg::new(options::CHECK_ORDER)
375+
.long(options::CHECK_ORDER)
376+
.help("check that the input is correctly sorted, even if all input lines are pairable")
377+
.action(ArgAction::SetTrue),
378+
)
379+
.arg(
380+
Arg::new(options::NO_CHECK_ORDER)
381+
.long(options::NO_CHECK_ORDER)
382+
.help("do not check that the input is correctly sorted")
383+
.action(ArgAction::SetTrue)
384+
.conflicts_with(options::CHECK_ORDER),
385+
)
236386
}

0 commit comments

Comments
 (0)