From d44616bd72d4c4c40297617065f3f34c096001f7 Mon Sep 17 00:00:00 2001 From: FyloZ Date: Wed, 28 Jun 2023 23:12:19 -0400 Subject: [PATCH] Reworked diff algorithm to work with streams --- src/diff.rs | 195 +++++++++++++++++++++---------------- src/main.rs | 16 ++- test-data/config-old.jsonc | 65 +++++++++++++ test-data/config.jsonc | 65 +++++++++++++ 4 files changed, 253 insertions(+), 88 deletions(-) create mode 100644 test-data/config-old.jsonc create mode 100644 test-data/config.jsonc diff --git a/src/diff.rs b/src/diff.rs index e01fcaf..c43a793 100644 --- a/src/diff.rs +++ b/src/diff.rs @@ -1,7 +1,8 @@ // Based on https://github.com/mathertel/Diff // "An O(ND) Difference Algorithm and its Variations" by Eugene Myers Algorithmica Vol. 1 No. 2, 1986, p 251. - use std::collections::HashMap; +use std::io::{BufRead}; +use crate::diff::IndexDirection::{None, LeftDown, RightUp}; #[derive(Debug)] pub struct DiffItem { @@ -22,44 +23,84 @@ struct SmsData { y: usize, } -struct DiffVec { - data: Vec, +struct SmsBounds { + lower_a: usize, + lower_b: usize, + upper_a: usize, + upper_b: usize, + max_d: usize, + down_k: i32, + up_k: i32, + down_offset: usize, + up_offset: usize, + is_delta_odd: bool, } -impl DiffVec { - fn get(&self, index: usize) -> usize { - self.data[index] - } +impl SmsBounds { + fn from(lower_a: usize, lower_b: usize, upper_a: usize, upper_b: usize) -> Self { + let max = upper_a + upper_b + 1; + let max_d = (upper_a - lower_a + upper_b - lower_b) / 2 + 1; - fn get_i32(&self, index: i32) -> usize { - if index < 0 { - panic!("Got index < 0"); + let down_k = lower_a as i32 - lower_b as i32; + let up_k = upper_a as i32 - upper_b as i32; + + let down_offset = (max as i32 - down_k) as usize; + let up_offset = (max as i32 - up_k) as usize; + + let is_delta_odd = ((upper_a + lower_a + upper_b + lower_b) & 1) != 0; + + SmsBounds { + lower_a, + lower_b, + upper_a, + upper_b, + max_d, + down_k, + up_k, + down_offset, + up_offset, + is_delta_odd, } - - self.get(index as usize) } - fn set(&mut self, index: usize, val: usize) { - self.data[index] = val; + fn get_down_index(&self, k: i32, dir: IndexDirection) -> usize { + self.get_index(self.down_offset, k, dir) } - fn set_i32(&mut self, index: i32, val: usize) { - if index < 0 { - panic!("Got index < 0"); - } + fn get_up_index(&self, k: i32, dir: IndexDirection) -> usize { + self.get_index(self.up_offset, k, dir) + } - self.set(index as usize, val); + fn get_index(&self, offset: usize, k: i32, dir: IndexDirection) -> usize { + (offset as i32 + k + dir as i32) as usize } } -pub fn diff(a: &str, b: &str) -> Vec { - let mut existing_hashes: HashMap<&str, usize> = HashMap::new(); +#[repr(i32)] +enum IndexDirection { + None = 0, + LeftDown = 1, // Down: Down, Up: Left + RightUp = -1, // Down: Up, Up: Right +} + +// https://stackoverflow.com/questions/54035728/how-to-add-a-negative-i32-number-to-an-usize-variable +fn add_i32(index: usize, offset: i32) -> usize { + if offset.is_negative() { + index - offset.wrapping_abs() as u32 as usize + } else { + index + offset as usize + } +} + +pub fn diff(a: &mut T, b: &mut T) -> Vec + where T: BufRead { + let mut existing_hashes: HashMap = HashMap::new(); let mut data_a = diff_data(a, &mut existing_hashes); let mut data_b = diff_data(b, &mut existing_hashes); let max = data_a.length + data_b.length; - let mut down_vector = DiffVec { data: vec![0usize; 2 * max + 2] }; - let mut up_vector = DiffVec { data: vec![0usize; 2 * max + 2] }; + let mut down_vector = vec![0usize; 2 * max + 2]; + let mut up_vector = vec![0usize; 2 * max + 2]; let upper_a = data_a.length; let upper_b = data_b.length; @@ -72,8 +113,9 @@ pub fn diff(a: &str, b: &str) -> Vec { create_diffs(&data_a, &data_b) } -fn diff_data<'a>(text: &'a str, existing_hashes: &mut HashMap<&'a str, usize>) -> DiffData { - let codes = diff_codes(text, existing_hashes); +fn diff_data(reader: &mut T, existing_hashes: &mut HashMap) -> DiffData + where T: BufRead { + let codes = diff_codes(reader, existing_hashes); let length = codes.len(); DiffData { @@ -83,21 +125,24 @@ fn diff_data<'a>(text: &'a str, existing_hashes: &mut HashMap<&'a str, usize>) - } } -fn diff_codes<'a>(text: &'a str, existing_hashes: &mut HashMap<&'a str, usize>) -> Vec { - let lines: Vec<&str> = text.split('\n').collect(); - - let mut codes = vec![0usize; lines.len()]; +fn diff_codes(reader: &mut T, existing_hashes: &mut HashMap) -> Vec + where T: BufRead { + let mut codes = Vec::new(); let mut next_code = existing_hashes.len() + 1; - for i in 0..lines.len() { - let line = lines[i]; + loop { + let mut line = String::new(); + let read_res = reader.read_line(&mut line).expect("Failed to read BufRead"); + if read_res == 0 { + break; + } - if !existing_hashes.contains_key(line) { + if !existing_hashes.contains_key(&line) { existing_hashes.insert(line, next_code); - codes[i] = next_code; + codes.push(next_code); next_code += 1; } else { - codes[i] = existing_hashes[line]; + codes.push(existing_hashes[&line]); } } @@ -105,7 +150,7 @@ fn diff_codes<'a>(text: &'a str, existing_hashes: &mut HashMap<&'a str, usize>) } // Longest Common-Subsequence -fn lcs(data_a: &mut DiffData, mut lower_a: usize, mut upper_a: usize, data_b: &mut DiffData, mut lower_b: usize, mut upper_b: usize, down_vector: &mut DiffVec, up_vector: &mut DiffVec) { +fn lcs(data_a: &mut DiffData, mut lower_a: usize, mut upper_a: usize, data_b: &mut DiffData, mut lower_b: usize, mut upper_b: usize, down_vector: &mut Vec, up_vector: &mut Vec) { while lower_a < upper_a && lower_b < upper_b && data_a.codes[lower_a] == data_b.codes[lower_b] { lower_a += 1; lower_b += 1; @@ -130,7 +175,8 @@ fn lcs(data_a: &mut DiffData, mut lower_a: usize, mut upper_a: usize, data_b: &m } } else { // Find the middle snake and length of an optimal path for A and B - let sms = sms(&data_a, lower_a, upper_a, &data_b, lower_b, upper_b, down_vector, up_vector); + let sms_bounds = SmsBounds::from(lower_a, lower_b, upper_a, upper_b); + let sms = sms(&data_a, &data_b, &sms_bounds, down_vector, up_vector); // The path is from lower_x to (x, y) and (x, y) to upper_x lcs(data_a, lower_a, sms.x, data_b, lower_b, sms.y, down_vector, up_vector); @@ -138,96 +184,77 @@ fn lcs(data_a: &mut DiffData, mut lower_a: usize, mut upper_a: usize, data_b: &m } } -// https://stackoverflow.com/questions/54035728/how-to-add-a-negative-i32-number-to-an-usize-variable -fn add_i32(index: usize, offset: i32) -> usize { - if offset.is_negative() { - index - offset.wrapping_abs() as u32 as usize - } else { - index + offset as usize - } -} - // Shortest Middle Snake -fn sms(data_a: &DiffData, lower_a: usize, upper_a: usize, data_b: &DiffData, lower_b: usize, upper_b: usize, down_vector: &mut DiffVec, up_vector: &mut DiffVec) -> SmsData { - let max = upper_a as i32 + upper_b as i32 + 1; +fn sms(data_a: &DiffData, data_b: &DiffData, bounds: &SmsBounds, down_vector: &mut Vec, up_vector: &mut Vec) -> SmsData { + down_vector[bounds.get_down_index(bounds.down_k, LeftDown)] = bounds.lower_a; + up_vector[bounds.get_up_index(bounds.up_k, RightUp)] = bounds.upper_a; - let down_k = lower_a as i32 - lower_b as i32; - let up_k = upper_a as i32 - upper_b as i32; - - let delta = (upper_a - lower_a) - (upper_b - lower_b); - let odd_delta = (delta & 1) != 0; - - let down_offset = max - down_k; - let up_offset = max - up_k; - - let max_d = ((upper_a - lower_a + upper_b - lower_b) / 2) as i32 + 1; - - down_vector.set_i32(down_offset + down_k + 1, lower_a); - up_vector.set_i32(up_offset + up_k - 1, upper_a); - - for d in 0..=max_d { + for d in 0..=bounds.max_d as i32 { // Extend the forward path - for k in ((down_k - d)..=(down_k + d)).step_by(2) { + for k in ((bounds.down_k - d)..=(bounds.down_k + d)).step_by(2) { let mut x; let mut y; - if k == down_k - d { + + if k == bounds.down_k - d { // Down - x = down_vector.get_i32(down_offset + k + 1); + x = down_vector[bounds.get_down_index(k, LeftDown)]; } else { // Right - x = down_vector.get_i32(down_offset + k - 1) + 1; - if k < down_k + d && down_vector.get_i32(down_offset + k + 1) >= x { + x = down_vector[bounds.get_down_index(k, RightUp)] + 1; + if k < bounds.down_k + d && down_vector[bounds.get_down_index(k, LeftDown)] >= x { // Down - x = down_vector.get_i32(down_offset + k + 1); + x = down_vector[bounds.get_down_index(k, LeftDown)]; } } y = add_i32(x, -k); // Find the end of the furthest reaching forward D-path in diagonal k. - while x < upper_a && y < upper_b && data_a.codes[x] == data_b.codes[y] { + while x < bounds.upper_a && y < bounds.upper_b && data_a.codes[x] == data_b.codes[y] { x += 1; y += 1; } - down_vector.set_i32(down_offset + k, x); + down_vector[bounds.get_down_index(k, None)] = x; // Overlap ? - if odd_delta && up_k - d < k && k < up_k + d && up_vector.get_i32(up_offset + k) <= down_vector.get_i32(down_offset + k) { - let x = down_vector.get_i32(down_offset + k); - let y = add_i32(down_vector.get_i32(down_offset + k), -k); + if bounds.is_delta_odd && bounds.up_k - d < k && k < bounds.up_k + d && + up_vector[bounds.get_up_index(k, None)] <= down_vector[bounds.get_down_index(k, None)] { + let x = down_vector[bounds.get_down_index(k, None)]; + let y = add_i32(down_vector[bounds.get_down_index(k, None)], -k); return SmsData { x, y }; } } // Extend the reverse path - for k in ((up_k - d)..=(up_k + d)).step_by(2) { + for k in ((bounds.up_k - d)..=(bounds.up_k + d)).step_by(2) { let mut x; let mut y; - if k == up_k + d { + if k == bounds.up_k + d { // Up - x = up_vector.get_i32(up_offset + k - 1); + x = up_vector[bounds.get_up_index(k, RightUp)]; } else { // Left - x = up_vector.get_i32(up_offset + k + 1) - 1; - if k > up_k - d && up_vector.get_i32(up_offset + k - 1) < x { + x = up_vector[bounds.get_up_index(k, LeftDown)] - 1; + if k > bounds.up_k - d && up_vector[bounds.get_up_index(k, RightUp)] < x { // Up - x = up_vector.get_i32(up_offset + k - 1); + x = up_vector[bounds.get_up_index(k, RightUp)]; } } y = add_i32(x, -k); - while x > lower_a && y > lower_b && data_a.codes[x - 1] == data_b.codes[y - 1] { + while x > bounds.lower_a && y > bounds.lower_b && data_a.codes[x - 1] == data_b.codes[y - 1] { x -= 1; y -= 1; } - up_vector.set_i32(up_offset + k, x); + up_vector[bounds.get_up_index(k, None)] = x; // Overlap ? - if !odd_delta && down_k - d <= k && k <= down_k + d && up_vector.get_i32(up_offset + k) <= down_vector.get_i32(down_offset + k) { - let x = down_vector.get_i32(down_offset + k); - let y = add_i32(down_vector.get_i32(down_offset + k), -k); + if !bounds.is_delta_odd && bounds.down_k - d <= k && k <= bounds.down_k + d && + up_vector[bounds.get_up_index(k, None)] <= down_vector[bounds.get_down_index(k, None)] { + let x = down_vector[bounds.get_down_index(k, None)]; + let y = add_i32(down_vector[bounds.get_down_index(k, None)], -k); return SmsData { x, y }; } } diff --git a/src/main.rs b/src/main.rs index c410a89..c97eebf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,13 +1,21 @@ +use std::fs::File; +use std::io; +use std::io::{BufRead, BufReader}; use crate::diff::diff; mod matrix; mod lcs; mod diff; -fn main() { - let a = "abcabba\nlkajsdfasdf\nasdfasdfasdf\nlasjkdf"; - let b = "abcabba\ncbabasdfasdf\nlasjkdf\nope"; +fn main() -> io::Result<()> { + let file = File::open("test-data/config.jsonc")?; + let mut reader_a = BufReader::new(file); - let result = diff(a, b); + let file = File::open("test-data/config-old.jsonc")?; + let mut reader_b = BufReader::new(file); + + let result = diff(&mut reader_a, &mut reader_b); dbg!(result); + + Ok(()) } diff --git a/test-data/config-old.jsonc b/test-data/config-old.jsonc new file mode 100644 index 0000000..5ff8304 --- /dev/null +++ b/test-data/config-old.jsonc @@ -0,0 +1,65 @@ +{ + "position": "top", + "modules-left": ["sway/workspaces"], + "modules-right": ["network", "pulseaudio", "clock"], + // Modules configuration + "sway/workspaces": { + "disable-scroll": true, + "all-outputs": true, + "format": "{icon}", + "persistent_workspaces": { + "1": [], + "2": [], + "3": [], + "4": [], + "5": [], + "6": [], + "7": [], + "8": [], + "9": [], + "10": [] + }, + "format-icons": { + "default": "", + "urgent": "", + "focused": "" + } + }, + "custom/spotify": { + "format": "{}", + "interval": 1, + "exec-if": "pgrep spotify", + "on-click": "playerctl -p spotify play-pause", + "on-scroll-up": "playerctl -p spotify previous", + "on-scroll-down": "playerctl -p spotify next", + "tooltip": false, + "escape": true, + "MAX-LENGTH": 60, + "exec": "/home/loki/bin/spotify.sh" + }, + "clock": { + "format": "{:%a %d %H:%M} ", + "tooltip-format": "{:%Y %B}\n{calendar}" + }, + "network": { + "forletmat-disconnected": "Disconnected ", + "format-ethernet": "{ipaddr} " + }, + "pulseaudio": { + "format": "{volume}% {icon}", + "format-bluetooth": "{volume}% {icon}", + "format-muted": "", + "format-icons": { + "headphone": "", + "hands-free": "", + "headset": "", + "phone": "", + "portable": "", + "car": "", + "default": ["", ""] + }, + "scroll-step": 1, + "on-click": "pavucontrol", + "ignored-sinks": ["Easy Effects Sink"] + } +} \ No newline at end of file diff --git a/test-data/config.jsonc b/test-data/config.jsonc new file mode 100644 index 0000000..ab580c4 --- /dev/null +++ b/test-data/config.jsonc @@ -0,0 +1,65 @@ +{ + "position": "top", + "modules-left": ["sway/workspaces"], + "modules-right": ["network", "pulseaudio", "clock"], + // Modules configuration + "sway/workspaces": { + "disable-scroll": true, + "all-outputs": true, + "format": "{icon}", + "persistent_workspaces": { + "1": [], + "2": [], + "3": [], + "4": [], + "5": [], + "6": [], + "7": [], + "8": [], + "9": [], + "10": [] + }, + "format-icons": { + "default": "", + "urgent": "", + "focused": "" + } + }, + "custom/spotify": { + "format": "{}", + "interval": 1, + "exec-if": "pgrep spotify", + "on-click": "playerctl -p spotify play-pause", + "on-scroll-up": "playerctl -p spotify previous", + "on-scroll-down": "playerctl -p spotify next", + "tooltip": false, + "escape": true, + "MAX-LENGTH": 60, + "exec": "/home/loki/bin/spotify.sh" + }, + "clock": { + "format": "{:%a %d %H:%M} ", + "tooltip-format": "{:%Y %B}\n{calendar}" + }, + "network": { + "format-disconnected": "Disconnected ", + "format-ethernet": "{ipaddr} " + }, + "pulseaudio": { + "format": "{volume}% {icon}", + "format-bluetooth": "{volume}% {icon}", + "format-muted": "", + "format-icons": { + "headphone": "", + "hands-free": "", + "headset": "", + "phone": "", + "portable": "", + "car": "", + "default": ["", ""] + }, + "scroll-step": 1, + "on-click": "pavucontrol", + "ignored-sinks": ["Easy Effects Sink"] + } +} \ No newline at end of file