Compare commits

...

7 Commits

Author SHA1 Message Date
FyloZ d44616bd72
Reworked diff algorithm to work with streams 2023-06-28 23:12:19 -04:00
william 93d460ae5d Working LCS/SMS :) 2023-06-18 21:52:49 -04:00
FyloZ 7cc7d3bb76
non-working optimized lcs implementation 2023-06-08 22:58:08 -04:00
william d793297ad5 LCS 2023-06-04 20:26:40 -04:00
FyloZ c124d6ccdb
LCS diff (WIP) 2023-05-31 22:31:49 -04:00
william 71210dfdac Basic diff 2023-05-28 21:43:20 -04:00
william f8d1608036 Added matrices 2023-05-28 11:02:28 -04:00
6 changed files with 745 additions and 2 deletions

327
src/diff.rs Normal file
View File

@ -0,0 +1,327 @@
// Based on https://github.com/mathertel/Diff
// "An O(ND) Difference Algorithm and its Variations" by Eugene Myers Algorithmica Vol. 1 No. 2, 1986, p 251.
use std::collections::HashMap;
use std::io::{BufRead};
use crate::diff::IndexDirection::{None, LeftDown, RightUp};
/// One contiguous change between input A and input B.
///
/// All positions and counts are expressed in lines; positions are zero-based.
/// The fields are public so that callers outside this module can actually
/// consume the result of [`diff`] instead of only being able to `Debug`-print it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DiffItem {
    /// Index of the first line of A covered by this change.
    pub start_a: usize,
    /// Index of the first line of B covered by this change.
    pub start_b: usize,
    /// Number of lines removed from A, starting at `start_a`.
    pub deleted_a: usize,
    /// Number of lines added in B, starting at `start_b`.
    pub inserted_b: usize,
}
/// Working data for one of the two inputs being compared.
struct DiffData {
    /// Number of lines read from the input (`diff_data` sets it to `codes.len()`).
    length: usize,
    /// One numeric code per line; lines with identical text share the same code.
    codes: Vec<usize>,
    /// `modified[i]` is set to `true` when line `i` is part of a deletion/insertion.
    /// `diff_data` allocates it with `length + 2` entries.
    modified: Vec<bool>,
}
/// Point returned by `sms`; `lcs` splits its search range at this point.
struct SmsData {
    /// Position in A (used by `lcs` as an upper/lower bound for A).
    x: usize,
    /// Position in B (used by `lcs` as an upper/lower bound for B).
    y: usize,
}
/// Search window of one shortest-middle-snake run plus the constants derived from it.
struct SmsBounds {
    /// Inclusive start of the window in A.
    lower_a: usize,
    /// Inclusive start of the window in B.
    lower_b: usize,
    /// Exclusive end of the window in A.
    upper_a: usize,
    /// Exclusive end of the window in B.
    upper_b: usize,
    /// Largest edit distance `d` the search loop iterates up to.
    max_d: usize,
    /// Diagonal (`x - y`) on which the forward search starts.
    down_k: i32,
    /// Diagonal (`x - y`) on which the reverse search starts.
    up_k: i32,
    /// Added to a diagonal number to obtain its slot in the forward vector.
    down_offset: usize,
    /// Added to a diagonal number to obtain its slot in the reverse vector.
    up_offset: usize,
    /// Whether the difference between the two window lengths is odd.
    is_delta_odd: bool,
}

impl SmsBounds {
    /// Builds the bounds for the window `lower_a..upper_a` of A and `lower_b..upper_b` of B.
    fn from(lower_a: usize, lower_b: usize, upper_a: usize, upper_b: usize) -> Self {
        // Both offsets are measured from the same centre slot.
        let centre = (upper_a + upper_b + 1) as i32;
        let down_k = lower_a as i32 - lower_b as i32;
        let up_k = upper_a as i32 - upper_b as i32;

        Self {
            lower_a,
            lower_b,
            upper_a,
            upper_b,
            max_d: (upper_a - lower_a + upper_b - lower_b) / 2 + 1,
            down_k,
            up_k,
            down_offset: (centre - down_k) as usize,
            up_offset: (centre - up_k) as usize,
            is_delta_odd: (upper_a + lower_a + upper_b + lower_b) % 2 == 1,
        }
    }

    /// Slot of diagonal `k` (or of its neighbour selected by `dir`) in the forward vector.
    fn get_down_index(&self, k: i32, dir: IndexDirection) -> usize {
        self.get_index(self.down_offset, k, dir)
    }

    /// Slot of diagonal `k` (or of its neighbour selected by `dir`) in the reverse vector.
    fn get_up_index(&self, k: i32, dir: IndexDirection) -> usize {
        self.get_index(self.up_offset, k, dir)
    }

    /// Shared slot computation: `offset + k`, shifted by the numeric value of `dir`.
    fn get_index(&self, offset: usize, k: i32, dir: IndexDirection) -> usize {
        let slot = offset as i32 + k + dir as i32;
        slot as usize
    }
}

/// Selects a diagonal itself or one of its two neighbours; the discriminant is
/// the shift applied to the diagonal number.
#[repr(i32)]
enum IndexDirection {
    /// The diagonal itself.
    None = 0,
    /// Neighbour `k + 1`. Forward ("down") pass: a step down. Reverse ("up") pass: a step left.
    LeftDown = 1,
    /// Neighbour `k - 1`. Forward ("down") pass: a step up. Reverse ("up") pass: a step right.
    RightUp = -1,
}
// https://stackoverflow.com/questions/54035728/how-to-add-a-negative-i32-number-to-an-usize-variable
/// Returns `index` shifted by the signed amount `offset`.
///
/// A negative `offset` is applied as a subtraction of its magnitude, so the
/// usual `usize` overflow rules apply when the result would leave the range.
fn add_i32(index: usize, offset: i32) -> usize {
    match offset {
        // `wrapping_abs` + `as u32` yields the magnitude even for `i32::MIN`.
        backwards if backwards.is_negative() => index - (backwards.wrapping_abs() as u32 as usize),
        forwards => index + forwards as usize,
    }
}
/// Computes the line-based differences between the two readers.
///
/// Both inputs are read to the end and every distinct line is mapped to a
/// numeric code shared between the two sides; the comparison itself then works
/// on those codes only.
///
/// # Panics
///
/// Panics if reading from either input fails (see `diff_codes`).
pub fn diff<T>(a: &mut T, b: &mut T) -> Vec<DiffItem>
where
    T: BufRead,
{
    // Shared between both sides so equal lines receive equal codes.
    let mut line_codes: HashMap<String, usize> = HashMap::new();
    let mut side_a = diff_data(a, &mut line_codes);
    let mut side_b = diff_data(b, &mut line_codes);

    let (len_a, len_b) = (side_a.length, side_b.length);

    // Scratch vectors for the forward and reverse searches.
    let slots = 2 * (len_a + len_b) + 2;
    let mut forward = vec![0usize; slots];
    let mut backward = vec![0usize; slots];

    lcs(&mut side_a, 0, len_a, &mut side_b, 0, len_b, &mut forward, &mut backward);

    optimize(&mut side_a);
    optimize(&mut side_b);

    create_diffs(&side_a, &side_b)
}
/// Reads every line of `reader` and wraps the resulting line codes in a `DiffData`.
///
/// `existing_hashes` maps line text to its code and is extended with any line
/// not seen before. No line starts out marked as modified.
fn diff_data<T>(reader: &mut T, existing_hashes: &mut HashMap<String, usize>) -> DiffData
where
    T: BufRead,
{
    let codes = diff_codes(reader, existing_hashes);
    let line_count = codes.len();

    DiffData {
        length: line_count,
        // Two spare flags beyond the last line, all initially unset.
        modified: vec![false; line_count + 2],
        codes,
    }
}
/// Reads `reader` line by line and returns one numeric code per line.
///
/// `existing_hashes` maps line text (including its line terminator, exactly as
/// returned by `read_line`) to a code. A line that is already present reuses
/// its code; a new line is inserted with the code `existing_hashes.len() + 1`,
/// so codes are handed out sequentially and are shared by every input that is
/// processed with the same map.
///
/// # Panics
///
/// Panics with "Failed to read BufRead" if the underlying reader returns an error.
fn diff_codes<T>(reader: &mut T, existing_hashes: &mut HashMap<String, usize>) -> Vec<usize>
where
    T: BufRead,
{
    let mut codes = Vec::new();

    loop {
        let mut line = String::new();
        let bytes_read = reader.read_line(&mut line).expect("Failed to read BufRead");
        if bytes_read == 0 {
            // End of input.
            break;
        }

        // The entry API looks the line up once, instead of the
        // contains_key / insert / index sequence that hashed it up to three times.
        let next_code = existing_hashes.len() + 1;
        codes.push(*existing_hashes.entry(line).or_insert(next_code));
    }

    codes
}
// Longest Common-Subsequence
/// Marks the lines of `data_a[lower_a..upper_a]` and `data_b[lower_b..upper_b]`
/// that are not part of the common subsequence as modified.
///
/// Common leading and trailing lines are skipped first. If one side is then
/// empty, everything left on the other side is flagged; otherwise the range is
/// split at the point returned by `sms` and both halves are processed
/// recursively. `down_vector` and `up_vector` are scratch space for `sms`.
fn lcs(data_a: &mut DiffData, mut lower_a: usize, mut upper_a: usize, data_b: &mut DiffData, mut lower_b: usize, mut upper_b: usize, down_vector: &mut Vec<usize>, up_vector: &mut Vec<usize>) {
    // Skip the common prefix.
    while lower_a < upper_a && lower_b < upper_b {
        if data_a.codes[lower_a] != data_b.codes[lower_b] {
            break;
        }
        lower_a += 1;
        lower_b += 1;
    }

    // Skip the common suffix.
    while lower_a < upper_a && lower_b < upper_b {
        if data_a.codes[upper_a - 1] != data_b.codes[upper_b - 1] {
            break;
        }
        upper_a -= 1;
        upper_b -= 1;
    }

    if lower_a == upper_a {
        // Nothing left in A: the rest of B was inserted.
        for line in lower_b..upper_b {
            data_b.modified[line] = true;
        }
        return;
    }

    if lower_b == upper_b {
        // Nothing left in B: the rest of A was deleted.
        for line in lower_a..upper_a {
            data_a.modified[line] = true;
        }
        return;
    }

    // Find the middle snake, then solve the part before it and the part after it.
    let window = SmsBounds::from(lower_a, lower_b, upper_a, upper_b);
    let split = sms(&*data_a, &*data_b, &window, down_vector, up_vector);

    lcs(data_a, lower_a, split.x, data_b, lower_b, split.y, down_vector, up_vector);
    lcs(data_a, split.x, upper_a, data_b, split.y, upper_b, down_vector, up_vector);
}
// Shortest Middle Snake
/// Finds a point at which the window described by `bounds` can be split.
///
/// A forward search starts at `(lower_a, lower_b)` and a reverse search at
/// `(upper_a, upper_b)`; both are extended by one edit per round `d` until
/// they overlap on a diagonal. The returned point is taken from the forward
/// search on that diagonal.
///
/// `down_vector` / `up_vector` hold the furthest x reached per diagonal for
/// the forward / reverse search and are indexed through `bounds`.
///
/// The reverse search legitimately steps to coordinates *below* the window
/// (below zero when the window starts at 0), for example when A has one line
/// and B has ten different ones. Those coordinates are therefore computed
/// with signed arithmetic. Because `up_vector` is a `Vec<usize>`, they are
/// stored as their two's-complement bit pattern (`as usize`) and converted
/// back with `as isize` whenever they are read. Doing the subtraction in
/// `usize` would panic in debug builds and compare incorrectly in release
/// builds.
///
/// # Panics
///
/// Panics if no overlap is found within `bounds.max_d` rounds.
fn sms(data_a: &DiffData, data_b: &DiffData, bounds: &SmsBounds, down_vector: &mut Vec<usize>, up_vector: &mut Vec<usize>) -> SmsData {
    let lower_a = bounds.lower_a as isize;
    let lower_b = bounds.lower_b as isize;

    down_vector[bounds.get_down_index(bounds.down_k, LeftDown)] = bounds.lower_a;
    up_vector[bounds.get_up_index(bounds.up_k, RightUp)] = bounds.upper_a;

    for d in 0..=bounds.max_d as i32 {
        // Extend the forward path
        for k in ((bounds.down_k - d)..=(bounds.down_k + d)).step_by(2) {
            let mut x;
            if k == bounds.down_k - d {
                // Down
                x = down_vector[bounds.get_down_index(k, LeftDown)];
            } else {
                // Right
                x = down_vector[bounds.get_down_index(k, RightUp)] + 1;
                if k < bounds.down_k + d && down_vector[bounds.get_down_index(k, LeftDown)] >= x {
                    // Down
                    x = down_vector[bounds.get_down_index(k, LeftDown)];
                }
            }
            // The forward path never leaves the window towards the origin, so
            // x - k cannot become negative here.
            let mut y = add_i32(x, -k);

            // Find the end of the furthest reaching forward D-path in diagonal k.
            while x < bounds.upper_a && y < bounds.upper_b && data_a.codes[x] == data_b.codes[y] {
                x += 1;
                y += 1;
            }
            down_vector[bounds.get_down_index(k, None)] = x;

            // Overlap ?
            if bounds.is_delta_odd && bounds.up_k - d < k && k < bounds.up_k + d {
                // The reverse value may be negative: compare as signed numbers.
                let reverse_x = up_vector[bounds.get_up_index(k, None)] as isize;
                if reverse_x <= (x as isize) {
                    return SmsData { x, y };
                }
            }
        }

        // Extend the reverse path
        for k in ((bounds.up_k - d)..=(bounds.up_k + d)).step_by(2) {
            let mut x: isize = if k == bounds.up_k + d {
                // Up
                up_vector[bounds.get_up_index(k, RightUp)] as isize
            } else {
                // Left
                let left = (up_vector[bounds.get_up_index(k, LeftDown)] as isize) - 1;
                if k > bounds.up_k - d {
                    let up = up_vector[bounds.get_up_index(k, RightUp)] as isize;
                    if up < left {
                        // Up
                        up
                    } else {
                        left
                    }
                } else {
                    left
                }
            };
            let mut y: isize = x - (k as isize);

            // `x > lower_a >= 0` and `y > lower_b >= 0` guarantee valid indices below.
            while x > lower_a && y > lower_b && data_a.codes[(x - 1) as usize] == data_b.codes[(y - 1) as usize] {
                x -= 1;
                y -= 1;
            }
            // Stored as a bit pattern; see the function documentation.
            up_vector[bounds.get_up_index(k, None)] = x as usize;

            // Overlap ?
            if !bounds.is_delta_odd && bounds.down_k - d <= k && k <= bounds.down_k + d {
                let forward_x = down_vector[bounds.get_down_index(k, None)];
                if x <= (forward_x as isize) {
                    return SmsData { x: forward_x, y: add_i32(forward_x, -k) };
                }
            }
        }
    }

    panic!("This should not be possible :(");
}
/// Shifts every run of modified lines as far down as possible.
///
/// While the first line of a modified run has the same code as the line
/// directly after the run, the run is moved down by one line: its first line
/// is unmarked and the line after it is marked instead.
fn optimize(data: &mut DiffData) {
    let line_count = data.length;
    let mut run_start = 0usize;

    while run_start < line_count {
        // Advance to the next modified line.
        while run_start < line_count && !data.modified[run_start] {
            run_start += 1;
        }

        // Advance to the first unmodified line after it.
        let mut run_end = run_start;
        while run_end < line_count && data.modified[run_end] {
            run_end += 1;
        }

        let can_shift = run_end < line_count && data.codes[run_start] == data.codes[run_end];
        if can_shift {
            // Same run, one line further down; it is re-examined on the next round.
            data.modified[run_start] = false;
            data.modified[run_end] = true;
        } else {
            run_start = run_end;
        }
    }
}
/// Converts the per-line `modified` flags of both sides into a list of `DiffItem`s.
///
/// Both sides are walked in parallel. Lines that are unmodified on both sides
/// are skipped together; any other position starts a change that swallows the
/// following modified lines of A and then those of B. Once one side is
/// exhausted, everything left on the other side belongs to the change.
fn create_diffs(data_a: &DiffData, data_b: &DiffData) -> Vec<DiffItem> {
    let (len_a, len_b) = (data_a.length, data_b.length);
    let mut changes = Vec::new();
    let (mut pos_a, mut pos_b) = (0usize, 0usize);

    while pos_a < len_a || pos_b < len_b {
        if pos_a < len_a && !data_a.modified[pos_a] && pos_b < len_b && !data_b.modified[pos_b] {
            // Equal line
            pos_a += 1;
            pos_b += 1;
            continue;
        }

        let (start_a, start_b) = (pos_a, pos_b);

        while pos_a < len_a && (pos_b >= len_b || data_a.modified[pos_a]) {
            pos_a += 1;
        }
        while pos_b < len_b && (pos_a >= len_a || data_b.modified[pos_b]) {
            pos_b += 1;
        }

        let deleted_a = pos_a - start_a;
        let inserted_b = pos_b - start_b;
        if deleted_a > 0 || inserted_b > 0 {
            changes.push(DiffItem {
                start_a,
                start_b,
                deleted_a,
                inserted_b,
            });
        }
    }

    changes
}

107
src/lcs.rs Normal file
View File

@ -0,0 +1,107 @@
use std::cmp::max;
use std::str::Chars;
use crate::matrix::Matrix;
/// Builds the LCS matrix for `a` and `b` and prints the classification of
/// their last characters.
///
/// Positions are counted in characters, not bytes, because `print_diff` looks
/// characters up with `chars().nth(..)`; using byte lengths made any input
/// containing a multi-byte character panic. If either string is empty there is
/// no last character to compare and nothing is printed (the previous
/// `len() - 1` underflowed in that case).
pub fn diff(a: &str, b: &str) {
    let char_count_a = a.chars().count();
    let char_count_b = b.chars().count();
    if char_count_a == 0 || char_count_b == 0 {
        return;
    }

    let m = build_matrix(a, b);
    print_diff(m, a, b, char_count_a - 1, char_count_b - 1);
}
/// Builds the longest-common-subsequence length table for `a` and `b`.
///
/// Cell `(i, j)` holds the LCS length of the first `i + 1` characters of `a`
/// and the first `j + 1` characters of `b`. The matrix has one row per
/// character of `a` and one column per character of `b`.
fn build_matrix(a: &str, b: &str) -> Matrix<i32> {
    // Sized in characters so the dimensions agree with the indices used below.
    let mut m = Matrix::new(a.chars().count(), b.chars().count(), 0i32);

    for (i, x) in a.chars().enumerate() {
        for (j, y) in b.chars().enumerate() {
            let v = if x == y {
                if i == 0 || j == 0 {
                    // A match in the first row/column is a common subsequence
                    // of length 1 (there is no diagonal predecessor to extend).
                    1
                } else {
                    *m.get(i - 1, j - 1).unwrap() + 1
                }
            } else {
                // No match: carry over the better of the left and upper neighbours,
                // treating cells outside the matrix as 0.
                let left = if j == 0 { 0 } else { *m.get(i, j - 1).unwrap() };
                let up = if i == 0 { 0 } else { *m.get(i - 1, j).unwrap() };
                max(left, up)
            };
            m.set(v, i, j).unwrap();
        }
    }

    m
}
/// Prints how character `i` of `a` relates to character `j` of `b`.
///
/// First prints both characters, then one line starting with a space (equal),
/// `+` or `-`, followed by the character taken from `a`.
///
/// # Panics
///
/// Panics if `i` / `j` are not valid character positions of `a` / `b`, or if
/// the neighbouring matrix cells that are consulted do not exist.
fn print_diff(m: Matrix<i32>, a: &str, b: &str, i: usize, j: usize) {
    let char_i = a.chars().nth(i).unwrap();
    let char_j = b.chars().nth(j).unwrap();
    println!("A: {char_i}, B: {char_j}");

    if char_i == char_j {
        println!(" {char_i}");
        return;
    }

    // "+" is only chosen when both neighbours exist and the left one is at
    // least as large as the upper one.
    // NOTE(review): the "+" line prints the character from `a`; the
    // commented-out draft of this function prints `char_j` there - confirm.
    let left_is_at_least_up = i > 0 && {
        let up = m.get(i - 1, j).unwrap();
        j > 0 && m.get(i, j - 1).unwrap() >= up
    };

    if left_is_at_least_up {
        println!("+ {char_i}");
    } else {
        println!("- {char_i}");
    }
}
// fn print_diff(m: Matrix<i32>, a: &str, b: &str, i: usize, j: usize) {
// // if i < 0 && j < 0 {
// // println!();
// // return;
// // }
//
// let char_i = a.chars().nth(i).unwrap();
// let char_j = b.chars().nth(j).unwrap();
//
// // if i < 0 {
// // print_diff(m, a, b, i, j - 1);
// // println!("+ {char_i}");
// // return;
// // }
// //
// // if j < 0 {
// // print_diff(m, a, b, i - 1, j);
// // println!("- {char_j}");
// // return;
// // }
//
// if char_i == char_j {
// if i > 0 && j > 0 {
// print_diff(m, a, b, i - 1, j - 1);
// }
//
// println!(" {char_i}");
// return;
// }
//
// if i > 0 && j > 0 && m.get(i, j - 1).unwrap() >= m.get(i - 1, j).unwrap() {
// print_diff(m, a, b, i, j - 1);
// println!("+ {char_j}");
// return;
// }
//
// if i > 0 {
// print_diff(m, a, b, i - 1, j);
// }
//
// println!("- {char_i}");
// }

View File

@ -1,3 +1,21 @@
fn main() { use std::fs::File;
println!("Hello, world!"); use std::io;
use std::io::{BufRead, BufReader};
use crate::diff::diff;
mod matrix;
mod lcs;
mod diff;
/// Diffs the two sample configuration files and dumps the result with `dbg!`.
///
/// Returns the I/O error if either file cannot be opened.
fn main() -> io::Result<()> {
    // First input of the comparison ("A").
    let file = File::open("test-data/config.jsonc")?;
    let mut reader_a = BufReader::new(file);
    // Second input of the comparison ("B").
    let file = File::open("test-data/config-old.jsonc")?;
    let mut reader_b = BufReader::new(file);
    let result = diff(&mut reader_a, &mut reader_b);
    dbg!(result);
    Ok(())
} }

161
src/matrix.rs Normal file
View File

@ -0,0 +1,161 @@
use std::fmt::{Debug, Display, Formatter};
use std::ops;
/// Dense, row-major matrix with a fixed size and a fill value.
#[derive(Debug)]
pub struct Matrix<T> {
    /// Number of rows.
    pub row_count: usize,
    /// Number of columns.
    pub col_count: usize,
    /// Value every cell is initialised with.
    default_val: T,
    /// Cell storage: one inner vector per row.
    rows: Vec<Vec<T>>,
}

/// Errors reported by matrix operations.
#[derive(Debug)]
pub enum MatrixError {
    /// The given row index is not below `row_count`.
    RowIndexOutOfBound(usize),
    /// The given column index is not below `col_count`.
    ColIndexOutOfBound(usize),
    /// The operand sizes do not fit the requested operation.
    IncompatibleSize,
}

impl<T> Matrix<T>
where
    T: Copy,
{
    /// Creates a `row_count` x `col_count` matrix with every cell set to `default_val`.
    pub fn new(row_count: usize, col_count: usize, default_val: T) -> Self {
        Matrix {
            row_count,
            col_count,
            default_val,
            rows: vec![vec![default_val; col_count]; row_count],
        }
    }

    /// Creates a fresh matrix with the size and fill value of `matrix`
    /// (the cell contents of `matrix` are not copied).
    pub fn new_with_size(matrix: &Self) -> Self {
        Self::new(matrix.row_count, matrix.col_count, matrix.default_val)
    }

    /// Returns the cell at (`row`, `col`), or `None` if either index is out of range.
    pub fn get(&self, row: usize, col: usize) -> Option<&T> {
        let in_range = row < self.row_count && col < self.col_count;
        if in_range {
            Some(&self.rows[row][col])
        } else {
            None
        }
    }

    /// Stores `val` at (`row`, `col`).
    ///
    /// # Errors
    ///
    /// Returns `RowIndexOutOfBound` if `row` is out of range (checked first),
    /// otherwise `ColIndexOutOfBound` if `col` is out of range.
    pub fn set(&mut self, val: T, row: usize, col: usize) -> Result<(), MatrixError> {
        match (row < self.row_count, col < self.col_count) {
            (false, _) => Err(MatrixError::RowIndexOutOfBound(row)),
            (true, false) => Err(MatrixError::ColIndexOutOfBound(col)),
            (true, true) => {
                self.rows[row][col] = val;
                Ok(())
            }
        }
    }

    /// Whether both matrices have the same number of rows and of columns.
    fn size_equal(&self, other: &Self) -> bool {
        (self.row_count, self.col_count) == (other.row_count, other.col_count)
    }
}
impl<T> Matrix<T>
where
    T: Display + Copy,
{
    /// Writes the matrix to standard output, one line per row, with every
    /// value surrounded by a single space on each side.
    pub fn print(&self) {
        for row in 0..self.row_count {
            (0..self.col_count).for_each(|col| {
                let val = self.rows[row][col];
                print!(" {val} ");
            });
            print!("\n");
        }
    }
}
/// Element-wise addition of two matrices of equal size.
impl<T> ops::Add<Matrix<T>> for Matrix<T>
where
    T: ops::Add<Output = T> + Copy,
{
    type Output = Result<Matrix<T>, MatrixError>;

    /// Returns the cell-by-cell sum, or `IncompatibleSize` if the sizes differ.
    fn add(self, rhs: Matrix<T>) -> Self::Output {
        if !self.size_equal(&rhs) {
            return Err(MatrixError::IncompatibleSize);
        }

        let mut sum = Matrix::new_with_size(&self);
        for row in 0..self.row_count {
            for col in 0..self.col_count {
                // The sizes were checked above, so every index is valid.
                sum.rows[row][col] = self.rows[row][col] + rhs.rows[row][col];
            }
        }
        Ok(sum)
    }
}
/// Element-wise subtraction of two matrices of equal size.
impl<T> ops::Sub<Matrix<T>> for Matrix<T>
where
    T: ops::Sub<Output = T> + Copy,
{
    type Output = Result<Matrix<T>, MatrixError>;

    /// Returns the cell-by-cell difference `self - rhs`, or `IncompatibleSize`
    /// if the sizes differ.
    fn sub(self, rhs: Matrix<T>) -> Self::Output {
        if !self.size_equal(&rhs) {
            return Err(MatrixError::IncompatibleSize);
        }

        let mut difference = Matrix::new_with_size(&self);
        for row in 0..self.row_count {
            for col in 0..self.col_count {
                // The sizes were checked above, so every index is valid.
                difference.rows[row][col] = self.rows[row][col] - rhs.rows[row][col];
            }
        }
        Ok(difference)
    }
}
/// Multiplication of every cell by a scalar.
impl<T> ops::Mul<T> for Matrix<T>
where
    T: ops::Mul<Output = T> + Copy,
{
    type Output = Matrix<T>;

    /// Returns a matrix of the same size in which every cell is `cell * rhs`.
    fn mul(self, rhs: T) -> Self::Output {
        let mut scaled = Matrix::new_with_size(&self);
        for row in 0..self.row_count {
            for col in 0..self.col_count {
                // `scaled` has the size of `self`, so every index is valid.
                scaled.rows[row][col] = self.rows[row][col] * rhs;
            }
        }
        scaled
    }
}
/// Matrix product.
impl<T> ops::Mul<Matrix<T>> for Matrix<T>
where
    T: ops::Add<Output = T>, T: ops::Mul<Output = T>, T: Copy,
{
    type Output = Result<Matrix<T>, MatrixError>;

    /// Returns `self * rhs`, a matrix with `self.row_count` rows and
    /// `rhs.col_count` columns.
    ///
    /// Each cell is the sum of the products of the matching row of `self` and
    /// column of `rhs`. The sum is started from the first product rather than
    /// from `default_val`: seeding it with the fill value added that value to
    /// every cell whenever the matrix had been created with a non-zero
    /// default. Only when the shared dimension is 0 (no products at all) does
    /// a cell fall back to `default_val`.
    ///
    /// # Errors
    ///
    /// Returns `IncompatibleSize` if `self.col_count != rhs.row_count`.
    fn mul(self, rhs: Matrix<T>) -> Self::Output {
        if self.col_count != rhs.row_count {
            return Err(MatrixError::IncompatibleSize);
        }

        let mut result_matrix = Matrix::new(self.row_count, rhs.col_count, self.default_val);
        for row in 0..result_matrix.row_count {
            for col in 0..result_matrix.col_count {
                let val = (0..self.col_count)
                    .map(|i| self.rows[row][i] * rhs.rows[i][col])
                    .reduce(|acc, term| acc + term)
                    .unwrap_or(self.default_val);
                // The result was sized from the operands, so the indices are valid.
                result_matrix.set(val, row, col).unwrap();
            }
        }
        Ok(result_matrix)
    }
}

View File

@ -0,0 +1,65 @@
{
"position": "top",
"modules-left": ["sway/workspaces"],
"modules-right": ["network", "pulseaudio", "clock"],
// Modules configuration
"sway/workspaces": {
"disable-scroll": true,
"all-outputs": true,
"format": "{icon}",
"persistent_workspaces": {
"1": [],
"2": [],
"3": [],
"4": [],
"5": [],
"6": [],
"7": [],
"8": [],
"9": [],
"10": []
},
"format-icons": {
"default": "",
"urgent": "",
"focused": ""
}
},
"custom/spotify": {
"format": "<span foreground='#abc123'> </span><span font='FireCodeMono Nerd Font Mono weight=325 Italic'>{}</span>",
"interval": 1,
"exec-if": "pgrep spotify",
"on-click": "playerctl -p spotify play-pause",
"on-scroll-up": "playerctl -p spotify previous",
"on-scroll-down": "playerctl -p spotify next",
"tooltip": false,
"escape": true,
"MAX-LENGTH": 60,
"exec": "/home/loki/bin/spotify.sh"
},
"clock": {
"format": "{:%a %d %H:%M} <span foreground='#123abc'></span>",
"tooltip-format": "<big>{:%Y %B}</big>\n<tt><small>{calendar}</small></tt>"
},
"network": {
"forletmat-disconnected": "Disconnected <span class='#abc123'></span>",
"format-ethernet": "{ipaddr} <span foreground='#123abc'></span>"
},
"pulseaudio": {
"format": "{volume}% <span foreground='#123abc'>{icon}</span>",
"format-bluetooth": "{volume}% <span foreground='#abc123'>{icon}</span>",
"format-muted": "",
"format-icons": {
"headphone": "",
"hands-free": "",
"headset": "",
"phone": "",
"portable": "",
"car": "",
"default": ["", ""]
},
"scroll-step": 1,
"on-click": "pavucontrol",
"ignored-sinks": ["Easy Effects Sink"]
}
}

65
test-data/config.jsonc Normal file
View File

@ -0,0 +1,65 @@
{
"position": "top",
"modules-left": ["sway/workspaces"],
"modules-right": ["network", "pulseaudio", "clock"],
// Modules configuration
"sway/workspaces": {
"disable-scroll": true,
"all-outputs": true,
"format": "{icon}",
"persistent_workspaces": {
"1": [],
"2": [],
"3": [],
"4": [],
"5": [],
"6": [],
"7": [],
"8": [],
"9": [],
"10": []
},
"format-icons": {
"default": "",
"urgent": "",
"focused": ""
}
},
"custom/spotify": {
"format": "<span foreground='#a4b9ef'> </span><span font='FireCodeMono Nerd Font Mono weight=325 Italic'>{}</span>",
"interval": 1,
"exec-if": "pgrep spotify",
"on-click": "playerctl -p spotify play-pause",
"on-scroll-up": "playerctl -p spotify previous",
"on-scroll-down": "playerctl -p spotify next",
"tooltip": false,
"escape": true,
"MAX-LENGTH": 60,
"exec": "/home/loki/bin/spotify.sh"
},
"clock": {
"format": "{:%a %d %H:%M} <span foreground='#a4b9ef'></span>",
"tooltip-format": "<big>{:%Y %B}</big>\n<tt><small>{calendar}</small></tt>"
},
"network": {
"format-disconnected": "Disconnected <span class='#f9c096'></span>",
"format-ethernet": "{ipaddr} <span foreground='#a4b9ef'></span>"
},
"pulseaudio": {
"format": "{volume}% <span foreground='#a4b9ef'>{icon}</span>",
"format-bluetooth": "{volume}% <span foreground='#a4b9ef'>{icon}</span>",
"format-muted": "",
"format-icons": {
"headphone": "",
"hands-free": "",
"headset": "",
"phone": "",
"portable": "",
"car": "",
"default": ["", ""]
},
"scroll-step": 1,
"on-click": "pavucontrol",
"ignored-sinks": ["Easy Effects Sink"]
}
}