From 025a551764f3a14622bdff1aa561626bb68c6e67 Mon Sep 17 00:00:00 2001 From: william Date: Fri, 8 Sep 2023 15:51:26 -0400 Subject: [PATCH] Complete lib --- Cargo.toml | 1 + src/error.rs | 102 +++++++++++++++++++++++++++++++++++++++++++ src/fmt.rs | 75 +++++++++++++++++++++++++++++++ src/lib.rs | 88 ++++++++++++++++++++++++++++--------- src/parse.rs | 121 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/serde.rs | 84 +++++++++++++++++++++++++++++++++++ src/v4.rs | 18 +++++++- 7 files changed, 466 insertions(+), 23 deletions(-) create mode 100644 src/error.rs create mode 100644 src/fmt.rs create mode 100644 src/parse.rs create mode 100644 src/serde.rs diff --git a/Cargo.toml b/Cargo.toml index a76d5c7..e96a658 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,3 +7,4 @@ edition = "2021" [dependencies] getrandom = "0.2.10" +serde = "1.0.188" diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..6113195 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,102 @@ +use std::fmt::{Display, Formatter}; +use std::str; + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct Error(pub(crate) ErrorKind); + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub(crate) enum ErrorKind { + ByteLength { len: usize }, + Char { character: char, index: usize }, + GroupCount { count: usize }, + GroupLength { group: usize, len: usize, index: usize }, + InvalidUtf8, +} + +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct InvalidUuid<'a>(pub(crate) &'a [u8]); + +impl<'a> InvalidUuid<'a> { + pub fn into_err(self) -> Error { + // Check if the string is valid UTF-8 + let uuid_str = match str::from_utf8(self.0) { + Ok(s) => s, + Err(_) => return Error(ErrorKind::InvalidUtf8) + }; + + let mut hyphen_count = 0; + let mut group_bounds = [0; 4]; + + let uuid_str = unsafe { str::from_utf8_unchecked(uuid_str.as_bytes()) }; + + for (index, character) in uuid_str.char_indices() { + let byte = character as u8; + if character as u32 - byte as u32 > 0 { + // The char is more than 
one byte + return Error(ErrorKind::Char { + character, + index: index + 1, + }); + } else if byte == b'-' { + // Search for the groups bounds + if hyphen_count < 4 { + group_bounds[hyphen_count] = index; + } + hyphen_count += 1; + } else if !matches!(byte, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F') { + // Non hex char + return Error(ErrorKind::Char { + character, + index: index + 1, + }); + } + } + + if hyphen_count != 4 { + // There are more or fewer than 5 groups + Error(ErrorKind::GroupCount { + count: hyphen_count + 1 + }) + } else { + // One of the groups has an invalid length + const BLOCK_STARTS: [usize; 5] = [0, 9, 14, 19, 24]; + for i in 0..4 { + if group_bounds[i] != BLOCK_STARTS[i + 1] - 1 { + return Error(ErrorKind::GroupLength { + group: i, + len: group_bounds[i] - BLOCK_STARTS[i], + index: BLOCK_STARTS[i] + 1, + }); + } + } + + // The last group has an invalid length + Error(ErrorKind::GroupLength { + group: 4, + len: uuid_str.len() - BLOCK_STARTS[4], + index: BLOCK_STARTS[4] + 1, + }) + } + } +} + +impl Display for Error { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self.0 { + ErrorKind::ByteLength { len } => { + write!(f, "invalid length: expected 16 bytes, found {}", len) + } + ErrorKind::Char { character, index, .. } => { + write!(f, "invalid character: expected a sequence of [0-9a-fA-F-], found `{}` at {}", character, index) + } + ErrorKind::GroupCount { count } => { + write!(f, "invalid group count: expected 5, found {}", count) + } + ErrorKind::GroupLength { group, len, .. 
} => { + let expected = [8, 4, 4, 4, 12][group]; + write!(f, "invalid group length in group {}: epected {}, found {}", group, expected, len) + } + ErrorKind::InvalidUtf8 => write!(f, "non UTF-8 input"), + } + } +} diff --git a/src/fmt.rs b/src/fmt.rs new file mode 100644 index 0000000..4766c1d --- /dev/null +++ b/src/fmt.rs @@ -0,0 +1,75 @@ +use std::{ptr, str}; +use std::fmt::{Formatter, LowerHex, UpperHex}; +use crate::{Uuid}; + +pub const UUID_STR_LEN: usize = 36; +const LOWER_LUT: [u8; 16] = [ + b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'a', b'b', b'c', b'd', b'e', b'f' +]; +const UPPER_LUT: [u8; 16] = [ + b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'A', b'B', b'C', b'D', b'E', b'F' +]; +const GROUPS: [(usize, usize); 5] = [ + (0, 8), (9, 13), (14, 18), (19, 23), (24, 36) +]; + +impl LowerHex for Uuid { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_str(encode(self.as_bytes(), &mut [0; UUID_STR_LEN], false)) + } +} + +impl UpperHex for Uuid { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_str(encode(self.as_bytes(), &mut [0; UUID_STR_LEN], true)) + } +} + +impl Uuid { + #[inline] + pub fn encode_lower<'buf>(&self, buffer: &'buf mut [u8]) -> &'buf mut str { + encode(self.as_bytes(), buffer, false) + } + + #[inline] + pub fn encode_upper<'buf>(&self, buffer: &'buf mut [u8]) -> &'buf mut str { + encode(self.as_bytes(), buffer, true) + } +} + +pub(crate) fn encode<'a>(data: &[u8], buffer: &'a mut [u8], upper: bool) -> &'a mut str { + let buf = &mut buffer[..UUID_STR_LEN]; + let dst = buf.as_mut_ptr(); + + unsafe { + ptr::write(dst.cast(), format(data, upper)); + str::from_utf8_unchecked_mut(buf) + } +} + +fn format(data: &[u8], upper: bool) -> [u8; UUID_STR_LEN] { + let lut = if upper { UPPER_LUT } else { LOWER_LUT }; + + let mut dst = [0u8; UUID_STR_LEN]; + let mut i = 0; + + for j in 0..GROUPS.len() { + let (start, end) = GROUPS[j]; + + let mut k = start; + while k < end { + let x 
= data[i]; + i += 1; + + dst[k] = lut[(x >> 4) as usize]; + dst[k + 1] = lut[(x & 0x0f) as usize]; + k += 2; + } + + if j < GROUPS.len() - 1 { + dst[end] = b'-'; + } + } + + dst +} diff --git a/src/lib.rs b/src/lib.rs index 9da6b79..6738e2e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,8 +1,20 @@ -pub mod v4; +use crate::error::{Error, ErrorKind}; + +// UUID (RFC 4122) v4 implementation +// Heavily based on the 'uuid' crate: https://docs.rs/uuid/latest/uuid/ + +mod error; +mod parse; mod rng; -pub(crate) type Bytes = [u8; 16]; +pub mod fmt; +pub mod serde; +pub mod v4; +pub type Bytes = [u8; 16]; + +#[derive(Clone, Copy, Eq, Hash, Ord, PartialEq, PartialOrd)] +#[repr(transparent)] pub struct Uuid(Bytes); #[derive(Clone, Copy, Debug, PartialEq)] @@ -15,6 +27,7 @@ pub enum Variant { } #[derive(Clone, Copy, Debug, PartialEq)] +#[non_exhaustive] #[repr(u8)] pub enum Version { Nil = 0, @@ -23,25 +36,26 @@ pub enum Version { MD5 = 3, Random = 4, SHA1 = 5, - Unknown = u8::MAX + Unknown = u8::MAX, } impl Uuid { - pub fn new(version: Version) -> Self { - match version { - Version::Nil => Self::new_nil(), - Version::Random => Self::new_random(), - _ => panic!("Unsupported UUID version: {:?}", version) - } - } - pub fn new_nil() -> Self { Uuid(Uuid::nil_bytes()) } - #[inline] - pub const fn as_bytes(&self) -> &[u8] { - &self.0 + pub fn from_bytes(bytes: Bytes) -> Self { + Uuid(bytes) + } + + pub fn from_slice(b: &[u8]) -> Result { + if b.len() != 16 { + return Err(Error(ErrorKind::ByteLength { len: b.len() })); + } + + let mut bytes: Bytes = Uuid::nil_bytes(); + bytes.copy_from_slice(b); + Ok(Uuid::from_bytes(bytes)) } pub const fn get_variant(&self) -> Variant { @@ -69,6 +83,28 @@ impl Uuid { } } + pub const fn as_u128(&self) -> u128 { + u128::from_be_bytes(*self.as_bytes()) + } + + #[inline] + pub const fn as_bytes(&self) -> &Bytes { + &self.0 + } + + #[inline] + pub const fn into_bytes(self) -> Bytes { + self.0 + } + + pub const fn is_nil(&self) -> bool { + self.as_u128() 
== u128::MIN + } + + pub const fn encode_buffer() -> [u8; fmt::UUID_STR_LEN] { + [0; fmt::UUID_STR_LEN] + } + #[inline] const fn nil_bytes() -> Bytes { [0u8; 16] @@ -79,12 +115,15 @@ impl Uuid { let variant = variant as u8; let byte = bytes[8] & 0xf; - bytes[8] = variant & byte; + bytes[8] = variant | byte; } #[inline] fn set_version(bytes: &mut Bytes, version: Version) { - bytes[6] |= (version as u8) << 4; + let version = (version as u8) << 4; + let byte = bytes[6] & 0xf; + + bytes[6] = version | byte; } } @@ -94,8 +133,15 @@ impl Default for Uuid { } } -// impl AsRef for Uuid { -// fn as_ref(&self) -> &Bytes { -// &self.0 -// } -// } +impl AsRef<[u8]> for Uuid { + #[inline] + fn as_ref(&self) -> &[u8] { + &self.0 + } +} + +impl ToString for Uuid { + fn to_string(&self) -> String { + format!("{:02x}", self) + } +} diff --git a/src/parse.rs b/src/parse.rs new file mode 100644 index 0000000..b307996 --- /dev/null +++ b/src/parse.rs @@ -0,0 +1,121 @@ +use std::str::FromStr; +use crate::error::{Error, InvalidUuid}; +use crate::Uuid; + +impl FromStr for Uuid { + type Err = Error; + + fn from_str(value: &str) -> Result { + Uuid::parse_str(value) + } +} + +impl TryFrom<&'_ str> for Uuid { + type Error = Error; + + fn try_from(value: &'_ str) -> Result { + Uuid::parse_str(value) + } +} + +impl Uuid { + pub fn parse_str(value: &str) -> Result { + try_parse(value.as_bytes()) + .map(Uuid::from_bytes) + .map_err(InvalidUuid::into_err) + } +} + +const fn try_parse(input: &[u8]) -> Result<[u8; 16], InvalidUuid> { + let result = match (input.len(), input) { + (36, s) => parse(s), + _ => Err(()) + }; + + match result { + Ok(b) => Ok(b), + Err(()) => Err(InvalidUuid(input)) + } +} + +const fn parse(s: &[u8]) -> Result<[u8; 16], ()> { + if s.len() != 36 { + return Err(()); + } + + // Checks that the string looks like a valid UUID + match [s[8], s[13], s[18], s[23]] { + [b'-', b'-', b'-', b'-'] => {} + _ => return Err(()) + } + + let positions: [u8; 8] = [0, 4, 9, 14, 19, 24, 28, 
32]; + let mut buf: [u8; 16] = [0; 16]; + let mut j = 0; + + while j < 8 { + let i = positions[j]; + + let h1 = HEX_TABLE[s[i as usize] as usize]; + let h2 = HEX_TABLE[s[(i + 1) as usize] as usize]; + let h3 = HEX_TABLE[s[(i + 2) as usize] as usize]; + let h4 = HEX_TABLE[s[(i + 3) as usize] as usize]; + + if h1 | h2 | h3 | h4 == 0xff { + return Err(()); + } + + buf[j * 2] = SHL4_TABLE[h1 as usize] | h2; + buf[j * 2 + 1] = SHL4_TABLE[h3 as usize] | h4; + j += 1; + } + + Ok(buf) +} + +const HEX_TABLE: &[u8; 256] = &{ + let mut buf = [0u8; 256]; + let mut i = 0u8; + + loop { + buf[i as usize] = match i { + b'0'..=b'9' => i - b'0', + b'a'..=b'f' => i - b'a' + 10, + b'A'..=b'F' => i - b'A' + 10, + _ => 0xff + }; + + if i == 255 { + break buf; + } + + i += 1 + } +}; + +// Shift left 4 bits lookup table +const SHL4_TABLE: &[u8; 256] = &{ + let mut buf = [0u8; 256]; + let mut i = 0u8; + + loop { + buf[i as usize] = i.wrapping_shl(4); + + if i == 255 { + break buf; + } + + i += 1; + } +}; + +#[cfg(test)] +mod tests { + use crate::Uuid; + + #[test] + fn test_parse_uuid_v4() { + let uuid = Uuid::parse_str("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap(); + print!("var: {:?}, ver: {:?}, uuid: {}", uuid.get_variant(), uuid.get_version(), uuid.to_string()); + } +} diff --git a/src/serde.rs b/src/serde.rs new file mode 100644 index 0000000..b5591f5 --- /dev/null +++ b/src/serde.rs @@ -0,0 +1,84 @@ +use std::fmt::{Formatter}; +use serde::{de::{self, Error as _}, Deserialize, Deserializer, Serialize, Serializer}; +use serde::de::SeqAccess; +use crate::{error::*, Uuid}; + +impl Serialize for Uuid { + fn serialize(&self, serializer: S) -> Result where S: Serializer { + if serializer.is_human_readable() { + serializer.serialize_str(self.encode_lower(&mut Uuid::encode_buffer())) + } else { + serializer.serialize_bytes(self.as_bytes()) + } + } +} + +impl<'de> Deserialize<'de> for Uuid { + fn deserialize(deserializer: D) -> Result where D: Deserializer<'de> { + fn de_error(e: Error) -> E 
{ + E::custom(format_args!("UUID parsing failed: {}", e)) + } + + if deserializer.is_human_readable() { + struct UuidVisitor; + + impl<'vi> de::Visitor<'vi> for UuidVisitor { + type Value = Uuid; + + fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result { + write!(formatter, "a UUID string") + } + + fn visit_str(self, value: &str) -> Result where E: de::Error { + value.parse::().map_err(de_error) + } + + fn visit_bytes(self, value: &[u8]) -> Result where E: de::Error { + Uuid::from_slice(value).map_err(de_error) + } + + fn visit_seq(self, mut seq: A) -> Result where A: SeqAccess<'vi> { + #[rustfmt::skip] + let bytes = [ + match seq.next_element()? { Some(e) => e, None => return Err(A::Error::invalid_length(16, &self)) }, + match seq.next_element()? { Some(e) => e, None => return Err(A::Error::invalid_length(16, &self)) }, + match seq.next_element()? { Some(e) => e, None => return Err(A::Error::invalid_length(16, &self)) }, + match seq.next_element()? { Some(e) => e, None => return Err(A::Error::invalid_length(16, &self)) }, + match seq.next_element()? { Some(e) => e, None => return Err(A::Error::invalid_length(16, &self)) }, + match seq.next_element()? { Some(e) => e, None => return Err(A::Error::invalid_length(16, &self)) }, + match seq.next_element()? { Some(e) => e, None => return Err(A::Error::invalid_length(16, &self)) }, + match seq.next_element()? { Some(e) => e, None => return Err(A::Error::invalid_length(16, &self)) }, + match seq.next_element()? { Some(e) => e, None => return Err(A::Error::invalid_length(16, &self)) }, + match seq.next_element()? { Some(e) => e, None => return Err(A::Error::invalid_length(16, &self)) }, + match seq.next_element()? { Some(e) => e, None => return Err(A::Error::invalid_length(16, &self)) }, + match seq.next_element()? { Some(e) => e, None => return Err(A::Error::invalid_length(16, &self)) }, + match seq.next_element()? 
{ Some(e) => e, None => return Err(A::Error::invalid_length(16, &self)) }, + match seq.next_element()? { Some(e) => e, None => return Err(A::Error::invalid_length(16, &self)) }, + match seq.next_element()? { Some(e) => e, None => return Err(A::Error::invalid_length(16, &self)) }, + match seq.next_element()? { Some(e) => e, None => return Err(A::Error::invalid_length(16, &self)) } + ]; + + Ok(Uuid::from_bytes(bytes)) + } + } + + deserializer.deserialize_str(UuidVisitor) + } else { + struct UuidBytesVisitor; + + impl<'vi> de::Visitor<'vi> for UuidBytesVisitor { + type Value = Uuid; + + fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result { + write!(formatter, "bytes") + } + + fn visit_bytes(self, value: &[u8]) -> Result where E: de::Error { + Uuid::from_slice(value).map_err(de_error) + } + } + + deserializer.deserialize_bytes(UuidBytesVisitor) + } + } +} diff --git a/src/v4.rs b/src/v4.rs index dad02ff..a49e0c3 100644 --- a/src/v4.rs +++ b/src/v4.rs @@ -19,10 +19,24 @@ impl Uuid { #[cfg(test)] mod tests { - use crate::{Uuid, Variant}; + use crate::{Uuid, Variant, Version}; #[test] - fn new_v4__variant() { + fn test_new_v4_version() { + let uuid = Uuid::new_v4(); + + assert_eq!(uuid.get_version(), Version::Random); + } + + #[test] + fn test_new_v4_version_num() { + let uuid = Uuid::new_v4(); + + assert_eq!(uuid.get_version_num(), 4); + } + + #[test] + fn test_new_v4_variant() { let uuid = Uuid::new_v4(); assert_eq!(uuid.get_variant(), Variant::Default);