Initial commit

This commit is contained in:
Connor Johnstone
2026-03-17 18:22:20 -04:00
commit 3159ee51ad
11 changed files with 1110 additions and 0 deletions

80
src/sanitize.rs Normal file
View File

@@ -0,0 +1,80 @@
/// Characters that are invalid in filenames on common filesystems.
const INVALID_CHARS: &[char] = &['/', '\\', ':', '*', '?', '"', '<', '>', '|', '\0'];

/// Sanitize a single path component (filename or directory name) for filesystem safety.
///
/// The following steps are applied in order:
///
/// 1. Every character listed in `INVALID_CHARS` is replaced with `_`, and runs of
///    consecutive underscores (whether original or produced by replacement) are
///    collapsed into a single `_`.
/// 2. Leading and trailing dots and spaces are removed.
/// 3. The result is truncated to at most 255 bytes, always on a UTF-8 character
///    boundary, and any dot or space exposed at the new end is removed as well.
/// 4. If nothing is left, the placeholder `"_"` is returned.
///
/// The returned string is therefore never empty and never longer than 255 bytes.
pub fn sanitize_component(s: &str) -> String {
    /// Common filesystem limit for a single component, in bytes.
    const MAX_BYTES: usize = 255;

    // Dots and spaces at either end are problematic on Windows and some Linux tools.
    let is_edge_junk = |c: char| c == '.' || c == ' ';

    // Replace invalid characters and collapse underscore runs in a single pass.
    let mut collapsed = String::with_capacity(s.len());
    for c in s.chars() {
        let c = if INVALID_CHARS.contains(&c) { '_' } else { c };
        if c == '_' && collapsed.ends_with('_') {
            continue;
        }
        collapsed.push(c);
    }

    let mut result = collapsed.trim_matches(is_edge_junk).to_string();

    if result.len() > MAX_BYTES {
        // Slicing or truncating in the middle of a multi-byte character panics,
        // so step back to the nearest character boundary first. Index 0 is always
        // a boundary, which guarantees termination.
        let mut end = MAX_BYTES;
        while !result.is_char_boundary(end) {
            end -= 1;
        }
        result.truncate(end);

        // Cutting the string may have exposed a trailing dot or space.
        let kept = result.trim_end_matches(is_edge_junk).len();
        result.truncate(kept);
    }

    // If empty after sanitization, return a placeholder.
    if result.is_empty() {
        return "_".to_string();
    }
    result
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `sanitize_component` on every `(input, expected)` pair and checks the output.
    fn check_all(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(sanitize_component(input), expected, "input: {:?}", input);
        }
    }

    // Input without any problematic characters passes through unchanged.
    #[test]
    fn test_sanitize_normal() {
        check_all(&[("Hello World", "Hello World")]);
    }

    // Each invalid character is replaced by an underscore.
    #[test]
    fn test_sanitize_invalid_chars() {
        check_all(&[("AC/DC", "AC_DC"), ("What?", "What_"), ("a:b*c", "a_b_c")]);
    }

    // Leading/trailing dots and spaces are stripped; an all-dots name becomes the placeholder.
    #[test]
    fn test_sanitize_dots_and_spaces() {
        check_all(&[("..hidden", "hidden"), (" spaced ", "spaced"), ("...", "_")]);
    }

    // A run of replaced characters collapses into a single underscore.
    #[test]
    fn test_sanitize_collapse_underscores() {
        check_all(&[("a///b", "a_b")]);
    }

    // Empty input yields the placeholder.
    #[test]
    fn test_sanitize_empty() {
        check_all(&[("", "_")]);
    }

    // NUL bytes are treated like any other invalid character.
    #[test]
    fn test_sanitize_null_bytes() {
        check_all(&[("a\0b", "a_b")]);
    }

    // Over-long input is cut down to at most 255 bytes.
    #[test]
    fn test_sanitize_long_string() {
        let input = "a".repeat(300);
        let output = sanitize_component(&input);
        assert!(output.len() <= 255);
    }
}