Files
org/src/sanitize.rs
Connor Johnstone a2152cbf8d Formatting
2026-03-18 15:36:42 -04:00

83 lines
2.2 KiB
Rust

/// Characters that are invalid in filenames on common filesystems.
const INVALID_CHARS: &[char] = &['/', '\\', ':', '*', '?', '"', '<', '>', '|', '\0'];

/// Sanitize a single path component (filename or directory name) for filesystem safety.
///
/// The input is transformed as follows:
///
/// 1. Every character listed in [`INVALID_CHARS`] is replaced with `_`.
/// 2. Runs of consecutive underscores (pre-existing or produced by step 1) are
///    collapsed into a single `_`.
/// 3. Leading and trailing dots and spaces are removed.
/// 4. The result is truncated to at most 255 bytes, always cutting on a UTF-8
///    character boundary, and any dots or spaces exposed at the end by the cut
///    are removed as well.
/// 5. If nothing is left, the placeholder `"_"` is returned.
///
/// The returned string is therefore never empty and never longer than 255 bytes.
/// This function does not panic, including for long inputs made of multi-byte
/// characters.
pub fn sanitize_component(s: &str) -> String {
    /// Maximum component length in bytes (common filesystem limit).
    const MAX_BYTES: usize = 255;
    /// Characters stripped from both ends (problematic on Windows and some Linux tools).
    const TRIMMED: &[char] = &['.', ' '];

    // Replace invalid characters and collapse underscore runs in a single pass.
    // Collapsing before trimming is equivalent to the reverse order: trimming only
    // removes dots/spaces at the ends and can never merge two underscore runs.
    let mut cleaned = String::with_capacity(s.len());
    for c in s.chars() {
        let c = if INVALID_CHARS.contains(&c) { '_' } else { c };
        if c == '_' && cleaned.ends_with('_') {
            continue;
        }
        cleaned.push(c);
    }

    let trimmed = cleaned.trim_matches(TRIMMED);

    // Back the cut position up to a char boundary; slicing in the middle of a
    // multi-byte character would panic. Index 0 is always a boundary, so this
    // loop terminates.
    let mut end = trimmed.len().min(MAX_BYTES);
    while !trimmed.is_char_boundary(end) {
        end -= 1;
    }

    // Truncation may expose a dot or space at the new end; strip it again.
    // When nothing was cut this is a no-op because `trimmed` is already clean.
    let component = trimmed[..end].trim_end_matches(TRIMMED);

    if component.is_empty() {
        // Never hand back an empty name.
        "_".to_string()
    } else {
        component.to_string()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Feeds each `(input, expected)` pair through `sanitize_component` and
    /// asserts that the output matches exactly.
    fn expect_all(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(sanitize_component(input), expected);
        }
    }

    /// Ordinary names pass through untouched.
    #[test]
    fn test_sanitize_normal() {
        expect_all(&[("Hello World", "Hello World")]);
    }

    /// Each forbidden character becomes an underscore.
    #[test]
    fn test_sanitize_invalid_chars() {
        expect_all(&[("AC/DC", "AC_DC"), ("What?", "What_"), ("a:b*c", "a_b_c")]);
    }

    /// Dots and spaces are stripped from both ends; an all-dots name yields the placeholder.
    #[test]
    fn test_sanitize_dots_and_spaces() {
        expect_all(&[("..hidden", "hidden"), (" spaced ", "spaced"), ("...", "_")]);
    }

    /// Several adjacent replacements collapse to one underscore.
    #[test]
    fn test_sanitize_collapse_underscores() {
        expect_all(&[("a///b", "a_b")]);
    }

    /// Empty input yields the placeholder.
    #[test]
    fn test_sanitize_empty() {
        expect_all(&[("", "_")]);
    }

    /// NUL bytes are treated like any other invalid character.
    #[test]
    fn test_sanitize_null_bytes() {
        expect_all(&[("a\0b", "a_b")]);
    }

    /// Oversized input is cut down to at most 255 bytes.
    #[test]
    fn test_sanitize_long_string() {
        let oversized: String = std::iter::repeat('a').take(300).collect();
        let sanitized = sanitize_component(&oversized);
        assert!(sanitized.len() <= 255);
    }
}