/// Characters that are replaced with `_` because they are invalid in filenames
/// on common filesystems (path separators, Windows-reserved punctuation, NUL).
const INVALID_CHARS: &[char] = &['/', '\\', ':', '*', '?', '"', '<', '>', '|', '\0'];

/// Maximum length, in bytes, of a sanitized component (common filesystem limit).
const MAX_COMPONENT_BYTES: usize = 255;

/// Sanitize a single path component (filename or directory name) for filesystem safety.
///
/// The following steps are applied:
///
/// 1. Every character listed in [`INVALID_CHARS`] is replaced with `_`.
/// 2. Runs of consecutive underscores are collapsed into a single `_`.
/// 3. Leading and trailing dots and spaces are removed.
/// 4. The result is truncated to at most [`MAX_COMPONENT_BYTES`] bytes, always on a
///    UTF-8 character boundary, and any dots or spaces exposed at the end by the
///    truncation are removed as well.
/// 5. If nothing is left, the placeholder `"_"` is returned.
///
/// The returned string is never empty and never longer than
/// [`MAX_COMPONENT_BYTES`] bytes.
///
/// # Examples
///
/// ```text
/// sanitize_component("AC/DC")    == "AC_DC"
/// sanitize_component("..hidden") == "hidden"
/// sanitize_component("")         == "_"
/// ```
pub fn sanitize_component(s: &str) -> String {
    // Dots and spaces at either end are problematic on Windows and some Linux tools.
    let is_edge_char = |c: char| c == '.' || c == ' ';

    // Replace invalid characters and collapse underscore runs in a single pass.
    let mut replaced = String::with_capacity(s.len());
    for c in s.chars() {
        let c = if INVALID_CHARS.contains(&c) { '_' } else { c };
        if c == '_' && replaced.ends_with('_') {
            continue;
        }
        replaced.push(c);
    }

    let mut result = replaced.trim_matches(is_edge_char).to_string();

    if result.len() > MAX_COMPONENT_BYTES {
        // Slicing or truncating in the middle of a multi-byte character panics,
        // so back up to the nearest character boundary first. Index 0 is always
        // a boundary, which guarantees termination.
        let mut end = MAX_COMPONENT_BYTES;
        while !result.is_char_boundary(end) {
            end -= 1;
        }
        result.truncate(end);

        // Truncation may have exposed a trailing dot or space; trim again so the
        // "no trailing dots/spaces" guarantee still holds.
        let kept = result.trim_end_matches(is_edge_char).len();
        result.truncate(kept);
    }

    // If empty after sanitization, return a placeholder.
    if result.is_empty() {
        "_".to_string()
    } else {
        result
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_sanitize_normal() {
        assert_eq!(sanitize_component("Hello World"), "Hello World");
    }

    #[test]
    fn test_sanitize_invalid_chars() {
        assert_eq!(sanitize_component("AC/DC"), "AC_DC");
        assert_eq!(sanitize_component("What?"), "What_");
        assert_eq!(sanitize_component("a:b*c"), "a_b_c");
    }

    #[test]
    fn test_sanitize_dots_and_spaces() {
        assert_eq!(sanitize_component("..hidden"), "hidden");
        assert_eq!(sanitize_component(" spaced "), "spaced");
        assert_eq!(sanitize_component("..."), "_");
    }

    #[test]
    fn test_sanitize_collapse_underscores() {
        assert_eq!(sanitize_component("a///b"), "a_b");
    }

    #[test]
    fn test_sanitize_empty() {
        assert_eq!(sanitize_component(""), "_");
    }

    #[test]
    fn test_sanitize_null_bytes() {
        assert_eq!(sanitize_component("a\0b"), "a_b");
    }

    #[test]
    fn test_sanitize_long_string() {
        let long = "a".repeat(300);
        let result = sanitize_component(&long);
        assert!(result.len() <= 255);
    }

    #[test]
    fn test_sanitize_long_multibyte_string() {
        // 400 bytes of two-byte characters: byte 255 is mid-character.
        let long = "é".repeat(200);
        let result = sanitize_component(&long);
        assert_eq!(result, "é".repeat(127));
    }

    #[test]
    fn test_sanitize_retrims_after_truncation() {
        // Truncation at 255 bytes would otherwise leave a trailing dot.
        let input = format!("{}.b", "a".repeat(254));
        assert_eq!(sanitize_component(&input), "a".repeat(254));
    }
}