83 lines
2.2 KiB
Rust
83 lines
2.2 KiB
Rust
/// Characters that are invalid in filenames on common filesystems.
const INVALID_CHARS: &[char] = &['/', '\\', ':', '*', '?', '"', '<', '>', '|', '\0'];

/// Sanitize a single path component (filename or directory name) for filesystem safety.
///
/// The following steps are applied in order:
///
/// 1. Every character listed in [`INVALID_CHARS`] is replaced with `_`.
/// 2. Runs of consecutive underscores (whether original or produced by step 1)
///    are collapsed into a single `_`.
/// 3. Leading and trailing dots and spaces are removed.
/// 4. The result is cut to at most 255 bytes, always on a UTF-8 character
///    boundary, and any dot or space exposed at the end by the cut is removed.
/// 5. If nothing is left, the placeholder `"_"` is returned.
///
/// The returned string is never empty and never longer than 255 bytes.
pub fn sanitize_component(s: &str) -> String {
    /// Maximum component length in bytes (common filesystem limit).
    const MAX_BYTES: usize = 255;
    /// Characters stripped from both ends (problematic on Windows and some Linux tools).
    const EDGE_CHARS: &[char] = &['.', ' '];

    // Replace invalid characters and collapse underscore runs in a single pass,
    // instead of re-allocating the whole string once per `replace("__", "_")`.
    let mut cleaned = String::with_capacity(s.len());
    for c in s.chars() {
        let c = if INVALID_CHARS.contains(&c) { '_' } else { c };
        if c == '_' && cleaned.ends_with('_') {
            continue;
        }
        cleaned.push(c);
    }

    let trimmed = cleaned.trim_matches(EDGE_CHARS);

    // Slicing a `str` in the middle of a multi-byte character panics, so back
    // up to the nearest character boundary at or below the byte limit.
    let mut end = trimmed.len().min(MAX_BYTES);
    while !trimmed.is_char_boundary(end) {
        end -= 1;
    }

    // The cut may expose a dot or space at the end; trim the tail once more.
    let component = trimmed[..end].trim_end_matches(EDGE_CHARS);

    if component.is_empty() {
        "_".to_string()
    } else {
        component.to_string()
    }
}
|
|
|
|
#[cfg(test)]
mod tests {
    //! Unit tests for `sanitize_component`.

    use super::*;

    /// Input without problematic characters passes through unchanged.
    #[test]
    fn test_sanitize_normal() {
        assert_eq!(sanitize_component("Hello World"), "Hello World");
    }

    /// Each invalid character is replaced with a single underscore.
    #[test]
    fn test_sanitize_invalid_chars() {
        assert_eq!(sanitize_component("AC/DC"), "AC_DC");
        assert_eq!(sanitize_component("What?"), "What_");
        assert_eq!(sanitize_component("a:b*c"), "a_b_c");
    }

    /// Leading/trailing dots and spaces are stripped; an all-dot name becomes the placeholder.
    #[test]
    fn test_sanitize_dots_and_spaces() {
        assert_eq!(sanitize_component("..hidden"), "hidden");
        assert_eq!(sanitize_component(" spaced "), "spaced");
        assert_eq!(sanitize_component("..."), "_");
    }

    /// Adjacent replacements collapse into one underscore.
    #[test]
    fn test_sanitize_collapse_underscores() {
        assert_eq!(sanitize_component("a///b"), "a_b");
    }

    /// Empty input yields the placeholder rather than an empty string.
    #[test]
    fn test_sanitize_empty() {
        assert_eq!(sanitize_component(""), "_");
    }

    /// NUL bytes are treated like any other invalid character.
    #[test]
    fn test_sanitize_null_bytes() {
        assert_eq!(sanitize_component("a\0b"), "a_b");
    }

    /// Over-long ASCII input is cut to exactly the first 255 bytes.
    #[test]
    fn test_sanitize_long_string() {
        let long = "a".repeat(300);
        let result = sanitize_component(&long);
        assert!(result.len() <= 255);
        assert_eq!(result, "a".repeat(255));
    }
}
|