Initial commit
This commit is contained in:
80
src/sanitize.rs
Normal file
80
src/sanitize.rs
Normal file
@@ -0,0 +1,80 @@
|
||||
/// Characters that are invalid in filenames on common filesystems.
const INVALID_CHARS: &[char] = &['/', '\\', ':', '*', '?', '"', '<', '>', '|', '\0'];

/// Sanitize a single path component (filename or directory name) for filesystem safety.
///
/// The following steps are applied in order:
///
/// 1. Every character listed in [`INVALID_CHARS`] is replaced with `_`.
/// 2. Leading and trailing dots and spaces are trimmed.
/// 3. Runs of consecutive underscores are collapsed into a single `_`.
/// 4. The result is truncated to at most 255 bytes, always cutting on a UTF-8
///    character boundary, and any dots or spaces exposed at the end by the cut
///    are trimmed again.
/// 5. If nothing is left, the placeholder `"_"` is returned.
///
/// The returned string is never empty and never longer than 255 bytes.
pub fn sanitize_component(s: &str) -> String {
    /// Maximum length of one path component in bytes (common filesystem limit).
    const MAX_BYTES: usize = 255;

    let is_dot_or_space = |c: char| c == '.' || c == ' ';

    let replaced: String = s
        .chars()
        .map(|c| if INVALID_CHARS.contains(&c) { '_' } else { c })
        .collect();

    // Trim leading/trailing dots and spaces (problematic on Windows and some Linux tools).
    let trimmed = replaced.trim_matches(is_dot_or_space);

    // Collapse consecutive underscores in a single pass.
    let mut result = String::with_capacity(trimmed.len());
    for c in trimmed.chars() {
        if c == '_' && result.ends_with('_') {
            continue;
        }
        result.push(c);
    }

    if result.len() > MAX_BYTES {
        // Slicing a `str` in the middle of a multi-byte character panics, so walk
        // back to the nearest character boundary before cutting. Index 0 is always
        // a boundary, so this loop terminates.
        let mut end = MAX_BYTES;
        while !result.is_char_boundary(end) {
            end -= 1;
        }
        result.truncate(end);

        // The cut may have exposed a dot or space at the end; strip it so the
        // trimming guarantee above still holds for the final name.
        let kept = result.trim_end_matches(is_dot_or_space).len();
        result.truncate(kept);
    }

    // If empty after sanitization, return a placeholder.
    if result.is_empty() {
        return "_".to_string();
    }

    result
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `sanitize_component` over each `(input, expected)` pair and asserts
    /// that the output matches.
    fn check(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(sanitize_component(input), expected, "input: {:?}", input);
        }
    }

    /// Ordinary names pass through untouched.
    #[test]
    fn test_sanitize_normal() {
        check(&[("Hello World", "Hello World")]);
    }

    /// Each invalid character is replaced by an underscore.
    #[test]
    fn test_sanitize_invalid_chars() {
        check(&[("AC/DC", "AC_DC"), ("What?", "What_"), ("a:b*c", "a_b_c")]);
    }

    /// Leading/trailing dots and spaces are stripped; an all-dot name becomes
    /// the placeholder.
    #[test]
    fn test_sanitize_dots_and_spaces() {
        check(&[("..hidden", "hidden"), (" spaced ", "spaced"), ("...", "_")]);
    }

    /// A run of replaced characters collapses into a single underscore.
    #[test]
    fn test_sanitize_collapse_underscores() {
        check(&[("a///b", "a_b")]);
    }

    /// Empty input yields the placeholder.
    #[test]
    fn test_sanitize_empty() {
        check(&[("", "_")]);
    }

    /// NUL bytes are treated like any other invalid character.
    #[test]
    fn test_sanitize_null_bytes() {
        check(&[("a\0b", "a_b")]);
    }

    /// Over-long input is cut down to the 255-byte limit.
    #[test]
    fn test_sanitize_long_string() {
        let input = "a".repeat(300);
        let output = sanitize_component(&input);
        assert!(output.len() <= 255);
    }
}
|
||||
Reference in New Issue
Block a user