Files
calendar/backend/src/handlers/ics_fetcher.rs
Connor Johnstone 2fee7a15f9 Clean up verbose debug logging from backend server
- Remove emoji debug logs from event deduplication process
- Remove verbose RRULE consolidation logging
- Remove "found X events with title Y" spam logs
- Keep essential functionality intact
- Maintain clean production server logs

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-21 21:29:26 -04:00

873 lines
35 KiB
Rust

use axum::{
extract::{Path, State},
response::Json,
};
use chrono::{DateTime, Utc, Datelike};
use ical::parser::ical::component::IcalEvent;
use reqwest::Client;
use serde::Serialize;
use std::sync::Arc;
use crate::{
db::ExternalCalendarRepository,
models::ApiError,
AppState,
};
// Import VEvent from calendar-models shared crate
use calendar_models::VEvent;
use super::auth::{extract_bearer_token};
/// JSON body returned by `fetch_external_calendar_events`.
#[derive(Debug, Serialize)]
pub struct ExternalCalendarEventsResponse {
/// Events parsed from the calendar's ICS data; empty when the calendar is not visible.
pub events: Vec<VEvent>,
/// Timestamp associated with the ICS data: the cache timestamp on a cache hit,
/// otherwise the time the response was produced.
pub last_fetched: DateTime<Utc>,
}
/// Returns the events of one of the authenticated user's external (ICS) calendars.
///
/// The calendar is looked up among the calendars owned by the bearer-token user, so a
/// calendar belonging to someone else is reported as not found. Hidden calendars yield
/// an empty event list without any network or cache access. Otherwise the ICS text is
/// taken from the cache when it is fresh, or downloaded from the calendar's URL and
/// written back to the cache on a best-effort basis.
///
/// # Errors
///
/// * whatever `extract_bearer_token` / `get_user_from_token` return on auth failure,
/// * `ApiError::Database` if the user's calendars cannot be listed,
/// * `ApiError::NotFound` if the user has no calendar with this `id`,
/// * `ApiError::Internal` if the download fails for every User-Agent,
/// * `ApiError::BadRequest` if the ICS text cannot be parsed.
pub async fn fetch_external_calendar_events(
    headers: axum::http::HeaderMap,
    State(app_state): State<Arc<AppState>>,
    Path(id): Path<i32>,
) -> Result<Json<ExternalCalendarEventsResponse>, ApiError> {
    let token = extract_bearer_token(&headers)?;
    let user = app_state.auth_service.get_user_from_token(&token).await?;
    let repo = ExternalCalendarRepository::new(&app_state.db);

    // Searching only the user's own calendars doubles as the ownership check.
    let calendar = repo
        .get_by_user(&user.id)
        .await
        .map_err(|e| ApiError::Database(format!("Failed to get external calendars: {}", e)))?
        .into_iter()
        .find(|c| c.id == id)
        .ok_or_else(|| ApiError::NotFound("External calendar not found".to_string()))?;

    if !calendar.is_visible {
        return Ok(Json(ExternalCalendarEventsResponse {
            events: vec![],
            last_fetched: Utc::now(),
        }));
    }

    // A failing cache lookup is treated like a cache miss: we simply download again.
    let cache_max_age_minutes = 5;
    let cached = match repo.is_cache_stale(id, cache_max_age_minutes).await {
        Ok(false) => repo.get_cached_data(id).await.ok().flatten(),
        Ok(true) | Err(_) => None,
    };

    let (ics_content, last_fetched) = match cached {
        Some(hit) => hit,
        None => {
            // The feed URL is deliberately not logged: subscription URLs commonly embed
            // a secret access token that must not end up in server logs.
            println!("🌍 Fetching external calendar {} from its source URL", id);
            let body = download_ics_with_fallback_agents(calendar.url.as_str()).await?;

            // Cache bookkeeping is best-effort: report failures, but still serve the
            // freshly downloaded data.
            let etag = None; // TODO: Extract ETag from response headers if available
            if let Err(e) = repo.update_cache(id, &body, etag).await {
                eprintln!("Failed to update cache for external calendar {}: {}", id, e);
            }
            if let Err(e) = repo.update_last_fetched(id, &user.id).await {
                eprintln!(
                    "Failed to update last_fetched for external calendar {}: {}",
                    id, e
                );
            }
            (body, Utc::now())
        }
    };

    let events = parse_ics_content(&ics_content)
        .map_err(|e| ApiError::BadRequest(format!("Failed to parse calendar: {}", e)))?;

    Ok(Json(ExternalCalendarEventsResponse {
        events,
        last_fetched,
    }))
}

/// Downloads the raw ICS text from `url`, retrying with a different User-Agent each time.
///
/// Some providers reject requests depending on the client they believe they are talking
/// to, so each User-Agent is tried in turn until one request returns a success status.
/// Returns `ApiError::Internal` when the client cannot be built, when the successful
/// response body cannot be read, or when every attempt fails.
async fn download_ics_with_fallback_agents(url: &str) -> Result<String, ApiError> {
    const USER_AGENTS: [&str; 3] = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Mozilla/5.0 (compatible; Runway Calendar/1.0)",
        "Outlook-iOS/709.2226530.prod.iphone (3.24.1)",
    ];

    // One client serves every attempt; only the User-Agent header differs per request.
    let client = Client::builder()
        .redirect(reqwest::redirect::Policy::limited(10))
        .timeout(std::time::Duration::from_secs(30))
        .build()
        .map_err(|e| ApiError::Internal(format!("Failed to create HTTP client: {}", e)))?;

    let mut last_error: Option<String> = None;
    for (attempt, ua) in USER_AGENTS.iter().enumerate() {
        println!("🔄 Attempt {} with User-Agent: {}", attempt + 1, ua);

        let result = client
            .get(url)
            .header(reqwest::header::USER_AGENT, *ua)
            .header("Accept", "text/calendar,application/calendar+xml,text/plain,*/*")
            .header("Accept-Charset", "utf-8")
            .header("Cache-Control", "no-cache")
            .send()
            .await;

        let resp = match result {
            Ok(resp) => resp,
            Err(e) => {
                println!("❌ Request failed: {}", e);
                last_error = Some(format!("Request error: {}", e));
                continue;
            }
        };

        let status = resp.status();
        println!("📡 Response status: {}", status);

        if status.is_success() {
            println!("✅ Successfully fetched calendar data");
            return resp.text().await.map_err(|e| {
                ApiError::Internal(format!("Failed to read calendar content: {}", e))
            });
        }

        last_error = Some(if status == reqwest::StatusCode::BAD_REQUEST {
            // Only the first 100 characters of the error page are kept for the message.
            let error_body = resp.text().await.unwrap_or_default();
            let excerpt: String = error_body.chars().take(100).collect();
            if error_body.contains("OwaPage") || error_body.contains("Outlook") {
                println!("🚫 Outlook authentication error detected, trying next approach...");
                format!("Outlook auth error: {}", excerpt)
            } else {
                format!("Bad Request: {}", excerpt)
            }
        } else {
            format!("HTTP {}", status)
        });
    }

    Err(ApiError::Internal(format!(
        "Failed to fetch calendar after trying {} different approaches. Last error: {}",
        USER_AGENTS.len(),
        last_error.unwrap_or_else(|| "Unknown error".to_string())
    )))
}
/// Parses ICS text into deduplicated events.
///
/// Every VEVENT of every calendar in the input is converted with
/// `convert_ical_to_vevent`; events that fail to convert are skipped, while a calendar
/// that the ICS parser itself rejects aborts the whole parse with that error. The
/// converted events are passed through `deduplicate_events` before being returned.
fn parse_ics_content(ics_content: &str) -> Result<Vec<VEvent>, Box<dyn std::error::Error>> {
    let mut converted = Vec::new();

    for parsed in ical::IcalParser::new(ics_content.as_bytes()) {
        let calendar = parsed?;
        converted.extend(
            calendar
                .events
                .into_iter()
                .filter_map(|component| convert_ical_to_vevent(component).ok()),
        );
    }

    // Outlook sometimes includes duplicate events (recurring exceptions may appear
    // multiple times), so the raw list is deduplicated before it is handed back.
    Ok(deduplicate_events(converted))
}
fn convert_ical_to_vevent(ical_event: IcalEvent) -> Result<VEvent, Box<dyn std::error::Error>> {
use uuid::Uuid;
let mut summary = None;
let mut description = None;
let mut location = None;
let mut dtstart = None;
let mut dtend = None;
let mut uid = None;
let mut all_day = false;
let mut rrule = None;
// Extract properties
for property in ical_event.properties {
match property.name.as_str() {
"SUMMARY" => {
summary = property.value;
}
"DESCRIPTION" => {
description = property.value;
}
"LOCATION" => {
location = property.value;
}
"DTSTART" => {
if let Some(value) = property.value {
// Check if it's a date-only value (all-day event)
if value.len() == 8 && !value.contains('T') {
all_day = true;
// Parse YYYYMMDD format
if let Ok(date) = chrono::NaiveDate::parse_from_str(&value, "%Y%m%d") {
dtstart = Some(chrono::TimeZone::from_utc_datetime(&Utc, &date.and_hms_opt(12, 0, 0).unwrap()));
}
} else {
// Extract timezone info from parameters
let tzid = property.params.as_ref()
.and_then(|params| params.iter().find(|(k, _)| k == "TZID"))
.and_then(|(_, v)| v.first().cloned());
// Parse datetime with timezone information
if let Some(dt) = parse_datetime_with_tz(&value, tzid.as_deref()) {
dtstart = Some(dt);
}
}
}
}
"DTEND" => {
if let Some(value) = property.value {
if all_day && value.len() == 8 && !value.contains('T') {
// For all-day events, DTEND is exclusive so use the date as-is at noon
if let Ok(date) = chrono::NaiveDate::parse_from_str(&value, "%Y%m%d") {
dtend = Some(chrono::TimeZone::from_utc_datetime(&Utc, &date.and_hms_opt(12, 0, 0).unwrap()));
}
} else {
// Extract timezone info from parameters
let tzid = property.params.as_ref()
.and_then(|params| params.iter().find(|(k, _)| k == "TZID"))
.and_then(|(_, v)| v.first().cloned());
// Parse datetime with timezone information
if let Some(dt) = parse_datetime_with_tz(&value, tzid.as_deref()) {
dtend = Some(dt);
}
}
}
}
"UID" => {
uid = property.value;
}
"RRULE" => {
rrule = property.value;
}
_ => {} // Ignore other properties for now
}
}
let dtstart = dtstart.ok_or("Missing DTSTART")?;
let vevent = VEvent {
uid: uid.unwrap_or_else(|| Uuid::new_v4().to_string()),
dtstart: dtstart.naive_utc(),
dtstart_tzid: None, // TODO: Parse timezone from ICS
dtend: dtend.map(|dt| dt.naive_utc()),
dtend_tzid: None, // TODO: Parse timezone from ICS
summary,
description,
location,
all_day,
rrule,
rdate: Vec::new(),
rdate_tzid: None,
exdate: Vec::new(), // External calendars don't need exception handling
exdate_tzid: None,
recurrence_id: None,
recurrence_id_tzid: None,
created: None,
created_tzid: None,
last_modified: None,
last_modified_tzid: None,
dtstamp: Utc::now(),
sequence: Some(0),
status: None,
transp: None,
organizer: None,
attendees: Vec::new(),
url: None,
attachments: Vec::new(),
categories: Vec::new(),
priority: None,
resources: Vec::new(),
related_to: None,
geo: None,
duration: None,
class: None,
contact: None,
comment: None,
alarms: Vec::new(),
etag: None,
href: None,
calendar_path: None,
};
Ok(vevent)
}
fn parse_datetime_with_tz(datetime_str: &str, tzid: Option<&str>) -> Option<DateTime<Utc>> {
use chrono::TimeZone;
use chrono_tz::Tz;
// Try various datetime formats commonly found in ICS files
// Format: 20231201T103000Z (UTC) - handle as naive datetime first
if datetime_str.ends_with('Z') {
let datetime_without_z = &datetime_str[..datetime_str.len()-1];
if let Ok(naive_dt) = chrono::NaiveDateTime::parse_from_str(datetime_without_z, "%Y%m%dT%H%M%S") {
return Some(naive_dt.and_utc());
}
}
// Format: 20231201T103000-0500 (with timezone offset)
if let Ok(dt) = DateTime::parse_from_str(datetime_str, "%Y%m%dT%H%M%S%z") {
return Some(dt.with_timezone(&Utc));
}
// Format: 2023-12-01T10:30:00Z (ISO format)
if let Ok(dt) = DateTime::parse_from_str(datetime_str, "%Y-%m-%dT%H:%M:%SZ") {
return Some(dt.with_timezone(&Utc));
}
// Handle naive datetime with timezone parameter
let naive_dt = if let Ok(dt) = chrono::NaiveDateTime::parse_from_str(datetime_str, "%Y%m%dT%H%M%S") {
Some(dt)
} else if let Ok(dt) = chrono::NaiveDateTime::parse_from_str(datetime_str, "%Y-%m-%dT%H:%M:%S") {
Some(dt)
} else {
None
};
if let Some(naive_dt) = naive_dt {
// If TZID is provided, try to parse it
if let Some(tzid_str) = tzid {
// Handle common timezone formats
let tz_result = if tzid_str.starts_with("/mozilla.org/") {
// Mozilla/Thunderbird format: /mozilla.org/20070129_1/Europe/London
tzid_str.split('/').last().and_then(|tz_name| tz_name.parse::<Tz>().ok())
} else if tzid_str.contains('/') {
// Standard timezone format: America/New_York, Europe/London
tzid_str.parse::<Tz>().ok()
} else {
// Try common abbreviations and Windows timezone names
match tzid_str {
// Standard abbreviations
"EST" => Some(Tz::America__New_York),
"PST" => Some(Tz::America__Los_Angeles),
"MST" => Some(Tz::America__Denver),
"CST" => Some(Tz::America__Chicago),
// North America - Windows timezone names to IANA mapping
"Mountain Standard Time" => Some(Tz::America__Denver),
"Eastern Standard Time" => Some(Tz::America__New_York),
"Central Standard Time" => Some(Tz::America__Chicago),
"Pacific Standard Time" => Some(Tz::America__Los_Angeles),
"Mountain Daylight Time" => Some(Tz::America__Denver),
"Eastern Daylight Time" => Some(Tz::America__New_York),
"Central Daylight Time" => Some(Tz::America__Chicago),
"Pacific Daylight Time" => Some(Tz::America__Los_Angeles),
"Hawaiian Standard Time" => Some(Tz::Pacific__Honolulu),
"Alaskan Standard Time" => Some(Tz::America__Anchorage),
"Alaskan Daylight Time" => Some(Tz::America__Anchorage),
"Atlantic Standard Time" => Some(Tz::America__Halifax),
"Newfoundland Standard Time" => Some(Tz::America__St_Johns),
// Europe
"GMT Standard Time" => Some(Tz::Europe__London),
"Greenwich Standard Time" => Some(Tz::UTC),
"W. Europe Standard Time" => Some(Tz::Europe__Berlin),
"Central Europe Standard Time" => Some(Tz::Europe__Warsaw),
"Romance Standard Time" => Some(Tz::Europe__Paris),
"Central European Standard Time" => Some(Tz::Europe__Belgrade),
"E. Europe Standard Time" => Some(Tz::Europe__Bucharest),
"FLE Standard Time" => Some(Tz::Europe__Helsinki),
"GTB Standard Time" => Some(Tz::Europe__Athens),
"Russian Standard Time" => Some(Tz::Europe__Moscow),
"Turkey Standard Time" => Some(Tz::Europe__Istanbul),
// Asia
"China Standard Time" => Some(Tz::Asia__Shanghai),
"Tokyo Standard Time" => Some(Tz::Asia__Tokyo),
"Korea Standard Time" => Some(Tz::Asia__Seoul),
"Singapore Standard Time" => Some(Tz::Asia__Singapore),
"India Standard Time" => Some(Tz::Asia__Kolkata),
"Pakistan Standard Time" => Some(Tz::Asia__Karachi),
"Bangladesh Standard Time" => Some(Tz::Asia__Dhaka),
"Thailand Standard Time" => Some(Tz::Asia__Bangkok),
"SE Asia Standard Time" => Some(Tz::Asia__Bangkok),
"Myanmar Standard Time" => Some(Tz::Asia__Yangon),
"Sri Lanka Standard Time" => Some(Tz::Asia__Colombo),
"Nepal Standard Time" => Some(Tz::Asia__Kathmandu),
"Central Asia Standard Time" => Some(Tz::Asia__Almaty),
"West Asia Standard Time" => Some(Tz::Asia__Tashkent),
"Afghanistan Standard Time" => Some(Tz::Asia__Kabul),
"Iran Standard Time" => Some(Tz::Asia__Tehran),
"Arabian Standard Time" => Some(Tz::Asia__Dubai),
"Arab Standard Time" => Some(Tz::Asia__Riyadh),
"Israel Standard Time" => Some(Tz::Asia__Jerusalem),
"Jordan Standard Time" => Some(Tz::Asia__Amman),
"Syria Standard Time" => Some(Tz::Asia__Damascus),
"Middle East Standard Time" => Some(Tz::Asia__Beirut),
"Egypt Standard Time" => Some(Tz::Africa__Cairo),
"South Africa Standard Time" => Some(Tz::Africa__Johannesburg),
"E. Africa Standard Time" => Some(Tz::Africa__Nairobi),
"W. Central Africa Standard Time" => Some(Tz::Africa__Lagos),
// Asia Pacific
"AUS Eastern Standard Time" => Some(Tz::Australia__Sydney),
"AUS Central Standard Time" => Some(Tz::Australia__Darwin),
"W. Australia Standard Time" => Some(Tz::Australia__Perth),
"Tasmania Standard Time" => Some(Tz::Australia__Hobart),
"New Zealand Standard Time" => Some(Tz::Pacific__Auckland),
"Fiji Standard Time" => Some(Tz::Pacific__Fiji),
"Tonga Standard Time" => Some(Tz::Pacific__Tongatapu),
// South America
"Argentina Standard Time" => Some(Tz::America__Buenos_Aires),
"E. South America Standard Time" => Some(Tz::America__Sao_Paulo),
"SA Eastern Standard Time" => Some(Tz::America__Cayenne),
"SA Pacific Standard Time" => Some(Tz::America__Bogota),
"SA Western Standard Time" => Some(Tz::America__La_Paz),
"Pacific SA Standard Time" => Some(Tz::America__Santiago),
"Venezuela Standard Time" => Some(Tz::America__Caracas),
"Montevideo Standard Time" => Some(Tz::America__Montevideo),
// Try parsing as IANA name
_ => tzid_str.parse::<Tz>().ok()
}
};
if let Some(tz) = tz_result {
if let Some(dt_with_tz) = tz.from_local_datetime(&naive_dt).single() {
return Some(dt_with_tz.with_timezone(&Utc));
}
}
}
// If no timezone info or parsing failed, treat as UTC (safer than local time assumptions)
return Some(chrono::TimeZone::from_utc_datetime(&Utc, &naive_dt));
}
None
}
/// Deduplicate the events parsed from one ICS feed.
///
/// Some calendar systems (like Outlook) may include duplicate events in ICS feeds.
/// The function works in four passes:
///
/// 1. group by UID and keep one event per UID, preferring events that carry an RRULE
///    and, among equals, the one with the higher `event_completeness_score`;
/// 2. split the survivors into recurring (has RRULE) and single events;
/// 3. group recurring events by normalized title and hand groups of more than one
///    event to `consolidate_same_title_events`;
/// 4. drop single events that duplicate another single event (same start and
///    normalized title) or that `would_event_be_generated_by_rrule` reports as an
///    occurrence of a recurring event with the same normalized title.
///
/// Returns the recurring events followed by the single events. Both UID and title
/// groups are drained from `HashMap`s, so the order of events in the result follows
/// hash-map iteration order rather than the order of the input.
fn deduplicate_events(mut events: Vec<VEvent>) -> Vec<VEvent> {
use std::collections::HashMap;
// First pass: Group by UID and prefer recurring events over single events with same UID
let mut uid_groups: HashMap<String, Vec<VEvent>> = HashMap::new();
for event in events.drain(..) {
uid_groups.entry(event.uid.clone()).or_insert_with(Vec::new).push(event);
}
let mut uid_deduplicated_events = Vec::new();
for (_uid, mut events_with_uid) in uid_groups.drain() {
if events_with_uid.len() == 1 {
// Only one event with this UID, keep it
uid_deduplicated_events.push(events_with_uid.into_iter().next().unwrap());
} else {
// Multiple events with same UID - prefer recurring over non-recurring
// Sort by preference: recurring events first, then by completeness
events_with_uid.sort_by(|a, b| {
let a_has_rrule = a.rrule.is_some();
let b_has_rrule = b.rrule.is_some();
match (a_has_rrule, b_has_rrule) {
(true, false) => std::cmp::Ordering::Less, // a (recurring) comes first
(false, true) => std::cmp::Ordering::Greater, // b (recurring) comes first
_ => {
// Both same type (both recurring or both single) - compare by completeness
// (b compared against a, so higher scores sort first)
event_completeness_score(b).cmp(&event_completeness_score(a))
}
}
});
// Keep the first (preferred) event; the group has at least two entries here,
// so `next()` cannot return `None`
let preferred_event = events_with_uid.into_iter().next().unwrap();
uid_deduplicated_events.push(preferred_event);
}
}
// Second pass: separate recurring and single events from UID-deduplicated set
let mut recurring_events = Vec::new();
let mut single_events = Vec::new();
for event in uid_deduplicated_events.drain(..) {
if event.rrule.is_some() {
recurring_events.push(event);
} else {
single_events.push(event);
}
}
// Third pass: Group recurring events by normalized title and consolidate different RRULE patterns
// (events without a summary all share the empty-string title)
let mut title_groups: HashMap<String, Vec<VEvent>> = HashMap::new();
for event in recurring_events.drain(..) {
let title = normalize_title(event.summary.as_ref().unwrap_or(&String::new()));
title_groups.entry(title).or_insert_with(Vec::new).push(event);
}
let mut deduplicated_recurring = Vec::new();
for (_title, events_with_title) in title_groups.drain() {
if events_with_title.len() == 1 {
// Single event with this title, keep as-is
deduplicated_recurring.push(events_with_title.into_iter().next().unwrap());
} else {
// Multiple events with same title - consolidate or deduplicate
// Check if these are actually different recurring patterns for the same logical event
let consolidated = consolidate_same_title_events(events_with_title);
deduplicated_recurring.extend(consolidated);
}
}
// Fourth pass: filter single events, removing those that would be generated by recurring events
let mut deduplicated_single = Vec::new();
// Maps "start|normalized title" to the index of the kept event in `deduplicated_single`
let mut seen_single: HashMap<String, usize> = HashMap::new();
for event in single_events.drain(..) {
let normalized_title = normalize_title(event.summary.as_ref().unwrap_or(&String::new()));
let dedup_key = format!(
"{}|{}",
event.dtstart.format("%Y%m%dT%H%M%S"),
normalized_title
);
// First check for exact duplicates among single events
if let Some(&existing_index) = seen_single.get(&dedup_key) {
let existing_event: &VEvent = &deduplicated_single[existing_index];
let current_completeness = event_completeness_score(&event);
let existing_completeness = event_completeness_score(existing_event);
if current_completeness > existing_completeness {
// Replace in place so the stored index stays valid
deduplicated_single[existing_index] = event;
} else {
// Discarding duplicate single event - keeping existing
}
continue;
}
// Check if this single event would be generated by any recurring event
let is_rrule_generated = deduplicated_recurring.iter().any(|recurring_event| {
// Check if this single event matches the recurring event's pattern (use normalized titles)
let single_title = normalize_title(event.summary.as_ref().unwrap_or(&String::new()));
let recurring_title = normalize_title(recurring_event.summary.as_ref().unwrap_or(&String::new()));
if single_title != recurring_title {
return false; // Different events
}
// Check if this single event would be generated by the recurring event
would_event_be_generated_by_rrule(recurring_event, &event)
});
if is_rrule_generated {
// Already covered by a recurring event: drop it by not pushing it anywhere
} else {
// This is a unique single event
seen_single.insert(dedup_key, deduplicated_single.len());
deduplicated_single.push(event);
}
}
// Combine recurring and single events
let mut result = deduplicated_recurring;
result.extend(deduplicated_single);
result
}
/// Reduces a title to a canonical form used to group similar events.
///
/// The title is trimmed and lower-cased, every character that is neither alphanumeric
/// nor whitespace is removed, and runs of whitespace collapse to a single space.
fn normalize_title(title: &str) -> String {
    let lowered = title.trim().to_lowercase();

    let mut kept = String::with_capacity(lowered.len());
    for ch in lowered.chars() {
        if ch.is_alphanumeric() || ch.is_whitespace() {
            kept.push(ch);
        }
    }

    let words: Vec<&str> = kept.split_whitespace().collect();
    words.join(" ")
}
/// Collapses recurring events that share a normalized title.
///
/// This handles cases where calendar systems provide multiple recurring definitions
/// for the same logical meeting (e.g., one RRULE for Tuesdays, another for Thursdays).
///
/// * If the events do not all share the first event's time of day and duration, only
///   exact duplicates are removed.
/// * If there are at least two events and all of them are `FREQ=WEEKLY`, they are
///   merged into one event through `consolidate_weekly_patterns` when that succeeds.
/// * Otherwise exact duplicates are removed and, if more than one event remains, only
///   the one with the highest `event_completeness_score` is kept.
fn consolidate_same_title_events(events: Vec<VEvent>) -> Vec<VEvent> {
    if events.is_empty() {
        return events;
    }

    // The first event provides the reference time of day and duration.
    let anchor_time = events[0].dtstart.time();
    let anchor_start = events[0].dtstart;
    let anchor_span = events[0]
        .dtend
        .map(|end| end.signed_duration_since(anchor_start));

    let uniform_schedule = events.iter().all(|event| {
        let span = event
            .dtend
            .map(|end| end.signed_duration_since(event.dtstart));
        event.dtstart.time() == anchor_time && span == anchor_span
    });
    if !uniform_schedule {
        // Just deduplicate exact duplicates
        return deduplicate_exact_recurring_events(events);
    }

    // Complementary weekly patterns can be folded into one multi-day weekly rule.
    let every_event_weekly = events
        .iter()
        .all(|event| matches!(&event.rrule, Some(rule) if rule.contains("FREQ=WEEKLY")));
    if events.len() >= 2 && every_event_weekly {
        if let Some(merged) = consolidate_weekly_patterns(&events) {
            return vec![merged];
        }
    }

    let survivors = deduplicate_exact_recurring_events(events);
    if survivors.len() <= 1 {
        return survivors;
    }

    // More than one distinct event is left: keep only the most complete one.
    let best_event = survivors
        .into_iter()
        .max_by_key(|candidate| event_completeness_score(candidate))
        .unwrap();
    println!(
        "🎯 Kept most complete event: '{}'",
        best_event.summary.as_deref().unwrap_or("No Title")
    );
    vec![best_event]
}
/// Removes recurring events that are exact duplicates of one another.
///
/// Two events are duplicates when their start, summary and RRULE are all equal. The
/// first occurrence keeps its position in the output; a later duplicate takes that
/// position over only when its `event_completeness_score` is strictly higher.
fn deduplicate_exact_recurring_events(events: Vec<VEvent>) -> Vec<VEvent> {
    use std::collections::HashMap;

    // Identity key -> position of the kept event in `unique`.
    let mut slot_by_key: HashMap<String, usize> = HashMap::new();
    let mut unique: Vec<VEvent> = Vec::new();

    for candidate in events {
        let key = format!(
            "{}|{}|{}",
            candidate.dtstart.format("%Y%m%dT%H%M%S"),
            candidate.summary.as_deref().unwrap_or(""),
            candidate.rrule.as_deref().unwrap_or("")
        );

        match slot_by_key.get(&key).copied() {
            Some(slot) => {
                let incoming = event_completeness_score(&candidate);
                let kept = event_completeness_score(&unique[slot]);
                if incoming > kept {
                    println!("🔄 Replacing exact duplicate: Keeping more complete event");
                    unique[slot] = candidate;
                }
            }
            None => {
                slot_by_key.insert(key, unique.len());
                unique.push(candidate);
            }
        }
    }

    unique
}
/// Attempts to merge several `FREQ=WEEKLY` events into one event with a combined BYDAY.
///
/// The days of every weekly event are collected: the entries of its `BYDAY` part when
/// present, otherwise the weekday of its start. The first weekly event is cloned as
/// the base and its RRULE is rebuilt with the other parts unchanged and a single
/// `BYDAY` listing all collected days. The days are emitted Monday-first without
/// duplicates, so the result is the same on every run.
///
/// Events without an RRULE or with a non-weekly RRULE are skipped. Returns `None`
/// when no weekly event or no day was found. The caller is expected to have checked
/// that the events share time of day and duration.
fn consolidate_weekly_patterns(events: &[VEvent]) -> Option<VEvent> {
    const WEEK_ORDER: [&str; 7] = ["MO", "TU", "WE", "TH", "FR", "SA", "SU"];

    let mut days: Vec<String> = Vec::new();
    let mut base_event: Option<&VEvent> = None;

    for event in events {
        let Some(rrule) = &event.rrule else { continue };
        if !rrule.contains("FREQ=WEEKLY") {
            continue;
        }

        match rrule.split(';').find_map(|part| part.strip_prefix("BYDAY=")) {
            Some(list) => days.extend(
                list.split(',')
                    .map(str::trim)
                    .filter(|day| !day.is_empty())
                    .map(str::to_string),
            ),
            None => {
                // If no BYDAY specified, use the weekday from the start date
                let weekday = match event.dtstart.weekday() {
                    chrono::Weekday::Mon => "MO",
                    chrono::Weekday::Tue => "TU",
                    chrono::Weekday::Wed => "WE",
                    chrono::Weekday::Thu => "TH",
                    chrono::Weekday::Fri => "FR",
                    chrono::Weekday::Sat => "SA",
                    chrono::Weekday::Sun => "SU",
                };
                days.push(weekday.to_string());
            }
        }

        // Use the first weekly event as the base
        base_event.get_or_insert(event);
    }

    let base_event = base_event?;
    if days.is_empty() {
        return None;
    }

    // Order by weekday (the last two characters, so prefixed entries such as "1MO"
    // sort with their day); the string itself breaks ties, which makes equal entries
    // adjacent for `dedup`.
    let rank = |day: &str| {
        let code = day.get(day.len().saturating_sub(2)..).unwrap_or(day);
        WEEK_ORDER
            .iter()
            .position(|known| *known == code)
            .unwrap_or(WEEK_ORDER.len())
    };
    days.sort_by(|a, b| rank(a).cmp(&rank(b)).then_with(|| a.cmp(b)));
    days.dedup();
    let byday_str = days.join(",");

    // Keep every part of the base rule except its own BYDAY (and empty parts left by
    // stray semicolons), then append the combined BYDAY.
    let mut parts: Vec<&str> = base_event
        .rrule
        .as_deref()
        .unwrap_or("FREQ=WEEKLY")
        .split(';')
        .filter(|part| !part.is_empty() && !part.starts_with("BYDAY="))
        .collect();
    let byday_part = format!("BYDAY={}", byday_str);
    parts.push(&byday_part);

    let mut merged = base_event.clone();
    merged.rrule = Some(parts.join(";"));
    println!("🔗 Consolidated weekly pattern: BYDAY={}", byday_str);
    Some(merged)
}
/// Heuristically checks whether `single_event` is an occurrence of `recurring_event`.
///
/// Only the frequency (`FREQ=DAILY`, `FREQ=WEEKLY` or `FREQ=MONTHLY`) and `INTERVAL`
/// of the RRULE are considered; `BYDAY`, `COUNT`, `UNTIL` and other parts are not
/// evaluated. The single event must start at the same time of day as the series, on
/// or after the series start, and
///
/// * daily: a whole number of intervals (in days) after the series start;
/// * weekly: on the same weekday, a whole number of intervals (in weeks) later;
/// * monthly: on the same day of month, a whole number of intervals (in months) later.
///
/// A missing, unparsable or zero `INTERVAL` is treated as 1, so a malformed feed
/// cannot trigger a division by zero. Returns `false` when the recurring event has no
/// RRULE or uses any other frequency.
fn would_event_be_generated_by_rrule(recurring_event: &VEvent, single_event: &VEvent) -> bool {
    let Some(rrule) = &recurring_event.rrule else {
        return false; // No RRULE to check against
    };

    let series_start = recurring_event.dtstart;
    let candidate = single_event.dtstart;

    // Every supported frequency requires the same time of day.
    if candidate.time() != series_start.time() {
        return false;
    }

    let interval = i64::from(
        extract_interval_from_rrule(rrule)
            .filter(|&n| n > 0)
            .unwrap_or(1),
    );
    let days_diff = (candidate.date() - series_start.date()).num_days();

    if rrule.contains("FREQ=DAILY") {
        days_diff >= 0 && days_diff % interval == 0
    } else if rrule.contains("FREQ=WEEKLY") {
        candidate.weekday() == series_start.weekday()
            && days_diff >= 0
            && (days_diff / 7) % interval == 0
    } else if rrule.contains("FREQ=MONTHLY") {
        // Monthly recurrence - simplified check
        let months_diff = (i64::from(candidate.year()) - i64::from(series_start.year())) * 12
            + (i64::from(candidate.month()) - i64::from(series_start.month()));
        months_diff >= 0
            && months_diff % interval == 0
            && candidate.day() == series_start.day()
    } else {
        false
    }
}
/// Extracts the `INTERVAL` value from an RRULE string.
///
/// Returns `Some(1)` when the rule has no `INTERVAL` part (the iCalendar default),
/// `Some(n)` for a positive integer value, and `None` when the first `INTERVAL` part
/// is not a positive integer (unparsable or zero). Rejecting zero keeps callers that
/// divide by the interval from panicking on malformed feeds.
fn extract_interval_from_rrule(rrule: &str) -> Option<u32> {
    match rrule
        .split(';')
        .find_map(|part| part.strip_prefix("INTERVAL="))
    {
        Some(raw) => raw.parse::<u32>().ok().filter(|&n| n > 0),
        None => Some(1), // Default interval is 1 if not specified
    }
}
/// Scores how much optional information an event carries.
///
/// One point is awarded for each of: summary, description, location, end time, RRULE,
/// at least one category, at least one alarm, organizer, and at least one attendee.
fn event_completeness_score(event: &VEvent) -> u32 {
    let populated = [
        event.summary.is_some(),
        event.description.is_some(),
        event.location.is_some(),
        event.dtend.is_some(),
        event.rrule.is_some(),
        !event.categories.is_empty(),
        !event.alarms.is_empty(),
        event.organizer.is_some(),
        !event.attendees.is_empty(),
    ];

    // At most nine entries, so the count always fits in a u32.
    populated.iter().filter(|&&is_set| is_set).count() as u32
}