From 25b5a7b6acf8b5228b24f1888526a4d044af0d8d Mon Sep 17 00:00:00 2001 From: Brayo Date: Sat, 9 Aug 2025 17:00:36 +0300 Subject: [PATCH 1/3] fix(queries): eliminate double-escaping in regex patterns The serialize_classes function was using serde_json serialization which caused regex patterns to be double-escaped (e.g., 't\.co' became 't\\.co'), breaking pattern matching in ActivityWatch queries. Core changes: - Rewrite serialize_classes() to build JSON strings manually instead of using serde_json - Preserve single-escaped regex patterns for proper matching - Always include the 'ignore_case' field (true or false) - Only include 'regex' field for non-'none' type categories - Improve error handling in classes deserialization - Add optional fields to ClassSetting struct for better compatibility This fixes the core regex pattern matching issues in ActivityWatch canonical queries that were causing incorrect categorization results. --- aw-client-rust/src/classes.rs | 21 +++++++--- aw-client-rust/src/lib.rs | 2 + aw-client-rust/src/queries.rs | 77 ++++++++++++++++++++++------------- 3 files changed, 66 insertions(+), 34 deletions(-) diff --git a/aw-client-rust/src/classes.rs b/aw-client-rust/src/classes.rs index 2d8453e0..fb5e2317 100644 --- a/aw-client-rust/src/classes.rs +++ b/aw-client-rust/src/classes.rs @@ -5,6 +5,7 @@ use log::warn; use rand::Rng; use serde::{Deserialize, Serialize}; +use serde_json; use super::blocking::AwClient as ActivityWatchClient; @@ -14,6 +15,7 @@ pub type CategoryId = Vec; pub struct CategorySpec { #[serde(rename = "type")] pub spec_type: String, + #[serde(default)] pub regex: String, #[serde(default)] pub ignore_case: bool, @@ -21,8 +23,12 @@ pub struct CategorySpec { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ClassSetting { + #[serde(default)] + pub id: Option, pub name: Vec, pub rule: CategorySpec, + #[serde(default)] + pub data: Option, } /// Returns the default categorization classes @@ -173,11 +179,16 @@ pub fn 
get_classes_from_server(host: &str, port: u16) -> Vec<(CategoryId, Catego return default_classes(); } - let class_settings: Vec = serde_json::from_value(setting_value) - .unwrap_or_else(|_| { - warn!("Failed to deserialize classes setting, using default classes"); - return vec![]; - }); + let class_settings: Vec = match serde_json::from_value(setting_value) { + Ok(classes) => classes, + Err(e) => { + warn!( + "Failed to deserialize classes setting: {}, using default classes", + e + ); + return default_classes(); + } + }; // Convert ClassSetting to (CategoryId, CategorySpec) format class_settings diff --git a/aw-client-rust/src/lib.rs b/aw-client-rust/src/lib.rs index d6bd0d1d..5c6be763 100644 --- a/aw-client-rust/src/lib.rs +++ b/aw-client-rust/src/lib.rs @@ -123,6 +123,8 @@ impl AwClient { .map(|(start, stop)| format!("{}/{}", start, stop)) .collect(); + let query_lines: Vec<&str> = query.split('\n').collect(); + // Result is a sequence, one element per timeperiod self.client .post(url) diff --git a/aw-client-rust/src/queries.rs b/aw-client-rust/src/queries.rs index 92648d3f..7129b9a9 100644 --- a/aw-client-rust/src/queries.rs +++ b/aw-client-rust/src/queries.rs @@ -165,21 +165,40 @@ impl QueryParams { } /// Helper function to serialize classes in the format expected by the categorize function +/// This version builds the query string directly without JSON serialization to avoid double-escaping fn serialize_classes(classes: &[ClassRule]) -> String { - // Convert Vec<(CategoryId, CategorySpec)> to the JSON format expected by categorize - let serialized_classes: Vec<(Vec, serde_json::Value)> = classes - .iter() - .map(|(category_id, category_spec)| { - let spec_json = serde_json::json!({ - "type": category_spec.spec_type, - "regex": category_spec.regex, - "ignore_case": category_spec.ignore_case - }); - (category_id.clone(), spec_json) - }) - .collect(); - - serde_json::to_string(&serialized_classes).unwrap_or_else(|_| "[]".to_string()) + let mut parts = Vec::new(); 
+ + for (category_id, category_spec) in classes { + // Build category array string manually: ["Work", "Programming"] + let category_str = format!( + "[{}]", + category_id + .iter() + .map(|s| format!("\"{}\"", s)) + .collect::>() + .join(", ") + ); + + // Build spec object manually to avoid JSON escaping regex patterns + let mut spec_parts = Vec::new(); + spec_parts.push(format!("\"type\": \"{}\"", category_spec.spec_type)); + + // Only include regex for non-"none" types, and use raw pattern without escaping + if category_spec.spec_type != "none" { + spec_parts.push(format!("\"regex\": \"{}\"", category_spec.regex)); + } + + // Always include ignore_case field + spec_parts.push(format!("\"ignore_case\": {}", category_spec.ignore_case)); + + let spec_str = format!("{{{}}}", spec_parts.join(", ")); + + // Build the tuple [category, spec] + parts.push(format!("[{}, {}]", category_str, spec_str)); + } + + format!("[{}]", parts.join(", ")) } fn build_desktop_canonical_events(params: &DesktopQueryParams) -> String { @@ -195,7 +214,7 @@ fn build_desktop_canonical_events(params: &DesktopQueryParams) -> String { if params.base.filter_afk { query.push(format!( "not_afk = flood(query_bucket(find_bucket(\"{}\"))); - not_afk = filter_keyvals(not_afk, \"status\", [\"not-afk\"])", +not_afk = filter_keyvals(not_afk, \"status\", [\"not-afk\"])", escape_doublequote(¶ms.bid_afk) )); } @@ -207,7 +226,7 @@ fn build_desktop_canonical_events(params: &DesktopQueryParams) -> String { if params.base.include_audible { query.push( "audible_events = filter_keyvals(browser_events, \"audible\", [true]); - not_afk = period_union(not_afk, audible_events)" +not_afk = period_union(not_afk, audible_events)" .to_string(), ); } @@ -221,7 +240,7 @@ fn build_desktop_canonical_events(params: &DesktopQueryParams) -> String { // Add categorization if classes specified if !params.base.classes.is_empty() { query.push(format!( - "events = categorize(events, {})", + "events = categorize(events, {});", 
serialize_classes(¶ms.base.classes) )); } @@ -252,7 +271,7 @@ fn build_android_canonical_events(params: &AndroidQueryParams) -> String { // Add categorization if classes specified if !params.base.classes.is_empty() { query.push(format!( - "events = categorize(events, {})", + "events = categorize(events, {});", serialize_classes(¶ms.base.classes) )); } @@ -269,18 +288,19 @@ fn build_android_canonical_events(params: &AndroidQueryParams) -> String { } fn build_browser_events(params: &DesktopQueryParams) -> String { - let mut query = String::from("browser_events = [];\n"); + let mut query = String::from("browser_events = [];"); for browser_bucket in ¶ms.base.bid_browsers { for (browser_name, app_names) in BROWSER_APPNAMES.entries() { if browser_bucket.contains(browser_name) { query.push_str(&format!( - "events_{0} = flood(query_bucket(\"{1}\")); - window_{0} = filter_keyvals(events, \"app\", {2}); - events_{0} = filter_period_intersect(events_{0}, window_{0}); - events_{0} = split_url_events(events_{0}); - browser_events = concat(browser_events, events_{0}); - browser_events = sort_by_timestamp(browser_events);\n", + " +events_{0} = flood(query_bucket(\"{1}\")); +window_{0} = filter_keyvals(events, \"app\", {2}); +events_{0} = filter_period_intersect(events_{0}, window_{0}); +events_{0} = split_url_events(events_{0}); +browser_events = concat(browser_events, events_{0}); +browser_events = sort_by_timestamp(browser_events)", browser_name, escape_doublequote(browser_bucket), serde_json::to_string(app_names).unwrap() @@ -288,7 +308,6 @@ fn build_browser_events(params: &DesktopQueryParams) -> String { } } } - query } @@ -414,9 +433,9 @@ mod tests { assert!(serialized.contains("Programming")); assert!(serialized.contains("Google Docs")); assert!(serialized.contains("GitHub|vim")); - assert!(serialized.contains("\"type\":\"regex\"")); - assert!(serialized.contains("\"ignore_case\":false")); - assert!(serialized.contains("\"ignore_case\":true")); + 
assert!(serialized.contains("\"type\": \"regex\"")); + assert!(serialized.contains("\"ignore_case\": false")); + assert!(serialized.contains("\"ignore_case\": true")); } #[test] From 78bbad7cb7968f78250971a496b612aaafe5e411 Mon Sep 17 00:00:00 2001 From: Brayo Date: Thu, 21 Aug 2025 20:26:34 +0300 Subject: [PATCH 2/3] fix: remove unnecessary functions in query class Keep feature parity with aw-client --- aw-client-rust/src/classes.rs | 62 ---------------------------- aw-client-rust/src/lib.rs | 4 +- aw-client-rust/src/queries.rs | 76 +++++++++-------------------------- aw-webui | 2 +- 4 files changed, 22 insertions(+), 122 deletions(-) diff --git a/aw-client-rust/src/classes.rs b/aw-client-rust/src/classes.rs index fb5e2317..1e6c2236 100644 --- a/aw-client-rust/src/classes.rs +++ b/aw-client-rust/src/classes.rs @@ -2,13 +2,9 @@ //! //! Taken from default classes in aw-webui -use log::warn; -use rand::Rng; use serde::{Deserialize, Serialize}; use serde_json; -use super::blocking::AwClient as ActivityWatchClient; - pub type CategoryId = Vec; #[derive(Debug, Clone, Serialize, Deserialize)] @@ -146,61 +142,3 @@ pub fn default_classes() -> Vec<(CategoryId, CategorySpec)> { ), ] } - -/// Get classes from server-side settings using default localhost:5600. -/// Might throw an error if not set yet, in which case we use the default classes as a fallback. -pub fn get_classes() -> Vec<(CategoryId, CategorySpec)> { - get_classes_from_server("localhost", 5600) -} - -/// Get classes from server-side settings with custom host and port. -/// Might throw an error if not set yet, in which case we use the default classes as a fallback. 
-pub fn get_classes_from_server(host: &str, port: u16) -> Vec<(CategoryId, CategorySpec)> { - let mut rng = rand::rng(); - let random_int = rng.random_range(0..10001); - let client_id = format!("get-setting-{}", random_int); - - // Create a client with a random ID, similar to the Python implementation - let awc = match ActivityWatchClient::new(host, port, &client_id) { - Ok(client) => client, - Err(_) => { - warn!( - "Failed to create ActivityWatch client for {}:{}, using default classes", - host, port - ); - return default_classes(); - } - }; - - awc.get_setting("classes") - .map(|setting_value| { - // Try to deserialize the setting into Vec - if setting_value.is_null() { - return default_classes(); - } - - let class_settings: Vec = match serde_json::from_value(setting_value) { - Ok(classes) => classes, - Err(e) => { - warn!( - "Failed to deserialize classes setting: {}, using default classes", - e - ); - return default_classes(); - } - }; - - // Convert ClassSetting to (CategoryId, CategorySpec) format - class_settings - .into_iter() - .map(|class| (class.name, class.rule)) - .collect() - }) - .unwrap_or_else(|_| { - warn!( - "Failed to get classes from server {}:{}, using default classes as fallback", - host, port - ); - default_classes() - }) -} diff --git a/aw-client-rust/src/lib.rs b/aw-client-rust/src/lib.rs index 5c6be763..42368604 100644 --- a/aw-client-rust/src/lib.rs +++ b/aw-client-rust/src/lib.rs @@ -36,7 +36,7 @@ impl std::fmt::Debug for AwClient { } fn get_hostname() -> String { - return gethostname::gethostname().to_string_lossy().to_string(); + gethostname::gethostname().to_string_lossy().to_string() } impl AwClient { @@ -123,8 +123,6 @@ impl AwClient { .map(|(start, stop)| format!("{}/{}", start, stop)) .collect(); - let query_lines: Vec<&str> = query.split('\n').collect(); - // Result is a sequence, one element per timeperiod self.client .post(url) diff --git a/aw-client-rust/src/queries.rs b/aw-client-rust/src/queries.rs index 7129b9a9..00afa621 
100644 --- a/aw-client-rust/src/queries.rs +++ b/aw-client-rust/src/queries.rs @@ -28,11 +28,8 @@ //! }; //! //! // Automatically fetches classes from localhost:5600 -//! let query = QueryParams::Desktop(params.clone()).canonical_events_with_classes(); +//! let query = QueryParams::Desktop(params.clone()).canonical_events(); //! -//! // Or from a custom server -//! let query = QueryParams::Desktop(params) -//! .canonical_events_with_classes_from_server("localhost", 2345); //! ``` use crate::classes::{CategoryId, CategorySpec}; @@ -78,8 +75,6 @@ pub static BROWSER_APPNAMES: phf::Map<&'static str, &'static [&'static str]> = p "vivaldi" => &["Vivaldi-stable", "Vivaldi-snapshot", "vivaldi.exe"], }; -pub const DEFAULT_LIMIT: u32 = 100; - /// Type alias for categorization classes pub type ClassRule = (CategoryId, CategorySpec); @@ -135,33 +130,6 @@ impl QueryParams { QueryParams::Android(params) => build_android_canonical_events(params), } } - - /// Build canonical events query string with automatic class fetching if not provided - pub fn canonical_events_with_classes(&self) -> String { - self.canonical_events_with_classes_from_server("localhost", 5600) - } - - /// Build canonical events query string with automatic class fetching from custom server - pub fn canonical_events_with_classes_from_server(&self, host: &str, port: u16) -> String { - match self { - QueryParams::Desktop(params) => { - let mut params_with_classes = params.clone(); - if params_with_classes.base.classes.is_empty() { - params_with_classes.base.classes = - crate::classes::get_classes_from_server(host, port); - } - build_desktop_canonical_events(¶ms_with_classes) - } - QueryParams::Android(params) => { - let mut params_with_classes = params.clone(); - if params_with_classes.base.classes.is_empty() { - params_with_classes.base.classes = - crate::classes::get_classes_from_server(host, port); - } - build_android_canonical_events(¶ms_with_classes) - } - } - } } /// Helper function to serialize classes in 
the format expected by the categorize function @@ -201,7 +169,7 @@ fn serialize_classes(classes: &[ClassRule]) -> String { format!("[{}]", parts.join(", ")) } -fn build_desktop_canonical_events(params: &DesktopQueryParams) -> String { +pub fn build_desktop_canonical_events(params: &DesktopQueryParams) -> String { let mut query = Vec::new(); // Fetch window events @@ -256,7 +224,7 @@ not_afk = period_union(not_afk, audible_events)" query.join(";\n") } -fn build_android_canonical_events(params: &AndroidQueryParams) -> String { +pub fn build_android_canonical_events(params: &AndroidQueryParams) -> String { let mut query = Vec::new(); // Fetch app events @@ -287,7 +255,7 @@ fn build_android_canonical_events(params: &AndroidQueryParams) -> String { query.join(";\n") } -fn build_browser_events(params: &DesktopQueryParams) -> String { +pub fn build_browser_events(params: &DesktopQueryParams) -> String { let mut query = String::from("browser_events = [];"); for browser_bucket in ¶ms.base.bid_browsers { @@ -311,38 +279,34 @@ browser_events = sort_by_timestamp(browser_events)", query } -/// Build a full desktop query +/// Build a full desktop query from `params`; classes are taken from `params.base.classes` and no server is contacted pub fn full_desktop_query(params: &DesktopQueryParams) -> String { - let mut query = QueryParams::Desktop(params.clone()).canonical_events_with_classes(); + let mut query = QueryParams::Desktop(params.clone()).canonical_events(); // Add basic event aggregations - query.push_str(&format!( - " + query.push_str( + &" title_events = sort_by_duration(merge_events_by_keys(events, [\"app\", \"title\"])); app_events = sort_by_duration(merge_events_by_keys(title_events, [\"app\"])); cat_events = sort_by_duration(merge_events_by_keys(events, [\"$category\"])); - app_events = limit_events(app_events, {}); - title_events = limit_events(title_events, {}); duration = sum_durations(events); - ", - DEFAULT_LIMIT, DEFAULT_LIMIT - )); + " + .to_string(), + ); // Add browser-specific query parts if browser 
buckets exist if !params.base.bid_browsers.is_empty() { - query.push_str(&format!( - " + query.push_str( + &" browser_events = split_url_events(browser_events); browser_urls = merge_events_by_keys(browser_events, [\"url\"]); browser_urls = sort_by_duration(browser_urls); - browser_urls = limit_events(browser_urls, {}); browser_domains = merge_events_by_keys(browser_events, [\"$domain\"]); browser_domains = sort_by_duration(browser_domains); - browser_domains = limit_events(browser_domains, {}); browser_duration = sum_durations(browser_events); - ", - DEFAULT_LIMIT, DEFAULT_LIMIT - )); + " + .to_string(), + ); } else { query.push_str( " @@ -443,7 +407,7 @@ mod tests { let params = DesktopQueryParams { base: QueryParamsBase { bid_browsers: vec![], - classes: vec![], // Empty classes - should trigger server fetch + classes: vec![], filter_classes: vec![], filter_afk: true, include_audible: true, @@ -453,9 +417,9 @@ mod tests { }; let query_params = QueryParams::Desktop(params); - let query = query_params.canonical_events_with_classes(); + let query = query_params.canonical_events(); - // Should contain basic query structure even if server fetch fails + // Should contain basic query structure assert!(query.contains("events = flood")); assert!(query.contains("test-window")); } @@ -484,7 +448,7 @@ mod tests { }; let query_params = QueryParams::Desktop(params); - let query = query_params.canonical_events_with_classes(); + let query = query_params.canonical_events(); // Should contain categorization assert!(query.contains("events = categorize")); diff --git a/aw-webui b/aw-webui index f38b119f..291da6f2 160000 --- a/aw-webui +++ b/aw-webui @@ -1 +1 @@ -Subproject commit f38b119f0ad628dd8af6ddfaaaf89700b9290c06 +Subproject commit 291da6f2c5e7a6b896f23a4eec5ffed9874321ba From 01f25411514ad261dca187a1e1f0bb7048a60801 Mon Sep 17 00:00:00 2001 From: Brayo Date: Fri, 22 Aug 2025 12:26:43 +0300 Subject: [PATCH 3/3] feat: add optional always-active-pattern to canonical queries --- 
aw-client-rust/src/queries.rs | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/aw-client-rust/src/queries.rs b/aw-client-rust/src/queries.rs index 00afa621..efbd45af 100644 --- a/aw-client-rust/src/queries.rs +++ b/aw-client-rust/src/queries.rs @@ -25,6 +25,7 @@ //! }, //! bid_window: "aw-watcher-window_example".to_string(), //! bid_afk: "aw-watcher-afk_example".to_string(), +//! always_active_pattern: None, //! }; //! //! // Automatically fetches classes from localhost:5600 @@ -104,6 +105,8 @@ pub struct DesktopQueryParams { pub base: QueryParamsBase, pub bid_window: String, pub bid_afk: String, + #[serde(default)] + pub always_active_pattern: Option, } /// Query parameters specific to Android @@ -180,11 +183,26 @@ pub fn build_desktop_canonical_events(params: &DesktopQueryParams) -> String { // Fetch not-afk events if params.base.filter_afk { - query.push(format!( + let mut not_afk_query = format!( "not_afk = flood(query_bucket(find_bucket(\"{}\"))); not_afk = filter_keyvals(not_afk, \"status\", [\"not-afk\"])", escape_doublequote(¶ms.bid_afk) - )); + ); + + // Add treat_as_active functionality if pattern is provided + if let Some(ref pattern) = params.always_active_pattern { + not_afk_query.push_str(&format!( + "; +not_treat_as_afk = filter_keyvals_regex(events, \"app\", \"{}\"); +not_afk = period_union(not_afk, not_treat_as_afk); +not_treat_as_afk = filter_keyvals_regex(events, \"title\", \"{}\"); +not_afk = period_union(not_afk, not_treat_as_afk)", + escape_doublequote(pattern), + escape_doublequote(pattern) + )); + } + + query.push(not_afk_query); } // Add browser events if any browser buckets specified @@ -362,6 +380,7 @@ mod tests { }, bid_window: "aw-watcher-window_".to_string(), bid_afk: "aw-watcher-afk_".to_string(), + always_active_pattern: None, }; let query = full_desktop_query(¶ms); @@ -414,6 +433,7 @@ mod tests { }, bid_window: "test-window".to_string(), bid_afk: "test-afk".to_string(), + 
always_active_pattern: None, }; let query_params = QueryParams::Desktop(params); @@ -445,6 +465,7 @@ mod tests { }, bid_window: "test-window".to_string(), bid_afk: "test-afk".to_string(), + always_active_pattern: None, }; let query_params = QueryParams::Desktop(params);