From 930f905efc009d40d1aa4512809e76cdb798ae47 Mon Sep 17 00:00:00 2001
From: James Magahern
Date: Thu, 12 Jun 2025 18:09:58 -0700
Subject: [PATCH] Perf optimizations, recommended by o3

---
 kordophone-db/src/database.rs   |   8 +++
 kordophone-db/src/repository.rs | 115 ++++++++++++++++++++++++--------
 kordophoned/src/main.rs         |   2 +-
 3 files changed, 98 insertions(+), 27 deletions(-)

diff --git a/kordophone-db/src/database.rs b/kordophone-db/src/database.rs
index b0fe629..326156f 100644
--- a/kordophone-db/src/database.rs
+++ b/kordophone-db/src/database.rs
@@ -31,6 +31,14 @@ pub struct Database {
 impl Database {
     pub fn new(path: &str) -> Result<Self> {
         let mut connection = SqliteConnection::establish(path)?;
+
+        // Performance optimisations for SQLite. These are safe defaults that speed
+        // up concurrent writes and cut the fsync cost dramatically while still
+        // keeping durability guarantees that are good enough for an end-user
+        // application.
+        diesel::sql_query("PRAGMA journal_mode = WAL;").execute(&mut connection)?;
+        diesel::sql_query("PRAGMA synchronous = NORMAL;").execute(&mut connection)?;
+
         connection
             .run_pending_migrations(MIGRATIONS)
             .map_err(|e| anyhow::anyhow!("Error running migrations: {}", e))?;
diff --git a/kordophone-db/src/repository.rs b/kordophone-db/src/repository.rs
index c537e07..9fd70b1 100644
--- a/kordophone-db/src/repository.rs
+++ b/kordophone-db/src/repository.rs
@@ -1,6 +1,7 @@
 use anyhow::Result;
 use diesel::prelude::*;
 use diesel::query_dsl::BelongingToDsl;
+use std::collections::HashMap;
 
 use crate::{
     models::{
@@ -142,8 +143,8 @@ impl<'a> Repository<'a> {
     ) -> Result<()> {
         use crate::schema::conversation_messages::dsl::*;
         use crate::schema::messages::dsl::*;
+        use crate::schema::participants::dsl as participants_dsl;
 
-        // Local insertable struct for the join table
         #[derive(Insertable)]
         #[diesel(table_name = crate::schema::conversation_messages)]
         struct InsertableConversationMessage {
@@ -155,37 +156,99 @@ impl<'a> Repository<'a> {
             return Ok(());
         }
 
-        // Build the collections of insertable records
-        let mut db_messages: Vec<MessageRecord> = Vec::with_capacity(in_messages.len());
-        let mut conv_msg_records: Vec<InsertableConversationMessage> =
-            Vec::with_capacity(in_messages.len());
+        // Use a single transaction for everything – this removes the implicit
+        // autocommit after every statement which costs a lot when we have many
+        // individual queries.
+        self.connection
+            .transaction::<_, diesel::result::Error, _>(|conn| {
+                // Cache participant ids we have already looked up / created – in a
+                // typical conversation we only have a handful of participants, but we
+                // might be processing hundreds of messages. Avoiding an extra SELECT per
+                // message saves a lot of round-trips to SQLite.
+                let mut participant_cache: HashMap<String, i32> = HashMap::new();
 
-        for message in in_messages {
-            // Handle participant if message has a remote sender
-            let sender = message.sender.clone();
-            let mut db_message: MessageRecord = message.into();
-            db_message.sender_participant_id = self.get_or_create_participant(&sender);
+                // Prepare collections for the batch inserts.
+                let mut db_messages: Vec<MessageRecord> =
+                    Vec::with_capacity(in_messages.len());
+                let mut conv_msg_records: Vec<InsertableConversationMessage> =
+                    Vec::with_capacity(in_messages.len());
 
-            conv_msg_records.push(InsertableConversationMessage {
-                conversation_id: conversation_guid.to_string(),
-                message_id: db_message.id.clone(),
-            });
+                for message in in_messages {
+                    // Resolve/insert the sender participant only once per display name.
+                    let sender_id = match &message.sender {
+                        Participant::Me => None,
+                        Participant::Remote { display_name, .. } => {
+                            if let Some(cached_participant_id) = participant_cache.get(display_name) {
+                                Some(*cached_participant_id)
+                            } else {
+                                // Try to load from DB first
+                                let existing: Option<i32> = participants_dsl::participants
+                                    .filter(participants_dsl::display_name.eq(display_name))
+                                    .select(participants_dsl::id)
+                                    .first::<i32>(conn)
+                                    .optional()?;
 
-            db_messages.push(db_message);
-        }
+                                let participant_id = if let Some(pid) = existing {
+                                    pid
+                                } else {
+                                    let new_participant = InsertableParticipantRecord {
+                                        display_name: Some(display_name.clone()),
+                                        is_me: false,
+                                    };
 
-        // Batch insert or replace messages
-        diesel::replace_into(messages)
-            .values(&db_messages)
-            .execute(self.connection)?;
+                                    diesel::insert_into(participants_dsl::participants)
+                                        .values(&new_participant)
+                                        .execute(conn)?;
 
-        // Batch insert the conversation-message links
-        diesel::replace_into(conversation_messages)
-            .values(&conv_msg_records)
-            .execute(self.connection)?;
+                                    // last_insert_rowid() is connection-wide, but we are the only
+                                    // writer inside this transaction.
+                                    diesel::select(diesel::dsl::sql::<diesel::sql_types::Integer>(
+                                        "last_insert_rowid()",
+                                    ))
+                                    .get_result::<i32>(conn)?
+                                };
 
-        // Update conversation date
-        self.update_conversation_metadata(conversation_guid)?;
+                                participant_cache.insert(display_name.clone(), participant_id);
+                                Some(participant_id)
+                            }
+                        }
+                    };
+
+                    // Convert the message into its DB form.
+                    let mut db_message: MessageRecord = message.into();
+                    db_message.sender_participant_id = sender_id;
+
+                    conv_msg_records.push(InsertableConversationMessage {
+                        conversation_id: conversation_guid.to_string(),
+                        message_id: db_message.id.clone(),
+                    });
+
+                    db_messages.push(db_message);
+                }
+
+                // Execute the actual batch inserts.
+                diesel::replace_into(messages)
+                    .values(&db_messages)
+                    .execute(conn)?;
+
+                diesel::replace_into(conversation_messages)
+                    .values(&conv_msg_records)
+                    .execute(conn)?;
+
+                // Update conversation metadata quickly using the last message we just
+                // processed instead of re-querying the DB.
+                if let Some(last_msg) = db_messages.last() {
+                    use crate::schema::conversations::dsl as conv_dsl;
+                    diesel::update(conv_dsl::conversations.filter(conv_dsl::id.eq(conversation_guid)))
+                        .set((
+                            conv_dsl::date.eq(last_msg.date),
+                            conv_dsl::last_message_preview.eq::<Option<String>>(Some(last_msg.text.clone())),
+                        ))
+                        .execute(conn)?;
+                }
+
+                Ok(())
+            })?;
 
         Ok(())
     }
diff --git a/kordophoned/src/main.rs b/kordophoned/src/main.rs
index 128333a..18228d0 100644
--- a/kordophoned/src/main.rs
+++ b/kordophoned/src/main.rs
@@ -19,7 +19,7 @@ fn initialize_logging() {
         .unwrap_or(LevelFilter::Info);
 
     env_logger::Builder::from_default_env()
-        .format_timestamp_secs()
+        .format_timestamp_millis()
         .filter_level(log_level)
         .init();
 }
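Reviewer note, not part of the patch: a minimal sketch of how the WAL pragma could be verified after Database::new runs, assuming diesel 2.x with the sqlite feature enabled. The JournalMode struct and assert_wal helper are illustrative names, not code that exists in this repo; "PRAGMA journal_mode;" returns a single row whose journal_mode column should read "wal".

use diesel::prelude::*;
use diesel::sql_types::Text;
use diesel::sqlite::SqliteConnection;

// Hypothetical check: read back the journal mode that Database::new configured.
#[derive(QueryableByName)]
struct JournalMode {
    #[diesel(sql_type = Text)]
    journal_mode: String,
}

fn assert_wal(conn: &mut SqliteConnection) -> anyhow::Result<()> {
    let row: JournalMode = diesel::sql_query("PRAGMA journal_mode;").get_result(conn)?;
    anyhow::ensure!(
        row.journal_mode.eq_ignore_ascii_case("wal"),
        "expected WAL journal mode, got {}",
        row.journal_mode
    );
    Ok(())
}

Two properties worth keeping in mind: journal_mode = WAL is persistent (it is stored in the database file), while synchronous = NORMAL applies per connection, so re-issuing both pragmas on every Database::new, as the patch does, is the safe choice. Separately, if SQLite 3.35+ can be assumed (diesel's returning_clauses_for_sqlite_3_35 feature), the last_insert_rowid() select could probably be replaced with a RETURNING clause on the participant insert.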