From 0fc1bbf08759ded6a43e84b01548f3ea9834e3c2 Mon Sep 17 00:00:00 2001 From: Josh Pigford Date: Tue, 4 Feb 2025 14:29:30 -0600 Subject: [PATCH 1/5] Refactor transaction enrichment to support batch processing - Add method to enrich transactions in batches - Implement job scheduling for unenriched transactions - Improve logging and error handling for transaction enrichment --- app/jobs/enrich_transaction_batch_job.rb | 8 +++ app/models/account/data_enricher.rb | 74 ++++++++++++++---------- 2 files changed, 51 insertions(+), 31 deletions(-) create mode 100644 app/jobs/enrich_transaction_batch_job.rb diff --git a/app/jobs/enrich_transaction_batch_job.rb b/app/jobs/enrich_transaction_batch_job.rb new file mode 100644 index 00000000000..22d026f7d1b --- /dev/null +++ b/app/jobs/enrich_transaction_batch_job.rb @@ -0,0 +1,8 @@ +class EnrichTransactionBatchJob < ApplicationJob + queue_as :latency_high + + def perform(account, batch_size = 100, offset = 0) + enricher = Account::DataEnricher.new(account) + enricher.enrich_transaction_batch(batch_size, offset) + end +end diff --git a/app/models/account/data_enricher.rb b/app/models/account/data_enricher.rb index 924d5894dda..e8778fc20f4 100644 --- a/app/models/account/data_enricher.rb +++ b/app/models/account/data_enricher.rb @@ -8,49 +8,61 @@ def initialize(account) end def run - enrich_transactions - end + total_unenriched = account.entries.account_transactions + .where("enriched_at IS NULL OR merchant_id IS NULL OR category_id IS NULL") + .count - private - def enrich_transactions - candidates = account.entries.account_transactions.includes(entryable: [ :merchant, :category ]) + if total_unenriched > 0 + batch_size = 50 + batches = (total_unenriched.to_f / batch_size).ceil - Rails.logger.info("Enriching #{candidates.count} transactions for account #{account.id}") + batches.times do |batch| + EnrichTransactionBatchJob.perform_later(account, batch_size, batch * batch_size) + end + end + end - merchants = {} + def enrich_transaction_batch(batch_size = 50, offset = 0) + candidates = account.entries.account_transactions + .includes(entryable: [ :merchant, :category ]) + .where("enriched_at IS NULL OR merchant_id IS NULL OR category_id IS NULL") + .offset(offset) + .limit(batch_size) - candidates.each do |entry| - if entry.enriched_at.nil? || entry.entryable.merchant_id.nil? || entry.entryable.category_id.nil? - begin - next unless entry.name.present? + Rails.logger.info("Enriching batch of #{candidates.count} transactions for account #{account.id} (offset: #{offset})") - info = self.class.synth_provider.enrich_transaction(entry.name).info + merchants = {} - next unless info.present? + candidates.each do |entry| + begin + next unless entry.name.present? - if info.name.present? - merchant = merchants[info.name] ||= account.family.merchants.find_or_create_by(name: info.name) + info = self.class.synth_provider.enrich_transaction(entry.name).info - if info.icon_url.present? - merchant.icon_url = info.icon_url - end - end + next unless info.present? - entryable_attributes = { id: entry.entryable_id } - entryable_attributes[:merchant_id] = merchant.id if merchant.present? && entry.entryable.merchant_id.nil? + if info.name.present? + merchant = merchants[info.name] ||= account.family.merchants.find_or_create_by(name: info.name) - Account.transaction do - merchant.save! if merchant.present? - entry.update!( - enriched_at: Time.current, - enriched_name: info.name, - entryable_attributes: entryable_attributes - ) - end - rescue => e - Rails.logger.warn("Error enriching transaction #{entry.id}: #{e.message}") + if info.icon_url.present? + merchant.icon_url = info.icon_url end end + + entryable_attributes = { id: entry.entryable_id } + entryable_attributes[:merchant_id] = merchant.id if merchant.present? && entry.entryable.merchant_id.nil? + + Account.transaction do + merchant.save! if merchant.present? + entry.update!( + enriched_at: Time.current, + enriched_name: info.name, + entryable_attributes: entryable_attributes + ) + end + rescue => e + Rails.logger.warn("Error enriching transaction #{entry.id}: #{e.message}") end end + end end From e7e76b92fe52a334fed958d8d8a9017d8232ffe8 Mon Sep 17 00:00:00 2001 From: Josh Pigford Date: Tue, 4 Feb 2025 14:30:18 -0600 Subject: [PATCH 2/5] Re-enable enrichment --- app/models/account/syncer.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/app/models/account/syncer.rb b/app/models/account/syncer.rb index 8f5ebc3af74..37074aa7432 100644 --- a/app/models/account/syncer.rb +++ b/app/models/account/syncer.rb @@ -15,8 +15,7 @@ def run # Enrich if user opted in or if we're syncing transactions from a Plaid account on the hosted app if account.family.data_enrichment_enabled? || (account.plaid_account_id.present? && Rails.application.config.app_mode.hosted?) - # Temporarily disable until optimizations complete - # account.enrich_data_later + account.enrich_data_later else Rails.logger.info("Data enrichment is disabled, skipping enrichment for account #{account.id}") end From 84b2427105c956f47e5a612daf2a182ba04c97e0 Mon Sep 17 00:00:00 2001 From: Josh Pigford Date: Tue, 4 Feb 2025 19:31:05 -0600 Subject: [PATCH 3/5] Fix transaction enrichment query to use correct table references - Update queries to explicitly join and reference account_entries and account_transactions tables - Remove unnecessary name presence check before enrichment - Improve query precision for unenriched transaction selection --- app/models/account/data_enricher.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/models/account/data_enricher.rb b/app/models/account/data_enricher.rb index e8778fc20f4..6ed629055bf 100644 --- a/app/models/account/data_enricher.rb +++ b/app/models/account/data_enricher.rb @@ -9,7 +9,8 @@ def initialize(account) def run total_unenriched = account.entries.account_transactions - .where("enriched_at IS NULL OR merchant_id IS NULL OR category_id IS NULL") + .joins(:entryable) + .where("account_entries.enriched_at IS NULL OR account_transactions.merchant_id IS NULL OR account_transactions.category_id IS NULL") .count if total_unenriched > 0 @@ -25,7 +26,8 @@ def run def enrich_transaction_batch(batch_size = 50, offset = 0) candidates = account.entries.account_transactions .includes(entryable: [ :merchant, :category ]) - .where("enriched_at IS NULL OR merchant_id IS NULL OR category_id IS NULL") + .joins(:entryable) + .where("account_entries.enriched_at IS NULL OR account_transactions.merchant_id IS NULL OR account_transactions.category_id IS NULL") .offset(offset) .limit(batch_size) @@ -35,8 +37,6 @@ def enrich_transaction_batch(batch_size = 50, offset = 0) candidates.each do |entry| begin - next unless entry.name.present? - info = self.class.synth_provider.enrich_transaction(entry.name).info next unless info.present? From e1357469ddd5885f16f4bbdf129d484c9eba8242 Mon Sep 17 00:00:00 2001 From: Josh Pigford Date: Wed, 5 Feb 2025 09:20:43 -0600 Subject: [PATCH 4/5] Optimize transaction enrichment query joins - Refactor database joins to use explicit table references - Improve query performance for unenriched transaction selection - Ensure correct table aliasing in enrichment methods --- app/models/account/data_enricher.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/models/account/data_enricher.rb b/app/models/account/data_enricher.rb index 6ed629055bf..59979df067f 100644 --- a/app/models/account/data_enricher.rb +++ b/app/models/account/data_enricher.rb @@ -9,8 +9,8 @@ def initialize(account) def run total_unenriched = account.entries.account_transactions - .joins(:entryable) - .where("account_entries.enriched_at IS NULL OR account_transactions.merchant_id IS NULL OR account_transactions.category_id IS NULL") + .joins("JOIN account_transactions at ON at.id = account_entries.entryable_id AND account_entries.entryable_type = 'Account::Transaction'") + .where("account_entries.enriched_at IS NULL OR at.merchant_id IS NULL OR at.category_id IS NULL") .count if total_unenriched > 0 @@ -26,8 +26,8 @@ def run def enrich_transaction_batch(batch_size = 50, offset = 0) candidates = account.entries.account_transactions .includes(entryable: [ :merchant, :category ]) - .joins(:entryable) - .where("account_entries.enriched_at IS NULL OR account_transactions.merchant_id IS NULL OR account_transactions.category_id IS NULL") + .joins("JOIN account_transactions at ON at.id = account_entries.entryable_id AND account_entries.entryable_type = 'Account::Transaction'") + .where("account_entries.enriched_at IS NULL OR at.merchant_id IS NULL OR at.category_id IS NULL") .offset(offset) .limit(batch_size) From d87cc8b0618f6a5c4537b5b586e9386ecb4eb7c5 Mon Sep 17 00:00:00 2001 From: Josh Pigford Date: Wed, 5 Feb 2025 10:26:21 -0600 Subject: [PATCH 5/5] Remove deprecated data enrichment job and method - Delete EnrichDataJob as it's no longer used - Remove `enrich_data_later` method from Account model - Update Account::Syncer to directly call `enrich_data` instead of scheduling a job --- app/jobs/enrich_data_job.rb | 7 ------- app/models/account.rb | 4 ---- app/models/account/syncer.rb | 2 +- 3 files changed, 1 insertion(+), 12 deletions(-) delete mode 100644 app/jobs/enrich_data_job.rb diff --git a/app/jobs/enrich_data_job.rb b/app/jobs/enrich_data_job.rb deleted file mode 100644 index f20875c826b..00000000000 --- a/app/jobs/enrich_data_job.rb +++ /dev/null @@ -1,7 +0,0 @@ -class EnrichDataJob < ApplicationJob - queue_as :latency_high - - def perform(account) - account.enrich_data - end -end diff --git a/app/models/account.rb b/app/models/account.rb index c11b532d716..23aaaf71290 100644 --- a/app/models/account.rb +++ b/app/models/account.rb @@ -130,10 +130,6 @@ def enrich_data DataEnricher.new(self).run end - def enrich_data_later - EnrichDataJob.perform_later(self) - end - def update_with_sync!(attributes) should_update_balance = attributes[:balance] && attributes[:balance].to_d != balance diff --git a/app/models/account/syncer.rb b/app/models/account/syncer.rb index 37074aa7432..867c9052c4c 100644 --- a/app/models/account/syncer.rb +++ b/app/models/account/syncer.rb @@ -15,7 +15,7 @@ def run # Enrich if user opted in or if we're syncing transactions from a Plaid account on the hosted app if account.family.data_enrichment_enabled? || (account.plaid_account_id.present? && Rails.application.config.app_mode.hosted?) - account.enrich_data_later + account.enrich_data else Rails.logger.info("Data enrichment is disabled, skipping enrichment for account #{account.id}") end