Skip to content

Commit

Permalink
Full sync: Set chunk size of Woo modules dynamically (#41433)
Browse files Browse the repository at this point in the history
* Add woo modules dynamic

* changelog

* Typo from merge

* Typos regarding MAX_META_LENGTH and phan issues

* Bug fixing, since objects can actually be arrays :(

* Added tests so that objects coming as arrays are also added in filter_objects_and_metadata_by_size.

* Woocommerce module handles order items, let's make sure we use the proper term

* Changes so that order items, when expanded, are returned in descending order, which is necessary for the chunk logic

* Deprecate removed methods

* Sanitize order in get_order_item_by_ids

* Renamed function to build_full_sync_action_array

* Update projects/packages/sync/src/modules/class-woocommerce.php

Typo

* Updated docblock

Committed via a GitHub action: https://github.com/Automattic/jetpack/actions/runs/13174118421

Upstream-Ref: Automattic/jetpack@2da3fe1
  • Loading branch information
darssen authored and matticbot committed Feb 6, 2025
1 parent 80d296f commit 927bb2d
Show file tree
Hide file tree
Showing 11 changed files with 291 additions and 201 deletions.
3 changes: 3 additions & 0 deletions jetpack_vendor/automattic/jetpack-sync/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ This is an alpha version! The changes listed here are not final.
### Added
- Add setting to hide newsletter category modal

### Changed
- Sync: Make Full Sync chunking logic dynamic for Woo modules

## [4.6.0] - 2025-02-03
### Added
- Sync: Full Sync comments now send dynamic chunks if chunk size default is too big. [#41350]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,6 @@
*/
class Comments extends Module {

/**
* Max bytes allowed for full sync upload.
* Current Setting : 7MB.
*
* @access public
*
* @var int
*/
const MAX_SIZE_FULL_SYNC = 7000000;
/**
* Max bytes allowed for post meta_value => length.
* Current Setting : 2MB.
*
* @access public
*
* @var int
*/
const MAX_COMMENT_META_LENGTH = 2000000;
/**
* Sync module name.
*
Expand Down Expand Up @@ -599,7 +581,7 @@ public function get_next_chunk( $config, $status, $chunk_size ) {
'comment',
$comments,
$metadata,
self::MAX_COMMENT_META_LENGTH, // Replace with appropriate comment meta length constant.
self::MAX_META_LENGTH, // Replace with appropriate comment meta length constant.
self::MAX_SIZE_FULL_SYNC
);

Expand All @@ -609,22 +591,4 @@ public function get_next_chunk( $config, $status, $chunk_size ) {
'meta' => $filtered_comments_metadata,
);
}

/**
 * Set the status of the full sync action based on the objects that were sent.
 *
 * @access public
 *
 * @param array $status  This module Full Sync status.
 * @param array $objects This module Full Sync objects: either a chunk structure holding the
 *                       sent IDs under the 'object_ids' key, or a plain list of IDs.
 *
 * @return array The updated status.
 */
public function set_send_full_sync_actions_status( $status, $objects ) {
	// Fall back to treating $objects as the ID list itself when there is no 'object_ids' entry.
	$object_ids = $objects['object_ids'] ?? $objects;

	// The last ID of the chunk becomes the resume point; 'sent' accumulates the running total.
	$status['last_sent'] = end( $object_ids );
	$status['sent']     += count( $object_ids );
	return $status;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ private function fetch_prepared_meta_from_db( $object_type, $where, $meta_object
private function get_prepared_meta_object( $object_type, $meta_entry ) {
$object_id_column = $object_type . '_id';

if ( 'post' === $object_type && strlen( $meta_entry['meta_value'] ) >= Posts::MAX_POST_META_LENGTH ) {
if ( 'post' === $object_type && strlen( $meta_entry['meta_value'] ) >= Posts::MAX_META_LENGTH ) {
$meta_entry['meta_value'] = '';
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,26 @@ abstract class Module {
*/
const MAX_DB_QUERY_LENGTH = 15 * 1024;

/**
* Max bytes allowed for full sync upload for the module.
* Default Setting : 7MB.
*
* @access public
*
* @var int
*/
const MAX_SIZE_FULL_SYNC = 7000000;

/**
* Max bytes allowed for a meta_value => length.
* Default Setting : 2MB.
*
* @access public
*
* @var int
*/
const MAX_META_LENGTH = 2000000;

/**
* Sync module name.
*
Expand Down Expand Up @@ -439,6 +459,9 @@ public function send_full_sync_actions( $config, $status, $send_until ) {

/**
* Set the status of the full sync action based on the objects that were sent.
* Used to update the status of the module after sending a chunk of objects.
* Since Full Sync logic chunking relies on order of items being processed in descending order, we need to sort
* due to some modules (e.g. WooCommerce) changing the order while getting the objects.
*
* @access protected
*
Expand All @@ -448,8 +471,10 @@ public function send_full_sync_actions( $config, $status, $send_until ) {
* @return array The updated status.
*/
protected function set_send_full_sync_actions_status( $status, $objects ) {
	// A chunk may arrive as a structure holding the sent IDs under 'object_ids',
	// or as a plain list of IDs; normalise to the ID list before updating the status.
	$object_ids = $objects['object_ids'] ?? $objects;

	// Update the status exactly once per chunk: the last ID becomes the resume point
	// and 'sent' accumulates the number of IDs in this chunk.
	$status['last_sent'] = end( $object_ids );
	$status['sent']     += count( $object_ids );
	return $status;
}

Expand Down Expand Up @@ -715,9 +740,11 @@ public function filter_objects_and_metadata_by_size( $type, $objects, $metadata,
$object_size = strlen( maybe_serialize( $object ) );
$current_metadata = array();
$metadata_size = 0;
$id_field = $this->id_field();
$object_id = (int) ( is_object( $object ) ? $object->{$id_field} : $object[ $id_field ] );

foreach ( $metadata as $key => $metadata_item ) {
if ( (int) $metadata_item->{$type . '_id'} === (int) $object->{$this->id_field()} ) {
if ( (int) $metadata_item->{$type . '_id'} === $object_id ) {
$metadata_item_size = strlen( maybe_serialize( $metadata_item->meta_value ) );
if ( $metadata_item_size >= $max_meta_size ) {
$metadata_item->meta_value = ''; // Trim metadata if too large.
Expand All @@ -734,7 +761,7 @@ public function filter_objects_and_metadata_by_size( $type, $objects, $metadata,

// Always allow the first object with metadata.
if ( empty( $filtered_object_ids ) || ( $current_size + $object_size + $metadata_size ) <= $max_total_size ) {
$filtered_object_ids[] = strval( $object->{$this->id_field()} );
$filtered_object_ids[] = strval( is_object( $object ) ? $object->{$id_field} : $object[ $id_field ] );
$filtered_objects[] = $object;
$filtered_metadata = array_merge( $filtered_metadata, $current_metadata );
$current_size += $object_size + $metadata_size;
Expand Down
44 changes: 3 additions & 41 deletions jetpack_vendor/automattic/jetpack-sync/src/modules/class-posts.php
Original file line number Diff line number Diff line change
Expand Up @@ -64,26 +64,6 @@ class Posts extends Module {
*/
const MAX_POST_CONTENT_LENGTH = 5000000;

/**
* Max bytes allowed for post meta_value => length.
* Current Setting : 2MB.
*
* @access public
*
* @var int
*/
const MAX_POST_META_LENGTH = 2000000;

/**
* Max bytes allowed for full sync upload.
* Current Setting : 7MB.
*
* @access public
*
* @var int
*/
const MAX_SIZE_FULL_SYNC = 7000000;

/**
* Default previous post state.
* Used for default previous post status.
Expand Down Expand Up @@ -321,7 +301,7 @@ public function get_full_sync_actions() {
}

/**
* Filter meta arguments so that we don't sync meta_values over MAX_POST_META_LENGTH.
* Filter meta arguments so that we don't sync meta_values over MAX_META_LENGTH.
*
* @param array $args action arguments.
*
Expand All @@ -332,7 +312,7 @@ public function trim_post_meta( $args ) {
// Explicitly truncate meta_value when it exceeds limit.
// Large content will cause OOM issues and break Sync.
$serialized_value = maybe_serialize( $meta_value );
if ( strlen( $serialized_value ) >= self::MAX_POST_META_LENGTH ) {
if ( strlen( $serialized_value ) >= self::MAX_META_LENGTH ) {
$meta_value = '';
}
return array( $meta_id, $object_id, $meta_key, $meta_value );
Expand Down Expand Up @@ -894,7 +874,7 @@ public function get_next_chunk( $config, $status, $chunk_size ) {
'post',
$posts,
$metadata,
self::MAX_POST_META_LENGTH,
self::MAX_META_LENGTH,
self::MAX_SIZE_FULL_SYNC
);

Expand All @@ -918,22 +898,4 @@ private function expand_posts( $post_ids ) {
$posts = array_values( $posts ); // Reindex in case posts were deleted.
return $posts;
}

/**
 * Set the status of the full sync action based on the objects that were sent.
 *
 * @access public
 *
 * @param array $status  This module Full Sync status.
 * @param array $objects This module Full Sync objects: either a chunk structure holding the
 *                       sent IDs under the 'object_ids' key, or a plain list of IDs.
 *
 * @return array The updated status.
 */
public function set_send_full_sync_actions_status( $status, $objects ) {
	// Fall back to treating $objects as the ID list itself when there is no 'object_ids' entry.
	$object_ids = $objects['object_ids'] ?? $objects;

	// The last ID of the chunk becomes the resume point; 'sent' accumulates the running total.
	$status['last_sent'] = end( $object_ids );
	$status['sent']     += count( $object_ids );
	return $status;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ public function init_full_sync_listeners( $callable ) {
*/
public function init_before_send() {
	// Full sync.
	// Register a single callback only: build_full_sync_action_array() receives the
	// filtered chunk and the previous end and returns the 'orders' / 'previous_end' payload.
	// The deprecated expand_order_objects() must not be hooked on the same filter, since it
	// would reshape the arguments before build_full_sync_action_array() sees them.
	add_filter( 'jetpack_sync_before_send_jetpack_full_sync_woocommerce_hpos_orders', array( $this, 'build_full_sync_action_array' ) );
}

/**
Expand Down Expand Up @@ -228,15 +228,34 @@ public function get_objects_by_id( $object_type, $ids ) {
* @param array $args List of order IDs.
*
* @return array
* @deprecated since 4.7.0-alpha
*/
public function expand_order_objects( $args ) {
	_deprecated_function( __METHOD__, 'next-version' );

	// $args carries the order IDs first and the previous chunk end second.
	list( $ids, $chunk_previous_end ) = $args;

	$expanded_orders = $this->get_objects_by_id( 'order', $ids );

	return array(
		'orders'       => $expanded_orders,
		'previous_end' => $chunk_previous_end,
	);
}

/**
 * Assemble the full sync action payload from an already filtered chunk.
 *
 * @access public
 *
 * @param array $args Two-element list: the filtered chunk (its 'objects' entry holds the orders)
 *                    followed by the previous end.
 *
 * @return array An array with the 'orders' and 'previous_end' keys.
 */
public function build_full_sync_action_array( $args ) {
	list( $chunk, $chunk_previous_end ) = $args;

	$action                 = array();
	$action['orders']       = $chunk['objects'];
	$action['previous_end'] = $chunk_previous_end;

	return $action;
}

/**
* Retrieve order data by its ID.
*
Expand Down Expand Up @@ -503,4 +522,47 @@ public function get_where_sql( $config ) {
$where_sql = $wpdb->prepare( "type IN ( $order_type_placeholder )", $order_types );
return "{$parent_where} AND {$where_sql}";
}

/**
 * Given the Module Configuration and Status, return the next chunk of orders to send.
 * The chunk's order IDs are expanded into order objects, which are then filtered based on
 * the maximum size constraints.
 *
 * @param array $config     This module Full Sync configuration.
 * @param array $status     This module Full Sync status.
 * @param int   $chunk_size Chunk size.
 *
 * @return array An empty array when the parent returns no IDs; otherwise an array with the
 *               'object_ids' and 'objects' keys.
 */
public function get_next_chunk( $config, $status, $chunk_size ) {
	$chunk_ids = parent::get_next_chunk( $config, $status, $chunk_size );
	if ( empty( $chunk_ids ) ) {
		return array();
	}

	$expanded = $this->get_objects_by_id( 'order', $chunk_ids );
	if ( empty( $expanded ) ) {
		// Nothing was fetched: still return the expected structure with the IDs so that status is updated correctly.
		return array(
			'object_ids' => $chunk_ids,
			'objects'    => array(),
		);
	}

	// Only the orders are size-filtered. Metadata is not synced for HPOS, hence the empty
	// metadata list and the zero meta size limit.
	list( $kept_ids, $kept_orders ) = $this->filter_objects_and_metadata_by_size(
		'order',
		$expanded,
		array(),
		0,
		self::MAX_SIZE_FULL_SYNC
	);

	return array(
		'object_ids' => $kept_ids,
		'objects'    => $kept_orders,
	);
}
}
Loading

0 comments on commit 927bb2d

Please sign in to comment.