Skip to content

Commit c2b0593

Browse files
authored
Merge pull request #1323 from metacpan/haarg/contrib-faster-more-resilient
make contributor script faster and more resilient
2 parents 232b751 + bea9c1f commit c2b0593

File tree

1 file changed

+53
-19
lines changed

1 file changed

+53
-19
lines changed

lib/MetaCPAN/Script/Role/Contributor.pm

Lines changed: 53 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -25,9 +25,28 @@ sub update_contributors {
2525
},
2626
);
2727

28-
my $bulk = $self->es->bulk_helper( es_doc_path('contributor') );
28+
my $report = sub {
29+
my ( $action, $result, $i ) = @_;
30+
if ( $i == 0 ) {
31+
log_info {'flushing contributor updates'};
32+
}
33+
};
34+
35+
my $bulk = $self->es->bulk_helper(
36+
es_doc_path('contributor'),
37+
on_success => $report,
38+
on_error => $report,
39+
);
40+
41+
log_info { 'updating contributors for ' . $scroll->total . ' releases' };
2942

3043
while ( my $release = $scroll->next ) {
44+
my $source = $release->{_source};
45+
my $name = $source->{name};
46+
if ( !( $name && $source->{author} && $source->{distribution} ) ) {
47+
Dlog_warn {"found broken release: $_"} $release;
48+
next;
49+
}
3150
log_debug { 'updating contributors for ' . $release->{_source}{name} };
3251
my $actions = $self->release_contributor_update_actions(
3352
$release->{_source} );
@@ -78,6 +97,11 @@ sub release_contributor_update_actions {
7897
return \@actions;
7998
}
8099

100+
has email_mapping => (
101+
is => 'ro',
102+
default => sub { {} },
103+
);
104+
81105
sub get_contributors {
82106
my ( $self, $release ) = @_;
83107

@@ -164,24 +188,34 @@ sub get_contributors {
164188
}
165189

166190
if (%want_email) {
167-
my $check_author = $self->es->search(
168-
es_doc_path('author'),
169-
body => {
170-
query => { terms => { email => [ sort keys %want_email ] } },
171-
_source => [ 'email', 'pauseid' ],
172-
size => 100,
173-
},
174-
);
175-
176-
for my $author ( @{ $check_author->{hits}{hits} } ) {
177-
my $emails = $author->{_source}{email};
178-
$emails = [$emails]
179-
if !ref $emails;
180-
my $pauseid = uc $author->{_source}{pauseid};
181-
for my $email (@$emails) {
182-
for my $contrib ( @{ $want_email{$email} } ) {
183-
$contrib->{pauseid} = $pauseid;
184-
}
191+
my $email_mapping = $self->email_mapping;
192+
193+
my @fetch_email = grep !exists $email_mapping->{$_},
194+
sort keys %want_email;
195+
196+
if (@fetch_email) {
197+
my $check_author = $self->es->search(
198+
es_doc_path('author'),
199+
body => {
200+
query => { terms => { email => \@fetch_email } },
201+
_source => [ 'email', 'pauseid' ],
202+
size => 100,
203+
},
204+
);
205+
206+
for my $author ( @{ $check_author->{hits}{hits} } ) {
207+
my $pauseid = uc $author->{_source}{pauseid};
208+
my $emails = $author->{_source}{email};
209+
$email_mapping->{$_} //= $pauseid
210+
for ref $emails ? @$emails : $emails;
211+
}
212+
}
213+
214+
for my $email ( keys %want_email ) {
215+
my $pauseid = $email_mapping->{$email}
216+
or next;
217+
for my $contrib ( @{ $want_email{$email} } ) {
218+
$contrib->{pauseid} = $pauseid;
185219
}
186220
}
187221
}

0 commit comments

Comments
 (0)