Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

External script #49

Merged
merged 2 commits into from
Oct 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions bin/external.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
use strict;
use warnings;
use v5.36;

use Email::Sender::Simple ();
use Email::Simple ();
use Getopt::Long;
use MetaCPAN::Logger qw< :log :dlog >;

use MetaCPAN::ES;
use MetaCPAN::External::Cygwin qw< run_cygwin >;
use MetaCPAN::External::Debian qw< run_debian >;

# with(
# 'MetaCPAN::Script::Role::External::Cygwin',
# 'MetaCPAN::Script::Role::External::Debian',
# );

# args
my ( $email_to, $external_source );
GetOptions(
"email_to=s" => \$email_to,
"external_source=s" => \$external_source,
);

die "wrong external source: $external\n"
unless $external_source
and grep { $_ eq $external_source } qw< cygwin debian >;

# setup
my $es = MetaCPAN::ES->new( type => "author" );

my $ret;

$ret = run_cygwin() if $external_source eq 'cygwin';
$ret = run_debian() if $external_source eq 'debian';

my $email_body = $ret->{errors_email_body};
if ( $email_to and $email_body ) {
my $email = Email::Simple->create(
header => [
'Content-Type' => 'text/plain; charset=utf-8',
To => $email_to,
From => '[email protected]',
Subject => "Package mapping failures report for $external_source",
'MIME-Version' => '1.0',
],
body => $email_body,
);
Email::Sender::Simple->send($email);

log_debug { "Sending email to " . $email_to . ":" };
log_debug {"Email body:"};
log_debug {$email_body};
}

my $scroll = $es->scroll(
type => 'distribution',
scroll => '10m',
body => {
query => {
exists => { field => "external_package." . $external_source }
}
},
);

my @to_remove;

while ( my $s = $scroll->next ) {
my $name = $s->{_source}{name};
next unless $name;

if ( exists $dist->{$name} ) {
delete $dist->{$name}
if $dist->{$name} eq
$s->{_source}{external_package}{$external_source};
}
else {
push @to_remove => $name;
}
}

my $bulk = $es->bulk( type => 'distribution' );

for my $d ( keys %{$dist} ) {
log_debug {"[$external_source] adding $d"};
$bulk->update( {
id => $d,
doc => +{
'external_package' => {
$external_source => $dist->{$d}
}
},
doc_as_upsert => 1,
} );
}

for my $d (@to_remove) {
log_debug {"[$external_source] removing $d"};
$bulk->update( {
id => $d,
doc => +{
'external_package' => {
$external_source => undef
}
}
} );
}

$bulk->flush;

1;

=pod

=head1 SYNOPSIS

# bin/external.pl --external_source SOURCE --email_to EMAIL

=cut
65 changes: 65 additions & 0 deletions lib/MetaCPAN/External/Cygwin.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package MetaCPAN::External::Cygwin;

use List::Util qw< shuffle >;
use MetaCPAN::Logger qw< :log :dlog >;

use MetaCPAN::Ingest qw< ua >;

use Sub::Exporter -setup => {
exports => [ qw<
run_cygwin
> ]
};

sub run_cygwin () {
my $ret = {};

my $ua = ua();
my $mirrors = get_mirrors($ua);

my @mirrors = @{ $mirrors };
my $timeout = $ua->timeout(10);

MIRROR: {
my $mirror = shift @mirrors or die "Ran out of mirrors";
log_debug {"Trying mirror: $mirror"};
my $res = $ua->get( $mirror . 'x86_64/setup.ini' );
redo MIRROR unless $res->is_success;

my @packages = split /^\@ /m, $res->decoded_content;
shift @packages; # drop headers

log_debug { sprintf "Got %d cygwin packages", scalar @packages };

for my $desc (@packages) {
next if substr( $desc, 0, 5 ) ne 'perl-';
my ( $pkg, %attr ) = map s/\A"|"\z//gr, map s/ \z//r,
map s/\n+/ /gr, split /^([a-z]+): /m, $desc;
$attr{category} = [ split / /, $attr{category} ];
next if grep /^(Debug|_obsolete)$/, @{ $attr{category} };
$ret->{dist}{ $pkg =~ s/^perl-//r } = $pkg;
}
}
$ua->timeout($timeout);

log_debug {
sprintf "Found %d cygwin-CPAN packages",
scalar keys %{ $ret->{dist} }
};

return $ret;
}

sub _get_mirrors ( $ua ) {
log_debug {"Fetching mirror list"};
my $res = $ua->get('https://cygwin.com/mirrors.lst');
die "Failed to fetch mirror list: " . $res->status_line
unless $res->is_success;
my @mirrors = shuffle map +( split /;/ )[0], split /\n/,
$res->decoded_content;

log_debug { sprintf "Got %d mirrors", scalar @mirrors };
return \@mirrors;
}

1;
112 changes: 112 additions & 0 deletions lib/MetaCPAN/External/Debian.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package MetaCPAN::External::Debian;

use strict;
use warnings;
use v5.36;

use CPAN::DistnameInfo ();
use DBI ();

use MetaCPAN::ES;

use Sub::Exporter -setup => {
exports => [ qw<
run_debian
> ]
};

sub run_debian () {
my $ret = {};

my $host_regex = _get_host_regex();

# connect to the database
my $dbh = DBI->connect( "dbi:Pg:host=udd-mirror.debian.net;dbname=udd",
'udd-mirror', 'udd-mirror' );

# special cases
my %skip = ( 'libbssolv-perl' => 1 );

# multiple queries are needed
my @sql = (

# packages with upstream identified as CPAN
q{select u.source, u.upstream_url from upstream_metadata um join upstream u on um.source = u.source where um.key='Archive' and um.value='CPAN'},

# packages which upstream URL pointing to CPAN
qq{select source, upstream_url from upstream where upstream_url ~ '${\$host_regex}'},
);

my @failures;

for my $sql (@sql) {
my $sth = $dbh->prepare($sql);
$sth->execute();

# map Debian source package to CPAN distro
while ( my ( $source, $url ) = $sth->fetchrow ) {
next if $skip{$source};
if ( my $dist = dist_for_debian( $source, $url ) ) {
$ret->{dist}{$dist} = $source;
}
else {
push @failures => [ $source, $url ];
}
}
}

if (@failures) {
my $ret->{errors_email_body} = join "\n" =>
map { sprintf "%s %s", $_->[0], $_->[1] // '<undef>' } @failures;
}

return $ret;
}

sub dist_for_debian ( $source, $url ) {
my %alias = (
'datapager' => 'data-pager',
'html-format' => 'html-formatter',
);

my $dist = CPAN::DistnameInfo->new($url);
if ( $dist->dist ) {
return $dist->dist;
}
elsif ( $source =~ /^lib(.*)-perl$/ ) {
my $es = MetaCPAN::ES->new( type => 'release' );
my $res = $es->scroll(
body => {
query => {
term => { 'distribution.lowercase' => $alias{$1} // $1 }
},
sort => [ { 'date' => 'desc' } ],
}
)->next;

return $res->{_source}{distribution}
if $res;
}

return;
}

sub _get_host_regex () {
my @cpan_hosts = qw<
backpan.cpan.org
backpan.perl.org
cpan.metacpan.org
cpan.noris.de
cpan.org
cpan.perl.org
search.cpan.org
www.cpan.org
www.perl.com
>;

return
'^(https?|ftp)://('
. join( '|', map {s/\./\\./r} @cpan_hosts ) . ')/';
}

1;
Loading