/
PackageDetails.pm
848 lines (607 loc) · 20 KB
/
PackageDetails.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
use 5.008;
package CPAN::PackageDetails;
use strict;
use warnings;
use Carp qw(carp croak cluck confess);
use Cwd;
use File::Basename;
use File::Spec::Functions;
use vars qw( $VERSION );
BEGIN { # needed later in another BEGIN
$VERSION = '0.263';
}
=encoding utf8
=head1 NAME
CPAN::PackageDetails - Create or read 02packages.details.txt.gz
=head1 SYNOPSIS
use CPAN::PackageDetails;
# read an existing file #####################
my $package_details = CPAN::PackageDetails->read( $filename );
my $count = $package_details->count;
my $records = $package_details->entries->get_hash;
foreach my $record ( @$records )
{
# See CPAN::PackageDetails::Entry too
# print join "\t", map { $record->$_() } ('package name', 'version', 'path')
print join "\t", map { $record->$_() } $package_details->columns_as_list;
}
# not yet implemented, but would be really, really cool eh?
my $records = $package_details->entries(
logic => 'OR', # but that could be AND, which is the default
package => qr/^Test::/, # or a string
author => 'OVID', # case insenstive
path => qr/foo/,
);
# create a new file #####################
my $package_details = CPAN::PackageDetails->new(
file => "02packages.details.txt",
url => "http://example.com/MyCPAN/modules/02packages.details.txt",
description => "Package names for my private CPAN",
columns => "package name, version, path",
intended_for => "My private CPAN",
written_by => "$0 using CPAN::PackageDetails $CPAN::PackageDetails::VERSION",
last_updated => CPAN::PackageDetails->format_date,
allow_packages_only_once => 1,
disallow_alpha_versions => 1,
);
$package_details->add_entry(
package_name => $package,
version => $package->VERSION;
path => $path,
);
print "About to write ", $package_details->count, " entries\n";
$package_details->write_file( $file );
# OR ...
$package_details->write_fh( \*STDOUT )
=head1 DESCRIPTION
CPAN uses an index file, F<02packages.details.txt.gz>, to map package names to
distribution files. Using this module, you can get a data structure of that
file, or create your own.
There are two parts to the F<02packages.details.txt.g>z: a header and the index.
This module uses a top-level C<CPAN::PackageDetails> object to control
everything and comprise an C<CPAN::PackageDetails::Header> and
C<CPAN::PackageDetails::Entries> object. The C<CPAN::PackageDetails::Entries>
object is a collection of C<CPAN::PackageDetails::Entry> objects.
For the most common uses, you don't need to worry about the insides
of what class is doing what. You'll call most of the methods on
the top-level C<CPAN::PackageDetails> object and it will make sure
that it gets to the right place.
=head2 Methods
These methods are in the top-level object, and there are more methods
for this class in the sections that cover the Header, Entries, and
Entry objects.
=over 4
=item new
Create a new F<02packages.details.txt.gz> file. The C<default_headers>
method shows you which values you can pass to C<new>. For instance:
my $package_details = CPAN::PackageDetails->new(
url => $url,
columns => 'author, package name, version, path',
)
If you specify the C<allow_packages_only_once> option with a true value
and you try to add that package twice, the object will die. See C<add_entry>
in C<CPAN::PackageDetails::Entries>.
If you specify the C<disallow_alpha_versions> option with a true value
and you try to add that package twice, the object will die. See C<add_entry>
in C<CPAN::PackageDetails::Entries>.
=cut
BEGIN {
my $class_counter = 0;
sub new {
my( $class, %args ) = @_;
my( $ref, $bless_class ) = do {
if( exists $args{dbmdeep} ) {
eval { require DBM::Deep };
if( $@ ) {
croak "You must have DBM::Deep installed and discoverable to use the dbmdeep feature";
}
my $ref = DBM::Deep->new(
file => $args{dbmdeep},
autoflush => 1,
);
croak "Could not create DBM::Deep object" unless ref $ref;
my $single_class = sprintf "${class}::DBM%03d", $class_counter++;
no strict 'refs';
@{"${single_class}::ISA"} = ( $class , 'DBM::Deep' );
( $ref, $single_class );
}
else {
( {}, $class );
}
};
my $self = bless $ref, $bless_class;
$self->init( %args );
$self;
}
}
=item init
Sets up the object. C<new> calls this automatically for you.
=item default_headers
Returns the hash of header fields and their default values:
file "02packages.details.txt"
url "http://example.com/MyCPAN/modules/02packages.details.txt"
description "Package names for my private CPAN"
columns "package name, version, path"
intended_for "My private CPAN"
written_by "$0 using CPAN::PackageDetails $CPAN::PackageDetails::VERSION"
last_updated format_date()
In the header, these fields show up with the underscores turned into hyphens,
and the letters at the beginning or after a hyphen are uppercase.
=cut
BEGIN {
# These methods live in the top level and delegate interfaces
# so I need to intercept them at the top-level and redirect
# them to the right delegate
my %Dispatch = (
header => { map { $_, 1 } qw(
default_headers get_header set_header header_exists
columns_as_list
) },
entries => { map { $_, 1 } qw(
add_entry count as_unique_sorted_list already_added
allow_packages_only_once disallow_alpha_versions
get_entries_by_package get_entries_by_version
get_entries_by_path get_entries_by_distribution
allow_suspicious_names get_hash
) },
# entry => { map { $_, 1 } qw() },
);
my %Dispatchable = map { #inverts %Dispatch
my $class = $_;
map { $_, $class } keys %{$Dispatch{$class}}
} keys %Dispatch;
sub can {
my( $self, @methods ) = @_;
my $class = ref $self || $self; # class or instance
foreach my $method ( @methods ) {
next if
defined &{"${class}::$method"} ||
exists $Dispatchable{$method} ||
$self->header_exists( $method );
return 0;
}
return 1;
}
sub AUTOLOAD {
my $self = shift;
our $AUTOLOAD;
carp "There are no AUTOLOADable class methods: $AUTOLOAD" unless ref $self;
( my $method = $AUTOLOAD ) =~ s/.*:://;
if( exists $Dispatchable{$method} ) {
my $delegate = $Dispatchable{$method};
return $self->$delegate()->$method(@_)
}
elsif( $self->header_exists( $method ) ) {
return $self->header->get_header( $method );
}
else {
carp "No such method as $method!";
return;
}
}
}
BEGIN {
my %defaults = (
file => "02packages.details.txt",
url => "http://example.com/MyCPAN/modules/02packages.details.txt",
description => "Package names for my private CPAN",
columns => "package name, version, path",
intended_for => "My private CPAN",
written_by => "$0 using CPAN::PackageDetails $CPAN::PackageDetails::VERSION",
header_class => 'CPAN::PackageDetails::Header',
entries_class => 'CPAN::PackageDetails::Entries',
entry_class => 'CPAN::PackageDetails::Entry',
allow_packages_only_once => 1,
disallow_alpha_versions => 0,
allow_suspicious_names => 0,
);
sub init
{
my( $self, %args ) = @_;
my %config = ( %defaults, %args );
# we'll delegate everything, but also try to hide the mess from the user
foreach my $key ( map { "${_}_class" } qw(header entries entry) ) {
$self->{$key} = $config{$key};
delete $config{$key};
}
foreach my $class ( map { $self->$_ } qw(header_class entries_class entry_class) ) {
eval "require $class";
}
# don't initialize things if they are already there. For instance,
# if we read an existing DBM::Deep file
$self->{entries} = $self->entries_class->new(
entry_class => $self->entry_class,
columns => [ split /,\s+/, $config{columns} ],
allow_packages_only_once => $config{allow_packages_only_once},
allow_suspicious_names => $config{allow_suspicious_names},
disallow_alpha_versions => $config{disallow_alpha_versions},
) unless exists $self->{entries};
$self->{header} = $self->header_class->new(
_entries => $self->entries,
) unless exists $self->{header};
foreach my $key ( keys %config )
{
$self->header->set_header( $key, $config{$key} );
}
$self->header->set_header(
'last_updated',
$self->header->format_date
);
}
}
=item read( FILE )
Read an existing 02packages.details.txt.gz file.
While parsing, it modifies the field names to map them to Perly
identifiers. The field is lowercased, and then hyphens become
underscores. For instance:
Written-By ---> written_by
=cut
sub read {
my( $class, $file, %args ) = @_;
unless( defined $file ) {
carp "Missing argument!";
return;
}
require IO::Uncompress::Gunzip;
my $fh = IO::Uncompress::Gunzip->new( $file ) or do {
no warnings;
carp "Could not open $file: $IO::Compress::Gunzip::GunzipError\n";
return;
};
my $self = $class->_parse( $fh, %args );
$self->{source_file} = $file;
$self;
}
=item source_file
Returns the original file path for objects created through the
C<read> method.
=cut
sub source_file { $_[0]->{source_file} }
sub _parse {
my( $class, $fh, %args ) = @_;
my $package_details = $class->new( %args );
while( <$fh> ) { # header processing
last if /\A\s*\Z/;
chomp;
my( $field, $value ) = split /\s*:\s*/, $_, 2;
$field = lc( $field || '' );
$field =~ tr/-/_/;
carp "Unknown field value [$field] at line $.! Skipping..."
unless 1; # XXX should there be field name restrictions?
$package_details->set_header( $field, $value );
}
my @columns = $package_details->columns_as_list;
while( <$fh> ) { # entry processing
chomp;
my @values = split; # this could be in any order based on columns field.
$package_details->add_entry(
map { $columns[$_], $values[$_] } 0 .. $#columns,
)
}
$package_details;
}
=item write_file( OUTPUT_FILE )
Formats the object as a string and writes it to a temporary file and
gzips the output. When everything is complete, it renames the temporary
file to its final name.
C<write_file> carps and returns nothing if you pass it no arguments, if
it cannot open OUTPUT_FILE for writing, or if it cannot rename the file.
=cut
sub write_file {
my( $self, $output_file ) = @_;
unless( defined $output_file ) {
carp "Missing argument!";
return;
}
require IO::Compress::Gzip;
my $fh = IO::Compress::Gzip->new( "$output_file.$$" ) or do {
carp "Could not open $output_file.$$ for writing: $IO::Compress::Gzip::GzipError";
return;
};
$self->write_fh( $fh );
$fh->close;
unless( rename "$output_file.$$", $output_file ) {
carp "Could not rename temporary file to $output_file!\n";
return;
}
return 1;
}
=item write_fh( FILEHANDLE )
Formats the object as a string and writes it to FILEHANDLE
=cut
sub write_fh {
my( $self, $fh ) = @_;
print $fh $self->header->as_string, $self->entries->as_string;
}
=item check_file( FILE, CPAN_PATH )
This method takes an existing F<02packages.details.txt.gz> named in FILE and
the CPAN root at CPAN_PATH (to append to the relative paths in the
index), then checks the file for several things:
1. That there are entries in the file
2. The number of entries matches those declared in the Line-Count header
3. All paths listed in the file exist under CPAN_PATH
4. All distributions under CPAN_PATH have an entry (not counting older versions)
If any of these checks fail, C<check_file> croaks with a hash reference
with these keys:
# present in every error object
filename the FILE you passed in
cpan_path the CPAN_PATH you passed in
cwd the current working directory
error_count
# if FILE is missing
missing_file exists and true if FILE doesn't exist
# if the entry count in the file is wrong
# that is, the actual line count and header disagree
entry_count_mismatch true
line_count the line count declared in the header
entry_count the actual count
# if some distros in CPAN_HOME are missing in FILE
missing_in_file anonymous array of missing paths
# if some entries in FILE are missing the file in CPAN_HOME
missing_in_repo anonymous array of missing paths
=cut
sub ENTRY_COUNT_MISMATCH () { 1 }
sub MISSING_IN_REPO () { 2 }
sub MISSING_IN_FILE () { 3 }
sub check_file {
my( $either, $file, $cpan_path ) = @_;
# works with a class or an instance. We have to create a new
# instance, so we need the class. However, I'm concerned about
# subclasses, so if the higher level application just has the
# object, and maybe from a class I don't know about, they should
# be able to call this method and have it end up here if they
# didn't override it. That is, don't encourage them to hard code
# a class name
my $class = ref $either || $either;
# file exists
my $error = {
error_count => 0,
cpan_path => $cpan_path,
filename => $file,
cwd => cwd(),
};
unless( -e $file ) {
$error->{missing_file} = 1;
$error->{error_count} += 1;
}
# file is gzipped
# check header # # # # # # # # # # # # # # # # # # #
my $packages = $class->read( $file );
# count of entries in non-zero # # # # # # # # # # # # # # # # # # #
my $header_count = $packages->get_header( 'line_count' );
my $entries_count = $packages->count;
unless( $header_count ) {
$error->{entry_count_mismatch} = 1;
$error->{line_count} = $header_count;
$error->{entry_count} = $entries_count;
$error->{error_count} += 1;
}
unless( $header_count == $entries_count ) {
$error->{entry_count_mismatch} = 1;
$error->{line_count} = $header_count;
$error->{entry_count} = $entries_count;
$error->{error_count} += 1;
}
if( $cpan_path ) {
my $missing_in_file = $packages->check_for_missing_dists_in_file( $cpan_path );
my $missing_in_repo = $packages->check_for_missing_dists_in_repo( $cpan_path );
$error->{missing_in_file} = $missing_in_file if @$missing_in_file;
$error->{missing_in_repo} = $missing_in_repo if @$missing_in_repo;
$error->{error_count} += @$missing_in_file + @$missing_in_repo;
}
croak $error if $error->{error_count};
return 1;
}
=item check_for_missing_dists_in_repo( CPAN_PATH )
Given an object and a CPAN_PATH, return an anonymous array of the
distributions in the object that are not in CPAN_PATH. That is,
complain when the object has extra distributions.
C<check_file> calls this for you and adds the result to its
error output.
=cut
sub check_for_missing_dists_in_repo {
my( $packages, $cpan_path ) = @_;
my @missing;
my( $entries ) = $packages->as_unique_sorted_list;
foreach my $entry ( @$entries ) {
my $path = $entry->path;
my $native_path = catfile( $cpan_path, split m|/|, $path );
push @missing, $path unless -e $native_path;
}
return \@missing;
}
=item check_for_missing_dists_in_file( CPAN_PATH )
Given an object and a CPAN_PATH, return an anonymous array of the
distributions in CPAN_PATH that do not show up in the object. That is,
complain when the object doesn't have all the dists.
C<check_file> calls this for you and adds the result to its
error output.
=cut
sub check_for_missing_dists_in_file {
my( $packages, $cpan_path ) = @_;
my $dists = $packages->_get_repo_dists( $cpan_path );
$packages->_filter_older_dists( $dists );
my %files = map { $_, 1 } @$dists;
use Data::Dumper;
my( $entries ) = $packages->as_unique_sorted_list;
foreach my $entry ( @$entries ) {
my $path = $entry->path;
my $native_path = catfile( $cpan_path, split m|/|, $path );
delete $files{$native_path};
}
[ keys %files ];
}
sub _filter_older_dists {
my( $self, $array ) = @_;
my %Seen;
my @order;
require CPAN::DistnameInfo;
foreach my $path ( @$array ) {
my( $basename, $directory, $suffix ) = fileparse( $path, qw(.tar.gz .tgz .zip .tar.bz2) );
my( $name, $version, $developer ) = CPAN::DistnameInfo::distname_info( $basename );
my $tuple = [ $path, $name, $version ];
push @order, $name;
# first branch, haven't seen the distro yet
if( ! exists $Seen{ $name } ) { $Seen{ $name } = $tuple }
# second branch, the version we see now is greater than before
elsif( $Seen{ $name }[2] lt $version ) { $Seen{ $name } = $tuple }
# third branch, nothing. Really? Are you sure there's not another case?
else { () }
}
@$array = map {
if( exists $Seen{$_} ) {
my $dist = $Seen{$_}[0];
delete $Seen{$_};
$dist;
}
else {
()
}
} @order;
return 1;
}
sub _distname_info {
my $file = shift or return;
my ($dist, $version) = $file =~ /^
( # start of dist name
(?:
[-+.]*
(?:
[A-Za-z0-9]+
|
(?<=\D)_
|
_(?=\D)
)*
(?:
[A-Za-z]
(?=
[^A-Za-z]
|
$
)
|
\d
(?=-)
)
(?<!
[._-][vV]
)
)+
) # end of dist name
( # start of version
.*
) # end of version
$/xs or return ($file, undef, undef );
$dist =~ s/-undef\z// if ($dist =~ /-undef\z/ and ! length $version);
# Catch names like Unicode-Collate-Standard-V3_1_1-0.1
# where the V3_1_1 is part of the distname
if ($version =~ /^(-[Vv].*)-(\d.*)/) {
$dist .= $1;
$version = $2;
}
$version = $1 if !length $version and $dist =~ s/-(\d+\w)$//;
$version = $1 . $version if $version =~ /^\d+$/ and $dist =~ s/-(\w+)$//;
if( $version =~ /\d\.\d/ ) { $version =~ s/^[-_.]+// }
else { $version =~ s/^[-_]+// }
# deal with versions with extra information
$version =~ s/-build\d+.*//;
$version =~ s/-DRW.*//;
# deal with perl versions, merely to see if it is a dev version
my $dev;
if( length $version ) {
$dev = do {
if ($file =~ /^perl-?\d+\.(\d+)(?:\D(\d+))?(-(?:TRIAL|RC)\d+)?$/) {
1 if (($1 > 6 and $1 & 1) or ($2 and $2 >= 50)) or $3;
}
elsif ($version =~ /\d\D\d+_\d/) {
1;
}
};
}
else {
$version = undef;
}
($dist, $version, $dev);
}
sub _get_repo_dists {
my( $self, $cpan_home ) = @_;
my @files = ();
use File::Find;
my $wanted = sub {
push @files,
File::Spec::Functions::canonpath( $File::Find::name )
if m/\.(?:tar\.gz|tgz|zip)\z/
};
find( $wanted, $cpan_home );
return \@files;
}
sub DESTROY {}
=back
=head3 Methods in CPAN::PackageDetails
=over 4
=item header_class
Returns the class that C<CPAN::PackageDetails> uses to create
the header object.
=cut
sub header_class { $_[0]->{header_class} }
=item header
Returns the header object.
=cut
sub header { $_[0]->{header} }
=back
=head3 Methods in CPAN::PackageDetails::Header
=over 4
=cut
=back
=head2 Entries
Entries are the collection of the items describing the package details.
It comprises all of the Entry object.
=head3 Methods is CPAN::PackageDetails
=over 4
=item entries_class
Returns the class to use for the Entries object.
To use a different Entries class, tell C<new> which class you want to use
by passing the C<entries_class> option:
CPAN::PackageDetails->new(
...,
entries_class => $class,
);
Note that you are responsible for loading the right class yourself.
=item count
Returns the number of entries.
This dispatches to the C<count> in CPAN::PackageDetails::Entries. These
are the same:
$package_details->count;
$package_details->entries->count;
=cut
sub entries_class { $_[0]->{entries_class} }
=item entries
Returns the entries object.
=cut
sub entries { $_[0]->{entries} }
=item entry_class
Returns the class to use for each Entry object.
To use a different Entry class, tell C<new> which class you want to use
by passing the C<entry_class> option:
CPAN::PackageDetails->new(
...,
entry_class => $class,
)
Note that you are responsible for loading the right class yourself.
=cut
sub entry_class { $_[0]->{entry_class} }
sub _entries { $_[0]->{_entries} }
=back
=head1 TO DO
=head1 SEE ALSO
=head1 SOURCE AVAILABILITY
This source is in Github:
https://github.com/briandfoy/cpan-packagedetails
=head1 AUTHOR
brian d foy, C<< <briandfoy@pobox.com> >>
=head1 COPYRIGHT AND LICENSE
Copyright © 2009-2021, brian d foy <briandfoy@pobox.com>. All rights reserved.
You may redistribute this under the terms of the Artistic License 2.0.
=cut
1;