Steve Peters
2007-08-15 15:00:05 UTC
Change 31722 by ***@stevep-kirk on 2007/08/15 14:58:57
Upgrade to Archive-Tar-1.34. Omitted re-addition of the Pod
tests.
Affected files ...
... //depot/perl/MANIFEST#1613 edit
... //depot/perl/lib/Archive/Tar.pm#13 edit
... //depot/perl/lib/Archive/Tar/File.pm#5 edit
... //depot/perl/lib/Archive/Tar/t/03_file.t#2 edit
... //depot/perl/lib/Archive/Tar/t/src/long/bar.tar.packed#3 edit
... //depot/perl/lib/Archive/Tar/t/src/long/foo.tgz.packed#3 edit
... //depot/perl/lib/Archive/Tar/t/src/short/bar.tar.packed#3 edit
... //depot/perl/lib/Archive/Tar/t/src/short/foo.tgz.packed#3 edit
Differences ...
==== //depot/perl/MANIFEST#1613 (text) ====
Index: perl/MANIFEST
--- perl/MANIFEST#1612~31712~ 2007-08-14 09:14:45.000000000 -0700
+++ perl/MANIFEST 2007-08-15 07:58:57.000000000 -0700
@@ -389,8 +389,8 @@
ext/Digest/SHA/t/bitbuf.t See if Digest::SHA works
ext/Digest/SHA/t/dumpload.t See if Digest::SHA works
ext/Digest/SHA/t/fips198.t See if Digest::SHA works
-ext/Digest/SHA/t/gg.t See if Digest::SHA works
ext/Digest/SHA/t/gglong.t See if Digest::SHA works
+ext/Digest/SHA/t/gg.t See if Digest::SHA works
ext/Digest/SHA/t/hmacsha.t See if Digest::SHA works
ext/Digest/SHA/t/ireland.t See if Digest::SHA works
ext/Digest/SHA/t/methods.t See if Digest::SHA works
@@ -2763,8 +2763,8 @@
lib/Tie/File/t/41_heap.t Unit tests for Tie::File::Heap
lib/Tie/File/t/42_offset.t Unit tests for the offset method
lib/Tie/Handle.pm Base class for tied handles
-lib/Tie/Handle/stdhandle.t Test for Tie::StdHandle
lib/Tie/Handle/stdhandle_from_handle.t Test for Tie::StdHandle/Handle backwards compat
+lib/Tie/Handle/stdhandle.t Test for Tie::StdHandle
lib/Tie/Hash/NamedCapture.pm Implements %- and %+ behaviour
lib/Tie/Hash.pm Base class for tied hashes
lib/Tie/Memoize.pm Base class for memoized tied hashes
@@ -3874,8 +3874,8 @@
t/run/switchPx.aux Data for switchPx.t
t/run/switchPx.t Test the -Px combination
t/run/switcht.t Test the -t switch
-t/run/switchx.aux Data for switchx.t
t/run/switchx2.aux Data for switchx.t
+t/run/switchx.aux Data for switchx.t
t/run/switchx.t Test the -x switch
t/TEST The regression tester
t/TestInit.pm Preamble library for core tests
==== //depot/perl/lib/Archive/Tar.pm#13 (text) ====
Index: perl/lib/Archive/Tar.pm
--- perl/lib/Archive/Tar.pm#12~31274~ 2007-05-25 02:57:33.000000000 -0700
+++ perl/lib/Archive/Tar.pm 2007-08-15 07:58:57.000000000 -0700
@@ -14,7 +14,7 @@
$DEBUG = 0;
$WARN = 1;
$FOLLOW_SYMLINK = 0;
-$VERSION = "1.32";
+$VERSION = "1.34";
$CHOWN = 1;
$CHMOD = 1;
$DO_NOT_USE_PREFIX = 0;
@@ -406,7 +406,9 @@
sub contains_file {
my $self = shift;
- my $full = shift or return;
+ my $full = shift;
+
+ return unless defined $full;
### don't warn if the entry isn't there.. that's what this function
### is for after all.
@@ -509,7 +511,7 @@
sub extract_file {
my $self = shift;
- my $file = shift or return;
+ my $file = shift; return unless defined $file;
my $alt = shift;
my $entry = $self->_find_entry( $file )
@@ -1669,6 +1671,56 @@
$tar->write($fh);
$fh->close ;
+=item How do I handle Unicode strings?
+
+C<Archive::Tar> uses byte semantics for any files it reads from or writes
+to disk. This is not a problem if you only deal with files and never
+look at their content or work solely with byte strings. But if you use
+Unicode strings with character semantics, some additional steps need
+to be taken.
+
+For example, if you add a Unicode string like
+
+ # Problem
+ $tar->add_data('file.txt', "Euro: \x{20AC}");
+
+then there will be a problem later when the tarfile gets written out
+to disk via C<$tar->write()>:
+
+ Wide character in print at .../Archive/Tar.pm line 1014.
+
+The data was added as a Unicode string and when writing it out to disk,
+the C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl
+tried to convert the string to ISO-8859 and failed. The written file
+now contains garbage.
+
+For this reason, Unicode strings need to be converted to UTF-8-encoded
+bytestrings before they are handed off to C<add_data()>:
+
+ use Encode;
+ my $data = "Accented character: \x{20AC}";
+ $data = encode('utf8', $data);
+
+ $tar->add_data('file.txt', $data);
+
+An opposite problem occurs if you extract a UTF8-encoded file from a
+tarball. Using C<get_content()> on the C<Archive::Tar::File> object
+will return its content as a bytestring, not as a Unicode string.
+
+If you want it to be a Unicode string (because you want character
+semantics with operations like regular expression matching), you need
+to decode the UTF8-encoded content and have Perl convert it into
+a Unicode string:
+
+ use Encode;
+ my $data = $tar->get_content();
+
+ # Make it a Unicode string
+ $data = decode('utf8', $data);
+
+There is no easy way to provide this functionality in C<Archive::Tar>,
+because a tarball can contain many files, each of which could be
+encoded in a different way.
=back
==== //depot/perl/lib/Archive/Tar/File.pm#5 (text) ====
Index: perl/lib/Archive/Tar/File.pm
--- perl/lib/Archive/Tar/File.pm#4~31274~ 2007-05-25 02:57:33.000000000 -0700
+++ perl/lib/Archive/Tar/File.pm 2007-08-15 07:58:57.000000000 -0700
@@ -200,7 +200,7 @@
sub _new_from_chunk {
my $class = shift;
- my $chunk = shift or return;
+ my $chunk = shift or return; # 512 bytes of tar header
my %hash = @_;
### filter any arguments on defined-ness of values.
@@ -233,7 +233,11 @@
sub _new_from_file {
my $class = shift;
- my $path = shift or return;
+ my $path = shift;
+
+ ### path has to at least exist
+ return unless defined $path;
+
my $type = __PACKAGE__->_filetype($path);
my $data = '';
@@ -304,7 +308,7 @@
sub _new_from_data {
my $class = shift;
- my $path = shift or return;
+ my $path = shift; return unless defined $path;
my $data = shift; return unless defined $data;
my $opt = shift;
@@ -371,7 +375,9 @@
sub _filetype {
my $self = shift;
- my $file = shift or return;
+ my $file = shift;
+
+ return unless defined $file;
return SYMLINK if (-l $file); # Symlink
@@ -515,7 +521,9 @@
sub rename {
my $self = shift;
- my $path = shift or return;
+ my $path = shift;
+
+ return unless defined $path;
my ($prefix,$file) = $self->_prefix_and_file( $path );
==== //depot/perl/lib/Archive/Tar/t/03_file.t#2 (text) ====
Index: perl/lib/Archive/Tar/t/03_file.t
--- perl/lib/Archive/Tar/t/03_file.t#1~24922~ 2005-06-21 05:01:07.000000000 -0700
+++ perl/lib/Archive/Tar/t/03_file.t 2007-08-15 07:58:57.000000000 -0700
@@ -20,6 +20,10 @@
[ 'x/bIn1', $all_chars ],
[ 'bIn2', $all_chars x 2 ],
[ 'bIn0', '' ],
+
+ ### we didn't handle 'false' filenames very well across A::T as of version
+ ### 1.32, as reported in #28687. Test for the handling of such files here.
+ [ 0, '', ],
### keep this one as the last entry
[ 'x/yy/z', '', { type => DIR,
==== //depot/perl/lib/Archive/Tar/t/src/long/bar.tar.packed#3 (text) ====
Index: perl/lib/Archive/Tar/t/src/long/bar.tar.packed
--- perl/lib/Archive/Tar/t/src/long/bar.tar.packed#2~31274~ 2007-05-25 02:57:33.000000000 -0700
+++ perl/lib/Archive/Tar/t/src/long/bar.tar.packed 2007-08-15 07:58:57.000000000 -0700
@@ -10,7 +10,7 @@
uupacktool.pl -p lib/Archive/Tar/t/src/long/bar.tar lib/Archive/Tar/t/src/long/bar.tar.packed
-Created at Thu May 24 15:38:19 2007
+Created at Wed Aug 15 15:56:07 2007
#########################################################################
__UU__
M8P``````````````````````````````````````````````````````````
==== //depot/perl/lib/Archive/Tar/t/src/long/foo.tgz.packed#3 (text) ====
Index: perl/lib/Archive/Tar/t/src/long/foo.tgz.packed
--- perl/lib/Archive/Tar/t/src/long/foo.tgz.packed#2~31274~ 2007-05-25 02:57:33.000000000 -0700
+++ perl/lib/Archive/Tar/t/src/long/foo.tgz.packed 2007-08-15 07:58:57.000000000 -0700
@@ -10,7 +10,7 @@
uupacktool.pl -p lib/Archive/Tar/t/src/long/foo.tgz lib/Archive/Tar/t/src/long/foo.tgz.packed
-Created at Thu May 24 15:38:19 2007
+Created at Wed Aug 15 15:56:07 2007
#########################################################################
__UU__
M'XL(`````````^W72VZ#,!`&8*]S"BY`F,$/MCT`ET")25`<D"A1Q.UKR*M1
==== //depot/perl/lib/Archive/Tar/t/src/short/bar.tar.packed#3 (text) ====
Index: perl/lib/Archive/Tar/t/src/short/bar.tar.packed
--- perl/lib/Archive/Tar/t/src/short/bar.tar.packed#2~31274~ 2007-05-25 02:57:33.000000000 -0700
+++ perl/lib/Archive/Tar/t/src/short/bar.tar.packed 2007-08-15 07:58:57.000000000 -0700
@@ -10,7 +10,7 @@
uupacktool.pl -p lib/Archive/Tar/t/src/short/bar.tar lib/Archive/Tar/t/src/short/bar.tar.packed
-Created at Thu May 24 15:38:19 2007
+Created at Wed Aug 15 15:56:07 2007
#########################################################################
__UU__
M8P``````````````````````````````````````````````````````````
==== //depot/perl/lib/Archive/Tar/t/src/short/foo.tgz.packed#3 (text) ====
Index: perl/lib/Archive/Tar/t/src/short/foo.tgz.packed
--- perl/lib/Archive/Tar/t/src/short/foo.tgz.packed#2~31274~ 2007-05-25 02:57:33.000000000 -0700
+++ perl/lib/Archive/Tar/t/src/short/foo.tgz.packed 2007-08-15 07:58:57.000000000 -0700
@@ -10,7 +10,7 @@
uupacktool.pl -p lib/Archive/Tar/t/src/short/foo.tgz lib/Archive/Tar/t/src/short/foo.tgz.packed
-Created at Thu May 24 15:38:19 2007
+Created at Wed Aug 15 15:56:07 2007
#########################################################################
__UU__
M'XL(`````````^W300K",!"%X5GW%#G"3-JFYREJ080NJKU_A^A"$.RJ(\+_
End of Patch.
Upgrade to Archive-Tar-1.34. Omitted re-addition of the Pod
tests.
Affected files ...
... //depot/perl/MANIFEST#1613 edit
... //depot/perl/lib/Archive/Tar.pm#13 edit
... //depot/perl/lib/Archive/Tar/File.pm#5 edit
... //depot/perl/lib/Archive/Tar/t/03_file.t#2 edit
... //depot/perl/lib/Archive/Tar/t/src/long/bar.tar.packed#3 edit
... //depot/perl/lib/Archive/Tar/t/src/long/foo.tgz.packed#3 edit
... //depot/perl/lib/Archive/Tar/t/src/short/bar.tar.packed#3 edit
... //depot/perl/lib/Archive/Tar/t/src/short/foo.tgz.packed#3 edit
Differences ...
==== //depot/perl/MANIFEST#1613 (text) ====
Index: perl/MANIFEST
--- perl/MANIFEST#1612~31712~ 2007-08-14 09:14:45.000000000 -0700
+++ perl/MANIFEST 2007-08-15 07:58:57.000000000 -0700
@@ -389,8 +389,8 @@
ext/Digest/SHA/t/bitbuf.t See if Digest::SHA works
ext/Digest/SHA/t/dumpload.t See if Digest::SHA works
ext/Digest/SHA/t/fips198.t See if Digest::SHA works
-ext/Digest/SHA/t/gg.t See if Digest::SHA works
ext/Digest/SHA/t/gglong.t See if Digest::SHA works
+ext/Digest/SHA/t/gg.t See if Digest::SHA works
ext/Digest/SHA/t/hmacsha.t See if Digest::SHA works
ext/Digest/SHA/t/ireland.t See if Digest::SHA works
ext/Digest/SHA/t/methods.t See if Digest::SHA works
@@ -2763,8 +2763,8 @@
lib/Tie/File/t/41_heap.t Unit tests for Tie::File::Heap
lib/Tie/File/t/42_offset.t Unit tests for the offset method
lib/Tie/Handle.pm Base class for tied handles
-lib/Tie/Handle/stdhandle.t Test for Tie::StdHandle
lib/Tie/Handle/stdhandle_from_handle.t Test for Tie::StdHandle/Handle backwards compat
+lib/Tie/Handle/stdhandle.t Test for Tie::StdHandle
lib/Tie/Hash/NamedCapture.pm Implements %- and %+ behaviour
lib/Tie/Hash.pm Base class for tied hashes
lib/Tie/Memoize.pm Base class for memoized tied hashes
@@ -3874,8 +3874,8 @@
t/run/switchPx.aux Data for switchPx.t
t/run/switchPx.t Test the -Px combination
t/run/switcht.t Test the -t switch
-t/run/switchx.aux Data for switchx.t
t/run/switchx2.aux Data for switchx.t
+t/run/switchx.aux Data for switchx.t
t/run/switchx.t Test the -x switch
t/TEST The regression tester
t/TestInit.pm Preamble library for core tests
==== //depot/perl/lib/Archive/Tar.pm#13 (text) ====
Index: perl/lib/Archive/Tar.pm
--- perl/lib/Archive/Tar.pm#12~31274~ 2007-05-25 02:57:33.000000000 -0700
+++ perl/lib/Archive/Tar.pm 2007-08-15 07:58:57.000000000 -0700
@@ -14,7 +14,7 @@
$DEBUG = 0;
$WARN = 1;
$FOLLOW_SYMLINK = 0;
-$VERSION = "1.32";
+$VERSION = "1.34";
$CHOWN = 1;
$CHMOD = 1;
$DO_NOT_USE_PREFIX = 0;
@@ -406,7 +406,9 @@
sub contains_file {
my $self = shift;
- my $full = shift or return;
+ my $full = shift;
+
+ return unless defined $full;
### don't warn if the entry isn't there.. that's what this function
### is for after all.
@@ -509,7 +511,7 @@
sub extract_file {
my $self = shift;
- my $file = shift or return;
+ my $file = shift; return unless defined $file;
my $alt = shift;
my $entry = $self->_find_entry( $file )
@@ -1669,6 +1671,56 @@
$tar->write($fh);
$fh->close ;
+=item How do I handle Unicode strings?
+
+C<Archive::Tar> uses byte semantics for any files it reads from or writes
+to disk. This is not a problem if you only deal with files and never
+look at their content or work solely with byte strings. But if you use
+Unicode strings with character semantics, some additional steps need
+to be taken.
+
+For example, if you add a Unicode string like
+
+ # Problem
+ $tar->add_data('file.txt', "Euro: \x{20AC}");
+
+then there will be a problem later when the tarfile gets written out
+to disk via C<$tar->write()>:
+
+ Wide character in print at .../Archive/Tar.pm line 1014.
+
+The data was added as a Unicode string and when writing it out to disk,
+the C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl
+tried to convert the string to ISO-8859 and failed. The written file
+now contains garbage.
+
+For this reason, Unicode strings need to be converted to UTF-8-encoded
+bytestrings before they are handed off to C<add_data()>:
+
+ use Encode;
+ my $data = "Accented character: \x{20AC}";
+ $data = encode('utf8', $data);
+
+ $tar->add_data('file.txt', $data);
+
+An opposite problem occurs if you extract a UTF8-encoded file from a
+tarball. Using C<get_content()> on the C<Archive::Tar::File> object
+will return its content as a bytestring, not as a Unicode string.
+
+If you want it to be a Unicode string (because you want character
+semantics with operations like regular expression matching), you need
+to decode the UTF8-encoded content and have Perl convert it into
+a Unicode string:
+
+ use Encode;
+ my $data = $tar->get_content();
+
+ # Make it a Unicode string
+ $data = decode('utf8', $data);
+
+There is no easy way to provide this functionality in C<Archive::Tar>,
+because a tarball can contain many files, each of which could be
+encoded in a different way.
=back
==== //depot/perl/lib/Archive/Tar/File.pm#5 (text) ====
Index: perl/lib/Archive/Tar/File.pm
--- perl/lib/Archive/Tar/File.pm#4~31274~ 2007-05-25 02:57:33.000000000 -0700
+++ perl/lib/Archive/Tar/File.pm 2007-08-15 07:58:57.000000000 -0700
@@ -200,7 +200,7 @@
sub _new_from_chunk {
my $class = shift;
- my $chunk = shift or return;
+ my $chunk = shift or return; # 512 bytes of tar header
my %hash = @_;
### filter any arguments on defined-ness of values.
@@ -233,7 +233,11 @@
sub _new_from_file {
my $class = shift;
- my $path = shift or return;
+ my $path = shift;
+
+ ### path has to at least exist
+ return unless defined $path;
+
my $type = __PACKAGE__->_filetype($path);
my $data = '';
@@ -304,7 +308,7 @@
sub _new_from_data {
my $class = shift;
- my $path = shift or return;
+ my $path = shift; return unless defined $path;
my $data = shift; return unless defined $data;
my $opt = shift;
@@ -371,7 +375,9 @@
sub _filetype {
my $self = shift;
- my $file = shift or return;
+ my $file = shift;
+
+ return unless defined $file;
return SYMLINK if (-l $file); # Symlink
@@ -515,7 +521,9 @@
sub rename {
my $self = shift;
- my $path = shift or return;
+ my $path = shift;
+
+ return unless defined $path;
my ($prefix,$file) = $self->_prefix_and_file( $path );
==== //depot/perl/lib/Archive/Tar/t/03_file.t#2 (text) ====
Index: perl/lib/Archive/Tar/t/03_file.t
--- perl/lib/Archive/Tar/t/03_file.t#1~24922~ 2005-06-21 05:01:07.000000000 -0700
+++ perl/lib/Archive/Tar/t/03_file.t 2007-08-15 07:58:57.000000000 -0700
@@ -20,6 +20,10 @@
[ 'x/bIn1', $all_chars ],
[ 'bIn2', $all_chars x 2 ],
[ 'bIn0', '' ],
+
+ ### we didn't handle 'false' filenames very well across A::T as of version
+ ### 1.32, as reported in #28687. Test for the handling of such files here.
+ [ 0, '', ],
### keep this one as the last entry
[ 'x/yy/z', '', { type => DIR,
==== //depot/perl/lib/Archive/Tar/t/src/long/bar.tar.packed#3 (text) ====
Index: perl/lib/Archive/Tar/t/src/long/bar.tar.packed
--- perl/lib/Archive/Tar/t/src/long/bar.tar.packed#2~31274~ 2007-05-25 02:57:33.000000000 -0700
+++ perl/lib/Archive/Tar/t/src/long/bar.tar.packed 2007-08-15 07:58:57.000000000 -0700
@@ -10,7 +10,7 @@
uupacktool.pl -p lib/Archive/Tar/t/src/long/bar.tar lib/Archive/Tar/t/src/long/bar.tar.packed
-Created at Thu May 24 15:38:19 2007
+Created at Wed Aug 15 15:56:07 2007
#########################################################################
__UU__
M8P``````````````````````````````````````````````````````````
==== //depot/perl/lib/Archive/Tar/t/src/long/foo.tgz.packed#3 (text) ====
Index: perl/lib/Archive/Tar/t/src/long/foo.tgz.packed
--- perl/lib/Archive/Tar/t/src/long/foo.tgz.packed#2~31274~ 2007-05-25 02:57:33.000000000 -0700
+++ perl/lib/Archive/Tar/t/src/long/foo.tgz.packed 2007-08-15 07:58:57.000000000 -0700
@@ -10,7 +10,7 @@
uupacktool.pl -p lib/Archive/Tar/t/src/long/foo.tgz lib/Archive/Tar/t/src/long/foo.tgz.packed
-Created at Thu May 24 15:38:19 2007
+Created at Wed Aug 15 15:56:07 2007
#########################################################################
__UU__
M'XL(`````````^W72VZ#,!`&8*]S"BY`F,$/MCT`ET")25`<D"A1Q.UKR*M1
==== //depot/perl/lib/Archive/Tar/t/src/short/bar.tar.packed#3 (text) ====
Index: perl/lib/Archive/Tar/t/src/short/bar.tar.packed
--- perl/lib/Archive/Tar/t/src/short/bar.tar.packed#2~31274~ 2007-05-25 02:57:33.000000000 -0700
+++ perl/lib/Archive/Tar/t/src/short/bar.tar.packed 2007-08-15 07:58:57.000000000 -0700
@@ -10,7 +10,7 @@
uupacktool.pl -p lib/Archive/Tar/t/src/short/bar.tar lib/Archive/Tar/t/src/short/bar.tar.packed
-Created at Thu May 24 15:38:19 2007
+Created at Wed Aug 15 15:56:07 2007
#########################################################################
__UU__
M8P``````````````````````````````````````````````````````````
==== //depot/perl/lib/Archive/Tar/t/src/short/foo.tgz.packed#3 (text) ====
Index: perl/lib/Archive/Tar/t/src/short/foo.tgz.packed
--- perl/lib/Archive/Tar/t/src/short/foo.tgz.packed#2~31274~ 2007-05-25 02:57:33.000000000 -0700
+++ perl/lib/Archive/Tar/t/src/short/foo.tgz.packed 2007-08-15 07:58:57.000000000 -0700
@@ -10,7 +10,7 @@
uupacktool.pl -p lib/Archive/Tar/t/src/short/foo.tgz lib/Archive/Tar/t/src/short/foo.tgz.packed
-Created at Thu May 24 15:38:19 2007
+Created at Wed Aug 15 15:56:07 2007
#########################################################################
__UU__
M'XL(`````````^W300K",!"%X5GW%#G"3-JFYREJ080NJKU_A^A"$.RJ(\+_
End of Patch.