#!/usr/bin/perl -w
#
# unpack a japanese zipfile, converting sjis filenames to unicode on the way
#
# Usage: pass one or more zip archives on the command line. Every member of
# each archive is extracted relative to the current directory under its
# converted name. Diagnostic output (original name, hex dumps of the name
# before and after conversion) goes to STDOUT; problems go to STDERR.
#
use strict;
use warnings;

use lib $ENV{'HOME'} . "/src/perl";
use Archive::Zip qw( :ERROR_CODES :CONSTANTS );
use Unicode::Japanese;
use File::Path;
use Data::Dumper;

# jcode.pl is an old-style library file, so it is pulled in with do FILE.
# do returns undef when the file cannot be read or compiled; report that
# here rather than failing later with "Undefined subroutine jcode::init".
my $jcode_loaded = do "jcode.pl";
die "Cannot load jcode.pl: " . ( $@ || $! ) . "\n"
    unless defined $jcode_loaded;
jcode::init();

for my $archive_path (@ARGV) {
    my $zipfile = Archive::Zip->new;
    my $err     = $zipfile->read($archive_path);
    if ( $err != AZ_OK ) {
        die "Failed: $err $!\n";
    }

    for my $member ( $zipfile->members() ) {
        my $filename = $member->fileName;

        # The second element of getcode's list result is used as the name
        # of the detected encoding; it is treated as "no conversion needed"
        # when it is false.
        my @coding = jcode::getcode( \$filename );
        my $coding = ( $coding[1] || "" );

        print $filename . "\n";

        # Now convert filename to sjis
        if ( $coding and $coding ne "sjis" ) {
            $filename = jcode::sjis( $filename, $coding );
            print "sjis filename = $filename\n";
            $coding = "sjis";
        }

        # and convert sjis to unicode
        if ($coding) {
            print "SJIS: " . get_in_hex_bytes($filename) . "\n";
            $filename = Unicode::Japanese->new( $filename, 'sjis' )->get;
            print "UTF8: " . get_in_hex_bytes($filename) . "\n";
        }

        # Member names come from the archive and are used verbatim as the
        # destination path, so refuse anything that could land outside the
        # current directory.
        if ( !is_safe_extraction_path($filename) ) {
            warn "Skipping unsafe member name: $filename\n";
            next;
        }

        # Report extraction failures instead of silently ignoring them, but
        # keep going so one bad member does not abort the whole archive.
        my $status = $zipfile->extractMember( $member, $filename );
        if ( $status != AZ_OK ) {
            warn "Failed to extract $filename: $status\n";
        }
    }
}

# Return true when $path is a relative path with no ".." component.
#
# Rejects paths that start with "/" or "\", paths that start with a drive
# letter ("C:"), and paths where any component (split on "/" or "\") is
# exactly "..". This is deliberately conservative.
sub is_safe_extraction_path {
    my ($path) = @_;

    return 0 if $path =~ m{\A(?:[/\\]|[A-Za-z]:)};
    return 0 if grep { $_ eq '..' } split( m{[/\\]}, $path );
    return 1;
}

# Return the characters of the given string as two-digit lowercase hex
# values separated by single spaces, e.g. "AB" -> "41 42". An empty string
# yields an empty string.
sub get_in_hex_bytes {
    my ($string) = @_;

    return join( " ", map { sprintf( "%02x", ord($_) ) } split( //, $string ) );
}

__DATA__

SJIS:
83 51 = 0x8351 0x2532 0x30B2 # KATAKANA LETTER GE
83 43 = 0x8343 0x2524 0x30A4 # KATAKANA LETTER I
83 69 = 0x8369 0x254A 0x30CA # KATAKANA LETTER NA
81 5b = 0x815B 0x213C 0x30FC # KATAKANA-HIRAGANA PROLONGED SOUND MARK
2f 54 68 75 6d 62 73 2e 64 62 # /Thumbs.db

UTF8:
e3 82 b2 e3 82 a4 e3 83 8a e3 83 bc 2f 54 68 75 6d 62 73 2e 64 62