#!/usr/bin/perl

eval 'exec /usr/bin/perl  -S $0 ${1+"$@"}'
    if 0; # not running under some shell
# $File: //member/autrijus/Locale-Maketext-Lexicon/bin/xgettext.pl $ $Author: blusseau $
# $Revision: 1.1 $ $Change: 4414 $ $DateTime: 2003/02/22 01:33:00 $
# Change by Yves BLUSSEAU to correct some bugs.

use strict;
use Getopt::Std;
use Pod::Usage;
use constant NUL  => 0;
use constant BEG  => 1;
use constant PAR  => 2;
use constant QUO1 => 3;
use constant QUO2 => 4;
use constant QUO3 => 5;

$::interop = 1; # whether to interoperate with GNU gettext

=head1 NAME

extracttext.pl - Extract gettext strings from perl source

=head1 SYNOPSIS

B<extracttext.pl> [ B<-u> ] [ B<-o> I<outputfile> ] [ I<inputfile>... ]

=head1 OPTIONS

[ B<-u> ] Disables conversion from B<Maketext> format to B<Gettext>
format -- i.e. it leaves all brackets alone.  This is useful if you are
also using the B<Gettext> syntax in your program.

[ B<-o> I<outputfile> ] PO file name to be written or incrementally
updated; C<-> means writing to F<STDOUT>.  If not specified,
F<messages.po> is used.

[ I<inputfile>... ] are the files to extract messages from.

=head1 DESCRIPTION

This program extracts translatable strings from given input files, or
STDIN if none are given.

Currently only Perl source input files are supported:

=over 4

=item Perl source files

Valid localization function names are: C<translate>, C<maketext>,
C<loc>, C<x>, C<_> and C<__>.

=back

=cut

# Shared state for the whole script:
#   %file    - extracted string => list of [ filename, line, vars ] records
#   %Lexicon - msgid => msgstr, seeded from an existing PO file if any
#   %opts    - command-line switches
#   $PO      - name of the PO file to write
#   $out     - header of the existing PO file, kept verbatim
my (%file, %Lexicon, %opts);
my ($PO, $out);

# Command-line switches (see OPTIONS above); -h prints the full manual.
getopts('huo:', \%opts)
  or pod2usage( -verbose => 1, -exitval => 1 );
$opts{h} and pod2usage( -verbose => 2, -exitval => 0 );

$PO = $opts{o} || "messages.po";

# With no input files given, '-' makes the extraction loop read STDIN.
@ARGV = ('-') unless @ARGV;

# If the target PO file already exists, keep its header and load its
# entries so that existing translations are carried over.
if (-r $PO) {
    open my $lexicon_fh, '<', $PO or die $!;

    # The header runs from line 1 up to and including the first blank line.
    # The line that terminates the scan has already been read from the
    # handle, so keep it and hand it to the parser instead of dropping it.
    my @pending;
    while (<$lexicon_fh>) {
        if (1 .. /^$/) { $out .= $_; next }
        push @pending, $_;
        last;
    }

    1 while chomp $out;

    %Lexicon = map {
        if ($opts{u}) {
            # With -u the source strings are left untouched, so turn the
            # parsed (Maketext-style) text back into its PO spelling.
            s/\\/\\\\/g;
            s/\"/\\"/g;
            s/((?<!~)(?:~~)*)\[_(\d+)\]/$1%$2/g;
            s/((?<!~)(?:~~)*)\[([A-Za-z#*]\w*),([^\]]+)\]/"$1%$2(".escape($3).")"/eg;
            s/~([\~\[\]])/$1/g;
        }
        $_;
    } %{ parse(@pending, <$lexicon_fh>) };
    close $lexicon_fh;

    # The empty msgid is the PO header, not a translatable string.
    delete $Lexicon{''};
}

# Open the output and make it the default handle for every later print.
# The 2-arg form of open is deliberate: "-o -" becomes ">-", i.e. STDOUT.
open PO, ">$PO" or die "Can't write to $PO:$!\n";
select PO;

# Slurp mode: each input file is read as one string.
undef $/;
foreach my $file (@ARGV) {
    next if ($file=~/\.po$/i); # Don't parse po files
    my $filename = $file;

    # 2-arg open again on purpose, so that '-' reads from STDIN.
    open _, $file or die $!;
    $_ = <_>;

    # Drop a leading "./" (a literal dot, not any character) from the name
    # used in the "#:" source references.
    $filename =~ s!^\./!!;

    # Perl source file
    #
    # $state is one of the constants declared at the top of the file:
    #   NUL  - scanning for a localization function name
    #   BEG  - name found; skipping whitespace, expecting '('
    #   PAR  - inside the argument list
    #   QUO1 - inside a '...' string
    #   QUO2 - inside a "..." string
    #   QUO3 - inside a `...` string
    # $str accumulates the quoted text, $vars whatever else appears
    # between the parentheses.
    my ($state,$str,$vars)=(0);
    pos($_) = 0;

    # One more than the number of newlines in the whole file.
    my $orig = 1 + (() = ((my $__ = $_) =~ /\n/g));
  PARSER: {
      # Discard the text consumed by the previous rule so that every rule
      # below can anchor with ^ at the current position.
      $_ = substr($_, pos($_)) if (pos($_));

      # Line number of the current position: total minus the newlines
      # still ahead of us.
      my $line = $orig - (() = ((my $__ = $_) =~ /\n/g));

      # maketext or loc or _
      $state == NUL &&
        m/\b(translate|maketext|__?|loc|x)/gcx && do { $state = BEG;  redo; };
      $state == BEG && m/^([\s\t\n]*)/gcx && do { redo; };
      # begin ()
      $state == BEG && m/^([\S\(]) /gcx && do {
        # Anything other than '(' means this was not a call; start over.
        $state = ( ($1 eq '(') ? PAR : NUL) ;
        redo;
      };
      # begin or end of string
      $state == PAR  && m/^(\')  /gcx     && do { $state = QUO1; redo; };
      $state == QUO1 && m/^([^\']+)/gcx && do { $str.=$1; redo; };
      $state == QUO1 && m/^\'  /gcx     && do { $state = PAR;  redo; };

      $state == PAR  && m/^\"  /gcx     && do { $state = QUO2; redo; };
      $state == QUO2 && m/^([^\"]+)/gcx && do { $str.=$1; redo; };
      $state == QUO2 && m/^\"  /gcx     && do { $state = PAR;  redo; };

      $state == PAR  && m/^\`  /gcx     && do { $state = QUO3; redo; };
      $state == QUO3 && m/^([^\`]*)/gcx && do { $str.=$1; redo; };
      $state == QUO3 && m/^\`  /gcx     && do { $state = PAR;  redo; };

      # end ()
      $state == PAR && m/^[\)]/gcx
        && do {
          $state = NUL;
          $vars =~ s/[\n\r]//g if ($vars);
          # Record the string; the line number is moved back by the number
          # of newlines contained in the string itself.
          push @{$file{$str}}, [ $filename, $line - (() = $str =~ /\n/g), $vars] if ($str);
          undef $str; undef $vars;
          redo;
        };

      # a line of vars
      $state == PAR && m/^([^\)]*)/gcx && do { $vars.=$1."\n"; redo; };
    }
}

# Rewrite every extracted string into the form used in the PO file and
# move any translation already known for it over to the rewritten key.
foreach my $str (sort keys %file) {
    my $ostr = $str;
    my $entry = $file{$str};
    my $lexi = $Lexicon{$ostr};

    $str =~ s/\"/\\"/g;
    $lexi =~ s/\"/\\"/g;

    unless ($opts{u}) {
        # Maketext -> Gettext notation, applied alike to msgid and msgstr:
        #   [_1]             -> %1
        #   [func,arg,...]   -> %func(arg,...)
        #   ~[ ~] ~~         -> [ ] ~
        # The comma after the function name is part of the bracket syntax
        # and must be matched here, otherwise it leaks into the argument
        # list ("%quant(,%1,file)").
        for my $text ($str, $lexi) {
            $text =~ s/((?<!~)(?:~~)*)\[_(\d+)\]/$1%$2/g;
            $text =~ s/((?<!~)(?:~~)*)\[([A-Za-z#*]\w*),([^\]]+)\]/"$1%$2(".escape($3).")"/eg;
            $text =~ s/~([\~\[\]])/$1/g;
        }
    }

    # Make sure the string is listed even if it has no translation yet.
    $Lexicon{$str} ||= '';
    next if $ostr eq $str;

    # The key changed: re-file translation and source references under the
    # new spelling and forget the old one.
    $Lexicon{$str} ||= $lexi;
    delete $file{$ostr}; delete $Lexicon{$ostr};
    $file{$str} = $entry;
}

# Nothing extracted and nothing carried over: nothing to write.
exit unless %Lexicon;

# Start with the header kept from the existing PO file, or with a template
# header when there was none.
print $out ? "$out\n" : (<< '.');
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
# This file is distributed under the same license as the PACKAGE package.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"POT-Creation-Date: 2002-07-16 17:27+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=CHARSET\n"
"Content-Transfer-Encoding: 8bit\n"
.

# One PO entry per lexicon key, in sorted order.
foreach my $entry (sort keys %Lexicon) {
    # Distinct "file:line" references for the "#:" comment.
    my %f = (map { ( "$_->[0]:$_->[1]" => 1 ) } @{$file{$entry}});
    my $f = join(' ', sort keys %f);
    $f = " $f" if length $f;

    # If this string has no translation but the same string without its
    # trailing spaces has one, reuse it and re-append the spaces.
    my $nospace = $entry;
    $nospace =~ s/ +$//;

    if (!$Lexicon{$entry} and $Lexicon{$nospace}) {
        $Lexicon{$entry} = $Lexicon{$nospace} . (' ' x (length($entry) - length($nospace)));
    }

    my %seen;
    print "\n#:$f\n";

    # One "#." comment per distinct argument list seen with this string.
    foreach my $ref ( grep { $_->[2] } @{$file{$entry}} ) {
        my $var = $ref->[2];
        $var =~ s/^\s*,\s*//; $var =~ s/\s*$//;
        print "#. ($var)\n" if length($var) and !$seen{$var}++;
    }

    # Flag entries that contain placeholders.  The test must look at the
    # entry itself, and the flag needs its own line so that it does not
    # run into the following "msgid".
    print "#, maketext-format\n"
        if $::interop and $entry =~ /%(?:\d|\w+\([^\)]*\))/;
    print "msgid "; output($entry);
    print "msgstr "; output($Lexicon{$entry});
}

# Print one PO string value to the currently selected filehandle.
#
# A value without newlines is written as a single quoted line.  A value
# containing newlines is written as an empty "" line followed by one quoted
# line per newline-separated piece (trailing empty pieces included).
sub output {
    my ($text) = @_;

    if ($text !~ /\n/) {
        print qq{"$text"\n};
    }
    else {
        print qq{""\n};
        for my $piece (split /\n/, $text, -1) {
            print qq{"$piece"\n};
        }
    }
}

# Convert the argument list of a Maketext bracket call to Gettext notation.
#
# Takes the text after the function name (e.g. "_1,file,_2") and returns it
# with every _N placeholder that starts a word rewritten as %N.
sub escape {
    my $text = shift;
    # /g: an argument list may carry more than one placeholder.
    $text =~ s/\b_(\d+)/%$1/g;
    return $text;
}

# Parse the content of a PO file (or the raw bytes of an MO file) into a
# lexicon.
#
# Arguments: the file content as a list of lines.  This script calls the
# function directly; a leading class name or object from a method-style call
# is discarded if present.
#
# Returns: a hash reference mapping each msgid to its msgstr, both passed
# through transform().  Entries with an empty msgstr are skipped.  Header
# fields found by parse_metadata() are included under "__"-prefixed keys.
sub parse {
    # Only drop an invocant when there is one.  Shifting unconditionally
    # would throw away the first data line of a plain function call.
    shift if @_ and (ref $_[0] or $_[0] eq __PACKAGE__);

    my (%var, $key, @ret);
    my @metadata;

    # Check for magic string of MO files (either byte order)
    if ($_[0] =~ /^\x95\x04\x12\xde/ or $_[0] =~ /^\xde\x12\x04\x95/) {
        my ($tmpfh, $tmpfile);
        if (eval { require File::Temp; 1 }) {
            ($tmpfh, $tmpfile) = File::Temp::tempfile();
        }
        else {
            # make a reasonable tmpfile decision
            use FileHandle;
            $tmpfile = ($ENV{TEMP} || $ENV{TMPDIR} || '/tmp') . "/$$.tmp";
            $tmpfh = FileHandle->new;
            $tmpfh->open(">$tmpfile") or die $!;
        }

        print $tmpfh @_;
        close $tmpfh;

        # Convert it to PO format with the external msgunfmt tool
        @_ = `msgunfmt $tmpfile`;
        unlink $tmpfile;
    }

    local $^W;	# no 'uninitialized' warnings, please.

    # Parse PO files; Locale::gettext objects are not yet supported.
    # %var collects the fields of the entry currently being read and $key
    # remembers which of msgid/msgstr a continuation line belongs to.
    foreach (@_) {
        if (/^(msgid|msgstr) +"(.*)" *$/) {	# leading strings
            $var{$1} = $2;
            $key = $1;
        }
        elsif (/^"(.*)" *$/) {			# continued strings
            $var{$key} .= $1."\n";
        }
        elsif (/^#, +(.*) *$/) {		# control variables
            $var{$1} = 1;
        }
        elsif (/^ *$/ && %var) {		# blank line ends the entry
            push @ret, (map transform($_), @var{'msgid', 'msgstr'})
                if length $var{msgstr};
            push @metadata, parse_metadata($var{msgstr})
                if $var{msgid} eq '';
            %var = ();
        }
    }

    # Flush the last entry when the input does not end with a blank line.
    push @ret, map { transform($_) } @var{'msgid', 'msgstr'}
        if length $var{msgstr};
    push @metadata, parse_metadata($var{msgstr})
        if $var{msgid} eq '';

    return {@metadata, @ret};
}

# Extract "Name: value" header fields from a PO header msgstr.
#
# The last argument is run through transform() and split on newlines.
# Every line of the form "Name: value" yields the pair ("__Name", value);
# other lines are ignored.  Returns the flat list of pairs.
sub parse_metadata {
    my $header = transform(pop);

    my @pairs;
    for my $field (split /\n+/, $header) {
        next unless $field =~ /^([^\x00-\x1f\x80-\xff :=]+):\s*(.*)$/;
        push @pairs, "__$1", $2;
    }
    return @pairs;
}

# Convert one PO string (Gettext notation) into Maketext notation.
#
# In order:
#   1. backslash escapes are expanded by evaluating each one as a
#      double-quoted Perl literal;
#   2. literal ~ [ ] are protected with a leading ~;
#   3. %func(args) becomes [func,args], with args passed through unescape();
#   4. %N and %* become [_N] and [_*].
# A % preceded by another % or by a backslash is left alone in steps 3 and 4.
sub transform {
    my ($text) = @_;

    # NOTE(review): this string-evals text taken from the PO file.  The
    # captured escape is at most three characters long, but it is still
    # evaluated as Perl; keep that in mind for untrusted input.
    $text =~ s/\\([0x]..|c?.)/qq{"\\$1"}/eeg;

    $text =~ s/([\~\[\]])/~$1/g;
    $text =~ s/(?<![%\\])%([A-Za-z#*]\w*)\(([^\)]*)\)/"\[$1,".unescape($2)."]"/eg;
    $text =~ s/(?<![%\\])%(\d+|\*)/\[_$1]/g;

    return $text;
}

# Convert the argument list of a Gettext %func(...) call to Maketext
# notation.
#
# Takes the comma-separated argument text (e.g. "%1,file,%2") and returns it
# with every argument that consists solely of %N or %* rewritten as _N / _*.
sub unescape {
    my $str = shift;
    # The trailing separator is only looked at, not consumed, so that it can
    # also serve as the leading separator of the next argument; otherwise
    # the second of two adjacent placeholders ("%1,%2") would be skipped.
    $str =~ s/(^|,)%(\d+|\*)(?=,|$)/$1_$2/g;
    return $str;
}

1;
