#!/usr/bin/perl -w
#
$prog = "genmod v1.0";
# by Dana Jacobsen (dana@acm.org)
# 29 January 1996

# This program is not intended to be fast or perfect.
# The gen files are supposed to be correct, and if they
# are not, this program will very likely fail in a big way.
# That is ok for now, since the actual user will not be
# running this.  This is used to _generate_ the files that
# they use.  It should not generate incorrect output from
# correct input, however.

# XXXXX we need to put the name into each function so we don't have dups.

#
# I wish we didn't have to include this, but we need the HTML conversion.
#
unshift(@INC, $ENV{'BPHOME'})  if defined $ENV{'BPHOME'};
require "bp.pl";
&bib'load_charset("html");
$csfunc = $bib'charsets{'html', 'tocanon'};
$csesc  = $bib'charsets{'html', 'toesc'};

$field = '';

#
# This ought to be rethought.
#
# 1 is standard   $can{'field'}
# 2 is new        $can{'field'}
# 3 is local      $field
#
%known_fields = (
'Authors',	1,
'Editors',	1,
'Title',	1,
'SuperTitle',	1,
'Month',	1,
'Year',		1,
'Journal',	1,
'Volume',	1,
'Number',	1,
'Pages',	1,
'PagesWhole',	1,
'Publisher',	1,
'School',	1,
'ReportType',	1,
'ReportNumber',	1,
'Organization',	1,
'PubAddress',	1,
'Location',	1,
'Edition',	1,
'Abstract',	1,
'Keywords',	1,
'Annotation',	1,
'CiteType',	1,
'CiteKey',	1,
);
%field_name_info = (
'Authors',	'plain',
'Editors',	'plain',
);

$field_recnum = '';
$cs_escape    = "\034";
$type_field   = 'CiteType';
$type_default = 'misc';
$type_started = 0;
$prefix       = '';
$suffix       = '';
$header       = '';
$trailer      = '';


@states = ();
push(@states, 'illegal');
$state = 'begin';

while (<>) {
  next unless /\S/;
  next if /^\s*#/;

  if      ($state eq 'illegal') {
    die "Illegal state.  Error in gen code.";

  } elsif ($state eq 'begin') {

    if      (/\s*Name:\s*(\S+)/) {
      $name = $1;
      $state = 'global';
print <<"EOHEADER";

#
# generated by $prog
#
# style $name
#

\$record_number = 0;

sub conv_$name {
  local(\%can) = \@_;
  \$record_number++;

EOHEADER
    } else {
      die "Unknown command in begin state.";
    }

  } elsif ($state eq 'code') {

    if (/^\s*ECODE\s*$/) {
      $state = pop(@states);
    } else {
      print "  ", $_;
    }

  } elsif ($state eq 'prefix') {

    if (/^\s*EPREFIX\s*$/) {
      $prefix =~ s/\n+$//;
      $prefix = &parse_format($prefix, "");
      $prefix =~ s/^  //;
      $state = pop(@states);
    } else {
      $prefix .= $_;
    }

  } elsif ($state eq 'suffix') {

    if (/^\s*ESUFFIX\s*$/) {
      $suffix =~ s/\n+$//;
      $suffix = &parse_format($suffix, "");
      $suffix =~ s/^  //;
      $state = pop(@states);
    } else {
      $suffix .= $_;
    }

  } elsif ($state eq 'header') {

    if (/^\s*EHEADER\s*$/) {
      $header =~ s/\n+$//;
      $header = &parse_format($header, "");
      $header =~ s/^  //;
      $state = pop(@states);
    } else {
      $header .= $_;
    }

  } elsif ($state eq 'trailer') {

    if (/^\s*ETRAILER\s*$/) {
      $trailer =~ s/\n+$//;
      $trailer = &parse_format($trailer, "");
      $trailer =~ s/^  //;
      $state = pop(@states);
    } else {
      $trailer .= $_;
    }

  } elsif ($state eq 'global') {

    if      (/\s*CODE\s*$/) {
      push(@states, $state);
      $state = 'code';
    } elsif (/\s*PREFIX\s*$/) {
      $prefix = '';
      push(@states, $state);
      $state = 'prefix';
    } elsif (/\s*SUFFIX\s*$/) {
      $suffix = '';
      push(@states, $state);
      $state = 'suffix';
    } elsif (/\s*TEXT\s*$/) {
      $text = '';
      push(@states, $state);
      $state = 'text';
    } elsif (/\s*HEADER\s*$/) {
      $header = '';
      push(@states, $state);
      $state = 'header';
    } elsif (/\s*TRAILER\s*$/) {
      $trailer = '';
      push(@states, $state);
      $state = 'trailer';
    } elsif (/\s*TYPE:\s*(\w+)\s*$/) {
      push(@states, $state);
      $state = 'type';
      $type = $1;
      &beg_of_type;
    } elsif (s/\s*Field:\s*//) {
      if      (s/^(\S+) is required//) {
        $field = $1;
        print "  if (!defined \$can{'$field'}) {\n";
        print "    \&bib'gotwarn(\"required field $field is not defined\");\n";
        if (/default '(\S+)'/) {
          print "    \$can{'$field'} = '$1';\n";
        }
        print "  }\n";
      } elsif (/^(\S+) is new\.?\s*$/) {
        $field = $1;
        if (!defined $known_fields{$field}) {
          $known_fields{$field} = 2;
        }
      } elsif (/^(\S+) is local\.?\s*$/) {
        $field = $1;
        if (!defined $known_fields{$field}) {
          $known_fields{$field} = 3;
        }
        # XXXXX Shouldn't we warn if they think it's new but it isn't?
      } elsif (/^(\S+) is record number\.?\s*$/) {
        $field_recnum = $1;
      } elsif (/^(\w+) is name, format '(\S+)'\.$/) {
        $field = $1;
        $format = $2;
        # XXXXX known_fields
        $field_name_info{$field} = $format;
      } else {
        print "  # $_\n";
      }
    } elsif (s/\s*Label:\s*//) {
      if      (s/^(\w+) for (\w+)//) {
        $field  = $1;
        $ofield = $2;
        if (!defined $known_fields{$field}) {
          $known_fields{$field} = 3;
        }
        print "  local(\$$field) = '';\n";
        print "  if (defined \$can{'$ofield'}) {\n";
        print "    \$$field = \$can{'$ofield'};\n";
        if (s/,\s*single '(.*)'\s*,\s*plural '(.*)'\.//) {
          local($sing) = $1;
          local($plur) = $2;
          $sing = &$csfunc($sing) if $sing =~ /$csesc/;
          $plur = &$csfunc($plur) if $plur =~ /$csesc/;
          # We know how to do this with names.
          if (defined $field_name_info{$ofield}) {
            print "    if (\$can{'$ofield'} =~ /\$bib'cs_sep/) {\n";
            print "      \$$field = '$plur';\n";
            print "    } else {\n";
            print "      \$$field = '$sing';\n";
            print "    }\n";
          } else {
            die "I don't understand plural for the $ofield field\n";
          }
        }
        print "  }\n\n";
      } elsif (s/^(\w+) is date, format '(.*)'.//) {
        $field  = $1;
        $format = $2;
        if (!defined $known_fields{$field}) {
          $known_fields{$field} = 3;
        }
        &setup_date($field, $format);
      } elsif (s/^\((\w+),(\w+)\) is pages, format '(.*)'.//) {
        $field1 = $1;
        $field2 = $2;
        $format = $3;
        # We really don't have any formats.
        if (!defined $known_fields{$field1}) {
          $known_fields{$field1} = 3;
        }
        if (!defined $known_fields{$field2}) {
          $known_fields{$field2} = 3;
        }
        &setup_pages($field1, $field2, $format);
      } else {
       die "Unknown label command.";
      }
    } elsif (s/\s*Type:\s*//) {
      if (/^use (\S+)\s*,\s*default '(\S+)'/) {
        $type_field   = $1;
        $type_default = $2;
      } else {
        die "Couldn't parse 'Type' line.";
      }
    } else {
      die "Unknown command in global state.";
    }

  } elsif ($state eq 'type') {

    if      (/\s*ETYPE\s*$/) {
      &end_of_type;
      print "\n";
      $state = pop(@states);
    } elsif (/\s*TYPE:\s*(\w+)\s*$/) {
      $type = $1;
      &end_of_type;
      &beg_of_type;
    } elsif (/\s*TYPE:\s*([\w \t]+)\s*$/) {
      $type = '+';
      @typelist = split(/ /, $1);
      &end_of_type;
      &beg_of_type;
    } elsif (/\s*CODE\s*$/) {
      push(@states, $state);
      $state = 'code';
    } elsif (/\s*TEXT\s*$/) {
      $text = '';
      push(@states, $state);
      $state = 'text';
    } elsif (s/\s*Field:\s*//) {
      # handle Field
      if      (s/^(\S+) is required//) {
        $field = $1;
        if (!defined $known_fields{$field}) {
          warn "Unknown field referenced: $field\n";
          $known_fields{$field} = 2;
        }
        print "    if (!defined \$can{'$field'}) {\n";
        print "      \&bib'gotwarn(\"No $field field in \$type citation\");\n";
        if (/default '(\S+)'/) {
          print "      \$can{'$field'} = '$1';\n";
        }
        print "    }\n";
      }
    } elsif (s/\s*Label:\s*//) {
      # handle Label
      print "  # $_\n";
    } else {
      die "Unknown command in 'type' state.\n";
    }

  } elsif ($state eq 'text') {

    if (/^\s*ETEXT\s*$/) {
      print &parse_format($text, "    ");
      $state = pop(@states);
    } else {
      $text .= $_;
    }
  } else {
    die "Unknown state!  Error in genmod.";
  }
}

if ($state ne 'global') {
  die "Runaway argument section.\nStates: " . join(", ", @states);
}

# Now print the subroutines that they're going to call.
if (length($suffix) > 0) {
  print "\n";
  print "  $suffix\n";
}
$types_str = join("|", @types);
print <<"EOTRAILER";
  \$str;
}

sub ${name}_default_type {
  local(\$citetype) = \@_;
  local(\$type) = '$type_default';

  if (defined \$citetype) {
    if (\$citetype =~ /^($types_str)\$/) {
      \$type = \$citetype;
    } else {
      \&bib'gotwarn(\"No output type '\$can{'$type_field'}' in style -- using default type '$type_default'\");
    }
  }
  \$type;
}

1;
EOTRAILER

# XXXXX headers and trailers get defined here.
#       bp-output.pl also needs to be modified to read them.

sub beg_of_type {
  if ($type eq '+') {
    push(@types, @typelist);
  } else {
    push(@types, $type);
  }
  if ($type_started == 0) {
     $type_started = 1;
     print "  # set default type using the $type_field field\n";
     print "  \$type = \&${name}_default_type(\$can{'$type_field'});\n";
     print "\n";
     print "  # Initialize output string\n";
     print "  \$str = '';\n";
     print "  $prefix\n" if $prefix;
     print "\n";
     print "  if      ";
  } else {
     print "elsif ";
  }
  if ($type eq '+') {
    print "( ";
    while ($type = shift(@typelist)) {
      print "(\$type eq '$type') ";
      print "|| " if @typelist;
    }
    print ") {\n";
  } else {
    print "(\$type eq '$type') \{\n";
  }
}


sub end_of_type {
  print "  } ";
}

sub setup_date {
  local($f, $form) = @_;

  if ($form eq 'short') {
    print <<"EODATE";
  local(\$$f) = undef;
  if (defined \$can{'Month'}) {
    local(\$mo) = \&bp_util'output_month(\$can{'Month'}, 'short');
    if (defined \$can{'Year'}) {
      \$$f = "\$mo \$can{'Year'}";
    } else {
      \$$f = \$mo;
    }
  } else {
    \$$f = \$can{'Year'} if defined \$can{'Year'};
  }

EODATE
  } else {
    die "Unknown date format: $form\n";
  }
}

sub setup_pages {
  local($f1, $f2, $form) = @_;

  print <<"EOPAGES";
  local(\$$f1, \$$f2) = (undef,undef);
  \$$f1 = \$can{'Pages'}      if defined \$can{'Pages'};
  \$$f2 = \$can{'PagesWhole'} if defined \$can{'PagesWhole'};
  if ( (!defined \$$f2) && (defined \$$f1) && (\$$f1 !~ /-|,/) ) {
    \$$f2 = \$$f1;
    \$$f1 = undef;
  }
EOPAGES
}


sub parse_format {
  local($text, $indent) = @_;
  local($pc);

  # We handle specials by escape character replacement
  $text =~ s/$cs_escape/${cs_escape}0/g;
  $text =~ s/\\\\/${cs_escape}1/g;
  $text =~ s/\\\[/${cs_escape}2/g;
  $text =~ s/\\\]/${cs_escape}3/g;
  $text =~ s/\\\</${cs_escape}4/g;
  $text =~ s/\\\>/${cs_escape}5/g;
  $text =~ s/\\\'/${cs_escape}6/g;

  # We're going to try to take care of each [...] section seperately.  This
  # gets complicated because we want to deal with nested []'s.  We assume
  # that $[ is set to 0.

  $pc = &split_p($text, $indent);

  $pc =~ s/${cs_escape}6/'/g;
  $pc =~ s/${cs_escape}5/>/g;
  $pc =~ s/${cs_escape}4/</g;
  $pc =~ s/${cs_escape}3/\]/g;
  $pc =~ s/${cs_escape}2/\[/g;
  $pc =~ s/${cs_escape}1/\\/g;
  $pc =~ s/${cs_escape}0/$cs_escape/g;

  $pc;
}

sub split_p {
  local($t, $indent) = @_;
  local($pos, $l, $r, $nbrack, $ebrack);
  local($ret) = '';

  while ( length($t) > 0 ) {
    $pos = index($t, ']');
    if ($pos == -1) {
      $ret .= &split_c($t, $indent);
      $t = '';
    } else {
      $l = substr($t, 0, $pos+1);
      $r = substr($t, $pos+1);
      $nbrack = $l =~ s/\[/\[/g;
      $ebrack = 1;
      while ($nbrack > $ebrack) {
        $pos = index($r, ']');
        die "mismatched brackets: $l$r\n" if $pos == -1;
        $l = $l . substr($r, 0, $pos+1);
        $r = substr($r, $pos+1);
        $nbrack = $l =~ s/\[/\[/g;
        $ebrack++;
      }
      $ret .= &split_c($l, $indent);
      $r =~ s/^\n//;
      $t = $r;
    }
  }
  $ret;
}

sub split_c {
  local($c, $indent) = @_;
  local($sc) = '';

  if ($c =~ /\[/) {
    local($pre, $mid, $suf);
    local($ref_field);
    ($pre, $mid, $suf) = $c =~ /^([^\[]*)\[(.*)\]([^\]]*)$/;
    die "Confused in split_c: $c.\n" unless defined $pre;
#print "#--> :$pre:$mid:$suf:\n";
    $sc .= &parse_c($pre, $indent)  if $pre ne "";
    # determine ref_field
    ($ref_field) = $mid =~ /^[^']*'([^']+)'/;
    $ref_field = &parse_field($ref_field);
    # First, check to see if this is complicated
    if ($mid =~ /\[/) {
      $sc .= $indent . "if (defined $ref_field) {\n";
      $sc .= &split_p($mid, "  " . $indent);
      $sc .= $indent . "}\n";
    } else {
      local($tempc) = &parse_c($mid, $indent);
      if ( (defined $parse_multiline) || (length($tempc) > 42) ) {
        $sc .= $indent . "if (defined $ref_field) {\n";
        $tempc =~ s/\n/\n  /g;
        $tempc =~ s/\n  $/\n/g;
        $sc .= "  $tempc";
        $sc .= $indent . "}\n";
      } else {
        $tempc =~ s/;\n$//;
        $sc .= sprintf("%-40s if defined $ref_field;\n", $tempc);
      }
    }
    $sc .= &parse_c($suf, $indent)  if $suf ne "";
  } else {
    $sc = &parse_c($c, $indent);
  }

  $sc;
}


sub parse_c {
  local($c, $indent) = @_;
  local($ret) = '';

  $parse_multiline = undef;

  $c =~ s/(\")/\\$1/g;

  if ($c =~ /'/) {
    1 while $c =~ s/'(\w+)'/${cs_escape}=$1=/;
  }
  # Convert any HTML to canon
  $c = &$csfunc($c)  if $c =~ /$csesc/;

  if ($c =~ /${cs_escape}=/) {
    while ($c =~ s/${cs_escape}=([^=]+)=/&parse_field($1, 1)/e) {
      if (defined $field_additional) {
        $field_additional =~ s/\n/\n$indent/;
        $ret .= "${indent}$field_additional\n";
      }
    }
  }
  $ret .= "$indent\$str .= \"$c\";\n";
  $ret;
}


sub parse_field {
  local($f, $add) = @_;

  $field_additional = undef;

  if ($f eq $field_recnum) {
    return "\${record_number}";
  }
  if (!defined $known_fields{$f}) {
    warn "Unknown field referenced: $f\n";
    $known_fields{$f} = 2;
  }
  local($field);
  if      ($known_fields{$f} == 3) {
    $field = "\${$f}";
  } else {
    $field = "\$can{'$f'}";
  }
  if ( (defined $add) && (defined $field_name_info{$f}) ) {
    $field_additional = "local(\$t$f);\n\$t$f = \&bp_util'canon_to_name($field, '$field_name_info{$f}');";
    return "\$t$f";
  }
  $field;
}

