#
# bibliography package for Perl
#
# ISI (adapted from medline by MvdH & BR)
#
# original by Dana Jacobsen (dana@acm.org)
# 22 January 1995 
#
# (last modified: $Revision$)
#
# Note that there are many, many variations of the format called "isi".
#

package bp_isi;

# Identification string of this format module.  tocanon stores it in the
# 'OrigFormat' field of every record it converts.
$version = "isi (mvdh 15 oct 99)";

######

# Register the format with the bib package.  The entries after the suffix
# line name the per-format functions.  Judging by the wording, "is standard"
# presumably selects bib's default implementation and "is unsupported" marks
# the operation as unavailable -- TODO confirm against bib'reg_format.
&bib'reg_format(
  'isi',        # name
  'isi',        # short name
  'bp_isi',     # package name
  'none',       # default character set
  'suffix is isi',
# our functions
  'options',
  'open is standard',
  'close is standard',
  'read',
  'write is standard',
  'clear is standard',
  'explode',
  'implode is unsupported',
  'tocanon',
  'fromcanon is unsupported',
);

######

# Set through options() ("html=<n>").  A true value makes read() split the
# input on '</pre>' instead of on blank lines.
$opt_html = 0;

######

# Parse one format option of the form "name=value".
#
# Only "html" is recognised; its value is handed to bib::parse_num_option
# and the result is kept in $opt_html.
#
# Returns 1 when the option was accepted, undef when the argument contains
# no '=' or names an option this format does not know.
sub options {
    my($opt) = @_;

    &bib::panic("isi options called with no arguments!") unless defined $opt;
    &bib::debugs("parsing isi option '$opt'", 64);

    # Anything without an '=' cannot be one of our options.
    return undef if $opt !~ /=/;

    my($name, $val) = split(/\s*=\s*/, $opt, 2);
    &bib::debugs("option split: $name = $val", 8);

    if ($name =~ /^html$/) {
        $opt_html = &bib::parse_num_option($val);
        return 1;
    }

    return undef;
}

######

# We have our own read routine because we would like to handle the case
# of HTML output from Entrez.  For example, turn on the HTML option, then
# it can parse the output of:
#   <http://atlas.nlm.nih.gov:5700/htbin-post/Entrez/query?
#    db=m&form=4&term=ras&field=word&dispmax=10&dopt=l&title=no>
# directly.  Unfortunately, we have to do this specially since they don't
# put blank lines between entries.

# Read the next raw record from the current bib file handle
# ($bib::glb_current_fh) and return it; the result is undef once the handle
# is exhausted.
#
# Without the html option a record is one paragraph (blank-line separated).
# With it, a record is everything up to and including the next '</pre>',
# with a leading '<HR>' and '<pre>' stripped.
sub read {
    my($file) = @_;
    my $record;

    &bib::debugs("reading $file<$bib::glb_current_fmt>", 32);

    unless ($opt_html) {
        # read a paragraph
        local $/ = '';
        $record = <$bib::glb_current_fh>;
        return $record;
    }

    local $/ = '</pre>';
    $record = <$bib::glb_current_fh>;
    $record =~ s/^<HR>\s*//;
    $record =~ s/^<pre>\s*//;

    # What is left may be the trailer of the page.  If it looks like it,
    # read once more; that should yield an eof.
    if ($record =~ /^<\/form>\s*$/) {
        $record = <$bib::glb_current_fh>;
    }

    return $record;
}

######

# Split one raw ISI record into a hash of tag => value.
#
# Line handling:
#   * a line starting with whitespace continues the previous tag; the text
#     is appended with "; " for AU, ";\n\t" for CR and a single blank for
#     every other tag,
#   * a line consisting only of digits is skipped (RefMan record numbers),
#   * a line starting with an upper-case letter or digit opens a tag; the
#     tag is the leading run of [A-Z0-9], the rest is the value.  A tag that
#     occurs again has its value appended with $bib::cs_sep,
#   * anything else is reported through bib::gotwarn and ignored.
#
# Returns the hash as a list, or whatever bib::goterror returns when a
# continuation line appears before any tag.
sub explode {
    my($record) = @_;
    my %entry = ();
    my $field;
    my $val;

    foreach my $line (split(/\n/, $record)) {
        if ($line =~ /^\s/) {
            return &bib::goterror("Isi explode--Problems parsing entry")
                unless defined $field;
            $line =~ s/^\s+//;                      # remove leading blanks
            if ($field eq 'AU') {
                $entry{$field} .= "; " . $line;     # one author per line
            } elsif ($field eq 'CR') {
                $entry{$field} .= ";\n\t" . $line;  # one reference per line
            } else {
                $entry{$field} .= " " . $line;      # plain wrapped text
            }
            next;
        }

        # RefMan puts bare numbers between the fields.  This has to be
        # checked before the tag test below: ISI tags may contain digits
        # (C1, J9), so a digit-only line would otherwise become a tag.
        next if $line =~ /^\d+$/;

        if ($line =~ /^[A-Z0-9]/) {
            ($field, $val) = $line =~ /^([A-Z0-9]+)\s*(.*)/;
            if (defined $entry{$field}) {
                $entry{$field} .= $bib::cs_sep . $val;
            } else {
                $entry{$field} = $val;
            }
            next;
        }

        &bib::gotwarn("Isi explode--can't parse: $line");
    }

    %entry;
}

######


# Writing ISI records is not supported: whatever entry is passed in is
# ignored and the error is reported through bib::goterror, whose result is
# returned.
sub implode {
    return &bib::goterror("isi implode isn't supported.");
}

######

# We want to check for any fields we don't recognize, because we don't
# have documentation on the format, so there may be something important
# being missed.

# known tags:
# my ($CITEID,      $BIBTYPE,    $AUTHOR)         = ( 0,  1,  2);
# my ($TITLE,       $JOURNAL,    $VOLUME)         = ( 3,  4,  5);
# my ($NUMBER,      $PAGES,      $MONTH)          = ( 6,  7,  8);
# my ($YEAR,        $KEY,        $ABSTRACT)       = ( 9, 10, 11);
# my ($NOTE,        $EDITOR,     $PUBLISHER)      = (12, 13, 14);
# my ($SERIES,      $ADDRESS,    $CHAPTER)        = (15, 16, 17);
# my ($HOWPUBLISHED,$BOOKTITLE,  $ORGANISATION)   = (18, 19, 20);
# my ($SCHOOL,      $INSTITUTION,$TYPE)           = (21, 22, 23);
# my ($COMMENTS)           = (24);
# my ($MISC)                                      = (25);

# Maps every ISI tag this module knows to the canonical field it is stored
# under.  A value of 0 marks a tag that is recognised but dropped; tocanon
# warns about any tag that has no entry here at all.
%isi_to_can_fields = (
    # our own tags
    'ID' => 'KeywordsPlus',
    'CR' => 'CitedRef',
    'SO' => 'FullJournalName',
    'BS' => 'SubTitle',

    # BibTeX tags
    'DE' => 'Keywords',
    'AU' => 'Authors',
    'TI' => 'Title',
    'AB' => 'Abstract',
    'JI' => 'Journal',
    'PU' => 'Publisher',
    'PY' => 'Year',
    'PD' => 'Month',
    'VL' => 'Volume',
    'IS' => 'Number',
    'LA' => 'Language',
    'SN' => 'ISSN',
    'WP' => 'Source',
    'SE' => 'Series',
    'C1' => 'AuthorAddress',    # affiliation
    'PA' => 'Address',          # publisher's address

    # BibTeX tags that are derived from other tags
    'PP' => 'Pages',            # is "BP -- EP"
    'DT' => 'Mytype',           # type = lowercase(DT)

    # internally used tags
    'BP' => 'FirstPage',
    'EP' => 'LastPage',

    # tags that are not used
    'PG' => 'PagesWhole',       # Page count
    'PT' => 0,                  # Publication type (e.g., book, journal, book in series)
    'NR' => 0,                  # Cited reference count
    'TC' => 0,                  # Times cited
    'PI' => 0,                  # Publisher city
    'RP' => 0,                  # Reprint address
    'FN' => 0,                  # File type (should be: 'ISI Export Format')
    'VR' => 0,                  # File format version number (should be: '1.0')
    'GA' => 0,                  # ISI document delivery number
    'ER' => 0,                  # End of record
    'J9' => 0,                  # 29-character source title abbreviation
    'UT' => 0,                  # internal ISI number
);


# Convert an exploded ISI record (tag => value list, as produced by explode)
# into a canonical record.
#
# Steps, in order:
#   * AU is split at ';' and every name is converted by isiname_to_canon;
#     the results are joined with $bib::cs_sep into 'Authors',
#   * DE is lower-cased and ID is folded into it as "KEYWORDSPLUS: ...",
#   * an all-upper-case title is turned into sentence case,
#   * SO stands in for a missing JI; an all-upper-case journal name is
#     re-capitalised word by word,
#   * day numbers are removed from PD and the month is lower-cased,
#   * BP and EP are combined into PP when both are present,
#   * DT (lower-cased, 'note' becoming 'article') gives 'CiteType',
#   * every remaining tag is copied according to %isi_to_can_fields;
#     unknown tags are reported through bib::gotwarn, tags mapped to 0 are
#     dropped.  ED is discarded without a warning.
#
# Returns the canonical record as a list; 'OrigFormat' is always set to
# $version.
sub tocanon {
    my(%entry) = @_;
    my(%can);
    my($type, $field);

    # AU: building the list with join (rather than stripping a leading
    # separator with a regex afterwards) keeps the separator out of any
    # pattern, so it needs no quoting.
    if (defined $entry{'AU'}) {
        $can{'Authors'} = join($bib::cs_sep,
            map { &isiname_to_canon($_) } split(/;/, $entry{'AU'}));
    }

    # merge keywords
    if ($entry{DE}) {
        $entry{DE} = lc($entry{DE});
    }
    if ($entry{ID}) {
        my $plus = "KEYWORDSPLUS: " . lc($entry{ID});
        $entry{DE} = $entry{DE} ? "$entry{DE}; $plus" : $plus;
        delete $entry{ID};
    }

    # Convert uppercase titles.  The defined() test matters: without it a
    # record that has no TI at all would be given an empty Title.
    if (defined $entry{TI} && $entry{TI} !~ /[a-z]/) {
        $entry{TI} = ucfirst(lc($entry{TI}));
    }

    if (!$entry{JI} && $entry{SO}) {
        $entry{JI} = $entry{SO};
        delete $entry{SO};
    }

    # Convert uppercase journals 'JI', 'Journal'
    if ($entry{JI} && $entry{JI} !~ /[a-z]/) {
        $entry{JI} =~ s/\./\. /g;       # add blanks after '.'
        $entry{JI} =~ s/\s+/ /g;        # squeeze runs of blanks
        $entry{JI} = join(' ', map { ucfirst(lc($_)) } split(/\s/, $entry{JI}));
    }

    # convert months (delete days)
    if ($entry{PD}) {
        $entry{PD} =~ s/[\s0-9]//g;     # get rid of the day
        $entry{PD} = lc($entry{PD});
    }

    # make pages fit
    if ($entry{EP} && $entry{BP}) {
        $entry{PP} = "$entry{BP} -- $entry{EP}";
        delete $entry{BP};
        delete $entry{EP};
    }

    # determine entry type
    if ($entry{DT}) {
        $type = lc("$entry{DT}");
        $type = 'article' if $type eq 'note';   # substitute note with article
        delete $entry{DT};
        $can{'CiteType'} = $type;
    }

    # Already handled (AU) or deliberately ignored (ED).
    delete $entry{AU};
    delete $entry{ED};

    foreach $field (keys %entry) {
        if (!defined $isi_to_can_fields{$field}) {
            &bib::gotwarn("Unknown field: $field");
        } elsif ($isi_to_can_fields{$field}) {
            $can{$isi_to_can_fields{$field}} = $entry{$field};
        }
    }

    $can{'OrigFormat'} = $version;
    %can;
}

# Convert one ISI author name ("LAST, INITIALS") into canonical form.
#
# The result is last name, "von" part, first-name initials and an empty
# fourth component joined with $bib::cs_sep2 (the fourth slot is presumably
# the "Jr" part of bp's canonical name -- confirm against bp-p-utils.pl).
#
#   "VAN DER HOEK, M"  ->  Hoek / van der / M. /
#   "Smith, JA"        ->  Smith / / J. A. /
#
# Surrounding whitespace and a missing blank after the comma are tolerated.
# A name that does not have the "Last, INITIALS" shape at all (no comma,
# spelled-out first name, corporate author ...) is kept verbatim as the
# last name rather than being turned into an empty author.
sub isiname_to_canon {
    my($name) = @_;
    my($last, $von, $first) = ('', '', '');

    $name = '' unless defined $name;
    $name =~ s/^\s+//;
    $name =~ s/\s+$//;

    if ($name =~ /^(.*),\s*([A-Z]*)$/) {
        ($last, $first) = ($1, $2);

        # "JA" -> "J. A."
        $first =~ s/([A-Z])/$1. /g;
        $first =~ s/\s+$//;

        # (the von processing is from name_to_canon in bp-p-utils.pl)
        # Every leading word that is followed by more text goes to the von
        # part; the final word stays as the last name.
        while ($last =~ s/^([A-Za-z]+)\s+//) {
            $von .= " $1";
        }
        $von =~ s/^ //;

        # make names look pretty
        $von  = lc($von);
        $last = ucfirst(lc($last));
    } else {
        # Not in ISI shape: do not guess, keep what we were given.
        $last = $name;
    }

    return join($bib::cs_sep2, $last, $von, $first, '');
}

######


#######################
# end of package
#######################

1;
