/*
 * Copyright (c) 2003-2011
 * Distributed Systems Software.  All rights reserved.
 * See the file LICENSE for redistribution information.
 */

/*	$OpenBSD: tr.c,v 1.13 2004/09/15 22:12:19 deraadt Exp $	*/
/*	$NetBSD: tr.c,v 1.5 1995/08/31 22:13:48 jtc Exp $	*/

/*
 * Copyright (c) 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*	$OpenBSD: extern.h,v 1.6 2003/06/03 02:56:20 millert Exp $	*/
/*	$NetBSD: extern.h,v 1.4 1995/11/01 00:45:22 pk Exp $	*/

/*-
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)extern.h	8.1 (Berkeley) 6/6/93
 */

/*	$OpenBSD: str.c,v 1.9 2004/09/15 22:12:19 deraadt Exp $	*/
/*	$NetBSD: str.c,v 1.7 1995/08/31 22:13:47 jtc Exp $	*/

/*-
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Identification strings (copyright notices and revision ids) that are
 * compiled into the object file unless "lint" is defined.
 * The sccsid definitions are disabled with "#if 0" and are kept only as a
 * record of the original Berkeley revisions.
 */
#ifndef lint
static const char copyright1[] =
"@(#) Copyright (c) 1988, 1993\n\
	The Regents of the University of California.  All rights reserved.\n";
static const char copyright[] =
"Copyright (c) 2003-2011\n\
Distributed Systems Software.  All rights reserved.";

#if 0
static char sccsid[] = "@(#)tr.c	8.2 (Berkeley) 5/4/95";
static char sccsid[] = "@(#)str.c	8.2 (Berkeley) 4/28/95";
#endif

static const char rcsid1[] =
  "$OpenBSD: tr.c,v 1.13 2004/09/15 22:12:19 deraadt Exp $";
static const char rcsid2[] =
  "$OpenBSD: str.c,v 1.9 2004/09/15 22:12:19 deraadt Exp $";
static const char revid[] =
  "$Id: strtr.c 2528 2011-09-23 21:54:05Z brachman $";
#endif /* not lint */

#ifdef DSSLIB
#include "dsslib.h"
#else
#include "dacs_config.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <limits.h>
#if defined(DACS_OS_SOLARIS)
#include "port/cdefs.h"
#else
#include <sys/cdefs.h>
#endif
#include <sys/types.h>
#include <errno.h>
#include <stddef.h>
#include <string.h>
#include <ctype.h>

#include "str.h"
#endif

/*
 * Module name string; it is not referenced anywhere else in this file, so
 * it is presumably consumed by logging macros from the project headers.
 * NOTE(review): the value is "strstr" although this file implements strtr;
 * this looks like a typo - confirm before changing, since it is a runtime
 * string.
 */
static const char *log_module_name = "strstr";

/*
 * Parser state for one tr operand (string1 or string2).
 * next() advances this state and leaves each generated character in
 * "lastch".
 *
 * which   - identifies the operand; genseq() rejects "[c*n]" sequences
 *           when it is STRING1.
 * state   - what next() is currently generating: EOS (exhausted),
 *           INFINITE (repeat lastch forever), NORMAL (parse from str),
 *           RANGE, SEQUENCE or SET.
 * cnt     - remaining count for RANGE/SEQUENCE, or the read index into
 *           "set" for SET.
 * equiv   - two-element set used by genequiv(); the second element is the
 *           OOBCH terminator.
 * set     - OOBCH-terminated array walked while in the SET state.
 * str     - current parse position within the caller's string.
 * io      - I/O descriptor; the parser stores error messages in
 *           io->errmsg.
 */
typedef struct {
  enum { STRING1, STRING2 } which;
  enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE, SET } state;
  int cnt;				/* character count */
  int lastch;			/* last character */
  int equiv[2];			/* equivalence set */
  int *set;				/* set of characters */
  unsigned char	*str;	/* user's string */
  Strtr_io *io;
} STR;

#define	NCHARS	(UCHAR_MAX + 1)		/* Number of possible characters. */
#define	OOBCH	(UCHAR_MAX + 1)		/* Out of band character value. */

/*
 * Lookup tables indexed by character value (0 .. NCHARS - 1).
 * Depending on the operation, tr_string1 holds either membership flags
 * (filled in by setup()) or the translation map (filled in by strtr_io()).
 * tr_string2 holds membership flags for the characters to squeeze.
 * Both are file-scope and shared by every call, so the code using them is
 * not reentrant.
 */
static int tr_string1[NCHARS];
static int tr_string2[NCHARS];

/*
 * Initial parser states, copied into a local STR before each use.
 * Field order: which, state, cnt, lastch, equiv, set, str, io.
 * equiv[1] is preset to OOBCH so that the equivalence "set" is already
 * terminated.
 */
static STR init_s1 = {
  STRING1, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL, NULL
};

static STR init_s2 = {
  STRING2, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL, NULL
};

/*
 * Callback context used by strtr(): the in-memory source and sink that
 * strtr_char_in() and strtr_char_out() operate on.
 */
typedef struct Strtr_state {
  char *input;		/* Next unread character of the NUL-terminated input */
  Ds output;		/* Dynamic string that collects the output characters */
} Strtr_state;

/*
 * Input callback for strtr(): return the next byte of the in-memory input
 * string held in the Strtr_state that ARG points to, or EOF once the
 * terminating NUL is reached (the NUL itself is never consumed).
 *
 * The byte is read as an unsigned char, so the result is always in the
 * range 0..UCHAR_MAX.  Reading it through plain char would yield negative
 * values for bytes >= 0x80 where char is signed; those would index the
 * lookup tables out of bounds, and 0xFF would be mistaken for EOF.
 */
static int
strtr_char_in(void *arg)
{
  unsigned char ch;
  Strtr_state *state;

  state = (Strtr_state *) arg;
  ch = (unsigned char) *state->input;
  if (ch == '\0')
	return(EOF);

  state->input++;
  return((int) ch);
}

/*
 * Output callback for strtr(): append CH to the dynamic string kept in
 * the Strtr_state that ARG points to, and hand CH back to the caller.
 */
static int
strtr_char_out(int ch, void *arg)
{
  Strtr_state *sink = (Strtr_state *) arg;

  ds_appendc(&sink->output, ch);
  return(ch);
}

/*
 * Forward declarations for the operand parser.
 * setup() fills a membership table from an operand; next() is the
 * character generator; the remaining functions are helpers that next()
 * (directly or indirectly) uses for escapes, bracket constructs and ranges.
 */
static void setup(int *, char *, STR *, int);
static int backslash(STR *);
static int bracket(STR *);
static int c_class(const void *, const void *);
static int genclass(STR *);
static int genequiv(STR *);
static int genrange(STR *);
static int genseq(STR *);
static int next(STR *);

#ifdef PROG

/*
 * Write the command synopsis to stderr and terminate with exit status 1.
 */
static void
usage(void)
{
  static const char synopsis_translate[] = "usage:\ttr [-cs] string1 string2\n";
  static const char synopsis_delete[] = "\ttr [-c] -d string1\n";
  static const char synopsis_squeeze[] = "\ttr [-c] -s string1\n";
  static const char synopsis_both[] = "\ttr [-c] -ds string1 string2\n";

  fprintf(stderr, "%s%s%s%s", synopsis_translate, synopsis_delete,
		  synopsis_squeeze, synopsis_both);
  exit(1);
}

/*
 * Input callback for the command-line program: ARG is a FILE pointer;
 * return the next character read from it with getc().
 */
static int
get(void *arg)
{
  FILE *stream = (FILE *) arg;

  return(getc(stream));
}

/*
 * Output callback for the command-line program: ARG is a FILE pointer;
 * write CH to it with putc() and return putc()'s result.
 */
static int
put(int ch, void *arg)
{
  FILE *stream = (FILE *) arg;

  return(putc(ch, stream));
}

/*
 * Command-line driver: parse the -c, -d and -s options, require one or two
 * operands, and run strtr_io() from stdin to stdout.
 * If strtr_io() fails, any error message it left is printed and usage()
 * ends the program with status 1; otherwise the exit status is 0.
 */
int
main(int argc, char **argv)
{
  int opt;
  int complement = 0, delete_chars = 0, squeeze = 0;
  Strtr_io io;

  while ((opt = getopt(argc, argv, "cds")) != -1) {
	if (opt == 'c')
	  complement = 1;
	else if (opt == 'd')
	  delete_chars = 1;
	else if (opt == 's')
	  squeeze = 1;
	else
	  usage();
  }

  argv += optind;
  argc -= optind;

  if (argc != 1 && argc != 2)
	usage();

  io.char_in = get;
  io.char_out = put;
  io.in_arg = (void *) stdin;
  io.out_arg = (void *) stdout;

  if (strtr_io(argv[0], argv[1], complement, delete_chars, squeeze, &io)
	  == -1) {
	if (io.errmsg)
	  fprintf(stderr, "%s\n", io.errmsg);
	usage();
  }

  exit(0);
}
#endif

/*
 * Library form of the BSD tr command, operating on strings.
 * The characters of INPUT are copied to the output, with selected
 * characters substituted, deleted or squeezed.
 *
 * STRING1 and STRING2 are the tr operands; STRING2 may be NULL for the
 * operations that do not take it.
 * CFLAG non-zero: use the complement of the set of characters in STRING1.
 * DFLAG non-zero: delete the characters in STRING1 from the output.
 * SFLAG non-zero: replace runs of the same character from STRING2 (or from
 * STRING1 when STRING2 is not given) by a single character.
 * Any combination of the flag arguments is acceptable.
 *
 * If TR_STRING is non-NULL it is set to the NUL-terminated result on
 * success, or to an error message on failure.
 *
 * Not thread-safe.
 *
 * Return -1 on error, 0 otherwise.
 *
 * XXX Might adopt perl's idea of returning the number of characters
 * replaced or deleted.
 */
int
strtr(char *input, char *string1, char *string2, int cflag, int dflag,
	  int sflag, char **tr_string)
{
  Strtr_io io;
  Strtr_state st;

  /* Read from INPUT and collect the output in a dynamic string. */
  st.input = input;
  ds_init(&st.output);

  io.char_in = strtr_char_in;
  io.char_out = strtr_char_out;
  io.in_arg = (void *) &st;
  io.out_arg = (void *) &st;

  if (strtr_io(string1, string2, cflag, dflag, sflag, &io) != -1) {
	if (tr_string != NULL) {
	  ds_appendc(&st.output, '\0');
	  *tr_string = ds_buf(&st.output);
	}
	return(0);
  }

  /* Failure: report through TR_STRING if the caller asked for it. */
  if (tr_string != NULL) {
	if (io.errmsg == NULL)
	  *tr_string = "strtr failed";
	else
	  *tr_string = ds_xprintf("strtr failed: %s", io.errmsg);
  }

  return(-1);
}

/*
 * Test if CH is a member of the set described by STRING1, optionally
 * complemented by CFLAG.
 * Return 1 if so, 0 if not, and -1 if an error occurs.
 */
int
strtr_char(int ch, char *string1, int cflag)
{
  STR s1;

  s1 = init_s1;
  s1.io = NULL;

  setup(tr_string1, string1, &s1, cflag);
  if (tr_string1[ch])
	return(1);

  return(0);
}

/*
 * Read the next input character through IO's char_in callback.
 * Return EOF at end of input; otherwise return the value reduced to the
 * range 0..UCHAR_MAX, so that it is always a valid index into the lookup
 * tables even if the callback returns a sign-extended char.
 */
static int
strtr_io_getc(Strtr_io *io)
{
  int ch;

  if ((ch = io->char_in(io->in_arg)) == EOF)
	return(EOF);

  return((int) (unsigned char) ch);
}

/*
 * Core of the tr implementation: characters are read with IO->char_in,
 * transformed as selected by the flags, and written with IO->char_out.
 *
 *   DFLAG and SFLAG:   delete the characters in STRING1 (complemented if
 *                      CFLAG), then squeeze runs of characters that are in
 *                      STRING2; STRING2 is required.
 *   DFLAG only:        delete the characters in STRING1 (complemented if
 *                      CFLAG); STRING2 must be NULL.
 *   SFLAG, no STRING2: squeeze runs of characters that are in STRING1
 *                      (complemented if CFLAG).
 *   otherwise:         map each character of STRING1 (complemented if
 *                      CFLAG) to the character at the same position in
 *                      STRING2, squeezing the mapped characters if SFLAG;
 *                      STRING2 is required and must not be empty.
 *
 * IO->errmsg is reset to NULL on entry and is set when an operand cannot
 * be parsed; invalid argument combinations return -1 without a message.
 *
 * The file-scope tables tr_string1 and tr_string2 are reinitialized on
 * every call (they would otherwise carry squeeze flags over from an
 * earlier call), but because they are shared this function is not
 * reentrant.
 *
 * Return 0 on success, -1 on error.
 */
int
strtr_io(char *string1, char *string2, int cflag, int dflag, int sflag,
		 Strtr_io *io)
{
  int ch, i, lastch, rc;
  STR s1, s2;

  /* Characters map to themselves, and nothing is squeezed, by default. */
  for (i = 0; i < NCHARS; i++) {
	tr_string1[i] = i;
	tr_string2[i] = 0;
  }

  io->errmsg = NULL;
  if (string1 == NULL)
	return(-1);

  s1 = init_s1;
  s2 = init_s2;
  s1.io = io;
  s2.io = io;

  /*
   * tr -ds [-c] string1 string2
   * Delete all characters (or complemented characters) in string1.
   * Squeeze all characters in string2.
   */
  if (dflag && sflag) {
	if (string2 == NULL)
	  return(-1);

	/* setup() cannot return a status; parse errors show up in errmsg. */
	setup(tr_string1, string1, &s1, cflag);
	if (io->errmsg != NULL)
	  return(-1);
	setup(tr_string2, string2, &s2, 0);
	if (io->errmsg != NULL)
	  return(-1);

	lastch = OOBCH;
	while ((ch = strtr_io_getc(io)) != EOF) {
	  if (!tr_string1[ch] && (!tr_string2[ch] || lastch != ch)) {
		lastch = ch;
		(void) io->char_out(ch, io->out_arg);
	  }
	}

	return(0);
  }

  /*
   * tr -d [-c] string1
   * Delete all characters (or complemented characters) in string1.
   */
  if (dflag) {
	if (string2 != NULL)
	  return(-1);

	setup(tr_string1, string1, &s1, cflag);
	if (io->errmsg != NULL)
	  return(-1);

	while ((ch = strtr_io_getc(io)) != EOF) {
	  if (!tr_string1[ch])
		(void) io->char_out(ch, io->out_arg);
	}

	return(0);
  }

  /*
   * tr -s [-c] string1
   * Squeeze all characters (or complemented characters) in string1.
   */
  if (sflag && string2 == NULL) {
	setup(tr_string1, string1, &s1, cflag);
	if (io->errmsg != NULL)
	  return(-1);

	lastch = OOBCH;
	while ((ch = strtr_io_getc(io)) != EOF) {
	  if (!tr_string1[ch] || lastch != ch) {
		lastch = ch;
		(void) io->char_out(ch, io->out_arg);
	  }
	}

	return(0);
  }

  /*
   * tr [-cs] string1 string2
   * Replace all characters (or complemented characters) in string1 with
   * the character in the same position in string2.  If the -s option is
   * specified, squeeze all the characters in string2.
   */
  if (string2 == NULL)
	return(-1);

  s1.str = (unsigned char *) string1;
  s2.str = (unsigned char *) string2;

  /* With -c, mark every slot so that the ones string1 names stand out. */
  if (cflag) {
	for (i = 0; i < NCHARS; i++)
	  tr_string1[i] = OOBCH;
  }

  if ((rc = next(&s2)) == -1)
	return(-1);
  if (rc == 0) {
	io->errmsg = "empty string2";
	return(-1);
  }

  /* If string2 runs out of characters, use the last one specified. */
  ch = s2.lastch;
  if (ch < 0 || ch >= NCHARS) {
	io->errmsg = "character value out of range";
	return(-1);
  }

  while ((rc = next(&s1)) > 0) {
	ch = s2.lastch;
	/* Never index or emit anything outside the character range. */
	if (s1.lastch < 0 || s1.lastch >= NCHARS || ch < 0 || ch >= NCHARS) {
	  io->errmsg = "character value out of range";
	  return(-1);
	}
	tr_string1[s1.lastch] = ch;
	if (sflag)
	  tr_string2[ch] = 1;
	if (next(&s2) == -1)
	  return(-1);
  }
  if (rc == -1)
	return(-1);

  /*
   * With -c, slots still marked OOBCH are the characters not in string1:
   * they map to the last string2 character used.  Characters that were in
   * string1 map to themselves.
   */
  if (cflag) {
	for (i = 0; i < NCHARS; i++)
	  tr_string1[i] = (tr_string1[i] == OOBCH) ? ch : i;
  }

  if (sflag) {
	lastch = OOBCH;
	while ((ch = strtr_io_getc(io)) != EOF) {
	  ch = tr_string1[ch];
	  if (!tr_string2[ch] || lastch != ch) {
		lastch = ch;
		(void) io->char_out(ch, io->out_arg);
	  }
	}
  }
  else {
	while ((ch = strtr_io_getc(io)) != EOF)
	  (void) io->char_out(tr_string1[ch], io->out_arg);
  }

  return(0);
}

/*
 * Build a membership table from a tr operand.
 * STRING is a table of NCHARS ints; on return STRING[c] is 1 for each
 * character c generated from ARG and 0 otherwise, with the sense inverted
 * if CFLAG is non-zero.  STR is the parser state to use for ARG.
 *
 * Parsing stops at the end of ARG or at the first error.  Because this
 * function returns nothing, a caller that needs to detect a parse error
 * must check whether the parser left a message in STR->io->errmsg.
 * (Testing next() merely for non-zero would treat its -1 error return as
 * "character available" and store through a stale lastch.)
 */
static void
setup(int *string, char *arg, STR *str, int cflag)
{
  int i;

  str->str = (unsigned char *) arg;
  bzero(string, NCHARS * sizeof(int));

  while (next(str) > 0) {
	/* Ignore values that cannot index the table (e.g. OOBCH). */
	if (str->lastch >= 0 && str->lastch < NCHARS)
	  string[str->lastch] = 1;
  }

  if (cflag) {
	for (i = 0; i < NCHARS; i++)
	  string[i] = !string[i];
  }
}

/*
 * Generate the next character of the operand described by S.
 * On a return of 1 the character is in S->lastch.
 * Return 0 when the operand is exhausted and -1 if bracket() reported an
 * error.
 *
 * What happens depends on S->state:
 *   EOS      - nothing left; always returns 0.
 *   INFINITE - S->lastch is repeated without end; always returns 1.
 *   NORMAL   - parse the next item at S->str: a backslash escape, a
 *              bracket construct, or an ordinary character, each of which
 *              may be the start of a range.
 *   RANGE    - step S->lastch upward, S->cnt times.
 *   SEQUENCE - repeat S->lastch, S->cnt times.
 *   SET      - walk S->set until the OOBCH terminator.
 * When a RANGE, SEQUENCE or SET is used up, the state returns to NORMAL
 * and parsing continues through a recursive call.
 */
static int
next(STR *s)
{
  int ch, rc;

  switch (s->state) {
  case EOS:
	return(0);
  case INFINITE:
	return(1);
  case NORMAL:
	switch (ch = *s->str) {
	case '\0':
	  s->state = EOS;
	  return(0);
	case '\\':
	  s->lastch = backslash(s);
	  break;
	case '[':
	  /*
	   * A non-zero result means bracket() consumed a construct and changed
	   * the state, so start over.  Zero means that the '[' is an ordinary
	   * character, which is handled by falling through.
	   */
	  if ((rc = bracket(s)) == -1)
		return(-1);
	  else if (rc)
		return (next(s));
	  /* FALLTHROUGH */
	default:
	  ++s->str;
	  s->lastch = ch;
	  break;
	}

	/* We can start a range at any time. */
	/*
	 * genrange() backs lastch up by one when it accepts a range, so the
	 * recursive call (now in the RANGE state) yields the first character.
	 */
	if (s->str[0] == '-' && genrange(s))
	  return(next(s));
	return(1);
  case RANGE:
	if (s->cnt-- == 0) {
	  s->state = NORMAL;
	  return(next(s));
	}
	++s->lastch;
	return(1);
  case SEQUENCE:
	if (s->cnt-- == 0) {
	  s->state = NORMAL;
	  return(next(s));
	}
	return(1);
  case SET:
	/* cnt is the read index into the set while in this state. */
	if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
	  s->state = NORMAL;
	  return(next(s));
	}
	return(1);
  default:
	return(0);
  }
  /* NOTREACHED */
}

/*
 * Try to parse a bracket construct; S->str points at the '['.
 * Recognized forms are "[:class:]", "[=equiv=]" and the repetition forms
 * "[#*n]" / "[\###*n]".
 *
 * Return 1 if a construct was consumed (S's state and position have been
 * updated), 0 if the '[' does not begin a construct and must be treated
 * as an ordinary character, and -1 if a helper reported an error.
 *
 * For "[:class:]" the class name is NUL-terminated in place while
 * genclass() looks it up, and the overwritten ':' is put back afterwards
 * so that the caller's string is left unchanged and can be parsed again.
 * The string must therefore be writable.
 */
static int
bracket(STR *s)
{
  char *p;
  int rc;

  switch (s->str[1]) {
  case '\0':			/* "[" is the last character */
	/* There is nothing to search; s->str + 2 would be past the end. */
	return(0);
  case ':':				/* "[:class:]" */
	if ((p = strstr((char *) s->str + 2, ":]")) == NULL)
	  return(0);
	*p = '\0';
	s->str += 2;
	rc = genclass(s);
	*p = ':';
	if (rc == -1)
	  return(-1);
	s->str = (unsigned char *) p + 2;
	return(1);
  case '=':				/* "[=equiv=]" */
	if ((p = strstr((char *) s->str + 2, "=]")) == NULL)
	  return(0);
	s->str += 2;
	if (genequiv(s) == -1)
	  return(-1);
	return(1);
  default:				/* "[\###*n]" or "[#*n]" */
	if ((p = strpbrk((char *) s->str + 2, "*]")) == NULL)
	  return(0);
	if (p[0] != '*' || strchr(p, ']') == NULL)
	  return(0);
	s->str += 1;
	if (genseq(s) == -1)
	  return(-1);
	return(1);
  }
  /* NOTREACHED */
}

/*
 * A character class that can be named as "[:name:]".
 */
typedef struct {
  char *name;			/* Class name, as written between "[:" and ":]" */
  int (*func)(int);		/* <ctype.h> predicate that defines membership */
  int *set;				/* Member list built by genclass(); initially NULL */
} CLASS;

/*
 * The known classes.  genclass() looks names up with bsearch() using
 * c_class(), which compares with strcmp(), so the entries must be kept in
 * strcmp() order.
 */
static CLASS classes[] = {
  { "alnum",  isalnum,  },
  { "alpha",  isalpha,  },
  { "blank",  isblank,  },
  { "cntrl",  iscntrl,  },
  { "digit",  isdigit,  },
  { "graph",  isgraph,  },
  { "lower",  islower,  },
  { "print",  isprint,  },
  { "punct",  ispunct,  },
  { "space",  isspace,  },
  { "upper",  isupper,  },
  { "xdigit", isxdigit, },
};

/*
 * Switch S to generating the members of a character class.
 * S->str points at the NUL-terminated class name (bracket() terminates it
 * before calling).  On success S is put in the SET state with S->set
 * referring to an OOBCH-terminated list of the class members in
 * increasing order.
 *
 * The member list is built the first time a class is used and is kept in
 * the class table for later calls; allocating a fresh list on every call
 * would leak the previous one.
 * NOTE(review): the cached list reflects the <ctype.h> results at the time
 * it was built; if the program changes locale between calls and expects
 * class membership to follow, the cache would need to be invalidated.
 *
 * Return 0 on success.  Return -1 with S->io->errmsg set if the class name
 * is unknown or memory cannot be allocated.
 */
static int
genclass(STR *s)
{
  int ch, *p;
  CLASS *cp, key;

  key.name = (char *) s->str;
  key.func = NULL;
  key.set = NULL;
  cp = (CLASS *) bsearch(&key, classes, sizeof(classes) / sizeof(CLASS),
						 sizeof(CLASS), c_class);
  if (cp == NULL) {
	s->io->errmsg = ds_xprintf("unknown class %s", s->str);
	return(-1);
  }

  if (cp->set == NULL) {
	/* Room for every possible character plus the terminator. */
	if ((p = malloc((NCHARS + 1) * sizeof(int))) == NULL) {
	  s->io->errmsg = "no memory for a class";
	  return(-1);
	}
	bzero(p, (NCHARS + 1) * sizeof(int));
	cp->set = p;

	for (ch = 0; ch < NCHARS; ch++) {
	  if ((cp->func)(ch))
		*p++ = ch;
	}
	*p = OOBCH;
  }

  s->cnt = 0;
  s->state = SET;
  s->set = cp->set;

  return(0);
}

static int
c_class(const void *a, const void *b)
{
  return (strcmp(((CLASS *) a)->name, ((CLASS *) b)->name));
}

/*
 * English doesn't have any equivalence classes, so for now
 * we just syntax check and grab the character.
 *
 * S->str points just past the "[=" of an "[=c=]" construct, where c is a
 * single character or a backslash escape.  On success S is put in the SET
 * state with a one-member set holding c, and S->str is advanced past the
 * closing "=]".
 *
 * The plain-character form is three characters long ("c=]") while, after
 * backslash() has consumed an escape, only "=]" remains; advancing by two
 * in both cases would leave the ']' behind to be taken as an ordinary
 * character.
 *
 * Return 0 on success.  Return -1 with S->io->errmsg set if the character
 * is not immediately followed by "=]".
 */
static int
genequiv(STR *s)
{
  if (*s->str == '\\') {
	s->equiv[0] = backslash(s);
	if (s->str[0] != '=' || s->str[1] != ']') {
	  s->io->errmsg = "misplaced equivalence equals sign";
	  return(-1);
	}
	s->str += 2;
  } else {
	s->equiv[0] = s->str[0];
	if (s->str[1] != '=' || s->str[2] != ']') {
	  s->io->errmsg = "misplaced equivalence equals sign";
	  return(-1);
	}
	s->str += 3;
  }

  /* The set is the single character followed by the terminator. */
  s->equiv[1] = OOBCH;
  s->cnt = 0;
  s->state = SET;
  s->set = s->equiv;

  return(0);
}

/*
 * Try to begin a range "first-last".  S->lastch already holds the first
 * character and S->str points at the '-'.
 *
 * Return 1 if a range was started: S is put in the RANGE state, S->cnt is
 * the number of characters in the range and S->lastch is backed up by one
 * so that the next call to next() yields the first character again.
 * Return 0, with S's position and state as they were on entry, if this is
 * not a range and the '-' is an ordinary character.  That is the case
 * when the '-' is the last character of the string, when the end of the
 * range is less than its start, or when the end is not a valid character
 * value.
 */
static int
genrange(STR *s)
{
  int savestate, stopval;
  unsigned char *savestart;

  /* A trailing '-' stands for itself; do not step over the terminator. */
  if (s->str[1] == '\0')
	return(0);

  savestart = s->str;
  savestate = s->state;

  ++s->str;
  if (*s->str == '\\')
	stopval = backslash(s);
  else
	stopval = *s->str++;

  if (stopval < s->lastch || stopval > UCHAR_MAX) {
	/* backslash() may have changed the state; undo that as well. */
	s->str = savestart;
	s->state = savestate;
	return(0);
  }

  s->cnt = stopval - s->lastch + 1;
  s->state = RANGE;
  --s->lastch;
  return(1);
}

/*
 * Parse a repetition "[c*n]"; S->str points just past the '['.
 * c is a single character or a backslash escape.  The count is either
 * absent ("[c*]"), a backslash escape, or a number in strtol() base-0
 * notation followed by ']'.
 *
 * On success S->lastch is c and S is put in the SEQUENCE state with the
 * count in S->cnt, or in the INFINITE state if the count is zero or
 * absent.
 *
 * Return 0 on success.  Return -1 with S->io->errmsg set if S is string1,
 * if c is not followed by '*', or if the count is malformed.
 */
static int
genseq(STR *s)
{
  char *end;
  int first;

  if (s->which == STRING1) {
	s->io->errmsg = "sequences only valid in string2";
	return(-1);
  }

  /* The character to repeat. */
  if (*s->str == '\\')
	s->lastch = backslash(s);
  else {
	s->lastch = *s->str;
	s->str++;
  }

  if (*s->str != '*') {
	s->io->errmsg = "misplaced sequence asterisk";
	return(-1);
  }

  /* The repeat count. */
  s->str++;
  first = *s->str;
  if (first == '\\')
	s->cnt = backslash(s);
  else if (first == ']') {
	s->cnt = 0;
	s->str++;
  }
  else {
	if (!isdigit(first)) {
	  s->io->errmsg = "illegal sequence count";
	  return(-1);
	}
	s->cnt = strtol((char *) s->str, &end, 0);
	if (*end != ']') {
	  s->io->errmsg = "illegal sequence count";
	  return(-1);
	}
	s->str = (unsigned char *) end + 1;
  }

  if (s->cnt)
	s->state = SEQUENCE;
  else
	s->state = INFINITE;

  return(0);
}

/*
 * Translate \??? into a character.  Up to 3 octal digits, if no digits either
 * an escape code or a literal character.
 *
 * S->str points at the backslash on entry and just past the escape on
 * return.  A backslash that is the last character of the string stands
 * for itself and puts S in the EOS state.
 *
 * Only '0'..'7' are accepted as digits ('8' and '9' are not octal), and a
 * digit is not consumed if including it would make the value exceed
 * UCHAR_MAX; such a digit is left to be parsed as an ordinary character.
 * The result is therefore always a valid character value, which matters
 * because callers use it to index tables of UCHAR_MAX + 1 entries.
 */
static int
backslash(STR *s)
{
  int ch, cnt, val;

  for (cnt = val = 0;;) {
	ch = *++s->str;
	if (ch < '0' || ch > '7')
	  break;
	if (val * 8 + (ch - '0') > UCHAR_MAX)
	  break;
	val = val * 8 + (ch - '0');
	if (++cnt == 3) {
	  ++s->str;
	  break;
	}
  }
  if (cnt)
	return(val);

  /* Not octal: consume the character after the backslash, if any. */
  if (ch != '\0')
	++s->str;
  switch (ch) {
  case 'a':			/* escape characters */
	return ('\7');
  case 'b':
	return ('\b');
  case 'f':
	return ('\f');
  case 'n':
	return ('\n');
  case 'r':
	return ('\r');
  case 't':
	return ('\t');
  case 'v':
	return ('\13');
  case '\0':			/*  \" -> \ */
	s->state = EOS;
	return ('\\');
  default:			/* \x" -> x */
	return (ch);
  }
}
