// Copyright (C) 1999-2012
// Smithsonian Astrophysical Observatory, Cambridge, MA, USA
// For conditions of distribution and use, see copyright notice in "copyright"

#include "fitsdata.h"
#include "colorscale.h"

#include "NaN.h"

// ZSCALE

// Larger / smaller of two values.  These are function-like macros, so the
// selected argument is evaluated twice: do not pass expressions with side
// effects.
#define ZSMAX(a,b) ((a) > (b) ? (a) : (b))
#define ZSMIN(a,b) ((a) < (b) ? (a) : (b)) 
// remainder of a divided by b (integer operands only)
#define ZSMOD(a,b) ((a) % (b))
// add 0.5 and truncate to int
// NOTE(review): the argument is not parenthesized, and truncation rounds
// toward zero, so this is only a nearest-integer rounding for simple,
// non-negative arguments -- confirm against the callers
#define ZSNINT(a) ((int)(a + 0.5))
// NOTE(review): presumably the "undefined" marker used by the zscale code;
// its use is outside this part of the file -- confirm
#define ZSINDEF 0
// smallest permissible sample
#define ZSMIN_NPIXELS 5
// max frac. of pixels to be rejected
#define ZSMAX_REJECT 0.5
// k-sigma pixel rejection factor
#define ZSKREJ 2.5
// maximum number of fitline iterations
#define ZSMAX_ITERATIONS 5

// FitsData

// Write a FitsBound as six blank-prefixed fields: the three minimum
// coordinates (x, y, z) followed by the three maximum coordinates.
ostream& operator<<(ostream& ss, const FitsBound& fb)
{
  // lower corner
  ss << ' ' << fb.xmin;
  ss << ' ' << fb.ymin;
  ss << ' ' << fb.zmin;

  // upper corner
  ss << ' ' << fb.xmax;
  ss << ' ' << fb.ymax;
  ss << ' ' << fb.zmax;

  return ss;
}

// Capture the image geometry, scaling keywords and optional min/max header
// keywords of 'fits', and reset all cached clip state (scan, zscale,
// autocut) to "not yet computed".
//   fits -- file whose header and image HDU describe the data
//   p    -- owning Base object, stored in 'parent'
FitsData::FitsData(FitsFile* fits, Base* p)
{
  parent = p;
  
  FitsHead* head = fits->head();
  FitsImageHDU* hdu = (FitsImageHDU*)head->hdu();
  width = head->naxis(1);
  height = head->naxis(2);

  // buf is the scratch string returned by the getXXX() text accessors
  buf[0] = '\0';

  byteswap = fits->byteswap();

  bscale = hdu->bscale();
  bzero = hdu->bzero();
  blank = hdu->blank();

  hasScaling = hdu->hasscaling();
  switch (hdu->bitpix()) {
  case 8:
  case 16:
  case -16:
  case 32:
  case 64:
    // integer pixel types: honor the blank value if the HDU reports one
    hasBlank = hdu->hasblank();
    break;
  case -32:
  case -64:
    // floating point pixel types: the blank value is not used
    hasBlank = 0;
    break;
  default:
    // unexpected bitpix: never leave hasBlank uninitialized
    hasBlank = 0;
    break;
  }

  nanf = getnanf();
  nand = getnand();

  low = high = 0;

  zLow = zHigh = 0;
  aLow = aHigh = 0;
  uLow = uHigh = 0;

  scanValid = 0;
  incr_ = 100;

  zContrast = .5;
  zSample = 600;
  zLine = 5;
  zscaleValid = 0;

  autoCutValid = 0;
  autoCutPer = 0;

  clipMode = FrScale::MINMAX;
  mmMode = FrScale::SAMPLE;

  // DATAMIN/DATAMAX are only used when both keywords are present
  if (fits->find("DATAMIN") && fits->find("DATAMAX")) {
    hasdatamin = 1;
    datamin = fits->getReal("DATAMIN", 0);
    datamax = fits->getReal("DATAMAX", 0);
  }
  else {
    hasdatamin = 0;
    datamin = datamax = 0;
  }

  // likewise IRAF-MIN/IRAF-MAX
  if (fits->find("IRAF-MIN") && fits->find("IRAF-MAX")) {
    hasirafmin = 1;
    irafmin = fits->getReal("IRAF-MIN", 0);
    irafmax = fits->getReal("IRAF-MAX", 0);
  }
  else {
    hasirafmin = 0;
    irafmin = irafmax = 0;
  }

  scanMode = FrScale::IMGSEC;
}

// Nothing to do here: this destructor performs no cleanup of its own.
FitsData::~FitsData()
{
}

const char* FitsData::getLow()
{
  ostringstream str;
  str << low << ends;
  memcpy(buf,str.str().c_str(),str.str().length());
  return buf;
}

const char* FitsData::getHigh()
{
  ostringstream str;
  str << high << ends;
  memcpy(buf,str.str().c_str(),str.str().length());
  return buf;
}

// Return the pixel stride used by scan() and bin() for the current min/max
// mode: 1 visits every pixel, incr_ visits every incr_-th pixel per axis.
int FitsData::getIncr()
{
  switch (mmMode) {
  case FrScale::AUTOSCAN:
    // subsample only large data sets; the size is computed in floating
    // point because the integer product can overflow for big images
    if (double(width)*double(height)*double(parent->fitsCount()) > 1e8)
      return incr_;
    return 1;
  case FrScale::SCAN:
    return 1;
  case FrScale::SAMPLE:
    return incr_;
  default:
    // keyword based modes (DATAMIN, IRAFMIN) select no stride of their own
    break;
  }

  // previously control fell off the end here (undefined return value);
  // visit every pixel, which is always a valid stride
  return 1;
}

// AutoCut
#define AUTOCUTSIZE 10240
// Compute the autocut clip limits aLow/aHigh: histogram the pixels inside
// 'params' between the current min and max, then walk in from each end of
// the histogram until more than half of the rejected fraction
// (100-autoCutPer percent) has been passed.
void FitsData::autoCut(FitsBound* params)
{
  const double lo = getMinDouble();
  const double hi = getMaxDouble();

  // bin it up
  double hist[AUTOCUTSIZE];
  memset(hist,0,sizeof(double)*AUTOCUTSIZE);
  bin(hist, AUTOCUTSIZE, lo, hi, params);

  // find total number of pixels
  int total = 0;
  for (int ii=0; ii<AUTOCUTSIZE; ii++)
    total += hist[ii];

  // number of pixels to drop from each tail
  int cutoff = (total*(100.-autoCutPer)/100.)/2.;

  // walk up from the low end
  int ll = 0;
  int count = 0;
  while (ll < AUTOCUTSIZE) {
    count += hist[ll];
    if (count > cutoff)
      break;
    ll++;
  }

  // walk down from the high end, staying above the low bin
  int hh = AUTOCUTSIZE-1;
  count = 0;
  while (hh > ll+1) {
    count += hist[hh];
    if (count > cutoff)
      break;
    hh--;
  }

  // convert bin numbers back to data values
  aLow = (hi-lo)/AUTOCUTSIZE*ll + lo;
  aHigh = (hi-lo)/AUTOCUTSIZE*hh + lo;
}

// FitsDatam

// Build on the FitsData state and bind 'data' to the pixel array of 'fits',
// viewed as elements of type T.  min/max start at zero until a scan runs.
template<class T> FitsDatam<T>::FitsDatam(FitsFile* fits, Base* p) 
  : FitsData(fits,p)
{
  data = (T*)fits->data();

  max = 0;
  min = max;
}

// swap (optimized)

// A single byte has no byte order: hand the value back unchanged.
template <> unsigned char FitsDatam<unsigned char>::swap(unsigned char* ptr)
{
  return ptr[0];
}

// Return the short stored at ptr with its two bytes in reverse order.
template <> short FitsDatam<short>::swap(short* ptr)
{
  union {
    char bytes[2];
    short value;
  } flipped;

  // copy the source bytes front to back into the result back to front
  const char* src = (const char*)ptr;
  for (int k = (int)sizeof(flipped.bytes) - 1; k >= 0; --k)
    flipped.bytes[k] = *src++;

  return flipped.value;
}

// Return the unsigned short stored at ptr with its two bytes in reverse order.
template <> unsigned short FitsDatam<unsigned short>::swap(unsigned short* ptr)
{
  union {
    char bytes[2];
    unsigned short value;
  } flipped;

  // copy the source bytes front to back into the result back to front
  const char* src = (const char*)ptr;
  for (int k = (int)sizeof(flipped.bytes) - 1; k >= 0; --k)
    flipped.bytes[k] = *src++;

  return flipped.value;
}

// Return the int stored at ptr with its four bytes in reverse order.
template <> int FitsDatam<int>::swap(int* ptr)
{
  union {
    char bytes[4];
    int value;
  } flipped;

  // copy the source bytes front to back into the result back to front
  const char* src = (const char*)ptr;
  for (int k = (int)sizeof(flipped.bytes) - 1; k >= 0; --k)
    flipped.bytes[k] = *src++;

  return flipped.value;
}

// Return the long long stored at ptr with its eight bytes in reverse order.
template <> long long FitsDatam<long long>::swap(long long* ptr)
{
  union {
    char bytes[8];
    long long value;
  } flipped;

  // copy the source bytes front to back into the result back to front
  const char* src = (const char*)ptr;
  for (int k = (int)sizeof(flipped.bytes) - 1; k >= 0; --k)
    flipped.bytes[k] = *src++;

  return flipped.value;
}

// Return the float stored at ptr with its four bytes in reverse order.
template <> float FitsDatam<float>::swap(float* ptr)
{
  union {
    char bytes[4];
    float value;
  } flipped;

  // copy the source bytes front to back into the result back to front
  const char* src = (const char*)ptr;
  for (int k = (int)sizeof(flipped.bytes) - 1; k >= 0; --k)
    flipped.bytes[k] = *src++;

  return flipped.value;
}

// Return the double stored at ptr with its eight bytes in reverse order.
template <> double FitsDatam<double>::swap(double* ptr)
{
  union {
    char bytes[8];
    double value;
  } flipped;

  // copy the source bytes front to back into the result back to front
  const char* src = (const char*)ptr;
  for (int k = (int)sizeof(flipped.bytes) - 1; k >= 0; --k)
    flipped.bytes[k] = *src++;

  return flipped.value;
}

// Private/Protected

// output

// Append 'value' and a terminating NUL to 'str' using the stream's default
// formatting for T.
template<class T> void FitsDatam<T>::output(ostringstream& str, T value)
{
  str << value;
  str << ends;
}

// Append 'value' and a terminating NUL to 'str'.  The byte is widened first
// so it is printed as a number rather than as a character.
template <> void FitsDatam<unsigned char>::output(ostringstream& str, unsigned char value)
{
  const unsigned short number = (unsigned short)value;
  str << number;
  str << ends;
}

// Append 'value' as a number, followed by a terminating NUL, to 'str'.
template <> void FitsDatam<unsigned short>::output(ostringstream& str, unsigned short value)
{
  const unsigned short number = (unsigned short)value;
  str << number;
  str << ends;
}

// updateMinMax

// Make sure min/max are current: rescan the region described by 'params'
// unless a previous scan is still flagged as valid.
template<class T> void FitsDatam<T>::updateMinMax(FitsBound* params)
{
  if (scanValid)
    return;

  scan(params);
  scanValid = 1;
}

// scan (optimized)

// Find the smallest and largest non-blank pixel inside 'params', visiting
// every getIncr()-th pixel along each axis, and store them in min/max.
// If no pixel is accepted, min/max keep their inverted start values.
template <> void FitsDatam<unsigned char>::scan(FitsBound* params)
{
  // start from an inverted range so any accepted pixel replaces both bounds
  min = UCHAR_MAX;
  max = 0;
  int incr = getIncr();

  if (DebugPerf)
    cerr << "scan char... incr=" << incr
         << " (" << params->xmin << ',' << params->ymin 
         << ") to (" << params->xmax << ',' << params->ymax << ") ";

  for (int j=params->ymin; j<params->ymax; j+=incr) {
    unsigned char* ptr = data + j*long(width) + long(params->xmin);
    for (int i=params->xmin; i<params->xmax; i+=incr, ptr+=incr) {
      unsigned char value = *ptr;

      if (hasBlank && value == blank)
        continue; // skip blank pixels

      // test both bounds independently: with an else-if, a pixel that
      // lowers min is never considered for max, so a single pixel or a
      // strictly decreasing sample would leave max at its start value
      if (value < min)
        min = value;
      if (value > max)
        max = value;
    }
  }

  if (DebugPerf) {
    cerr << "end" << endl;
    cerr << "min: " << (unsigned short)min << " max: " 
         << (unsigned short)max << endl;
  }
}

// Find the smallest and largest non-blank pixel inside 'params', visiting
// every getIncr()-th pixel along each axis, and store them in min/max.
// If no pixel is accepted, min/max keep their inverted start values.
template <> void FitsDatam<short>::scan(FitsBound* params)
{
  // start from an inverted range so any accepted pixel replaces both bounds
  min = SHRT_MAX;
  max = SHRT_MIN;
  int incr = getIncr();

  if (DebugPerf)
    cerr << "scan short... incr=" << incr 
         << " (" << params->xmin << ',' << params->ymin
         << ") to (" << params->xmax << ',' << params->ymax << ") ";

  for (int j=params->ymin; j<params->ymax; j+=incr) {
    short* ptr = data + j*long(width) + long(params->xmin);
    for (int i=params->xmin; i<params->xmax; i+=incr, ptr+=incr) {
      // same byte reversal as before, via the shared swap() helper
      short value = !byteswap ? *ptr : swap(ptr);

      if (hasBlank && value == blank)
        continue; // skip blank pixels

      // test both bounds independently: with an else-if, a pixel that
      // lowers min is never considered for max, so a single pixel or a
      // strictly decreasing sample would leave max at its start value
      if (value < min)
        min = value;
      if (value > max)
        max = value;
    }
  }

  if (DebugPerf) {
    cerr << "end" << endl;
    cerr << "min: " << min << " max: " << max << endl;
  }
}

// Find the smallest and largest non-blank pixel inside 'params', visiting
// every getIncr()-th pixel along each axis, and store them in min/max.
// If no pixel is accepted, min/max keep their inverted start values.
template <> void FitsDatam<unsigned short>::scan(FitsBound* params)
{
  // start from an inverted range so any accepted pixel replaces both bounds
  min = USHRT_MAX;
  max = 0;
  int incr = getIncr();

  if (DebugPerf)
    cerr << "scan unsigned short... incr=" << incr 
         << " (" << params->xmin << ',' << params->ymin
         << ") to (" << params->xmax << ',' << params->ymax << ") ";

  for (int j=params->ymin; j<params->ymax; j+=incr) {
    unsigned short* ptr = data + j*long(width) + long(params->xmin);
    for (int i=params->xmin; i<params->xmax; i+=incr, ptr+=incr) {
      // same byte reversal as before, via the shared swap() helper
      unsigned short value = !byteswap ? *ptr : swap(ptr);

      if (hasBlank && value == blank)
        continue; // skip blank pixels

      // test both bounds independently: with an else-if, a pixel that
      // lowers min is never considered for max, so a single pixel or a
      // strictly decreasing sample would leave max at its start value
      if (value < min)
        min = value;
      if (value > max)
        max = value;
    }
  }

  if (DebugPerf) {
    cerr << "end" << endl;
    cerr << "min: " << min << " max: " << max << endl;
  }
}

// Find the smallest and largest non-blank pixel inside 'params', visiting
// every getIncr()-th pixel along each axis, and store them in min/max.
// If no pixel is accepted, min/max keep their inverted start values.
template <> void FitsDatam<int>::scan(FitsBound* params)
{
  // start from an inverted range so any accepted pixel replaces both bounds
  min = INT_MAX;
  max = INT_MIN;
  int incr = getIncr();

  if (DebugPerf)
    cerr << "scan int... incr=" << incr 
         << " (" << params->xmin << ',' << params->ymin
         << ") to (" << params->xmax << ',' << params->ymax << ") ";

  for (int j=params->ymin; j<params->ymax; j+=incr) {
    int* ptr = data + j*long(width) + long(params->xmin);
    for (int i=params->xmin; i<params->xmax; i+=incr, ptr+=incr) {
      // same byte reversal as before, via the shared swap() helper
      int value = !byteswap ? *ptr : swap(ptr);

      if (hasBlank && value == blank)
        continue; // skip blank pixels

      // test both bounds independently: with an else-if, a pixel that
      // lowers min is never considered for max, so a single pixel or a
      // strictly decreasing sample would leave max at its start value
      if (value < min)
        min = value;
      if (value > max)
        max = value;
    }
  }

  if (DebugPerf) {
    cerr << "end" << endl;
    cerr << "min: " << min << " max: " << max << endl;
  }
}

// Find the smallest and largest non-blank pixel inside 'params', visiting
// every getIncr()-th pixel along each axis, and store them in min/max.
// If no pixel is accepted, min/max keep their inverted start values.
template <> void FitsDatam<long long>::scan(FitsBound* params)
{
  // start from an inverted range spanning the full 64 bit domain; the int
  // limits used previously gave a wrong min when every pixel was above
  // INT_MAX and a wrong max when every pixel was below INT_MIN
  min = LLONG_MAX;
  max = LLONG_MIN;
  int incr = getIncr();

  if (DebugPerf)
    cerr << "scan long long... incr=" << incr 
         << " (" << params->xmin << ',' << params->ymin
         << ") to (" << params->xmax << ',' << params->ymax << ") ";

  for (int j=params->ymin; j<params->ymax; j+=incr) {
    long long* ptr = data + j*long(width) + long(params->xmin);
    for (int i=params->xmin; i<params->xmax; i+=incr, ptr+=incr) {
      // same byte reversal as before, via the shared swap() helper
      long long value = !byteswap ? *ptr : swap(ptr);

      if (hasBlank && value == blank)
        continue; // skip blank pixels

      // test both bounds independently: with an else-if, a pixel that
      // lowers min is never considered for max, so a single pixel or a
      // strictly decreasing sample would leave max at its start value
      if (value < min)
        min = value;
      if (value > max)
        max = value;
    }
  }

  if (DebugPerf) {
    cerr << "end" << endl;
    cerr << "min: " << min << " max: " << max << endl;
  }
}

// Find the smallest and largest non-NaN pixel inside 'params', visiting
// every getIncr()-th pixel along each axis, and store them in min/max.
// If no pixel is accepted, min/max keep their inverted start values.
template <> void FitsDatam<float>::scan(FitsBound* params)
{
  // start from an inverted range so any accepted pixel replaces both bounds
  min = FLT_MAX;
  max = -FLT_MAX;
  int incr = getIncr();

  if (DebugPerf)
    cerr << "scan float... incr=" << incr 
         << " (" << params->xmin << ',' << params->ymin
         << ") to (" << params->xmax << ',' << params->ymax << ") ";

  for (int j=params->ymin; j<params->ymax; j+=incr) {
    float* ptr = data + j*long(width) + long(params->xmin);
    for (int i=params->xmin; i<params->xmax; i+=incr, ptr+=incr) {
      // same byte reversal as before, via the shared swap() helper
      float value = !byteswap ? *ptr : swap(ptr);

      if (isnanf(value))
        continue; // skip nan's

      // test both bounds independently: with an else-if, a pixel that
      // lowers min is never considered for max, so a single pixel or a
      // strictly decreasing sample would leave max at its start value
      if (value < min)
        min = value;
      if (value > max)
        max = value;
    }
  }

  if (DebugPerf) {
    cerr << "end" << endl;
    cerr << "min: " << min << " max: " << max << endl;
  }
}

// Find the smallest and largest non-NaN pixel inside 'params', visiting
// every getIncr()-th pixel along each axis, and store them in min/max.
// If no pixel is accepted, min/max keep their inverted start values.
template <> void FitsDatam<double>::scan(FitsBound* params)
{
  // start from an inverted range so any accepted pixel replaces both bounds
  min = DBL_MAX;
  max = -DBL_MAX;
  int incr = getIncr();

  if (DebugPerf)
    cerr << "scan double... incr=" << incr 
         << " (" << params->xmin << ',' << params->ymin
         << ") to (" << params->xmax << ',' << params->ymax << ") ";

  for (int j=params->ymin; j<params->ymax; j+=incr) {
    double* ptr = data + j*long(width) + long(params->xmin);
    for (int i=params->xmin; i<params->xmax; i+=incr, ptr+=incr) {
      // same byte reversal as before, via the shared swap() helper
      double value = !byteswap ? *ptr : swap(ptr);

      if (isnand(value))
        continue; // skip nan's

      // test both bounds independently: with an else-if, a pixel that
      // lowers min is never considered for max, so a single pixel or a
      // strictly decreasing sample would leave max at its start value
      if (value < min)
        min = value;
      if (value > max)
        max = value;
    }
  }

  if (DebugPerf) {
    cerr << "end" << endl;
    cerr << "min: " << min << " max: " << max << endl;
  }
}

// Public

// updateClip
// Bring the clip limits low/high in line with the settings in 'fr'.
// Copies the scale parameters from 'fr', invalidates whichever cached
// results (scan, zscale, autocut) depend on a parameter that changed,
// refreshes min/max, and finally sets low/high according to the clip mode.
//   fr     -- requested scale settings
//   params -- region of the image to analyze
template<class T> void FitsDatam<T>::updateClip(FrScale* fr, FitsBound* params)
{
  // we do not check for blank/nan since this should never be set to nan

  clipMode = fr->clipMode();
  uLow = fr->uLow();
  uHigh = fr->uHigh();

  // DATASEC
  if (scanMode != fr->scanMode()) {
    if (DebugPerf)
      cerr << "reset updateClip" << endl;
    scanValid = 0;
    zscaleValid = 0;
    autoCutValid = 0;
  }
  scanMode = fr->scanMode();

  // MINMAX
  // remember whether the mode changed before it is overwritten below; the
  // AUTOCUT test needs the answer too
  const bool mmModeChanged = (mmMode != fr->mmMode());
  if (mmModeChanged || incr_ != fr->mmIncr())
    scanValid = 0;
  mmMode = fr->mmMode();
  incr_ = fr->mmIncr();

  // ZSCALE
  if (zContrast != fr->zContrast() || 
      zSample != fr->zSample() || 
      zLine != fr->zLine())
    zscaleValid = 0;
  zContrast = fr->zContrast();
  zSample = fr->zSample();
  zLine = fr->zLine();

  // AUTOCUT
  // (comparing mmMode against fr->mmMode() at this point would always be
  // false, because mmMode has already been updated above)
  if (mmModeChanged || autoCutPer != fr->autoCutPer())
    autoCutValid = 0;
  autoCutPer = fr->autoCutPer();

  // always update min/max because everyone needs it

  updateMinMax(params);

  switch (clipMode) {
  case FrScale::MINMAX:
    low = getMinDouble();
    high = getMaxDouble();
    break;

  case FrScale::ZSCALE:
    if (!zscaleValid) {
      if (DebugPerf)
        cerr << "zscale...";

      zscale(params);
      zscaleValid = 1;

      if (DebugPerf)
        cerr << "end" << endl;
    }

    low = zLow;
    high = zHigh;
    break;

  case FrScale::ZMAX:
    // set low via zscale, high via minmax
    if (!zscaleValid) {
      if (DebugPerf)
        cerr << "zscale...";

      zscale(params);
      zscaleValid = 1;

      if (DebugPerf)
        cerr << "end" << endl;
    }

    low = zLow;
    high = getMaxDouble();
    break;

  case FrScale::AUTOCUT:
    if (!autoCutValid) {
      if (DebugPerf)
        cerr << "autocut...";

      autoCut(params);
      autoCutValid = 1;

      if (DebugPerf)
        cerr << "end" << endl;
    }

    low = aLow;
    high = aHigh;
    break;

  case FrScale::USERCLIP:
    low = uLow;
    high = uHigh;
    break;
  }
}

// getValue

// Return the pixel at image position 'vv' as text in the scratch buffer
// 'buf': "nan" for a blank pixel, the scaled value when scaling is active,
// the raw value otherwise, and an empty string outside the image.
template<class T> const char* FitsDatam<T>::getValue(const Vector& vv)
{
  Vector v(vv);

  const long col = (long)v[0];
  const long row = (long)v[1];

  ostringstream str;

  const bool inside = col >= 0 && col < width && row >= 0 && row < height;
  if (!inside)
    str << ends;
  else {
    T value = !byteswap ? data[row*width + col] : 
      swap(data+(row*width + col));

    if (hasBlank && value == blank)
      str << "nan" << ends;
    else if (!hasScaling)
      output(str, value);
    else
      str << value * bscale + bzero << ends;
  }

  // 'ends' appended the terminating NUL, so length() already covers it
  const std::string text = str.str();
  memcpy(buf, text.data(), text.length());
  return buf;
}

// Return the pixel at image position 'vv' as text in the scratch buffer
// 'buf': "nan" for a NaN pixel, the scaled value when scaling is active,
// the raw value otherwise, and an empty string outside the image.
template <> const char* FitsDatam<float>::getValue(const Vector& vv)
{
  Vector v(vv);

  const long col = (long)v[0];
  const long row = (long)v[1];

  ostringstream str;

  const bool inside = col >= 0 && col < width && row >= 0 && row < height;
  if (!inside)
    str << ends;
  else {
    float value = 
      !byteswap ? data[row*width + col] : swap(data+(row*width + col));

    if (isnanf(value))
      str << "nan" << ends;
    else if (!hasScaling)
      str << value << ends;
    else
      str << value * bscale + bzero << ends;
  }

  // 'ends' appended the terminating NUL, so length() already covers it
  const std::string text = str.str();
  memcpy(buf, text.data(), text.length());
  return buf;
}

// Return the pixel at image position 'vv' as text in the scratch buffer
// 'buf': "nan" for a NaN pixel, the scaled value when scaling is active,
// the raw value otherwise, and an empty string outside the image.
template <> const char* FitsDatam<double>::getValue(const Vector& vv)
{
  Vector v(vv);

  const long col = (long)v[0];
  const long row = (long)v[1];

  ostringstream str;

  const bool inside = col >= 0 && col < width && row >= 0 && row < height;
  if (!inside)
    str << ends;
  else {
    double value = 
      !byteswap ? data[row*width + col] : swap(data+(row*width + col));

    if (isnand(value))
      str << "nan" << ends;
    else if (!hasScaling)
      str << value << ends;
    else
      str << value * bscale + bzero << ends;
  }

  // 'ends' appended the terminating NUL, so length() already covers it
  const std::string text = str.str();
  memcpy(buf, text.data(), text.length());
  return buf;
}

// getValueFloat(long) (optimized)
// no bounds checking, we need the speed

// Return pixel i (no bounds check) as a float: nanf for a blank pixel,
// otherwise the value with bscale/bzero applied when scaling is active.
template <> float FitsDatam<unsigned char>::getValueFloat(long i)
{
  const unsigned char value = data[i];

  // fast path: nothing to test or scale
  if (!hasBlank && !hasScaling)
    return value;

  if (hasBlank && value == blank)
    return nanf;

  return hasScaling ? value * bscale + bzero : value;
}

// Return pixel i (no bounds check) as a float: nanf for a blank pixel,
// otherwise the value with bscale/bzero applied when scaling is active.
template <> float FitsDatam<short>::getValueFloat(long i)
{
  // fetch the pixel, reversing its bytes if the data needs it
  const short value = byteswap ? swap(data+i) : data[i];

  // fast path: nothing to test or scale
  if (!hasBlank && !hasScaling)
    return value;

  if (hasBlank && value == blank)
    return nanf;

  return hasScaling ? value * bscale + bzero : value;
}

// Return pixel i (no bounds check) as a float: nanf for a blank pixel,
// otherwise the value with bscale/bzero applied when scaling is active.
template <> float FitsDatam<unsigned short>::getValueFloat(long i)
{
  // fetch the pixel, reversing its bytes if the data needs it
  const unsigned short value = byteswap ? swap(data+i) : data[i];

  // fast path: nothing to test or scale
  if (!hasBlank && !hasScaling)
    return value;

  if (hasBlank && value == blank)
    return nanf;

  return hasScaling ? value * bscale + bzero : value;
}

// Return pixel i (no bounds check) as a float: nanf for a blank pixel,
// otherwise the value with bscale/bzero applied when scaling is active.
template <> float FitsDatam<int>::getValueFloat(long i)
{
  // fetch the pixel, reversing its bytes if the data needs it
  const int value = byteswap ? swap(data+i) : data[i];

  // fast path: nothing to test or scale
  if (!hasBlank && !hasScaling)
    return value;

  if (hasBlank && value == blank)
    return nanf;

  return hasScaling ? value * bscale + bzero : value;
}

// Return pixel i (no bounds check) as a float: nanf for a blank pixel,
// otherwise the value with bscale/bzero applied when scaling is active.
template <> float FitsDatam<long long>::getValueFloat(long i)
{
  // fetch the pixel, reversing its bytes if the data needs it
  const long long value = byteswap ? swap(data+i) : data[i];

  // fast path: nothing to test or scale (converts straight to float)
  if (!hasBlank && !hasScaling)
    return value;

  if (hasBlank && value == blank)
    return nanf;

  return hasScaling ? value * bscale + bzero : value;
}

// Return pixel i (no bounds check) as a float: nanf for a NaN pixel,
// otherwise the value with bscale/bzero applied when scaling is active.
template <> float FitsDatam<float>::getValueFloat(long i)
{
  // fetch the pixel, reversing its bytes if the data needs it
  const float value = byteswap ? swap(data+i) : data[i];

  if (isnanf(value))
    return nanf;

  return hasScaling ? value * bscale + bzero : value;
}

// Return pixel i (no bounds check) as a float: nanf for a NaN pixel,
// otherwise the value narrowed to float, with bscale/bzero applied when
// scaling is active.
template <> float FitsDatam<double>::getValueFloat(long i)
{
  // fetch the pixel, reversing its bytes if the data needs it
  const double value = byteswap ? swap(data+i) : data[i];

  if (isnand(value))
    return nanf;

  return hasScaling ? (float)value * bscale + bzero : (float)value;
}

// getValueFloat(const Vector&)

// Return the pixel at image position 'v' as a float.  Positions outside the
// image and blank pixels give nanf; bscale/bzero are applied when scaling
// is active.
template<class T> float FitsDatam<T>::getValueFloat(const Vector& v)
{
  Vector r = v;
  const long col = (long)r[0];
  const long row = (long)r[1];

  // outside the image
  if (!(col >= 0 && col < width && row >= 0 && row < height))
    return nanf;

  T value = !byteswap ? data[row*width + col] : 
    swap(data+(row*width + col));

  if (hasBlank && value == blank)
    return nanf;

  return hasScaling ? value * bscale + bzero : value;
}

// Return the pixel at image position 'v' as a float.  Positions outside the
// image and NaN pixels give nanf; bscale/bzero are applied when scaling is
// active.
template <> float FitsDatam<float>::getValueFloat(const Vector& v)
{
  Vector r = v;
  const long col = (long)r[0];
  const long row = (long)r[1];

  // outside the image
  if (!(col >= 0 && col < width && row >= 0 && row < height))
    return nanf;

  float value = !byteswap ? data[row*width + col] : 
    swap(data+(row*width + col));

  if (isnanf(value))
    return nanf;

  return hasScaling ? value * bscale + bzero : value;
}

// Return the pixel at image position 'v' narrowed to float.  Positions
// outside the image and NaN pixels give nanf; bscale/bzero are applied when
// scaling is active.
template <> float FitsDatam<double>::getValueFloat(const Vector& v)
{
  Vector r = v;
  const long col = (long)r[0];
  const long row = (long)r[1];

  // outside the image
  if (!(col >= 0 && col < width && row >= 0 && row < height))
    return nanf;

  double value = !byteswap ? data[row*width + col] : 
    swap(data+(row*width + col));

  if (isnand(value))
    return nanf;

  return hasScaling ? (float)value * bscale + bzero : (float)value;
}

// getValueDouble(long) (optimized)
// no bounds checking, we need the speed

// Return pixel i (no bounds check) as a double: nand for a blank pixel,
// otherwise the value with bscale/bzero applied when scaling is active.
template <> double FitsDatam<unsigned char>::getValueDouble(long i)
{
  const unsigned char value = data[i];

  // fast path: nothing to test or scale
  if (!hasBlank && !hasScaling)
    return value;

  if (hasBlank && value == blank)
    return nand;

  return hasScaling ? value * bscale + bzero : value;
}

// Return pixel i (no bounds check) as a double: nand for a blank pixel,
// otherwise the value with bscale/bzero applied when scaling is active.
template <> double FitsDatam<short>::getValueDouble(long i)
{
  // fetch the pixel, reversing its bytes if the data needs it
  const short value = byteswap ? swap(data+i) : data[i];

  // fast path: nothing to test or scale
  if (!hasBlank && !hasScaling)
    return value;

  if (hasBlank && value == blank)
    return nand;

  return hasScaling ? value * bscale + bzero : value;
}

// Return pixel i (no bounds check) as a double: nand for a blank pixel,
// otherwise the value with bscale/bzero applied when scaling is active.
template <> double FitsDatam<unsigned short>::getValueDouble(long i)
{
  // fetch the pixel, reversing its bytes if the data needs it
  const unsigned short value = byteswap ? swap(data+i) : data[i];

  // fast path: nothing to test or scale
  if (!hasBlank && !hasScaling)
    return value;

  if (hasBlank && value == blank)
    return nand;

  return hasScaling ? value * bscale + bzero : value;
}

// Return pixel i (no bounds check) as a double: nand for a blank pixel,
// otherwise the value with bscale/bzero applied when scaling is active.
template <> double FitsDatam<int>::getValueDouble(long i)
{
  // fetch the pixel, reversing its bytes if the data needs it
  const int value = byteswap ? swap(data+i) : data[i];

  // fast path: nothing to test or scale
  if (!hasBlank && !hasScaling)
    return value;

  if (hasBlank && value == blank)
    return nand;

  return hasScaling ? value * bscale + bzero : value;
}

// Return pixel i (no bounds check) as a double: nand for a blank pixel,
// otherwise the value with bscale/bzero applied when scaling is active.
template <> double FitsDatam<long long>::getValueDouble(long i)
{
  // fetch the pixel, reversing its bytes if the data needs it
  const long long value = byteswap ? swap(data+i) : data[i];

  // fast path: nothing to test or scale
  if (!hasBlank && !hasScaling)
    return value;

  if (hasBlank && value == blank)
    return nand;

  return hasScaling ? value * bscale + bzero : value;
}

// Return pixel i (no bounds check) as a double.  Unswapped, unscaled data
// is widened and returned as is; otherwise a NaN pixel gives nand and
// bscale/bzero are applied when scaling is active.
template <> double FitsDatam<float>::getValueDouble(long i)
{
  // fast path: the stored value is returned untouched (no NaN test)
  if (!byteswap && !hasScaling)
    return (double)data[i];

  // fetch the pixel, reversing its bytes if the data needs it
  const float value = byteswap ? swap(data+i) : data[i];

  if (isnanf(value))
    return nand;

  return hasScaling ? (double)value * bscale + bzero : (double)value;
}

// Return pixel i (no bounds check) as a double.  Unswapped, unscaled data
// is returned as is; otherwise a NaN pixel gives nand and bscale/bzero are
// applied when scaling is active.
template <> double FitsDatam<double>::getValueDouble(long i)
{
  // fast path: the stored value is returned untouched (no NaN test)
  if (!byteswap && !hasScaling)
    return (double)data[i];

  // fetch the pixel, reversing its bytes if the data needs it
  const double value = byteswap ? swap(data+i) : data[i];

  if (isnand(value))
    return nand;

  return hasScaling ? value * bscale + bzero : value;
}

// getValueDouble(const Vector&)

// Return the pixel at image position 'v' as a double.  Positions outside
// the image and blank pixels give nand; bscale/bzero are applied when
// scaling is active.
template<class T> double FitsDatam<T>::getValueDouble(const Vector& v)
{
  Vector r = v;
  const long col = (long)r[0];
  const long row = (long)r[1];

  // outside the image
  if (!(col >= 0 && col < width && row >= 0 && row < height))
    return nand;

  T value = !byteswap ? data[row*width + col] : 
    swap(data+(row*width + col));

  if (hasBlank && value == blank)
    return nand;

  return hasScaling ? value * bscale + bzero : value;
}

// Return the pixel at image position 'v' widened to double.  Positions
// outside the image and NaN pixels give nand; bscale/bzero are applied when
// scaling is active.
template <> double FitsDatam<float>::getValueDouble(const Vector& v)
{
  Vector r = v;
  const long col = (long)r[0];
  const long row = (long)r[1];

  // outside the image
  if (!(col >= 0 && col < width && row >= 0 && row < height))
    return nand;

  float value = !byteswap ? data[row*width + col] : 
    swap(data+(row*width + col));

  if (isnanf(value))
    return nand;

  return hasScaling ? (double)value * bscale + bzero : (double)value;
}

// Return the pixel at image position 'v' as a double.  Positions outside
// the image and NaN pixels give nand; bscale/bzero are applied when scaling
// is active.
template <> double FitsDatam<double>::getValueDouble(const Vector& v)
{
  Vector r = v;
  const long col = (long)r[0];
  const long row = (long)r[1];

  // outside the image
  if (!(col >= 0 && col < width && row >= 0 && row < height))
    return nand;

  double value = !byteswap ? data[row*width + col] : 
    swap(data+(row*width + col));

  if (isnand(value))
    return nand;

  return hasScaling ? value * bscale + bzero : value;
}

// getValueMask

// Return 1 if the stored pixel at image position 'v' is non-zero, 0 if it
// is zero or the position lies outside the image.  The raw stored value is
// tested; no byte swapping is applied.
template <class T> int FitsDatam<T>::getValueMask(const Vector& v)
{
  Vector r = v;
  const long col = (long)r[0];
  const long row = (long)r[1];

  // outside the image
  if (!(col >= 0 && col < width && row >= 0 && row < height))
    return 0;

  if (data[row*width + col])
    return 1;
  return 0;
}

// Return 1 if the stored pixel at image position (xx,yy) is non-zero, 0 if
// it is zero or the position lies outside the image.  The raw stored value
// is tested; no byte swapping is applied.
template <class T> int FitsDatam<T>::getValueMask(double xx, double yy)
{
  const long col = (long)xx;
  const long row = (long)yy;

  // outside the image
  if (!(col >= 0 && col < width && row >= 0 && row < height))
    return 0;

  if (data[row*width + col])
    return 1;
  return 0;
}

// Return 1 if stored pixel i is non-zero, 0 otherwise (no bounds check).
template<class T> int FitsDatam<T>::getValueMask(long i)
{
  if (data[i])
    return 1;
  return 0;
}

// getMin

// Return the minimum for the current min/max mode as text in the scratch
// buffer 'buf': the scanned minimum (scaled when scaling is active) for the
// scan based modes, or the DATAMIN / IRAF-MIN keyword value, which yields
// an empty string when the keyword pair is not available.
template<class T> const char* FitsDatam<T>::getMin()
{
  // we do not check for blank since this should never be set to nan
  ostringstream str;

  switch (mmMode) {
  case FrScale::AUTOSCAN:
  case FrScale::SCAN:
  case FrScale::SAMPLE:
    if (!hasScaling)
      output(str,min);
    else
      str << min * bscale + bzero << ends;
    break;

  case FrScale::DATAMIN:
    if (hasdatamin)
      str << datamin;
    str << ends;
    break;

  case FrScale::IRAFMIN:
    if (hasirafmin)
      str << irafmin;
    str << ends;
    break;
  }

  // 'ends' appended the terminating NUL, so length() already covers it
  const std::string text = str.str();
  memcpy(buf, text.data(), text.length());
  return buf;
}

// getMax

// Return the maximum for the current min/max mode as text in the scratch
// buffer 'buf': the scanned maximum (scaled when scaling is active) for the
// scan based modes, or the DATAMAX / IRAF-MAX keyword value, which yields
// an empty string when the keyword pair is not available.
template<class T> const char* FitsDatam<T>::getMax()
{
  // we do not check for blank since this should never be set to nan
  ostringstream str;

  switch (mmMode) {
  case FrScale::AUTOSCAN:
  case FrScale::SCAN:
  case FrScale::SAMPLE:
    if (!hasScaling)
      output(str,max);
    else
      str << max * bscale + bzero << ends;
    break;

  case FrScale::DATAMIN:
    if (hasdatamin)
      str << datamax;
    str << ends;
    break;

  case FrScale::IRAFMIN:
    if (hasirafmin)
      str << irafmax;
    str << ends;
    break;
  }

  // 'ends' appended the terminating NUL, so length() already covers it
  const std::string text = str.str();
  memcpy(buf, text.data(), text.length());
  return buf;
}
// getMinDouble

// Return the minimum for the current min/max mode as a double: the scanned
// minimum (scaled when scaling is active) for the scan based modes, or the
// DATAMIN / IRAF-MIN keyword value, with 0 when the keyword pair is not
// available.
template<class T> double FitsDatam<T>::getMinDouble()
{
  // we do not check for blank since this should never be set to nan
  switch (mmMode) {
  case FrScale::AUTOSCAN:
  case FrScale::SCAN:
  case FrScale::SAMPLE:
    if (hasScaling)
      return min * bscale + bzero;
    return min;

  case FrScale::DATAMIN:
    return hasdatamin ? datamin : 0;

  case FrScale::IRAFMIN:
    return hasirafmin ? irafmin : 0;
  }

  // unknown mode: never fall off the end of a non-void function
  return 0;
}

// getMaxDouble

// Return the maximum for the current min/max mode as a double: the scanned
// maximum (scaled when scaling is active) for the scan based modes, or the
// DATAMAX / IRAF-MAX keyword value, with 0 when the keyword pair is not
// available.
template<class T> double FitsDatam<T>::getMaxDouble()
{
  // we do not check for blank since this should never be set to nan
  switch (mmMode) {
  case FrScale::AUTOSCAN:
  case FrScale::SCAN:
  case FrScale::SAMPLE:
    if (hasScaling)
      return max * bscale + bzero;
    return max;

  case FrScale::DATAMIN:
    return hasdatamin ? datamax : 0;

  case FrScale::IRAFMIN:
    return hasirafmin ? irafmax : 0;
  }

  // unknown mode: never fall off the end of a non-void function
  return 0;
}

// bin

// Accumulate a histogram of the pixels inside 'params' into 'arr'.
//   arr    -- 'length' counters; counts are added to the existing contents
//   mn, mx -- data range mapped linearly onto bins 0 .. length-1
//   params -- region to scan, visiting every getIncr()-th pixel per axis
// Blank pixels and values outside [mn,mx] are not counted.
template<class T> void FitsDatam<T>::bin(double* arr, int length, double mn, 
                                         double mx, FitsBound* params)
{
  if (DebugPerf)
    cerr << "bin...";

  double diff = mx-mn;
  int last = length-1;
  int incr = getIncr();

  // special case: mx-mn=0
  // everything lands in the first bin; the count is the full region size
  if (!diff) {
    arr[0] = (params->xmax-params->xmin)*(params->ymax-params->ymin);
    return;
  }

  for (int j=params->ymin; j<params->ymax; j+=incr) {
    T* ptr = data + j*long(width) + long(params->xmin);
    for (int i=params->xmin; i<params->xmax; i+=incr, ptr+=incr) {

      double value = !byteswap ? *ptr : swap(ptr);
      if (hasBlank && value == blank)
        continue; // skip blank pixels

      if (hasScaling)
        value = value * bscale + bzero;

      // round to the nearest bin
      if (value>=mn && value <=mx)
        arr[(int)(((value-mn)/diff)*last+.5)]++;
    }
  }
}

// Accumulate a histogram of the pixels inside 'params' into 'arr'.
//   arr    -- 'length' counters; counts are added to the existing contents
//   mn, mx -- data range mapped linearly onto bins 0 .. length-1
//   params -- region to scan, visiting every getIncr()-th pixel per axis
// NaN pixels and values outside [mn,mx] are not counted.
template <> void FitsDatam<float>::bin(double* arr, int length, double mn, 
                                       double mx, FitsBound* params)
{
  if (DebugPerf)
    cerr << "bin...";

  double diff = mx-mn;
  int last = length-1;
  int incr = getIncr();

  // special case: mx-mn=0
  // everything lands in the first bin; the count is the full region size
  if (!diff) {
    arr[0] = (params->xmax-params->xmin)*(params->ymax-params->ymin);
    return;
  }

  for (int j=params->ymin; j<params->ymax; j+=incr) {
    float* ptr = data + j*long(width) + long(params->xmin);
    for (int i=params->xmin; i<params->xmax; i+=incr, ptr+=incr) {

      double value = !byteswap ? *ptr : swap(ptr);
      if (isnand(value))
        continue; // skip nan's

      if (hasScaling)
        value = value * bscale + bzero;

      // round to the nearest bin
      if (value>=mn && value <=mx)
        arr[(int)(((value-mn)/diff)*last+.5)]++;
    }
  }
}

// Accumulate a histogram of the pixels inside 'params' into 'arr'.
//   arr    -- 'length' counters; counts are added to the existing contents
//   mn, mx -- data range mapped linearly onto bins 0 .. length-1
//   params -- region to scan, visiting every getIncr()-th pixel per axis
// NaN pixels and values outside [mn,mx] are not counted.
template <> void FitsDatam<double>::bin(double* arr, int length, double mn, 
                                        double mx, FitsBound* params)
{
  if (DebugPerf)
    cerr << "bin...";

  double diff = mx-mn;
  int last = length-1;
  int incr = getIncr();

  // special case: mx-mn=0
  // everything lands in the first bin; the count is the full region size
  if (!diff) {
    arr[0] = (params->xmax-params->xmin)*(params->ymax-params->ymin);
    return;
  }

  for (int j=params->ymin; j<params->ymax; j+=incr) {
    double* ptr = data + j*long(width) + long(params->xmin);
    for (int i=params->xmin; i<params->xmax; i+=incr, ptr+=incr) {

      double value = !byteswap ? *ptr : swap(ptr);
      if (isnand(value))
        continue; // skip nan's

      if (hasScaling)
        value = value * bscale + bzero;

      // round to the nearest bin
      if (value>=mn && value <=mx)
        arr[(int)(((value-mn)/diff)*last+.5)]++;
    }
  }
}

// ZSCALE

// ZSCALE -- Compute the optimal Z1, Z2 (range of greyscale values to be
// displayed) of an image.  For efficiency a statistical subsample of an image
// is used.  The pixel sample evenly subsamples the image in x and y.  The
// entire image is used if the number of pixels in the image is smaller than
// the desired sample.
//
// The sample is accumulated in a buffer and sorted by greyscale value.
// The median value is the central value of the sorted array.  The slope of a
// straight line fitted to the sorted sample is a measure of the standard
// deviation of the sample about the median value.  Our algorithm is to sort
// the sample and perform an iterative fit of a straight line to the sample,
// using pixel rejection to omit gross deviants near the endpoints.  The fitted
// straight line is the transfer function used to map image Z into display Z.
// If more than half the pixels are rejected the full range is used.  The slope
// of the fitted line is divided by the user-supplied contrast factor and the
// final Z1 and Z2 are computed, taking the origin of the fitted line at the
// median value.

// Compute zLow/zHigh for the region described by params.  The sample buffer
// is allocated by zSampleImage and released here.
template<class T> void FitsDatam<T>::zscale(FitsBound* params)
{
  // Subsample the image

  float* sample;
  int npix = zSampleImage(&sample,params);

  // No usable pixel was sampled (zSubSample drops NaN/blank pixels, so this
  // happens when every sampled pixel is blank, or when the region is empty).
  // The buffer then holds no valid value and must not be read; fall back to
  // the same 0/0 limits the constructor starts with.
  if (npix <= 0) {
    zLow = zHigh = 0;
    delete [] sample;
    return;
  }

  int center_pixel = ZSMAX(1, (npix + 1) / 2);

  // Sort the sample, compute the minimum, maximum, and median pixel values

  qsort((void*)sample, npix, sizeof(float), fCompare);
  float zmin = *sample;
  float zmax = *(sample+ZSMAX(npix,1)-1);

  // The median value is the average of the two central values if there 
  // are an even number of pixels in the sample.

  float* left = &(sample[center_pixel - 1]);

  float median;
  if (ZSMOD(npix, 2) == 1 || center_pixel >= npix)
    median = *left;
  else
    median = (*left + *(left+1)) / 2;

  // Fit a line to the sorted sample vector.  If more than half of the
  // pixels in the sample are rejected give up and return the full range.
  // If the user-supplied contrast factor is not 1.0 adjust the scale
  // accordingly and compute zLow and zHigh, the y intercepts at indices 1 and
  // npix.

  int minpix = ZSMAX(ZSMIN_NPIXELS, (int)(npix * ZSMAX_REJECT));
  int ngrow = ZSMAX(1, ZSNINT(npix * .01));
  float zstart, zslope;

  int ngoodpix = zFitLine(sample, npix, &zstart, &zslope, 
			  ZSKREJ, ngrow, ZSMAX_ITERATIONS);

  if (ngoodpix < minpix) {
    // too few pixels survived rejection: use the full sample range
    zLow = zmin;
    zHigh = zmax;
  }
  else {
    // a non-positive contrast leaves the fitted slope unchanged
    if (zContrast > 0)
      zslope = zslope / zContrast;

    // extend the line from the median to both ends of the sample, but
    // never beyond the sample extremes
    zLow = ZSMAX(zmin, median - (center_pixel - 1) * zslope);
    zHigh = ZSMIN(zmax, median + (npix - center_pixel) * zslope);
  }

  delete [] sample;
}

// sampleImage -- Pull an evenly gridded subsample of the pixels inside
// params out of the two-dimensional image and pack it into a newly
// allocated one-dimensional vector, returned through *sample.  Blank pixels
// are replaced by NaN while a row is loaded and are then dropped by
// zSubSample.  The return value is the number of values stored.  The vector
// must be freed (delete []) by our caller.

template<class T> int FitsDatam<T>::zSampleImage(float** sample, FitsBound* params)
{
  // Horizontal grid: the number of pixels each line contributes and the
  // column stride between them.  The grid spans the whole line uniformly.

  int wd = params->xmax - params->xmin;
  int wantPerLine = ZSMAX(1, ZSMIN(wd, zLine));
  int colStride = ZSMAX(2, (wd + wantPerLine-1) / wantPerLine);
  int perLine = ZSMAX(1, (wd + colStride-1) / colStride);

  // Vertical grid: the number of lines to sample and the spacing between
  // them.  A lower limit on the line count keeps the image adequately
  // sampled whatever its size, while the line stride keeps the number of
  // lines touched in a large image small.  Roughly zSample / zLine lines
  // are used, possibly more when the lines are very short.

  int hd = params->ymax - params->ymin;
  int minLines = ZSMAX(1, zSample / zLine);
  int wantLines = ZSMAX(minLines,
			ZSMIN(hd, (zSample + perLine-1) / perLine));
  int lineStride = ZSMAX(2, hd / wantLines);
  int maxLines = (hd + lineStride-1) / lineStride;

  // Output vector (owned by the caller) and a scratch buffer for one row

  int maxpix = perLine * maxLines;
  *sample = new float[maxpix];
  float* rowbuf = new float[wd];

  // Walk the sampled lines

  int total = 0;
  float* out = *sample;
  long xoff = long(params->xmin);

  int line = (lineStride + 1)/2 + params->ymin;
  while (line < params->ymax) {

    // Load the row: the pixels are read from image row line-1, starting
    // at column xmin

    T* src = data + (line-1)*long(width) + xoff;
    for (int i=0; i<wd; i++, src++) {
      T value = !byteswap ? *src : swap(src);

      if (hasBlank && (value == blank))
	rowbuf[i] = nanf;
      else
	rowbuf[i] = hasScaling ? value * bscale + bzero : value;
    }

    // Keep every colStride-th pixel of the row

    int got = zSubSample(rowbuf, out, perLine, colStride);
    out += got;
    total += got;
    if (total >= maxpix)
      break;

    line += lineStride;
  }

  delete [] rowbuf;
  return total;
}

// sampleImage (float specialization) -- Same gridded subsampling as the
// generic version, except that unusable pixels are recognised with
// isnanf() rather than by comparing against blank.  The vector returned
// through *sample must be freed (delete []) by our caller; the return
// value is the number of values stored in it.

template <> int FitsDatam<float>::zSampleImage(float** sample, FitsBound* params)
{
  // Horizontal grid: the number of pixels each line contributes and the
  // column stride between them.  The grid spans the whole line uniformly.

  int wd = params->xmax - params->xmin;
  int wantPerLine = ZSMAX(1, ZSMIN(wd, zLine));
  int colStride = ZSMAX(2, (wd + wantPerLine-1) / wantPerLine);
  int perLine = ZSMAX(1, (wd + colStride-1) / colStride);

  // Vertical grid: the number of lines to sample and the spacing between
  // them.  A lower limit on the line count keeps the image adequately
  // sampled whatever its size, while the line stride keeps the number of
  // lines touched in a large image small.  Roughly zSample / zLine lines
  // are used, possibly more when the lines are very short.

  int hd = params->ymax - params->ymin;
  int minLines = ZSMAX(1, zSample / zLine);
  int wantLines = ZSMAX(minLines,
			ZSMIN(hd, (zSample + perLine-1) / perLine));
  int lineStride = ZSMAX(2, hd / wantLines);
  int maxLines = (hd + lineStride-1) / lineStride;

  // Output vector (owned by the caller) and a scratch buffer for one row

  int maxpix = perLine * maxLines;
  *sample = new float[maxpix];
  float* rowbuf = new float[wd];

  // Walk the sampled lines

  int total = 0;
  float* out = *sample;
  long xoff = long(params->xmin);

  int line = (lineStride + 1)/2 + params->ymin;
  while (line < params->ymax) {

    // Load the row: the pixels are read from image row line-1, starting
    // at column xmin

    float* src = data + (line-1)*long(width) + xoff;
    for (int i=0; i<wd; i++, src++) {
      float value = !byteswap ? *src : swap(src);

      if (isnanf(value))
	rowbuf[i] = nanf;
      else
	rowbuf[i] = hasScaling ? value * bscale + bzero : value;
    }

    // Keep every colStride-th pixel of the row

    int got = zSubSample(rowbuf, out, perLine, colStride);
    out += got;
    total += got;
    if (total >= maxpix)
      break;

    line += lineStride;
  }

  delete [] rowbuf;
  return total;
}

// sampleImage (double specialization) -- Same gridded subsampling as the
// generic version, except that unusable pixels are recognised with
// isnand() rather than by comparing against blank, and each value is
// narrowed to float before any scaling is applied.  The vector returned
// through *sample must be freed (delete []) by our caller; the return
// value is the number of values stored in it.

template <> int FitsDatam<double>::zSampleImage(float** sample, FitsBound* params)
{
  // Horizontal grid: the number of pixels each line contributes and the
  // column stride between them.  The grid spans the whole line uniformly.

  int wd = params->xmax - params->xmin;
  int wantPerLine = ZSMAX(1, ZSMIN(wd, zLine));
  int colStride = ZSMAX(2, (wd + wantPerLine-1) / wantPerLine);
  int perLine = ZSMAX(1, (wd + colStride-1) / colStride);

  // Vertical grid: the number of lines to sample and the spacing between
  // them.  A lower limit on the line count keeps the image adequately
  // sampled whatever its size, while the line stride keeps the number of
  // lines touched in a large image small.  Roughly zSample / zLine lines
  // are used, possibly more when the lines are very short.

  int hd = params->ymax - params->ymin;
  int minLines = ZSMAX(1, zSample / zLine);
  int wantLines = ZSMAX(minLines,
			ZSMIN(hd, (zSample + perLine-1) / perLine));
  int lineStride = ZSMAX(2, hd / wantLines);
  int maxLines = (hd + lineStride-1) / lineStride;

  // Output vector (owned by the caller) and a scratch buffer for one row

  int maxpix = perLine * maxLines;
  *sample = new float[maxpix];
  float* rowbuf = new float[wd];

  // Walk the sampled lines

  int total = 0;
  float* out = *sample;
  long xoff = long(params->xmin);

  int line = (lineStride + 1)/2 + params->ymin;
  while (line < params->ymax) {

    // Load the row: the pixels are read from image row line-1, starting
    // at column xmin

    double* src = data + (line-1)*long(width) + xoff;
    for (int i=0; i<wd; i++, src++) {
      double value = !byteswap ? *src : swap(src);

      if (isnand(value))
	rowbuf[i] = nanf;
      else
	rowbuf[i] = hasScaling ? (float)value * bscale + bzero : (float)value;
    }

    // Keep every colStride-th pixel of the row

    int got = zSubSample(rowbuf, out, perLine, colStride);
    out += got;
    total += got;
    if (total >= maxpix)
      break;

    line += lineStride;
  }

  delete [] rowbuf;
  return total;
}

// subSample -- Subsample an image line.  Starting with the first pixel of
// a, visit npix pixels spaced "step" apart and copy those that are not NaN
// to b.  The number of pixels actually copied is returned.

int FitsData::zSubSample(float* a, float* b, int npix, int step)
{
  // any step below 1 is treated as 1
  if (step < 1)
    step = 1;

  int copied = 0;
  for (int i=0, ip=0; i<npix; i++, ip+=step) {
    float pixel = a[ip];
    if (isnanf(pixel))
      continue; // nan pixels are left out of the sample

    b[copied] = pixel;
    copied++;
  }

  return copied;
}

// fitLine -- Fit a straight line to a data array of type real.  This is
// an iterative fitting algorithm, wherein points further than ksigma from the
// current fit are excluded from the next fit.  Convergence occurs when the
// next iteration does not decrease the number of pixels in the fit, or when
// there are no pixels left.  The number of pixels left after pixel rejection
// is returned as the function value.
//
//   sampleData  the npix sample values to fit (zscale passes its sorted
//               sample)
//   zstart      out: value of the fitted line at the first sample
//   zslope      out: slope of the fitted line per sample index
//   krej        rejection threshold in units of sigma
//   ngrow       growing radius handed to zRejectPixels
//   maxiter     maximum number of fit/reject iterations
//
// When npix <= 0 nothing is fitted and zstart/zslope are left untouched.

int FitsData::zFitLine (float* sampleData, int npix, float* zstart, 
			float* zslope, float krej, int ngrow, int maxiter)
{
  float	xscale;
  if (npix <= 0)
    return (0);
  else if (npix == 1) {
    // A single sample is its own fit.  Element 0 is the only valid one;
    // index 1 would read past the end of the sample.
    *zstart = sampleData[0];
    *zslope = 0.0;

    return (1);
  }
  else
    xscale = 2.0 / (npix - 1);

  // Allocate a buffer for data minus fitted curve, another for the
  // normalized X values, and another to flag rejected pixels.

  float* flat = new float[npix];
  float* normx = new float[npix];
  short* badpix = new short[npix];

  for (int k=0; k<npix; k++)
    badpix[k]=0;

  // Compute normalized X vector.  The data X values [1:npix] are
  // normalized to the range [-1:1].  This diagonalizes the lsq matrix
  // and reduces its condition number.

  for (int i=0; i<npix; i++)
    normx[i] = i * xscale - 1.0;

  // Fit a line with no pixel rejection.  Accumulate the elements of the
  // matrix and data vector.  The matrix M is diagonal with
  // M[1,1] = sum x**2 and M[2,2] = ngoodpix.  The data vector is
  // DV[1] = sum (data[i] * x[i]) and DV[2] = sum (data[i]).

  double sumxsqr = 0;
  double sumxz = 0;
  // sumx is not accumulated below: the normalized X values are symmetric
  // about zero, so their sum vanishes until pixels are rejected
  double sumx = 0;
  double sumz = 0;

  for (int j=0; j<npix; j++) {
    float x = normx[j];
    float z = sampleData[j];
    sumxsqr = sumxsqr + (x * x);
    sumxz   = sumxz + z * x;
    sumz    = sumz + z;
  }

  // Solve for the coefficients of the fitted line

  float z0 = sumz / npix;
  float dz = sumxz / sumxsqr;

  // Iterate, fitting a new line in each iteration. Compute the flattened
  // data vector and the sigma of the flat vector.  Compute the lower and
  // upper k-sigma pixel rejection thresholds.  Run down the flat array
  // and detect pixels to be rejected from the fit.  Reject pixels from
  // the fit by subtracting their contributions from the matrix sums and
  // marking the pixel as rejected.

  int ngoodpix = npix;
  int last_ngoodpix;
  int minpix = ZSMAX(ZSMIN_NPIXELS, (int) (npix * ZSMAX_REJECT));

  for (int niter=0;  niter < maxiter;  niter++) {
    last_ngoodpix = ngoodpix;

    // Subtract the fitted line from the data array

    zFlattenData(sampleData, flat, normx, npix, z0, dz);

    // Compute the k-sigma rejection threshold.  In principle this
    // could be more efficiently computed using the matrix sums
    // accumulated when the line was fitted, but there are problems with
    // numerical stability with that approach.

    float mean;
    float sigma;
    ngoodpix = zComputeSigma (flat, badpix, npix, &mean, &sigma);
    float threshold = sigma * krej;

    // Detect and reject pixels further than ksigma from the fitted
    // line.

    ngoodpix = zRejectPixels(sampleData, flat, normx,
			     badpix, npix, &sumxsqr, &sumxz, &sumx, &sumz, 
			     threshold, ngrow);

    // Solve for the coefficients of the fitted line.  Note that after
    // pixel rejection the sum of the X values need no longer be zero.

    if (ngoodpix > 0) {
      double rowrat = sumx / sumxsqr;
      z0 = (sumz - rowrat * sumxz) / (ngoodpix - rowrat * sumx);
      dz = (sumxz - z0 * sumx) / sumxsqr;
    }

    // Stop when no further pixel was rejected, or when too few are left

    if (ngoodpix >= last_ngoodpix || ngoodpix < minpix)
      break;
  }

  // Transform the line coefficients back to the X range [1:npix]

  *zstart = z0 - dz;
  *zslope = dz * xscale;

  delete [] flat;
  delete [] normx;
  delete [] badpix;

  return ngoodpix;
}


// flattenData -- Evaluate the fitted line (x * dz + z0) at every sample and
// subtract it from the data array, returning the flattened data in FLAT.

void FitsData::zFlattenData(float* sampleData, float* flat, float* x, 
			     int npix, float z0, float dz)
{
  int i = 0;
  while (i < npix) {
    flat[i] = sampleData[i] - (x[i] * dz + z0);
    ++i;
  }
}

// computeSigma -- Compute the root mean square deviation from the
// mean of a flattened array.  Ignore rejected pixels.
//
//   a       flattened data (residuals), npix values
//   badpix  per-pixel flags; only pixels flagged GOOD_PIXEL are used
//   mean    out: mean of the good pixels (ZSINDEF when there are none)
//   sigma   out: deviation computed with an n-1 denominator (ZSINDEF when
//           fewer than two good pixels exist)
//
// The number of good pixels is returned as the function value.

int FitsData::zComputeSigma(float* a, short* badpix, int npix, 
			    float* mean, float* sigma)
{
  int ngoodpix = 0;
  double sum = 0.0;
  double sumsq = 0.0;

  // Accumulate sum and sum of squares

  for (int i=0; i < npix; i++)
    if (badpix[i] == GOOD_PIXEL) {
      float pixval = a[i];
      ngoodpix = ngoodpix + 1;
      sum = sum + pixval;
      sumsq = sumsq + pixval * pixval;
    }

  // Compute mean and sigma

  switch (ngoodpix) {
  case 0:
    *mean = ZSINDEF;
    *sigma = ZSINDEF;
    break;
  case 1:
    *mean = sum;
    *sigma = ZSINDEF;
    break;
  default:
    {
      *mean = sum / ngoodpix;

      // n*(n-1) is formed in double: as an int product it overflows once
      // the sample holds more than about 46000 good pixels
      double pairs = double(ngoodpix) * (ngoodpix - 1);
      double temp = sumsq / (ngoodpix-1) - (sum*sum) / pairs;
      if (temp < 0)		// possible with roundoff error
	*sigma = 0.0;
      else
	*sigma = sqrt(temp);
    }
  }

  return ngoodpix;
}

// rejectPixels -- Detect and reject pixels more than "threshold" greyscale
// units from the fitted line.  The residuals about the fitted line are given
// by the "flat" array, while the raw data is in "sampleData".  Each time a
// pixel is rejected its contributions are subtracted from the matrix sums
// and the pixel is flagged as rejected.  When a pixel is rejected, its
// neighbors inside the growing window are rejected as well.  This speeds up
// convergence considerably and produces a more stringent rejection criterion
// which takes advantage of the fact that bad pixels tend to be clumped.  The
// number of pixels left in the fit is returned as the function value.

int FitsData::zRejectPixels(float* sampleData, float* flat, float *normx, 
			    short *badpix, int npix, double* sumxsqr, 
			    double* sumxz, double* sumx, double* sumz, 
			    float threshold, int ngrow)
{
  int remaining = npix;
  float lcut = -threshold;
  float hcut = threshold;

  for (int i=0; i < npix; i++) {

    // Pixels rejected earlier only lower the count

    if (badpix[i] == BAD_PIXEL) {
      remaining = remaining - 1;
      continue;
    }

    float residual = flat[i];
    bool outside = residual < lcut || residual > hcut;
    if (!outside)
      continue;

    // Reject the pixel and its neighbors inside the growing window,
    // which runs from i-ngrow up to (but not including) i+ngrow,
    // clipped to the array.  We must be careful how we do this to
    // avoid directional effects.  Thresholding is not turned off for
    // pixels in the forward direction; they are only marked for
    // rejection and are not rejected until they have been thresholded
    // themselves.  If this is not done growing will not be symmetric.

    int first = ZSMAX(0,i-ngrow);
    int stop = ZSMIN(npix,i+ngrow);

    for (int j=first; j < stop; j++) {
      if (badpix[j] == BAD_PIXEL)
	continue;

      if (j > i) {
	// forward neighbor: mark only
	badpix[j] = REJECT_PIXEL;
	continue;
      }

      // the pixel itself or a backward neighbor: take it out of the sums

      double x = normx[j];
      double z = sampleData[j];
      *sumxsqr -= (x * x);
      *sumxz -= z * x;
      *sumx -= x;
      *sumz -= z;
      badpix[j] = BAD_PIXEL;
      remaining = remaining - 1;
    }
  }

  return remaining;
}

// Explicit instantiations of FitsDatam, one for each pixel type listed
// below.
template class FitsDatam<unsigned char>;
template class FitsDatam<short>;
template class FitsDatam<unsigned short>;
template class FitsDatam<int>;
template class FitsDatam<long long>;
template class FitsDatam<float>;
template class FitsDatam<double>;
