//
// WordCursorOne.cc
//
// Part of the ht://Dig package   <http://www.htdig.org/>
// Copyright (c) 1999, 2000, 2001 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU General Public License version 2 or later
// <http://www.gnu.org/copyleft/gpl.html>
//
// $Id: WordCursorOne.cc,v 1.11 2008/06/02 19:24:41 sebdiaz Exp $
//

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif /* HAVE_CONFIG_H */

#include <stdlib.h>

#include "WordCursorOne.h"
#include "WordListOne.h"
#include "WordDead.h"

#include <stdio.h>

//
// WordCursorOne implementation
// 

// *****************************************************************************
WordCursorOne::WordCursorOne(WordList *words) :
  WordCursor(words->GetContext()),
  prefixKey(words->GetContext())
{
  Clear();
}

// *****************************************************************************
WordCursorOne::WordCursorOne(WordList *words, wordlist_walk_callback_t callback, Object * callback_data) :
  WordCursor(words->GetContext()),
  prefixKey(words->GetContext())
{
  Clear();
  Initialize(words, WordKey(words->GetContext()), callback, callback_data, HTDIG_WORDLIST_WALKER);
}

// *****************************************************************************
WordCursorOne::WordCursorOne(WordList *words, const WordKey &searchKey, int action) :
  WordCursor(words->GetContext()),
  prefixKey(words->GetContext())
{
  Clear();
  Initialize(words, searchKey, 0, 0, action);
}

// *****************************************************************************
WordCursorOne::WordCursorOne(WordList *words, const WordKey &searchKey, wordlist_walk_callback_t callback, Object * callback_data) :
  WordCursor(words->GetContext()),
  prefixKey(words->GetContext())
{
  Clear();
  Initialize(words, searchKey, callback, callback_data, HTDIG_WORDLIST_WALKER);
}

// *****************************************************************************
//
int WordCursorOne::Initialize(WordList *nwords, const WordKey &nsearchKey, wordlist_walk_callback_t ncallback, Object *ncallback_data, int naction)
{
  action = naction;
  searchKey = nsearchKey;
  callback = ncallback;
  callback_data = ncallback_data;
  words = nwords;
  cursor = ((WordListOne*)nwords)->db->Cursor();
  return OK;
}

// *****************************************************************************
//
void 
WordCursorOne::Clear()
{
  searchKey.Clear();
  action = 0;
  callback = 0;
  callback_data = 0;
  ClearResult();
  ClearInternal();
  words = 0;

  //
  // Debugging section. 
  //
  traceRes = 0;
}

// *****************************************************************************
//
void
WordCursorOne::ClearInternal()
{
  key.trunc();
  data.trunc();
  prefixKey.Clear();
  cursor_get_flags = DB_SET_RANGE;
  searchKeyIsSameAsPrefix = 0;
}

// *****************************************************************************
//
void
WordCursorOne::ClearResult()
{
  collectRes = 0;
  found.Clear();
  status = OK;
}

int
WordCursorOne::ContextRestore(const String& buffer)
{
  int ret = OK;
  if(!buffer.empty()) {
    WordKey key(words->GetContext(), buffer);
    if((ret = Seek(key)) != OK)
      return ret;
    //
    // Move to restored position so that next call to
    // WalkNext will go above the restored position.
    //
    if((ret = WalkNext()) != OK)
      return ret;
  }
  return ret;
}

// *****************************************************************************
//
// Walk and collect data from the word database.
//
// If action bit HTDIG_WORDLIST_COLLECTOR is set WordReferences are
//    stored in a list and the list is returned.
// If action bit HTDIG_WORDLIST_WALKER is set the <callback> function
//    is called for each WordReference found. No list is built and the
//    function returns a null pointer.
//
// The <searchKey> argument may be a fully qualified key, containing precise values for each
// field of the key. It may also contain only some fields of the key. In both cases
// all the word occurrences matching the fields set in the key are retrieved. It may
// be fast if key is a prefix (see WordKey::Prefix for a definition). It may
// be *slow* if key is not a prefix because it forces a complete walk of the
// index. 
//
int 
WordCursorOne::Walk()
{
  int ret;
  if((ret = WalkInit()) != OK) return ret;
  while((ret = WalkNext()) == OK)
    ;
  int ret1;
  if((ret1 = WalkFinish()) != OK) return ret1;

  return (ret & WORD_WALK_RESULT_MASK) == WORD_WALK_ATEND ? OK : NOTOK;
}

int 
WordCursorOne::WalkInit()
{
  ClearResult();
  ClearInternal();

  WordReference wordRef(words->GetContext());

  {
    int ret;
    if((ret = cursor->Open()) != 0)
      return ret;
  }

  if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: action = %d, SearchKey = %s\n", action, (char*)searchKey.Get());

  if(action & HTDIG_WORDLIST_COLLECTOR) {
    collectRes = new List;
  }

  WordKey first_key(words->GetContext());
  //
  // Move the cursor to start walking and do some sanity checks.
  //
  if(searchKey.Empty()) {
    //
    // Move past the stat data
    //
    if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: at start of keys because search key is empty\n");

  } else {
    prefixKey = searchKey;
    //
    // If the key is a prefix, the start key is
    // the longest possible prefix contained in the key. If the
    // key does not contain any prefix, start from the beginning
    // of the file.
    //
    if(prefixKey.PrefixOnly() == NOTOK) {
      if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: at start of keys because search key is not a prefix\n");
      prefixKey.Clear();
    } else {
      if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: go to %s \n", (char*)prefixKey.Get());
      first_key = prefixKey;
    }
  }

  first_key.Pack(key);
  //
  // Allow Seek immediately after Init
  //
  found.Key() = first_key;

  status = OK;
  searchKeyIsSameAsPrefix = searchKey.ExactEqual(prefixKey);
  cursor_get_flags = DB_SET_RANGE;

  return OK;
}

int 
WordCursorOne::WalkRewind()
{
  WordKey first_key(words->GetContext());
  //
  // Move the cursor to start walking and do some sanity checks.
  //
  if(searchKey.Empty()) {
    first_key.Clear();
  } else {
    prefixKey = searchKey;
    //
    // If the key is a prefix, the start key is
    // the longest possible prefix contained in the key. If the
    // key does not contain any prefix, start from the beginning
    // of the file.
    //
    if(prefixKey.PrefixOnly() == NOTOK) {
      prefixKey.Clear();
      first_key.Clear();
    } else {
      first_key = prefixKey;
    }
  }

  first_key.Pack(key);
  //
  // Allow Seek immediately after Rewind
  //
  found.Key() = first_key;

  status = OK;
  searchKeyIsSameAsPrefix = searchKey.ExactEqual(prefixKey);
  cursor_get_flags = DB_SET_RANGE;

  return OK;
}

int 
WordCursorOne::WalkNext()
{
  int ret;
  while((ret = WalkNextStep()) == WORD_WALK_NOMATCH_FAILED)
    if(words->verbose > 1) fprintf(stderr, "WordCursorOne::WalkNext: got false match, retry\n");

  return ret;
}

int 
WordCursorOne::WalkNextStep()
{
  status = OK;

  {
    int error;
    if((error = cursor->Get(key, data, cursor_get_flags)) != 0) {
      if(error == DB_NOTFOUND) {
	if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for next %s, no more matches\n", (char*)searchKey.Get());
	found.Clear();
	return (status = WORD_WALK_ATEND);
      } else {
	return WORD_WALK_GET_FAILED;
      }
    }
  }

  //
  // Next step operation is always sequential walk
  //
  cursor_get_flags = DB_NEXT;

  found.Unpack(key, data);

  if(words->Dead()->Exists(found.Key()))
    return WORD_WALK_NOMATCH_FAILED;
  if(WalkNextExclude(found.Key()))
    return WORD_WALK_NOMATCH_FAILED;

  if(traceRes) traceRes->Add(new WordReference(found));

  if(words->verbose > 1) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for next %s, candidate is %s\n", (char*)searchKey.Get(), (char*)found.Get());

  //
  // Don't bother to compare keys if we want to walk all the entries
  //
  if(!(searchKey.Empty()))     {
    // examples
    // searchKey: aabc 1 ? ? ?
    // prefixKey: aabc 1 ? ? ?

    //
    // Stop loop if we reach a record whose key does not
    // match prefix key requirement, provided we have a valid
    // prefix key.
    // (ie. stop loop if we're past last possible match...)
    //
    if(!prefixKey.Empty() &&
       !prefixKey.Equal(found.Key()))	{
      if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for next %s, no more matches because found a key that is greater than searchKey\n", (char*)searchKey.Get());
      return (status = WORD_WALK_ATEND|WORD_WALK_ATEND_NOMATCH);
    }

    //
    // Skip entries that do not exactly match the specified key.
    // 
    if(!searchKeyIsSameAsPrefix && 
       !searchKey.Equal(found.Key()))	{
      int ret;
      switch((ret = SkipUselessSequentialWalking()) & WORD_WALK_RESULT_MASK) {
      case OK:
	if(words->verbose > 1) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for next %s, false match jump to %s\n", (char*)searchKey.Get(), (char*)found.Get());
	return WORD_WALK_NOMATCH_FAILED;
	break;
      case WORD_WALK_ATEND:
	if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for next %s, no more matches according to SkipUselessSequentialWalking\n", (char*)searchKey.Get());
	return status = ret;
	break;
      default:
	fprintf(stderr, "WordCursorOne::WalkNextStep: SkipUselessSequentialWalking failed %d\n", ret);
	return NOTOK;
	break;
      }
    }
  }

  if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for next %s, found %s\n", (char*)searchKey.Get(), (char*)found.Get());

  if(collectRes) {
    if(words->verbose > 2) fprintf(stderr, "WordCursorOne::WalkNextStep: collect\n");
    collectRes->Add(new WordReference(found));
  } else if(callback) {
    if(words->verbose > 2) fprintf(stderr, "WordCursorOne::WalkNextStep: calling callback\n");
    int ret = (*callback)(words, *cursor, &found, *(callback_data) );
    //
    // The callback function tells us that something went wrong, might
    // as well stop walking.
    //
    if(ret != OK) {
      if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: callback returned NOTOK");
      return (status = WORD_WALK_CALLBACK_FAILED|WORD_WALK_ATEND);
    }
  }

  return OK;
}

int 
WordCursorOne::WalkFinish()
{
  if(words->verbose) fprintf(stderr, "WordCursorOne::WalkFinish\n");

  return cursor->Close() == 0 ? OK : NOTOK;
}

// *****************************************************************************
//
// Helper for SkipUselessSequentialWalking. 
// Undefine in foundKey all fields defined in searchKey
// so that they are not considered by SetToFollowing.
// It could become a method of WordKey but lacks generalisation and
// from what I see it is a rather specific operation.
//
static inline void complement(WordContext* context, WordKey& key, const WordKey& mask)
{
  int nfields = context->GetKeyInfo().nfields;
  int i;
  //
  // Undefine in 'key' all fields defined in 'mask'
  //
  for(i = 0; i < nfields; i++) {
    if(mask.IsDefined(i))
      key.Undefined(i);
    else
      key.SetDefined(i);
  }
}

// *****************************************************************************
//
// Find out if we should better jump to the next possible key (DB_SET_RANGE) instead of 
// sequential iterating (DB_NEXT). 
// If it is decided that jump is a better move :
//   cursor_set_flags = DB_SET_RANGE
//   key = calculated next possible key
// Else
//   do nothing
// Return values
// OK: skipping successfull.
// WORD_WALK_ATEND : no more possible match, reached the maximum
// WORD_WALK_FAILED: general failure, occurs if called and no skipping
//                   necessary.
// 
// Sequential searching can waste time by searching all keys, for example:
// If searching for Key: argh <DEF> <UNDEF> 10
// Under normal circonstances we would do the following
// 
//    DATA            STATUS   ACTION
// 1: argh 1 10       match    DB_NEXT
// 2: argh 2 11       nomatch  DB_NEXT
// 3: argh 2 15       nomatch  DB_NEXT
// 4: argh 2 20       nomatch  DB_NEXT
// 5: argh 2 30       nomatch  DB_NEXT
// 6: argh 5 1        nomatch  DB_NEXT
// 7: argh 5 8        nomatch  DB_NEXT
// 8: argh 8 6        nomatch  DB_NEXT
//
// But the optimal would be
//
//    DATA            STATUS   ACTION
// 1: argh 1 10       match    DB_NEXT
// 2: argh 2 11       nomatch  DB_SET_RANGE argh 3 10
// 3: argh 2 15       
// 4: argh 2 20       
// 5: argh 2 30
// 6: argh 5 1        nomatch  DB_SET_RANGE argh 5 10
// 7: argh 5 8
// 8: argh 8 6        nomatch  DB_SET_RANGE argh 8 10
//
// That saves a lot of unecessary hit. The underlying logic is a bit 
// more complex but you have the idea.
//
int
WordCursorOne::SkipUselessSequentialWalking()
{
  WordKey&      foundKey         = found.Key();

  int nfields = words->GetContext()->GetKeyInfo().nfields;
  int i;

  //
  // Find out how the searchKey and the foundKey differ.
  //
  int diff_field = 0;
  int lower = 0;
  if(!foundKey.Diff(searchKey, diff_field, lower)) {
    //
    // foundKey matches searchKey (no difference), don't
    // skip, everything is fine. The caller of SkipUselessSequentialWalking
    // is expected to avoid this case for efficiency.
    //
    return WORD_WALK_FAILED;
  }

  if(words->verbose > 2) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: looking for next %s, candidate is %s\n", (char*)searchKey.Get(), (char*)foundKey.Get());

  //
  // Undefine in foundKey all fields defined in searchKey
  // so that they are not considered by SetToFollowing.
  //
  complement(words->GetContext(), foundKey, searchKey);

  //
  // If the key found is lower than the searched key when
  // considering only the fields defined in the search key,
  // we only need to enforce the key to get the match.
  // Otherwise we need to increment the found key to jump
  // properly.
  //
  if(lower) {
    if(words->verbose > 1) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: enforcing the search constraint is enough to jump forward\n");
    for(i = diff_field + 1; i < nfields; i++)
      if(foundKey.IsDefined(i)) foundKey.Set(i, 0);
  } else {
    if(words->verbose > 1) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: increment the key to jump forward\n");
    //
    // diff_field - 1 is not really necessary because diff_field is undefined
    // in foundKey and would therefore be ignored by SetToFollowing. We write
    // diff_field - 1 to clearly state that incrementing begins just before the
    // field for which a difference was found.
    //
    int ret;
    if((ret = foundKey.SetToFollowing(diff_field - 1)) != OK)
      return ret;
  }

  //
  // Copy all fields defined in searchKey into foundKey. This will copy
  // searchKey in foundKey because all these fields have been
  // previously undefined in foundKey.
  //
  foundKey.Merge(searchKey);

  if(words->verbose > 2) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: looking for next %s, jump to %s\n", (char*)searchKey.Get(), (char*)foundKey.Get());

  //
  // Instruct Next function to jump to the calculated key
  //
  if(foundKey.Pack(key) == NOTOK) {
    return WORD_WALK_FAILED;
  }
  cursor_get_flags = DB_SET_RANGE;

  return OK;
}

// *****************************************************************************
//
// Copy defined fields in patch into foundKey and 
// initialize internal state so that WalkNext jumps to
// this key next time it's called.
//
// Technically this means : Override latest key found (found data member)
// with patch fields values, starting from the first field set in
// patch up to the last.  Pack the result in the key field and set
// cursor_get_flags to DB_SET_RANGE.
//
int
WordCursorOne::Seek(const WordKey& patch)
{
  int nfields = words->GetContext()->GetKeyInfo().nfields;
  WordKey pos = searchKey;

  if(patch.Empty()) {
    fprintf(stderr, "WordCursorOne::Seek: empty patch is useless\n");
    return NOTOK;
  }
  
  int i;
  //
  // Leave the most significant fields untouched if they are
  // already set, or set them to zero if they are not.
  //
  for(i = WORD_KEY_WORD + 1; i < nfields; i++) {
    if(patch.IsDefined(i))
      break;
    else if(!pos.IsDefined(i))
      pos.Set(i, 0);
  }
  //
  // From the first value set in the patch to the end
  // override.
  //
  for(; i < nfields; i++) {
    if(patch.IsDefined(i))
      pos.Set(i, patch.Get(i));
    else
      pos.Set(i, 0);
  }

  if(!pos.Filled()) {
    fprintf(stderr, "WordCursorOne::Seek: only makes sense if the resulting key is fully defined\n");
    return NOTOK;
  }

  if(words->verbose > 2) fprintf(stderr, "WordCursorOne::Seek: seek to %s\n", (char*)pos.Get());

  //
  // Next move will jump to the patched key
  //
  pos.Pack(key);
  cursor_get_flags = DB_SET_RANGE;
  
  return OK;
}

//
// Convert the whole structure to an ascii string description
//
int WordCursorOne::Get(String& bufferout) const
{
  String tmp;
  bufferout.trunc();

  searchKey.Get(tmp);
  bufferout << "Input: searchKey = " << tmp << ", action = " <<  action << "; Output: collectRes " << (collectRes ? "set" : "not set");
  found.Get(tmp);
  bufferout << ", found = " << tmp << ", status = " << status;
  prefixKey.Get(tmp);
  bufferout << "; Internal State: prefixKey = " << tmp << ", cursor_get_flags = " << cursor_get_flags;

  return OK;
}
