/*************************************************************************
 *
 *  $RCSfile: ContextTables.java,v $
 *
 *  $Revision: 1.1 $
 *
 *  last change: $Author: abi $ $Date: 2000/11/30 18:03:47 $
 *
 *  The Contents of this file are made available subject to the terms of
 *  either of the following licenses
 *
 *         - GNU Lesser General Public License Version 2.1
 *         - Sun Industry Standards Source License Version 1.1
 *
 *  Sun Microsystems Inc., October, 2000
 *
 *  GNU Lesser General Public License Version 2.1
 *  =============================================
 *  Copyright 2000 by Sun Microsystems, Inc.
 *  901 San Antonio Road, Palo Alto, CA 94303, USA
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License version 2.1, as published by the Free Software Foundation.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 *  MA  02111-1307  USA
 *
 *
 *  Sun Industry Standards Source License Version 1.1
 *  =================================================
 *  The contents of this file are subject to the Sun Industry Standards
 *  Source License Version 1.1 (the "License"); You may not use this file
 *  except in compliance with the License. You may obtain a copy of the
 *  License at http://www.openoffice.org/license.html.
 *
 *  Software provided under this License is provided on an "AS IS" basis,
 *  WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING,
 *  WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
 *  MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
 *  See the License for the specific provisions governing your rights and
 *  obligations concerning the Software.
 *
 *  The Initial Developer of the Original Code is: Sun Microsystems, Inc.
 *
 *  Copyright: 2000 by Sun Microsystems, Inc.
 *
 *  All Rights Reserved.
 *
 *  Contributor(s): _______________________________________
 *
 *
 ************************************************************************/
package com.sun.xmlsearch.xml;

import java.io.*;
import com.sun.xmlsearch.util.*;

import com.sun.xmlsearch.xml.qe.QueryHitData;

/**
 * Per-document ("microindex") context tables used to map word positions to
 * positions in the document tree and to render XPath-like hit locations.
 *
 * <p>Layout of the tables of the currently selected document, as built by
 * {@link #setMicroindex(int)}:
 * <ul>
 *   <li>contexts {@code 0 .. _nTextNodes-1} are text nodes; higher indices
 *       are element contexts (see {@code appendSegment});</li>
 *   <li>{@code _initialWords[t]} is the number of the first word of text
 *       node {@code t}; it is searched with a binary search, so it is
 *       assumed to be ascending;</li>
 *   <li>{@code _dests[c]} is the parent context of {@code c}; the last slot
 *       ({@code _dests.length - 1}) holds the sentinel {@code -1};</li>
 *   <li>{@code _linkTypes[c - _nTextNodes]} is the index into
 *       {@code _linkNames} for element context {@code c} (this is how the
 *       {@code firstParentWithCode*} methods and {@code appendSegment}
 *       index it);</li>
 *   <li>{@code _seqNumbers[c]} is the sequence number printed for
 *       context {@code c}.</li>
 * </ul>
 *
 * <p>Instances keep mutable search state ({@code _initialWordsIndex},
 * {@code _markers}, scratch arrays) and contain no synchronization.
 */
public final class ContextTables {
  // tables of the currently selected document
  private int[] _initialWords;
  private int[] _dests;
  private int[] _linkTypes;
  private int[] _seqNumbers;
  private int   _nTextNodes;

  /**
   * Decoded tables of one document, kept so that switching back to the
   * document does not require decoding again.
   */
  private final class Tables {
    private final int[] _initialWordsCached;
    private final int[] _destsCached;
    private final int[] _linkTypesCached;
    private final int[] _seqNumbersCached;

    public Tables(int[] initialWords,
                  int[] dests,
                  int[] linkTypes,
                  int[] seqNumbers) {
      _initialWordsCached = initialWords;
      _destsCached = dests;
      _linkTypesCached = linkTypes;
      _seqNumbersCached = seqNumbers;
    }

    /** Makes these tables the current tables of the enclosing instance. */
    public void setTables() {
      _initialWords = _initialWordsCached;
      _dests        = _destsCached;
      _linkTypes    = _linkTypesCached;
      _seqNumbers   = _seqNumbersCached;
      _nTextNodes   = _initialWords.length;
    }
  } // end of Tables

  // decoded tables, indexed by document number; null until first use
  private final Tables[] _cache;

  // cached last position for linear search
  private int _initialWordsIndex;
  // link names are shared between all microindexes in an index
  private final String[] _linkNames;
  // offsets to tables' storage in file (or memory)
  private final int[] _offsets;
  private final byte[] _contextData; // !!! fully cached for now
  // auxiliary: receives the values decoded from the head of a document's
  // data; entries 0..3 are passed on to the four table decoders
  private final IntegerArray _kTable = new IntegerArray(5);
  // _auxArray will be used as an auxiliary to decode arrays
  private final IntegerArray _auxArray = new IntegerArray(4096);
  private int _lastDocNo = -1;

  // per-context bit sets used by hitLocation; bit i is set while the
  // context lies on the ancestor path built for term i
  private int[] _markers;

  /**
   * @param offsets     for each document number, the offset of its encoded
   *                    tables within {@code contextData}
   * @param contextData the encoded tables of all documents
   * @param linkNames   link names, indexed by link code
   */
  public ContextTables(int[] offsets, byte[] contextData, String[] linkNames) {
    _offsets = offsets;
    _contextData = contextData;
    _linkNames = linkNames;
    _cache = new Tables[_offsets.length];
  }

  /**
   * Selects the document whose tables subsequent calls operate on and
   * resets the linear word search. Tables are decoded on first use and
   * cached afterwards.
   *
   * <p>The tables are decoded into locals and installed only after decoding
   * succeeded, so a failure leaves the previously selected document intact.
   *
   * @param docNo document number, an index into the offsets passed to the
   *              constructor
   * @throws Exception if decoding the tables fails
   */
  public void setMicroindex(final int docNo) throws Exception {
    if (docNo != _lastDocNo) { // check if we need to do anything
      Tables tables = _cache[docNo];
      if (tables == null) {
        tables = decodeTables(docNo);
        _cache[docNo] = tables;
      }
      tables.setTables();
      _lastDocNo = docNo;
      _markers = new int[_dests.length];
    }
    _initialWordsIndex = 0;
  }

  /** Decodes the four tables of document {@code docNo} from the raw data. */
  private Tables decodeTables(final int docNo) throws Exception {
    final int offset = _offsets[docNo];
    // the first byte (unsigned) is the parameter for decoding _kTable
    final int k0 = _contextData[offset] & 0xFF;
    final ByteArrayDecompressor compr =
      new ByteArrayDecompressor(_contextData, offset + 1);
    _kTable.clear();
    compr.decode(k0, _kTable);

    // decompress initialWords into auxiliary array
    _auxArray.clear();
    compr.ascDecode(_kTable.at(0), _auxArray);
    final int[] initialWords = _auxArray.toIntArray();

    // decompress destinations into auxiliary array
    _auxArray.clear();
    compr.decode(_kTable.at(1), _auxArray);
    _auxArray.add(-1); // sentinel, root
    final int[] dests = _auxArray.toIntArray();

    // one link type per non-text context, the sentinel slot excluded
    final int[] linkTypes = new int[dests.length - initialWords.length - 1];
    compr.decode(_kTable.at(2), linkTypes);

    // one sequence number per context, the sentinel slot excluded
    final int[] seqNumbers = new int[dests.length - 1];
    compr.decode(_kTable.at(3), seqNumbers);

    return new Tables(initialWords, dests, linkTypes, seqNumbers);
  }

  /** Returns the parent of {@code context} ({@code -1} for the sentinel slot). */
  public final int parentContext(int context) {
    return _dests[context];
  }

  /**
   * Returns the link name for {@code context}.
   *
   * <p>NOTE(review): indexes {@code _linkTypes} with {@code context}
   * directly, whereas the {@code firstParentWithCode*} methods subtract the
   * number of text nodes first; confirm which numbering callers pass in.
   */
  public String linkName(int context) {
    return _linkNames[_linkTypes[context]];
  }

  /**
   * Returns the code (index) of {@code linkName}, or {@code -1} when the
   * name is not known.
   */
  public int linkCode(String linkName) {
    for (int i = 0; i < _linkNames.length; i++) {
      if (linkName.equals(_linkNames[i])) {
        return i;
      }
    }
    return -1; // when not found
  }

  /**
   * Converts element names into a flag array indexed by link code.
   *
   * @param ignoredElements names of elements to ignore; may be null or empty
   * @return an array with {@code true} at the code of every known name, or
   *         {@code null} when no name was given or none of them is known
   */
  public boolean[] getIgnoredElementsSet(String[] ignoredElements) {
    if (ignoredElements == null || ignoredElements.length == 0) {
      return null;
    }
    final boolean[] result = new boolean[_linkNames.length];
    boolean anyValid = false;
    for (int i = 0; i < ignoredElements.length; i++) {
      final int code = linkCode(ignoredElements[i]);
      if (code > -1) {
        result[code] = true;
        anyValid = true;
      }
    }
    return anyValid ? result : null;
  }

  /**
   * Returns {@code false} as soon as {@code ctx} or one of the contexts
   * reached by following parents has a link type flagged in
   * {@code ignoredElements}; returns {@code true} when the parent chain
   * ends ({@code -1}) without such a hit.
   *
   * <p>NOTE(review): like {@link #linkName(int)} this indexes
   * {@code _linkTypes} without subtracting the number of text nodes, and it
   * keeps walking into the sentinel slot ({@code _dests.length - 1}), which
   * lies beyond the end of {@code _linkTypes} as allocated in
   * {@code setMicroindex}. Left unchanged because the numbering expected by
   * callers is not visible here; confirm before relying on it.
   *
   * @param ctx             context to start from
   * @param ignoredElements flags indexed by link code, as returned by
   *                        {@link #getIgnoredElementsSet(String[])}
   */
  public final boolean notIgnored(int ctx, boolean[] ignoredElements) {
    do {
      if (ignoredElements[_linkTypes[ctx]]) {
        return false;
      }
    }
    while ((ctx = _dests[ctx]) > -1); // parentContext 'hand inlined'
    return true;
  }

  /**
   * Starting with the parent of the text node containing word {@code pos}
   * and going up the ancestry tree, looks for the first context with the
   * given link code.
   *
   * @return the context found, or {@code -1} when the top is reached first
   */
  public int firstParentWithCode(final int pos, final int linkCode) {
    final int shift = _nTextNodes;
    final int limit = _dests.length - 1;
    // testing ctx < limit before indexing also covers a text node that
    // hangs directly below the sentinel
    for (int ctx = _dests[wordContextLin(pos)]; ctx < limit; ctx = _dests[ctx]) {
      if (_linkTypes[ctx - shift] == linkCode) {
        return ctx;
      }
    }
    return -1;
  }

  /**
   * Like {@link #firstParentWithCode(int, int)}, but the context must also
   * have a parent whose link code is {@code parentCode}.
   *
   * @return the context found, or {@code -1} when there is none
   */
  public int firstParentWithCode2(int pos, final int linkCode, final int parentCode) {
    int ctx = _dests[wordContextLin(pos)]; // first parent of text node
    final int shift = _nTextNodes;
    final int limit = _dests.length - 1;
    if (ctx >= limit) { // no element ancestor at all
      return -1;
    }
    for (int parent = _dests[ctx];
         parent < limit;
         parent = _dests[parent]) {
      if (_linkTypes[parent - shift] == parentCode
          && _linkTypes[ctx - shift] == linkCode) {
        return ctx;
      }
      ctx = parent;
    }
    return -1;
  }

  /**
   * Finds the first ancestor context with link code {@code linkCode} and
   * accepts it only if some context further up has link code
   * {@code ancestorCode}.
   *
   * @return the context found, or {@code -1} when there is none
   */
  public int firstParentWithCode3(int pos, int linkCode, int ancestorCode) {
    int ctx = _dests[wordContextLin(pos)];
    final int shift = _nTextNodes;
    final int limit = _dests.length - 1;
    // find first instance of linkCode
    while (ctx < limit && _linkTypes[ctx - shift] != linkCode) {
      ctx = _dests[ctx];
    }
    if (ctx < limit) { // found linkCode, check ancestry
      for (int ancestor = _dests[ctx];
           ancestor < limit;
           ancestor = _dests[ancestor]) {
        if (_linkTypes[ancestor - shift] == ancestorCode) { // ancestor confirmed
          return ctx; // match found, return successful ctx
        }
      }
    }
    return -1; // match NOT found
  }

  /**
   * Going up from the parent of the text node containing word {@code pos},
   * looks for the first context whose link code is any of
   * {@code linkCodes}.
   *
   * @return the context found, or {@code -1} when there is none
   */
  public int firstParentWithCode4(int pos, int[] linkCodes) {
    final int nCodes = linkCodes.length;
    final int shift = _nTextNodes;
    final int limit = _dests.length - 1;
    for (int ctx = _dests[wordContextLin(pos)]; ctx < limit; ctx = _dests[ctx]) {
      final int code = _linkTypes[ctx - shift];
      for (int i = 0; i < nCodes; i++) {
        if (code == linkCodes[i]) {
          return ctx;
        }
      }
    }
    return -1;
  }

  /**
   * Going up the ancestry tree, looks for a context whose link code is the
   * last element of {@code pathCodes} and whose ancestors match the
   * preceding elements (last but one against the candidate parent, and so
   * on upwards).
   *
   * <p>When the ancestors do not match, the same candidate context is
   * retried against the next higher parent ("try to match higher").
   *
   * <p>NOTE(review): {@code -1} is also returned when the ancestor walk
   * reaches the top right after the <em>first</em> path code matched, i.e.
   * when the path matched up to the topmost element; confirm this is
   * intended.
   *
   * @param pathCodes link codes from outermost to innermost; must not be
   *                  empty
   * @return the context found, or {@code -1} when there is none
   */
  public int firstParentWithCode5(int pos, int[] pathCodes) {
    final int nCodes = pathCodes.length;
    final int lastCode = pathCodes[nCodes - 1];
    final int shift = _nTextNodes;
    final int limit = _dests.length - 1;
    int ctx = _dests[wordContextLin(pos)];
    if (ctx >= limit) { // no element ancestor at all
      return -1;
    }
  SEARCH:
    for (int parent = _dests[ctx];
         parent < limit;
         parent = _dests[parent]) {
      if (_linkTypes[ctx - shift] == lastCode) { // initial match
        // try to match the entire path
        for (int i = nCodes - 2, parent2 = parent; i >= 0; i--) {
          if (_linkTypes[parent2 - shift] != pathCodes[i]) { // match failure
            continue SEARCH; // try to match higher
          }
          else if ((parent2 = _dests[parent2]) == limit) {
            return -1;
          }
        }
        return ctx;
      }
      else {
        ctx = parent;
      }
    }
    return -1;
  }

  /**
   * Like {@link #firstParentWithCode(int, int)}, but the context must also
   * have sequence number {@code seq}.
   *
   * @return the context found, or {@code -1} when the top is reached first
   */
  public int firstParentWithCode7(final int pos, final int linkCode, final int seq) {
    final int shift = _nTextNodes;
    final int limit = _dests.length - 1;
    for (int ctx = _dests[wordContextLin(pos)]; ctx < limit; ctx = _dests[ctx]) {
      if (_linkTypes[ctx - shift] == linkCode && _seqNumbers[ctx] == seq) {
        return ctx;
      }
    }
    return -1;
  }

  /** Returns whether the link name of {@code context} is {@code "TITLE"}. */
  public boolean isGoverning(int context) {
    // constant first: a missing link name yields false instead of an NPE
    return "TITLE".equals(linkName(context));
  }

  /** Makes the next {@link #wordContextLin(int)} start from the first text node. */
  public void resetContextSearch() {
    _initialWordsIndex = 0;
  }

  /**
   * Appends one path step for {@code context}: {@code text()} for a text
   * node, otherwise the link name, followed by {@code [seqNumber]/}.
   */
  private void appendSegment(int context, StringBuffer result) {
    result.append(context < _nTextNodes
                  ? "text()"
                  : _linkNames[_linkTypes[context - _nTextNodes]]);
    result.append('[');
    result.append(_seqNumbers[context]);
    result.append("]/");
  }

  /**
   * First phase shared by both {@code hitLocation} methods: for every
   * non-null term computes its word number relative to its text node and,
   * unless the term falls into the same text node as the previous non-null
   * term, builds the stack of contexts from the text node (bottom) up to
   * the topmost context and marks each of them in {@code _markers} with
   * the term's bit.
   *
   * <p>The bit of term {@code i} is {@code 1 << i} in an {@code int}, so
   * more than 32 terms cannot be told apart.
   *
   * @param terms       query terms; {@code null} entries are missing terms
   * @param matches     word number of term {@code i} is read from
   *                    {@code matches[2*i + 1]}
   * @param stacks      out: ancestor stack per term, {@code null} where the
   *                    previous path is to be reused
   * @param wordNumbers out: 1-based word number within the text node
   * @return the union of the bits of all terms that got a stack
   */
  private int buildAncestorStacks(String[] terms, int[] matches,
                                  IntegerArray[] stacks, int[] wordNumbers) {
    int pattern = 0;
    int lastInitialWordIndex = -1;
    for (int i = 0, marker = 1; i < terms.length; i++, marker <<= 1) {
      if (terms[i] == null) {
        continue;
      }
      final int wordNumber = matches[i*2 + 1];
      final int initialWordIndex = findIndexBin(wordNumber);
      wordNumbers[i] = wordNumber - _initialWords[initialWordIndex] + 1;
      if (initialWordIndex != lastInitialWordIndex) {
        pattern |= marker;
        final IntegerArray stack = stacks[i] = new IntegerArray();
        // climb until the sentinel slot (whose parent is -1) is reached;
        // the sentinel slot itself is not pushed
        for (int ctx = initialWordIndex, parent;
             (parent = _dests[ctx]) != -1;
             ctx = parent) {
          stack.add(ctx);
          _markers[ctx] |= marker;
        }
        lastInitialWordIndex = initialWordIndex;
      }
      // else: same text node as before, path will be reused
    }
    return pattern;
  }

  /**
   * Pops every remaining context off {@code stack}, appending its path
   * step to {@code out} and clearing its marker.
   */
  private void appendRemainingSegments(IntegerArray stack, StringBuffer out) {
    while (stack.cardinality() > 0) {
      final int context = stack.popLast();
      appendSegment(context, out);
      _markers[context] = 0;
    }
  }

  /**
   * XPath (forking) location of the hit, appended to {@code result} as a
   * {@code <Matches path="common">} element containing one
   * {@code <Match>} per matching term (path relative to the common path)
   * and one {@code <MissingTerm/>} per null term.
   *
   * <p>There has to be at least one non-null term; otherwise only the
   * closing tag is appended.
   *
   * <p>NOTE(review): the {@code tokenNumber} attribute value is emitted
   * with a trailing {@code ]}; kept as is because consumers of this output
   * are not visible here. Terms are written into the attribute unescaped.
   *
   * @param terms   query terms; {@code null} entries are missing terms
   * @param matches word number of term {@code i} is read from
   *                {@code matches[2*i + 1]}
   * @param result  buffer the markup is appended to
   */
  public void hitLocation(String[] terms, int[] matches, StringBuffer result) {
    final int N = terms.length;
    final IntegerArray[] stacks = new IntegerArray[N];
    final int[] wordNumbers = new int[N];
    final int pattern = buildAncestorStacks(terms, matches, stacks, wordNumbers);

    int nPopped = 0, pathStart = 0, pathEnd = 0;
    // find and output common path
    // process first non-missing match, counting leading missing terms so
    // that their elements can be output before the first match
    int i = 0, nMissing = 0;
    for ( ; i < N; i++) {
      if (terms[i] == null) {
        ++nMissing; // only count leading missing terms
        continue;
      }
      result.append("<Matches path=\"");
      final IntegerArray stack = stacks[i];
      // contexts marked by every stacked term belong to the common path;
      // the first context that is not is kept for the relative path
      int pending = -1;
      while (stack.cardinality() > 0) {
        final int context = stack.popLast();
        if (_markers[context] != pattern) {
          pending = context;
          break;
        }
        _markers[context] = 0;
        appendSegment(context, result); // belongs to common
        ++nPopped;
      }
      // end of 'matches' && common path
      result.append("\">");
      // output elements for any leading missingTerms
      while (--nMissing >= 0) {
        result.append("<MissingTerm/>");
      }

      result.append("<Match term=\"");
      result.append(terms[i]);
      result.append("\" path=\"");
      pathStart = result.length();
      if (pending != -1) {
        appendSegment(pending, result);
        _markers[pending] = 0;
      }
      appendRemainingSegments(stack, result);
      pathEnd = result.length();

      result.append("\" tokenNumber=\"");
      result.append(wordNumbers[i]);
      result.append("]\"/>");
      break; // just the first non-zero
    }

    // process the remaining matches
    for (i++; i < N; i++) {
      if (terms[i] == null) {
        result.append("<MissingTerm/>");
        continue;
      }
      result.append("<Match term=\"");
      result.append(terms[i]);
      result.append("\" path=\"");
      final IntegerArray stack = stacks[i];
      if (stack == null) { // reuse previously generated path
        result.append(result.substring(pathStart, pathEnd));
      }
      else {
        stack.pop(nPopped); // drop the contexts of the common path
        pathStart = result.length();
        appendRemainingSegments(stack, result);
        pathEnd = result.length();
      }
      result.append("\" tokenNumber=\"");
      result.append(wordNumbers[i]);
      result.append("]\"/>");
    }
    result.append("</Matches>");
  }

  /**
   * QueryHitData is initialized in the caller; this function fills the
   * commonPath for all matching terms and relative paths for the
   * individual terms. Nothing is set when all terms are null.
   *
   * @param terms   query terms; {@code null} entries are missing terms
   * @param matches word number of term {@code i} is read from
   *                {@code matches[2*i + 1]}
   * @param data    receives the common path and the match locations
   */
  public void hitLocation(String[] terms, int[] matches, QueryHitData data) {
    final int N = terms.length;
    final IntegerArray[] stacks = new IntegerArray[N];
    final int[] wordNumbers = new int[N];
    final int pattern = buildAncestorStacks(terms, matches, stacks, wordNumbers);

    int nPopped = 0, pathStart = 0, pathEnd = 0;
    // find and output common path
    // process first match
    final StringBuffer path = new StringBuffer(256);
    String previousPath = null; // we may be copying subpaths from it
    int i = 0;
    for ( ; i < N; i++) {
      if (terms[i] == null) {
        continue;
      }
      final IntegerArray stack = stacks[i];
      // contexts marked by every stacked term belong to the common path;
      // the first context that is not is kept for the relative path
      int pending = -1;
      while (stack.cardinality() > 0) {
        final int context = stack.popLast();
        if (_markers[context] != pattern) {
          pending = context;
          break;
        }
        _markers[context] = 0;
        appendSegment(context, path); // belongs to common
        ++nPopped;
      }
      data.setCommonPath(path.toString());
      // end of 'matches' && common path
      path.setLength(0); // will now be used for relative paths
      pathStart = 0;
      if (pending != -1) {
        appendSegment(pending, path);
        _markers[pending] = 0;
      }
      appendRemainingSegments(stack, path);
      pathEnd = path.length();
      previousPath = path.toString();
      data.setMatchLocation(i, previousPath, wordNumbers[i]);
      break; // just the first non-zero
    }

    // process the remaining matches
    for (i++; i < N; i++) {
      if (terms[i] == null) {
        continue;
      }
      path.setLength(0);
      final IntegerArray stack = stacks[i];
      if (stack == null) { // reuse previously generated path
        path.append(previousPath.substring(pathStart, pathEnd));
      }
      else {
        stack.pop(nPopped); // drop the contexts of the common path
        pathStart = path.length();
        appendRemainingSegments(stack, path);
        pathEnd = path.length();
      }
      previousPath = path.toString();
      data.setMatchLocation(i, previousPath, wordNumbers[i]);
    }
  }

  /**
   * Binary search in {@code _initialWords}: returns the index of the entry
   * equal to {@code wordNumber}, or else the index of the last entry below
   * it ({@code -1} when every entry is greater).
   */
  private int findIndexBin(final int wordNumber) {
    int lo = 0, hi = _nTextNodes - 1;
    while (lo <= hi) {
      final int mid = (lo + hi) >>> 1;
      if (_initialWords[mid] < wordNumber) {
        lo = mid + 1;
      }
      else if (_initialWords[mid] > wordNumber) {
        hi = mid - 1;
      }
      else {
        return mid;
      }
    }
    return lo - 1;
  }

  /**
   * Linear search for the text node containing {@code wordNumber}. The
   * scan resumes at the position remembered from the previous call, so
   * between resets ({@link #resetContextSearch()}, {@link #setMicroindex})
   * it only finds the right node for word numbers queried in
   * non-decreasing order.
   *
   * @return the index before the first text node, at or after the
   *         remembered position, whose initial word is greater than
   *         {@code wordNumber}; the last text node when there is no such
   *         node
   */
  public int wordContextLin(int wordNumber) {
    for (int i = _initialWordsIndex; i < _nTextNodes; i++) {
      if (_initialWords[i] > wordNumber) { // first such i
        // - 1 if wordNumbers can be the same
        _initialWordsIndex = i; // cached to speed up next search
        return i - 1;
      }
    }
    return _nTextNodes - 1;
  }
}
