/*******************************************************************************
* regexp.cpp: Regular expressions manipulation
*-------------------------------------------------------------------------------
* (c)1999-2001 VideoLAN
* $Id: regexp.cpp,v 1.1 2001/10/06 21:23:36 bozo Exp $
*
* Authors: Benoit Steiner <benny@via.ecp.fr>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*
*-------------------------------------------------------------------------------
* This class provides a parser for regular expressions.
*
*******************************************************************************/


//------------------------------------------------------------------------------
// Preamble
//------------------------------------------------------------------------------
#include "defs.h"

#include "stdlib.h"
#include "stdio.h"
#ifdef HAVE_REGEX_H
#include <regex.h>
#endif

#include "common.h"
#include "debug.h"
#include "string.h"
#include "vector.h"
#include "regexp.h"

#include "vector.cpp"





//******************************************************************************
// C_RegexpMatch class
//******************************************************************************
//
//******************************************************************************

//------------------------------------------------------------------------------
// Constructor: stores a copy of the matched text together with the start and
// end indexes handed over by the caller
//------------------------------------------------------------------------------
C_RegexpMatch::C_RegexpMatch(const C_String& strMatch, unsigned int iStartIndex,
                             unsigned int iEndIndex)
  : m_strMatch(strMatch),
    m_iStartIndex(iStartIndex),
    m_iEndIndex(iEndIndex)
{
  // Everything is set up by the initializer list
}




//******************************************************************************
// C_Regexp class
//******************************************************************************
//
//******************************************************************************

//------------------------------------------------------------------------------
// Constructor: compiles strPattern as a POSIX extended regular expression
// (REG_EXTENDED) and records in m_bIsValid whether the compilation succeeded
//------------------------------------------------------------------------------
C_Regexp::C_Regexp(const C_String& strPattern)
{
#ifndef HAVE_REGEX_H
  // Built without HAVE_REGEX_H: there is nothing to compile the pattern with
  ASSERT(false);

#else
  // regcomp() returns 0 on success and an error code otherwise
  const int iCompileStatus = regcomp(&m_sPattern, strPattern.GetString(),
                                     REG_EXTENDED);
  m_bIsValid = (iCompileStatus == 0);
#endif
}


//------------------------------------------------------------------------------
// Destructor: releases the compiled pattern, if there is one
//------------------------------------------------------------------------------
C_Regexp::~C_Regexp()
{
#ifdef HAVE_REGEX_H
  // regfree() may only be given a pattern that regcomp() compiled
  // successfully: when regcomp() fails the content of the regex_t is
  // undefined, so freeing it could crash or free memory twice
  if(m_bIsValid)
    regfree(&m_sPattern);

#else
  ASSERT(false);
#endif
}


//------------------------------------------------------------------------------
//
//------------------------------------------------------------------------------
bool C_Regexp::DoesMatch(const C_String& strInput) const
{
  // Just check if the input match, don't build the answer
  int iRc = regexec(&m_sPattern, strInput.GetString(), 0, NULL, 0);
  ASSERT(iRc == 0 || iRc == REG_NOMATCH);

  return !iRc;
}


//------------------------------------------------------------------------------
// Returns the number of matches GetAllMatches() finds in strInput
//------------------------------------------------------------------------------
unsigned int C_Regexp::GetNbMatches(const C_String& strInput) const
{
  // Not efficient at all (every match is built just to be counted), but it is
  // really simple
  return GetAllMatches(strInput).Size();
}


//------------------------------------------------------------------------------
// Returns the match found when searching strInput from index 0, or NULL when
// GetMatch() finds none. The returned object is the one allocated with new by
// GetMatch().
//------------------------------------------------------------------------------
C_RegexpMatch* C_Regexp::GetFirstMatch(const C_String& strInput) const
{
  return GetMatch(strInput, 0);
}


//------------------------------------------------------------------------------
// Collects the successive matches of the pattern in strInput, scanning from
// left to right. Each search restarts where the previous match ended, so the
// matches do not overlap. The returned vector is empty when nothing matches.
//------------------------------------------------------------------------------
C_Vector<C_RegexpMatch> C_Regexp::GetAllMatches(const C_String& strInput) const
{
  // Various data and buffers
  C_Vector<C_RegexpMatch> vMatches;
  const unsigned int iInputLen = strInput.Length();

  // Start by looking for the first match
  unsigned int iSearchStart = 0;
  C_RegexpMatch* pMatch = GetMatch(strInput, iSearchStart);

  while(pMatch)
  {
    // Add the match just found to the answer
    vMatches.Add(pMatch);

    // The next search normally restarts where this match ended. A zero-length
    // match (e.g. pattern "a*" on an input without any 'a') ends where the
    // search began: restarting there would return the very same match forever,
    // so step over one character to guarantee that the scan progresses.
    unsigned int iNextStart = pMatch->GetEndIndex();
    if(iNextStart <= iSearchStart)
      iNextStart = iSearchStart + 1;

    // Stop once the whole input has been consumed
    if(iNextStart >= iInputLen)
      break;

    // Look for additional matches
    iSearchStart = iNextStart;
    pMatch = GetMatch(strInput, iSearchStart);
  }

  return vMatches;
}


//------------------------------------------------------------------------------
// Searches strInput for the first match of the pattern located at or after
// index iStart.
// Returns NULL when there is no match, when the pattern could not be compiled
// or when iStart lies outside the input. Otherwise returns a C_RegexpMatch
// allocated with new, holding the matched text, its start and end indexes
// (relative to the beginning of strInput) and one string per parenthesised
// subexpression of the pattern (an empty string for a subexpression that did
// not take part in the match).
//------------------------------------------------------------------------------
C_RegexpMatch* C_Regexp::GetMatch(const C_String& strInput, int iStart) const
{
  ASSERT(iStart >= 0);

  // Nothing can be matched with a pattern that failed to compile (its regex_t
  // content is undefined) or from a position outside the input
  if(!m_bIsValid || iStart < 0 ||
     static_cast<unsigned int>(iStart) > strInput.Length())
    return NULL;

  // Owner of the regmatch_t array: the array is released whatever the way the
  // function is left
  struct S_MatchBuffer
  {
    regmatch_t* m_asMatch;
    explicit S_MatchBuffer(unsigned int iCount) :
                                        m_asMatch(new regmatch_t[iCount]) {}
    ~S_MatchBuffer() { delete[] m_asMatch; }
  };

  // Init the parsing: one slot for the complete match plus one for each
  // subexpression. The array is sized from the pattern itself, so that
  // regexec() can never write past its end however many subexpressions the
  // pattern contains.
  const unsigned int iSize = m_sPattern.re_nsub+1;
  S_MatchBuffer sBuffer(iSize);
  regmatch_t* asMatch = sBuffer.m_asMatch;
  const char* pszInput = strInput.GetString() + iStart;

  // When the search does not start at the beginning of the input, '^' must
  // not match at the first character examined
  int iFlags = 0;
  if(iStart != 0)
    iFlags |= REG_NOTBOL;

  // Match the input against the pattern
  int iRc = regexec(&m_sPattern, pszInput, iSize, asMatch, iFlags);
  ASSERT(iRc == 0 || iRc == REG_NOMATCH);

  // Build the answer
  C_RegexpMatch* pAnswer = NULL;
  if(!iRc)
  {
    // Saves the complete match in the C_RegexpMatch. The offsets filled in by
    // regexec() are relative to pszInput: adding iStart makes them relative to
    // the beginning of strInput.
    ASSERT(asMatch[0].rm_so != -1);

    C_String strMatch = strInput.SubString(asMatch[0].rm_so + iStart,
                                           asMatch[0].rm_eo + iStart);
    pAnswer = new C_RegexpMatch(strMatch, asMatch[0].rm_so + iStart,
                                          asMatch[0].rm_eo + iStart);
    ASSERT(pAnswer);
    
    //LogDbg(NULL, "\nInput: <"+strInput+">, Match: <"+strMatch+"> (so: "+
    //       asMatch[0].rm_so+", eo: "+asMatch[0].rm_eo+")");

    // Also stores all the submatches in the C_RegexpMatch
    for(unsigned int i = 1; i < iSize; i++)
    {
      // An offset of -1 marks a subexpression that did not take part in the
      // match: an empty string is stored for it
      C_String strSubExpr = "";
      if(asMatch[i].rm_so != -1)
        strSubExpr = strInput.SubString(asMatch[i].rm_so + iStart,
                                        asMatch[i].rm_eo + iStart);

      pAnswer->AddSubExpr(new C_String(strSubExpr));

      //LogDbg(NULL, "->Subexpr("+i+"): <"+strSubExpr+"> (so: "+
      //       asMatch[i].rm_so+", eo: "+ asMatch[i].rm_eo+")");
    }
  }

  return pAnswer;
}

