/*************************************************************************
 *
 *  $RCSfile: scanner.cxx,v $
 *
 *  $Revision: 1.2 $
 *
 *  last change: $Author: dv $ $Date: 2001/10/05 08:32:10 $
 *
 *  The Contents of this file are made available subject to the terms of
 *  either of the following licenses
 *
 *         - GNU Lesser General Public License Version 2.1
 *         - Sun Industry Standards Source License Version 1.1
 *
 *  Sun Microsystems Inc., October, 2000
 *
 *  GNU Lesser General Public License Version 2.1
 *  =============================================
 *  Copyright 2000 by Sun Microsystems, Inc.
 *  901 San Antonio Road, Palo Alto, CA 94303, USA
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License version 2.1, as published by the Free Software Foundation.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 *  MA  02111-1307  USA
 *
 *
 *  Sun Industry Standards Source License Version 1.1
 *  =================================================
 *  The contents of this file are subject to the Sun Industry Standards
 *  Source License Version 1.1 (the "License"); You may not use this file
 *  except in compliance with the License. You may obtain a copy of the
 *  License at http://www.openoffice.org/license.html.
 *
 *  Software provided under this License is provided on an "AS IS" basis,
 *  WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
 *  WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
 *  MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
 *  See the License for the specific provisions governing your rights and
 *  obligations concerning the Software.
 *
 *  The Initial Developer of the Original Code is: Sun Microsystems, Inc.
 *
 *  Copyright: 2000 by Sun Microsystems, Inc.
 *
 *  All Rights Reserved.
 *
 *  Contributor(s): _______________________________________
 *
 *
 ************************************************************************/
#include <ctype.h>

#ifndef _DEBUG_HXX //autogen
#include <tools/debug.hxx>
#endif

#include "scanner.hxx"

// ------------------------------------------------------------------------------------

// One row of the keyword table: the spelling of a keyword and the lexeme
// type the scanner reports for it.
struct Keyword
{
	// Spelling of the keyword. The table below initialises this from string
	// literals, so it must be a pointer to const: binding a literal to a
	// plain `char*` is ill-formed since C++11, and writing through such a
	// pointer was never allowed.
	const char	  *m_Name;
	// Lexeme type returned when m_Name is recognised.
	SiLexemType    m_LexemType;
};

// Keyword table.
// ATTENTION: the entries must stay sorted by name, because FindKeyword()
// performs a binary search over this array.
Keyword Keywords[] =
{
	{ "ConfigurationItem", LT_CONFIGURATION_ITEM },
	{ "Custom",            LT_CUSTOM             },
	{ "DataCarrier",       LT_DATACARRIER        },
	{ "Directory",         LT_DIRECTORY          },
	{ "End",               LT_END                },
	{ "File",              LT_FILE               },
	{ "Folder",            LT_FOLDER             },
	{ "FolderItem",        LT_FOLDERITEM         },
	{ "HelpText",          LT_HELPTEXT           },
	{ "Installation",      LT_INSTALLATION       },
	{ "Module",            LT_MODULE             },
	{ "ModuleList",        LT_MODULELIST         },
	{ "ModuleSet",         LT_MODULESET          },
	{ "Os2Class",          LT_OS2_CLASS          },
	{ "Os2Creator",        LT_OS2_CREATOR        },
	{ "Os2Template",       LT_OS2_TEMPLATE       },
	{ "Procedure",         LT_PROCEDURE          },
	{ "Profile",           LT_PROFILE            },
	{ "ProfileItem",       LT_PROFILEITEM        },
	{ "RegistryArea",      LT_REGISTRYAREA       },
	{ "RegistryItem",      LT_REGISTRYITEM       },
	{ "ScpAction",         LT_SCPACTION          },
	{ "Shortcut",          LT_SHORTCUT           },
	{ "Slide",             LT_SLIDE              },
	{ "StarRegistry",      LT_STARREGISTRY       },
	{ "StarRegistryItem",  LT_STARREGISTRY_ITEM  }
};

Keyword* FindKeyword(int nFrom, int nTo, ByteString const& s)
// Binary search for s in Keywords, restricted to the inclusive index range
// [nFrom, nTo]. Returns a pointer to the matching entry, or NULL when no
// entry in that range has the name s.
{
	int nLow  = nFrom;
	int nHigh = nTo;

	// An empty range (upper bound below lower bound) ends the search.
	while (nHigh - nLow >= 0)
	{
		int const nProbe = (nLow + nHigh) / 2;
		Keyword  &rEntry = Keywords[nProbe];

		// Attention:
		// use the comparison operators here, not ByteString's Compare
		// (the arguments would end up swapped).
		if (s == rEntry.m_Name)
			return &rEntry;

		if (s < rEntry.m_Name)
			nHigh = nProbe - 1;	// continue in the lower half
		else
			nLow = nProbe + 1;	// continue in the upper half
	}

	return NULL;
}

// ------------------------------------------------------------------------------------

// Creates a scanner that reads from aStream. Nothing is read from the stream
// here; the first character is fetched by the first GetCurrentChar() call.
SiScanner::SiScanner(SvStream &aStream)
: m_aStream(aStream)
{
	m_nLine            = 1;		// line counting starts at 1
	m_bCurrentCharRead = FALSE;	// no character has been fetched yet
	m_cCurrentChar     = '\0';
}


// Looks s up in the keyword table. Returns the lexeme type of the matching
// keyword, or LT_NULL when s is not a keyword.
SiLexemType SiScanner::GetTypeOfKeyword(ByteString const& s) const
{
	// Index of the last table entry; the search range is inclusive.
	int const nLast = sizeof(Keywords)/sizeof(Keywords[0])-1;

	Keyword const *pHit = FindKeyword(0, nLast, s);
	if (pHit != NULL)
		return pHit->m_LexemType;

	return LT_NULL;
}

// Returns the scanner's line counter. It starts at 1 and is incremented for
// every '\n' consumed as white space between lexemes or inside a {...}
// constant.
int SiScanner::GetLine() const
{
	return m_nLine;
}

// Returns the current character without consuming it. On the very first call
// no character has been fetched yet, so one is read from the stream first.
char SiScanner::GetCurrentChar()
{
	if (m_bCurrentCharRead)
		return m_cCurrentChar;

	// First access: fetch the initial character and remember that we did.
	ReadNextChar();
	m_bCurrentCharRead = TRUE;

	return m_cCurrentChar;
}

// Reads the next character from the stream, makes it the current character
// and returns it. Once the stream reports end-of-file the current character
// becomes '\0'.
char SiScanner::ReadNextChar()
{
	m_aStream >> m_cCurrentChar;

	// Did the read run into the end of the stream?
	// -> hand out an invalid character instead
	if (m_aStream.IsEof())
		return m_cCurrentChar = '\0';

	return m_cCurrentChar;
}

// Returns the current lexeme. While the stored lexeme still has type LT_NULL
// (presumably the state before anything was scanned - confirm against
// SiLexem's default constructor) a lexeme is scanned first.
SiLexem const& SiScanner::GetCurrentLexem()
{
	if (m_aCurrentLexem.GetType() != LT_NULL)
		return m_aCurrentLexem;

	ReadNextLexem();
	return m_aCurrentLexem;
}

SiLexem const& SiScanner::ScanInteger()
// Integer constant.
// The current character is accepted unconditionally (ReadNextLexem() calls
// this for a digit or a leading '-'); after that any run of digits 0-9 is
// consumed. The result is stored as the current lexeme with type LT_ICONST.
// Note that a lone '-' therefore yields an LT_ICONST with the text "-".
{
	ByteString s;
	char   c = GetCurrentChar();

	do
	{
		s += c;             // process
		c = ReadNextChar(); // consume and fetch the next one
	}
	// The <ctype.h> classifiers are only defined for values representable
	// as unsigned char (or EOF). A plain char may be negative for bytes
	// above 0x7F, hence the cast.
	while (isdigit(static_cast<unsigned char>(c)));

	return m_aCurrentLexem = SiLexem(LT_ICONST,s);
}

SiLexem const& SiScanner::ScanByteString()
// String constant enclosed in double quotes.
// Attention: <"> is a legal character inside the constant too; so that it is
// not mistaken for the end of the string it has to be written as <\">.
// Every backslash that is not directly followed by <"> is kept literally.
// If the stream ends before the closing <"> is found, scanning stops and the
// text collected so far is returned as LT_SCONST (a backslash still pending
// at that point is dropped).
{
	ByteString s;
	char   c      = GetCurrentChar();
	BOOL   bQuote = FALSE;	// TRUE while a backslash is pending

	DBG_ASSERT(c == '\"',"SiScanner::ScanByteString");

	c = ReadNextChar(); // skip the opening <">

	// Read up to the end of the string.
	// The end-of-stream test is essential: ReadNextChar() keeps returning
	// '\0' once the stream is exhausted, so without it an unterminated
	// constant would never leave this loop.
	while ((c != '\"' || bQuote) && !m_aStream.IsEof())
	{
		/*	#66982#
		if( !ByteString::IsPrintable(c) && c != '\t' )
			return m_aCurrentLexem = SiLexem(LE_NOTPRINTABLE, s);
		*/
		if (bQuote)
		{
			if (c == '"')  // the only character that needs quoting
				s += '"';
			else		   // not a character that needs quoting
			{
				s += '\\'; // the pending backslash was a literal one
				if( c != '\\' ) // a backslash becomes the new pending one
					s += c;	   // current character
			}
			// A backslash following a backslash stays pending, so that
			// it can still quote a <"> that comes next.
			if( c == '\\' )
				bQuote = TRUE;
			else
				bQuote = FALSE;
		}
		else
		{
			if (c == '\\')
				bQuote = TRUE; // remember it
			else
				s += c;		   // append as is
		}
		c  = ReadNextChar();
	}

	ReadNextChar(); // skip the closing <">

	return m_aCurrentLexem = SiLexem(LT_SCONST,s);
}

SiLexem const& SiScanner::ScanMLByteString()
// Multi-line string constant enclosed in <{> and <}>.
// A <}> inside the constant has to be written as <\}>; a backslash in front
// of any other character is kept literally together with that character.
// Unquoted '\r' characters are dropped, and every '\n' increments the line
// counter.
// If the stream ends before the closing <}> is found, scanning stops and the
// text collected so far is returned as LT_SCONST.
{
	ByteString s;
	char   c      = GetCurrentChar();
	BOOL   bQuote = FALSE;	// TRUE while a backslash is pending

	DBG_ASSERT(c == '{',"SiScanner::ScanMLByteString");

	c = ReadNextChar(); // skip the opening <{>

	// Read up to the end of the string.
	// The end-of-stream test is essential: ReadNextChar() keeps returning
	// '\0' once the stream is exhausted, so without it an unterminated
	// constant would never leave this loop.
	while ((c != '}' || bQuote) && !m_aStream.IsEof())
	{
		// Error (check currently disabled):
		// a non-printable character was read while looking for <}>
		/*
		if(	!ByteString::IsPrintable(c) &&
			c != '\n' &&
			c != '\r' &&
			c != '\t' )
			return m_aCurrentLexem = SiLexem( LE_NOTPRINTABLE, s );
		*/

		if( bQuote )
		{
			if (c == '}')  // the only character that needs quoting
				s += '}';
			else		   // not a character that needs quoting
			{
				s += '\\'; // the backslash was a literal one
				s += c;	   // current character
			}
			bQuote = FALSE;
		}
		else
		{
			if (c == '\\')
				bQuote = TRUE; // remember it
			else
				if (c != '\r')
					s += c;		   // append as is
		}

		if (c == '\n')
			m_nLine++;

		c  = ReadNextChar();
	}

	ReadNextChar(); // skip the closing <}>

	return m_aCurrentLexem = SiLexem(LT_SCONST,s);
}

SiLexem const& SiScanner::ScanIdentifier()
// Identifier, which may turn out to be a keyword.
// Accepted form: a letter followed by any number of letters, digits or '_'.
// The result is stored as the current lexeme: with the keyword's lexeme type
// if the text is found in the keyword table, otherwise with type LT_ID.
{
	ByteString s;
	char   c = GetCurrentChar();

	// The <ctype.h> classifiers are only defined for values representable
	// as unsigned char (or EOF). A plain char may be negative for bytes
	// above 0x7F, hence the casts below.
	DBG_ASSERT(isalpha(static_cast<unsigned char>(c)),"SiScanner::ScanIdentifier");

	do
	{
		s += c;
		c  = ReadNextChar();
	}
	while (isalnum(static_cast<unsigned char>(c)) || c == '_');

	// Keyword?
	//
	SiLexemType lt = GetTypeOfKeyword(s);

	return m_aCurrentLexem = SiLexem(lt != LT_NULL ? lt : LT_ID, s);
}

// Scans the next lexeme from the stream, stores it as the current lexeme and
// returns it. White space in front of the lexeme is skipped, counting lines.
// Yields LT_EOF at the end of the stream and LE_NOTBEGIN (carrying the
// offending character) when the current character cannot start any lexeme.
SiLexem const& SiScanner::ReadNextLexem()
{
	char   c = GetCurrentChar(); // current character in the stream

	// The <ctype.h> classifiers are only defined for values representable
	// as unsigned char (or EOF). A plain char may be negative for bytes
	// above 0x7F, hence the casts below.

	// Skip white space (blanks and line breaks).
	//
	while (isspace(static_cast<unsigned char>(c)))
	{
		if (c == '\n')  			// line break?
			m_nLine++;  			// advance the line counter
		c = ReadNextChar();
	}

	// End of the stream reached?
	//
	if (m_aStream.IsEof())
		return m_aCurrentLexem = SiLexem(LT_EOF);

	// Integer constant?
	// Starts with a digit or with '-'.
	if( isdigit(static_cast<unsigned char>(c)) || c == '-' )
		return ScanInteger();

	// String constant?
	// Starts with <">.
	//
	if (c == '\"')
		return ScanByteString();

	// Multi-line string constant?
	// Starts with <{>.
	//
	if (c == '{')
		return ScanMLByteString();

	// Identifier?
	// Starts with a letter.
	//
	if (isalpha(static_cast<unsigned char>(c)))
		return ScanIdentifier();

	// Special character?
	// Each case consumes the character and returns the matching lexeme.
	//
	switch(c)
	{
		case '(': { ReadNextChar(); return m_aCurrentLexem = SiLexem(LT_POPEN,ByteString(c));     }
		case ')': { ReadNextChar(); return m_aCurrentLexem = SiLexem(LT_PCLOSE,ByteString(c));    }
		case ',': { ReadNextChar(); return m_aCurrentLexem = SiLexem(LT_COMMA,ByteString(c));     }
		case ';': { ReadNextChar(); return m_aCurrentLexem = SiLexem(LT_SEMICOLON,ByteString(c)); }
		case '=': { ReadNextChar(); return m_aCurrentLexem = SiLexem(LT_ASSIGN,ByteString(c));    }
	}

	// Error:
	// the current character is not the start of any lexeme.
	// It is consumed anyway so that the scanner makes progress.
	//
	ReadNextChar();
	return m_aCurrentLexem = SiLexem(LE_NOTBEGIN, ByteString(c));
}
