/*********************************************************************
 * Author: B. Alex Bridges                                           *
 * Login ID: brid0129                                                *
 * Class: CPSC-431, Winter 2000                                      *
 * Project: Laboratory Exercise 2                                    *
 * Description: This program is a lexical analyzer and the parser    *
 *              portion of a syntax analyzer for the language Micro  *
 *              Modula-2.                                            *
 * Contents: Methods for doing the lexical analyzing.                *
 *********************************************************************/

/* IMPORTS */
import java.io.*;


/*
 * Class: Lexical
 * Purpose: Lexical analyzer for Micro Modula-2 built on top of
 *          java.io.StreamTokenizer.  'setup' configures the tokenizer,
 *          'tokenize' reads one raw token, and 'process' turns raw tokens
 *          into named language tokens (comments, string constants,
 *          two-character symbols, operators and reserved words).
 */
class Lexical
{
  /* CONSTANTS */
    final static boolean b_debug = false;   // CONTROLS EXTRA DEBUG OUTPUT
    final static int ucode_space      = 32; // UNICODE NUM. VALUE FOR space
    final static int ucode_quote_mark = 34; // UNICODE NUM. VALUE FOR "
    final static int ucode_apostrophe = 39; // UNICODE NUM. VALUE FOR '
    final static int ucode_period     = 46; // UNICODE NUM. VALUE FOR .
    final static int ucode_f_slash    = 47; // UNICODE NUM. VALUE FOR /
    final static int id_limit = 20;         // IDENTIFIER CHARACTER LIMIT

    // WHOLE VALUES BELOW THIS MAGNITUDE ARE REPORTED AS INTEGERS; IT IS
    // KEPT UNDER 2^53 SO THE double -> long CONVERSION IS EXACT
    private final static double whole_limit = 1.0e15;

    // TOKEN TEXT -> TOKEN NAME FOR SPECIAL CHARACTERS AND OPERATORS
    //
    // NOTE(review): 'tokenize' only produces single-character symbols, so
    //               the ".." entry is never matched as things stand; it is
    //               kept so the table mirrors the intended symbol set.
    private final static String[][] symbol_names =
    {
      { ".",   "period"    },
      { ";",   "semicolon" },
      { ":",   "colon"     },
      { "(",   "l_paran"   },
      { ")",   "r_paran"   },
      { "[",   "l_bracket" },
      { "]",   "r_bracket" },
      { ",",   "comma"     },
      { "..",  "ellipsis"  },
      { "+",   "add_op"    },
      { "-",   "add_op"    },
      { "*",   "mul_op"    },
      { "/",   "mul_op"    },
      { "DIV", "mul_op"    },
      { "MOD", "mul_op"    },
      { "<",   "rel_op"    },
      { "=",   "rel_op"    },
      { ">",   "rel_op"    },
      { "#",   "rel_op"    }
    };

    // TOKEN TEXT -> TOKEN NAME FOR RESERVED WORDS
    //
    // NOTE: Pre-defined functions (TRUNC, FLOAT), modules (InOut, RealIO)
    //       and procedures (WriteLn, ...) are deliberately absent, so that
    //       they stay identifiers and are identified by the syntax analyzer.
    private final static String[][] reserved_names =
    {
      { "INTEGER",   "type_name" },
      { "REAL",      "type_name" },
      { "BOOLEAN",   "type_name" },
      { "MODULE",    "module"    },
      { "FROM",      "from"      },
      { "IMPORT",    "import"    },
      { "BEGIN",     "begin"     },
      { "END",       "end"       },
      { "VAR",       "var"       },
      { "TYPE",      "type"      },
      { "PROCEDURE", "procedure" },
      { "WHILE",     "while"     },
      { "DO",        "do"        },
      { "REPEAT",    "repeat"    },
      { "UNTIL",     "until"     },
      { "IF",        "if"        },
      { "THEN",      "then"      },
      { "ELSE",      "else"      },
      { "RETURN",    "return"    },
      { "ARRAY",     "array"     },
      { "OF",        "of"        }
    };

  /* GLOBAL VARIABLES */
  // NONE

  /*
   * Method: setup
   * Purpose: Creates the token stream and makes the quote mark, apostrophe,
   *          period and forward slash ordinary (single-character) tokens,
   *          so the tokenizer applies no quoting or comment handling of
   *          its own to them.
   * Input: --PARAMETERS--
   *        => 'br_file_in' = The stream of input.
   * Output: --RETURNS--
   *         => The configured StreamTokenizer.
   */
  public static StreamTokenizer setup(BufferedReader br_file_in)
  {
    StreamTokenizer stmT_file_in = new StreamTokenizer(br_file_in);

    stmT_file_in.ordinaryChar(ucode_quote_mark);
    stmT_file_in.ordinaryChar(ucode_apostrophe);
    stmT_file_in.ordinaryChar(ucode_period);
    stmT_file_in.ordinaryChar(ucode_f_slash);

    return(stmT_file_in);
  } // method setup

  /*
   * Method: tokenize
   * Purpose: Reads the next raw token from the stream and records its
   *          type, text and line number.
   *            - Numbers: whole values are named "integer" and stored
   *              without a fraction; all others are named "real".  The
   *              tokenizer only exposes the numeric value, so a literal
   *              written as "5.0" cannot be told apart from "5".
   *            - Words: named "identifier", cut to 'id_limit' characters.
   *            - Anything else except end of file: the single character
   *              read.  At end of file only the type fields are set.
   * Input: --PARAMETERS--
   *        => 'stmT_file_in' = The stream of input.
   * Output: --RETURNS--
   *         => The next token in the input stream, or null (after
   *            printing a message) if reading fails with an IOException.
   */
  public static Token tokenize(StreamTokenizer stmT_file_in)
  {
    /* LOCAL VARIABLES */
    Token Token_input = new Token();  // TOKEN TAKEN FROM INPUT STREAM
    double dbl_value;                 // VALUE OF A NUMBER TOKEN

    try
    {
      Token_input.int_ttype = stmT_file_in.nextToken();

      if(b_debug)
        System.out.println("Type value is "+Token_input.int_ttype);

      Token_input.str_original = stmT_file_in.toString();
      Token_input.int_line = stmT_file_in.lineno();

      if(b_debug)
        System.out.println("Reading "+Token_input.str_original+".");

      /* JAVA'S TOKEN TYPE */
      switch(Token_input.int_ttype)
      {
        /* EOF */
        case StreamTokenizer.TT_EOF:
          Token_input.str_ttype = "TT_EOF";

          break;
        /* NUMBER */
        case StreamTokenizer.TT_NUMBER:
          Token_input.str_ttype = "TT_NUMBER";

          dbl_value = stmT_file_in.nval;

          // String.valueOf(double) always prints a decimal point, so the
          // value itself (not its text) decides between integer and real.
          if( dbl_value == Math.floor(dbl_value) &&
              Math.abs(dbl_value) < whole_limit )
          {
            Token_input.str_actual = String.valueOf( (long) dbl_value );
            Token_input.str_name = "integer";
          } // if
          else
          {
            Token_input.str_actual = String.valueOf(dbl_value);
            Token_input.str_name = "real";
          } // else

          break;
        /* WORD */
        case StreamTokenizer.TT_WORD:
          Token_input.str_ttype = "TT_WORD";

          // OBSERVE IDENTIFIER LIMIT: KEEP AT MOST THE 1st 'id_limit'
          // CHARACTERS (substring's END INDEX IS EXCLUSIVE)
          if( stmT_file_in.sval.length() <= id_limit )
            Token_input.str_actual = stmT_file_in.sval;
          else
            Token_input.str_actual = stmT_file_in.sval.substring(0,id_limit);

          Token_input.str_name = "identifier";

          break;
        /* OTHER */
        // String constants are assembled in the process method, because
        // the tokenizer's own quote handling would silently treat the
        // rest of the line as the constant when the end marker is missing.
        default:
          Token_input.str_ttype = "none";

          // For an ordinary character the token type IS the character.
          Token_input.str_actual = String.valueOf( (char) Token_input.int_ttype );

          if(b_debug)
            System.out.println("Read "+Token_input.str_actual+".");

          break;
      } // switch

      return(Token_input);
    } // try
    /* EXCEPTION HANDLING */
    catch(IOException exception)
    {
      System.out.println("\n FATAL EXCEPTION: File input problem.\n");
      return(null);
    } // catch
  } // method tokenize

  /*
   * Method: process
   * Purpose: Reads one raw token and classifies it.  Comments "(* ... *)"
   *          (nesting allowed) and string constants "'...'" are collapsed
   *          into a single token; ":=", "<=" and ">=" are joined; special
   *          characters, operators and reserved words get their token
   *          name.  Any other token is returned as 'tokenize' produced it.
   *          The stream is only read ahead when the current token could
   *          start a two-token symbol.
   * Input: --PARAMETERS--
   *        => 'stmT_file_in' = The stream of input.
   * Output: --RETURNS--
   *         => The classified token; the end-of-file token if a comment is
   *            still open at end of file; null if a string constant is not
   *            closed on its line or the input could not be read.
   */
  public static Token process(StreamTokenizer stmT_file_in)
  {
    /* LOCAL VARIABLES */
    Token Token_current = tokenize(stmT_file_in); // CURRENT TOKEN
    String str_text;                              // TEXT OF CURRENT TOKEN
    String str_name;                              // NAME FOUND IN TABLES

    if(Token_current == null)
      return(null);

    str_text = Token_current.str_actual;

    /* STRING CONSTANTS */
    // Handled before any look-ahead: a look-ahead here would skip the
    // blanks that directly follow the opening apostrophe.
    if( "'".equals(str_text) )
      return( readStringConstant(stmT_file_in, Token_current) );

    /* COMMENTS */
    if( "(".equals(str_text) && nextIs(stmT_file_in, "*") )
      return( skipComment(stmT_file_in, Token_current) );

    /* TWO-CHARACTER SYMBOLS */
    // => ASSIGNMENT
    if( ":".equals(str_text) && nextIs(stmT_file_in, "=") )
    {
      Token_current.str_actual = ":=";
      Token_current.str_name = "assignment";

      return(Token_current);
    } // if
    // => RELATIONAL
    if( ( "<".equals(str_text) || ">".equals(str_text) ) &&
        nextIs(stmT_file_in, "=") )
    {
      Token_current.str_actual = str_text + "=";
      Token_current.str_name = "rel_op";

      return(Token_current);
    } // if

    /* SPECIAL CHARACTERS, OPERATORS AND RESERVED WORDS */
    str_name = lookup(symbol_names, str_text);
    if(str_name == null)
      str_name = lookup(reserved_names, str_text);
    if(str_name != null)
      Token_current.str_name = str_name;

    /* NUMBERS AND IDENTIFIERS */
    // These are already named by the tokenize method.

    return(Token_current);
  } // method process

  /*
   * Method: lookup
   * Purpose: Finds the token name paired with a token text.
   * Input: --PARAMETERS--
   *        => 'str_table' = Rows of { text, name }.
   *        => 'str_text'  = The text to look for (may be null).
   * Output: --RETURNS--
   *         => The matching name, or null if the text is not in the table.
   */
  private static String lookup(String[][] str_table, String str_text)
  {
    for(int i = 0; i < str_table.length; i++)
    {
      if( str_table[i][0].equals(str_text) )
        return(str_table[i][1]);
    } // for

    return(null);
  } // method lookup

  /*
   * Method: nextIs
   * Purpose: Checks whether the next token has the given text.  The token
   *          is consumed when it matches and pushed back when it does not.
   * Input: --PARAMETERS--
   *        => 'stmT_file_in' = The stream of input.
   *        => 'str_expected' = The text the next token must have.
   * Output: --RETURNS--
   *         => true if the next token matched (and was consumed).
   */
  private static boolean nextIs(StreamTokenizer stmT_file_in, String str_expected)
  {
    Token Token_peek = tokenize(stmT_file_in);

    if(Token_peek == null)
      return(false);            // READ FAILED; NOTHING TO PUSH BACK

    if( str_expected.equals(Token_peek.str_actual) )
      return(true);

    stmT_file_in.pushBack();
    return(false);
  } // method nextIs

  /*
   * Method: skipComment
   * Purpose: Consumes the rest of a comment whose "(*" has already been
   *          read, keeping count of nested "(*" and "*)" markers.
   * Input: --PARAMETERS--
   *        => 'stmT_file_in' = The stream of input.
   *        => 'Token_open'   = The "(" token that opened the comment.
   * Output: --RETURNS--
   *         => 'Token_open' renamed "comment" with text "n/a"; the
   *            end-of-file token (after printing an error) if the comment
   *            is never closed; null if the input could not be read.
   */
  private static Token skipComment(StreamTokenizer stmT_file_in, Token Token_open)
  {
    /* LOCAL VARIABLES */
    Token Token_next;      // TOKEN INSIDE THE COMMENT
    int int_comments = 1;  // NUMBER OF COMMENTS CURRENTLY OPEN

    if(b_debug)
      System.out.println("\n COMMENT: Beginning marker found.\n");

    do
    {
      Token_next = tokenize(stmT_file_in);
      if(Token_next == null)
        return(null);

      // => NESTED COMMENTS: BEGINNING MARKER
      //    (a lone "(" leaves the following token in the stream)
      if( "(".equals(Token_next.str_actual) )
      {
        if( nextIs(stmT_file_in, "*") )
        {
          int_comments++;
          if(b_debug)
            System.out.println("\n COMMENT: Beginning marker found.\n");
        } // if
      } // if
      // => ENDING MARKER
      //    (a lone "*" leaves the following token in the stream)
      else if( "*".equals(Token_next.str_actual) )
      {
        if( nextIs(stmT_file_in, ")") )
        {
          int_comments--;
          if(b_debug)
            System.out.println("\n COMMENT: Ending marker found.\n");
        } // if
      } // else if
    } while( int_comments > 0 && !"TT_EOF".equals(Token_next.str_ttype) );

    // The loop only stops with a comment still open at end of file.
    if(int_comments > 0)
    {
      System.out.println("\n LEX ERROR: ')' portion of ending comment "+
                         "marker expected on line "+Token_next.int_line+".\n");

      return(Token_next);
    } // if

    Token_open.str_actual = "n/a";
    Token_open.str_name = "comment";

    return(Token_open);
  } // method skipComment

  /*
   * Method: readStringConstant
   * Purpose: Collects the text of a string constant whose opening
   *          apostrophe has already been read.  While collecting, blanks
   *          are made ordinary characters so they arrive as tokens and
   *          are kept in the constant.  The constant is built from the
   *          text of the tokens read, so words and numbers inside it are
   *          subject to the same normalization as in 'tokenize'.
   * Input: --PARAMETERS--
   *        => 'stmT_file_in' = The stream of input.
   *        => 'Token_open'   = The apostrophe token that opened the constant.
   * Output: --RETURNS--
   *         => 'Token_open' renamed "str_const" holding the text between
   *            the apostrophes; null (after printing an error) if the
   *            closing apostrophe is not found on the same line, or if
   *            the input could not be read.
   */
  private static Token readStringConstant(StreamTokenizer stmT_file_in, Token Token_open)
  {
    /* LOCAL VARIABLES */
    StringBuffer strB_constant = new StringBuffer(); // TEXT COLLECTED SO FAR
    Token Token_next;                                // TOKEN INSIDE CONSTANT
    boolean b_closed = false;                        // ENDING MARKER SEEN?

    // HAVE TOKENIZER TREAT SPACES AS TOKENS
    stmT_file_in.ordinaryChar(ucode_space);

    if(b_debug)
    {
      System.out.println("\n STRING CONSTANT: Beginning marker found.\n");
      System.out.println("Token stored as '"+Token_open.str_actual+"'.");
    } // if

    while(true)
    {
      Token_next = tokenize(stmT_file_in);

      // STOP ON A READ FAILURE, END OF FILE, OR A TOKEN FROM A LATER LINE
      if( Token_next == null ||
          Token_next.int_ttype == StreamTokenizer.TT_EOF ||
          Token_next.int_line != Token_open.int_line )
        break;

      // => ENDING MARKER
      if( "'".equals(Token_next.str_actual) )
      {
        b_closed = true;
        break;
      } // if

      strB_constant.append(Token_next.str_actual);

      if(b_debug)
        System.out.println("Value of string constant is now '"+strB_constant+"'");
    } // while

    // HAVE TOKENIZER TREAT SPACES AS WHITE SPACE
    stmT_file_in.whitespaceChars(ucode_space,ucode_space);

    if(Token_next == null)
      return(null);

    if(b_closed)
    {
      Token_open.str_actual = strB_constant.toString();
      Token_open.str_name = "str_const";

      if(b_debug)
      {
        System.out.println("\n STRING CONSTANT: Ending marker found.\n");
        System.out.println("Token stored as '"+Token_open.str_actual+"'.");
      } // if

      return(Token_open);
    } // if

    // The token that ended the scan belongs to the rest of the program, so
    // it is handed back -- unless it is a blank that was only read as a
    // token because of the temporary setting above.
    if(Token_next.int_ttype != ucode_space)
      stmT_file_in.pushBack();

    System.out.println("\n LEX ERROR: Ending string constant marker (') "+
                       "expected on line "+Token_open.int_line+".\n");

    return(null);
  } // method readStringConstant
} // class Lexical

