/*********************************************************************
 * Author: B. Alex Bridges                                           *
 * Login ID: brid0129                                                *
 * Class: CPSC-431, Winter 2000                                      *
 * Project: Laboratory Exercise 1                                    *
 * Description: This program is a lexical analyzer for the language  *
 *              Micro Modula-2.                                      *
 * Contents: Methods for doing the lexical analyzing.                *
 *********************************************************************/

/* IMPORTS */
import java.io.*;


class Lex
{
  /* CONSTANTS */
  final static boolean b_debug = false;   // CONTROLS EXTRA DEBUG OUTPUT
  final static int ucode_quote_mark = 34; // UNICODE NUMERICAL VALUE FOR "
  final static int ucode_apostrophe = 39; // UNICODE NUMERICAL VALUE FOR '
  final static int id_limit = 20;         // IDENTIFIER CHARACTER LIMIT

  // WORDS WITH A SPECIAL MEANING, EACH PAIRED WITH THE TOKEN NAME IT IS GIVEN.
  // ANY WORD NOT LISTED HERE KEEPS THE NAME "identifier" SET BY tokenize.
  private static final String[][] word_names =
  {
    // => OPERATORS SPELLED AS WORDS
    { "DIV",         "mul_op"    },
    { "MOD",         "mul_op"    },
    // => DATA TYPES
    { "INTEGER",     "type_name" },
    { "REAL",        "type_name" },
    { "BOOLEAN",     "type_name" },
    // => FUNCTIONS
    { "TRUNC",       "function"  },
    { "FLOAT",       "function"  },
    // => MODULES
    { "InOut",       "module"    },
    { "RealIO",      "module"    },
    // => PROCEDURES
    { "WriteLn",     "procedure" },
    { "WriteString", "procedure" },
    { "ReadInt",     "procedure" },
    { "WriteInt",    "procedure" },
    { "ReadReal",    "procedure" },
    { "WriteReal",   "procedure" },
    // NOTE(review): "WriteRealg" LOOKS LIKE A MISSPELLING OF "WriteReal"; IT
    //               IS KEPT SO INPUT THAT RELIED ON IT IS CLASSIFIED AS BEFORE.
    { "WriteRealg",  "procedure" },
    // => RESERVED WORDS
    { "MODULE",      "module"    },
    { "FROM",        "from"      },
    { "IMPORT",      "import"    },
    { "BEGIN",       "begin"     },
    { "END",         "end"       },
    { "VAR",         "var"       },
    { "TYPE",        "type"      },
    { "PROCEDURE",   "procedure" },
    { "WHILE",       "while"     },
    { "DO",          "do"        },
    { "REPEAT",      "repeat"    },
    { "UNTIL",       "until"     },
    { "IF",          "if"        },
    { "THEN",        "then"      },
    { "ELSE",        "else"      },
    { "RETURN",      "return"    },
    // NOTE(review): UNLIKE THE OTHER NAMES THIS ONE IS UPPER CASE; LEFT AS IT
    //               WAS IN CASE A CALLER ALREADY MATCHES ON "ARRAY".
    { "ARRAY",       "ARRAY"     },
    { "OF",          "of"        }
  };

  /*************************************************************************
   * Method: setup
   * Purpose: Creates and configures the tokenizer that reads the input.
   * Input: --PARAMETERS--
   *        => 'br_file_in' = The stream of input.
   * Output: --RETURNS--
   *         => A StreamTokenizer over 'br_file_in' with number parsing on
   *            and '/' delivered as an ordinary character.
   *************************************************************************/
  public static StreamTokenizer setup(BufferedReader br_file_in)
  {
    StreamTokenizer stmT_file_in = new StreamTokenizer(br_file_in);

    stmT_file_in.parseNumbers();

    // A NEW StreamTokenizer TREATS '/' AS A COMMENT CHARACTER AND DISCARDS
    // THE REST OF THE LINE.  MAKE IT ORDINARY SO THE '/' OPERATOR IS SEEN.
    stmT_file_in.ordinaryChar('/');

    return(stmT_file_in);
  } // method setup

  /*************************************************************************
   * Method: tokenize
   * Purpose: Reads one raw token from the tokenizer and records its type,
   *          text and line number in a new Token.
   * Input: --PARAMETERS--
   *        => 'stmT_file_in' = The stream of input.
   * Output: --RETURNS--
   *         => The next token in the input stream, or null when reading
   *            the input failed (a message is printed in that case).
   * Notes: - Words longer than 'id_limit' characters are cut to their
   *          first 'id_limit' characters.
   *        - For an end-of-file token only 'int_ttype', 'str_ttype',
   *          'str_original' and 'int_line' are assigned here.
   *        - If the closing apostrophe of a string constant is missing,
   *          the remainder of the line is taken as the string constant.
   *************************************************************************/
  public static Token tokenize(StreamTokenizer stmT_file_in)
  {
    /* LOCAL VARIABLES */
    Token Token_input = new Token();  // TOKEN TAKEN FROM INPUT STREAM

    try
    {
      Token_input.int_ttype = stmT_file_in.nextToken();
    } // try
    /* EXCEPTION HANDLING */
    catch(IOException exception)
    {
      System.out.println("\n FATAL EXCEPTION: File input problem.\n");
      return(null);
    } // catch

    if(b_debug)
      System.out.println("Type value is "+Token_input.int_ttype);

    Token_input.str_original = stmT_file_in.toString();
    Token_input.int_line = stmT_file_in.lineno();

    if(b_debug)
      System.out.println("Reading "+Token_input.str_original+".");

    /* JAVA'S TOKEN TYPE */
    switch(Token_input.int_ttype)
    {
      /* EOF */
      case StreamTokenizer.TT_EOF:
        Token_input.str_ttype = "TT_EOF";
        break;

      /* NUMBER */
      case StreamTokenizer.TT_NUMBER:
        Token_input.str_ttype = "TT_NUMBER";
        Token_input.str_actual = String.valueOf(stmT_file_in.nval);
        Token_input.str_name = "number";
        break;

      /* WORD */
      case StreamTokenizer.TT_WORD:
        Token_input.str_ttype = "TT_WORD";

        // OBSERVE IDENTIFIER LIMIT: KEEP AT MOST THE FIRST 'id_limit' CHARACTERS
        if( stmT_file_in.sval.length() <= id_limit )
          Token_input.str_actual = stmT_file_in.sval;
        else
          Token_input.str_actual = stmT_file_in.sval.substring(0,id_limit);

        Token_input.str_name = "identifier";
        break;

      /* STRING CONSTANT */
      case ucode_apostrophe:
        Token_input.str_ttype = "none";
        Token_input.str_actual = stmT_file_in.sval;
        Token_input.str_name = "str_const";
        break;

      /* OTHER */
      default:
        Token_input.str_ttype = "none";

        // FOR A SINGLE CHARACTER TOKEN THE TOKEN TYPE IS THE CHARACTER ITSELF
        Token_input.str_actual = String.valueOf( (char)stmT_file_in.ttype );
        break;
    } // switch

    return(Token_input);
  } // method tokenize

  /*************************************************************************
   * Method: process
   * Purpose: Reads the next token and gives it its final name, joining
   *          two-character symbols (":=", "<=", ">=", "..") and skipping
   *          over "(* ... *)" comments, which may be nested.
   * Input: --PARAMETERS--
   *        => 'stmT_file_in' = The stream of input.
   * Output: --RETURNS--
   *         => The classified token.  A comment is returned as one token
   *            named "comment" whose text is "n/a".
   *         => The end-of-file token when the input ends, including when
   *            it ends inside a comment (an error is printed then).
   *         => null when reading the input failed.
   * Notes: - Numbers, string constants and end-of-file are returned just
   *          as tokenize produced them, so the text inside a string
   *          constant is never mistaken for a symbol or reserved word.
   *        - NOTE(review): with number parsing on, the tokenizer appears
   *          to read a lone '.' as a number, so the period and ellipsis
   *          cases may never be reached -- confirm against the
   *          StreamTokenizer documentation.
   *************************************************************************/
  public static Token process(StreamTokenizer stmT_file_in)
  {
    /* LOCAL VARIABLES */
    Token Token_current = tokenize(stmT_file_in); // TOKEN BEING CLASSIFIED

    // INPUT FAILURE WAS ALREADY REPORTED BY tokenize
    if(Token_current == null)
      return(null);

    switch(Token_current.int_ttype)
    {
      /* ALREADY FULLY DESCRIBED BY tokenize */
      case StreamTokenizer.TT_EOF:
      case StreamTokenizer.TT_NUMBER:
      case ucode_apostrophe:
        break;

      /* RESERVED WORDS AND IDENTIFIERS */
      case StreamTokenizer.TT_WORD:
        nameWord(Token_current);
        break;

      /* COMMENTS AND LEFT PARANTHESE */
      case '(':
        if( nextIs(stmT_file_in,'*') )
          return( skipComment(stmT_file_in,Token_current) );

        Token_current.str_name = "l_paran";
        break;

      /* SPECIAL CHARACTERS */
      // => ASSIGNMENT AND COLON
      case ':':
        if( nextIs(stmT_file_in,'=') )
        {
          Token_current.str_actual = ":=";
          Token_current.str_name = "assignment";
        } // if
        else
          Token_current.str_name = "colon";
        break;
      // => ELLIPSIS AND PERIOD
      case '.':
        if( nextIs(stmT_file_in,'.') )
        {
          Token_current.str_actual = "..";
          Token_current.str_name = "ellipsis";
        } // if
        else
          Token_current.str_name = "period";
        break;
      // => SEMICOLON
      case ';':
        Token_current.str_name = "semicolon";
        break;
      // => RIGHT PARANTHESE
      case ')':
        Token_current.str_name = "r_paran";
        break;
      // => LEFT BRACKET
      case '[':
        Token_current.str_name = "l_bracket";
        break;
      // => RIGHT BRACKET
      case ']':
        Token_current.str_name = "r_bracket";
        break;
      // => COMMA
      case ',':
        Token_current.str_name = "comma";
        break;

      /* OPERATORS */
      // => ADDITION
      case '+':
      case '-':
        Token_current.str_name = "add_op";
        break;
      // => MULTIPLICATION ("DIV" AND "MOD" ARE HANDLED WITH THE WORDS)
      case '*':
      case '/':
        Token_current.str_name = "mul_op";
        break;
      // => RELATIONAL: "<", "<=", ">", ">="
      case '<':
      case '>':
        if( nextIs(stmT_file_in,'=') )
          Token_current.str_actual = Token_current.str_actual+"=";

        Token_current.str_name = "rel_op";
        break;
      // => RELATIONAL: "=", "#"
      case '=':
      case '#':
        Token_current.str_name = "rel_op";
        break;

      /* DEFAULT: ANY OTHER CHARACTER IS RETURNED UNNAMED */
      default:
        break;
    } // switch

    return(Token_current);
  } // method process

  /*************************************************************************
   * Method: nextIs
   * Purpose: Looks one token ahead for a given single-character symbol.
   *          The token is consumed when it matches and pushed back onto
   *          the stream when it does not.
   * Input: --PARAMETERS--
   *        => 'stmT_file_in' = The stream of input.
   *        => 'ch_wanted'    = The character that is hoped for.
   * Output: --RETURNS--
   *         => true if the next token is 'ch_wanted', false otherwise
   *            (also false when reading the input failed).
   *************************************************************************/
  private static boolean nextIs(StreamTokenizer stmT_file_in, char ch_wanted)
  {
    /* LOCAL VARIABLES */
    Token Token_next = tokenize(stmT_file_in); // NEXT TOKEN IN INPUT STREAM

    if(Token_next == null)
      return(false);

    // COMPARING THE TOKEN TYPE (NOT THE TEXT) KEEPS A STRING CONSTANT SUCH
    // AS '=' FROM BEING TAKEN FOR THE SYMBOL
    if(Token_next.int_ttype == ch_wanted)
      return(true);

    stmT_file_in.pushBack();
    return(false);
  } // method nextIs

  /*************************************************************************
   * Method: skipComment
   * Purpose: Reads up to the ending marker that matches a beginning
   *          comment marker which has just been consumed, counting any
   *          nested comments on the way.
   * Input: --PARAMETERS--
   *        => 'stmT_file_in' = The stream of input.
   *        => 'Token_start'  = The '(' token that began the comment.
   * Output: --RETURNS--
   *         => 'Token_start' renamed to "comment" when the comment is
   *            closed, the end-of-file token when the input ends first,
   *            or null when reading the input failed.
   *************************************************************************/
  private static Token skipComment(StreamTokenizer stmT_file_in, Token Token_start)
  {
    /* LOCAL VARIABLES */
    Token Token_next;     // TOKEN INSIDE THE COMMENT
    int int_comments = 1; // COMMENT COUNTER (NUMBER OF COMMENTS STILL OPEN)

    if(b_debug)
      System.out.println("\n COMMENT: Beginning marker found.\n");

    do
    {
      Token_next = tokenize(stmT_file_in);

      if(Token_next == null)
        return(null);

      // => NESTED COMMENTS: BEGINNING MARKER
      if(Token_next.int_ttype == '(')
      {
        // A NON-MATCHING TOKEN IS PUSHED BACK IN CASE STRING IS
        // "(*(<substring>*)"
        if( nextIs(stmT_file_in,'*') )
        {
          int_comments++;
          if(b_debug)
            System.out.println("\n COMMENT: Beginning marker found.\n");
        } // if
      } // if
      // => SINGLE AND NESTED COMMENTS: ENDING MARKER
      else if(Token_next.int_ttype == '*')
      {
        // A NON-MATCHING TOKEN IS PUSHED BACK IN CASE STRING IS
        // "(**<substring>*)"
        if( nextIs(stmT_file_in,')') )
        {
          int_comments--;
          if(b_debug)
            System.out.println("\n COMMENT: Ending marker found.\n");
        } // if
      } // else if
    } while( int_comments > 0 &&
             Token_next.int_ttype != StreamTokenizer.TT_EOF );

    // THE LOOP ONLY ENDS WITH A COMMENT STILL OPEN WHEN THE INPUT RAN OUT
    if(int_comments > 0)
    {
      System.out.println("\n LEX ERROR: ')' portion of ending comment "+
                         "marker expected on line "+Token_next.int_line+".\n");

      return(Token_next);
    } // if

    Token_start.str_actual = "n/a";
    Token_start.str_name = "comment";

    return(Token_start);
  } // method skipComment

  /*************************************************************************
   * Method: nameWord
   * Purpose: Gives a word token the name listed for it in 'word_names'.
   *          A word that is not listed is left unchanged.
   * Input: --PARAMETERS--
   *        => 'Token_word' = The word token to name.
   * Output: --RETURNS--
   *         => NONE ('Token_word' is updated in place)
   *************************************************************************/
  private static void nameWord(Token Token_word)
  {
    for(int int_index = 0; int_index < word_names.length; int_index++)
    {
      if( word_names[int_index][0].equals(Token_word.str_actual) )
      {
        Token_word.str_name = word_names[int_index][1];
        return;
      } // if
    } // for
  } // method nameWord
} // class Lex

