?? RIGHT := 110 ??
?? NEWTITLE := 'NOS/VE SCL Interpreter : Token Scanner' ??
MODULE clm$scan_token;

{
{ PURPOSE:
{   This module contains routines  used to perform lexical
{   analysis on SCL input text.
{
{ DESIGN:
{   The design is essentially "ad hoc" in that the definition of the lexical
{   structure of SCL is imbedded in executable code as well as in the tables
{   that are used.  The detailed design of the code is oriented to making
{   extensive use of those CYBER 180 BDP instructions externalized in CYBIL
{   as intrinsics.
{

?? NEWTITLE := 'Global Declarations', EJECT ??
?? PUSH (LISTEXT := ON) ??
*copyc cle$ecc_command_processing
*copyc cle$ecc_lexical
*copyc cle$ecc_miscellaneous
*copyc clt$character_class
*copyc clt$command_line
*copyc clt$command_line_index
*copyc clt$token
*copyc oss$job_paged_literal
?? POP ??
*copyc clp$skip_spaces_and_comments
*copyc clv$comment_delimiter
*copyc clv$hex_digits
*copyc clv$isolate_balanced_text
*copyc clv$non_alphanumeric
*copyc clv$non_decimal_digit
*copyc clv$non_space
*copyc clv$string_delimiter
*copyc osp$establish_condition_handler
*copyc osp$set_status_abnormal
*copyc osv$lower_to_upper
*copyc pmp$continue_to_cause
?? TITLE := 'clv$character_class', EJECT ??

  VAR
    clv$character_class: [XDCL, #GATE, READ, oss$job_paged_literal] array [char] of clt$character_class := [
          {---} REP 9 of clc$other_character,
          {HT } clc$space_character,
          {---} REP 22 of clc$other_character,
          {- -} clc$space_character,
          { ! } clc$other_character,
          { " } clc$comment_delimiter_character,
          { # } clc$alpha_character,
          { $ } clc$alpha_character,
          { % } clc$other_character,
          { & } clc$other_character,
          { ' } clc$string_delimiter_character,
          { ( } clc$token_character,
          { ) } clc$token_character,
          { * } clc$digraph_token_character,
          { + } clc$token_character,
          { , } clc$token_character,
          { - } clc$token_character,
          { . } clc$digraph_token_character,
          { / } clc$digraph_token_character,
          {0..9} REP 10 of clc$digit_character,
          { : } clc$token_character,
          { ; } clc$token_character,
          { < } clc$digraph_token_character,
          { = } clc$token_character,
          { > } clc$digraph_token_character,
          { ? } clc$token_character,
          { @ } clc$alpha_character,
          {A..Z} REP 26 of clc$alpha_character,
          { [ } clc$alpha_character,
          { \ } clc$alpha_character,
          { ] } clc$alpha_character,
          { ^ } clc$alpha_character,
          { _ } clc$alpha_character,
          { ` } clc$alpha_character,
          {a..z} REP 26 of clc$alpha_character,
          { { } clc$alpha_character,
          { | } clc$alpha_character,
          { } clc$alpha_character,
          { ~ } clc$alpha_character,
          {---} REP 129 of clc$other_character];

?? TITLE := 'clv$isolate_command', EJECT ??

  VAR
    clv$isolate_command: [STATIC, READ, oss$job_paged_literal] packed array [char] of boolean := [
          {---} REP 34 of FALSE,
          { " } TRUE,
          {---} REP 4 of FALSE,
          { ' } TRUE,
          { ( } TRUE,
          { ) } TRUE,
          {---} REP 17 of FALSE,
          { ; } TRUE,
          {---} REP 196 of FALSE];

?? TITLE := 'clv$non_hex_digit', EJECT ??

  VAR
    clv$non_hex_digit: [XDCL, #GATE, READ, oss$job_paged_literal] packed array [char] of boolean := [
          {---} REP 48 of TRUE,
          {0..9} REP 10 of FALSE,
          {---} REP 7 of TRUE,
          {A..F} REP 6 of FALSE,
          {---} REP 26 of TRUE,
          {a..f} REP 6 of FALSE,
          {---} REP 153 of TRUE];

?? TITLE := 'clp$scan_token', EJECT ??
*copy clh$scan_token

  PROCEDURE [XDCL, #GATE] clp$scan_token ALIAS 'clpstok'
    (    text: string ( * );
     VAR index {input, output} : ost$string_index;
     VAR token: clt$token;
     VAR status: ost$status);

    VAR
      single_character_tokens: [STATIC, READ, oss$job_paged_literal] array ['(' .. '}'] of
            clt$lexical_kinds := [
            { ( } clc$lparen_token,
            { ) } clc$rparen_token,
            { * } clc$mult_token,
            { + } clc$add_token,
            { , } clc$comma_token,
            { - } clc$sub_token,
            { . } clc$dot_token,
            { / } clc$div_token,
            {0..9} REP 10 of * ,
            { : } clc$colon_token,
            { ; } clc$semicolon_token,
            { < } clc$lt_token,
            { = } clc$eq_token,
            { > } clc$gt_token,
            { ? } clc$query_token,
            { @ } * ,
            {A..Z} REP 26 of * ,
            { [ } clc$lbracket_token,
            { \ } clc$rslant_token,
            { ] } clc$rbracket_token,
            { ^ } clc$uparrow_token,
            { _ } * ,
            { ` } * ,
            {a..z} REP 26 of * ,
            { { } clc$lbrace_token,
            { | } * ,
            { } clc$rbrace_token];

    VAR
      digraph_tokens: [STATIC, READ, oss$job_paged_literal] array [0 .. 5] of record
        digraph: string (2),
        token: clt$lexical_kinds,
      recend := [['**', clc$exp_token], ['..', clc$ellipsis_token], ['//', clc$cat_token],
            ['<=', clc$le_token], ['<>', clc$ne_token], ['>=', clc$ge_token]];

    VAR
      spaces_before_not_part_of_token: [STATIC, READ, oss$job_paged_literal] set of clt$lexical_kinds :=
            [clc$unknown_token, clc$dot_token, clc$colon_token, clc$lparen_token, clc$lbracket_token,
            clc$lbrace_token, clc$uparrow_token, clc$query_token, clc$add_token, clc$sub_token,
            clc$string_token, clc$name_token, clc$integer_token, clc$real_token];

    VAR
      spaces_after_not_part_of_token: [STATIC, READ, oss$job_paged_literal] set of clt$lexical_kinds :=
            [clc$unknown_token, clc$dot_token, clc$colon_token, clc$rparen_token, clc$rbracket_token,
            clc$rbrace_token, clc$uparrow_token, clc$string_token, clc$name_token, clc$integer_token,
            clc$real_token];

    VAR
      control_characters: [STATIC, READ, oss$job_paged_literal] array [$CHAR (0) .. $CHAR (20(16))] of
            string (3) := ['NUL', 'SOH', 'STX', 'ETX', 'EOT', 'ENQ', 'ACK', 'BEL', 'BS', 'HT', 'LF', 'VT',
            'FF', 'CR', 'SO', 'SI', 'DLE', 'DC1', 'DC2', 'DC3', 'DC4', 'NAK', 'SYN', 'ETB', 'CAN', 'EM',
            'SUB', 'ESC', 'FS', 'GS', 'RS', 'US', 'SP'];

    VAR
      scan_found_char: boolean,
      scan_index: integer;

    VAR
      end_token_index: ost$string_index,
      found_leading_spaces: boolean,
      found_trailing_spaces: boolean,
      ignore_spaces: boolean,
      token_index: ost$string_index;

    VAR
      digraph_index: 0 .. 5;

    VAR
      end_integer_index: ost$string_index,
      end_radix_index: ost$string_index,
      hex_digit_present: boolean,
      lparen_follows: boolean,
      radix: integer,
      radix_index: ost$string_index;

    VAR
      end_fraction_index: ost$string_index,
      exponent: integer,
      exponent_index: ost$string_index,
      exponent_sign: -1 .. 1;

    VAR
      string_index: ost$string_index,
      string_part_size: ost$string_size;

    VAR
      local_status: ost$status;

    VAR
      translate_name_kludge: ^string ( * <= osc$max_name_size);


    IF STRLENGTH (text) > osc$max_string_size THEN
      osp$set_status_abnormal ('CL', cle$line_too_long, '', status);
      RETURN;
    IFEND;

  /scan/
    BEGIN

      status.normal := TRUE;
      local_status.normal := TRUE;
      token.text_index := index;
      clp$skip_spaces_and_comments (text, index, token_index, found_leading_spaces);
      IF token_index > STRLENGTH (text) THEN
        token.descriptor := 'END OF LINE';
        token.kind := clc$eol_token;
        token.str.size := 0;
        token.str.value := '';
        index := token_index;
        token.text_size := index - token.text_index;
        EXIT /scan/;
      IFEND;

    /scan_the_token/
      BEGIN
        CASE clv$character_class [text (token_index)] OF

        = clc$other_character =

          CASE text (token_index) OF
          = $CHAR (0) .. $CHAR (20(16)) =
            token.descriptor := 'CHARACTER';
            token.descriptor (11, 3) := control_characters [text (token_index)];
          = $CHAR (21(16)) .. $CHAR (7e(16)) =
            token.descriptor := ''' ''';
            token.descriptor (2) := text (token_index);
          = $CHAR (7f(16)) =
            token.descriptor := 'CHARACTER DEL';
          = $CHAR (80(16)) .. $CHAR (0ff(16)) =
            token.descriptor := 'CHARACTER 0xx(16)';
            token.descriptor (12) := clv$hex_digits [$INTEGER (text (token_index)) DIV 16];
            token.descriptor (13) := clv$hex_digits [$INTEGER (text (token_index)) MOD 16];
          CASEND;
          token.kind := clc$unknown_token;
          token.str.size := 1;
          token.str.value := text (token_index, 1);
          end_token_index := token_index + 1;

        = clc$token_character =

          token.descriptor := ''' ''';
          token.descriptor (2) := text (token_index);
          token.kind := single_character_tokens [text (token_index)];
          token.str.size := 1;
          token.str.value := text (token_index, 1);
          end_token_index := token_index + 1;

        = clc$digraph_token_character =

          token.descriptor := ''' ''';
          token.descriptor (2) := text (token_index);
          token.kind := single_character_tokens [text (token_index)];
          token.str.size := 1;
          token.str.value := text (token_index, 1);
          end_token_index := token_index + 1;
          IF end_token_index <= STRLENGTH (text) THEN

          /check_digraph/
            FOR digraph_index := LOWERBOUND (digraph_tokens) TO UPPERBOUND (digraph_tokens) DO
              IF text (token_index, 2) = digraph_tokens [digraph_index].digraph THEN
                token.descriptor (3) := text (end_token_index);
                token.descriptor (4) := '''';
                token.kind := digraph_tokens [digraph_index].token;
                token.str.size := 2;
                token.str.value (2) := text (end_token_index);
                end_token_index := end_token_index + 1;
                IF token.kind = clc$ellipsis_token THEN
                  WHILE (end_token_index <= STRLENGTH (text)) AND (text (end_token_index) = '.') DO
                    end_token_index := end_token_index + 1;
                  WHILEND;
                IFEND;
                EXIT /check_digraph/;
              IFEND;
            FOREND /check_digraph/;
          IFEND;

        = clc$digit_character =

          IF found_leading_spaces THEN
            token.kind := clc$integer_token;
            EXIT /scan_the_token/;
          IFEND;
          #SCAN (clv$non_hex_digit, text (token_index, * ), scan_index, scan_found_char);
          end_integer_index := scan_index + token_index - 1;
          IF scan_found_char AND (clv$character_class [text (end_integer_index)] = clc$alpha_character) THEN
            osp$set_status_abnormal ('CL', cle$alpha_char_in_number,
                  text (token_index, end_integer_index - token_index + 1), local_status);
            EXIT /scan_the_token/;
          IFEND;
          lparen_follows := scan_found_char AND (text (end_integer_index) = '(');
          #SCAN (clv$non_decimal_digit, text (token_index, end_integer_index - token_index), scan_index,
                hex_digit_present);
          IF hex_digit_present AND (NOT lparen_follows) THEN
            osp$set_status_abnormal ('CL', cle$missing_radix,
                  text (token_index, end_integer_index - token_index), local_status);
            EXIT /scan_the_token/;
          IFEND;

          IF (end_integer_index <= STRLENGTH (text)) AND (text (end_integer_index) = '.') THEN

          /scan_real/
            BEGIN

              token.descriptor := 'REAL NUMBER';
              token.kind := clc$real_token;
              #SCAN (clv$non_decimal_digit, text (end_integer_index + 1, * ), scan_index, scan_found_char);
              IF scan_index <= 1 THEN
                EXIT /scan_real/;
              IFEND;
              end_fraction_index := scan_index + end_integer_index;
              exponent := 0;
              end_token_index := end_fraction_index;
              IF scan_found_char AND (clv$character_class [text (end_fraction_index)] =
                    clc$alpha_character) THEN
                IF (text (end_fraction_index) <> 'E') AND (text (end_fraction_index) <> 'e') THEN
                  osp$set_status_abnormal ('CL', cle$alpha_char_in_fraction,
                        text (token_index, end_fraction_index - token_index + 1), local_status);
                  EXIT /scan_the_token/;
                IFEND;
                exponent_index := end_fraction_index + 1;
                exponent_sign := 1;
                IF exponent_index <= STRLENGTH (text) THEN
                  IF text (exponent_index) = '-' THEN
                    exponent_sign := -1;
                    exponent_index := exponent_index + 1;
                  ELSE
                    exponent_index := exponent_index + $INTEGER (text (exponent_index) = '+');
                  IFEND;
                IFEND;
                #SCAN (clv$non_decimal_digit, text (exponent_index, * ), scan_index, scan_found_char);
                end_token_index := scan_index + exponent_index - 1;
                IF end_token_index <= exponent_index THEN
                  osp$set_status_abnormal ('CL', cle$missing_exponent,
                        text (token_index, end_token_index - token_index + 1), local_status);
                  EXIT /scan_the_token/;
                ELSEIF scan_found_char AND (clv$character_class [text (end_token_index)] =
                      clc$alpha_character) THEN
                  osp$set_status_abnormal ('CL', cle$alpha_char_in_exponent,
                        text (token_index, end_token_index - token_index + 1), local_status);
                  EXIT /scan_the_token/;
                IFEND;
                convert_to_integer (text (exponent_index, end_token_index - exponent_index), 10, exponent,
                      local_status);
                IF NOT local_status.normal THEN
                  osp$set_status_abnormal ('CL', cle$exponent_too_large,
                        text (token_index, end_token_index - token_index), local_status);
                  EXIT /scan_the_token/;
                IFEND;
                exponent := exponent_sign * exponent;
              IFEND;

              osp$set_status_abnormal ('CL', cle$not_yet_implemented, 'Reals', local_status);
              EXIT /scan_the_token/;

            END /scan_real/;
          IFEND;

          token.descriptor := 'INTEGER';
          token.kind := clc$integer_token;
          token.int.radix := 10;
          token.int.radix_specified := lparen_follows;
          end_token_index := end_integer_index;
          IF lparen_follows THEN
            clp$skip_spaces_and_comments (text, end_integer_index + 1, radix_index, ignore_spaces);
            #SCAN (clv$non_decimal_digit, text (radix_index, * ), scan_index, scan_found_char);
            end_radix_index := scan_index + radix_index - 1;
            clp$skip_spaces_and_comments (text, end_radix_index, end_token_index, ignore_spaces);
            IF NOT ((end_token_index <= STRLENGTH (text)) AND (text (end_token_index) = ')')) THEN
              osp$set_status_abnormal ('CL', cle$improper_radix_spec,
                    text (token_index, end_token_index - token_index), local_status);
              EXIT /scan_the_token/;
            IFEND;
            end_token_index := end_token_index + 1;
            convert_to_integer (text (radix_index, end_radix_index - radix_index), 10, radix, local_status);
            IF local_status.normal AND (2 <= radix) AND (radix <= 16) THEN
              token.int.radix := radix;
            ELSE
              osp$set_status_abnormal ('CL', cle$improper_radix_value,
                    text (token_index, end_token_index - token_index), local_status);
              EXIT /scan_the_token/;
            IFEND;
          IFEND;
          convert_to_integer (text (token_index, end_integer_index - token_index), token.int.radix,
                token.int.value, local_status);

        = clc$alpha_character =

          token.kind := clc$name_token;
          IF found_leading_spaces THEN
            EXIT /scan_the_token/;
          IFEND;
          token.descriptor := 'NAME';
          #SCAN (clv$non_alphanumeric, text (token_index, * ), scan_index, scan_found_char);
          end_token_index := scan_index + token_index - 1;
          IF (end_token_index - token_index) > osc$max_name_size THEN
            osp$set_status_abnormal ('CL', cle$name_too_long,
                  text (token_index, end_token_index - token_index), local_status);
            EXIT /scan_the_token/;
          IFEND;
          token.name.size := end_token_index - token_index;
          translate_name_kludge := ^text (token_index, token.name.size);
          #TRANSLATE (osv$lower_to_upper, translate_name_kludge^, token.name.value);

        = clc$string_delimiter_character =

          token.kind := clc$string_token;
          IF found_leading_spaces THEN
            EXIT /scan_the_token/;
          IFEND;
          token.descriptor := 'STRING';
          token.str.size := 0;
          string_index := token_index + 1;

        /scan_string/
          WHILE TRUE DO
            #SCAN (clv$string_delimiter, text (string_index, * ), scan_index, scan_found_char);
            end_token_index := scan_index + string_index - 1;
            IF NOT scan_found_char THEN
              osp$set_status_abnormal ('CL', cle$missing_string_delimiter,
                    text (token_index, end_token_index - token_index), local_status);
              EXIT /scan_the_token/;
            IFEND;
            string_part_size := scan_index - 1;
            IF string_part_size > 0 THEN
              token.str.value (token.str.size + 1, string_part_size) := text (string_index, string_part_size);
            IFEND;
            token.str.size := token.str.size + string_part_size;
            end_token_index := end_token_index + 1;
            IF NOT ((end_token_index <= STRLENGTH (text)) AND (text (end_token_index) = '''')) THEN
              EXIT /scan_string/;
            IFEND;
            token.str.size := token.str.size + 1;
            token.str.value (token.str.size) := '''';
            string_index := end_token_index + 1;
          WHILEND /scan_string/;

        CASEND;
      END /scan_the_token/;

      IF found_leading_spaces AND (token.kind IN spaces_before_not_part_of_token) THEN
        token.descriptor := 'SPACE';
        token.kind := clc$space_token;
        token.str.size := 1;
        token.str.value := '';
        index := token_index;
        token.text_size := index - token.text_index;
        status.normal := TRUE;
        EXIT /scan/;
      IFEND;
      IF NOT local_status.normal THEN
        status := local_status;
        EXIT /scan/;
      IFEND;
      IF token.kind IN spaces_after_not_part_of_token THEN
        index := end_token_index;
      ELSE
        clp$skip_spaces_and_comments (text, end_token_index, index, found_trailing_spaces);
      IFEND;
      token.text_size := index - token.text_index;

    END /scan/;

  PROCEND clp$scan_token;
?? TITLE := 'convert_to_integer', EJECT ??
{
{ PURPOSE:
{   This procedure converts the string representation of an integer to the
{   corresponding integer value.
{

  PROCEDURE convert_to_integer
    (    text: string ( * );
         radix: 2 .. 16;
     VAR result: integer;
     VAR status: ost$status);

    VAR
      digit: 0 .. 15,
      i: ost$string_index;

?? NEWTITLE := 'condition_handler', EJECT ??

    PROCEDURE condition_handler
      (    condition: pmt$condition;
           ignore_cond_desc: ^pmt$condition_information;
           ignore_save_area: ^ost$stack_frame_save_area;
       VAR handler_status: ost$status);

      IF (condition.selector = pmc$system_conditions) AND (pmc$arithmetic_overflow IN
            condition.system_conditions) THEN
        osp$set_status_abnormal ('CL', cle$integer_too_large, text, status);
        EXIT convert_to_integer;
      IFEND;

      pmp$continue_to_cause (pmc$execute_standard_procedure, handler_status);

    PROCEND condition_handler;
?? OLDTITLE, EJECT ??

    osp$establish_condition_handler (^condition_handler, FALSE);

    result := 0;
    FOR i := 1 TO STRLENGTH (text) DO
      CASE text (i) OF
      = '0' .. '9' =
        digit := $INTEGER (text (i)) - $INTEGER ('0');
      = 'A' .. 'F' =
        digit := $INTEGER (text (i)) - $INTEGER ('A') + 10;
      = 'a' .. 'f' =
        digit := $INTEGER (text (i)) - $INTEGER ('a') + 10;
      ELSE
        osp$set_status_abnormal ('CL', cle$expecting_digit, text, status);
        RETURN;
      CASEND;
      IF digit >= radix THEN
        osp$set_status_abnormal ('CL', cle$digit_too_large, text, status);
        RETURN;
      IFEND;
      result := (result * radix) + digit;
    FOREND;

  PROCEND convert_to_integer;
?? TITLE := 'clp$isolate_command', EJECT ??
*copyc clh$isolate_command

  PROCEDURE [XDCL, #GATE] clp$isolate_command
    (    text: clt$command_line;
         start_index: clt$command_line_index;
     VAR end_index: clt$command_line_index);

    VAR
      found_char: boolean,
      nesting_level: integer,
      scan_index: integer,
      text_index: clt$command_line_index;


    text_index := start_index;
    nesting_level := 0;
    found_char := TRUE;

  /scan_loop/
    WHILE found_char AND (text_index <= STRLENGTH (text)) DO
      #SCAN (clv$isolate_command, text (text_index, * ), scan_index, found_char);
      text_index := scan_index + text_index - 1;
      IF found_char THEN
        CASE text (text_index) OF
        = '"' =
          #SCAN (clv$comment_delimiter, text (text_index + 1, * ), scan_index, found_char);
          text_index := scan_index + text_index + $INTEGER (found_char);
        = '''' =
          #SCAN (clv$string_delimiter, text (text_index + 1, * ), scan_index, found_char);
          text_index := scan_index + text_index + $INTEGER (found_char);
        = '(' =
          nesting_level := nesting_level + 1;
          text_index := text_index + 1;
        = ')' =
          nesting_level := nesting_level - 1;
          text_index := text_index + 1;
        = ';' =
          IF nesting_level <= 0 THEN
            EXIT /scan_loop/;
          IFEND;
          text_index := text_index + 1;
        ELSE
          text_index := text_index + 1;
        CASEND;
      IFEND;
    WHILEND /scan_loop/;

    end_index := text_index;

  PROCEND clp$isolate_command;

MODEND clm$scan_token;
