-- Copyright (C) 2011, Gabriel Dos Reis.
-- All rights reserved.
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions are
-- met:
--
--     - Redistributions of source code must retain the above copyright
--       notice, this list of conditions and the following disclaimer.
--
--     - Redistributions in binary form must reproduce the above copyright
--       notice, this list of conditions and the following disclaimer in
--       the documentation and/or other materials provided with the
--       distribution.
--
--     - Neither the name of OpenAxiom. nor the names of its contributors
--       may be used to endorse or promote products derived from this
--       software without specific prior written permission.
--
-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
-- IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-- TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-- PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
-- OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--

--%
--% Author: Gabriel Dos Reis
--%

import sys_-utility
import sys_-macros

namespace BOOT

module lexing where
  matchString: %String -> %Maybe %Short
  matchAdvanceString: %String -> %Maybe %Short
  matchAdvanceKeyword: %Symbol -> %Thing

--%
--% Line abstract datatype
--%   structure Line ==
--%     Record(buffer: String, curChar: Character,
--%       curIdx: SingleInteger, lstIdx: SingleInteger, lineNo: SingleInteger)
--%
makeLine(buf == makeString 0, ch == charByName "Return",
          curIdx == 1, lstIdx == 0, no == 0) ==
  [buf,ch,curIdx,lstIdx,no]

macro lineBuffer l ==
  first l

macro lineCurrentChar l ==
  second l

macro lineCurrentIndex l ==
  third l

macro lineLastIndex l ==
  fourth l

macro lineNumber l ==
  fifth l

lineClear! l ==
  lineBuffer(l) := makeString 0
  lineCurrentChar(l) := charByName "Return"
  lineCurrentIndex(l) := 1
  lineLastIndex(l) := 0
  lineNumber(l) := 0

++ Sets string to be the next line stored in line
lineNewLine!(s,l,no == nil) ==
  sz := #s
  lineLastIndex(l) := sz - 1
  lineCurrentIndex(l) := 0
  lineCurrentChar(l) := sz > 0 and s.0 or charByName '"Return"
  lineBuffer(l) := s
  lineNumber(l) := no or (lineNumber l + 1)

++ Tests if line is empty or positioned past the last character
lineAtEnd? l ==
  lineCurrentIndex l >= lineLastIndex l

++ Tests if line is empty or positioned past the last character
linePastEnd? l ==
  lineCurrentIndex l > lineLastIndex l

++ Buffer from current index to last index
lineCurrentSegment l ==
  lineAtEnd? l => makeString 0
  subSequence(lineBuffer l,lineCurrentIndex l,lineLastIndex l)

lineNextChar l ==
  lineBuffer(l).(1 + lineCurrentIndex l)

lineAdvanceChar! l ==
  n := lineCurrentIndex l + 1
  lineCurrentIndex(l) := n
  lineCurrentChar(l) := lineBuffer(l).n

++ Current input line
$spadLine := makeLine()

++ List of lines returned from PREPARSE
$lineStack := nil

nextLine st ==
  $lineStack = nil => nil
  [[n,:l],:$lineStack] := $lineStack
  l := strconc(l,'" ")
  lineNewLine!(l,$spadLine,n)
  SETQ(LINE,l)
  $currentLine := l

++ Current input stream.  
IN_-STREAM := 'T
  
++ Advances IN-STREAM, invoking Next Line if necessary
advanceChar!() ==
  repeat
    not lineAtEnd? $spadLine => return lineAdvanceChar! $spadLine
    nextLine IN_-STREAM => return currentChar()
    return nil

--%

++ Returns the current character of the line, initially blank for
++ an unread line
currentChar() ==
  linePastEnd? $spadLine => charByName "Return"
  lineCurrentChar $spadLine

nextChar() ==
  lineAtEnd? $spadLine => charByName '"Return"
  lineNextChar $spadLine
  

--%
--% Token abstract datatype.
--%   Operational semantics:
--%      structure Token ==
--%         Record(symbol: Identifier, type: TokenType, nonBlank?: Boolean)
--%
--%   type in '(NUMBER IDENTIFIER SPECIAL_-CHAR)
--%   nonBlank? if token is not preceded by a blank.
--%
makeToken(sym == nil, typ == nil, blnk? == true) ==
  [sym,typ,blnk?]

macro copyToken t ==
  copyList t

macro tokenSymbol t ==
  first t

macro tokenType t ==
  second t

macro tokenNonblank? t ==
  third t

++ Last seen token
$priorToken := makeToken()

++ Is there no blank in front of current token?
$nonblank := true

++ First token in input stream
$currentToken := makeToken()

++ Next token in input stream
$nextToken := makeToken()

++ Number of token in the buffer (0, 1, 2)
$validTokens := 0

tokenInstall(sym,typ,tok,nonblank == true) ==
  tokenSymbol(tok) := sym
  tokenType(tok) := typ
  tokenNonblank?(tok) := nonblank
  tok

getToken tok ==
  not skipBlankChars() => nil
  tt := tokenLookaheadType currentChar()
  tt is 'EOF => tokenInstall(nil,'_*EOF,tok,$nonblank)
  tt is 'ESCAPE =>
    advanceChar!()
    getIdentifier(tok,true)
  tt is 'ARGUMENT_-DESIGNATOR => GET_-ARGUMENT_-DESIGNATOR_-TOKEN tok
  tt is 'ID => getIdentifier(tok,false)
  tt is 'NUM => GET_-SPAD_-INTEGER_-TOKEN tok
  tt is 'STRING => getSpadString tok
  tt is 'SPECIAL_-CHAR => getSpecial tok
  getGliph(tok,tt)

tryGetToken tok ==
  getToken tok =>
   $validTokens := $validTokens + 1
   tok
  nil

++ Returns the current token or gets a new one if necessary
currentToken() ==
  $validTokens > 0 => $currentToken
  tryGetToken $currentToken

++ Returns the token after the current token, or nil if there is none after
nextToken() ==
  currentToken()
  $validTokens > 1 => $nextToken
  tryGetToken $nextToken

matchToken(tok,typ,sym == nil) ==
  tok ~= nil and symbolEq?(tokenType tok,typ) and
    (sym = nil or sym = tokenSymbol tok) => tok
  nil

++ Return the current token if it has type `typ', and possibly the
++ same spelling as `sym'.
matchCurrentToken(typ,sym == nil) ==
  matchToken(currentToken(),typ,sym)

++ Return the next token if it has type `typ;, and possibly the same
++ spelling as `sym'.
matchNextToken(typ,sym == nil) ==
  matchToken(nextToken(),typ,sym)

++ Makes the next token be the current token.
advanceToken() ==
  $validTokens = 0 => tryGetToken $currentToken
  $validTokens = 1 =>
    $validTokens := $validTokens - 1
    $priorToken := copyToken $currentToken
    tryGetToken $currentToken
  $validTokens = 2 =>
    $priorToken := copyToken $currentToken
    $currentToken := copyToken $nextToken
    $validTokens := $validTokens - 1
  nil

makeSymbolOf tok ==
  tok = nil => nil
  tokenSymbol tok = nil => nil
  char? tokenSymbol tok => makeSymbol charString tokenSymbol tok
  tokenSymbol tok

currentSymbol() ==
  makeSymbolOf currentToken()

tokenStackClear!() ==
  $validTokens := 0
  tokenInstall(nil,nil,$currentToken,nil)
  tokenInstall(nil,nil,$nextToken,nil)
  tokenInstall(nil,nil,$priorToken,nil)

++ Predicts the kind of token to follow, based on the given initial character
tokenLookaheadType c ==
  c = nil => 'EOF
  c = char "__" => 'ESCAPE
  c = char "#" and digit? nextChar() => 'ARGUMENT_-DESIGNATOR
  digit? c => 'NUM
  c = char "%" or c = char "?" or c = char "?" or alphabetic? c => 'ID
  c = char "_"" => 'STRING
  c = char " " or c = charByName "Tab" or c = charByName "Return" => 'WHITE
  p := property(makeSymbol charString c,'GLIPH) => p
  'SPECIAL_-CHAR

skipBlankChars() ==
  $nonblank := true
  repeat
    c := currentChar()
    c = nil => return false
    tokenLookaheadType c = 'WHITE =>
      $nonblank := false
      advanceChar!() = nil => return false
    return true

getSpadString tok ==
  buf := nil
  currentChar() ~= char "_"" => nil
  advanceChar!()
  repeat
    currentChar() = char "_"" => leave nil
    buf := [(currentChar() = char "__" => advanceChar!(); currentChar()),:buf]
    advanceChar!() = nil =>
      sayBrightly '"close quote inserted"
      leave nil
  advanceChar!()
  tokenInstall(listToString reverse! buf,'SPADSTRING,tok)

++ Take a special character off the input stream.  We let the type name
++ of each special character be the atom whose print name is the
++ character itself
getSpecial tok ==
  c := currentChar()
  advanceChar!()
  tokenInstall(c,'SPECIAL_-CHAR,tok)

getGliph(tok,gliphs) ==
  buf := [currentChar()]
  advanceChar!()
  repeat
    gliphs := symbolAssoc(makeSymbol charString currentChar(),gliphs) =>
      buf := [currentChar(),:buf]
      gliphs := rest gliphs
      advanceChar!()
    s := makeSymbol listToString reverse! buf
    return tokenInstall(s,'GLIPH,tok,$nonblank)

Keywords == [
 "or", "and", "isnt", "is", "where", "forall", "exist", "try",
  "has", "with", "add", "case", "in", "by", "pretend", "mod", "finally",
   "exquo", "div", "quo", "else", "rem", "then", "suchthat", "catch", "throw",
    "if", "iterate", "break", "from", "exit", "leave", "return",
     "not", "repeat", "until", "while", "for", "import", "inline" ]

getIdentifier(tok,esc?) ==
  buf := [currentChar()]
  advanceChar!()
  repeat 
    c := currentChar()
    c = char "__" =>
      advanceChar!() = nil => leave nil
      buf := [currentChar(),:buf]
      esc? := true
      advanceChar!() = nil => leave nil
    alphabetic? c or digit? c
      or scalarMember?(c,[char "%",char "'",char "?",char "!"]) =>
        buf := [c,:buf]
        advanceChar!() = nil => leave nil
    leave nil
  s := makeSymbol listToString reverse! buf
  tt :=
    not esc? and symbolMember?(s,Keywords) => 'KEYWORD
    'IDENTIFIER
  tokenInstall(s,tt,tok,$nonblank)

escapeKeywords: (%String,%Symbol) -> %String
escapeKeywords(nm,id) ==
  symbolMember?(id,Keywords) => strconc('"__",nm)
  nm

underscore: %String -> %String
underscore s ==
  n := #s - 1
  and/[alphabetic? stringChar(s,i) for i in 0..n] => s
  buf := nil
  for i in 0..n repeat
    c := stringChar(s,i)
    if not alphabetic? c then
      buf := [char "__",:buf]
    buf := [c,:buf]
  listToString reverse! buf

quoteIfString: %Thing -> %Maybe %String
quoteIfString tok ==
  tok = nil => nil
  tt := tokenType tok
  tt is 'SPADSTRING => strconc('"_"",underscore tokenSymbol tok,'"_"")
  tt is 'NUMBER => formatToString('"~v,'0D",tokenNonblank? tok,tokenSymbol tok)
  tt is 'SPECIAL_-CHAR => charString tokenSymbol tok
  tt is 'IDENTIFIER =>
    escapeKeywords(symbolName tokenSymbol tok,tokenSymbol tok)
  symbolName tokenSymbol tok

ungetTokens() ==
  $validTokens = 0 => true
  $validTokens = 1 =>
    cursym := quoteIfString $currentToken
    curline := lineCurrentSegment $spadLine
    revisedline := strconc(cursym,curline,'" ")
    lineNewLine!(revisedline,$spadLine,lineNumber $spadLine)
    $nonblank := tokenNonblank? $currentToken
    $validTokens := 0
  $validTokens = 2 =>
    cursym := quoteIfString $currentToken
    nextsym := quoteIfString $nextToken
    curline := lineCurrentSegment $spadLine
    revisedline := strconc((tokenNonblank? $currentToken => '""; '" "),
      cursym,(tokenNonblank? $nextToken => '""; '" "),nextsym,curline,'" ")
    $nonblank := tokenNonblank? $currentToken
    lineNewLine!(revisedline,$spadLine,lineNumber $spadLine)
    $validTokens := 0
  coreError '"How many tokens do you think you have?"


++ Returns length of X if X matches initial segment of IN-STREAM.
++ Otherwise, return nil.
matchString x ==
  ungetTokens()
  skipBlankChars()
  not linePastEnd? $spadLine and currentChar() ~= nil =>
    nx := #x
    buf := lineBuffer $spadLine
    idx := lineCurrentIndex $spadLine
    nx + idx > #buf => nil
    and/[stringChar(x,i) = stringChar(buf,idx + i) for i in 0..nx-1] and nx
  nil

++ Same as matchString except if successful, advance inputstream past `x'.
matchAdvanceString x ==
  n := #x >= #quoteIfString currentToken() and matchString x =>
    lineCurrentIndex($spadLine) := lineCurrentIndex $spadLine + n
    c :=
      linePastEnd? $spadLine => charByName '"Space"
      lineBuffer($spadLine).(lineCurrentIndex $spadLine)
    lineCurrentChar($spadLine) := c
    $priorToken := makeToken(makeSymbol x,'IDENTIFIER,$nonblank)
    n
  nil

matchAdvanceKeyword kwd ==
  matchToken(currentToken(),'KEYWORD,kwd) =>
    advanceToken()
    true
  false

matchKeywordNext kwd ==
  matchToken(nextToken(),'KEYWORD,kwd)

matchSpecial c ==
  matchToken(currentToken(),'SPECIAL_-CHAR,c)

matchAdvanceSpecial c ==
  matchSpecial c =>
    advanceToken()
    true
  false

matchAdvanceGlyph s ==
  matchToken(currentToken(),'GLIPH,s) =>
    advanceToken()
    true
  false

--%
--% Stack abstract datatype.
--%  Operational semantics:
--%     structure Stack ==
--%         Record(store: List T, size: Integer, top: T, updated?: Boolean)

++ Construct a new stack
makeStack() ==
  [nil,0,nil,false]

macro stackStore st ==
  first st

macro stackSize st ==
  second st

macro stackTop st ==
  third st

macro stackUpdated? st ==
  fourth st

stackLoad!(l,st) ==
  stackStore(st) := l
  stackSize(st) := #l
  stackTop(st) := first l

stackClear! st ==
  stackStore(st) := nil
  stackSize(st) := 0
  stackTop(st) := nil
  stackUpdated?(st) := false

stackPush!(x,st) ==
  stackStore(st) := [x,:stackStore st]
  stackTop(st) := x
  stackSize(st) := stackSize st + 1
  stackUpdated?(st) := true

stackPop! st ==
  y := first stackStore st
  stackStore(st) := rest stackStore st
  stackSize(st) := stackSize st - 1
  if stackStore st ~= nil then
    stackTop(st) := first stackStore st
  y


--%  
--% Parsing reduction stack
--%
--% Abstractly;
--%   structure Reduction == Record(rule: RuleName, value: ParseTree)
--%
makeReduction(p == nil,v == nil) ==
  [p,v]

macro reductionRule r ==
  first r

macro reductionValue r ==
  second r

++ stack of results of reduced productions
$reduceStack := makeStack()

pushReduction(rn,pt) ==
  stackPush!(makeReduction(rn,pt),$reduceStack)

popReduction() ==
  stackPop! $reduceStack
  
reduceStackClear() ==
  stackClear! $reduceStack

popStack1() ==
  reductionValue popReduction()

popStack2() ==
  r1 := popReduction()
  r2 := popReduction()
  stackPush!(r1,$reduceStack)
  reductionValue r2

popStack3() ==
  r1 := popReduction()
  r2 := popReduction()
  r3 := popReduction()
  stackPush!(r2,$reduceStack)
  stackPush!(r1,$reduceStack)
  reductionValue r3

popStack4() ==
  r1 := popReduction()
  r2 := popReduction()
  r3 := popReduction()
  r4 := popReduction()
  stackPush!(r3,$reduceStack)
  stackPush!(r2,$reduceStack)
  stackPush!(r1,$reduceStack)
  reductionValue r4

nthStack n ==
  reductionValue stackStore($reduceStack).(n - 1)