'''An LALR(1) parser for C. Uses the Clex module.'''

import Clex

class BadToken(Exception):
  '''Error carrying the token that could not be handled.

  The offending token is kept on the `token` attribute and is also passed
  to the base Exception so that `args` is populated.
  '''
  def __init__(self, token):
    Exception.__init__(self, token)
    self.token=token

  def __str__(self):
    # repr() replaces the backtick form, which Python 3 no longer accepts.
    return 'Bad token: %s' % repr(self.token)

# Tokens used by the parser
class tokens:
  '''Namespace of the integer token ids used by the parser.'''

  # Types
  TYPE = 300              # Normal type
  VOIDTYPE = 301          # void
  MODLONG = 302           # long

  # Declarations
  DECL = 400              # TYPE IDEN
  LDECL = 401             # TYPE IDEN(

  # Functions
  FUNC = 500              # TYPE IDEN(TYPE...
  LFUNC = 501             # TYPE IDEN(...){
  FBLOCK = 502            # TYPE IDEN(...){...}

  # Assignments and increment/decrement expressions
  LASGN = 600             # IDEN=
  ASGN = 601              # IDEN=arg2
  DASGN = 602             # TYPE IDEN=
  IASGN = 603             # TYPE IDEN=arg2
  POSTINCEXPR = 604       # (arg2)++
  PREINCEXPR = 605        # ++(arg2)
  POSTDECEXPR = 606       # (arg2)--
  PREDECEXPR = 607        # --(arg2)

  # Statements
  STMT = 700              # ...;

  # Keyword ids (KW + the upper-cased keyword)
  KWAUTO = 2000
  KWBREAK = 2001
  KWCASE = 2002
  KWCHAR = 2003
  KWCONTINUE = 2004
  KWDEFAULT = 2005
  KWDO = 2006
  KWDOUBLE = 2007
  KWELSE = 2008
  KWENUM = 2009
  KWEXTERN = 2010
  KWFLOAT = 2011
  KWFOR = 2012
  KWGOTO = 2013
  KWIF = 2014
  KWINT = 2015
  KWLONG = 2016
  KWREGISTER = 2017
  KWRETURN = 2018
  KWSHORT = 2019
  KWSIZEOF = 2020
  KWSTATIC = 2021
  KWSTRUCT = 2022
  KWSWITCH = 2023
  KWTYPEDEF = 2024
  KWUNION = 2025
  KWUNSIGNED = 2026
  KWVOID = 2027
  KWWHILE = 2028
  KWASM = 2029
  KWCOMPLEX = 2030
  KWCONST = 2031
  KWINLINE = 2032
  KWITERATOR = 2033
  KWSIGNED = 2034
  KWTYPEOF = 2035
  KWVOLATILE = 2036
  KWRESTRICT = 2037

  # Hash (preprocessor) words
  HINCLUDE = 3000         # #include
  HDEFINE = 3001          # #define
  HIF = 3002              # #if
  HELSE = 3003            # #else
  HENDIF = 3004           # #endif
  HIFDEF = 3005           # #ifdef
  HIFNDEF = 3006          # #ifndef
  HPRAGMA = 3007          # #pragma
  HLINE = 3008            # #line

  # Include directives
  LINCLUDE = 3100         # #include <
  INCLUDE = 3101          # #include <...>

  # Return statements
  LRETURN = 3200          # return

  # Additive expressions
  LADDOP = 4000           # IDEN+
  ADDOP = 4001            # IDEN+...

# Reserved keywords in C (these cannot be re#define'd).
# Maps the keyword spelling to its tokens.KW* id.
keywords = {
  'auto': tokens.KWAUTO,
  'break': tokens.KWBREAK,
  'case': tokens.KWCASE,
  'char': tokens.KWCHAR,
  'continue': tokens.KWCONTINUE,
  'default': tokens.KWDEFAULT,
  'do': tokens.KWDO,
  'double': tokens.KWDOUBLE,
  'else': tokens.KWELSE,
  'enum': tokens.KWENUM,
  'extern': tokens.KWEXTERN,
  'float': tokens.KWFLOAT,
  'for': tokens.KWFOR,
  'goto': tokens.KWGOTO,
  'if': tokens.KWIF,
  'int': tokens.KWINT,
  'long': tokens.KWLONG,
  'register': tokens.KWREGISTER,
  'return': tokens.KWRETURN,
  'short': tokens.KWSHORT,
  'sizeof': tokens.KWSIZEOF,
  'static': tokens.KWSTATIC,
  'struct': tokens.KWSTRUCT,
  'switch': tokens.KWSWITCH,
  'typedef': tokens.KWTYPEDEF,
  'union': tokens.KWUNION,
  'unsigned': tokens.KWUNSIGNED,
  'void': tokens.KWVOID,
  'while': tokens.KWWHILE,
}

# Reserved keywords for GCC: everything in `keywords` plus the words below.
# Start from a copy -- a plain assignment would bind the same dict object, so
# the update() would also add these words to `keywords` itself.
gcckeywords=keywords.copy()
gcckeywords.update({
  'asm':tokens.KWASM,
  'complex':tokens.KWCOMPLEX,
  'const':tokens.KWCONST,
  'inline':tokens.KWINLINE,
  'iterator':tokens.KWITERATOR,
  'signed':tokens.KWSIGNED,
  'typeof':tokens.KWTYPEOF,
  'volatile':tokens.KWVOLATILE,
})

# Reserved keywords for GCC in C99 mode: everything in `gcckeywords` plus the
# words below. Start from a copy -- a plain assignment would bind the same
# dict object, so the update() would also add 'restrict' to `gcckeywords`.
gcc99keywords=gcckeywords.copy()
gcc99keywords.update({
  'restrict':tokens.KWRESTRICT,
})

# Identifiers known about: keyword id -> parser token id it is turned into.
idents = {
  tokens.KWLONG: tokens.MODLONG,
  tokens.KWRETURN: tokens.LRETURN,
}

# Hash words: word following a '#' -> its tokens.H* id.
hashwords = {
  'include': tokens.HINCLUDE,
  'define': tokens.HDEFINE,
  'if': tokens.HIF,
  'else': tokens.HELSE,
  'endif': tokens.HENDIF,
  'ifdef': tokens.HIFDEF,
  'ifndef': tokens.HIFNDEF,
  'pragma': tokens.HPRAGMA,
  'line': tokens.HLINE,
}

# C types known about: keyword id -> type token id.
types = {
  tokens.KWINT: tokens.TYPE,
  tokens.KWCHAR: tokens.TYPE,
  tokens.KWFLOAT: tokens.TYPE,
  tokens.KWDOUBLE: tokens.TYPE,
  tokens.KWVOID: tokens.VOIDTYPE,
}

# Tokens that cause a parse tree to end.
# Outer key: id of the closing lexer token.
# Inner dict: id of the stack entry that ends the unwind -> id of the result.
finalizers = {
  Clex.tokens.RPAR: {
    tokens.LDECL: tokens.FUNC,
  },
  Clex.tokens.SEMI: {
    tokens.LFUNC: tokens.STMT,
    tokens.FBLOCK: tokens.STMT,
    tokens.STMT: tokens.STMT,
  },
  Clex.tokens.RBRC: {
    tokens.LFUNC: tokens.FBLOCK,
  },
  Clex.tokens.GT: {
    tokens.LINCLUDE: tokens.INCLUDE,
  },
}

# Transformations that must occur before a token is parsed.
# Lexer token id -> name of the Cparse method that rewrites the token.
transforms = {
  Clex.tokens.IDEN: 'HandleIdentifier',
  Clex.tokens.RPAR: 'HandleFinalizer',
  Clex.tokens.SEMI: 'HandlePrefinalizer',
  Clex.tokens.RBRC: 'HandleFinalizer',
  Clex.tokens.GT: 'HandleFinalizer',
}

# LALR(1) stack operations
class operations:
  '''Namespace of the stack operation codes used in the grammar table.'''
  REPLACE = 0     # Replace the token id of the first arg
  COMBINE = 1     # Combine both args under a token id
  PUSH = 2        # Push both args onto the stack
  MORPH = 3       # Change the tokenid of arg1 and repeat
  FUSEL = 4       # Replace the token id of the second arg and push it
  FUSER = 5       # Replace the token id of the first arg and push it
  MERGE = 6       # Merge the two args
  STACK = 7       # Push arg1 under an additional tokenid

# LALR(1) grammar
# Key:   (token id of the entry on top of the stack, token id of the
#        incoming token).
# Value: (operation code from `operations`, token id used by that operation).
#        The id is None for PUSH, which leaves both tokens as they are.
# Cparse.pushtoken raises BadToken for any pair that has no entry here.
productions={
  (Clex.tokens.DEC, Clex.tokens.IDEN):(operations.FUSEL, tokens.PREDECEXPR),
  (Clex.tokens.INC, Clex.tokens.IDEN):(operations.FUSEL, tokens.PREINCEXPR),
  (Clex.tokens.COMMA, Clex.tokens.WS):(operations.REPLACE, Clex.tokens.COMMA),
  (Clex.tokens.COMMA, tokens.TYPE):(operations.PUSH, None),
  (Clex.tokens.IDEN, Clex.tokens.DEC):(operations.FUSER, tokens.POSTDECEXPR),
  (Clex.tokens.IDEN, Clex.tokens.PLUS):(operations.COMBINE, tokens.LADDOP),
  (Clex.tokens.IDEN, Clex.tokens.INC):(operations.FUSER, tokens.POSTINCEXPR),
  (Clex.tokens.IDEN, Clex.tokens.ASGN):(operations.REPLACE, tokens.LASGN),
  (tokens.TYPE, Clex.tokens.AST):(operations.COMBINE, tokens.TYPE),
  (tokens.TYPE, Clex.tokens.IDEN):(operations.COMBINE, tokens.DECL),
  (tokens.TYPE, Clex.tokens.WS):(operations.REPLACE, tokens.TYPE),
  (tokens.VOIDTYPE, Clex.tokens.IDEN):(operations.COMBINE, tokens.DECL),
  (tokens.VOIDTYPE, Clex.tokens.WS):(operations.REPLACE, tokens.VOIDTYPE),
  (tokens.MODLONG, Clex.tokens.IDEN):(operations.MORPH, tokens.TYPE),
  (tokens.MODLONG, Clex.tokens.WS):(operations.REPLACE, tokens.MODLONG),
  (tokens.DECL, Clex.tokens.LPAR):(operations.REPLACE, tokens.LDECL),
  (tokens.DECL, Clex.tokens.ASGN):(operations.REPLACE, tokens.DASGN),
  (tokens.DECL, Clex.tokens.COMMA):(operations.PUSH, None),
  (tokens.LDECL, tokens.TYPE):(operations.PUSH, None),
  (tokens.LDECL, tokens.VOIDTYPE):(operations.PUSH, None),
  (tokens.LDECL, tokens.MODLONG):(operations.PUSH, None),
  (tokens.FUNC, Clex.tokens.LBRC):(operations.REPLACE, tokens.LFUNC),
  (tokens.FUNC, Clex.tokens.WS):(operations.REPLACE, tokens.FUNC),
  (tokens.FUNC, Clex.tokens.NL):(operations.REPLACE, tokens.FUNC),
  (tokens.LFUNC, Clex.tokens.WS):(operations.REPLACE, tokens.LFUNC),
  (tokens.LFUNC, Clex.tokens.NL):(operations.REPLACE, tokens.LFUNC),
  (tokens.LFUNC, tokens.TYPE):(operations.PUSH, None),
  (tokens.LFUNC, tokens.STMT):(operations.PUSH, None),
  (tokens.LFUNC, tokens.LRETURN):(operations.PUSH, None),
  (tokens.FBLOCK, Clex.tokens.NL):(operations.REPLACE, tokens.FBLOCK),
  (tokens.FBLOCK, tokens.FBLOCK):(operations.PUSH, None),
  (tokens.FBLOCK, tokens.STMT):(operations.PUSH, None),
  (tokens.LASGN, Clex.tokens.NUM):(operations.COMBINE, tokens.ASGN),
  (tokens.LASGN, Clex.tokens.SSTR):(operations.COMBINE, tokens.ASGN),
  (tokens.DASGN, Clex.tokens.NUM):(operations.MERGE, tokens.IASGN),
  (tokens.DASGN, Clex.tokens.SSTR):(operations.MERGE, tokens.IASGN),
  (tokens.STMT, Clex.tokens.INC):(operations.PUSH, None),
  (tokens.STMT, Clex.tokens.IDEN):(operations.PUSH, None),
  (tokens.STMT, Clex.tokens.WS):(operations.REPLACE, tokens.STMT),
  (tokens.STMT, Clex.tokens.NL):(operations.REPLACE, tokens.STMT),
  (tokens.STMT, tokens.TYPE):(operations.PUSH, None),
  (tokens.STMT, tokens.VOIDTYPE):(operations.PUSH, None),
  (tokens.STMT, tokens.FUNC):(operations.PUSH, None),
  (tokens.STMT, tokens.FBLOCK):(operations.PUSH, None),
  (tokens.STMT, tokens.MODLONG):(operations.PUSH, None),
  (tokens.STMT, tokens.STMT):(operations.PUSH, None),
  (tokens.HINCLUDE, Clex.tokens.LT):(operations.REPLACE, tokens.LINCLUDE),
  (tokens.HINCLUDE, Clex.tokens.WS):(operations.REPLACE, tokens.HINCLUDE),
  (tokens.LINCLUDE, Clex.tokens.MEM):(operations.MERGE, tokens.LINCLUDE),
  (tokens.LRETURN, Clex.tokens.IDEN):(operations.PUSH, None),
  (tokens.INCLUDE, Clex.tokens.NL):(operations.STACK, tokens.STMT),
  (tokens.LRETURN, Clex.tokens.WS):(operations.REPLACE, tokens.LRETURN),
  (tokens.LADDOP, Clex.tokens.IDEN):(operations.MERGE, tokens.ADDOP),
}

# Whitespace before any actual content: these token ids are dropped while
# the parse stack is still empty.
prewhitespace = [Clex.tokens.WS, Clex.tokens.NL]

# Tokens that prevent entry of identifiers into the symbol table.
# Stack-top token id -> (operation, token id) applied to the identifier.
literals = {
  tokens.LINCLUDE: (operations.MERGE, tokens.LINCLUDE),
}

# Symbol table: `symtabid` is the base value for generated symbol ids and
# `symtab` is the (initially empty) mapping that holds them.
symtabid = 10000
symtab = {}

class Cparse:
  '''Table-driven parser that folds lexer tokens into a parse stack.

  Tokens are 2-tuples whose first item is a token id. Each token handed to
  pushtoken() is first rewritten by the handler named in `transforms` (if
  any) and then combined with the entry on top of `self.stack` according to
  the `productions` table.

  Attributes:
    lexer    -- the object passed to the constructor (only stored here)
    symtabid -- base value added to the symbol count to form new symbol ids
    symtab   -- identifier -> symbol id mapping; this is the module-level
                `symtab` dict itself, so it is shared between instances
    stack    -- the parse stack, a list of token tuples
  '''

  def __init__(self, lexer=None):
    self.lexer=lexer
    self.symtabid=symtabid
    self.symtab=symtab
    self.stack=[]

  def HandleIdentifier(self, token):
    '''Classify an identifier token and return its replacement.

    In order of precedence the identifier is: merged into a `literals` entry
    on top of the stack; turned into a hash word when it follows a HASH
    token; mapped through `keywords`, then `types` or `idents`; or looked up
    in (and if necessary added to) the symbol table.
    '''
    if self.stack and self.stack[-1][0] in literals:
      s=self.stack.pop()
      l=literals[s[0]]
      if l[0]==operations.MERGE:
        if not s[1]:
          return (l[1], (token,))
        return (l[1], s[1]+(token,))
    if (self.stack and self.stack[-1][0]==Clex.tokens.HASH
        and token[1] in hashwords):
      # The hash word replaces the HASH token that introduced it.
      self.stack.pop()
      return (hashwords[token[1]], None)
    if token[1] in keywords:
      # From here on token[1] is the keyword id, not the spelling.
      token=(token[0], keywords[token[1]])
    if token[1] in types:
      return (types[token[1]], token[1])
    if token[1] in idents:
      return (idents[token[1]], token[1])
    if token[1] not in self.symtab:
      self.symtab[token[1]]=len(self.symtab)+self.symtabid
    return (token[0], self.symtab[token[1]])

  def _unwind(self, token):
    '''Pop stack entries until one that `token` finalizes is reached.

    Returns the popped entries in stack order with `token` appended, so
    item 0 is the entry listed in finalizers[token[0]]. Raises BadToken if
    the stack runs out before such an entry is found.
    '''
    closers=finalizers[token[0]]
    span=[token]
    while span[0][0] not in closers:
      if not self.stack:
        # Unmatched closing token: report it like any other parse error
        # rather than failing with an IndexError from pop().
        raise BadToken(token)
      span.insert(0, self.stack.pop())
    return span

  def HandleFinalizer(self, token):
    '''Collapse everything back to the matching opener into one token.

    The opener is consumed; its value followed by the entries above it
    become the value of the returned token.
    '''
    span=self._unwind(token)
    return (finalizers[token[0]][span[0][0]],
            tuple([span[0][1]]+span[1:-1]))

  def HandlePrefinalizer(self, token):
    '''Collapse the entries above the matching opener into one token.

    Unlike HandleFinalizer the opener is pushed back onto the stack and is
    not part of the returned token's value.
    '''
    span=self._unwind(token)
    self.stack.append(span[0])
    return (finalizers[token[0]][span[0][0]], tuple(span[1:-1]))

  def pushtoken(self, token):
    '''Feed one lexer token to the parser, updating self.stack.

    Raises BadToken when `productions` has no entry for the pair (stack top
    id, token id). While the stack is empty, tokens listed in
    `prewhitespace` are dropped and any other token is pushed as is.
    '''
    if token[0] in transforms:
      token=getattr(self, transforms[token[0]])(token)
    if not self.stack:
      if token[0] not in prewhitespace:
        self.stack.append(token)
      return
    while 1:
      key=(self.stack[-1][0], token[0])
      if key not in productions:
        raise BadToken(token)
      p=productions[key]
      s=self.stack.pop()
      if p[0]==operations.REPLACE:
        self.stack.append((p[1], s[1]))
        break
      elif p[0]==operations.COMBINE:
        self.stack.append((p[1], (s, token)))
        break
      elif p[0]==operations.PUSH:
        self.stack.append(s)
        self.stack.append(token)
        break
      elif p[0]==operations.MORPH:
        # Re-tag the stack top and try the same incoming token again.
        self.stack.append((p[1], s[1]))
      elif p[0]==operations.FUSEL:
        self.stack.append((p[1], token[1]))
        break
      elif p[0]==operations.FUSER:
        self.stack.append((p[1], s[1]))
        break
      elif p[0]==operations.MERGE:
        # append() takes exactly one argument, so the pair must be passed
        # as a tuple.
        # NOTE(review): the merged entry keeps its old id s[0]; the id in
        # the production (p[1]) is not applied here -- confirm intent.
        self.stack.append((s[0], s[1]+(token,)))
        break
      elif p[0]==operations.STACK:
        # Same single-argument rule as above.
        self.stack.append((p[1], s))
        break

if __name__=='__main__':
  # Demo driver: lex a small C sample with Clex, feed every token to a
  # Cparse instance and print the parse stack after each one.
  import pprint
  try:
    from StringIO import StringIO   # Python 2
  except ImportError:
    from io import StringIO         # Python 3

  a='''
#include <stdio.h>

int main(void)
{
  int a=0;  // Line Comment
  a++;
  ++a;
  long b=12l;
  char *c="123"; /* Block Comment */
  float d=1.23f;
  b=5;
};

void func1(int a, char b)
{
};

long func2(long a, int b)
{
  return a+b;
};
'''

  b=Clex.Clex(StringIO(a))
  c=Cparse()
  p=pprint.PrettyPrinter(width=75)
  while 1:
    try:
      t=b.gettoken()
    except Clex.EOF:
      break
    # print is called with a single parenthesised string so the same line
    # is valid both as a print statement and as a print() call.
    print('%s ->' % repr(t))
    c.pushtoken(t)
    f=p.pformat(c.stack)
    for i in f.split('\n'):
      print('  %s' % i)
    print('')
  print('Result:')
  p.pprint(c.stack)
  p.pprint(c.symtab)
