# lexi.py

#Author: Edimar Valentin Kery <edimar.valentin@upr.edu>

import ply.lex as lex

# Reserved words of the language, mapped from their source spelling to the
# token type that t_ID assigns when an identifier matches one of them.
reserved = {
    'def': 'DEF',
    'var': 'VAR',
    'Int': 'INT',
    'if': 'IF',
    'else': 'ELSE',
}

# Every token type the lexer may emit: the symbolic tokens listed here,
# followed by one token type per reserved word.
tokens = [
    # identifiers and integer literals
    'ID', 'NUM',
    # grouping
    'LPAREN', 'RPAREN', 'LBRACE', 'RBRACE',
    # assignment and comparison
    'BECOMES', 'EQ', 'NE', 'LT', 'GT', 'LE', 'GE',
    # arithmetic
    'PLUS', 'MINUS', 'STAR', 'SLASH', 'PCT',
    # punctuation
    'COMMA', 'SEMI', 'COLON', 'ARROW',
    # comments and whitespace
    'COMMENT', 'WHITESPACE',
] + list(reserved.values())

# Regex rules for simple tokens.
#
# PLY adds string rules to its master pattern in order of decreasing regex
# length, so the two-character operators ('==', '<=', '>=', '=>', '!=') are
# tried before their one-character prefixes ('=', '<', '>'). The textual
# order of the assignments below therefore does not matter.
t_PLUS = r'\+'
t_MINUS = r'-'
t_STAR = r'\*'
t_SLASH = r'/'
t_PCT = r'%'

t_LPAREN = r'\('
t_RPAREN = r'\)'
# Braces are escaped so they can never be read as a repetition quantifier.
t_LBRACE = r'\{'
t_RBRACE = r'\}'

t_BECOMES = r'='
t_EQ = r'=='
t_NE = r'!='
t_LT = r'<'
t_GT = r'>'
t_LE = r'<='
t_GE = r'>='

t_COMMA = r','
t_SEMI = r';'
t_COLON = r':'
t_ARROW = r'=>'

# NOTE: 'WHITESPACE' is listed in `tokens` but has no active rule. The draft
# below is disabled; its pattern ends in an empty alternative, so it would
# match the empty string, which PLY rejects.
#t_WHITESPACE = r'\n|\t|\r|'


# Line comments: '//' up to and including the end of the line.
#
# This is a function rule rather than a string rule because the match
# consumes the terminating newline, so the newline never reaches t_newline
# and has to be counted here to keep lexer.lineno accurate. The newline is
# optional so that a comment on the very last line of the input (no trailing
# newline) is still recognised as a COMMENT instead of two SLASH tokens.
# The docstring is the rule's regex and must stay the first statement.
def t_COMMENT(t):
    r'//.*\n?'
    t.lexer.lineno += t.value.count('\n')
    return t


# Identifiers and reserved words: a letter followed by letters or digits.
# The docstring is the rule's regex and must stay the first statement.
def t_ID(t):
    r'[A-Za-z][a-zA-Z0-9]*'
    # Reserved words share the identifier pattern; a lexeme found in
    # `reserved` takes its keyword token type, anything else stays an ID.
    lexeme = t.value
    if lexeme in reserved:
        t.type = reserved[lexeme]
    else:
        t.type = 'ID'
    return t

# Integer literals: one or more decimal digits.
# The docstring is the rule's regex and must stay the first statement.
def t_NUM(t):
    r'\d+'
    # Replace the matched text with its numeric value so consumers of the
    # token receive an int rather than a string.
    digits = t.value
    t.value = int(digits)
    return t

# Error handler: reports the first character of the text held in t.value
# and then advances the lexer by one character.
def t_error(t):
    offending = t.value[0]
    print("Illegal character '%s'" % offending)
    # Skip only the single reported character.
    t.lexer.skip(1)

# Line tracking: a run of newlines advances the lexer's line counter.
# The docstring is the rule's regex and must stay the first statement.
def t_newline(t):
    r'\n+'
    # Every matched character is one newline. Nothing is returned, so the
    # match produces no token.
    newline_count = len(t.value)
    t.lexer.lineno += newline_count

# Characters skipped between tokens: the space and the tab.
t_ignore = " \t"

# Create the lex object.
# lex.lex() is called with no arguments; per the PLY documentation it then
# collects the `tokens` list and the t_* rules from this module, so this call
# has to stay below every rule definition.
lexer = lex.lex()

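# Disabled example driver (kept inside a string literal, so it never runs):
# opens TEST_program.txt, feeds its contents to a lexer and prints every token.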
"""
with open('TEST_program.txt', 'r') as file:
	lexer = lex.lex()
	source = file.read()
	lexer.input(source)
	while True:
		tok = lexer.token()
		if not tok:
			break
		print(tok)
"""