-
Notifications
You must be signed in to change notification settings - Fork 0
/
lexi.py
84 lines (68 loc) · 1.52 KB
/
lexi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#Author: Edimar Valentin Kery <[email protected]>
import ply.lex as lex
# Reserved words of the source language, mapped to their token names.
# t_ID consults this table to re-tag identifiers that are really keywords.
reserved = {
    'def': 'DEF',
    'var': 'VAR',
    'Int': 'INT',
    'if': 'IF',
    'else': 'ELSE',
}

# Every token name the lexer may produce: the regular tokens first,
# followed by one token name per reserved word.
tokens = [
    # identifiers and literals
    'ID', 'NUM',
    # grouping
    'LPAREN', 'RPAREN', 'LBRACE', 'RBRACE',
    # assignment and comparison
    'BECOMES', 'EQ', 'NE', 'LT', 'GT', 'LE', 'GE',
    # arithmetic
    'PLUS', 'MINUS', 'STAR', 'SLASH', 'PCT',
    # punctuation
    'COMMA', 'SEMI', 'COLON', 'ARROW',
    # trivia
    'COMMENT', 'WHITESPACE',
]
tokens.extend(reserved.values())
# Regex rules for simple tokens.
# PLY sorts string rules by decreasing regex length, so '==' and '=>' are
# tried before '=', and '<=' / '>=' before '<' / '>', whatever the order here.

# Arithmetic operators
t_PLUS = r'\+'
t_MINUS = r'-'
t_STAR = r'\*'
t_SLASH = r'/'
t_PCT = r'%'

# Grouping
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACE = r'{'
t_RBRACE = r'}'

# Assignment and comparison
t_BECOMES = r'='
t_EQ = r'=='
t_NE = r'!='
t_LT = r'<'
t_GT = r'>'
t_LE = r'<='
t_GE = r'>='

# Punctuation
t_COMMA = r','
t_SEMI = r';'
t_COLON = r':'
t_ARROW = r'=>'


# Line comment: '//' up to and including the end of the line.
# This is a function rule (tried before all string rules, so it still wins
# over t_SLASH) for two reasons:
#   * the newline it consumes never reaches t_newline, so it must be counted
#     here or every comment would leave the line counter one line behind;
#   * the trailing newline is optional, so a comment on the last line of a
#     file without a final newline is still recognised as a COMMENT.
def t_COMMENT(t):
    r'//[^\n]*\n?'
    t.lexer.lineno += t.value.count('\n')
    return t


# NOTE: 'WHITESPACE' is listed in `tokens` but has no active rule: spaces and
# tabs are dropped through t_ignore, and newlines are consumed by t_newline.
# Rule for identifiers and reserved words.
# An identifier is a letter followed by any number of letters or digits.
# The first statement of the function is the regex PLY uses for this rule,
# so it must remain the first statement (it is not a normal docstring).
def t_ID(t):
    r'[A-Za-z][a-zA-Z0-9]*'
    # Keywords match the same pattern as identifiers; look the lexeme up in
    # the `reserved` table and fall back to a plain 'ID' token.
    t.type = reserved.get(t.value, 'ID')
    return t
# Rule for unsigned integer literals (one or more decimal digits).
# The leading string is the regex PLY uses for this rule, not a docstring.
def t_NUM(t):
    r'\d+'
    # Replace the matched text with its integer value.
    digits = t.value
    t.value = int(digits)
    return t
# Error handler: called when no rule matches at the current position.
# Reports the offending character on stdout, then skips one character.
def t_error(t):
    offending = t.value[0]
    print("Illegal character '%s'" % offending)
    t.lexer.skip(1)
# Line tracking: consume a run of newlines and advance the lexer's line
# counter by the length of the run. Returns nothing, so no token is emitted.
def t_newline(t):
    r'\n+'
    run_length = len(t.value)
    t.lexer.lineno += run_length
# Characters skipped between tokens without producing anything:
# the space and the horizontal tab.
t_ignore = " \t"
# Build the module-level lexer when this module is imported. PLY's lex.lex()
# assembles it from the `tokens` list and the t_* rules defined in this module.
lexer = lex.lex()
# Disabled sample driver. The triple-quoted block below is a bare string
# literal, so none of it is executed. Its text opens 'TEST_program.txt',
# feeds the contents to a newly built lexer, and prints each token until
# lexer.token() returns a falsy value.
# NOTE(review): the indentation inside the string appears to be missing;
# re-indent it before turning it back into live code.
"""
with open('TEST_program.txt', 'r') as file:
lexer = lex.lex()
source = file.read()
lexer.input(source)
while True:
tok = lexer.token()
if not tok:
break
print(tok)
"""