This repository was archived by the owner on Feb 14, 2021. It is now read-only.
25 commits
7c41e20
First step in development. Go to other branch
DiazRock Feb 26, 2020
5775246
Added a main.py file. The coolc.sh is a wrapper for it.
DiazRock Feb 27, 2020
d87bae5
Added the base for the compiler structure
DiazRock Feb 28, 2020
664f89d
Added lexer definitions
DiazRock Feb 28, 2020
fb038db
All the Cool lexer analyzer declarations
DiazRock Feb 29, 2020
6d7a1b4
More changes
DiazRock Feb 29, 2020
888a63b
Added functionality for lexer analyzer
DiazRock Feb 29, 2020
c6663a7
I think that the lexer analyzer is ready for testing
DiazRock Mar 1, 2020
a5b8569
The moment for testing is near. I can feel it.
DiazRock Mar 1, 2020
facd09d
10 of 13 tests passed
DiazRock May 20, 2020
ab5d06e
100% of test cases passed
DiazRock May 21, 2020
5fdaea5
A bug in the error column tracker
DiazRock May 27, 2020
4fb1f7d
Passed more tests
DiazRock Jun 5, 2020
156089b
Three errors and 9 shift/reduce conflicts
DiazRock Jun 6, 2020
a1e2abb
Case insensitivity in keywords
DiazRock Jun 6, 2020
9e23612
Passed all tests :)
DiazRock Jun 6, 2020
467240d
Passed all tests for 'entrega-parser'
DiazRock Jun 8, 2020
a48d4c9
Some definitions for better code. Useless right now
DiazRock Jun 8, 2020
c1b0ab2
The errors definitions
DiazRock Jun 16, 2020
0e9c7a6
This is the one that works
DiazRock Oct 6, 2020
e4a846e
Merge branch 'entrega-parser' into entrega-parser
DiazRock Jan 25, 2021
f359a6a
Selected only the first error
DiazRock Jan 25, 2021
84dc05e
Merge branch 'entrega-parser' of https://github.com/DiazRock/cool-com…
DiazRock Jan 25, 2021
659a802
Added the python version to coolc.sh file
DiazRock Jan 25, 2021
709a60c
Added the ply requirement
DiazRock Jan 25, 2021
1 change: 1 addition & 0 deletions requirements.txt
@@ -1,2 +1,3 @@
ply
pytest
pytest-ordering
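
The new pytest-ordering dependency suggests the test suite relies on running cases in a fixed order. Its usual entry point is the run marker; a minimal sketch of that usage (illustrative, not taken from this diff):

import pytest

# pytest-ordering lets a test declare an explicit position in the run.
@pytest.mark.run(order=1)
def test_lexer_before_parser():
    assert True  # placeholder body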
Empty file added src/compiler/__init__.py
Empty file.
20 changes: 20 additions & 0 deletions src/compiler/components/abstract_component.py
@@ -0,0 +1,20 @@

class Component:
    """Base class for compiler pipeline stages; subclasses override build_component."""

    def __init__(self,
                 input_info,
                 component_name,
                 debug_session=False,
                 error_log_file=None,
                 build_after_initialize=True):
        self.input_info = input_info
        self.component_name = component_name
        self.debug_session = debug_session
        self.error_log_file = error_log_file

        if build_after_initialize:
            self.build_component()

    def build_component(self):
        if self.debug_session:
            print('Building %s component' % self.component_name)
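
For context, Component is meant to be subclassed: the constructor stores the shared inputs and, by default, triggers build_component immediately. A minimal sketch of a concrete stage built on it (the lexer_component name and the tokenizer wiring are assumptions for illustration, not part of this diff):

from .abstract_component import Component
from .lexer_analyzer import tokenizer

class lexer_component(Component):
    """Hypothetical stage wrapping the PLY tokenizer added below."""
    def __init__(self, input_info, **kwargs):
        super().__init__(input_info, component_name="lexer", **kwargs)

    def build_component(self):
        super().build_component()
        # input_info is assumed to hold the raw Cool source text.
        self.errors, self.tokens, self.real_col = tokenizer(self.input_info)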

14 changes: 14 additions & 0 deletions src/compiler/components/dummy_component.py
@@ -0,0 +1,14 @@
from .abstract_component import Component


class dummy_component(Component):
    def __init__(self, component_name, *args, **kwargs):
        # Prefix the name exactly once, so Component sees e.g. "dummy_parser".
        component_name = "dummy_" + component_name
        super().__init__(*args, component_name=component_name, **kwargs)

    def build_component(self):
        super().build_component()
        return "Sorry, this is just a %s component" % self.component_name

228 changes: 228 additions & 0 deletions src/compiler/components/lexer_analyzer.py
@@ -0,0 +1,228 @@
import ply.lex as lex
from ply.lex import TOKEN
from ..utils.errors import error

# Lexical errors collected by the handlers below.
errors = []

tokens = [
    # Identifiers
    "ID", "TYPE",

    # Primitive Types
    "INTEGER", "STRING", "BOOLEAN",

    # Punctuation
    "LPAREN", "RPAREN", "LBRACE", "RBRACE", "COLON", "COMMA", "DOT", "SEMICOLON", "AT",

    # Operators
    "PLUS", "MINUS", "MULTIPLY", "DIVIDE", "EQ", "LT", "LTEQ", "ASSIGN", "INT_COMP",

    # Special Operators
    "ARROW"
]

reserved = {
    'new': 'NEW',
    'of': 'OF',
    'if': 'IF',
    'let': 'LET',
    'in': 'IN',
    'fi': 'FI',
    'else': 'ELSE',
    'while': 'WHILE',
    'case': 'CASE',
    'then': 'THEN',
    'esac': 'ESAC',
    'pool': 'POOL',
    'class': 'CLASS',
    'loop': 'LOOP',
    'inherits': 'INHERITS',
    'isvoid': 'ISVOID',
    'self': 'SELF',
    'not': 'NOT'
}

tokens += list(reserved.values())

# Simple rules
t_PLUS = r'\+'
t_MINUS = r'\-'
t_MULTIPLY = r'\*'
t_DIVIDE = r'\/'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_EQ = r'\='
t_LT = r'\<'
t_LTEQ = r'\<\='
t_ASSIGN = r'\<\-'
t_INT_COMP = r'~'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COLON = r'\:'
t_COMMA = r'\,'
t_DOT = r'\.'
t_SEMICOLON = r'\;'
t_AT = r'\@'
t_ARROW = r'\=\>'
# Complex rules

@TOKEN(r"(t[R|r][U|u][e|E]|f[a|A][l|L][s|S][e|E])")
def t_BOOLEAN(token):
token.value = token.value.lower()
return token

@TOKEN(r"\d+")
def t_INTEGER(token):
token.value = int(token.value)
return token

@TOKEN(r"[A-Z][A-Za-z0-9_]*")
def t_TYPE(token):
token.type = reserved.get(token.value.lower(), 'TYPE')
return token

@TOKEN(r"[a-z][A-Za-z0-9_]*")
def t_ID(token):
token.type = reserved.get(token.value.lower(), "ID")
return token

# Utility definitions
@TOKEN(r'\n+')
def t_newline(token):
    global readjust_col
    readjust_col = token.lexpos + len(token.value)
    token.lexer.lineno += len(token.value)

def t_error(token):
    errors.append(error(error_type="LexicographicError",
                        row_and_col=(token.lineno, token.lexpos - readjust_col + 1),
                        message='ERROR "%s"' % token.value[:1]))
    token.lexer.skip(1)

t_ignore = ' \t'
t_ignore_COMMENTLINE = r"\-\-[^\n]*"


# Global states
states = (
    ("STRING", "exclusive"),
    ("COMMENT", "exclusive")
)

# The string state
@TOKEN(r'\"')
def t_start_string(token):
    token.lexer.push_state("STRING")
    token.lexer.string_backslashed = False
    token.lexer.stringbuf = ""

@TOKEN(r'\n')
def t_STRING_newline(token):
    global readjust_col
    token.lexer.lineno += 1
    if not token.lexer.string_backslashed:
        errors.append(error(error_type="LexicographicError",
                            row_and_col=(token.lineno, token.lexpos - readjust_col + 1),
                            message="Unterminated string constant"))
        token.lexer.pop_state()
    else:
        token.lexer.string_backslashed = False
    readjust_col = token.lexpos + len(token.value)

@TOKEN(r'\0')
def t_STRING_null(token):
    errors.append(error(error_type="LexicographicError",
                        row_and_col=(token.lineno, token.lexpos - readjust_col + 1),
                        message='Null character in string'))
    token.lexer.skip(1)

@TOKEN(r'\"')
def t_STRING_end(token):
if not token.lexer.string_backslashed:
token.lexer.pop_state()
token.value = token.lexer.stringbuf
token.type = "STRING"
return token
else:
token.lexer.stringbuf += '"'
token.lexer.string_backslashed = False

@TOKEN(r"[^\n]")
def t_STRING_anything(token):
if token.lexer.string_backslashed:
if token.value == 'b':
token.lexer.stringbuf += '\b'
elif token.value == 't':
token.lexer.stringbuf += '\t'
elif token.value == 'n':
token.lexer.stringbuf += '\n'
elif token.value == 'f':
token.lexer.stringbuf += '\f'
elif token.value == '\\':
token.lexer.stringbuf += '\\'
else:
token.lexer.stringbuf += token.value
token.lexer.string_backslashed = False
else:
if token.value != '\\':
token.lexer.stringbuf += token.value
else:
token.lexer.string_backslashed = True

def t_STRING_error(token):
    token.lexer.skip(1)
    errors.append(error(error_type="LexicographicError",
                        row_and_col=(token.lineno, token.lexpos - readjust_col + 1),
                        message='ERROR at or near %s' % token.value[:1]))

def t_STRING_eof(token):
    errors.append(error(error_type="LexicographicError",
                        row_and_col=(token.lineno, token.lexpos - readjust_col + 1),
                        message='EOF in string constant'))
    token.lexer.pop_state()

t_STRING_ignore = ''

# The comment state; Cool comments (* ... *) can be nested,
# so a counter tracks the nesting depth.

@TOKEN(r"\(\*")
def t_start_comment(token):
    token.lexer.push_state("COMMENT")
    token.lexer.comment_count = 0

@TOKEN(r"\(\*")
def t_COMMENT_startanother(token):
    token.lexer.comment_count += 1

@TOKEN(r"\n+")
def t_COMMENT_newline(token):
    global readjust_col
    readjust_col = token.lexpos + len(token.value)
    token.lexer.lineno += len(token.value)

@TOKEN(r"\*\)")
def t_COMMENT_end(token):
    if token.lexer.comment_count == 0:
        token.lexer.pop_state()
    else:
        token.lexer.comment_count -= 1

def t_COMMENT_error(token):
    token.lexer.skip(1)

def t_COMMENT_eof(token):
    errors.append(error(error_type="LexicographicError",
                        row_and_col=(token.lineno, token.lexpos - readjust_col + 1),
                        message="EOF in comment"))
    token.lexer.pop_state()

t_COMMENT_ignore = ''


lexer = lex.lex()

def tokenizer(stream_input):
    """Run the lexer over stream_input; return (errors, tokens, real column map)."""
    global readjust_col
    readjust_col = 0
    lexer.input(stream_input)
    token_list = []
    real_col = {}
    for tok in lexer:
        real_col[str(tok)] = tok.lexpos - readjust_col + 1
        token_list.append(tok)

    return errors, token_list, real_col
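
As a smoke test of the public entry point, a short Cool program should tokenize without errors; keywords come back case-insensitively and the quoted literal collapses into a single STRING token. A sketch, assuming it runs from the repository root so the relative ..utils.errors import resolves:

# Hypothetical smoke test, not part of this PR:
from src.compiler.components.lexer_analyzer import tokenizer

source = 'class Main inherits IO { main(): Object { out_string("Hi\\n") }; };'
errors, tokens, real_col = tokenizer(source)

assert not errors
print([tok.type for tok in tokens])
# Expected shape: ['CLASS', 'TYPE', 'INHERITS', 'TYPE', 'LBRACE', 'ID', ...]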
