diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..c5a614015 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,13 @@ +# NOTE(review): pin a specific Python tag instead of :latest so rebuilds are reproducible. +FROM docker.uclv.cu/python:latest + +# Copy only the dependency manifest so this layer is rebuilt only when requirements.txt changes. +COPY ./requirements.txt ./requirements.txt + +RUN pip3 install --no-cache-dir -r requirements.txt + +# Refresh the package index and install SPIM in one layer so a stale cached index is never reused. +RUN apt-get update && apt-get install -y --no-install-recommends spim && rm -rf /var/lib/apt/lists/* + +# WORKDIR creates the directory when it is missing, so no separate mkdir step is needed. +WORKDIR /cool-compiler-2021 diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..423c34480 --- /dev/null +++ b/Makefile @@ -0,0 +1,11 @@ +pytest: + docker-compose run compiler make test + +compile: + docker-compose run compiler python3 cool.py program.cl +exec: + docker-compose run compiler bash -c "python3 cool.py program.cl && spim" + +spim: + docker-compose run compiler spim + diff --git a/doc/report.pdf b/doc/report.pdf new file mode 100644 index 000000000..c8134f8b6 Binary files /dev/null and b/doc/report.pdf differ diff --git a/doc/report/architecture.png b/doc/report/architecture.png new file mode 100644 index 000000000..0c6fbb230 Binary files /dev/null and b/doc/report/architecture.png differ diff --git a/doc/report/inheritance_memory.png b/doc/report/inheritance_memory.png new file mode 100644 index 000000000..3f5a00650 Binary files /dev/null and b/doc/report/inheritance_memory.png differ diff --git a/doc/report/memory.png b/doc/report/memory.png new file mode 100644 index 000000000..bdf844b2b Binary files /dev/null and b/doc/report/memory.png differ diff --git a/doc/report/report.md b/doc/report/report.md new file mode 100644 index 000000000..cb530800d --- /dev/null +++ b/doc/report/report.md @@ -0,0 +1,447 @@ +# Cool Compiler + + +## 1. Uso del compilador + +Para usar el compilador, solo se debe ejecutar el siguiente comando en una consola abierta dentro de la carpeta src: + +``` +python3 cool.py +``` + + + +## 2. Arquitectura del compilador + +El objetivo del presente software es dado un programa escrito en el lenguaje COOL generar un programa equivalente escrito en MIPS que pueda ser ejecutado en SPIM.
+ +El proceso de convertir lenguaje COOL a lenguaje MIPS se dividió en un total de 5 fases repartidas en 3 fases principales: + +- Parsing + - Análisis Léxico + - Análisis Sintáctico +- Análisis semántico +- Generación de código + - Generación de CIL + - Generación de MIPS + +Cada una de estas fases principales se corresponde con un módulo del compilador encargado de su ejecución por lo que la estructura del compilador sería la siguiente. +``` +src/ +┣ codegen/ +┣ parsing/ +┣ semantics/ +┣ utils/ +┗ cool.py +``` +Por último el archivo `cool.py` se encarga de crear el *pipeline* entre el archivo de entrada, los módulos del compilador y el archivo de salida siguiendo el flujo mostrado en la siguiente imagen. + +![architecture](architecture.png) + + +## 3. Módulos del compilador + +### 3.1 parsing + +El proceso de parsing del programa se divide en dos fases: el análisis lexicográfico y el análisis sintáctico. Para la implementación de este módulo se decidió utilizar `ply` el cual es una implementación en python de las herramientas de construcción de compiladores `lex` y `yacc`. Este módulo recibe como entrada un string y en caso de dicho string pertenecer al lenguaje COOL devuelve el árbol de sintaxis abstracta del programa, en caso negativo devuelve una lista con los errores encontrados. + + +**Análisis lexicográfico:** + +----- + +Utilizando la herramienta `lex` se implementó el tokenizador de texto utilizando expresiones regulares para reconocer los tokens, esto resulta conveniente dado que la mayoría de componentes del lenguaje son reconocibles mediante estas, no obstante existen excepciones como los comentarios (multilínea y de una sola línea) y los strings debido a que estos elementos se definen como un lenguaje de la forma $a^nb^n$.
Para estos casos especiales se optó por reconocer el símbolo inicial del elemento y luego procesar el resto del texto sin utilizar el autómata hasta encontrar el símbolo que cierra el elemento para luego reanudar el procesamiento utilizando el autómata. + +**Análisis sintáctico:** + +---- + +La herramienta `yacc` permite la creación de parsers con gramáticas LALR(1) y la construcción del árbol de sintaxis abstracta mediante gramáticas atributadas. En esta fase se definió la gramática y la jerarquía de clases a utilizar para construir el AST. + +A continuación definimos la gramática LALR(1) utilizada. + +``` +S -> program +program -> class_list +class_list -> def_class +class_list -> def_class class_list +def_class -> CLASS TYPEID OCUR feature_list CCUR SEMICOLON +feature_list -> ε +feature_list -> def_attr feature_list +feature_list -> def_func feature_list +def_attr -> OBJECTID COLON TYPEID SEMICOLON +def_attr -> OBJECTID COLON TYPEID ASSIGN exp SEMICOLON +def_func -> OBJECTID OPAR CPAR COLON TYPEID OCUR exp CCUR SEMICOLON +def_func -> OBJECTID OPAR param_list CPAR COLON TYPEID OCUR exp CCUR SEMICOLON +param_list -> param +param_list -> param COMMA param_list +param -> OBJECTID COLON TYPEID +exp -> OBJECTID ASSIGN exp +exp -> LET iden_list IN exp +iden_list -> iden +iden_list -> iden COMMA iden_list +iden -> OBJECTID COLON TYPEID +iden -> OBJECTID COLON TYPEID ASSIGN exp +case_list -> branch +case_list -> branch case_list +branch -> OBJECTID COLON TYPEID CASSIGN exp SEMICOLON +exp -> NOT exp +exp -> comp +comp -> arith +comp -> arith LOWER arith +comp -> arith LEQ arith +comp -> arith EQUAL arith +comp -> arith EQUAL NOT exp +arith -> term +arith -> arith PLUS term +arith -> arith MINUS term +term -> factor +term -> term STAR factor +term -> term DIV factor +factor -> atom +factor -> TILDE factor +factor -> CASE exp OF case_list ESAC +factor -> WHILE exp LOOP exp POOL +factor -> OCUR exp_list CCUR +exp_list -> exp SEMICOLON +exp_list -> exp SEMICOLON exp_list
+factor -> IF exp THEN exp ELSE exp FI +factor -> ISVOID factor +atom -> INT_CONST +atom -> STRING_CONST +atom -> TRUE +atom -> FALSE +atom -> OBJECTID +atom -> NEW TYPEID +atom -> func_call +atom -> OPAR exp CPAR +func_call -> OBJECTID OPAR arg_list CPAR +func_call -> atom DOT OBJECTID OPAR arg_list CPAR +func_call -> atom AT TYPEID DOT OBJECTID OPAR arg_list CPAR +arg_list -> ε +arg_list -> arg_list_not_empty +arg_list_not_empty -> exp +arg_list_not_empty -> exp COMMA arg_list_not_empty +``` + +### 3.2 semantics + +El módulo `semantics` implementa la fase de análisis semántico del compilador, este recibe como entrada el árbol de sintaxis abstracta producido por el parser y tiene como salida una lista de errores, la cual si es vacía indica que el programa es válido. + +**Análisis semántico** + +----- + +Para realizar el análisis se definieron 3 clases las cuales implementan un patrón *visitor* para recorrer el AST, cada una de estas clases realiza un recorrido sobre el árbol comprobando distintas reglas del lenguaje en cada caso. + +1. `TypeCollector`: este primer recorrido es el encargado de recolectar los identificadores de los tipos definidos por las clases declaradas en el código y verifica que no haya clases con el mismo nombre. + +2. `TypeBuilder`: es el encargado de validar las declaraciones hechas dentro de los tipos creados en el recorrido anterior. Verifica que los atributos y métodos declarados no sean redefinidos dentro de la misma clase y que los tipos presentes en estas definiciones existan. Además realiza comprobaciones de integridad sobre la jerarquía de tipos comprobando la no existencia de ciclos en la herencia y que toda clase base haya sido definida. + +3. `TypeChecker`: se encarga de analizar el cuerpo de las definiciones de los métodos y las expresiones de las definiciones de los atributos de las clases. Se analiza la signatura de los métodos para validar la reescritura de métodos en casos de herencia y
que los atributos heredados no se encuentren definidos nuevamente. Además comprueba que se cumplan las reglas de cálculo de tipos para expresiones definidas en el manual del lenguaje COOL. + +Para la realización de estos recorridos fue necesaria la implementación de representaciones de los elementos semánticos del programa como los tipos (`Type`), atributos (`Attribute`), métodos (`Method`), etc... además de contenedores que nos permitiesen acceder a esta información de una forma efectiva y sencilla como `Context` y `Scope`. Estas clases se encuentran en el archivo `semantic.py` y fueron implementadas siguiendo las ideas del temario de análisis semántico visto en el curso. + +Luego de haber realizado los diferentes recorridos por el AST el módulo devuelve una lista la cual contiene los errores semánticos encontrados en el programa. + + +### 3.3 codegen + +Luego de comprobar que el código proporcionado por el usuario no contenga errores, se procede a la generación de código de máquina. + +La generación de código está dividida en dos partes: primero se genera un código intermedio CIL y luego, a partir de este, se genera el código MIPS. Esto nos va a permitir generar código del programa de forma más sencilla, ya que el salto directamente desde COOL a MIPS es demasiado complejo. El AST devuelto por el parser se recorre y se van formando los nodos de un nuevo AST con nodos que representan las definiciones, instrucciones y expresiones de CIL. Por último, a partir de este AST se procede a generar el código MIPS en donde se incluye el trabajo con la memoria. + + +**Generación de código CIL** + +----- + +El lenguaje CIL está dividido en tres secciones: + +**1.** type: Se definen los tipos (clases) con sus atributos y el encabezado de sus métodos como se explica en la bibliografía de la asignatura.
+ +Se conoce que el ast producido por el parser tiene como nodo raíz un ProgramNode que está formado por nodos ClassDeclarationNode que son las clases, y estos a su vez por nodos AttrDeclarationNode y FunctionDeclarationNode que son las definiciones de los atributos y los métodos de una clase. Cuando se visita un ClassDeclarationNode, se crean los nodos CILTypeNode que contienen una lista de CILAttributeNode y una lista de CILMethodNode, los cuales solo contienen el nombre del método y el nombre de la función que lo define en la sección code. + +``` +type A { + attribute x ; + method f : f1; +} +``` + +Como se sabe los métodos y los atributos de un tipo deben siempre definirse en un mismo orden y si un tipo A hereda de B, A debe tener acceso a los atributos y métodos de su padre y de los demás ancestros de él hasta llegar a Object. Para lograr esto se tomaron en orden topológico todos los nodos de las clases y al recorrerlos se fueron agregando los atributos del padre y métodos en el mismo orden, y por último los suyos. De esta forma, cuando se llega a un nodo su padre tiene ya todos sus atributos y métodos definidos, así como los de los ancestros en orden de aparición. Además, para una mayor comodidad en la implementación, todas las variables o atributos almacenan un tipo además de su nombre, dejando el trabajo de la memoria para la segunda fase de generación. + +Además a cada tipo se le añade un método especial, el init, que va a ser llamado cada vez que se inicialice una clase usando la expresión `new` desde el código de COOL. Por cada atributo declarado en la clase y que se le asigna una expresión, se van a añadir las instrucciones responsables de inicializar esos valores. Este método va a ser invocado pasándole como argumento la referencia de la variable a la que se le desea asignar este tipo.
+ +Los tipos built-in quedan implementados de la siguiente forma: + +``` +type Object { + method init : init_Object; + method abort : abort_Object; + method type_name : type_name_Object; + method copy : copy_Object; +} + +type Int { + attribute value; + + method init : init_Int; + method abort : abort_Object; + method type_name : type_name_Object; + method copy : copy_Object; +} + +type String { + attribute value; + + method init : init_String; + method abort : abort_Object; + method type_name : type_name_Object; + method copy : copy_Object; + method length : length_String; + method concat : concat_String; + method substr : substr_String; +} + +type Bool { + attribute value; + + method init : init_Bool; + method abort : abort_Object; + method type_name : type_name_Object; + method copy : copy_Object; +} + +type IO { + method init : init_IO; + method abort : abort_Object; + method type_name : type_name_Object; + method copy : copy_Object; + method out_string : out_string_IO; + method out_int : out_int_IO; + method in_string : in_string_IO; + method in_int : in_int_IO; +} +``` + + + +**2.** data: En esta sección se guardan los string que se van encontrando en el AST y se almacenan en una variable para su posterior uso. + +**3. ** code: Esta sección contiene la definición de todas las funciones, sus parámetros, las variables locales que se utilizan y el retorno. + +Un FunctionDeclarationNode en el AST devuelto por el parser se convierte en CIL en un CILFuntionDeclarationNode que está formado por tres secciones: `params` en donde están todos los parámetros de la función declarada, la sección `local` donde se crean las variables locales e `instructions` donde se encuentran las instrucciones a ejecutar. 
Para crear las variables se utiliza un alias que depende de su nombre original, esto se realizó porque una variable de un método o un argumento pueden llamarse como un atributo de la clase y a la vez en una expresión let pueden redefinirse nombres de variables que estén en los parámetros o en otro let más externo, este alias posibilita que la variable al crearla sea única y no sobrescriba variables creadas en otro entorno accesible. + +Como se conoce en el lenguaje COOL solo existen expresiones y en el lenguaje CIL se tienen tanto expresiones como instrucciones, por tanto una expresión COOL se traduce a una lista de instrucciones CIL ya sean de asignación o de cambio del valor de un atributo, ifgoto, goto, label, entre otras. Una asignación en CIL puede ser la traducción de una `asignación` de COOL o puede ser creada para llevar una expresión de COOL distinta de asignación a una instrucción en CIL, un ejemplo es cuando queremos realizar una operación suma en COOL y una de las partes (izquierda o derecha) no es un nodo atómico, debemos separar en una asignación a una nueva variable creada en CIL que guarde la expresión de esa parte y luego a esta sumarla con la parte que sí es atómica, lo mismo pasa para todas las expresiones binarias. + + +La conversión de la lógica del `loop`, así como la del `conditional`, a código CIL fue muy sencilla. Con la ayuda de los labels y de las expresiones ifGoto y goto se decidía cuáles eran las próximas instrucciones a ejecutar haciendo saltos en el código. + +En el caso de una expresión `block` en COOL, para llevarla a CIL recorremos todas las expresiones que la compongan, cada una sabe cómo convertirse al lenguaje CIL y luego retornamos la que devolvió la última expresión del `block`.
+ +En el caso del `let` se debe crear un nuevo ámbito de variable que te permita definir variables que ya están definidas y a la vez utilizar las anteriores creadas, luego se crean tantas variables como tenga el `let` y también para cada una de las expresiones de las mismas y a estas se les asigna lo que devuelve cada una de las expresiones y luego se procede a crear una nueva variable que guarde lo que retorne el recorrido que se le realiza a la expresión in del `let`. + +La asignación es un caso importante, ya que tiene la tarea de verificar a qué tipo de variable es a la que se le asigna un valor. Por tanto, primero la busca en el scope y en caso de no encontrarse es un atributo, por lo que la instrucción a ejecutar es un `set attribute`; en otro caso es un `assign`. + +Para las expresiones unarias en el caso del `prime` lo que se hace es restarle al valor cero la expresión que devuelve el recorrido de la expresión del `prime`, en el caso del `not` se devuelve una nueva variable que guarda el valor de un nuevo NodoCIL CILNotNode que guarda la expresión del `not`; la segunda parte de generación de código es la encargada de procesar dicha expresión. En la expresión `isvoid` se recorre la variable y se crea una nueva para asignarle el resultado de este proceso para luego esta pasarla como parámetro y llamar a una función creada llamada `isvoid` que se implementará en la segunda parte de generación de código. El llamado de funciones en CIL se realiza a través de las expresiones VCALL o CALL; estas se utilizan en dependencia de cómo lo necesite la segunda parte de generación de código. + +Cuando se encuentra un nodo string en el ast se agrega a la sección `.data` y para utilizarlo se utiliza la función `load` de CIL que es agregada a la lista de instrucciones. Para los valores booleanos se crea una expresión `equals` entre 0 y 1 para el False y entre 0 y 0 para el True, esta expresión se le asigna a una nueva variable y es lo que se retorna.
En el caso de las variables si esta es un atributo se crea una nueva variable a la que se le asignará el resultado de hacer `get attr` y si es una variable local se busca en el scope. Cada vez que se crea una variable esta se añade al scope y a esta se le asigna el valor de una expresión, la cual es un nuevo nodo creado que pertenece al lenguaje CIL, estas asignaciones se añaden a la lista de instrucciones al igual que aquellas instrucciones de CIL que se necesitan para convertir una expresión del lenguaje COOL a CIL. + +**Generación de código MIPS** + +---- + + +La generación de código MIPS se realiza mediante dos recorridos al árbol de sintaxis abstracta del lenguaje CIL, el primer recorrido se encarga de construir la información de las estructuras en memoria utilizadas por el compilador y el segundo recorrido genera el código utilizando la información contextual generada por el primero. + +El primer recorrido del AST del lenguaje CIL se realiza con el objetivo de crear un objeto `MIPSContext`, en esta clase almacenaremos la información relativa a la representación en memoria de los tipos, sus instancias y del manejo de memoria durante el llamado de funciones. + +A continuación definiremos las ideas seguidas para la representación de los tipos en memoria. +1. Los valores de los atributos de una clase son independientes para cada instancia de la clase, sin embargo, los métodos de la clase son globales para todas las instancias. Esto permite separar la información global del tipo en memoria estática y la información de las instancias en memoria dinámica. + +2. Los identificadores de las clases en COOL son únicos y por tanto los identificadores de los tipos también lo son. Esto permite utilizar su nombre como alias a la dirección de memoria donde se almacena la información del tipo.
La información que se decidió almacenar sobre los tipos fue su tamaño (para la creación dinámica de instancias mediante `copy`), su representación como string (para la implementación de `type_name`), el tiempo de descubrimiento y finalización en el grafo de la jerarquía de tipos (implementación del case), y las direcciones de los métodos del tipo. + + ```mips + .data + : .asciiz + .data + .align 4 + : .word ... + ``` + +3. La representación de una instancia de tipo `` es un bloque de memoria de tamaño `` localizado en memoria dinámica. La primera palabra de este bloque contienen la dirección representada por `` las siguientes palabras del bloque contienen direcciones de memoria apuntando al valor de los atributos de la instancia en el orden en que fueron declarados. + + ![memory](memory.png) + +4. El orden de los atributos y métodos en un tipo debe de respetar el orden en que se declaran en su tipo padre. Esto permite definir reglas precisas para obtener las direcciones de los atributos y métodos. + + $$ + attribute\_address_i = 4i + instance\_address\\ + method\_address_i = 4(i + 1) + type\_address + $$ + + Nos referiremos a este índice $i$ como offset. + + A continuación mostramos un ejemplo de la representación de la herencia en memoria. + + ![inheritance_memory](inheritance_memory.png) + +Esta representación se implementa como una clase `TypeInfo` la cual ofrece métodos para el cálculo de las direcciones en memoria de los atributos y métodos. 
+ +```python +class TypeInfo: + def __init__(self, typex: CILTypeNode): + # This is obvious + self.id = typex.id + + # Memory to allocate for an instance of the type + self.size = (len(typex.attributes) + 1) * WSIZE + + # Use this offset to calculate the attribute address + self.attrs_offset = {attr.id : i for i, attr in enumerate(typex.attributes, start=1)} + + # Use this offset to calculate the method address + self.methods_offset = { m.id : i for i, m in enumerate(typex.methods, start=1) } + + def get_attr_addr(self, attr, register): + offset = self.attrs_offset[attr] + return f'{(offset) * WSIZE}({register})' + + def get_method_addr(self, method, register): + offset = self.methods_offset[method] + return f'{(offset + 3) * WSIZE}({register})' +``` +Otro aspecto fundamental a tener en cuenta durante la generación de MIPS es el manejo de memoria durante el llamado a funciones, para la resolución de este problema es común la adopción de convenciones por lo que en este caso nos hemos adherido a las convenciones propuestas por `gcc`. Estas convenciones se definen en torno a una estructura llamada *procedure call frame* la cual definimos a continuación. + +Un *procedure call frame* es un bloque de memoria localizado en la pila el cual se encarga de proveer espacio para las variables locales del procedimiento, además su dirección es utilizada para acceder a los argumentos de la función. + +Convenciones seguidas por la función que llama a otra (*caller*): + +1. Guardar todos los registros utilizados los cuales puedan ser modificados por la función llamada. +2. Pasar los argumentos a la función llamada a través de la pila respetando el orden en que fueron declarados. +3. Llamar a la función. +4. Restablecer el estado de la pila extrayendo los argumentos pasados y los registros guardados + + +Convenciones seguidas por la función que es llamada (*called*): + +1. Guardar la dirección de retorno en la pila `$ra` +2. Guardar el puntero al bloque en la pila `$fp` +3. 
Actualizar `$fp` con el valor de `$sp` +4. Restar el tamaño del call frame a `$sp` +5. Realizar sus instrucciones +6. Actualizar `$sp` con el valor de `$fp` +7. Restaurar el puntero al bloque de la pila `$fp` +8. Restaurar la dirección de retorno en la pila `$ra` + +Debido a estas convenciones el offset de los argumentos se calcula teniendo en cuenta la estructura LIFO de la pila por lo que el último argumento tiene offset 1 y el offset de los parámetros se va asignando de forma ascendente respecto al orden en el cual fueron declarados empezando desde 0, obteniendo así expresiones para el cómputo de las direcciones de argumentos y parámetros. + +$$ + arg\_addr_i = 4(i + 2) + frame\_addr\\ + param\_addr_i = -4i + frame\_addr +$$ + +Esta estructura es implementada por la clase `ProcCallFrame` la cual permite el cálculo de direcciones de argumentos y parámetros. + +```python +class ProcCallFrame: + def __init__(self,name, nargs, nvars): + self.name = name + self.nargs = nargs + self.size = WSIZE * nvars + self.args = {} # Associates each argument with the offset to be accessed in the stack + self.vars = {} # Associates each parameter with the offset to be accessed in the stack + + def add_argument(self, idx): + self.args[idx] = self.nargs - len(self.args) + + def add_variable(self, idx): + self.vars[idx] = len(self.vars) + + def arg_addr(self, id): + offset = self.args[id] + return f'{(2 + offset) * WSIZE}($fp)' + + def var_addr(self, id): + offset = self.vars[id] + return f'-{offset * WSIZE}($fp)' + + def get_addr(self, id): + try: + return self.arg_addr(id) + except KeyError: + return self.var_addr(id) +``` + +Por tanto este primer recorrido construye instancias de estas clases y las asocia con su correspondiente tipo o función creando un contexto con la información necesaria para manejar la memoria.
+ +El segundo recorrido se encarga de generar el código MIPS del programa, la idea de este recorrido es bastante simple dado que es solo traducir instrucciones o expresiones muy sencillas de CIL a MIPS, sin embargo existen ciertos casos de particular interés los cuales expondremos a continuación. + +- Reservar memoria: representada por la instrucción CIL `ALLOCATE T`, cuando se reserva memoria para una instancia de la clase de tipo `T` el compilador reserva `size(T)` de espacio y copia la dirección de memoria de la información del tipo en la primera palabra del espacio reservado. + +- Dispatch: representada por la instrucción CIL `VCALL T f`, este puede ocurrir de dos formas en dependencia de la especificación del dispatch, en específico del uso del símbolo @. Cuando se realiza el llamado a una función de COOL el primer argumento que es pasado es la dirección de memoria de la instancia desde la cual el método es llamado. Dado que el `VCALL T f` tiene el identificador del tipo estático y de la función a llamar se puede utilizar esta información para calcular la dirección del método con la clase `TypeInfo` del `MIPSContext`, la diferencia radica a partir de qué dirección se calcula la posición del método, en el caso de que el dispatch no utilice @ se busca el método a partir de la dirección del tipo dinámico de la instancia, en caso contrario se busca el método a partir de la dirección del tipo `T`. + +- Operador `=` : Esta operación puede producirse entre diferentes tipos, a diferencia de otras operaciones como las aritméticas o el resto de operaciones de comparación. Esta particularidad se resolvió definiendo la comparación como un procedimiento built-in principalmente para evitar generar un código con muchas ramas condicionales dado que las comparaciones son distintas en dependencia del tipo dinámico de la instancia.
+ +- Retorno de operadores: Dado que los operadores tienen un tipo de retorno bien definido debido a las reglas de tipado de COOL, los operadores se encargan de almacenar el resultado de la operación en instancias de la clase de su tipo, las operaciones aritméticas crean instancias de tipo `Int` y las operaciones de comparación de tipo `Bool`. + +**Tipos por valor** + +En la especificación de COOL todos los tipos del lenguaje se especifican como tipos por referencia, por motivos de mejorar la eficiencia del uso de la memoria se implementaron los tipos básicos `Int` y `Bool` como tipos por valor lo cual nos brinda ciertos beneficios: + +- Al ser un tipo por valor para cada instancia solo se guarda en memoria su valor, a diferencia de si fuera un tipo por referencia se guardaría su valor, el puntero a la estructura `TypeInfo` y la dirección de memoria de la instancia. +- Debido a que no se implementaron mecanismos de liberación de memoria dinámica automáticos esto ayuda a liberar de forma automática la memoria utilizada dado que estos valores se almacenan en la pila. +- Las operaciones son más rápidas dado que no hay que estar redireccionando en memoria dinámica para buscar los valores. + +Los principales problemas a resolver durante la implementación fueron: + +- Upcasting/Downcasting: + + ``` + (* Upcasting *) + a : Object <- 10; + + (* Downcasting *) + b : Int <- case a of + x : Int => x; + esac; + ``` + + Para permitir el upcasting/downcasting entre tipos que se representan de forma distinta en memoria utilizamos la información obtenida durante el análisis semántico, asociamos a cada variable de CIL su tipo estático, así tenemos información sobre si lo que almacena esa variable es un valor o una referencia. Luego cuando se genera una asignación de CIL se comprueba el tipo estático del miembro izquierdo y derecho, para los casos `(RType, RType)` y `(VType, VType)` se procede a copiar el valor de una variable hacia la otra.
En el caso `(RType, VType)` se realiza el boxing (`CILBoxNode`) del tipo por valor, es decir se guarda el valor que está en la pila en una instancia de la clase `VType` la cual solo se instancia para estos casos. La definición de estas clases (`Int` y `Bool`) se realizó de acuerdo a la especificación de COOL y tienen un atributo llamado `value` para almacenar el valor. En el caso (`VType, RType`) se realiza el unboxing (`CILUnboxNode`) de la instancia de la clase `VType` correspondiente retornando el valor de su atributo `value`. + + > Nota: `RType` se refiere a un tipo por referencia y `VType` a un tipo por valor + +- Expresión case + + La implementación propuesta de la expresión case utilizaba la instrucción `TYPEOF` para poder elegir la rama a evaluar, sin embargo, en los tipos por valor no tenemos una referencia a la información del tipo por tanto se tuvo que tratar como un caso especial, para ello se aprovechó el hecho de que las ramas se recorren en orden topológico inverso según su tipo y por tanto la primera rama válida para evaluar es la rama a escoger dado que los tipos por valor en este caso no permiten que hereden de ellos. + + Cuando el tipo de retorno del case es `Object` debemos realizar un casteo antes de retornar la evaluación de una rama que retorne un tipo por valor, no obstante esto se resuelve con el boxing/unboxing automático discutido anteriormente. + +- Dispatch + + ``` + (1).type_name() + ``` + + Los tipos por valor pueden usar los métodos heredados de `Object` pero no tienen acceso a la información del tipo para obtener la dirección del método, para resolver esto implementamos métodos built-in para los tipos por valor, estos no sobrescriben los de la implementación de su clase dado que los métodos de una clase `VType` tienen el mismo identificador que los de la clase `Object` al ser heredados. 
Durante la generación de código MIPS sabemos si la variable de CIL ejecutando un dispatch es una variable de tipo por valor o por referencia y hacemos el binding dinámico de los métodos o llamamos a los built-in de los tipos por valor según corresponda. + + + +**Optimización del case** + + +Un caso de especial consideración dentro de las expresiones de COOL es el `case`, el comportamiento esperado es que se compute el tipo dinámico de la expresión del `case` y se seleccione la rama del tipo ancestro más cercano a dicho tipo. Para resolver esto se recorre el árbol de clases iniciando por Object utilizando un DFS y para cada clase se almacena su tiempo de descubrimiento `dt` y tiempo de finalización `ft`, estos tiempos se guardan en la estructura `TypeInfo` que se almacena en memoria estática para cada tipo. Luego recorremos las ramas del case en orden descendente de acuerdo a su tiempo de descubrimiento y comprobamos si el tipo de la expresión del case (obtenido mediante una operación `TYPEOF`) conforma con el tipo de dicha rama. Esto se traduce a la creación de un `CILConformsNode` el cual es procesado de la siguiente forma `T1 CONFORMS TO T2 <=> dt(T2) <= dt(T1) <= ft(T2)`, dado que el `dt` y `ft` se almacenan en la memoria esta comprobación se realiza en $O(1)$, como se recorre en orden descendente de acuerdo al `dt` la primera rama que cumpla esta condición es el ancestro más cercano, dado que a lo sumo debemos recorrer todas las ramas del case esto es $O(n)$ donde $n$ es la cantidad de ramas.
+ + + + + + + + + + + + + diff --git a/doc/team.yml b/doc/team.yml index c16162532..d5aa8f7bd 100644 --- a/doc/team.yml +++ b/doc/team.yml @@ -1,10 +1,10 @@ members: - - name: Nombre Apellido1 Apellido2 - github: github_id - group: CXXX - - name: Nombre Apellido1 Apellido2 - github: github_id - group: CXXX - - name: Nombre Apellido1 Apellido2 - github: github_id - group: CXXX + - name: Amanda González Borrell + github: amyGB99 + group: C411 + - name: Karla Olivera Hernández + github: karlaoh99 + group: C411 + - name: Victor Manuel Cardentey Fundora + github: Vitico99 + group: C411 diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 000000000..755042489 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,8 @@ +version: "3.7" +services: + compiler: + build: . + working_dir: /cool-compiler-2021/src + volumes: + - .//:/cool-compiler-2021 + diff --git a/requirements.txt b/requirements.txt index 9eb0cad1a..cba16ee2f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ pytest pytest-ordering +ply diff --git a/src/codegen/ast_CIL.py b/src/codegen/ast_CIL.py new file mode 100644 index 000000000..e8205fa95 --- /dev/null +++ b/src/codegen/ast_CIL.py @@ -0,0 +1,390 @@ +class CILNode: + pass + + +class CILProgramNode(CILNode): + def __init__(self, types, data, functions) : + self.types = types + self.data = data + self.functions = functions + + def __str__(self): + text = "ProgramNode:\n\n" + text += "Types:\n" + for t in self.types: + text += str(t) + "\n" + text += "Data:\n" + for d in self.data: + text += str(d) + '\n' + text += "Functions:\n" + for f in self.functions: + text += str(f) + '\n' + return text + + +class CILTypeNode(CILNode): + def __init__(self, id, attributes, methods, hierarchy_branch): + self.id = id + self.attributes = attributes + self.methods = methods + self.hierarchy_branch = hierarchy_branch + + def __str__(self): + text = "TypeNode:\n" + text += f"id: {self.id}\n" + text += "Attributes:\n" + for a in 
self.attributes: + text += str(a) + '\n' + text += "Methods:\n" + for m in self.methods: + text += str(m) + '\n' + return text + + +class CILDataNode(CILNode): + def __init__(self, id, text): + self.id = id + self.text = text + + def __str__(self): + text = "DataNode:\n" + text += f" id: {self.id}\n" + text += f" text: {self.text}\n" + return text + + +class CILFuncNode(CILNode): + def __init__(self, id, params, locals, instructions): + self.id = id + self.params = params + self.locals = locals + self.instructions = instructions + + def __str__(self): + text = "FuncNode:\n" + text += f"id: {self.id}\n" + text += f"Params:\n" + for p in self.params: + text += str(p) + '\n' + text += f"Locals:\n" + for l in self.locals: + text += str(l) + '\n' + text += f"Instructions:\n" + for i in self.instructions: + text += str(i) + '\n' + return text + + +class CILAttributeNode(CILNode): + def __init__(self, id ,type): + self.id = id + self.type = type + + def __str__(self): + text = "AttributeNode:\n" + text += f"id: {self.id}\n" + text += f"type: {self.type}\n" + return text + + +class CILMethodNode(CILNode): + def __init__(self, id, function_id): + self.id = id + self.function_id = function_id + + def __str__(self): + text = "MethodNode:\n" + text += f"id: {self.id}\n" + text += f"function_id: {self.function_id}\n" + return text + + +class CILParamNode(CILNode): + def __init__(self, id, type): + self.id = id + self.type = type + + def __str__(self): + text = "ParamNode:\n" + text += f"id: {self.id}\n" + text += f"type: {self.type}\n" + return text + + +class CILLocalNode(CILNode): + def __init__(self, id, type): + self.id = id + self.type = type + + def __repr__(self): + text = "LocalNode:\n" + text += f"id: {self.id}\n" + text += f"type: {self.type}\n" + return text + + +# Instructions +class CILInstructionNode(CILNode): + pass + + +class CILAssignNode(CILInstructionNode): + def __init__(self, id, expr): + self.id = id + self.expr = expr + + def __str__(self): + text = 
"AssignNode:\n" + text += f"id: {self.id}\n" + text += f"expr: {self.expr}\n" + return text + + +class CILSetAttributeNode(CILInstructionNode): + def __init__(self, id, type, attr , var): + self.id = id + self.type = type + self.attr = attr + self.var = var + + def __str__(self): + text = "SetAttrNode:\n" + text += f"id: {self.id}\n" + text += f"type: {self.type}\n" + text += f"att: {self.attr}\n" + text += f"var: {self.var}\n" + return text + + +class CILArgNode(CILInstructionNode): + def __init__(self, var): + self.var = var + + def __str__(self): + text = "ArgNode:\n" + text += f"var: {self.var}\n" + return text + + +class CILIfGotoNode(CILInstructionNode): + def __init__(self, var, label): + self.var = var + self.label = label + + def __str__(self): + text = "IfGotoNode:\n" + text += f"var: {self.var}\n" + text += f"label: {self.label}\n" + return text + + +class CILGotoNode(CILInstructionNode): + def __init__(self, label): + self.label = label + + def __str__(self): + text = "GotoNode:\n" + text += f"label: {self.label}\n" + return text + + +class CILLabelNode(CILInstructionNode): + def __init__(self, id): + self.id = id + + def __str__(self): + text = "LabelNode:\n" + text += f"id: {self.id}\n" + return text + + +class CILReturnNode(CILInstructionNode): + def __init__(self, var = None): + self.var = var + + def __str__(self): + text = "ReturnNode:\n" + if self.var is not None: + text += f"var: {self.var}\n" + return text + + +# Expressions +class CILExpressionNode(CILNode): + pass + + +class CILBoxNode(CILExpressionNode): + def __init__(self, var, type): + self.var = var + self.type = type + + def __str__(self): + text = "CILBoxNode:\n" + text += f"var: {self.var}\n" + text += f"type: {self.type}\n" + return text + +class CILUnboxNode(CILExpressionNode): + def __init__(self, var, type): + self.var = var + self.type = type + + def __str__(self): + text = "CILUnboxNode:\n" + text += f"var: {self.var}\n" + text += f"type: {self.type} \n" + return text + + +class 
CILBinaryOperationNode(CILExpressionNode): + def __init__(self, left, right): + self.left = left + self.right = right + + def __str__(self): + text = "BinaryNode:\n" + text += f"left: {self.left}\n" + text += f"right: {self.right}\n" + return text + + +class CILGetAttribute(CILExpressionNode): + def __init__(self, var, type, attr): + self.var = var + self.type = type + self.attr = attr + + def __str__(self): + text = "GetAttrNode:\n" + text += f"var: {self.var}\n" + text += f"type: {self.type}\n" + text += f"att: {self.attr}\n" + return text + + +class CILAllocateNode(CILExpressionNode): + def __init__(self, type): + self.type = type + + def __str__(self): + text = "AllocateNode:\n" + text += f"type: {self.type}\n" + return text + + +class CILTypeOfNode(CILExpressionNode): + def __init__(self, var): + self.var = var + + def __str__(self): + text = "TypeOfNode:\n" + text += f"var: {self.var}\n" + return text + + +class CILCallNode(CILExpressionNode): + def __init__(self, func): + self.func = func + + def __str__(self): + text = "CallNode:\n" + text += f"func: {self.func}\n" + return text + + +class CILVCallNode(CILExpressionNode): + def __init__(self, type, func, static=False): + self.type = type + self.func = func + self.static = static + + def __str__(self): + text = "VCallNode:\n" + text += f"type: {self.type}\n" + text += f"func: {self.func}\n" + return text + + +class CILLoadNode(CILExpressionNode): + def __init__(self, var): + self.var = var + + def __str__(self): + text = "LoadNode:\n" + text += f"var: {self.var}\n" + return text + + +class CILAtomicNode(CILExpressionNode): + def __init__(self, lex): + self.lex = lex + + def __str__(self): + text = "AtomicNode:\n" + text += f"lex: {self.lex}\n" + return text + + +class CILVariableNode(CILAtomicNode): + pass + + +class CILExceptionNode(CILAtomicNode): + pass + + +class CILTypeConstantNode(CILAtomicNode): + pass + + +class CILNumberNode(CILAtomicNode): + pass + + +# Arithmetic Operations +class 
CILPlusNode(CILBinaryOperationNode): + pass + + +class CILMinusNode(CILBinaryOperationNode): + pass + + +class CILStarNode(CILBinaryOperationNode): + pass + + +class CILDivNode(CILBinaryOperationNode): + pass + + +# Comparison Operations +class CILLessNode(CILBinaryOperationNode): + pass + + +class CILElessNode(CILBinaryOperationNode): + pass + + +class CILEqualsNode(CILBinaryOperationNode): + def __init__(self, left, right, ref): + super().__init__(left, right) + self.ref = ref + + +class CILNotEqualsNode(CILBinaryOperationNode): + def __init__(self, left, right, ref): + super().__init__(left, right) + self.ref = ref + + +class CILNotNode(CILExpressionNode): + def __init__(self, var): + self.var = var + +class CILConformsNode(CILExpressionNode): + def __init__(self, left, right): + self.left = left + self.right = right \ No newline at end of file diff --git a/src/codegen/cil_codegen.py b/src/codegen/cil_codegen.py new file mode 100644 index 000000000..a0c578b21 --- /dev/null +++ b/src/codegen/cil_codegen.py @@ -0,0 +1,231 @@ +import utils.visitor as visitor +from .ast_CIL import * + +eol = '\n' +tab = '\t' + +class CILCodegen: + @visitor.on('node') + def visit(self, node, frame): + pass + + @visitor.when(CILProgramNode) + def visit(self, node: CILProgramNode): + code = '.TYPES\n\n' + for t in node.types: + code += self.visit(t) + '\n' + code += '\n' + code += '.DATA\n\n' + for d in node.data: + code += self.visit(d) + '\n' + code += '\n' + code += '.CODE\n\n' + for f in node.functions: + code += self.visit(f) + '\n' + return code + + @visitor.when(CILTypeNode) + def visit(self, node: CILTypeNode): + code = f'type {node.id} ' + '{\n' + + for c in node.attributes: + code += self.visit(c) + code += '\n' + + for m in node.methods: + code += self.visit(m) + code += '}\n' + + return code + + @visitor.when(CILDataNode) + def visit(self, node: CILDataNode): + return f'{node.id} = {node.text};' + + @visitor.when(CILFuncNode) + def visit(self, node: CILFuncNode): + code = 
f'function {node.id} ' + '{\n' + for p in node.params: + code += self.visit(p) + code += eol + for l in node.locals: + code += self.visit(l) + code += eol + for i in node.instructions: + code += self.visit(i) + code += eol + code += '}\n\n' + return code + + @visitor.when(CILAttributeNode) + def visit(self, node: CILAttributeNode): + return f'\tattribute {node.id};\n' + + @visitor.when(CILMethodNode) + def visit(self, node: CILMethodNode): + return f'\tmethod {node.id} : {node.function_id};\n' + + @visitor.when(CILParamNode) + def visit(self, node: CILParamNode): + return f'\tPARAM {node.id};\n' + + @visitor.when(CILLocalNode) + def visit(self, node: CILLocalNode): + return f'\tLOCAL {node.id};\n' + + @visitor.when(CILInstructionNode) + def visit(self, node): + pass + + @visitor.when(CILAssignNode) + def visit(self, node: CILAssignNode): + code = f'\t{node.id.lex} = ' + try: + code += self.visit(node.expr) + except: + print(node.expr) + code += ';\n' + return code + + @visitor.when(CILSetAttributeNode) + def visit(self, node: CILSetAttributeNode): + return f'\tSETATTR {node.id.lex} {node.attr.lex} {node.var.lex};\n' + + @visitor.when(CILArgNode) + def visit(self, node:CILArgNode): + return f'\tARG {node.var.lex};\n' + + @visitor.when(CILGotoNode) + def visit(self, node: CILGotoNode): + return f'\tGOTO {node.label.id};\n' + + @visitor.when(CILIfGotoNode) + def visit(self, node:CILIfGotoNode): + return f'\tIF {node.var.lex} GOTO {node.label.id};\n' + + @visitor.when(CILLabelNode) + def visit(self, node: CILLabelNode): + return f'{node.id}:\n' + + @visitor.when(CILReturnNode) + def visit(self, node: CILReturnNode): + return f'\tRETURN {node.var.lex};\n' + + @visitor.when(CILExpressionNode) + def visit(self, node: CILExpressionNode): + pass + + @visitor.when(CILUnboxNode) + def visit(self, node: CILUnboxNode): + return f'UNBOX {node.var.lex}' + + @visitor.when(CILBoxNode) + def visit(self, node: CILBoxNode): + return f'BOX {node.var.lex} {node.type}' + + + 
@visitor.when(CILBinaryOperationNode) + def visit(self, node: CILBinaryOperationNode): + pass + + @visitor.when(CILGetAttribute) + def visit(self, node: CILGetAttribute): + return f'GETATTR {node.var.lex} {node.attr.lex}' + + @visitor.when(CILAllocateNode) + def visit(self, node: CILAllocateNode): + return f'ALLOCATE {node.type.lex}' + + @visitor.when(CILTypeOfNode) + def visit(self, node: CILTypeOfNode): + return f'TYPEOF {node.var.lex}' + + @visitor.when(CILCallNode) + def visit(self, node: CILCallNode): + return f'CALL {node.func}' + + @visitor.when(CILVCallNode) + def visit(self, node: CILVCallNode): + return f'VCALL {node.type} {node.func}' + + @visitor.when(CILLoadNode) + def visit(self, node: CILLoadNode): + return f'LOAD {node.var}' + + @visitor.when(CILAtomicNode) + def visit(self, node: CILAtomicNode): + return f'{node.lex}' + + @visitor.when(CILNumberNode) + def visit(self, node): + return f'{node.lex}' + + @visitor.when(CILTypeConstantNode) + def visit(self, node): + return f'{node.lex}' + + @visitor.when(CILVariableNode) + def visit(self, node): + return f'{node.lex}' + + @visitor.when(CILExceptionNode) + def visit(self, node): + return f'{node.lex}' + + @visitor.when(CILPlusNode) + def visit(self, node: CILPlusNode): + l = self.visit(node.left) + r = self.visit(node.right) + return f'{l} + {r}' + + @visitor.when(CILMinusNode) + def visit(self, node: CILMinusNode): + l = self.visit(node.left) + r = self.visit(node.right) + return f'{l} - {r}' + + @visitor.when(CILStarNode) + def visit(self, node: CILStarNode): + l = self.visit(node.left) + r = self.visit(node.right) + return f'{l} * {r}' + + @visitor.when(CILDivNode) + def visit(self, node: CILPlusNode): + l = self.visit(node.left) + r = self.visit(node.right) + return f'{l} / {r}' + + @visitor.when(CILLessNode) + def visit(self, node: CILLessNode): + l = self.visit(node.left) + r = self.visit(node.right) + return f'{l} < {r}' + + @visitor.when(CILElessNode) + def visit(self, node: CILElessNode): + l = 
self.visit(node.left) + r = self.visit(node.right) + return f'{l} <= {r}' + + @visitor.when(CILEqualsNode) + def visit(self, node: CILEqualsNode): + l = self.visit(node.left) + r = self.visit(node.right) + return f'{l} == {r}' + + @visitor.when(CILNotEqualsNode) + def visit(self, node: CILNotEqualsNode): + l = self.visit(node.left) + r = self.visit(node.right) + return f'{l} != {r}' + + @visitor.when(CILNotNode) + def visit(self, node: CILNotNode): + return f'~ {self.var.lex}' + + @visitor.when(CILConformsNode) + def visit(self, node: CILConformsNode): + l = self.visit(node.left) + r = self.visit(node.right) + return f'{l} CONFORMS TO {r}' \ No newline at end of file diff --git a/src/codegen/generate_ast.py b/src/codegen/generate_ast.py new file mode 100644 index 000000000..296040ce5 --- /dev/null +++ b/src/codegen/generate_ast.py @@ -0,0 +1,456 @@ + +from weakref import ref +from parsing.ast import * +from .ast_CIL import * +from .utils import * +from semantics.semantic import IOType, IntType, StringType, BoolType, ObjectType +import utils.visitor as visitor + + + +class CIL: + def __init__(self, context): + self.scope = CILScope(context) + + @visitor.on('node') + def visit(self, node, scope): + pass + + @visitor.when(ProgramNode) + def visit(self, node): + # Creates the first function to execute + locals = [] + locals.append(CILLocalNode("m0", "Main")) + locals.append(CILLocalNode("m1", "Main")) + locals.append(CILLocalNode("m2", "Main")) + + instructions = [] + instructions.append(CILAssignNode(CILVariableNode("m0"), CILAllocateNode(CILTypeConstantNode("Main")))) + instructions.append(CILArgNode(CILVariableNode("m0"))) + instructions.append(CILAssignNode(CILVariableNode("m1"), CILVCallNode("Main", "Init_Main"))) + instructions.append(CILArgNode(CILVariableNode("m1"))) + instructions.append(CILAssignNode(CILVariableNode("m2"), CILVCallNode("Main", "main"))) + instructions.append(CILReturnNode(CILVariableNode("m2"))) + 
self.scope.functions.append(CILFuncNode('main', [], locals, instructions)) + + self.scope.data.append(CILDataNode(f'str_empty', "\"\"")) + + types_ts, types_heirs = get_ts(self.scope.context) + self.types_ts = types_ts + self.hierarchy_branch = hierarchy_branch(self.scope.context) + infos = self.scope.infos = {} + for type in types_ts: + t = TypeInfo() + infos[type.name] = t + if type.parent is not None: + p = type.parent.name + t.attrs = infos[p].attrs.copy() + t.methods = infos[p].methods.copy() + + t.attrs.extend(type.attributes) + for m in type.methods: + t.methods[m.name] = f'{m.name}_{type.name}' + + types = [] + for d in node.declarations: + type = self.visit(d) + types.append(type) + + # Add built-in types and functions + types.extend(self.scope.create_builtin_types(self.hierarchy_branch)) + + return CILProgramNode(types, self.scope.data, self.scope.functions) + + @visitor.when(ClassDeclarationNode) + def visit(self, node): + self.scope.current_class = node.id + self.scope.attributes = {} + features = [] + methods = [] + locals = [] + type_info = self.scope.infos[node.id] + + for a in type_info.attrs: + self.scope.attributes[a.name] = CILAttributeNode(a.name, a.type) + methods.append(CILMethodNode(f'Init_{node.id}', f'Init_{node.id}')) + + for m in type_info.methods.keys(): + methods.append(CILMethodNode(m, type_info.methods[m])) + + for feature in node.features: + self.scope.instructions = [] + self.scope.locals = [{}] + self.scope.all_locals = [] + if isinstance(feature, AttrDeclarationNode): + if feature.expr is not None: + expr = self.visit(feature.expr), feature.expr.computed_type + features.append((feature.id, feature.type, expr, self.scope.instructions.copy())) + self.scope.instructions = [] + else: + expr = None + features.append((feature.id, feature.type, None, None)) + + locals.extend(self.scope.all_locals.copy()) + + else: + function = self.visit(feature) + self.scope.functions.append(function) + + self.scope.locals = [{}] + self.scope.all_locals 
= [] + init_class = self.scope.create_init_class(features, locals) + self.scope.functions.append(init_class) + + return CILTypeNode(node.id, self.scope.attributes.values(), methods, self.hierarchy_branch[node.id]) + + @visitor.when(AttrDeclarationNode) + def visit(self, node): + pass + + @visitor.when(FuncDeclarationNode) + def visit(self, node): + self.scope.all_locals = [] + self.scope.locals = [{}] + self.scope.instructions = [] + + params = [] + param_node = CILParamNode(f'self_{self.scope.current_class}', self.scope.current_class) + params.append(param_node) + + for param in node.params: + name = self.scope.add_local(param.id, param.type, True) + param_node = CILParamNode(name, param.type) + params.append(param_node) + + expr = self.visit(node.expr) + new_var = self.scope.add_new_local(node.type) + self.scope.instructions.append(CILAssignNode(CILVariableNode(new_var), expr)) + self.scope.instructions.append(CILReturnNode(CILVariableNode(new_var))) + return CILFuncNode(f'{node.id}_{self.scope.current_class}', params, self.scope.all_locals, self.scope.instructions) + + @visitor.when(BlockNode) + def visit(self, node): + for i in range(0, len(node.expr_lis) - 1): + self.visit(node.expr_lis[i]) # Necesary instructions are added, but there is not sense to keep the expression + expr = node.expr_lis[len(node.expr_lis) - 1] + return self.visit(expr) + + @visitor.when(DispatchNode) + def visit(self, node): + if not isinstance(node.expr, VariableNode) or node.expr.lex != 'self': + expr = self.visit(node.expr) + name = self.scope.add_new_local(node.expr.computed_type.name) + instruction = CILAssignNode(CILVariableNode(name), expr) + self.scope.instructions.append(instruction) + type = node.expr.computed_type.name + elif node.expr.lex == 'self': + name = f'self_{self.scope.current_class}' + type = self.scope.current_class + else: + name = self.scope.find_local(node.expr.lex) + type = node.expr.computed_type.name + if node.type is not None: + type = node.type + args = [] + 
args.append(CILArgNode(CILVariableNode(name))) + for arg in node.arg: + expr = self.visit(arg) + name_arg = self.scope.add_new_local(arg.computed_type.name) + if not isinstance(expr, VariableNode): + instruction = CILAssignNode(CILVariableNode(name_arg), expr) + self.scope.instructions.append(instruction) + args.append(CILArgNode(CILVariableNode(name_arg))) + else: + args.append(CILArgNode(CILVariableNode(expr.lex))) + self.scope.instructions.extend(args) + + if node.type is not None: + expression = CILVCallNode(node.type, node.id, True) + else: + expression = CILVCallNode(type, node.id) + type = self.scope.ret_type_of_method(node.id, type) + new_var = self.scope.add_new_local(type) + node_var = CILVariableNode(new_var) + self.scope.instructions.append(CILAssignNode(node_var, expression)) + return node_var + + @visitor.when(ConditionalNode) + def visit(self, node): + exp = self.visit(node.predicate) + name_expr = self.scope.add_new_local("Int") + name_return = self.scope.add_new_local(node.computed_type.name) + var_condition = CILVariableNode(name_expr) + var_return = CILVariableNode(name_return) + self.scope.instructions.append(CILAssignNode(var_condition, exp)) + self.scope.instructions.append(CILIfGotoNode(var_condition,CILLabelNode(f'then_{self.scope.if_count}'))) + count = self.scope.if_count + self.scope.if_count += 1 + exp_else = self.visit(node.elsex) + self.scope.instructions.append(CILAssignNode(var_return, exp_else)) + self.scope.instructions.append(CILGotoNode(CILLabelNode(f'ifend_{count}'))) + self.scope.instructions.append(CILLabelNode( f'then_{count}')) + exp_then = self.visit(node.then) + self.scope.instructions.append(CILAssignNode(var_return, exp_then)) + self.scope.instructions.append(CILLabelNode(f'ifend_{count}')) + return var_return + + @visitor.when(LetNode) + def visit(self, node): + self.scope.locals.append({}) + for variable in node.variables: + self.visit(variable) + expr = self.visit(node.expr) + self.scope.locals.pop() + return expr + + 
@visitor.when(VarDeclarationNode) + def visit(self, node): + name = self.scope.add_local(node.id, node.type) + + if node.expr is not None: + expr = self.visit(node.expr) + elif isinstance(node.computed_type, IntType): + expr = CILNumberNode(0) + elif isinstance(node.computed_type, BoolType): + expr = CILEqualsNode(CILNumberNode(0), CILNumberNode(1), False) + elif isinstance(node.computed_type, StringType): + expr = CILLoadNode('str_empty') + else: + expr = None + + if expr is not None: + self.scope.instructions.append(CILAssignNode(CILVariableNode(name), expr)) + + @visitor.when(LoopNode) + def visit(self, node): + count = self.scope.loop_count + self.scope.loop_count += 1 + self.scope.instructions.append(CILLabelNode(f'while_{count}')) + pred = self.visit(node.predicate) + name_pred = self.scope.add_new_local("Bool") + name_return = self.scope.add_new_local(node.computed_type.name) + + var_condition = CILVariableNode(name_pred) + var_return = CILVariableNode(name_return) + self.scope.instructions.append(CILAssignNode(var_condition, pred)) + self.scope.instructions.append(CILIfGotoNode(var_condition,CILLabelNode(f'body_{count}'))) + self.scope.instructions.append(CILGotoNode(CILLabelNode(f'pool_{count}'))) + self.scope.instructions.append(CILLabelNode(f'body_{count}')) + body = self.visit(node.body) + self.scope.instructions.append(CILAssignNode(var_return, body)) + self.scope.instructions.append(CILGotoNode(CILLabelNode(f'while_{count}'))) + self.scope.instructions.append(CILLabelNode(f'pool_{count}')) + + return var_return + + @visitor.when(CaseNode) + def visit(self, node): + expr = self.visit(node.expr) # the code for computing the expression is generated + self.expression_var_case = expr + + expr_type = node.expr.computed_type + expr_var_name = self.scope.add_new_local(expr_type.name) + expr_var = CILVariableNode(expr_var_name) + self.scope.instructions.append(CILAssignNode(expr_var, expr)) # save the expression result in a local + + types_ts_pos = { type.name 
: i for i, type in enumerate(self.types_ts) } + + if (expr_type.ref): # when the expression is a reference type we need to compute least ancestor for all valid dynamic types of the expression + expr_type_of = CILTypeOfNode(expr_var) + name_type_expr = self.scope.add_new_local(expr_type.name) + type_expr_var = CILVariableNode(name_type_expr) + self.scope.instructions.append(CILAssignNode(type_expr_var,expr_type_of)) + # until here we have + # t0 = expr + # t1 = TYPEOF t0 + name_type_comp = self.scope.add_new_local('Bool') + type_comp_var = CILVariableNode(name_type_comp) + + + # use the topological sort computed in the ProgramNode to sort the types of the branches of the case + case_types = [case.type for case in node.cases] + case_types = sorted(case_types, key=lambda t: types_ts_pos[t], reverse=True) + + for type in case_types: + label = CILLabelNode(f'case_{self.scope.case_count}_{type}') + + + self.scope.instructions.append(CILAssignNode(type_comp_var, CILConformsNode(type_expr_var, CILTypeConstantNode(type)))) + self.scope.instructions.append(CILIfGotoNode(type_comp_var, label)) + + result_name = self.scope.add_new_local(node.computed_type.name) + var_result = CILVariableNode(result_name) + # first generate the instructions of the labels to get the CILLabelNodes to use + + for case in sorted(node.cases, key=lambda c: types_ts_pos[c.type], reverse=True): + case_type_ref = case.type not in ["Bool", "Int"] + if (not expr_type.ref and case_type_ref and case.type != 'Object'): + continue + self.scope.instructions.append(CILLabelNode(f'case_{self.scope.case_count}_{case.type}')) + if (expr_type.ref and not case_type_ref): + self.scope.instructions.append(CILAssignNode(expr, CILUnboxNode(expr, case.type))) + if (not expr_type.ref and case_type_ref): + self.scope.instructions.append(CILAssignNode(expr, CILBoxNode(expr, expr_type.name))) + branch_expr = self.visit(case) + self.scope.instructions.append(CILAssignNode(var_result, branch_expr)) + if (node.computed_type.ref 
and not case_type_ref): + self.scope.instructions.append(CILAssignNode(var_result, CILBoxNode(var_result, case.type))) + self.scope.instructions.append(CILGotoNode(CILLabelNode(f'case_{self.scope.case_count}_end'))) + if (not expr_type.ref): + break + self.scope.instructions.append(CILLabelNode(f'case_{self.scope.case_count}_end')) + self.scope.case_count += 1 + return var_result + + @visitor.when(CaseAttrNode) + def visit(self, node): + self.scope.locals.append({}) + local = self.scope.add_local(node.id, node.type) + self.scope.instructions.append(CILAssignNode(CILVariableNode(local), self.expression_var_case)) + + expression_branch = self.visit(node.expr) + self.scope.locals.pop() + return expression_branch + + @visitor.when(AssignNode) + def visit(self, node): + var = self.visit(node.expr) + + if not isinstance(var, CILAtomicNode): + variable = CILVariableNode(self.scope.add_new_local(node.expr.computed_type.name)) + self.scope.instructions.append(CILAssignNode(variable, var)) + else: + variable = var + + local = self.scope.find_local(node.id.lex) + + if local is not None: + if local.type == 'Object' and node.expr.computed_type.name in ['Int', 'Bool']: + self.scope.instructions.append(CILAssignNode(CILVariableNode(local.id), CILBoxNode(variable, node.expr.computed_type.name))) + else: + self.scope.instructions.append(CILAssignNode(CILVariableNode(local.id), variable)) + return CILVariableNode(local.id) + else: + if self.scope.attributes[node.id.lex].type.name == 'Object' and node.expr.computed_type.name in ['Int', 'Bool']: + var1 = CILVariableNode(self.scope.add_new_local('Object')) + self.scope.instructions.append(CILAssignNode(var1, CILBoxNode(variable, node.expr.computed_type.name))) + self.scope.instructions.append(CILSetAttributeNode(CILVariableNode(f'self_{self.scope.current_class}'), self.scope.current_class, CILVariableNode(node.id.lex), var1)) + else: + 
self.scope.instructions.append(CILSetAttributeNode(CILVariableNode(f'self_{self.scope.current_class}'), self.scope.current_class, CILVariableNode(node.id.lex), variable)) + return CILGetAttribute(CILVariableNode(f'self_{self.scope.current_class}'), self.scope.current_class, CILVariableNode(node.id.lex)) + + @visitor.when(BinaryNode) + def visit(self, node): + expr_left = self.visit(node.left) + expr_right = self.visit(node.right) + if not isinstance(expr_left, CILAtomicNode): + name = self.scope.add_new_local(node.left.computed_type.name) + self.scope.instructions.append(CILAssignNode(CILVariableNode(name), expr_left)) + left = CILVariableNode(name) + else: + left = expr_left + + if not isinstance(expr_right, CILAtomicNode): + name = self.scope.add_new_local(node.right.computed_type.name) + self.scope.instructions.append(CILAssignNode(CILVariableNode(name), expr_right)) + right = CILVariableNode(name) + else: + right = expr_right + + if isinstance(node, PlusNode): + oper = CILPlusNode(left, right) + elif isinstance(node, MinusNode): + oper = CILMinusNode(left, right) + elif isinstance(node, DivNode): + oper = CILDivNode(left, right) + elif isinstance(node, StarNode): + oper = CILStarNode(left, right) + elif isinstance(node, ElessNode): + oper = CILElessNode(left, right) + elif isinstance(node, LessNode): + oper = CILLessNode(left, right) + else: + oper = CILEqualsNode(left, right, node.left.computed_type.ref) + name = self.scope.add_new_local(node.computed_type.name) + self.scope.instructions.append(CILAssignNode(CILVariableNode(name),oper)) + return CILVariableNode(name) + + @visitor.when(PrimeNode) + def visit(self, node): + expr = self.visit(node.expr) + name_exp = self.scope.add_new_local(node.expr.computed_type.name) + self.scope.instructions.append(CILAssignNode(CILVariableNode(name_exp), expr)) + name = self.scope.add_new_local(node.computed_type.name) + self.scope.instructions.append(CILAssignNode(CILVariableNode(name), CILMinusNode(CILNumberNode(0), 
CILVariableNode(name_exp)))) + return CILVariableNode(name) + + @visitor.when(NotNode) + def visit(self, node): + expr = self.visit(node.expr) + name_exp = self.scope.add_new_local(node.expr.computed_type.name) + self.scope.instructions.append(CILAssignNode(CILVariableNode(name_exp), expr)) + name = self.scope.add_new_local(node.computed_type.name) + self.scope.instructions.append(CILAssignNode(CILVariableNode(name), CILNotNode(CILVariableNode(name_exp)))) + return CILVariableNode(name) + + @visitor.when(StringNode) + def visit(self, node): + data = CILDataNode(f'str_{self.scope.str_count}', node.lex) + self.scope.str_count += 1 + self.scope.data.append(data) + name = self.scope.add_new_local('String') + self.scope.instructions.append(CILAssignNode(CILVariableNode(name), CILLoadNode(data.id))) + return CILVariableNode(name) + + @visitor.when(IsVoidNode) + def visit(self, node): + expr = self.visit(node.expr) + name = self.scope.add_new_local(node.computed_type.name) + self.scope.instructions.append(CILAssignNode(CILVariableNode(name), expr)) + self.scope.instructions.append(CILArgNode(CILVariableNode(name))) + name = self.scope.add_new_local("Bool") + self.scope.instructions.append(CILAssignNode(CILVariableNode(name), CILCallNode("isvoid") )) + return CILVariableNode(name) + + @visitor.when(ConstantNumNode) + def visit(self, node): + return CILNumberNode(node.lex) + + @visitor.when(VariableNode) + def visit(self, node): + local = self.scope.find_local(node.lex) + if local is not None: + return CILVariableNode(local.id) + else: + if node.lex == 'self': + return CILVariableNode(f'self_{self.scope.current_class}') + else: + name = self.scope.add_new_local(node.computed_type.name) + self.scope.instructions.append(CILAssignNode(CILVariableNode(name),CILGetAttribute(CILVariableNode(f'self_{self.scope.current_class}'), self.scope.current_class, CILVariableNode(node.lex)))) + return CILVariableNode(name) + + @visitor.when(TrueNode) + def visit(self, node): + oper = 
CILEqualsNode(CILNumberNode(0), CILNumberNode(0), False) + name = self.scope.add_new_local('Bool') + self.scope.instructions.append(CILAssignNode(CILVariableNode(name), oper)) + return CILVariableNode(name) + + @visitor.when(FalseNode) + def visit(self, node): + oper = CILEqualsNode(CILNumberNode(0), CILNumberNode(1), False) + name = self.scope.add_new_local('Bool') + self.scope.instructions.append(CILAssignNode(CILVariableNode(name), oper)) + return CILVariableNode(name) + + @visitor.when(InstantiateNode) + def visit(self, node): + name = self.scope.add_new_local(node.lex) + if node.lex in ["Bool", "Int"]: + self.scope.instructions.append(CILAssignNode(CILVariableNode(name), CILNumberNode(0))) + else: + self.scope.instructions.append(CILAssignNode(CILVariableNode(name),CILAllocateNode(CILTypeConstantNode(node.lex)))) + self.scope.instructions.append(CILArgNode(CILVariableNode(name))) + name = self.scope.add_new_local(node.lex) + self.scope.instructions.append(CILAssignNode(CILVariableNode(name), CILVCallNode(node.lex, f"Init_{node.lex}"))) + return CILVariableNode(name) \ No newline at end of file diff --git a/src/codegen/mips_built_in.txt b/src/codegen/mips_built_in.txt new file mode 100644 index 000000000..faab5ae62 --- /dev/null +++ b/src/codegen/mips_built_in.txt @@ -0,0 +1,742 @@ +######################################## Object ###################################### + +abort_Object: + # calling conventions + sw $ra, 0($sp) + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $sp, $sp, -4 + move $fp, $sp + + la $a0, ObjectAbortMessage + li $v0, 4 + syscall + + lw $t0, 12($fp) + lw $t0, 0($t0) + lw $t0, 4($t0) + + move $a0, $t0 + li $v0, 4 + syscall + + li $v0, 10 + syscall + + # calling conventions + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + lw $ra, 0($sp) + + jr $ra + + + + +copy_Object: + # calling conventions + sw $ra, 0($sp) + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $sp, $sp, -4 + move $fp, $sp + + + lw $t7, 12($fp) # load the object address + lw 
$t6, 0($t7) # get the type info address + lw $t5, 0($t6) # get the size of the type + + move $a0, $t5 + li $v0, 9 + syscall + move $t6, $v0 +copy_Object_loop: + lw $t4, 0($t7) + sw $t4, 0($t6) + addu $t7, $t7, 4 + addu $t6, $t6, 4 + addu $t5, $t5, -4 + bgtz $t5, copy_Object_loop + + + # calling conventions + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + lw $ra, 0($sp) + + jr $ra + + +type_name_Object: + # calling conventions + sw $ra, 0($sp) + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $sp, $sp, -4 + move $fp, $sp + + lw $t7, 12($fp) # get the instance address + lw $t6, 0($t7) # get the type info address + lw $t5, 4($t6) # get the type name + + # create the String class instance to return + li $a0, 8 + li $v0, 9 + syscall + + la $t1, String + sw $t1, 0($v0) + sw $t5, 4($v0) + + # calling conventions + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + lw $ra, 0($sp) + + jr $ra + +############################################### End Object #################################################### + + +################################################ Int ########################################################## + +abort_Int: + # calling conventions + sw $ra, 0($sp) + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $sp, $sp, -4 + move $fp, $sp + + la $a0, ObjectAbortMessage + li $v0, 4 + syscall + + la $t0, Int + lw $t0, 4($t0) + + move $a0, $t0 + li $v0, 4 + syscall + + li $v0, 10 + syscall + + # calling conventions + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + lw $ra, 0($sp) + + jr $ra + + +copy_Int: + # calling conventions + sw $ra, 0($sp) + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $sp, $sp, -4 + move $fp, $sp + + + lw $v0, 12($fp) # load the object address + + # calling conventions + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + lw $ra, 0($sp) + + jr $ra + +type_name_Int: + # calling conventions + sw $ra, 0($sp) + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $sp, $sp, -4 + move $fp, $sp + + la $t6, Int # get the type info address + lw $t5, 
4($t6) # get the type name + + # create the String class instance to return + li $a0, 8 + li $v0, 9 + syscall + + la $t1, String + sw $t1, 0($v0) + sw $t5, 4($v0) + + # calling conventions + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + lw $ra, 0($sp) + + jr $ra + +########################################################## End Int ############################################################ + + +########################################################### Bool ############################################################# + +abort_Bool: + # calling conventions + sw $ra, 0($sp) + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $sp, $sp, -4 + move $fp, $sp + + la $a0, ObjectAbortMessage + li $v0, 4 + syscall + + + la $t0, Bool + lw $t0, 4($t0) + + move $a0, $t0 + li $v0, 4 + syscall + + li $v0, 10 + syscall + + # calling conventions + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + lw $ra, 0($sp) + + jr $ra + + +copy_Bool: + # calling conventions + sw $ra, 0($sp) + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $sp, $sp, -4 + move $fp, $sp + + + lw $v0, 12($fp) # load the object address + + # calling conventions + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + lw $ra, 0($sp) + + jr $ra + +type_name_Bool: + # calling conventions + sw $ra, 0($sp) + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $sp, $sp, -4 + move $fp, $sp + + la $t6, Bool # get the type info address + lw $t5, 4($t6) # get the type name + + # create the String class instance to return + li $a0, 8 + li $v0, 9 + syscall + + la $t1, String + sw $t1, 0($v0) + sw $t5, 4($v0) + + # calling conventions + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + lw $ra, 0($sp) + + jr $ra + +################################################################## End Bool ######################################################## +out_string_IO: + # calling conventions + sw $ra, 0($sp) + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $sp, $sp, -4 + move $fp, $sp + + + lw $a1, 12($fp) # reference to string object + lw 
$a0, 4($a1) # get the address of the value of the string
+    li $v0, 4
+    syscall
+
+    lw $v0, 16($fp)
+
+    # calling conventions
+    addi $sp, $sp, 4
+    lw $fp, 0($sp)
+    addi $sp, $sp, 4
+    lw $ra, 0($sp)
+
+    jr $ra
+
+# out_int_IO: print the integer found at 12($fp) and return the receiver.
+# Same stack layout as out_string_IO: 12($fp) is the argument, 16($fp) is self.
+out_int_IO:
+    # calling conventions
+    sw $ra, 0($sp)
+    addi $sp, $sp, -4
+    sw $fp, 0($sp)
+    addi $sp, $sp, -4
+    move $fp, $sp
+
+    lw $a0, 12($fp)
+    li $v0, 1
+    syscall
+
+    # return self, exactly like out_string_IO, so that calls can be chained;
+    # without this $v0 still holds the syscall code loaded above
+    lw $v0, 16($fp)
+
+    # calling conventions
+    addi $sp, $sp, 4
+    lw $fp, 0($sp)
+    addi $sp, $sp, 4
+    lw $ra, 0($sp)
+
+    jr $ra
+
+# in_string_IO: read a line into IO_Buffer, copy it to a freshly allocated
+# heap buffer, strip one trailing newline and wrap it in a String instance
+# (word 0 = address of the String type label, word 4 = address of the chars).
+in_string_IO:
+    # calling conventions
+    sw $ra, 0($sp)
+    addi $sp, $sp, -4
+    sw $fp, 0($sp)
+    addi $sp, $sp, -4
+    move $fp, $sp
+
+    # Read the string to the buffer
+    la $a0, IO_Buffer
+    li $a1, 1000
+    li $v0, 8
+    syscall
+
+    # get the length of the string to allocate the memory
+    la $t0, IO_Buffer
+    sw $t0, 0($sp)
+    addi $sp, $sp, -4
+    jal strlen
+    addi $sp, $sp, 4
+    lw $t0, 0($sp) # the length is now in $v0
+
+    addi $v0, $v0, 1
+    move $a0, $v0
+    li $v0, 9
+    syscall # in $v0 is the address of the value string
+
+    la $t1, IO_Buffer # copy the string value from the buffer to the heap
+    move $t2, $v0
+    in_string_IO_loop:
+    lbu $t3, 0($t1) # unsigned load: a byte >= 0x80 must not look negative
+    sb $t3, 0($t2)
+    addi $t1, $t1, 1
+    addi $t2, $t2, 1
+    bnez $t3, in_string_IO_loop # stop only once the NUL terminator was copied
+    addi $t2, $t2, -2 # step back to the last character before the NUL
+
+    li $t4, 10
+    lb $t5, 0($t2)
+    bne $t5, $t4, in_string_IO_end
+    li $t3, 0
+    sb $t3, 0($t2) # overwrite the trailing newline with NUL
+
+    in_string_IO_end:
+    move $t0, $v0
+
+    li $a0, 8
+    li $v0, 9
+    syscall
+
+    la $t1, String
+    sw $t0, 4($v0)
+    sw $t1, 0($v0)
+
+
+    # calling conventions
+    addi $sp, $sp, 4
+    lw $fp, 0($sp)
+    addi $sp, $sp, 4
+    lw $ra, 0($sp)
+
+    jr $ra
+
+in_int_IO:
+    # calling conventions
+    sw $ra, 0($sp)
+    addi $sp, $sp, -4
+    sw $fp, 0($sp)
+    addi $sp, $sp, -4
+    move $fp, $sp
+
+    li $v0, 5
+    syscall
+
+
+    # calling conventions
+    addi $sp, $sp, 4
+    lw $fp, 0($sp)
+    addi $sp, $sp, 4
+    lw $ra, 0($sp)
+
+    jr $ra
+
+
+
+
+
+
+
+
+
+
+
+
+
+substr_String:
+    # calling conventions
+    sw $ra, 0($sp)
+    addi $sp, $sp, -4
+    sw $fp, 0($sp)
+    addi $sp, $sp, -4
+    move $fp, $sp
+
+    lw
$t7, 20($fp) # get the String instance address + lw $t0, 4($t7) # get the value of the source String + + lw $t1, 16($fp) # get the start parameter Int instance address + + lw $t2, 12($fp) # get the length perameter Int instance address + + move $a0, $t2 + addi $a0, $a0, 1 + li $v0, 9 + syscall # allocate memory for the substring value + + + li $t3, 0 # current pos in the string + + substr_String_loop1: + beq $t3, $t1, substr_String_eloop1 # if the current pos == start pos break + # else move the current pos + addi $t0, $t0, 1 + addi $t3, $t3, 1 + j substr_String_loop1 + + substr_String_eloop1: + + li $t3, 0 + move $t4, $v0 # move the substring address to $t4 + + substr_String_loop2: + beq $t3, $t2, substr_String_eloop2 + lb $t7, 0($t0) + sb $t7, 0($t4) + addi $t0, $t0, 1 + addi $t4, $t4, 1 + addi $t3, $t3, 1 + j substr_String_loop2 + + substr_String_eloop2: + sb $zero, 0($t4) + move $t0, $v0 + la $t1, String + + li $a0, 8 + li $v0, 9 + syscall + + sw $t1, 0($v0) + sw $t0, 4($v0) + + + # calling conventions + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + lw $ra, 0($sp) + + jr $ra + + +isvoid: + # calling conventions + sw $ra, 0($sp) + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $sp, $sp, -4 + move $fp, $sp + + lw $t0, 12($fp) + li $t1, 0 + beq $t0, $t1, isvoid_end + li $t0, 1 + isvoid_end: + + move $v0, $t0 + + # calling conventions + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + lw $ra, 0($sp) + + jr $ra + +# function to get the length of a string value +strlen: + # calling conventions + sw $ra, 0($sp) + addi $sp, $sp, -4 + sw $fp, 0($sp) + addi $sp, $sp, -4 + move $fp, $sp + + + lw $a0, 12($fp) + li $t0, 0 +strlen_loop: + lbu $t1, 0($a0) + beqz $t1, strlen_exit + addu $a0, $a0, 1 + addu $t0, $t0, 1 + j strlen_loop + strlen_exit: + move $v0, $t0 + + + # calling conventions + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + lw $ra, 0($sp) + + jr $ra + + +length_String: + # calling conventions + sw $ra, 0($sp) + addi $sp, $sp, -4 + sw $fp, 
0($sp)
+    addi $sp, $sp, -4
+    move $fp, $sp
+
+    lw $v0, 12($fp) # get the string instance address
+    lw $v1, 4($v0) # get the string value address
+
+    # push the string value address as the argument of strlen
+    sw $v1, 0($sp)
+    addi $sp, $sp, -4
+
+    jal strlen # length at v0
+
+    addi $sp, $sp, 4
+    lw $t0, 0($sp)
+
+
+    # calling conventions
+    addi $sp, $sp, 4
+    lw $fp, 0($sp)
+    addi $sp, $sp, 4
+    lw $ra, 0($sp)
+
+    jr $ra
+
+
+# compare: equality test used by the generated code.
+# 12($fp) and 16($fp) are the two operands, 20($fp) is a flag.
+# flag == 0: the operands are compared directly as words.
+# flag != 0: the first word of the operand at 12($fp) selects the comparison:
+#   Int / Bool / type -> compare the words stored at offset 4,
+#   String            -> compare the NUL-terminated byte strings at offset 4,
+#   anything else     -> compare the operands directly.
+# Returns 1 in $v0 when equal, 0 otherwise.
+compare:
+    # calling conventions
+    sw $ra, 0($sp)
+    addi $sp, $sp, -4
+    sw $fp, 0($sp)
+    addi $sp, $sp, -4
+    move $fp, $sp
+
+    lw $t0, 12($fp)
+    lw $t1, 16($fp)
+    lw $t2, 20($fp)
+
+    beqz $t2, compare_values
+
+    lw $t3, 0($t0)
+
+    la $t4, Int
+    beq $t3, $t4, compare_branch1
+
+    la $t4, Bool
+    beq $t3, $t4, compare_branch1
+
+    la $t4, type
+    beq $t3, $t4, compare_branch1
+
+    la $t4, String
+    beq $t3, $t4, compare_branch2
+
+    j compare_values
+
+    compare_branch1:
+    lw $t0, 4($t0)
+    lw $t1, 4($t1)
+
+    compare_values:
+    beq $t0, $t1, compare_true
+    j compare_false
+
+
+    compare_branch2:
+    lw $t0, 4($t0)
+    lw $t1, 4($t1)
+    compare_str_loop:
+    lbu $t3, 0($t0)
+    lbu $t4, 0($t1)
+    bne $t3, $t4, compare_false
+    beq $t3, $zero, compare_true
+    addi $t0, $t0, 1
+    addi $t1, $t1, 1
+    j compare_str_loop
+
+    compare_true:
+    li $t0, 1
+    j compare_end
+
+    compare_false:
+    li $t0, 0
+
+    compare_end:
+
+    move $v0, $t0
+
+
+    # calling conventions
+    addi $sp, $sp, 4
+    lw $fp, 0($sp)
+    addi $sp, $sp, 4
+    lw $ra, 0($sp)
+
+    jr $ra
+
+
+# concat_String: build a new String holding the characters of the String at
+# 16($fp) followed by the characters of the String at 12($fp).
+# The result buffer is sized len(first) + len(second) + 1 bytes.
+concat_String:
+    # calling conventions
+    sw $ra, 0($sp)
+    addi $sp, $sp, -4
+    sw $fp, 0($sp)
+    addi $sp, $sp, -4
+    move $fp, $sp
+
+    lw $t0, 16($fp)
+    lw $t0, 4($t0) # the value of the first String instance
+
+    # call strlen with the string
+    sw $t0, 0($sp)
+    addi $sp, $sp, -4
+    jal strlen
+    addi $sp, $sp, 4
+    lw $t0, 0($sp)
+
+    # save the length of the first string
+    sw $v0, 0($sp)
+    addi $sp, $sp, -4
+
+
+    # the second operand lives at 12($fp) (the copy loop below reads it from
+    # there); measuring 16($fp) twice would size the buffer as 2 * len(first)
+    lw $t0, 12($fp)
+    lw $t0, 4($t0) # the value of the second String instance
+
+    # call strlen with the string
+    sw $t0, 0($sp)
+    addi $sp, $sp, -4
+    jal strlen
+ addi $sp, $sp, 4 + lw $t0, 0($sp) + + # pop the lenght of the first string from the stack + addi $sp, $sp, 4 + lw $t0, 0($sp) + + # get the total space for allocating the new string + addu $t0, $t0, $v0 + addi $t0, $t0, 1 + + move $a0, $t0 + li $v0, 9 + syscall # at $v0 is the result string + + lw $t0, 16($fp) + lw $t0, 4($t0) # the address of the value of the first String instance + move $t1, $v0 # the address of the value of the result string + concat_String_loop1: + lbu $t3, 0($t0) + beq $t3, $zero, concat_String_eloop1 + sb $t3, 0($t1) + addi $t0, $t0, 1 + addi $t1, $t1, 1 + j concat_String_loop1 + + concat_String_eloop1: + + lw $t0, 12($fp) + lw $t0, 4($t0) + concat_String_loop2: + lbu $t3, 0($t0) + beq $t3, $zero, concat_String_eloop2 + sb $t3, 0($t1) + addi $t0, $t0, 1 + addi $t1, $t1, 1 + j concat_String_loop2 + concat_String_eloop2: + sb $zero, 0($t1) + + la $t0, String + move $t1, $v0 + + li $a0, 8 + li $v0, 9 + syscall + + sw $t0, 0($v0) + sw $t1, 4($v0) + + # calling conventions + addi $sp, $sp, 4 + lw $fp, 0($sp) + addi $sp, $sp, 4 + lw $ra, 0($sp) + + jr $ra + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/codegen/spim_scope.py b/src/codegen/spim_scope.py new file mode 100644 index 000000000..4c462e05b --- /dev/null +++ b/src/codegen/spim_scope.py @@ -0,0 +1,130 @@ +import utils.visitor as visitor +from .ast_CIL import * + +WSIZE = 4 + +class MIPSScope: + def __init__(self): + self.types = {} + self.functions = {} + + def __str__(self): + r = '' + for t, ti in self.types.items(): + r += f'{t}\n' + r += f'{ti}\n\n' + for f, cf in self.functions.items(): + r += f'{f}\n' + r += f'{cf}\n\n' + return r + + + +class TypeInfo: + def __init__(self, typex: CILTypeNode): + # This is obvious + self.id = typex.id + + # Memory to allocate for an instance of the type + self.size = (len(typex.attributes) + 1) * WSIZE + + # Use this offset to calculate the attribute address, given the address of the instance will be attr_addr = inst_addr + 
WORD_SIZE * offset + self.attrs_offset = {attr.id : i for i, attr in enumerate(typex.attributes)} + + # Associates every method of the type to the label to call + self.methods_offset = { m.id : i for i, m in enumerate(typex.methods) } + + def get_attr_addr(self, attr, register): + offset = self.attrs_offset[attr] + return f'{(offset + 1) * WSIZE}({register})' + + def get_method_addr(self, method, register): + offset = self.methods_offset[method] + return f'{(offset + 4) * WSIZE}({register})' + + def __str__(self): + r = '--------------------Type----------------\n' + r += f'Attrs : {self.attrs_offset}\n' + r += f'Methods : {self.methods_offset}\n' + r += '-------------------------------------------' + return r + +class ProcCallFrame: + def __init__(self,name, nargs, nvars): + self.name = name + self.nargs = nargs + self.size = WSIZE * nvars + self.args = {} # Associates each argument with the offset to be accessed in the stack + self.vars = {} # Associates each parameter with the offset to be accessed in the stack + self.arg_queue = [] + + def push_arg(self, arg): + self.arg_queue.append(arg) + + def clear_args(self): + self.arg_queue = [] + + + def add_argument(self, idx): + self.args[idx] = self.nargs - len(self.args) + + def add_variable(self, idx): + self.vars[idx] = len(self.vars) + + def arg_addr(self, id): + offset = self.args[id] + return f'{(2 + offset) * WSIZE}($fp)' + + def var_addr(self, id): + offset = self.vars[id] + return f'-{offset * WSIZE}($fp)' + + def get_addr(self, id): + try: + return self.arg_addr(id) + except KeyError: + return self.var_addr(id) + + def __str__(self): + r = f'-------------- Frame {self.name} -----------------\n' + r += f'Size: {self.size}\n' + r += f'Args: {self.args}\n' + r += f'Vars: {self.vars}\n' + r += '-----------------------------------------\n' + return r + +class MIPSScopeBuilder: + def __init__(self): + self.scope = MIPSScope() + + @visitor.on('node') + def visit(self, node): + pass + + @visitor.when(CILProgramNode) 
+ def visit(self, node: CILProgramNode): + for t in node.types: + self.visit(t) + + for f in node.functions: + self.visit(f) + + return self.scope + + @visitor.when(CILTypeNode) + def visit(self, node: CILTypeNode): + info = TypeInfo(node) + self.scope.types[node.id] = info + + @visitor.when(CILFuncNode) + def visit(self, node: CILFuncNode): + frame = ProcCallFrame(node.id, len(node.params), len(node.locals)) + for p in node.params: + frame.add_argument(p.id) + for l in node.locals: + frame.add_variable(l.id) + self.scope.functions[node.id] = frame + + + + diff --git a/src/codegen/spim_visitor.py b/src/codegen/spim_visitor.py new file mode 100644 index 000000000..820bb03d9 --- /dev/null +++ b/src/codegen/spim_visitor.py @@ -0,0 +1,513 @@ +import utils.visitor as visitor +from .spim_scope import * +from .ast_CIL import * + +WSIZE = 4 # word size in bytes + +class MIPSCodegen: + def __init__(self, scope): + self.scope = scope + self.code = "" + self.tabs = '' + self.main = True + + # =================== Utils ======================== + def add_line(self,line): + self.code += self.tabs + line + '\n' + + def set_tabs(self,n): + self.tabs = '\t' * n + + def gen_push(self,src): + self.add_line(f'# push {src} to the stack') + self.add_line(f'sw {src}, 0($sp)') + self.add_line(f'addi $sp $sp -{WSIZE}') + + self.add_line('') + + def gen_pop(self,dst): + self.add_line(f'# pop the top of the stack to {dst}') + self.add_line(f'addi $sp $sp {WSIZE}') + self.add_line(f'lw {dst}, 0($sp)') + self.add_line('') + + @visitor.on('node') + def visit(self, node, frame): + pass + + @visitor.when(CILProgramNode) + def visit(self, node: CILProgramNode, frame): + + self.set_tabs(1) + self.add_line(".data") + self.add_line(".align 4") + self.set_tabs(0) + self.add_line("type: .word 8") + self.add_line('') + for t in node.types: + self.visit(t, frame) + + self.set_tabs(1) + self.add_line(".data") + self.set_tabs(0) + self.add_line("ObjectAbortMessage : .asciiz \"Abort called from class \"") + 
+ self.set_tabs(1) + self.add_line(".data") + self.set_tabs(0) + self.add_line("IO_Buffer : .space 1001") + for d in node.data: + self.visit(d, frame) + + for f in node.functions: + self.visit(f, frame) + self.add_line('') + + with open('./codegen/mips_built_in.txt') as file: + self.code += file.read() + + @visitor.when(CILTypeNode) + def visit(self, node: CILTypeNode, frame): + # place the type name as a string in static data + self.set_tabs(1) + self.add_line(".data") + self.set_tabs(0) + t = self.scope.types[node.id] + discovery_time, finish_time = node.hierarchy_branch + methods_str = ' '.join(m.function_id for m in node.methods) + assert len(node.methods) == len(t.methods_offset) + self.add_line(f"_{node.id}: .asciiz \"{node.id}\\n\"") + self.add_line("\t.data") + self.add_line("\t.align 4") + self.add_line(f"{node.id}: .word {t.size} _{node.id} {discovery_time} {finish_time} {methods_str}") + self.add_line('') + + @visitor.when(CILDataNode) + def visit(self, node: CILDataNode, frame): + self.set_tabs(1) + self.add_line(".data") + self.set_tabs(0) + self.add_line(f"{node.id}: .asciiz {node.text}") + self.add_line('') + + @visitor.when(CILFuncNode) + def visit(self, node: CILFuncNode, frame): + frame = self.scope.functions[node.id] + self.set_tabs(1) + self.add_line('.text') + self.set_tabs(0) + self.add_line(f'{node.id}:') + self.set_tabs(1) + self.add_line('# save the return address and frame pointer') + self.gen_push('$ra') # Save the return address + self.gen_push('$fp') # Save the frame pointer + + + self.add_line('# update the frame pointer and allocate the frame in the stack') + self.add_line(f'move $fp $sp') # Update the frame pointer to the top of the stack + + # Allocate frame size in memory + self.add_line(f'subu $sp $sp {frame.size}') + self.add_line('') + + for i in node.instructions: + self.visit(i, frame) + + self.add_line(f'# restore the stack pointer, frame pointer y return address') + self.add_line(f'addu $sp $sp {frame.size}') + 
self.gen_pop('$fp') + self.gen_pop('$ra') + + if self.main: + self.add_line('li $v0, 10') + self.add_line('syscall') + self.main = False + else: + self.add_line('jr $ra') + + @visitor.when(CILAttributeNode) + def visit(self, node: CILAttributeNode, frame): + pass + + @visitor.when(CILMethodNode) + def visit(self, node: CILMethodNode, frame): + pass + + @visitor.when(CILParamNode) + def visit(self, node: CILParamNode, frame): + pass + + @visitor.when(CILLocalNode) + def visit(self, node: CILParamNode, frame): + pass + + # ==================== Instructions ======================== + @visitor.when(CILInstructionNode) + def visit(self, node, frame): + pass + + @visitor.when(CILAssignNode) + def visit(self, node: CILAssignNode, frame: ProcCallFrame): + # Adds the code for calculating the expresion and stores the address for the value in register + self.add_line(f'# assign (add here the expr.to_string) to {node.id.lex}') + register = self.visit(node.expr, frame) + id_addr = frame.get_addr(node.id.lex) + self.add_line(f'sw {register}, {id_addr}') + self.add_line(f'') + + @visitor.when(CILSetAttributeNode) + def visit(self, node: CILSetAttributeNode, frame): + self.add_line(f'# Setting value of the attribute {node.attr.lex} in the instance {node.id.lex} to {node.var.lex}') + inst_addr = frame.get_addr(node.id.lex) + t = self.scope.types[node.type] # Change this for dynamic type? 
Not needed because the attributes are always declared in the same order in inhereted classes + register1 = '$v1' + register2 = '$s2' + attr_addr = t.get_attr_addr(node.attr.lex, register1) # + value_addr = self.visit(node.var, frame) + self.add_line(f'move {register2}, {value_addr}') + self.add_line(f'lw {register1}, {inst_addr}') + self.add_line(f'sw {register2}, {attr_addr}') + self.add_line('') + + @visitor.when(CILArgNode) + def visit(self, node: CILArgNode, frame): + frame.push_arg(node.var) # keep track of the args to be pass to the funcion to get the instance to bind the dynamic type + value_addr = frame.get_addr(node.var.lex) + self.add_line(f'lw $v0, {value_addr}') + self.gen_push('$v0') + + @visitor.when(CILIfGotoNode) + def visit(self, node: CILIfGotoNode, frame): + value_addr = frame.get_addr(node.var.lex) + self.add_line(f'lw $t0, {value_addr}') + self.add_line(f'bne $t0, $zero, {node.label.id}') + + @visitor.when(CILGotoNode) + def visit(self, node: CILGotoNode, frame): + self.add_line(f'j {node.label.id}') + + @visitor.when(CILLabelNode) + def visit(self, node: CILLabelNode, frame): + self.add_line(f'{node.id}:') + + @visitor.when(CILReturnNode) + def visit(self, node: CILReturnNode,frame): + register0 = '$v0' + self.add_line(f'# return the value of the function in the register {register0}') + register1 = self.visit(node.var, frame) + self.add_line(f'move {register0}, {register1}') + self.add_line('') + + + @visitor.when(CILExpressionNode) + def visit(self, node: CILExpressionNode,frame): + pass + + @visitor.when(CILBinaryOperationNode) + def visit(self, node: CILBinaryOperationNode, frame): + pass + + @visitor.when(CILGetAttribute) + def visit(self, node: CILGetAttribute, frame): + var_addr = frame.get_addr(node.var.lex) + register0 = '$v0' + register1 = '$v1' + t = self.scope.types[node.type] + attr_addr = t.get_attr_addr(node.attr.lex, register1) + + # the memory of at var_addr contains the address to the instance of T + # move the instance 
address to the register + self.add_line(f'lw {register1}, {var_addr}') + self.add_line(f'lw {register0}, {attr_addr}') + return register0 + + @visitor.when(CILAllocateNode) + def visit(self, node: CILAllocateNode, frame): + register0 = '$v0' + register1 = '$a0' + t = self.scope.types[node.type.lex] + + self.add_line(f'li {register1}, {t.size}') + self.add_line(f'li {register0}, 9') + self.add_line(f'syscall') + self.add_line(f'la {register1}, {node.type.lex}') + self.add_line(f'sw {register1}, 0({register0})') # Place the dynamic type of the instance in memory + return register0 + + @visitor.when(CILTypeOfNode) # Get the dynamic type of an instance + def visit(self, node: CILTypeOfNode, frame): + self.visit(node.var, frame) + + self.add_line('lw $v0, 0($v0)') # get the type of the var + return '$v0' + + @visitor.when(CILCallNode) # I don't think this is necessary + def visit(self, node: CILCallNode, frame): + register0 = '$v0' + self.add_line(f'jal {node.func}') + for a in frame.arg_queue: + self.gen_pop('$v1') + frame.clear_args() # clear arguments for the new function + return register0 + + @visitor.when(CILVCallNode) + def visit(self, node: CILVCallNode, frame): + # the instance of type T is always the first argument to be passed to the function + self.add_line(f'# calling the method {node.func} of type {node.type}') + + if node.type in ['Bool', 'Int']: + self.add_line(f'la $v1, {node.func}_{node.type}') + else: + if node.static: + self.add_line(f'la $t0, {node.type}') + else: + instance = frame.arg_queue[0] + instance_addr = self.visit(instance, frame) # load into a register the address of the instance in the heap + + # register0 has the dynamic type address of the instance + # since every instance stores its type in the first word of the allocated memory + self.add_line(f'lw $t0, 0({instance_addr})') + + # use the information of the static type to get the location of the method in memory + t = self.scope.types[node.type] + method_addr = 
t.get_method_addr(node.func, '$t0') + self.add_line(f'lw $v1, {method_addr}') + + self.add_line(f'jal $v1') # calls the method and by convention methods return in $v0 + for a in frame.arg_queue: + self.gen_pop('$v1') + frame.clear_args() # clear arguments for the new function + + return '$v0' + + @visitor.when(CILLoadNode) + def visit(self, node: CILLoadNode, frame): + self.add_line(f'#load the string {node.var}') + register = '$v0' + self.add_line(f'li $a0, 8') + self.add_line(f'li $v0, 9') + self.add_line(f'syscall') + self.add_line(f'la $v1, String') + self.add_line(f'sw $v1, 0($v0)') + self.add_line(f'la $v1, {node.var}') + self.add_line(f'sw $v1, 4($v0)') + return register + + + @visitor.when(CILNumberNode) + def visit(self, node: CILNumberNode, frame): + register = '$v0' + self.add_line(f'li $v0, {node.lex}') + self.add_line(f'') + return register + + @visitor.when(CILVariableNode) + def visit(self, node: CILVariableNode, frame): + self.add_line(f'#load the variable {node.lex}') + register = '$v0' + var_addr = frame.get_addr(node.lex) + self.add_line(f'lw {register}, {var_addr}') + return register + + @visitor.when(CILTypeConstantNode) + def visit(self, node: CILTypeConstantNode, frame): + + self.add_line(f'la $v0, {node.lex}') + return '$v0' + + @visitor.when(CILPlusNode) + def visit(self, node: CILPlusNode, frame): + register0 = '$v0' + self.add_line(f'# computes the sum of (node.left.to_string) and (node.right.to_string) and stores it at {register0}') + self.visit(node.left, frame) # in $v0 is the address of the Int instance + self.gen_push('$v0') + self.visit(node.right, frame) + self.gen_pop('$t0') + self.add_line(f'move $t1, $v0') + self.add_line(f'add $v0, $t0, $t1') + return register0 + + @visitor.when(CILMinusNode) + def visit(self, node: CILMinusNode, frame): + register0 = '$v0' + self.add_line(f'# computes the sub of (node.left.to_string) and (node.right.to_string) and stores it at {register0}') + self.visit(node.left, frame) # in $v0 is the 
address of the Int instance + self.gen_push('$v0') + self.visit(node.right, frame) + self.gen_pop('$t0') + self.add_line(f'move $t1, $v0') + self.add_line(f'sub $v0, $t0, $t1') + return register0 + + + @visitor.when(CILStarNode) + def visit(self, node: CILStarNode, frame): + register0 = '$v0' + self.add_line(f'# computes the product of (node.left.to_string) and (node.right.to_string) and stores it at {register0}') + self.visit(node.left, frame) # in $v0 is the address of the Int instance + self.gen_push('$v0') + self.visit(node.right, frame) + self.gen_pop('$t0') + self.add_line(f'move $t1, $v0') + self.add_line(f'mult $t0, $t1') + self.add_line(f'mflo $v0') + return register0 + + @visitor.when(CILDivNode) + def visit(self, node: CILDivNode, frame): + register0 = '$v0' + self.add_line(f'# computes the sub of (node.left.to_string) and (node.right.to_string) and stores it at {register0}') + self.visit(node.left, frame) # in $v0 is the address of the Int instance + self.gen_push('$v0') + self.visit(node.right, frame) + self.gen_pop('$t0') + self.add_line(f'move $t1, $v0') + self.add_line(f'div $t0, $t1') + self.add_line(f'mflo $v0') + return register0 + + @visitor.when(CILLessNode) + def visit(self, node: CILLessNode, frame): + self.visit(node.left, frame) + self.gen_push('$v0') + self.visit(node.right, frame) + self.gen_pop('$t1') + self.add_line(f'move $t2, $v0') # get the address to the right Int instance + self.add_line(f'slt $v0, $t1, $t2') # l < r ? + return '$v0' + + @visitor.when(CILElessNode) + def visit(self, node: CILElessNode, frame): + self.visit(node.left, frame) + self.gen_push('$v0') + self.visit(node.right, frame) + self.gen_pop('$t1') + self.add_line(f'move $t2, $v0') # get the value of the right Int instance + + + self.add_line(f'slt $t4, $t2, $t1') # r < l? 
+ self.add_line(f'li $t3, 1') + self.add_line(f'xor $t3, $t3, $t4') + self.add_line(f'andi $v0, $t3, 0x01') # get the last bit + return '$v0' + + @visitor.when(CILEqualsNode) + def visit(self, node: CILEqualsNode, frame): + + if node.ref: + self.add_line('li $t0, 1') + else: + self.add_line('li $t0, 0') + self.gen_push('$t0') + self.visit(node.left, frame) + self.gen_push('$v0') + self.visit(node.right, frame) + self.gen_push('$v0') + self.add_line('jal compare') + self.gen_pop('$t0') + self.gen_pop('$t0') + self.gen_pop('$t0') + return '$v0' + + + @visitor.when(CILNotEqualsNode) + def visit(self, node: CILNotEqualsNode, frame): + self.visit(node.left, frame) + self.gen_push('$v0') + self.visit(node.right, frame) + self.gen_push('$v0') + self.add_line('jal compare') + self.gen_pop('$t0') + self.gen_pop('$t0') + self.add_line('move $t0, $v0') + self.add_line('li $t1, 1') + self.add_line('xor $t0, $t0, $t1') + self.add_line('andi $v0, $t0, 0x01') + return '$v0' + + @visitor.when(CILNotNode) + def visit(self, node: CILNotNode, frame): + self.visit(node.var, frame) + self.add_line('move $t0, $v0') + self.add_line('li $t1, 1') + self.add_line('xor $t0, $t0, $t1') + self.add_line('andi $t0, $t0, 0x01') + self.add_line('move $v0, $t0') + return '$v0' + + + @visitor.when(CILBoxNode) + def visit(self, node: CILBoxNode, frame): + print("Boxing shit") + self.add_line('# boxing some stuff') + self.visit(node.var, frame) + self.add_line('move $t0, $v0 # boxing some stuff') + self.add_line('li $a0, 8') + self.add_line('li $v0, 9') + self.add_line('syscall') + self.add_line(f'la $t1, {node.type}') + self.add_line('sw $t1, 0($v0)') + self.add_line('sw $t0, 4($v0)') + return '$v0' + + @visitor.when(CILUnboxNode) + def visit(self, node: CILUnboxNode, frame): + self.visit(node.var, frame) + self.add_line('lw $t0, 4($v0) # unbox something') + self.add_line('move $v0, $t0') + return '$v0' + + @visitor.when(CILConformsNode) + def visit(self, node: CILConformsNode, frame): + 
self.visit(node.left, frame) + self.gen_push('$v0') + self.visit(node.right, frame) + self.add_line('move $t1, $v0') + self.gen_pop('$t0') + + #Load discovery time of left + self.add_line('# Check conform') + + self.add_line('lw $t0, 8($t0)') + self.add_line('lw $t2, 8($t1)') #Load discovery time of rigth + self.add_line('lw $t3, 12($t1)') #Load finish time of rigth + + self.add_line('slt $t1, $t0, $t2') # first condition + self.add_line('li $t2, 1') + self.add_line(f'xor $t1, $t1, $t2') + self.add_line(f'andi $t1, $t1, 0x01') # get the last bit + + + self.add_line(f'slt $t4, $t3, $t0') # r < l? + self.add_line(f'li $t3, 1') + self.add_line(f'xor $t3, $t3, $t4') + self.add_line(f'andi $v0, $t3, 0x01') # get the last bit + self.add_line('and $v0, $v0, $t1') + return '$v0' + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/codegen/utils.py b/src/codegen/utils.py new file mode 100644 index 000000000..278ea3203 --- /dev/null +++ b/src/codegen/utils.py @@ -0,0 +1,335 @@ +from cmath import exp +from semantics.semantic import IntType, ObjectType, StringType, BoolType +from .ast_CIL import * +from collections import deque +from itertools import chain +from collections import OrderedDict + +class CILScope: + def __init__(self, context): + self.context = context + + self.if_count = 0 + self.case_count = 0 + self.variables_count = 0 + self.str_count = 0 + self.loop_count = 0 + + self.locals = [{}] + self.all_locals = [] + self.instructions = [] + self.data = [] + self.functions = [] + self.current_class = "" + + def add_local(self, id, type, is_param = False): + local_dict = self.locals[-1] + if id in local_dict.keys(): + nickname = local_dict[id].id + local_dict[id] = CILLocalNode(nickname, type) + else: + nickname = f'{id}_{self.variables_count}' + self.variables_count += 1 + node = CILLocalNode(nickname, type) + local_dict[id] = node + + if not is_param: + self.all_locals.append(node) + + return nickname + + def add_new_local(self, type): + local_dict = 
self.locals[-1] + name = f't_{self.variables_count}' + self.variables_count += 1 + node = CILLocalNode(name, type) + local_dict[name] = node + self.all_locals.append(node) + return name + + def find_local(self, id): + for i in range (len(self.locals) - 1 , -1, -1): + d = self.locals[i] + try: + return d[id] + except: + pass + return None + + def find_data(self, id): + for d in self.data: + if d.id == id: + return d + return None + + def ret_type_of_method(self, name_meth, name_class): + type_class = self.context.get_type(name_class) + method = type_class.get_method(name_meth) + return method.return_type.name + + def create_builtin_types(self, hierarchy_branch): + types = [] + + obj_methods = [ + CILMethodNode('Init_Object', 'Init_Object'), + CILMethodNode('abort', 'abort_Object'), + CILMethodNode('type_name', 'type_name_Object'), + CILMethodNode('copy', 'copy_Object'), + ] + types.append(CILTypeNode('Object', [], obj_methods, hierarchy_branch['Object'])) + init_Object = CILFuncNode( + 'Init_Object', + [CILParamNode('self', None)], + [], + [CILReturnNode(CILVariableNode('self'))]) + self.functions.append(init_Object) + + int_methods = [ + CILMethodNode('Init_Int', 'Init_Int'), + CILMethodNode('abort', 'abort_Object'), + CILMethodNode('type_name', 'type_name_Object'), + CILMethodNode('copy', 'copy_Object'), + ] + types.append(CILTypeNode('Int', [CILAttributeNode('value', None)], int_methods, hierarchy_branch['Int'])) + init_int = CILFuncNode( + 'Init_Int', + [CILParamNode('self', None), CILParamNode('v', None)], + [], + [CILSetAttributeNode(CILVariableNode('self'), 'Int', CILVariableNode('value'), CILVariableNode('v')), CILReturnNode(CILVariableNode('self'))]) + self.functions.append(init_int) + + str_methods = [ + CILMethodNode('Init_String', 'Init_String'), + CILMethodNode('abort', 'abort_Object'), + CILMethodNode('type_name', 'type_name_Object'), + CILMethodNode('copy', 'copy_Object'), + CILMethodNode('length', 'length_String'), + CILMethodNode('concat', 
'concat_String'), + CILMethodNode('substr', 'substr_String'), + ] + types.append(CILTypeNode('String', [CILAttributeNode('value', None)], str_methods, hierarchy_branch['String'])) + init_string = CILFuncNode( + 'Init_String', + [CILParamNode('self', None), CILParamNode('v', None)], + [], + [CILSetAttributeNode(CILVariableNode('self'), 'String', CILVariableNode('value'), CILVariableNode('v')), CILReturnNode(CILVariableNode('self'))]) + self.functions.append(init_string) + + bool_methods = [ + CILMethodNode('Init_Bool', 'Init_Bool'), + CILMethodNode('abort', 'abort_Object'), + CILMethodNode('type_name', 'type_name_Object'), + CILMethodNode('copy', 'copy_Object'), + ] + types.append(CILTypeNode('Bool', [CILAttributeNode('value', None)], bool_methods, hierarchy_branch['Bool'])) + init_bool = CILFuncNode( + 'Init_Bool', + [CILParamNode('self', None), CILParamNode('v', None)], + [], + [CILSetAttributeNode(CILVariableNode('self'), 'Bool', CILVariableNode('value'), CILVariableNode('v')), CILReturnNode(CILVariableNode('self'))]) + self.functions.append(init_bool) + + io_methods = [ + CILMethodNode('Init_IO', 'Init_IO'), + CILMethodNode('abort', 'abort_Object'), + CILMethodNode('type_name', 'type_name_Object'), + CILMethodNode('copy', 'copy_Object'), + CILMethodNode('out_string', 'out_string_IO'), + CILMethodNode('out_int', 'out_int_IO'), + CILMethodNode('in_string', 'in_string_IO'), + CILMethodNode('in_int', 'in_int_IO'), + ] + types.append(CILTypeNode('IO', [], io_methods, hierarchy_branch['IO'])) + init_IO = CILFuncNode( + 'Init_IO', + [CILParamNode('self', None)], + [], + [CILReturnNode(CILVariableNode('self'))]) + self.functions.append(init_IO) + + return types + + def create_init_class(self, attributes, locals): + type = self.context.get_type(self.current_class) + instructions = [] + if not isinstance(type.parent, ObjectType): + instructions.append(CILArgNode(CILVariableNode(f'self_{self.current_class}'))) + call = CILCallNode(f'Init_{type.parent.name}') + 
instructions.append(CILAssignNode(CILVariableNode(f'self_{self.current_class}'), call)) + + for id, type, expr, inst in attributes: + if expr is not None: + _expr, _ = expr + instructions.extend(inst) + variable = CILVariableNode(self.add_new_local(type)) + instructions.append(CILAssignNode(variable, _expr)) + elif type == 'Int': + variable = CILNumberNode(0) + elif type == 'String': + variable = CILVariableNode(self.add_new_local(type)) + instructions.append(CILAssignNode(variable, CILLoadNode('str_empty'))) + elif type == 'Bool': + variable = CILVariableNode(self.add_new_local(type)) + instructions.append(CILAssignNode(variable, CILEqualsNode(CILNumberNode(0), CILNumberNode(1)))) + else: + variable = None + + if expr is not None: + _, t = expr + if type == 'Object' and not t.ref: + instructions.append(CILAssignNode(variable, CILBoxNode(variable, t.name))) + + if variable is not None: + instructions.append(CILSetAttributeNode(CILVariableNode(f'self_{self.current_class}'), self.current_class, CILVariableNode(id), variable)) + + instructions.append(CILReturnNode(CILVariableNode(f'self_{self.current_class}'))) + locals.extend(self.all_locals.copy()) + return CILFuncNode(f'Init_{self.current_class}', [CILParamNode(f'self_{self.current_class}', None)], locals, instructions) + + + +class TypeInfo: + def __init__(self): + self.attrs = [] + self.methods = {} + + def __repr__(self): + text = str(self.attrs) + '\n' + text += str(self.methods) + '\n' + return text + + +def get_ts(context): + list = [] + heirs = {} + visited = [] + for c in context.types.values(): + if c not in visited: + dfs_visit_ts(context, c, list,heirs, visited) + return list, heirs + + +def dfs_visit_ts(context, u, list, heirs, visited): + visited.append(u) + if u.parent is not None: + try: + heirs[u.parent.name].append(u.name) + except KeyError: + heirs[u.parent.name] = [u.name] + + if u.parent not in visited: + dfs_visit_ts(context, u.parent, list, heirs, visited) + + list.append(u) + + +def 
hierarchy_branch(context): + heirs = {} + + for type in context.types.values(): + if type.parent is not None: + try: + heirs[type.parent.name].append(type.name) + except: + heirs[type.parent.name] = [type.name] + + branch = {} + + hierarchy_branch_visit(context, branch, heirs, "Object", 0) + return branch + +def hierarchy_branch_visit(context, branch, heirs, type, time): + discovery_time = time + finish_time = discovery_time + try: + for heir in heirs[type]: + finish_time = hierarchy_branch_visit(context, branch, heirs, heir, finish_time + 1) + except: + pass + + branch[type] = (discovery_time, finish_time) + return finish_time + + + +def bfs ( list, d ,s , m ): + d[s.name] = 0 + while (len(list) > 0): + u = list.pop() + if u.parent is not None: + v = u.parent + else : + v = None + while v is not None: + if d[v.name] == -1: + d[v.name] = d[s.name] + 1 + s = v + list.append(v) + + v = v.parent + return d + +def init (context,d): + for c in context.types.values(): + d [c.name] = -1 + return d + +def table (table): + d = {} + for k in (table.keys()): + value = table[k] + for c in value.keys(): + if table[k][c] != -1 and table[k][c] != 0: + + try: + d [c].append((k,table[k][c])) + except: + d [c] = [(k,table[k][c])] + return d + + +def order_case_branc_to(branchs, to): + d = {} + string = [] + list = [branch.type for branch in branchs] + for s in to : + string.append(s.name) + for s in string: + try: + d[s] = list.index(s) + except: + pass + return d + + +def valid_case (table, branchs): + valid = {} + for key in branchs.keys(): + try: + s = table[key] + except: + continue + order = sorted(s, key=lambda tu : tu[1]) + for m in order: + try: + valid[key].append(m) + except: + valid[key] = [m] + return valid + + +def return_list_valid_case(node, to, table ): + order = order_case_branc_to(node.cases,to) + valid = valid_case(table,order) + s = list(valid.values()) + iterator = chain(*s) + l = list(iterator) + m = list(OrderedDict.fromkeys(l)) + new_cases = sorted(m, 
key=lambda tu : tu[1]) + return new_cases, valid + + + + + + diff --git a/src/cool.py b/src/cool.py new file mode 100644 index 000000000..c91e87887 --- /dev/null +++ b/src/cool.py @@ -0,0 +1,67 @@ +import sys +from parsing.parser import COOL_Parser +from parsing.lexer import COOL_Lexer +from semantics.TypeCollector import TypeCollector +from semantics.TypeBuilder import TypeBuilder +from semantics.TypeChecker import TypeChecker +from codegen.generate_ast import CIL +from codegen.cil_codegen import CILCodegen +from codegen.spim_scope import MIPSScopeBuilder +from codegen.spim_visitor import MIPSCodegen +input_file = sys.argv[1] +with open(input_file, 'r') as f: + s = f.read() + +lexer = COOL_Lexer() +lexer.input(s) +a = list(lexer.output) +if lexer.errors: + for e in lexer.errors: + print(e) + exit(1) + +parser = COOL_Parser() +ast, errors = parser.parse(s) +if errors: + for e in errors: + print(e) + exit(1) + +# Collecting Types +collector = TypeCollector() +collector.visit(ast) +context = collector.context +errors = collector.errors + +# Building Types +builder = TypeBuilder(context, errors) +builder.visit(ast) + +# Checking Types +checker = TypeChecker(context, errors) +checker.visit(ast) + +if errors: + for e in errors: + print(e) + exit(1) + +cil_generator = CIL(context) +cil = cil_generator.visit(ast) +#print(cil) +cil_codegen = CILCodegen() +code = cil_codegen.visit(cil) +with open(f'{input_file[:-3]}.cil', 'w') as f: + f.write(code) + + +mips_scope_builder = MIPSScopeBuilder() +scope = mips_scope_builder.visit(cil) +#print(scope) +mips_codegen = MIPSCodegen(scope) +mips_codegen.visit(cil, None) +#print(mips_codegen.code) +with open(f'{input_file[:-3]}.mips', 'w') as f: + f.write(mips_codegen.code) + +exit(0) diff --git a/src/coolc.sh b/src/coolc.sh index 3088de4f9..f748bfd3b 100755 --- a/src/coolc.sh +++ b/src/coolc.sh @@ -4,8 +4,8 @@ INPUT_FILE=$1 OUTPUT_FILE=${INPUT_FILE:0: -2}mips # Si su compilador no lo hace ya, aquí puede imprimir la información de 
contacto -echo "LINEA_CON_NOMBRE_Y_VERSION_DEL_COMPILADOR" # TODO: Recuerde cambiar estas -echo "Copyright (c) 2019: Nombre1, Nombre2, Nombre3" # TODO: líneas a los valores correctos +echo "COOLCompiler_v1.0" # TODO: Recuerde cambiar estas +echo "Copyright (c) 2021: Karla Olivera, Amanda Gonzalez, Victor Cardentey" # TODO: líneas a los valores correctos -# Llamar al compilador -echo "Compiling $INPUT_FILE into $OUTPUT_FILE" + +exec python3 cool.py $INPUT_FILE \ No newline at end of file diff --git a/src/parsing/ast.py b/src/parsing/ast.py new file mode 100644 index 000000000..c821864d6 --- /dev/null +++ b/src/parsing/ast.py @@ -0,0 +1,200 @@ +class Node: + pass + + +class ProgramNode(Node): + def __init__(self, location, declarations): + self.location = location + self.declarations = declarations + + +class DeclarationNode(Node): + pass + + +class ExpressionNode(Node): + pass + + +# Class +class ClassDeclarationNode(DeclarationNode): + def __init__(self, location, idx, features, parent=None, parent_location=None): + self.location = location + self.id, self.id_location = idx + self.parent = parent + self.features = features + self.parent_location = parent_location + + +# Features +class FuncDeclarationNode(DeclarationNode): + def __init__(self, location, idx, params, return_type, body): + self.location = location + self.id = idx + self.params = params + self.type, self.type_location = return_type + self.expr = body + + +class AttrDeclarationNode(DeclarationNode): + def __init__(self, location, idx, typex, expr=None): + self.location = location + self.id = idx + self.type, self.type_location = typex + self.expr = expr + + +class VarDeclarationNode(ExpressionNode): + def __init__(self, location, idx, typex, expr=None): + self.location = location + self.id = idx + self.type, self.type_location = typex + self.expr = expr + + +class AssignNode(ExpressionNode): + def __init__(self, location, symbol_location, idx, expr): + self.location = location + self.symbol_location = 
symbol_location + self.id = idx + self.expr = expr + + +class DispatchNode(ExpressionNode): + def __init__(self, location, obj, idx, args, from_type=None): + self.location = location + self.expr = obj + self.id = idx + self.arg = args + self.type = from_type + + +class BinaryNode(ExpressionNode): + def __init__(self, location, symbol_location, left, right): + self.symbol_location = symbol_location + self.location = location + self.left = left + self.right = right + + +class UnaryNode(ExpressionNode): + def __init__(self, location, exp): + self.location = location + self.expr = exp + + +class ConditionalNode(ExpressionNode): + def __init__(self, location, if_exp, then_exp, else_exp): + self.location = location + self.predicate = if_exp + self.then = then_exp + self.elsex = else_exp + + +class LoopNode(ExpressionNode): + def __init__(self, location, while_exp, loop_exp): + self.location = location + self.predicate = while_exp + self.body = loop_exp + + +class BlockNode(ExpressionNode): + def __init__(self, location, exp_list): + self.location = location + self.expr_lis = exp_list + + +class LetNode(ExpressionNode): + def __init__(self,location, var_list, in_exp): + self.location = location + self.variables = var_list + self.expr = in_exp + + +class CaseNode(ExpressionNode): + def __init__(self,location, cond, case_list): + self.location = location + self.expr = cond + self.cases = case_list + + +class CaseAttrNode(ExpressionNode): + def __init__(self, location, idx, typex, expr): + self.location = location + self.id = idx + self.type, self.type_location = typex + self.expr = expr + + +class AtomicNode(ExpressionNode): + def __init__(self, location, lex): + self.location = location + self.lex = lex + + +# Atomic Expressions +class ConstantNumNode(AtomicNode): + pass + + +class StringNode(AtomicNode): + pass + + +class TrueNode(AtomicNode): + pass + + +class FalseNode(AtomicNode): + pass + + +class VariableNode(AtomicNode): + pass + + +class 
InstantiateNode(AtomicNode): + pass + + +# Arithmetic Operations +class PlusNode(BinaryNode): + pass + + +class MinusNode(BinaryNode): + pass + + +class StarNode(BinaryNode): + pass + + +class DivNode(BinaryNode): + pass + + +# Comparison Operations +class LessNode(BinaryNode): + pass + + +class ElessNode(BinaryNode): + pass + + +class EqualsNode(BinaryNode): + pass + + +# Unary Operations +class NotNode(UnaryNode): + pass + + +class PrimeNode(UnaryNode): + pass + + +class IsVoidNode(UnaryNode): + pass diff --git a/src/parsing/lexer.py b/src/parsing/lexer.py new file mode 100644 index 000000000..be55c8ea5 --- /dev/null +++ b/src/parsing/lexer.py @@ -0,0 +1,238 @@ +import ply.lex as lex + + +class COOL_Lexer: + tokens = [ + 'OBJECTID', # object identifiers + 'TYPEID', # type identifiers + 'INT_CONST', # integer constants + 'STRING_CONST', # string constants + + #symbols + 'DOT', #. + 'COMMA', #, + 'COLON', #: + 'SEMICOLON', #; + 'AT', #@ + 'TILDE', #~ + 'PLUS', #+ + 'MINUS', #- + 'STAR', #* + 'DIV', #/ + 'LEQ', #<= + 'LOWER', #< + 'EQUAL', #= + 'ASSIGN', #<- + 'CASSIGN', #=> + 'OPAR', #( + 'CPAR', #) + 'OCUR', #{ + 'CCUR', #} + ] + + keywords = { + 'class' : 'CLASS', + 'else' : 'ELSE', + 'false' : 'FALSE', + 'fi' : 'FI', + 'if' : 'IF', + 'in' : 'IN', + 'inherits' : 'INHERITS', + 'isvoid' : 'ISVOID', + 'let' : 'LET', + 'loop' : 'LOOP', + 'pool' : 'POOL', + 'then' : 'THEN', + 'while' : 'WHILE', + 'case' : 'CASE', + 'esac' : 'ESAC', + 'new' : 'NEW', + 'of' : 'OF', + 'not' : 'NOT', + 'true' : 'TRUE' + } + + tokens += list(keywords.values()) + + def input(self, string): + self.errors = [] + self.prev_last_newline = 0 + self.current_last_newline = 0 + self.output = self.tokenize(string) + + def token(self): + return next(self.output, None) + + def tokenize(self, text): + lexer = lex.lex(module=self) + lexer.input(text) + for t in lexer: + t.lexpos = t.lexpos - self.prev_last_newline + 1 + self.prev_last_newline = self.current_last_newline + yield t + + 
###################################################################### + # Literals # + ###################################################################### + t_INT_CONST = r'[0-9]+' + + def t_OBJECTID(self, t): + r'[a-z][a-zA-Z0-9_]*' # object identifiers must start with lower case + t.type = self.keywords.get(t.value.lower(), 'OBJECTID') # try match with keywords that also match the objectid pattern + return t + + def t_TYPEID(self, t): + r'[A-Z][a-zA-Z0-9_]*' # type identifiers must start with upper case + val = t.value.lower() + if val != 'true' and val != 'false': # discard `lower` and `false` that start with lower case + t.type = self.keywords.get(val, 'TYPEID') + return t + + def t_STRING_CONST(self, t): + r'\"' # match the first " + value = '\"' + text = t.lexer.lexdata + pos = t.lexer.lexpos + + contains_null = False + while True: + if pos >= len(text): # founded EOF before closing " + t.lexer.lexpos = pos + self.register_error(t.lexer.lineno, t.lexer.lexpos - self.current_last_newline + 1, "LexicographicError: EOF in string constant") + return + c = text[pos] + + if c == '\\': + if text[pos+1] == '\n': + value += '\n' + t.lexer.lineno += 1 + self.current_last_newline = pos + 2 + elif text.startswith('\r\n', pos+1): + value += '\r\n' + t.lexer.lineno += 1 + pos+=1 + self.current_last_newline = pos + 1 + elif text[pos+1] in ('b', 'f', 't', 'n'): # i'm not very sure about this + value += f'\\{text[pos+1]}' + else: + value += text[pos+1] + pos += 2 + elif c == '\n': # string with no scaped \n# try match with false or true that also match the identifier pattern and have higher precedence + t.lexer.lexpos = pos + self.register_error(t.lineno, t.lexer.lexpos - self.current_last_newline + 1, "LexicographicError: Unterminated string constant") + return + elif c == '\0': + contains_null = True + self.register_error(t.lineno, pos - self.current_last_newline + 1, "LexicographicError: String contains null character") + pos += 1 + else: + value += c + pos += 1 + if c 
== '\"': + break + + t.lexer.lexpos = pos + t.value = value + t.type = 'STRING_CONST' + if not contains_null: + return t + + ####################################################################### + # Comments # + ####################################################################### + + def t_SINGLE_LINE_COMMENT(self, t): + r'--' + value = '--' + text = t.lexer.lexdata + pos = t.lexer.lexpos + + while True: + if pos >= len(text): + break + if text[pos] == '\n': + t.lexer.lineno += 1 + value += text[pos] + pos+=1 + self.prev_last_newline = pos + self.current_last_newline = pos + break + value += text[pos] + pos+=1 + + t.lexer.lexpos = pos + + def t_MULTI_LINE_COMMENT(self, t): + r'\(\*' + opar = 1 + value = '(*' + text = t.lexer.lexdata + pos = t.lexer.lexpos + + while opar > 0: + if pos >= len(text): + t.lexer.lexpos = pos + self.register_error(t.lexer.lineno, t.lexer.lexpos - self.current_last_newline, 'LexicographicError: EOF in comment') + return + + if text.startswith('(*', pos): + value += '(*' + pos += 2 + opar += 1 + elif text.startswith('*)', pos): + opar -= 1 + pos +=2 + value += '*)' + else: + if text[pos] == '\n': + t.lexer.lineno += 1 + self.current_last_newline = pos + value += text[pos] + pos += 1 + t.lexer.lexpos = pos + + ####################################################################### + # Symbols # + ####################################################################### + t_DOT = r'\.' 
+ t_COMMA = r',' + t_COLON = r':' + t_SEMICOLON = r';' + t_AT = r'@' + t_TILDE = r'~' + t_PLUS = r'\+' + t_MINUS = r'-' + t_STAR = r'\*' + t_DIV = r'/' + t_LEQ = r'<=' + t_LOWER = r'<' + t_EQUAL = r'=' + t_ASSIGN = r'<-' + t_CASSIGN = r'=>' + t_OPAR = r'\(' + t_CPAR = r'\)' + t_OCUR = r'{' + t_CCUR = r'}' + + ####################################################################### + # Ignored # + ####################################################################### + + def t_newline(self, t): + r'\n+' + t.lexer.lineno += len(t.value) + self.prev_last_newline = t.lexer.lexpos + self.current_last_newline = t.lexer.lexpos + + t_ignore = ' \t\r' + + ######################################################################## + # Error Handling # + ######################################################################## + + def t_error(self, t): # not recognized symbol + self.register_error(t.lexer.lineno, t.lexer.lexpos - self.prev_last_newline + 1, f"LexicographicError: ERROR \"{t.value[0]}\"") + t.lexer.skip(1) + + def register_error(self, line, column, text): + self.errors.append(f'{line,column} - {text}') diff --git a/src/parsing/parser.py b/src/parsing/parser.py new file mode 100644 index 000000000..4082a6cb1 --- /dev/null +++ b/src/parsing/parser.py @@ -0,0 +1,393 @@ +import ply.yacc as yacc +from .lexer import COOL_Lexer +from .ast import * + + +class COOL_Parser: + + def __init__(self): + self.tokens = COOL_Lexer.tokens + + def parse(self, input_string): + self.lex = COOL_Lexer() + self.errors = [] + self.parser = yacc.yacc(module=self) + self.input = input_string + result = self.parser.parse(input_string, lexer=self.lex) + return result, self.errors + + ###################################################################### + # Grammar # + ###################################################################### + + @staticmethod + def p_program(p): + 'program : class_list' + p[0] = ProgramNode(None,p[1]) # location of this is + + @staticmethod + def 
p_class_list_single(p): + 'class_list : def_class' + p[0] = [p[1]] + + @staticmethod + def p_class_list_multi(p): + 'class_list : def_class class_list' + p[0] = [p[1]] + p[2] + + @staticmethod + def p_def_class(p): + 'def_class : CLASS TYPEID OCUR feature_list CCUR SEMICOLON' + location = (p.lineno(1), p.lexpos(1)) + type_location = (p.lineno(2), p.lexpos(2)) + p[0] = ClassDeclarationNode(location,(p[2], type_location),p[4]) + + @staticmethod + def p_def_class_parent(p): + 'def_class : CLASS TYPEID INHERITS TYPEID OCUR feature_list CCUR SEMICOLON' + location = (p.lineno(1), p.lexpos(1)) + type_location = (p.lineno(2), p.lexpos(2)) + parent_location = (p.lineno(4), p.lexpos(4)) + p[0] = ClassDeclarationNode(location,(p[2], type_location),p[6],p[4], parent_location) + + @staticmethod + def p_feature_list_empty(p): + 'feature_list :' + p[0] = [] + pass + + @staticmethod + def p_feature_list_attr(p): + 'feature_list : def_attr feature_list' + p[0] = [p[1]] + p[2] + + @staticmethod + def p_feature_list_fun(p): + 'feature_list : def_func feature_list' + p[0] = [p[1]] + p[2] + + @staticmethod + def p_attr(p): + 'def_attr : OBJECTID COLON TYPEID SEMICOLON' + location = (p.lineno(1), p.lexpos(1)) + type_location = (p.lineno(3), p.lexpos(3)) + p[0] = AttrDeclarationNode(location,p[1],(p[3], type_location)) + + @staticmethod + def p_attr_exp(p): + 'def_attr : OBJECTID COLON TYPEID ASSIGN exp SEMICOLON' + location = (p.lineno(1), p.lexpos(1)) + type_location = (p.lineno(3), p.lexpos(3)) + p[0] = AttrDeclarationNode(location,p[1],(p[3], type_location),p[5]) + + @staticmethod + def p_func(p): + 'def_func : OBJECTID OPAR CPAR COLON TYPEID OCUR exp CCUR SEMICOLON' + location = (p.lineno(1), p.lexpos(1)) + return_location = (p.lineno(5), p.lexpos(5)) + p[0] = FuncDeclarationNode(location,p[1],[],(p[5], return_location),p[7]) + + @staticmethod + def p_func_param(p): + 'def_func : OBJECTID OPAR param_list CPAR COLON TYPEID OCUR exp CCUR SEMICOLON' + location = (p.lineno(1), 
p.lexpos(1)) + return_location = (p.lineno(6), p.lexpos(6)) + p[0] = FuncDeclarationNode(location,p[1],p[3],(p[6], return_location),p[8]) + + @staticmethod + def p_param_list_single(p): + 'param_list : param' + p[0] = [p[1]] + + @staticmethod + def p_param_list_multi(p): + 'param_list : param COMMA param_list' + p[0] = [p[1]] + p[3] + + @staticmethod + def p_param(p): + 'param : OBJECTID COLON TYPEID' + location = (p.lineno(1), p.lexpos(1)) + type_location = (p.lineno(3), p.lexpos(3)) + p[0] = VarDeclarationNode(location,p[1],(p[3], type_location)) + + @staticmethod + def p_exp_assign(p): + 'exp : OBJECTID ASSIGN exp' + location = (p.lineno(1), p.lexpos(1)) + symbol_location = (p.lineno(2), p.lexpos(2)) + p[0] = AssignNode(location, symbol_location, VariableNode(location,p[1]),p[3]) + + @staticmethod + def p_exp_let(p): + 'exp : LET ident_list IN exp' + location = (p.lineno(1), p.lexpos(1)) + p[0] = LetNode(location,p[2],p[4]) + + @staticmethod + def p_ident_list_single(p): + 'ident_list : iden' + p[0] = [p[1]] + + @staticmethod + def p_ident_list_multi(p): + 'ident_list : iden COMMA ident_list' + p[0] = [p[1]] + p[3] + + @staticmethod + def p_iden(p): + 'iden : OBJECTID COLON TYPEID' + location = (p.lineno(1), p.lexpos(1)) + type_location = (p.lineno(3), p.lexpos(3)) + p[0] = VarDeclarationNode(location,p[1],(p[3], type_location),None) + + @staticmethod + def p_iden_init(p): + 'iden : OBJECTID COLON TYPEID ASSIGN exp' + location = (p.lineno(1), p.lexpos(1)) + type_location = (p.lineno(3), p.lexpos(3)) + p[0] = VarDeclarationNode(location,p[1],(p[3], type_location),p[5]) + + @staticmethod + def p_case_list_single(p): + 'case_list : branch' + p[0] = [p[1]] + + @staticmethod + def p_case_list_multi(p): + 'case_list : branch case_list' + p[0] = [p[1]] + p[2] + + @staticmethod + def p_branch(p): + 'branch : OBJECTID COLON TYPEID CASSIGN exp SEMICOLON' + location = (p.lineno(1), p.lexpos(1)) + type_location = (p.lineno(3), p.lexpos(3)) + p[0] = 
CaseAttrNode(location,p[1],(p[3], type_location),p[5]) + + @staticmethod + def p_exp_not(p): + 'exp : NOT exp' + location = (p.lineno(1), p.lexpos(1)) + p[0] = NotNode(location,p[2]) + + @staticmethod + def p_exp_comp(p): + 'exp : comp' + p[0] = p[1] + + @staticmethod + def p_comp_arith(p): + 'comp : arith' + p[0] = p[1] + + @staticmethod + def p_comp_lower(p): + 'comp : arith LOWER arith' + symbol_location = (p.lineno(2), p.lexpos(2)) + p[0] = LessNode(p[1].location, symbol_location, p[1],p[3]) + + @staticmethod + def p_comp_leq(p): + 'comp : arith LEQ arith' + symbol_location = (p.lineno(2), p.lexpos(2)) + p[0] = ElessNode(p[1].location,symbol_location,p[1],p[3]) + + @staticmethod + def p_comp_equal(p): + 'comp : arith EQUAL arith' + symbol_location = (p.lineno(2), p.lexpos(2)) + p[0] = EqualsNode(p[1].location,symbol_location,p[1],p[3]) + + @staticmethod + def p_comp_equal_not(p): + 'comp : arith EQUAL NOT exp' + symbol_location = (p.lineno(2), p.lexpos(2)) + p[0] = EqualsNode(p[1].location,symbol_location,p[1],p[4]) + + @staticmethod + def p_arith_term(p): + 'arith : term' + p[0]= p[1] + + @staticmethod + def p_arith_plus(p): + 'arith : arith PLUS term' + location = (p.lineno(2), p.lexpos(2)) + symbol_location = (p.lineno(2), p.lexpos(2)) + p[0] = PlusNode(location,symbol_location,p[1],p[3]) + + @staticmethod + def p_arith_minus(p): + 'arith : arith MINUS term' + symbol_location = (p.lineno(2), p.lexpos(2)) + p[0] = MinusNode(p[1].location,symbol_location,p[1],p[3]) + + @staticmethod + def p_term_fac(p): + 'term : factor' + p[0] = p[1] + + @staticmethod + def p_term_star(p): + 'term : term STAR factor' + symbol_location = (p.lineno(2), p.lexpos(2)) + p[0] = StarNode(p[1].location,symbol_location,p[1],p[3]) + + @staticmethod + def p_term_div(p): + 'term : term DIV factor' + symbol_location = (p.lineno(2), p.lexpos(2)) + p[0] = DivNode(p[1].location,symbol_location,p[1],p[3]) + + @staticmethod + def p_factor_atom(p): + 'factor : atom' + p[0] = p[1] + + 
@staticmethod + def p_factor_neg(p): + 'factor : TILDE factor' + location = (p.lineno(1), p.lexpos(1)) + p[0] = PrimeNode(location,p[2]) + + @staticmethod + def p_factor_case(p): + 'factor : CASE exp OF case_list ESAC' + location = (p.lineno(1), p.lexpos(1)) + p[0] = CaseNode(location,p[2],p[4]) + + @staticmethod + def p_factor_while(p): + 'factor : WHILE exp LOOP exp POOL' + location = (p.lineno(1), p.lexpos(1)) + p[0] = LoopNode(location,p[2],p[4]) + + @staticmethod + def p_factor_block(p): + 'factor : OCUR exp_list CCUR' + location = (p.lineno(1), p.lexpos(1)) + p[0] = BlockNode(location,p[2]) + + @staticmethod + def p_exp_list_single(p): + 'exp_list : exp SEMICOLON' + p[0] = [p[1]] + + @staticmethod + def p_exp_list_multi(p): + 'exp_list : exp SEMICOLON exp_list' + p[0] = [p[1]] + p[3] + + @staticmethod + def p_factor_cond(p): + 'factor : IF exp THEN exp ELSE exp FI' + location = (p.lineno(1), p.lexpos(1)) + p[0] = ConditionalNode(location,p[2],p[4],p[6]) + + @staticmethod + def p_factor_void(p): + 'factor : ISVOID factor' + location = (p.lineno(1), p.lexpos(1)) + p[0] = IsVoidNode(location,p[2]) + + @staticmethod + def p_atom_num(p): + 'atom : INT_CONST' + location = (p.lineno(1), p.lexpos(1)) + p[0] = ConstantNumNode(location,p[1]) + + @staticmethod + def p_atom_string(p): + 'atom : STRING_CONST' + location = (p.lineno(1), p.lexpos(1)) + p[0] = StringNode(location,p[1]) + + @staticmethod + def p_atom_true(p): + 'atom : TRUE' + location = (p.lineno(1), p.lexpos(1)) + p[0] = TrueNode(location,p[1]) + + @staticmethod + def p_atom_false(p): + 'atom : FALSE' + location = (p.lineno(1), p.lexpos(1)) + p[0] = FalseNode(location,p[1]) + + @staticmethod + def p_atom_var(p): + 'atom : OBJECTID' + location = (p.lineno(1), p.lexpos(1)) + p[0] = VariableNode(location,p[1]) + + @staticmethod + def p_atom_new(p): + 'atom : NEW TYPEID' + location = (p.lineno(1), p.lexpos(1)) + p[0] = InstantiateNode(location,p[2]) + + @staticmethod + def p_atom_func_call(p): + 'atom : 
func_call' + p[0] = p[1] + + @staticmethod + def p_atom_exp(p): + 'atom : OPAR exp CPAR' + p[0] = p[2] + p[0].location = (p.lineno(1), p.lexpos(1)) + + @staticmethod + def p_func_call_self(p): + 'func_call : OBJECTID OPAR arg_list CPAR' + location = (p.lineno(1), p.lexpos(1)) + p[0] = DispatchNode(location,VariableNode(None,'self'),p[1],p[3]) + + @staticmethod + def p_func_call(p): + 'func_call : atom DOT OBJECTID OPAR arg_list CPAR' + p[0] = DispatchNode(p[1].location,p[1],p[3],p[5]) + + @staticmethod + def p_func_call_at(p): + 'func_call : atom AT TYPEID DOT OBJECTID OPAR arg_list CPAR' + p[0] = DispatchNode(p[1].location,p[1],p[5],p[7],p[3]) + + @staticmethod + def p_arg_list_empty(p): + 'arg_list :' + p[0] = [] + pass + + @staticmethod + def p_arg_list_not_empty(p): + 'arg_list : arg_list_not_empty' + p[0] = p[1] + + @staticmethod + def p_arg_list_not_empty_single(p): + 'arg_list_not_empty : exp' + p[0] = [p[1]] + + @staticmethod + def p_arg_list_not_empty_multi(p): + 'arg_list_not_empty : exp COMMA arg_list_not_empty' + p[0] = [p[1]] + p[3] + + #Error rule for syntax errors + def p_error(self, p): + if not p: + self.errors.append('(0, 0) - SyntacticError: ERROR at or near EOF') + return + + col = p.lexpos + line = p.lineno + val = p.value + #(29, 9) - SyntacticError: ERROR at or near "Test1" + self.errors.append(f'({line}, {col}) - SyntacticError: ERROR at or near "{val}"') + + def __find_column(self, token): + input_s = self.input + line_start = input_s.rfind('\n', 0, token.lexpos) + 1 + return (token.lexpos - line_start) + 1 diff --git a/src/parsing/parsetab.py b/src/parsing/parsetab.py new file mode 100644 index 000000000..63203858f --- /dev/null +++ b/src/parsing/parsetab.py @@ -0,0 +1,91 @@ + +# parsetab.py +# This file is automatically generated. Do not edit. 
+# pylint: disable=W,C,R +_tabversion = '3.10' + +_lr_method = 'LALR' + +_lr_signature = 'ASSIGN AT CASE CASSIGN CCUR CLASS COLON COMMA CPAR DIV DOT ELSE EQUAL ESAC FALSE FI IF IN INHERITS INT_CONST ISVOID LEQ LET LOOP LOWER MINUS NEW NOT OBJECTID OCUR OF OPAR PLUS POOL SEMICOLON STAR STRING_CONST THEN TILDE TRUE TYPEID WHILEprogram : class_listclass_list : def_classclass_list : def_class class_listdef_class : CLASS TYPEID OCUR feature_list CCUR SEMICOLONdef_class : CLASS TYPEID INHERITS TYPEID OCUR feature_list CCUR SEMICOLONfeature_list :feature_list : def_attr feature_listfeature_list : def_func feature_listdef_attr : OBJECTID COLON TYPEID SEMICOLONdef_attr : OBJECTID COLON TYPEID ASSIGN exp SEMICOLONdef_func : OBJECTID OPAR CPAR COLON TYPEID OCUR exp CCUR SEMICOLONdef_func : OBJECTID OPAR param_list CPAR COLON TYPEID OCUR exp CCUR SEMICOLONparam_list : paramparam_list : param COMMA param_listparam : OBJECTID COLON TYPEIDexp : OBJECTID ASSIGN expexp : LET ident_list IN expident_list : idenident_list : iden COMMA ident_listiden : OBJECTID COLON TYPEIDiden : OBJECTID COLON TYPEID ASSIGN expcase_list : branchcase_list : branch case_listbranch : OBJECTID COLON TYPEID CASSIGN exp SEMICOLONexp : NOT expexp : compcomp : arithcomp : arith LOWER arithcomp : arith LEQ arithcomp : arith EQUAL arithcomp : arith EQUAL NOT exparith : termarith : arith PLUS termarith : arith MINUS termterm : factorterm : term STAR factorterm : term DIV factorfactor : atomfactor : TILDE factorfactor : CASE exp OF case_list ESACfactor : WHILE exp LOOP exp POOLfactor : OCUR exp_list CCURexp_list : exp SEMICOLONexp_list : exp SEMICOLON exp_listfactor : IF exp THEN exp ELSE exp FIfactor : ISVOID factoratom : INT_CONSTatom : STRING_CONSTatom : TRUEatom : FALSEatom : OBJECTIDatom : NEW TYPEIDatom : func_callatom : OPAR exp CPARfunc_call : OBJECTID OPAR arg_list CPARfunc_call : atom DOT OBJECTID OPAR arg_list CPARfunc_call : atom AT TYPEID DOT OBJECTID OPAR arg_list CPARarg_list :arg_list : 
arg_list_not_emptyarg_list_not_empty : exparg_list_not_empty : exp COMMA arg_list_not_empty' + +_lr_action_items = {'CLASS':([0,3,20,60,],[4,4,-4,-5,]),'$end':([1,2,3,5,20,60,],[0,-1,-2,-3,-4,-5,]),'TYPEID':([4,8,17,29,30,53,58,76,95,136,],[6,13,21,56,57,85,88,105,118,144,]),'OCUR':([6,13,28,37,43,44,45,46,47,48,55,57,61,62,68,69,70,71,72,73,74,87,88,93,99,107,109,110,113,115,120,131,138,143,148,],[7,19,46,46,46,46,46,46,46,46,46,87,46,46,46,46,46,46,46,46,46,46,113,46,46,46,46,46,46,46,46,46,46,46,46,]),'INHERITS':([6,],[8,]),'CCUR':([7,9,10,11,15,16,19,26,27,34,38,39,40,41,42,49,50,51,52,54,63,67,77,78,81,84,85,89,96,97,98,100,101,102,103,108,109,111,112,114,116,119,126,129,134,137,139,142,146,149,150,],[-6,14,-6,-6,-7,-8,-6,33,-9,-51,-26,-27,-32,-35,-38,-47,-48,-49,-50,-53,-10,-25,-39,-51,108,-46,-52,-16,-28,-29,-30,-33,-34,-36,-37,-42,-43,-54,128,-55,-17,-31,-44,140,-40,-41,-11,-56,-12,-45,-57,]),'OBJECTID':([7,10,11,18,19,27,28,32,36,37,43,44,45,46,47,48,55,61,62,63,68,69,70,71,72,73,74,75,87,93,94,99,106,107,109,110,113,115,120,121,123,131,138,139,143,146,148,152,],[12,12,12,22,12,-9,34,22,66,34,78,34,34,34,34,78,34,34,34,-10,78,78,78,78,78,78,78,104,34,34,66,34,124,34,34,34,34,34,34,133,124,34,34,-11,34,-12,34,-24,]),'COLON':([12,22,23,31,66,124,],[17,29,30,58,95,136,]),'OPAR':([12,28,34,37,43,44,45,46,47,48,55,61,62,68,69,70,71,72,73,74,78,87,93,99,104,107,109,110,113,115,120,131,133,138,143,148,],[18,55,62,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,62,55,55,55,120,55,55,55,55,55,55,55,143,55,55,55,]),'SEMICOLON':([14,21,33,34,35,38,39,40,41,42,49,50,51,52,54,67,77,78,82,84,85,89,96,97,98,100,101,102,103,108,111,114,116,119,128,134,137,140,142,149,150,151,],[20,27,60,-51,63,-26,-27,-32,-35,-38,-47,-48,-49,-50,-53,-25,-39,-51,109,-46,-52,-16,-28,-29,-30,-33,-34,-36,-37,-42,-54,-55,-17,-31,139,-40,-41,146,-56,-45,-57,152,]),'CPAR':([18,24,25,34,38,39,40,41,42,49,50,51,52,54,56,59,62,67,77,78,84,85,86,89,90,91,92,96,97,98,100,101,102,103,108,111,114,116
,119,120,130,132,134,137,142,143,147,149,150,],[23,31,-13,-51,-26,-27,-32,-35,-38,-47,-48,-49,-50,-53,-15,-14,-58,-25,-39,-51,-46,-52,111,-16,114,-59,-60,-28,-29,-30,-33,-34,-36,-37,-42,-54,-55,-17,-31,-58,-61,142,-40,-41,-56,-58,150,-45,-57,]),'ASSIGN':([21,34,118,],[28,61,131,]),'COMMA':([25,34,38,39,40,41,42,49,50,51,52,54,56,65,67,77,78,84,85,89,92,96,97,98,100,101,102,103,108,111,114,116,118,119,134,137,141,142,149,150,],[32,-51,-26,-27,-32,-35,-38,-47,-48,-49,-50,-53,-15,94,-25,-39,-51,-46,-52,-16,115,-28,-29,-30,-33,-34,-36,-37,-42,-54,-55,-17,-20,-31,-40,-41,-21,-56,-45,-57,]),'LET':([28,37,44,45,46,47,55,61,62,87,93,99,107,109,110,113,115,120,131,138,143,148,],[36,36,36,36,36,36,36,36,36,36,36,36,36,36,36,36,36,36,36,36,36,36,]),'NOT':([28,37,44,45,46,47,55,61,62,70,87,93,99,107,109,110,113,115,120,131,138,143,148,],[37,37,37,37,37,37,37,37,37,99,37,37,37,37,37,37,37,37,37,37,37,37,37,]),'TILDE':([28,37,43,44,45,46,47,48,55,61,62,68,69,70,71,72,73,74,87,93,99,107,109,110,113,115,120,131,138,143,148,],[43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,43,]),'CASE':([28,37,43,44,45,46,47,48,55,61,62,68,69,70,71,72,73,74,87,93,99,107,109,110,113,115,120,131,138,143,148,],[44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,44,]),'WHILE':([28,37,43,44,45,46,47,48,55,61,62,68,69,70,71,72,73,74,87,93,99,107,109,110,113,115,120,131,138,143,148,],[45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,]),'IF':([28,37,43,44,45,46,47,48,55,61,62,68,69,70,71,72,73,74,87,93,99,107,109,110,113,115,120,131,138,143,148,],[47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,47,]),'ISVOID':([28,37,43,44,45,46,47,48,55,61,62,68,69,70,71,72,73,74,87,93,99,107,109,110,113,115,120,131,138,143,148,],[48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,48,]),'INT_CONST':([28,37,43,44,45,46,4
7,48,55,61,62,68,69,70,71,72,73,74,87,93,99,107,109,110,113,115,120,131,138,143,148,],[49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,]),'STRING_CONST':([28,37,43,44,45,46,47,48,55,61,62,68,69,70,71,72,73,74,87,93,99,107,109,110,113,115,120,131,138,143,148,],[50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,50,]),'TRUE':([28,37,43,44,45,46,47,48,55,61,62,68,69,70,71,72,73,74,87,93,99,107,109,110,113,115,120,131,138,143,148,],[51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,]),'FALSE':([28,37,43,44,45,46,47,48,55,61,62,68,69,70,71,72,73,74,87,93,99,107,109,110,113,115,120,131,138,143,148,],[52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,52,]),'NEW':([28,37,43,44,45,46,47,48,55,61,62,68,69,70,71,72,73,74,87,93,99,107,109,110,113,115,120,131,138,143,148,],[53,53,53,53,53,53,53,53,53,53,53,53,53,53,53,53,53,53,53,53,53,53,53,53,53,53,53,53,53,53,53,]),'DOT':([34,42,49,50,51,52,54,78,85,105,111,114,142,150,],[-51,75,-47,-48,-49,-50,-53,-51,-52,121,-54,-55,-56,-57,]),'AT':([34,42,49,50,51,52,54,78,85,111,114,142,150,],[-51,76,-47,-48,-49,-50,-53,-51,-52,-54,-55,-56,-57,]),'STAR':([34,40,41,42,49,50,51,52,54,77,78,84,85,100,101,102,103,108,111,114,134,137,142,149,150,],[-51,73,-35,-38,-47,-48,-49,-50,-53,-39,-51,-46,-52,73,73,-36,-37,-42,-54,-55,-40,-41,-56,-45,-57,]),'DIV':([34,40,41,42,49,50,51,52,54,77,78,84,85,100,101,102,103,108,111,114,134,137,142,149,150,],[-51,74,-35,-38,-47,-48,-49,-50,-53,-39,-51,-46,-52,74,74,-36,-37,-42,-54,-55,-40,-41,-56,-45,-57,]),'LOWER':([34,39,40,41,42,49,50,51,52,54,77,78,84,85,100,101,102,103,108,111,114,134,137,142,149,150,],[-51,68,-32,-35,-38,-47,-48,-49,-50,-53,-39,-51,-46,-52,-33,-34,-36,-37,-42,-54,-55,-40,-41,-56,-45,-57,]),'LEQ':([34,39,40,41,42,49,50,51,52,54,77,78,84,85,100,101,102,103,108,111,114,134,137,142,149,150,],[-51,69,-32,-35,-38,-47,-48,-49,-50,
-53,-39,-51,-46,-52,-33,-34,-36,-37,-42,-54,-55,-40,-41,-56,-45,-57,]),'EQUAL':([34,39,40,41,42,49,50,51,52,54,77,78,84,85,100,101,102,103,108,111,114,134,137,142,149,150,],[-51,70,-32,-35,-38,-47,-48,-49,-50,-53,-39,-51,-46,-52,-33,-34,-36,-37,-42,-54,-55,-40,-41,-56,-45,-57,]),'PLUS':([34,39,40,41,42,49,50,51,52,54,77,78,84,85,96,97,98,100,101,102,103,108,111,114,134,137,142,149,150,],[-51,71,-32,-35,-38,-47,-48,-49,-50,-53,-39,-51,-46,-52,71,71,71,-33,-34,-36,-37,-42,-54,-55,-40,-41,-56,-45,-57,]),'MINUS':([34,39,40,41,42,49,50,51,52,54,77,78,84,85,96,97,98,100,101,102,103,108,111,114,134,137,142,149,150,],[-51,72,-32,-35,-38,-47,-48,-49,-50,-53,-39,-51,-46,-52,72,72,72,-33,-34,-36,-37,-42,-54,-55,-40,-41,-56,-45,-57,]),'OF':([34,38,39,40,41,42,49,50,51,52,54,67,77,78,79,84,85,89,96,97,98,100,101,102,103,108,111,114,116,119,134,137,142,149,150,],[-51,-26,-27,-32,-35,-38,-47,-48,-49,-50,-53,-25,-39,-51,106,-46,-52,-16,-28,-29,-30,-33,-34,-36,-37,-42,-54,-55,-17,-31,-40,-41,-56,-45,-57,]),'LOOP':([34,38,39,40,41,42,49,50,51,52,54,67,77,78,80,84,85,89,96,97,98,100,101,102,103,108,111,114,116,119,134,137,142,149,150,],[-51,-26,-27,-32,-35,-38,-47,-48,-49,-50,-53,-25,-39,-51,107,-46,-52,-16,-28,-29,-30,-33,-34,-36,-37,-42,-54,-55,-17,-31,-40,-41,-56,-45,-57,]),'THEN':([34,38,39,40,41,42,49,50,51,52,54,67,77,78,83,84,85,89,96,97,98,100,101,102,103,108,111,114,116,119,134,137,142,149,150,],[-51,-26,-27,-32,-35,-38,-47,-48,-49,-50,-53,-25,-39,-51,110,-46,-52,-16,-28,-29,-30,-33,-34,-36,-37,-42,-54,-55,-17,-31,-40,-41,-56,-45,-57,]),'POOL':([34,38,39,40,41,42,49,50,51,52,54,67,77,78,84,85,89,96,97,98,100,101,102,103,108,111,114,116,119,125,134,137,142,149,150,],[-51,-26,-27,-32,-35,-38,-47,-48,-49,-50,-53,-25,-39,-51,-46,-52,-16,-28,-29,-30,-33,-34,-36,-37,-42,-54,-55,-17,-31,137,-40,-41,-56,-45,-57,]),'ELSE':([34,38,39,40,41,42,49,50,51,52,54,67,77,78,84,85,89,96,97,98,100,101,102,103,108,111,114,116,119,127,134,137,142,149,150,],[-51,-26,-27,-32,-35,-38,-47,-48,-49,-50,
-53,-25,-39,-51,-46,-52,-16,-28,-29,-30,-33,-34,-36,-37,-42,-54,-55,-17,-31,138,-40,-41,-56,-45,-57,]),'IN':([34,38,39,40,41,42,49,50,51,52,54,64,65,67,77,78,84,85,89,96,97,98,100,101,102,103,108,111,114,116,117,118,119,134,137,141,142,149,150,],[-51,-26,-27,-32,-35,-38,-47,-48,-49,-50,-53,93,-18,-25,-39,-51,-46,-52,-16,-28,-29,-30,-33,-34,-36,-37,-42,-54,-55,-17,-19,-20,-31,-40,-41,-21,-56,-45,-57,]),'FI':([34,38,39,40,41,42,49,50,51,52,54,67,77,78,84,85,89,96,97,98,100,101,102,103,108,111,114,116,119,134,137,142,145,149,150,],[-51,-26,-27,-32,-35,-38,-47,-48,-49,-50,-53,-25,-39,-51,-46,-52,-16,-28,-29,-30,-33,-34,-36,-37,-42,-54,-55,-17,-31,-40,-41,-56,149,-45,-57,]),'ESAC':([122,123,135,152,],[134,-22,-23,-24,]),'CASSIGN':([144,],[148,]),} + +_lr_action = {} +for _k, _v in _lr_action_items.items(): + for _x,_y in zip(_v[0],_v[1]): + if not _x in _lr_action: _lr_action[_x] = {} + _lr_action[_x][_k] = _y +del _lr_action_items + +_lr_goto_items = {'program':([0,],[1,]),'class_list':([0,3,],[2,5,]),'def_class':([0,3,],[3,3,]),'feature_list':([7,10,11,19,],[9,15,16,26,]),'def_attr':([7,10,11,19,],[10,10,10,10,]),'def_func':([7,10,11,19,],[11,11,11,11,]),'param_list':([18,32,],[24,59,]),'param':([18,32,],[25,25,]),'exp':([28,37,44,45,46,47,55,61,62,87,93,99,107,109,110,113,115,120,131,138,143,148,],[35,67,79,80,82,83,86,89,92,112,116,119,125,82,127,129,92,92,141,145,92,151,]),'comp':([28,37,44,45,46,47,55,61,62,87,93,99,107,109,110,113,115,120,131,138,143,148,],[38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,38,]),'arith':([28,37,44,45,46,47,55,61,62,68,69,70,87,93,99,107,109,110,113,115,120,131,138,143,148,],[39,39,39,39,39,39,39,39,39,96,97,98,39,39,39,39,39,39,39,39,39,39,39,39,39,]),'term':([28,37,44,45,46,47,55,61,62,68,69,70,71,72,87,93,99,107,109,110,113,115,120,131,138,143,148,],[40,40,40,40,40,40,40,40,40,40,40,40,100,101,40,40,40,40,40,40,40,40,40,40,40,40,40,]),'factor':([28,37,43,44,45,46,47,48,55,61,62,68,69,70,71,72,73,74,87,93,99,107,109,
110,113,115,120,131,138,143,148,],[41,41,77,41,41,41,41,84,41,41,41,41,41,41,41,41,102,103,41,41,41,41,41,41,41,41,41,41,41,41,41,]),'atom':([28,37,43,44,45,46,47,48,55,61,62,68,69,70,71,72,73,74,87,93,99,107,109,110,113,115,120,131,138,143,148,],[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,]),'func_call':([28,37,43,44,45,46,47,48,55,61,62,68,69,70,71,72,73,74,87,93,99,107,109,110,113,115,120,131,138,143,148,],[54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,]),'ident_list':([36,94,],[64,117,]),'iden':([36,94,],[65,65,]),'exp_list':([46,109,],[81,126,]),'arg_list':([62,120,143,],[90,132,147,]),'arg_list_not_empty':([62,115,120,143,],[91,130,91,91,]),'case_list':([106,123,],[122,135,]),'branch':([106,123,],[123,123,]),} + +_lr_goto = {} +for _k, _v in _lr_goto_items.items(): + for _x, _y in zip(_v[0], _v[1]): + if not _x in _lr_goto: _lr_goto[_x] = {} + _lr_goto[_x][_k] = _y +del _lr_goto_items +_lr_productions = [ + ("S' -> program","S'",1,None,None,None), + ('program -> class_list','program',1,'p_program','parser.py',25), + ('class_list -> def_class','class_list',1,'p_class_list_single','parser.py',30), + ('class_list -> def_class class_list','class_list',2,'p_class_list_multi','parser.py',35), + ('def_class -> CLASS TYPEID OCUR feature_list CCUR SEMICOLON','def_class',6,'p_def_class','parser.py',40), + ('def_class -> CLASS TYPEID INHERITS TYPEID OCUR feature_list CCUR SEMICOLON','def_class',8,'p_def_class_parent','parser.py',45), + ('feature_list -> ','feature_list',0,'p_feature_list_empty','parser.py',50), + ('feature_list -> def_attr feature_list','feature_list',2,'p_feature_list_attr','parser.py',56), + ('feature_list -> def_func feature_list','feature_list',2,'p_feature_list_fun','parser.py',61), + ('def_attr -> OBJECTID COLON TYPEID SEMICOLON','def_attr',4,'p_attr','parser.py',66), + ('def_attr -> OBJECTID COLON TYPEID ASSIGN exp 
SEMICOLON','def_attr',6,'p_attr_exp','parser.py',71), + ('def_func -> OBJECTID OPAR CPAR COLON TYPEID OCUR exp CCUR SEMICOLON','def_func',9,'p_func','parser.py',76), + ('def_func -> OBJECTID OPAR param_list CPAR COLON TYPEID OCUR exp CCUR SEMICOLON','def_func',10,'p_func_param','parser.py',81), + ('param_list -> param','param_list',1,'p_param_list_single','parser.py',86), + ('param_list -> param COMMA param_list','param_list',3,'p_param_list_multi','parser.py',91), + ('param -> OBJECTID COLON TYPEID','param',3,'p_param','parser.py',96), + ('exp -> OBJECTID ASSIGN exp','exp',3,'p_exp_assign','parser.py',101), + ('exp -> LET ident_list IN exp','exp',4,'p_exp_let','parser.py',106), + ('ident_list -> iden','ident_list',1,'p_ident_list_single','parser.py',111), + ('ident_list -> iden COMMA ident_list','ident_list',3,'p_ident_list_multi','parser.py',116), + ('iden -> OBJECTID COLON TYPEID','iden',3,'p_iden','parser.py',121), + ('iden -> OBJECTID COLON TYPEID ASSIGN exp','iden',5,'p_iden_init','parser.py',126), + ('case_list -> branch','case_list',1,'p_case_list_single','parser.py',131), + ('case_list -> branch case_list','case_list',2,'p_case_list_multi','parser.py',136), + ('branch -> OBJECTID COLON TYPEID CASSIGN exp SEMICOLON','branch',6,'p_branch','parser.py',141), + ('exp -> NOT exp','exp',2,'p_exp_not','parser.py',146), + ('exp -> comp','exp',1,'p_exp_comp','parser.py',151), + ('comp -> arith','comp',1,'p_comp_arith','parser.py',156), + ('comp -> arith LOWER arith','comp',3,'p_comp_lower','parser.py',161), + ('comp -> arith LEQ arith','comp',3,'p_comp_leq','parser.py',166), + ('comp -> arith EQUAL arith','comp',3,'p_comp_equal','parser.py',171), + ('comp -> arith EQUAL NOT exp','comp',4,'p_comp_equal_not','parser.py',177), + ('arith -> term','arith',1,'p_arith_term','parser.py',182), + ('arith -> arith PLUS term','arith',3,'p_arith_plus','parser.py',187), + ('arith -> arith MINUS term','arith',3,'p_arith_minus','parser.py',192), + ('term -> 
factor','term',1,'p_term_fac','parser.py',197), + ('term -> term STAR factor','term',3,'p_term_star','parser.py',202), + ('term -> term DIV factor','term',3,'p_term_div','parser.py',207), + ('factor -> atom','factor',1,'p_factor_atom','parser.py',212), + ('factor -> TILDE factor','factor',2,'p_factor_neg','parser.py',217), + ('factor -> CASE exp OF case_list ESAC','factor',5,'p_factor_case','parser.py',222), + ('factor -> WHILE exp LOOP exp POOL','factor',5,'p_factor_while','parser.py',227), + ('factor -> OCUR exp_list CCUR','factor',3,'p_factor_block','parser.py',232), + ('exp_list -> exp SEMICOLON','exp_list',2,'p_exp_list_single','parser.py',237), + ('exp_list -> exp SEMICOLON exp_list','exp_list',3,'p_exp_list_multi','parser.py',242), + ('factor -> IF exp THEN exp ELSE exp FI','factor',7,'p_factor_cond','parser.py',247), + ('factor -> ISVOID factor','factor',2,'p_factor_void','parser.py',252), + ('atom -> INT_CONST','atom',1,'p_atom_num','parser.py',257), + ('atom -> STRING_CONST','atom',1,'p_atom_string','parser.py',262), + ('atom -> TRUE','atom',1,'p_atom_true','parser.py',267), + ('atom -> FALSE','atom',1,'p_atom_false','parser.py',272), + ('atom -> OBJECTID','atom',1,'p_atom_var','parser.py',277), + ('atom -> NEW TYPEID','atom',2,'p_atom_new','parser.py',282), + ('atom -> func_call','atom',1,'p_atom_func_call','parser.py',287), + ('atom -> OPAR exp CPAR','atom',3,'p_atom_exp','parser.py',292), + ('func_call -> OBJECTID OPAR arg_list CPAR','func_call',4,'p_func_call_self','parser.py',297), + ('func_call -> atom DOT OBJECTID OPAR arg_list CPAR','func_call',6,'p_func_call','parser.py',302), + ('func_call -> atom AT TYPEID DOT OBJECTID OPAR arg_list CPAR','func_call',8,'p_func_call_at','parser.py',307), + ('arg_list -> ','arg_list',0,'p_arg_list_empty','parser.py',312), + ('arg_list -> arg_list_not_empty','arg_list',1,'p_arg_list_not_empty','parser.py',318), + ('arg_list_not_empty -> exp','arg_list_not_empty',1,'p_arg_list_not_empty_single','parser.py',323), + 
('arg_list_not_empty -> exp COMMA arg_list_not_empty','arg_list_not_empty',3,'p_arg_list_not_empty_multi','parser.py',328), +] diff --git a/src/semantics/TypeBuilder.py b/src/semantics/TypeBuilder.py new file mode 100644 index 000000000..997fbffab --- /dev/null +++ b/src/semantics/TypeBuilder.py @@ -0,0 +1,145 @@ +from parsing.ast import * +from .semantic import SemanticError +from .semantic import ErrorType, StringType, IntType, BoolType, ObjectType, SelfType +import utils.visitor as visitor + + +CANNOT_INHERIT = "SemanticError: Class %s cannot inherit class %s." +MAIN_NOT_DEFINED = "SemanticError: Method main must be defined in Main class." +MAIN_NOT_HERITABLE = "SemanticError: Class Main is not heritable." +CYCLES_IN_CLASES = "SemanticError: Class %s, or an ancestor of %s, is involved in an inheritance cycle." +NOT_SELF_TYPE = "TypeError: The type of the parameter %s can not be SELF_TYPE in method %s in class %s." +IDENTIFIER_USED = "SemanticError: Formal parameter %s is multiply defined." +ATTRIBUTE_REDEFINED = "SemanticError: Attribute %s is multiply defined in class." +METHOD_REDEFINED = "SemanticError: Method %s is multiply defined in class." +UNDEFINED_ATTRIBUTE_TYPE ="TypeError: Class %s of attribute %s is undefined." +PARENT_ATTRIBUTE_REDEFINED = "SemanticError: Attribute %s is an attribute of an inherited class." +PARENT_NOT_DEFINED = "TypeError: Class %s inhertits from an undefined class %s." +UNDEFINED_PARAM_TYPE = "TypeError: Class %s of formal parameter %s is undefined." +UNDEFINED_RETURN_TYPE = "TypeError: Undefined return type %s in method %s." 
class TypeBuilder:
    """Semantic pass that fills in the types already registered in ``context``.

    For each class declaration it resolves and sets the parent type, then
    defines the declared attributes and methods on that type.  Every problem
    found is appended to ``self.errors`` as a string; the failing piece is
    replaced by ``ErrorType()`` so the pass can keep going.
    """

    def __init__(self, context, errors=None):
        """
        :param context: type context, queried through ``get_type``.
        :param errors: optional list that receives the error messages.  When
            given, that very list object is appended to; when omitted, a new
            list is created for this instance.
        """
        self.context = context
        # A literal ``[]`` default is evaluated once and would be shared by
        # every builder created without an explicit list.
        self.errors = [] if errors is None else errors
        self.current_type = None

    def _report(self, location, template, *args):
        """Append ``'<location> - <message>'``, filling the ``%s`` slots of ``template``."""
        self.errors.append(f'{location} - {template % args}')

    @visitor.on('node')
    def visit(self, node):
        pass

    @visitor.when(ProgramNode)
    def visit(self, node):
        """Visit every class declaration, then check that ``Main`` has a ``main`` method."""
        for declaration in node.declarations:
            self.visit(declaration)

        try:
            main_type = self.context.get_type('Main')
        except SemanticError:
            # An absent Main class is not reported by this pass.
            return

        try:
            main_type.get_method('main')
        except SemanticError:
            self.errors.append(MAIN_NOT_DEFINED)

    @visitor.when(ClassDeclarationNode)
    def visit(self, node):
        """Resolve the parent of the class, set it, and visit the class features."""
        self.current_type = self.context.get_type(node.id)

        # Classes without an explicit parent inherit from Object.
        parent = self.context.get_type('Object')
        if node.parent is not None:
            try:
                parent = self.context.get_type(node.parent)
            except SemanticError:
                self._report(node.parent_location, PARENT_NOT_DEFINED, node.id, node.parent)
                parent = ErrorType()
            else:
                if parent == BoolType() or parent == IntType() or parent == StringType() or parent == SelfType():
                    self._report(node.parent_location, CANNOT_INHERIT, node.id, parent.name)
                    parent = ErrorType()
                else:
                    try:
                        main_type = self.context.get_type('Main')
                    except SemanticError:
                        main_type = None
                    if main_type is not None and parent == main_type:
                        # Reported with a location, like the other inheritance errors.
                        self._report(node.parent_location, MAIN_NOT_HERITABLE)
                        parent = ErrorType()

            # NOTE(review): the nesting of this check was reconstructed from a
            # whitespace-collapsed source; it is run only for an explicit parent
            # because it reports at node.parent_location -- confirm.
            if parent.conforms_to(self.current_type):
                self._report(node.parent_location, CYCLES_IN_CLASES, node.id, node.id)
                parent = ErrorType()

        if self.current_type != ObjectType():
            self.current_type.set_parent(parent)

        for feature in node.features:
            self.visit(feature)

    @visitor.when(AttrDeclarationNode)
    def visit(self, node):
        """Define the attribute on the current type, reporting unknown types and redefinitions."""
        try:
            attribute_type = self.context.get_type(node.type)
        except SemanticError:
            self._report(node.type_location, UNDEFINED_ATTRIBUTE_TYPE, node.type, node.id)
            attribute_type = ErrorType()

        try:
            self.current_type.define_attribute(node.id, attribute_type)
        except SemanticError:
            # The message depends on which type already owns the attribute.
            owner = self.current_type.get_attribute_parent(node.id)
            if owner == self.current_type:
                self._report(node.location, ATTRIBUTE_REDEFINED, node.id)
            else:
                self._report(node.location, PARENT_ATTRIBUTE_REDEFINED, node.id)

    @visitor.when(FuncDeclarationNode)
    def visit(self, node):
        """Define the method on the current type after resolving its signature."""
        try:
            return_type = self.context.get_type(node.type)
        except SemanticError:
            self._report(node.type_location, UNDEFINED_RETURN_TYPE, node.type, node.id)
            return_type = ErrorType()

        param_names = []
        param_types = []
        for param in node.params:
            try:
                param_type = self.context.get_type(param.type)
            except SemanticError:
                self._report(param.type_location, UNDEFINED_PARAM_TYPE, param.type, param.id)
                param_type = ErrorType()
            else:
                if param_type == SelfType():
                    self._report(param.type_location, NOT_SELF_TYPE,
                                 param.id, node.id, self.current_type.name)
                    param_type = ErrorType()

            if param.id in param_names:
                self._report(param.location, IDENTIFIER_USED, param.id)

            # A repeated name is reported above but still recorded.
            param_names.append(param.id)
            param_types.append(param_type)

        try:
            self.current_type.define_method(node.id, param_names, param_types, return_type)
        except SemanticError:
            self._report(node.location, METHOD_REDEFINED, node.id)


# (patch metadata kept from the original dump) diff --git a/src/semantics/TypeChecker.py b/src/semantics/TypeChecker.py new file mode 100644 index 000000000..1b91bc069 ---
/dev/null +++ b/src/semantics/TypeChecker.py @@ -0,0 +1,440 @@ +from parsing.ast import * +from .utils import find_parent_type, is_base_class +from .semantic import Scope, SemanticError +from .semantic import ObjectType, IntType, StringType, BoolType, ErrorType, SelfType +import utils.visitor as visitor + + +SELF_IS_READONLY = "SemanticError: Cannot assign to 'self'." +SELF_IS_READONLY_LET = "SemanticError: 'self' cannot be bound in a 'let' expression." +SELF_IS_READONLY_PARAM = "SemanticError: 'self' cannot be the name of a formal parameter." +SELF_IS_READONLY_ATTRIBUTE = "SemanticError: 'self' cannot be the name of an attribute." +INCOMPATIBLE_ATTRIBUTE_TYPE = "TypeError: Inferred type %s of initialization of attribute %s does not conform to declared type %s." +INCOMPATIBLE_VARIABLE_TYPE = "TypeError: Inferred type %s of initialization of %s does not conform to identifier's declared type %s." +INCOMPATIBLE_RET_FUNC_TYPE = "TypeError: Inferred return type %s of method %s does not conform to declared return type %s." +INCOMPATIBLE_DISPATCH_TYPE = "TypeError: In call of method %s, type %s of parameter %s does not conform to declared type %s." +INCOMPATIBLE_DISPATCH_DEC_TYPE = "TypeError: Expression type %s does not conform to declared static dispatch type %s." +VARIABLE_NOT_DEFINED = "NameError: Undeclared identifier %s." +INVALID_OPERATION = "TypeError: non-Int arguments: %s %s %s" +INVALID_BASIC_COMPARISON = "TypeError: Illegal comparison with a basic type." +OPERATION_NOT_DEFINED = "TypeError: Operation '%s' is not defined for type '%s'." +UNARY_OPERATION_NOT_DEFINED = "TypeError: Argument of '%s' has type %s instead of %s." +PREDICATE_OPERATIONS = "TypeError: %s condition does not have type Bool." +WRONG_NUMBER_ARGUMENTS = "SemanticError: Method %s called with wrong number of arguments." +DUPLICATE_BRANCH = "SemanticError: Duplicate branch %s in case statement." +CASE_TYPE_UNDEFINED = "TypeError: Class %s of case branch is undefined." 
UNDEFINED_METHOD = "AttributeError: Dispatch to undefined method %s."
PARENT_ATTRIBUTE_REDEFINED = "SemanticError: Attribute %s is an attribute of an inherited class."
UNDEFINED_VARIABLE_TYPE = "TypeError: Class %s of let-bound identifier %s is undefined."
METHOD_REDEFINED_PARAM = "SemanticError: In redefined method %s, parameter type %s is different from original type %s."
METHOD_REDEFINED_RETURN = "SemanticError: In redefined method %s, return type %s is different from original return type %s."
METHOD_REDEFINED_NPARAM = "SemanticError: Incompatible number of formal parameters in redefined method %s."
UNDEFINED_NEW_TYPE = "TypeError: 'new' used with undefined class %s."


class TypeChecker:
    """Semantic pass that computes ``computed_type`` for every expression node.

    Each visitor stores the static type of its node in ``node.computed_type``
    and appends a message to ``self.errors`` for every violation it finds,
    using ``ErrorType()`` as the type of the offending expression.
    """

    def __init__(self, context, errors=None):
        """
        :param context: type context, queried through ``get_type``.
        :param errors: optional list that receives the error messages.  When
            given, that very list object is appended to; when omitted, a new
            list is created for this instance.
        """
        self.context = context
        # A literal ``[]`` default is evaluated once and would be shared by
        # every checker created without an explicit list.
        self.errors = [] if errors is None else errors
        self.current_type = None
        self.current_method = None

    def _report(self, location, template, *args):
        """Append ``'<location> - <message>'``, filling the ``%s`` slots of ``template``."""
        self.errors.append(f'{location} - {template % args}')

    def _concrete(self, typex):
        """Return the class being checked when ``typex`` is SELF_TYPE, else ``typex``."""
        return typex if typex != SelfType() else self.current_type

    def _check_override(self, node, inherited):
        """Report differences between the current method and the ``inherited`` one."""
        current = self.current_method
        if inherited.return_type != current.return_type:
            self._report(node.type_location, METHOD_REDEFINED_RETURN,
                         node.id, current.return_type.name, inherited.return_type.name)

        if len(current.param_types) != len(inherited.param_types):
            self._report(node.location, METHOD_REDEFINED_NPARAM, node.id)
            return

        pairs = zip(current.param_types, inherited.param_types)
        for index, (type_child, type_parent) in enumerate(pairs):
            if type_child != type_parent:
                self._report(node.params[index].location, METHOD_REDEFINED_PARAM,
                             node.id, type_child.name, type_parent.name)

    @visitor.on('node')
    def visit(self, node, scope):
        pass

    @visitor.when(ProgramNode)
    def visit(self, node, scope=None):
        """Check every class in its own child scope and return the root scope."""
        scope = Scope()
        for declaration in node.declarations:
            self.visit(declaration, scope.create_child(declaration.id))
        return scope

    @visitor.when(ClassDeclarationNode)
    def visit(self, node, scope):
        self.current_type = self.context.get_type(node.id)
        scope.define_variable('self', self.current_type)

        for feature in node.features:
            if not isinstance(feature, AttrDeclarationNode):
                self.visit(feature, scope)
                continue

            # When two attributes share a name only the first one is stored on
            # the type, so for a later one (detected by its differing declared
            # type) only the initializer expression is checked.
            registered = self.current_type.get_attribute(feature.id)
            if registered.type.name == feature.type:
                self.visit(feature, scope)
            elif feature.expr is not None:
                self.visit(feature.expr, scope)

    @visitor.when(AttrDeclarationNode)
    def visit(self, node, scope):
        # An attribute that the parent type can already resolve is a redefinition.
        parent = self.current_type.parent
        if parent is not None:
            try:
                parent.get_attribute(node.id)
            except SemanticError:
                pass
            else:
                self._report(node.location, PARENT_ATTRIBUTE_REDEFINED, node.id)

        if node.id == "self":
            self._report(node.location, SELF_IS_READONLY_ATTRIBUTE)

        declared_type = self._concrete(self.current_type.get_attribute(node.id).type)

        if node.expr is not None:
            self.visit(node.expr, scope)
            expr_type = self._concrete(node.expr.computed_type)
            if not expr_type.conforms_to(declared_type):
                self._report(node.expr.location, INCOMPATIBLE_ATTRIBUTE_TYPE,
                             expr_type.name, node.id, declared_type.name)

    @visitor.when(FuncDeclarationNode)
    def visit(self, node, scope):
        self.current_method = self.current_type.get_method(node.id)
        child = scope.create_child(scope.class_name, self.current_method.name)

        # A method the parent also resolves must keep the same signature.
        parent = self.current_type.parent
        if parent is not None:
            try:
                inherited = parent.get_method(node.id)
            except SemanticError:
                pass
            else:
                self._check_override(node, inherited)

        signature = zip(self.current_method.param_names, self.current_method.param_types)
        for index, (name, typex) in enumerate(signature):
            if name != "self":
                child.define_variable(name, typex)
            else:
                self._report(node.params[index].location, SELF_IS_READONLY_PARAM)

        self.visit(node.expr, child)

        body_type = self._concrete(node.expr.computed_type)
        declared_type = self._concrete(self.current_method.return_type)
        if not body_type.conforms_to(declared_type):
            self._report(node.expr.location, INCOMPATIBLE_RET_FUNC_TYPE,
                         body_type.name, node.id, declared_type.name)

    @visitor.when(BlockNode)
    def visit(self, node, scope):
        for expr in node.expr_lis:
            self.visit(expr, scope)
        # The block has the type of its last expression.
        node.computed_type = node.expr_lis[-1].computed_type

    @visitor.when(DispatchNode)
    def visit(self, node, scope):
        arg_error = None
        if node.expr is not None:
            self.visit(node.expr, scope)
            obj_type = self._concrete(node.expr.computed_type)

            if node.type is not None:
                # Static dispatch: the method is looked up on the named type.
                try:
                    typex = self.context.get_type(node.type)
                    if not obj_type.conforms_to(typex):
                        self._report(node.location, INCOMPATIBLE_DISPATCH_DEC_TYPE,
                                     obj_type.name, typex.name)
                        typex = ErrorType()
                    obj_type = typex
                except SemanticError as error:
                    self.errors.append(error.text)
                    obj_type = ErrorType()
        else:
            obj_type = scope.find_variable('self').type

        try:
            obj_type = self.context.get_type(obj_type.name)
            method = obj_type.get_method(node.id)

            # A missing argument list counts as an empty one, so the arity
            # check needs no attribute beyond the ones used elsewhere in this
            # class and never calls len() on None.
            args = [] if node.arg is None else node.arg
            if len(args) == len(method.param_types):
                for arg, param_type, param_name in zip(args, method.param_types, method.param_names):
                    self.visit(arg, scope)
                    arg_type = self._concrete(arg.computed_type)
                    if not arg_type.conforms_to(param_type):
                        self._report(arg.location, INCOMPATIBLE_DISPATCH_TYPE,
                                     node.id, arg_type.name, param_name, param_type.name)
                        arg_error = ErrorType()
            else:
                self._report(node.location, WRONG_NUMBER_ARGUMENTS, method.name)

            if arg_error is None:
                # A SELF_TYPE result takes the type the method was looked up on.
                ret_type = method.return_type if method.return_type != SelfType() else obj_type
            else:
                ret_type = arg_error
        except SemanticError:
            self._report(node.location, UNDEFINED_METHOD, node.id)
            ret_type = ErrorType()

        node.computed_type = ret_type

    @visitor.when(ConditionalNode)
    def visit(self, node, scope):
        self.visit(node.predicate, scope)
        predicate_type = node.predicate.computed_type

        self.visit(node.then, scope)
        self.visit(node.elsex, scope)

        if predicate_type.conforms_to(BoolType()):
            node.computed_type = find_parent_type(
                self.current_type, node.then.computed_type, node.elsex.computed_type)
        else:
            self._report(node.location, PREDICATE_OPERATIONS, "If")
            node.computed_type = ErrorType()

    @visitor.when(LetNode)
    def visit(self, node, scope):
        # Declarations and body share one fresh child scope.
        child = scope.create_child(scope.class_name, scope.method_name)
        for item in node.variables:
            self.visit(item, child)

        self.visit(node.expr, child)
        node.computed_type = node.expr.computed_type

    @visitor.when(VarDeclarationNode)
    def visit(self, node, scope):
        if node.id == 'self':
            self._report(node.location, SELF_IS_READONLY_LET)

        try:
            var_type = self.context.get_type(node.type)
        except SemanticError:
            self._report(node.type_location, UNDEFINED_VARIABLE_TYPE, node.type, node.id)
            var_type = ErrorType()

        if node.expr is not None:
            self.visit(node.expr, scope)
            init_type = self._concrete(node.expr.computed_type)
            if not init_type.conforms_to(var_type):
                self._report(node.expr.location, INCOMPATIBLE_VARIABLE_TYPE,
                             init_type.name, node.id, var_type.name)

        # A later binding of the same name replaces the earlier one.
        if scope.is_local(node.id):
            scope.remove_variable(node.id)
        scope.define_variable(node.id, var_type)

        node.computed_type = var_type

    @visitor.when(LoopNode)
    def visit(self, node, scope):
        self.visit(node.predicate, scope)
        predicate_type = node.predicate.computed_type
        self.visit(node.body, scope)

        if predicate_type.conforms_to(BoolType()):
            node.computed_type = ObjectType()
        else:
            self._report(node.location, PREDICATE_OPERATIONS, "Loop")
            node.computed_type = ErrorType()

    @visitor.when(CaseNode)
    def visit(self, node, scope):
        self.visit(node.expr, scope)

        branch_types = []
        declared = []
        for branch in node.cases:
            self.visit(branch, scope)
            branch_types.append(branch.computed_type)
            if branch.type in declared:
                self._report(branch.type_location, DUPLICATE_BRANCH, branch.type)
            else:
                declared.append(branch.type)

        # Fold the branch types pairwise through find_parent_type.
        result = branch_types[0]
        for branch_type in branch_types[1:]:
            result = find_parent_type(self.current_type, result, branch_type)
        node.computed_type = result

    @visitor.when(CaseAttrNode)
    def visit(self, node, scope):
        try:
            typex = self.context.get_type(node.type)
        except SemanticError:
            self._report(node.type_location, CASE_TYPE_UNDEFINED, node.type)
            typex = ErrorType()

        # The branch variable lives only in the branch's own scope.
        child_scope = scope.create_child(scope.class_name, scope.method_name)
        child_scope.define_variable(node.id, typex)
        self.visit(node.expr, child_scope)

        node.computed_type = node.expr.computed_type

    @visitor.when(AssignNode)
    def visit(self, node, scope):
        self.visit(node.id, scope)
        var_type = node.id.computed_type

        self.visit(node.expr, scope)
        expr_type = self._concrete(node.expr.computed_type)

        node.computed_type = expr_type

        if node.id.lex == 'self':
            self._report(node.symbol_location, SELF_IS_READONLY)
            node.computed_type = ErrorType()
        elif not expr_type.conforms_to(var_type):
            # Each placeholder is filled exactly once (expression type,
            # identifier, declared type) and the message carries a location
            # like every other diagnostic of this pass.
            self._report(node.expr.location, INCOMPATIBLE_VARIABLE_TYPE,
                         expr_type.name, node.id.lex, var_type.name)
            node.computed_type = ErrorType()

    @visitor.when(BinaryNode)
    def visit(self, node, scope):
        self.visit(node.left, scope)
        left_type = node.left.computed_type
        self.visit(node.right, scope)
        right_type = node.right.computed_type

        if isinstance(node, EqualsNode):
            if left_type == right_type:
                node.computed_type = BoolType()
            elif is_base_class(left_type.name) or is_base_class(right_type.name):
                self._report(node.symbol_location, INVALID_BASIC_COMPARISON)
                node.computed_type = ErrorType()
            else:
                node.computed_type = BoolType()
            return

        if isinstance(node, PlusNode):
            operation = "+"
        elif isinstance(node, MinusNode):
            operation = "-"
        elif isinstance(node, DivNode):
            operation = "/"
        elif isinstance(node, StarNode):
            operation = "*"
        elif isinstance(node, ElessNode):
            operation = "<="
        elif isinstance(node, LessNode):
            operation = "<"

        if left_type == IntType() and right_type == IntType():
            if isinstance(node, (ElessNode, LessNode)):
                node.computed_type = BoolType()
            else:
                node.computed_type = IntType()
            return

        # NOTE(review): the previous code chose the location with an if/else
        # whose two branches were identical (the left operand); that behavior
        # is kept -- confirm whether the right operand was meant when the left
        # one is Int.
        location = node.left.location
        if left_type == right_type:
            self._report(location, OPERATION_NOT_DEFINED, operation, left_type.name)
        else:
            self._report(location, INVALID_OPERATION, left_type.name, operation, right_type.name)
        node.computed_type = ErrorType()

    @visitor.when(PrimeNode)
    def visit(self, node, scope):
        self.visit(node.expr, scope)
        operand_type = node.expr.computed_type

        if operand_type == IntType():
            node.computed_type = IntType()
        else:
            self._report(node.expr.location, UNARY_OPERATION_NOT_DEFINED,
                         "~", operand_type.name, "Int")
            node.computed_type = ErrorType()

    @visitor.when(NotNode)
    def visit(self, node, scope):
        self.visit(node.expr, scope)
        operand_type = node.expr.computed_type

        if operand_type == BoolType():
            node.computed_type = BoolType()
        else:
            self._report(node.expr.location, UNARY_OPERATION_NOT_DEFINED,
                         "not", operand_type.name, "Bool")
            node.computed_type = ErrorType()

    @visitor.when(StringNode)
    def visit(self, node, scope):
        node.computed_type = StringType()

    @visitor.when(IsVoidNode)
    def visit(self, node, scope):
        self.visit(node.expr, scope)
        node.computed_type = BoolType()

    @visitor.when(ConstantNumNode)
    def visit(self, node, scope):
        node.computed_type = IntType()

    @visitor.when(VariableNode)
    def visit(self, node, scope):
        if scope.is_defined(node.lex, self.current_type):
            var_type = scope.find_variable_or_attribute(node.lex, self.current_type).type
        else:
            self._report(node.location, VARIABLE_NOT_DEFINED, node.lex)
            var_type = ErrorType()
        node.computed_type = var_type

    @visitor.when(TrueNode)
    def visit(self, node, scope):
        node.computed_type = BoolType()

    @visitor.when(FalseNode)
    def visit(self, node, scope):
        node.computed_type = BoolType()

    @visitor.when(InstantiateNode)
    def visit(self, node, scope):
        try:
            var_type = self.context.get_type(node.lex)
        except SemanticError:
            self._report(node.location, UNDEFINED_NEW_TYPE, node.lex)
            var_type = ErrorType()

        node.computed_type = var_type


# (patch metadata kept from the original dump) diff --git a/src/semantics/TypeCollector.py b/src/semantics/TypeCollector.py new file mode 100644 index 000000000..5dd35b728 --- /dev/null +++ b/src/semantics/TypeCollector.py @@ -0,0 +1,76 @@
from parsing.ast import *
from .semantic import SemanticError, Context
from .semantic import ObjectType, StringType, IntType, BoolType, IOType, SelfType
import utils.visitor as visitor
from .utils import is_base_class


BASIC_CLASS_REDEFINED = "SemanticError: Redefinition of basic class %s."
CLASS_REDEFINED = "SemanticError: Classes may not be redefined"
MAIN_NOT_DEFINED = "SemanticError: Class Main must be defined."
+
+
+class TypeCollector(object):  # Semantic pass that registers every declared class name in a fresh Context.
+    def __init__(self):
+        self.context = None  # created when the ProgramNode is visited
+        self.errors = []  # error messages collected during the pass
+
+    @visitor.on('node')
+    def visit(self, node):
+        pass
+
+    @visitor.when(ProgramNode)
+    def visit(self, node):  # Build the context: built-in classes first, then every declaration; finally require a Main class.
+        self.context = Context()
+
+        # Define base classes and their methods
+        define_base_classes(self.context)
+
+        for dec in node.declarations:
+            self.visit(dec)
+
+        # Check that class Main is defined
+        try:
+            self.context.get_type('Main')
+        except SemanticError:
+            self.errors.append(MAIN_NOT_DEFINED)
+
+    @visitor.when(ClassDeclarationNode)
+    def visit(self, node):  # Register the class, or report a redefinition when the name is already in the context.
+        try:
+            self.context.get_type(node.id)  # succeeds only when the name already exists
+            if is_base_class(node.id):
+                e = BASIC_CLASS_REDEFINED.replace('%s', node.id, 1)
+                self.errors.append(f"{node.location} - {e}")
+            else:
+                self.errors.append(f'{node.id_location} - {CLASS_REDEFINED}')
+        except SemanticError:
+            self.context.create_type(node.id)  # unknown name: this is a new class
+
+
+def define_base_classes(context):  # Install Object, IO, Int, String, Bool and SELF_TYPE in context.types with their built-in methods.
+    object_type = context.types['Object'] = ObjectType()
+    io_type = context.types['IO'] = IOType()
+    int_type = context.types['Int'] = IntType()
+    string_type = context.types['String'] = StringType()
+    bool_type = context.types['Bool'] = BoolType()
+    self_type = context.types['SELF_TYPE'] = SelfType()
+
+    object_type.define_method('abort', [], [], object_type)
+    object_type.define_method('type_name', [], [], string_type)
+    object_type.define_method('copy', [], [], self_type)
+
+    int_type.set_parent(object_type)  # replaces the throwaway ObjectType() set by the constructor with the shared instance
+
+    string_type.set_parent(object_type)
+    string_type.define_method('length', [], [], int_type)
+    string_type.define_method('concat', ['s'], [string_type], string_type)
+    string_type.define_method('substr', ['i', 'l'], [int_type, int_type], string_type)
+
+    bool_type.set_parent(object_type)
+
+    io_type.set_parent(object_type)
+    io_type.define_method('out_string', ['x'], [string_type], self_type)
+    io_type.define_method('out_int', ['x'], [int_type], self_type)
+    io_type.define_method('in_string', [], [], string_type)
+    io_type.define_method('in_int', [], [], int_type)
diff --git a/src/semantics/semantic.py b/src/semantics/semantic.py
new file mode 100644
index 000000000..91494b9ac
--- /dev/null
+++ b/src/semantics/semantic.py
@@ -0,0 +1,312 @@
+import itertools as itt
+from collections import OrderedDict
+
+
+class SemanticError(Exception):  # Raised by Type and Context for undefined or duplicated names.
+    @property
+    def text(self):  # The message passed to the constructor.
+        return self.args[0]
+
+
+class Attribute:  # A named, typed attribute of a Type.
+    def __init__(self, name, typex):
+        self.name = name
+        self.type = typex
+
+    def __str__(self):
+        return f'[attrib] {self.name} : {self.type.name};'
+
+    def __repr__(self):
+        return str(self)
+
+
+class Method:  # A method signature: name, parameter names, parameter types and return type.
+    def __init__(self, name, param_names, params_types, return_type):
+        self.name = name
+        self.param_names = param_names
+        self.param_types = params_types
+        self.return_type = return_type
+
+    def __str__(self):
+        params = ', '.join(f'{n}:{t.name}' for n,t in zip(self.param_names, self.param_types))
+        return f'[method] {self.name}({params}): {self.return_type.name};'
+
+    def __eq__(self, other):  # Equal when name, return type and parameter types match; parameter names are ignored.
+        return other.name == self.name and \
+            other.return_type == self.return_type and \
+            other.param_types == self.param_types
+
+
+class Type:  # A class of the compiled program: its own attributes and methods plus a link to its parent.
+    def __init__(self, name:str):
+        self.name = name
+        self.attributes = []  # attributes declared directly on this type
+        self.methods = []  # methods declared directly on this type
+        self.parent = None
+        self.ref = True  # IntType and BoolType set this to False
+
+    def set_parent(self, parent):
+        self.parent = parent
+
+    def get_attribute(self, name:str):  # Return the attribute, searching this type and then its ancestors; SemanticError if absent.
+        try:
+            return next(attr for attr in self.attributes if attr.name == name)
+        except StopIteration:
+            if self.parent is None:
+                raise SemanticError(f'Attribute "{name}" is not defined in {self.name}.')
+            try:
+                return self.parent.get_attribute(name)
+            except SemanticError:  # re-raised so the message names this type rather than the ancestor
+                raise SemanticError(f'Attribute "{name}" is not defined in {self.name}.')
+
+    def get_attribute_parent(self, name:str):  # Return the nearest type (self or an ancestor) that declares the attribute.
+        try:
+            next(attr for attr in self.attributes if attr.name == name)
+            return self
+        except StopIteration:
+            if self.parent is None:
+                raise SemanticError(f'Attribute "{name}" is not defined in {self.name}.')
+            try:
+                return self.parent.get_attribute_parent(name)
+            except SemanticError:
+                raise SemanticError(f'Attribute "{name}" is not defined in {self.name}.')
+
+    def define_attribute(self, name:str, typex):  # Add an attribute; SemanticError if this type or an ancestor already has it.
+        try:
+            self.get_attribute(name)
+        except SemanticError:
+            attribute = Attribute(name, typex)
+            self.attributes.append(attribute)
+            return attribute
+        else:
+            raise SemanticError(f'Attribute "{name}" is already defined in {self.name}.')
+
+    def get_method(self, name:str):  # Return the method, searching this type and then its ancestors; SemanticError if absent.
+        try:
+            return next(method for method in self.methods if method.name == name)
+        except StopIteration:
+            if self.parent is None:
+                raise SemanticError(f'Method "{name}" is not defined in {self.name}.')
+            try:
+                return self.parent.get_method(name)
+            except SemanticError:
+                raise SemanticError(f'Method "{name}" is not defined in {self.name}.')
+
+    def get_method_parent(self, name:str):  # Return the nearest type (self or an ancestor) that declares the method.
+        try:
+            next(method for method in self.methods if method.name == name)
+            return self
+        except StopIteration:
+            if self.parent is None:
+                raise SemanticError(f'Method "{name}" is not defined in {self.name}.')
+            try:
+                return self.parent.get_method_parent(name)
+            except SemanticError:
+                raise SemanticError(f'Method "{name}" is not defined in {self.name}.')
+
+    def define_method(self, name:str, param_names:list, param_types:list, return_type):  # Add a method; only this type's own methods are checked for duplicates.
+        if name in (method.name for method in self.methods):
+            raise SemanticError(f'Method "{name}" already defined in {self.name}')
+
+        method = Method(name, param_names, param_types, return_type)
+        self.methods.append(method)
+        return method
+
+    def all_attributes(self, clean=True):  # (attribute, declaring type) pairs, ancestors first; clean=False returns the name-keyed OrderedDict.
+        plain = OrderedDict() if self.parent is None else self.parent.all_attributes(False)
+        for attr in self.attributes:
+            plain[attr.name] = (attr, self)
+        return plain.values() if clean else plain
+
+    def all_methods(self, clean=True):  # (method, declaring type) pairs, ancestors first; a redeclared name keeps its slot but points to the newer method.
+        plain = OrderedDict() if self.parent is None else self.parent.all_methods(False)
+        for method in self.methods:
+            plain[method.name] = (method, self)
+        return plain.values() if clean else plain
+
+    def conforms_to(self, other):  # True when other bypasses checks, equals self, or is reached by following parent links from self.
+        return other.bypass() or self == other or self.parent is not None and self.parent.conforms_to(other)
+
+    def bypass(self):  # Ordinary types never bypass conformance checks; ErrorType overrides this.
+        return False
+
+    def __str__(self):
+        output = f'type {self.name}'
+        parent = '' if self.parent is None else f' : {self.parent.name}'
+        output += parent
+        output += ' {'
+        output += '\n\t' if self.attributes or self.methods else ''
+        output += '\n\t'.join(str(x) for x in self.attributes)
+        output += '\n\t' if self.attributes else ''
+        output += '\n\t'.join(str(x) for x in self.methods)
+        output += '\n' if self.methods else ''
+        output += '}\n'
+        return output
+
+    def __repr__(self):
+        return str(self)
+
+
+class ErrorType(Type):  # Placeholder type: it conforms to every type and every type conforms to it.
+    def __init__(self):
+        Type.__init__(self, '')
+
+    def conforms_to(self, other):
+        return True
+
+    def bypass(self):
+        return True
+
+    def __eq__(self, other):
+        return isinstance(other, ErrorType)
+
+
+class ObjectType(Type):  # Built-in Object; compares equal to any type named 'Object'.
+    def __init__(self):
+        Type.__init__(self, 'Object')
+
+    def __eq__(self, other):
+        return other.name == self.name or isinstance(other, ObjectType)
+
+
+class IntType(Type):  # Built-in Int; ref is False and the parent is Object.
+    def __init__(self):
+        Type.__init__(self, 'Int')
+        Type.set_parent(self, ObjectType())
+        self.ref = False
+
+    def __eq__(self, other):
+        return other.name == self.name or isinstance(other, IntType)
+
+
+class StringType(Type):  # Built-in String; the parent is Object.
+    def __init__(self):
+        Type.__init__(self, 'String')
+        Type.set_parent(self, ObjectType())
+
+    def __eq__(self, other):
+        return other.name == self.name or isinstance(other, StringType)
+
+
+class BoolType(Type):  # Built-in Bool; ref is False and the parent is Object.
+    def __init__(self):
+        Type.__init__(self, 'Bool')
+        Type.set_parent(self, ObjectType())
+        self.ref = False
+
+    def __eq__(self, other):
+        return other.name == self.name or isinstance(other, BoolType)
+
+
+class AutoType(Type):  # Type named AUTO_TYPE; equal only to other AutoType instances.
+    def __init__(self):
+        Type.__init__(self, 'AUTO_TYPE')
+        self.infered_type = None
+
+    def __eq__(self, other):
+        return isinstance(other, AutoType)
+
+
+class SelfType(Type):  # Type named SELF_TYPE; equal only to other SelfType instances.
+    def __init__(self):
+        Type.__init__(self, 'SELF_TYPE')
+
+    def __eq__(self, other):
+        return isinstance(other, SelfType)
+
+
+class IOType(Type):  # Built-in IO; the parent is Object.
+    def __init__(self):
+        Type.__init__(self, 'IO')
+        Type.set_parent(self, ObjectType())
+
+    def __eq__(self, other):
+        return isinstance(other, IOType)
+
+
+class Context:  # Registry of every known type, keyed by name.
+    def __init__(self):
+        self.types = {}
+
+    def create_type(self, name:str):  # Register a new plain Type; SemanticError if the name is taken.
+        if name in self.types:
+            raise SemanticError(f'Type with the same name ({name}) already in context.')
+        typex = self.types[name] = Type(name)
+        return typex
+
+    def get_type(self, name:str):  # Look up a type by name (a fresh AutoType for AUTO_TYPE); SemanticError if unknown.
+        try:
+            return self.types[name] if self.types[name] != AutoType() else AutoType()
+        except KeyError:
+            raise SemanticError(f'Type "{name}" is not defined.')
+
+    def __str__(self):
+        return '{\n\t' + '\n\t'.join(y for x in self.types.values() for y in str(x).split('\n')) + '\n}'
+
+    def __repr__(self):
+        return str(self)
+
+
+class VariableInfo:  # Name and type of a variable stored in a Scope.
+    def __init__(self, name, vtype):
+        self.name = name
+        self.type = vtype
+
+
+class Scope:  # A scope in a tree of scopes; a child sees only the parent variables defined before the child was created.
+    def __init__(self, parent=None):
+        self.locals = []
+        self.parent = parent
+        self.children = []
+        self.index = 0 if parent is None else len(parent)  # number of parent locals visible from this scope
+        self.class_name = None
+        self.method_name = None
+
+    def __len__(self):
+        return len(self.locals)
+
+    def create_child(self, class_name=None, method_name=None):  # Create, link and return a nested scope tagged with the given names.
+        child = Scope(self)
+        self.children.append(child)
+        child.class_name = class_name
+        child.method_name = method_name
+        return child
+
+    def define_variable(self, vname, vtype):  # Append a variable to this scope; an existing name is not checked.
+        info = VariableInfo(vname, vtype)
+        self.locals.append(info)
+        return info
+
+    def remove_variable(self, vname):  # Drop every local named vname and keep all the others.
+        self.locals = [v for v in self.locals if v.name != vname]
+
+    def find_variable(self, vname, index=None):  # First matching variable here, else in ancestors (limited to what was visible); None if absent.
+        locals = self.locals if index is None else itt.islice(self.locals, index)
+        try:
+            return next(x for x in locals if x.name == vname)
+        except StopIteration:
+            return self.parent.find_variable(vname, self.index) if self.parent is not None else None
+
+    def find_variable_or_attribute(self, vname, current_type):  # Scope variables take precedence over attributes of current_type; None if neither exists.
+        var = self.find_variable(vname)
+        if var is None:
+            try:
+                return current_type.get_attribute(vname)
+            except SemanticError:
+                return None
+        else:
+            return var
+
+    def is_defined(self, vname, current_type):
+        return self.find_variable_or_attribute(vname, current_type) is not None
+
+    def is_local(self, vname):  # True when vname is defined directly in this scope.
+        return any(True for x in self.locals if x.name == vname)
+
+    def child_find_variable(self, vname):  # Depth-first search for the scope (self or a descendant) that defines vname locally; None if there is none.
+        if self.is_local(vname):
+            return self
+        for child in self.children:
+            found = child.child_find_variable(vname)
+            if found is not None:
+                return found
\ No newline at end of file
diff --git a/src/semantics/utils.py b/src/semantics/utils.py
new file mode 100644
index 000000000..8fd08d102
--- /dev/null
+++ b/src/semantics/utils.py
@@ -0,0 +1,34 @@
+from parsing.ast import *
+from .semantic import ErrorType, ObjectType, SelfType
+
+
+def find_parent_type(current_type, type1, type2):  # Common ancestor of two types with SELF_TYPE resolved to current_type; ErrorType if either is an error.
+    if type1 == SelfType():
+        type1 = current_type
+    if type2 == SelfType():
+        type2 = current_type
+
+    if type1 == type2:
+        return type1
+    elif type1 == ErrorType() or type2 == ErrorType():
+        return ErrorType()
+    elif type1 == ObjectType() or type2 == ObjectType():
+        return ObjectType()
+
+    parent1 = find_parent_type(current_type, type1.parent, type2)  # three candidates: climb from type1, from type2, or from both
+    parent2 = find_parent_type(current_type, type1, type2.parent)
+    parent3 = find_parent_type(current_type, type1.parent, type2.parent)
+
+    if parent1.conforms_to(parent2):  # keep whichever candidate conforms to the others
+        temp = parent1
+    else:
+        temp = parent2
+
+    if temp.conforms_to(parent3):
+        return temp
+    else:
+        return parent3
+
+
+def is_base_class(id):  # True for the names of the five built-in classes.
+    return id in ['Object', 'IO', 'Int', 'String', 'Bool']
\ No newline at end of file
diff --git a/src/utils/visitor.py b/src/utils/visitor.py
new file mode 100644
index 000000000..567fa5a78
--- /dev/null
+++ b/src/utils/visitor.py
@@ -0,0 +1,82 @@
+# The MIT License (MIT)
+#
+# Copyright (c) 2013 Curtis Schlak
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of
this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+import inspect
+
+
+__all__ = ['on', 'when']
+
+
+def on(param_name):  # Decorator factory: replaces the decorated function with a Dispatcher keyed on the argument named param_name.
+    def f(fn):
+        dispatcher = Dispatcher(param_name, fn)
+        return dispatcher
+    return f
+
+
+def when(param_type):  # Decorator factory: registers the decorated function as the handler for arguments of type param_type.
+    def f(fn):
+        frame = inspect.currentframe().f_back  # frame of the code that is applying the decorator
+        func_name = fn.func_name if 'func_name' in dir(fn) else fn.__name__
+        dispatcher = frame.f_locals[func_name]  # whatever is already bound to this name in that frame
+        if not isinstance(dispatcher, Dispatcher):
+            dispatcher = dispatcher.dispatcher  # an earlier @when wrapper: unwrap to the shared Dispatcher
+        dispatcher.add_target(param_type, fn)
+        def ff(*args, **kw):
+            return dispatcher(*args, **kw)
+        ff.dispatcher = dispatcher  # lets the next @when on the same name find the Dispatcher
+        return ff
+    return f
+
+
+class Dispatcher(object):  # Callable that forwards a call to the target registered for the class of one chosen argument.
+    def __init__(self, param_name, fn):
+        frame = inspect.currentframe().f_back.f_back
+        top_level = frame.f_locals == frame.f_globals  # NOTE(review): frame and top_level are not used anywhere below
+        self.param_index = self.__argspec(fn).args.index(param_name)  # position of the dispatch argument in fn's signature
+        self.param_name = param_name
+        self.targets = {}  # maps a type to its handler
+
+    def __call__(self, *args, **kw):  # Call the handler for the exact class of the dispatch argument, else every handler for a superclass.
+        typ = args[self.param_index].__class__
+        d = self.targets.get(typ)
+        if d is not None:
+            return d(*args, **kw)
+        else:
+            issub = issubclass
+            t = self.targets
+            ks = t.keys()
+            ans = [t[k](*args, **kw) for k in ks if issub(typ, k)]
+            if len(ans) == 1:
+                return ans.pop()
+            return ans  # zero or several matches: the list of results is returned as-is
+
+    def add_target(self, typ, target):  # Register target as the handler for typ, replacing any earlier one.
+        self.targets[typ] = target
+
+    @staticmethod
+    def __argspec(fn):
+        # Support for Python 3 type hints requires inspect.getfullargspec
+        if hasattr(inspect, 'getfullargspec'):
+            return inspect.getfullargspec(fn)
+        else:
+            return inspect.getargspec(fn)
\ No newline at end of file