esl-epfl · maaspa · Jul 10, 2024 · Jul 11, 2024 · Jul 11, 2024 · Aug 6, 2024
diff --git a/src/cgra.py b/src/cgra.py
@@ -2,7 +2,7 @@
 from ctypes import c_int32
 import csv
 import os.path
-
+from characterization import load_operation_characterization, display_characterization, get_latency_cc
 from kernels import *
 
 # CGRA from left to right, top to bottom
@@ -20,7 +20,8 @@
 srcs    = ['ZERO', 'SELF', 'RCL', 'RCR', 'RCT', 'RCB',  'R0', 'R1', 'R2', 'R3', 'IMM']
 dsts    = ['SELF', 'RCL', 'RCR', 'RCT', 'RCB','R0', 'R1', 'R2', 'R3']
 regs    = dsts[-4:]
-
+# Per-operation latency (CC) table, loaded once at import time from the characterization CSV.
+operation_latency_mapping = load_operation_characterization({}, "latency_cc")
 class INSTR:
     def __init__( self,matrix):
         self.time = matrix[0][0]                        # ToDo: Fix how we assign this length
@@ -71,6 +72,9 @@ def __init__( self, kernel, memory, read_addrs, write_addrs):
         self.memory     = memory
         self.instr2exec = 0
         self.cycles     = 0
+        self.total_latency_cc = 0
+        self.instr_latency_cc = []
+        self.max_latency_instr = None
         if read_addrs is not None and len(read_addrs) == N_COLS:
             self.load_addr = read_addrs
         else:
@@ -110,10 +114,13 @@ def step( self, prs="ROUT" ):
             reg     = [[ self.cells[r][i].regs[regs[x]]   for i in range(N_COLS) ] for x in range(len(regs)) ]
             print_out( prs, outs, insts, ops, reg )
 
+        get_latency_cc(self)  
         self.instr2exec += 1
         self.cycles += 1
         return self.exit
 
+
+
     def get_neighbour_address( self, r, c, dir ):
         n_r = r
         n_c = c
@@ -181,6 +188,7 @@ def __init__( self, parent, row, col ):
         self.regs       = {'R0':0, 'R1':0, 'R2':0, 'R3':0 }
         self.op         = ""
         self.instr      = ""
+        self.latency_cc = 0
 
     def get_out( self ):
         return self.old_out
@@ -222,7 +230,7 @@ def run_instr( self, instr):
             self.op      = instr[0]
         except:
             self.op = instr
-
+        self.latency_cc = int(operation_latency_mapping[self.op])
         if self.op in self.ops_arith:
             des     = instr[1]
             val1    = self.fetch_val( instr[2] )
@@ -375,6 +383,8 @@ def blt( self,  val1, val2, branch ):
     ops_jump    = { 'JUMP'      : '' }
     ops_exit    = { 'EXIT'      : '' }
 
+
+
 def run( kernel, version="", pr="ROUT", limit=100, load_addrs=None, store_addrs=None):
     ker = []
     mem = []
@@ -407,5 +417,5 @@ def run( kernel, version="", pr="ROUT", limit=100, load_addrs=None, store_addrs=
     sorted_mem = sorted(mem, key=lambda x: x[0])
     with open( kernel + "/"+FILENAME_MEM_O+version+EXT, 'w+') as f:
         for row in sorted_mem: csv.writer(f).writerow(row)
-
+    display_characterization(cgra)
     print("\n\nEND")
diff --git a/src/characterization.py b/src/characterization.py
@@ -0,0 +1,66 @@
+import copy
+import os.path
+import csv
+
+OPERATIONS_MEMORY_ACCESS = ["LWD", "LWI", "SWD","SWI"]
+
+def load_operation_characterization(operation_mapping, characterization_type):
+    """Fill operation_mapping from one section of operation_characterization.csv.
+
+    Only rows below the '# operation_<characterization_type>_mapping' header are
+    read: 'op,key,value' rows become operation_mapping[op][int(key)] = float(value)
+    and 'op,value' rows become operation_mapping[op] = int(value) unless op is
+    already present. The same dict is returned after being updated in place.
+    """
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    csv_file_path = os.path.join(script_dir, 'operation_characterization.csv')
+    wanted_section = f'operation_{characterization_type}_mapping'
+    # Rows that appear before any '#' header belong to no section and are skipped.
+    current_section = None
+
+    with open(csv_file_path, 'r') as csvfile:
+        for row in csv.reader(csvfile):
+            if not row:
+                continue
+            if row[0].startswith('#'):
+                current_section = row[0].strip('# ')
+                continue
+            if current_section != wanted_section:
+                continue
+            if len(row) == 3:
+                operation, key, value = row
+                key, value = int(key), float(value)
+                operation_mapping.setdefault(operation, {})[key] = value
+            elif len(row) == 2:
+                operation_mapping.setdefault(row[0], int(row[1]))
+    return operation_mapping
+
+def get_latency_cc(self):
+    """Record the latency of the slowest cell operation for the current step.
+
+    Appends a shallow copy of that cell to self.instr_latency_cc and adds its
+    latency to self.total_latency_cc. The selected cell itself is modified too.
+    """
+    from cgra import N_ROWS, N_COLS
+    self.max_latency_instr = None
+    grid = [self.cells[row][col] for row in range(N_ROWS) for col in range(N_COLS)]
+    for cell in grid:  # the first cell holding the strictly largest latency wins
+        if self.max_latency_instr is None or cell.latency_cc > self.max_latency_instr.latency_cc:
+            self.max_latency_instr = cell
+    slowest = self.max_latency_instr
+    # Memory accesses cost one cycle per accessing cell plus one extra cycle.
+    accesses = sum(1 for cell in grid if cell.op in OPERATIONS_MEMORY_ACCESS)
+    bank_latency = accesses + 1 if accesses >= 1 else accesses
+    slowest.latency_cc = max(slowest.latency_cc, bank_latency)
+    if self.exit and slowest.latency_cc > 2:
+        slowest.latency_cc += 1
+    slowest.instr2exec = self.instr2exec
+    self.instr_latency_cc.append(copy.copy(slowest))
+    self.total_latency_cc += self.instr_latency_cc[-1].latency_cc
+
+def display_characterization(cgra):
+    """Print each entry of cgra.instr_latency_cc as a table row, then cgra.total_latency_cc."""
+    print("Longest instructions per cycle:\n\n{:<9} {:<25} {:<10}".format("Cycle", "Instruction", "Latency (CC)"))
+    for step, entry in enumerate(cgra.instr_latency_cc, start=1):  # header width 9 = row's 2 + 1 + 6
+        print("{:<2} {:<6} {:<25} {:<10}".format(step, f'({entry.instr2exec})', entry.instr, entry.latency_cc))
+    print("\nTotal latency for all instructions:", cgra.total_latency_cc, "CC")
diff --git a/src/operation_characterization.csv b/src/operation_characterization.csv
@@ -0,0 +1,27 @@
+# operation_latency_cc_mapping
+NOP,1
+EXIT,2
+SADD,1
+SSUB,1
+SLT,1
+SRT,1
+SRA,1
+LAND,1
+LOR,1
+LXOR,1
+LNAND,1
+LNOR,1
+LXNOR,1
+BSFA,1
+BZFA,1
+BEQ,1
+BNE,1
+BLT,1
+BGE,1
+JUMP,1
+LWD,2
+SWD,2
+LWI,2
+SWI,2
+SMUL,3
+FXPMUL,3