improved sample code

michaelkamprath · Apr 25, 2024 · 2eb5702 · 2eb5702
1 parent 2c5d8f3
commit 2eb5702
Show file tree

Hide file tree

Showing 4 changed files with 378 additions and 16 deletions.
diff --git a/examples/slu4-minimal-64x4/README.md b/examples/slu4-minimal-64x4/README.md
@@ -18,6 +18,8 @@ The arguments to the command above are:
 * `-c /path/to/slu4-minimal-64x4.yaml` - The file path to the **BespokeASM** instruction set configuration for the Minimal 64x4.
 * `/path/to/my-code.min64x4` - The file path to the Minimal 64x4 assembly code to be compiled. Here by convention the assembly code has a file extension of `.min64x4`. While **BespokeASM** can work with any file extension for the code, the convention is used so that code editors know what file type they are editing and thus are able to support syntax highlighting specific to the Minimal 64x4 assembly syntax. See [**BespokeASM**'s documentation on syntax highlighting support](https://github.com/michaelkamprath/bespokeasm/wiki/Installation-and-Usage#installing-language-extensions) for more information.
 
+Once compiled, the Intel HEX output can be copied and pasted into the Minimal 64x4 OS's terminal window to load the code into the Minimal 64x4's memory using its `receive` command. Further, the compiled code can be run on [the Minimal 64x4's web-based emulator](https://editor.p5js.org/slu4coder/sketches/lRq1gyYR2).
+
 ### Instruction Set
 Carsten Herting thoroughly documents [the instruction set for the Minimal 64x4 in his user guide](https://docs.google.com/document/d/1-nDv_8WEG1FrlO3kEK0icoYo-Z-jlhpCMiCstxGOCjQ/edit?usp=sharing). All of the documented instructions in their original syntax are implemented in this **BespokeASM** port. However, **BespokeASM** will be case insensitive when matching instruction mnemonics.
 

diff --git a/examples/slu4-minimal-64x4/slu4-minimal-64x4.yaml b/examples/slu4-minimal-64x4/slu4-minimal-64x4.yaml
@@ -2712,6 +2712,18 @@ macros:
         - "phs"
         - "ldi BYTE1(@ARG(0))"
         - "phs"
+  phs2s:
+    # push the 2 bytes found at the passed (current) stack offset onto the stack
+    - operands:
+        count: 1
+        operand_sets:
+          list:
+            - offset
+      instructions:
+        - "lds @ARG(0)+1+0"  # byte at offset+1 is loaded and pushed first
+        - "phs"
+        - "lds @ARG(0)+0+1"  # byte at offset+0; the extra +1 presumably compensates for the byte just pushed - TODO confirm
+        - "phs"
   pls2:
     # pull 2 bytes from the stack
     - operands:
@@ -2727,6 +2739,34 @@ macros:
         - "pls"
         - "pls"
         - "pls"
+  ms2v:
+    # move 2 bytes from the stack, starting at the passed offset, to a zero-page word
+    # stack is arranged as big-endian, word in zero-page is little-endian (byte order is swapped by the copy)
+    - operands:
+        count: 2
+        operand_sets:
+          list:
+            - offset
+            - zero_page
+      instructions:
+        - "lds @ARG(0)+0"  # stack byte at offset+0 ...
+        - "stz @ARG(1)+1"  # ... goes to zero-page byte +1
+        - "lds @ARG(0)+1"  # stack byte at offset+1 ...
+        - "stz @ARG(1)+0"  # ... goes to zero-page byte +0
+  mvs2:
+    # move 2 bytes from zero-page word to stack starting at passed offset
+    # stack is arranged as big-endian, word in zero-page is little-endian (byte order is swapped by the copy)
+    - operands:
+        count: 2
+        operand_sets:
+          list:
+            - zero_page
+            - offset
+      instructions:
+        - "ldz @ARG(0)+1"  # zero-page byte +1 ...
+        - "sts @ARG(1)+0"  # ... goes to stack byte at offset+0
+        - "ldz @ARG(0)+0"  # zero-page byte +0 ...
+        - "sts @ARG(1)+1"  # ... goes to stack byte at offset+1
   ms4q:
     # move 4 bytes from stack starting at passed offset to zero-page long
     # stack is arranged as big-endian, long in zero-page is little-endian
@@ -2779,3 +2819,28 @@ macros:
         - "ac.z @ARG(1)+2"
         - "ldz @ARG(0)+3"
         - "ac.z @ARG(1)+3"
+  sqq:
+    # subtract two zero-page longs: second operand = second operand - first operand
+    # (*Q2 = *Q2 - *Q1). The low word (bytes +0 and +1) is subtracted with the
+    # word instruction svv; bytes +2 and +3 are then subtracted with carry.
+    # A byte-wide szz here would skip byte +1 entirely and produce wrong results.
+    - operands:
+        count: 2
+        operand_sets:
+          list:
+            - zero_page
+            - zero_page
+      instructions:
+        - "svv @ARG(0)+0,@ARG(1)+0"
+        - "ldz @ARG(0)+2"
+        - "sc.z @ARG(1)+2"
+        - "ldz @ARG(0)+3"
+        - "sc.z @ARG(1)+3"
+  mqq:
+    # copy the zero-page long at the first operand to the zero-page long at the second operand
+    - operands:
+        count: 2
+        operand_sets:
+          list:
+            - zero_page
+            - zero_page
+      instructions:
+        - "mvv @ARG(0)+0,@ARG(1)+0"  # bytes +0 and +1
+        - "mvv @ARG(0)+2,@ARG(1)+2"  # bytes +2 and +3
diff --git a/examples/slu4-minimal-64x4/software/mathlib32.min64x4 b/examples/slu4-minimal-64x4/software/mathlib32.min64x4
@@ -3,9 +3,7 @@
 .memzone ZERO_PAGE_APPS
 #mute
 _temp_byte1:            .byte 0
-_temp_byte2:            .byte 0
-_temp_byte3:            .byte 0
-_temp_byte4:            .byte 0
+_temp_long1:            .zero 4
 _working_mem8:          .zero 8
 _multiply_sign_byte:    .byte 0
 _argX4:                 .zero 4
@@ -25,13 +23,27 @@ _counter:               .byte 0
 ;   Returns
 ;       flags will be set per comparison
 ;
-compare_uint32:
-    ; first check high bytes, then others in sequence
-    ; values on stack are stored big endian
-    lds (3+0) stz _temp_byte1 lds (7+0) cpz _temp_byte1 bne .done
-    lds (3+1) stz _temp_byte1 lds (7+1) cpz _temp_byte1 bne .done
-    lds (3+2) stz _temp_byte1 lds (7+2) cpz _temp_byte1 bne .done
-    lds (3+3) stz _temp_byte1 lds (7+3) cpz _temp_byte1
+compare_uint32ss:
+    ; copy both 4-byte stack arguments into zero page, then let _compare_uint32_XY do the comparison
+    ms4q 7,_argX4               ; X = 4 bytes starting at stack offset 7
+    ms4q 3,_argY4               ; Y = 4 bytes starting at stack offset 3
+    jps _compare_uint32_XY      ; flags are set by the comparison of X and Y
+    rts
+
+;   compares the two longs (4 bytes each, little-endian) held in _argX4 and _argY4.
+;       X ? Y
+;
+;   Arguments
+;       X - _argX4
+;       Y - _argY4
+;
+;  Returns
+;     flags will be set per comparison (callers in this file use beq for X == Y and bgt for X > Y)
+_compare_uint32_XY:
+    czz _argY4+3,_argX4+3 fne .done     ; most significant byte first; stop at the first byte pair that differs
+    czz _argY4+2,_argX4+2 fne .done
+    czz _argY4+1,_argX4+1 fne .done
+    czz _argY4+0,_argX4+0               ; all higher bytes equal: flags come from the lowest byte
 .done:
     rts
 
@@ -82,7 +94,7 @@ multiply_int32:
 
 _multiply:
     ; set counter for 32 bits
-    ldi 32 stz _counter
+    miz 32,_counter
 .mult_loop:
     ; check to see if LSb of working memory is 1
     ldz _working_mem8+0 lr1 bcc .continue
@@ -99,28 +111,141 @@ _multiply:
     rrz _working_mem8+1
     rrz _working_mem8+0
     ; decrement counter, then compare it directly against 0 and stop when it reaches 0
-    dez _counter cpi 0 bne .mult_loop
+    dez _counter ciz 0,_counter bne .mult_loop
 .set_sign:
     ; check to see if result is negative:
     ciz 0,_multiply_sign_byte beq .copy_results
     ; take twos complement of 8-byte results
     noq _working_mem8+0
     noq _working_mem8+4
-    inw _working_mem8+0 bcc .copy_results       ; if only INQ set the flags :-(
-    inw _working_mem8+2 bcc .copy_results
-    inw _working_mem8+4 bcc .copy_results
+    inw _working_mem8+0 fcc .copy_results       ; if only INQ set the flags :-(
+    inw _working_mem8+2 fcc .copy_results
+    inw _working_mem8+4 fcc .copy_results
     inw _working_mem8+6
 .copy_results:
     ; the entire working memory is the 64-bit results
     mqs4 _working_mem8+4,3+0
     mqs4 _working_mem8+0,3+4
     rts
 
+; divide32
+;   Divides X by Y (note, unsigned only)
+;
+;   Arguments:
+;       sp+3 : value X dividend (4 bytes)
+;       sp+7 : value Y divisor (4 bytes)
+;
+;   Return Value:
+;       sp+3 : the quotient (replaces X)
+;       sp+7 : the remainder (replaces Y)
+;
+;   Special cases:
+;       Y == 0 : quotient and remainder are both returned as 0
+;       X == 0 : quotient and remainder are both returned as 0
+;       Y > X  : quotient is 0 and remainder is X
+;
+divide32:
+    ; first check divisor is not 0
+    ms4q 7,_argX4
+    clq _argY4
+    jps _compare_uint32_XY
+    beq .divide_by_zero
+    ; check if dividend is 0 (_argY4 is still 0)
+    ms4q 3,_argX4
+    jps _compare_uint32_XY
+    beq .return_zero
+    ; check if divisor > dividend
+    ms4q 7,_argX4        ; get divisor
+    ms4q 3,_argY4        ; get dividend
+    jps _compare_uint32_XY
+    bgt .divisor_too_large
+.start_division:
+    ; set up working memory:
+    ;   little endian
+    ;   _working_mem8+0 : init with dividend (4 bytes)  --> becomes quotient
+    ;   _working_mem8+4 : set to zero (4 bytes) --> becomes remainder
+    ;   _temp_long1     : divisor
+    ;   _temp_byte1     : carry bit (quotient bit produced by the previous iteration)
+    clz _temp_byte1             ; init carry bit
+    ms4q 3,_working_mem8+0      ; init low long with dividend
+    clq _working_mem8+4         ; init high long
+    ms4q 7,_temp_long1          ; divisor
+    miz 32,_counter             ; init loop counter
+
+.div_loop:
+    ; shift working memory and add carry bit to the right side
+    jps .div_lsl64
+    azz _temp_byte1,_working_mem8+0     ; add carry bit to low byte
+    clz _temp_byte1                     ; clear carry bit
+    ; the subtraction is only possible when the high long of _working_mem8
+    ; is equal to or larger than the divisor
+    mqq _temp_long1,_argX4              ; set _argX4 to divisor
+    mqq _working_mem8+4,_argY4          ; set _argY4 to the high long of _working_mem8
+    jps _compare_uint32_XY
+    bgt .div_loop_continue              ; divisor > high long: skip the subtraction
+.div_loop_subtraction:
+    ; working value is equal to or larger than divisor
+    ; do the subtraction
+    sqq _temp_long1,_working_mem8+4     ; subtract divisor from high long
+    miz 1,_temp_byte1                   ; set carry bit
+.div_loop_continue:
+    ; decrement counter and check for 0
+    dez _counter bne .div_loop
+
+
+.division_done:
+    ; at this point we have the remainder in the high long, save it
+    mqs4 _working_mem8+4,7
+    ; and then we left shift one more time to get the quotient
+    jps .div_lsl64
+    azz _temp_byte1,_working_mem8+0     ; add carry bit to low byte
+    ; the quotient is in _working_mem8+0
+    mqs4 _working_mem8+0,3
+    rts
+.divisor_too_large:
+    ; quotient = 0, remainder = dividend
+    ms4q 3,_argX4               ; get dividend
+    mqs4 _argX4,7               ; set remainder to dividend
+    clq _argX4                  ; set quotient to 0
+    mqs4 _argX4,3               ; set quotient
+    rts
+.divide_by_zero:
+    ; for now, just return 0 for both quotient and remainder
+.return_zero:
+    ; _argY4 is still 0 on both paths that reach here. Both results must be
+    ; written: sp+7 still holds the divisor, and 0 / Y has a remainder of 0.
+    mqs4 _argY4,7               ; remainder = 0
+    mqs4 _argY4,3               ; quotient = 0
+    rts
+; .div_lsl64
+;
+;   local method for shifting the 8 bytes of _working_mem8 left 1 bit
+.div_lsl64:
+    llz _working_mem8+0
+    rlz _working_mem8+1
+    rlz _working_mem8+2
+    rlz _working_mem8+3
+    rlz _working_mem8+4
+    rlz _working_mem8+5
+    rlz _working_mem8+6
+    rlz _working_mem8+7
+    rts
+
+
+
+; subtracts the 4 byte value in _argY4 from the 4 byte value in _argX4, leaving the result in _argX4
+;   X - Y
+;
+; Arguments
+;   X - _argX4
+;   Y - _argY4
+;
+; Return Value
+;   _argX4 = X - Y
+_subtract32:
+    svv _argY4+0,_argX4+0           ; low word (bytes +0 and +1)
+    ldz _argY4+2 sc.z _argX4+2      ; byte +2, presumably including the borrow from the low word
+    ldz _argY4+3 sc.z _argX4+3      ; byte +3, presumably including the borrow from byte +2
+    rts
+
+
 ; _print_working_memory
 ;   prints the contents of the _working_mem8 8 bytes in hex
 ;   used for debugging purposes.
 _print_working_memory:
-    jps _Print "working memory = $"
+    jps _Print "work mem = $"
     ldz _working_mem8+7 jas _PrintHex
     ldz _working_mem8+6 jas _PrintHex
     ldz _working_mem8+5 jas _PrintHex