-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
🚧 WIP cmake: enable -mcall-prologues
#4732
base: MK3
Are you sure you want to change the base?
Conversation
All values in bytes. Δ Delta to base
|
9e495d9
to
d3ed7d1
Compare
Took a look at
plan_buffer_line WITHOUT -mcall-prologues (click to view code block)00030d68 <plan_buffer_line(float, float, float, float const&, float, float const*, unsigned int)>:
void plan_buffer_line(float x, float y, float z, const float &e, float feed_rate, const float* gcode_start_position, uint16_t segment_idx)
{
30d68: 2f 92 push r2
30d6a: 3f 92 push r3
30d6c: 4f 92 push r4
30d6e: 5f 92 push r5
30d70: 6f 92 push r6
30d72: 7f 92 push r7
30d74: 8f 92 push r8
30d76: 9f 92 push r9
30d78: af 92 push r10
30d7a: bf 92 push r11
30d7c: cf 92 push r12
30d7e: df 92 push r13
30d80: ef 92 push r14
30d82: ff 92 push r15
30d84: 0f 93 push r16
30d86: 1f 93 push r17
30d88: cf 93 push r28
30d8a: df 93 push r29
30d8c: cd b7 in r28, 0x3d ; 61
30d8e: de b7 in r29, 0x3e ; 62
30d90: c5 58 subi r28, 0x85 ; 133
30d92: d1 09 sbc r29, r1
30d94: 0f b6 in r0, 0x3f ; 63
30d96: f8 94 cli
30d98: de bf out 0x3e, r29 ; 62
30d9a: 0f be out 0x3f, r0 ; 63
30d9c: cd bf out 0x3d, r28 ; 61
30d9e: 69 a3 std Y+33, r22 ; 0x21
30da0: 7a a3 std Y+34, r23 ; 0x22
30da2: 8b a3 std Y+35, r24 ; 0x23
30da4: 9c a3 std Y+36, r25 ; 0x24
30da6: 2d a3 std Y+37, r18 ; 0x25
30da8: 3e a3 std Y+38, r19 ; 0x26
30daa: 4f a3 std Y+39, r20 ; 0x27
30dac: 58 a7 std Y+40, r21 ; 0x28
30dae: a8 96 adiw r28, 0x28 ; 40
30db0: ec ae std Y+60, r14 ; 0x3c
30db2: fd ae std Y+61, r15 ; 0x3d
30db4: 0e af std Y+62, r16 ; 0x3e
30db6: 1f af std Y+63, r17 ; 0x3f
30db8: a8 97 sbiw r28, 0x28 ; 40
30dba: aa 96 adiw r28, 0x2a ; 42
30dbc: df ae std Y+63, r13 ; 0x3f
30dbe: ce ae std Y+62, r12 ; 0x3e
30dc0: aa 97 sbiw r28, 0x2a ; 42
30dc2: 89 aa std Y+49, r8 ; 0x31
30dc4: 99 ae std Y+57, r9 ; 0x39
30dc6: ad ae std Y+61, r10 ; 0x3d
30dc8: bd aa std Y+53, r11 ; 0x35
30dca: c5 56 subi r28, 0x65 ; 101
30dcc: df 4f sbci r29, 0xFF ; 255
30dce: 08 81 ld r16, Y
30dd0: 19 81 ldd r17, Y+1 ; 0x01
30dd2: cb 59 subi r28, 0x9B ; 155
30dd4: d0 40 sbci r29, 0x00 ; 0
// CRITICAL_SECTION_START; //prevent stack overflow in ISR
// printf_P(PSTR("plan_buffer_line(%f, %f, %f, %f, %f, %u, [%f,%f,%f,%f], %u)\n"), x, y, z, e, feed_rate, extruder, gcode_start_position[0], gcode_start_position[1], gcode_start_position[2], gcode_start_position[3], segment_idx);
// CRITICAL_SECTION_END;
// Calculate the buffer head after we push this byte
uint8_t next_buffer_head = next_block_index(block_buffer_head);
30dd6: 80 91 59 0e lds r24, 0x0E59 ; 0x800e59 <block_buffer_head>
static bool plan_reset_next_e_sched;
// Returns the index of the next block in the ring buffer
// NOTE: Removed modulo (%) operator, which uses an expensive divide and multiplication.
static inline uint8_t next_block_index(uint8_t block_index) {
if (++ block_index == BLOCK_BUFFER_SIZE)
30dda: 8f 5f subi r24, 0xFF ; 255
30ddc: a1 96 adiw r28, 0x21 ; 33
30dde: 8f af std Y+63, r24 ; 0x3f
30de0: a1 97 sbiw r28, 0x21 ; 33
30de2: 80 31 cpi r24, 0x10 ; 16
30de4: 19 f4 brne .+6 ; 0x30dec <plan_buffer_line(float, float, float, float const&, float, float const*, unsigned int)+0x84>
block_index = 0;
30de6: a1 96 adiw r28, 0x21 ; 33
30de8: 1f ae std Y+63, r1 ; 0x3f
30dea: a1 97 sbiw r28, 0x21 ; 33
// Calculate the buffer head after we push this byte
uint8_t next_buffer_head = next_block_index(block_buffer_head);
// If the buffer is full: good! That means we are well ahead of the robot.
// Rest here until there is room in the buffer.
if (block_buffer_tail == next_buffer_head) {
30dec: 80 91 5a 0e lds r24, 0x0E5A ; 0x800e5a <block_buffer_tail>
30df0: a1 96 adiw r28, 0x21 ; 33
30df2: 2f ad ldd r18, Y+63 ; 0x3f
30df4: a1 97 sbiw r28, 0x21 ; 33
30df6: 82 13 cpse r24, r18
30df8: 0f c0 rjmp .+30 ; 0x30e18 <plan_buffer_line(float, float, float, float const&, float, float const*, unsigned int)+0xb0>
do {
manage_heater();
30dfa: 0f 94 bd 2b call 0x2577a ; 0x2577a <manage_heater()>
// Vojtech: Don't disable motors inside the planner!
manage_inactivity(false);
30dfe: 80 e0 ldi r24, 0x00 ; 0
30e00: 0e 94 a2 6d call 0xdb44 ; 0xdb44 <manage_inactivity(bool)>
lcd_update(0);
30e04: 80 e0 ldi r24, 0x00 ; 0
30e06: 0e 94 90 53 call 0xa720 ; 0xa720 <lcd_update(unsigned char)>
} while (block_buffer_tail == next_buffer_head);
30e0a: 80 91 5a 0e lds r24, 0x0E5A ; 0x800e5a <block_buffer_tail>
30e0e: a1 96 adiw r28, 0x21 ; 33
30e10: 3f ad ldd r19, Y+63 ; 0x3f
30e12: a1 97 sbiw r28, 0x21 ; 33
30e14: 83 17 cp r24, r19
30e16: 89 f3 breq .-30 ; 0x30dfa <plan_buffer_line(float, float, float, float const&, float, float const*, unsigned int)+0x92>
}
#ifdef PLANNER_DIAGNOSTICS
planner_update_queue_min_counter();
#endif /* PLANNER_DIAGNOSTICS */
if(planner_aborted) {
30e18: 40 91 60 0e lds r20, 0x0E60 ; 0x800e60 <planner_aborted>
30e1c: a2 96 adiw r28, 0x22 ; 34
30e1e: 4f af std Y+63, r20 ; 0x3f
30e20: a2 97 sbiw r28, 0x22 ; 34
30e22: 44 23 and r20, r20
30e24: 11 f1 breq .+68 ; 0x30e6a <plan_buffer_line(float, float, float, float const&, float, float const*, unsigned int)+0x102>
// avoid planning the block early if aborted
SERIAL_ECHO_START;
30e26: 89 e0 ldi r24, 0x09 ; 9
30e28: 99 e6 ldi r25, 0x69 ; 105
30e2a: 0e 94 13 5b call 0xb626 ; 0xb626 <serialprintPGM(char const*)>
SERIAL_ECHOLNRPGM(_n("Move aborted"));
30e2e: 86 e3 ldi r24, 0x36 ; 54
30e30: 91 e7 ldi r25, 0x71 ; 113
30e32: 0e 94 3a 5d call 0xba74 ; 0xba74 <serialprintlnPGM(char const*)>
// The stepper timer interrupt will run continuously from now on.
// If there are no planner blocks to be executed by the stepper routine,
// the stepper interrupt ticks at 1kHz to wake up and pick a block
// from the planner queue if available.
ENABLE_STEPPER_DRIVER_INTERRUPT();
}
30e36: cb 57 subi r28, 0x7B ; 123
30e38: df 4f sbci r29, 0xFF ; 255
30e3a: 0f b6 in r0, 0x3f ; 63
30e3c: f8 94 cli
30e3e: de bf out 0x3e, r29 ; 62
30e40: 0f be out 0x3f, r0 ; 63
30e42: cd bf out 0x3d, r28 ; 61
30e44: df 91 pop r29
30e46: cf 91 pop r28
30e48: 1f 91 pop r17
30e4a: 0f 91 pop r16
30e4c: ff 90 pop r15
30e4e: ef 90 pop r14
30e50: df 90 pop r13
30e52: cf 90 pop r12
30e54: bf 90 pop r11
30e56: af 90 pop r10
30e58: 9f 90 pop r9
30e5a: 8f 90 pop r8
30e5c: 7f 90 pop r7
30e5e: 6f 90 pop r6
30e60: 5f 90 pop r5
30e62: 4f 90 pop r4
30e64: 3f 90 pop r3
30e66: 2f 90 pop r2
30e68: 08 95 ret
SERIAL_ECHOLNRPGM(_n("Move aborted"));
return;
} When plan_buffer_line WITH -mcall-prologues (click to view code block)0002fbd0 <plan_buffer_line(float, float, float, float const&, float, float const*, unsigned int)>:
void plan_buffer_line(float x, float y, float z, const float &e, float feed_rate, const float* gcode_start_position, uint16_t segment_idx)
{
2fbd0: a5 e8 ldi r26, 0x85 ; 133
2fbd2: b0 e0 ldi r27, 0x00 ; 0
2fbd4: ef e8 ldi r30, 0x8F ; 143
2fbd6: f3 e0 ldi r31, 0x03 ; 3
2fbd8: 0d 94 8a b1 jmp 0x36314 ; 0x36314 <__prologue_saves__>
2fbdc: 69 a3 std Y+33, r22 ; 0x21
2fbde: 7a a3 std Y+34, r23 ; 0x22
2fbe0: 8b a3 std Y+35, r24 ; 0x23
2fbe2: 9c a3 std Y+36, r25 ; 0x24
2fbe4: 2d a3 std Y+37, r18 ; 0x25
2fbe6: 3e a3 std Y+38, r19 ; 0x26
2fbe8: 4f a3 std Y+39, r20 ; 0x27
2fbea: 58 a7 std Y+40, r21 ; 0x28
2fbec: a8 96 adiw r28, 0x28 ; 40
2fbee: ec ae std Y+60, r14 ; 0x3c
2fbf0: fd ae std Y+61, r15 ; 0x3d
2fbf2: 0e af std Y+62, r16 ; 0x3e
2fbf4: 1f af std Y+63, r17 ; 0x3f
2fbf6: a8 97 sbiw r28, 0x28 ; 40
2fbf8: aa 96 adiw r28, 0x2a ; 42
2fbfa: df ae std Y+63, r13 ; 0x3f
2fbfc: ce ae std Y+62, r12 ; 0x3e
2fbfe: aa 97 sbiw r28, 0x2a ; 42
2fc00: 89 aa std Y+49, r8 ; 0x31
2fc02: 99 ae std Y+57, r9 ; 0x39
2fc04: ad ae std Y+61, r10 ; 0x3d
2fc06: bd aa std Y+53, r11 ; 0x35
2fc08: c5 56 subi r28, 0x65 ; 101
2fc0a: df 4f sbci r29, 0xFF ; 255
2fc0c: 08 81 ld r16, Y
2fc0e: 19 81 ldd r17, Y+1 ; 0x01
2fc10: cb 59 subi r28, 0x9B ; 155
2fc12: d0 40 sbci r29, 0x00 ; 0
// CRITICAL_SECTION_START; //prevent stack overflow in ISR
// printf_P(PSTR("plan_buffer_line(%f, %f, %f, %f, %f, %u, [%f,%f,%f,%f], %u)\n"), x, y, z, e, feed_rate, extruder, gcode_start_position[0], gcode_start_position[1], gcode_start_position[2], gcode_start_position[3], segment_idx);
// CRITICAL_SECTION_END;
// Calculate the buffer head after we push this byte
uint8_t next_buffer_head = next_block_index(block_buffer_head);
2fc14: 80 91 59 0e lds r24, 0x0E59 ; 0x800e59 <block_buffer_head>
static bool plan_reset_next_e_sched;
// Returns the index of the next block in the ring buffer
// NOTE: Removed modulo (%) operator, which uses an expensive divide and multiplication.
static inline uint8_t next_block_index(uint8_t block_index) {
if (++ block_index == BLOCK_BUFFER_SIZE)
2fc18: 8f 5f subi r24, 0xFF ; 255
2fc1a: a1 96 adiw r28, 0x21 ; 33
2fc1c: 8f af std Y+63, r24 ; 0x3f
2fc1e: a1 97 sbiw r28, 0x21 ; 33
2fc20: 80 31 cpi r24, 0x10 ; 16
2fc22: 19 f4 brne .+6 ; 0x2fc2a <plan_buffer_line(float, float, float, float const&, float, float const*, unsigned int)+0x5a>
block_index = 0;
2fc24: a1 96 adiw r28, 0x21 ; 33
2fc26: 1f ae std Y+63, r1 ; 0x3f
2fc28: a1 97 sbiw r28, 0x21 ; 33
// Calculate the buffer head after we push this byte
uint8_t next_buffer_head = next_block_index(block_buffer_head);
// If the buffer is full: good! That means we are well ahead of the robot.
// Rest here until there is room in the buffer.
if (block_buffer_tail == next_buffer_head) {
2fc2a: 80 91 5a 0e lds r24, 0x0E5A ; 0x800e5a <block_buffer_tail>
2fc2e: a1 96 adiw r28, 0x21 ; 33
2fc30: 2f ad ldd r18, Y+63 ; 0x3f
2fc32: a1 97 sbiw r28, 0x21 ; 33
2fc34: 82 13 cpse r24, r18
2fc36: 0f c0 rjmp .+30 ; 0x2fc56 <plan_buffer_line(float, float, float, float const&, float, float const*, unsigned int)+0x86>
do {
manage_heater();
2fc38: 0f 94 5e 27 call 0x24ebc ; 0x24ebc <manage_heater()>
// Vojtech: Don't disable motors inside the planner!
manage_inactivity(false);
2fc3c: 80 e0 ldi r24, 0x00 ; 0
2fc3e: 0e 94 b8 6c call 0xd970 ; 0xd970 <manage_inactivity(bool)>
lcd_update(0);
2fc42: 80 e0 ldi r24, 0x00 ; 0
2fc44: 0e 94 7d 53 call 0xa6fa ; 0xa6fa <lcd_update(unsigned char)>
} while (block_buffer_tail == next_buffer_head);
2fc48: 80 91 5a 0e lds r24, 0x0E5A ; 0x800e5a <block_buffer_tail>
2fc4c: a1 96 adiw r28, 0x21 ; 33
2fc4e: 3f ad ldd r19, Y+63 ; 0x3f
2fc50: a1 97 sbiw r28, 0x21 ; 33
2fc52: 83 17 cp r24, r19
2fc54: 89 f3 breq .-30 ; 0x2fc38 <plan_buffer_line(float, float, float, float const&, float, float const*, unsigned int)+0x68>
}
#ifdef PLANNER_DIAGNOSTICS
planner_update_queue_min_counter();
#endif /* PLANNER_DIAGNOSTICS */
if(planner_aborted) {
2fc56: 40 91 60 0e lds r20, 0x0E60 ; 0x800e60 <planner_aborted>
2fc5a: a2 96 adiw r28, 0x22 ; 34
2fc5c: 4f af std Y+63, r20 ; 0x3f
2fc5e: a2 97 sbiw r28, 0x22 ; 34
2fc60: 44 23 and r20, r20
2fc62: 69 f0 breq .+26 ; 0x2fc7e <plan_buffer_line(float, float, float, float const&, float, float const*, unsigned int)+0xae>
// avoid planning the block early if aborted
SERIAL_ECHO_START;
2fc64: 85 e2 ldi r24, 0x25 ; 37
2fc66: 9b e6 ldi r25, 0x6B ; 107
2fc68: 0e 94 ef 5a call 0xb5de ; 0xb5de <serialprintPGM(char const*)>
SERIAL_ECHOLNRPGM(_n("Move aborted"));
2fc6c: 82 e5 ldi r24, 0x52 ; 82
2fc6e: 93 e7 ldi r25, 0x73 ; 115
2fc70: 0e 94 04 5d call 0xba08 ; 0xba08 <serialprintlnPGM(char const*)>
// The stepper timer interrupt will run continuously from now on.
// If there are no planner blocks to be executed by the stepper routine,
// the stepper interrupt ticks at 1kHz to wake up and pick a block
// from the planner queue if available.
ENABLE_STEPPER_DRIVER_INTERRUPT();
}
2fc74: cb 57 subi r28, 0x7B ; 123
2fc76: df 4f sbci r29, 0xFF ; 255
2fc78: e2 e1 ldi r30, 0x12 ; 18
2fc7a: 0d 94 a6 b1 jmp 0x3634c ; 0x3634c <__epilogue_restores__>
SERIAL_ECHOLNRPGM(_n("Move aborted"));
return;
} Where we have the following new routines: __prologue_saves__ (click to view code block)00036314 <__prologue_saves__>:
36314: 2f 92 push r2
36316: 3f 92 push r3
36318: 4f 92 push r4
3631a: 5f 92 push r5
3631c: 6f 92 push r6
3631e: 7f 92 push r7
36320: 8f 92 push r8
36322: 9f 92 push r9
36324: af 92 push r10
36326: bf 92 push r11
36328: cf 92 push r12
3632a: df 92 push r13
3632c: ef 92 push r14
3632e: ff 92 push r15
36330: 0f 93 push r16
36332: 1f 93 push r17
36334: cf 93 push r28
36336: df 93 push r29
36338: cd b7 in r28, 0x3d ; 61
3633a: de b7 in r29, 0x3e ; 62
3633c: ca 1b sub r28, r26
3633e: db 0b sbc r29, r27
36340: 0f b6 in r0, 0x3f ; 63
36342: f8 94 cli
36344: de bf out 0x3e, r29 ; 62
36346: 0f be out 0x3f, r0 ; 63
36348: cd bf out 0x3d, r28 ; 61
3634a: 19 94 eijmp It's interesting that the epilogue uses __epilogue_restores__ (click to view code block)0003634c <__epilogue_restores__>:
3634c: 2a 88 ldd r2, Y+18 ; 0x12
3634e: 39 88 ldd r3, Y+17 ; 0x11
36350: 48 88 ldd r4, Y+16 ; 0x10
36352: 5f 84 ldd r5, Y+15 ; 0x0f
36354: 6e 84 ldd r6, Y+14 ; 0x0e
36356: 7d 84 ldd r7, Y+13 ; 0x0d
36358: 8c 84 ldd r8, Y+12 ; 0x0c
3635a: 9b 84 ldd r9, Y+11 ; 0x0b
3635c: aa 84 ldd r10, Y+10 ; 0x0a
3635e: b9 84 ldd r11, Y+9 ; 0x09
36360: c8 84 ldd r12, Y+8 ; 0x08
36362: df 80 ldd r13, Y+7 ; 0x07
36364: ee 80 ldd r14, Y+6 ; 0x06
36366: fd 80 ldd r15, Y+5 ; 0x05
36368: 0c 81 ldd r16, Y+4 ; 0x04
3636a: 1b 81 ldd r17, Y+3 ; 0x03
3636c: aa 81 ldd r26, Y+2 ; 0x02
3636e: b9 81 ldd r27, Y+1 ; 0x01
36370: ce 0f add r28, r30
36372: d1 1d adc r29, r1
36374: 0f b6 in r0, 0x3f ; 63
36376: f8 94 cli
36378: de bf out 0x3e, r29 ; 62
3637a: 0f be out 0x3f, r0 ; 63
3637c: cd bf out 0x3d, r28 ; 61
3637e: ed 01 movw r28, r26
36380: 08 95 ret |
Reading further, I found that the stack pointer registers SPH and SPL have addresses 0x3E and 0x3D, and that 0x3F is SREG (the AVR Status Register). According to the info here https://gcc.gnu.org/wiki/avr-gcc, Y points to the stack frame, not the stack pointer.
|
d3ed7d1
to
cbc1b32
Compare
cbc1b32
to
bd33c6d
Compare
-mcall-prologues
-mcall-prologues
Reduces flash memory usage by ~5KB. The CMake build outputs a human-readable ASM file for the firmware image. To see the impacted functions, look for these routines: __prologue_saves__ and __epilogue_restores__. They should come in pairs. For more info about this option, see: https://gcc.gnu.org/onlinedocs/gcc-7.3.0/gcc/AVR-Options.html#AVR-Options
bd33c6d
to
cb85623
Compare
Idea to reduce flash memory usage by ~5KB. There is some performance impact due to calling two routines, which needs to be scoped/investigated. If we run out of flash memory, then this could be a good way to free some of it.