| asm(
/* `_lightweight_swapcontext`: stack-switching routine emitted as file-scope
   assembly. It receives two arguments, referred to below as
   `current_pointer_out` and `dest_pointer`, and performs these steps in order:
     1. push the preserved registers (plus the return address on architectures
        that keep it in a register) onto the current stack;
     2. store the resulting stack pointer through `current_pointer_out`;
     3. make `dest_pointer` the new stack pointer;
     4. pop the register set found on the new stack, in exact mirror order;
     5. return to the return address found on the new stack.
   The save sequence (step 1) and the restore sequence (step 4) must remain
   exact mirrors of each other for every architecture. */
#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__arm64__) || defined(__aarch64__) || defined(__s390x__) || defined(__powerpc64__)
// We keep architecture-specific code interleaved in order to enforce commonality.
#if defined(__x86_64__)
#if defined(__LP64__) || defined(__LLP64__)
// Pointers are of the right size
#else
// Having non-native-sized pointers makes things very messy.
#error "Non-native pointer size."
#endif
#endif // defined(__x86_64__)
".text\n"
"_lightweight_swapcontext:\n"
/* Where the two arguments live on entry. */
#if defined(__i386__)
/* `current_pointer_out` is in `4(%esp)`. `dest_pointer` is in `8(%esp)`.
   (`0(%esp)` holds the return address; no frame pointer is set up.) */
#elif defined(__x86_64__)
/* `current_pointer_out` is in `%rdi`. `dest_pointer` is in `%rsi`. */
#elif defined(__arm__)
/* `current_pointer_out` is in `r0`. `dest_pointer` is in `r1`. */
#elif defined(__arm64__) || defined(__aarch64__)
/* `current_pointer_out` is in `x0`. `dest_pointer` is in `x1`. */
#elif defined(__s390x__)
/* `current_pointer_out` is in `%r2`. `dest_pointer` is in `%r3`. */
#elif defined(__powerpc64__)
/* `current_pointer_out` is in `r3`. `dest_pointer` is in `r4`. */
#endif
// Save preserved registers.
#if defined(__i386__)
// Preserve esi, edi, ebx, and ebp. The return address is already on the stack.
"push %esi\n"
"push %edi\n"
"push %ebx\n"
"push %ebp\n"
#elif defined(__x86_64__)
// Preserve r12-r15, rbx, and rbp. The return address is already on the stack.
"pushq %r12\n"
"pushq %r13\n"
"pushq %r14\n"
"pushq %r15\n"
"pushq %rbx\n"
"pushq %rbp\n"
#elif defined(__arm__)
// Preserve r4-r12 and the return address (r14). For consistency with x86 r12 is
// pushed first, followed by r14 and then r4-r11.
"push {r12}\n"
"push {r14}\n"
"push {r4-r11}\n"
#elif defined(__arm64__) || defined(__aarch64__)
// Preserve d8-d15, x19-x29 and the return address (x30) in a 0xb0-byte frame.
// Note: x30 is stored twice (at 0x98 as half of the x29/x30 pair, and again at
// 0xa0) due to alignment requirements; the copy at 0xa0 is the one used for
// the final jump.
"sub sp, sp, #0xb0\n"
"stp d8, d9, [sp, #0x00]\n"
"stp d10, d11, [sp, #0x10]\n"
"stp d12, d13, [sp, #0x20]\n"
"stp d14, d15, [sp, #0x30]\n"
"stp x19, x20, [sp, #0x40]\n"
"stp x21, x22, [sp, #0x50]\n"
"stp x23, x24, [sp, #0x60]\n"
"stp x25, x26, [sp, #0x70]\n"
"stp x27, x28, [sp, #0x80]\n"
"stp x29, x30, [sp, #0x90]\n"
"str x30, [sp, #0xa0]\n"
#elif defined(__s390x__)
// Preserve r6-r13, the return address (r14), and f8-f15 in a 136-byte frame:
// f8-f15 occupy offsets 0-63 and r6-r14 occupy offsets 64-135.
"aghi %r15, -136\n"
"stmg %r6, %r14, 64(%r15)\n"
"std %f8, 0(%r15)\n"
"std %f9, 8(%r15)\n"
"std %f10, 16(%r15)\n"
"std %f11, 24(%r15)\n"
"std %f12, 32(%r15)\n"
"std %f13, 40(%r15)\n"
"std %f14, 48(%r15)\n"
"std %f15, 56(%r15)\n"
#elif defined(__powerpc64__)
// Reserve 21 doublewords below the stack pointer (r1) and preserve r2 in
// slot 0, r14-r31 in slots 1-18, the condition register in slot 19 and the
// link register (the return address) in slot 20. r0 is used as scratch.
"addi 1, 1, -(21*8)\n"
"std 2, (8*0)(1)\n"
"std 14, (8*1)(1)\n"
"std 15, (8*2)(1)\n"
"std 16, (8*3)(1)\n"
"std 17, (8*4)(1)\n"
"std 18, (8*5)(1)\n"
"std 19, (8*6)(1)\n"
"std 20, (8*7)(1)\n"
"std 21, (8*8)(1)\n"
"std 22, (8*9)(1)\n"
"std 23, (8*10)(1)\n"
"std 24, (8*11)(1)\n"
"std 25, (8*12)(1)\n"
"std 26, (8*13)(1)\n"
"std 27, (8*14)(1)\n"
"std 28, (8*15)(1)\n"
"std 29, (8*16)(1)\n"
"std 30, (8*17)(1)\n"
"std 31, (8*18)(1)\n"
"mfcr 0\n"
"std 0, (8*19)(1)\n"
"mflr 0\n"
"std 0, (8*20)(1)\n"
#endif
/* Save old stack pointer. */
#if defined(__i386__)
/* i386 passes arguments on the stack. We add ((number of things pushed)+1)*(sizeof(void*)) to esp in order to get the first argument. */
"mov 20(%esp), %ecx\n"
/* We then copy the stack pointer into the space indicated by the first argument. */
"mov %esp, (%ecx)\n"
#elif defined(__x86_64__)
/* On amd64, the first argument comes from rdi. */
"movq %rsp, (%rdi)\n"
#elif defined(__arm__)
/* On ARM, the first argument is in `r0`. `r13` is the stack pointer. */
"str r13, [r0]\n"
#elif defined(__arm64__) || defined(__aarch64__)
/* On ARM64, the first argument is in `x0`. `sp` is the stack pointer and `x4` is a scratch register. */
"mov x4, sp\n"
"str x4, [x0]\n"
#elif defined(__s390x__)
/* On s390x, the first argument is in r2. r15 is the stack pointer. */
"stg %r15, 0(%r2)\n"
#elif defined(__powerpc64__)
/* On powerpc64, the first argument is in r3. r1 is the stack pointer. */
"std 1, 0(3)\n"
#endif
/* Load the new stack pointer and the preserved registers. */
#if defined(__i386__)
/* i386 passes arguments on the stack. We add ((number of things pushed)+2)*(sizeof(void*)) to esp in order to get the second argument.
   esp still points at the old stack here. esi is only a temporary: it is overwritten by the pops below. */
"mov 24(%esp), %esi\n"
/* We then copy the second argument to be the new stack pointer. */
"mov %esi, %esp\n"
#elif defined(__x86_64__)
/* On amd64, the second argument comes from rsi. */
"movq %rsi, %rsp\n"
#elif defined(__arm__)
/* On ARM, the second argument is in `r1` */
"mov r13, r1\n"
#elif defined(__arm64__) || defined(__aarch64__)
/* On ARM64, the second argument is in `x1` */
"mov sp, x1\n"
#elif defined(__s390x__)
/* On s390x, the second argument is in r3 */
"lgr %r15, %r3\n"
#elif defined(__powerpc64__)
/* On powerpc64, the second argument is in r4 */
"mr 1, 4\n"
#endif
/* Restore the preserved registers from the new stack, in the reverse of the
   order in which they were saved above. */
#if defined(__i386__)
"pop %ebp\n"
"pop %ebx\n"
"pop %edi\n"
"pop %esi\n"
#elif defined(__x86_64__)
"popq %rbp\n"
"popq %rbx\n"
"popq %r15\n"
"popq %r14\n"
"popq %r13\n"
"popq %r12\n"
#elif defined(__arm__)
"pop {r4-r11}\n"
"pop {r14}\n"
"pop {r12}\n"
#elif defined(__arm64__) || defined(__aarch64__)
"ldp d8, d9, [sp, #0x00]\n"
"ldp d10, d11, [sp, #0x10]\n"
"ldp d12, d13, [sp, #0x20]\n"
"ldp d14, d15, [sp, #0x30]\n"
"ldp x19, x20, [sp, #0x40]\n"
"ldp x21, x22, [sp, #0x50]\n"
"ldp x23, x24, [sp, #0x60]\n"
"ldp x25, x26, [sp, #0x70]\n"
"ldp x27, x28, [sp, #0x80]\n"
"ldp x29, x30, [sp, #0x90]\n"
/* The second saved copy of the return address goes into the scratch register
   `x4`, which is the jump target of the final `ret x4`. */
"ldr x4, [sp, #0xa0]\n"
"add sp, sp, #0xb0\n"
#elif defined(__s390x__)
"lmg %r6, %r14, 64(%r15)\n"
"ld %f8, 0(%r15)\n"
"ld %f9, 8(%r15)\n"
"ld %f10, 16(%r15)\n"
"ld %f11, 24(%r15)\n"
"ld %f12, 32(%r15)\n"
"ld %f13, 40(%r15)\n"
"ld %f14, 48(%r15)\n"
"ld %f15, 56(%r15)\n"
"aghi %r15, 136\n"
#elif defined(__powerpc64__)
"ld 2, (8*0)(1)\n"
"ld 14, (8*1)(1)\n"
"ld 15, (8*2)(1)\n"
"ld 16, (8*3)(1)\n"
"ld 17, (8*4)(1)\n"
"ld 18, (8*5)(1)\n"
"ld 19, (8*6)(1)\n"
"ld 20, (8*7)(1)\n"
"ld 21, (8*8)(1)\n"
"ld 22, (8*9)(1)\n"
"ld 23, (8*10)(1)\n"
"ld 24, (8*11)(1)\n"
"ld 25, (8*12)(1)\n"
"ld 26, (8*13)(1)\n"
"ld 27, (8*14)(1)\n"
"ld 28, (8*15)(1)\n"
"ld 29, (8*16)(1)\n"
"ld 30, (8*17)(1)\n"
"ld 31, (8*18)(1)\n"
/* Slot 19 is the saved condition register, slot 20 the saved link register;
   both are moved back through the scratch register r0. */
"ld 0, (8*19)(1)\n"
"mtcr 0\n"
"ld 0, (8*20)(1)\n"
"mtlr 0\n"
"addi 1, 1, (8*21)\n"
#endif
/* Jump to the return address that belongs to the new stack. */
#if defined(__i386__) || defined(__x86_64__)
/* The following ret should return to the address set with
`artificial_stack_t()` or with the previous `lightweight_swapcontext`. The
instruction pointer is saved on the stack from the previous call (or
initialized with `artificial_stack_t()`). */
"ret\n"
#elif defined(__arm__)
/* Above, we popped `LR` (`r14`) off the stack, so the bx instruction will
jump to the correct return address. */
"bx r14\n"
#elif defined(__arm64__) || defined(__aarch64__)
/* Above, we loaded the saved return address (the second copy of `x30`) into
the scratch register `x4`, so `ret x4` jumps to the correct return address. */
"ret x4\n"
#elif defined(__s390x__)
/* Above, we popped the return address (r14) off the stack. */
"br %r14\n"
#elif defined(__powerpc64__)
/* Above, we reloaded the link register from the stack, so blr jumps to the
correct return address. */
"blr\n"
#endif
#else
#error "Unsupported architecture."
#endif
);
|