diff options
Diffstat (limited to 'src')
45 files changed, 5254 insertions, 271 deletions
diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c new file mode 100644 index 0000000..1405665 --- /dev/null +++ b/src/aarch64/ffi.c @@ -0,0 +1,1076 @@ +/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +``Software''), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#include <stdio.h> + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> + +/* Stack alignment requirement in bytes */ +#define AARCH64_STACK_ALIGN 16 + +#define N_X_ARG_REG 8 +#define N_V_ARG_REG 8 + +#define AARCH64_FFI_WITH_V (1 << AARCH64_FFI_WITH_V_BIT) + +union _d +{ + UINT64 d; + UINT32 s[2]; +}; + +struct call_context +{ + UINT64 x [AARCH64_N_XREG]; + struct + { + union _d d[2]; + } v [AARCH64_N_VREG]; +}; + +static void * +get_x_addr (struct call_context *context, unsigned n) +{ + return &context->x[n]; +} + +static void * +get_s_addr (struct call_context *context, unsigned n) +{ +#if defined __AARCH64EB__ + return &context->v[n].d[1].s[1]; +#else + return &context->v[n].d[0].s[0]; +#endif +} + +static void * +get_d_addr (struct call_context *context, unsigned n) +{ +#if defined __AARCH64EB__ + return &context->v[n].d[1]; +#else + return &context->v[n].d[0]; +#endif +} + +static void * +get_v_addr (struct call_context *context, unsigned n) +{ + return &context->v[n]; +} + +/* Return the memory location at which a basic type would reside + were it to have been stored in register n. */ + +static void * +get_basic_type_addr (unsigned short type, struct call_context *context, + unsigned n) +{ + switch (type) + { + case FFI_TYPE_FLOAT: + return get_s_addr (context, n); + case FFI_TYPE_DOUBLE: + return get_d_addr (context, n); + case FFI_TYPE_LONGDOUBLE: + return get_v_addr (context, n); + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_INT: + case FFI_TYPE_POINTER: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + return get_x_addr (context, n); + default: + FFI_ASSERT (0); + return NULL; + } +} + +/* Return the alignment width for each of the basic types. 
*/ + +static size_t +get_basic_type_alignment (unsigned short type) +{ + switch (type) + { + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + return sizeof (UINT64); + case FFI_TYPE_LONGDOUBLE: + return sizeof (long double); + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT32: + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + case FFI_TYPE_POINTER: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + return sizeof (UINT64); + + default: + FFI_ASSERT (0); + return 0; + } +} + +/* Return the size in bytes for each of the basic types. */ + +static size_t +get_basic_type_size (unsigned short type) +{ + switch (type) + { + case FFI_TYPE_FLOAT: + return sizeof (UINT32); + case FFI_TYPE_DOUBLE: + return sizeof (UINT64); + case FFI_TYPE_LONGDOUBLE: + return sizeof (long double); + case FFI_TYPE_UINT8: + return sizeof (UINT8); + case FFI_TYPE_SINT8: + return sizeof (SINT8); + case FFI_TYPE_UINT16: + return sizeof (UINT16); + case FFI_TYPE_SINT16: + return sizeof (SINT16); + case FFI_TYPE_UINT32: + return sizeof (UINT32); + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + return sizeof (SINT32); + case FFI_TYPE_POINTER: + case FFI_TYPE_UINT64: + return sizeof (UINT64); + case FFI_TYPE_SINT64: + return sizeof (SINT64); + + default: + FFI_ASSERT (0); + return 0; + } +} + +extern void +ffi_call_SYSV (unsigned (*)(struct call_context *context, unsigned char *, + extended_cif *), + struct call_context *context, + extended_cif *, + unsigned, + void (*fn)(void)); + +extern void +ffi_closure_SYSV (ffi_closure *); + +/* Test for an FFI floating point representation. */ + +static unsigned +is_floating_type (unsigned short type) +{ + return (type == FFI_TYPE_FLOAT || type == FFI_TYPE_DOUBLE + || type == FFI_TYPE_LONGDOUBLE); +} + +/* Test for a homogeneous structure. 
*/ + +static unsigned short +get_homogeneous_type (ffi_type *ty) +{ + if (ty->type == FFI_TYPE_STRUCT && ty->elements) + { + unsigned i; + unsigned short candidate_type + = get_homogeneous_type (ty->elements[0]); + for (i =1; ty->elements[i]; i++) + { + unsigned short iteration_type = 0; + /* If we have a nested struct, we must find its homogeneous type. + If that fits with our candidate type, we are still + homogeneous. */ + if (ty->elements[i]->type == FFI_TYPE_STRUCT + && ty->elements[i]->elements) + { + iteration_type = get_homogeneous_type (ty->elements[i]); + } + else + { + iteration_type = ty->elements[i]->type; + } + + /* If we are not homogeneous, return FFI_TYPE_STRUCT. */ + if (candidate_type != iteration_type) + return FFI_TYPE_STRUCT; + } + return candidate_type; + } + + /* Base case, we have no more levels of nesting, so we + are a basic type, and so, trivially homogeneous in that type. */ + return ty->type; +} + +/* Determine the number of elements within a STRUCT. + + Note, we must handle nested structs. + + If ty is not a STRUCT this function will return 0. */ + +static unsigned +element_count (ffi_type *ty) +{ + if (ty->type == FFI_TYPE_STRUCT && ty->elements) + { + unsigned n; + unsigned elems = 0; + for (n = 0; ty->elements[n]; n++) + { + if (ty->elements[n]->type == FFI_TYPE_STRUCT + && ty->elements[n]->elements) + elems += element_count (ty->elements[n]); + else + elems++; + } + return elems; + } + return 0; +} + +/* Test for a homogeneous floating point aggregate. + + A homogeneous floating point aggregate is a homogeneous aggregate of + a half- single- or double- precision floating point type with one + to four elements. Note that this includes nested structs of the + basic type. 
*/ + +static int +is_hfa (ffi_type *ty) +{ + if (ty->type == FFI_TYPE_STRUCT + && ty->elements[0] + && is_floating_type (get_homogeneous_type (ty))) + { + unsigned n = element_count (ty); + return n >= 1 && n <= 4; + } + return 0; +} + +/* Test if an ffi_type is a candidate for passing in a register. + + This test does not check that sufficient registers of the + appropriate class are actually available, merely that IFF + sufficient registers are available then the argument will be passed + in register(s). + + Note that an ffi_type that is deemed to be a register candidate + will always be returned in registers. + + Returns 1 if a register candidate else 0. */ + +static int +is_register_candidate (ffi_type *ty) +{ + switch (ty->type) + { + case FFI_TYPE_VOID: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + case FFI_TYPE_LONGDOUBLE: + case FFI_TYPE_UINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_UINT32: + case FFI_TYPE_UINT64: + case FFI_TYPE_POINTER: + case FFI_TYPE_SINT8: + case FFI_TYPE_SINT16: + case FFI_TYPE_SINT32: + case FFI_TYPE_INT: + case FFI_TYPE_SINT64: + return 1; + + case FFI_TYPE_STRUCT: + if (is_hfa (ty)) + { + return 1; + } + else if (ty->size > 16) + { + /* Too large. Will be replaced with a pointer to memory. The + pointer MAY be passed in a register, but the value will + not. This test specifically fails since the argument will + never be passed by value in registers. */ + return 0; + } + else + { + /* Might be passed in registers depending on the number of + registers required. */ + return (ty->size + 7) / 8 < N_X_ARG_REG; + } + break; + + default: + FFI_ASSERT (0); + break; + } + + return 0; +} + +/* Test if an ffi_type argument or result is a candidate for a vector + register. */ + +static int +is_v_register_candidate (ffi_type *ty) +{ + return is_floating_type (ty->type) + || (ty->type == FFI_TYPE_STRUCT && is_hfa (ty)); +} + +/* Representation of the procedure call argument marshalling + state. 
+ + The terse state variable names match the names used in the AARCH64 + PCS. */ + +struct arg_state +{ + unsigned ngrn; /* Next general-purpose register number. */ + unsigned nsrn; /* Next vector register number. */ + unsigned nsaa; /* Next stack offset. */ +}; + +/* Initialize a procedure call argument marshalling state. */ +static void +arg_init (struct arg_state *state, unsigned call_frame_size) +{ + state->ngrn = 0; + state->nsrn = 0; + state->nsaa = 0; +} + +/* Return the number of available consecutive core argument + registers. */ + +static unsigned +available_x (struct arg_state *state) +{ + return N_X_ARG_REG - state->ngrn; +} + +/* Return the number of available consecutive vector argument + registers. */ + +static unsigned +available_v (struct arg_state *state) +{ + return N_V_ARG_REG - state->nsrn; +} + +static void * +allocate_to_x (struct call_context *context, struct arg_state *state) +{ + FFI_ASSERT (state->ngrn < N_X_ARG_REG) + return get_x_addr (context, (state->ngrn)++); +} + +static void * +allocate_to_s (struct call_context *context, struct arg_state *state) +{ + FFI_ASSERT (state->nsrn < N_V_ARG_REG) + return get_s_addr (context, (state->nsrn)++); +} + +static void * +allocate_to_d (struct call_context *context, struct arg_state *state) +{ + FFI_ASSERT (state->nsrn < N_V_ARG_REG) + return get_d_addr (context, (state->nsrn)++); +} + +static void * +allocate_to_v (struct call_context *context, struct arg_state *state) +{ + FFI_ASSERT (state->nsrn < N_V_ARG_REG) + return get_v_addr (context, (state->nsrn)++); +} + +/* Allocate an aligned slot on the stack and return a pointer to it. */ +static void * +allocate_to_stack (struct arg_state *state, void *stack, unsigned alignment, + unsigned size) +{ + void *allocation; + + /* Round up the NSAA to the larger of 8 or the natural + alignment of the argument's type. 
*/ + state->nsaa = ALIGN (state->nsaa, alignment); + state->nsaa = ALIGN (state->nsaa, alignment); + state->nsaa = ALIGN (state->nsaa, 8); + + allocation = stack + state->nsaa; + + state->nsaa += size; + return allocation; +} + +static void +copy_basic_type (void *dest, void *source, unsigned short type) +{ + /* This is neccessary to ensure that basic types are copied + sign extended to 64-bits as libffi expects. */ + switch (type) + { + case FFI_TYPE_FLOAT: + *(float *) dest = *(float *) source; + break; + case FFI_TYPE_DOUBLE: + *(double *) dest = *(double *) source; + break; + case FFI_TYPE_LONGDOUBLE: + *(long double *) dest = *(long double *) source; + break; + case FFI_TYPE_UINT8: + *(ffi_arg *) dest = *(UINT8 *) source; + break; + case FFI_TYPE_SINT8: + *(ffi_sarg *) dest = *(SINT8 *) source; + break; + case FFI_TYPE_UINT16: + *(ffi_arg *) dest = *(UINT16 *) source; + break; + case FFI_TYPE_SINT16: + *(ffi_sarg *) dest = *(SINT16 *) source; + break; + case FFI_TYPE_UINT32: + *(ffi_arg *) dest = *(UINT32 *) source; + break; + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + *(ffi_sarg *) dest = *(SINT32 *) source; + break; + case FFI_TYPE_POINTER: + case FFI_TYPE_UINT64: + *(ffi_arg *) dest = *(UINT64 *) source; + break; + case FFI_TYPE_SINT64: + *(ffi_sarg *) dest = *(SINT64 *) source; + break; + + default: + FFI_ASSERT (0); + } +} + +static void +copy_hfa_to_reg_or_stack (void *memory, + ffi_type *ty, + struct call_context *context, + unsigned char *stack, + struct arg_state *state) +{ + unsigned elems = element_count (ty); + if (available_v (state) < elems) + { + /* There are insufficient V registers. Further V register allocations + are prevented, the NSAA is adjusted (by allocate_to_stack ()) + and the argument is copied to memory at the adjusted NSAA. 
*/ + state->nsrn = N_V_ARG_REG; + memcpy (allocate_to_stack (state, stack, ty->alignment, ty->size), + memory, + ty->size); + } + else + { + int i; + unsigned short type = get_homogeneous_type (ty); + unsigned elems = element_count (ty); + for (i = 0; i < elems; i++) + { + void *reg = allocate_to_v (context, state); + copy_basic_type (reg, memory, type); + memory += get_basic_type_size (type); + } + } +} + +/* Either allocate an appropriate register for the argument type, or if + none are available, allocate a stack slot and return a pointer + to the allocated space. */ + +static void * +allocate_to_register_or_stack (struct call_context *context, + unsigned char *stack, + struct arg_state *state, + unsigned short type) +{ + size_t alignment = get_basic_type_alignment (type); + size_t size = alignment; + switch (type) + { + case FFI_TYPE_FLOAT: + /* This is the only case for which the allocated stack size + should not match the alignment of the type. */ + size = sizeof (UINT32); + /* Fall through. */ + case FFI_TYPE_DOUBLE: + if (state->nsrn < N_V_ARG_REG) + return allocate_to_d (context, state); + state->nsrn = N_V_ARG_REG; + break; + case FFI_TYPE_LONGDOUBLE: + if (state->nsrn < N_V_ARG_REG) + return allocate_to_v (context, state); + state->nsrn = N_V_ARG_REG; + break; + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_INT: + case FFI_TYPE_POINTER: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + if (state->ngrn < N_X_ARG_REG) + return allocate_to_x (context, state); + state->ngrn = N_X_ARG_REG; + break; + default: + FFI_ASSERT (0); + } + + return allocate_to_stack (state, stack, alignment, size); +} + +/* Copy a value to an appropriate register, or if none are + available, to the stack. 
*/ + +static void +copy_to_register_or_stack (struct call_context *context, + unsigned char *stack, + struct arg_state *state, + void *value, + unsigned short type) +{ + copy_basic_type ( + allocate_to_register_or_stack (context, stack, state, type), + value, + type); +} + +/* Marshall the arguments from FFI representation to procedure call + context and stack. */ + +static unsigned +aarch64_prep_args (struct call_context *context, unsigned char *stack, + extended_cif *ecif) +{ + int i; + struct arg_state state; + + arg_init (&state, ALIGN(ecif->cif->bytes, 16)); + + for (i = 0; i < ecif->cif->nargs; i++) + { + ffi_type *ty = ecif->cif->arg_types[i]; + switch (ty->type) + { + case FFI_TYPE_VOID: + FFI_ASSERT (0); + break; + + /* If the argument is a basic type the argument is allocated to an + appropriate register, or if none are available, to the stack. */ + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + case FFI_TYPE_LONGDOUBLE: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT32: + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + case FFI_TYPE_POINTER: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + copy_to_register_or_stack (context, stack, &state, + ecif->avalue[i], ty->type); + break; + + case FFI_TYPE_STRUCT: + if (is_hfa (ty)) + { + copy_hfa_to_reg_or_stack (ecif->avalue[i], ty, context, + stack, &state); + } + else if (ty->size > 16) + { + /* If the argument is a composite type that is larger than 16 + bytes, then the argument has been copied to memory, and + the argument is replaced by a pointer to the copy. */ + + copy_to_register_or_stack (context, stack, &state, + &(ecif->avalue[i]), FFI_TYPE_POINTER); + } + else if (available_x (&state) >= (ty->size + 7) / 8) + { + /* If the argument is a composite type and the size in + double-words is not more than the number of available + X registers, then the argument is copied into consecutive + X registers. 
*/ + int j; + for (j = 0; j < (ty->size + 7) / 8; j++) + { + memcpy (allocate_to_x (context, &state), + &(((UINT64 *) ecif->avalue[i])[j]), + sizeof (UINT64)); + } + } + else + { + /* Otherwise, there are insufficient X registers. Further X + register allocations are prevented, the NSAA is adjusted + (by allocate_to_stack ()) and the argument is copied to + memory at the adjusted NSAA. */ + state.ngrn = N_X_ARG_REG; + + memcpy (allocate_to_stack (&state, stack, ty->alignment, + ty->size), ecif->avalue + i, ty->size); + } + break; + + default: + FFI_ASSERT (0); + break; + } + } + + return ecif->cif->aarch64_flags; +} + +ffi_status +ffi_prep_cif_machdep (ffi_cif *cif) +{ + /* Round the stack up to a multiple of the stack alignment requirement. */ + cif->bytes = + (cif->bytes + (AARCH64_STACK_ALIGN - 1)) & ~ (AARCH64_STACK_ALIGN - 1); + + /* Initialize our flags. We are interested if this CIF will touch a + vector register, if so we will enable context save and load to + those registers, otherwise not. This is intended to be friendly + to lazy float context switching in the kernel. */ + cif->aarch64_flags = 0; + + if (is_v_register_candidate (cif->rtype)) + { + cif->aarch64_flags |= AARCH64_FFI_WITH_V; + } + else + { + int i; + for (i = 0; i < cif->nargs; i++) + if (is_v_register_candidate (cif->arg_types[i])) + { + cif->aarch64_flags |= AARCH64_FFI_WITH_V; + break; + } + } + + return FFI_OK; +} + +/* Call a function with the provided arguments and capture the return + value. 
*/ +void +ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + extended_cif ecif; + + ecif.cif = cif; + ecif.avalue = avalue; + ecif.rvalue = rvalue; + + switch (cif->abi) + { + case FFI_SYSV: + { + struct call_context context; + unsigned stack_bytes; + + /* Figure out the total amount of stack space we need, the + above call frame space needs to be 16 bytes aligned to + ensure correct alignment of the first object inserted in + that space hence the ALIGN applied to cif->bytes.*/ + stack_bytes = ALIGN(cif->bytes, 16); + + memset (&context, 0, sizeof (context)); + if (is_register_candidate (cif->rtype)) + { + ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn); + switch (cif->rtype->type) + { + case FFI_TYPE_VOID: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + case FFI_TYPE_LONGDOUBLE: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_POINTER: + case FFI_TYPE_UINT64: + case FFI_TYPE_INT: + case FFI_TYPE_SINT64: + { + void *addr = get_basic_type_addr (cif->rtype->type, + &context, 0); + copy_basic_type (rvalue, addr, cif->rtype->type); + break; + } + + case FFI_TYPE_STRUCT: + if (is_hfa (cif->rtype)) + { + int j; + unsigned short type = get_homogeneous_type (cif->rtype); + unsigned elems = element_count (cif->rtype); + for (j = 0; j < elems; j++) + { + void *reg = get_basic_type_addr (type, &context, j); + copy_basic_type (rvalue, reg, type); + rvalue += get_basic_type_size (type); + } + } + else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG) + { + unsigned size = ALIGN (cif->rtype->size, sizeof (UINT64)); + memcpy (rvalue, get_x_addr (&context, 0), size); + } + else + { + FFI_ASSERT (0); + } + break; + + default: + FFI_ASSERT (0); + break; + } + } + else + { + memcpy (get_x_addr (&context, 8), &rvalue, sizeof (UINT64)); + ffi_call_SYSV (aarch64_prep_args, &context, &ecif, + stack_bytes, fn); + } + break; + } + + default: 
+ FFI_ASSERT (0); + break; + } +} + +static unsigned char trampoline [] = +{ 0x70, 0x00, 0x00, 0x58, /* ldr x16, 1f */ + 0x91, 0x00, 0x00, 0x10, /* adr x17, 2f */ + 0x00, 0x02, 0x1f, 0xd6 /* br x16 */ +}; + +/* Build a trampoline. */ + +#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX,FLAGS) \ + ({unsigned char *__tramp = (unsigned char*)(TRAMP); \ + UINT64 __fun = (UINT64)(FUN); \ + UINT64 __ctx = (UINT64)(CTX); \ + UINT64 __flags = (UINT64)(FLAGS); \ + memcpy (__tramp, trampoline, sizeof (trampoline)); \ + memcpy (__tramp + 12, &__fun, sizeof (__fun)); \ + memcpy (__tramp + 20, &__ctx, sizeof (__ctx)); \ + memcpy (__tramp + 28, &__flags, sizeof (__flags)); \ + __clear_cache(__tramp, __tramp + FFI_TRAMPOLINE_SIZE); \ + }) + +ffi_status +ffi_prep_closure_loc (ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*,void*,void**,void*), + void *user_data, + void *codeloc) +{ + if (cif->abi != FFI_SYSV) + return FFI_BAD_ABI; + + FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_SYSV, codeloc, + cif->aarch64_flags); + + closure->cif = cif; + closure->user_data = user_data; + closure->fun = fun; + + return FFI_OK; +} + +/* Primary handler to setup and invoke a function within a closure. + + A closure when invoked enters via the assembler wrapper + ffi_closure_SYSV(). The wrapper allocates a call context on the + stack, saves the interesting registers (from the perspective of + the calling convention) into the context then passes control to + ffi_closure_SYSV_inner() passing the saved context and a pointer to + the stack at the point ffi_closure_SYSV() was invoked. + + On the return path the assembler wrapper will reload call context + regsiters. + + ffi_closure_SYSV_inner() marshalls the call context into ffi value + desriptors, invokes the wrapped function, then marshalls the return + value back into the call context. 
*/ + +void +ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context, + void *stack) +{ + ffi_cif *cif = closure->cif; + void **avalue = (void**) alloca (cif->nargs * sizeof (void*)); + void *rvalue = NULL; + int i; + struct arg_state state; + + arg_init (&state, ALIGN(cif->bytes, 16)); + + for (i = 0; i < cif->nargs; i++) + { + ffi_type *ty = cif->arg_types[i]; + + switch (ty->type) + { + case FFI_TYPE_VOID: + FFI_ASSERT (0); + break; + + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_INT: + case FFI_TYPE_POINTER: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + case FFI_TYPE_LONGDOUBLE: + avalue[i] = allocate_to_register_or_stack (context, stack, + &state, ty->type); + break; + + case FFI_TYPE_STRUCT: + if (is_hfa (ty)) + { + unsigned n = element_count (ty); + if (available_v (&state) < n) + { + state.nsrn = N_V_ARG_REG; + avalue[i] = allocate_to_stack (&state, stack, ty->alignment, + ty->size); + } + else + { + switch (get_homogeneous_type (ty)) + { + case FFI_TYPE_FLOAT: + { + /* Eeek! We need a pointer to the structure, + however the homogeneous float elements are + being passed in individual S registers, + therefore the structure is not represented as + a contiguous sequence of bytes in our saved + register context. We need to fake up a copy + of the structure layed out in memory + correctly. The fake can be tossed once the + closure function has returned hence alloca() + is sufficient. */ + int j; + UINT32 *p = avalue[i] = alloca (ty->size); + for (j = 0; j < element_count (ty); j++) + memcpy (&p[j], + allocate_to_s (context, &state), + sizeof (*p)); + break; + } + + case FFI_TYPE_DOUBLE: + { + /* Eeek! 
We need a pointer to the structure, + however the homogeneous float elements are + being passed in individual S registers, + therefore the structure is not represented as + a contiguous sequence of bytes in our saved + register context. We need to fake up a copy + of the structure layed out in memory + correctly. The fake can be tossed once the + closure function has returned hence alloca() + is sufficient. */ + int j; + UINT64 *p = avalue[i] = alloca (ty->size); + for (j = 0; j < element_count (ty); j++) + memcpy (&p[j], + allocate_to_d (context, &state), + sizeof (*p)); + break; + } + + case FFI_TYPE_LONGDOUBLE: + memcpy (&avalue[i], + allocate_to_v (context, &state), + sizeof (*avalue)); + break; + + default: + FFI_ASSERT (0); + break; + } + } + } + else if (ty->size > 16) + { + /* Replace Composite type of size greater than 16 with a + pointer. */ + memcpy (&avalue[i], + allocate_to_register_or_stack (context, stack, + &state, FFI_TYPE_POINTER), + sizeof (avalue[i])); + } + else if (available_x (&state) >= (ty->size + 7) / 8) + { + avalue[i] = get_x_addr (context, state.ngrn); + state.ngrn += (ty->size + 7) / 8; + } + else + { + state.ngrn = N_X_ARG_REG; + + avalue[i] = allocate_to_stack (&state, stack, ty->alignment, + ty->size); + } + break; + + default: + FFI_ASSERT (0); + break; + } + } + + /* Figure out where the return value will be passed, either in + registers or in a memory block allocated by the caller and passed + in x8. */ + + if (is_register_candidate (cif->rtype)) + { + /* Register candidates are *always* returned in registers. */ + + /* Allocate a scratchpad for the return value, we will let the + callee scrible the result into the scratch pad then move the + contents into the appropriate return value location for the + call convention. */ + rvalue = alloca (cif->rtype->size); + (closure->fun) (cif, rvalue, avalue, closure->user_data); + + /* Copy the return value into the call context so that it is returned + as expected to our caller. 
*/ + switch (cif->rtype->type) + { + case FFI_TYPE_VOID: + break; + + case FFI_TYPE_UINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_UINT32: + case FFI_TYPE_POINTER: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT8: + case FFI_TYPE_SINT16: + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + case FFI_TYPE_SINT64: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + case FFI_TYPE_LONGDOUBLE: + { + void *addr = get_basic_type_addr (cif->rtype->type, context, 0); + copy_basic_type (addr, rvalue, cif->rtype->type); + break; + } + case FFI_TYPE_STRUCT: + if (is_hfa (cif->rtype)) + { + int i; + unsigned short type = get_homogeneous_type (cif->rtype); + unsigned elems = element_count (cif->rtype); + for (i = 0; i < elems; i++) + { + void *reg = get_basic_type_addr (type, context, i); + copy_basic_type (reg, rvalue, type); + rvalue += get_basic_type_size (type); + } + } + else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG) + { + unsigned size = ALIGN (cif->rtype->size, sizeof (UINT64)) ; + memcpy (get_x_addr (context, 0), rvalue, size); + } + else + { + FFI_ASSERT (0); + } + break; + default: + FFI_ASSERT (0); + break; + } + } + else + { + memcpy (&rvalue, get_x_addr (context, 8), sizeof (UINT64)); + (closure->fun) (cif, rvalue, avalue, closure->user_data); + } +} + diff --git a/src/aarch64/ffitarget.h b/src/aarch64/ffitarget.h new file mode 100644 index 0000000..6f1a348 --- /dev/null +++ b/src/aarch64/ffitarget.h @@ -0,0 +1,59 @@ +/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd. 
+ +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +``Software''), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +#ifndef LIBFFI_H +#error "Please do not include ffitarget.h directly into your source. Use ffi.h instead." 
+#endif + +#ifndef LIBFFI_ASM +typedef unsigned long ffi_arg; +typedef signed long ffi_sarg; + +typedef enum ffi_abi + { + FFI_FIRST_ABI = 0, + FFI_SYSV, + FFI_LAST_ABI, + FFI_DEFAULT_ABI = FFI_SYSV + } ffi_abi; +#endif + +/* ---- Definitions for closures ----------------------------------------- */ + +#define FFI_CLOSURES 1 +#define FFI_TRAMPOLINE_SIZE 36 +#define FFI_NATIVE_RAW_API 0 + +/* ---- Internal ---- */ + + +#define FFI_EXTRA_CIF_FIELDS unsigned aarch64_flags + +#define AARCH64_FFI_WITH_V_BIT 0 + +#define AARCH64_N_XREG 32 +#define AARCH64_N_VREG 32 +#define AARCH64_CALL_CONTEXT_SIZE (AARCH64_N_XREG * 8 + AARCH64_N_VREG * 16) + +#endif diff --git a/src/aarch64/sysv.S b/src/aarch64/sysv.S new file mode 100644 index 0000000..b8cd421 --- /dev/null +++ b/src/aarch64/sysv.S @@ -0,0 +1,307 @@ +/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +``Software''), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + +#define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off +#define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off +#define cfi_restore(reg) .cfi_restore reg +#define cfi_def_cfa_register(reg) .cfi_def_cfa_register reg + + .text + .globl ffi_call_SYSV + .type ffi_call_SYSV, #function + +/* ffi_call_SYSV() + + Create a stack frame, setup an argument context, call the callee + and extract the result. + + The maximum required argument stack size is provided, + ffi_call_SYSV() allocates that stack space then calls the + prepare_fn to populate register context and stack. The + argument passing registers are loaded from the register + context and the callee called, on return the register passing + register are saved back to the context. Our caller will + extract the return value from the final state of the saved + register context. + + Prototype: + + extern unsigned + ffi_call_SYSV (void (*)(struct call_context *context, unsigned char *, + extended_cif *), + struct call_context *context, + extended_cif *, + unsigned required_stack_size, + void (*fn)(void)); + + Therefore on entry we have: + + x0 prepare_fn + x1 &context + x2 &ecif + x3 bytes + x4 fn + + This function uses the following stack frame layout: + + == + saved x30(lr) + x29(fp)-> saved x29(fp) + saved x24 + saved x23 + saved x22 + sp' -> saved x21 + ... + sp -> (constructed callee stack arguments) + == + + Voila! */ + +#define ffi_call_SYSV_FS (8 * 4) + + .cfi_startproc +ffi_call_SYSV: + stp x29, x30, [sp, #-16]! 
+ cfi_adjust_cfa_offset (16) + cfi_rel_offset (x29, 0) + cfi_rel_offset (x30, 8) + + mov x29, sp + cfi_def_cfa_register (x29) + sub sp, sp, #ffi_call_SYSV_FS + + stp x21, x22, [sp, 0] + cfi_rel_offset (x21, 0 - ffi_call_SYSV_FS) + cfi_rel_offset (x22, 8 - ffi_call_SYSV_FS) + + stp x23, x24, [sp, 16] + cfi_rel_offset (x23, 16 - ffi_call_SYSV_FS) + cfi_rel_offset (x24, 24 - ffi_call_SYSV_FS) + + mov x21, x1 + mov x22, x2 + mov x24, x4 + + /* Allocate the stack space for the actual arguments, many + arguments will be passed in registers, but we assume + worst case and allocate sufficient stack for ALL of + the arguments. */ + sub sp, sp, x3 + + /* unsigned (*prepare_fn) (struct call_context *context, + unsigned char *stack, extended_cif *ecif); + */ + mov x23, x0 + mov x0, x1 + mov x1, sp + /* x2 already in place */ + blr x23 + + /* Preserve the flags returned. */ + mov x23, x0 + + /* Figure out if we should touch the vector registers. */ + tbz x23, #AARCH64_FFI_WITH_V_BIT, 1f + + /* Load the vector argument passing registers. */ + ldp q0, q1, [x21, #8*32 + 0] + ldp q2, q3, [x21, #8*32 + 32] + ldp q4, q5, [x21, #8*32 + 64] + ldp q6, q7, [x21, #8*32 + 96] +1: + /* Load the core argument passing registers. */ + ldp x0, x1, [x21, #0] + ldp x2, x3, [x21, #16] + ldp x4, x5, [x21, #32] + ldp x6, x7, [x21, #48] + + /* Don't forget x8 which may be holding the address of a return buffer. + */ + ldr x8, [x21, #8*8] + + blr x24 + + /* Save the core argument passing registers. */ + stp x0, x1, [x21, #0] + stp x2, x3, [x21, #16] + stp x4, x5, [x21, #32] + stp x6, x7, [x21, #48] + + /* Note nothing useful ever comes back in x8! */ + + /* Figure out if we should touch the vector registers. */ + tbz x23, #AARCH64_FFI_WITH_V_BIT, 1f + + /* Save the vector argument passing registers. */ + stp q0, q1, [x21, #8*32 + 0] + stp q2, q3, [x21, #8*32 + 32] + stp q4, q5, [x21, #8*32 + 64] + stp q6, q7, [x21, #8*32 + 96] +1: + /* All done, unwind our stack frame. 
*/ + ldp x21, x22, [x29, # - ffi_call_SYSV_FS] + cfi_restore (x21) + cfi_restore (x22) + + ldp x23, x24, [x29, # - ffi_call_SYSV_FS + 16] + cfi_restore (x23) + cfi_restore (x24) + + mov sp, x29 + cfi_def_cfa_register (sp) + + ldp x29, x30, [sp], #16 + cfi_adjust_cfa_offset (-16) + cfi_restore (x29) + cfi_restore (x30) + + ret + + .cfi_endproc + .size ffi_call_SYSV, .-ffi_call_SYSV + +#define ffi_closure_SYSV_FS (8 * 2 + AARCH64_CALL_CONTEXT_SIZE) + +/* ffi_closure_SYSV + + Closure invocation glue. This is the low level code invoked directly by + the closure trampoline to setup and call a closure. + + On entry x17 points to a struct trampoline_data, x16 has been clobbered + all other registers are preserved. + + We allocate a call context and save the argument passing registers, + then invoked the generic C ffi_closure_SYSV_inner() function to do all + the real work, on return we load the result passing registers back from + the call context. + + On entry + + extern void + ffi_closure_SYSV (struct trampoline_data *); + + struct trampoline_data + { + UINT64 *ffi_closure; + UINT64 flags; + }; + + This function uses the following stack frame layout: + + == + saved x30(lr) + x29(fp)-> saved x29(fp) + saved x22 + saved x21 + ... + sp -> call_context + == + + Voila! */ + + .text + .globl ffi_closure_SYSV + .cfi_startproc +ffi_closure_SYSV: + stp x29, x30, [sp, #-16]! + cfi_adjust_cfa_offset (16) + cfi_rel_offset (x29, 0) + cfi_rel_offset (x30, 8) + + mov x29, sp + + sub sp, sp, #ffi_closure_SYSV_FS + cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) + + stp x21, x22, [x29, #-16] + cfi_rel_offset (x21, 0) + cfi_rel_offset (x22, 8) + + /* Load x21 with &call_context. */ + mov x21, sp + /* Preserve our struct trampoline_data * */ + mov x22, x17 + + /* Save the rest of the argument passing registers. */ + stp x0, x1, [x21, #0] + stp x2, x3, [x21, #16] + stp x4, x5, [x21, #32] + stp x6, x7, [x21, #48] + /* Don't forget we may have been given a result scratch pad address. 
+ */ + str x8, [x21, #64] + + /* Figure out if we should touch the vector registers: test bit AARCH64_FFI_WITH_V_BIT of the trampoline_data flags word (offset 8). */ + ldr x0, [x22, #8] + tbz x0, #AARCH64_FFI_WITH_V_BIT, 1f + + /* Save the argument passing vector registers. */ + stp q0, q1, [x21, #8*32 + 0] + stp q2, q3, [x21, #8*32 + 32] + stp q4, q5, [x21, #8*32 + 64] + stp q6, q7, [x21, #8*32 + 96] +1: + /* Load &ffi_closure (offset 0 of the trampoline_data) into x0 and pass &call_context in x1. */ + ldr x0, [x22, #0] + mov x1, x21 + /* Compute the location of the stack at the point that the + trampoline was called. */ + add x2, x29, #16 + + bl ffi_closure_SYSV_inner + + /* Figure out if we should touch the vector registers. */ + ldr x0, [x22, #8] + tbz x0, #AARCH64_FFI_WITH_V_BIT, 1f + + /* Load the result passing vector registers. */ + ldp q0, q1, [x21, #8*32 + 0] + ldp q2, q3, [x21, #8*32 + 32] + ldp q4, q5, [x21, #8*32 + 64] + ldp q6, q7, [x21, #8*32 + 96] +1: + /* Load the result passing core registers. */ + ldp x0, x1, [x21, #0] + ldp x2, x3, [x21, #16] + ldp x4, x5, [x21, #32] + ldp x6, x7, [x21, #48] + /* Note nothing useful is returned in x8. */ + + /* We are done, unwind our frame.
*/ + ldp x21, x22, [x29, #-16] + cfi_restore (x21) + cfi_restore (x22) + + mov sp, x29 + cfi_adjust_cfa_offset (-ffi_closure_SYSV_FS) + + ldp x29, x30, [sp], #16 + cfi_adjust_cfa_offset (-16) + cfi_restore (x29) + cfi_restore (x30) + + ret + .cfi_endproc + .size ffi_closure_SYSV, .-ffi_closure_SYSV diff --git a/src/arm/ffi.c b/src/arm/ffi.c index 1f8597d..3ccceb9 100644 --- a/src/arm/ffi.c +++ b/src/arm/ffi.c @@ -251,8 +251,10 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) break; case FFI_VFP: +#ifdef __ARM_EABI__ ffi_call_VFP (fn, &ecif, cif->bytes, cif->flags, ecif.rvalue); break; +#endif default: FFI_ASSERT(0); @@ -609,8 +611,10 @@ ffi_prep_closure_loc (ffi_closure* closure, if (cif->abi == FFI_SYSV) closure_func = &ffi_closure_SYSV; +#ifdef __ARM_EABI__ else if (cif->abi == FFI_VFP) closure_func = &ffi_closure_VFP; +#endif else return FFI_BAD_ABI; diff --git a/src/arm/sysv.S b/src/arm/sysv.S index 60e2ae3..fb38cd6 100644 --- a/src/arm/sysv.S +++ b/src/arm/sysv.S @@ -41,7 +41,7 @@ #define CNAME(x) x #endif #ifdef __APPLE__ -#define ENTRY(x) .globl CNAME(x); CNAME(x): +#define ENTRY(x) .globl _##x; _##x: #else #define ENTRY(x) .globl CNAME(x); .type CNAME(x),%function; CNAME(x): #endif /* __APPLE__ */ @@ -187,7 +187,7 @@ ARM_FUNC_START ffi_call_SYSV @ r1 already set @ Call ffi_prep_args(stack, &ecif) - bl ffi_prep_args + bl CNAME(ffi_prep_args) @ move first 4 parameters in registers ldmia sp, {r0-r3} @@ -334,7 +334,9 @@ ARM_FUNC_START ffi_closure_SYSV /* Below are VFP hard-float ABI call and closure implementations. - Add VFP FPU directive here. */ + Add VFP FPU directive here. This is only compiled into the library + under EABI. 
*/ +#ifdef __ARM_EABI__ .fpu vfp @ r0: fn @@ -362,7 +364,7 @@ ARM_FUNC_START ffi_call_VFP sub r2, fp, #64 @ VFP scratch space @ Call ffi_prep_args(stack, &ecif, vfp_space) - bl ffi_prep_args + bl CNAME(ffi_prep_args) @ Load VFP register args if needed cmp r0, #0 @@ -444,7 +446,7 @@ ARM_FUNC_START ffi_closure_VFP sub sp, sp, #72 str sp, [sp, #64] add r1, sp, #64 - bl ffi_closure_SYSV_inner + bl CNAME(ffi_closure_SYSV_inner) cmp r0, #FFI_TYPE_INT beq .Lretint_vfp @@ -491,6 +493,7 @@ ARM_FUNC_START ffi_closure_VFP .ffi_closure_VFP_end: UNWIND .fnend .size CNAME(ffi_closure_VFP),.ffi_closure_VFP_end-CNAME(ffi_closure_VFP) +#endif ENTRY(ffi_arm_trampoline) stmfd sp!, {r0-r3} diff --git a/src/arm/trampoline.S b/src/arm/trampoline.S index 7b47429..935e8de 100644 --- a/src/arm/trampoline.S +++ b/src/arm/trampoline.S @@ -1,5 +1,5 @@ # GENERATED CODE - DO NOT EDIT -# This file was generated by ./gentramp.sh +# This file was generated by src/arm/gentramp.sh # Copyright (c) 2010, Plausible Labs Cooperative, Inc. # diff --git a/src/bfin/ffi.c b/src/bfin/ffi.c new file mode 100644 index 0000000..0beccc1 --- /dev/null +++ b/src/bfin/ffi.c @@ -0,0 +1,195 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (c) 2012 Alexandre K. I. de Mendonca <alexandre.keunecke@gmail.com> + + Blackfin Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> +#include <stdio.h> + +/* Maximum number of GPRs available for argument passing. */ +#define MAX_GPRARGS 3 + +/* + * Return types + */ +#define FFIBFIN_RET_VOID 0 +#define FFIBFIN_RET_BYTE 1 +#define FFIBFIN_RET_HALFWORD 2 +#define FFIBFIN_RET_INT64 3 +#define FFIBFIN_RET_INT32 4 + +/*====================================================================*/ +/* PROTOTYPE */ +/*====================================================================*/ +void ffi_prep_args(unsigned char *, extended_cif *); + +/*====================================================================*/ +/* Externals */ +/* (Assembly) */ +/*====================================================================*/ + +extern void ffi_call_SYSV(unsigned, extended_cif *, void(*)(unsigned char *, extended_cif *), unsigned, void *, void(*fn)(void)); + +/*====================================================================*/ +/* Implementation */ +/* */ +/*====================================================================*/ + + +/* + * This function calculates the return type (size) based on type.
+ */ + +ffi_status ffi_prep_cif_machdep(ffi_cif *cif) +{ + /* --------------------------------------* + * Return handling: map rtype to an FFIBFIN_RET_* code in cif->flags * + * --------------------------------------*/ + switch (cif->rtype->type) { + case FFI_TYPE_VOID: + cif->flags = FFIBFIN_RET_VOID; + break; + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + cif->flags = FFIBFIN_RET_HALFWORD; + break; + case FFI_TYPE_UINT8: + cif->flags = FFIBFIN_RET_BYTE; + break; + case FFI_TYPE_INT: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_FLOAT: + case FFI_TYPE_POINTER: + case FFI_TYPE_SINT8: + cif->flags = FFIBFIN_RET_INT32; + break; + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + case FFI_TYPE_DOUBLE: + cif->flags = FFIBFIN_RET_INT64; + break; + case FFI_TYPE_STRUCT: + if (cif->rtype->size <= 4){ + cif->flags = FFIBFIN_RET_INT32; + }else if (cif->rtype->size == 8){ + cif->flags = FFIBFIN_RET_INT64; + }else{ + //every other size (5..7 and more than 8 bytes) is returned via a hidden pointer in P0; NOTE(review): sysv.S describes this path as "more than 8 bytes" - confirm sizes 5..7 against the Blackfin ABI + cif->flags = FFIBFIN_RET_VOID; + } + break; + default: + FFI_ASSERT(0); + break; + } + return FFI_OK; +} + +/* + * Packs cif/rvalue/avalue into an extended_cif and hands it to the assembly routine ffi_call_SYSV, which calls ffi_prep_args and then fn + * cif = the call interface (cif->flags holds the FFIBFIN_RET_* code, cif->bytes is passed on as the first argument) + * fn = the function to be called + * rvalue = where the return value is stored + * avalue = array of pointers to the argument values + */ +void ffi_call(ffi_cif *cif, void(*fn)(void), void *rvalue, void **avalue) +{ + int ret_type = cif->flags; + extended_cif ecif; + ecif.cif = cif; + ecif.avalue = avalue; + ecif.rvalue = rvalue; + + switch (cif->abi) { + case FFI_SYSV: + ffi_call_SYSV(cif->bytes, &ecif, ffi_prep_args, ret_type, ecif.rvalue, fn); + break; + default: + FFI_ASSERT(0); + break; + } +} + + +/* +* This function prepares the parameters (copies them from the ecif to the stack) +* to call the function (ffi_prep_args is called by the assembly routine in file +* sysv.S, which also calls the actual function) +*/ +void ffi_prep_args(unsigned char *stack, extended_cif *ecif) +{ + register unsigned int i = 0; + void **p_argv; + unsigned char *argp; + ffi_type **p_arg; + argp = 
stack; + p_argv = ecif->avalue; + for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types; + (i != 0); + i--, p_arg++) { + size_t z; + z = (*p_arg)->size; + if (z < sizeof(int)) { + z = sizeof(int); + switch ((*p_arg)->type) { + case FFI_TYPE_SINT8: { + signed char v = *(SINT8 *)(* p_argv); + signed int t = v; + *(signed int *) argp = t; + } + break; + case FFI_TYPE_UINT8: { + unsigned char v = *(UINT8 *)(* p_argv); + unsigned int t = v; + *(unsigned int *) argp = t; + } + break; + case FFI_TYPE_SINT16: + *(signed int *) argp = (signed int) * (SINT16 *)(* p_argv); + break; + case FFI_TYPE_UINT16: + *(unsigned int *) argp = (unsigned int) * (UINT16 *)(* p_argv); + break; + case FFI_TYPE_STRUCT: + memcpy(argp, *p_argv, (*p_arg)->size); + break; + default: + FFI_ASSERT(0); + break; + } + } else if (z == sizeof(int)) { + *(unsigned int *) argp = (unsigned int) * (UINT32 *)(* p_argv); + } else { + memcpy(argp, *p_argv, z); + } + p_argv++; + argp += z; + } +} + + + diff --git a/src/bfin/ffitarget.h b/src/bfin/ffitarget.h new file mode 100644 index 0000000..2175c01 --- /dev/null +++ b/src/bfin/ffitarget.h @@ -0,0 +1,43 @@ +/* ----------------------------------------------------------------------- + ffitarget.h - Copyright (c) 2012 Alexandre K. I. de Mendonca <alexandre.keunecke@gmail.com> + + Blackfin Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +#ifndef LIBFFI_ASM +typedef unsigned long ffi_arg; +typedef signed long ffi_sarg; + +typedef enum ffi_abi { + FFI_FIRST_ABI = 0, + FFI_SYSV, + FFI_LAST_ABI, + FFI_DEFAULT_ABI = FFI_SYSV +} ffi_abi; +#endif + +#endif + diff --git a/src/bfin/sysv.S b/src/bfin/sysv.S new file mode 100644 index 0000000..ae7a152 --- /dev/null +++ b/src/bfin/sysv.S @@ -0,0 +1,177 @@ +/* ----------------------------------------------------------------------- + sysv.S - Copyright (c) 2012 Alexandre K. I. de Mendonca <alexandre.keunecke@gmail.com> + + Blackfin Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + +.text +.align 4 + + /* + There is a "feature" in the bfin toolchain: it puts a _ before function names, + which is why the function here is called _ffi_call_SYSV and not ffi_call_SYSV + */ + .global _ffi_call_SYSV; + .type _ffi_call_SYSV, STT_FUNC; + .func ffi_call_SYSV + + /* + cif->bytes = R0 (fp+8) + &ecif = R1 (fp+12) + ffi_prep_args = R2 (fp+16) + ret_type = stack (fp+20) + ecif.rvalue = stack (fp+24) + fn = stack (fp+28) + got (fp+32) + There is room for improvement here (we can use temporary registers + instead of saving the values in the memory) + REGS: + P5 => Stack pointer (function arguments) + R5 => cif->bytes + R4 => ret->type + + FP-20 = P3 + FP-16 = SP (parameters area) + FP-12 = SP (temp) + FP-08 = function return part 1 [R0] + FP-04 = function return part 2 [R1] + */ + +_ffi_call_SYSV: +.prologue: + LINK 20; + [FP-20] = P3; + [FP+8] = R0; + [FP+12] = R1; + [FP+16] = R2; + +.allocate_stack: + //allocate cif->bytes (plus 4) on the stack + R1 = [FP+8]; + R0 = SP; + R0 = R0 - R1; + R1 = 4; + R0 = R0 - R1; + [FP-12] = SP; + SP = R0; + [FP-16] = SP; + +.call_prep_args: + //get the addr of prep_args + P0 = [P3 + _ffi_prep_args@FUNCDESC_GOT17M4]; + P1 = [P0]; + P3 = [P0+4]; + R0 = [FP-16];//SP (parameter area) + R1 = [FP+12];//ecif + call (P1); + +.call_user_function: + //adjust SP so that the user function can access the parameters on the stack + SP = [FP-16]; //point to function parameters + R0 = [SP]; + R1 = [SP+4]; + R2 = [SP+8]; + //load user function address + P0 = FP; + P0 +=28; + P1 = [P0]; + P1 = [P1]; + P3 = [P0+4]; + /* + For functions returning 
aggregate values (struct) occupying more than 8 bytes, + the caller allocates the return value object on the stack and the address + of this object is passed to the callee as a hidden argument in register P0. + */ + P0 = [FP+24]; + + call (P1); + SP = [FP-12]; +.compute_return: + P2 = [FP-20]; + [FP-8] = R0; + [FP-4] = R1; + + R0 = [FP+20]; + R1 = R0 << 2; + + R0 = [P2+.rettable@GOT17M4]; + R0 = R1 + R0; + P2 = R0; + R1 = [P2]; + + P2 = [FP+-20]; + R0 = [P2+.rettable@GOT17M4]; + R0 = R1 + R0; + P2 = R0; + R0 = [FP-8]; + R1 = [FP-4]; + jump (P2); + +/* +#define FFIBFIN_RET_VOID 0 +#define FFIBFIN_RET_BYTE 1 +#define FFIBFIN_RET_HALFWORD 2 +#define FFIBFIN_RET_INT64 3 +#define FFIBFIN_RET_INT32 4 +*/ +.align 4 +.align 4 +.rettable: + .dd .epilogue - .rettable + .dd .rbyte - .rettable; + .dd .rhalfword - .rettable; + .dd .rint64 - .rettable; + .dd .rint32 - .rettable; + +.rbyte: + P0 = [FP+24]; + R0 = R0.B (Z); + [P0] = R0; + JUMP .epilogue +.rhalfword: + P0 = [FP+24]; + R0 = R0.L; + [P0] = R0; + JUMP .epilogue +.rint64: + P0 = [FP+24];// &rvalue + [P0] = R0; + [P0+4] = R1; + JUMP .epilogue +.rint32: + P0 = [FP+24]; + [P0] = R0; +.epilogue: + R0 = [FP+8]; + R1 = [FP+12]; + R2 = [FP+16]; + P3 = [FP-20]; + UNLINK; + RTS; + +.size _ffi_call_SYSV,.-_ffi_call_SYSV; +.endfunc diff --git a/src/closures.c b/src/closures.c index 1b37827..6298d6f 100644 --- a/src/closures.c +++ b/src/closures.c @@ -172,6 +172,25 @@ selinux_enabled_check (void) #endif /* !FFI_MMAP_EXEC_SELINUX */ +/* On PaX enable kernels that have MPROTECT enable we can't use PROT_EXEC. */ +#ifdef FFI_MMAP_EXEC_EMUTRAMP_PAX +#include <stdlib.h> + +static int emutramp_enabled = -1; + +static int +emutramp_enabled_check (void) +{ + if (getenv ("FFI_DISABLE_EMUTRAMP") == NULL) + return 1; + else + return 0; +} + +#define is_emutramp_enabled() (emutramp_enabled >= 0 ? 
emutramp_enabled \ + : (emutramp_enabled = emutramp_enabled_check ())) +#endif /* FFI_MMAP_EXEC_EMUTRAMP_PAX */ + #elif defined (__CYGWIN__) || defined(__INTERIX) #include <sys/mman.h> @@ -181,6 +200,10 @@ selinux_enabled_check (void) #endif /* !defined(X86_WIN32) && !defined(X86_WIN64) */ +#ifndef FFI_MMAP_EXEC_EMUTRAMP_PAX +#define is_emutramp_enabled() 0 +#endif /* FFI_MMAP_EXEC_EMUTRAMP_PAX */ + /* Declare all functions defined in dlmalloc.c as static. */ static void *dlmalloc(size_t); static void dlfree(void*); @@ -458,6 +481,12 @@ dlmmap (void *start, size_t length, int prot, printf ("mapping in %zi\n", length); #endif + if (execfd == -1 && is_emutramp_enabled ()) + { + ptr = mmap (start, length, prot & ~PROT_EXEC, flags, fd, offset); + return ptr; + } + if (execfd == -1 && !is_selinux_enabled ()) { ptr = mmap (start, length, prot | PROT_EXEC, flags, fd, offset); diff --git a/src/m68k/ffi.c b/src/m68k/ffi.c index 37a0784..0dee938 100644 --- a/src/m68k/ffi.c +++ b/src/m68k/ffi.c @@ -123,6 +123,8 @@ ffi_prep_args (void *stack, extended_cif *ecif) #define CIF_FLAGS_POINTER 32 #define CIF_FLAGS_STRUCT1 64 #define CIF_FLAGS_STRUCT2 128 +#define CIF_FLAGS_SINT8 256 +#define CIF_FLAGS_SINT16 512 /* Perform machine dependent cif processing */ ffi_status @@ -200,6 +202,14 @@ ffi_prep_cif_machdep (ffi_cif *cif) cif->flags = CIF_FLAGS_DINT; break; + case FFI_TYPE_SINT16: + cif->flags = CIF_FLAGS_SINT16; + break; + + case FFI_TYPE_SINT8: + cif->flags = CIF_FLAGS_SINT8; + break; + default: cif->flags = CIF_FLAGS_INT; break; diff --git a/src/m68k/sysv.S b/src/m68k/sysv.S index f6f4ef9..ec2b14f 100644 --- a/src/m68k/sysv.S +++ b/src/m68k/sysv.S @@ -2,9 +2,10 @@ sysv.S - Copyright (c) 2012 Alan Hourihane Copyright (c) 1998, 2012 Andreas Schwab - Copyright (c) 2008 Red Hat, Inc. - - m68k Foreign Function Interface + Copyright (c) 2008 Red Hat, Inc. 
+ Copyright (c) 2012 Thorsten Glaser + + m68k Foreign Function Interface Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -168,8 +169,28 @@ retstruct1: retstruct2: btst #7,%d2 - jbeq noretval + jbeq retsint8 move.w %d0,(%a1) + jbra epilogue + +retsint8: + btst #8,%d2 + jbeq retsint16 + | NOTE: On the mc68000, extb is not supported. 8->16, then 16->32. +#if !defined(__mc68020__) && !defined(__mc68030__) && !defined(__mc68040__) && !defined(__mc68060__) && !defined(__mcoldfire__) + ext.w %d0 + ext.l %d0 +#else + extb.l %d0 +#endif + move.l %d0,(%a1) + jbra epilogue + +retsint16: + btst #9,%d2 + jbeq noretval + ext.l %d0 + move.l %d0,(%a1) noretval: epilogue: @@ -201,8 +222,10 @@ CALLFUNC(ffi_closure_SYSV): lsr.l #1,%d0 jne 1f jcc .Lcls_epilogue + | CIF_FLAGS_INT move.l -12(%fp),%d0 .Lcls_epilogue: + | no CIF_FLAGS_* unlk %fp rts 1: @@ -210,6 +233,7 @@ CALLFUNC(ffi_closure_SYSV): lsr.l #2,%d0 jne 1f jcs .Lcls_ret_float + | CIF_FLAGS_DINT move.l (%a0)+,%d0 move.l (%a0),%d1 jra .Lcls_epilogue @@ -224,6 +248,7 @@ CALLFUNC(ffi_closure_SYSV): lsr.l #2,%d0 jne 1f jcs .Lcls_ret_ldouble + | CIF_FLAGS_DOUBLE #if defined(__MC68881__) || defined(__HAVE_68881__) fmove.d (%a0),%fp0 #else @@ -242,17 +267,37 @@ CALLFUNC(ffi_closure_SYSV): jra .Lcls_epilogue 1: lsr.l #2,%d0 - jne .Lcls_ret_struct2 + jne 1f jcs .Lcls_ret_struct1 + | CIF_FLAGS_POINTER move.l (%a0),%a0 move.l %a0,%d0 jra .Lcls_epilogue .Lcls_ret_struct1: move.b (%a0),%d0 jra .Lcls_epilogue -.Lcls_ret_struct2: +1: + lsr.l #2,%d0 + jne 1f + jcs .Lcls_ret_sint8 + | CIF_FLAGS_STRUCT2 move.w (%a0),%d0 jra .Lcls_epilogue +.Lcls_ret_sint8: + move.l (%a0),%d0 + | NOTE: On the mc68000, extb is not supported. 8->16, then 16->32. 
+#if !defined(__mc68020__) && !defined(__mc68030__) && !defined(__mc68040__) && !defined(__mc68060__) && !defined(__mcoldfire__) + ext.w %d0 + ext.l %d0 +#else + extb.l %d0 +#endif + jra .Lcls_epilogue +1: + | CIF_FLAGS_SINT16 + move.l (%a0),%d0 + ext.l %d0 + jra .Lcls_epilogue CFI_ENDPROC() .size CALLFUNC(ffi_closure_SYSV),.-CALLFUNC(ffi_closure_SYSV) diff --git a/src/metag/ffi.c b/src/metag/ffi.c new file mode 100644 index 0000000..46b383e --- /dev/null +++ b/src/metag/ffi.c @@ -0,0 +1,330 @@ +/* ---------------------------------------------------------------------- + ffi.c - Copyright (c) 2013 Imagination Technologies + + Meta Foreign Function Interface + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + `Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED `AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL SIMON POSNJAK BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. +----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> + +#include <stdlib.h> + +#define MIN(a,b) (((a) < (b)) ? 
(a) : (b)) + +/* + * ffi_prep_args is called by the assembly routine once stack space has been + * allocated for the function's arguments. + * + * Arguments are written downwards from 'stack': for each one argp is lowered + * by its size and then aligned down to its alignment and to 4 bytes. Integer + * values narrower than int are widened to a full int; small structs are + * copied as-is. When cif->flags is FFI_TYPE_STRUCT the return buffer address + * is stored first, at stack - 4. + * + * Returns the number of argument bytes to be passed in registers (at most + * 6*4), rounded up to a multiple of 8. + */ + +unsigned int ffi_prep_args(char *stack, extended_cif *ecif) +{ + register unsigned int i; + register void **p_argv; + register char *argp; + register ffi_type **p_arg; + + argp = stack; + + /* Store return value */ + if ( ecif->cif->flags == FFI_TYPE_STRUCT ) { + argp -= 4; + *(void **) argp = ecif->rvalue; + } + + p_argv = ecif->avalue; + + /* point to next location */ + for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types; (i != 0); i--, p_arg++, p_argv++) + { + size_t z; + + /* Move argp to address of argument */ + z = (*p_arg)->size; + argp -= z; + + /* Align if necessary */ + argp = (char *) ALIGN_DOWN(ALIGN_DOWN(argp, (*p_arg)->alignment), 4); + + if (z < sizeof(int)) { + z = sizeof(int); + switch ((*p_arg)->type) + { + case FFI_TYPE_SINT8: + *(signed int *) argp = (signed int)*(SINT8 *)(* p_argv); + break; + case FFI_TYPE_UINT8: + *(unsigned int *) argp = (unsigned int)*(UINT8 *)(* p_argv); + break; + case FFI_TYPE_SINT16: + *(signed int *) argp = (signed int)*(SINT16 *)(* p_argv); + break; + case FFI_TYPE_UINT16: + *(unsigned int *) argp = (unsigned int)*(UINT16 *)(* p_argv); + break; + case FFI_TYPE_STRUCT: + memcpy(argp, *p_argv, (*p_arg)->size); + break; + default: + FFI_ASSERT(0); + } + } else if ( z == sizeof(int)) { + *(unsigned int *) argp = (unsigned int)*(UINT32 *)(* p_argv); + } else { + memcpy(argp, *p_argv, z); + } + } + + /* return the size of the arguments to be passed in registers, + padded to an 8 byte boundary to preserve stack alignment */ + return ALIGN(MIN(stack - argp, 6*4), 8); +} + +/* Perform machine dependent cif processing */ +ffi_status ffi_prep_cif_machdep(ffi_cif *cif) +{ + ffi_type **ptr; + unsigned i, bytes = 0; + + for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++) { + if ((*ptr)->size == 0) + return FFI_BAD_TYPEDEF; + + /* Perform a sanity check on the argument type, do this + 
check after the initialization. */ + FFI_ASSERT_VALID_TYPE(*ptr); + + /* Add any padding if necessary */ + if (((*ptr)->alignment - 1) & bytes) + bytes = ALIGN(bytes, (*ptr)->alignment); + + bytes += ALIGN((*ptr)->size, 4); + } + + /* Ensure arg space is aligned to an 8-byte boundary */ + bytes = ALIGN(bytes, 8); + + /* Make space for the return structure pointer */ + if (cif->rtype->type == FFI_TYPE_STRUCT) { + bytes += sizeof(void*); + + /* Ensure stack is aligned to an 8-byte boundary */ + bytes = ALIGN(bytes, 8); + } + + cif->bytes = bytes; + + /* Set the return type flag */ + switch (cif->rtype->type) { + case FFI_TYPE_VOID: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + cif->flags = (unsigned) cif->rtype->type; + break; + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + cif->flags = (unsigned) FFI_TYPE_SINT64; + break; + case FFI_TYPE_STRUCT: + /* Meta can store return values which are <= 64 bits */ + if (cif->rtype->size <= 4) + /* Returned to D0Re0 as 32-bit value */ + cif->flags = (unsigned)FFI_TYPE_INT; + else if ((cif->rtype->size > 4) && (cif->rtype->size <= 8)) + /* Returned valued is stored to D1Re0|R0Re0 */ + cif->flags = (unsigned)FFI_TYPE_DOUBLE; + else + /* value stored in memory */ + cif->flags = (unsigned)FFI_TYPE_STRUCT; + break; + default: + cif->flags = (unsigned)FFI_TYPE_INT; + break; + } + return FFI_OK; +} + +extern void ffi_call_SYSV(void (*fn)(void), extended_cif *, unsigned, unsigned, double *); + +/* + * Exported in API. 
Entry point + * cif -> ffi_cif object + * fn -> function pointer + * rvalue -> pointer to return value + * avalue -> vector of void * pointers pointing to memory locations holding the + * arguments + */ +void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + extended_cif ecif; + + int small_struct = (((cif->flags == FFI_TYPE_INT) || (cif->flags == FFI_TYPE_DOUBLE)) && (cif->rtype->type == FFI_TYPE_STRUCT)); + ecif.cif = cif; + ecif.avalue = avalue; + + double temp; + + /* + * If the return value is a struct and we don't have a return value address + * then we need to make one + */ + + if ((rvalue == NULL ) && (cif->flags == FFI_TYPE_STRUCT)) + ecif.rvalue = alloca(cif->rtype->size); + else if (small_struct) + ecif.rvalue = &temp; + else + ecif.rvalue = rvalue; + + switch (cif->abi) { + case FFI_SYSV: + ffi_call_SYSV(fn, &ecif, cif->bytes, cif->flags, ecif.rvalue); + break; + default: + FFI_ASSERT(0); + break; + } + + if (small_struct) + memcpy (rvalue, &temp, cif->rtype->size); +} + +/* private members */ + +static void ffi_prep_incoming_args_SYSV (char *, void **, void **, + ffi_cif*, float *); + +void ffi_closure_SYSV (ffi_closure *); + +/* Do NOT change that without changing the FFI_TRAMPOLINE_SIZE */ +extern unsigned int ffi_metag_trampoline[10]; /* 10 instructions */ + +/* end of private members */ + +/* + * __tramp: trampoline memory location + * __fun: assembly routine + * __ctx: memory location for wrapper + * + * At this point, tramp[0] == __ctx ! 
+ */ +void ffi_init_trampoline(unsigned char *__tramp, unsigned int __fun, unsigned int __ctx) { + memcpy (__tramp, ffi_metag_trampoline, sizeof(ffi_metag_trampoline)); + *(unsigned int*) &__tramp[40] = __ctx; + *(unsigned int*) &__tramp[44] = __fun; + /* This will flush the instruction cache */ + __builtin_meta2_cachewd(&__tramp[0], 1); + __builtin_meta2_cachewd(&__tramp[47], 1); +} + + + +/* the cif must already be prepared */ + +ffi_status +ffi_prep_closure_loc (ffi_closure *closure, + ffi_cif* cif, + void (*fun)(ffi_cif*,void*,void**,void*), + void *user_data, + void *codeloc) +{ + void (*closure_func)(ffi_closure*) = NULL; + + if (cif->abi == FFI_SYSV) + closure_func = &ffi_closure_SYSV; + else + return FFI_BAD_ABI; + + ffi_init_trampoline( + (unsigned char*)&closure->tramp[0], + (unsigned int)closure_func, + (unsigned int)codeloc); + + closure->cif = cif; + closure->user_data = user_data; + closure->fun = fun; + + return FFI_OK; +} + + +/* This function is jumped to by the trampoline */ +unsigned int ffi_closure_SYSV_inner (closure, respp, args, vfp_args) + ffi_closure *closure; + void **respp; + void *args; + void *vfp_args; +{ + ffi_cif *cif; + void **arg_area; + + cif = closure->cif; + arg_area = (void**) alloca (cif->nargs * sizeof (void*)); + + /* + * This call will initialize ARG_AREA, such that each + * element in that array points to the corresponding + * value on the stack; and if the function returns + * a structure, it will re-set RESP to point to the + * structure return address. 
+ */ + ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif, vfp_args); + + (closure->fun) ( cif, *respp, arg_area, closure->user_data); + + return cif->flags; +} + +static void ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, + void **avalue, ffi_cif *cif, + float *vfp_stack) +{ + register unsigned int i; + register void **p_argv; + register char *argp; + register ffi_type **p_arg; + + /* stack points to original arguments */ + argp = stack; + + /* Store return value */ + if ( cif->flags == FFI_TYPE_STRUCT ) { + argp -= 4; + *rvalue = *(void **) argp; + } + + p_argv = avalue; + + for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++) { + size_t z; + size_t alignment; + + alignment = (*p_arg)->alignment; + if (alignment < 4) + alignment = 4; + if ((alignment - 1) & (unsigned)argp) + argp = (char *) ALIGN(argp, alignment); + + z = (*p_arg)->size; + *p_argv = (void*) argp; + p_argv++; + argp -= z; + } + return; +} diff --git a/src/metag/ffitarget.h b/src/metag/ffitarget.h new file mode 100644 index 0000000..7b9dbeb --- /dev/null +++ b/src/metag/ffitarget.h @@ -0,0 +1,53 @@ +/* -----------------------------------------------------------------*-C-*- + ffitarget.h - Copyright (c) 2013 Imagination Technologies Ltd. + Target configuration macros for Meta + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +#ifndef LIBFFI_H +#error "Please do not include ffitarget.h directly into your source. Use ffi.h instead." +#endif + +#ifndef LIBFFI_ASM +typedef unsigned long ffi_arg; +typedef signed long ffi_sarg; + +typedef enum ffi_abi { + FFI_FIRST_ABI = 0, + FFI_SYSV, + FFI_DEFAULT_ABI = FFI_SYSV, + FFI_LAST_ABI = FFI_DEFAULT_ABI + 1, +} ffi_abi; +#endif + +/* ---- Definitions for closures ----------------------------------------- */ + +#define FFI_CLOSURES 1 +#define FFI_TRAMPOLINE_SIZE 48 +#define FFI_NATIVE_RAW_API 0 + +#endif + diff --git a/src/metag/sysv.S b/src/metag/sysv.S new file mode 100644 index 0000000..b4b2a3b --- /dev/null +++ b/src/metag/sysv.S @@ -0,0 +1,311 @@ +/* ----------------------------------------------------------------------- + sysv.S - Copyright (c) 2013 Imagination Technologies Ltd. 
+ + Meta Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> +#ifdef HAVE_MACHINE_ASM_H +#include <machine/asm.h> +#else +#ifdef __USER_LABEL_PREFIX__ +#define CONCAT1(a, b) CONCAT2(a, b) +#define CONCAT2(a, b) a ## b + +/* Use the right prefix for global labels. */ +#define CNAME(x) CONCAT1 (__USER_LABEL_PREFIX__, x) +#else +#define CNAME(x) x +#endif +#define ENTRY(x) .globl CNAME(x); .type CNAME(x), %function; CNAME(x): +#endif + +#ifdef __ELF__ +#define LSYM(x) .x +#else +#define LSYM(x) x +#endif + +.macro call_reg x= + .text + .balign 4 + mov D1RtP, \x + swap D1RtP, PC +.endm + +! Save register arguments +.macro SAVE_ARGS + .text + .balign 4 + setl [A0StP++], D0Ar6, D1Ar5 + setl [A0StP++], D0Ar4, D1Ar3 + setl [A0StP++], D0Ar2, D1Ar1 +.endm + +! 
Save return, frame pointer and other regs +.macro SAVE_REGS regs= + .text + .balign 4 + setl [A0StP++], D0FrT, D1RtP + ! Needs to be a pair of regs + .ifnc "\regs","" + setl [A0StP++], \regs + .endif +.endm + +! Declare a global function +.macro METAG_FUNC_START name + .text + .balign 4 + ENTRY(\name) +.endm + +! Return registers from the stack. Reverse SAVE_REGS operation +.macro RET_REGS regs=, cond= + .ifnc "\regs", "" + getl \regs, [--A0StP] + .endif + getl D0FrT, D1RtP, [--A0StP] +.endm + +! Return arguments +.macro RET_ARGS + getl D0Ar2, D1Ar1, [--A0StP] + getl D0Ar4, D1Ar3, [--A0StP] + getl D0Ar6, D1Ar5, [--A0StP] +.endm + + + ! D1Ar1: fn + ! D0Ar2: &ecif + ! D1Ar3: cif->bytes + ! D0Ar4: cif->flags + ! D1Ar5: ecif.rvalue + + ! This assumes we are using GNU as +METAG_FUNC_START ffi_call_SYSV + ! Save argument registers + + SAVE_ARGS + + ! new frame + mov D0FrT, A0FrP + add A0FrP, A0StP, #0 + + ! Preserve the old frame pointer + SAVE_REGS "D1.5, D0.5" + + ! Make room for new args. cifs->bytes is the total space for input + ! and return arguments + + add A0StP, A0StP, D1Ar3 + + ! Preserve cifs->bytes & fn + mov D0.5, D1Ar3 + mov D1.5, D1Ar1 + + ! Place all of the ffi_prep_args in position + mov D1Ar1, A0StP + + ! Call ffi_prep_args(stack, &ecif) +#ifdef __PIC__ + callr D1RtP, CNAME(ffi_prep_args@PLT) +#else + callr D1RtP, CNAME(ffi_prep_args) +#endif + + ! Restore fn pointer + + ! The foreign stack should look like this + ! XXXXX XXXXXX <--- stack pointer + ! FnArgN rvalue + ! FnArgN+2 FnArgN+1 + ! FnArgN+4 FnArgN+3 + ! .... + ! + + ! A0StP now points to the first (or return) argument + 4 + + ! Preserve cif->bytes + getl D0Ar2, D1Ar1, [--A0StP] + getl D0Ar4, D1Ar3, [--A0StP] + getl D0Ar6, D1Ar5, [--A0StP] + + ! Place A0StP to the first argument again + add A0StP, A0StP, #24 ! That's because we loaded 6 regs x 4 byte each + + ! A0FrP points to the initial stack without the reserved space for the + ! 
cifs->bytes, whilst A0StP points to the stack after the space allocation + + ! fn was the first argument of ffi_call_SYSV. + ! The stack at this point looks like this: + ! + ! A0StP(on entry to _SYSV) -> Arg6 Arg5 | low + ! Arg4 Arg3 | + ! Arg2 Arg1 | + ! A0FrP ----> D0FrtP D1RtP | + ! D1.5 D0.5 | + ! A0StP(bf prep_args) -> FnArgn FnArgn-1 | + ! FnArgn-2FnArgn-3 | + ! ................ | <= cifs->bytes + ! FnArg4 FnArg3 | + ! A0StP (prv_A0StP+cifs->bytes) FnArg2 FnArg1 | high + ! + ! fn was in Arg1 so it's located in in A0FrP+#-0xC + ! + + ! D0Re0 contains the size of arguments stored in registers + sub A0StP, A0StP, D0Re0 + + ! Arg1 is the function pointer for the foreign call. This has been + ! preserved in D1.5 + + ! Time to call (fn). Arguments should be like this: + ! Arg1-Arg6 are loaded to regs + ! The rest of the arguments are stored in stack pointed by A0StP + + call_reg D1.5 + + ! Reset stack. + + mov A0StP, A0FrP + + ! Load Arg1 with the pointer to storage for the return type + ! This was stored in Arg5 + + getd D1Ar1, [A0FrP+#-20] + + ! Load D0Ar2 with the return type code. This was stored in Arg4 (flags) + + getd D0Ar2, [A0FrP+#-16] + + ! We are ready to start processing the return value + ! D0Re0 (and D1Re0) hold the return value + + ! If the return value is NULL, assume no return value + cmp D1Ar1, #0 + beq LSYM(Lepilogue) + + ! return INT + cmp D0Ar2, #FFI_TYPE_INT + ! Sadly, there is no setd{cc} instruction so we need to workaround that + bne .INT64 + setd [D1Ar1], D0Re0 + b LSYM(Lepilogue) + + ! return INT64 +.INT64: + cmp D0Ar2, #FFI_TYPE_SINT64 + setleq [D1Ar1], D0Re0, D1Re0 + + ! return DOUBLE + cmp D0Ar2, #FFI_TYPE_DOUBLE + setl [D1AR1++], D0Re0, D1Re0 + +LSYM(Lepilogue): + ! At this point, the stack pointer points right after the argument + ! saved area. We need to restore 4 regs, therefore we need to move + ! 16 bytes ahead. 
+ add A0StP, A0StP, #16 + RET_REGS "D1.5, D0.5" + RET_ARGS + getd D0Re0, [A0StP] + mov A0FrP, D0FrT + swap D1RtP, PC + +.ffi_call_SYSV_end: + .size CNAME(ffi_call_SYSV),.ffi_call_SYSV_end-CNAME(ffi_call_SYSV) + + +/* + (called by ffi_metag_trampoline) + void ffi_closure_SYSV (ffi_closure*) + + (called by ffi_closure_SYSV) + unsigned int FFI_HIDDEN + ffi_closure_SYSV_inner (closure,respp, args) + ffi_closure *closure; + void **respp; + void *args; +*/ + +METAG_FUNC_START ffi_closure_SYSV + ! We assume that D1Ar1 holds the address of the + ! ffi_closure struct. We will use that to fetch the + ! arguments. The stack pointer points to an empty space + ! and it is ready to store more data. + + ! D1Ar1 is ready + ! Allocate stack space for return value + add A0StP, A0StP, #8 + ! Store it to D0Ar2 + sub D0Ar2, A0StP, #8 + + sub D1Ar3, A0FrP, #4 + + ! D1Ar3 contains the address of the original D1Ar1 argument + ! We need to subtract #4 later on + + ! Preserve D0Ar2 + mov D0.5, D0Ar2 + +#ifdef __PIC__ + callr D1RtP, CNAME(ffi_closure_SYSV_inner@PLT) +#else + callr D1RtP, CNAME(ffi_closure_SYSV_inner) +#endif + + ! Check the return value and store it to D0.5 + cmp D0Re0, #FFI_TYPE_INT + beq .Lretint + cmp D0Re0, #FFI_TYPE_DOUBLE + beq .Lretdouble +.Lclosure_epilogue: + sub A0StP, A0StP, #8 + RET_REGS "D1.5, D0.5" + RET_ARGS + swap D1RtP, PC + +.Lretint: + setd [D0.5], D0Re0 + b .Lclosure_epilogue +.Lretdouble: + setl [D0.5++], D0Re0, D1Re0 + b .Lclosure_epilogue +.ffi_closure_SYSV_end: +.size CNAME(ffi_closure_SYSV),.ffi_closure_SYSV_end-CNAME(ffi_closure_SYSV) + + +ENTRY(ffi_metag_trampoline) + SAVE_ARGS + ! New frame + mov A0FrP, A0StP + SAVE_REGS "D1.5, D0.5" + mov D0.5, PC + ! Load D1Ar1 the value of ffi_metag_trampoline + getd D1Ar1, [D0.5 + #8] + ! 
Jump to ffi_closure_SYSV + getd PC, [D0.5 + #12] diff --git a/src/microblaze/ffi.c b/src/microblaze/ffi.c new file mode 100644 index 0000000..5c155c5 --- /dev/null +++ b/src/microblaze/ffi.c @@ -0,0 +1,321 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (c) 2012, 2013 Xilinx, Inc + + MicroBlaze Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. 
+ ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> + +extern void ffi_call_SYSV(void (*)(void*, extended_cif*), extended_cif*, + unsigned int, unsigned int, unsigned int*, void (*fn)(void), + unsigned int, unsigned int); + +extern void ffi_closure_SYSV(void); + +#define WORD_SIZE sizeof(unsigned int) +#define ARGS_REGISTER_SIZE (WORD_SIZE * 6) +#define WORD_ALIGN(x) ALIGN(x, WORD_SIZE) + +/* ffi_prep_args is called by the assembly routine once stack space + has been allocated for the function's arguments */ +void ffi_prep_args(void* stack, extended_cif* ecif) +{ + unsigned int i; + ffi_type** p_arg; + void** p_argv; + void* stack_args_p = stack; + + p_argv = ecif->avalue; + + if (ecif == NULL || ecif->cif == NULL) { + return; /* no description to prepare */ + } + + if ((ecif->cif->rtype != NULL) && + (ecif->cif->rtype->type == FFI_TYPE_STRUCT)) + { + /* if return type is a struct which is referenced on the stack/reg5, + * by a pointer. Stored the return value pointer in r5. 
+ */ + char* addr = stack_args_p; + memcpy(addr, &(ecif->rvalue), WORD_SIZE); + stack_args_p += WORD_SIZE; + } + + if (ecif->avalue == NULL) { + return; /* no arguments to prepare */ + } + + for (i = 0, p_arg = ecif->cif->arg_types; i < ecif->cif->nargs; + i++, p_arg++) + { + size_t size = (*p_arg)->size; + int type = (*p_arg)->type; + void* value = p_argv[i]; + char* addr = stack_args_p; + int aligned_size = WORD_ALIGN(size); + + /* force word alignment on the stack */ + stack_args_p += aligned_size; + + switch (type) + { + case FFI_TYPE_UINT8: + *(unsigned int *)addr = (unsigned int)*(UINT8*)(value); + break; + case FFI_TYPE_SINT8: + *(signed int *)addr = (signed int)*(SINT8*)(value); + break; + case FFI_TYPE_UINT16: + *(unsigned int *)addr = (unsigned int)*(UINT16*)(value); + break; + case FFI_TYPE_SINT16: + *(signed int *)addr = (signed int)*(SINT16*)(value); + break; + case FFI_TYPE_STRUCT: +#if __BIG_ENDIAN__ + /* + * MicroBlaze toolchain appears to emit: + * bsrli r5, r5, 8 (caller) + * ... + * <branch to callee> + * ... + * bslli r5, r5, 8 (callee) + * + * For structs like "struct a { uint8_t a[3]; };", when passed + * by value. + * + * Structs like "struct b { uint16_t a; };" are also expected + * to be packed strangely in registers. + * + * This appears to be because the microblaze toolchain expects + * "struct b == uint16_t", which is only any issue for big + * endian. + * + * The following is a work around for big-endian only, for the + * above mentioned case, it will re-align the contents of a + * <= 3-byte struct value. 
+ */ + if (size < WORD_SIZE) + { + memcpy (addr + (WORD_SIZE - size), value, size); + break; + } +#endif + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + case FFI_TYPE_FLOAT: + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + case FFI_TYPE_DOUBLE: + default: + memcpy(addr, value, aligned_size); + } + } +} + +ffi_status ffi_prep_cif_machdep(ffi_cif* cif) +{ + /* check ABI */ + switch (cif->abi) + { + case FFI_SYSV: + break; + default: + return FFI_BAD_ABI; + } + return FFI_OK; +} + +void ffi_call(ffi_cif* cif, void (*fn)(void), void* rvalue, void** avalue) +{ + extended_cif ecif; + ecif.cif = cif; + ecif.avalue = avalue; + + /* If the return value is a struct and we don't have a return */ + /* value address then we need to make one */ + if ((rvalue == NULL) && (cif->rtype->type == FFI_TYPE_STRUCT)) { + ecif.rvalue = alloca(cif->rtype->size); + } else { + ecif.rvalue = rvalue; + } + + switch (cif->abi) + { + case FFI_SYSV: + ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags, + ecif.rvalue, fn, cif->rtype->type, cif->rtype->size); + break; + default: + FFI_ASSERT(0); + break; + } +} + +void ffi_closure_call_SYSV(void* register_args, void* stack_args, + ffi_closure* closure, void* rvalue, + unsigned int* rtype, unsigned int* rsize) +{ + /* prepare arguments for closure call */ + ffi_cif* cif = closure->cif; + ffi_type** arg_types = cif->arg_types; + + /* re-allocate data for the args. This needs to be done in order to keep + * multi-word objects (e.g. structs) in contigious memory. Callers are not + * required to store the value of args in the lower 6 words in the stack + * (although they are allocated in the stack). 
+ */ + char* stackclone = alloca(cif->bytes); + void** avalue = alloca(cif->nargs * sizeof(void*)); + void* struct_rvalue = NULL; + char* ptr = stackclone; + int i; + + /* copy registers into stack clone */ + int registers_used = cif->bytes; + if (registers_used > ARGS_REGISTER_SIZE) { + registers_used = ARGS_REGISTER_SIZE; + } + memcpy(stackclone, register_args, registers_used); + + /* copy stack allocated args into stack clone */ + if (cif->bytes > ARGS_REGISTER_SIZE) { + int stack_used = cif->bytes - ARGS_REGISTER_SIZE; + memcpy(stackclone + ARGS_REGISTER_SIZE, stack_args, stack_used); + } + + /* preserve struct type return pointer passing */ + if ((cif->rtype != NULL) && (cif->rtype->type == FFI_TYPE_STRUCT)) { + struct_rvalue = *((void**)ptr); + ptr += WORD_SIZE; + } + + /* populate arg pointer list */ + for (i = 0; i < cif->nargs; i++) + { + switch (arg_types[i]->type) + { + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: +#ifdef __BIG_ENDIAN__ + avalue[i] = ptr + 3; +#else + avalue[i] = ptr; +#endif + break; + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: +#ifdef __BIG_ENDIAN__ + avalue[i] = ptr + 2; +#else + avalue[i] = ptr; +#endif + break; + case FFI_TYPE_STRUCT: +#if __BIG_ENDIAN__ + /* + * Work around strange ABI behaviour. 
+ * (see info in ffi_prep_args) + */ + if (arg_types[i]->size < WORD_SIZE) + { + memcpy (ptr, ptr + (WORD_SIZE - arg_types[i]->size), arg_types[i]->size); + } +#endif + avalue[i] = (void*)ptr; + break; + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + case FFI_TYPE_DOUBLE: + avalue[i] = ptr; + break; + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + case FFI_TYPE_FLOAT: + default: + /* default 4-byte argument */ + avalue[i] = ptr; + break; + } + ptr += WORD_ALIGN(arg_types[i]->size); + } + + /* set the return type info passed back to the wrapper */ + *rsize = cif->rtype->size; + *rtype = cif->rtype->type; + if (struct_rvalue != NULL) { + closure->fun(cif, struct_rvalue, avalue, closure->user_data); + /* copy struct return pointer value into function return value */ + *((void**)rvalue) = struct_rvalue; + } else { + closure->fun(cif, rvalue, avalue, closure->user_data); + } +} + +ffi_status ffi_prep_closure_loc( + ffi_closure* closure, ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void* user_data, void* codeloc) +{ + unsigned long* tramp = (unsigned long*)&(closure->tramp[0]); + unsigned long cls = (unsigned long)codeloc; + unsigned long fn = 0; + unsigned long fn_closure_call_sysv = (unsigned long)ffi_closure_call_SYSV; + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + switch (cif->abi) + { + case FFI_SYSV: + fn = (unsigned long)ffi_closure_SYSV; + + /* load r11 (temp) with fn */ + /* imm fn(upper) */ + tramp[0] = 0xb0000000 | ((fn >> 16) & 0xffff); + /* addik r11, r0, fn(lower) */ + tramp[1] = 0x31600000 | (fn & 0xffff); + + /* load r12 (temp) with cls */ + /* imm cls(upper) */ + tramp[2] = 0xb0000000 | ((cls >> 16) & 0xffff); + /* addik r12, r0, cls(lower) */ + tramp[3] = 0x31800000 | (cls & 0xffff); + + /* load r3 (temp) with ffi_closure_call_SYSV */ + /* imm fn_closure_call_sysv(upper) */ + tramp[4] = 0xb0000000 | ((fn_closure_call_sysv >> 16) & 0xffff); + /* addik r3, r0, fn_closure_call_sysv(lower) */ + 
tramp[5] = 0x30600000 | (fn_closure_call_sysv & 0xffff); + /* branch/jump to address stored in r11 (fn) */ + tramp[6] = 0x98085800; /* bra r11 */ + + break; + default: + return FFI_BAD_ABI; + } + return FFI_OK; +} diff --git a/src/microblaze/ffitarget.h b/src/microblaze/ffitarget.h new file mode 100644 index 0000000..c6fa5a4 --- /dev/null +++ b/src/microblaze/ffitarget.h @@ -0,0 +1,53 @@ +/* ----------------------------------------------------------------------- + ffitarget.h - Copyright (c) 2012, 2013 Xilinx, Inc + + Target configuration macros for MicroBlaze. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +#ifndef LIBFFI_H +#error "Please do not include ffitarget.h directly into your source. Use ffi.h instead." 
+#endif + +#ifndef LIBFFI_ASM +typedef unsigned long ffi_arg; +typedef signed long ffi_sarg; + +typedef enum ffi_abi { + FFI_FIRST_ABI = 0, + FFI_SYSV, + FFI_LAST_ABI, + FFI_DEFAULT_ABI = FFI_SYSV +} ffi_abi; +#endif + +/* Definitions for closures */ + +#define FFI_CLOSURES 1 +#define FFI_NATIVE_RAW_API 0 + +#define FFI_TRAMPOLINE_SIZE (4*8) + +#endif diff --git a/src/microblaze/sysv.S b/src/microblaze/sysv.S new file mode 100644 index 0000000..7a195a6 --- /dev/null +++ b/src/microblaze/sysv.S @@ -0,0 +1,302 @@ +/* ----------------------------------------------------------------------- + sysv.S - Copyright (c) 2012, 2013 Xilinx, Inc + + MicroBlaze Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. 
+ ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + + /* + * arg[0] (r5) = ffi_prep_args, + * arg[1] (r6) = &ecif, + * arg[2] (r7) = cif->bytes, + * arg[3] (r8) = cif->flags, + * arg[4] (r9) = ecif.rvalue, + * arg[5] (r10) = fn + * arg[6] (sp[0]) = cif->rtype->type + * arg[7] (sp[4]) = cif->rtype->size + */ + .text + .globl ffi_call_SYSV + .type ffi_call_SYSV, @function +ffi_call_SYSV: + /* push callee saves */ + addik r1, r1, -20 + swi r19, r1, 0 /* Frame Pointer */ + swi r20, r1, 4 /* PIC register */ + swi r21, r1, 8 /* PIC register */ + swi r22, r1, 12 /* save for locals */ + swi r23, r1, 16 /* save for locals */ + + /* save the r5-r10 registers in the stack */ + addik r1, r1, -24 /* increment sp to store 6x 32-bit words */ + swi r5, r1, 0 + swi r6, r1, 4 + swi r7, r1, 8 + swi r8, r1, 12 + swi r9, r1, 16 + swi r10, r1, 20 + + /* save function pointer */ + addik r3, r5, 0 /* copy ffi_prep_args into r3 */ + addik r22, r1, 0 /* save sp for unallocated args into r22 (callee-saved) */ + addik r23, r10, 0 /* save function address into r23 (callee-saved) */ + + /* prepare stack with allocation for n (bytes = r7) args */ + rsub r1, r7, r1 /* subtract bytes from sp */ + + /* prep args for ffi_prep_args call */ + addik r5, r1, 0 /* store stack pointer into arg[0] */ + /* r6 still holds ecif for arg[1] */ + + /* Call ffi_prep_args(stack, &ecif). */ + addik r1, r1, -4 + swi r15, r1, 0 /* store the link register in the frame */ + brald r15, r3 + nop /* branch has delay slot */ + lwi r15, r1, 0 + addik r1, r1, 4 /* restore the link register from the frame */ + /* returns calling stack pointer location */ + + /* prepare args for fn call, prep_args populates them onto the stack */ + lwi r5, r1, 0 /* arg[0] */ + lwi r6, r1, 4 /* arg[1] */ + lwi r7, r1, 8 /* arg[2] */ + lwi r8, r1, 12 /* arg[3] */ + lwi r9, r1, 16 /* arg[4] */ + lwi r10, r1, 20 /* arg[5] */ + + /* call (fn) (...). 
*/ + addik r1, r1, -4 + swi r15, r1, 0 /* store the link register in the frame */ + brald r15, r23 + nop /* branch has delay slot */ + lwi r15, r1, 0 + addik r1, r1, 4 /* restore the link register from the frame */ + + /* Remove the space we pushed for the args. */ + addik r1, r22, 0 /* restore old SP */ + + /* restore this functions parameters */ + lwi r5, r1, 0 /* arg[0] */ + lwi r6, r1, 4 /* arg[1] */ + lwi r7, r1, 8 /* arg[2] */ + lwi r8, r1, 12 /* arg[3] */ + lwi r9, r1, 16 /* arg[4] */ + lwi r10, r1, 20 /* arg[5] */ + addik r1, r1, 24 /* decrement sp to de-allocate 6x 32-bit words */ + + /* If the return value pointer is NULL, assume no return value. */ + beqi r9, ffi_call_SYSV_end + + lwi r22, r1, 48 /* get return type (20 for locals + 28 for arg[6]) */ + lwi r23, r1, 52 /* get return size (20 for locals + 32 for arg[7]) */ + + /* Check if return type is actually a struct, do nothing */ + rsubi r11, r22, FFI_TYPE_STRUCT + beqi r11, ffi_call_SYSV_end + + /* Return 8bit */ + rsubi r11, r23, 1 + beqi r11, ffi_call_SYSV_store8 + + /* Return 16bit */ + rsubi r11, r23, 2 + beqi r11, ffi_call_SYSV_store16 + + /* Return 32bit */ + rsubi r11, r23, 4 + beqi r11, ffi_call_SYSV_store32 + + /* Return 64bit */ + rsubi r11, r23, 8 + beqi r11, ffi_call_SYSV_store64 + + /* Didnt match anything */ + bri ffi_call_SYSV_end + +ffi_call_SYSV_store64: + swi r3, r9, 0 /* store word r3 into return value */ + swi r4, r9, 4 /* store word r4 into return value */ + bri ffi_call_SYSV_end + +ffi_call_SYSV_store32: + swi r3, r9, 0 /* store word r3 into return value */ + bri ffi_call_SYSV_end + +ffi_call_SYSV_store16: +#ifdef __BIG_ENDIAN__ + shi r3, r9, 2 /* store half-word r3 into return value */ +#else + shi r3, r9, 0 /* store half-word r3 into return value */ +#endif + bri ffi_call_SYSV_end + +ffi_call_SYSV_store8: +#ifdef __BIG_ENDIAN__ + sbi r3, r9, 3 /* store byte r3 into return value */ +#else + sbi r3, r9, 0 /* store byte r3 into return value */ +#endif + bri ffi_call_SYSV_end + 
+ffi_call_SYSV_end: + /* callee restores */ + lwi r19, r1, 0 /* frame pointer */ + lwi r20, r1, 4 /* PIC register */ + lwi r21, r1, 8 /* PIC register */ + lwi r22, r1, 12 + lwi r23, r1, 16 + addik r1, r1, 20 + + /* return from sub-routine (with delay slot) */ + rtsd r15, 8 + nop + + .size ffi_call_SYSV, . - ffi_call_SYSV + +/* ------------------------------------------------------------------------- */ + + /* + * args passed into this function, are passed down to the callee. + * this function is the target of the closure trampoline, as such r12 is + * a pointer to the closure object. + */ + .text + .globl ffi_closure_SYSV + .type ffi_closure_SYSV, @function +ffi_closure_SYSV: + /* push callee saves */ + addik r11, r1, 28 /* save stack args start location (excluding regs/link) */ + addik r1, r1, -12 + swi r19, r1, 0 /* Frame Pointer */ + swi r20, r1, 4 /* PIC register */ + swi r21, r1, 8 /* PIC register */ + + /* store register args on stack */ + addik r1, r1, -24 + swi r5, r1, 0 + swi r6, r1, 4 + swi r7, r1, 8 + swi r8, r1, 12 + swi r9, r1, 16 + swi r10, r1, 20 + + /* setup args */ + addik r5, r1, 0 /* register_args */ + addik r6, r11, 0 /* stack_args */ + addik r7, r12, 0 /* closure object */ + addik r1, r1, -8 /* allocate return value */ + addik r8, r1, 0 /* void* rvalue */ + addik r1, r1, -8 /* allocate for return type/size values */ + addik r9, r1, 0 /* void* rtype */ + addik r10, r1, 4 /* void* rsize */ + + /* call the wrap_call function */ + addik r1, r1, -28 /* allocate args + link reg */ + swi r15, r1, 0 /* store the link register in the frame */ + brald r15, r3 + nop /* branch has delay slot */ + lwi r15, r1, 0 + addik r1, r1, 28 /* restore the link register from the frame */ + +ffi_closure_SYSV_prepare_return: + lwi r9, r1, 0 /* rtype */ + lwi r10, r1, 4 /* rsize */ + addik r1, r1, 8 /* de-allocate return info values */ + + /* Check if return type is actually a struct, store 4 bytes */ + rsubi r11, r9, FFI_TYPE_STRUCT + beqi r11, ffi_closure_SYSV_store32 
+ + /* Return 8bit */ + rsubi r11, r10, 1 + beqi r11, ffi_closure_SYSV_store8 + + /* Return 16bit */ + rsubi r11, r10, 2 + beqi r11, ffi_closure_SYSV_store16 + + /* Return 32bit */ + rsubi r11, r10, 4 + beqi r11, ffi_closure_SYSV_store32 + + /* Return 64bit */ + rsubi r11, r10, 8 + beqi r11, ffi_closure_SYSV_store64 + + /* Didn't match anything */ + bri ffi_closure_SYSV_end + +ffi_closure_SYSV_store64: + lwi r3, r1, 0 /* load first word of return value into r3 */ + lwi r4, r1, 4 /* load second word of return value into r4 */ + /* 64 bits == 2 words, no sign extend occurs */ + bri ffi_closure_SYSV_end + +ffi_closure_SYSV_store32: + lwi r3, r1, 0 /* load word of return value into r3 */ + /* 32 bits == 1 word, no sign extend occurs */ + bri ffi_closure_SYSV_end + +ffi_closure_SYSV_store16: +#ifdef __BIG_ENDIAN__ + lhui r3, r1, 2 /* load half-word of return value into r3 */ +#else + lhui r3, r1, 0 /* load half-word of return value into r3 */ +#endif + rsubi r11, r9, FFI_TYPE_SINT16 + bnei r11, ffi_closure_SYSV_end + sext16 r3, r3 /* fix sign extend of sint16 */ + bri ffi_closure_SYSV_end + +ffi_closure_SYSV_store8: +#ifdef __BIG_ENDIAN__ + lbui r3, r1, 3 /* load byte of return value into r3 */ +#else + lbui r3, r1, 0 /* load byte of return value into r3 */ +#endif + rsubi r11, r9, FFI_TYPE_SINT8 + bnei r11, ffi_closure_SYSV_end + sext8 r3, r3 /* fix sign extend of sint8 */ + bri ffi_closure_SYSV_end + +ffi_closure_SYSV_end: + addik r1, r1, 8 /* de-allocate return value */ + + /* de-allocate stored args */ + addik r1, r1, 24 + + /* callee restores */ + lwi r19, r1, 0 /* frame pointer */ + lwi r20, r1, 4 /* PIC register */ + lwi r21, r1, 8 /* PIC register */ + addik r1, r1, 12 + + /* return from sub-routine (with delay slot) */ + rtsd r15, 8 + nop + + .size ffi_closure_SYSV, . 
- ffi_closure_SYSV diff --git a/src/mips/ffi.c b/src/mips/ffi.c index 79cff9b..03121e3 100644 --- a/src/mips/ffi.c +++ b/src/mips/ffi.c @@ -670,9 +670,16 @@ ffi_prep_closure_loc (ffi_closure *closure, if (cif->abi != FFI_O32 && cif->abi != FFI_O32_SOFT_FLOAT) return FFI_BAD_ABI; fn = ffi_closure_O32; -#else /* FFI_MIPS_N32 */ - if (cif->abi != FFI_N32 && cif->abi != FFI_N64) +#else +#if _MIPS_SIM ==_ABIN32 + if (cif->abi != FFI_N32 + && cif->abi != FFI_N32_SOFT_FLOAT) + return FFI_BAD_ABI; +#else + if (cif->abi != FFI_N64 + && cif->abi != FFI_N64_SOFT_FLOAT) return FFI_BAD_ABI; +#endif fn = ffi_closure_N32; #endif /* FFI_MIPS_O32 */ diff --git a/src/moxie/eabi.S b/src/moxie/eabi.S index 379ea4b..ac7aceb 100644 --- a/src/moxie/eabi.S +++ b/src/moxie/eabi.S @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- - eabi.S - Copyright (c) 2004 Anthony Green + eabi.S - Copyright (c) 2012, 2013 Anthony Green - FR-V Assembly glue. + Moxie Assembly glue. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -34,95 +34,68 @@ .globl ffi_call_EABI .type ffi_call_EABI, @function - # gr8 : ffi_prep_args - # gr9 : &ecif - # gr10: cif->bytes - # gr11: fig->flags - # gr12: ecif.rvalue - # gr13: fn + # $r0 : ffi_prep_args + # $r1 : &ecif + # $r2 : cif->bytes + # $r3 : fig->flags + # $r4 : ecif.rvalue + # $r5 : fn -ffi_call_EABI: - addi sp, #-80, sp - sti fp, @(sp, #24) - addi sp, #24, fp - movsg lr, gr5 +ffi_call_EABI: + push $sp, $r6 + push $sp, $r7 + push $sp, $r8 + dec $sp, 24 - /* Make room for the new arguments. */ - /* subi sp, fp, gr10 */ - - /* Store return address and incoming args on stack. */ - sti gr5, @(fp, #8) - sti gr8, @(fp, #-4) - sti gr9, @(fp, #-8) - sti gr10, @(fp, #-12) - sti gr11, @(fp, #-16) - sti gr12, @(fp, #-20) - sti gr13, @(fp, #-24) - - sub sp, gr10, sp + /* Store incoming args on stack. 
*/ + sto.l 0($sp), $r0 /* ffi_prep_args */ + sto.l 4($sp), $r1 /* ecif */ + sto.l 8($sp), $r2 /* bytes */ + sto.l 12($sp), $r3 /* flags */ + sto.l 16($sp), $r4 /* &rvalue */ + sto.l 20($sp), $r5 /* fn */ /* Call ffi_prep_args. */ - ldi @(fp, #-4), gr4 - addi sp, #0, gr8 - ldi @(fp, #-8), gr9 -#ifdef __FRV_FDPIC__ - ldd @(gr4, gr0), gr14 - calll @(gr14, gr0) -#else - calll @(gr4, gr0) -#endif - - /* ffi_prep_args returns the new stack pointer. */ - mov gr8, gr4 - - ldi @(sp, #0), gr8 - ldi @(sp, #4), gr9 - ldi @(sp, #8), gr10 - ldi @(sp, #12), gr11 - ldi @(sp, #16), gr12 - ldi @(sp, #20), gr13 + mov $r6, $r4 /* Save result buffer */ + mov $r7, $r5 /* Save the target fn */ + mov $r8, $r3 /* Save the flags */ + sub.l $sp, $r2 /* Allocate stack space */ + mov $r0, $sp /* We can stomp over $r0 */ + /* $r1 is already set up */ + jsra ffi_prep_args - /* Always copy the return value pointer into the hidden - parameter register. This is only strictly necessary - when we're returning an aggregate type, but it doesn't - hurt to do this all the time, and it saves a branch. */ - ldi @(fp, #-20), gr3 - - /* Use the ffi_prep_args return value for the new sp. */ - mov gr4, sp + /* Load register arguments. */ + ldo.l $r0, 0($sp) + ldo.l $r1, 4($sp) + ldo.l $r2, 8($sp) + ldo.l $r3, 12($sp) + ldo.l $r4, 16($sp) + ldo.l $r5, 20($sp) /* Call the target function. */ - ldi @(fp, -24), gr4 -#ifdef __FRV_FDPIC__ - ldd @(gr4, gr0), gr14 - calll @(gr14, gr0) -#else - calll @(gr4, gr0) -#endif + jsr $r7 + + ldi.l $r7, 0xffffffff + cmp $r8, $r7 + beq retstruct + + ldi.l $r7, 4 + cmp $r8, $r7 + bgt ret2reg - /* Store the result. */ - ldi @(fp, #-16), gr10 /* fig->flags */ - ldi @(fp, #-20), gr4 /* ecif.rvalue */ + st.l ($r6), $r0 + jmpa retdone - /* Is the return value stored in two registers? */ - cmpi gr10, #8, icc0 - bne icc0, 0, .L2 - /* Yes, save them. */ - sti gr8, @(gr4, #0) - sti gr9, @(gr4, #4) - bra .L3 -.L2: - /* Is the return value a structure? 
*/ - cmpi gr10, #-1, icc0 - beq icc0, 0, .L3 - /* No, save a 4 byte return value. */ - sti gr8, @(gr4, #0) -.L3: +ret2reg: + st.l ($r6), $r0 + sto.l 4($r6), $r1 - /* Restore the stack, and return. */ - ldi @(fp, 8), gr5 - ld @(fp, gr0), fp - addi sp,#80,sp - jmpl @(gr5,gr0) +retstruct: +retdone: + /* Return. */ + ldo.l $r6, -4($fp) + ldo.l $r7, -8($fp) + ldo.l $r8, -12($fp) + ret .size ffi_call_EABI, .-ffi_call_EABI diff --git a/src/moxie/ffi.c b/src/moxie/ffi.c index 54cbbb9..540a042 100644 --- a/src/moxie/ffi.c +++ b/src/moxie/ffi.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------- - ffi.c - Copyright (C) 2009 Anthony Green + ffi.c - Copyright (C) 2012, 2013 Anthony Green Moxie Foreign Function Interface @@ -43,6 +43,12 @@ void *ffi_prep_args(char *stack, extended_cif *ecif) p_argv = ecif->avalue; argp = stack; + if (ecif->cif->rtype->type == FFI_TYPE_STRUCT) + { + *(void **) argp = ecif->rvalue; + argp += 4; + } + for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types; (i != 0); i--, p_arg++) @@ -56,17 +62,6 @@ void *ffi_prep_args(char *stack, extended_cif *ecif) z = sizeof(void*); *(void **) argp = *p_argv; } - /* if ((*p_arg)->type == FFI_TYPE_FLOAT) - { - if (count > 24) - { - // This is going on the stack. Turn it into a double. - *(double *) argp = (double) *(float*)(* p_argv); - z = sizeof(double); - } - else - *(void **) argp = *(void **)(* p_argv); - } */ else if (z < sizeof(int)) { z = sizeof(int); @@ -147,8 +142,7 @@ void ffi_call(ffi_cif *cif, } else ecif.rvalue = rvalue; - - + switch (cif->abi) { case FFI_EABI: @@ -165,19 +159,25 @@ void ffi_closure_eabi (unsigned arg1, unsigned arg2, unsigned arg3, unsigned arg4, unsigned arg5, unsigned arg6) { /* This function is called by a trampoline. The trampoline stows a - pointer to the ffi_closure object in gr7. We must save this + pointer to the ffi_closure object in $r7. We must save this pointer in a place that will persist while we do our work. 
*/ - register ffi_closure *creg __asm__ ("gr7"); + register ffi_closure *creg __asm__ ("$r12"); ffi_closure *closure = creg; /* Arguments that don't fit in registers are found on the stack at a fixed offset above the current frame pointer. */ - register char *frame_pointer __asm__ ("fp"); - char *stack_args = frame_pointer + 16; + register char *frame_pointer __asm__ ("$fp"); + + /* Pointer to a struct return value. */ + void *struct_rvalue = (void *) arg1; + + /* 6 words reserved for register args + 3 words from jsr */ + char *stack_args = frame_pointer + 9*4; /* Lay the register arguments down in a continuous chunk of memory. */ unsigned register_args[6] = { arg1, arg2, arg3, arg4, arg5, arg6 }; + char *register_args_ptr = (char *) register_args; ffi_cif *cif = closure->cif; ffi_type **arg_types = cif->arg_types; @@ -185,6 +185,12 @@ void ffi_closure_eabi (unsigned arg1, unsigned arg2, unsigned arg3, char *ptr = (char *) register_args; int i; + /* preserve struct type return pointer passing */ + if ((cif->rtype != NULL) && (cif->rtype->type == FFI_TYPE_STRUCT)) { + ptr += 4; + register_args_ptr = (char *)&register_args[1]; + } + /* Find the address of each argument. */ for (i = 0; i < cif->nargs; i++) { @@ -201,6 +207,7 @@ void ffi_closure_eabi (unsigned arg1, unsigned arg2, unsigned arg3, case FFI_TYPE_SINT32: case FFI_TYPE_UINT32: case FFI_TYPE_FLOAT: + case FFI_TYPE_POINTER: avalue[i] = ptr; break; case FFI_TYPE_STRUCT: @@ -216,30 +223,21 @@ void ffi_closure_eabi (unsigned arg1, unsigned arg2, unsigned arg3, /* If we've handled more arguments than fit in registers, start looking at the those passed on the stack. */ - if (ptr == ((char *)register_args + (6*4))) + if (ptr == &register_args[6]) ptr = stack_args; } /* Invoke the closure. */ - if (cif->rtype->type == FFI_TYPE_STRUCT) + if (cif->rtype && (cif->rtype->type == FFI_TYPE_STRUCT)) { - /* The caller allocates space for the return structure, and - passes a pointer to this space in gr3. 
Use this value directly - as the return value. */ - register void *return_struct_ptr __asm__("gr3"); - (closure->fun) (cif, return_struct_ptr, avalue, closure->user_data); + (closure->fun) (cif, struct_rvalue, avalue, closure->user_data); } else { /* Allocate space for the return value and call the function. */ long long rvalue; (closure->fun) (cif, &rvalue, avalue, closure->user_data); - - /* Functions return 4-byte or smaller results in gr8. 8-byte - values also use gr9. We fill the both, even for small return - values, just to avoid a branch. */ - asm ("ldi @(%0, #0), gr8" : : "r" (&rvalue)); - asm ("ldi @(%0, #0), gr9" : : "r" (&((int *) &rvalue)[1])); + asm ("mov $r12, %0\n ld.l $r0, ($r12)\n ldo.l $r1, 4($r12)" : : "r" (&rvalue)); } } @@ -250,27 +248,25 @@ ffi_prep_closure_loc (ffi_closure* closure, void *user_data, void *codeloc) { - unsigned int *tramp = (unsigned int *) &closure->tramp[0]; + unsigned short *tramp = (unsigned short *) &closure->tramp[0]; unsigned long fn = (long) ffi_closure_eabi; unsigned long cls = (long) codeloc; - int i; + + if (cif->abi != FFI_EABI) + return FFI_BAD_ABI; fn = (unsigned long) ffi_closure_eabi; - tramp[0] = 0x8cfc0000 + (fn & 0xffff); /* setlos lo(fn), gr6 */ - tramp[1] = 0x8efc0000 + (cls & 0xffff); /* setlos lo(cls), gr7 */ - tramp[2] = 0x8cf80000 + (fn >> 16); /* sethi hi(fn), gr6 */ - tramp[3] = 0x8ef80000 + (cls >> 16); /* sethi hi(cls), gr7 */ - tramp[4] = 0x80300006; /* jmpl @(gr0, gr6) */ + tramp[0] = 0x01e0; /* ldi.l $r7, .... */ + tramp[1] = cls >> 16; + tramp[2] = cls & 0xffff; + tramp[3] = 0x1a00; /* jmpa .... */ + tramp[4] = fn >> 16; + tramp[5] = fn & 0xffff; closure->cif = cif; closure->fun = fun; closure->user_data = user_data; - /* Cache flushing. 
*/ - for (i = 0; i < FFI_TRAMPOLINE_SIZE; i++) - __asm__ volatile ("dcf @(%0,%1)\n\tici @(%2,%1)" :: "r" (tramp), "r" (i), - "r" (codeloc)); - return FFI_OK; } diff --git a/src/moxie/ffitarget.h b/src/moxie/ffitarget.h index f5305d1..623e3ec 100644 --- a/src/moxie/ffitarget.h +++ b/src/moxie/ffitarget.h @@ -1,5 +1,5 @@ /* -----------------------------------------------------------------*-C-*- - ffitarget.h - Copyright (c) 2009 Anthony Green + ffitarget.h - Copyright (c) 2012, 2013 Anthony Green Target configuration macros for Moxie Permission is hereby granted, free of charge, to any person obtaining @@ -35,22 +35,18 @@ typedef signed long ffi_sarg; typedef enum ffi_abi { FFI_FIRST_ABI = 0, - -#ifdef MOXIE FFI_EABI, FFI_DEFAULT_ABI = FFI_EABI, -#endif - FFI_LAST_ABI = FFI_DEFAULT_ABI + 1 } ffi_abi; #endif /* ---- Definitions for closures ----------------------------------------- */ -#define FFI_CLOSURES 0 +#define FFI_CLOSURES 1 #define FFI_NATIVE_RAW_API 0 -/* Trampolines are 5 4-byte instructions long. */ -#define FFI_TRAMPOLINE_SIZE (5*4) +/* Trampolines are 12-bytes long. See ffi_prep_closure_loc. */ +#define FFI_TRAMPOLINE_SIZE (12) #endif diff --git a/src/powerpc/aix.S b/src/powerpc/aix.S index 213f2db..349e78c 100644 --- a/src/powerpc/aix.S +++ b/src/powerpc/aix.S @@ -137,7 +137,7 @@ ffi_call_AIX: mtcrf 0x40, r31 mtctr r0 /* Load all those argument registers. */ - // We have set up a nice stack frame, just load it into registers. + /* We have set up a nice stack frame, just load it into registers. */ ld r3, 40+(1*8)(r1) ld r4, 40+(2*8)(r1) ld r5, 40+(3*8)(r1) @@ -150,7 +150,7 @@ ffi_call_AIX: L1: /* Load all the FP registers. */ - bf 6,L2 // 2f + 0x18 + bf 6,L2 /* 2f + 0x18 */ lfd f1,-32-(13*8)(r28) lfd f2,-32-(12*8)(r28) lfd f3,-32-(11*8)(r28) @@ -239,7 +239,7 @@ L(float_return_value): mtcrf 0x40, r31 mtctr r0 /* Load all those argument registers. */ - // We have set up a nice stack frame, just load it into registers. 
+ /* We have set up a nice stack frame, just load it into registers. */ lwz r3, 20+(1*4)(r1) lwz r4, 20+(2*4)(r1) lwz r5, 20+(3*4)(r1) @@ -252,7 +252,7 @@ L(float_return_value): L1: /* Load all the FP registers. */ - bf 6,L2 // 2f + 0x18 + bf 6,L2 /* 2f + 0x18 */ lfd f1,-16-(13*8)(r28) lfd f2,-16-(12*8)(r28) lfd f3,-16-(11*8)(r28) @@ -307,7 +307,7 @@ L(float_return_value): #endif .long 0 .byte 0,0,0,1,128,4,0,0 -//END(ffi_call_AIX) +/* END(ffi_call_AIX) */ .csect .text[PR] .align 2 @@ -325,4 +325,4 @@ ffi_call_DARWIN: blr .long 0 .byte 0,0,0,0,0,0,0,0 -//END(ffi_call_DARWIN) +/* END(ffi_call_DARWIN) */ diff --git a/src/powerpc/ffi.c b/src/powerpc/ffi.c index 1920c91..54f2731 100644 --- a/src/powerpc/ffi.c +++ b/src/powerpc/ffi.c @@ -48,6 +48,11 @@ enum { FLAG_RETURNS_128BITS = 1 << (31-27), /* cr6 */ + FLAG_SYSV_SMST_R4 = 1 << (31-26), /* use r4 for FFI_SYSV 8 byte + structs. */ + FLAG_SYSV_SMST_R3 = 1 << (31-25), /* use r3 for FFI_SYSV 4 byte + structs. */ + FLAG_ARG_NEEDS_COPY = 1 << (31- 7), #ifndef __NO_FPRS__ FLAG_FP_ARGUMENTS = 1 << (31- 6), /* cr1.eq; specified by ABI */ @@ -146,6 +151,7 @@ ffi_prep_args_SYSV (extended_cif *ecif, unsigned *const stack) gpr_base.u = stacktop.u - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS; intarg_count = 0; #ifndef __NO_FPRS__ + double double_tmp; fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS; fparg_count = 0; copy_space.c = ((flags & FLAG_FP_ARGUMENTS) ? fpr_base.c : gpr_base.c); @@ -155,9 +161,9 @@ ffi_prep_args_SYSV (extended_cif *ecif, unsigned *const stack) next_arg.u = stack + 2; /* Check that everything starts aligned properly. 
*/ - FFI_ASSERT (((unsigned) (char *) stack & 0xF) == 0); - FFI_ASSERT (((unsigned) copy_space.c & 0xF) == 0); - FFI_ASSERT (((unsigned) stacktop.c & 0xF) == 0); + FFI_ASSERT (((unsigned long) (char *) stack & 0xF) == 0); + FFI_ASSERT (((unsigned long) copy_space.c & 0xF) == 0); + FFI_ASSERT (((unsigned long) stacktop.c & 0xF) == 0); FFI_ASSERT ((bytes & 0xF) == 0); FFI_ASSERT (copy_space.c >= next_arg.c); @@ -211,8 +217,6 @@ ffi_prep_args_SYSV (extended_cif *ecif, unsigned *const stack) case FFI_TYPE_DOUBLE: /* With FFI_LINUX_SOFT_FLOAT doubles are handled like UINT64. */ - if (ecif->cif->abi == FFI_LINUX_SOFT_FLOAT) - goto soft_double_prep; double_tmp = **p_argv.d; if (fparg_count >= NUM_FPR_ARG_REGISTERS) @@ -368,7 +372,13 @@ ffi_prep_args_SYSV (extended_cif *ecif, unsigned *const stack) /* Check that we didn't overrun the stack... */ FFI_ASSERT (copy_space.c >= next_arg.c); FFI_ASSERT (gpr_base.u <= stacktop.u - ASM_NEEDS_REGISTERS); + /* The assert below is testing that the number of integer arguments agrees + with the number found in ffi_prep_cif_machdep(). However, intarg_count + is incremented whenever we place an FP arg on the stack, so account for + that before our assert test. */ #ifndef __NO_FPRS__ + if (fparg_count > NUM_FPR_ARG_REGISTERS) + intarg_count -= fparg_count - NUM_FPR_ARG_REGISTERS; FFI_ASSERT (fpr_base.u <= stacktop.u - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS); #endif @@ -665,9 +675,11 @@ ffi_prep_cif_machdep (ffi_cif *cif) switch (type) { #ifndef __NO_FPRS__ +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE case FFI_TYPE_LONGDOUBLE: flags |= FLAG_RETURNS_128BITS; /* Fall through. */ +#endif case FFI_TYPE_DOUBLE: flags |= FLAG_RETURNS_64BITS; /* Fall through. */ @@ -685,18 +697,35 @@ ffi_prep_cif_machdep (ffi_cif *cif) break; case FFI_TYPE_STRUCT: - /* - * The final SYSV ABI says that structures smaller or equal 8 bytes - * are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them - * in memory. 
- * - * NOTE: The assembly code can safely assume that it just needs to - * store both r3 and r4 into a 8-byte word-aligned buffer, as - * we allocate a temporary buffer in ffi_call() if this flag is - * set. - */ - if (cif->abi == FFI_SYSV && size <= 8) - flags |= FLAG_RETURNS_SMST; + if (cif->abi == FFI_SYSV) + { + /* The final SYSV ABI says that structures smaller or equal 8 bytes + are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them + in memory. */ + + /* Treat structs with size <= 8 bytes. */ + if (size <= 8) + { + flags |= FLAG_RETURNS_SMST; + /* These structs are returned in r3. We pack the type and the + precalculated shift value (needed in the sysv.S) into flags. + The same applies for the structs returned in r3/r4. */ + if (size <= 4) + { + flags |= FLAG_SYSV_SMST_R3; + flags |= 8 * (4 - size) << 8; + break; + } + /* These structs are returned in r3 and r4. See above. */ + if (size <= 8) + { + flags |= FLAG_SYSV_SMST_R3 | FLAG_SYSV_SMST_R4; + flags |= 8 * (8 - size) << 8; + break; + } + } + } + intarg_count++; flags |= FLAG_RETVAL_REFERENCE; /* Fall through. */ @@ -925,7 +954,7 @@ ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) */ unsigned int smst_buffer[2]; extended_cif ecif; - unsigned int rsize; + unsigned int rsize = 0; ecif.cif = cif; ecif.avalue = avalue; @@ -1132,7 +1161,7 @@ ffi_closure_helper_SYSV (ffi_closure *closure, void *rvalue, if (nf < 8) { - temp = pfr->d; + double temp = pfr->d; pfr->f = (float) temp; avalue[i] = pfr; nf++; diff --git a/src/powerpc/ffi_darwin.c b/src/powerpc/ffi_darwin.c index c3cd1da..1d1d48c 100644 --- a/src/powerpc/ffi_darwin.c +++ b/src/powerpc/ffi_darwin.c @@ -302,10 +302,10 @@ ffi_prep_args (extended_cif *ecif, unsigned long *const stack) } /* Check that we didn't overrun the stack... 
*/ - //FFI_ASSERT(gpr_base <= stacktop - ASM_NEEDS_REGISTERS); - //FFI_ASSERT((unsigned *)fpr_base - // <= stacktop - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS); - //FFI_ASSERT(flags & FLAG_4_GPR_ARGUMENTS || intarg_count <= 4); + /* FFI_ASSERT(gpr_base <= stacktop - ASM_NEEDS_REGISTERS); + FFI_ASSERT((unsigned *)fpr_base + <= stacktop - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS); + FFI_ASSERT(flags & FLAG_4_GPR_ARGUMENTS || intarg_count <= 4); */ } #if defined(POWERPC_DARWIN64) @@ -1235,7 +1235,7 @@ ffi_closure_helper_DARWIN (ffi_closure *closure, void *rvalue, if (arg_types[i]->elements[0]->type == FFI_TYPE_DOUBLE) size_al = ALIGN(arg_types[i]->size, 8); # if defined(POWERPC64) - FFI_ASSERT (cif->abi != FFI_DARWIN) + FFI_ASSERT (cif->abi != FFI_DARWIN); avalue[i] = pgr; pgr += (size_al + 7) / 8; # else diff --git a/src/powerpc/linux64.S b/src/powerpc/linux64.S index 57b56cb..f28da81 100644 --- a/src/powerpc/linux64.S +++ b/src/powerpc/linux64.S @@ -30,16 +30,25 @@ #include <ffi.h> #ifdef __powerpc64__ - .hidden ffi_call_LINUX64, .ffi_call_LINUX64 - .globl ffi_call_LINUX64, .ffi_call_LINUX64 + .hidden ffi_call_LINUX64 + .globl ffi_call_LINUX64 .section ".opd","aw" .align 3 ffi_call_LINUX64: +#ifdef _CALL_LINUX + .quad .L.ffi_call_LINUX64,.TOC.@tocbase,0 + .type ffi_call_LINUX64,@function + .text +.L.ffi_call_LINUX64: +#else + .hidden .ffi_call_LINUX64 + .globl .ffi_call_LINUX64 .quad .ffi_call_LINUX64,.TOC.@tocbase,0 .size ffi_call_LINUX64,24 .type .ffi_call_LINUX64,@function .text .ffi_call_LINUX64: +#endif .LFB1: mflr %r0 std %r28, -32(%r1) @@ -58,7 +67,11 @@ ffi_call_LINUX64: /* Call ffi_prep_args64. 
*/ mr %r4, %r1 +#ifdef _CALL_LINUX + bl ffi_prep_args64 +#else bl .ffi_prep_args64 +#endif ld %r0, 0(%r29) ld %r2, 8(%r29) @@ -137,7 +150,11 @@ ffi_call_LINUX64: .LFE1: .long 0 .byte 0,12,0,1,128,4,0,0 +#ifdef _CALL_LINUX + .size ffi_call_LINUX64,.-.L.ffi_call_LINUX64 +#else .size .ffi_call_LINUX64,.-.ffi_call_LINUX64 +#endif .section .eh_frame,EH_FRAME_FLAGS,@progbits .Lframe1: diff --git a/src/powerpc/linux64_closure.S b/src/powerpc/linux64_closure.S index f7aa2c9..b1e1219 100644 --- a/src/powerpc/linux64_closure.S +++ b/src/powerpc/linux64_closure.S @@ -32,16 +32,24 @@ #ifdef __powerpc64__ FFI_HIDDEN (ffi_closure_LINUX64) - FFI_HIDDEN (.ffi_closure_LINUX64) - .globl ffi_closure_LINUX64, .ffi_closure_LINUX64 + .globl ffi_closure_LINUX64 .section ".opd","aw" .align 3 ffi_closure_LINUX64: +#ifdef _CALL_LINUX + .quad .L.ffi_closure_LINUX64,.TOC.@tocbase,0 + .type ffi_closure_LINUX64,@function + .text +.L.ffi_closure_LINUX64: +#else + FFI_HIDDEN (.ffi_closure_LINUX64) + .globl .ffi_closure_LINUX64 .quad .ffi_closure_LINUX64,.TOC.@tocbase,0 .size ffi_closure_LINUX64,24 .type .ffi_closure_LINUX64,@function .text .ffi_closure_LINUX64: +#endif .LFB1: # save general regs into parm save area std %r3, 48(%r1) @@ -91,7 +99,11 @@ ffi_closure_LINUX64: addi %r6, %r1, 128 # make the call +#ifdef _CALL_LINUX + bl ffi_closure_helper_LINUX64 +#else bl .ffi_closure_helper_LINUX64 +#endif .Lret: # now r3 contains the return type @@ -194,7 +206,11 @@ ffi_closure_LINUX64: .LFE1: .long 0 .byte 0,12,0,1,128,0,0,0 +#ifdef _CALL_LINUX + .size ffi_closure_LINUX64,.-.L.ffi_closure_LINUX64 +#else .size .ffi_closure_LINUX64,.-.ffi_closure_LINUX64 +#endif .section .eh_frame,EH_FRAME_FLAGS,@progbits .Lframe1: diff --git a/src/powerpc/sysv.S b/src/powerpc/sysv.S index 675ed03..5ee3a19 100644 --- a/src/powerpc/sysv.S +++ b/src/powerpc/sysv.S @@ -142,14 +142,19 @@ L(float_return_value): #endif L(small_struct_return_value): - /* - * The C code always allocates a properly-aligned 8-byte bounce - * 
buffer to make this assembly code very simple. Just write out - * r3 and r4 to the buffer to allow the C code to handle the rest. - */ - stw %r3, 0(%r30) - stw %r4, 4(%r30) - b L(done_return_value) + extrwi %r6,%r31,2,19 /* number of bytes padding = shift/8 */ + mtcrf 0x02,%r31 /* copy flags to cr[24:27] (cr6) */ + extrwi %r5,%r31,5,19 /* r5 <- number of bits of padding */ + subfic %r6,%r6,4 /* r6 <- number of useful bytes in r3 */ + bf- 25,L(done_return_value) /* struct in r3 ? if not, done. */ +/* smst_one_register: */ + slw %r3,%r3,%r5 /* Left-justify value in r3 */ + mtxer %r6 /* move byte count to XER ... */ + stswx %r3,0,%r30 /* ... and store that many bytes */ + bf+ 26,L(done_return_value) /* struct in r3:r4 ? */ + add %r6,%r6,%r30 /* adjust pointer */ + stswi %r4,%r6,4 /* store last four bytes */ + b L(done_return_value) .LFE1: END(ffi_call_SYSV) diff --git a/src/prep_cif.c b/src/prep_cif.c index eb68341..e8ec5cf 100644 --- a/src/prep_cif.c +++ b/src/prep_cif.c @@ -140,6 +140,13 @@ ffi_status FFI_HIDDEN ffi_prep_cif_core(ffi_cif *cif, ffi_abi abi, #ifdef SPARC && (cif->abi != FFI_V9 || cif->rtype->size > 32) #endif +#ifdef TILE + && (cif->rtype->size > 10 * FFI_SIZEOF_ARG) +#endif +#ifdef XTENSA + && (cif->rtype->size > 16) +#endif + ) bytes = STACK_ARG_SIZE(sizeof(void*)); #endif @@ -169,6 +176,20 @@ ffi_status FFI_HIDDEN ffi_prep_cif_core(ffi_cif *cif, ffi_abi abi, if (((*ptr)->alignment - 1) & bytes) bytes = ALIGN(bytes, (*ptr)->alignment); +#ifdef TILE + if (bytes < 10 * FFI_SIZEOF_ARG && + bytes + STACK_ARG_SIZE((*ptr)->size) > 10 * FFI_SIZEOF_ARG) + { + /* An argument is never split between the 10 parameter + registers and the stack. 
*/ + bytes = 10 * FFI_SIZEOF_ARG; + } +#endif +#ifdef XTENSA + if (bytes <= 6*4 && bytes + STACK_ARG_SIZE((*ptr)->size) > 6*4) + bytes = 6*4; +#endif + bytes += STACK_ARG_SIZE((*ptr)->size); } #endif diff --git a/src/s390/ffi.c b/src/s390/ffi.c index ca2675b..8adb5bc 100644 --- a/src/s390/ffi.c +++ b/src/s390/ffi.c @@ -750,7 +750,8 @@ ffi_prep_closure_loc (ffi_closure *closure, void *user_data, void *codeloc) { - FFI_ASSERT (cif->abi == FFI_SYSV); + if (cif->abi != FFI_SYSV) + return FFI_BAD_ABI; #ifndef __s390x__ *(short *)&closure->tramp [0] = 0x0d10; /* basr %r1,0 */ diff --git a/src/sparc/ffi.c b/src/sparc/ffi.c index 1ac5d46..9f0fded 100644 --- a/src/sparc/ffi.c +++ b/src/sparc/ffi.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------- - ffi.c - Copyright (c) 2011 Anthony Green + ffi.c - Copyright (c) 2011, 2013 Anthony Green Copyright (c) 1996, 2003-2004, 2007-2008 Red Hat, Inc. SPARC Foreign Function Interface @@ -376,6 +376,10 @@ extern int ffi_call_v8(void *, extended_cif *, unsigned, unsigned, unsigned *, void (*fn)(void)); #endif +#ifndef __GNUC__ +void ffi_flush_icache (void *, size_t); +#endif + void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) { extended_cif ecif; @@ -417,7 +421,7 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) /* behind "call", so we alloc some executable space for it. */ /* l7 is used, we need to make sure v8.S doesn't use %l7. 
*/ unsigned int *call_struct = NULL; - ffi_closure_alloc(32, &call_struct); + ffi_closure_alloc(32, (void **)&call_struct); if (call_struct) { unsigned long f = (unsigned long)fn; @@ -432,10 +436,14 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) call_struct[5] = 0x01000000; /* nop */ call_struct[6] = 0x81c7e008; /* ret */ call_struct[7] = 0xbe100017; /* mov %l7, %i7 */ +#ifdef __GNUC__ asm volatile ("iflush %0; iflush %0+8; iflush %0+16; iflush %0+24" : : "r" (call_struct) : "memory"); /* SPARC v8 requires 5 instructions for flush to be visible */ asm volatile ("nop; nop; nop; nop; nop"); +#else + ffi_flush_icache (call_struct, 32); +#endif ffi_call_v8(ffi_prep_args_v8, &ecif, cif->bytes, cif->flags, rvalue, call_struct); ffi_closure_free(call_struct); @@ -513,6 +521,7 @@ ffi_prep_closure_loc (ffi_closure* closure, closure->user_data = user_data; /* Flush the Icache. closure is 8 bytes aligned. */ +#ifdef __GNUC__ #ifdef SPARC64 asm volatile ("flush %0; flush %0+8" : : "r" (closure) : "memory"); #else @@ -520,6 +529,9 @@ ffi_prep_closure_loc (ffi_closure* closure, /* SPARC v8 requires 5 instructions for flush to be visible */ asm volatile ("nop; nop; nop; nop; nop"); #endif +#else + ffi_flush_icache (closure, 16); +#endif return FFI_OK; } diff --git a/src/sparc/v8.S b/src/sparc/v8.S index 2c4eb60..6bf7ac0 100644 --- a/src/sparc/v8.S +++ b/src/sparc/v8.S @@ -1,5 +1,6 @@ /* ----------------------------------------------------------------------- - v8.S - Copyright (c) 1996, 1997, 2003, 2004, 2008 Red Hat, Inc. + v8.S - Copyright (c) 2013 The Written Word, Inc. + Copyright (c) 1996, 1997, 2003, 2004, 2008 Red Hat, Inc. 
SPARC Foreign Function Interface @@ -31,11 +32,39 @@ #define STACKFRAME 96 /* Minimum stack framesize for SPARC */ #define ARGS (64+4) /* Offset of register area in frame */ -.text +#ifndef __GNUC__ + .text + .align 8 +.globl ffi_flush_icache +.globl _ffi_flush_icache + +ffi_flush_icache: +_ffi_flush_icache: + add %o0, %o1, %o2 +#ifdef SPARC64 +1: flush %o0 +#else +1: iflush %o0 +#endif + add %o0, 8, %o0 + cmp %o0, %o2 + blt 1b + nop + nop + nop + nop + nop + retl + nop +.ffi_flush_icache_end: + .size ffi_flush_icache,.ffi_flush_icache_end-ffi_flush_icache +#endif + + .text .align 8 .globl ffi_call_v8 .globl _ffi_call_v8 - + ffi_call_v8: _ffi_call_v8: .LLFB1: @@ -213,6 +242,10 @@ ffi_closure_v8: be,a done1 ldd [%fp-8], %i0 + cmp %o0, FFI_TYPE_UINT64 + be,a done1 + ldd [%fp-8], %i0 + ld [%fp-8], %i0 done1: jmp %i7+8 diff --git a/src/sparc/v9.S b/src/sparc/v9.S index 489ff02..bf31a2b 100644 --- a/src/sparc/v9.S +++ b/src/sparc/v9.S @@ -32,7 +32,7 @@ /* Only compile this in for 64bit builds, because otherwise the object file will have inproper architecture due to used instructions. */ -#define STACKFRAME 128 /* Minimum stack framesize for SPARC */ +#define STACKFRAME 176 /* Minimum stack framesize for SPARC 64-bit */ #define STACK_BIAS 2047 #define ARGS (128) /* Offset of register area in frame */ diff --git a/src/tile/ffi.c b/src/tile/ffi.c new file mode 100644 index 0000000..3a94469 --- /dev/null +++ b/src/tile/ffi.c @@ -0,0 +1,355 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (c) 2012 Tilera Corp. 
+ + TILE Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> +#include <stdlib.h> +#include <stdint.h> +#include <unistd.h> +#include <arch/abi.h> +#include <arch/icache.h> +#include <arch/opcode.h> + + +/* The first 10 registers are used to pass arguments and return values. */ +#define NUM_ARG_REGS 10 + +/* Performs a raw function call with the given NUM_ARG_REGS register arguments + and the specified additional stack arguments (if any). */ +extern void ffi_call_tile(ffi_sarg reg_args[NUM_ARG_REGS], + const ffi_sarg *stack_args, + size_t stack_args_bytes, + void (*fnaddr)(void)) + FFI_HIDDEN; + +/* This handles the raw call from the closure stub, cleaning up the + parameters and delegating to ffi_closure_tile_inner. 
*/ +extern void ffi_closure_tile(void) FFI_HIDDEN; + + +ffi_status +ffi_prep_cif_machdep(ffi_cif *cif) +{ + /* We always allocate room for all registers. Even if we don't + use them as parameters, they get returned in the same array + as struct return values so we need to make room. */ + if (cif->bytes < NUM_ARG_REGS * FFI_SIZEOF_ARG) + cif->bytes = NUM_ARG_REGS * FFI_SIZEOF_ARG; + + if (cif->rtype->size > NUM_ARG_REGS * FFI_SIZEOF_ARG) + cif->flags = FFI_TYPE_STRUCT; + else + cif->flags = FFI_TYPE_INT; + + /* Nothing to do. */ + return FFI_OK; +} + + +static long +assign_to_ffi_arg(ffi_sarg *out, void *in, const ffi_type *type, + int write_to_reg) +{ + switch (type->type) + { + case FFI_TYPE_SINT8: + *out = *(SINT8 *)in; + return 1; + + case FFI_TYPE_UINT8: + *out = *(UINT8 *)in; + return 1; + + case FFI_TYPE_SINT16: + *out = *(SINT16 *)in; + return 1; + + case FFI_TYPE_UINT16: + *out = *(UINT16 *)in; + return 1; + + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: +#ifndef __LP64__ + case FFI_TYPE_POINTER: +#endif + /* Note that even unsigned 32-bit quantities are sign extended + on tilegx when stored in a register. */ + *out = *(SINT32 *)in; + return 1; + + case FFI_TYPE_FLOAT: +#ifdef __tilegx__ + if (write_to_reg) + { + /* Properly sign extend the value. */ + union { float f; SINT32 s32; } val; + val.f = *(float *)in; + *out = val.s32; + } + else +#endif + { + *(float *)out = *(float *)in; + } + return 1; + + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + case FFI_TYPE_DOUBLE: +#ifdef __LP64__ + case FFI_TYPE_POINTER: +#endif + *(UINT64 *)out = *(UINT64 *)in; + return sizeof(UINT64) / FFI_SIZEOF_ARG; + + case FFI_TYPE_STRUCT: + memcpy(out, in, type->size); + return (type->size + FFI_SIZEOF_ARG - 1) / FFI_SIZEOF_ARG; + + case FFI_TYPE_VOID: + /* Must be a return type. Nothing to do. 
*/ + return 0; + + default: + FFI_ASSERT(0); + return -1; + } +} + + +void +ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + ffi_sarg * const arg_mem = alloca(cif->bytes); + ffi_sarg * const reg_args = arg_mem; + ffi_sarg * const stack_args = &reg_args[NUM_ARG_REGS]; + ffi_sarg *argp = arg_mem; + ffi_type ** const arg_types = cif->arg_types; + const long num_args = cif->nargs; + long i; + + if (cif->flags == FFI_TYPE_STRUCT) + { + /* Pass a hidden pointer to the return value. We make sure there + is scratch space for the callee to store the return value even if + our caller doesn't care about it. */ + *argp++ = (intptr_t)(rvalue ? rvalue : alloca(cif->rtype->size)); + + /* No more work needed to return anything. */ + rvalue = NULL; + } + + for (i = 0; i < num_args; i++) + { + ffi_type *type = arg_types[i]; + void * const arg_in = avalue[i]; + ptrdiff_t arg_word = argp - arg_mem; + +#ifndef __tilegx__ + /* Doubleword-aligned values are always in an even-number register + pair, or doubleword-aligned stack slot if out of registers. */ + long align = arg_word & (type->alignment > FFI_SIZEOF_ARG); + argp += align; + arg_word += align; +#endif + + if (type->type == FFI_TYPE_STRUCT) + { + const size_t arg_size_in_words = + (type->size + FFI_SIZEOF_ARG - 1) / FFI_SIZEOF_ARG; + + if (arg_word < NUM_ARG_REGS && + arg_word + arg_size_in_words > NUM_ARG_REGS) + { + /* Args are not allowed to span registers and the stack. */ + argp = stack_args; + } + + memcpy(argp, arg_in, type->size); + argp += arg_size_in_words; + } + else + { + argp += assign_to_ffi_arg(argp, arg_in, arg_types[i], 1); + } + } + + /* Actually do the call. */ + ffi_call_tile(reg_args, stack_args, + cif->bytes - (NUM_ARG_REGS * FFI_SIZEOF_ARG), fn); + + if (rvalue != NULL) + assign_to_ffi_arg(rvalue, reg_args, cif->rtype, 0); +} + + +/* Template code for closure. 
*/ +extern const UINT64 ffi_template_tramp_tile[] FFI_HIDDEN; + + +ffi_status +ffi_prep_closure_loc (ffi_closure *closure, + ffi_cif *cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data, + void *codeloc) +{ +#ifdef __tilegx__ + /* TILE-Gx */ + SINT64 c; + SINT64 h; + int s; + UINT64 *out; + + if (cif->abi != FFI_UNIX) + return FFI_BAD_ABI; + + out = (UINT64 *)closure->tramp; + + c = (intptr_t)closure; + h = (intptr_t)ffi_closure_tile; + s = 0; + + /* Find the smallest shift count that doesn't lose information + (i.e. no need to explicitly insert high bits of the address that + are just the sign extension of the low bits). */ + while ((c >> s) != (SINT16)(c >> s) || (h >> s) != (SINT16)(h >> s)) + s += 16; + +#define OPS(a, b, shift) \ + (create_Imm16_X0((a) >> (shift)) | create_Imm16_X1((b) >> (shift))) + + /* Emit the moveli. */ + *out++ = ffi_template_tramp_tile[0] | OPS(c, h, s); + for (s -= 16; s >= 0; s -= 16) + *out++ = ffi_template_tramp_tile[1] | OPS(c, h, s); + +#undef OPS + + *out++ = ffi_template_tramp_tile[2]; + +#else + /* TILEPro */ + UINT64 *out; + intptr_t delta; + + if (cif->abi != FFI_UNIX) + return FFI_BAD_ABI; + + out = (UINT64 *)closure->tramp; + delta = (intptr_t)ffi_closure_tile - (intptr_t)codeloc; + + *out++ = ffi_template_tramp_tile[0] | create_JOffLong_X1(delta >> 3); +#endif + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + invalidate_icache(closure->tramp, (char *)out - closure->tramp, + getpagesize()); + + return FFI_OK; +} + + +/* This is called by the assembly wrapper for closures. This does + all of the work. On entry reg_args[0] holds the values the registers + had when the closure was invoked. On return reg_args[1] holds the register + values to be returned to the caller (many of which may be garbage). 
*/ +void FFI_HIDDEN +ffi_closure_tile_inner(ffi_closure *closure, + ffi_sarg reg_args[2][NUM_ARG_REGS], + ffi_sarg *stack_args) +{ + ffi_cif * const cif = closure->cif; + void ** const avalue = alloca(cif->nargs * sizeof(void *)); + void *rvalue; + ffi_type ** const arg_types = cif->arg_types; + ffi_sarg * const reg_args_in = reg_args[0]; + ffi_sarg * const reg_args_out = reg_args[1]; + ffi_sarg * argp; + long i, arg_word, nargs = cif->nargs; + /* Use a union to guarantee proper alignment for double. */ + union { ffi_sarg arg[NUM_ARG_REGS]; double d; UINT64 u64; } closure_ret; + + /* Start out reading register arguments. */ + argp = reg_args_in; + + /* Copy the caller's structure return address so that the closure + returns the data directly to the caller. */ + if (cif->flags == FFI_TYPE_STRUCT) + { + /* Return by reference via hidden pointer. */ + rvalue = (void *)(intptr_t)*argp++; + arg_word = 1; + } + else + { + /* Return the value in registers. */ + rvalue = &closure_ret; + arg_word = 0; + } + + /* Grab the addresses of the arguments. */ + for (i = 0; i < nargs; i++) + { + ffi_type * const type = arg_types[i]; + const size_t arg_size_in_words = + (type->size + FFI_SIZEOF_ARG - 1) / FFI_SIZEOF_ARG; + +#ifndef __tilegx__ + /* Doubleword-aligned values are always in an even-number register + pair, or doubleword-aligned stack slot if out of registers. */ + long align = arg_word & (type->alignment > FFI_SIZEOF_ARG); + argp += align; + arg_word += align; +#endif + + if (arg_word == NUM_ARG_REGS || + (arg_word < NUM_ARG_REGS && + arg_word + arg_size_in_words > NUM_ARG_REGS)) + { + /* Switch to reading arguments from the stack. */ + argp = stack_args; + arg_word = NUM_ARG_REGS; + } + + avalue[i] = argp; + argp += arg_size_in_words; + arg_word += arg_size_in_words; + } + + /* Invoke the closure. */ + closure->fun(cif, rvalue, avalue, closure->user_data); + + if (cif->flags != FFI_TYPE_STRUCT) + { + /* Canonicalize for register representation. 
*/ + assign_to_ffi_arg(reg_args_out, &closure_ret, cif->rtype, 1); + } +} diff --git a/src/tile/ffitarget.h b/src/tile/ffitarget.h new file mode 100644 index 0000000..679fb5d --- /dev/null +++ b/src/tile/ffitarget.h @@ -0,0 +1,65 @@ +/* -----------------------------------------------------------------*-C-*- + ffitarget.h - Copyright (c) 2012 Tilera Corp. + Target configuration macros for TILE. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +#ifndef LIBFFI_H +#error "Please do not include ffitarget.h directly into your source. Use ffi.h instead." 
+#endif + +#ifndef LIBFFI_ASM + +#include <arch/abi.h> + +typedef uint_reg_t ffi_arg; +typedef int_reg_t ffi_sarg; + +typedef enum ffi_abi { + FFI_FIRST_ABI = 0, + FFI_UNIX, + FFI_LAST_ABI, + FFI_DEFAULT_ABI = FFI_UNIX +} ffi_abi; +#endif + +/* ---- Definitions for closures ----------------------------------------- */ +#define FFI_CLOSURES 1 + +#ifdef __tilegx__ +/* We always pass 8-byte values, even in -m32 mode. */ +# define FFI_SIZEOF_ARG 8 +# ifdef __LP64__ +# define FFI_TRAMPOLINE_SIZE (8 * 5) /* 5 bundles */ +# else +# define FFI_TRAMPOLINE_SIZE (8 * 3) /* 3 bundles */ +# endif +#else +# define FFI_SIZEOF_ARG 4 +# define FFI_TRAMPOLINE_SIZE 8 /* 1 bundle */ +#endif +#define FFI_NATIVE_RAW_API 0 + +#endif diff --git a/src/tile/tile.S b/src/tile/tile.S new file mode 100644 index 0000000..a186e1f --- /dev/null +++ b/src/tile/tile.S @@ -0,0 +1,360 @@ +/* ----------------------------------------------------------------------- + tile.S - Copyright (c) 2011 Tilera Corp. + + Tilera TILEPro and TILE-Gx Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> + +/* Number of bytes in a register. */ +#define REG_SIZE FFI_SIZEOF_ARG + +/* Number of bytes in stack linkage area for backtracing. + + A note about the ABI: on entry to a procedure, sp points to a stack + slot where it must spill the return address if it's not a leaf. + REG_SIZE bytes beyond that is a slot owned by the caller which + contains the sp value that the caller had when it was originally + entered (i.e. the caller's frame pointer). */ +#define LINKAGE_SIZE (2 * REG_SIZE) + +/* The first 10 registers are used to pass arguments and return values. */ +#define NUM_ARG_REGS 10 + +#ifdef __tilegx__ +#define SW st +#define LW ld +#define BGZT bgtzt +#else +#define SW sw +#define LW lw +#define BGZT bgzt +#endif + + +/* void ffi_call_tile (int_reg_t reg_args[NUM_ARG_REGS], + const int_reg_t *stack_args, + unsigned long stack_args_bytes, + void (*fnaddr)(void)); + + On entry, REG_ARGS contains the outgoing register values, + and STACK_ARGS contains STACK_ARG_BYTES of additional values + to be passed on the stack. If STACK_ARG_BYTES is zero, then + STACK_ARGS is ignored. + + When the invoked function returns, the values of r0-r9 are + blindly stored back into REG_ARGS for the caller to examine. */ + + .section .text.ffi_call_tile, "ax", @progbits + .align 8 + .globl ffi_call_tile + FFI_HIDDEN(ffi_call_tile) +ffi_call_tile: + +/* Incoming arguments. */ +#define REG_ARGS r0 +#define INCOMING_STACK_ARGS r1 +#define STACK_ARG_BYTES r2 +#define ORIG_FNADDR r3 + +/* Temporary values.
*/ +#define FRAME_SIZE r10 +#define TMP r11 +#define TMP2 r12 +#define OUTGOING_STACK_ARGS r13 +#define REG_ADDR_PTR r14 +#define RETURN_REG_ADDR r15 +#define FNADDR r16 + + .cfi_startproc + { + /* Save return address. */ + SW sp, lr + .cfi_offset lr, 0 + /* Prepare to spill incoming r52. */ + addi TMP, sp, -REG_SIZE + /* Increase frame size to have room to spill r52 and REG_ARGS. + The +7 is to round up mod 8. */ + addi FRAME_SIZE, STACK_ARG_BYTES, \ + REG_SIZE + REG_SIZE + LINKAGE_SIZE + 7 + } + { + /* Round stack frame size to a multiple of 8 to satisfy ABI. */ + andi FRAME_SIZE, FRAME_SIZE, -8 + /* Compute where to spill REG_ARGS value. */ + addi TMP2, sp, -(REG_SIZE * 2) + } + { + /* Spill incoming r52. */ + SW TMP, r52 + .cfi_offset r52, -REG_SIZE + /* Set up our frame pointer. */ + move r52, sp + .cfi_def_cfa_register r52 + /* Push stack frame. */ + sub sp, sp, FRAME_SIZE + } + { + /* Prepare to set up stack linkage. */ + addi TMP, sp, REG_SIZE + /* Prepare to memcpy stack args. */ + addi OUTGOING_STACK_ARGS, sp, LINKAGE_SIZE + /* Save REG_ARGS which we will need after we call the subroutine. */ + SW TMP2, REG_ARGS + } + { + /* Set up linkage info to hold incoming stack pointer. */ + SW TMP, r52 + } + { + /* Skip stack args memcpy if we don't have any stack args (common). */ + blezt STACK_ARG_BYTES, .Ldone_stack_args_memcpy + } + +.Lmemcpy_stack_args: + { + /* Load incoming argument from stack_args. */ + LW TMP, INCOMING_STACK_ARGS + addi INCOMING_STACK_ARGS, INCOMING_STACK_ARGS, REG_SIZE + } + { + /* Store stack argument into outgoing stack argument area. */ + SW OUTGOING_STACK_ARGS, TMP + addi OUTGOING_STACK_ARGS, OUTGOING_STACK_ARGS, REG_SIZE + addi STACK_ARG_BYTES, STACK_ARG_BYTES, -REG_SIZE + } + { + BGZT STACK_ARG_BYTES, .Lmemcpy_stack_args + } +.Ldone_stack_args_memcpy: + + { + /* Copy aside ORIG_FNADDR so we can overwrite its register. */ + move FNADDR, ORIG_FNADDR + /* Prepare to load argument registers. 
*/ + addi REG_ADDR_PTR, r0, REG_SIZE + /* Load outgoing r0. */ + LW r0, r0 + } + + /* Load up argument registers from the REG_ARGS array. */ +#define LOAD_REG(REG, PTR) \ + { \ + LW REG, PTR ; \ + addi PTR, PTR, REG_SIZE \ + } + + LOAD_REG(r1, REG_ADDR_PTR) + LOAD_REG(r2, REG_ADDR_PTR) + LOAD_REG(r3, REG_ADDR_PTR) + LOAD_REG(r4, REG_ADDR_PTR) + LOAD_REG(r5, REG_ADDR_PTR) + LOAD_REG(r6, REG_ADDR_PTR) + LOAD_REG(r7, REG_ADDR_PTR) + LOAD_REG(r8, REG_ADDR_PTR) + LOAD_REG(r9, REG_ADDR_PTR) + + { + /* Call the subroutine. */ + jalr FNADDR + } + + { + /* Restore original lr. */ + LW lr, r52 + /* Prepare to recover ARGS, which we spilled earlier. */ + addi TMP, r52, -(2 * REG_SIZE) + } + { + /* Restore ARGS, so we can fill it in with the return regs r0-r9. */ + LW RETURN_REG_ADDR, TMP + /* Prepare to restore original r52. */ + addi TMP, r52, -REG_SIZE + } + + { + /* Pop stack frame. */ + move sp, r52 + /* Restore original r52. */ + LW r52, TMP + } + +#define STORE_REG(REG, PTR) \ + { \ + SW PTR, REG ; \ + addi PTR, PTR, REG_SIZE \ + } + + /* Return all register values by reference. */ + STORE_REG(r0, RETURN_REG_ADDR) + STORE_REG(r1, RETURN_REG_ADDR) + STORE_REG(r2, RETURN_REG_ADDR) + STORE_REG(r3, RETURN_REG_ADDR) + STORE_REG(r4, RETURN_REG_ADDR) + STORE_REG(r5, RETURN_REG_ADDR) + STORE_REG(r6, RETURN_REG_ADDR) + STORE_REG(r7, RETURN_REG_ADDR) + STORE_REG(r8, RETURN_REG_ADDR) + STORE_REG(r9, RETURN_REG_ADDR) + + { + jrp lr + } + + .cfi_endproc + .size ffi_call_tile, .-ffi_call_tile + +/* ffi_closure_tile(...) + + On entry, lr points to the closure plus 8 bytes, and r10 + contains the actual return address. + + This function simply dumps all register parameters into a stack array + and passes the closure, the registers array, and the stack arguments + to C code that does all of the actual closure processing. 
*/ + + .section .text.ffi_closure_tile, "ax", @progbits + .align 8 + .globl ffi_closure_tile + FFI_HIDDEN(ffi_closure_tile) + + .cfi_startproc +/* Room to spill all NUM_ARG_REGS incoming registers, plus frame linkage. */ +#define CLOSURE_FRAME_SIZE (((NUM_ARG_REGS * REG_SIZE * 2 + LINKAGE_SIZE) + 7) & -8) +ffi_closure_tile: + { +#ifdef __tilegx__ + st sp, lr + .cfi_offset lr, 0 +#else + /* Save return address (in r10 due to closure stub wrapper). */ + SW sp, r10 + .cfi_return_column r10 + .cfi_offset r10, 0 +#endif + /* Compute address for stack frame linkage. */ + addli r10, sp, -(CLOSURE_FRAME_SIZE - REG_SIZE) + } + { + /* Save incoming stack pointer in linkage area. */ + SW r10, sp + .cfi_offset sp, -(CLOSURE_FRAME_SIZE - REG_SIZE) + /* Push a new stack frame. */ + addli sp, sp, -CLOSURE_FRAME_SIZE + .cfi_adjust_cfa_offset CLOSURE_FRAME_SIZE + } + + { + /* Create pointer to where to start spilling registers. */ + addi r10, sp, LINKAGE_SIZE + } + + /* Spill all the incoming registers. */ + STORE_REG(r0, r10) + STORE_REG(r1, r10) + STORE_REG(r2, r10) + STORE_REG(r3, r10) + STORE_REG(r4, r10) + STORE_REG(r5, r10) + STORE_REG(r6, r10) + STORE_REG(r7, r10) + STORE_REG(r8, r10) + { + /* Save r9. */ + SW r10, r9 +#ifdef __tilegx__ + /* Pointer to closure is passed in r11. */ + move r0, r11 +#else + /* Compute pointer to the closure object. Because the closure + starts with a "jal ffi_closure_tile", we can just take the + value of lr (a phony return address pointing into the closure) + and subtract 8. */ + addi r0, lr, -8 +#endif + /* Compute a pointer to the register arguments we just spilled. */ + addi r1, sp, LINKAGE_SIZE + } + { + /* Compute a pointer to the extra stack arguments (if any). */ + addli r2, sp, CLOSURE_FRAME_SIZE + LINKAGE_SIZE + /* Call C code to deal with all of the grotty details. */ + jal ffi_closure_tile_inner + } + { + addli r10, sp, CLOSURE_FRAME_SIZE + } + { + /* Restore the return address. 
*/ + LW lr, r10 + /* Compute pointer to registers array. */ + addli r10, sp, LINKAGE_SIZE + (NUM_ARG_REGS * REG_SIZE) + } + /* Return all the register values, which C code may have set. */ + LOAD_REG(r0, r10) + LOAD_REG(r1, r10) + LOAD_REG(r2, r10) + LOAD_REG(r3, r10) + LOAD_REG(r4, r10) + LOAD_REG(r5, r10) + LOAD_REG(r6, r10) + LOAD_REG(r7, r10) + LOAD_REG(r8, r10) + LOAD_REG(r9, r10) + { + /* Pop the frame. */ + addli sp, sp, CLOSURE_FRAME_SIZE + jrp lr + } + + .cfi_endproc + .size ffi_closure_tile, . - ffi_closure_tile + + +/* What follows are code template instructions that get copied to the + closure trampoline by ffi_prep_closure_loc. The zeroed operands + get replaced by their proper values at runtime. */ + + .section .text.ffi_template_tramp_tile, "ax", @progbits + .align 8 + .globl ffi_template_tramp_tile + FFI_HIDDEN(ffi_template_tramp_tile) +ffi_template_tramp_tile: +#ifdef __tilegx__ + { + moveli r11, 0 /* backpatched to address of containing closure. */ + moveli r10, 0 /* backpatched to ffi_closure_tile. */ + } + /* Note: the following bundle gets generated multiple times + depending on the pointer value (esp. useful for -m32 mode). */ + { shl16insli r11, r11, 0 ; shl16insli r10, r10, 0 } + { info 2+8 /* for backtracer: -> pc in lr, frame size 0 */ ; jr r10 } +#else + /* 'jal .' yields a PC-relative offset of zero so we can OR in the + right offset at runtime. */ + { move r10, lr ; jal . /* ffi_closure_tile */ } +#endif + + .size ffi_template_tramp_tile, . 
- ffi_template_tramp_tile diff --git a/src/x86/ffi.c b/src/x86/ffi.c index 9343c26..0600414 100644 --- a/src/x86/ffi.c +++ b/src/x86/ffi.c @@ -58,7 +58,8 @@ void ffi_prep_args(char *stack, extended_cif *ecif) argp = stack; - if (ecif->cif->flags == FFI_TYPE_STRUCT + if ((ecif->cif->flags == FFI_TYPE_STRUCT + || ecif->cif->flags == FFI_TYPE_MS_STRUCT) #ifdef X86_WIN64 && (ecif->cif->rtype->size != 1 && ecif->cif->rtype->size != 2 && ecif->cif->rtype->size != 4 && ecif->cif->rtype->size != 8) @@ -279,7 +280,12 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif) else #endif { - cif->flags = FFI_TYPE_STRUCT; +#ifdef X86_WIN32 + if (cif->abi == FFI_MS_CDECL) + cif->flags = FFI_TYPE_MS_STRUCT; + else +#endif + cif->flags = FFI_TYPE_STRUCT; /* allocate space for return value pointer */ cif->bytes += ALIGN(sizeof(void*), FFI_SIZEOF_ARG); } @@ -309,9 +315,7 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif) cif->bytes += 4 * sizeof(ffi_arg); #endif -#ifdef X86_DARWIN cif->bytes = (cif->bytes + 15) & ~0xF; -#endif return FFI_OK; } @@ -349,7 +353,8 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) } #else if (rvalue == NULL - && cif->flags == FFI_TYPE_STRUCT) + && (cif->flags == FFI_TYPE_STRUCT + || cif->flags == FFI_TYPE_MS_STRUCT)) { ecif.rvalue = alloca(cif->rtype->size); } @@ -368,6 +373,7 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) #elif defined(X86_WIN32) case FFI_SYSV: case FFI_STDCALL: + case FFI_MS_CDECL: ffi_call_win32(ffi_prep_args, &ecif, cif->abi, cif->bytes, cif->flags, ecif.rvalue, fn); break; @@ -416,7 +422,7 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) /** private members **/ /* The following __attribute__((regparm(1))) decorations will have no effect - on MSVC - standard cdecl convention applies. */ + on MSVC or SUNPRO_C -- standard conventions apply. 
*/ static void ffi_prep_incoming_args_SYSV (char *stack, void **ret, void** args, ffi_cif* cif); void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *) @@ -426,6 +432,8 @@ unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *) void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *) __attribute__ ((regparm(1))); #ifdef X86_WIN32 +void FFI_HIDDEN ffi_closure_raw_THISCALL (ffi_raw_closure *) + __attribute__ ((regparm(1))); void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *) __attribute__ ((regparm(1))); void FFI_HIDDEN ffi_closure_THISCALL (ffi_closure *) @@ -511,7 +519,8 @@ ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue, argp += sizeof(void *); } #else - if ( cif->flags == FFI_TYPE_STRUCT ) { + if ( cif->flags == FFI_TYPE_STRUCT + || cif->flags == FFI_TYPE_MS_STRUCT ) { *rvalue = *(void **) argp; argp += sizeof(void *); } @@ -593,7 +602,7 @@ ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue, { unsigned char *__tramp = (unsigned char*)(TRAMP); \ unsigned int __fun = (unsigned int)(FUN); \ unsigned int __ctx = (unsigned int)(CTX); \ - unsigned int __dis = __fun - (__ctx + 22); \ + unsigned int __dis = __fun - (__ctx + 49); \ unsigned short __size = (unsigned short)(SIZE); \ *(unsigned int *) &__tramp[0] = 0x8324048b; /* mov (%esp), %eax */ \ *(unsigned int *) &__tramp[4] = 0x4c890cec; /* sub $12, %esp */ \ @@ -671,6 +680,12 @@ ffi_prep_closure_loc (ffi_closure* closure, &ffi_closure_STDCALL, (void*)codeloc, cif->bytes); } + else if (cif->abi == FFI_MS_CDECL) + { + FFI_INIT_TRAMPOLINE (&closure->tramp[0], + &ffi_closure_SYSV, + (void*)codeloc); + } #endif /* X86_WIN32 */ #endif /* !X86_WIN64 */ else @@ -699,6 +714,9 @@ ffi_prep_raw_closure_loc (ffi_raw_closure* closure, int i; if (cif->abi != FFI_SYSV) { +#ifdef X86_WIN32 + if (cif->abi != FFI_THISCALL) +#endif return FFI_BAD_ABI; } @@ -713,10 +731,20 @@ ffi_prep_raw_closure_loc (ffi_raw_closure* closure, FFI_ASSERT (cif->arg_types[i]->type != 
FFI_TYPE_LONGDOUBLE); } - +#ifdef X86_WIN32 + if (cif->abi == FFI_SYSV) + { +#endif FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_raw_SYSV, codeloc); - +#ifdef X86_WIN32 + } + else if (cif->abi == FFI_THISCALL) + { + FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0], &ffi_closure_raw_THISCALL, + codeloc, cif->bytes); + } +#endif closure->cif = cif; closure->user_data = user_data; closure->fun = fun; @@ -747,8 +775,9 @@ ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue) /* If the return value is a struct and we don't have a return */ /* value address then we need to make one */ - if ((rvalue == NULL) && - (cif->rtype->type == FFI_TYPE_STRUCT)) + if (rvalue == NULL + && (cif->flags == FFI_TYPE_STRUCT + || cif->flags == FFI_TYPE_MS_STRUCT)) { ecif.rvalue = alloca(cif->rtype->size); } @@ -761,7 +790,8 @@ ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue) #ifdef X86_WIN32 case FFI_SYSV: case FFI_STDCALL: - ffi_call_win32(ffi_prep_args, &ecif, cif->abi, cif->bytes, cif->flags, + case FFI_MS_CDECL: + ffi_call_win32(ffi_prep_args_raw, &ecif, cif->abi, cif->bytes, cif->flags, ecif.rvalue, fn); break; case FFI_THISCALL: @@ -789,7 +819,7 @@ ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue) cif->abi = abi = FFI_THISCALL; if (passed_regs < 1 && abi == FFI_THISCALL) cif->abi = abi = FFI_STDCALL; - ffi_call_win32(ffi_prep_args, &ecif, abi, cif->bytes, cif->flags, + ffi_call_win32(ffi_prep_args_raw, &ecif, abi, cif->bytes, cif->flags, ecif.rvalue, fn); } break; diff --git a/src/x86/ffi64.c b/src/x86/ffi64.c index defd774..2014af2 100644 --- a/src/x86/ffi64.c +++ b/src/x86/ffi64.c @@ -1,5 +1,6 @@ /* ----------------------------------------------------------------------- - ffi64.c - Copyright (c) 20011 Anthony Green + ffi64.c - Copyright (c) 2013 The Written Word, Inc. + Copyright (c) 2011 Anthony Green Copyright (c) 2008, 2010 Red Hat, Inc. 
Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de> @@ -37,11 +38,29 @@ #define MAX_GPR_REGS 6 #define MAX_SSE_REGS 8 +#if defined(__INTEL_COMPILER) +#define UINT128 __m128 +#else +#if defined(__SUNPRO_C) +#include <sunmedia_types.h> +#define UINT128 __m128i +#else +#define UINT128 __int128_t +#endif +#endif + +union big_int_union +{ + UINT32 i32; + UINT64 i64; + UINT128 i128; +}; + struct register_args { /* Registers for argument passing. */ UINT64 gpr[MAX_GPR_REGS]; - __int128_t sse[MAX_SSE_REGS]; + union big_int_union sse[MAX_SSE_REGS]; }; extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags, @@ -465,16 +484,33 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) { case X86_64_INTEGER_CLASS: case X86_64_INTEGERSI_CLASS: - reg_args->gpr[gprcount] = 0; - memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8); + /* Sign-extend integer arguments passed in general + purpose registers, to cope with the fact that + LLVM incorrectly assumes that this will be done + (the x86-64 PS ABI does not specify this). */ + switch (arg_types[i]->type) + { + case FFI_TYPE_SINT8: + *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a); + break; + case FFI_TYPE_SINT16: + *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a); + break; + case FFI_TYPE_SINT32: + *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a); + break; + default: + reg_args->gpr[gprcount] = 0; + memcpy (&reg_args->gpr[gprcount], a, size < 8 ?
size : 8); + } gprcount++; break; case X86_64_SSE_CLASS: case X86_64_SSEDF_CLASS: - reg_args->sse[ssecount++] = *(UINT64 *) a; + reg_args->sse[ssecount++].i64 = *(UINT64 *) a; break; case X86_64_SSESF_CLASS: - reg_args->sse[ssecount++] = *(UINT32 *) a; + reg_args->sse[ssecount++].i32 = *(UINT32 *) a; break; default: abort(); diff --git a/src/x86/ffitarget.h b/src/x86/ffitarget.h index 54a6121..46f294c 100644 --- a/src/x86/ffitarget.h +++ b/src/x86/ffitarget.h @@ -61,8 +61,9 @@ typedef unsigned long long ffi_arg; typedef long long ffi_sarg; #endif #else -#if defined __x86_64__ && !defined __LP64__ +#if defined __x86_64__ && defined __ILP32__ #define FFI_SIZEOF_ARG 8 +#define FFI_SIZEOF_JAVA_RAW 4 typedef unsigned long long ffi_arg; typedef long long ffi_sarg; #else @@ -80,9 +81,13 @@ typedef enum ffi_abi { FFI_STDCALL, FFI_THISCALL, FFI_FASTCALL, + FFI_MS_CDECL, FFI_LAST_ABI, - /* TODO: Add fastcall support for the sake of completeness */ +#ifdef _MSC_VER + FFI_DEFAULT_ABI = FFI_MS_CDECL +#else FFI_DEFAULT_ABI = FFI_SYSV +#endif #elif defined(X86_WIN64) FFI_WIN64, @@ -109,6 +114,7 @@ typedef enum ffi_abi { #define FFI_TYPE_SMALL_STRUCT_1B (FFI_TYPE_LAST + 1) #define FFI_TYPE_SMALL_STRUCT_2B (FFI_TYPE_LAST + 2) #define FFI_TYPE_SMALL_STRUCT_4B (FFI_TYPE_LAST + 3) +#define FFI_TYPE_MS_STRUCT (FFI_TYPE_LAST + 4) #if defined (X86_64) || (defined (__x86_64__) && defined (X86_DARWIN)) #define FFI_TRAMPOLINE_SIZE 24 diff --git a/src/x86/sysv.S b/src/x86/sysv.S index f108dd8..3bd5477 100644 --- a/src/x86/sysv.S +++ b/src/x86/sysv.S @@ -1,5 +1,6 @@ /* ----------------------------------------------------------------------- - sysv.S - Copyright (c) 1996, 1998, 2001-2003, 2005, 2008, 2010 Red Hat, Inc. + sysv.S - Copyright (c) 2013 The Written Word, Inc. + - Copyright (c) 1996,1998,2001-2003,2005,2008,2010 Red Hat, Inc. 
X86 Foreign Function Interface @@ -181,9 +182,19 @@ ffi_closure_SYSV: leal -24(%ebp), %edx movl %edx, -12(%ebp) /* resp */ leal 8(%ebp), %edx +#ifdef __SUNPRO_C + /* The SUNPRO compiler doesn't support GCC's regparm function + attribute, so we have to pass all three arguments to + ffi_closure_SYSV_inner on the stack. */ + movl %edx, 8(%esp) /* args = __builtin_dwarf_cfa () */ + leal -12(%ebp), %edx + movl %edx, 4(%esp) /* &resp */ + movl %eax, (%esp) /* closure */ +#else movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */ leal -12(%ebp), %edx movl %edx, (%esp) /* &resp */ +#endif #if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE || !defined __PIC__ call ffi_closure_SYSV_inner #else @@ -328,6 +339,9 @@ ffi_closure_raw_SYSV: .size ffi_closure_raw_SYSV, .-ffi_closure_raw_SYSV #endif +#if defined __GNUC__ +/* Only emit dwarf unwind info when building with GNU toolchain. */ + #if defined __PIC__ # if defined __sun__ && defined __svr4__ /* 32-bit Solaris 2/x86 uses datarel encoding for PIC. GNU ld before 2.22 @@ -460,6 +474,7 @@ ffi_closure_raw_SYSV: .LEFDE3: #endif +#endif #endif /* ifndef __x86_64__ */ diff --git a/src/x86/unix64.S b/src/x86/unix64.S index 7a6619a..dcd6bc7 100644 --- a/src/x86/unix64.S +++ b/src/x86/unix64.S @@ -1,6 +1,7 @@ /* ----------------------------------------------------------------------- - unix64.S - Copyright (c) 2002 Bo Thorsen <bo@suse.de> - Copyright (c) 2008 Red Hat, Inc + unix64.S - Copyright (c) 2013 The Written Word, Inc. + - Copyright (c) 2008 Red Hat, Inc + - Copyright (c) 2002 Bo Thorsen <bo@suse.de> x86-64 Foreign Function Interface @@ -324,6 +325,9 @@ ffi_closure_unix64: .LUW9: .size ffi_closure_unix64,.-ffi_closure_unix64 +#ifdef __GNUC__ +/* Only emit DWARF unwind info when building with the GNU toolchain. 
*/ + #ifdef HAVE_AS_X86_64_UNWIND_SECTION_TYPE .section .eh_frame,"a",@unwind #else @@ -419,6 +423,8 @@ ffi_closure_unix64: .align 8 .LEFDE3: +#endif /* __GNUC__ */ + #endif /* __x86_64__ */ #if defined __ELF__ && defined __linux__ diff --git a/src/x86/win32.S b/src/x86/win32.S index 47629fe..24b7bbd 100644 --- a/src/x86/win32.S +++ b/src/x86/win32.S @@ -108,31 +108,37 @@ ca_jumpdata: dd offset ca_retfloat ;; FFI_TYPE_FLOAT dd offset ca_retdouble ;; FFI_TYPE_DOUBLE dd offset ca_retlongdouble ;; FFI_TYPE_LONGDOUBLE - dd offset ca_retint8 ;; FFI_TYPE_UINT8 - dd offset ca_retint8 ;; FFI_TYPE_SINT8 - dd offset ca_retint16 ;; FFI_TYPE_UINT16 - dd offset ca_retint16 ;; FFI_TYPE_SINT16 + dd offset ca_retuint8 ;; FFI_TYPE_UINT8 + dd offset ca_retsint8 ;; FFI_TYPE_SINT8 + dd offset ca_retuint16 ;; FFI_TYPE_UINT16 + dd offset ca_retsint16 ;; FFI_TYPE_SINT16 dd offset ca_retint ;; FFI_TYPE_UINT32 dd offset ca_retint ;; FFI_TYPE_SINT32 dd offset ca_retint64 ;; FFI_TYPE_UINT64 dd offset ca_retint64 ;; FFI_TYPE_SINT64 dd offset ca_epilogue ;; FFI_TYPE_STRUCT dd offset ca_retint ;; FFI_TYPE_POINTER - dd offset ca_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B - dd offset ca_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B + dd offset ca_retstruct1b ;; FFI_TYPE_SMALL_STRUCT_1B + dd offset ca_retstruct2b ;; FFI_TYPE_SMALL_STRUCT_2B dd offset ca_retint ;; FFI_TYPE_SMALL_STRUCT_4B + dd offset ca_epilogue ;; FFI_TYPE_MS_STRUCT -ca_retint8: - ;; Load %ecx with the pointer to storage for the return value - mov ecx, rvalue - mov [ecx + 0], al - jmp ca_epilogue + /* Sign/zero extend as appropriate. 
*/ +ca_retuint8: + movzx eax, al + jmp ca_retint -ca_retint16: - ;; Load %ecx with the pointer to storage for the return value - mov ecx, rvalue - mov [ecx + 0], ax - jmp ca_epilogue +ca_retsint8: + movsx eax, al + jmp ca_retint + +ca_retuint16: + movzx eax, ax + jmp ca_retint + +ca_retsint16: + movsx eax, ax + jmp ca_retint ca_retint: ;; Load %ecx with the pointer to storage for the return value @@ -165,14 +171,24 @@ ca_retlongdouble: fstp TBYTE PTR [ecx] jmp ca_epilogue +ca_retstruct1b: + ;; Load %ecx with the pointer to storage for the return value + mov ecx, rvalue + mov [ecx + 0], al + jmp ca_epilogue + +ca_retstruct2b: + ;; Load %ecx with the pointer to storage for the return value + mov ecx, rvalue + mov [ecx + 0], ax + jmp ca_epilogue + ca_epilogue: ;; Epilogue code is autogenerated. ret ffi_call_win32 ENDP ffi_closure_THISCALL PROC NEAR FORCEFRAME - push ebp - mov ebp, esp sub esp, 40 lea edx, [ebp -24] mov [ebp - 12], edx /* resp */ @@ -204,26 +220,35 @@ cs_jumpdata: dd offset cs_retfloat ;; FFI_TYPE_FLOAT dd offset cs_retdouble ;; FFI_TYPE_DOUBLE dd offset cs_retlongdouble ;; FFI_TYPE_LONGDOUBLE - dd offset cs_retint8 ;; FFI_TYPE_UINT8 - dd offset cs_retint8 ;; FFI_TYPE_SINT8 - dd offset cs_retint16 ;; FFI_TYPE_UINT16 - dd offset cs_retint16 ;; FFI_TYPE_SINT16 + dd offset cs_retuint8 ;; FFI_TYPE_UINT8 + dd offset cs_retsint8 ;; FFI_TYPE_SINT8 + dd offset cs_retuint16 ;; FFI_TYPE_UINT16 + dd offset cs_retsint16 ;; FFI_TYPE_SINT16 dd offset cs_retint ;; FFI_TYPE_UINT32 dd offset cs_retint ;; FFI_TYPE_SINT32 dd offset cs_retint64 ;; FFI_TYPE_UINT64 dd offset cs_retint64 ;; FFI_TYPE_SINT64 dd offset cs_retstruct ;; FFI_TYPE_STRUCT dd offset cs_retint ;; FFI_TYPE_POINTER - dd offset cs_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B - dd offset cs_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B + dd offset cs_retsint8 ;; FFI_TYPE_SMALL_STRUCT_1B + dd offset cs_retsint16 ;; FFI_TYPE_SMALL_STRUCT_2B dd offset cs_retint ;; FFI_TYPE_SMALL_STRUCT_4B + dd offset cs_retmsstruct ;; 
FFI_TYPE_MS_STRUCT + +cs_retuint8: + movzx eax, BYTE PTR [ecx] + jmp cs_epilogue + +cs_retsint8: + movsx eax, BYTE PTR [ecx] + jmp cs_epilogue -cs_retint8: - mov al, [ecx] +cs_retuint16: + movzx eax, WORD PTR [ecx] jmp cs_epilogue -cs_retint16: - mov ax, [ecx] +cs_retsint16: + movsx eax, WORD PTR [ecx] jmp cs_epilogue cs_retint: @@ -252,6 +277,12 @@ cs_retstruct: ;; Epilogue code is autogenerated. ret 4 +cs_retmsstruct: + ;; Caller expects us to return a pointer to the real return value. + mov eax, ecx + ;; Caller doesn't expects us to pop struct return value pointer hidden arg. + jmp cs_epilogue + cs_epilogue: ;; Epilogue code is autogenerated. ret @@ -264,7 +295,16 @@ ffi_closure_SYSV ENDP #define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) #define CIF_FLAGS_OFFSET 20 -ffi_closure_raw_SYSV PROC NEAR USES esi +ffi_closure_raw_THISCALL PROC NEAR USES esi FORCEFRAME + sub esp, 36 + mov esi, [eax + RAW_CLOSURE_CIF_OFFSET] ;; closure->cif + mov edx, [eax + RAW_CLOSURE_USER_DATA_OFFSET] ;; closure->user_data + mov [esp + 12], edx + lea edx, [ebp + 12] + jmp stubraw +ffi_closure_raw_THISCALL ENDP + +ffi_closure_raw_SYSV PROC NEAR USES esi FORCEFRAME ;; the ffi_closure ctx is passed in eax by the trampoline. 
sub esp, 40 @@ -272,6 +312,7 @@ ffi_closure_raw_SYSV PROC NEAR USES esi mov edx, [eax + RAW_CLOSURE_USER_DATA_OFFSET] ;; closure->user_data mov [esp + 12], edx ;; user_data lea edx, [ebp + 8] +stubraw:: mov [esp + 8], edx ;; raw_args lea edx, [ebp - 24] mov [esp + 4], edx ;; &res @@ -289,26 +330,35 @@ cr_jumpdata: dd offset cr_retfloat ;; FFI_TYPE_FLOAT dd offset cr_retdouble ;; FFI_TYPE_DOUBLE dd offset cr_retlongdouble ;; FFI_TYPE_LONGDOUBLE - dd offset cr_retint8 ;; FFI_TYPE_UINT8 - dd offset cr_retint8 ;; FFI_TYPE_SINT8 - dd offset cr_retint16 ;; FFI_TYPE_UINT16 - dd offset cr_retint16 ;; FFI_TYPE_SINT16 + dd offset cr_retuint8 ;; FFI_TYPE_UINT8 + dd offset cr_retsint8 ;; FFI_TYPE_SINT8 + dd offset cr_retuint16 ;; FFI_TYPE_UINT16 + dd offset cr_retsint16 ;; FFI_TYPE_SINT16 dd offset cr_retint ;; FFI_TYPE_UINT32 dd offset cr_retint ;; FFI_TYPE_SINT32 dd offset cr_retint64 ;; FFI_TYPE_UINT64 dd offset cr_retint64 ;; FFI_TYPE_SINT64 dd offset cr_epilogue ;; FFI_TYPE_STRUCT dd offset cr_retint ;; FFI_TYPE_POINTER - dd offset cr_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B - dd offset cr_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B + dd offset cr_retsint8 ;; FFI_TYPE_SMALL_STRUCT_1B + dd offset cr_retsint16 ;; FFI_TYPE_SMALL_STRUCT_2B dd offset cr_retint ;; FFI_TYPE_SMALL_STRUCT_4B + dd offset cr_epilogue ;; FFI_TYPE_MS_STRUCT + +cr_retuint8: + movzx eax, BYTE PTR [ecx] + jmp cr_epilogue + +cr_retsint8: + movsx eax, BYTE PTR [ecx] + jmp cr_epilogue -cr_retint8: - mov al, [ecx] +cr_retuint16: + movzx eax, WORD PTR [ecx] jmp cr_epilogue -cr_retint16: - mov ax, [ecx] +cr_retsint16: + movsx eax, WORD PTR [ecx] jmp cr_epilogue cr_retint: @@ -362,26 +412,34 @@ cd_jumpdata: dd offset cd_retfloat ;; FFI_TYPE_FLOAT dd offset cd_retdouble ;; FFI_TYPE_DOUBLE dd offset cd_retlongdouble ;; FFI_TYPE_LONGDOUBLE - dd offset cd_retint8 ;; FFI_TYPE_UINT8 - dd offset cd_retint8 ;; FFI_TYPE_SINT8 - dd offset cd_retint16 ;; FFI_TYPE_UINT16 - dd offset cd_retint16 ;; FFI_TYPE_SINT16 + dd offset 
cd_retuint8 ;; FFI_TYPE_UINT8 + dd offset cd_retsint8 ;; FFI_TYPE_SINT8 + dd offset cd_retuint16 ;; FFI_TYPE_UINT16 + dd offset cd_retsint16 ;; FFI_TYPE_SINT16 dd offset cd_retint ;; FFI_TYPE_UINT32 dd offset cd_retint ;; FFI_TYPE_SINT32 dd offset cd_retint64 ;; FFI_TYPE_UINT64 dd offset cd_retint64 ;; FFI_TYPE_SINT64 dd offset cd_epilogue ;; FFI_TYPE_STRUCT dd offset cd_retint ;; FFI_TYPE_POINTER - dd offset cd_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B - dd offset cd_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B + dd offset cd_retsint8 ;; FFI_TYPE_SMALL_STRUCT_1B + dd offset cd_retsint16 ;; FFI_TYPE_SMALL_STRUCT_2B dd offset cd_retint ;; FFI_TYPE_SMALL_STRUCT_4B -cd_retint8: - mov al, [ecx] +cd_retuint8: + movzx eax, BYTE PTR [ecx] + jmp cd_epilogue + +cd_retsint8: + movsx eax, BYTE PTR [ecx] jmp cd_epilogue -cd_retint16: - mov ax, [ecx] +cd_retuint16: + movzx eax, WORD PTR [ecx] + jmp cd_epilogue + +cd_retsint16: + movsx eax, WORD PTR [ecx] jmp cd_epilogue cd_retint: @@ -502,6 +560,7 @@ _ffi_call_win32: .long .Lretstruct1b /* FFI_TYPE_SMALL_STRUCT_1B */ .long .Lretstruct2b /* FFI_TYPE_SMALL_STRUCT_2B */ .long .Lretstruct4b /* FFI_TYPE_SMALL_STRUCT_4B */ + .long .Lretstruct /* FFI_TYPE_MS_STRUCT */ 1: add %ecx, %ecx add %ecx, %ecx @@ -644,6 +703,7 @@ _ffi_closure_SYSV: .long .Lcls_retstruct1 /* FFI_TYPE_SMALL_STRUCT_1B */ .long .Lcls_retstruct2 /* FFI_TYPE_SMALL_STRUCT_2B */ .long .Lcls_retstruct4 /* FFI_TYPE_SMALL_STRUCT_4B */ + .long .Lcls_retmsstruct /* FFI_TYPE_MS_STRUCT */ 1: add %eax, %eax @@ -708,6 +768,12 @@ _ffi_closure_SYSV: popl %ebp ret $0x4 +.Lcls_retmsstruct: + # Caller expects us to return a pointer to the real return value. + mov %ecx, %eax + # Caller doesn't expects us to pop struct return value pointer hidden arg. 
+ jmp .Lcls_epilogue + .Lcls_noretval: .Lcls_epilogue: movl %ebp, %esp @@ -722,7 +788,21 @@ _ffi_closure_SYSV: #define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4) #define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4) #define CIF_FLAGS_OFFSET 20 - + .balign 16 + .globl _ffi_closure_raw_THISCALL +#ifndef __OS2__ + .def _ffi_closure_raw_THISCALL; .scl 2; .type 32; .endef +#endif +_ffi_closure_raw_THISCALL: + pushl %ebp + movl %esp, %ebp + pushl %esi + subl $36, %esp + movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */ + movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */ + movl %edx, 12(%esp) /* user_data */ + leal 12(%ebp), %edx /* __builtin_dwarf_cfa () */ + jmp .stubraw # This assumes we are using gas. .balign 16 .globl _ffi_closure_raw_SYSV @@ -742,6 +822,7 @@ _ffi_closure_raw_SYSV: movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */ movl %edx, 12(%esp) /* user_data */ leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */ +.stubraw: movl %edx, 8(%esp) /* raw_args */ leal -24(%ebp), %edx movl %edx, 4(%esp) /* &res */ @@ -770,6 +851,7 @@ _ffi_closure_raw_SYSV: .long .Lrcls_retstruct1 /* FFI_TYPE_SMALL_STRUCT_1B */ .long .Lrcls_retstruct2 /* FFI_TYPE_SMALL_STRUCT_2B */ .long .Lrcls_retstruct4 /* FFI_TYPE_SMALL_STRUCT_4B */ + .long .Lrcls_retstruct /* FFI_TYPE_MS_STRUCT */ 1: add %eax, %eax add %eax, %eax diff --git a/src/xtensa/ffi.c b/src/xtensa/ffi.c new file mode 100644 index 0000000..fd94daf --- /dev/null +++ b/src/xtensa/ffi.c @@ -0,0 +1,298 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (c) 2013 Tensilica, Inc. 
+ + XTENSA Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include <ffi.h> +#include <ffi_common.h> + +/* + |----------------------------------------| + | | + on entry to ffi_call ----> |----------------------------------------| + | caller stack frame for registers a0-a3 | + |----------------------------------------| + | | + | additional arguments | + entry of the function ---> |----------------------------------------| + | copy of function arguments a2-a7 | + | - - - - - - - - - - - - - | + | | + + The area below the entry line becomes the new stack frame for the function. 
+
+*/
+
+
+#define FFI_TYPE_STRUCT_REGS FFI_TYPE_LAST
+
+/* Largest structure size, in bytes, that is returned in registers.
+   Bigger structures are returned in memory through a pointer that is
+   passed as a hidden first argument.  */
+#define FFI_XTENSA_MAX_REG_STRUCT (4 * 4)
+
+
+extern void ffi_call_SYSV(void *rvalue, unsigned rsize, unsigned flags,
+                          void(*fn)(void), unsigned nbytes, extended_cif*);
+extern void ffi_closure_SYSV(void) FFI_HIDDEN;
+
+/* Machine-dependent part of cif preparation.
+
+   Encodes in cif->flags how ffi_call_SYSV has to store the return value
+   and finalizes cif->bytes, the size of the outgoing argument area.
+   Always returns FFI_OK.  */
+ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
+{
+  switch (cif->rtype->type)
+    {
+    case FFI_TYPE_SINT8:
+    case FFI_TYPE_UINT8:
+    case FFI_TYPE_SINT16:
+    case FFI_TYPE_UINT16:
+      /* Sub-word results keep their own type code so that exactly that
+         many bytes are stored.  */
+      cif->flags = cif->rtype->type;
+      break;
+
+    case FFI_TYPE_DOUBLE:
+    case FFI_TYPE_UINT64:
+    case FFI_TYPE_SINT64:
+      cif->flags = FFI_TYPE_UINT64;
+      break;
+
+    case FFI_TYPE_STRUCT:
+      cif->flags = FFI_TYPE_STRUCT;
+      /* Up to 16 bytes are returned in registers.  A bigger structure is
+         referenced by a register; reserve 8 bytes for that pointer
+         (including 4 bytes for potential additional alignment).  */
+      if (cif->rtype->size > FFI_XTENSA_MAX_REG_STRUCT)
+        cif->bytes += 8;
+      break;
+
+    case FFI_TYPE_VOID:
+    case FFI_TYPE_FLOAT:
+    default:
+      /* Everything else is handled as one 32-bit word.  */
+      cif->flags = FFI_TYPE_UINT32;
+      break;
+    }
+
+  /* Round the stack up to a full 4 register frame, just in case
+     (we use this size in movsp). This way, it's also a multiple of
+     8 bytes for 64-bit arguments.  */
+  cif->bytes = ALIGN(cif->bytes, 16);
+
+  return FFI_OK;
+}
+
+/* Lay out the outgoing arguments of ECIF in the buffer STACK.
+
+   Called back from ffi_call_SYSV with the freshly allocated argument
+   area.  Arguments are written as consecutive 32-bit words; 64-bit
+   scalars and structures with an alignment above 4 start on an 8-byte
+   boundary, and a structure never straddles the boundary between the
+   first FFI_REGISTER_NARGS words and the remainder.  */
+void ffi_prep_args(extended_cif *ecif, unsigned char* stack)
+{
+  unsigned int i;
+  unsigned long *addr;
+  ffi_type **ptr;
+
+  union {
+    void **v;
+    char **c;
+    signed char **sc;
+    unsigned char **uc;
+    signed short **ss;
+    unsigned short **us;
+    unsigned int **i;
+    long long **ll;
+    float **f;
+    double **d;
+  } p_argv;
+
+  /* Verify that everything is aligned up properly */
+  FFI_ASSERT (((unsigned long) stack & 0x7) == 0);
+
+  p_argv.v = ecif->avalue;
+  addr = (unsigned long*)stack;
+
+  /* Structures that do not fit the return registers are returned in
+     memory: their address travels as a hidden first argument.  */
+  if (ecif->cif->rtype->type == FFI_TYPE_STRUCT
+      && ecif->cif->rtype->size > FFI_XTENSA_MAX_REG_STRUCT)
+    {
+      *addr++ = (unsigned long)ecif->rvalue;
+    }
+
+  for (i = ecif->cif->nargs, ptr = ecif->cif->arg_types;
+       i > 0;
+       i--, ptr++, p_argv.v++)
+    {
+      switch ((*ptr)->type)
+        {
+        case FFI_TYPE_SINT8:
+          *addr++ = **p_argv.sc;
+          break;
+        case FFI_TYPE_UINT8:
+          *addr++ = **p_argv.uc;
+          break;
+        case FFI_TYPE_SINT16:
+          *addr++ = **p_argv.ss;
+          break;
+        case FFI_TYPE_UINT16:
+          *addr++ = **p_argv.us;
+          break;
+        case FFI_TYPE_FLOAT:
+        case FFI_TYPE_INT:
+        case FFI_TYPE_UINT32:
+        case FFI_TYPE_SINT32:
+        case FFI_TYPE_POINTER:
+          *addr++ = **p_argv.i;
+          break;
+        case FFI_TYPE_DOUBLE:
+        case FFI_TYPE_UINT64:
+        case FFI_TYPE_SINT64:
+          /* 64-bit values start on an 8-byte boundary.  */
+          if (((unsigned long)addr & 4) != 0)
+            addr++;
+          *(unsigned long long*)addr = **p_argv.ll;
+          /* Advance by the number of slots just written.  The divisor is
+             the size of one slot (*addr), not of the pointer itself.  */
+          addr += sizeof(unsigned long long) / sizeof (*addr);
+          break;
+
+        case FFI_TYPE_STRUCT:
+          {
+            unsigned long offs;
+            unsigned long size;
+
+            if (((unsigned long)addr & 4) != 0 && (*ptr)->alignment > 4)
+              addr++;
+
+            offs = (unsigned long) addr - (unsigned long) stack;
+            size = (*ptr)->size;
+
+            /* Entire structure must fit the argument registers or
+               be placed completely behind them.  */
+            if (offs < FFI_REGISTER_NARGS * 4
+                && offs + size > FFI_REGISTER_NARGS * 4)
+              addr = (unsigned long*) (stack + FFI_REGISTER_NARGS * 4);
+
+            memcpy((char*) addr, *p_argv.c, size);
+            addr += (size + 3) / 4;
+            break;
+          }
+
+        default:
+          FFI_ASSERT(0);
+        }
+    }
+}
+
+
+/* Call FN as described by CIF with the argument values AVALUE and store
+   the result, if any, in RVALUE.
+
+   RVALUE may be NULL when the caller is not interested in the result;
+   it is never written when the function returns void.  */
+void ffi_call(ffi_cif* cif, void(*fn)(void), void *rvalue, void **avalue)
+{
+  extended_cif ecif;
+  unsigned long rsize = cif->rtype->size;
+  int flags = cif->flags;
+  void *alloc = NULL;
+
+  if (cif->abi != FFI_SYSV)
+    FFI_ASSERT(0);
+
+  ecif.cif = cif;
+  ecif.avalue = avalue;
+
+  /* Note that for structures that are returned in registers (size <= 16 bytes)
+     we allocate a temporary buffer and use memcpy to copy it to the final
+     destination. The reason is that the target address might be misaligned or
+     the length not a multiple of 4 bytes. Handling all those cases would be
+     very complex.  The same buffer doubles as the return area of a larger
+     structure when the caller passed no RVALUE.  */
+
+  if (flags == FFI_TYPE_STRUCT
+      && (rsize <= FFI_XTENSA_MAX_REG_STRUCT || rvalue == NULL))
+    {
+      alloc = alloca(ALIGN(rsize, 4));
+      ecif.rvalue = alloc;
+    }
+  else if (cif->rtype->type == FFI_TYPE_VOID)
+    {
+      /* A void return is encoded as FFI_TYPE_UINT32 in cif->flags, so the
+         assembly would store a word into a non-NULL RVALUE.  Hand it NULL
+         instead so that the caller's buffer is left untouched.  */
+      ecif.rvalue = NULL;
+    }
+  else
+    {
+      ecif.rvalue = rvalue;
+    }
+
+  ffi_call_SYSV (ecif.rvalue, rsize, flags, fn, cif->bytes, &ecif);
+
+  if (alloc != NULL && rvalue != NULL)
+    memcpy(rvalue, alloc, rsize);
+}
+
+extern void ffi_trampoline(void);
+extern void ffi_cacheflush(void* start, void* end);
+
+/* Initialize CLOSURE so that calling its trampoline invokes
+   FUN (cif, result, args, USER_DATA).
+
+   Returns FFI_OK on success or FFI_BAD_ABI when CIF was not prepared
+   for FFI_SYSV.
+   NOTE(review): CODELOC is not used; the trampoline is written and
+   flushed through closure->tramp.  Confirm that both always refer to
+   the same memory on this target.  */
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+                      ffi_cif* cif,
+                      void (*fun)(ffi_cif*, void*, void**, void*),
+                      void *user_data,
+                      void *codeloc)
+{
+  unsigned int handler = (unsigned int)ffi_closure_SYSV;
+
+  if (cif->abi != FFI_SYSV)
+    return FFI_BAD_ABI;
+
+  /* Copy the trampoline template into the closure and patch the literal
+     word at offset 8 with the address of ffi_closure_SYSV.  memcpy is
+     used for the patch so that the char buffer is not accessed through
+     an unsigned int lvalue.  */
+  memcpy(closure->tramp, ffi_trampoline, FFI_TRAMPOLINE_SIZE);
+  memcpy(&closure->tramp[8], &handler, sizeof handler);
+
+  /* The trampoline is executed as code, so push it out of the caches.  */
+  ffi_cacheflush(closure->tramp, closure->tramp + FFI_TRAMPOLINE_SIZE);
+
+  closure->cif = cif;
+  closure->fun = fun;
+  closure->user_data = user_data;
+  return FFI_OK;
+}
+
+
+/* C part of the closure entry point, called from ffi_closure_SYSV.
+
+   VALUES points at the saved incoming argument words: the
+   FFI_REGISTER_NARGS register words come first, followed by a 16-byte
+   gap and then the words passed on the caller's stack.  RVALUE is the
+   area that receives the result unless the callee returns a large
+   structure, in which case the result address is the first argument
+   word.
+
+   Returns the type code of the return value so that the assembly caller
+   can extend sub-word results.
+   NOTE(review): on an ABI mismatch FFI_BAD_ABI is returned through the
+   same channel and is therefore read as a type code by the caller.  */
+long FFI_HIDDEN
+ffi_closure_SYSV_inner(ffi_closure *closure, void **values, void *rvalue)
+{
+  ffi_cif *cif = closure->cif;
+  ffi_type **arg_types;
+  void **avalue;
+  unsigned int i;
+  int areg = 0;
+  int rtype;
+
+  if (cif->abi != FFI_SYSV)
+    return FFI_BAD_ABI;
+
+  rtype = cif->rtype->type;
+  if (rtype == FFI_TYPE_STRUCT
+      && cif->rtype->size > FFI_XTENSA_MAX_REG_STRUCT)
+    {
+      /* Large structure: the hidden first argument is the result address.  */
+      rvalue = *values;
+      areg++;
+    }
+
+  arg_types = cif->arg_types;
+  avalue = alloca(cif->nargs * sizeof(void *));
+
+  for (i = 0; i < cif->nargs; i++)
+    {
+      /* Number of 32-bit words occupied by this argument.  */
+      int nwords = (int) ((arg_types[i]->size + 3) / 4);
+
+      if (arg_types[i]->alignment == 8 && (areg & 1) != 0)
+        areg++;
+
+      /* skip the entry 16,a1 framework, add 16 bytes (4 registers) */
+      if (areg == FFI_REGISTER_NARGS)
+        areg += 4;
+
+      /* A structure is never split between registers and stack.  */
+      if (arg_types[i]->type == FFI_TYPE_STRUCT
+          && areg < FFI_REGISTER_NARGS
+          && areg + nwords > FFI_REGISTER_NARGS)
+        areg = FFI_REGISTER_NARGS + 4;
+
+      avalue[i] = &values[areg];
+      areg += nwords;
+    }
+
+  (closure->fun)(cif, rvalue, avalue, closure->user_data);
+
+  return rtype;
+}
diff --git a/src/xtensa/ffitarget.h b/src/xtensa/ffitarget.h
new file mode 100644
index 0000000..0ba728b
--- /dev/null
+++ b/src/xtensa/ffitarget.h
@@ -0,0 +1,53 @@
+/* -----------------------------------------------------------------*-C-*-
+   ffitarget.h - Copyright (c) 2013 Tensilica, Inc.
+   Target configuration macros for XTENSA.
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +#ifndef LIBFFI_H +#error "Please do not include ffitarget.h directly into your source. Use ffi.h instead." 
+#endif
+
+#ifndef LIBFFI_ASM
+typedef unsigned long ffi_arg;   /* unsigned ffi_arg type for this target */
+typedef signed long ffi_sarg;    /* signed counterpart of ffi_arg */
+
+/* ABIs known to this port.  FFI_SYSV is the only real entry and also
+   the default; FFI_FIRST_ABI and FFI_LAST_ABI merely bracket the
+   valid range.  */
+typedef enum ffi_abi {
+  FFI_FIRST_ABI = 0,
+  FFI_SYSV,
+  FFI_LAST_ABI,
+  FFI_DEFAULT_ABI = FFI_SYSV
+} ffi_abi;
+#endif
+
+/* Number of 32-bit argument words that are passed in registers; further
+   argument words are passed on the stack.  */
+#define FFI_REGISTER_NARGS 6
+
+/* ---- Definitions for closures ----------------------------------------- */
+
+#define FFI_CLOSURES 1
+#define FFI_NATIVE_RAW_API 0
+#define FFI_TRAMPOLINE_SIZE 24   /* bytes of trampoline code stored in a closure */
+
+#endif
diff --git a/src/xtensa/sysv.S b/src/xtensa/sysv.S
new file mode 100644
index 0000000..64e6a09
--- /dev/null
+++ b/src/xtensa/sysv.S
@@ -0,0 +1,253 @@
+/* -----------------------------------------------------------------------
+   sysv.S - Copyright (c) 2013 Tensilica, Inc.
+
+   XTENSA Foreign Function Interface
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+/* ENTRY opens a global function symbol in .text, END records its size.  */
+#define ENTRY(name) .text; .globl name; .type name,@function; .align 4; name:
+#define END(name) .size name , . - name
+
+/* Assert that the table below is in sync with ffi.h.  The store_calls
+   jump table in ffi_call_SYSV is indexed by (flags - FFI_TYPE_UINT8) and
+   therefore relies on these exact, consecutive type codes.  */
+
+#if FFI_TYPE_UINT8 != 5 \
+    || FFI_TYPE_SINT8 != 6 \
+    || FFI_TYPE_UINT16 != 7 \
+    || FFI_TYPE_SINT16 != 8 \
+    || FFI_TYPE_UINT32 != 9 \
+    || FFI_TYPE_SINT32 != 10 \
+    || FFI_TYPE_UINT64 != 11
+#error "xtensa/sysv.S out of sync with ffi.h"
+#endif
+
+
+/* ffi_call_SYSV (rvalue, rbytes, flags, (*fnaddr)(), bytes, ecif)
+      void *rvalue;            a2
+      unsigned long rbytes;    a3
+      unsigned flags;          a4
+      void (*fnaddr)();        a5
+      unsigned long bytes;     a6
+      extended_cif* ecif)      a7
+
+   Allocates 'bytes' of stack, lets ffi_prep_args fill it with the
+   outgoing arguments, loads the first six words into a10-a15, calls
+   fnaddr and finally stores the returned value through rvalue as
+   selected by flags.  Nothing is stored when rvalue is NULL.
+*/
+
+ENTRY(ffi_call_SYSV)
+
+        entry   a1, 32          # 32 byte frame for using call8 below
+
+        mov     a10, a7         # a10(->arg0): ecif
+        sub     a11, a1, a6     # a11(->arg1): stack pointer
+        mov     a7, a1          # fp
+        movsp   a1, a11         # set new sp = old_sp - bytes
+
+        movi    a8, ffi_prep_args
+        callx8  a8              # ffi_prep_args(ecif, stack)
+
+        # prepare to move stack pointer back up to 6 arguments
+        # note that 'bytes' is already aligned
+        # a6 = min(bytes, 6*4); new sp = sp + a6
+
+        movi    a10, 6*4
+        sub     a11, a6, a10
+        movgez  a6, a10, a11
+        add     a6, a1, a6
+
+
+        # we can pass up to 6 arguments in registers
+        # for simplicity, just load 6 arguments
+        # (the stack size is at least 32 bytes, so no risk to cross boundaries)
+
+        l32i    a10, a1, 0
+        l32i    a11, a1, 4
+        l32i    a12, a1, 8
+        l32i    a13, a1, 12
+        l32i    a14, a1, 16
+        l32i    a15, a1, 20
+
+        # move stack pointer
+
+        movsp   a1, a6
+
+        callx8  a5              # (*fn)(args...)
+
+        # Handle return value(s)
+        # The callee's result is read from a10 upwards.
+
+        beqz    a2, .Lexit      # rvalue == NULL: nothing to store
+
+        movi    a5, FFI_TYPE_STRUCT
+        bne     a4, a5, .Lstore # scalar results go through the jump table
+        movi    a5, 16
+        blt     a5, a3, .Lexit  # rbytes > 16: nothing is stored here
+
+        # structure of up to 16 bytes: store it word by word
+        s32i    a10, a2, 0
+        blti    a3, 5, .Lexit   # rbytes <= 4: done
+        addi    a3, a3, -1      # a3 = rbytes - 1 for the checks below
+        s32i    a11, a2, 4
+        blti    a3, 8, .Lexit   # rbytes <= 8: done
+        s32i    a12, a2, 8
+        blti    a3, 12, .Lexit  # rbytes <= 12: done
+        s32i    a13, a2, 12
+
+.Lexit: retw
+
+.Lstore:
+        addi    a4, a4, -FFI_TYPE_UINT8 # idx = flags - FFI_TYPE_UINT8
+        bgei    a4, 7, .Lexit   # should never happen
+        movi    a6, store_calls
+        add     a4, a4, a4
+        addx4   a6, a4, a6      # store_calls + idx * 8
+        jx      a6
+
+        # Jump table: one entry per type code, each aligned to 8 bytes
+        # to match the idx * 8 computation above.
+        .align  8
+store_calls:
+        # UINT8
+        s8i     a10, a2, 0
+        retw
+
+        # SINT8
+        .align  8
+        s8i     a10, a2, 0
+        retw
+
+        # UINT16
+        .align  8
+        s16i    a10, a2, 0
+        retw
+
+        # SINT16
+        .align  8
+        s16i    a10, a2, 0
+        retw
+
+        # UINT32
+        .align  8
+        s32i    a10, a2, 0
+        retw
+
+        # SINT32
+        .align  8
+        s32i    a10, a2, 0
+        retw
+
+        # UINT64
+        .align  8
+        s32i    a10, a2, 0
+        s32i    a11, a2, 4
+        retw
+
+END(ffi_call_SYSV)
+
+
+/*
+ * void ffi_cacheflush (void *start, void *end)
+ *
+ * a2: start, a3: end.  Issues dhwbi and ihi for every 4-byte step in
+ * [start, end); at least one step is executed.
+ */
+
+/* NOTE(review): EXTRA_ARGS_SIZE is not referenced anywhere in this file.  */
+#define EXTRA_ARGS_SIZE 24
+
+ENTRY(ffi_cacheflush)
+
+        entry   a1, 16
+
+1:      dhwbi   a2, 0
+        ihi     a2, 0
+        addi    a2, a2, 4
+        blt     a2, a3, 1b
+
+        retw
+
+END(ffi_cacheflush)
+
+/* ffi_trampoline is the code template that ffi_prep_closure_loc copies
+   into closure->tramp; the .long at offset 8 is patched there with the
+   address of ffi_closure_SYSV.  The bracketed numbers are byte offsets
+   within the trampoline.  */
+
+ENTRY(ffi_trampoline)
+
+        entry   a1, 16 + (FFI_REGISTER_NARGS * 4) + (4 * 4)     # [ 0]
+        j       2f                                              # [ 3]
+        .align  4                                               # [ 6]
+1:      .long   0                                               # [ 8]
+2:      l32r    a15, 1b                                         # [12]
+        _mov    a14, a0                                         # [15]
+        callx0  a15                                             # [18]
+                                                                # [21]
+END(ffi_trampoline)
+
+/*
+ * ffi_closure_SYSV
+ *
+ * Entered from the trampoline with
+ * a0: closure + 21
+ * a14: return address (a0)
+ *
+ * Saves the six incoming argument registers, calls
+ * ffi_closure_SYSV_inner and returns the result in a2-a5.
+ */
+
+ENTRY(ffi_closure_SYSV)
+
+        /* intentionally omitting entry here */
+
+        # restore return address (a0) and move pointer to closure to a10
+        addi    a10, a0, -21
+        mov     a0, a14
+
+        # allow up to 4 arguments as return values
+        addi    a11, a1, 4 * 4
+
+        # save up to 6 arguments to stack (allocated by the entry in the trampoline)
+        s32i    a2, a11, 0
+        s32i    a3, a11, 4
+        s32i    a4, a11, 8
+        s32i    a5, a11, 12
+        s32i    a6, a11, 16
+        s32i    a7, a11, 20
+
+        movi    a8, ffi_closure_SYSV_inner
+        mov     a12, a1
+        callx8  a8              # .._inner(*closure, **avalue, *rvalue)
+
+        # load up to four return arguments
+        l32i    a2, a1, 0
+        l32i    a3, a1, 4
+        l32i    a4, a1, 8
+        l32i    a5, a1, 12
+
+        # (sign-)extend return value
+        # a10 holds the return type code delivered by ffi_closure_SYSV_inner
+        movi    a11, FFI_TYPE_UINT8
+        bne     a10, a11, 1f
+        extui   a2, a2, 0, 8
+        retw
+
+1:      movi    a11, FFI_TYPE_SINT8
+        bne     a10, a11, 1f
+        sext    a2, a2, 7
+        retw
+
+1:      movi    a11, FFI_TYPE_UINT16
+        bne     a10, a11, 1f
+        extui   a2, a2, 0, 16
+        retw
+
+1:      movi    a11, FFI_TYPE_SINT16
+        bne     a10, a11, 1f
+        sext    a2, a2, 15
+
+1:      retw
+
+END(ffi_closure_SYSV) |