summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/aarch64/ffi.c1076
-rw-r--r--src/aarch64/ffitarget.h59
-rw-r--r--src/aarch64/sysv.S307
-rw-r--r--src/arm/ffi.c4
-rw-r--r--src/arm/sysv.S13
-rw-r--r--src/arm/trampoline.S2
-rw-r--r--src/bfin/ffi.c195
-rw-r--r--src/bfin/ffitarget.h43
-rw-r--r--src/bfin/sysv.S177
-rw-r--r--src/closures.c29
-rw-r--r--src/m68k/ffi.c10
-rw-r--r--src/m68k/sysv.S57
-rw-r--r--src/metag/ffi.c330
-rw-r--r--src/metag/ffitarget.h53
-rw-r--r--src/metag/sysv.S311
-rw-r--r--src/microblaze/ffi.c321
-rw-r--r--src/microblaze/ffitarget.h53
-rw-r--r--src/microblaze/sysv.S302
-rw-r--r--src/mips/ffi.c11
-rw-r--r--src/moxie/eabi.S137
-rw-r--r--src/moxie/ffi.c82
-rw-r--r--src/moxie/ffitarget.h12
-rw-r--r--src/powerpc/aix.S12
-rw-r--r--src/powerpc/ffi.c67
-rw-r--r--src/powerpc/ffi_darwin.c10
-rw-r--r--src/powerpc/linux64.S21
-rw-r--r--src/powerpc/linux64_closure.S20
-rw-r--r--src/powerpc/sysv.S21
-rw-r--r--src/prep_cif.c21
-rw-r--r--src/s390/ffi.c3
-rw-r--r--src/sparc/ffi.c16
-rw-r--r--src/sparc/v8.S39
-rw-r--r--src/sparc/v9.S2
-rw-r--r--src/tile/ffi.c355
-rw-r--r--src/tile/ffitarget.h65
-rw-r--r--src/tile/tile.S360
-rw-r--r--src/x86/ffi.c58
-rw-r--r--src/x86/ffi64.c48
-rw-r--r--src/x86/ffitarget.h10
-rw-r--r--src/x86/sysv.S17
-rw-r--r--src/x86/unix64.S10
-rw-r--r--src/x86/win32.S182
-rw-r--r--src/xtensa/ffi.c298
-rw-r--r--src/xtensa/ffitarget.h53
-rw-r--r--src/xtensa/sysv.S253
45 files changed, 5254 insertions, 271 deletions
diff --git a/src/aarch64/ffi.c b/src/aarch64/ffi.c
new file mode 100644
index 0000000..1405665
--- /dev/null
+++ b/src/aarch64/ffi.c
@@ -0,0 +1,1076 @@
+/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include <stdio.h>
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+
+/* Stack alignment requirement in bytes */
+#define AARCH64_STACK_ALIGN 16
+
+#define N_X_ARG_REG 8
+#define N_V_ARG_REG 8
+
+#define AARCH64_FFI_WITH_V (1 << AARCH64_FFI_WITH_V_BIT)
+
+union _d
+{
+ UINT64 d;
+ UINT32 s[2];
+};
+
+struct call_context
+{
+ UINT64 x [AARCH64_N_XREG];
+ struct
+ {
+ union _d d[2];
+ } v [AARCH64_N_VREG];
+};
+
+static void *
+get_x_addr (struct call_context *context, unsigned n)
+{
+ return &context->x[n];
+}
+
+static void *
+get_s_addr (struct call_context *context, unsigned n)
+{
+#if defined __AARCH64EB__
+ return &context->v[n].d[1].s[1];
+#else
+ return &context->v[n].d[0].s[0];
+#endif
+}
+
+static void *
+get_d_addr (struct call_context *context, unsigned n)
+{
+#if defined __AARCH64EB__
+ return &context->v[n].d[1];
+#else
+ return &context->v[n].d[0];
+#endif
+}
+
+static void *
+get_v_addr (struct call_context *context, unsigned n)
+{
+ return &context->v[n];
+}
+
+/* Return the memory location at which a basic type would reside
+ were it to have been stored in register n. */
+
+static void *
+get_basic_type_addr (unsigned short type, struct call_context *context,
+ unsigned n)
+{
+ switch (type)
+ {
+ case FFI_TYPE_FLOAT:
+ return get_s_addr (context, n);
+ case FFI_TYPE_DOUBLE:
+ return get_d_addr (context, n);
+ case FFI_TYPE_LONGDOUBLE:
+ return get_v_addr (context, n);
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_INT:
+ case FFI_TYPE_POINTER:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ return get_x_addr (context, n);
+ default:
+ FFI_ASSERT (0);
+ return NULL;
+ }
+}
+
+/* Return the alignment width for each of the basic types. */
+
+static size_t
+get_basic_type_alignment (unsigned short type)
+{
+ switch (type)
+ {
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_DOUBLE:
+ return sizeof (UINT64);
+ case FFI_TYPE_LONGDOUBLE:
+ return sizeof (long double);
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_INT:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_POINTER:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ return sizeof (UINT64);
+
+ default:
+ FFI_ASSERT (0);
+ return 0;
+ }
+}
+
+/* Return the size in bytes for each of the basic types. */
+
+static size_t
+get_basic_type_size (unsigned short type)
+{
+ switch (type)
+ {
+ case FFI_TYPE_FLOAT:
+ return sizeof (UINT32);
+ case FFI_TYPE_DOUBLE:
+ return sizeof (UINT64);
+ case FFI_TYPE_LONGDOUBLE:
+ return sizeof (long double);
+ case FFI_TYPE_UINT8:
+ return sizeof (UINT8);
+ case FFI_TYPE_SINT8:
+ return sizeof (SINT8);
+ case FFI_TYPE_UINT16:
+ return sizeof (UINT16);
+ case FFI_TYPE_SINT16:
+ return sizeof (SINT16);
+ case FFI_TYPE_UINT32:
+ return sizeof (UINT32);
+ case FFI_TYPE_INT:
+ case FFI_TYPE_SINT32:
+ return sizeof (SINT32);
+ case FFI_TYPE_POINTER:
+ case FFI_TYPE_UINT64:
+ return sizeof (UINT64);
+ case FFI_TYPE_SINT64:
+ return sizeof (SINT64);
+
+ default:
+ FFI_ASSERT (0);
+ return 0;
+ }
+}
+
+extern void
+ffi_call_SYSV (unsigned (*)(struct call_context *context, unsigned char *,
+ extended_cif *),
+ struct call_context *context,
+ extended_cif *,
+ unsigned,
+ void (*fn)(void));
+
+extern void
+ffi_closure_SYSV (ffi_closure *);
+
+/* Test for an FFI floating point representation. */
+
+static unsigned
+is_floating_type (unsigned short type)
+{
+ return (type == FFI_TYPE_FLOAT || type == FFI_TYPE_DOUBLE
+ || type == FFI_TYPE_LONGDOUBLE);
+}
+
+/* Test for a homogeneous structure. */
+
+static unsigned short
+get_homogeneous_type (ffi_type *ty)
+{
+ if (ty->type == FFI_TYPE_STRUCT && ty->elements)
+ {
+ unsigned i;
+ unsigned short candidate_type
+ = get_homogeneous_type (ty->elements[0]);
+ for (i =1; ty->elements[i]; i++)
+ {
+ unsigned short iteration_type = 0;
+ /* If we have a nested struct, we must find its homogeneous type.
+ If that fits with our candidate type, we are still
+ homogeneous. */
+ if (ty->elements[i]->type == FFI_TYPE_STRUCT
+ && ty->elements[i]->elements)
+ {
+ iteration_type = get_homogeneous_type (ty->elements[i]);
+ }
+ else
+ {
+ iteration_type = ty->elements[i]->type;
+ }
+
+ /* If we are not homogeneous, return FFI_TYPE_STRUCT. */
+ if (candidate_type != iteration_type)
+ return FFI_TYPE_STRUCT;
+ }
+ return candidate_type;
+ }
+
+ /* Base case, we have no more levels of nesting, so we
+ are a basic type, and so, trivially homogeneous in that type. */
+ return ty->type;
+}
+
+/* Determine the number of elements within a STRUCT.
+
+ Note, we must handle nested structs.
+
+ If ty is not a STRUCT this function will return 0. */
+
+static unsigned
+element_count (ffi_type *ty)
+{
+ if (ty->type == FFI_TYPE_STRUCT && ty->elements)
+ {
+ unsigned n;
+ unsigned elems = 0;
+ for (n = 0; ty->elements[n]; n++)
+ {
+ if (ty->elements[n]->type == FFI_TYPE_STRUCT
+ && ty->elements[n]->elements)
+ elems += element_count (ty->elements[n]);
+ else
+ elems++;
+ }
+ return elems;
+ }
+ return 0;
+}
+
+/* Test for a homogeneous floating point aggregate.
+
+ A homogeneous floating point aggregate is a homogeneous aggregate of
+ a half- single- or double- precision floating point type with one
+ to four elements. Note that this includes nested structs of the
+ basic type. */
+
+static int
+is_hfa (ffi_type *ty)
+{
+ if (ty->type == FFI_TYPE_STRUCT
+ && ty->elements[0]
+ && is_floating_type (get_homogeneous_type (ty)))
+ {
+ unsigned n = element_count (ty);
+ return n >= 1 && n <= 4;
+ }
+ return 0;
+}
+
+/* Test if an ffi_type is a candidate for passing in a register.
+
+ This test does not check that sufficient registers of the
+ appropriate class are actually available, merely that IFF
+ sufficient registers are available then the argument will be passed
+ in register(s).
+
+ Note that an ffi_type that is deemed to be a register candidate
+ will always be returned in registers.
+
+ Returns 1 if a register candidate else 0. */
+
+static int
+is_register_candidate (ffi_type *ty)
+{
+ switch (ty->type)
+ {
+ case FFI_TYPE_VOID:
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_DOUBLE:
+ case FFI_TYPE_LONGDOUBLE:
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_POINTER:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_INT:
+ case FFI_TYPE_SINT64:
+ return 1;
+
+ case FFI_TYPE_STRUCT:
+ if (is_hfa (ty))
+ {
+ return 1;
+ }
+ else if (ty->size > 16)
+ {
+ /* Too large. Will be replaced with a pointer to memory. The
+ pointer MAY be passed in a register, but the value will
+ not. This test specifically fails since the argument will
+ never be passed by value in registers. */
+ return 0;
+ }
+ else
+ {
+ /* Might be passed in registers depending on the number of
+ registers required. */
+ return (ty->size + 7) / 8 < N_X_ARG_REG;
+ }
+ break;
+
+ default:
+ FFI_ASSERT (0);
+ break;
+ }
+
+ return 0;
+}
+
+/* Test if an ffi_type argument or result is a candidate for a vector
+ register. */
+
+static int
+is_v_register_candidate (ffi_type *ty)
+{
+ return is_floating_type (ty->type)
+ || (ty->type == FFI_TYPE_STRUCT && is_hfa (ty));
+}
+
+/* Representation of the procedure call argument marshalling
+ state.
+
+ The terse state variable names match the names used in the AARCH64
+ PCS. */
+
+struct arg_state
+{
+ unsigned ngrn; /* Next general-purpose register number. */
+ unsigned nsrn; /* Next vector register number. */
+ unsigned nsaa; /* Next stack offset. */
+};
+
+/* Initialize a procedure call argument marshalling state. */
+static void
+arg_init (struct arg_state *state, unsigned call_frame_size)
+{
+ state->ngrn = 0;
+ state->nsrn = 0;
+ state->nsaa = 0;
+}
+
+/* Return the number of available consecutive core argument
+ registers. */
+
+static unsigned
+available_x (struct arg_state *state)
+{
+ return N_X_ARG_REG - state->ngrn;
+}
+
+/* Return the number of available consecutive vector argument
+ registers. */
+
+static unsigned
+available_v (struct arg_state *state)
+{
+ return N_V_ARG_REG - state->nsrn;
+}
+
+static void *
+allocate_to_x (struct call_context *context, struct arg_state *state)
+{
+ FFI_ASSERT (state->ngrn < N_X_ARG_REG)
+ return get_x_addr (context, (state->ngrn)++);
+}
+
+static void *
+allocate_to_s (struct call_context *context, struct arg_state *state)
+{
+ FFI_ASSERT (state->nsrn < N_V_ARG_REG)
+ return get_s_addr (context, (state->nsrn)++);
+}
+
+static void *
+allocate_to_d (struct call_context *context, struct arg_state *state)
+{
+ FFI_ASSERT (state->nsrn < N_V_ARG_REG)
+ return get_d_addr (context, (state->nsrn)++);
+}
+
+static void *
+allocate_to_v (struct call_context *context, struct arg_state *state)
+{
+ FFI_ASSERT (state->nsrn < N_V_ARG_REG)
+ return get_v_addr (context, (state->nsrn)++);
+}
+
+/* Allocate an aligned slot on the stack and return a pointer to it. */
+static void *
+allocate_to_stack (struct arg_state *state, void *stack, unsigned alignment,
+ unsigned size)
+{
+ void *allocation;
+
+ /* Round up the NSAA to the larger of 8 or the natural
+ alignment of the argument's type. */
+ state->nsaa = ALIGN (state->nsaa, alignment);
+ state->nsaa = ALIGN (state->nsaa, alignment);
+ state->nsaa = ALIGN (state->nsaa, 8);
+
+ allocation = stack + state->nsaa;
+
+ state->nsaa += size;
+ return allocation;
+}
+
+static void
+copy_basic_type (void *dest, void *source, unsigned short type)
+{
+ /* This is neccessary to ensure that basic types are copied
+ sign extended to 64-bits as libffi expects. */
+ switch (type)
+ {
+ case FFI_TYPE_FLOAT:
+ *(float *) dest = *(float *) source;
+ break;
+ case FFI_TYPE_DOUBLE:
+ *(double *) dest = *(double *) source;
+ break;
+ case FFI_TYPE_LONGDOUBLE:
+ *(long double *) dest = *(long double *) source;
+ break;
+ case FFI_TYPE_UINT8:
+ *(ffi_arg *) dest = *(UINT8 *) source;
+ break;
+ case FFI_TYPE_SINT8:
+ *(ffi_sarg *) dest = *(SINT8 *) source;
+ break;
+ case FFI_TYPE_UINT16:
+ *(ffi_arg *) dest = *(UINT16 *) source;
+ break;
+ case FFI_TYPE_SINT16:
+ *(ffi_sarg *) dest = *(SINT16 *) source;
+ break;
+ case FFI_TYPE_UINT32:
+ *(ffi_arg *) dest = *(UINT32 *) source;
+ break;
+ case FFI_TYPE_INT:
+ case FFI_TYPE_SINT32:
+ *(ffi_sarg *) dest = *(SINT32 *) source;
+ break;
+ case FFI_TYPE_POINTER:
+ case FFI_TYPE_UINT64:
+ *(ffi_arg *) dest = *(UINT64 *) source;
+ break;
+ case FFI_TYPE_SINT64:
+ *(ffi_sarg *) dest = *(SINT64 *) source;
+ break;
+
+ default:
+ FFI_ASSERT (0);
+ }
+}
+
+static void
+copy_hfa_to_reg_or_stack (void *memory,
+ ffi_type *ty,
+ struct call_context *context,
+ unsigned char *stack,
+ struct arg_state *state)
+{
+ unsigned elems = element_count (ty);
+ if (available_v (state) < elems)
+ {
+ /* There are insufficient V registers. Further V register allocations
+ are prevented, the NSAA is adjusted (by allocate_to_stack ())
+ and the argument is copied to memory at the adjusted NSAA. */
+ state->nsrn = N_V_ARG_REG;
+ memcpy (allocate_to_stack (state, stack, ty->alignment, ty->size),
+ memory,
+ ty->size);
+ }
+ else
+ {
+ int i;
+ unsigned short type = get_homogeneous_type (ty);
+ unsigned elems = element_count (ty);
+ for (i = 0; i < elems; i++)
+ {
+ void *reg = allocate_to_v (context, state);
+ copy_basic_type (reg, memory, type);
+ memory += get_basic_type_size (type);
+ }
+ }
+}
+
+/* Either allocate an appropriate register for the argument type, or if
+ none are available, allocate a stack slot and return a pointer
+ to the allocated space. */
+
+static void *
+allocate_to_register_or_stack (struct call_context *context,
+ unsigned char *stack,
+ struct arg_state *state,
+ unsigned short type)
+{
+ size_t alignment = get_basic_type_alignment (type);
+ size_t size = alignment;
+ switch (type)
+ {
+ case FFI_TYPE_FLOAT:
+ /* This is the only case for which the allocated stack size
+ should not match the alignment of the type. */
+ size = sizeof (UINT32);
+ /* Fall through. */
+ case FFI_TYPE_DOUBLE:
+ if (state->nsrn < N_V_ARG_REG)
+ return allocate_to_d (context, state);
+ state->nsrn = N_V_ARG_REG;
+ break;
+ case FFI_TYPE_LONGDOUBLE:
+ if (state->nsrn < N_V_ARG_REG)
+ return allocate_to_v (context, state);
+ state->nsrn = N_V_ARG_REG;
+ break;
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_INT:
+ case FFI_TYPE_POINTER:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ if (state->ngrn < N_X_ARG_REG)
+ return allocate_to_x (context, state);
+ state->ngrn = N_X_ARG_REG;
+ break;
+ default:
+ FFI_ASSERT (0);
+ }
+
+ return allocate_to_stack (state, stack, alignment, size);
+}
+
+/* Copy a value to an appropriate register, or if none are
+ available, to the stack. */
+
+static void
+copy_to_register_or_stack (struct call_context *context,
+ unsigned char *stack,
+ struct arg_state *state,
+ void *value,
+ unsigned short type)
+{
+ copy_basic_type (
+ allocate_to_register_or_stack (context, stack, state, type),
+ value,
+ type);
+}
+
+/* Marshall the arguments from FFI representation to procedure call
+ context and stack. */
+
+static unsigned
+aarch64_prep_args (struct call_context *context, unsigned char *stack,
+ extended_cif *ecif)
+{
+ int i;
+ struct arg_state state;
+
+ arg_init (&state, ALIGN(ecif->cif->bytes, 16));
+
+ for (i = 0; i < ecif->cif->nargs; i++)
+ {
+ ffi_type *ty = ecif->cif->arg_types[i];
+ switch (ty->type)
+ {
+ case FFI_TYPE_VOID:
+ FFI_ASSERT (0);
+ break;
+
+ /* If the argument is a basic type the argument is allocated to an
+ appropriate register, or if none are available, to the stack. */
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_DOUBLE:
+ case FFI_TYPE_LONGDOUBLE:
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_INT:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_POINTER:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ copy_to_register_or_stack (context, stack, &state,
+ ecif->avalue[i], ty->type);
+ break;
+
+ case FFI_TYPE_STRUCT:
+ if (is_hfa (ty))
+ {
+ copy_hfa_to_reg_or_stack (ecif->avalue[i], ty, context,
+ stack, &state);
+ }
+ else if (ty->size > 16)
+ {
+ /* If the argument is a composite type that is larger than 16
+ bytes, then the argument has been copied to memory, and
+ the argument is replaced by a pointer to the copy. */
+
+ copy_to_register_or_stack (context, stack, &state,
+ &(ecif->avalue[i]), FFI_TYPE_POINTER);
+ }
+ else if (available_x (&state) >= (ty->size + 7) / 8)
+ {
+ /* If the argument is a composite type and the size in
+ double-words is not more than the number of available
+ X registers, then the argument is copied into consecutive
+ X registers. */
+ int j;
+ for (j = 0; j < (ty->size + 7) / 8; j++)
+ {
+ memcpy (allocate_to_x (context, &state),
+ &(((UINT64 *) ecif->avalue[i])[j]),
+ sizeof (UINT64));
+ }
+ }
+ else
+ {
+ /* Otherwise, there are insufficient X registers. Further X
+ register allocations are prevented, the NSAA is adjusted
+ (by allocate_to_stack ()) and the argument is copied to
+ memory at the adjusted NSAA. */
+ state.ngrn = N_X_ARG_REG;
+
+ memcpy (allocate_to_stack (&state, stack, ty->alignment,
+ ty->size), ecif->avalue + i, ty->size);
+ }
+ break;
+
+ default:
+ FFI_ASSERT (0);
+ break;
+ }
+ }
+
+ return ecif->cif->aarch64_flags;
+}
+
+ffi_status
+ffi_prep_cif_machdep (ffi_cif *cif)
+{
+ /* Round the stack up to a multiple of the stack alignment requirement. */
+ cif->bytes =
+ (cif->bytes + (AARCH64_STACK_ALIGN - 1)) & ~ (AARCH64_STACK_ALIGN - 1);
+
+ /* Initialize our flags. We are interested if this CIF will touch a
+ vector register, if so we will enable context save and load to
+ those registers, otherwise not. This is intended to be friendly
+ to lazy float context switching in the kernel. */
+ cif->aarch64_flags = 0;
+
+ if (is_v_register_candidate (cif->rtype))
+ {
+ cif->aarch64_flags |= AARCH64_FFI_WITH_V;
+ }
+ else
+ {
+ int i;
+ for (i = 0; i < cif->nargs; i++)
+ if (is_v_register_candidate (cif->arg_types[i]))
+ {
+ cif->aarch64_flags |= AARCH64_FFI_WITH_V;
+ break;
+ }
+ }
+
+ return FFI_OK;
+}
+
+/* Call a function with the provided arguments and capture the return
+ value. */
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+ extended_cif ecif;
+
+ ecif.cif = cif;
+ ecif.avalue = avalue;
+ ecif.rvalue = rvalue;
+
+ switch (cif->abi)
+ {
+ case FFI_SYSV:
+ {
+ struct call_context context;
+ unsigned stack_bytes;
+
+ /* Figure out the total amount of stack space we need, the
+ above call frame space needs to be 16 bytes aligned to
+ ensure correct alignment of the first object inserted in
+ that space hence the ALIGN applied to cif->bytes.*/
+ stack_bytes = ALIGN(cif->bytes, 16);
+
+ memset (&context, 0, sizeof (context));
+ if (is_register_candidate (cif->rtype))
+ {
+ ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn);
+ switch (cif->rtype->type)
+ {
+ case FFI_TYPE_VOID:
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_DOUBLE:
+ case FFI_TYPE_LONGDOUBLE:
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_POINTER:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_INT:
+ case FFI_TYPE_SINT64:
+ {
+ void *addr = get_basic_type_addr (cif->rtype->type,
+ &context, 0);
+ copy_basic_type (rvalue, addr, cif->rtype->type);
+ break;
+ }
+
+ case FFI_TYPE_STRUCT:
+ if (is_hfa (cif->rtype))
+ {
+ int j;
+ unsigned short type = get_homogeneous_type (cif->rtype);
+ unsigned elems = element_count (cif->rtype);
+ for (j = 0; j < elems; j++)
+ {
+ void *reg = get_basic_type_addr (type, &context, j);
+ copy_basic_type (rvalue, reg, type);
+ rvalue += get_basic_type_size (type);
+ }
+ }
+ else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
+ {
+ unsigned size = ALIGN (cif->rtype->size, sizeof (UINT64));
+ memcpy (rvalue, get_x_addr (&context, 0), size);
+ }
+ else
+ {
+ FFI_ASSERT (0);
+ }
+ break;
+
+ default:
+ FFI_ASSERT (0);
+ break;
+ }
+ }
+ else
+ {
+ memcpy (get_x_addr (&context, 8), &rvalue, sizeof (UINT64));
+ ffi_call_SYSV (aarch64_prep_args, &context, &ecif,
+ stack_bytes, fn);
+ }
+ break;
+ }
+
+ default:
+ FFI_ASSERT (0);
+ break;
+ }
+}
+
+static unsigned char trampoline [] =
+{ 0x70, 0x00, 0x00, 0x58, /* ldr x16, 1f */
+ 0x91, 0x00, 0x00, 0x10, /* adr x17, 2f */
+ 0x00, 0x02, 0x1f, 0xd6 /* br x16 */
+};
+
+/* Build a trampoline. */
+
+#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX,FLAGS) \
+ ({unsigned char *__tramp = (unsigned char*)(TRAMP); \
+ UINT64 __fun = (UINT64)(FUN); \
+ UINT64 __ctx = (UINT64)(CTX); \
+ UINT64 __flags = (UINT64)(FLAGS); \
+ memcpy (__tramp, trampoline, sizeof (trampoline)); \
+ memcpy (__tramp + 12, &__fun, sizeof (__fun)); \
+ memcpy (__tramp + 20, &__ctx, sizeof (__ctx)); \
+ memcpy (__tramp + 28, &__flags, sizeof (__flags)); \
+ __clear_cache(__tramp, __tramp + FFI_TRAMPOLINE_SIZE); \
+ })
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+ ffi_cif* cif,
+ void (*fun)(ffi_cif*,void*,void**,void*),
+ void *user_data,
+ void *codeloc)
+{
+ if (cif->abi != FFI_SYSV)
+ return FFI_BAD_ABI;
+
+ FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_SYSV, codeloc,
+ cif->aarch64_flags);
+
+ closure->cif = cif;
+ closure->user_data = user_data;
+ closure->fun = fun;
+
+ return FFI_OK;
+}
+
+/* Primary handler to setup and invoke a function within a closure.
+
+ A closure when invoked enters via the assembler wrapper
+ ffi_closure_SYSV(). The wrapper allocates a call context on the
+ stack, saves the interesting registers (from the perspective of
+ the calling convention) into the context then passes control to
+ ffi_closure_SYSV_inner() passing the saved context and a pointer to
+ the stack at the point ffi_closure_SYSV() was invoked.
+
+ On the return path the assembler wrapper will reload call context
+ regsiters.
+
+ ffi_closure_SYSV_inner() marshalls the call context into ffi value
+ desriptors, invokes the wrapped function, then marshalls the return
+ value back into the call context. */
+
+void
+ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
+ void *stack)
+{
+ ffi_cif *cif = closure->cif;
+ void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
+ void *rvalue = NULL;
+ int i;
+ struct arg_state state;
+
+ arg_init (&state, ALIGN(cif->bytes, 16));
+
+ for (i = 0; i < cif->nargs; i++)
+ {
+ ffi_type *ty = cif->arg_types[i];
+
+ switch (ty->type)
+ {
+ case FFI_TYPE_VOID:
+ FFI_ASSERT (0);
+ break;
+
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_INT:
+ case FFI_TYPE_POINTER:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_DOUBLE:
+ case FFI_TYPE_LONGDOUBLE:
+ avalue[i] = allocate_to_register_or_stack (context, stack,
+ &state, ty->type);
+ break;
+
+ case FFI_TYPE_STRUCT:
+ if (is_hfa (ty))
+ {
+ unsigned n = element_count (ty);
+ if (available_v (&state) < n)
+ {
+ state.nsrn = N_V_ARG_REG;
+ avalue[i] = allocate_to_stack (&state, stack, ty->alignment,
+ ty->size);
+ }
+ else
+ {
+ switch (get_homogeneous_type (ty))
+ {
+ case FFI_TYPE_FLOAT:
+ {
+ /* Eeek! We need a pointer to the structure,
+ however the homogeneous float elements are
+ being passed in individual S registers,
+ therefore the structure is not represented as
+ a contiguous sequence of bytes in our saved
+ register context. We need to fake up a copy
+ of the structure layed out in memory
+ correctly. The fake can be tossed once the
+ closure function has returned hence alloca()
+ is sufficient. */
+ int j;
+ UINT32 *p = avalue[i] = alloca (ty->size);
+ for (j = 0; j < element_count (ty); j++)
+ memcpy (&p[j],
+ allocate_to_s (context, &state),
+ sizeof (*p));
+ break;
+ }
+
+ case FFI_TYPE_DOUBLE:
+ {
+ /* Eeek! We need a pointer to the structure,
+ however the homogeneous float elements are
+ being passed in individual S registers,
+ therefore the structure is not represented as
+ a contiguous sequence of bytes in our saved
+ register context. We need to fake up a copy
+ of the structure layed out in memory
+ correctly. The fake can be tossed once the
+ closure function has returned hence alloca()
+ is sufficient. */
+ int j;
+ UINT64 *p = avalue[i] = alloca (ty->size);
+ for (j = 0; j < element_count (ty); j++)
+ memcpy (&p[j],
+ allocate_to_d (context, &state),
+ sizeof (*p));
+ break;
+ }
+
+ case FFI_TYPE_LONGDOUBLE:
+ memcpy (&avalue[i],
+ allocate_to_v (context, &state),
+ sizeof (*avalue));
+ break;
+
+ default:
+ FFI_ASSERT (0);
+ break;
+ }
+ }
+ }
+ else if (ty->size > 16)
+ {
+ /* Replace Composite type of size greater than 16 with a
+ pointer. */
+ memcpy (&avalue[i],
+ allocate_to_register_or_stack (context, stack,
+ &state, FFI_TYPE_POINTER),
+ sizeof (avalue[i]));
+ }
+ else if (available_x (&state) >= (ty->size + 7) / 8)
+ {
+ avalue[i] = get_x_addr (context, state.ngrn);
+ state.ngrn += (ty->size + 7) / 8;
+ }
+ else
+ {
+ state.ngrn = N_X_ARG_REG;
+
+ avalue[i] = allocate_to_stack (&state, stack, ty->alignment,
+ ty->size);
+ }
+ break;
+
+ default:
+ FFI_ASSERT (0);
+ break;
+ }
+ }
+
+ /* Figure out where the return value will be passed, either in
+ registers or in a memory block allocated by the caller and passed
+ in x8. */
+
+ if (is_register_candidate (cif->rtype))
+ {
+ /* Register candidates are *always* returned in registers. */
+
+ /* Allocate a scratchpad for the return value, we will let the
+ callee scrible the result into the scratch pad then move the
+ contents into the appropriate return value location for the
+ call convention. */
+ rvalue = alloca (cif->rtype->size);
+ (closure->fun) (cif, rvalue, avalue, closure->user_data);
+
+ /* Copy the return value into the call context so that it is returned
+ as expected to our caller. */
+ switch (cif->rtype->type)
+ {
+ case FFI_TYPE_VOID:
+ break;
+
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_POINTER:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_INT:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_DOUBLE:
+ case FFI_TYPE_LONGDOUBLE:
+ {
+ void *addr = get_basic_type_addr (cif->rtype->type, context, 0);
+ copy_basic_type (addr, rvalue, cif->rtype->type);
+ break;
+ }
+ case FFI_TYPE_STRUCT:
+ if (is_hfa (cif->rtype))
+ {
+ int i;
+ unsigned short type = get_homogeneous_type (cif->rtype);
+ unsigned elems = element_count (cif->rtype);
+ for (i = 0; i < elems; i++)
+ {
+ void *reg = get_basic_type_addr (type, context, i);
+ copy_basic_type (reg, rvalue, type);
+ rvalue += get_basic_type_size (type);
+ }
+ }
+ else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
+ {
+ unsigned size = ALIGN (cif->rtype->size, sizeof (UINT64)) ;
+ memcpy (get_x_addr (context, 0), rvalue, size);
+ }
+ else
+ {
+ FFI_ASSERT (0);
+ }
+ break;
+ default:
+ FFI_ASSERT (0);
+ break;
+ }
+ }
+ else
+ {
+ memcpy (&rvalue, get_x_addr (context, 8), sizeof (UINT64));
+ (closure->fun) (cif, rvalue, avalue, closure->user_data);
+ }
+}
+
diff --git a/src/aarch64/ffitarget.h b/src/aarch64/ffitarget.h
new file mode 100644
index 0000000..6f1a348
--- /dev/null
+++ b/src/aarch64/ffitarget.h
@@ -0,0 +1,59 @@
+/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef LIBFFI_TARGET_H
+#define LIBFFI_TARGET_H
+
+#ifndef LIBFFI_H
+#error "Please do not include ffitarget.h directly into your source. Use ffi.h instead."
+#endif
+
+#ifndef LIBFFI_ASM
+typedef unsigned long ffi_arg;
+typedef signed long ffi_sarg;
+
+typedef enum ffi_abi
+ {
+ FFI_FIRST_ABI = 0,
+ FFI_SYSV,
+ FFI_LAST_ABI,
+ FFI_DEFAULT_ABI = FFI_SYSV
+ } ffi_abi;
+#endif
+
+/* ---- Definitions for closures ----------------------------------------- */
+
+#define FFI_CLOSURES 1
+#define FFI_TRAMPOLINE_SIZE 36
+#define FFI_NATIVE_RAW_API 0
+
+/* ---- Internal ---- */
+
+
+#define FFI_EXTRA_CIF_FIELDS unsigned aarch64_flags
+
+#define AARCH64_FFI_WITH_V_BIT 0
+
+#define AARCH64_N_XREG 32
+#define AARCH64_N_VREG 32
+#define AARCH64_CALL_CONTEXT_SIZE (AARCH64_N_XREG * 8 + AARCH64_N_VREG * 16)
+
+#endif
diff --git a/src/aarch64/sysv.S b/src/aarch64/sysv.S
new file mode 100644
index 0000000..b8cd421
--- /dev/null
+++ b/src/aarch64/sysv.S
@@ -0,0 +1,307 @@
+/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+#define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
+#define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
+#define cfi_restore(reg) .cfi_restore reg
+#define cfi_def_cfa_register(reg) .cfi_def_cfa_register reg
+
+ .text
+ .globl ffi_call_SYSV
+ .type ffi_call_SYSV, #function
+
+/* ffi_call_SYSV()
+
+ Create a stack frame, setup an argument context, call the callee
+ and extract the result.
+
+ The maximum required argument stack size is provided,
+ ffi_call_SYSV() allocates that stack space then calls the
+ prepare_fn to populate register context and stack. The
+ argument passing registers are loaded from the register
+ context and the callee called, on return the register passing
+ register are saved back to the context. Our caller will
+ extract the return value from the final state of the saved
+ register context.
+
+ Prototype:
+
+ extern unsigned
+ ffi_call_SYSV (void (*)(struct call_context *context, unsigned char *,
+ extended_cif *),
+ struct call_context *context,
+ extended_cif *,
+ unsigned required_stack_size,
+ void (*fn)(void));
+
+ Therefore on entry we have:
+
+ x0 prepare_fn
+ x1 &context
+ x2 &ecif
+ x3 bytes
+ x4 fn
+
+ This function uses the following stack frame layout:
+
+ ==
+ saved x30(lr)
+ x29(fp)-> saved x29(fp)
+ saved x24
+ saved x23
+ saved x22
+ sp' -> saved x21
+ ...
+ sp -> (constructed callee stack arguments)
+ ==
+
+ Voila! */
+
+#define ffi_call_SYSV_FS (8 * 4)
+
+ .cfi_startproc
+ffi_call_SYSV:
+ stp x29, x30, [sp, #-16]!
+ cfi_adjust_cfa_offset (16)
+ cfi_rel_offset (x29, 0)
+ cfi_rel_offset (x30, 8)
+
+ mov x29, sp
+ cfi_def_cfa_register (x29)
+ sub sp, sp, #ffi_call_SYSV_FS
+
+ stp x21, x22, [sp, 0]
+ cfi_rel_offset (x21, 0 - ffi_call_SYSV_FS)
+ cfi_rel_offset (x22, 8 - ffi_call_SYSV_FS)
+
+ stp x23, x24, [sp, 16]
+ cfi_rel_offset (x23, 16 - ffi_call_SYSV_FS)
+ cfi_rel_offset (x24, 24 - ffi_call_SYSV_FS)
+
+ mov x21, x1
+ mov x22, x2
+ mov x24, x4
+
+ /* Allocate the stack space for the actual arguments, many
+ arguments will be passed in registers, but we assume
+ worst case and allocate sufficient stack for ALL of
+ the arguments. */
+ sub sp, sp, x3
+
+ /* unsigned (*prepare_fn) (struct call_context *context,
+ unsigned char *stack, extended_cif *ecif);
+ */
+ mov x23, x0
+ mov x0, x1
+ mov x1, sp
+ /* x2 already in place */
+ blr x23
+
+ /* Preserve the flags returned. */
+ mov x23, x0
+
+ /* Figure out if we should touch the vector registers. */
+ tbz x23, #AARCH64_FFI_WITH_V_BIT, 1f
+
+ /* Load the vector argument passing registers. */
+ ldp q0, q1, [x21, #8*32 + 0]
+ ldp q2, q3, [x21, #8*32 + 32]
+ ldp q4, q5, [x21, #8*32 + 64]
+ ldp q6, q7, [x21, #8*32 + 96]
+1:
+ /* Load the core argument passing registers. */
+ ldp x0, x1, [x21, #0]
+ ldp x2, x3, [x21, #16]
+ ldp x4, x5, [x21, #32]
+ ldp x6, x7, [x21, #48]
+
+ /* Don't forget x8 which may be holding the address of a return buffer.
+ */
+ ldr x8, [x21, #8*8]
+
+ blr x24
+
+ /* Save the core argument passing registers. */
+ stp x0, x1, [x21, #0]
+ stp x2, x3, [x21, #16]
+ stp x4, x5, [x21, #32]
+ stp x6, x7, [x21, #48]
+
+ /* Note nothing useful ever comes back in x8! */
+
+ /* Figure out if we should touch the vector registers. */
+ tbz x23, #AARCH64_FFI_WITH_V_BIT, 1f
+
+ /* Save the vector argument passing registers. */
+ stp q0, q1, [x21, #8*32 + 0]
+ stp q2, q3, [x21, #8*32 + 32]
+ stp q4, q5, [x21, #8*32 + 64]
+ stp q6, q7, [x21, #8*32 + 96]
+1:
+ /* All done, unwind our stack frame. */
+ ldp x21, x22, [x29, # - ffi_call_SYSV_FS]
+ cfi_restore (x21)
+ cfi_restore (x22)
+
+ ldp x23, x24, [x29, # - ffi_call_SYSV_FS + 16]
+ cfi_restore (x23)
+ cfi_restore (x24)
+
+ mov sp, x29
+ cfi_def_cfa_register (sp)
+
+ ldp x29, x30, [sp], #16
+ cfi_adjust_cfa_offset (-16)
+ cfi_restore (x29)
+ cfi_restore (x30)
+
+ ret
+
+ .cfi_endproc
+ .size ffi_call_SYSV, .-ffi_call_SYSV
+
+#define ffi_closure_SYSV_FS (8 * 2 + AARCH64_CALL_CONTEXT_SIZE)
+
+/* ffi_closure_SYSV
+
+ Closure invocation glue. This is the low level code invoked directly by
+ the closure trampoline to setup and call a closure.
+
+ On entry x17 points to a struct trampoline_data, x16 has been clobbered
+ all other registers are preserved.
+
+ We allocate a call context and save the argument passing registers,
+ then invoked the generic C ffi_closure_SYSV_inner() function to do all
+ the real work, on return we load the result passing registers back from
+ the call context.
+
+ On entry
+
+ extern void
+ ffi_closure_SYSV (struct trampoline_data *);
+
+ struct trampoline_data
+ {
+ UINT64 *ffi_closure;
+ UINT64 flags;
+ };
+
+ This function uses the following stack frame layout:
+
+ ==
+ saved x30(lr)
+ x29(fp)-> saved x29(fp)
+ saved x22
+ saved x21
+ ...
+ sp -> call_context
+ ==
+
+ Voila! */
+
+ .text
+ .globl ffi_closure_SYSV
+ .cfi_startproc
+ffi_closure_SYSV:
+ stp x29, x30, [sp, #-16]!
+ cfi_adjust_cfa_offset (16)
+ cfi_rel_offset (x29, 0)
+ cfi_rel_offset (x30, 8)
+
+ mov x29, sp
+
+ sub sp, sp, #ffi_closure_SYSV_FS
+ cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
+
+ stp x21, x22, [x29, #-16]
+ cfi_rel_offset (x21, 0)
+ cfi_rel_offset (x22, 8)
+
+ /* Load x21 with &call_context. */
+ mov x21, sp
+ /* Preserve our struct trampoline_data * */
+ mov x22, x17
+
+ /* Save the rest of the argument passing registers. */
+ stp x0, x1, [x21, #0]
+ stp x2, x3, [x21, #16]
+ stp x4, x5, [x21, #32]
+ stp x6, x7, [x21, #48]
+ /* Don't forget we may have been given a result scratch pad address.
+ */
+ str x8, [x21, #64]
+
+ /* Figure out if we should touch the vector registers. */
+ ldr x0, [x22, #8]
+ tbz x0, #AARCH64_FFI_WITH_V_BIT, 1f
+
+ /* Save the argument passing vector registers. */
+ stp q0, q1, [x21, #8*32 + 0]
+ stp q2, q3, [x21, #8*32 + 32]
+ stp q4, q5, [x21, #8*32 + 64]
+ stp q6, q7, [x21, #8*32 + 96]
+1:
+ /* Load &ffi_closure.. */
+ ldr x0, [x22, #0]
+ mov x1, x21
+ /* Compute the location of the stack at the point that the
+ trampoline was called. */
+ add x2, x29, #16
+
+ bl ffi_closure_SYSV_inner
+
+ /* Figure out if we should touch the vector registers. */
+ ldr x0, [x22, #8]
+ tbz x0, #AARCH64_FFI_WITH_V_BIT, 1f
+
+ /* Load the result passing vector registers. */
+ ldp q0, q1, [x21, #8*32 + 0]
+ ldp q2, q3, [x21, #8*32 + 32]
+ ldp q4, q5, [x21, #8*32 + 64]
+ ldp q6, q7, [x21, #8*32 + 96]
+1:
+ /* Load the result passing core registers. */
+ ldp x0, x1, [x21, #0]
+ ldp x2, x3, [x21, #16]
+ ldp x4, x5, [x21, #32]
+ ldp x6, x7, [x21, #48]
+ /* Note nothing usefull is returned in x8. */
+
+ /* We are done, unwind our frame. */
+ ldp x21, x22, [x29, #-16]
+ cfi_restore (x21)
+ cfi_restore (x22)
+
+ mov sp, x29
+ cfi_adjust_cfa_offset (-ffi_closure_SYSV_FS)
+
+ ldp x29, x30, [sp], #16
+ cfi_adjust_cfa_offset (-16)
+ cfi_restore (x29)
+ cfi_restore (x30)
+
+ ret
+ .cfi_endproc
+ .size ffi_closure_SYSV, .-ffi_closure_SYSV
diff --git a/src/arm/ffi.c b/src/arm/ffi.c
index 1f8597d..3ccceb9 100644
--- a/src/arm/ffi.c
+++ b/src/arm/ffi.c
@@ -251,8 +251,10 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
break;
case FFI_VFP:
+#ifdef __ARM_EABI__
ffi_call_VFP (fn, &ecif, cif->bytes, cif->flags, ecif.rvalue);
break;
+#endif
default:
FFI_ASSERT(0);
@@ -609,8 +611,10 @@ ffi_prep_closure_loc (ffi_closure* closure,
if (cif->abi == FFI_SYSV)
closure_func = &ffi_closure_SYSV;
+#ifdef __ARM_EABI__
else if (cif->abi == FFI_VFP)
closure_func = &ffi_closure_VFP;
+#endif
else
return FFI_BAD_ABI;
diff --git a/src/arm/sysv.S b/src/arm/sysv.S
index 60e2ae3..fb38cd6 100644
--- a/src/arm/sysv.S
+++ b/src/arm/sysv.S
@@ -41,7 +41,7 @@
#define CNAME(x) x
#endif
#ifdef __APPLE__
-#define ENTRY(x) .globl CNAME(x); CNAME(x):
+#define ENTRY(x) .globl _##x; _##x:
#else
#define ENTRY(x) .globl CNAME(x); .type CNAME(x),%function; CNAME(x):
#endif /* __APPLE__ */
@@ -187,7 +187,7 @@ ARM_FUNC_START ffi_call_SYSV
@ r1 already set
@ Call ffi_prep_args(stack, &ecif)
- bl ffi_prep_args
+ bl CNAME(ffi_prep_args)
@ move first 4 parameters in registers
ldmia sp, {r0-r3}
@@ -334,7 +334,9 @@ ARM_FUNC_START ffi_closure_SYSV
/* Below are VFP hard-float ABI call and closure implementations.
- Add VFP FPU directive here. */
+ Add VFP FPU directive here. This is only compiled into the library
+ under EABI. */
+#ifdef __ARM_EABI__
.fpu vfp
@ r0: fn
@@ -362,7 +364,7 @@ ARM_FUNC_START ffi_call_VFP
sub r2, fp, #64 @ VFP scratch space
@ Call ffi_prep_args(stack, &ecif, vfp_space)
- bl ffi_prep_args
+ bl CNAME(ffi_prep_args)
@ Load VFP register args if needed
cmp r0, #0
@@ -444,7 +446,7 @@ ARM_FUNC_START ffi_closure_VFP
sub sp, sp, #72
str sp, [sp, #64]
add r1, sp, #64
- bl ffi_closure_SYSV_inner
+ bl CNAME(ffi_closure_SYSV_inner)
cmp r0, #FFI_TYPE_INT
beq .Lretint_vfp
@@ -491,6 +493,7 @@ ARM_FUNC_START ffi_closure_VFP
.ffi_closure_VFP_end:
UNWIND .fnend
.size CNAME(ffi_closure_VFP),.ffi_closure_VFP_end-CNAME(ffi_closure_VFP)
+#endif
ENTRY(ffi_arm_trampoline)
stmfd sp!, {r0-r3}
diff --git a/src/arm/trampoline.S b/src/arm/trampoline.S
index 7b47429..935e8de 100644
--- a/src/arm/trampoline.S
+++ b/src/arm/trampoline.S
@@ -1,5 +1,5 @@
# GENERATED CODE - DO NOT EDIT
-# This file was generated by ./gentramp.sh
+# This file was generated by src/arm/gentramp.sh
# Copyright (c) 2010, Plausible Labs Cooperative, Inc.
#
diff --git a/src/bfin/ffi.c b/src/bfin/ffi.c
new file mode 100644
index 0000000..0beccc1
--- /dev/null
+++ b/src/bfin/ffi.c
@@ -0,0 +1,195 @@
+/* -----------------------------------------------------------------------
+ ffi.c - Copyright (c) 2012 Alexandre K. I. de Mendonca <alexandre.keunecke@gmail.com>
+
+ Blackfin Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+
+/* Maximum number of GPRs available for argument passing. */
+#define MAX_GPRARGS 3
+
+/*
+ * Return types
+ */
+#define FFIBFIN_RET_VOID 0
+#define FFIBFIN_RET_BYTE 1
+#define FFIBFIN_RET_HALFWORD 2
+#define FFIBFIN_RET_INT64 3
+#define FFIBFIN_RET_INT32 4
+
+/*====================================================================*/
+/* PROTOTYPE *
+ /*====================================================================*/
+void ffi_prep_args(unsigned char *, extended_cif *);
+
+/*====================================================================*/
+/* Externals */
+/* (Assembly) */
+/*====================================================================*/
+
+extern void ffi_call_SYSV(unsigned, extended_cif *, void(*)(unsigned char *, extended_cif *), unsigned, void *, void(*fn)(void));
+
+/*====================================================================*/
+/* Implementation */
+/* */
+/*====================================================================*/
+
+
+/*
+ * This function calculates the return type (size) based on type.
+ */
+
+ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
+{
+ /* --------------------------------------*
+ * Return handling *
+ * --------------------------------------*/
+ switch (cif->rtype->type) {
+ case FFI_TYPE_VOID:
+ cif->flags = FFIBFIN_RET_VOID;
+ break;
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ cif->flags = FFIBFIN_RET_HALFWORD;
+ break;
+ case FFI_TYPE_UINT8:
+ cif->flags = FFIBFIN_RET_BYTE;
+ break;
+ case FFI_TYPE_INT:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_POINTER:
+ case FFI_TYPE_SINT8:
+ cif->flags = FFIBFIN_RET_INT32;
+ break;
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_DOUBLE:
+ cif->flags = FFIBFIN_RET_INT64;
+ break;
+ case FFI_TYPE_STRUCT:
+ if (cif->rtype->size <= 4){
+ cif->flags = FFIBFIN_RET_INT32;
+ }else if (cif->rtype->size == 8){
+ cif->flags = FFIBFIN_RET_INT64;
+ }else{
+ //it will return via a hidden pointer in P0
+ cif->flags = FFIBFIN_RET_VOID;
+ }
+ break;
+ default:
+ FFI_ASSERT(0);
+ break;
+ }
+ return FFI_OK;
+}
+
+/*
+ * This will prepare the arguments and will call the assembly routine
+ * cif = the call interface
+ * fn = the function to be called
+ * rvalue = the return value
+ * avalue = the arguments
+ */
+void ffi_call(ffi_cif *cif, void(*fn)(void), void *rvalue, void **avalue)
+{
+ int ret_type = cif->flags;
+ extended_cif ecif;
+ ecif.cif = cif;
+ ecif.avalue = avalue;
+ ecif.rvalue = rvalue;
+
+ switch (cif->abi) {
+ case FFI_SYSV:
+ ffi_call_SYSV(cif->bytes, &ecif, ffi_prep_args, ret_type, ecif.rvalue, fn);
+ break;
+ default:
+ FFI_ASSERT(0);
+ break;
+ }
+}
+
+
+/*
+* This function prepares the parameters (copies them from the ecif to the stack)
+* to call the function (ffi_prep_args is called by the assembly routine in file
+* sysv.S, which also calls the actual function)
+*/
+void ffi_prep_args(unsigned char *stack, extended_cif *ecif)
+{
+ register unsigned int i = 0;
+ void **p_argv;
+ unsigned char *argp;
+ ffi_type **p_arg;
+ argp = stack;
+ p_argv = ecif->avalue;
+ for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types;
+ (i != 0);
+ i--, p_arg++) {
+ size_t z;
+ z = (*p_arg)->size;
+ if (z < sizeof(int)) {
+ z = sizeof(int);
+ switch ((*p_arg)->type) {
+ case FFI_TYPE_SINT8: {
+ signed char v = *(SINT8 *)(* p_argv);
+ signed int t = v;
+ *(signed int *) argp = t;
+ }
+ break;
+ case FFI_TYPE_UINT8: {
+ unsigned char v = *(UINT8 *)(* p_argv);
+ unsigned int t = v;
+ *(unsigned int *) argp = t;
+ }
+ break;
+ case FFI_TYPE_SINT16:
+ *(signed int *) argp = (signed int) * (SINT16 *)(* p_argv);
+ break;
+ case FFI_TYPE_UINT16:
+ *(unsigned int *) argp = (unsigned int) * (UINT16 *)(* p_argv);
+ break;
+ case FFI_TYPE_STRUCT:
+ memcpy(argp, *p_argv, (*p_arg)->size);
+ break;
+ default:
+ FFI_ASSERT(0);
+ break;
+ }
+ } else if (z == sizeof(int)) {
+ *(unsigned int *) argp = (unsigned int) * (UINT32 *)(* p_argv);
+ } else {
+ memcpy(argp, *p_argv, z);
+ }
+ p_argv++;
+ argp += z;
+ }
+}
+
+
+
diff --git a/src/bfin/ffitarget.h b/src/bfin/ffitarget.h
new file mode 100644
index 0000000..2175c01
--- /dev/null
+++ b/src/bfin/ffitarget.h
@@ -0,0 +1,43 @@
+/* -----------------------------------------------------------------------
+ ffitarget.h - Copyright (c) 2012 Alexandre K. I. de Mendonca <alexandre.keunecke@gmail.com>
+
+ Blackfin Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#ifndef LIBFFI_TARGET_H
+#define LIBFFI_TARGET_H
+
+#ifndef LIBFFI_ASM
+typedef unsigned long ffi_arg;
+typedef signed long ffi_sarg;
+
+typedef enum ffi_abi {
+ FFI_FIRST_ABI = 0,
+ FFI_SYSV,
+ FFI_LAST_ABI,
+ FFI_DEFAULT_ABI = FFI_SYSV
+} ffi_abi;
+#endif
+
+#endif
+
diff --git a/src/bfin/sysv.S b/src/bfin/sysv.S
new file mode 100644
index 0000000..ae7a152
--- /dev/null
+++ b/src/bfin/sysv.S
@@ -0,0 +1,177 @@
+/* -----------------------------------------------------------------------
+ sysv.S - Copyright (c) 2012 Alexandre K. I. de Mendonca <alexandre.keunecke@gmail.com>
+
+ Blackfin Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+.text
+.align 4
+
+ /*
+ There is a "feature" in the bfin toolchain that it puts a _ before funcion names
+ that's why the function here it's called _ffi_call_SYSV and not ffi_call_SYSV
+ */
+ .global _ffi_call_SYSV;
+ .type _ffi_call_SYSV, STT_FUNC;
+ .func ffi_call_SYSV
+
+ /*
+ cif->bytes = R0 (fp+8)
+ &ecif = R1 (fp+12)
+ ffi_prep_args = R2 (fp+16)
+ ret_type = stack (fp+20)
+ ecif.rvalue = stack (fp+24)
+ fn = stack (fp+28)
+ got (fp+32)
+ There is room for improvement here (we can use temporary registers
+ instead of saving the values in the memory)
+ REGS:
+ P5 => Stack pointer (function arguments)
+ R5 => cif->bytes
+ R4 => ret->type
+
+ FP-20 = P3
+ FP-16 = SP (parameters area)
+ FP-12 = SP (temp)
+ FP-08 = function return part 1 [R0]
+ FP-04 = function return part 2 [R1]
+ */
+
+_ffi_call_SYSV:
+.prologue:
+ LINK 20;
+ [FP-20] = P3;
+ [FP+8] = R0;
+ [FP+12] = R1;
+ [FP+16] = R2;
+
+.allocate_stack:
+ //alocate cif->bytes into the stack
+ R1 = [FP+8];
+ R0 = SP;
+ R0 = R0 - R1;
+ R1 = 4;
+ R0 = R0 - R1;
+ [FP-12] = SP;
+ SP = R0;
+ [FP-16] = SP;
+
+.call_prep_args:
+ //get the addr of prep_args
+ P0 = [P3 + _ffi_prep_args@FUNCDESC_GOT17M4];
+ P1 = [P0];
+ P3 = [P0+4];
+ R0 = [FP-16];//SP (parameter area)
+ R1 = [FP+12];//ecif
+ call (P1);
+
+.call_user_function:
+ //ajust SP so as to allow the user function access the parameters on the stack
+ SP = [FP-16]; //point to function parameters
+ R0 = [SP];
+ R1 = [SP+4];
+ R2 = [SP+8];
+ //load user function address
+ P0 = FP;
+ P0 +=28;
+ P1 = [P0];
+ P1 = [P1];
+ P3 = [P0+4];
+ /*
+ For functions returning aggregate values (struct) occupying more than 8 bytes,
+ the caller allocates the return value object on the stack and the address
+ of this object is passed to the callee as a hidden argument in register P0.
+ */
+ P0 = [FP+24];
+
+ call (P1);
+ SP = [FP-12];
+.compute_return:
+ P2 = [FP-20];
+ [FP-8] = R0;
+ [FP-4] = R1;
+
+ R0 = [FP+20];
+ R1 = R0 << 2;
+
+ R0 = [P2+.rettable@GOT17M4];
+ R0 = R1 + R0;
+ P2 = R0;
+ R1 = [P2];
+
+ P2 = [FP+-20];
+ R0 = [P2+.rettable@GOT17M4];
+ R0 = R1 + R0;
+ P2 = R0;
+ R0 = [FP-8];
+ R1 = [FP-4];
+ jump (P2);
+
+/*
+#define FFIBFIN_RET_VOID 0
+#define FFIBFIN_RET_BYTE 1
+#define FFIBFIN_RET_HALFWORD 2
+#define FFIBFIN_RET_INT64 3
+#define FFIBFIN_RET_INT32 4
+*/
+.align 4
+.align 4
+.rettable:
+ .dd .epilogue - .rettable
+ .dd .rbyte - .rettable;
+ .dd .rhalfword - .rettable;
+ .dd .rint64 - .rettable;
+ .dd .rint32 - .rettable;
+
+.rbyte:
+ P0 = [FP+24];
+ R0 = R0.B (Z);
+ [P0] = R0;
+ JUMP .epilogue
+.rhalfword:
+ P0 = [FP+24];
+ R0 = R0.L;
+ [P0] = R0;
+ JUMP .epilogue
+.rint64:
+ P0 = [FP+24];// &rvalue
+ [P0] = R0;
+ [P0+4] = R1;
+ JUMP .epilogue
+.rint32:
+ P0 = [FP+24];
+ [P0] = R0;
+.epilogue:
+ R0 = [FP+8];
+ R1 = [FP+12];
+ R2 = [FP+16];
+ P3 = [FP-20];
+ UNLINK;
+ RTS;
+
+.size _ffi_call_SYSV,.-_ffi_call_SYSV;
+.endfunc
diff --git a/src/closures.c b/src/closures.c
index 1b37827..6298d6f 100644
--- a/src/closures.c
+++ b/src/closures.c
@@ -172,6 +172,25 @@ selinux_enabled_check (void)
#endif /* !FFI_MMAP_EXEC_SELINUX */
+/* On PaX enable kernels that have MPROTECT enable we can't use PROT_EXEC. */
+#ifdef FFI_MMAP_EXEC_EMUTRAMP_PAX
+#include <stdlib.h>
+
+static int emutramp_enabled = -1;
+
+static int
+emutramp_enabled_check (void)
+{
+ if (getenv ("FFI_DISABLE_EMUTRAMP") == NULL)
+ return 1;
+ else
+ return 0;
+}
+
+#define is_emutramp_enabled() (emutramp_enabled >= 0 ? emutramp_enabled \
+ : (emutramp_enabled = emutramp_enabled_check ()))
+#endif /* FFI_MMAP_EXEC_EMUTRAMP_PAX */
+
#elif defined (__CYGWIN__) || defined(__INTERIX)
#include <sys/mman.h>
@@ -181,6 +200,10 @@ selinux_enabled_check (void)
#endif /* !defined(X86_WIN32) && !defined(X86_WIN64) */
+#ifndef FFI_MMAP_EXEC_EMUTRAMP_PAX
+#define is_emutramp_enabled() 0
+#endif /* FFI_MMAP_EXEC_EMUTRAMP_PAX */
+
/* Declare all functions defined in dlmalloc.c as static. */
static void *dlmalloc(size_t);
static void dlfree(void*);
@@ -458,6 +481,12 @@ dlmmap (void *start, size_t length, int prot,
printf ("mapping in %zi\n", length);
#endif
+ if (execfd == -1 && is_emutramp_enabled ())
+ {
+ ptr = mmap (start, length, prot & ~PROT_EXEC, flags, fd, offset);
+ return ptr;
+ }
+
if (execfd == -1 && !is_selinux_enabled ())
{
ptr = mmap (start, length, prot | PROT_EXEC, flags, fd, offset);
diff --git a/src/m68k/ffi.c b/src/m68k/ffi.c
index 37a0784..0dee938 100644
--- a/src/m68k/ffi.c
+++ b/src/m68k/ffi.c
@@ -123,6 +123,8 @@ ffi_prep_args (void *stack, extended_cif *ecif)
#define CIF_FLAGS_POINTER 32
#define CIF_FLAGS_STRUCT1 64
#define CIF_FLAGS_STRUCT2 128
+#define CIF_FLAGS_SINT8 256
+#define CIF_FLAGS_SINT16 512
/* Perform machine dependent cif processing */
ffi_status
@@ -200,6 +202,14 @@ ffi_prep_cif_machdep (ffi_cif *cif)
cif->flags = CIF_FLAGS_DINT;
break;
+ case FFI_TYPE_SINT16:
+ cif->flags = CIF_FLAGS_SINT16;
+ break;
+
+ case FFI_TYPE_SINT8:
+ cif->flags = CIF_FLAGS_SINT8;
+ break;
+
default:
cif->flags = CIF_FLAGS_INT;
break;
diff --git a/src/m68k/sysv.S b/src/m68k/sysv.S
index f6f4ef9..ec2b14f 100644
--- a/src/m68k/sysv.S
+++ b/src/m68k/sysv.S
@@ -2,9 +2,10 @@
sysv.S - Copyright (c) 2012 Alan Hourihane
Copyright (c) 1998, 2012 Andreas Schwab
- Copyright (c) 2008 Red Hat, Inc.
-
- m68k Foreign Function Interface
+ Copyright (c) 2008 Red Hat, Inc.
+ Copyright (c) 2012 Thorsten Glaser
+
+ m68k Foreign Function Interface
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@@ -168,8 +169,28 @@ retstruct1:
retstruct2:
btst #7,%d2
- jbeq noretval
+ jbeq retsint8
move.w %d0,(%a1)
+ jbra epilogue
+
+retsint8:
+ btst #8,%d2
+ jbeq retsint16
+ | NOTE: On the mc68000, extb is not supported. 8->16, then 16->32.
+#if !defined(__mc68020__) && !defined(__mc68030__) && !defined(__mc68040__) && !defined(__mc68060__) && !defined(__mcoldfire__)
+ ext.w %d0
+ ext.l %d0
+#else
+ extb.l %d0
+#endif
+ move.l %d0,(%a1)
+ jbra epilogue
+
+retsint16:
+ btst #9,%d2
+ jbeq noretval
+ ext.l %d0
+ move.l %d0,(%a1)
noretval:
epilogue:
@@ -201,8 +222,10 @@ CALLFUNC(ffi_closure_SYSV):
lsr.l #1,%d0
jne 1f
jcc .Lcls_epilogue
+ | CIF_FLAGS_INT
move.l -12(%fp),%d0
.Lcls_epilogue:
+ | no CIF_FLAGS_*
unlk %fp
rts
1:
@@ -210,6 +233,7 @@ CALLFUNC(ffi_closure_SYSV):
lsr.l #2,%d0
jne 1f
jcs .Lcls_ret_float
+ | CIF_FLAGS_DINT
move.l (%a0)+,%d0
move.l (%a0),%d1
jra .Lcls_epilogue
@@ -224,6 +248,7 @@ CALLFUNC(ffi_closure_SYSV):
lsr.l #2,%d0
jne 1f
jcs .Lcls_ret_ldouble
+ | CIF_FLAGS_DOUBLE
#if defined(__MC68881__) || defined(__HAVE_68881__)
fmove.d (%a0),%fp0
#else
@@ -242,17 +267,37 @@ CALLFUNC(ffi_closure_SYSV):
jra .Lcls_epilogue
1:
lsr.l #2,%d0
- jne .Lcls_ret_struct2
+ jne 1f
jcs .Lcls_ret_struct1
+ | CIF_FLAGS_POINTER
move.l (%a0),%a0
move.l %a0,%d0
jra .Lcls_epilogue
.Lcls_ret_struct1:
move.b (%a0),%d0
jra .Lcls_epilogue
-.Lcls_ret_struct2:
+1:
+ lsr.l #2,%d0
+ jne 1f
+ jcs .Lcls_ret_sint8
+ | CIF_FLAGS_STRUCT2
move.w (%a0),%d0
jra .Lcls_epilogue
+.Lcls_ret_sint8:
+ move.l (%a0),%d0
+ | NOTE: On the mc68000, extb is not supported. 8->16, then 16->32.
+#if !defined(__mc68020__) && !defined(__mc68030__) && !defined(__mc68040__) && !defined(__mc68060__) && !defined(__mcoldfire__)
+ ext.w %d0
+ ext.l %d0
+#else
+ extb.l %d0
+#endif
+ jra .Lcls_epilogue
+1:
+ | CIF_FLAGS_SINT16
+ move.l (%a0),%d0
+ ext.l %d0
+ jra .Lcls_epilogue
CFI_ENDPROC()
.size CALLFUNC(ffi_closure_SYSV),.-CALLFUNC(ffi_closure_SYSV)
diff --git a/src/metag/ffi.c b/src/metag/ffi.c
new file mode 100644
index 0000000..46b383e
--- /dev/null
+++ b/src/metag/ffi.c
@@ -0,0 +1,330 @@
+/* ----------------------------------------------------------------------
+ ffi.c - Copyright (c) 2013 Imagination Technologies
+
+ Meta Foreign Function Interface
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ `Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED `AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL SIMON POSNJAK BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ OTHER DEALINGS IN THE SOFTWARE.
+----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+
+#define MIN(a,b) (((a) < (b)) ? (a) : (b))
+
+/*
+ * ffi_prep_args is called by the assembly routine once stack space has been
+ * allocated for the function's arguments
+ */
+
+unsigned int ffi_prep_args(char *stack, extended_cif *ecif)
+{
+ register unsigned int i;
+ register void **p_argv;
+ register char *argp;
+ register ffi_type **p_arg;
+
+ argp = stack;
+
+ /* Store return value */
+ if ( ecif->cif->flags == FFI_TYPE_STRUCT ) {
+ argp -= 4;
+ *(void **) argp = ecif->rvalue;
+ }
+
+ p_argv = ecif->avalue;
+
+ /* point to next location */
+ for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types; (i != 0); i--, p_arg++, p_argv++)
+ {
+ size_t z;
+
+ /* Move argp to address of argument */
+ z = (*p_arg)->size;
+ argp -= z;
+
+ /* Align if necessary */
+ argp = (char *) ALIGN_DOWN(ALIGN_DOWN(argp, (*p_arg)->alignment), 4);
+
+ if (z < sizeof(int)) {
+ z = sizeof(int);
+ switch ((*p_arg)->type)
+ {
+ case FFI_TYPE_SINT8:
+ *(signed int *) argp = (signed int)*(SINT8 *)(* p_argv);
+ break;
+ case FFI_TYPE_UINT8:
+ *(unsigned int *) argp = (unsigned int)*(UINT8 *)(* p_argv);
+ break;
+ case FFI_TYPE_SINT16:
+ *(signed int *) argp = (signed int)*(SINT16 *)(* p_argv);
+ break;
+ case FFI_TYPE_UINT16:
+ *(unsigned int *) argp = (unsigned int)*(UINT16 *)(* p_argv);
+ case FFI_TYPE_STRUCT:
+ memcpy(argp, *p_argv, (*p_arg)->size);
+ break;
+ default:
+ FFI_ASSERT(0);
+ }
+ } else if ( z == sizeof(int)) {
+ *(unsigned int *) argp = (unsigned int)*(UINT32 *)(* p_argv);
+ } else {
+ memcpy(argp, *p_argv, z);
+ }
+ }
+
+ /* return the size of the arguments to be passed in registers,
+ padded to an 8 byte boundary to preserve stack alignment */
+ return ALIGN(MIN(stack - argp, 6*4), 8);
+}
+
+/* Perform machine dependent cif processing */
+ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
+{
+ ffi_type **ptr;
+ unsigned i, bytes = 0;
+
+ for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++) {
+ if ((*ptr)->size == 0)
+ return FFI_BAD_TYPEDEF;
+
+ /* Perform a sanity check on the argument type, do this
+ check after the initialization. */
+ FFI_ASSERT_VALID_TYPE(*ptr);
+
+ /* Add any padding if necessary */
+ if (((*ptr)->alignment - 1) & bytes)
+ bytes = ALIGN(bytes, (*ptr)->alignment);
+
+ bytes += ALIGN((*ptr)->size, 4);
+ }
+
+ /* Ensure arg space is aligned to an 8-byte boundary */
+ bytes = ALIGN(bytes, 8);
+
+ /* Make space for the return structure pointer */
+ if (cif->rtype->type == FFI_TYPE_STRUCT) {
+ bytes += sizeof(void*);
+
+ /* Ensure stack is aligned to an 8-byte boundary */
+ bytes = ALIGN(bytes, 8);
+ }
+
+ cif->bytes = bytes;
+
+ /* Set the return type flag */
+ switch (cif->rtype->type) {
+ case FFI_TYPE_VOID:
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_DOUBLE:
+ cif->flags = (unsigned) cif->rtype->type;
+ break;
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_UINT64:
+ cif->flags = (unsigned) FFI_TYPE_SINT64;
+ break;
+ case FFI_TYPE_STRUCT:
+ /* Meta can store return values which are <= 64 bits */
+ if (cif->rtype->size <= 4)
+ /* Returned to D0Re0 as 32-bit value */
+ cif->flags = (unsigned)FFI_TYPE_INT;
+ else if ((cif->rtype->size > 4) && (cif->rtype->size <= 8))
+ /* Returned valued is stored to D1Re0|R0Re0 */
+ cif->flags = (unsigned)FFI_TYPE_DOUBLE;
+ else
+ /* value stored in memory */
+ cif->flags = (unsigned)FFI_TYPE_STRUCT;
+ break;
+ default:
+ cif->flags = (unsigned)FFI_TYPE_INT;
+ break;
+ }
+ return FFI_OK;
+}
+
+extern void ffi_call_SYSV(void (*fn)(void), extended_cif *, unsigned, unsigned, double *);
+
+/*
+ * Exported in API. Entry point
+ * cif -> ffi_cif object
+ * fn -> function pointer
+ * rvalue -> pointer to return value
+ * avalue -> vector of void * pointers pointing to memory locations holding the
+ * arguments
+ */
+void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+ extended_cif ecif;
+
+ int small_struct = (((cif->flags == FFI_TYPE_INT) || (cif->flags == FFI_TYPE_DOUBLE)) && (cif->rtype->type == FFI_TYPE_STRUCT));
+ ecif.cif = cif;
+ ecif.avalue = avalue;
+
+ double temp;
+
+ /*
+ * If the return value is a struct and we don't have a return value address
+ * then we need to make one
+ */
+
+ if ((rvalue == NULL ) && (cif->flags == FFI_TYPE_STRUCT))
+ ecif.rvalue = alloca(cif->rtype->size);
+ else if (small_struct)
+ ecif.rvalue = &temp;
+ else
+ ecif.rvalue = rvalue;
+
+ switch (cif->abi) {
+ case FFI_SYSV:
+ ffi_call_SYSV(fn, &ecif, cif->bytes, cif->flags, ecif.rvalue);
+ break;
+ default:
+ FFI_ASSERT(0);
+ break;
+ }
+
+ if (small_struct)
+ memcpy (rvalue, &temp, cif->rtype->size);
+}
+
+/* private members */
+
+static void ffi_prep_incoming_args_SYSV (char *, void **, void **,
+ ffi_cif*, float *);
+
+void ffi_closure_SYSV (ffi_closure *);
+
+/* Do NOT change that without changing the FFI_TRAMPOLINE_SIZE */
+extern unsigned int ffi_metag_trampoline[10]; /* 10 instructions */
+
+/* end of private members */
+
+/*
+ * __tramp: trampoline memory location
+ * __fun: assembly routine
+ * __ctx: memory location for wrapper
+ *
+ * At this point, tramp[0] == __ctx !
+ */
+void ffi_init_trampoline(unsigned char *__tramp, unsigned int __fun, unsigned int __ctx) {
+ memcpy (__tramp, ffi_metag_trampoline, sizeof(ffi_metag_trampoline));
+ *(unsigned int*) &__tramp[40] = __ctx;
+ *(unsigned int*) &__tramp[44] = __fun;
+ /* This will flush the instruction cache */
+ __builtin_meta2_cachewd(&__tramp[0], 1);
+ __builtin_meta2_cachewd(&__tramp[47], 1);
+}
+
+
+
+/* the cif must already be prepared */
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure *closure,
+ ffi_cif* cif,
+ void (*fun)(ffi_cif*,void*,void**,void*),
+ void *user_data,
+ void *codeloc)
+{
+ void (*closure_func)(ffi_closure*) = NULL;
+
+ if (cif->abi == FFI_SYSV)
+ closure_func = &ffi_closure_SYSV;
+ else
+ return FFI_BAD_ABI;
+
+ ffi_init_trampoline(
+ (unsigned char*)&closure->tramp[0],
+ (unsigned int)closure_func,
+ (unsigned int)codeloc);
+
+ closure->cif = cif;
+ closure->user_data = user_data;
+ closure->fun = fun;
+
+ return FFI_OK;
+}
+
+
+/* This function is jumped to by the trampoline */
+unsigned int ffi_closure_SYSV_inner (closure, respp, args, vfp_args)
+ ffi_closure *closure;
+ void **respp;
+ void *args;
+ void *vfp_args;
+{
+ ffi_cif *cif;
+ void **arg_area;
+
+ cif = closure->cif;
+ arg_area = (void**) alloca (cif->nargs * sizeof (void*));
+
+ /*
+ * This call will initialize ARG_AREA, such that each
+ * element in that array points to the corresponding
+ * value on the stack; and if the function returns
+ * a structure, it will re-set RESP to point to the
+ * structure return address.
+ */
+ ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif, vfp_args);
+
+ (closure->fun) ( cif, *respp, arg_area, closure->user_data);
+
+ return cif->flags;
+}
+
+static void ffi_prep_incoming_args_SYSV(char *stack, void **rvalue,
+ void **avalue, ffi_cif *cif,
+ float *vfp_stack)
+{
+ register unsigned int i;
+ register void **p_argv;
+ register char *argp;
+ register ffi_type **p_arg;
+
+ /* stack points to original arguments */
+ argp = stack;
+
+ /* Store return value */
+ if ( cif->flags == FFI_TYPE_STRUCT ) {
+ argp -= 4;
+ *rvalue = *(void **) argp;
+ }
+
+ p_argv = avalue;
+
+ for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++) {
+ size_t z;
+ size_t alignment;
+
+ alignment = (*p_arg)->alignment;
+ if (alignment < 4)
+ alignment = 4;
+ if ((alignment - 1) & (unsigned)argp)
+ argp = (char *) ALIGN(argp, alignment);
+
+ z = (*p_arg)->size;
+ *p_argv = (void*) argp;
+ p_argv++;
+ argp -= z;
+ }
+ return;
+}
diff --git a/src/metag/ffitarget.h b/src/metag/ffitarget.h
new file mode 100644
index 0000000..7b9dbeb
--- /dev/null
+++ b/src/metag/ffitarget.h
@@ -0,0 +1,53 @@
+/* -----------------------------------------------------------------*-C-*-
+ ffitarget.h - Copyright (c) 2013 Imagination Technologies Ltd.
+ Target configuration macros for Meta
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+
+ ----------------------------------------------------------------------- */
+
+#ifndef LIBFFI_TARGET_H
+#define LIBFFI_TARGET_H
+
+#ifndef LIBFFI_H
+#error "Please do not include ffitarget.h directly into your source. Use ffi.h instead."
+#endif
+
+#ifndef LIBFFI_ASM
+typedef unsigned long ffi_arg;
+typedef signed long ffi_sarg;
+
+typedef enum ffi_abi {
+ FFI_FIRST_ABI = 0,
+ FFI_SYSV,
+ FFI_DEFAULT_ABI = FFI_SYSV,
+ FFI_LAST_ABI = FFI_DEFAULT_ABI + 1,
+} ffi_abi;
+#endif
+
+/* ---- Definitions for closures ----------------------------------------- */
+
+#define FFI_CLOSURES 1
+#define FFI_TRAMPOLINE_SIZE 48
+#define FFI_NATIVE_RAW_API 0
+
+#endif
+
diff --git a/src/metag/sysv.S b/src/metag/sysv.S
new file mode 100644
index 0000000..b4b2a3b
--- /dev/null
+++ b/src/metag/sysv.S
@@ -0,0 +1,311 @@
+/* -----------------------------------------------------------------------
+ sysv.S - Copyright (c) 2013 Imagination Technologies Ltd.
+
+ Meta Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+#ifdef HAVE_MACHINE_ASM_H
+#include <machine/asm.h>
+#else
+#ifdef __USER_LABEL_PREFIX__
+#define CONCAT1(a, b) CONCAT2(a, b)
+#define CONCAT2(a, b) a ## b
+
+/* Use the right prefix for global labels. */
+#define CNAME(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
+#else
+#define CNAME(x) x
+#endif
+#define ENTRY(x) .globl CNAME(x); .type CNAME(x), %function; CNAME(x):
+#endif
+
+#ifdef __ELF__
+#define LSYM(x) .x
+#else
+#define LSYM(x) x
+#endif
+
+.macro call_reg x=
+ .text
+ .balign 4
+ mov D1RtP, \x
+ swap D1RtP, PC
+.endm
+
+! Save register arguments
+.macro SAVE_ARGS
+ .text
+ .balign 4
+ setl [A0StP++], D0Ar6, D1Ar5
+ setl [A0StP++], D0Ar4, D1Ar3
+ setl [A0StP++], D0Ar2, D1Ar1
+.endm
+
+! Save retrun, frame pointer and other regs
+.macro SAVE_REGS regs=
+ .text
+ .balign 4
+ setl [A0StP++], D0FrT, D1RtP
+ ! Needs to be a pair of regs
+ .ifnc "\regs",""
+ setl [A0StP++], \regs
+ .endif
+.endm
+
+! Declare a global function
+.macro METAG_FUNC_START name
+ .text
+ .balign 4
+ ENTRY(\name)
+.endm
+
+! Return registers from the stack. Reverse SAVE_REGS operation
+.macro RET_REGS regs=, cond=
+ .ifnc "\regs", ""
+ getl \regs, [--A0StP]
+ .endif
+ getl D0FrT, D1RtP, [--A0StP]
+.endm
+
+! Return arguments
+.macro RET_ARGS
+ getl D0Ar2, D1Ar1, [--A0StP]
+ getl D0Ar4, D1Ar3, [--A0StP]
+ getl D0Ar6, D1Ar5, [--A0StP]
+.endm
+
+
+ ! D1Ar1: fn
+ ! D0Ar2: &ecif
+ ! D1Ar3: cif->bytes
+ ! D0Ar4: fig->flags
+ ! D1Ar5: ecif.rvalue
+
+ ! This assumes we are using GNU as
+METAG_FUNC_START ffi_call_SYSV
+ ! Save argument registers
+
+ SAVE_ARGS
+
+ ! new frame
+ mov D0FrT, A0FrP
+ add A0FrP, A0StP, #0
+
+ ! Preserve the old frame pointer
+ SAVE_REGS "D1.5, D0.5"
+
+ ! Make room for new args. cifs->bytes is the total space for input
+ ! and return arguments
+
+ add A0StP, A0StP, D1Ar3
+
+ ! Preserve cifs->bytes & fn
+ mov D0.5, D1Ar3
+ mov D1.5, D1Ar1
+
+ ! Place all of the ffi_prep_args in position
+ mov D1Ar1, A0StP
+
+ ! Call ffi_prep_args(stack, &ecif)
+#ifdef __PIC__
+ callr D1RtP, CNAME(ffi_prep_args@PLT)
+#else
+ callr D1RtP, CNAME(ffi_prep_args)
+#endif
+
+ ! Restore fn pointer
+
+ ! The foreign stack should look like this
+ ! XXXXX XXXXXX <--- stack pointer
+ ! FnArgN rvalue
+ ! FnArgN+2 FnArgN+1
+ ! FnArgN+4 FnArgN+3
+ ! ....
+ !
+
+ ! A0StP now points to the first (or return) argument + 4
+
+ ! Preserve cif->bytes
+ getl D0Ar2, D1Ar1, [--A0StP]
+ getl D0Ar4, D1Ar3, [--A0StP]
+ getl D0Ar6, D1Ar5, [--A0StP]
+
+ ! Place A0StP to the first argument again
+ add A0StP, A0StP, #24 ! That's because we loaded 6 regs x 4 byte each
+
+ ! A0FrP points to the initial stack without the reserved space for the
+ ! cifs->bytes, whilst A0StP points to the stack after the space allocation
+
+ ! fn was the first argument of ffi_call_SYSV.
+ ! The stack at this point looks like this:
+ !
+ ! A0StP(on entry to _SYSV) -> Arg6 Arg5 | low
+ ! Arg4 Arg3 |
+ ! Arg2 Arg1 |
+ ! A0FrP ----> D0FrtP D1RtP |
+ ! D1.5 D0.5 |
+ ! A0StP(bf prep_args) -> FnArgn FnArgn-1 |
+ ! FnArgn-2FnArgn-3 |
+ ! ................ | <= cifs->bytes
+ ! FnArg4 FnArg3 |
+ ! A0StP (prv_A0StP+cifs->bytes) FnArg2 FnArg1 | high
+ !
+ ! fn was in Arg1 so it's located in in A0FrP+#-0xC
+ !
+
+ ! D0Re0 contains the size of arguments stored in registers
+ sub A0StP, A0StP, D0Re0
+
+ ! Arg1 is the function pointer for the foreign call. This has been
+ ! preserved in D1.5
+
+ ! Time to call (fn). Arguments should be like this:
+ ! Arg1-Arg6 are loaded to regs
+ ! The rest of the arguments are stored in stack pointed by A0StP
+
+ call_reg D1.5
+
+ ! Reset stack.
+
+ mov A0StP, A0FrP
+
+ ! Load Arg1 with the pointer to storage for the return type
+ ! This was stored in Arg5
+
+ getd D1Ar1, [A0FrP+#-20]
+
+ ! Load D0Ar2 with the return type code. This was stored in Arg4 (flags)
+
+ getd D0Ar2, [A0FrP+#-16]
+
+ ! We are ready to start processing the return value
+ ! D0Re0 (and D1Re0) hold the return value
+
+ ! If the return value is NULL, assume no return value
+ cmp D1Ar1, #0
+ beq LSYM(Lepilogue)
+
+ ! return INT
+ cmp D0Ar2, #FFI_TYPE_INT
+ ! Sadly, there is no setd{cc} instruction so we need to workaround that
+ bne .INT64
+ setd [D1Ar1], D0Re0
+ b LSYM(Lepilogue)
+
+ ! return INT64
+.INT64:
+ cmp D0Ar2, #FFI_TYPE_SINT64
+ setleq [D1Ar1], D0Re0, D1Re0
+
+ ! return DOUBLE
+ cmp D0Ar2, #FFI_TYPE_DOUBLE
+ setl [D1AR1++], D0Re0, D1Re0
+
+LSYM(Lepilogue):
+ ! At this point, the stack pointer points right after the argument
+ ! saved area. We need to restore 4 regs, therefore we need to move
+ ! 16 bytes ahead.
+ add A0StP, A0StP, #16
+ RET_REGS "D1.5, D0.5"
+ RET_ARGS
+ getd D0Re0, [A0StP]
+ mov A0FrP, D0FrT
+ swap D1RtP, PC
+
+.ffi_call_SYSV_end:
+ .size CNAME(ffi_call_SYSV),.ffi_call_SYSV_end-CNAME(ffi_call_SYSV)
+
+
+/*
+ (called by ffi_metag_trampoline)
+ void ffi_closure_SYSV (ffi_closure*)
+
+ (called by ffi_closure_SYSV)
+ unsigned int FFI_HIDDEN
+ ffi_closure_SYSV_inner (closure,respp, args)
+ ffi_closure *closure;
+ void **respp;
+ void *args;
+*/
+
+METAG_FUNC_START ffi_closure_SYSV
+ ! We assume that D1Ar1 holds the address of the
+ ! ffi_closure struct. We will use that to fetch the
+ ! arguments. The stack pointer points to an empty space
+ ! and it is ready to store more data.
+
+ ! D1Ar1 is ready
+ ! Allocate stack space for return value
+ add A0StP, A0StP, #8
+ ! Store it to D0Ar2
+ sub D0Ar2, A0StP, #8
+
+ sub D1Ar3, A0FrP, #4
+
+ ! D1Ar3 contains the address of the original D1Ar1 argument
+ ! We need to subtract #4 later on
+
+ ! Preverve D0Ar2
+ mov D0.5, D0Ar2
+
+#ifdef __PIC__
+ callr D1RtP, CNAME(ffi_closure_SYSV_inner@PLT)
+#else
+ callr D1RtP, CNAME(ffi_closure_SYSV_inner)
+#endif
+
+ ! Check the return value and store it to D0.5
+ cmp D0Re0, #FFI_TYPE_INT
+ beq .Lretint
+ cmp D0Re0, #FFI_TYPE_DOUBLE
+ beq .Lretdouble
+.Lclosure_epilogue:
+ sub A0StP, A0StP, #8
+ RET_REGS "D1.5, D0.5"
+ RET_ARGS
+ swap D1RtP, PC
+
+.Lretint:
+ setd [D0.5], D0Re0
+ b .Lclosure_epilogue
+.Lretdouble:
+ setl [D0.5++], D0Re0, D1Re0
+ b .Lclosure_epilogue
+.ffi_closure_SYSV_end:
+.size CNAME(ffi_closure_SYSV),.ffi_closure_SYSV_end-CNAME(ffi_closure_SYSV)
+
+
+ENTRY(ffi_metag_trampoline)
+ SAVE_ARGS
+ ! New frame
+ mov A0FrP, A0StP
+ SAVE_REGS "D1.5, D0.5"
+ mov D0.5, PC
+ ! Load D1Ar1 the value of ffi_metag_trampoline
+ getd D1Ar1, [D0.5 + #8]
+ ! Jump to ffi_closure_SYSV
+ getd PC, [D0.5 + #12]
diff --git a/src/microblaze/ffi.c b/src/microblaze/ffi.c
new file mode 100644
index 0000000..5c155c5
--- /dev/null
+++ b/src/microblaze/ffi.c
@@ -0,0 +1,321 @@
+/* -----------------------------------------------------------------------
+ ffi.c - Copyright (c) 2012, 2013 Xilinx, Inc
+
+ MicroBlaze Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+extern void ffi_call_SYSV(void (*)(void*, extended_cif*), extended_cif*,
+ unsigned int, unsigned int, unsigned int*, void (*fn)(void),
+ unsigned int, unsigned int);
+
+extern void ffi_closure_SYSV(void);
+
+#define WORD_SIZE sizeof(unsigned int)
+#define ARGS_REGISTER_SIZE (WORD_SIZE * 6)
+#define WORD_ALIGN(x) ALIGN(x, WORD_SIZE)
+
+/* ffi_prep_args is called by the assembly routine once stack space
+ has been allocated for the function's arguments */
+void ffi_prep_args(void* stack, extended_cif* ecif)
+{
+ unsigned int i;
+ ffi_type** p_arg;
+ void** p_argv;
+ void* stack_args_p = stack;
+
+ p_argv = ecif->avalue;
+
+ if (ecif == NULL || ecif->cif == NULL) {
+ return; /* no description to prepare */
+ }
+
+ if ((ecif->cif->rtype != NULL) &&
+ (ecif->cif->rtype->type == FFI_TYPE_STRUCT))
+ {
+ /* if return type is a struct which is referenced on the stack/reg5,
+ * by a pointer. Stored the return value pointer in r5.
+ */
+ char* addr = stack_args_p;
+ memcpy(addr, &(ecif->rvalue), WORD_SIZE);
+ stack_args_p += WORD_SIZE;
+ }
+
+ if (ecif->avalue == NULL) {
+ return; /* no arguments to prepare */
+ }
+
+ for (i = 0, p_arg = ecif->cif->arg_types; i < ecif->cif->nargs;
+ i++, p_arg++)
+ {
+ size_t size = (*p_arg)->size;
+ int type = (*p_arg)->type;
+ void* value = p_argv[i];
+ char* addr = stack_args_p;
+ int aligned_size = WORD_ALIGN(size);
+
+ /* force word alignment on the stack */
+ stack_args_p += aligned_size;
+
+ switch (type)
+ {
+ case FFI_TYPE_UINT8:
+ *(unsigned int *)addr = (unsigned int)*(UINT8*)(value);
+ break;
+ case FFI_TYPE_SINT8:
+ *(signed int *)addr = (signed int)*(SINT8*)(value);
+ break;
+ case FFI_TYPE_UINT16:
+ *(unsigned int *)addr = (unsigned int)*(UINT16*)(value);
+ break;
+ case FFI_TYPE_SINT16:
+ *(signed int *)addr = (signed int)*(SINT16*)(value);
+ break;
+ case FFI_TYPE_STRUCT:
+#if __BIG_ENDIAN__
+ /*
+ * MicroBlaze toolchain appears to emit:
+ * bsrli r5, r5, 8 (caller)
+ * ...
+ * <branch to callee>
+ * ...
+ * bslli r5, r5, 8 (callee)
+ *
+ * For structs like "struct a { uint8_t a[3]; };", when passed
+ * by value.
+ *
+ * Structs like "struct b { uint16_t a; };" are also expected
+ * to be packed strangely in registers.
+ *
+ * This appears to be because the microblaze toolchain expects
+ * "struct b == uint16_t", which is only any issue for big
+ * endian.
+ *
+ * The following is a work around for big-endian only, for the
+ * above mentioned case, it will re-align the contents of a
+ * <= 3-byte struct value.
+ */
+ if (size < WORD_SIZE)
+ {
+ memcpy (addr + (WORD_SIZE - size), value, size);
+ break;
+ }
+#endif
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_DOUBLE:
+ default:
+ memcpy(addr, value, aligned_size);
+ }
+ }
+}
+
+ffi_status ffi_prep_cif_machdep(ffi_cif* cif)
+{
+ /* check ABI */
+ switch (cif->abi)
+ {
+ case FFI_SYSV:
+ break;
+ default:
+ return FFI_BAD_ABI;
+ }
+ return FFI_OK;
+}
+
+void ffi_call(ffi_cif* cif, void (*fn)(void), void* rvalue, void** avalue)
+{
+ extended_cif ecif;
+ ecif.cif = cif;
+ ecif.avalue = avalue;
+
+ /* If the return value is a struct and we don't have a return */
+ /* value address then we need to make one */
+ if ((rvalue == NULL) && (cif->rtype->type == FFI_TYPE_STRUCT)) {
+ ecif.rvalue = alloca(cif->rtype->size);
+ } else {
+ ecif.rvalue = rvalue;
+ }
+
+ switch (cif->abi)
+ {
+ case FFI_SYSV:
+ ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags,
+ ecif.rvalue, fn, cif->rtype->type, cif->rtype->size);
+ break;
+ default:
+ FFI_ASSERT(0);
+ break;
+ }
+}
+
+void ffi_closure_call_SYSV(void* register_args, void* stack_args,
+ ffi_closure* closure, void* rvalue,
+ unsigned int* rtype, unsigned int* rsize)
+{
+ /* prepare arguments for closure call */
+ ffi_cif* cif = closure->cif;
+ ffi_type** arg_types = cif->arg_types;
+
+ /* re-allocate data for the args. This needs to be done in order to keep
+ * multi-word objects (e.g. structs) in contigious memory. Callers are not
+ * required to store the value of args in the lower 6 words in the stack
+ * (although they are allocated in the stack).
+ */
+ char* stackclone = alloca(cif->bytes);
+ void** avalue = alloca(cif->nargs * sizeof(void*));
+ void* struct_rvalue = NULL;
+ char* ptr = stackclone;
+ int i;
+
+ /* copy registers into stack clone */
+ int registers_used = cif->bytes;
+ if (registers_used > ARGS_REGISTER_SIZE) {
+ registers_used = ARGS_REGISTER_SIZE;
+ }
+ memcpy(stackclone, register_args, registers_used);
+
+ /* copy stack allocated args into stack clone */
+ if (cif->bytes > ARGS_REGISTER_SIZE) {
+ int stack_used = cif->bytes - ARGS_REGISTER_SIZE;
+ memcpy(stackclone + ARGS_REGISTER_SIZE, stack_args, stack_used);
+ }
+
+ /* preserve struct type return pointer passing */
+ if ((cif->rtype != NULL) && (cif->rtype->type == FFI_TYPE_STRUCT)) {
+ struct_rvalue = *((void**)ptr);
+ ptr += WORD_SIZE;
+ }
+
+ /* populate arg pointer list */
+ for (i = 0; i < cif->nargs; i++)
+ {
+ switch (arg_types[i]->type)
+ {
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT8:
+#ifdef __BIG_ENDIAN__
+ avalue[i] = ptr + 3;
+#else
+ avalue[i] = ptr;
+#endif
+ break;
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT16:
+#ifdef __BIG_ENDIAN__
+ avalue[i] = ptr + 2;
+#else
+ avalue[i] = ptr;
+#endif
+ break;
+ case FFI_TYPE_STRUCT:
+#if __BIG_ENDIAN__
+ /*
+ * Work around strange ABI behaviour.
+ * (see info in ffi_prep_args)
+ */
+ if (arg_types[i]->size < WORD_SIZE)
+ {
+ memcpy (ptr, ptr + (WORD_SIZE - arg_types[i]->size), arg_types[i]->size);
+ }
+#endif
+ avalue[i] = (void*)ptr;
+ break;
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_DOUBLE:
+ avalue[i] = ptr;
+ break;
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_FLOAT:
+ default:
+ /* default 4-byte argument */
+ avalue[i] = ptr;
+ break;
+ }
+ ptr += WORD_ALIGN(arg_types[i]->size);
+ }
+
+ /* set the return type info passed back to the wrapper */
+ *rsize = cif->rtype->size;
+ *rtype = cif->rtype->type;
+ if (struct_rvalue != NULL) {
+ closure->fun(cif, struct_rvalue, avalue, closure->user_data);
+ /* copy struct return pointer value into function return value */
+ *((void**)rvalue) = struct_rvalue;
+ } else {
+ closure->fun(cif, rvalue, avalue, closure->user_data);
+ }
+}
+
+ffi_status ffi_prep_closure_loc(
+ ffi_closure* closure, ffi_cif* cif,
+ void (*fun)(ffi_cif*, void*, void**, void*),
+ void* user_data, void* codeloc)
+{
+ unsigned long* tramp = (unsigned long*)&(closure->tramp[0]);
+ unsigned long cls = (unsigned long)codeloc;
+ unsigned long fn = 0;
+ unsigned long fn_closure_call_sysv = (unsigned long)ffi_closure_call_SYSV;
+
+ closure->cif = cif;
+ closure->fun = fun;
+ closure->user_data = user_data;
+
+ switch (cif->abi)
+ {
+ case FFI_SYSV:
+ fn = (unsigned long)ffi_closure_SYSV;
+
+ /* load r11 (temp) with fn */
+ /* imm fn(upper) */
+ tramp[0] = 0xb0000000 | ((fn >> 16) & 0xffff);
+ /* addik r11, r0, fn(lower) */
+ tramp[1] = 0x31600000 | (fn & 0xffff);
+
+ /* load r12 (temp) with cls */
+ /* imm cls(upper) */
+ tramp[2] = 0xb0000000 | ((cls >> 16) & 0xffff);
+ /* addik r12, r0, cls(lower) */
+ tramp[3] = 0x31800000 | (cls & 0xffff);
+
+ /* load r3 (temp) with ffi_closure_call_SYSV */
+ /* imm fn_closure_call_sysv(upper) */
+ tramp[4] = 0xb0000000 | ((fn_closure_call_sysv >> 16) & 0xffff);
+ /* addik r3, r0, fn_closure_call_sysv(lower) */
+ tramp[5] = 0x30600000 | (fn_closure_call_sysv & 0xffff);
+ /* branch/jump to address stored in r11 (fn) */
+ tramp[6] = 0x98085800; /* bra r11 */
+
+ break;
+ default:
+ return FFI_BAD_ABI;
+ }
+ return FFI_OK;
+}
diff --git a/src/microblaze/ffitarget.h b/src/microblaze/ffitarget.h
new file mode 100644
index 0000000..c6fa5a4
--- /dev/null
+++ b/src/microblaze/ffitarget.h
@@ -0,0 +1,53 @@
+/* -----------------------------------------------------------------------
+ ffitarget.h - Copyright (c) 2012, 2013 Xilinx, Inc
+
+ Target configuration macros for MicroBlaze.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#ifndef LIBFFI_TARGET_H
+#define LIBFFI_TARGET_H
+
+#ifndef LIBFFI_H
+#error "Please do not include ffitarget.h directly into your source. Use ffi.h instead."
+#endif
+
+#ifndef LIBFFI_ASM
+typedef unsigned long ffi_arg;
+typedef signed long ffi_sarg;
+
+typedef enum ffi_abi {
+ FFI_FIRST_ABI = 0,
+ FFI_SYSV,
+ FFI_LAST_ABI,
+ FFI_DEFAULT_ABI = FFI_SYSV
+} ffi_abi;
+#endif
+
+/* Definitions for closures */
+
+#define FFI_CLOSURES 1
+#define FFI_NATIVE_RAW_API 0
+
+#define FFI_TRAMPOLINE_SIZE (4*8)
+
+#endif
diff --git a/src/microblaze/sysv.S b/src/microblaze/sysv.S
new file mode 100644
index 0000000..7a195a6
--- /dev/null
+++ b/src/microblaze/sysv.S
@@ -0,0 +1,302 @@
+/* -----------------------------------------------------------------------
+ sysv.S - Copyright (c) 2012, 2013 Xilinx, Inc
+
+ MicroBlaze Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+ /*
+ * arg[0] (r5) = ffi_prep_args,
+ * arg[1] (r6) = &ecif,
+ * arg[2] (r7) = cif->bytes,
+ * arg[3] (r8) = cif->flags,
+ * arg[4] (r9) = ecif.rvalue,
+ * arg[5] (r10) = fn
+ * arg[6] (sp[0]) = cif->rtype->type
+ * arg[7] (sp[4]) = cif->rtype->size
+ */
+ .text
+ .globl ffi_call_SYSV
+ .type ffi_call_SYSV, @function
+ffi_call_SYSV:
+ /* push callee saves */
+ addik r1, r1, -20
+ swi r19, r1, 0 /* Frame Pointer */
+ swi r20, r1, 4 /* PIC register */
+ swi r21, r1, 8 /* PIC register */
+ swi r22, r1, 12 /* save for locals */
+ swi r23, r1, 16 /* save for locals */
+
+ /* save the r5-r10 registers in the stack */
+ addik r1, r1, -24 /* increment sp to store 6x 32-bit words */
+ swi r5, r1, 0
+ swi r6, r1, 4
+ swi r7, r1, 8
+ swi r8, r1, 12
+ swi r9, r1, 16
+ swi r10, r1, 20
+
+ /* save function pointer */
+ addik r3, r5, 0 /* copy ffi_prep_args into r3 */
+ addik r22, r1, 0 /* save sp for unallocated args into r22 (callee-saved) */
+ addik r23, r10, 0 /* save function address into r23 (callee-saved) */
+
+ /* prepare stack with allocation for n (bytes = r7) args */
+ rsub r1, r7, r1 /* subtract bytes from sp */
+
+ /* prep args for ffi_prep_args call */
+ addik r5, r1, 0 /* store stack pointer into arg[0] */
+ /* r6 still holds ecif for arg[1] */
+
+ /* Call ffi_prep_args(stack, &ecif). */
+ addik r1, r1, -4
+ swi r15, r1, 0 /* store the link register in the frame */
+ brald r15, r3
+ nop /* branch has delay slot */
+ lwi r15, r1, 0
+ addik r1, r1, 4 /* restore the link register from the frame */
+ /* returns calling stack pointer location */
+
+ /* prepare args for fn call, prep_args populates them onto the stack */
+ lwi r5, r1, 0 /* arg[0] */
+ lwi r6, r1, 4 /* arg[1] */
+ lwi r7, r1, 8 /* arg[2] */
+ lwi r8, r1, 12 /* arg[3] */
+ lwi r9, r1, 16 /* arg[4] */
+ lwi r10, r1, 20 /* arg[5] */
+
+ /* call (fn) (...). */
+ addik r1, r1, -4
+ swi r15, r1, 0 /* store the link register in the frame */
+ brald r15, r23
+ nop /* branch has delay slot */
+ lwi r15, r1, 0
+ addik r1, r1, 4 /* restore the link register from the frame */
+
+ /* Remove the space we pushed for the args. */
+ addik r1, r22, 0 /* restore old SP */
+
+ /* restore this functions parameters */
+ lwi r5, r1, 0 /* arg[0] */
+ lwi r6, r1, 4 /* arg[1] */
+ lwi r7, r1, 8 /* arg[2] */
+ lwi r8, r1, 12 /* arg[3] */
+ lwi r9, r1, 16 /* arg[4] */
+ lwi r10, r1, 20 /* arg[5] */
+ addik r1, r1, 24 /* decrement sp to de-allocate 6x 32-bit words */
+
+ /* If the return value pointer is NULL, assume no return value. */
+ beqi r9, ffi_call_SYSV_end
+
+ lwi r22, r1, 48 /* get return type (20 for locals + 28 for arg[6]) */
+ lwi r23, r1, 52 /* get return size (20 for locals + 32 for arg[7]) */
+
+ /* Check if return type is actually a struct, do nothing */
+ rsubi r11, r22, FFI_TYPE_STRUCT
+ beqi r11, ffi_call_SYSV_end
+
+ /* Return 8bit */
+ rsubi r11, r23, 1
+ beqi r11, ffi_call_SYSV_store8
+
+ /* Return 16bit */
+ rsubi r11, r23, 2
+ beqi r11, ffi_call_SYSV_store16
+
+ /* Return 32bit */
+ rsubi r11, r23, 4
+ beqi r11, ffi_call_SYSV_store32
+
+ /* Return 64bit */
+ rsubi r11, r23, 8
+ beqi r11, ffi_call_SYSV_store64
+
+ /* Didnt match anything */
+ bri ffi_call_SYSV_end
+
+ffi_call_SYSV_store64:
+ swi r3, r9, 0 /* store word r3 into return value */
+ swi r4, r9, 4 /* store word r4 into return value */
+ bri ffi_call_SYSV_end
+
+ffi_call_SYSV_store32:
+ swi r3, r9, 0 /* store word r3 into return value */
+ bri ffi_call_SYSV_end
+
+ffi_call_SYSV_store16:
+#ifdef __BIG_ENDIAN__
+ shi r3, r9, 2 /* store half-word r3 into return value */
+#else
+ shi r3, r9, 0 /* store half-word r3 into return value */
+#endif
+ bri ffi_call_SYSV_end
+
+ffi_call_SYSV_store8:
+#ifdef __BIG_ENDIAN__
+ sbi r3, r9, 3 /* store byte r3 into return value */
+#else
+ sbi r3, r9, 0 /* store byte r3 into return value */
+#endif
+ bri ffi_call_SYSV_end
+
+ffi_call_SYSV_end:
+ /* callee restores */
+ lwi r19, r1, 0 /* frame pointer */
+ lwi r20, r1, 4 /* PIC register */
+ lwi r21, r1, 8 /* PIC register */
+ lwi r22, r1, 12
+ lwi r23, r1, 16
+ addik r1, r1, 20
+
+ /* return from sub-routine (with delay slot) */
+ rtsd r15, 8
+ nop
+
+ .size ffi_call_SYSV, . - ffi_call_SYSV
+
+/* ------------------------------------------------------------------------- */
+
+ /*
+ * args passed into this function, are passed down to the callee.
+ * this function is the target of the closure trampoline, as such r12 is
+ * a pointer to the closure object.
+ */
+ .text
+ .globl ffi_closure_SYSV
+ .type ffi_closure_SYSV, @function
+ffi_closure_SYSV:
+ /* push callee saves */
+ addik r11, r1, 28 /* save stack args start location (excluding regs/link) */
+ addik r1, r1, -12
+ swi r19, r1, 0 /* Frame Pointer */
+ swi r20, r1, 4 /* PIC register */
+ swi r21, r1, 8 /* PIC register */
+
+ /* store register args on stack */
+ addik r1, r1, -24
+ swi r5, r1, 0
+ swi r6, r1, 4
+ swi r7, r1, 8
+ swi r8, r1, 12
+ swi r9, r1, 16
+ swi r10, r1, 20
+
+ /* setup args */
+ addik r5, r1, 0 /* register_args */
+ addik r6, r11, 0 /* stack_args */
+ addik r7, r12, 0 /* closure object */
+ addik r1, r1, -8 /* allocate return value */
+ addik r8, r1, 0 /* void* rvalue */
+ addik r1, r1, -8 /* allocate for reutrn type/size values */
+ addik r9, r1, 0 /* void* rtype */
+ addik r10, r1, 4 /* void* rsize */
+
+ /* call the wrap_call function */
+ addik r1, r1, -28 /* allocate args + link reg */
+ swi r15, r1, 0 /* store the link register in the frame */
+ brald r15, r3
+ nop /* branch has delay slot */
+ lwi r15, r1, 0
+ addik r1, r1, 28 /* restore the link register from the frame */
+
+ffi_closure_SYSV_prepare_return:
+ lwi r9, r1, 0 /* rtype */
+ lwi r10, r1, 4 /* rsize */
+ addik r1, r1, 8 /* de-allocate return info values */
+
+ /* Check if return type is actually a struct, store 4 bytes */
+ rsubi r11, r9, FFI_TYPE_STRUCT
+ beqi r11, ffi_closure_SYSV_store32
+
+ /* Return 8bit */
+ rsubi r11, r10, 1
+ beqi r11, ffi_closure_SYSV_store8
+
+ /* Return 16bit */
+ rsubi r11, r10, 2
+ beqi r11, ffi_closure_SYSV_store16
+
+ /* Return 32bit */
+ rsubi r11, r10, 4
+ beqi r11, ffi_closure_SYSV_store32
+
+ /* Return 64bit */
+ rsubi r11, r10, 8
+ beqi r11, ffi_closure_SYSV_store64
+
+ /* Didnt match anything */
+ bri ffi_closure_SYSV_end
+
+ffi_closure_SYSV_store64:
+ lwi r3, r1, 0 /* store word r3 into return value */
+ lwi r4, r1, 4 /* store word r4 into return value */
+ /* 64 bits == 2 words, no sign extend occurs */
+ bri ffi_closure_SYSV_end
+
+ffi_closure_SYSV_store32:
+ lwi r3, r1, 0 /* store word r3 into return value */
+ /* 32 bits == 1 word, no sign extend occurs */
+ bri ffi_closure_SYSV_end
+
+ffi_closure_SYSV_store16:
+#ifdef __BIG_ENDIAN__
+ lhui r3, r1, 2 /* store half-word r3 into return value */
+#else
+ lhui r3, r1, 0 /* store half-word r3 into return value */
+#endif
+ rsubi r11, r9, FFI_TYPE_SINT16
+ bnei r11, ffi_closure_SYSV_end
+ sext16 r3, r3 /* fix sign extend of sint8 */
+ bri ffi_closure_SYSV_end
+
+ffi_closure_SYSV_store8:
+#ifdef __BIG_ENDIAN__
+ lbui r3, r1, 3 /* store byte r3 into return value */
+#else
+ lbui r3, r1, 0 /* store byte r3 into return value */
+#endif
+ rsubi r11, r9, FFI_TYPE_SINT8
+ bnei r11, ffi_closure_SYSV_end
+ sext8 r3, r3 /* fix sign extend of sint8 */
+ bri ffi_closure_SYSV_end
+
+ffi_closure_SYSV_end:
+ addik r1, r1, 8 /* de-allocate return value */
+
+ /* de-allocate stored args */
+ addik r1, r1, 24
+
+ /* callee restores */
+ lwi r19, r1, 0 /* frame pointer */
+ lwi r20, r1, 4 /* PIC register */
+ lwi r21, r1, 8 /* PIC register */
+ addik r1, r1, 12
+
+ /* return from sub-routine (with delay slot) */
+ rtsd r15, 8
+ nop
+
+ .size ffi_closure_SYSV, . - ffi_closure_SYSV
diff --git a/src/mips/ffi.c b/src/mips/ffi.c
index 79cff9b..03121e3 100644
--- a/src/mips/ffi.c
+++ b/src/mips/ffi.c
@@ -670,9 +670,16 @@ ffi_prep_closure_loc (ffi_closure *closure,
if (cif->abi != FFI_O32 && cif->abi != FFI_O32_SOFT_FLOAT)
return FFI_BAD_ABI;
fn = ffi_closure_O32;
-#else /* FFI_MIPS_N32 */
- if (cif->abi != FFI_N32 && cif->abi != FFI_N64)
+#else
+#if _MIPS_SIM ==_ABIN32
+ if (cif->abi != FFI_N32
+ && cif->abi != FFI_N32_SOFT_FLOAT)
+ return FFI_BAD_ABI;
+#else
+ if (cif->abi != FFI_N64
+ && cif->abi != FFI_N64_SOFT_FLOAT)
return FFI_BAD_ABI;
+#endif
fn = ffi_closure_N32;
#endif /* FFI_MIPS_O32 */
diff --git a/src/moxie/eabi.S b/src/moxie/eabi.S
index 379ea4b..ac7aceb 100644
--- a/src/moxie/eabi.S
+++ b/src/moxie/eabi.S
@@ -1,7 +1,7 @@
/* -----------------------------------------------------------------------
- eabi.S - Copyright (c) 2004 Anthony Green
+ eabi.S - Copyright (c) 2012, 2013 Anthony Green
- FR-V Assembly glue.
+ Moxie Assembly glue.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@@ -34,95 +34,68 @@
.globl ffi_call_EABI
.type ffi_call_EABI, @function
- # gr8 : ffi_prep_args
- # gr9 : &ecif
- # gr10: cif->bytes
- # gr11: fig->flags
- # gr12: ecif.rvalue
- # gr13: fn
+ # $r0 : ffi_prep_args
+ # $r1 : &ecif
+ # $r2 : cif->bytes
+ # $r3 : fig->flags
+ # $r4 : ecif.rvalue
+ # $r5 : fn
-ffi_call_EABI:
- addi sp, #-80, sp
- sti fp, @(sp, #24)
- addi sp, #24, fp
- movsg lr, gr5
+ffi_call_EABI:
+ push $sp, $r6
+ push $sp, $r7
+ push $sp, $r8
+ dec $sp, 24
- /* Make room for the new arguments. */
- /* subi sp, fp, gr10 */
-
- /* Store return address and incoming args on stack. */
- sti gr5, @(fp, #8)
- sti gr8, @(fp, #-4)
- sti gr9, @(fp, #-8)
- sti gr10, @(fp, #-12)
- sti gr11, @(fp, #-16)
- sti gr12, @(fp, #-20)
- sti gr13, @(fp, #-24)
-
- sub sp, gr10, sp
+ /* Store incoming args on stack. */
+ sto.l 0($sp), $r0 /* ffi_prep_args */
+ sto.l 4($sp), $r1 /* ecif */
+ sto.l 8($sp), $r2 /* bytes */
+ sto.l 12($sp), $r3 /* flags */
+ sto.l 16($sp), $r4 /* &rvalue */
+ sto.l 20($sp), $r5 /* fn */
/* Call ffi_prep_args. */
- ldi @(fp, #-4), gr4
- addi sp, #0, gr8
- ldi @(fp, #-8), gr9
-#ifdef __FRV_FDPIC__
- ldd @(gr4, gr0), gr14
- calll @(gr14, gr0)
-#else
- calll @(gr4, gr0)
-#endif
-
- /* ffi_prep_args returns the new stack pointer. */
- mov gr8, gr4
-
- ldi @(sp, #0), gr8
- ldi @(sp, #4), gr9
- ldi @(sp, #8), gr10
- ldi @(sp, #12), gr11
- ldi @(sp, #16), gr12
- ldi @(sp, #20), gr13
+ mov $r6, $r4 /* Save result buffer */
+ mov $r7, $r5 /* Save the target fn */
+ mov $r8, $r3 /* Save the flags */
+ sub.l $sp, $r2 /* Allocate stack space */
+ mov $r0, $sp /* We can stomp over $r0 */
+ /* $r1 is already set up */
+ jsra ffi_prep_args
- /* Always copy the return value pointer into the hidden
- parameter register. This is only strictly necessary
- when we're returning an aggregate type, but it doesn't
- hurt to do this all the time, and it saves a branch. */
- ldi @(fp, #-20), gr3
-
- /* Use the ffi_prep_args return value for the new sp. */
- mov gr4, sp
+ /* Load register arguments. */
+ ldo.l $r0, 0($sp)
+ ldo.l $r1, 4($sp)
+ ldo.l $r2, 8($sp)
+ ldo.l $r3, 12($sp)
+ ldo.l $r4, 16($sp)
+ ldo.l $r5, 20($sp)
/* Call the target function. */
- ldi @(fp, -24), gr4
-#ifdef __FRV_FDPIC__
- ldd @(gr4, gr0), gr14
- calll @(gr14, gr0)
-#else
- calll @(gr4, gr0)
-#endif
+ jsr $r7
+
+ ldi.l $r7, 0xffffffff
+ cmp $r8, $r7
+ beq retstruct
+
+ ldi.l $r7, 4
+ cmp $r8, $r7
+ bgt ret2reg
- /* Store the result. */
- ldi @(fp, #-16), gr10 /* fig->flags */
- ldi @(fp, #-20), gr4 /* ecif.rvalue */
+ st.l ($r6), $r0
+ jmpa retdone
- /* Is the return value stored in two registers? */
- cmpi gr10, #8, icc0
- bne icc0, 0, .L2
- /* Yes, save them. */
- sti gr8, @(gr4, #0)
- sti gr9, @(gr4, #4)
- bra .L3
-.L2:
- /* Is the return value a structure? */
- cmpi gr10, #-1, icc0
- beq icc0, 0, .L3
- /* No, save a 4 byte return value. */
- sti gr8, @(gr4, #0)
-.L3:
+ret2reg:
+ st.l ($r6), $r0
+ sto.l 4($r6), $r1
- /* Restore the stack, and return. */
- ldi @(fp, 8), gr5
- ld @(fp, gr0), fp
- addi sp,#80,sp
- jmpl @(gr5,gr0)
+retstruct:
+retdone:
+ /* Return. */
+ ldo.l $r6, -4($fp)
+ ldo.l $r7, -8($fp)
+ ldo.l $r8, -12($fp)
+ ret
.size ffi_call_EABI, .-ffi_call_EABI
diff --git a/src/moxie/ffi.c b/src/moxie/ffi.c
index 54cbbb9..540a042 100644
--- a/src/moxie/ffi.c
+++ b/src/moxie/ffi.c
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------
- ffi.c - Copyright (C) 2009 Anthony Green
+ ffi.c - Copyright (C) 2012, 2013 Anthony Green
Moxie Foreign Function Interface
@@ -43,6 +43,12 @@ void *ffi_prep_args(char *stack, extended_cif *ecif)
p_argv = ecif->avalue;
argp = stack;
+ if (ecif->cif->rtype->type == FFI_TYPE_STRUCT)
+ {
+ *(void **) argp = ecif->rvalue;
+ argp += 4;
+ }
+
for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types;
(i != 0);
i--, p_arg++)
@@ -56,17 +62,6 @@ void *ffi_prep_args(char *stack, extended_cif *ecif)
z = sizeof(void*);
*(void **) argp = *p_argv;
}
- /* if ((*p_arg)->type == FFI_TYPE_FLOAT)
- {
- if (count > 24)
- {
- // This is going on the stack. Turn it into a double.
- *(double *) argp = (double) *(float*)(* p_argv);
- z = sizeof(double);
- }
- else
- *(void **) argp = *(void **)(* p_argv);
- } */
else if (z < sizeof(int))
{
z = sizeof(int);
@@ -147,8 +142,7 @@ void ffi_call(ffi_cif *cif,
}
else
ecif.rvalue = rvalue;
-
-
+
switch (cif->abi)
{
case FFI_EABI:
@@ -165,19 +159,25 @@ void ffi_closure_eabi (unsigned arg1, unsigned arg2, unsigned arg3,
unsigned arg4, unsigned arg5, unsigned arg6)
{
/* This function is called by a trampoline. The trampoline stows a
- pointer to the ffi_closure object in gr7. We must save this
+ pointer to the ffi_closure object in $r7. We must save this
pointer in a place that will persist while we do our work. */
- register ffi_closure *creg __asm__ ("gr7");
+ register ffi_closure *creg __asm__ ("$r12");
ffi_closure *closure = creg;
/* Arguments that don't fit in registers are found on the stack
at a fixed offset above the current frame pointer. */
- register char *frame_pointer __asm__ ("fp");
- char *stack_args = frame_pointer + 16;
+ register char *frame_pointer __asm__ ("$fp");
+
+ /* Pointer to a struct return value. */
+ void *struct_rvalue = (void *) arg1;
+
+ /* 6 words reserved for register args + 3 words from jsr */
+ char *stack_args = frame_pointer + 9*4;
/* Lay the register arguments down in a continuous chunk of memory. */
unsigned register_args[6] =
{ arg1, arg2, arg3, arg4, arg5, arg6 };
+ char *register_args_ptr = (char *) register_args;
ffi_cif *cif = closure->cif;
ffi_type **arg_types = cif->arg_types;
@@ -185,6 +185,12 @@ void ffi_closure_eabi (unsigned arg1, unsigned arg2, unsigned arg3,
char *ptr = (char *) register_args;
int i;
+ /* preserve struct type return pointer passing */
+ if ((cif->rtype != NULL) && (cif->rtype->type == FFI_TYPE_STRUCT)) {
+ ptr += 4;
+ register_args_ptr = (char *)&register_args[1];
+ }
+
/* Find the address of each argument. */
for (i = 0; i < cif->nargs; i++)
{
@@ -201,6 +207,7 @@ void ffi_closure_eabi (unsigned arg1, unsigned arg2, unsigned arg3,
case FFI_TYPE_SINT32:
case FFI_TYPE_UINT32:
case FFI_TYPE_FLOAT:
+ case FFI_TYPE_POINTER:
avalue[i] = ptr;
break;
case FFI_TYPE_STRUCT:
@@ -216,30 +223,21 @@ void ffi_closure_eabi (unsigned arg1, unsigned arg2, unsigned arg3,
/* If we've handled more arguments than fit in registers,
start looking at the those passed on the stack. */
- if (ptr == ((char *)register_args + (6*4)))
+ if (ptr == &register_args[6])
ptr = stack_args;
}
/* Invoke the closure. */
- if (cif->rtype->type == FFI_TYPE_STRUCT)
+ if (cif->rtype && (cif->rtype->type == FFI_TYPE_STRUCT))
{
- /* The caller allocates space for the return structure, and
- passes a pointer to this space in gr3. Use this value directly
- as the return value. */
- register void *return_struct_ptr __asm__("gr3");
- (closure->fun) (cif, return_struct_ptr, avalue, closure->user_data);
+ (closure->fun) (cif, struct_rvalue, avalue, closure->user_data);
}
else
{
/* Allocate space for the return value and call the function. */
long long rvalue;
(closure->fun) (cif, &rvalue, avalue, closure->user_data);
-
- /* Functions return 4-byte or smaller results in gr8. 8-byte
- values also use gr9. We fill the both, even for small return
- values, just to avoid a branch. */
- asm ("ldi @(%0, #0), gr8" : : "r" (&rvalue));
- asm ("ldi @(%0, #0), gr9" : : "r" (&((int *) &rvalue)[1]));
+ asm ("mov $r12, %0\n ld.l $r0, ($r12)\n ldo.l $r1, 4($r12)" : : "r" (&rvalue));
}
}
@@ -250,27 +248,25 @@ ffi_prep_closure_loc (ffi_closure* closure,
void *user_data,
void *codeloc)
{
- unsigned int *tramp = (unsigned int *) &closure->tramp[0];
+ unsigned short *tramp = (unsigned short *) &closure->tramp[0];
unsigned long fn = (long) ffi_closure_eabi;
unsigned long cls = (long) codeloc;
- int i;
+
+ if (cif->abi != FFI_EABI)
+ return FFI_BAD_ABI;
fn = (unsigned long) ffi_closure_eabi;
- tramp[0] = 0x8cfc0000 + (fn & 0xffff); /* setlos lo(fn), gr6 */
- tramp[1] = 0x8efc0000 + (cls & 0xffff); /* setlos lo(cls), gr7 */
- tramp[2] = 0x8cf80000 + (fn >> 16); /* sethi hi(fn), gr6 */
- tramp[3] = 0x8ef80000 + (cls >> 16); /* sethi hi(cls), gr7 */
- tramp[4] = 0x80300006; /* jmpl @(gr0, gr6) */
+ tramp[0] = 0x01e0; /* ldi.l $r7, .... */
+ tramp[1] = cls >> 16;
+ tramp[2] = cls & 0xffff;
+ tramp[3] = 0x1a00; /* jmpa .... */
+ tramp[4] = fn >> 16;
+ tramp[5] = fn & 0xffff;
closure->cif = cif;
closure->fun = fun;
closure->user_data = user_data;
- /* Cache flushing. */
- for (i = 0; i < FFI_TRAMPOLINE_SIZE; i++)
- __asm__ volatile ("dcf @(%0,%1)\n\tici @(%2,%1)" :: "r" (tramp), "r" (i),
- "r" (codeloc));
-
return FFI_OK;
}
diff --git a/src/moxie/ffitarget.h b/src/moxie/ffitarget.h
index f5305d1..623e3ec 100644
--- a/src/moxie/ffitarget.h
+++ b/src/moxie/ffitarget.h
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------*-C-*-
- ffitarget.h - Copyright (c) 2009 Anthony Green
+ ffitarget.h - Copyright (c) 2012, 2013 Anthony Green
Target configuration macros for Moxie
Permission is hereby granted, free of charge, to any person obtaining
@@ -35,22 +35,18 @@ typedef signed long ffi_sarg;
typedef enum ffi_abi {
FFI_FIRST_ABI = 0,
-
-#ifdef MOXIE
FFI_EABI,
FFI_DEFAULT_ABI = FFI_EABI,
-#endif
-
FFI_LAST_ABI = FFI_DEFAULT_ABI + 1
} ffi_abi;
#endif
/* ---- Definitions for closures ----------------------------------------- */
-#define FFI_CLOSURES 0
+#define FFI_CLOSURES 1
#define FFI_NATIVE_RAW_API 0
-/* Trampolines are 5 4-byte instructions long. */
-#define FFI_TRAMPOLINE_SIZE (5*4)
+/* Trampolines are 12-bytes long. See ffi_prep_closure_loc. */
+#define FFI_TRAMPOLINE_SIZE (12)
#endif
diff --git a/src/powerpc/aix.S b/src/powerpc/aix.S
index 213f2db..349e78c 100644
--- a/src/powerpc/aix.S
+++ b/src/powerpc/aix.S
@@ -137,7 +137,7 @@ ffi_call_AIX:
mtcrf 0x40, r31
mtctr r0
/* Load all those argument registers. */
- // We have set up a nice stack frame, just load it into registers.
+ /* We have set up a nice stack frame, just load it into registers. */
ld r3, 40+(1*8)(r1)
ld r4, 40+(2*8)(r1)
ld r5, 40+(3*8)(r1)
@@ -150,7 +150,7 @@ ffi_call_AIX:
L1:
/* Load all the FP registers. */
- bf 6,L2 // 2f + 0x18
+ bf 6,L2 /* 2f + 0x18 */
lfd f1,-32-(13*8)(r28)
lfd f2,-32-(12*8)(r28)
lfd f3,-32-(11*8)(r28)
@@ -239,7 +239,7 @@ L(float_return_value):
mtcrf 0x40, r31
mtctr r0
/* Load all those argument registers. */
- // We have set up a nice stack frame, just load it into registers.
+ /* We have set up a nice stack frame, just load it into registers. */
lwz r3, 20+(1*4)(r1)
lwz r4, 20+(2*4)(r1)
lwz r5, 20+(3*4)(r1)
@@ -252,7 +252,7 @@ L(float_return_value):
L1:
/* Load all the FP registers. */
- bf 6,L2 // 2f + 0x18
+ bf 6,L2 /* 2f + 0x18 */
lfd f1,-16-(13*8)(r28)
lfd f2,-16-(12*8)(r28)
lfd f3,-16-(11*8)(r28)
@@ -307,7 +307,7 @@ L(float_return_value):
#endif
.long 0
.byte 0,0,0,1,128,4,0,0
-//END(ffi_call_AIX)
+/* END(ffi_call_AIX) */
.csect .text[PR]
.align 2
@@ -325,4 +325,4 @@ ffi_call_DARWIN:
blr
.long 0
.byte 0,0,0,0,0,0,0,0
-//END(ffi_call_DARWIN)
+/* END(ffi_call_DARWIN) */
diff --git a/src/powerpc/ffi.c b/src/powerpc/ffi.c
index 1920c91..54f2731 100644
--- a/src/powerpc/ffi.c
+++ b/src/powerpc/ffi.c
@@ -48,6 +48,11 @@ enum {
FLAG_RETURNS_128BITS = 1 << (31-27), /* cr6 */
+ FLAG_SYSV_SMST_R4 = 1 << (31-26), /* use r4 for FFI_SYSV 8 byte
+ structs. */
+ FLAG_SYSV_SMST_R3 = 1 << (31-25), /* use r3 for FFI_SYSV 4 byte
+ structs. */
+
FLAG_ARG_NEEDS_COPY = 1 << (31- 7),
#ifndef __NO_FPRS__
FLAG_FP_ARGUMENTS = 1 << (31- 6), /* cr1.eq; specified by ABI */
@@ -146,6 +151,7 @@ ffi_prep_args_SYSV (extended_cif *ecif, unsigned *const stack)
gpr_base.u = stacktop.u - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS;
intarg_count = 0;
#ifndef __NO_FPRS__
+ double double_tmp;
fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS;
fparg_count = 0;
copy_space.c = ((flags & FLAG_FP_ARGUMENTS) ? fpr_base.c : gpr_base.c);
@@ -155,9 +161,9 @@ ffi_prep_args_SYSV (extended_cif *ecif, unsigned *const stack)
next_arg.u = stack + 2;
/* Check that everything starts aligned properly. */
- FFI_ASSERT (((unsigned) (char *) stack & 0xF) == 0);
- FFI_ASSERT (((unsigned) copy_space.c & 0xF) == 0);
- FFI_ASSERT (((unsigned) stacktop.c & 0xF) == 0);
+ FFI_ASSERT (((unsigned long) (char *) stack & 0xF) == 0);
+ FFI_ASSERT (((unsigned long) copy_space.c & 0xF) == 0);
+ FFI_ASSERT (((unsigned long) stacktop.c & 0xF) == 0);
FFI_ASSERT ((bytes & 0xF) == 0);
FFI_ASSERT (copy_space.c >= next_arg.c);
@@ -211,8 +217,6 @@ ffi_prep_args_SYSV (extended_cif *ecif, unsigned *const stack)
case FFI_TYPE_DOUBLE:
/* With FFI_LINUX_SOFT_FLOAT doubles are handled like UINT64. */
- if (ecif->cif->abi == FFI_LINUX_SOFT_FLOAT)
- goto soft_double_prep;
double_tmp = **p_argv.d;
if (fparg_count >= NUM_FPR_ARG_REGISTERS)
@@ -368,7 +372,13 @@ ffi_prep_args_SYSV (extended_cif *ecif, unsigned *const stack)
/* Check that we didn't overrun the stack... */
FFI_ASSERT (copy_space.c >= next_arg.c);
FFI_ASSERT (gpr_base.u <= stacktop.u - ASM_NEEDS_REGISTERS);
+ /* The assert below is testing that the number of integer arguments agrees
+ with the number found in ffi_prep_cif_machdep(). However, intarg_count
+ is incremeneted whenever we place an FP arg on the stack, so account for
+ that before our assert test. */
#ifndef __NO_FPRS__
+ if (fparg_count > NUM_FPR_ARG_REGISTERS)
+ intarg_count -= fparg_count - NUM_FPR_ARG_REGISTERS;
FFI_ASSERT (fpr_base.u
<= stacktop.u - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS);
#endif
@@ -665,9 +675,11 @@ ffi_prep_cif_machdep (ffi_cif *cif)
switch (type)
{
#ifndef __NO_FPRS__
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
case FFI_TYPE_LONGDOUBLE:
flags |= FLAG_RETURNS_128BITS;
/* Fall through. */
+#endif
case FFI_TYPE_DOUBLE:
flags |= FLAG_RETURNS_64BITS;
/* Fall through. */
@@ -685,18 +697,35 @@ ffi_prep_cif_machdep (ffi_cif *cif)
break;
case FFI_TYPE_STRUCT:
- /*
- * The final SYSV ABI says that structures smaller or equal 8 bytes
- * are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them
- * in memory.
- *
- * NOTE: The assembly code can safely assume that it just needs to
- * store both r3 and r4 into a 8-byte word-aligned buffer, as
- * we allocate a temporary buffer in ffi_call() if this flag is
- * set.
- */
- if (cif->abi == FFI_SYSV && size <= 8)
- flags |= FLAG_RETURNS_SMST;
+ if (cif->abi == FFI_SYSV)
+ {
+ /* The final SYSV ABI says that structures smaller or equal 8 bytes
+ are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them
+ in memory. */
+
+ /* Treat structs with size <= 8 bytes. */
+ if (size <= 8)
+ {
+ flags |= FLAG_RETURNS_SMST;
+ /* These structs are returned in r3. We pack the type and the
+ precalculated shift value (needed in the sysv.S) into flags.
+ The same applies for the structs returned in r3/r4. */
+ if (size <= 4)
+ {
+ flags |= FLAG_SYSV_SMST_R3;
+ flags |= 8 * (4 - size) << 8;
+ break;
+ }
+ /* These structs are returned in r3 and r4. See above. */
+ if (size <= 8)
+ {
+ flags |= FLAG_SYSV_SMST_R3 | FLAG_SYSV_SMST_R4;
+ flags |= 8 * (8 - size) << 8;
+ break;
+ }
+ }
+ }
+
intarg_count++;
flags |= FLAG_RETVAL_REFERENCE;
/* Fall through. */
@@ -925,7 +954,7 @@ ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
*/
unsigned int smst_buffer[2];
extended_cif ecif;
- unsigned int rsize;
+ unsigned int rsize = 0;
ecif.cif = cif;
ecif.avalue = avalue;
@@ -1132,7 +1161,7 @@ ffi_closure_helper_SYSV (ffi_closure *closure, void *rvalue,
if (nf < 8)
{
- temp = pfr->d;
+ double temp = pfr->d;
pfr->f = (float) temp;
avalue[i] = pfr;
nf++;
diff --git a/src/powerpc/ffi_darwin.c b/src/powerpc/ffi_darwin.c
index c3cd1da..1d1d48c 100644
--- a/src/powerpc/ffi_darwin.c
+++ b/src/powerpc/ffi_darwin.c
@@ -302,10 +302,10 @@ ffi_prep_args (extended_cif *ecif, unsigned long *const stack)
}
/* Check that we didn't overrun the stack... */
- //FFI_ASSERT(gpr_base <= stacktop - ASM_NEEDS_REGISTERS);
- //FFI_ASSERT((unsigned *)fpr_base
- // <= stacktop - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS);
- //FFI_ASSERT(flags & FLAG_4_GPR_ARGUMENTS || intarg_count <= 4);
+ /* FFI_ASSERT(gpr_base <= stacktop - ASM_NEEDS_REGISTERS);
+ FFI_ASSERT((unsigned *)fpr_base
+ <= stacktop - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS);
+ FFI_ASSERT(flags & FLAG_4_GPR_ARGUMENTS || intarg_count <= 4); */
}
#if defined(POWERPC_DARWIN64)
@@ -1235,7 +1235,7 @@ ffi_closure_helper_DARWIN (ffi_closure *closure, void *rvalue,
if (arg_types[i]->elements[0]->type == FFI_TYPE_DOUBLE)
size_al = ALIGN(arg_types[i]->size, 8);
# if defined(POWERPC64)
- FFI_ASSERT (cif->abi != FFI_DARWIN)
+ FFI_ASSERT (cif->abi != FFI_DARWIN);
avalue[i] = pgr;
pgr += (size_al + 7) / 8;
# else
diff --git a/src/powerpc/linux64.S b/src/powerpc/linux64.S
index 57b56cb..f28da81 100644
--- a/src/powerpc/linux64.S
+++ b/src/powerpc/linux64.S
@@ -30,16 +30,25 @@
#include <ffi.h>
#ifdef __powerpc64__
- .hidden ffi_call_LINUX64, .ffi_call_LINUX64
- .globl ffi_call_LINUX64, .ffi_call_LINUX64
+ .hidden ffi_call_LINUX64
+ .globl ffi_call_LINUX64
.section ".opd","aw"
.align 3
ffi_call_LINUX64:
+#ifdef _CALL_LINUX
+ .quad .L.ffi_call_LINUX64,.TOC.@tocbase,0
+ .type ffi_call_LINUX64,@function
+ .text
+.L.ffi_call_LINUX64:
+#else
+ .hidden .ffi_call_LINUX64
+ .globl .ffi_call_LINUX64
.quad .ffi_call_LINUX64,.TOC.@tocbase,0
.size ffi_call_LINUX64,24
.type .ffi_call_LINUX64,@function
.text
.ffi_call_LINUX64:
+#endif
.LFB1:
mflr %r0
std %r28, -32(%r1)
@@ -58,7 +67,11 @@ ffi_call_LINUX64:
/* Call ffi_prep_args64. */
mr %r4, %r1
+#ifdef _CALL_LINUX
+ bl ffi_prep_args64
+#else
bl .ffi_prep_args64
+#endif
ld %r0, 0(%r29)
ld %r2, 8(%r29)
@@ -137,7 +150,11 @@ ffi_call_LINUX64:
.LFE1:
.long 0
.byte 0,12,0,1,128,4,0,0
+#ifdef _CALL_LINUX
+ .size ffi_call_LINUX64,.-.L.ffi_call_LINUX64
+#else
.size .ffi_call_LINUX64,.-.ffi_call_LINUX64
+#endif
.section .eh_frame,EH_FRAME_FLAGS,@progbits
.Lframe1:
diff --git a/src/powerpc/linux64_closure.S b/src/powerpc/linux64_closure.S
index f7aa2c9..b1e1219 100644
--- a/src/powerpc/linux64_closure.S
+++ b/src/powerpc/linux64_closure.S
@@ -32,16 +32,24 @@
#ifdef __powerpc64__
FFI_HIDDEN (ffi_closure_LINUX64)
- FFI_HIDDEN (.ffi_closure_LINUX64)
- .globl ffi_closure_LINUX64, .ffi_closure_LINUX64
+ .globl ffi_closure_LINUX64
.section ".opd","aw"
.align 3
ffi_closure_LINUX64:
+#ifdef _CALL_LINUX
+ .quad .L.ffi_closure_LINUX64,.TOC.@tocbase,0
+ .type ffi_closure_LINUX64,@function
+ .text
+.L.ffi_closure_LINUX64:
+#else
+ FFI_HIDDEN (.ffi_closure_LINUX64)
+ .globl .ffi_closure_LINUX64
.quad .ffi_closure_LINUX64,.TOC.@tocbase,0
.size ffi_closure_LINUX64,24
.type .ffi_closure_LINUX64,@function
.text
.ffi_closure_LINUX64:
+#endif
.LFB1:
# save general regs into parm save area
std %r3, 48(%r1)
@@ -91,7 +99,11 @@ ffi_closure_LINUX64:
addi %r6, %r1, 128
# make the call
+#ifdef _CALL_LINUX
+ bl ffi_closure_helper_LINUX64
+#else
bl .ffi_closure_helper_LINUX64
+#endif
.Lret:
# now r3 contains the return type
@@ -194,7 +206,11 @@ ffi_closure_LINUX64:
.LFE1:
.long 0
.byte 0,12,0,1,128,0,0,0
+#ifdef _CALL_LINUX
+ .size ffi_closure_LINUX64,.-.L.ffi_closure_LINUX64
+#else
.size .ffi_closure_LINUX64,.-.ffi_closure_LINUX64
+#endif
.section .eh_frame,EH_FRAME_FLAGS,@progbits
.Lframe1:
diff --git a/src/powerpc/sysv.S b/src/powerpc/sysv.S
index 675ed03..5ee3a19 100644
--- a/src/powerpc/sysv.S
+++ b/src/powerpc/sysv.S
@@ -142,14 +142,19 @@ L(float_return_value):
#endif
L(small_struct_return_value):
- /*
- * The C code always allocates a properly-aligned 8-byte bounce
- * buffer to make this assembly code very simple. Just write out
- * r3 and r4 to the buffer to allow the C code to handle the rest.
- */
- stw %r3, 0(%r30)
- stw %r4, 4(%r30)
- b L(done_return_value)
+ extrwi %r6,%r31,2,19 /* number of bytes padding = shift/8 */
+ mtcrf 0x02,%r31 /* copy flags to cr[24:27] (cr6) */
+ extrwi %r5,%r31,5,19 /* r5 <- number of bits of padding */
+ subfic %r6,%r6,4 /* r6 <- number of useful bytes in r3 */
+ bf- 25,L(done_return_value) /* struct in r3 ? if not, done. */
+/* smst_one_register: */
+ slw %r3,%r3,%r5 /* Left-justify value in r3 */
+ mtxer %r6 /* move byte count to XER ... */
+ stswx %r3,0,%r30 /* ... and store that many bytes */
+ bf+ 26,L(done_return_value) /* struct in r3:r4 ? */
+ add %r6,%r6,%r30 /* adjust pointer */
+ stswi %r4,%r6,4 /* store last four bytes */
+ b L(done_return_value)
.LFE1:
END(ffi_call_SYSV)
diff --git a/src/prep_cif.c b/src/prep_cif.c
index eb68341..e8ec5cf 100644
--- a/src/prep_cif.c
+++ b/src/prep_cif.c
@@ -140,6 +140,13 @@ ffi_status FFI_HIDDEN ffi_prep_cif_core(ffi_cif *cif, ffi_abi abi,
#ifdef SPARC
&& (cif->abi != FFI_V9 || cif->rtype->size > 32)
#endif
+#ifdef TILE
+ && (cif->rtype->size > 10 * FFI_SIZEOF_ARG)
+#endif
+#ifdef XTENSA
+ && (cif->rtype->size > 16)
+#endif
+
)
bytes = STACK_ARG_SIZE(sizeof(void*));
#endif
@@ -169,6 +176,20 @@ ffi_status FFI_HIDDEN ffi_prep_cif_core(ffi_cif *cif, ffi_abi abi,
if (((*ptr)->alignment - 1) & bytes)
bytes = ALIGN(bytes, (*ptr)->alignment);
+#ifdef TILE
+ if (bytes < 10 * FFI_SIZEOF_ARG &&
+ bytes + STACK_ARG_SIZE((*ptr)->size) > 10 * FFI_SIZEOF_ARG)
+ {
+ /* An argument is never split between the 10 parameter
+ registers and the stack. */
+ bytes = 10 * FFI_SIZEOF_ARG;
+ }
+#endif
+#ifdef XTENSA
+ if (bytes <= 6*4 && bytes + STACK_ARG_SIZE((*ptr)->size) > 6*4)
+ bytes = 6*4;
+#endif
+
bytes += STACK_ARG_SIZE((*ptr)->size);
}
#endif
diff --git a/src/s390/ffi.c b/src/s390/ffi.c
index ca2675b..8adb5bc 100644
--- a/src/s390/ffi.c
+++ b/src/s390/ffi.c
@@ -750,7 +750,8 @@ ffi_prep_closure_loc (ffi_closure *closure,
void *user_data,
void *codeloc)
{
- FFI_ASSERT (cif->abi == FFI_SYSV);
+ if (cif->abi != FFI_SYSV)
+ return FFI_BAD_ABI;
#ifndef __s390x__
*(short *)&closure->tramp [0] = 0x0d10; /* basr %r1,0 */
diff --git a/src/sparc/ffi.c b/src/sparc/ffi.c
index 1ac5d46..9f0fded 100644
--- a/src/sparc/ffi.c
+++ b/src/sparc/ffi.c
@@ -1,5 +1,5 @@
/* -----------------------------------------------------------------------
- ffi.c - Copyright (c) 2011 Anthony Green
+ ffi.c - Copyright (c) 2011, 2013 Anthony Green
Copyright (c) 1996, 2003-2004, 2007-2008 Red Hat, Inc.
SPARC Foreign Function Interface
@@ -376,6 +376,10 @@ extern int ffi_call_v8(void *, extended_cif *, unsigned,
unsigned, unsigned *, void (*fn)(void));
#endif
+#ifndef __GNUC__
+void ffi_flush_icache (void *, size_t);
+#endif
+
void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
{
extended_cif ecif;
@@ -417,7 +421,7 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
/* behind "call", so we alloc some executable space for it. */
/* l7 is used, we need to make sure v8.S doesn't use %l7. */
unsigned int *call_struct = NULL;
- ffi_closure_alloc(32, &call_struct);
+ ffi_closure_alloc(32, (void **)&call_struct);
if (call_struct)
{
unsigned long f = (unsigned long)fn;
@@ -432,10 +436,14 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
call_struct[5] = 0x01000000; /* nop */
call_struct[6] = 0x81c7e008; /* ret */
call_struct[7] = 0xbe100017; /* mov %l7, %i7 */
+#ifdef __GNUC__
asm volatile ("iflush %0; iflush %0+8; iflush %0+16; iflush %0+24" : :
"r" (call_struct) : "memory");
/* SPARC v8 requires 5 instructions for flush to be visible */
asm volatile ("nop; nop; nop; nop; nop");
+#else
+ ffi_flush_icache (call_struct, 32);
+#endif
ffi_call_v8(ffi_prep_args_v8, &ecif, cif->bytes,
cif->flags, rvalue, call_struct);
ffi_closure_free(call_struct);
@@ -513,6 +521,7 @@ ffi_prep_closure_loc (ffi_closure* closure,
closure->user_data = user_data;
/* Flush the Icache. closure is 8 bytes aligned. */
+#ifdef __GNUC__
#ifdef SPARC64
asm volatile ("flush %0; flush %0+8" : : "r" (closure) : "memory");
#else
@@ -520,6 +529,9 @@ ffi_prep_closure_loc (ffi_closure* closure,
/* SPARC v8 requires 5 instructions for flush to be visible */
asm volatile ("nop; nop; nop; nop; nop");
#endif
+#else
+ ffi_flush_icache (closure, 16);
+#endif
return FFI_OK;
}
diff --git a/src/sparc/v8.S b/src/sparc/v8.S
index 2c4eb60..6bf7ac0 100644
--- a/src/sparc/v8.S
+++ b/src/sparc/v8.S
@@ -1,5 +1,6 @@
/* -----------------------------------------------------------------------
- v8.S - Copyright (c) 1996, 1997, 2003, 2004, 2008 Red Hat, Inc.
+ v8.S - Copyright (c) 2013 The Written Word, Inc.
+ Copyright (c) 1996, 1997, 2003, 2004, 2008 Red Hat, Inc.
SPARC Foreign Function Interface
@@ -31,11 +32,39 @@
#define STACKFRAME 96 /* Minimum stack framesize for SPARC */
#define ARGS (64+4) /* Offset of register area in frame */
-.text
+#ifndef __GNUC__
+ .text
+ .align 8
+.globl ffi_flush_icache
+.globl _ffi_flush_icache
+
+ffi_flush_icache:
+_ffi_flush_icache:
+ add %o0, %o1, %o2
+#ifdef SPARC64
+1: flush %o0
+#else
+1: iflush %o0
+#endif
+ add %o0, 8, %o0
+ cmp %o0, %o2
+ blt 1b
+ nop
+ nop
+ nop
+ nop
+ nop
+ retl
+ nop
+.ffi_flush_icache_end:
+ .size ffi_flush_icache,.ffi_flush_icache_end-ffi_flush_icache
+#endif
+
+ .text
.align 8
.globl ffi_call_v8
.globl _ffi_call_v8
-
+
ffi_call_v8:
_ffi_call_v8:
.LLFB1:
@@ -213,6 +242,10 @@ ffi_closure_v8:
be,a done1
ldd [%fp-8], %i0
+ cmp %o0, FFI_TYPE_UINT64
+ be,a done1
+ ldd [%fp-8], %i0
+
ld [%fp-8], %i0
done1:
jmp %i7+8
diff --git a/src/sparc/v9.S b/src/sparc/v9.S
index 489ff02..bf31a2b 100644
--- a/src/sparc/v9.S
+++ b/src/sparc/v9.S
@@ -32,7 +32,7 @@
/* Only compile this in for 64bit builds, because otherwise the object file
will have inproper architecture due to used instructions. */
-#define STACKFRAME 128 /* Minimum stack framesize for SPARC */
+#define STACKFRAME 176 /* Minimum stack framesize for SPARC 64-bit */
#define STACK_BIAS 2047
#define ARGS (128) /* Offset of register area in frame */
diff --git a/src/tile/ffi.c b/src/tile/ffi.c
new file mode 100644
index 0000000..3a94469
--- /dev/null
+++ b/src/tile/ffi.c
@@ -0,0 +1,355 @@
+/* -----------------------------------------------------------------------
+ ffi.c - Copyright (c) 2012 Tilera Corp.
+
+ TILE Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <arch/abi.h>
+#include <arch/icache.h>
+#include <arch/opcode.h>
+
+
+/* The first 10 registers are used to pass arguments and return values. */
+#define NUM_ARG_REGS 10
+
+/* Performs a raw function call with the given NUM_ARG_REGS register arguments
+ and the specified additional stack arguments (if any). */
+extern void ffi_call_tile(ffi_sarg reg_args[NUM_ARG_REGS],
+ const ffi_sarg *stack_args,
+ size_t stack_args_bytes,
+ void (*fnaddr)(void))
+ FFI_HIDDEN;
+
+/* This handles the raw call from the closure stub, cleaning up the
+ parameters and delegating to ffi_closure_tile_inner. */
+extern void ffi_closure_tile(void) FFI_HIDDEN;
+
+
+ffi_status
+ffi_prep_cif_machdep(ffi_cif *cif)
+{
+ /* We always allocate room for all registers. Even if we don't
+ use them as parameters, they get returned in the same array
+ as struct return values so we need to make room. */
+ if (cif->bytes < NUM_ARG_REGS * FFI_SIZEOF_ARG)
+ cif->bytes = NUM_ARG_REGS * FFI_SIZEOF_ARG;
+
+ if (cif->rtype->size > NUM_ARG_REGS * FFI_SIZEOF_ARG)
+ cif->flags = FFI_TYPE_STRUCT;
+ else
+ cif->flags = FFI_TYPE_INT;
+
+ /* Nothing to do. */
+ return FFI_OK;
+}
+
+
+static long
+assign_to_ffi_arg(ffi_sarg *out, void *in, const ffi_type *type,
+ int write_to_reg)
+{
+ switch (type->type)
+ {
+ case FFI_TYPE_SINT8:
+ *out = *(SINT8 *)in;
+ return 1;
+
+ case FFI_TYPE_UINT8:
+ *out = *(UINT8 *)in;
+ return 1;
+
+ case FFI_TYPE_SINT16:
+ *out = *(SINT16 *)in;
+ return 1;
+
+ case FFI_TYPE_UINT16:
+ *out = *(UINT16 *)in;
+ return 1;
+
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT32:
+#ifndef __LP64__
+ case FFI_TYPE_POINTER:
+#endif
+ /* Note that even unsigned 32-bit quantities are sign extended
+ on tilegx when stored in a register. */
+ *out = *(SINT32 *)in;
+ return 1;
+
+ case FFI_TYPE_FLOAT:
+#ifdef __tilegx__
+ if (write_to_reg)
+ {
+ /* Properly sign extend the value. */
+ union { float f; SINT32 s32; } val;
+ val.f = *(float *)in;
+ *out = val.s32;
+ }
+ else
+#endif
+ {
+ *(float *)out = *(float *)in;
+ }
+ return 1;
+
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_DOUBLE:
+#ifdef __LP64__
+ case FFI_TYPE_POINTER:
+#endif
+ *(UINT64 *)out = *(UINT64 *)in;
+ return sizeof(UINT64) / FFI_SIZEOF_ARG;
+
+ case FFI_TYPE_STRUCT:
+ memcpy(out, in, type->size);
+ return (type->size + FFI_SIZEOF_ARG - 1) / FFI_SIZEOF_ARG;
+
+ case FFI_TYPE_VOID:
+ /* Must be a return type. Nothing to do. */
+ return 0;
+
+ default:
+ FFI_ASSERT(0);
+ return -1;
+ }
+}
+
+
+void
+ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+ ffi_sarg * const arg_mem = alloca(cif->bytes);
+ ffi_sarg * const reg_args = arg_mem;
+ ffi_sarg * const stack_args = &reg_args[NUM_ARG_REGS];
+ ffi_sarg *argp = arg_mem;
+ ffi_type ** const arg_types = cif->arg_types;
+ const long num_args = cif->nargs;
+ long i;
+
+ if (cif->flags == FFI_TYPE_STRUCT)
+ {
+ /* Pass a hidden pointer to the return value. We make sure there
+ is scratch space for the callee to store the return value even if
+ our caller doesn't care about it. */
+ *argp++ = (intptr_t)(rvalue ? rvalue : alloca(cif->rtype->size));
+
+ /* No more work needed to return anything. */
+ rvalue = NULL;
+ }
+
+ for (i = 0; i < num_args; i++)
+ {
+ ffi_type *type = arg_types[i];
+ void * const arg_in = avalue[i];
+ ptrdiff_t arg_word = argp - arg_mem;
+
+#ifndef __tilegx__
+ /* Doubleword-aligned values are always in an even-number register
+ pair, or doubleword-aligned stack slot if out of registers. */
+ long align = arg_word & (type->alignment > FFI_SIZEOF_ARG);
+ argp += align;
+ arg_word += align;
+#endif
+
+ if (type->type == FFI_TYPE_STRUCT)
+ {
+ const size_t arg_size_in_words =
+ (type->size + FFI_SIZEOF_ARG - 1) / FFI_SIZEOF_ARG;
+
+ if (arg_word < NUM_ARG_REGS &&
+ arg_word + arg_size_in_words > NUM_ARG_REGS)
+ {
+ /* Args are not allowed to span registers and the stack. */
+ argp = stack_args;
+ }
+
+ memcpy(argp, arg_in, type->size);
+ argp += arg_size_in_words;
+ }
+ else
+ {
+ argp += assign_to_ffi_arg(argp, arg_in, arg_types[i], 1);
+ }
+ }
+
+ /* Actually do the call. */
+ ffi_call_tile(reg_args, stack_args,
+ cif->bytes - (NUM_ARG_REGS * FFI_SIZEOF_ARG), fn);
+
+ if (rvalue != NULL)
+ assign_to_ffi_arg(rvalue, reg_args, cif->rtype, 0);
+}
+
+
+/* Template code for closure. */
+extern const UINT64 ffi_template_tramp_tile[] FFI_HIDDEN;
+
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure *closure,
+ ffi_cif *cif,
+ void (*fun)(ffi_cif*, void*, void**, void*),
+ void *user_data,
+ void *codeloc)
+{
+#ifdef __tilegx__
+ /* TILE-Gx */
+ SINT64 c;
+ SINT64 h;
+ int s;
+ UINT64 *out;
+
+ if (cif->abi != FFI_UNIX)
+ return FFI_BAD_ABI;
+
+ out = (UINT64 *)closure->tramp;
+
+ c = (intptr_t)closure;
+ h = (intptr_t)ffi_closure_tile;
+ s = 0;
+
+ /* Find the smallest shift count that doesn't lose information
+ (i.e. no need to explicitly insert high bits of the address that
+ are just the sign extension of the low bits). */
+ while ((c >> s) != (SINT16)(c >> s) || (h >> s) != (SINT16)(h >> s))
+ s += 16;
+
+#define OPS(a, b, shift) \
+ (create_Imm16_X0((a) >> (shift)) | create_Imm16_X1((b) >> (shift)))
+
+ /* Emit the moveli. */
+ *out++ = ffi_template_tramp_tile[0] | OPS(c, h, s);
+ for (s -= 16; s >= 0; s -= 16)
+ *out++ = ffi_template_tramp_tile[1] | OPS(c, h, s);
+
+#undef OPS
+
+ *out++ = ffi_template_tramp_tile[2];
+
+#else
+ /* TILEPro */
+ UINT64 *out;
+ intptr_t delta;
+
+ if (cif->abi != FFI_UNIX)
+ return FFI_BAD_ABI;
+
+ out = (UINT64 *)closure->tramp;
+ delta = (intptr_t)ffi_closure_tile - (intptr_t)codeloc;
+
+ *out++ = ffi_template_tramp_tile[0] | create_JOffLong_X1(delta >> 3);
+#endif
+
+ closure->cif = cif;
+ closure->fun = fun;
+ closure->user_data = user_data;
+
+ invalidate_icache(closure->tramp, (char *)out - closure->tramp,
+ getpagesize());
+
+ return FFI_OK;
+}
+
+
+/* This is called by the assembly wrapper for closures. This does
+ all of the work. On entry reg_args[0] holds the values the registers
+ had when the closure was invoked. On return reg_args[1] holds the register
+ values to be returned to the caller (many of which may be garbage). */
+void FFI_HIDDEN
+ffi_closure_tile_inner(ffi_closure *closure,
+ ffi_sarg reg_args[2][NUM_ARG_REGS],
+ ffi_sarg *stack_args)
+{
+ ffi_cif * const cif = closure->cif;
+ void ** const avalue = alloca(cif->nargs * sizeof(void *));
+ void *rvalue;
+ ffi_type ** const arg_types = cif->arg_types;
+ ffi_sarg * const reg_args_in = reg_args[0];
+ ffi_sarg * const reg_args_out = reg_args[1];
+ ffi_sarg * argp;
+ long i, arg_word, nargs = cif->nargs;
+ /* Use a union to guarantee proper alignment for double. */
+ union { ffi_sarg arg[NUM_ARG_REGS]; double d; UINT64 u64; } closure_ret;
+
+ /* Start out reading register arguments. */
+ argp = reg_args_in;
+
+ /* Copy the caller's structure return address to that the closure
+ returns the data directly to the caller. */
+ if (cif->flags == FFI_TYPE_STRUCT)
+ {
+ /* Return by reference via hidden pointer. */
+ rvalue = (void *)(intptr_t)*argp++;
+ arg_word = 1;
+ }
+ else
+ {
+ /* Return the value in registers. */
+ rvalue = &closure_ret;
+ arg_word = 0;
+ }
+
+ /* Grab the addresses of the arguments. */
+ for (i = 0; i < nargs; i++)
+ {
+ ffi_type * const type = arg_types[i];
+ const size_t arg_size_in_words =
+ (type->size + FFI_SIZEOF_ARG - 1) / FFI_SIZEOF_ARG;
+
+#ifndef __tilegx__
+ /* Doubleword-aligned values are always in an even-number register
+ pair, or doubleword-aligned stack slot if out of registers. */
+ long align = arg_word & (type->alignment > FFI_SIZEOF_ARG);
+ argp += align;
+ arg_word += align;
+#endif
+
+ if (arg_word == NUM_ARG_REGS ||
+ (arg_word < NUM_ARG_REGS &&
+ arg_word + arg_size_in_words > NUM_ARG_REGS))
+ {
+ /* Switch to reading arguments from the stack. */
+ argp = stack_args;
+ arg_word = NUM_ARG_REGS;
+ }
+
+ avalue[i] = argp;
+ argp += arg_size_in_words;
+ arg_word += arg_size_in_words;
+ }
+
+ /* Invoke the closure. */
+ closure->fun(cif, rvalue, avalue, closure->user_data);
+
+ if (cif->flags != FFI_TYPE_STRUCT)
+ {
+ /* Canonicalize for register representation. */
+ assign_to_ffi_arg(reg_args_out, &closure_ret, cif->rtype, 1);
+ }
+}
diff --git a/src/tile/ffitarget.h b/src/tile/ffitarget.h
new file mode 100644
index 0000000..679fb5d
--- /dev/null
+++ b/src/tile/ffitarget.h
@@ -0,0 +1,65 @@
+/* -----------------------------------------------------------------*-C-*-
+ ffitarget.h - Copyright (c) 2012 Tilera Corp.
+ Target configuration macros for TILE.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#ifndef LIBFFI_TARGET_H
+#define LIBFFI_TARGET_H
+
+#ifndef LIBFFI_H
+#error "Please do not include ffitarget.h directly into your source. Use ffi.h instead."
+#endif
+
+#ifndef LIBFFI_ASM
+
+#include <arch/abi.h>
+
+typedef uint_reg_t ffi_arg;
+typedef int_reg_t ffi_sarg;
+
+typedef enum ffi_abi {
+ FFI_FIRST_ABI = 0,
+ FFI_UNIX,
+ FFI_LAST_ABI,
+ FFI_DEFAULT_ABI = FFI_UNIX
+} ffi_abi;
+#endif
+
+/* ---- Definitions for closures ----------------------------------------- */
+#define FFI_CLOSURES 1
+
+#ifdef __tilegx__
+/* We always pass 8-byte values, even in -m32 mode. */
+# define FFI_SIZEOF_ARG 8
+# ifdef __LP64__
+# define FFI_TRAMPOLINE_SIZE (8 * 5) /* 5 bundles */
+# else
+# define FFI_TRAMPOLINE_SIZE (8 * 3) /* 3 bundles */
+# endif
+#else
+# define FFI_SIZEOF_ARG 4
+# define FFI_TRAMPOLINE_SIZE 8 /* 1 bundle */
+#endif
+#define FFI_NATIVE_RAW_API 0
+
+#endif
diff --git a/src/tile/tile.S b/src/tile/tile.S
new file mode 100644
index 0000000..a186e1f
--- /dev/null
+++ b/src/tile/tile.S
@@ -0,0 +1,360 @@
+/* -----------------------------------------------------------------------
+ tile.S - Copyright (c) 2011 Tilera Corp.
+
+ Tilera TILEPro and TILE-Gx Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+/* Number of bytes in a register. */
+#define REG_SIZE FFI_SIZEOF_ARG
+
+/* Number of bytes in stack linkage area for backtracing.
+
+ A note about the ABI: on entry to a procedure, sp points to a stack
+ slot where it must spill the return address if it's not a leaf.
+ REG_SIZE bytes beyond that is a slot owned by the caller which
+ contains the sp value that the caller had when it was originally
+ entered (i.e. the caller's frame pointer). */
+#define LINKAGE_SIZE (2 * REG_SIZE)
+
+/* The first 10 registers are used to pass arguments and return values. */
+#define NUM_ARG_REGS 10
+
+#ifdef __tilegx__
+#define SW st
+#define LW ld
+#define BGZT bgtzt
+#else
+#define SW sw
+#define LW lw
+#define BGZT bgzt
+#endif
+
+
+/* void ffi_call_tile (int_reg_t reg_args[NUM_ARG_REGS],
+ const int_reg_t *stack_args,
+ unsigned long stack_args_bytes,
+ void (*fnaddr)(void));
+
+ On entry, REG_ARGS contain the outgoing register values,
+ and STACK_ARGS containts STACK_ARG_BYTES of additional values
+ to be passed on the stack. If STACK_ARG_BYTES is zero, then
+ STACK_ARGS is ignored.
+
+ When the invoked function returns, the values of r0-r9 are
+ blindly stored back into REG_ARGS for the caller to examine. */
+
+ .section .text.ffi_call_tile, "ax", @progbits
+ .align 8
+ .globl ffi_call_tile
+ FFI_HIDDEN(ffi_call_tile)
+ffi_call_tile:
+
+/* Incoming arguments. */
+#define REG_ARGS r0
+#define INCOMING_STACK_ARGS r1
+#define STACK_ARG_BYTES r2
+#define ORIG_FNADDR r3
+
+/* Temporary values. */
+#define FRAME_SIZE r10
+#define TMP r11
+#define TMP2 r12
+#define OUTGOING_STACK_ARGS r13
+#define REG_ADDR_PTR r14
+#define RETURN_REG_ADDR r15
+#define FNADDR r16
+
+ .cfi_startproc
+ {
+ /* Save return address. */
+ SW sp, lr
+ .cfi_offset lr, 0
+ /* Prepare to spill incoming r52. */
+ addi TMP, sp, -REG_SIZE
+ /* Increase frame size to have room to spill r52 and REG_ARGS.
+ The +7 is to round up mod 8. */
+ addi FRAME_SIZE, STACK_ARG_BYTES, \
+ REG_SIZE + REG_SIZE + LINKAGE_SIZE + 7
+ }
+ {
+ /* Round stack frame size to a multiple of 8 to satisfy ABI. */
+ andi FRAME_SIZE, FRAME_SIZE, -8
+ /* Compute where to spill REG_ARGS value. */
+ addi TMP2, sp, -(REG_SIZE * 2)
+ }
+ {
+ /* Spill incoming r52. */
+ SW TMP, r52
+ .cfi_offset r52, -REG_SIZE
+ /* Set up our frame pointer. */
+ move r52, sp
+ .cfi_def_cfa_register r52
+ /* Push stack frame. */
+ sub sp, sp, FRAME_SIZE
+ }
+ {
+ /* Prepare to set up stack linkage. */
+ addi TMP, sp, REG_SIZE
+ /* Prepare to memcpy stack args. */
+ addi OUTGOING_STACK_ARGS, sp, LINKAGE_SIZE
+ /* Save REG_ARGS which we will need after we call the subroutine. */
+ SW TMP2, REG_ARGS
+ }
+ {
+ /* Set up linkage info to hold incoming stack pointer. */
+ SW TMP, r52
+ }
+ {
+ /* Skip stack args memcpy if we don't have any stack args (common). */
+ blezt STACK_ARG_BYTES, .Ldone_stack_args_memcpy
+ }
+
+.Lmemcpy_stack_args:
+ {
+ /* Load incoming argument from stack_args. */
+ LW TMP, INCOMING_STACK_ARGS
+ addi INCOMING_STACK_ARGS, INCOMING_STACK_ARGS, REG_SIZE
+ }
+ {
+ /* Store stack argument into outgoing stack argument area. */
+ SW OUTGOING_STACK_ARGS, TMP
+ addi OUTGOING_STACK_ARGS, OUTGOING_STACK_ARGS, REG_SIZE
+ addi STACK_ARG_BYTES, STACK_ARG_BYTES, -REG_SIZE
+ }
+ {
+ BGZT STACK_ARG_BYTES, .Lmemcpy_stack_args
+ }
+.Ldone_stack_args_memcpy:
+
+ {
+ /* Copy aside ORIG_FNADDR so we can overwrite its register. */
+ move FNADDR, ORIG_FNADDR
+ /* Prepare to load argument registers. */
+ addi REG_ADDR_PTR, r0, REG_SIZE
+ /* Load outgoing r0. */
+ LW r0, r0
+ }
+
+ /* Load up argument registers from the REG_ARGS array. */
+#define LOAD_REG(REG, PTR) \
+ { \
+ LW REG, PTR ; \
+ addi PTR, PTR, REG_SIZE \
+ }
+
+ LOAD_REG(r1, REG_ADDR_PTR)
+ LOAD_REG(r2, REG_ADDR_PTR)
+ LOAD_REG(r3, REG_ADDR_PTR)
+ LOAD_REG(r4, REG_ADDR_PTR)
+ LOAD_REG(r5, REG_ADDR_PTR)
+ LOAD_REG(r6, REG_ADDR_PTR)
+ LOAD_REG(r7, REG_ADDR_PTR)
+ LOAD_REG(r8, REG_ADDR_PTR)
+ LOAD_REG(r9, REG_ADDR_PTR)
+
+ {
+ /* Call the subroutine. */
+ jalr FNADDR
+ }
+
+ {
+ /* Restore original lr. */
+ LW lr, r52
+ /* Prepare to recover ARGS, which we spilled earlier. */
+ addi TMP, r52, -(2 * REG_SIZE)
+ }
+ {
+ /* Restore ARGS, so we can fill it in with the return regs r0-r9. */
+ LW RETURN_REG_ADDR, TMP
+ /* Prepare to restore original r52. */
+ addi TMP, r52, -REG_SIZE
+ }
+
+ {
+ /* Pop stack frame. */
+ move sp, r52
+ /* Restore original r52. */
+ LW r52, TMP
+ }
+
+#define STORE_REG(REG, PTR) \
+ { \
+ SW PTR, REG ; \
+ addi PTR, PTR, REG_SIZE \
+ }
+
+ /* Return all register values by reference. */
+ STORE_REG(r0, RETURN_REG_ADDR)
+ STORE_REG(r1, RETURN_REG_ADDR)
+ STORE_REG(r2, RETURN_REG_ADDR)
+ STORE_REG(r3, RETURN_REG_ADDR)
+ STORE_REG(r4, RETURN_REG_ADDR)
+ STORE_REG(r5, RETURN_REG_ADDR)
+ STORE_REG(r6, RETURN_REG_ADDR)
+ STORE_REG(r7, RETURN_REG_ADDR)
+ STORE_REG(r8, RETURN_REG_ADDR)
+ STORE_REG(r9, RETURN_REG_ADDR)
+
+ {
+ jrp lr
+ }
+
+ .cfi_endproc
+ .size ffi_call_tile, .-ffi_call_tile
+
+/* ffi_closure_tile(...)
+
+ On entry, lr points to the closure plus 8 bytes, and r10
+ contains the actual return address.
+
+ This function simply dumps all register parameters into a stack array
+ and passes the closure, the registers array, and the stack arguments
+ to C code that does all of the actual closure processing. */
+
+ .section .text.ffi_closure_tile, "ax", @progbits
+ .align 8
+ .globl ffi_closure_tile
+ FFI_HIDDEN(ffi_closure_tile)
+
+ .cfi_startproc
+/* Room to spill all NUM_ARG_REGS incoming registers, plus frame linkage. */
+#define CLOSURE_FRAME_SIZE (((NUM_ARG_REGS * REG_SIZE * 2 + LINKAGE_SIZE) + 7) & -8)
+ffi_closure_tile:
+ {
+#ifdef __tilegx__
+ st sp, lr
+ .cfi_offset lr, 0
+#else
+ /* Save return address (in r10 due to closure stub wrapper). */
+ SW sp, r10
+ .cfi_return_column r10
+ .cfi_offset r10, 0
+#endif
+ /* Compute address for stack frame linkage. */
+ addli r10, sp, -(CLOSURE_FRAME_SIZE - REG_SIZE)
+ }
+ {
+ /* Save incoming stack pointer in linkage area. */
+ SW r10, sp
+ .cfi_offset sp, -(CLOSURE_FRAME_SIZE - REG_SIZE)
+ /* Push a new stack frame. */
+ addli sp, sp, -CLOSURE_FRAME_SIZE
+ .cfi_adjust_cfa_offset CLOSURE_FRAME_SIZE
+ }
+
+ {
+ /* Create pointer to where to start spilling registers. */
+ addi r10, sp, LINKAGE_SIZE
+ }
+
+ /* Spill all the incoming registers. */
+ STORE_REG(r0, r10)
+ STORE_REG(r1, r10)
+ STORE_REG(r2, r10)
+ STORE_REG(r3, r10)
+ STORE_REG(r4, r10)
+ STORE_REG(r5, r10)
+ STORE_REG(r6, r10)
+ STORE_REG(r7, r10)
+ STORE_REG(r8, r10)
+ {
+ /* Save r9. */
+ SW r10, r9
+#ifdef __tilegx__
+ /* Pointer to closure is passed in r11. */
+ move r0, r11
+#else
+ /* Compute pointer to the closure object. Because the closure
+ starts with a "jal ffi_closure_tile", we can just take the
+ value of lr (a phony return address pointing into the closure)
+ and subtract 8. */
+ addi r0, lr, -8
+#endif
+ /* Compute a pointer to the register arguments we just spilled. */
+ addi r1, sp, LINKAGE_SIZE
+ }
+ {
+ /* Compute a pointer to the extra stack arguments (if any). */
+ addli r2, sp, CLOSURE_FRAME_SIZE + LINKAGE_SIZE
+ /* Call C code to deal with all of the grotty details. */
+ jal ffi_closure_tile_inner
+ }
+ {
+ addli r10, sp, CLOSURE_FRAME_SIZE
+ }
+ {
+ /* Restore the return address. */
+ LW lr, r10
+ /* Compute pointer to registers array. */
+ addli r10, sp, LINKAGE_SIZE + (NUM_ARG_REGS * REG_SIZE)
+ }
+ /* Return all the register values, which C code may have set. */
+ LOAD_REG(r0, r10)
+ LOAD_REG(r1, r10)
+ LOAD_REG(r2, r10)
+ LOAD_REG(r3, r10)
+ LOAD_REG(r4, r10)
+ LOAD_REG(r5, r10)
+ LOAD_REG(r6, r10)
+ LOAD_REG(r7, r10)
+ LOAD_REG(r8, r10)
+ LOAD_REG(r9, r10)
+ {
+ /* Pop the frame. */
+ addli sp, sp, CLOSURE_FRAME_SIZE
+ jrp lr
+ }
+
+ .cfi_endproc
+ .size ffi_closure_tile, . - ffi_closure_tile
+
+
+/* What follows are code template instructions that get copied to the
+ closure trampoline by ffi_prep_closure_loc. The zeroed operands
+ get replaced by their proper values at runtime. */
+
+ .section .text.ffi_template_tramp_tile, "ax", @progbits
+ .align 8
+ .globl ffi_template_tramp_tile
+ FFI_HIDDEN(ffi_template_tramp_tile)
+ffi_template_tramp_tile:
+#ifdef __tilegx__
+ {
+ moveli r11, 0 /* backpatched to address of containing closure. */
+ moveli r10, 0 /* backpatched to ffi_closure_tile. */
+ }
+ /* Note: the following bundle gets generated multiple times
+ depending on the pointer value (esp. useful for -m32 mode). */
+ { shl16insli r11, r11, 0 ; shl16insli r10, r10, 0 }
+ { info 2+8 /* for backtracer: -> pc in lr, frame size 0 */ ; jr r10 }
+#else
+ /* 'jal .' yields a PC-relative offset of zero so we can OR in the
+ right offset at runtime. */
+ { move r10, lr ; jal . /* ffi_closure_tile */ }
+#endif
+
+ .size ffi_template_tramp_tile, . - ffi_template_tramp_tile
diff --git a/src/x86/ffi.c b/src/x86/ffi.c
index 9343c26..0600414 100644
--- a/src/x86/ffi.c
+++ b/src/x86/ffi.c
@@ -58,7 +58,8 @@ void ffi_prep_args(char *stack, extended_cif *ecif)
argp = stack;
- if (ecif->cif->flags == FFI_TYPE_STRUCT
+ if ((ecif->cif->flags == FFI_TYPE_STRUCT
+ || ecif->cif->flags == FFI_TYPE_MS_STRUCT)
#ifdef X86_WIN64
&& (ecif->cif->rtype->size != 1 && ecif->cif->rtype->size != 2
&& ecif->cif->rtype->size != 4 && ecif->cif->rtype->size != 8)
@@ -279,7 +280,12 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
else
#endif
{
- cif->flags = FFI_TYPE_STRUCT;
+#ifdef X86_WIN32
+ if (cif->abi == FFI_MS_CDECL)
+ cif->flags = FFI_TYPE_MS_STRUCT;
+ else
+#endif
+ cif->flags = FFI_TYPE_STRUCT;
/* allocate space for return value pointer */
cif->bytes += ALIGN(sizeof(void*), FFI_SIZEOF_ARG);
}
@@ -309,9 +315,7 @@ ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
cif->bytes += 4 * sizeof(ffi_arg);
#endif
-#ifdef X86_DARWIN
cif->bytes = (cif->bytes + 15) & ~0xF;
-#endif
return FFI_OK;
}
@@ -349,7 +353,8 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
}
#else
if (rvalue == NULL
- && cif->flags == FFI_TYPE_STRUCT)
+ && (cif->flags == FFI_TYPE_STRUCT
+ || cif->flags == FFI_TYPE_MS_STRUCT))
{
ecif.rvalue = alloca(cif->rtype->size);
}
@@ -368,6 +373,7 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
#elif defined(X86_WIN32)
case FFI_SYSV:
case FFI_STDCALL:
+ case FFI_MS_CDECL:
ffi_call_win32(ffi_prep_args, &ecif, cif->abi, cif->bytes, cif->flags,
ecif.rvalue, fn);
break;
@@ -416,7 +422,7 @@ void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
/** private members **/
/* The following __attribute__((regparm(1))) decorations will have no effect
- on MSVC - standard cdecl convention applies. */
+ on MSVC or SUNPRO_C -- standard conventions apply. */
static void ffi_prep_incoming_args_SYSV (char *stack, void **ret,
void** args, ffi_cif* cif);
void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *)
@@ -426,6 +432,8 @@ unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *)
void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *)
__attribute__ ((regparm(1)));
#ifdef X86_WIN32
+void FFI_HIDDEN ffi_closure_raw_THISCALL (ffi_raw_closure *)
+ __attribute__ ((regparm(1)));
void FFI_HIDDEN ffi_closure_STDCALL (ffi_closure *)
__attribute__ ((regparm(1)));
void FFI_HIDDEN ffi_closure_THISCALL (ffi_closure *)
@@ -511,7 +519,8 @@ ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue,
argp += sizeof(void *);
}
#else
- if ( cif->flags == FFI_TYPE_STRUCT ) {
+ if ( cif->flags == FFI_TYPE_STRUCT
+ || cif->flags == FFI_TYPE_MS_STRUCT ) {
*rvalue = *(void **) argp;
argp += sizeof(void *);
}
@@ -593,7 +602,7 @@ ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue,
{ unsigned char *__tramp = (unsigned char*)(TRAMP); \
unsigned int __fun = (unsigned int)(FUN); \
unsigned int __ctx = (unsigned int)(CTX); \
- unsigned int __dis = __fun - (__ctx + 22); \
+ unsigned int __dis = __fun - (__ctx + 49); \
unsigned short __size = (unsigned short)(SIZE); \
*(unsigned int *) &__tramp[0] = 0x8324048b; /* mov (%esp), %eax */ \
*(unsigned int *) &__tramp[4] = 0x4c890cec; /* sub $12, %esp */ \
@@ -671,6 +680,12 @@ ffi_prep_closure_loc (ffi_closure* closure,
&ffi_closure_STDCALL,
(void*)codeloc, cif->bytes);
}
+ else if (cif->abi == FFI_MS_CDECL)
+ {
+ FFI_INIT_TRAMPOLINE (&closure->tramp[0],
+ &ffi_closure_SYSV,
+ (void*)codeloc);
+ }
#endif /* X86_WIN32 */
#endif /* !X86_WIN64 */
else
@@ -699,6 +714,9 @@ ffi_prep_raw_closure_loc (ffi_raw_closure* closure,
int i;
if (cif->abi != FFI_SYSV) {
+#ifdef X86_WIN32
+ if (cif->abi != FFI_THISCALL)
+#endif
return FFI_BAD_ABI;
}
@@ -713,10 +731,20 @@ ffi_prep_raw_closure_loc (ffi_raw_closure* closure,
FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_LONGDOUBLE);
}
-
+#ifdef X86_WIN32
+ if (cif->abi == FFI_SYSV)
+ {
+#endif
FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_raw_SYSV,
codeloc);
-
+#ifdef X86_WIN32
+ }
+ else if (cif->abi == FFI_THISCALL)
+ {
+ FFI_INIT_TRAMPOLINE_THISCALL (&closure->tramp[0], &ffi_closure_raw_THISCALL,
+ codeloc, cif->bytes);
+ }
+#endif
closure->cif = cif;
closure->user_data = user_data;
closure->fun = fun;
@@ -747,8 +775,9 @@ ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue)
/* If the return value is a struct and we don't have a return */
/* value address then we need to make one */
- if ((rvalue == NULL) &&
- (cif->rtype->type == FFI_TYPE_STRUCT))
+ if (rvalue == NULL
+ && (cif->flags == FFI_TYPE_STRUCT
+ || cif->flags == FFI_TYPE_MS_STRUCT))
{
ecif.rvalue = alloca(cif->rtype->size);
}
@@ -761,7 +790,8 @@ ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue)
#ifdef X86_WIN32
case FFI_SYSV:
case FFI_STDCALL:
- ffi_call_win32(ffi_prep_args, &ecif, cif->abi, cif->bytes, cif->flags,
+ case FFI_MS_CDECL:
+ ffi_call_win32(ffi_prep_args_raw, &ecif, cif->abi, cif->bytes, cif->flags,
ecif.rvalue, fn);
break;
case FFI_THISCALL:
@@ -789,7 +819,7 @@ ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *fake_avalue)
cif->abi = abi = FFI_THISCALL;
if (passed_regs < 1 && abi == FFI_THISCALL)
cif->abi = abi = FFI_STDCALL;
- ffi_call_win32(ffi_prep_args, &ecif, abi, cif->bytes, cif->flags,
+ ffi_call_win32(ffi_prep_args_raw, &ecif, abi, cif->bytes, cif->flags,
ecif.rvalue, fn);
}
break;
diff --git a/src/x86/ffi64.c b/src/x86/ffi64.c
index defd774..2014af2 100644
--- a/src/x86/ffi64.c
+++ b/src/x86/ffi64.c
@@ -1,5 +1,6 @@
/* -----------------------------------------------------------------------
- ffi64.c - Copyright (c) 20011 Anthony Green
+ ffi64.c - Copyright (c) 2013 The Written Word, Inc.
+ Copyright (c) 2011 Anthony Green
Copyright (c) 2008, 2010 Red Hat, Inc.
Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de>
@@ -37,11 +38,29 @@
#define MAX_GPR_REGS 6
#define MAX_SSE_REGS 8
+#if defined(__INTEL_COMPILER)
+#define UINT128 __m128
+#else
+#if defined(__SUNPRO_C)
+#include <sunmedia_types.h>
+#define UINT128 __m128i
+#else
+#define UINT128 __int128_t
+#endif
+#endif
+
+union big_int_union
+{
+ UINT32 i32;
+ UINT64 i64;
+ UINT128 i128;
+};
+
struct register_args
{
/* Registers for argument passing. */
UINT64 gpr[MAX_GPR_REGS];
- __int128_t sse[MAX_SSE_REGS];
+ union big_int_union sse[MAX_SSE_REGS];
};
extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
@@ -465,16 +484,33 @@ ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
{
case X86_64_INTEGER_CLASS:
case X86_64_INTEGERSI_CLASS:
- reg_args->gpr[gprcount] = 0;
- memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
+ /* Sign-extend integer arguments passed in general
+ purpose registers, to cope with the fact that
+ LLVM incorrectly assumes that this will be done
+ (the x86-64 PS ABI does not specify this). */
+ switch (arg_types[i]->type)
+ {
+ case FFI_TYPE_SINT8:
+ *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a);
+ break;
+ case FFI_TYPE_SINT16:
+ *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a);
+ break;
+ case FFI_TYPE_SINT32:
+ *(SINT64 *)&reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a);
+ break;
+ default:
+ reg_args->gpr[gprcount] = 0;
+ memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
+ }
gprcount++;
break;
case X86_64_SSE_CLASS:
case X86_64_SSEDF_CLASS:
- reg_args->sse[ssecount++] = *(UINT64 *) a;
+ reg_args->sse[ssecount++].i64 = *(UINT64 *) a;
break;
case X86_64_SSESF_CLASS:
- reg_args->sse[ssecount++] = *(UINT32 *) a;
+ reg_args->sse[ssecount++].i32 = *(UINT32 *) a;
break;
default:
abort();
diff --git a/src/x86/ffitarget.h b/src/x86/ffitarget.h
index 54a6121..46f294c 100644
--- a/src/x86/ffitarget.h
+++ b/src/x86/ffitarget.h
@@ -61,8 +61,9 @@ typedef unsigned long long ffi_arg;
typedef long long ffi_sarg;
#endif
#else
-#if defined __x86_64__ && !defined __LP64__
+#if defined __x86_64__ && defined __ILP32__
#define FFI_SIZEOF_ARG 8
+#define FFI_SIZEOF_JAVA_RAW 4
typedef unsigned long long ffi_arg;
typedef long long ffi_sarg;
#else
@@ -80,9 +81,13 @@ typedef enum ffi_abi {
FFI_STDCALL,
FFI_THISCALL,
FFI_FASTCALL,
+ FFI_MS_CDECL,
FFI_LAST_ABI,
- /* TODO: Add fastcall support for the sake of completeness */
+#ifdef _MSC_VER
+ FFI_DEFAULT_ABI = FFI_MS_CDECL
+#else
FFI_DEFAULT_ABI = FFI_SYSV
+#endif
#elif defined(X86_WIN64)
FFI_WIN64,
@@ -109,6 +114,7 @@ typedef enum ffi_abi {
#define FFI_TYPE_SMALL_STRUCT_1B (FFI_TYPE_LAST + 1)
#define FFI_TYPE_SMALL_STRUCT_2B (FFI_TYPE_LAST + 2)
#define FFI_TYPE_SMALL_STRUCT_4B (FFI_TYPE_LAST + 3)
+#define FFI_TYPE_MS_STRUCT (FFI_TYPE_LAST + 4)
#if defined (X86_64) || (defined (__x86_64__) && defined (X86_DARWIN))
#define FFI_TRAMPOLINE_SIZE 24
diff --git a/src/x86/sysv.S b/src/x86/sysv.S
index f108dd8..3bd5477 100644
--- a/src/x86/sysv.S
+++ b/src/x86/sysv.S
@@ -1,5 +1,6 @@
/* -----------------------------------------------------------------------
- sysv.S - Copyright (c) 1996, 1998, 2001-2003, 2005, 2008, 2010 Red Hat, Inc.
+ sysv.S - Copyright (c) 2013 The Written Word, Inc.
+ - Copyright (c) 1996,1998,2001-2003,2005,2008,2010 Red Hat, Inc.
X86 Foreign Function Interface
@@ -181,9 +182,19 @@ ffi_closure_SYSV:
leal -24(%ebp), %edx
movl %edx, -12(%ebp) /* resp */
leal 8(%ebp), %edx
+#ifdef __SUNPRO_C
+ /* The SUNPRO compiler doesn't support GCC's regparm function
+ attribute, so we have to pass all three arguments to
+ ffi_closure_SYSV_inner on the stack. */
+ movl %edx, 8(%esp) /* args = __builtin_dwarf_cfa () */
+ leal -12(%ebp), %edx
+ movl %edx, 4(%esp) /* &resp */
+ movl %eax, (%esp) /* closure */
+#else
movl %edx, 4(%esp) /* args = __builtin_dwarf_cfa () */
leal -12(%ebp), %edx
movl %edx, (%esp) /* &resp */
+#endif
#if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE || !defined __PIC__
call ffi_closure_SYSV_inner
#else
@@ -328,6 +339,9 @@ ffi_closure_raw_SYSV:
.size ffi_closure_raw_SYSV, .-ffi_closure_raw_SYSV
#endif
+#if defined __GNUC__
+/* Only emit dwarf unwind info when building with GNU toolchain. */
+
#if defined __PIC__
# if defined __sun__ && defined __svr4__
/* 32-bit Solaris 2/x86 uses datarel encoding for PIC. GNU ld before 2.22
@@ -460,6 +474,7 @@ ffi_closure_raw_SYSV:
.LEFDE3:
#endif
+#endif
#endif /* ifndef __x86_64__ */
diff --git a/src/x86/unix64.S b/src/x86/unix64.S
index 7a6619a..dcd6bc7 100644
--- a/src/x86/unix64.S
+++ b/src/x86/unix64.S
@@ -1,6 +1,7 @@
/* -----------------------------------------------------------------------
- unix64.S - Copyright (c) 2002 Bo Thorsen <bo@suse.de>
- Copyright (c) 2008 Red Hat, Inc
+ unix64.S - Copyright (c) 2013 The Written Word, Inc.
+ - Copyright (c) 2008 Red Hat, Inc
+ - Copyright (c) 2002 Bo Thorsen <bo@suse.de>
x86-64 Foreign Function Interface
@@ -324,6 +325,9 @@ ffi_closure_unix64:
.LUW9:
.size ffi_closure_unix64,.-ffi_closure_unix64
+#ifdef __GNUC__
+/* Only emit DWARF unwind info when building with the GNU toolchain. */
+
#ifdef HAVE_AS_X86_64_UNWIND_SECTION_TYPE
.section .eh_frame,"a",@unwind
#else
@@ -419,6 +423,8 @@ ffi_closure_unix64:
.align 8
.LEFDE3:
+#endif /* __GNUC__ */
+
#endif /* __x86_64__ */
#if defined __ELF__ && defined __linux__
diff --git a/src/x86/win32.S b/src/x86/win32.S
index 47629fe..24b7bbd 100644
--- a/src/x86/win32.S
+++ b/src/x86/win32.S
@@ -108,31 +108,37 @@ ca_jumpdata:
dd offset ca_retfloat ;; FFI_TYPE_FLOAT
dd offset ca_retdouble ;; FFI_TYPE_DOUBLE
dd offset ca_retlongdouble ;; FFI_TYPE_LONGDOUBLE
- dd offset ca_retint8 ;; FFI_TYPE_UINT8
- dd offset ca_retint8 ;; FFI_TYPE_SINT8
- dd offset ca_retint16 ;; FFI_TYPE_UINT16
- dd offset ca_retint16 ;; FFI_TYPE_SINT16
+ dd offset ca_retuint8 ;; FFI_TYPE_UINT8
+ dd offset ca_retsint8 ;; FFI_TYPE_SINT8
+ dd offset ca_retuint16 ;; FFI_TYPE_UINT16
+ dd offset ca_retsint16 ;; FFI_TYPE_SINT16
dd offset ca_retint ;; FFI_TYPE_UINT32
dd offset ca_retint ;; FFI_TYPE_SINT32
dd offset ca_retint64 ;; FFI_TYPE_UINT64
dd offset ca_retint64 ;; FFI_TYPE_SINT64
dd offset ca_epilogue ;; FFI_TYPE_STRUCT
dd offset ca_retint ;; FFI_TYPE_POINTER
- dd offset ca_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B
- dd offset ca_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B
+ dd offset ca_retstruct1b ;; FFI_TYPE_SMALL_STRUCT_1B
+ dd offset ca_retstruct2b ;; FFI_TYPE_SMALL_STRUCT_2B
dd offset ca_retint ;; FFI_TYPE_SMALL_STRUCT_4B
+ dd offset ca_epilogue ;; FFI_TYPE_MS_STRUCT
-ca_retint8:
- ;; Load %ecx with the pointer to storage for the return value
- mov ecx, rvalue
- mov [ecx + 0], al
- jmp ca_epilogue
+ /* Sign/zero extend as appropriate. */
+ca_retuint8:
+ movzx eax, al
+ jmp ca_retint
-ca_retint16:
- ;; Load %ecx with the pointer to storage for the return value
- mov ecx, rvalue
- mov [ecx + 0], ax
- jmp ca_epilogue
+ca_retsint8:
+ movsx eax, al
+ jmp ca_retint
+
+ca_retuint16:
+ movzx eax, ax
+ jmp ca_retint
+
+ca_retsint16:
+ movsx eax, ax
+ jmp ca_retint
ca_retint:
;; Load %ecx with the pointer to storage for the return value
@@ -165,14 +171,24 @@ ca_retlongdouble:
fstp TBYTE PTR [ecx]
jmp ca_epilogue
+ca_retstruct1b:
+ ;; Load %ecx with the pointer to storage for the return value
+ mov ecx, rvalue
+ mov [ecx + 0], al
+ jmp ca_epilogue
+
+ca_retstruct2b:
+ ;; Load %ecx with the pointer to storage for the return value
+ mov ecx, rvalue
+ mov [ecx + 0], ax
+ jmp ca_epilogue
+
ca_epilogue:
;; Epilogue code is autogenerated.
ret
ffi_call_win32 ENDP
ffi_closure_THISCALL PROC NEAR FORCEFRAME
- push ebp
- mov ebp, esp
sub esp, 40
lea edx, [ebp -24]
mov [ebp - 12], edx /* resp */
@@ -204,26 +220,35 @@ cs_jumpdata:
dd offset cs_retfloat ;; FFI_TYPE_FLOAT
dd offset cs_retdouble ;; FFI_TYPE_DOUBLE
dd offset cs_retlongdouble ;; FFI_TYPE_LONGDOUBLE
- dd offset cs_retint8 ;; FFI_TYPE_UINT8
- dd offset cs_retint8 ;; FFI_TYPE_SINT8
- dd offset cs_retint16 ;; FFI_TYPE_UINT16
- dd offset cs_retint16 ;; FFI_TYPE_SINT16
+ dd offset cs_retuint8 ;; FFI_TYPE_UINT8
+ dd offset cs_retsint8 ;; FFI_TYPE_SINT8
+ dd offset cs_retuint16 ;; FFI_TYPE_UINT16
+ dd offset cs_retsint16 ;; FFI_TYPE_SINT16
dd offset cs_retint ;; FFI_TYPE_UINT32
dd offset cs_retint ;; FFI_TYPE_SINT32
dd offset cs_retint64 ;; FFI_TYPE_UINT64
dd offset cs_retint64 ;; FFI_TYPE_SINT64
dd offset cs_retstruct ;; FFI_TYPE_STRUCT
dd offset cs_retint ;; FFI_TYPE_POINTER
- dd offset cs_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B
- dd offset cs_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B
+ dd offset cs_retsint8 ;; FFI_TYPE_SMALL_STRUCT_1B
+ dd offset cs_retsint16 ;; FFI_TYPE_SMALL_STRUCT_2B
dd offset cs_retint ;; FFI_TYPE_SMALL_STRUCT_4B
+ dd offset cs_retmsstruct ;; FFI_TYPE_MS_STRUCT
+
+cs_retuint8:
+ movzx eax, BYTE PTR [ecx]
+ jmp cs_epilogue
+
+cs_retsint8:
+ movsx eax, BYTE PTR [ecx]
+ jmp cs_epilogue
-cs_retint8:
- mov al, [ecx]
+cs_retuint16:
+ movzx eax, WORD PTR [ecx]
jmp cs_epilogue
-cs_retint16:
- mov ax, [ecx]
+cs_retsint16:
+ movsx eax, WORD PTR [ecx]
jmp cs_epilogue
cs_retint:
@@ -252,6 +277,12 @@ cs_retstruct:
;; Epilogue code is autogenerated.
ret 4
+cs_retmsstruct:
+ ;; Caller expects us to return a pointer to the real return value.
+ mov eax, ecx
+ ;; Caller doesn't expects us to pop struct return value pointer hidden arg.
+ jmp cs_epilogue
+
cs_epilogue:
;; Epilogue code is autogenerated.
ret
@@ -264,7 +295,16 @@ ffi_closure_SYSV ENDP
#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
#define CIF_FLAGS_OFFSET 20
-ffi_closure_raw_SYSV PROC NEAR USES esi
+ffi_closure_raw_THISCALL PROC NEAR USES esi FORCEFRAME
+ sub esp, 36
+ mov esi, [eax + RAW_CLOSURE_CIF_OFFSET] ;; closure->cif
+ mov edx, [eax + RAW_CLOSURE_USER_DATA_OFFSET] ;; closure->user_data
+ mov [esp + 12], edx
+ lea edx, [ebp + 12]
+ jmp stubraw
+ffi_closure_raw_THISCALL ENDP
+
+ffi_closure_raw_SYSV PROC NEAR USES esi FORCEFRAME
;; the ffi_closure ctx is passed in eax by the trampoline.
sub esp, 40
@@ -272,6 +312,7 @@ ffi_closure_raw_SYSV PROC NEAR USES esi
mov edx, [eax + RAW_CLOSURE_USER_DATA_OFFSET] ;; closure->user_data
mov [esp + 12], edx ;; user_data
lea edx, [ebp + 8]
+stubraw::
mov [esp + 8], edx ;; raw_args
lea edx, [ebp - 24]
mov [esp + 4], edx ;; &res
@@ -289,26 +330,35 @@ cr_jumpdata:
dd offset cr_retfloat ;; FFI_TYPE_FLOAT
dd offset cr_retdouble ;; FFI_TYPE_DOUBLE
dd offset cr_retlongdouble ;; FFI_TYPE_LONGDOUBLE
- dd offset cr_retint8 ;; FFI_TYPE_UINT8
- dd offset cr_retint8 ;; FFI_TYPE_SINT8
- dd offset cr_retint16 ;; FFI_TYPE_UINT16
- dd offset cr_retint16 ;; FFI_TYPE_SINT16
+ dd offset cr_retuint8 ;; FFI_TYPE_UINT8
+ dd offset cr_retsint8 ;; FFI_TYPE_SINT8
+ dd offset cr_retuint16 ;; FFI_TYPE_UINT16
+ dd offset cr_retsint16 ;; FFI_TYPE_SINT16
dd offset cr_retint ;; FFI_TYPE_UINT32
dd offset cr_retint ;; FFI_TYPE_SINT32
dd offset cr_retint64 ;; FFI_TYPE_UINT64
dd offset cr_retint64 ;; FFI_TYPE_SINT64
dd offset cr_epilogue ;; FFI_TYPE_STRUCT
dd offset cr_retint ;; FFI_TYPE_POINTER
- dd offset cr_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B
- dd offset cr_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B
+ dd offset cr_retsint8 ;; FFI_TYPE_SMALL_STRUCT_1B
+ dd offset cr_retsint16 ;; FFI_TYPE_SMALL_STRUCT_2B
dd offset cr_retint ;; FFI_TYPE_SMALL_STRUCT_4B
+ dd offset cr_epilogue ;; FFI_TYPE_MS_STRUCT
+
+cr_retuint8:
+ movzx eax, BYTE PTR [ecx]
+ jmp cr_epilogue
+
+cr_retsint8:
+ movsx eax, BYTE PTR [ecx]
+ jmp cr_epilogue
-cr_retint8:
- mov al, [ecx]
+cr_retuint16:
+ movzx eax, WORD PTR [ecx]
jmp cr_epilogue
-cr_retint16:
- mov ax, [ecx]
+cr_retsint16:
+ movsx eax, WORD PTR [ecx]
jmp cr_epilogue
cr_retint:
@@ -362,26 +412,34 @@ cd_jumpdata:
dd offset cd_retfloat ;; FFI_TYPE_FLOAT
dd offset cd_retdouble ;; FFI_TYPE_DOUBLE
dd offset cd_retlongdouble ;; FFI_TYPE_LONGDOUBLE
- dd offset cd_retint8 ;; FFI_TYPE_UINT8
- dd offset cd_retint8 ;; FFI_TYPE_SINT8
- dd offset cd_retint16 ;; FFI_TYPE_UINT16
- dd offset cd_retint16 ;; FFI_TYPE_SINT16
+ dd offset cd_retuint8 ;; FFI_TYPE_UINT8
+ dd offset cd_retsint8 ;; FFI_TYPE_SINT8
+ dd offset cd_retuint16 ;; FFI_TYPE_UINT16
+ dd offset cd_retsint16 ;; FFI_TYPE_SINT16
dd offset cd_retint ;; FFI_TYPE_UINT32
dd offset cd_retint ;; FFI_TYPE_SINT32
dd offset cd_retint64 ;; FFI_TYPE_UINT64
dd offset cd_retint64 ;; FFI_TYPE_SINT64
dd offset cd_epilogue ;; FFI_TYPE_STRUCT
dd offset cd_retint ;; FFI_TYPE_POINTER
- dd offset cd_retint8 ;; FFI_TYPE_SMALL_STRUCT_1B
- dd offset cd_retint16 ;; FFI_TYPE_SMALL_STRUCT_2B
+ dd offset cd_retsint8 ;; FFI_TYPE_SMALL_STRUCT_1B
+ dd offset cd_retsint16 ;; FFI_TYPE_SMALL_STRUCT_2B
dd offset cd_retint ;; FFI_TYPE_SMALL_STRUCT_4B
-cd_retint8:
- mov al, [ecx]
+cd_retuint8:
+ movzx eax, BYTE PTR [ecx]
+ jmp cd_epilogue
+
+cd_retsint8:
+ movsx eax, BYTE PTR [ecx]
jmp cd_epilogue
-cd_retint16:
- mov ax, [ecx]
+cd_retuint16:
+ movzx eax, WORD PTR [ecx]
+ jmp cd_epilogue
+
+cd_retsint16:
+ movsx eax, WORD PTR [ecx]
jmp cd_epilogue
cd_retint:
@@ -502,6 +560,7 @@ _ffi_call_win32:
.long .Lretstruct1b /* FFI_TYPE_SMALL_STRUCT_1B */
.long .Lretstruct2b /* FFI_TYPE_SMALL_STRUCT_2B */
.long .Lretstruct4b /* FFI_TYPE_SMALL_STRUCT_4B */
+ .long .Lretstruct /* FFI_TYPE_MS_STRUCT */
1:
add %ecx, %ecx
add %ecx, %ecx
@@ -644,6 +703,7 @@ _ffi_closure_SYSV:
.long .Lcls_retstruct1 /* FFI_TYPE_SMALL_STRUCT_1B */
.long .Lcls_retstruct2 /* FFI_TYPE_SMALL_STRUCT_2B */
.long .Lcls_retstruct4 /* FFI_TYPE_SMALL_STRUCT_4B */
+ .long .Lcls_retmsstruct /* FFI_TYPE_MS_STRUCT */
1:
add %eax, %eax
@@ -708,6 +768,12 @@ _ffi_closure_SYSV:
popl %ebp
ret $0x4
+.Lcls_retmsstruct:
+ # Caller expects us to return a pointer to the real return value.
+ mov %ecx, %eax
+ # Caller doesn't expects us to pop struct return value pointer hidden arg.
+ jmp .Lcls_epilogue
+
.Lcls_noretval:
.Lcls_epilogue:
movl %ebp, %esp
@@ -722,7 +788,21 @@ _ffi_closure_SYSV:
#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
#define CIF_FLAGS_OFFSET 20
-
+ .balign 16
+ .globl _ffi_closure_raw_THISCALL
+#ifndef __OS2__
+ .def _ffi_closure_raw_THISCALL; .scl 2; .type 32; .endef
+#endif
+_ffi_closure_raw_THISCALL:
+ pushl %ebp
+ movl %esp, %ebp
+ pushl %esi
+ subl $36, %esp
+ movl RAW_CLOSURE_CIF_OFFSET(%eax), %esi /* closure->cif */
+ movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
+ movl %edx, 12(%esp) /* user_data */
+ leal 12(%ebp), %edx /* __builtin_dwarf_cfa () */
+ jmp .stubraw
# This assumes we are using gas.
.balign 16
.globl _ffi_closure_raw_SYSV
@@ -742,6 +822,7 @@ _ffi_closure_raw_SYSV:
movl RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
movl %edx, 12(%esp) /* user_data */
leal 8(%ebp), %edx /* __builtin_dwarf_cfa () */
+.stubraw:
movl %edx, 8(%esp) /* raw_args */
leal -24(%ebp), %edx
movl %edx, 4(%esp) /* &res */
@@ -770,6 +851,7 @@ _ffi_closure_raw_SYSV:
.long .Lrcls_retstruct1 /* FFI_TYPE_SMALL_STRUCT_1B */
.long .Lrcls_retstruct2 /* FFI_TYPE_SMALL_STRUCT_2B */
.long .Lrcls_retstruct4 /* FFI_TYPE_SMALL_STRUCT_4B */
+ .long .Lrcls_retstruct /* FFI_TYPE_MS_STRUCT */
1:
add %eax, %eax
add %eax, %eax
diff --git a/src/xtensa/ffi.c b/src/xtensa/ffi.c
new file mode 100644
index 0000000..fd94daf
--- /dev/null
+++ b/src/xtensa/ffi.c
@@ -0,0 +1,298 @@
+/* -----------------------------------------------------------------------
+ ffi.c - Copyright (c) 2013 Tensilica, Inc.
+
+ XTENSA Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+/*
+ |----------------------------------------|
+ | |
+ on entry to ffi_call ----> |----------------------------------------|
+ | caller stack frame for registers a0-a3 |
+ |----------------------------------------|
+ | |
+ | additional arguments |
+ entry of the function ---> |----------------------------------------|
+ | copy of function arguments a2-a7 |
+ | - - - - - - - - - - - - - |
+ | |
+
+ The area below the entry line becomes the new stack frame for the function.
+
+*/
+
+
+#define FFI_TYPE_STRUCT_REGS FFI_TYPE_LAST
+
+
+extern void ffi_call_SYSV(void *rvalue, unsigned rsize, unsigned flags,
+ void(*fn)(void), unsigned nbytes, extended_cif*);
+extern void ffi_closure_SYSV(void) FFI_HIDDEN;
+
+ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
+{
+ switch(cif->rtype->type) {
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT16:
+ cif->flags = cif->rtype->type;
+ break;
+ case FFI_TYPE_VOID:
+ case FFI_TYPE_FLOAT:
+ cif->flags = FFI_TYPE_UINT32;
+ break;
+ case FFI_TYPE_DOUBLE:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ cif->flags = FFI_TYPE_UINT64; // cif->rtype->type;
+ break;
+ case FFI_TYPE_STRUCT:
+ cif->flags = FFI_TYPE_STRUCT; //_REGS;
+ /* Up to 16 bytes are returned in registers */
+ if (cif->rtype->size > 4 * 4) {
+ /* returned structure is referenced by a register; use 8 bytes
+ (including 4 bytes for potential additional alignment) */
+ cif->flags = FFI_TYPE_STRUCT;
+ cif->bytes += 8;
+ }
+ break;
+
+ default:
+ cif->flags = FFI_TYPE_UINT32;
+ break;
+ }
+
+ /* Round the stack up to a full 4 register frame, just in case
+ (we use this size in movsp). This way, it's also a multiple of
+ 8 bytes for 64-bit arguments. */
+ cif->bytes = ALIGN(cif->bytes, 16);
+
+ return FFI_OK;
+}
+
+void ffi_prep_args(extended_cif *ecif, unsigned char* stack)
+{
+ unsigned int i;
+ unsigned long *addr;
+ ffi_type **ptr;
+
+ union {
+ void **v;
+ char **c;
+ signed char **sc;
+ unsigned char **uc;
+ signed short **ss;
+ unsigned short **us;
+ unsigned int **i;
+ long long **ll;
+ float **f;
+ double **d;
+ } p_argv;
+
+ /* Verify that everything is aligned up properly */
+ FFI_ASSERT (((unsigned long) stack & 0x7) == 0);
+
+ p_argv.v = ecif->avalue;
+ addr = (unsigned long*)stack;
+
+ /* structures with a size greater than 16 bytes are passed in memory */
+ if (ecif->cif->rtype->type == FFI_TYPE_STRUCT && ecif->cif->rtype->size > 16)
+ {
+ *addr++ = (unsigned long)ecif->rvalue;
+ }
+
+ for (i = ecif->cif->nargs, ptr = ecif->cif->arg_types;
+ i > 0;
+ i--, ptr++, p_argv.v++)
+ {
+ switch ((*ptr)->type)
+ {
+ case FFI_TYPE_SINT8:
+ *addr++ = **p_argv.sc;
+ break;
+ case FFI_TYPE_UINT8:
+ *addr++ = **p_argv.uc;
+ break;
+ case FFI_TYPE_SINT16:
+ *addr++ = **p_argv.ss;
+ break;
+ case FFI_TYPE_UINT16:
+ *addr++ = **p_argv.us;
+ break;
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_INT:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_POINTER:
+ *addr++ = **p_argv.i;
+ break;
+ case FFI_TYPE_DOUBLE:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ if (((unsigned long)addr & 4) != 0)
+ addr++;
+ *(unsigned long long*)addr = **p_argv.ll;
+ addr += sizeof(unsigned long long) / sizeof (addr);
+ break;
+
+ case FFI_TYPE_STRUCT:
+ {
+ unsigned long offs;
+ unsigned long size;
+
+ if (((unsigned long)addr & 4) != 0 && (*ptr)->alignment > 4)
+ addr++;
+
+ offs = (unsigned long) addr - (unsigned long) stack;
+ size = (*ptr)->size;
+
+ /* Entire structure must fit the argument registers or referenced */
+ if (offs < FFI_REGISTER_NARGS * 4
+ && offs + size > FFI_REGISTER_NARGS * 4)
+ addr = (unsigned long*) (stack + FFI_REGISTER_NARGS * 4);
+
+ memcpy((char*) addr, *p_argv.c, size);
+ addr += (size + 3) / 4;
+ break;
+ }
+
+ default:
+ FFI_ASSERT(0);
+ }
+ }
+}
+
+
+void ffi_call(ffi_cif* cif, void(*fn)(void), void *rvalue, void **avalue)
+{
+ extended_cif ecif;
+ unsigned long rsize = cif->rtype->size;
+ int flags = cif->flags;
+ void *alloc = NULL;
+
+ ecif.cif = cif;
+ ecif.avalue = avalue;
+
+ /* Note that for structures that are returned in registers (size <= 16 bytes)
+ we allocate a temporary buffer and use memcpy to copy it to the final
+ destination. The reason is that the target address might be misaligned or
+ the length not a multiple of 4 bytes. Handling all those cases would be
+ very complex. */
+
+ if (flags == FFI_TYPE_STRUCT && (rsize <= 16 || rvalue == NULL))
+ {
+ alloc = alloca(ALIGN(rsize, 4));
+ ecif.rvalue = alloc;
+ }
+ else
+ {
+ ecif.rvalue = rvalue;
+ }
+
+ if (cif->abi != FFI_SYSV)
+ FFI_ASSERT(0);
+
+ ffi_call_SYSV (ecif.rvalue, rsize, cif->flags, fn, cif->bytes, &ecif);
+
+ if (alloc != NULL && rvalue != NULL)
+ memcpy(rvalue, alloc, rsize);
+}
+
+extern void ffi_trampoline();
+extern void ffi_cacheflush(void* start, void* end);
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+ ffi_cif* cif,
+ void (*fun)(ffi_cif*, void*, void**, void*),
+ void *user_data,
+ void *codeloc)
+{
+ /* copye trampoline to stack and patch 'ffi_closure_SYSV' pointer */
+ memcpy(closure->tramp, ffi_trampoline, FFI_TRAMPOLINE_SIZE);
+ *(unsigned int*)(&closure->tramp[8]) = (unsigned int)ffi_closure_SYSV;
+
+ // Do we have this function?
+ // __builtin___clear_cache(closer->tramp, closer->tramp + FFI_TRAMPOLINE_SIZE)
+ ffi_cacheflush(closure->tramp, closure->tramp + FFI_TRAMPOLINE_SIZE);
+
+ closure->cif = cif;
+ closure->fun = fun;
+ closure->user_data = user_data;
+ return FFI_OK;
+}
+
+
+long FFI_HIDDEN
+ffi_closure_SYSV_inner(ffi_closure *closure, void **values, void *rvalue)
+{
+ ffi_cif *cif;
+ ffi_type **arg_types;
+ void **avalue;
+ int i, areg;
+
+ cif = closure->cif;
+ if (cif->abi != FFI_SYSV)
+ return FFI_BAD_ABI;
+
+ areg = 0;
+
+ int rtype = cif->rtype->type;
+ if (rtype == FFI_TYPE_STRUCT && cif->rtype->size > 4 * 4)
+ {
+ rvalue = *values;
+ areg++;
+ }
+
+ cif = closure->cif;
+ arg_types = cif->arg_types;
+ avalue = alloca(cif->nargs * sizeof(void *));
+
+ for (i = 0; i < cif->nargs; i++)
+ {
+ if (arg_types[i]->alignment == 8 && (areg & 1) != 0)
+ areg++;
+
+ // skip the entry 16,a1 framework, add 16 bytes (4 registers)
+ if (areg == FFI_REGISTER_NARGS)
+ areg += 4;
+
+ if (arg_types[i]->type == FFI_TYPE_STRUCT)
+ {
+ int numregs = ((arg_types[i]->size + 3) & ~3) / 4;
+ if (areg < FFI_REGISTER_NARGS && areg + numregs > FFI_REGISTER_NARGS)
+ areg = FFI_REGISTER_NARGS + 4;
+ }
+
+ avalue[i] = &values[areg];
+ areg += (arg_types[i]->size + 3) / 4;
+ }
+
+ (closure->fun)(cif, rvalue, avalue, closure->user_data);
+
+ return rtype;
+}
diff --git a/src/xtensa/ffitarget.h b/src/xtensa/ffitarget.h
new file mode 100644
index 0000000..0ba728b
--- /dev/null
+++ b/src/xtensa/ffitarget.h
@@ -0,0 +1,53 @@
+/* -----------------------------------------------------------------*-C-*-
+ ffitarget.h - Copyright (c) 2013 Tensilica, Inc.
+ Target configuration macros for XTENSA.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#ifndef LIBFFI_TARGET_H
+#define LIBFFI_TARGET_H
+
+#ifndef LIBFFI_H
+#error "Please do not include ffitarget.h directly into your source. Use ffi.h instead."
+#endif
+
+#ifndef LIBFFI_ASM
+typedef unsigned long ffi_arg;
+typedef signed long ffi_sarg;
+
+typedef enum ffi_abi {
+ FFI_FIRST_ABI = 0,
+ FFI_SYSV,
+ FFI_LAST_ABI,
+ FFI_DEFAULT_ABI = FFI_SYSV
+} ffi_abi;
+#endif
+
+#define FFI_REGISTER_NARGS 6
+
+/* ---- Definitions for closures ----------------------------------------- */
+
+#define FFI_CLOSURES 1
+#define FFI_NATIVE_RAW_API 0
+#define FFI_TRAMPOLINE_SIZE 24
+
+#endif
diff --git a/src/xtensa/sysv.S b/src/xtensa/sysv.S
new file mode 100644
index 0000000..64e6a09
--- /dev/null
+++ b/src/xtensa/sysv.S
@@ -0,0 +1,253 @@
+/* -----------------------------------------------------------------------
+ sysv.S - Copyright (c) 2013 Tensilica, Inc.
+
+ XTENSA Foreign Function Interface
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ ``Software''), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be included
+ in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+ ----------------------------------------------------------------------- */
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+#define ENTRY(name) .text; .globl name; .type name,@function; .align 4; name:
+#define END(name) .size name , . - name
+
+/* Assert that the table below is in sync with ffi.h. */
+
+#if FFI_TYPE_UINT8 != 5 \
+ || FFI_TYPE_SINT8 != 6 \
+ || FFI_TYPE_UINT16 != 7 \
+ || FFI_TYPE_SINT16 != 8 \
+ || FFI_TYPE_UINT32 != 9 \
+ || FFI_TYPE_SINT32 != 10 \
+ || FFI_TYPE_UINT64 != 11
+#error "xtensa/sysv.S out of sync with ffi.h"
+#endif
+
+
+/* ffi_call_SYSV (rvalue, rbytes, flags, (*fnaddr)(), bytes, ecif)
+ void *rvalue; a2
+ unsigned long rbytes; a3
+ unsigned flags; a4
+ void (*fnaddr)(); a5
+ unsigned long bytes; a6
+ extended_cif* ecif) a7
+*/
+
+ENTRY(ffi_call_SYSV)
+
+ entry a1, 32 # 32 byte frame for using call8 below
+
+ mov a10, a7 # a10(->arg0): ecif
+ sub a11, a1, a6 # a11(->arg1): stack pointer
+ mov a7, a1 # fp
+ movsp a1, a11 # set new sp = old_sp - bytes
+
+ movi a8, ffi_prep_args
+ callx8 a8 # ffi_prep_args(ecif, stack)
+
+ # prepare to move stack pointer back up to 6 arguments
+ # note that 'bytes' is already aligned
+
+ movi a10, 6*4
+ sub a11, a6, a10
+ movgez a6, a10, a11
+ add a6, a1, a6
+
+
+ # we can pass up to 6 arguments in registers
+ # for simplicity, just load 6 arguments
+ # (the stack size is at least 32 bytes, so no risk to cross boundaries)
+
+ l32i a10, a1, 0
+ l32i a11, a1, 4
+ l32i a12, a1, 8
+ l32i a13, a1, 12
+ l32i a14, a1, 16
+ l32i a15, a1, 20
+
+ # move stack pointer
+
+ movsp a1, a6
+
+ callx8 a5 # (*fn)(args...)
+
+ # Handle return value(s)
+
+ beqz a2, .Lexit
+
+ movi a5, FFI_TYPE_STRUCT
+ bne a4, a5, .Lstore
+ movi a5, 16
+ blt a5, a3, .Lexit
+
+ s32i a10, a2, 0
+ blti a3, 5, .Lexit
+ addi a3, a3, -1
+ s32i a11, a2, 4
+ blti a3, 8, .Lexit
+ s32i a12, a2, 8
+ blti a3, 12, .Lexit
+ s32i a13, a2, 12
+
+.Lexit: retw
+
+.Lstore:
+ addi a4, a4, -FFI_TYPE_UINT8
+ bgei a4, 7, .Lexit # should never happen
+ movi a6, store_calls
+ add a4, a4, a4
+ addx4 a6, a4, a6 # store_table + idx * 8
+ jx a6
+
+ .align 8
+store_calls:
+ # UINT8
+ s8i a10, a2, 0
+ retw
+
+ # SINT8
+ .align 8
+ s8i a10, a2, 0
+ retw
+
+ # UINT16
+ .align 8
+ s16i a10, a2, 0
+ retw
+
+ # SINT16
+ .align 8
+ s16i a10, a2, 0
+ retw
+
+ # UINT32
+ .align 8
+ s32i a10, a2, 0
+ retw
+
+ # SINT32
+ .align 8
+ s32i a10, a2, 0
+ retw
+
+ # UINT64
+ .align 8
+ s32i a10, a2, 0
+ s32i a11, a2, 4
+ retw
+
+END(ffi_call_SYSV)
+
+
+/*
+ * void ffi_cacheflush (unsigned long start, unsigned long end)
+ */
+
+#define EXTRA_ARGS_SIZE 24
+
+ENTRY(ffi_cacheflush)
+
+ entry a1, 16
+
+1: dhwbi a2, 0
+ ihi a2, 0
+ addi a2, a2, 4
+ blt a2, a3, 1b
+
+ retw
+
+END(ffi_cacheflush)
+
+/* ffi_trampoline is copied to the stack */
+
+ENTRY(ffi_trampoline)
+
+ entry a1, 16 + (FFI_REGISTER_NARGS * 4) + (4 * 4) # [ 0]
+ j 2f # [ 3]
+ .align 4 # [ 6]
+1: .long 0 # [ 8]
+2: l32r a15, 1b # [12]
+ _mov a14, a0 # [15]
+ callx0 a15 # [18]
+ # [21]
+END(ffi_trampoline)
+
+/*
+ * ffi_closure()
+ *
+ * a0: closure + 21
+ * a14: return address (a0)
+ */
+
+ENTRY(ffi_closure_SYSV)
+
+ /* intentionally omitting entry here */
+
+ # restore return address (a0) and move pointer to closure to a10
+ addi a10, a0, -21
+ mov a0, a14
+
+ # allow up to 4 arguments as return values
+ addi a11, a1, 4 * 4
+
+ # save up to 6 arguments to stack (allocated by entry below)
+ s32i a2, a11, 0
+ s32i a3, a11, 4
+ s32i a4, a11, 8
+ s32i a5, a11, 12
+ s32i a6, a11, 16
+ s32i a7, a11, 20
+
+ movi a8, ffi_closure_SYSV_inner
+ mov a12, a1
+ callx8 a8 # .._inner(*closure, **avalue, *rvalue)
+
+ # load up to four return arguments
+ l32i a2, a1, 0
+ l32i a3, a1, 4
+ l32i a4, a1, 8
+ l32i a5, a1, 12
+
+ # (sign-)extend return value
+ movi a11, FFI_TYPE_UINT8
+ bne a10, a11, 1f
+ extui a2, a2, 0, 8
+ retw
+
+1: movi a11, FFI_TYPE_SINT8
+ bne a10, a11, 1f
+ sext a2, a2, 7
+ retw
+
+1: movi a11, FFI_TYPE_UINT16
+ bne a10, a11, 1f
+ extui a2, a2, 0, 16
+ retw
+
+1: movi a11, FFI_TYPE_SINT16
+ bne a10, a11, 1f
+ sext a2, a2, 15
+
+1: retw
+
+END(ffi_closure_SYSV)