summaryrefslogtreecommitdiff
path: root/mips
diff options
context:
space:
mode:
authorMandar Sahastrabuddhe <Mandar.Sahastrabuddhe@imgtec.com>2016-08-29 19:07:25 +0530
committerMandar Sahastrabuddhe <Mandar.Sahastrabuddhe@imgtec.com>2016-08-29 19:07:25 +0530
commitee5e39835c90897b8d43d547c1919b6b45d10465 (patch)
treebcb668f91aedfeecadbf46c64444c1590bb8b95f /mips
parent143434ac785f8a97710f494743f887609dffdc73 (diff)
downloadlibpng-ee5e39835c90897b8d43d547c1919b6b45d10465.tar.gz
Added build support for MIPS MSA
Also added one msa optimized function: png_read_filter_row_up_msa Signed-off-by: Mandar Sahastrabuddhe <Mandar.Sahastrabuddhe@imgtec.com>
Diffstat (limited to 'mips')
-rw-r--r--mips/filter_msa_intrinsics.c182
-rw-r--r--mips/mips_init.c115
2 files changed, 297 insertions, 0 deletions
diff --git a/mips/filter_msa_intrinsics.c b/mips/filter_msa_intrinsics.c
new file mode 100644
index 000000000..2943700ad
--- /dev/null
+++ b/mips/filter_msa_intrinsics.c
@@ -0,0 +1,182 @@
+
+/* filter_msa_intrinsics.c - MSA optimised filter functions
+ *
+ * Copyright (c) 2016 Glenn Randers-Pehrson
+ * Written by Mandar Sahastrabuddhe, August 2016.
+ * Last changed in libpng 1.6.25beta03 [August 29, 2016]
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+#include <stdio.h>
+#include <stdint.h>
+#include "../pngpriv.h"
+
+#ifdef PNG_READ_SUPPORTED
+
+/* This code requires -mmsa on the command line: */
+#if PNG_MIPS_MSA_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */
+
+#include <msa.h>
+
+/* libpng row pointers are not necessarily aligned to any particular boundary,
+ * however this code will only work with appropriate alignment. mips/mips_init.c
+ * refuses to compile unless PNG_ALIGNED_MEMORY_SUPPORTED is defined. png_ptr
+ * and png_ptrc cast through png_aligncast to avoid compiler warnings.
+ */
+#define png_ptr(type,pointer) png_aligncast(type *,pointer)
+#define png_ptrc(type,pointer) png_aligncastconst(const type *,pointer)
+
+/* png_ldr relies on a variable 'temp_pointer' of type 'type *' being in scope
+ * at the point of use. It is written this way just to hide the GCC strict
+ * aliasing warning; note that the code is safe because there never is an alias
+ * between the input and output pointers. No code in this file uses it yet.
+ */
+#define png_ldr(type,pointer)\
+ (temp_pointer = png_ptr(type,pointer), *temp_pointer)
+
+#if PNG_MIPS_MSA_OPT > 0
+
+#define LD_B(RTYPE, psrc) *((RTYPE *) (psrc))
+#define LD_UB(...) LD_B(v16u8, __VA_ARGS__)
+#define LD_B2(RTYPE, psrc, stride, out0, out1) \
+{ \
+ out0 = LD_B(RTYPE, (psrc)); \
+ out1 = LD_B(RTYPE, (psrc) + stride); \
+}
+#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__)
+#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) \
+{ \
+ LD_B2(RTYPE, (psrc), stride, out0, out1); \
+ LD_B2(RTYPE, (psrc) + 2 * stride , stride, out2, out3); \
+}
+#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__)
+
+#define ST_B(RTYPE, in, pdst) *((RTYPE *) (pdst)) = (in)
+#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)
+#define ST_B2(RTYPE, in0, in1, pdst, stride) \
+{ \
+ ST_B(RTYPE, in0, (pdst)); \
+ ST_B(RTYPE, in1, (pdst) + stride); \
+}
+#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__)
+#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) \
+{ \
+ ST_B2(RTYPE, in0, in1, (pdst), stride); \
+ ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
+}
+#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)
+
+#define ADD2(in0, in1, in2, in3, out0, out1) \
+{ \
+ out0 = in0 + in1; \
+ out1 = in2 + in3; \
+}
+#define ADD3(in0, in1, in2, in3, in4, in5, \
+ out0, out1, out2) \
+{ \
+ ADD2(in0, in1, in2, in3, out0, out1); \
+ out2 = in4 + in5; \
+}
+#define ADD4(in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) \
+{ \
+ ADD2(in0, in1, in2, in3, out0, out1); \
+ ADD2(in4, in5, in6, in7, out2, out3); \
+}
+
+/* Undo the PNG 'up' filter for one row using MSA vector additions.
+ *
+ * row_info  supplies rowbytes, the number of bytes in the row.
+ * row       filtered bytes on entry; each is replaced by its sum with the
+ *           byte at the same offset in prev_row.
+ * prev_row  the row above; only read.
+ *
+ * Returns nothing; 'row' is updated in place.
+ *
+ * All accesses go through LD_UB/ST_UB and their 2- and 4-way forms with a
+ * 16 byte stride, so the row is handled in whole 16 byte units. When
+ * rowbytes is not a multiple of 16 the last unit extends past rowbytes: up
+ * to 15 extra bytes of both buffers are read and of 'row' are written.
+ * NOTE(review): presumably the caller pads both buffers - confirm.
+ */
+void png_read_filter_row_up_msa(png_row_infop row_info, png_bytep row,
+ png_const_bytep prev_row)
+{
+   const png_size_t row_bytes = row_info->rowbytes;
+   png_size_t blocks_left = row_bytes >> 6; /* complete 64 byte blocks */
+   png_bytep dst = row;
+   png_const_bytep above = prev_row;
+   v16u8 cur0, cur1, cur2, cur3, up0, up1, up2, up3;
+
+   /* Bulk of the row: four vectors (64 bytes) per iteration. */
+   while (blocks_left != 0)
+   {
+      LD_UB4(dst, 16, cur0, cur1, cur2, cur3);
+      LD_UB4(above, 16, up0, up1, up2, up3);
+
+      ADD4(cur0, up0, cur1, up1, cur2, up2, cur3, up3,
+           cur0, cur1, cur2, cur3);
+
+      ST_UB4(cur0, cur1, cur2, cur3, dst, 16);
+
+      dst += 64;
+      above += 64;
+      blocks_left--;
+   }
+
+   /* Tail: 0..63 bytes remain. Round them up to a count of 16 byte vectors
+    * (0..4) and handle each count with the widest load/add/store available.
+    * Within a case every load is issued before the first store.
+    */
+   switch (((row_bytes & 0x3F) + 15) >> 4)
+   {
+      case 4: /* 49..63 bytes left */
+         LD_UB4(dst, 16, cur0, cur1, cur2, cur3);
+         LD_UB4(above, 16, up0, up1, up2, up3);
+
+         ADD4(cur0, up0, cur1, up1, cur2, up2, cur3, up3,
+              cur0, cur1, cur2, cur3);
+
+         ST_UB4(cur0, cur1, cur2, cur3, dst, 16);
+         break;
+
+      case 3: /* 33..48 bytes left */
+         LD_UB2(dst, 16, cur0, cur1);
+         LD_UB2(above, 16, up0, up1);
+         cur2 = LD_UB(dst + 32);
+         up2 = LD_UB(above + 32);
+
+         ADD3(cur0, up0, cur1, up1, cur2, up2, cur0, cur1, cur2);
+
+         ST_UB2(cur0, cur1, dst, 16);
+         ST_UB(cur2, dst + 32);
+         break;
+
+      case 2: /* 17..32 bytes left */
+         LD_UB2(dst, 16, cur0, cur1);
+         LD_UB2(above, 16, up0, up1);
+
+         ADD2(cur0, up0, cur1, up1, cur0, cur1);
+
+         ST_UB2(cur0, cur1, dst, 16);
+         break;
+
+      case 1: /* 1..16 bytes left */
+         cur0 = LD_UB(dst);
+         up0 = LD_UB(above);
+
+         cur0 = cur0 + up0;
+
+         ST_UB(cur0, dst);
+         break;
+
+      default: /* rowbytes is a multiple of 64: nothing left */
+         break;
+   }
+}
+
+#endif /* PNG_MIPS_MSA_OPT > 0 */
+#endif /* PNG_MIPS_MSA_IMPLEMENTATION == 1 (intrinsics) */
+#endif /* READ */
diff --git a/mips/mips_init.c b/mips/mips_init.c
new file mode 100644
index 000000000..fd32eea35
--- /dev/null
+++ b/mips/mips_init.c
@@ -0,0 +1,115 @@
+
+/* mips_init.c - MSA optimised filter functions
+ *
+ * Copyright (c) 2016 Glenn Randers-Pehrson
+ * Written by Mandar Sahastrabuddhe, 2016.
+ * Last changed in libpng 1.6.25beta03 [August 29, 2016]
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+/* Below, after checking __linux__, various non-C90 POSIX 1003.1 functions are
+ * called.
+ */
+#define _POSIX_SOURCE 1
+
+#include <stdio.h>
+#include "../pngpriv.h"
+
+#ifdef PNG_READ_SUPPORTED
+
+#if PNG_MIPS_MSA_OPT > 0
+#ifdef PNG_MIPS_MSA_CHECK_SUPPORTED /* Do run-time checks */
+/* WARNING: it is strongly recommended that you do not build libpng with
+ * run-time checks for CPU features if at all possible. In the case of the MIPS
+ * MSA instructions there is no processor-specific way of detecting the
+ * presence of the required support, therefore run-time detection is extremely
+ * OS specific.
+ *
+ * You may set the macro PNG_MIPS_MSA_FILE to the file name of file containing
+ * a fragment of C source code which defines the png_have_msa function. There
+ * are a number of implementations in contrib/mips-msa, but the only one that
+ * has partial support is contrib/mips-msa/linux.c - a generic Linux
+ * implementation which reads /proc/cpuinfo.
+ */
+#ifndef PNG_MIPS_MSA_FILE /* no detection fragment chosen by the builder */
+# ifdef __linux__ /* fall back to the generic Linux implementation */
+# define PNG_MIPS_MSA_FILE "contrib/mips-msa/linux.c"
+# endif /* __linux__ */
+#endif /* !PNG_MIPS_MSA_FILE */
+
+#ifdef PNG_MIPS_MSA_FILE
+
+#include <signal.h> /* for sig_atomic_t */
+static int png_have_msa(png_structp png_ptr); /* defined by the fragment */
+#include PNG_MIPS_MSA_FILE
+
+#else /* PNG_MIPS_MSA_FILE */
+# error "PNG_MIPS_MSA_FILE undefined: no support for run-time MIPS MSA checks"
+#endif /* PNG_MIPS_MSA_FILE */
+#endif /* PNG_MIPS_MSA_CHECK_SUPPORTED */
+
+#ifndef PNG_ALIGNED_MEMORY_SUPPORTED
+# error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED"
+#endif
+
+/* Install the MSA read filter(s) into pp->read_filter.
+ * The switch statement is compiled in for MIPS_MSA_API, the call to
+ * png_have_msa is compiled in for MIPS_MSA_CHECK. If both are defined the
+ * check is only performed if the API has not set the MSA option on or off
+ * explicitly. In this case the check controls what happens.
+ */
+void
+png_init_filter_functions_msa(png_structp pp, unsigned int bpp)
+{
+#ifdef PNG_MIPS_MSA_API_SUPPORTED
+   switch ((pp->options >> PNG_MIPS_MSA) & 3)
+   {
+      case PNG_OPTION_UNSET:
+         /* Allow the run-time check to execute if it has been enabled -
+          * thus both API and CHECK can be turned on. If it isn't supported
+          * this case will fall through to the 'default' below, which just
+          * returns.
+          */
+#endif /* PNG_MIPS_MSA_API_SUPPORTED */
+#ifdef PNG_MIPS_MSA_CHECK_SUPPORTED
+         {
+            static volatile sig_atomic_t no_msa = -1; /* not checked */
+
+            if (no_msa < 0)
+               no_msa = !png_have_msa(pp);
+
+            if (no_msa)
+               return;
+         }
+#ifdef PNG_MIPS_MSA_API_SUPPORTED
+         break;
+#endif
+#endif /* PNG_MIPS_MSA_CHECK_SUPPORTED */
+
+#ifdef PNG_MIPS_MSA_API_SUPPORTED
+      default: /* OFF or INVALID */
+         return;
+
+      case PNG_OPTION_ON:
+         /* Option turned on */
+         break;
+   }
+#endif
+
+   /* bpp is not consulted: the single filter installed below is used for
+    * every pixel size. The cast avoids an unused-parameter warning. */
+   (void)bpp;
+
+   /* IMPORTANT: any new external functions used here must be declared using
+    * PNG_INTERNAL_FUNCTION in ../pngpriv.h so that the 'prefix' option to
+    * configure works: ./configure --with-libpng-prefix=foobar_
+    * Verify this by running that command, doing a build and examining
+    * pngprefix.h; it must contain a #define for every external function you
+    * add. (This happens automatically for the initialization function.)
+    */
+   pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_msa;
+}
+#endif /* PNG_MIPS_MSA_OPT > 0 */
+#endif /* READ */