20 files changed, 526 insertions, 44 deletions
diff --git a/ANNOUNCE b/ANNOUNCE
index dd3484ca1..004f0e207 100644
--- a/ANNOUNCE
+++ b/ANNOUNCE
@@ -1,5 +1,5 @@
 
-Libpng 1.7.0beta21 - October 13, 2013
+Libpng 1.7.0beta21 - November 2, 2013
 
 This is not intended to be a public release.  It will be replaced
 within a few weeks by a public version or by another test version.
@@ -398,7 +398,20 @@ Version 1.7.0beta20 [October 13, 2013]
   Make autogen.sh work with automake 1.13 as well as 1.14. Do this by always
     removing the 1.14 'compile' script but never checking for it.
 
-Version 1.7.0beta21 [October 13, 2013]
+Version 1.7.0beta21 [November 2, 2013]
+  Added ARMv8 support (James Yu <james.yu at linaro.org>).  Added file
+    arm/filter_neon_intrinsics.c; enable with -mfpu=neon.
+  Revised pngvalid to generate size images with as many filters as it can
+    manage, limited by the number of rows.
+  Cleaned up ARM NEON compilation handling. The tests are now in pngpriv.h
+    and detect the broken GCC compilers.
+  Allow clang derived from older GCC versions to use ARM intrinsics. This
+    causes all clang builds that use -mfpu=neon to use the intrinsics code,
+    not the assembler code.  This has only been tested on iOS 7. It may be
+    necessary to exclude some earlier clang versions but this seems unlikely.
+  Changed NEON implementation selection mechanism. This allows assembler
+    or intrinsics to be turned on at compile time during the build by defining
+    PNG_ARM_NEON_IMPLEMENTATION to the correct value (2 or 1).  This macro
 
 Send comments/corrections/commendations to png-mng-implement at lists.sf.net
 (subscription required; visit
diff --git a/CHANGES b/CHANGES
index cff8756b0..14f955e43 100644
--- a/CHANGES
+++ b/CHANGES
@@ -4687,7 +4687,21 @@ Version 1.7.0beta20 [October 13, 2013]
   Make autogen.sh work with automake 1.13 as well as 1.14. Do this by always
     removing the 1.14 'compile' script but never checking for it.
 
-Version 1.7.0beta21 [October 13, 2013]
+Version 1.7.0beta21 [November 2, 2013]
+  Added ARMv8 support (James Yu <james.yu at linaro.org>).  Added file
+    arm/filter_neon_intrinsics.c; enable with -mfpu=neon.
+  Revised pngvalid to generate size images with as many filters as it can
+    manage, limited by the number of rows.
+  Cleaned up ARM NEON compilation handling. The tests are now in pngpriv.h
+    and detect the broken GCC compilers.
+  Allow clang derived from older GCC versions to use ARM intrinsics. This
+    causes all clang builds that use -mfpu=neon to use the intrinsics code,
+    not the assembler code.  This has only been tested on iOS 7. It may be
+    necessary to exclude some earlier clang versions but this seems unlikely.
+  Changed NEON implementation selection mechanism. This allows assembler
+    or intrinsics to be turned on at compile time during the build by defining
+    PNG_ARM_NEON_IMPLEMENTATION to the correct value (2 or 1).  This macro
+    is undefined by default and the build type is selected in pngpriv.h.
 
 Send comments/corrections/commendations to png-mng-implement at lists.sf.net
 (subscription required; visit
diff --git a/LICENSE b/LICENSE
index 4cc64bfed..4038673d5 100644
--- a/LICENSE
+++ b/LICENSE
@@ -10,7 +10,7 @@ this sentence.
 
 This code is released under the libpng license.
 
-libpng versions 1.2.6, August 15, 2004, through 1.7.0beta21, October 13, 2013, are
+libpng versions 1.2.6, August 15, 2004, through 1.7.0beta21, November 2, 2013, are
 Copyright (c) 2004, 2006-2013 Glenn Randers-Pehrson, and are
 distributed according to the same disclaimer and license as libpng-1.2.5
 with the following individual added to the list of Contributing Authors
@@ -108,4 +108,4 @@ certification mark of the Open Source Initiative.
 
 Glenn Randers-Pehrson
 glennrp at users.sourceforge.net
-October 13, 2013
+November 2, 2013
diff --git a/Makefile.am b/Makefile.am
index 7b3dadb12..c7b5916b2 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -82,7 +82,7 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = png.c pngerror.c\
 
 if PNG_ARM_NEON
 libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += arm/arm_init.c\
-	arm/filter_neon.S
+	arm/filter_neon.S arm/filter_neon_intrinsics.c
 endif
 
 nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h
@@ -326,3 +326,8 @@ uninstall-hook:
 	rm -f '$(DESTDIR)$(libdir)/libpng.sl'
 	rm -f '$(DESTDIR)$(libdir)/libpng.dylib'
 	rm -f '$(DESTDIR)$(libdir)/libpng.dll.a'
+
+# The following addition ensures that 'make all' always builds the test programs
+# too.  It used to, but some change either in libpng or configure stopped this
+# working.
+all-am: $(check_PROGRAMS)
diff --git a/README b/README
index 3b8f30e57..f9083fc56 100644
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
-README for libpng version 1.7.0beta21 - October 13, 2013 (shared library 17.0)
+README for libpng version 1.7.0beta21 - November 2, 2013 (shared library 17.0)
 See the note about version numbers near the top of png.h
 
 See INSTALL for instructions on how to install libpng.
diff --git a/arm/filter_neon.S b/arm/filter_neon.S
index b8aef1053..c5d0b5313 100644
--- a/arm/filter_neon.S
+++ b/arm/filter_neon.S
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2013 Glenn Randers-Pehrson
  * Written by Mans Rullgard, 2011.
- * Last changed in libpng 1.5.17 [July 18, 2013]
+ * Last changed in libpng 1.6.7 [(PENDING RELEASE)]
  *
  * This code is released under the libpng license.
  * For conditions of distribution and use, see the disclaimer
@@ -20,6 +20,13 @@
 .section .note.GNU-stack,"",%progbits /* mark stack as non-executable */
 #endif
 
+/* Assembler NEON support - only works for 32-bit ARM (i.e. it does not work for
+ * ARM64).  The code in arm/filter_neon_intrinsics.c supports ARM64, however it
+ * only works if -mfpu=neon is specified on the GCC command line.  See pngpriv.h
+ * for the logic which sets PNG_USE_ARM_NEON_ASM:
+ */
+#if PNG_ARM_NEON_IMPLEMENTATION == 2 /* hand-coded assembler */
+
 #ifdef PNG_READ_SUPPORTED
 #if PNG_ARM_NEON_OPT > 0
 
@@ -235,3 +242,4 @@ func    png_read_filter_row_paeth3_neon, export=1
 endfunc
 #endif /* PNG_ARM_NEON_OPT > 0 */
 #endif /* PNG_READ_SUPPORTED */
+#endif /* PNG_ARM_NEON_IMPLEMENTATION == 2 (assembler) */
diff --git a/arm/filter_neon_intrinsics.c b/arm/filter_neon_intrinsics.c
new file mode 100644
index 000000000..ba39d61f3
--- /dev/null
+++ b/arm/filter_neon_intrinsics.c
@@ -0,0 +1,372 @@
+
+/* filter_neon_intrinsics.c - NEON optimised filter functions
+ *
+ * Copyright (c) 2013 Glenn Randers-Pehrson
+ * Written by James Yu <james.yu at linaro.org>, October 2013.
+ * Based on filter_neon.S, written by Mans Rullgard, 2011.
+ *
+ * Last changed in libpng 1.6.7 [(PENDING RELEASE)]
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+#include "pngpriv.h"
+
+/* This code requires -mfpu=neon on the command line: */
+#if PNG_ARM_NEON_IMPLEMENTATION == 1 /* intrinsics code */
+
+#include <arm_neon.h>
+
+/* libpng row pointers are not necessarily aligned to any particular boundary,
+ * however this code will only work with appropriate alignment.  arm/arm_init.c
+ * checks for this (and will not compile unless it is done), this code uses
+ * variants of png_aligncast to avoid compiler warnings.
+ */
+#define png_ptr(type,pointer) png_aligncast(type *,pointer)
+#define png_ptrc(type,pointer) png_aligncastconst(const type *,pointer)
+
+/* The following relies on a variable 'temp_pointer' being declared with type
+ * 'type'.  This is written this way just to hide the GCC strict aliasing
+ * warning; note that the code is safe because there never is an alias between
+ * the input and output pointers.
+ */
+#define png_ldr(type,pointer)\
+   (temp_pointer = png_ptr(type,pointer), *temp_pointer)
+
+#ifdef PNG_READ_SUPPORTED
+#if PNG_ARM_NEON_OPT > 0
+
+void
+png_read_filter_row_up_neon(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev_row)
+{
+   png_bytep rp = row;
+   png_bytep rp_stop = row + row_info->rowbytes;
+   png_const_bytep pp = prev_row;
+
+   for (; rp < rp_stop; rp += 16, pp += 16)
+   {
+      uint8x16_t qrp, qpp;
+
+      qrp = vld1q_u8(rp);
+      qpp = vld1q_u8(pp);
+      qrp = vaddq_u8(qrp, qpp);
+      vst1q_u8(rp, qrp);
+   }
+}
+
+void
+png_read_filter_row_sub3_neon(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev_row)
+{
+   png_bytep rp = row;
+   png_bytep rp_stop = row + row_info->rowbytes;
+
+   uint8x16_t vtmp = vld1q_u8(rp);
+   uint8x8x2_t *vrpt = png_ptr(uint8x8x2_t, &vtmp);
+   uint8x8x2_t vrp = *vrpt;
+
+   uint8x8x4_t vdest;
+   vdest.val[3] = vdup_n_u8(0);
+
+   for (; rp < rp_stop;)
+   {
+      uint8x8_t vtmp1, vtmp2;
+      uint32x2_t *temp_pointer;
+
+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 3);
+      vdest.val[0] = vadd_u8(vdest.val[3], vrp.val[0]);
+      vtmp2 = vext_u8(vrp.val[0], vrp.val[1], 6);
+      vdest.val[1] = vadd_u8(vdest.val[0], vtmp1);
+
+      vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1);
+      vdest.val[2] = vadd_u8(vdest.val[1], vtmp2);
+      vdest.val[3] = vadd_u8(vdest.val[2], vtmp1);
+
+      vtmp = vld1q_u8(rp + 12);
+      vrpt = png_ptr(uint8x8x2_t, &vtmp);
+      vrp = *vrpt;
+
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[0]), 0);
+      rp += 3;
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[1]), 0);
+      rp += 3;
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[2]), 0);
+      rp += 3;
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[3]), 0);
+      rp += 3;
+   }
+
+   PNG_UNUSED(prev_row)
+}
+
+void
+png_read_filter_row_sub4_neon(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev_row)
+{
+   png_bytep rp = row;
+   png_bytep rp_stop = row + row_info->rowbytes;
+
+   uint8x8x4_t vdest;
+   vdest.val[3] = vdup_n_u8(0);
+
+   for (; rp < rp_stop; rp += 16)
+   {
+      uint32x2x4_t vtmp = vld4_u32(png_ptr(uint32_t,rp));
+      uint8x8x4_t *vrpt = png_ptr(uint8x8x4_t,&vtmp);
+      uint8x8x4_t vrp = *vrpt;
+      uint32x2x4_t *temp_pointer;
+
+      vdest.val[0] = vadd_u8(vdest.val[3], vrp.val[0]);
+      vdest.val[1] = vadd_u8(vdest.val[0], vrp.val[1]);
+      vdest.val[2] = vadd_u8(vdest.val[1], vrp.val[2]);
+      vdest.val[3] = vadd_u8(vdest.val[2], vrp.val[3]);
+      vst4_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2x4_t,&vdest), 0);
+   }
+
+   PNG_UNUSED(prev_row)
+}
+
+void
+png_read_filter_row_avg3_neon(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev_row)
+{
+   png_bytep rp = row;
+   png_const_bytep pp = prev_row;
+   png_bytep rp_stop = row + row_info->rowbytes;
+
+   uint8x16_t vtmp;
+   uint8x8x2_t *vrpt;
+   uint8x8x2_t vrp;
+   uint8x8x4_t vdest;
+   vdest.val[3] = vdup_n_u8(0);
+
+   vtmp = vld1q_u8(rp);
+   vrpt = png_ptr(uint8x8x2_t,&vtmp);
+   vrp = *vrpt;
+
+   for (; rp < rp_stop; pp += 12)
+   {
+      uint8x8_t vtmp1, vtmp2, vtmp3;
+
+      uint8x8x2_t *vppt;
+      uint8x8x2_t vpp;
+
+      uint32x2_t *temp_pointer;
+
+      vtmp = vld1q_u8(pp);
+      vppt = png_ptr(uint8x8x2_t,&vtmp);
+      vpp = *vppt;
+
+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 3);
+      vdest.val[0] = vhadd_u8(vdest.val[3], vpp.val[0]);
+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
+
+      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], 3);
+      vtmp3 = vext_u8(vrp.val[0], vrp.val[1], 6);
+      vdest.val[1] = vhadd_u8(vdest.val[0], vtmp2);
+      vdest.val[1] = vadd_u8(vdest.val[1], vtmp1);
+
+      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], 6);
+      vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1);
+
+      vtmp = vld1q_u8(rp + 12);
+      vrpt = png_ptr(uint8x8x2_t,&vtmp);
+      vrp = *vrpt;
+
+      vdest.val[2] = vhadd_u8(vdest.val[1], vtmp2);
+      vdest.val[2] = vadd_u8(vdest.val[2], vtmp3);
+
+      vtmp2 = vext_u8(vpp.val[1], vpp.val[1], 1);
+
+      vdest.val[3] = vhadd_u8(vdest.val[2], vtmp2);
+      vdest.val[3] = vadd_u8(vdest.val[3], vtmp1);
+
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[0]), 0);
+      rp += 3;
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[1]), 0);
+      rp += 3;
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[2]), 0);
+      rp += 3;
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[3]), 0);
+      rp += 3;
+   }
+}
+
+void
+png_read_filter_row_avg4_neon(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev_row)
+{
+   png_bytep rp = row;
+   png_bytep rp_stop = row + row_info->rowbytes;
+   png_const_bytep pp = prev_row;
+
+   uint8x8x4_t vdest;
+   vdest.val[3] = vdup_n_u8(0);
+
+   for (; rp < rp_stop; rp += 16, pp += 16)
+   {
+      uint32x2x4_t vtmp;
+      uint8x8x4_t *vrpt, *vppt;
+      uint8x8x4_t vrp, vpp;
+      uint32x2x4_t *temp_pointer;
+
+      vtmp = vld4_u32(png_ptr(uint32_t,rp));
+      vrpt = png_ptr(uint8x8x4_t,&vtmp);
+      vrp = *vrpt;
+      vtmp = vld4_u32(png_ptrc(uint32_t,pp));
+      vppt = png_ptr(uint8x8x4_t,&vtmp);
+      vpp = *vppt;
+
+      vdest.val[0] = vhadd_u8(vdest.val[3], vpp.val[0]);
+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
+      vdest.val[1] = vhadd_u8(vdest.val[0], vpp.val[1]);
+      vdest.val[1] = vadd_u8(vdest.val[1], vrp.val[1]);
+      vdest.val[2] = vhadd_u8(vdest.val[1], vpp.val[2]);
+      vdest.val[2] = vadd_u8(vdest.val[2], vrp.val[2]);
+      vdest.val[3] = vhadd_u8(vdest.val[2], vpp.val[3]);
+      vdest.val[3] = vadd_u8(vdest.val[3], vrp.val[3]);
+
+      vst4_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2x4_t,&vdest), 0);
+   }
+}
+
+static uint8x8_t
+paeth(uint8x8_t a, uint8x8_t b, uint8x8_t c)
+{
+   uint8x8_t d, e;
+   uint16x8_t p1, pa, pb, pc;
+
+   p1 = vaddl_u8(a, b); /* a + b */
+   pc = vaddl_u8(c, c); /* c * 2 */
+   pa = vabdl_u8(b, c); /* pa */
+   pb = vabdl_u8(a, c); /* pb */
+   pc = vabdq_u16(p1, pc); /* pc */
+
+   p1 = vcleq_u16(pa, pb); /* pa <= pb */
+   pa = vcleq_u16(pa, pc); /* pa <= pc */
+   pb = vcleq_u16(pb, pc); /* pb <= pc */
+
+   p1 = vandq_u16(p1, pa); /* pa <= pb && pa <= pc */
+
+   d = vmovn_u16(pb);
+   e = vmovn_u16(p1);
+
+   d = vbsl_u8(d, b, c);
+   e = vbsl_u8(e, a, d);
+
+   return e;
+}
+
+void
+png_read_filter_row_paeth3_neon(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev_row)
+{
+   png_bytep rp = row;
+   png_const_bytep pp = prev_row;
+   png_bytep rp_stop = row + row_info->rowbytes;
+
+   uint8x16_t vtmp;
+   uint8x8x2_t *vrpt;
+   uint8x8x2_t vrp;
+   uint8x8_t vlast = vdup_n_u8(0);
+   uint8x8x4_t vdest;
+   vdest.val[3] = vdup_n_u8(0);
+
+   vtmp = vld1q_u8(rp);
+   vrpt = png_ptr(uint8x8x2_t,&vtmp);
+   vrp = *vrpt;
+
+   for (; rp < rp_stop; pp += 12)
+   {
+      uint8x8x2_t *vppt;
+      uint8x8x2_t vpp;
+      uint8x8_t vtmp1, vtmp2, vtmp3;
+      uint32x2_t *temp_pointer;
+
+      vtmp = vld1q_u8(pp);
+      vppt = png_ptr(uint8x8x2_t,&vtmp);
+      vpp = *vppt;
+
+      vdest.val[0] = paeth(vdest.val[3], vpp.val[0], vlast);
+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
+
+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 3);
+      vtmp2 = vext_u8(vpp.val[0], vpp.val[1], 3);
+      vdest.val[1] = paeth(vdest.val[0], vtmp2, vpp.val[0]);
+      vdest.val[1] = vadd_u8(vdest.val[1], vtmp1);
+
+      vtmp1 = vext_u8(vrp.val[0], vrp.val[1], 6);
+      vtmp3 = vext_u8(vpp.val[0], vpp.val[1], 6);
+      vdest.val[2] = paeth(vdest.val[1], vtmp3, vtmp2);
+      vdest.val[2] = vadd_u8(vdest.val[2], vtmp1);
+
+      vtmp1 = vext_u8(vrp.val[1], vrp.val[1], 1);
+      vtmp2 = vext_u8(vpp.val[1], vpp.val[1], 1);
+
+      vtmp = vld1q_u8(rp + 12);
+      vrpt = png_ptr(uint8x8x2_t,&vtmp);
+      vrp = *vrpt;
+
+      vdest.val[3] = paeth(vdest.val[2], vtmp2, vtmp3);
+      vdest.val[3] = vadd_u8(vdest.val[3], vtmp1);
+
+      vlast = vtmp2;
+
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[0]), 0);
+      rp += 3;
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[1]), 0);
+      rp += 3;
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[2]), 0);
+      rp += 3;
+      vst1_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2_t,&vdest.val[3]), 0);
+      rp += 3;
+   }
+}
+
+void
+png_read_filter_row_paeth4_neon(png_row_infop row_info, png_bytep row,
+   png_const_bytep prev_row)
+{
+   png_bytep rp = row;
+   png_bytep rp_stop = row + row_info->rowbytes;
+   png_const_bytep pp = prev_row;
+
+   uint8x8_t vlast = vdup_n_u8(0);
+   uint8x8x4_t vdest;
+   vdest.val[3] = vdup_n_u8(0);
+
+   for (; rp < rp_stop; rp += 16, pp += 16)
+   {
+      uint32x2x4_t vtmp;
+      uint8x8x4_t *vrpt, *vppt;
+      uint8x8x4_t vrp, vpp;
+      uint32x2x4_t *temp_pointer;
+
+      vtmp = vld4_u32(png_ptr(uint32_t,rp));
+      vrpt = png_ptr(uint8x8x4_t,&vtmp);
+      vrp = *vrpt;
+      vtmp = vld4_u32(png_ptrc(uint32_t,pp));
+      vppt = png_ptr(uint8x8x4_t,&vtmp);
+      vpp = *vppt;
+
+      vdest.val[0] = paeth(vdest.val[3], vpp.val[0], vlast);
+      vdest.val[0] = vadd_u8(vdest.val[0], vrp.val[0]);
+      vdest.val[1] = paeth(vdest.val[0], vpp.val[1], vpp.val[0]);
+      vdest.val[1] = vadd_u8(vdest.val[1], vrp.val[1]);
+      vdest.val[2] = paeth(vdest.val[1], vpp.val[2], vpp.val[1]);
+      vdest.val[2] = vadd_u8(vdest.val[2], vrp.val[2]);
+      vdest.val[3] = paeth(vdest.val[2], vpp.val[3], vpp.val[2]);
+      vdest.val[3] = vadd_u8(vdest.val[3], vrp.val[3]);
+
+      vlast = vpp.val[3];
+
+      vst4_lane_u32(png_ptr(uint32_t,rp), png_ldr(uint32x2x4_t,&vdest), 0);
+   }
+}
+
+#endif /* PNG_ARM_NEON_OPT > 0 */
+#endif /* PNG_READ_SUPPORTED */
+#endif /* PNG_ARM_NEON_IMPLEMENTATION == 1 (intrinsics) */
diff --git a/contrib/libtests/pngvalid.c b/contrib/libtests/pngvalid.c
index 6eb1fe03e..cfd22b2f8 100644
--- a/contrib/libtests/pngvalid.c
+++ b/contrib/libtests/pngvalid.c
@@ -2963,6 +2963,12 @@ sbit_modification_init(sbit_modification *me, png_modifier *pm, png_byte sbit)
  * height of 16 rows.  The width and height are stored in the FILEID and, being
  * non-zero, indicate a size file.
  *
+ * Because the PNG filter code is typically the largest CPU consumer within
+ * libpng itself there is a tendency to attempt to optimize it.  This results in
+ * special case code which needs to be validated.  To cause this to happen the
+ * 'size' images are made to use each possible filter, in so far as this is
+ * possible for smaller images.
+ *
  * For palette image (colour type 3) multiple transform images are stored with
  * the same bit depth to allow testing of more colour combinations -
  * particularly important for testing the gamma code because libpng uses a
@@ -3678,6 +3684,7 @@ make_size_image(png_store* PNG_CONST ps, png_byte PNG_CONST colour_type,
          int npasses = npasses_from_interlace_type(pp, interlace_type);
          png_uint_32 y;
          int pass;
+         int nfilter = PNG_FILTER_VALUE_LAST;
          png_byte image[16][SIZE_ROWMAX];
 
          /* To help consistent error detection make the parts of this buffer
@@ -3731,7 +3738,22 @@ make_size_image(png_store* PNG_CONST ps, png_byte PNG_CONST colour_type,
                      continue;
                }
 
-               /* Only get to here if the row has some pixels in it. */
+               /* Only get to here if the row has some pixels in it, set the
+                * filters to 'all' for the very first row and thereafter to a
+                * single filter.  It isn't well documented, but png_set_filter
+                * does accept a filter number (per the spec) as well as a bit
+                * mask.
+                *
+                * The apparent wackiness of decrementing nfilter rather than
+                * incrementing is so that Paeth gets used in all images bigger
+                * than 1 row - it's the tricky one.
+                */
+               png_set_filter(pp, 0/*method*/,
+                  nfilter >= PNG_FILTER_VALUE_LAST ? PNG_ALL_FILTERS : nfilter);
+
+               if (nfilter-- == 0)
+                  nfilter = PNG_FILTER_VALUE_LAST-1;
+
                png_write_row(pp, row);
             }
          }
diff --git a/libpng-manual.txt b/libpng-manual.txt
index 5866c153f..f921a2f71 100644
--- a/libpng-manual.txt
+++ b/libpng-manual.txt
@@ -1,6 +1,6 @@
 libpng-manual.txt - A description on how to use and modify libpng
 
- libpng version 1.7.0beta21 - October 13, 2013
+ libpng version 1.7.0beta21 - November 2, 2013
  Updated and distributed by Glenn Randers-Pehrson
  <glennrp at users.sourceforge.net>
  Copyright (c) 1998-2013 Glenn Randers-Pehrson
@@ -11,7 +11,7 @@ libpng-manual.txt - A description on how to use and modify libpng
 
  Based on:
 
- libpng versions 0.97, January 1998, through 1.7.0beta21 - October 13, 2013
+ libpng versions 0.97, January 1998, through 1.7.0beta21 - November 2, 2013
  Updated and distributed by Glenn Randers-Pehrson
  Copyright (c) 1998-2013 Glenn Randers-Pehrson
 
@@ -5001,10 +5001,11 @@ where "rp" indicates a "restricted pointer".
 
 Error detection in some chunks has improved; in particular the iCCP chunk
 reader now does pretty complete validation of the basic format.  Some bad
-profiles that were previously accepted are now rejected, in particular the
-very old broken Microsoft/HP sRGB profile.  The PNG spec requirement that
-only grayscale profiles may appear in images with color type 0 or 4 and that
-even if the image only contains gray pixels, only RGB profiles may appear
+profiles that were previously accepted are now accepted with a warning or
+rejected, depending upon the png_set_benign_errors() setting, in particular the
+very old broken Microsoft/HP 3144-byte sRGB profile.  The PNG spec requirement
+that only grayscale profiles may appear in images with color type 0 or 4 and
+that even if the image only contains gray pixels, only RGB profiles may appear
 in images with color type 2, 3, or 6, is now enforced.  The sRGB chunk
 is allowed to appear in images with any color type.
 
@@ -5013,7 +5014,9 @@ an empty language field or an empty translated keyword.  Both of these
 are allowed by the PNG specification, so these warnings are no longer issued.
 
 The library now issues an error if the application attempts to set a
-transform after it calls png_read_update_info().
+transform after it calls png_read_update_info() or if it attempts to call
+both png_read_update_info() and png_start_read_image() or to call either
+of them more than once.
 
 The default condition for benign_errors is now to treat benign errors as
 warnings while reading and as errors while writing.
@@ -5247,7 +5250,7 @@ Other rules can be inferred by inspecting the libpng source.
 
 XVII. Y2K Compliance in libpng
 
-October 13, 2013
+November 2, 2013
 
 Since the PNG Development group is an ad-hoc body, we can't make
 an official declaration.
diff --git a/libpng.3 b/libpng.3
index adb41d042..6bc6a72be 100644
--- a/libpng.3
+++ b/libpng.3
@@ -1,4 +1,4 @@
-.TH LIBPNG 3 "October 13, 2013"
+.TH LIBPNG 3 "November 2, 2013"
 .SH NAME
 libpng \- Portable Network Graphics (PNG) Reference Library 1.7.0beta21
 .SH SYNOPSIS
@@ -494,7 +494,7 @@ Following is a copy of the libpng-manual.txt file that accompanies libpng.
 .SH LIBPNG.TXT
 libpng-manual.txt - A description on how to use and modify libpng
 
- libpng version 1.7.0beta21 - October 13, 2013
+ libpng version 1.7.0beta21 - November 2, 2013
  Updated and distributed by Glenn Randers-Pehrson
  <glennrp at users.sourceforge.net>
  Copyright (c) 1998-2013 Glenn Randers-Pehrson
@@ -505,7 +505,7 @@ libpng-manual.txt - A description on how to use and modify libpng
 
  Based on:
 
- libpng versions 0.97, January 1998, through 1.7.0beta21 - October 13, 2013
+ libpng versions 0.97, January 1998, through 1.7.0beta21 - November 2, 2013
  Updated and distributed by Glenn Randers-Pehrson
  Copyright (c) 1998-2013 Glenn Randers-Pehrson
 
@@ -5496,10 +5496,11 @@ where "rp" indicates a "restricted pointer".
 
 Error detection in some chunks has improved; in particular the iCCP chunk
 reader now does pretty complete validation of the basic format.  Some bad
-profiles that were previously accepted are now rejected, in particular the
-very old broken Microsoft/HP sRGB profile.  The PNG spec requirement that
-only grayscale profiles may appear in images with color type 0 or 4 and that
-even if the image only contains gray pixels, only RGB profiles may appear
+profiles that were previously accepted are now accepted with a warning or
+rejected, depending upon the png_set_benign_errors() setting, in particular the
+very old broken Microsoft/HP 3144-byte sRGB profile.  The PNG spec requirement
+that only grayscale profiles may appear in images with color type 0 or 4 and
+that even if the image only contains gray pixels, only RGB profiles may appear
 in images with color type 2, 3, or 6, is now enforced.  The sRGB chunk
 is allowed to appear in images with any color type.
 
@@ -5508,7 +5509,9 @@ an empty language field or an empty translated keyword.  Both of these
 are allowed by the PNG specification, so these warnings are no longer issued.
 
 The library now issues an error if the application attempts to set a
-transform after it calls png_read_update_info().
+transform after it calls png_read_update_info() or if it attempts to call
+both png_read_update_info() and png_start_read_image() or to call either
+of them more than once.
 
 The default condition for benign_errors is now to treat benign errors as
 warnings while reading and as errors while writing.
@@ -5742,7 +5745,7 @@ Other rules can be inferred by inspecting the libpng source.
 
 .SH XVII. Y2K Compliance in libpng
 
-October 13, 2013
+November 2, 2013
 
 Since the PNG Development group is an ad-hoc body, we can't make
 an official declaration.
@@ -6012,7 +6015,7 @@ possible without all of you.
 
 Thanks to Frank J. T. Wojcik for helping with the documentation.
 
-Libpng version 1.7.0beta21 - October 13, 2013:
+Libpng version 1.7.0beta21 - November 2, 2013:
 Initially created in 1995 by Guy Eric Schalnat, then of Group 42, Inc.
 Currently maintained by Glenn Randers-Pehrson (glennrp at users.sourceforge.net).
 
@@ -6035,7 +6038,7 @@ this sentence.
 
 This code is released under the libpng license.
 
-libpng versions 1.2.6, August 15, 2004, through 1.7.0beta21, October 13, 2013, are
+libpng versions 1.2.6, August 15, 2004, through 1.7.0beta21, November 2, 2013, are
 Copyright (c) 2004,2006-2007 Glenn Randers-Pehrson, and are
 distributed according to the same disclaimer and license as libpng-1.2.5
 with the following individual added to the list of Contributing Authors
@@ -6134,7 +6137,7 @@ certification mark of the Open Source Initiative.
 
 Glenn Randers-Pehrson
 glennrp at users.sourceforge.net
-October 13, 2013
+November 2, 2013
 
 .\" end of man page
 
diff --git a/libpngpf.3 b/libpngpf.3
index 529dd9277..c7014fcca 100644
--- a/libpngpf.3
+++ b/libpngpf.3
@@ -1,4 +1,4 @@
-.TH LIBPNGPF 3 "October 13, 2013"
+.TH LIBPNGPF 3 "November 2, 2013"
 .SH NAME
 libpng \- Portable Network Graphics (PNG) Reference Library 1.7.0beta21
 (private functions)
diff --git a/png.5 b/png.5
index f33fff21c..45d2047fe 100644
--- a/png.5
+++ b/png.5
@@ -1,4 +1,4 @@
-.TH PNG 5 "October 13, 2013"
+.TH PNG 5 "November 2, 2013"
 .SH NAME
 png \- Portable Network Graphics (PNG) format
 .SH DESCRIPTION
diff --git a/png.c b/png.c
index f9710f747..0eabb2598 100644
--- a/png.c
+++ b/png.c
@@ -691,13 +691,13 @@ png_get_copyright(png_const_structrp png_ptr)
 #else
 #  ifdef __STDC__
    return PNG_STRING_NEWLINE \
-     "libpng version 1.7.0beta21 - October 13, 2013" PNG_STRING_NEWLINE \
+     "libpng version 1.7.0beta21 - November 2, 2013" PNG_STRING_NEWLINE \
      "Copyright (c) 1998-2013 Glenn Randers-Pehrson" PNG_STRING_NEWLINE \
      "Copyright (c) 1996-1997 Andreas Dilger" PNG_STRING_NEWLINE \
      "Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc." \
      PNG_STRING_NEWLINE;
 #  else
-      return "libpng version 1.7.0beta21 - October 13, 2013\
+      return "libpng version 1.7.0beta21 - November 2, 2013\
       Copyright (c) 1998-2013 Glenn Randers-Pehrson\
       Copyright (c) 1996-1997 Andreas Dilger\
       Copyright (c) 1995-1996 Guy Eric Schalnat, Group 42, Inc.";
diff --git a/png.h b/png.h
index 017884159..19abd601c 100644
--- a/png.h
+++ b/png.h
@@ -1,7 +1,7 @@
 
 /* png.h - header file for PNG reference library
  *
- * libpng version 1.7.0beta21 - October 13, 2013
+ * libpng version 1.7.0beta21 - November 2, 2013
  * Copyright (c) 1998-2013 Glenn Randers-Pehrson
  * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
  * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.)
@@ -11,7 +11,7 @@
  * Authors and maintainers:
  *   libpng versions 0.71, May 1995, through 0.88, January 1996: Guy Schalnat
  *   libpng versions 0.89c, June 1996, through 0.96, May 1997: Andreas Dilger
- *   libpng versions 0.97, January 1998, through 1.7.0beta21 - October 13, 2013: Glenn
+ *   libpng versions 0.97, January 1998, through 1.7.0beta21 - November 2, 2013: Glenn
  *   See also "Contributing Authors", below.
  *
  * Note about libpng version numbers:
@@ -200,7 +200,7 @@
  *
  * This code is released under the libpng license.
  *
- * libpng versions 1.2.6, August 15, 2004, through 1.7.0beta21, October 13, 2013, are
+ * libpng versions 1.2.6, August 15, 2004, through 1.7.0beta21, November 2, 2013, are
  * Copyright (c) 2004, 2006-2013 Glenn Randers-Pehrson, and are
  * distributed according to the same disclaimer and license as libpng-1.2.5
  * with the following individual added to the list of Contributing Authors:
@@ -312,7 +312,7 @@
  * Y2K compliance in libpng:
  * =========================
  *
- *    October 13, 2013
+ *    November 2, 2013
  *
  *    Since the PNG Development group is an ad-hoc body, we can't make
  *    an official declaration.
@@ -380,7 +380,7 @@
 /* Version information for png.h - this should match the version in png.c */
 #define PNG_LIBPNG_VER_STRING "1.7.0beta21"
 #define PNG_HEADER_VERSION_STRING \
-     " libpng version 1.7.0beta21 - October 13, 2013\n"
+     " libpng version 1.7.0beta21 - November 2, 2013\n"
 
 #define PNG_LIBPNG_VER_SONUM   17
 #define PNG_LIBPNG_VER_DLLNUM  17
diff --git a/pngconf.h b/pngconf.h
index d68cf7e1c..516de8048 100644
--- a/pngconf.h
+++ b/pngconf.h
@@ -1,7 +1,7 @@
 
 /* pngconf.h - machine configurable file for libpng
  *
- * libpng version 1.7.0beta21 - October 13, 2013
+ * libpng version 1.7.0beta21 - November 2, 2013
  *
  * Copyright (c) 1998-2013 Glenn Randers-Pehrson
  * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger)
diff --git a/pngpriv.h b/pngpriv.h
index 200dd4333..8143dad2b 100644
--- a/pngpriv.h
+++ b/pngpriv.h
@@ -163,7 +163,49 @@
     * callbacks to do this.
     */
 #  define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_neon
-#endif
+
+   /* By default the 'intrinsics' code in arm/filter_neon_intrinsics.c is used
+    * if possible - if __ARM_NEON__ is set and the compiler version is not known
+    * to be broken.  This is control by PNG_ARM_NEON_IMPLEMENTATION which can
+    * be:
+    *
+    *    1  The intrinsics code (the default with __ARM_NEON__)
+    *    2  The hand coded assembler (the default without __ARM_NEON__)
+    *
+    * It is possible to set PNG_ARM_NEON_IMPLEMENTATION in CPPFLAGS, however
+    * this is *NOT* supported and may cease to work even after a minor revision
+    * to libpng.  It *is* valid to do this for testing purposes, e.g. speed
+    * testing or a new compiler, but the results should be communicated to the
+    * libpng implementation list for incorporation in the next minor release.
+    */
+#  ifndef PNG_ARM_NEON_IMPLEMENTATION
+#     ifdef __ARM_NEON__
+#        if defined(__clang__)
+            /* At present it is unknown by the libpng developers which versions
+             * of clang support the intrinsics, however some or perhaps all
+             * versions do not work with the assembler so this may be
+             * irrelevant, so just use the default (do nothing here.)
+             */
+#        elif defined(__GNUC__)
+            /* GCC 4.5.4 NEON support is known to be broken.  4.6.3 is known to
+             * work, so if this *is* GCC, or G++, look for a version >4.5
+             */
+#           if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 6)
+#              define PNG_ARM_NEON_IMPLEMENTATION 2
+#           endif /* no GNUC support */
+#        endif /* __GNUC__ */
+#     else /* !defined __ARM_NEON__ */
+         /* The 'intrinsics' code simply won't compile without this -mfpu=neon:
+          */
+#        define PNG_ARM_NEON_IMPLEMENTATION 2
+#     endif /* __ARM_NEON__ */
+#  endif /* !defined PNG_ARM_NEON_IMPLEMENTATION */
+
+#  ifndef PNG_ARM_NEON_IMPLEMENTATION
+      /* Use the intrinsics code by default. */
+#     define PNG_ARM_NEON_IMPLEMENTATION 1
+#  endif
+#endif /* PNG_ARM_NEON_OPT > 0 */
 
 /* Is this a build of a DLL where compilation of the object modules requires
  * different preprocessor settings to those required for a simple library?  If
diff --git a/projects/vstudio/readme.txt b/projects/vstudio/readme.txt
index 742313fb2..21e544d46 100644
--- a/projects/vstudio/readme.txt
+++ b/projects/vstudio/readme.txt
@@ -1,7 +1,7 @@
 
 VisualStudio instructions
 
-libpng version 1.7.0beta21 - October 13, 2013
+libpng version 1.7.0beta21 - November 2, 2013
 
 Copyright (c) 1998-2010 Glenn Randers-Pehrson
 
diff --git a/projects/vstudio/zlib.props b/projects/vstudio/zlib.props
index b56817a74..37c2a7637 100644
--- a/projects/vstudio/zlib.props
+++ b/projects/vstudio/zlib.props
@@ -2,7 +2,7 @@
 <!--
  * zlib.props - location of zlib source
  *
- * libpng version 1.7.0beta21 - October 13, 2013
+ * libpng version 1.7.0beta21 - November 2, 2013
  *
  * Copyright (c) 1998-2011 Glenn Randers-Pehrson
  *
diff --git a/scripts/README.txt b/scripts/README.txt
index 6adedc2a7..c99fedc0a 100644
--- a/scripts/README.txt
+++ b/scripts/README.txt
@@ -1,5 +1,5 @@
 
-Makefiles for  libpng version 1.7.0beta21 - October 13, 2013
+Makefiles for  libpng version 1.7.0beta21 - November 2, 2013
 
 pnglibconf.h.prebuilt       =>  Stores configuration settings
  makefile.linux    =>  Linux/ELF makefile
diff --git a/scripts/pnglibconf.h.prebuilt b/scripts/pnglibconf.h.prebuilt
index d96e6fba8..ae373bf10 100644
--- a/scripts/pnglibconf.h.prebuilt
+++ b/scripts/pnglibconf.h.prebuilt
@@ -2,7 +2,7 @@
 
 /* pnglibconf.h - library build configuration */
 
-/* Libpng version 1.7.0beta21 - October 13, 2013 */
+/* Libpng version 1.7.0beta21 - November 2, 2013 */
 
 /* Copyright (c) 1998-2013 Glenn Randers-Pehrson */