Add tiny JPEG decoder library

author: H. Peter Anvin <hpa@zytor.com> 2006-09-01 21:55:10 -0700
committer: H. Peter Anvin <hpa@zytor.com> 2006-09-01 21:55:10 -0700
commit: 6124926122f979e85aba8beb27a2d76d7edadc3a (patch)
tree: 72a5564580b441c3b71412faa272b9f4e6e5ef72 /com32/lib/jpeg
parent: 743ac8f1721cef695e1393f8bc76ccdb62445762 (diff)
download: syslinux-6124926122f979e85aba8beb27a2d76d7edadc3a.tar.gz
4 files changed, 2470 insertions, 0 deletions
diff --git a/com32/lib/jpeg/README b/com32/lib/jpeg/README
new file mode 100644
index 00000000..6adeef4b
--- /dev/null
+++ b/com32/lib/jpeg/README
@@ -0,0 +1,32 @@
+/*
+ * Small jpeg decoder library
+ *
+ * Copyright (c) 2006, Luc Saillard <luc@saillard.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *  this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *  this list of conditions and the following disclaimer in the documentation
+ *  and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the author nor the names of its contributors may be
+ *  used to endorse or promote products derived from this software without
+ *  specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
diff --git a/com32/lib/jpeg/jidctflt.c b/com32/lib/jpeg/jidctflt.c
new file mode 100644
index 00000000..1327b823
--- /dev/null
+++ b/com32/lib/jpeg/jidctflt.c
@@ -0,0 +1,286 @@
+/*
+ * jidctflt.c
+ *
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ *
+ * The authors make NO WARRANTY or representation, either express or implied,
+ * with respect to this software, its quality, accuracy, merchantability, or 
+ * fitness for a particular purpose.  This software is provided "AS IS", and you,
+ * its user, assume the entire risk as to its quality and accuracy.
+ *
+ * This software is copyright (C) 1991-1998, Thomas G. Lane.
+ * All Rights Reserved except as specified below.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute this
+ * software (or portions thereof) for any purpose, without fee, subject to these
+ * conditions:
+ * (1) If any part of the source code for this software is distributed, then this
+ * README file must be included, with this copyright and no-warranty notice
+ * unaltered; and any additions, deletions, or changes to the original files
+ * must be clearly indicated in accompanying documentation.
+ * (2) If only executable code is distributed, then the accompanying
+ * documentation must state that "this software is based in part on the work of
+ * the Independent JPEG Group".
+ * (3) Permission for use of this software is granted only if the user accepts
+ * full responsibility for any undesirable consequences; the authors accept
+ * NO LIABILITY for damages of any kind.
+ * 
+ * These conditions apply to any software derived from or based on the IJG code,
+ * not just to the unmodified library.  If you use our work, you ought to
+ * acknowledge us.
+ * 
+ * Permission is NOT granted for the use of any IJG author's name or company name
+ * in advertising or publicity relating to this software or products derived from
+ * it.  This software may be referred to only as "the Independent JPEG Group's
+ * software".
+ * 
+ * We specifically permit and encourage the use of this software as the basis of
+ * commercial products, provided that all warranty or liability claims are
+ * assumed by the product vendor.
+ *
+ *
+ * This file contains a floating-point implementation of the
+ * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
+ * must also perform dequantization of the input coefficients.
+ *
+ * This implementation should be more accurate than either of the integer
+ * IDCT implementations.  However, it may not give the same results on all
+ * machines because of differences in roundoff behavior.  Speed will depend
+ * on the hardware's floating point capacity.
+ *
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
+ * on each row (or vice versa, but it's more convenient to emit a row at
+ * a time).  Direct algorithms are also available, but they are much more
+ * complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README).  The following code
+ * is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with a fixed-point
+ * implementation, accuracy is lost due to imprecise representation of the
+ * scaled quantization values.  However, that problem does not arise if
+ * we use floating point arithmetic.
+ */
+
+#include <stdint.h>
+#include "tinyjpeg-internal.h"
+
+#define FAST_FLOAT float
+#define DCTSIZE	   8
+#define DCTSIZE2   (DCTSIZE*DCTSIZE)
+
+#define DEQUANTIZE(coef,quantval)  (((FAST_FLOAT) (coef)) * (quantval))
+
+#if defined(__GNUC__) && (defined(__i686__) || defined(__x86_64__))
+/* Round x/2^shift, add 128, saturate to 0..255; shift must be a constant ("i" operand) */
+static inline unsigned char descale_and_clamp(int x, int shift)
+{
+  __asm__ (
+      "add %3,%1\n"
+      "\tsar %2,%1\n"
+      "\tsub $-128,%1\n"
+      "\tcmovl %5,%1\n"	/* Use the sub to compare to 0 */
+      "\tcmpl %4,%1\n"
+      "\tcmovg %4,%1\n"
+      : "=r"(x)
+      : "0"(x), "i"(shift), "i"(1UL<<(shift-1)), "r" (0xff), "r" (0)
+      );
+  return x;
+}
+
+#else
+static inline unsigned char descale_and_clamp(int x, int shift)
+{
+  x += (1UL<<(shift-1));	/* add half of the divisor: round to nearest */
+  if (x<0)
+    x = (x >> shift) | ((~(0UL)) << (32-(shift)));	/* force the sign bits in */
+  else
+    x >>= shift;
+  x += 128;			/* level shift */
+  if (x>255)
+    return 255;
+  else if (x<0)
+    return 0;
+  else 
+    return x;
+}
+#endif
+
+/*
+ * Dequantize (compptr->DCT * compptr->Q_table) and inverse-DCT one 8x8 block;
+ * the 8 rows of 8 clamped samples go to output_buf, `stride` bytes apart.
+ */
+void
+jpeg_idct_float (struct component *compptr, uint8_t *output_buf, int stride)
+{
+  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
+  FAST_FLOAT z5, z10, z11, z12, z13;
+  int16_t *inptr;		/* current column of compptr->DCT */
+  FAST_FLOAT *quantptr;		/* matching column of compptr->Q_table */
+  FAST_FLOAT *wsptr;		/* column (pass 1) or row (pass 2) of workspace */
+  uint8_t *outptr;		/* current output row */
+  int ctr;
+  FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = compptr->DCT;
+  quantptr = compptr->Q_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+    
+    if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
+	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
+	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
+	inptr[DCTSIZE*7] == 0) {
+      /* AC terms all zero */
+      FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+      
+      wsptr[DCTSIZE*0] = dcval;
+      wsptr[DCTSIZE*1] = dcval;
+      wsptr[DCTSIZE*2] = dcval;
+      wsptr[DCTSIZE*3] = dcval;
+      wsptr[DCTSIZE*4] = dcval;
+      wsptr[DCTSIZE*5] = dcval;
+      wsptr[DCTSIZE*6] = dcval;
+      wsptr[DCTSIZE*7] = dcval;
+      
+      inptr++;			/* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+    
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
+
+    tmp10 = tmp0 + tmp2;	/* phase 3 */
+    tmp11 = tmp0 - tmp2;
+
+    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
+    tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */
+
+    tmp0 = tmp10 + tmp13;	/* phase 2 */
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+    
+    /* Odd part */
+
+    tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
+    tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
+    tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
+    tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
+
+    z13 = tmp6 + tmp5;		/* phase 6 */
+    z10 = tmp6 - tmp5;
+    z11 = tmp4 + tmp7;
+    z12 = tmp4 - tmp7;
+
+    tmp7 = z11 + z13;		/* phase 5 */
+    tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */
+
+    z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
+    tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */
+    tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;	/* phase 2 */
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 + tmp5;
+
+    wsptr[DCTSIZE*0] = tmp0 + tmp7;
+    wsptr[DCTSIZE*7] = tmp0 - tmp7;
+    wsptr[DCTSIZE*1] = tmp1 + tmp6;
+    wsptr[DCTSIZE*6] = tmp1 - tmp6;
+    wsptr[DCTSIZE*2] = tmp2 + tmp5;
+    wsptr[DCTSIZE*5] = tmp2 - tmp5;
+    wsptr[DCTSIZE*4] = tmp3 + tmp4;
+    wsptr[DCTSIZE*3] = tmp3 - tmp4;
+
+    inptr++;			/* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+  
+  /* Pass 2: process rows from work array, store into output array. */
+  /* Note that we must descale the results by a factor of 8 == 2**3. */
+
+  wsptr = workspace;
+  outptr = output_buf;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    /* Rows of zeroes can be exploited in the same way as we did with columns.
+     * However, the column calculation has created many nonzero AC terms, so
+     * the simplification applies less often (typically 5% to 10% of the time).
+     * And testing floats for zero is relatively expensive, so we don't bother.
+     */
+    
+    /* Even part */
+
+    tmp10 = wsptr[0] + wsptr[4];
+    tmp11 = wsptr[0] - wsptr[4];
+
+    tmp13 = wsptr[2] + wsptr[6];
+    tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT) 1.414213562) - tmp13;
+
+    tmp0 = tmp10 + tmp13;
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    z13 = wsptr[5] + wsptr[3];
+    z10 = wsptr[5] - wsptr[3];
+    z11 = wsptr[1] + wsptr[7];
+    z12 = wsptr[1] - wsptr[7];
+
+    tmp7 = z11 + z13;
+    tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562);
+
+    z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
+    tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */
+    tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 + tmp5;
+
+    /* Final output stage: scale down by a factor of 8, add 128 and range-limit */
+
+    outptr[0] = descale_and_clamp(tmp0 + tmp7, 3);
+    outptr[7] = descale_and_clamp(tmp0 - tmp7, 3);
+    outptr[1] = descale_and_clamp(tmp1 + tmp6, 3);
+    outptr[6] = descale_and_clamp(tmp1 - tmp6, 3);
+    outptr[2] = descale_and_clamp(tmp2 + tmp5, 3);
+    outptr[5] = descale_and_clamp(tmp2 - tmp5, 3);
+    outptr[4] = descale_and_clamp(tmp3 + tmp4, 3);
+    outptr[3] = descale_and_clamp(tmp3 - tmp4, 3);
+
+    
+    wsptr += DCTSIZE;		/* advance pointer to next row */
+    outptr += stride;
+  }
+}
+
diff --git a/com32/lib/jpeg/tinyjpeg-internal.h b/com32/lib/jpeg/tinyjpeg-internal.h
new file mode 100644
index 00000000..8ae0c1b2
--- /dev/null
+++ b/com32/lib/jpeg/tinyjpeg-internal.h
@@ -0,0 +1,107 @@
+/*
+ * Small jpeg decoder library (Internal header)
+ *
+ * Copyright (c) 2006, Luc Saillard <luc@saillard.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 
+ * - Redistributions of source code must retain the above copyright notice,
+ *  this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *  this list of conditions and the following disclaimer in the documentation
+ *  and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the author nor the names of its contributors may be
+ *  used to endorse or promote products derived from this software without
+ *  specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+
+#ifndef __TINYJPEG_INTERNAL_H_
+#define __TINYJPEG_INTERNAL_H_
+
+struct jdec_private;
+
+#define HUFFMAN_HASH_NBITS 9
+#define HUFFMAN_HASH_SIZE  (1UL<<HUFFMAN_HASH_NBITS)
+#define HUFFMAN_HASH_MASK  (HUFFMAN_HASH_SIZE-1)
+
+#define HUFFMAN_TABLES	   4
+#define COMPONENTS	   4
+
+struct huffman_table
+{
+  /* Fast lookup table indexed by the next HUFFMAN_HASH_NBITS bits of the stream:
+   * an entry >= 0 is the decoded symbol, an entry < 0 means "search slowtable" */
+  short int lookup[HUFFMAN_HASH_SIZE];
+  /* code_size[symbol]: number of bits used to encode that symbol */
+  unsigned char code_size[HUFFMAN_HASH_SIZE];
+  /* Codes longer than HUFFMAN_HASH_NBITS bits: one row per code length, each
+   * row being a zero-terminated list of (code, symbol) pairs.
+   * FIXME: Calculate if 256 values are enough to store all values */
+  uint16_t slowtable[16-HUFFMAN_HASH_NBITS][256];
+};
+
+struct component 
+{
+  unsigned int Hfactor;		/* presumably the horizontal sampling factor - TODO confirm */
+  unsigned int Vfactor;		/* presumably the vertical sampling factor - TODO confirm */
+  float *Q_table;		/* Pointer to the quantisation table to use */
+  struct huffman_table *AC_table;	/* table used to decode the AC coefficients */
+  struct huffman_table *DC_table;	/* table used to decode the DC coefficient */
+  short int previous_DC;	/* Previous DC coefficient (the stream codes differences) */
+  short int DCT[64];		/* DCT coefficients of the current block, input of the IDCT */
+};
+
+
+typedef void (*decode_MCU_fct) (struct jdec_private *priv);
+typedef void (*convert_colorspace_fct) (struct jdec_private *priv);
+
+struct jdec_private
+{
+  /* Public variables */
+  uint8_t *components[COMPONENTS];
+  unsigned int width, height;	/* Size of the image */
+  unsigned int flags;		/* TINYJPEG_FLAGS_* bits */
+
+  /* Private variables */
+  const unsigned char *stream_begin;
+  unsigned int stream_length;
+
+  const unsigned char *stream;	/* Pointer to the current stream */
+  unsigned int reservoir, nbits_in_reservoir;	/* bit buffer, and number of valid bits in it */
+
+  struct component component_infos[COMPONENTS];
+  float Q_tables[COMPONENTS][64];		/* quantization tables */
+  struct huffman_table HTDC[HUFFMAN_TABLES];	/* DC huffman tables   */
+  struct huffman_table HTAC[HUFFMAN_TABLES];	/* AC huffman tables   */
+  int default_huffman_table_initialized;	/* set to 1 once the default tables are built */
+
+  /* Temp space used after the IDCT to store each component */
+  uint8_t Y[64*4], Cr[64], Cb[64];
+
+  /* Internal pointers used for colorspace conversion, do not modify them !!! */
+  uint8_t *plane[COMPONENTS];
+
+};
+
+#define IDCT jpeg_idct_float
+void jpeg_idct_float (struct component *compptr, uint8_t *output_buf, int stride);
+
+#endif
+
diff --git a/com32/lib/jpeg/tinyjpeg.c b/com32/lib/jpeg/tinyjpeg.c
new file mode 100644
index 00000000..a1b62dbc
--- /dev/null
+++ b/com32/lib/jpeg/tinyjpeg.c
@@ -0,0 +1,2045 @@
+/*
+ * Small jpeg decoder library
+ *
+ * Copyright (c) 2006, Luc Saillard <luc@saillard.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 
+ * - Redistributions of source code must retain the above copyright notice,
+ *  this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *  this list of conditions and the following disclaimer in the documentation
+ *  and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the author nor the names of its contributors may be
+ *  used to endorse or promote products derived from this software without
+ *  specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+#include "tinyjpeg.h"
+#include "tinyjpeg-internal.h"
+
+enum std_markers {
+   DQT  = 0xDB, /* Define Quantization Table */
+   SOF  = 0xC0, /* Start of Frame (size information) */
+   DHT  = 0xC4, /* Huffman Table */
+   SOI  = 0xD8, /* Start of Image */
+   SOS  = 0xDA, /* Start of Scan */
+   EOI  = 0xD9, /* End of Image */
+   APP0 = 0xE0,
+};
+
+#define cY	1
+#define cCb	2
+#define cCr	3
+
+#define BLACK_Y 0
+#define BLACK_U 127
+#define BLACK_V 127
+
+#define SANITY_CHECK 1
+
+#if DEBUG
+#define error(fmt, args...) do { \
+   snprintf(error_string, sizeof(error_string), fmt, ## args); \
+   return -1; \
+} while(0)
+
+#define trace(fmt, args...) do { \
+   fprintf(stderr, fmt, ## args); \
+   fflush(stderr); \
+} while(0)
+#else
+#define error(fmt, args...) do { return -1; } while(0)
+#define trace(fmt, args...) do { } while (0)
+#endif
+
+#if 0
+static char *print_bits(unsigned int value, char *bitstr)
+{
+  int i, j;
+  i=31;
+  while (i>0)
+   {
+     if (value & (1UL<<i))
+       break;
+     i--;
+   }
+  j=0;
+  while (i>=0)
+   {
+     bitstr[j++] = (value & (1UL<<i))?'1':'0';
+     i--;
+   }
+  bitstr[j] = 0;
+  return bitstr;
+}
+
+static void print_next_16bytes(int offset, const unsigned char *stream)
+{
+  trace("%4.4x: %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n",
+	offset,
+	stream[0], stream[1], stream[2], stream[3], 
+	stream[4], stream[5], stream[6], stream[7],
+	stream[8], stream[9], stream[10], stream[11], 
+	stream[12], stream[13], stream[14], stream[15]);
+}
+
+#endif
+
+/* Global variable to return the last error found while decoding */
+static char error_string[256];
+
+static const unsigned char zigzag[64] = 
+{
+   0,  1,  5,  6, 14, 15, 27, 28,
+   2,  4,  7, 13, 16, 26, 29, 42,
+   3,  8, 12, 17, 25, 30, 41, 43,
+   9, 11, 18, 24, 31, 40, 44, 53,
+  10, 19, 23, 32, 39, 45, 52, 54,
+  20, 22, 33, 38, 46, 51, 55, 60,
+  21, 34, 37, 47, 50, 56, 59, 61,
+  35, 36, 48, 49, 57, 58, 62, 63
+};
+
+/* Set up the standard Huffman tables (cf. JPEG standard section K.3) */
+/* IMPORTANT: these are only valid for 8-bit data precision! */
+static const unsigned char bits_dc_luminance[17] =
+{ 
+  0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 
+};
+static const unsigned char val_dc_luminance[] =
+{
+  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 
+};
+  
+static const unsigned char bits_dc_chrominance[17] =
+{
+  0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 
+};
+static const unsigned char val_dc_chrominance[] = 
+{
+  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 
+};
+  
+static const unsigned char bits_ac_luminance[17] =
+{
+  0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d 
+};
+static const unsigned char val_ac_luminance[] =
+{
+  0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
+  0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
+  0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
+  0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
+  0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
+  0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
+  0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+  0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
+  0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
+  0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+  0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
+  0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
+  0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+  0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+  0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
+  0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
+  0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
+  0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
+  0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
+  0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+  0xf9, 0xfa
+};
+
+static const unsigned char bits_ac_chrominance[17] =
+{ 
+  0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 
+};
+
+static const unsigned char val_ac_chrominance[] =
+{
+  0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
+  0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
+  0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
+  0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
+  0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
+  0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
+  0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
+  0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+  0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+  0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+  0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+  0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+  0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
+  0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
+  0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
+  0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
+  0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
+  0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
+  0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
+  0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+  0xf9, 0xfa
+};
+
+
+/*
+ * 4 macros to manage the stream
+ *
+ *  fill_nbits: put at least nbits in the reservoir of bits.
+ *              But convert any 0xff,0x00 into 0xff
+ *  get_nbits: read nbits from the stream, and put them in result;
+ *             the bits are removed from the stream and the reservoir is
+ *             filled automatically. The result is signed according to the
+ *             number of bits.
+ *  look_nbits: read nbits from the stream without marking as read.
+ *  skip_nbits: read nbits from the stream but do not return the result.
+ * 
+ * stream: current pointer in the jpeg data (read bytes per bytes)
+ * nbits_in_reservoir: number of bits filled into the reservoir
+ * reservoir: register that contains bits information. Only nbits_in_reservoir
+ *            is valid.
+ *                          nbits_in_reservoir
+ *                        <--    17 bits    -->
+ *            Ex: 0000 0000 1010 0000 1111 0000   <== reservoir
+ *                        ^
+ *                        bit 1
+ *            To get two bits from this example
+ *                 result = (reservoir >> 15) & 3
+ *
+ */
+#define fill_nbits(reservoir,nbits_in_reservoir,stream,nbits_wanted) do { \
+   while (nbits_in_reservoir<nbits_wanted) \
+    { \
+      const unsigned char c = *stream++; \
+      reservoir <<= 8; \
+      if (c == 0xff && *stream == 0x00) \
+        stream++; \
+      reservoir |= c; \
+      nbits_in_reservoir+=8; \
+    } \
+}  while(0);
+
+/* Signed version: a field whose top bit is 0 becomes field - 2^nbits_wanted + 1 */
+#define get_nbits(reservoir,nbits_in_reservoir,stream,nbits_wanted,result) do { \
+   fill_nbits(reservoir,nbits_in_reservoir,stream,(nbits_wanted)); \
+   result = ((reservoir)>>(nbits_in_reservoir-(nbits_wanted))); \
+   nbits_in_reservoir -= (nbits_wanted);  \
+   reservoir &= ((1U<<nbits_in_reservoir)-1); \
+   if (result < (1UL<<((nbits_wanted)-1))) \
+       result += (0xFFFFFFFFUL<<(nbits_wanted))+1; \
+}  while(0);
+/* Peek at the next nbits_wanted bits (unsigned); nothing leaves the reservoir */
+#define look_nbits(reservoir,nbits_in_reservoir,stream,nbits_wanted,result) do { \
+   fill_nbits(reservoir,nbits_in_reservoir,stream,(nbits_wanted)); \
+   result = ((reservoir)>>(nbits_in_reservoir-(nbits_wanted))); \
+}  while(0);
+/* Drop nbits_wanted bits from the reservoir, refilling it first if needed */
+#define skip_nbits(reservoir,nbits_in_reservoir,stream,nbits_wanted) do { \
+   fill_nbits(reservoir,nbits_in_reservoir,stream,(nbits_wanted)); \
+   nbits_in_reservoir -= (nbits_wanted); \
+   reservoir &= ((1U<<nbits_in_reservoir)-1); \
+}  while(0);
+
+/* Read a big-endian 16-bit value from the two bytes x[0], x[1] */
+#define be16_to_cpu(x) (((x)[0]<<8)|(x)[1])
+
+
+/**
+ * Decode the next huffman symbol from the stream.
+ *
+ * First peek HUFFMAN_HASH_NBITS bits and index the lookup table: a
+ * non-negative entry is the symbol itself, and code_size[] tells how many
+ * bits to consume for it.
+ * A negative entry means the code is longer than HUFFMAN_HASH_NBITS bits; the
+ * per-length slowtable lists are then searched linearly, shortest code first.
+ *
+ * Returns the symbol, or 0 when no code matches (no bit is consumed then).
+ */
+static int get_next_huffman_code(struct jdec_private *priv, struct huffman_table *huffman_table)
+{
+  int symbol, peeked;
+  unsigned int len;
+  const uint16_t *entry;
+
+  look_nbits(priv->reservoir, priv->nbits_in_reservoir, priv->stream, HUFFMAN_HASH_NBITS, peeked);
+  symbol = huffman_table->lookup[peeked];
+  if (symbol >= 0)
+   {
+     /* Short code: the table says how many of the peeked bits belong to it */
+     int used = huffman_table->code_size[symbol];
+     skip_nbits(priv->reservoir, priv->nbits_in_reservoir, priv->stream, used);
+     return symbol;
+   }
+
+  /* Long code: try every length from HUFFMAN_HASH_NBITS+1 up to 16 bits */
+  for (len = HUFFMAN_HASH_NBITS + 1; len <= 16; len++)
+   {
+     look_nbits(priv->reservoir, priv->nbits_in_reservoir, priv->stream, len, peeked);
+     /* Walk the zero-terminated (code, symbol) pairs stored for this length */
+     for (entry = huffman_table->slowtable[len - HUFFMAN_HASH_NBITS - 1]; entry[0]; entry += 2)
+      {
+	if (entry[0] != peeked)
+	  continue;
+	skip_nbits(priv->reservoir, priv->nbits_in_reservoir, priv->stream, len);
+	return entry[1];
+      }
+   }
+
+  /* No code of any length matched */
+  return 0;
+}
+
+
+
+
+/**
+ * Decode one 8x8 block (data unit) of the given component from the stream.
+ * The DC value is the decoded difference added to c->previous_DC; the AC
+ * values are run-length/huffman decoded. The coefficients are reordered
+ * through zigzag[] into c->DCT before returning.
+ */
+static void process_Huffman_data_unit(struct jdec_private *priv, int component)
+{
+  unsigned char j;
+  int huff_code;
+  unsigned char size_val, count_0;
+
+  struct component *c = &priv->component_infos[component];
+  short int DCT[64];
+
+  /* Initialize the DCT coef table */
+  memset(DCT, 0, sizeof(DCT));
+
+  /* DC coefficient decoding */
+  huff_code = get_next_huffman_code(priv, c->DC_table);
+  if (huff_code) {
+     get_nbits(priv->reservoir, priv->nbits_in_reservoir, priv->stream, huff_code, DCT[0]);
+     DCT[0] += c->previous_DC;
+     c->previous_DC = DCT[0];
+  } else {
+     DCT[0] = c->previous_DC;
+  }
+
+  /* AC coefficient decoding */
+  j = 1;
+  while (j<64)
+   {
+     huff_code = get_next_huffman_code(priv, c->AC_table);
+     size_val = huff_code & 0xF;	/* low nibble: size of the coefficient in bits */
+     count_0 = huff_code >> 4;	/* high nibble: number of zeroes before it */
+
+     if (size_val == 0)
+      { /* RLE */
+	if (count_0 == 0)
+	  break;	/* EOB found, go out */
+	else if (count_0 == 0xF)
+	  j += 16;	/* skip 16 zeros */
+      }
+     else
+      {
+	j += count_0;	/* skip count_0 zeroes */
+	if (j >= 64)
+	  break;	/* corrupt stream: the run would write past DCT[63] */
+	get_nbits(priv->reservoir, priv->nbits_in_reservoir, priv->stream, size_val, DCT[j]);
+	j++;
+      }
+   }
+
+  for (j = 0; j < 64; j++)
+    c->DCT[j] = DCT[zigzag[j]];
+}
+
+/*
+ * Build the decoding tables of one huffman table from its JPEG description:
+ * bits[1..16] is the number of codes of each length, vals[] the symbols sorted
+ * by increasing code length.
+ * lookup resolves codes of at most HUFFMAN_HASH_NBITS bits, code_size gives the
+ * length of the code of each symbol, slowtable holds the longer codes.
+ */
+static void build_huffman_table(const unsigned char *bits, const unsigned char *vals, struct huffman_table *table)
+{
+  unsigned int i, j, code, code_size, val, nbits;
+  unsigned char huffsize[257], *hz;
+  unsigned int huffcode[257], *hc;
+
+  /*
+   * Build a temp array 
+   *   huffsize[X] => numbers of bits to write vals[X]
+   * (at most 256 entries: whatever bits[] announces beyond that is ignored)
+   */
+  hz = huffsize;
+  for (i=1; i<=16; i++)
+   {
+     for (j=1; j<=bits[i] && hz < huffsize+256; j++)
+       *hz++ = i;
+   }
+  *hz = 0;
+
+  memset(table->lookup, 0xff, sizeof(table->lookup));
+  for (i=0; i<(16-HUFFMAN_HASH_NBITS); i++)
+    table->slowtable[i][0] = 0;
+
+  /* Build a temp array
+   *   huffcode[X] => code used to write vals[X]
+   */
+  code = 0;
+  hc = huffcode;
+  hz = huffsize;
+  nbits = *hz;
+  while (*hz)
+   {
+     while (*hz == nbits) {
+	*hc++ = code++;
+	hz++;
+     }
+     code <<= 1;
+     nbits++;
+   }
+
+  /*
+   * Build the lookup table, and the slowtable if needed.
+   */
+  for (i=0; huffsize[i]; i++)
+   {
+     val = vals[i];
+     code = huffcode[i];
+     code_size = huffsize[i];
+
+     trace("val=%2.2x code=%8.8x codesize=%2.2d\n", i, code, code_size);
+
+     table->code_size[val] = code_size;
+     if (code_size <= HUFFMAN_HASH_NBITS)
+      {
+	/*
+	 * Good: val can be put in the lookup table, so fill all value of this
+	 * column with value val (an invalid code never writes past lookup[])
+	 */
+	int repeat = 1UL<<(HUFFMAN_HASH_NBITS - code_size);
+	code <<= HUFFMAN_HASH_NBITS - code_size;
+	while ( repeat-- && code < HUFFMAN_HASH_SIZE )
+	  table->lookup[code++] = val;
+      }
+     else
+      {
+	/* Append (code, val) and a new 0 terminator to the list of this code
+	 * length; when the row is full the entry is dropped instead */
+	uint16_t *slowtable = table->slowtable[code_size-HUFFMAN_HASH_NBITS-1];
+	uint16_t *const row_end = slowtable + sizeof(table->slowtable[0])/sizeof(slowtable[0]);
+	while(slowtable[0])
+	  slowtable+=2;
+	if (slowtable + 2 < row_end) {
+	  slowtable[0] = code;
+	  slowtable[1] = val;
+	  slowtable[2] = 0;
+	}
+      }
+
+   }
+}
+
+static void build_default_huffman_tables(struct jdec_private *priv)
+{
+  /* With the MJPEG flag the default tables are built once and then reused */
+  const int keep_tables = (priv->flags & TINYJPEG_FLAGS_MJPEG_TABLE) != 0;
+  if (keep_tables && priv->default_huffman_table_initialized)
+    return;
+
+  /* Table 0 is loaded with the luminance defaults, table 1 with chrominance */
+  build_huffman_table(bits_dc_luminance, val_dc_luminance, &priv->HTDC[0]);
+  build_huffman_table(bits_dc_chrominance, val_dc_chrominance, &priv->HTDC[1]);
+  build_huffman_table(bits_ac_luminance, val_ac_luminance, &priv->HTAC[0]);
+  build_huffman_table(bits_ac_chrominance, val_ac_chrominance, &priv->HTAC[1]);
+  priv->default_huffman_table_initialized = 1;
+}
+
+
+
+/*******************************************************************************
+ *
+ * Colorspace conversion routine
+ *
+ *
+ * Note:
+ * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
+ * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
+ * The conversion equations to be implemented are therefore
+ *      R = Y                + 1.40200 * Cr
+ *      G = Y - 0.34414 * Cb - 0.71414 * Cr
+ *      B = Y + 1.77200 * Cb
+ * 
+ ******************************************************************************/
+
+/* Saturate an integer to the 0..255 range of an 8-bit sample. */
+static unsigned char clamp(int i)
+{
+  if (i > 255)
+    return 255;
+  return (i < 0) ? 0 : i;
+}
+
+
+/**
+ *  YCrCb -> YUV420P (1x1)
+ *  .---.
+ *  | 1 |
+ *  `---'
+ *
+ *  The 8x8 luma block is copied row by row into plane[0]. From Cb and Cr,
+ *  every second sample of every second row (4x4 values each) is written to
+ *  plane[1] and plane[2].
+ */
+static void YCrCB_to_YUV420P_1x1(struct jdec_private *priv)
+{
+  const unsigned char *src;
+  unsigned char *dst;
+  int row, col;
+
+  /* Luma: destination rows are priv->width bytes apart */
+  src = priv->Y;
+  dst = priv->plane[0];
+  for (row = 0; row < 8; row++, src += 8, dst += priv->width)
+    memcpy(dst, src, 8);
+
+  /* Cb: keep columns 0,2,4,6 of rows 0,2,4,6 */
+  src = priv->Cb;
+  dst = priv->plane[1];
+  for (row = 0; row < 4; row++)
+   {
+     for (col = 0; col < 8; col += 2)
+       *dst++ = src[col];
+     src += 16; /* the row just read plus the skipped one */
+     dst += priv->width/2 - 4;
+   }
+
+  /* Cr: same sampling pattern as Cb */
+  src = priv->Cr;
+  dst = priv->plane[2];
+  for (row = 0; row < 4; row++)
+   {
+     for (col = 0; col < 8; col += 2)
+       *dst++ = src[col];
+     src += 16;
+     dst += priv->width/2 - 4;
+   }
+}
+
+/**
+ *  YCrCb -> YUV420P (2x1)
+ *  .-------.
+ *  | 1 | 2 |
+ *  `-------'
+ *
+ *  Copies 8 rows of 16 luma bytes into plane[0]. From Cb and Cr, every second
+ *  row (8 bytes wide) is copied to plane[1] and plane[2], 4 rows each.
+ */
+static void YCrCB_to_YUV420P_2x1(struct jdec_private *priv)
+{
+  const unsigned char *src;
+  unsigned char *dst;
+  int row;
+
+  /* Luma */
+  src = priv->Y;
+  dst = priv->plane[0];
+  for (row = 0; row < 8; row++, src += 16, dst += priv->width)
+    memcpy(dst, src, 16);
+
+  /* Cb: full rows 0,2,4,6 */
+  src = priv->Cb;
+  dst = priv->plane[1];
+  for (row = 0; row < 4; row++)
+   {
+     memcpy(dst, src, 8);
+     src += 16; /* the row just copied plus the skipped one */
+     dst += 8;
+     dst += priv->width/2 - 8;
+   }
+
+  /* Cr: same pattern as Cb */
+  src = priv->Cr;
+  dst = priv->plane[2];
+  for (row = 0; row < 4; row++)
+   {
+     memcpy(dst, src, 8);
+     src += 16;
+     dst += 8;
+     dst += priv->width/2 - 8;
+   }
+}
+
+
+/**
+ *  YCrCb -> YUV420P (1x2)
+ *  .---.
+ *  | 1 |
+ *  |---|
+ *  | 2 |
+ *  `---'
+ *
+ *  Copies 16 rows of 8 luma bytes into plane[0]. From each of the 8 rows of
+ *  Cb and Cr, every second sample (4 per row) is written to plane[1] and
+ *  plane[2].
+ */
+static void YCrCB_to_YUV420P_1x2(struct jdec_private *priv)
+{
+  const unsigned char *src;
+  unsigned char *dst;
+  int row, col;
+
+  /* Luma */
+  src = priv->Y;
+  dst = priv->plane[0];
+  for (row = 0; row < 16; row++, src += 8, dst += priv->width)
+    memcpy(dst, src, 8);
+
+  /* Cb: columns 0,2,4,6 of every row */
+  src = priv->Cb;
+  dst = priv->plane[1];
+  for (row = 0; row < 8; row++, src += 8)
+   {
+     for (col = 0; col < 8; col += 2)
+       *dst++ = src[col];
+     dst += priv->width/2 - 4;
+   }
+
+  /* Cr: same pattern as Cb */
+  src = priv->Cr;
+  dst = priv->plane[2];
+  for (row = 0; row < 8; row++, src += 8)
+   {
+     for (col = 0; col < 8; col += 2)
+       *dst++ = src[col];
+     dst += priv->width/2 - 4;
+   }
+}
+
+/**
+ *  YCrCb -> YUV420P (2x2)
+ *  .-------.
+ *  | 1 | 2 |
+ *  |---+---|
+ *  | 3 | 4 |
+ *  `-------'
+ *
+ *  Straight copies: 16 rows of 16 luma bytes into plane[0], and the 8x8 Cb
+ *  and Cr blocks into plane[1] and plane[2] (rows priv->width/2 apart).
+ */
+static void YCrCB_to_YUV420P_2x2(struct jdec_private *priv)
+{
+  const unsigned char *src;
+  unsigned char *dst;
+  int row;
+
+  /* Luma */
+  src = priv->Y;
+  dst = priv->plane[0];
+  for (row = 0; row < 16; row++, src += 16, dst += priv->width)
+    memcpy(dst, src, 16);
+
+  /* Cb */
+  src = priv->Cb;
+  dst = priv->plane[1];
+  for (row = 0; row < 8; row++, src += 8, dst += priv->width/2)
+    memcpy(dst, src, 8);
+
+  /* Cr */
+  src = priv->Cr;
+  dst = priv->plane[2];
+  for (row = 0; row < 8; row++, src += 8, dst += priv->width/2)
+    memcpy(dst, src, 8);
+}
+
+/**
+ *  YCrCb -> RGB24 (1x1)
+ *  .---.
+ *  | 1 |
+ *  `---'
+ *
+ *  Converts the 8x8 Y/Cb/Cr blocks into 8 rows of 8 pixels at priv->plane[0],
+ *  three bytes per pixel in R, G, B order, using 10-bit fixed-point
+ *  coefficients.
+ */
+static void YCrCB_to_RGB24_1x1(struct jdec_private *priv)
+{
+#define SCALEBITS       10
+#define ONE_HALF        (1UL << (SCALEBITS-1))
+#define FIX(x)          ((int)((x) * (1UL<<SCALEBITS) + 0.5))
+
+  const unsigned char *luma = priv->Y;
+  const unsigned char *blue_diff = priv->Cb;
+  const unsigned char *red_diff = priv->Cr;
+  unsigned char *out = priv->plane[0];
+  const int row_skip = priv->width*3 - 8*3;
+  int row, col;
+
+  for (row = 0; row < 8; row++, out += row_skip) {
+    for (col = 0; col < 8; col++, out += 3) {
+       const int y  = (*luma++) << SCALEBITS;
+       const int cb = *blue_diff++ - 128;
+       const int cr = *red_diff++ - 128;
+       /* Per-channel chroma term with the rounding constant folded in */
+       const int add_r = FIX(1.40200) * cr + ONE_HALF;
+       const int add_g = - FIX(0.34414) * cb - FIX(0.71414) * cr + ONE_HALF;
+       const int add_b = FIX(1.77200) * cb + ONE_HALF;
+
+       out[0] = clamp((y + add_r) >> SCALEBITS);
+       out[1] = clamp((y + add_g) >> SCALEBITS);
+       out[2] = clamp((y + add_b) >> SCALEBITS);
+    }
+  }
+
+#undef SCALEBITS
+#undef ONE_HALF
+#undef FIX
+
+}
+
+/**
+ *  YCrCb -> BGR24 (1x1)
+ *  .---.
+ *  | 1 |
+ *  `---'
+ *
+ *  Converts the 8x8 Y/Cb/Cr blocks into 8 rows of 8 pixels at priv->plane[0],
+ *  three bytes per pixel in B, G, R order, using 10-bit fixed-point
+ *  coefficients.
+ */
+static void YCrCB_to_BGR24_1x1(struct jdec_private *priv)
+{
+#define SCALEBITS       10
+#define ONE_HALF        (1UL << (SCALEBITS-1))
+#define FIX(x)          ((int)((x) * (1UL<<SCALEBITS) + 0.5))
+
+  const unsigned char *luma = priv->Y;
+  const unsigned char *blue_diff = priv->Cb;
+  const unsigned char *red_diff = priv->Cr;
+  unsigned char *out = priv->plane[0];
+  const int row_skip = priv->width*3 - 8*3;
+  int row, col;
+
+  for (row = 0; row < 8; row++, out += row_skip) {
+    for (col = 0; col < 8; col++, out += 3) {
+       const int y  = (*luma++) << SCALEBITS;
+       const int cb = *blue_diff++ - 128;
+       const int cr = *red_diff++ - 128;
+       /* Per-channel chroma term with the rounding constant folded in */
+       const int add_r = FIX(1.40200) * cr + ONE_HALF;
+       const int add_g = - FIX(0.34414) * cb - FIX(0.71414) * cr + ONE_HALF;
+       const int add_b = FIX(1.77200) * cb + ONE_HALF;
+
+       out[0] = clamp((y + add_b) >> SCALEBITS);
+       out[1] = clamp((y + add_g) >> SCALEBITS);
+       out[2] = clamp((y + add_r) >> SCALEBITS);
+    }
+  }
+
+#undef SCALEBITS
+#undef ONE_HALF
+#undef FIX
+
+}
+
+
+/**
+ *  YCrCb -> RGB24 (2x1)
+ *  .-------.
+ *  | 1 | 2 |
+ *  `-------'
+ *
+ *  Writes 8 rows of 16 R,G,B pixels to priv->plane[0]. Each Cb/Cr sample is
+ *  shared by two horizontally adjacent luma samples.
+ */
+static void YCrCB_to_RGB24_2x1(struct jdec_private *priv)
+{
+#define SCALEBITS       10
+#define ONE_HALF        (1UL << (SCALEBITS-1))
+#define FIX(x)          ((int)((x) * (1UL<<SCALEBITS) + 0.5))
+/* Store one R,G,B pixel at dst from luma sample lum and the current chroma terms */
+#define PUT_PIXEL(dst, lum) do { \
+       const int y_ = (lum) << SCALEBITS; \
+       (dst)[0] = clamp((y_ + add_r) >> SCALEBITS); \
+       (dst)[1] = clamp((y_ + add_g) >> SCALEBITS); \
+       (dst)[2] = clamp((y_ + add_b) >> SCALEBITS); \
+     } while (0)
+
+  const unsigned char *luma = priv->Y;
+  const unsigned char *blue_diff = priv->Cb;
+  const unsigned char *red_diff = priv->Cr;
+  unsigned char *out = priv->plane[0];
+  const int row_skip = priv->width*3 - 16*3;
+  int row, col;
+
+  for (row = 0; row < 8; row++, out += row_skip) {
+    for (col = 0; col < 8; col++, luma += 2, out += 6) {
+       const int cb = *blue_diff++ - 128;
+       const int cr = *red_diff++ - 128;
+       const int add_r = FIX(1.40200) * cr + ONE_HALF;
+       const int add_g = - FIX(0.34414) * cb - FIX(0.71414) * cr + ONE_HALF;
+       const int add_b = FIX(1.77200) * cb + ONE_HALF;
+
+       PUT_PIXEL(out,     luma[0]);
+       PUT_PIXEL(out + 3, luma[1]);
+    }
+  }
+
+#undef PUT_PIXEL
+#undef SCALEBITS
+#undef ONE_HALF
+#undef FIX
+
+}
+
+/*
+ *  YCrCb -> BGR24 (2x1)
+ *  .-------.
+ *  | 1 | 2 |
+ *  `-------'
+ *
+ *  Writes 8 rows of 16 B,G,R pixels to priv->plane[0]. Each Cb/Cr sample is
+ *  shared by two horizontally adjacent luma samples.
+ */
+static void YCrCB_to_BGR24_2x1(struct jdec_private *priv)
+{
+#define SCALEBITS       10
+#define ONE_HALF        (1UL << (SCALEBITS-1))
+#define FIX(x)          ((int)((x) * (1UL<<SCALEBITS) + 0.5))
+/* Store one B,G,R pixel at dst from luma sample lum and the current chroma terms */
+#define PUT_PIXEL(dst, lum) do { \
+       const int y_ = (lum) << SCALEBITS; \
+       (dst)[0] = clamp((y_ + add_b) >> SCALEBITS); \
+       (dst)[1] = clamp((y_ + add_g) >> SCALEBITS); \
+       (dst)[2] = clamp((y_ + add_r) >> SCALEBITS); \
+     } while (0)
+
+  const unsigned char *luma = priv->Y;
+  const unsigned char *blue_diff = priv->Cb;
+  const unsigned char *red_diff = priv->Cr;
+  unsigned char *out = priv->plane[0];
+  const int row_skip = priv->width*3 - 16*3;
+  int row, col;
+
+  for (row = 0; row < 8; row++, out += row_skip) {
+    for (col = 0; col < 8; col++, luma += 2, out += 6) {
+       const int cb = *blue_diff++ - 128;
+       const int cr = *red_diff++ - 128;
+       const int add_r = FIX(1.40200) * cr + ONE_HALF;
+       const int add_g = - FIX(0.34414) * cb - FIX(0.71414) * cr + ONE_HALF;
+       const int add_b = FIX(1.77200) * cb + ONE_HALF;
+
+       PUT_PIXEL(out,     luma[0]);
+       PUT_PIXEL(out + 3, luma[1]);
+    }
+  }
+
+#undef PUT_PIXEL
+#undef SCALEBITS
+#undef ONE_HALF
+#undef FIX
+
+}
+
+/**
+ *  YCrCb -> RGB24 (1x2)
+ *  .---.
+ *  | 1 |
+ *  |---|
+ *  | 2 |
+ *  `---'
+ *
+ *  Writes 16 rows of 8 R,G,B pixels to priv->plane[0], two output rows per
+ *  chroma row. Each Cb/Cr sample is shared by two vertically adjacent luma
+ *  samples.
+ */
+static void YCrCB_to_RGB24_1x2(struct jdec_private *priv)
+{
+#define SCALEBITS       10
+#define ONE_HALF        (1UL << (SCALEBITS-1))
+#define FIX(x)          ((int)((x) * (1UL<<SCALEBITS) + 0.5))
+/* Store one R,G,B pixel at dst from luma sample lum and the current chroma terms */
+#define PUT_PIXEL(dst, lum) do { \
+       const int y_ = (lum) << SCALEBITS; \
+       (dst)[0] = clamp((y_ + add_r) >> SCALEBITS); \
+       (dst)[1] = clamp((y_ + add_g) >> SCALEBITS); \
+       (dst)[2] = clamp((y_ + add_b) >> SCALEBITS); \
+     } while (0)
+
+  const unsigned char *luma = priv->Y;
+  const unsigned char *blue_diff = priv->Cb;
+  const unsigned char *red_diff = priv->Cr;
+  unsigned char *top = priv->plane[0];
+  unsigned char *bottom = priv->plane[0] + priv->width*3;
+  const int row_skip = 2*priv->width*3 - 8*3;
+  int row, col;
+
+  for (row = 0; row < 8; row++) {
+    for (col = 0; col < 8; col++, luma++, top += 3, bottom += 3) {
+       const int cb = *blue_diff++ - 128;
+       const int cr = *red_diff++ - 128;
+       const int add_r = FIX(1.40200) * cr + ONE_HALF;
+       const int add_g = - FIX(0.34414) * cb - FIX(0.71414) * cr + ONE_HALF;
+       const int add_b = FIX(1.77200) * cb + ONE_HALF;
+
+       PUT_PIXEL(top,    luma[0]);
+       PUT_PIXEL(bottom, luma[8]); /* same column, next luma row */
+    }
+    luma += 8; /* the second luma row was consumed through luma[8] */
+    top += row_skip;
+    bottom += row_skip;
+  }
+
+#undef PUT_PIXEL
+#undef SCALEBITS
+#undef ONE_HALF
+#undef FIX
+
+}
+
+/*
+ *  YCrCb -> BGR24 (1x2)
+ *  .---.
+ *  | 1 |
+ *  |---|
+ *  | 2 |
+ *  `---'
+ *
+ *  Writes 16 rows of 8 B,G,R pixels to priv->plane[0], two output rows per
+ *  chroma row. Each Cb/Cr sample is shared by two vertically adjacent luma
+ *  samples.
+ */
+static void YCrCB_to_BGR24_1x2(struct jdec_private *priv)
+{
+#define SCALEBITS       10
+#define ONE_HALF        (1UL << (SCALEBITS-1))
+#define FIX(x)          ((int)((x) * (1UL<<SCALEBITS) + 0.5))
+/* Store one B,G,R pixel at dst from luma sample lum and the current chroma terms */
+#define PUT_PIXEL(dst, lum) do { \
+       const int y_ = (lum) << SCALEBITS; \
+       (dst)[0] = clamp((y_ + add_b) >> SCALEBITS); \
+       (dst)[1] = clamp((y_ + add_g) >> SCALEBITS); \
+       (dst)[2] = clamp((y_ + add_r) >> SCALEBITS); \
+     } while (0)
+
+  const unsigned char *luma = priv->Y;
+  const unsigned char *blue_diff = priv->Cb;
+  const unsigned char *red_diff = priv->Cr;
+  unsigned char *top = priv->plane[0];
+  unsigned char *bottom = priv->plane[0] + priv->width*3;
+  const int row_skip = 2*priv->width*3 - 8*3;
+  int row, col;
+
+  for (row = 0; row < 8; row++) {
+    for (col = 0; col < 8; col++, luma++, top += 3, bottom += 3) {
+       const int cb = *blue_diff++ - 128;
+       const int cr = *red_diff++ - 128;
+       const int add_r = FIX(1.40200) * cr + ONE_HALF;
+       const int add_g = - FIX(0.34414) * cb - FIX(0.71414) * cr + ONE_HALF;
+       const int add_b = FIX(1.77200) * cb + ONE_HALF;
+
+       PUT_PIXEL(top,    luma[0]);
+       PUT_PIXEL(bottom, luma[8]); /* same column, next luma row */
+    }
+    luma += 8; /* the second luma row was consumed through luma[8] */
+    top += row_skip;
+    bottom += row_skip;
+  }
+
+#undef PUT_PIXEL
+#undef SCALEBITS
+#undef ONE_HALF
+#undef FIX
+
+}
+
+
+/**
+ *  YCrCb -> RGB24 (2x2)
+ *  .-------.
+ *  | 1 | 2 |
+ *  |---+---|
+ *  | 3 | 4 |
+ *  `-------'
+ *
+ *  Writes 16 rows of 16 R,G,B pixels to priv->plane[0], two output rows per
+ *  chroma row. Each Cb/Cr sample is shared by a 2x2 group of luma samples.
+ */
+static void YCrCB_to_RGB24_2x2(struct jdec_private *priv)
+{
+#define SCALEBITS       10
+#define ONE_HALF        (1UL << (SCALEBITS-1))
+#define FIX(x)          ((int)((x) * (1UL<<SCALEBITS) + 0.5))
+/* Store one R,G,B pixel at dst from luma sample lum and the current chroma terms */
+#define PUT_PIXEL(dst, lum) do { \
+       const int y_ = (lum) << SCALEBITS; \
+       (dst)[0] = clamp((y_ + add_r) >> SCALEBITS); \
+       (dst)[1] = clamp((y_ + add_g) >> SCALEBITS); \
+       (dst)[2] = clamp((y_ + add_b) >> SCALEBITS); \
+     } while (0)
+
+  const unsigned char *luma = priv->Y;
+  const unsigned char *blue_diff = priv->Cb;
+  const unsigned char *red_diff = priv->Cr;
+  unsigned char *top = priv->plane[0];
+  unsigned char *bottom = priv->plane[0] + priv->width*3;
+  const int row_skip = (priv->width*3*2) - 16*3;
+  int row, col;
+
+  for (row = 0; row < 8; row++) {
+    for (col = 0; col < 8; col++, luma += 2, top += 6, bottom += 6) {
+       const int cb = *blue_diff++ - 128;
+       const int cr = *red_diff++ - 128;
+       const int add_r = FIX(1.40200) * cr + ONE_HALF;
+       const int add_g = - FIX(0.34414) * cb - FIX(0.71414) * cr + ONE_HALF;
+       const int add_b = FIX(1.77200) * cb + ONE_HALF;
+
+       /* Upper pair, then the pair one luma row (16 samples) below */
+       PUT_PIXEL(top,        luma[0]);
+       PUT_PIXEL(top + 3,    luma[1]);
+       PUT_PIXEL(bottom,     luma[16]);
+       PUT_PIXEL(bottom + 3, luma[17]);
+    }
+    luma += 16; /* the second luma row was consumed through luma[16..17] */
+    top += row_skip;
+    bottom += row_skip;
+  }
+
+#undef PUT_PIXEL
+#undef SCALEBITS
+#undef ONE_HALF
+#undef FIX
+
+}
+
+
+/*
+ *  YCrCb -> BGR24 (2x2)
+ *  .-------.
+ *  | 1 | 2 |
+ *  |---+---|
+ *  | 3 | 4 |
+ *  `-------'
+ *
+ *  Writes 16 rows of 16 B,G,R pixels to priv->plane[0], two output rows per
+ *  chroma row. Each Cb/Cr sample is shared by a 2x2 group of luma samples.
+ */
+static void YCrCB_to_BGR24_2x2(struct jdec_private *priv)
+{
+#define SCALEBITS       10
+#define ONE_HALF        (1UL << (SCALEBITS-1))
+#define FIX(x)          ((int)((x) * (1UL<<SCALEBITS) + 0.5))
+/* Store one B,G,R pixel at dst from luma sample lum and the current chroma terms */
+#define PUT_PIXEL(dst, lum) do { \
+       const int y_ = (lum) << SCALEBITS; \
+       (dst)[0] = clamp((y_ + add_b) >> SCALEBITS); \
+       (dst)[1] = clamp((y_ + add_g) >> SCALEBITS); \
+       (dst)[2] = clamp((y_ + add_r) >> SCALEBITS); \
+     } while (0)
+
+  const unsigned char *luma = priv->Y;
+  const unsigned char *blue_diff = priv->Cb;
+  const unsigned char *red_diff = priv->Cr;
+  unsigned char *top = priv->plane[0];
+  unsigned char *bottom = priv->plane[0] + priv->width*3;
+  const int row_skip = (priv->width*3*2) - 16*3;
+  int row, col;
+
+  for (row = 0; row < 8; row++) {
+    for (col = 0; col < 8; col++, luma += 2, top += 6, bottom += 6) {
+       const int cb = *blue_diff++ - 128;
+       const int cr = *red_diff++ - 128;
+       const int add_r = FIX(1.40200) * cr + ONE_HALF;
+       const int add_g = - FIX(0.34414) * cb - FIX(0.71414) * cr + ONE_HALF;
+       const int add_b = FIX(1.77200) * cb + ONE_HALF;
+
+       /* Upper pair, then the pair one luma row (16 samples) below */
+       PUT_PIXEL(top,        luma[0]);
+       PUT_PIXEL(top + 3,    luma[1]);
+       PUT_PIXEL(bottom,     luma[16]);
+       PUT_PIXEL(bottom + 3, luma[17]);
+    }
+    luma += 16; /* the second luma row was consumed through luma[16..17] */
+    top += row_skip;
+    bottom += row_skip;
+  }
+
+#undef PUT_PIXEL
+#undef SCALEBITS
+#undef ONE_HALF
+#undef FIX
+
+}
+
+
+
+/**
+ *  YCrCb -> Grey (1x1)
+ *  .---.
+ *  | 1 |
+ *  `---'
+ *
+ *  Copies the 8x8 luma block into plane[0]; Cb and Cr are not read.
+ */
+static void YCrCB_to_Grey_1x1(struct jdec_private *priv)
+{
+  const unsigned char *src = priv->Y;
+  unsigned char *dst = priv->plane[0];
+  const int stride = priv->width;
+  unsigned int row;
+
+  for (row = 0; row < 8; row++, src += 8, dst += stride)
+     memcpy(dst, src, 8);
+}
+
+/**
+ *  YCrCb -> Grey (2x1)
+ *  .-------.
+ *  | 1 | 2 |
+ *  `-------'
+ *
+ *  Copies 8 rows of 16 luma bytes into plane[0]; Cb and Cr are not read.
+ */
+static void YCrCB_to_Grey_2x1(struct jdec_private *priv)
+{
+  const unsigned char *src = priv->Y;
+  unsigned char *dst = priv->plane[0];
+  unsigned int row;
+
+  for (row = 0; row < 8; row++, src += 16, dst += priv->width)
+     memcpy(dst, src, 16);
+}
+
+
+/**
+ *  YCrCb -> Grey (1x2)
+ *  .---.
+ *  | 1 |
+ *  |---|
+ *  | 2 |
+ *  `---'
+ *
+ *  Copies 16 rows of 8 luma bytes into plane[0]; Cb and Cr are not read.
+ */
+static void YCrCB_to_Grey_1x2(struct jdec_private *priv)
+{
+  const unsigned char *src = priv->Y;
+  unsigned char *dst = priv->plane[0];
+  unsigned int row;
+
+  for (row = 0; row < 16; row++, src += 8, dst += priv->width)
+     memcpy(dst, src, 8);
+}
+
+/**
+ *  YCrCb -> Grey (2x2)
+ *  .-------.
+ *  | 1 | 2 |
+ *  |---+---|
+ *  | 3 | 4 |
+ *  `-------'
+ *
+ *  Copies 16 rows of 16 luma bytes into plane[0]; Cb and Cr are not read.
+ */
+static void YCrCB_to_Grey_2x2(struct jdec_private *priv)
+{
+  const unsigned char *src = priv->Y;
+  unsigned char *dst = priv->plane[0];
+  unsigned int row;
+
+  for (row = 0; row < 16; row++, src += 16, dst += priv->width)
+     memcpy(dst, src, 16);
+}
+
+
+/*
+ * Decode one 1x1 MCU into all three planes: for Y, Cb and Cr in turn, one
+ * data unit is Huffman-decoded and then inverse-transformed into the matching
+ * 8-byte-wide buffer (priv->Y, priv->Cb, priv->Cr).
+ */
+static void decode_MCU_1x1_3planes(struct jdec_private *priv)
+{
+  struct component *comp = priv->component_infos;
+
+  /* Luma */
+  process_Huffman_data_unit(priv, cY);
+  IDCT(&comp[cY], priv->Y, 8);
+
+  /* Blue-difference chroma */
+  process_Huffman_data_unit(priv, cCb);
+  IDCT(&comp[cCb], priv->Cb, 8);
+
+  /* Red-difference chroma */
+  process_Huffman_data_unit(priv, cCr);
+  IDCT(&comp[cCr], priv->Cr, 8);
+}
+
+/*
+ * Decode a 1x1 directly in 1 color
+ *
+ * Only the luma unit is inverse-transformed (into priv->Y, 8 bytes per row).
+ * The Cb and Cr units are still Huffman-decoded so that the bitstream
+ * position stays in step, but their IDCT is skipped, exactly as the 2x1, 1x2
+ * and 2x2 single-plane decoders do.
+ */
+static void decode_MCU_1x1_1plane(struct jdec_private *priv)
+{
+  // Y
+  process_Huffman_data_unit(priv, cY);
+  IDCT(&priv->component_infos[cY], priv->Y, 8);
+  
+  // Cb: consumed from the stream only, no IDCT
+  process_Huffman_data_unit(priv, cCb);
+
+  // Cr: consumed from the stream only, no IDCT
+  process_Huffman_data_unit(priv, cCr);
+}
+
+
+/*
+ * Decode a 2x1
+ *  .-------.
+ *  | 1 | 2 |
+ *  `-------'
+ *
+ * Two luma units are decoded side by side into priv->Y (16 bytes per row),
+ * followed by one Cb and one Cr unit (8 bytes per row).
+ */
+static void decode_MCU_2x1_3planes(struct jdec_private *priv)
+{
+  static const int luma_offset[2] = { 0, 8 };
+  struct component *comp = priv->component_infos;
+  unsigned int unit;
+
+  /* Luma: left block, then right block */
+  for (unit = 0; unit < 2; unit++) {
+     process_Huffman_data_unit(priv, cY);
+     IDCT(&comp[cY], priv->Y + luma_offset[unit], 16);
+  }
+
+  /* Blue-difference chroma */
+  process_Huffman_data_unit(priv, cCb);
+  IDCT(&comp[cCb], priv->Cb, 8);
+
+  /* Red-difference chroma */
+  process_Huffman_data_unit(priv, cCr);
+  IDCT(&comp[cCr], priv->Cr, 8);
+}
+
+/*
+ * Decode a 2x1, luma only
+ *  .-------.
+ *  | 1 | 2 |
+ *  `-------'
+ *
+ * Two luma units are decoded side by side into priv->Y (16 bytes per row).
+ * The Cb and Cr units are Huffman-decoded but not inverse-transformed.
+ */
+static void decode_MCU_2x1_1plane(struct jdec_private *priv)
+{
+  static const int luma_offset[2] = { 0, 8 };
+  struct component *comp = priv->component_infos;
+  unsigned int unit;
+
+  /* Luma: left block, then right block */
+  for (unit = 0; unit < 2; unit++) {
+     process_Huffman_data_unit(priv, cY);
+     IDCT(&comp[cY], priv->Y + luma_offset[unit], 16);
+  }
+
+  /* Chroma units are only consumed from the stream */
+  process_Huffman_data_unit(priv, cCb);
+  process_Huffman_data_unit(priv, cCr);
+}
+
+
+/*
+ * Decode a 2x2
+ *  .-------.
+ *  | 1 | 2 |
+ *  |---+---|
+ *  | 3 | 4 |
+ *  `-------'
+ *
+ * Four luma units are decoded into priv->Y (16 bytes per row) in the order
+ * shown above, followed by one Cb and one Cr unit (8 bytes per row).
+ */
+static void decode_MCU_2x2_3planes(struct jdec_private *priv)
+{
+  static const int luma_offset[4] = { 0, 8, 64*2, 64*2+8 };
+  struct component *comp = priv->component_infos;
+  unsigned int unit;
+
+  /* Luma: top-left, top-right, bottom-left, bottom-right */
+  for (unit = 0; unit < 4; unit++) {
+     process_Huffman_data_unit(priv, cY);
+     IDCT(&comp[cY], priv->Y + luma_offset[unit], 16);
+  }
+
+  /* Blue-difference chroma */
+  process_Huffman_data_unit(priv, cCb);
+  IDCT(&comp[cCb], priv->Cb, 8);
+
+  /* Red-difference chroma */
+  process_Huffman_data_unit(priv, cCr);
+  IDCT(&comp[cCr], priv->Cr, 8);
+}
+
+/*
+ * Decode a 2x2 directly in GREY format (8bits)
+ *  .-------.
+ *  | 1 | 2 |
+ *  |---+---|
+ *  | 3 | 4 |
+ *  `-------'
+ *
+ * Four luma units are decoded into priv->Y (16 bytes per row) in the order
+ * shown above. The Cb and Cr units are Huffman-decoded but not
+ * inverse-transformed.
+ */
+static void decode_MCU_2x2_1plane(struct jdec_private *priv)
+{
+  static const int luma_offset[4] = { 0, 8, 64*2, 64*2+8 };
+  struct component *comp = priv->component_infos;
+  unsigned int unit;
+
+  /* Luma: top-left, top-right, bottom-left, bottom-right */
+  for (unit = 0; unit < 4; unit++) {
+     process_Huffman_data_unit(priv, cY);
+     IDCT(&comp[cY], priv->Y + luma_offset[unit], 16);
+  }
+
+  /* Chroma units are only consumed from the stream */
+  process_Huffman_data_unit(priv, cCb);
+  process_Huffman_data_unit(priv, cCr);
+}
+
+/*
+ * Decode a 1x2 mcu
+ *  .---.
+ *  | 1 |
+ *  |---|
+ *  | 2 |
+ *  `---'
+ *
+ * Two luma units are decoded one below the other into priv->Y (8 bytes per
+ * row), followed by one Cb and one Cr unit (8 bytes per row).
+ */
+static void decode_MCU_1x2_3planes(struct jdec_private *priv)
+{
+  static const int luma_offset[2] = { 0, 64 };
+  struct component *comp = priv->component_infos;
+  unsigned int unit;
+
+  /* Luma: upper block, then lower block */
+  for (unit = 0; unit < 2; unit++) {
+     process_Huffman_data_unit(priv, cY);
+     IDCT(&comp[cY], priv->Y + luma_offset[unit], 8);
+  }
+
+  /* Blue-difference chroma */
+  process_Huffman_data_unit(priv, cCb);
+  IDCT(&comp[cCb], priv->Cb, 8);
+
+  /* Red-difference chroma */
+  process_Huffman_data_unit(priv, cCr);
+  IDCT(&comp[cCr], priv->Cr, 8);
+}
+
+/*
+ * Decode a 1x2 mcu, luma only
+ *  .---.
+ *  | 1 |
+ *  |---|
+ *  | 2 |
+ *  `---'
+ *
+ * Two luma units are decoded one below the other into priv->Y (8 bytes per
+ * row). The Cb and Cr units are Huffman-decoded but not inverse-transformed.
+ */
+static void decode_MCU_1x2_1plane(struct jdec_private *priv)
+{
+  static const int luma_offset[2] = { 0, 64 };
+  struct component *comp = priv->component_infos;
+  unsigned int unit;
+
+  /* Luma: upper block, then lower block */
+  for (unit = 0; unit < 2; unit++) {
+     process_Huffman_data_unit(priv, cY);
+     IDCT(&comp[cY], priv->Y + luma_offset[unit], 8);
+  }
+
+  /* Chroma units are only consumed from the stream */
+  process_Huffman_data_unit(priv, cCb);
+  process_Huffman_data_unit(priv, cCr);
+}
+
+/*
+ * Trace the fields of a Start Of Frame segment (diagnostic output only).
+ *
+ * stream points at the segment's 2-byte length field: byte 2 is the sample
+ * precision, bytes 3-4 the height, bytes 5-6 the width (both big-endian) and
+ * byte 7 the number of components.
+ *
+ * The component count comes straight from the file, so it is range-checked
+ * before being used to index the name table.
+ */
+static void print_SOF(const unsigned char *stream)
+{
+  int width, height, nr_components, precision;
+#if DEBUG
+  static const char * const nr_components_to_string[] = {
+     "????",
+     "Grayscale",
+     "????",
+     "YCbCr",
+     "CMYK"
+  };
+  const int nr_names = sizeof(nr_components_to_string)/sizeof(nr_components_to_string[0]);
+#endif
+
+  precision = stream[2];
+  height = be16_to_cpu(stream+3);
+  width  = be16_to_cpu(stream+5);
+  nr_components = stream[7];
+
+  trace("> SOF marker\n");
+  trace("Size:%dx%d nr_components:%d (%s)  precision:%d\n", 
+      width, height,
+      nr_components,
+      (nr_components < nr_names) ? nr_components_to_string[nr_components] : "????",
+      precision);
+}
+
+/*******************************************************************************
+ *
+ * JPEG/JFIF Parsing functions
+ *
+ * Note: only a small subset of the jpeg file format is supported. No markers,
+ * nor progressive stream is supported.
+ *
+ ******************************************************************************/
+
+/*
+ * Expand a 64-entry quantization table read from the stream into the float
+ * table used by the decoder.
+ *
+ * Output entry k (row k/8, column k%8) is ref_table[zigzag[k]] multiplied by
+ * aanscalefactor[row] and aanscalefactor[column]. The scale factors follow
+ * the float AA&N IDCT method from libjpeg:
+ *   scalefactor[0] = 1
+ *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+ *
+ * qtable must have room for 64 floats; ref_table must hold 64 bytes.
+ */
+static void build_quantization_table(float *qtable, const unsigned char *ref_table)
+{
+  static const double aanscalefactor[8] = {
+     1.0, 1.387039845, 1.306562965, 1.175875602,
+     1.0, 0.785694958, 0.541196100, 0.275899379
+  };
+  int k;
+
+  for (k = 0; k < 64; k++) {
+     const int row = k >> 3;
+     const int col = k & 7;
+
+     qtable[k] = ref_table[zigzag[k]] * aanscalefactor[row] * aanscalefactor[col];
+  }
+
+}
+
+/*
+ * Parse a DQT (quantization table) segment.
+ *
+ * stream points at the segment's 2-byte big-endian length field. The payload
+ * is a sequence of tables, each one selector byte followed by 64 one-byte
+ * entries; every table is expanded by build_quantization_table() into
+ * priv->Q_tables[selector].
+ *
+ * With SANITY_CHECK enabled, 16-bit tables (non-zero high nibble) and
+ * selectors outside 0..3 are reported through error().
+ *
+ * Returns 0.
+ */
+static int parse_DQT(struct jdec_private *priv, const unsigned char *stream)
+{
+  int length, qi;
+  float *table;
+
+  trace("> DQT marker\n");
+  length = be16_to_cpu(stream) - 2;
+  stream += 2;	/* Skip length */
+
+  while (length>0)
+   {
+     qi = *stream++;
+#if SANITY_CHECK
+     if (qi>>4)
+       error("16 bits quantization table is not supported\n");
+     /* Only selectors 0..3 exist: 4 itself is already one past the end */
+     if (qi>=4)
+       error("No more 4 quantization table is supported (got %d)\n", qi);
+#endif
+     table = priv->Q_tables[qi];
+     build_quantization_table(table, stream);
+     stream += 64;
+     length -= 65;	/* selector byte + 64 entries */
+   }
+  return 0;
+}
+
+/*
+ * Parse a SOF (start of frame) segment.
+ *
+ * stream points at the segment's 2-byte length field. The image height and
+ * width are stored in priv, and for every component listed in the frame
+ * header its horizontal/vertical sampling factors and quantization table are
+ * recorded in priv->component_infos[component id].
+ *
+ * With SANITY_CHECK enabled the frame is rejected through error() unless it
+ * is 8-bit, at most 2048x2048, has 3 components and dimensions that are
+ * multiples of 16. Component ids and quantization table selectors are also
+ * range-checked there, because both are used as array indices.
+ *
+ * Returns 0.
+ */
+static int parse_SOF(struct jdec_private *priv, const unsigned char *stream)
+{
+  int i, width, height, nr_components, cid, sampling_factor;
+  int Q_table;
+  struct component *c;
+
+  print_SOF(stream);
+
+  height = be16_to_cpu(stream+3);
+  width  = be16_to_cpu(stream+5);
+  nr_components = stream[7];
+#if SANITY_CHECK
+  if (stream[2] != 8)
+    error("Precision other than 8 is not supported\n");
+  if (width>2048 || height>2048)
+    error("Width and Height (%dx%d) seems suspicious\n", width, height);
+  if (nr_components != 3)
+    error("We only support YUV images\n");
+  if (height%16)
+    error("Height need to be a multiple of 16 (current height is %d)\n", height);
+  if (width%16)
+    error("Width need to be a multiple of 16 (current Width is %d)\n", width);
+#endif
+  stream += 8;
+  for (i=0; i<nr_components; i++) {
+     cid = *stream++;
+     sampling_factor = *stream++;
+     Q_table = *stream++;
+#if SANITY_CHECK
+     /* component_infos holds COMPONENTS entries (see resync()) */
+     if (cid >= COMPONENTS)
+       error("Component id %d is out of range\n", cid);
+     /* Quantization table selectors are 0..3 (see parse_DQT()) */
+     if (Q_table >= 4)
+       error("Quantization table %d is out of range\n", Q_table);
+#endif
+     c = &priv->component_infos[cid];
+     c->Vfactor = sampling_factor&0xf;	/* low nibble: vertical */
+     c->Hfactor = sampling_factor>>4;	/* high nibble: horizontal */
+     c->Q_table = priv->Q_tables[Q_table];
+     trace("Component:%d  factor:%dx%d  Quantization table:%d\n",
+	 cid, c->Hfactor, c->Vfactor, Q_table );
+
+  }
+  priv->width = width;
+  priv->height = height;
+
+  return 0;
+}
+
+/*
+ * Parse a SOS (start of scan) segment.
+ *
+ * stream points at the segment's 2-byte length field; byte 2 is the number of
+ * components in the scan. Each component entry is an id byte followed by a
+ * byte whose high nibble selects the DC Huffman table and whose low nibble
+ * selects the AC Huffman table; the matching table pointers are stored in
+ * priv->component_infos[id].
+ *
+ * On return priv->stream points 3 bytes past the component list, i.e. past
+ * the remaining scan header bytes.
+ *
+ * With SANITY_CHECK enabled, scans that do not have exactly 3 components,
+ * component ids >= COMPONENTS and table selectors >= HUFFMAN_TABLES (the
+ * bound parse_DHT() applies when filling the same tables) are reported
+ * through error().
+ *
+ * Returns 0.
+ */
+static int parse_SOS(struct jdec_private *priv, const unsigned char *stream)
+{
+  unsigned int i, cid, table;
+  unsigned int nr_components = stream[2];
+
+  trace("> SOS marker\n");
+
+#if SANITY_CHECK
+  if (nr_components != 3)
+    error("We only support YCbCr image\n");
+#endif
+
+  stream += 3;
+  for (i=0;i<nr_components;i++) {
+     cid = *stream++;
+     table = *stream++;
+#if SANITY_CHECK
+     /* component_infos holds COMPONENTS entries (see resync()) */
+     if (cid >= COMPONENTS)
+	error("Component id %d is out of range\n", cid);
+     if ((table&0xf) >= HUFFMAN_TABLES)
+	error("We do not support more than %d AC Huffman table\n", HUFFMAN_TABLES);
+     if ((table>>4) >= HUFFMAN_TABLES)
+	error("We do not support more than %d DC Huffman table\n", HUFFMAN_TABLES);
+     trace("ComponentId:%d  tableAC:%d tableDC:%d\n", cid, table&0xf, table>>4);
+#endif
+     priv->component_infos[cid].AC_table = &priv->HTAC[table&0xf];
+     priv->component_infos[cid].DC_table = &priv->HTDC[table>>4];
+  }
+  priv->stream = stream+3;
+  return 0;
+}
+
+/*
+ * Parse a DHT (Huffman table) segment.
+ *
+ * stream points at the segment's 2-byte big-endian length field. The payload
+ * may carry several tables. Each one consists of:
+ *   - one index byte: a non-zero high nibble means an AC table, zero a DC
+ *     table; the low nibble is the table slot,
+ *   - 16 bytes giving the number of codes of length 1..16,
+ *   - one value byte per code ("count" bytes in total).
+ * Every table is handed to build_huffman_table() and stored in priv->HTAC or
+ * priv->HTDC.
+ *
+ * Returns 0.
+ */
+static int parse_DHT(struct jdec_private *priv, const unsigned char *stream)
+{
+  unsigned int count, i;
+  unsigned char huff_bits[17];
+  int length, index;
+
+  length = be16_to_cpu(stream) - 2;
+  stream += 2;	/* Skip length */
+
+  trace("> DHT marker (length=%d)\n", length);
+
+  while (length>0) {
+     index = *stream++;
+
+     /* We need to calculate the number of bytes 'vals' will takes */
+     huff_bits[0] = 0;
+     count = 0;
+     for (i=1; i<17; i++) {
+	huff_bits[i] = *stream++;
+	count += huff_bits[i];
+     }
+#if SANITY_CHECK
+     if (count > 1024)
+       error("No more than 1024 bytes is allowed to describe a huffman table");
+     if ( (index &0xf) >= HUFFMAN_TABLES)
+       error("No mode than %d Huffman tables is supported\n", HUFFMAN_TABLES);
+     trace("Huffman table %s n%d\n", (index&0xf0)?"AC":"DC", index&0xf);
+     trace("Length of the table: %d\n", count);
+#endif
+
+     if (index & 0xf0 )
+       build_huffman_table(huff_bits, stream, &priv->HTAC[index&0xf]);
+     else
+       build_huffman_table(huff_bits, stream, &priv->HTDC[index&0xf]);
+
+     /* Step over the value bytes so that the next table in the same segment
+      * is read from its own index byte */
+     stream += count;
+
+     length -= 1;
+     length -= 16;
+     length -= count;
+  }
+  trace("< DHT marker\n");
+  return 0;
+}
+
+/*
+ * Reset the entropy-decoder state: clear the DC predictor of every component
+ * and empty the bit reservoir.
+ */
+static void resync(struct jdec_private *priv)
+{
+  int c = COMPONENTS;
+
+  /* Init DC coefficients */
+  while (c-- > 0)
+     priv->component_infos[c].previous_DC = 0;
+
+  priv->nbits_in_reservoir = 0;
+  priv->reservoir = 0;
+
+}
+
+
+/*
+ * Walk the marker segments that follow the SOI marker until the SOS segment
+ * has been parsed.
+ *
+ * Every segment must start with 0xff (extra 0xff fill bytes are skipped),
+ * followed by the marker byte and a 2-byte big-endian length that is used to
+ * find the next segment. SOF, DQT, DHT and SOS are dispatched to their
+ * parsers; any other marker is skipped. If no DHT segment was seen, the
+ * default Huffman tables are installed.
+ *
+ * With SANITY_CHECK enabled, the sampling factors are validated afterwards:
+ * Y must not be sampled more coarsely than Cb/Cr, and Cb/Cr must be 1x1.
+ *
+ * Returns 0 on success, -1 when a segment does not start with 0xff or a
+ * segment parser fails.
+ *
+ * NOTE(review): segment lengths are not checked against the size of the
+ * input buffer here.
+ */
+static int parse_JFIF(struct jdec_private *priv, const unsigned char *stream)
+{
+  int chunk_len;
+  int marker;
+  int sos_marker_found = 0;
+  int dht_marker_found = 0;
+  const unsigned char *next_chunk;
+
+  /* Parse marker */
+  while (!sos_marker_found)
+   {
+     if (*stream++ != 0xff)
+       goto bogus_jpeg_format;
+     /* Skip any padding ff byte (this is normal) */
+     while (*stream == 0xff)
+       stream++;
+
+     marker = *stream++;
+     chunk_len = be16_to_cpu(stream);
+     next_chunk = stream + chunk_len;
+     switch (marker)
+      {
+       case SOF:
+	 if (parse_SOF(priv, stream) < 0)
+	   return -1;
+	 break;
+       case DQT:
+	 if (parse_DQT(priv, stream) < 0)
+	   return -1;
+	 break;
+       case SOS:
+	 if (parse_SOS(priv, stream) < 0)
+	   return -1;
+	 sos_marker_found = 1;
+	 break;
+       case DHT:
+	 if (parse_DHT(priv, stream) < 0)
+	   return -1;
+	 dht_marker_found = 1;
+	 break;
+       default:
+	 trace("> Unknown marker %2.2x\n", marker);
+	 break;
+      }
+
+     stream = next_chunk;
+   }
+
+  if (!dht_marker_found) {
+    trace("No Huffman table loaded, using the default one\n");
+    build_default_huffman_tables(priv);
+  }
+
+  /* Tested with #if like every other SANITY_CHECK block in this file, so that
+   * defining SANITY_CHECK to 0 really disables the checks */
+#if SANITY_CHECK
+  if (   (priv->component_infos[cY].Hfactor < priv->component_infos[cCb].Hfactor)
+      || (priv->component_infos[cY].Hfactor < priv->component_infos[cCr].Hfactor))
+    error("Horizontal sampling factor for Y should be greater than horitontal sampling factor for Cb or Cr\n");
+  if (   (priv->component_infos[cY].Vfactor < priv->component_infos[cCb].Vfactor)
+      || (priv->component_infos[cY].Vfactor < priv->component_infos[cCr].Vfactor))
+    error("Vertical sampling factor for Y should be greater than vertical sampling factor for Cb or Cr\n");
+  if (   (priv->component_infos[cCb].Hfactor!=1) 
+      || (priv->component_infos[cCr].Hfactor!=1)
+      || (priv->component_infos[cCb].Vfactor!=1)
+      || (priv->component_infos[cCr].Vfactor!=1))
+    error("Sampling other than 1x1 for Cr and Cb is not supported");
+#endif
+
+  return 0;
+bogus_jpeg_format:
+  trace("Bogus jpeg format\n");
+  return -1;
+}
+
+/*******************************************************************************
+ *
+ * Functions exported by the library.
+ *
+ * Note: Some applications can directly access the internal pointers of the
+ * structure. This is not recommended, but if you have many images to
+ * uncompress with the same parameters, some functions can be called to speed
+ * up the decoding.
+ *
+ ******************************************************************************/
+
+/**
+ * Allocate a new tinyjpeg decoder object.
+ *
+ * A decoder object must be allocated before any other function is called.
+ * The object is returned zero-filled; NULL is returned when the allocation
+ * fails.
+ */
+struct jdec_private *tinyjpeg_init(void)
+{
+  /* calloc() yields zeroed storage, or NULL on failure */
+  return (struct jdec_private *)calloc(1, sizeof(struct jdec_private));
+}
+
+/**
+ * Free a tinyjpeg object.
+ *
+ * Releases every buffer attached to priv->components and then the object
+ * itself. Passing NULL is allowed and does nothing, like free().
+ *
+ * No other function can be called on the object after this one.
+ */
+void tinyjpeg_free(struct jdec_private *priv)
+{
+  int i;
+
+  if (priv == NULL)
+    return;
+
+  for (i=0; i<COMPONENTS; i++) {
+     free(priv->components[i]);	/* free(NULL) is a no-op */
+     priv->components[i] = NULL;
+  }
+  free(priv);
+}
+
+/**
+ * Initialize the tinyjpeg object and prepare the decoding of the stream.
+ *
+ * Checks that buf starts with the SOI marker (0xFF, SOI), records the start
+ * and length of the data that follows it in priv, and parses the header
+ * segments with parse_JFIF().
+ *
+ * A buffer shorter than the 2-byte SOI marker is rejected before any byte is
+ * read, which also keeps size-2 from wrapping around.
+ *
+ * Returns the result of parse_JFIF(): 0 on success, -1 on a malformed
+ * stream.
+ */
+int tinyjpeg_parse_header(struct jdec_private *priv, const unsigned char *buf, unsigned int size)
+{
+  int ret;
+
+  /* Identify the file */
+  if ((size < 2) || (buf[0] != 0xFF) || (buf[1] != SOI))
+    error("Not a JPG file ?\n");
+
+  priv->stream_begin = buf+2;
+  priv->stream_length = size-2;
+
+  ret = parse_JFIF(priv, priv->stream_begin);
+
+  return ret;
+}
+
+static const decode_MCU_fct decode_mcu_3comp_table[4] = {
+   decode_MCU_1x1_3planes,
+   decode_MCU_1x2_3planes,
+   decode_MCU_2x1_3planes,
+   decode_MCU_2x2_3planes,
+};
+
+static const decode_MCU_fct decode_mcu_1comp_table[4] = {
+   decode_MCU_1x1_1plane,
+   decode_MCU_1x2_1plane,
+   decode_MCU_2x1_1plane,
+   decode_MCU_2x2_1plane,
+};
+
+static const convert_colorspace_fct convert_colorspace_yuv420p[4] = {
+   YCrCB_to_YUV420P_1x1,
+   YCrCB_to_YUV420P_1x2,
+   YCrCB_to_YUV420P_2x1,
+   YCrCB_to_YUV420P_2x2,
+};
+
+/*
+ * Converters selected by tinyjpeg_decode() for TINYJPEG_FMT_RGB24.
+ * Slot order: 0 = 1x1, 1 = 1x2, 2 = 2x1, 3 = 2x2.
+ */
+static const convert_colorspace_fct convert_colorspace_rgb24[4] = {
+   [0] = YCrCB_to_RGB24_1x1,
+   [1] = YCrCB_to_RGB24_1x2,
+   [2] = YCrCB_to_RGB24_2x1,
+   [3] = YCrCB_to_RGB24_2x2,
+};
+
+/*
+ * Converters selected by tinyjpeg_decode() for TINYJPEG_FMT_BGR24.
+ * Slot order: 0 = 1x1, 1 = 1x2, 2 = 2x1, 3 = 2x2.
+ */
+static const convert_colorspace_fct convert_colorspace_bgr24[4] = {
+   [0] = YCrCB_to_BGR24_1x1,
+   [1] = YCrCB_to_BGR24_1x2,
+   [2] = YCrCB_to_BGR24_2x1,
+   [3] = YCrCB_to_BGR24_2x2,
+};
+
+/*
+ * Converters selected by tinyjpeg_decode() for TINYJPEG_FMT_GREY.
+ * Slot order: 0 = 1x1, 1 = 1x2, 2 = 2x1, 3 = 2x2.
+ */
+static const convert_colorspace_fct convert_colorspace_grey[4] = {
+   [0] = YCrCB_to_Grey_1x1,
+   [1] = YCrCB_to_Grey_1x2,
+   [2] = YCrCB_to_Grey_2x1,
+   [3] = YCrCB_to_Grey_2x2,
+};
+
+/**
+ * Decode and convert the jpeg image into a @pixfmt@ image.
+ *
+ * pixfmt is one of TINYJPEG_FMT_YUV420P, TINYJPEG_FMT_RGB24,
+ * TINYJPEG_FMT_BGR24 or TINYJPEG_FMT_GREY.
+ *
+ * Note: output components are automatically allocated with malloc() when no
+ * memory is attached to them.  TINYJPEG_FMT_YUV420P uses three components,
+ * the other formats only the first one.
+ *
+ * Only whole rows of macroblocks are decoded: the row count is the image
+ * height divided (rounding down) by the macroblock height.
+ *
+ * Returns 0 on success, -1 when pixfmt is not recognised or when an output
+ * component could not be allocated.
+ */
+int tinyjpeg_decode(struct jdec_private *priv, int pixfmt)
+{
+  unsigned int x, y, xstride_by_mcu, ystride_by_mcu;
+  unsigned int mcu_rows, nplanes, i;
+  unsigned int bytes_per_blocklines[3], bytes_per_mcu[3];
+  decode_MCU_fct decode_MCU;
+  const decode_MCU_fct *decode_mcu_table;
+  const convert_colorspace_fct *colorspace_array_conv;
+  convert_colorspace_fct convert_to_pixfmt;
+
+  /* Only the planar format sets planes 1 and 2; default them to zero. */
+  bytes_per_mcu[1] = 0;
+  bytes_per_mcu[2] = 0;
+  bytes_per_blocklines[1] = 0;
+  bytes_per_blocklines[2] = 0;
+
+  /* Number of output components that must be backed by memory. */
+  nplanes = 1;
+
+  decode_mcu_table = decode_mcu_3comp_table;
+  switch (pixfmt) {
+     case TINYJPEG_FMT_YUV420P:
+       colorspace_array_conv = convert_colorspace_yuv420p;
+       nplanes = 3;
+       if (priv->components[0] == NULL)
+         priv->components[0] = malloc(priv->width * priv->height);
+       if (priv->components[1] == NULL)
+         priv->components[1] = malloc(priv->width * priv->height/4);
+       if (priv->components[2] == NULL)
+         priv->components[2] = malloc(priv->width * priv->height/4);
+       bytes_per_blocklines[0] = priv->width;
+       bytes_per_blocklines[1] = priv->width/4;
+       bytes_per_blocklines[2] = priv->width/4;
+       bytes_per_mcu[0] = 8;
+       bytes_per_mcu[1] = 4;
+       bytes_per_mcu[2] = 4;
+       break;
+
+     case TINYJPEG_FMT_RGB24:
+       colorspace_array_conv = convert_colorspace_rgb24;
+       if (priv->components[0] == NULL)
+         priv->components[0] = malloc(priv->width * priv->height * 3);
+       bytes_per_blocklines[0] = priv->width * 3;
+       bytes_per_mcu[0] = 3*8;
+       break;
+
+     case TINYJPEG_FMT_BGR24:
+       colorspace_array_conv = convert_colorspace_bgr24;
+       if (priv->components[0] == NULL)
+         priv->components[0] = malloc(priv->width * priv->height * 3);
+       bytes_per_blocklines[0] = priv->width * 3;
+       bytes_per_mcu[0] = 3*8;
+       break;
+
+     case TINYJPEG_FMT_GREY:
+       decode_mcu_table = decode_mcu_1comp_table;
+       colorspace_array_conv = convert_colorspace_grey;
+       if (priv->components[0] == NULL)
+         priv->components[0] = malloc(priv->width * priv->height);
+       bytes_per_blocklines[0] = priv->width;
+       bytes_per_mcu[0] = 8;
+       break;
+
+     default:
+       trace("Bad pixel format\n");
+       return -1;
+  }
+
+  /*
+   * A component that is still NULL here means malloc() failed above.
+   * Bail out instead of letting the converters write through a NULL plane.
+   */
+  for (i = 0; i < nplanes; i++) {
+     if (priv->components[i] == NULL) {
+       trace("Cannot allocate memory for the output image\n");
+       return -1;
+     }
+  }
+
+  /* Pick the decoder/converter pair matching the sampling factors of Y. */
+  xstride_by_mcu = ystride_by_mcu = 8;
+  if ((priv->component_infos[cY].Hfactor | priv->component_infos[cY].Vfactor) == 1) {
+     decode_MCU = decode_mcu_table[0];
+     convert_to_pixfmt = colorspace_array_conv[0];
+     trace("Use decode 1x1 sampling\n");
+  } else if (priv->component_infos[cY].Hfactor == 1) {
+     decode_MCU = decode_mcu_table[1];
+     convert_to_pixfmt = colorspace_array_conv[1];
+     ystride_by_mcu = 16;
+     trace("Use decode 1x2 sampling (not supported)\n");
+  } else if (priv->component_infos[cY].Vfactor == 2) {
+     decode_MCU = decode_mcu_table[3];
+     convert_to_pixfmt = colorspace_array_conv[3];
+     xstride_by_mcu = 16;
+     ystride_by_mcu = 16;
+     trace("Use decode 2x2 sampling\n");
+  } else {
+     decode_MCU = decode_mcu_table[2];
+     convert_to_pixfmt = colorspace_array_conv[2];
+     xstride_by_mcu = 16;
+     trace("Use decode 2x1 sampling\n");
+  }
+
+  resync(priv);
+
+  /* Do not forget that a block can be either 8 or 16 lines high */
+  bytes_per_blocklines[0] *= ystride_by_mcu;
+  bytes_per_blocklines[1] *= ystride_by_mcu;
+  bytes_per_blocklines[2] *= ystride_by_mcu;
+
+  /* ... and either 8 or 16 pixels wide */
+  bytes_per_mcu[0] *= xstride_by_mcu/8;
+  bytes_per_mcu[1] *= xstride_by_mcu/8;
+  bytes_per_mcu[2] *= xstride_by_mcu/8;
+
+  /* Decode the image macroblock by macroblock (size is 8x8, 8x16, or 16x16) */
+  mcu_rows = priv->height / ystride_by_mcu;
+  for (y = 0; y < mcu_rows; y++)
+   {
+     /* Point every plane at the start of the current row of macroblocks. */
+     priv->plane[0] = priv->components[0] + (y * bytes_per_blocklines[0]);
+     priv->plane[1] = priv->components[1] + (y * bytes_per_blocklines[1]);
+     priv->plane[2] = priv->components[2] + (y * bytes_per_blocklines[2]);
+     for (x = 0; x < priv->width; x += xstride_by_mcu)
+      {
+        decode_MCU(priv);
+        convert_to_pixfmt(priv);
+        /* Step to the next macroblock in this row. */
+        priv->plane[0] += bytes_per_mcu[0];
+        priv->plane[1] += bytes_per_mcu[1];
+        priv->plane[2] += bytes_per_mcu[2];
+      }
+   }
+
+  return 0;
+}
+
+/**
+ * Give access to the decoder's error message.
+ *
+ * The message is read from error_string, not from priv, so the argument is
+ * not consulted.
+ */
+const char *tinyjpeg_get_errorstring(struct jdec_private *priv)
+{
+  (void)priv;
+  return error_string;
+}
+
+/**
+ * Report the image dimensions stored in the decoder object.
+ *
+ * Both width and height must point to writable storage; each receives the
+ * corresponding field of priv.
+ */
+void tinyjpeg_get_size(struct jdec_private *priv, unsigned int *width, unsigned int *height)
+{
+  *height = priv->height;
+  *width = priv->width;
+}
+
+/**
+ * Copy the decoder's component pointers into the caller's array.
+ *
+ * Pointers are copied starting at slot 0 and copying stops at the first NULL
+ * component or after COMPONENTS entries, whichever comes first.  Entries of
+ * components past that point are left untouched.
+ *
+ * Always returns 0.
+ */
+int tinyjpeg_get_components(struct jdec_private *priv, unsigned char **components)
+{
+  int i;
+
+  /* The bound is tested before the slot is read, so the loop never looks
+   * past the end of priv->components when every slot is in use. */
+  for (i = 0; i < COMPONENTS && priv->components[i]; i++)
+    components[i] = priv->components[i];
+  return 0;
+}
+
+/**
+ * Attach caller-provided component pointers to the decoder object.
+ *
+ * At most COMPONENTS pointers are taken from components; a larger
+ * ncomponents is silently clamped.  Slots beyond the copied count keep their
+ * previous value.
+ *
+ * Always returns 0.
+ */
+int tinyjpeg_set_components(struct jdec_private *priv, unsigned char **components, unsigned int ncomponents)
+{
+  unsigned int count = (ncomponents > COMPONENTS) ? COMPONENTS : ncomponents;
+  unsigned int slot;
+
+  for (slot = 0; slot < count; slot++)
+    priv->components[slot] = components[slot];
+  return 0;
+}
+
+/**
+ * Replace the decoder's flags.
+ *
+ * Stores flags in priv and returns the value that was stored there before
+ * the call.
+ */
+int tinyjpeg_set_flags(struct jdec_private *priv, int flags)
+{
+  const int previous = priv->flags;
+
+  priv->flags = flags;
+  return previous;
+}
+
author	H. Peter Anvin <hpa@zytor.com>	2006-09-01 21:55:10 -0700
committer	H. Peter Anvin <hpa@zytor.com>	2006-09-01 21:55:10 -0700
commit	6124926122f979e85aba8beb27a2d76d7edadc3a (patch)
tree	72a5564580b441c3b71412faa272b9f4e6e5ef72 /com32/lib/jpeg
parent	743ac8f1721cef695e1393f8bc76ccdb62445762 (diff)
download	syslinux-6124926122f979e85aba8beb27a2d76d7edadc3a.tar.gz