summaryrefslogtreecommitdiff
path: root/libmpeg2
diff options
context:
space:
mode:
authorLorry Tar Creator <lorry-tar-importer@lorry>2008-07-18 14:57:36 +0000
committerLorry Tar Creator <lorry-tar-importer@lorry>2008-07-18 14:57:36 +0000
commitd1e80dc431815df3ac6eeed654eeccaf8d6beea7 (patch)
treecc8b20e9558f81920c490fe4ec1384c8b7a0662b /libmpeg2
downloadlibmpeg2-tarball-d1e80dc431815df3ac6eeed654eeccaf8d6beea7.tar.gz
Diffstat (limited to 'libmpeg2')
-rw-r--r--libmpeg2/Makefile.am24
-rw-r--r--libmpeg2/Makefile.in784
-rw-r--r--libmpeg2/alloc.c70
-rw-r--r--libmpeg2/convert/Makefile.am15
-rw-r--r--libmpeg2/convert/Makefile.in558
-rw-r--r--libmpeg2/convert/convert_internal.h42
-rw-r--r--libmpeg2/convert/libmpeg2convert.pc.in10
-rw-r--r--libmpeg2/convert/rgb.c598
-rw-r--r--libmpeg2/convert/rgb_mmx.c321
-rw-r--r--libmpeg2/convert/rgb_vis.c384
-rw-r--r--libmpeg2/convert/uyvy.c123
-rw-r--r--libmpeg2/cpu_accel.c260
-rw-r--r--libmpeg2/cpu_state.c129
-rw-r--r--libmpeg2/decode.c439
-rw-r--r--libmpeg2/header.c964
-rw-r--r--libmpeg2/idct.c289
-rw-r--r--libmpeg2/idct_alpha.c377
-rw-r--r--libmpeg2/idct_altivec.c286
-rw-r--r--libmpeg2/idct_mmx.c1305
-rw-r--r--libmpeg2/libmpeg2.pc.in10
-rw-r--r--libmpeg2/motion_comp.c135
-rw-r--r--libmpeg2/motion_comp_alpha.c253
-rw-r--r--libmpeg2/motion_comp_altivec.c1010
-rw-r--r--libmpeg2/motion_comp_arm.c185
-rw-r--r--libmpeg2/motion_comp_arm_s.S323
-rw-r--r--libmpeg2/motion_comp_mmx.c1005
-rw-r--r--libmpeg2/motion_comp_vis.c2061
-rw-r--r--libmpeg2/mpeg2_internal.h317
-rw-r--r--libmpeg2/slice.c2078
-rw-r--r--libmpeg2/vlc.h434
30 files changed, 14789 insertions, 0 deletions
diff --git a/libmpeg2/Makefile.am b/libmpeg2/Makefile.am
new file mode 100644
index 0000000..a4dd944
--- /dev/null
+++ b/libmpeg2/Makefile.am
@@ -0,0 +1,24 @@
+SUBDIRS = convert
+
+AM_CFLAGS = $(OPT_CFLAGS) $(LIBMPEG2_CFLAGS)
+
+lib_LTLIBRARIES = libmpeg2.la
+libmpeg2_la_SOURCES = alloc.c header.c decode.c slice.c motion_comp.c idct.c
+libmpeg2_la_LIBADD = libmpeg2arch.la
+libmpeg2_la_LDFLAGS = -no-undefined -version-info 1:0:1
+
+noinst_LTLIBRARIES = libmpeg2arch.la
+libmpeg2arch_la_SOURCES = motion_comp_mmx.c idct_mmx.c \
+ motion_comp_altivec.c idct_altivec.c \
+ motion_comp_alpha.c idct_alpha.c \
+ motion_comp_vis.c motion_comp_arm.c \
+ cpu_accel.c cpu_state.c
+if ARCH_ARM
+libmpeg2arch_la_SOURCES += motion_comp_arm_s.S
+endif
+libmpeg2arch_la_CFLAGS = $(OPT_CFLAGS) $(ARCH_OPT_CFLAGS) $(LIBMPEG2_CFLAGS)
+
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = libmpeg2.pc
+
+EXTRA_DIST = vlc.h mpeg2_internal.h
diff --git a/libmpeg2/Makefile.in b/libmpeg2/Makefile.in
new file mode 100644
index 0000000..347eaf3
--- /dev/null
+++ b/libmpeg2/Makefile.in
@@ -0,0 +1,784 @@
+# Makefile.in generated by automake 1.10.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+@ARCH_ARM_TRUE@am__append_1 = motion_comp_arm_s.S
+subdir = libmpeg2
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
+ $(srcdir)/libmpeg2.pc.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/cflags.m4 \
+ $(top_srcdir)/m4/inttypes.m4 $(top_srcdir)/m4/keywords.m4 \
+ $(top_srcdir)/m4/nonpic.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/include/config.h
+CONFIG_CLEAN_FILES = libmpeg2.pc
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = `echo $$p | sed -e 's|^.*/||'`;
+am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(pkgconfigdir)"
+libLTLIBRARIES_INSTALL = $(INSTALL)
+LTLIBRARIES = $(lib_LTLIBRARIES) $(noinst_LTLIBRARIES)
+libmpeg2_la_DEPENDENCIES = libmpeg2arch.la
+am_libmpeg2_la_OBJECTS = alloc.lo header.lo decode.lo slice.lo \
+ motion_comp.lo idct.lo
+libmpeg2_la_OBJECTS = $(am_libmpeg2_la_OBJECTS)
+libmpeg2_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(libmpeg2_la_LDFLAGS) $(LDFLAGS) -o $@
+libmpeg2arch_la_LIBADD =
+am__libmpeg2arch_la_SOURCES_DIST = motion_comp_mmx.c idct_mmx.c \
+ motion_comp_altivec.c idct_altivec.c motion_comp_alpha.c \
+ idct_alpha.c motion_comp_vis.c motion_comp_arm.c cpu_accel.c \
+ cpu_state.c motion_comp_arm_s.S
+@ARCH_ARM_TRUE@am__objects_1 = motion_comp_arm_s.lo
+am_libmpeg2arch_la_OBJECTS = libmpeg2arch_la-motion_comp_mmx.lo \
+ libmpeg2arch_la-idct_mmx.lo \
+ libmpeg2arch_la-motion_comp_altivec.lo \
+ libmpeg2arch_la-idct_altivec.lo \
+ libmpeg2arch_la-motion_comp_alpha.lo \
+ libmpeg2arch_la-idct_alpha.lo \
+ libmpeg2arch_la-motion_comp_vis.lo \
+ libmpeg2arch_la-motion_comp_arm.lo \
+ libmpeg2arch_la-cpu_accel.lo libmpeg2arch_la-cpu_state.lo \
+ $(am__objects_1)
+libmpeg2arch_la_OBJECTS = $(am_libmpeg2arch_la_OBJECTS)
+libmpeg2arch_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(libmpeg2arch_la_CFLAGS) \
+ $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/include
+depcomp = $(SHELL) $(top_srcdir)/.auto/depcomp
+am__depfiles_maybe = depfiles
+CPPASCOMPILE = $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS)
+LTCPPASCOMPILE = $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS)
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+SOURCES = $(libmpeg2_la_SOURCES) $(libmpeg2arch_la_SOURCES)
+DIST_SOURCES = $(libmpeg2_la_SOURCES) \
+ $(am__libmpeg2arch_la_SOURCES_DIST)
+RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
+ html-recursive info-recursive install-data-recursive \
+ install-dvi-recursive install-exec-recursive \
+ install-html-recursive install-info-recursive \
+ install-pdf-recursive install-ps-recursive install-recursive \
+ installcheck-recursive installdirs-recursive pdf-recursive \
+ ps-recursive uninstall-recursive
+pkgconfigDATA_INSTALL = $(INSTALL_DATA)
+DATA = $(pkgconfig_DATA)
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
+ distclean-recursive maintainer-clean-recursive
+ETAGS = etags
+CTAGS = ctags
+DIST_SUBDIRS = $(SUBDIRS)
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_CPPFLAGS = @AM_CPPFLAGS@
+AR = @AR@
+ARCH_OPT_CFLAGS = @ARCH_OPT_CFLAGS@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCAS = @CCAS@
+CCASDEPMODE = @CCASDEPMODE@
+CCASFLAGS = @CCASFLAGS@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+ECHO = @ECHO@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+F77 = @F77@
+FFLAGS = @FFLAGS@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LDFLAGS = @LDFLAGS@
+LIBMPEG2_CFLAGS = @LIBMPEG2_CFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBVO_CFLAGS = @LIBVO_CFLAGS@
+LIBVO_LIBS = @LIBVO_LIBS@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+MPEG2DEC_CFLAGS = @MPEG2DEC_CFLAGS@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OPT_CFLAGS = @OPT_CFLAGS@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SDLCONFIG = @SDLCONFIG@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+XMKMF = @XMKMF@
+X_CFLAGS = @X_CFLAGS@
+X_EXTRA_LIBS = @X_EXTRA_LIBS@
+X_LIBS = @X_LIBS@
+X_PRE_LIBS = @X_PRE_LIBS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_F77 = @ac_ct_F77@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+SUBDIRS = convert
+AM_CFLAGS = $(OPT_CFLAGS) $(LIBMPEG2_CFLAGS)
+lib_LTLIBRARIES = libmpeg2.la
+libmpeg2_la_SOURCES = alloc.c header.c decode.c slice.c motion_comp.c idct.c
+libmpeg2_la_LIBADD = libmpeg2arch.la
+libmpeg2_la_LDFLAGS = -no-undefined -version-info 1:0:1
+noinst_LTLIBRARIES = libmpeg2arch.la
+libmpeg2arch_la_SOURCES = motion_comp_mmx.c idct_mmx.c \
+ motion_comp_altivec.c idct_altivec.c motion_comp_alpha.c \
+ idct_alpha.c motion_comp_vis.c motion_comp_arm.c cpu_accel.c \
+ cpu_state.c $(am__append_1)
+libmpeg2arch_la_CFLAGS = $(OPT_CFLAGS) $(ARCH_OPT_CFLAGS) $(LIBMPEG2_CFLAGS)
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = libmpeg2.pc
+EXTRA_DIST = vlc.h mpeg2_internal.h
+all: all-recursive
+
+.SUFFIXES:
+.SUFFIXES: .S .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
+ && exit 0; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign libmpeg2/Makefile'; \
+ cd $(top_srcdir) && \
+ $(AUTOMAKE) --foreign libmpeg2/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+libmpeg2.pc: $(top_builddir)/config.status $(srcdir)/libmpeg2.pc.in
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
+install-libLTLIBRARIES: $(lib_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)"
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ if test -f $$p; then \
+ f=$(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) '$$p' '$(DESTDIR)$(libdir)/$$f'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) "$$p" "$(DESTDIR)$(libdir)/$$f"; \
+ else :; fi; \
+ done
+
+uninstall-libLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ p=$(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$p'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$p"; \
+ done
+
+clean-libLTLIBRARIES:
+ -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+ test "$$dir" != "$$p" || dir=.; \
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+
+clean-noinstLTLIBRARIES:
+ -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+ @list='$(noinst_LTLIBRARIES)'; for p in $$list; do \
+ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+ test "$$dir" != "$$p" || dir=.; \
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+libmpeg2.la: $(libmpeg2_la_OBJECTS) $(libmpeg2_la_DEPENDENCIES)
+ $(libmpeg2_la_LINK) -rpath $(libdir) $(libmpeg2_la_OBJECTS) $(libmpeg2_la_LIBADD) $(LIBS)
+libmpeg2arch.la: $(libmpeg2arch_la_OBJECTS) $(libmpeg2arch_la_DEPENDENCIES)
+ $(libmpeg2arch_la_LINK) $(libmpeg2arch_la_OBJECTS) $(libmpeg2arch_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alloc.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/decode.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/header.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/idct.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmpeg2arch_la-cpu_accel.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmpeg2arch_la-cpu_state.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmpeg2arch_la-idct_alpha.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmpeg2arch_la-idct_altivec.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmpeg2arch_la-idct_mmx.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmpeg2arch_la-motion_comp_alpha.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmpeg2arch_la-motion_comp_altivec.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmpeg2arch_la-motion_comp_arm.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmpeg2arch_la-motion_comp_mmx.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmpeg2arch_la-motion_comp_vis.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/motion_comp.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/motion_comp_arm_s.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slice.Plo@am__quote@
+
+.S.o:
+@am__fastdepCCAS_TRUE@ $(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCCAS_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCCAS_FALSE@ $(CPPASCOMPILE) -c -o $@ $<
+
+.S.obj:
+@am__fastdepCCAS_TRUE@ $(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCCAS_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCCAS_FALSE@ $(CPPASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.S.lo:
+@am__fastdepCCAS_TRUE@ $(LTCPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCCAS_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCCAS_FALSE@ $(LTCPPASCOMPILE) -c -o $@ $<
+
+.c.o:
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(COMPILE) -c $<
+
+.c.obj:
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
+
+libmpeg2arch_la-motion_comp_mmx.lo: motion_comp_mmx.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2arch_la_CFLAGS) $(CFLAGS) -MT libmpeg2arch_la-motion_comp_mmx.lo -MD -MP -MF $(DEPDIR)/libmpeg2arch_la-motion_comp_mmx.Tpo -c -o libmpeg2arch_la-motion_comp_mmx.lo `test -f 'motion_comp_mmx.c' || echo '$(srcdir)/'`motion_comp_mmx.c
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/libmpeg2arch_la-motion_comp_mmx.Tpo $(DEPDIR)/libmpeg2arch_la-motion_comp_mmx.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='motion_comp_mmx.c' object='libmpeg2arch_la-motion_comp_mmx.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2arch_la_CFLAGS) $(CFLAGS) -c -o libmpeg2arch_la-motion_comp_mmx.lo `test -f 'motion_comp_mmx.c' || echo '$(srcdir)/'`motion_comp_mmx.c
+
+libmpeg2arch_la-idct_mmx.lo: idct_mmx.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2arch_la_CFLAGS) $(CFLAGS) -MT libmpeg2arch_la-idct_mmx.lo -MD -MP -MF $(DEPDIR)/libmpeg2arch_la-idct_mmx.Tpo -c -o libmpeg2arch_la-idct_mmx.lo `test -f 'idct_mmx.c' || echo '$(srcdir)/'`idct_mmx.c
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/libmpeg2arch_la-idct_mmx.Tpo $(DEPDIR)/libmpeg2arch_la-idct_mmx.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='idct_mmx.c' object='libmpeg2arch_la-idct_mmx.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2arch_la_CFLAGS) $(CFLAGS) -c -o libmpeg2arch_la-idct_mmx.lo `test -f 'idct_mmx.c' || echo '$(srcdir)/'`idct_mmx.c
+
+libmpeg2arch_la-motion_comp_altivec.lo: motion_comp_altivec.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2arch_la_CFLAGS) $(CFLAGS) -MT libmpeg2arch_la-motion_comp_altivec.lo -MD -MP -MF $(DEPDIR)/libmpeg2arch_la-motion_comp_altivec.Tpo -c -o libmpeg2arch_la-motion_comp_altivec.lo `test -f 'motion_comp_altivec.c' || echo '$(srcdir)/'`motion_comp_altivec.c
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/libmpeg2arch_la-motion_comp_altivec.Tpo $(DEPDIR)/libmpeg2arch_la-motion_comp_altivec.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='motion_comp_altivec.c' object='libmpeg2arch_la-motion_comp_altivec.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2arch_la_CFLAGS) $(CFLAGS) -c -o libmpeg2arch_la-motion_comp_altivec.lo `test -f 'motion_comp_altivec.c' || echo '$(srcdir)/'`motion_comp_altivec.c
+
+libmpeg2arch_la-idct_altivec.lo: idct_altivec.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2arch_la_CFLAGS) $(CFLAGS) -MT libmpeg2arch_la-idct_altivec.lo -MD -MP -MF $(DEPDIR)/libmpeg2arch_la-idct_altivec.Tpo -c -o libmpeg2arch_la-idct_altivec.lo `test -f 'idct_altivec.c' || echo '$(srcdir)/'`idct_altivec.c
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/libmpeg2arch_la-idct_altivec.Tpo $(DEPDIR)/libmpeg2arch_la-idct_altivec.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='idct_altivec.c' object='libmpeg2arch_la-idct_altivec.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2arch_la_CFLAGS) $(CFLAGS) -c -o libmpeg2arch_la-idct_altivec.lo `test -f 'idct_altivec.c' || echo '$(srcdir)/'`idct_altivec.c
+
+libmpeg2arch_la-motion_comp_alpha.lo: motion_comp_alpha.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2arch_la_CFLAGS) $(CFLAGS) -MT libmpeg2arch_la-motion_comp_alpha.lo -MD -MP -MF $(DEPDIR)/libmpeg2arch_la-motion_comp_alpha.Tpo -c -o libmpeg2arch_la-motion_comp_alpha.lo `test -f 'motion_comp_alpha.c' || echo '$(srcdir)/'`motion_comp_alpha.c
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/libmpeg2arch_la-motion_comp_alpha.Tpo $(DEPDIR)/libmpeg2arch_la-motion_comp_alpha.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='motion_comp_alpha.c' object='libmpeg2arch_la-motion_comp_alpha.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2arch_la_CFLAGS) $(CFLAGS) -c -o libmpeg2arch_la-motion_comp_alpha.lo `test -f 'motion_comp_alpha.c' || echo '$(srcdir)/'`motion_comp_alpha.c
+
+libmpeg2arch_la-idct_alpha.lo: idct_alpha.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2arch_la_CFLAGS) $(CFLAGS) -MT libmpeg2arch_la-idct_alpha.lo -MD -MP -MF $(DEPDIR)/libmpeg2arch_la-idct_alpha.Tpo -c -o libmpeg2arch_la-idct_alpha.lo `test -f 'idct_alpha.c' || echo '$(srcdir)/'`idct_alpha.c
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/libmpeg2arch_la-idct_alpha.Tpo $(DEPDIR)/libmpeg2arch_la-idct_alpha.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='idct_alpha.c' object='libmpeg2arch_la-idct_alpha.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2arch_la_CFLAGS) $(CFLAGS) -c -o libmpeg2arch_la-idct_alpha.lo `test -f 'idct_alpha.c' || echo '$(srcdir)/'`idct_alpha.c
+
+libmpeg2arch_la-motion_comp_vis.lo: motion_comp_vis.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2arch_la_CFLAGS) $(CFLAGS) -MT libmpeg2arch_la-motion_comp_vis.lo -MD -MP -MF $(DEPDIR)/libmpeg2arch_la-motion_comp_vis.Tpo -c -o libmpeg2arch_la-motion_comp_vis.lo `test -f 'motion_comp_vis.c' || echo '$(srcdir)/'`motion_comp_vis.c
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/libmpeg2arch_la-motion_comp_vis.Tpo $(DEPDIR)/libmpeg2arch_la-motion_comp_vis.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='motion_comp_vis.c' object='libmpeg2arch_la-motion_comp_vis.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2arch_la_CFLAGS) $(CFLAGS) -c -o libmpeg2arch_la-motion_comp_vis.lo `test -f 'motion_comp_vis.c' || echo '$(srcdir)/'`motion_comp_vis.c
+
+libmpeg2arch_la-motion_comp_arm.lo: motion_comp_arm.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2arch_la_CFLAGS) $(CFLAGS) -MT libmpeg2arch_la-motion_comp_arm.lo -MD -MP -MF $(DEPDIR)/libmpeg2arch_la-motion_comp_arm.Tpo -c -o libmpeg2arch_la-motion_comp_arm.lo `test -f 'motion_comp_arm.c' || echo '$(srcdir)/'`motion_comp_arm.c
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/libmpeg2arch_la-motion_comp_arm.Tpo $(DEPDIR)/libmpeg2arch_la-motion_comp_arm.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='motion_comp_arm.c' object='libmpeg2arch_la-motion_comp_arm.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2arch_la_CFLAGS) $(CFLAGS) -c -o libmpeg2arch_la-motion_comp_arm.lo `test -f 'motion_comp_arm.c' || echo '$(srcdir)/'`motion_comp_arm.c
+
+libmpeg2arch_la-cpu_accel.lo: cpu_accel.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2arch_la_CFLAGS) $(CFLAGS) -MT libmpeg2arch_la-cpu_accel.lo -MD -MP -MF $(DEPDIR)/libmpeg2arch_la-cpu_accel.Tpo -c -o libmpeg2arch_la-cpu_accel.lo `test -f 'cpu_accel.c' || echo '$(srcdir)/'`cpu_accel.c
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/libmpeg2arch_la-cpu_accel.Tpo $(DEPDIR)/libmpeg2arch_la-cpu_accel.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='cpu_accel.c' object='libmpeg2arch_la-cpu_accel.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2arch_la_CFLAGS) $(CFLAGS) -c -o libmpeg2arch_la-cpu_accel.lo `test -f 'cpu_accel.c' || echo '$(srcdir)/'`cpu_accel.c
+
+libmpeg2arch_la-cpu_state.lo: cpu_state.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2arch_la_CFLAGS) $(CFLAGS) -MT libmpeg2arch_la-cpu_state.lo -MD -MP -MF $(DEPDIR)/libmpeg2arch_la-cpu_state.Tpo -c -o libmpeg2arch_la-cpu_state.lo `test -f 'cpu_state.c' || echo '$(srcdir)/'`cpu_state.c
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/libmpeg2arch_la-cpu_state.Tpo $(DEPDIR)/libmpeg2arch_la-cpu_state.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='cpu_state.c' object='libmpeg2arch_la-cpu_state.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2arch_la_CFLAGS) $(CFLAGS) -c -o libmpeg2arch_la-cpu_state.lo `test -f 'cpu_state.c' || echo '$(srcdir)/'`cpu_state.c
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+install-pkgconfigDATA: $(pkgconfig_DATA)
+ @$(NORMAL_INSTALL)
+ test -z "$(pkgconfigdir)" || $(MKDIR_P) "$(DESTDIR)$(pkgconfigdir)"
+ @list='$(pkgconfig_DATA)'; for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ f=$(am__strip_dir) \
+ echo " $(pkgconfigDATA_INSTALL) '$$d$$p' '$(DESTDIR)$(pkgconfigdir)/$$f'"; \
+ $(pkgconfigDATA_INSTALL) "$$d$$p" "$(DESTDIR)$(pkgconfigdir)/$$f"; \
+ done
+
+uninstall-pkgconfigDATA:
+ @$(NORMAL_UNINSTALL)
+ @list='$(pkgconfig_DATA)'; for p in $$list; do \
+ f=$(am__strip_dir) \
+ echo " rm -f '$(DESTDIR)$(pkgconfigdir)/$$f'"; \
+ rm -f "$(DESTDIR)$(pkgconfigdir)/$$f"; \
+ done
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run `make' without going through this Makefile.
+# To change the values of `make' variables: instead of editing Makefiles,
+# (1) if the variable is set in `config.status', edit `config.status'
+# (which will cause the Makefiles to be regenerated when you run `make');
+# (2) otherwise, pass the desired values on the `make' command line.
+$(RECURSIVE_TARGETS):
+ @failcom='exit 1'; \
+ for f in x $$MAKEFLAGS; do \
+ case $$f in \
+ *=* | --[!k]*);; \
+ *k*) failcom='fail=yes';; \
+ esac; \
+ done; \
+ dot_seen=no; \
+ target=`echo $@ | sed s/-recursive//`; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ dot_seen=yes; \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || eval $$failcom; \
+ done; \
+ if test "$$dot_seen" = "no"; then \
+ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+ fi; test -z "$$fail"
+
+$(RECURSIVE_CLEAN_TARGETS):
+ @failcom='exit 1'; \
+ for f in x $$MAKEFLAGS; do \
+ case $$f in \
+ *=* | --[!k]*);; \
+ *k*) failcom='fail=yes';; \
+ esac; \
+ done; \
+ dot_seen=no; \
+ case "$@" in \
+ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+ *) list='$(SUBDIRS)' ;; \
+ esac; \
+ rev=''; for subdir in $$list; do \
+ if test "$$subdir" = "."; then :; else \
+ rev="$$subdir $$rev"; \
+ fi; \
+ done; \
+ rev="$$rev ."; \
+ target=`echo $@ | sed s/-recursive//`; \
+ for subdir in $$rev; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || eval $$failcom; \
+ done && test -z "$$fail"
+tags-recursive:
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \
+ done
+ctags-recursive:
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \
+ done
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonemtpy = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
+ include_option=--etags-include; \
+ empty_fix=.; \
+ else \
+ include_option=--include; \
+ empty_fix=; \
+ fi; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ test ! -f $$subdir/TAGS || \
+ tags="$$tags $$include_option=$$here/$$subdir/TAGS"; \
+ fi; \
+ done; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$tags $$unique; \
+ fi
+ctags: CTAGS
+CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$tags$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$tags $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && cd $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+ fi; \
+ cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+ else \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file \
+ || exit 1; \
+ fi; \
+ done
+ list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ test -d "$(distdir)/$$subdir" \
+ || $(MKDIR_P) "$(distdir)/$$subdir" \
+ || exit 1; \
+ distdir=`$(am__cd) $(distdir) && pwd`; \
+ top_distdir=`$(am__cd) $(top_distdir) && pwd`; \
+ (cd $$subdir && \
+ $(MAKE) $(AM_MAKEFLAGS) \
+ top_distdir="$$top_distdir" \
+ distdir="$$distdir/$$subdir" \
+ am__remove_distdir=: \
+ am__skip_length_check=: \
+ distdir) \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-recursive
+all-am: Makefile $(LTLIBRARIES) $(DATA)
+installdirs: installdirs-recursive
+installdirs-am:
+ for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(pkgconfigdir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-recursive
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-recursive
+
+clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \
+ clean-noinstLTLIBRARIES mostlyclean-am
+
+distclean: distclean-recursive
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-recursive
+
+dvi-am:
+
+html: html-recursive
+
+info: info-recursive
+
+info-am:
+
+install-data-am: install-pkgconfigDATA
+
+install-dvi: install-dvi-recursive
+
+install-exec-am: install-libLTLIBRARIES
+
+install-html: install-html-recursive
+
+install-info: install-info-recursive
+
+install-man:
+
+install-pdf: install-pdf-recursive
+
+install-ps: install-ps-recursive
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-recursive
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-recursive
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-recursive
+
+pdf-am:
+
+ps: ps-recursive
+
+ps-am:
+
+uninstall-am: uninstall-libLTLIBRARIES uninstall-pkgconfigDATA
+
+.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) install-am \
+ install-strip
+
+.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
+ all all-am check check-am clean clean-generic \
+ clean-libLTLIBRARIES clean-libtool clean-noinstLTLIBRARIES \
+ ctags ctags-recursive distclean distclean-compile \
+ distclean-generic distclean-libtool distclean-tags distdir dvi \
+ dvi-am html html-am info info-am install install-am \
+ install-data install-data-am install-dvi install-dvi-am \
+ install-exec install-exec-am install-html install-html-am \
+ install-info install-info-am install-libLTLIBRARIES \
+ install-man install-pdf install-pdf-am install-pkgconfigDATA \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs installdirs-am maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags tags-recursive uninstall uninstall-am \
+ uninstall-libLTLIBRARIES uninstall-pkgconfigDATA
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/libmpeg2/alloc.c b/libmpeg2/alloc.c
new file mode 100644
index 0000000..71bc18b
--- /dev/null
+++ b/libmpeg2/alloc.c
@@ -0,0 +1,70 @@
+/*
+ * alloc.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "mpeg2.h"
+
+static void * (* malloc_hook) (unsigned size, mpeg2_alloc_t reason) = NULL;
+static int (* free_hook) (void * buf) = NULL;
+
+void * mpeg2_malloc (unsigned size, mpeg2_alloc_t reason)
+{
+ char * buf;
+
+ if (malloc_hook) {
+ buf = (char *) malloc_hook (size, reason);
+ if (buf)
+ return buf;
+ }
+
+ if (size) {
+ buf = (char *) malloc (size + 63 + sizeof (void **));
+ if (buf) {
+ char * align_buf;
+
+ align_buf = buf + 63 + sizeof (void **);
+ align_buf -= (long)align_buf & 63;
+ *(((void **)align_buf) - 1) = buf;
+ return align_buf;
+ }
+ }
+ return NULL;
+}
+
+void mpeg2_free (void * buf)
+{
+ if (free_hook && free_hook (buf))
+ return;
+
+ if (buf)
+ free (*(((void **)buf) - 1));
+}
+
+void mpeg2_malloc_hooks (void * alloc_func (unsigned, mpeg2_alloc_t),
+ int free_func (void *))
+{
+ malloc_hook = alloc_func;
+ free_hook = free_func;
+}
diff --git a/libmpeg2/convert/Makefile.am b/libmpeg2/convert/Makefile.am
new file mode 100644
index 0000000..3522ee3
--- /dev/null
+++ b/libmpeg2/convert/Makefile.am
@@ -0,0 +1,15 @@
+AM_CFLAGS = $(OPT_CFLAGS) $(LIBMPEG2_CFLAGS)
+
+lib_LTLIBRARIES = libmpeg2convert.la
+libmpeg2convert_la_SOURCES = rgb.c uyvy.c
+libmpeg2convert_la_LIBADD = libmpeg2convertarch.la
+libmpeg2convert_la_LDFLAGS = -no-undefined
+
+noinst_LTLIBRARIES = libmpeg2convertarch.la
+libmpeg2convertarch_la_SOURCES = rgb_mmx.c rgb_vis.c
+libmpeg2convertarch_la_CFLAGS = $(OPT_CFLAGS) $(ARCH_OPT_CFLAGS) $(LIBMPEG2_CFLAGS)
+
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = libmpeg2convert.pc
+
+EXTRA_DIST = convert_internal.h
diff --git a/libmpeg2/convert/Makefile.in b/libmpeg2/convert/Makefile.in
new file mode 100644
index 0000000..87daab4
--- /dev/null
+++ b/libmpeg2/convert/Makefile.in
@@ -0,0 +1,558 @@
+# Makefile.in generated by automake 1.10.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = libmpeg2/convert
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
+ $(srcdir)/libmpeg2convert.pc.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/cflags.m4 \
+ $(top_srcdir)/m4/inttypes.m4 $(top_srcdir)/m4/keywords.m4 \
+ $(top_srcdir)/m4/nonpic.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/include/config.h
+CONFIG_CLEAN_FILES = libmpeg2convert.pc
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = `echo $$p | sed -e 's|^.*/||'`;
+am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(pkgconfigdir)"
+libLTLIBRARIES_INSTALL = $(INSTALL)
+LTLIBRARIES = $(lib_LTLIBRARIES) $(noinst_LTLIBRARIES)
+libmpeg2convert_la_DEPENDENCIES = libmpeg2convertarch.la
+am_libmpeg2convert_la_OBJECTS = rgb.lo uyvy.lo
+libmpeg2convert_la_OBJECTS = $(am_libmpeg2convert_la_OBJECTS)
+libmpeg2convert_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(libmpeg2convert_la_LDFLAGS) $(LDFLAGS) -o $@
+libmpeg2convertarch_la_LIBADD =
+am_libmpeg2convertarch_la_OBJECTS = libmpeg2convertarch_la-rgb_mmx.lo \
+ libmpeg2convertarch_la-rgb_vis.lo
+libmpeg2convertarch_la_OBJECTS = $(am_libmpeg2convertarch_la_OBJECTS)
+libmpeg2convertarch_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) \
+ $(libmpeg2convertarch_la_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/include
+depcomp = $(SHELL) $(top_srcdir)/.auto/depcomp
+am__depfiles_maybe = depfiles
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+SOURCES = $(libmpeg2convert_la_SOURCES) \
+ $(libmpeg2convertarch_la_SOURCES)
+DIST_SOURCES = $(libmpeg2convert_la_SOURCES) \
+ $(libmpeg2convertarch_la_SOURCES)
+pkgconfigDATA_INSTALL = $(INSTALL_DATA)
+DATA = $(pkgconfig_DATA)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_CPPFLAGS = @AM_CPPFLAGS@
+AR = @AR@
+ARCH_OPT_CFLAGS = @ARCH_OPT_CFLAGS@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCAS = @CCAS@
+CCASDEPMODE = @CCASDEPMODE@
+CCASFLAGS = @CCASFLAGS@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+ECHO = @ECHO@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+F77 = @F77@
+FFLAGS = @FFLAGS@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LDFLAGS = @LDFLAGS@
+LIBMPEG2_CFLAGS = @LIBMPEG2_CFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIBVO_CFLAGS = @LIBVO_CFLAGS@
+LIBVO_LIBS = @LIBVO_LIBS@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+MPEG2DEC_CFLAGS = @MPEG2DEC_CFLAGS@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OPT_CFLAGS = @OPT_CFLAGS@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SDLCONFIG = @SDLCONFIG@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+XMKMF = @XMKMF@
+X_CFLAGS = @X_CFLAGS@
+X_EXTRA_LIBS = @X_EXTRA_LIBS@
+X_LIBS = @X_LIBS@
+X_PRE_LIBS = @X_PRE_LIBS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_F77 = @ac_ct_F77@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+AM_CFLAGS = $(OPT_CFLAGS) $(LIBMPEG2_CFLAGS)
+lib_LTLIBRARIES = libmpeg2convert.la
+libmpeg2convert_la_SOURCES = rgb.c uyvy.c
+libmpeg2convert_la_LIBADD = libmpeg2convertarch.la
+libmpeg2convert_la_LDFLAGS = -no-undefined
+noinst_LTLIBRARIES = libmpeg2convertarch.la
+libmpeg2convertarch_la_SOURCES = rgb_mmx.c rgb_vis.c
+libmpeg2convertarch_la_CFLAGS = $(OPT_CFLAGS) $(ARCH_OPT_CFLAGS) $(LIBMPEG2_CFLAGS)
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = libmpeg2convert.pc
+EXTRA_DIST = convert_internal.h
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
+ && exit 0; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign libmpeg2/convert/Makefile'; \
+ cd $(top_srcdir) && \
+ $(AUTOMAKE) --foreign libmpeg2/convert/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+libmpeg2convert.pc: $(top_builddir)/config.status $(srcdir)/libmpeg2convert.pc.in
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
+install-libLTLIBRARIES: $(lib_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)"
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ if test -f $$p; then \
+ f=$(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) '$$p' '$(DESTDIR)$(libdir)/$$f'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) "$$p" "$(DESTDIR)$(libdir)/$$f"; \
+ else :; fi; \
+ done
+
+uninstall-libLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ p=$(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$p'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$p"; \
+ done
+
+clean-libLTLIBRARIES:
+ -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
+ @list='$(lib_LTLIBRARIES)'; for p in $$list; do \
+ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+ test "$$dir" != "$$p" || dir=.; \
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+
+clean-noinstLTLIBRARIES:
+ -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+ @list='$(noinst_LTLIBRARIES)'; for p in $$list; do \
+ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+ test "$$dir" != "$$p" || dir=.; \
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+libmpeg2convert.la: $(libmpeg2convert_la_OBJECTS) $(libmpeg2convert_la_DEPENDENCIES)
+ $(libmpeg2convert_la_LINK) -rpath $(libdir) $(libmpeg2convert_la_OBJECTS) $(libmpeg2convert_la_LIBADD) $(LIBS)
+libmpeg2convertarch.la: $(libmpeg2convertarch_la_OBJECTS) $(libmpeg2convertarch_la_DEPENDENCIES)
+ $(libmpeg2convertarch_la_LINK) $(libmpeg2convertarch_la_OBJECTS) $(libmpeg2convertarch_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmpeg2convertarch_la-rgb_mmx.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libmpeg2convertarch_la-rgb_vis.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rgb.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uyvy.Plo@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(COMPILE) -c $<
+
+.c.obj:
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
+
+libmpeg2convertarch_la-rgb_mmx.lo: rgb_mmx.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2convertarch_la_CFLAGS) $(CFLAGS) -MT libmpeg2convertarch_la-rgb_mmx.lo -MD -MP -MF $(DEPDIR)/libmpeg2convertarch_la-rgb_mmx.Tpo -c -o libmpeg2convertarch_la-rgb_mmx.lo `test -f 'rgb_mmx.c' || echo '$(srcdir)/'`rgb_mmx.c
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/libmpeg2convertarch_la-rgb_mmx.Tpo $(DEPDIR)/libmpeg2convertarch_la-rgb_mmx.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='rgb_mmx.c' object='libmpeg2convertarch_la-rgb_mmx.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2convertarch_la_CFLAGS) $(CFLAGS) -c -o libmpeg2convertarch_la-rgb_mmx.lo `test -f 'rgb_mmx.c' || echo '$(srcdir)/'`rgb_mmx.c
+
+libmpeg2convertarch_la-rgb_vis.lo: rgb_vis.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2convertarch_la_CFLAGS) $(CFLAGS) -MT libmpeg2convertarch_la-rgb_vis.lo -MD -MP -MF $(DEPDIR)/libmpeg2convertarch_la-rgb_vis.Tpo -c -o libmpeg2convertarch_la-rgb_vis.lo `test -f 'rgb_vis.c' || echo '$(srcdir)/'`rgb_vis.c
+@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/libmpeg2convertarch_la-rgb_vis.Tpo $(DEPDIR)/libmpeg2convertarch_la-rgb_vis.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='rgb_vis.c' object='libmpeg2convertarch_la-rgb_vis.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libmpeg2convertarch_la_CFLAGS) $(CFLAGS) -c -o libmpeg2convertarch_la-rgb_vis.lo `test -f 'rgb_vis.c' || echo '$(srcdir)/'`rgb_vis.c
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+install-pkgconfigDATA: $(pkgconfig_DATA)
+ @$(NORMAL_INSTALL)
+ test -z "$(pkgconfigdir)" || $(MKDIR_P) "$(DESTDIR)$(pkgconfigdir)"
+ @list='$(pkgconfig_DATA)'; for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ f=$(am__strip_dir) \
+ echo " $(pkgconfigDATA_INSTALL) '$$d$$p' '$(DESTDIR)$(pkgconfigdir)/$$f'"; \
+ $(pkgconfigDATA_INSTALL) "$$d$$p" "$(DESTDIR)$(pkgconfigdir)/$$f"; \
+ done
+
+uninstall-pkgconfigDATA:
+ @$(NORMAL_UNINSTALL)
+ @list='$(pkgconfig_DATA)'; for p in $$list; do \
+ f=$(am__strip_dir) \
+ echo " rm -f '$(DESTDIR)$(pkgconfigdir)/$$f'"; \
+ rm -f "$(DESTDIR)$(pkgconfigdir)/$$f"; \
+ done
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonemtpy = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$tags $$unique; \
+ fi
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ tags=; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$tags$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$tags $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && cd $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) $$here
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+ fi; \
+ cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+ else \
+ test -f $(distdir)/$$file \
+ || cp -p $$d/$$file $(distdir)/$$file \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES) $(DATA)
+installdirs:
+ for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(pkgconfigdir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \
+ clean-noinstLTLIBRARIES mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+info: info-am
+
+info-am:
+
+install-data-am: install-pkgconfigDATA
+
+install-dvi: install-dvi-am
+
+install-exec-am: install-libLTLIBRARIES
+
+install-html: install-html-am
+
+install-info: install-info-am
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-ps: install-ps-am
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-libLTLIBRARIES uninstall-pkgconfigDATA
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+ clean-libLTLIBRARIES clean-libtool clean-noinstLTLIBRARIES \
+ ctags distclean distclean-compile distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am html \
+ html-am info info-am install install-am install-data \
+ install-data-am install-dvi install-dvi-am install-exec \
+ install-exec-am install-html install-html-am install-info \
+ install-info-am install-libLTLIBRARIES install-man install-pdf \
+ install-pdf-am install-pkgconfigDATA install-ps install-ps-am \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+ pdf pdf-am ps ps-am tags uninstall uninstall-am \
+ uninstall-libLTLIBRARIES uninstall-pkgconfigDATA
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/libmpeg2/convert/convert_internal.h b/libmpeg2/convert/convert_internal.h
new file mode 100644
index 0000000..d1e63d5
--- /dev/null
+++ b/libmpeg2/convert/convert_internal.h
@@ -0,0 +1,42 @@
+/*
+ * convert_internal.h
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+typedef struct {
+ uint8_t * rgb_ptr;
+ int width;
+ int field;
+ int y_stride, rgb_stride, y_increm, uv_increm, rgb_increm, rgb_slice;
+ int chroma420, convert420;
+ int dither_offset, dither_stride;
+ int y_stride_frame, uv_stride_frame, rgb_stride_frame, rgb_stride_min;
+} convert_rgb_t;
+
+typedef void mpeg2convert_copy_t (void * id, uint8_t * const * src,
+ unsigned int v_offset);
+
+mpeg2convert_copy_t * mpeg2convert_rgb_mmxext (int bpp, int mode,
+ const mpeg2_sequence_t * seq);
+mpeg2convert_copy_t * mpeg2convert_rgb_mmx (int bpp, int mode,
+ const mpeg2_sequence_t * seq);
+mpeg2convert_copy_t * mpeg2convert_rgb_vis (int bpp, int mode,
+ const mpeg2_sequence_t * seq);
diff --git a/libmpeg2/convert/libmpeg2convert.pc.in b/libmpeg2/convert/libmpeg2convert.pc.in
new file mode 100644
index 0000000..044ea1e
--- /dev/null
+++ b/libmpeg2/convert/libmpeg2convert.pc.in
@@ -0,0 +1,10 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: libmpeg2convert
+Description: Image conversion library helper for libmpeg2
+Version: @VERSION@
+Libs: -L${libdir} -lmpeg2convert
+Cflags: -I${includedir}/mpeg2dec
diff --git a/libmpeg2/convert/rgb.c b/libmpeg2/convert/rgb.c
new file mode 100644
index 0000000..8863b0b
--- /dev/null
+++ b/libmpeg2/convert/rgb.c
@@ -0,0 +1,598 @@
+/*
+ * rgb.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+#include "attributes.h"
+
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2convert.h"
+#include "convert_internal.h"
+
+static int matrix_coefficients = 6;
+
+static const int Inverse_Table_6_9[8][4] = {
+ {117504, 138453, 13954, 34903}, /* no sequence_display_extension */
+ {117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */
+ {104597, 132201, 25675, 53279}, /* unspecified */
+ {104597, 132201, 25675, 53279}, /* reserved */
+ {104448, 132798, 24759, 53109}, /* FCC */
+ {104597, 132201, 25675, 53279}, /* ITU-R Rec. 624-4 System B, G */
+ {104597, 132201, 25675, 53279}, /* SMPTE 170M */
+ {117579, 136230, 16907, 35559} /* SMPTE 240M (1987) */
+};
+
+static const uint8_t dither[] ATTR_ALIGN(32) = {
+ 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71,
+ 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71,
+ 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71,
+ 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71,
+ 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35,
+ 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35,
+ 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35,
+ 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35,
+ 3, 9, 27, 63, 1, 4, 25, 59, 5, 12, 28, 67, 3, 7, 26, 62,
+ 3, 9, 27, 63, 1, 4, 25, 59, 5, 12, 28, 67, 3, 7, 26, 62,
+ 3, 9, 27, 63, 1, 4, 25, 59, 5, 12, 28, 67, 3, 7, 26, 62,
+ 3, 9, 27, 63, 1, 4, 25, 59, 5, 12, 28, 67, 3, 7, 26, 62,
+ 19, 45, 11, 27, 17, 41, 9, 22, 21, 49, 13, 30, 19, 44, 11, 26,
+ 19, 45, 11, 27, 17, 41, 9, 22, 21, 49, 13, 30, 19, 44, 11, 26,
+ 19, 45, 11, 27, 17, 41, 9, 22, 21, 49, 13, 30, 19, 44, 11, 26,
+ 19, 45, 11, 27, 17, 41, 9, 22, 21, 49, 13, 30, 19, 44, 11, 26,
+ 0, 2, 24, 57, 6, 15, 30, 70, 0, 1, 23, 55, 6, 14, 29, 69,
+ 0, 2, 24, 57, 6, 15, 30, 70, 0, 1, 23, 55, 6, 14, 29, 69,
+ 0, 2, 24, 57, 6, 15, 30, 70, 0, 1, 23, 55, 6, 14, 29, 69,
+ 0, 2, 24, 57, 6, 15, 30, 70, 0, 1, 23, 55, 6, 14, 29, 69,
+ 16, 38, 8, 20, 22, 52, 14, 34, 16, 37, 8, 19, 21, 51, 14, 33,
+ 16, 38, 8, 20, 22, 52, 14, 34, 16, 37, 8, 19, 21, 51, 14, 33,
+ 16, 38, 8, 20, 22, 52, 14, 34, 16, 37, 8, 19, 21, 51, 14, 33,
+ 16, 38, 8, 20, 22, 52, 14, 34, 16, 37, 8, 19, 21, 51, 14, 33,
+ 4, 11, 28, 66, 2, 6, 26, 61, 4, 10, 27, 65, 2, 5, 25, 60,
+ 4, 11, 28, 66, 2, 6, 26, 61, 4, 10, 27, 65, 2, 5, 25, 60,
+ 4, 11, 28, 66, 2, 6, 26, 61, 4, 10, 27, 65, 2, 5, 25, 60,
+ 4, 11, 28, 66, 2, 6, 26, 61, 4, 10, 27, 65, 2, 5, 25, 60,
+ 20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23,
+ 20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23,
+ 20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23,
+ 20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23,
+ 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71,
+ 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71,
+ 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71,
+ 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71,
+ 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35,
+ 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35
+};
+
+static const uint8_t dither_temporal[64] = {
+ 0x00, 0x20, 0x21, 0x01, 0x40, 0x60, 0x61, 0x41,
+ 0x42, 0x62, 0x63, 0x43, 0x02, 0x22, 0x23, 0x03,
+ 0x80, 0xa0, 0xa1, 0x81, 0xc0, 0xe0, 0xe1, 0xc1,
+ 0xc2, 0xe2, 0xe3, 0xc3, 0x82, 0xa2, 0xa3, 0x83,
+ 0x84, 0xa4, 0xa5, 0x85, 0xc4, 0xe4, 0xe5, 0xc5,
+ 0xc6, 0xe6, 0xe7, 0xc7, 0x86, 0xa6, 0xa7, 0x87,
+ 0x04, 0x24, 0x25, 0x05, 0x44, 0x64, 0x65, 0x45,
+ 0x46, 0x66, 0x67, 0x47, 0x06, 0x26, 0x27, 0x07
+};
+
+typedef struct {
+ convert_rgb_t base;
+ void * table_rV[256];
+ void * table_gU[256];
+ int table_gV[256];
+ void * table_bU[256];
+} convert_rgb_c_t;
+
+#define RGB(type,i) \
+ U = pu[i]; \
+ V = pv[i]; \
+ r = (type *) id->table_rV[V]; \
+ g = (type *) (((uint8_t *)id->table_gU[U]) + id->table_gV[V]); \
+ b = (type *) id->table_bU[U];
+
+#define DST(py,dst,i,j) \
+ Y = py[i]; \
+ dst[i] = r[Y] + g[Y] + b[Y];
+
+#define DSTRGB(py,dst,i,j) \
+ Y = py[i]; \
+ dst[3*i] = r[Y]; dst[3*i+1] = g[Y]; dst[3*i+2] = b[Y];
+
+#define DSTBGR(py,dst,i,j) \
+ Y = py[i]; \
+ dst[3*i] = b[Y]; dst[3*i+1] = g[Y]; dst[3*i+2] = r[Y];
+
+#define DSTDITHER(py,dst,i,j) \
+ Y = py[i]; \
+ dst[i] = r[Y+pd[2*i+96*j]] + g[Y-pd[2*i+96*j]] + b[Y+pd[2*i+1+96*j]];
+
+#define DO(x) x
+#define SKIP(x)
+
+#define DECLARE_420(func,type,num,DST,DITHER) \
+static void func (void * _id, uint8_t * const * src, \
+ unsigned int v_offset) \
+{ \
+ const convert_rgb_c_t * const id = (convert_rgb_c_t *) _id; \
+ type * dst_1; \
+ const uint8_t * py_1, * pu, * pv; \
+ int i; \
+ DITHER(uint8_t dithpos = id->base.dither_offset;) \
+ \
+ dst_1 = (type *)(id->base.rgb_ptr + id->base.rgb_slice * v_offset); \
+ py_1 = src[0]; pu = src[1]; pv = src[2]; \
+ \
+ i = 8; \
+ do { \
+ const uint8_t * py_2; \
+ int j, U, V, Y; \
+ const type * r, * g, * b; \
+ type * dst_2; \
+ DITHER(const uint8_t * const pd = dither + 2 * dithpos;) \
+ \
+ dst_2 = (type *)((char *)dst_1 + id->base.rgb_stride); \
+ py_2 = py_1 + id->base.y_stride; \
+ j = id->base.width; \
+ do { \
+ RGB (type, 0) \
+ DST (py_1, dst_1, 0, 0) \
+ DST (py_1, dst_1, 1, 0) \
+ DST (py_2, dst_2, 0, 1) \
+ DST (py_2, dst_2, 1, 1) \
+ \
+ RGB (type, 1) \
+ DST (py_2, dst_2, 2, 1) \
+ DST (py_2, dst_2, 3, 1) \
+ DST (py_1, dst_1, 2, 0) \
+ DST (py_1, dst_1, 3, 0) \
+ \
+ RGB (type, 2) \
+ DST (py_1, dst_1, 4, 0) \
+ DST (py_1, dst_1, 5, 0) \
+ DST (py_2, dst_2, 4, 1) \
+ DST (py_2, dst_2, 5, 1) \
+ \
+ RGB (type, 3) \
+ DST (py_2, dst_2, 6, 1) \
+ DST (py_2, dst_2, 7, 1) \
+ DST (py_1, dst_1, 6, 0) \
+ DST (py_1, dst_1, 7, 0) \
+ \
+ pu += 4; \
+ pv += 4; \
+ py_1 += 8; \
+ py_2 += 8; \
+ dst_1 += 8 * num; \
+ dst_2 += 8 * num; \
+ } while (--j); \
+ if (--i == id->base.field) { \
+ dst_1 = (type *)(id->base.rgb_ptr + \
+ id->base.rgb_slice * (v_offset + 1)); \
+ py_1 = src[0] + id->base.y_stride_frame; \
+ pu = src[1] + id->base.uv_stride_frame; \
+ pv = src[2] + id->base.uv_stride_frame; \
+ } else { \
+ py_1 += id->base.y_increm; \
+ pu += id->base.uv_increm; \
+ pv += id->base.uv_increm; \
+ dst_1 = (type *)((char *)dst_1 + id->base.rgb_increm); \
+ DITHER(dithpos += id->base.dither_stride;) \
+ } \
+ } while (i); \
+}
+
+DECLARE_420 (rgb_c_32_420, uint32_t, 1, DST, SKIP)
+DECLARE_420 (rgb_c_24_rgb_420, uint8_t, 3, DSTRGB, SKIP)
+DECLARE_420 (rgb_c_24_bgr_420, uint8_t, 3, DSTBGR, SKIP)
+DECLARE_420 (rgb_c_16_420, uint16_t, 1, DST, SKIP)
+DECLARE_420 (rgb_c_8_420, uint8_t, 1, DSTDITHER, DO)
+
+#define DECLARE_422(func,type,num,DST,DITHER) \
+static void func (void * _id, uint8_t * const * src, \
+ unsigned int v_offset) \
+{ \
+ const convert_rgb_c_t * const id = (convert_rgb_c_t *) _id; \
+ type * dst; \
+ const uint8_t * py, * pu, * pv; \
+ int i; \
+ DITHER(uint8_t dithpos = id->base.dither_offset;) \
+ \
+ dst = (type *)(id->base.rgb_ptr + id->base.rgb_stride * v_offset); \
+ py = src[0]; pu = src[1]; pv = src[2]; \
+ \
+ i = 16; \
+ do { \
+ int j, U, V, Y; \
+ const type * r, * g, * b; \
+ DITHER(const uint8_t * const pd = dither + 2 * dithpos;) \
+ \
+ j = id->base.width; \
+ do { \
+ RGB (type, 0) \
+ DST (py, dst, 0, 0) \
+ DST (py, dst, 1, 0) \
+ \
+ RGB (type, 1) \
+ DST (py, dst, 2, 0) \
+ DST (py, dst, 3, 0) \
+ \
+ RGB (type, 2) \
+ DST (py, dst, 4, 0) \
+ DST (py, dst, 5, 0) \
+ \
+ RGB (type, 3) \
+ DST (py, dst, 6, 0) \
+ DST (py, dst, 7, 0) \
+ \
+ pu += 4; \
+ pv += 4; \
+ py += 8; \
+ dst += 8 * num; \
+ } while (--j); \
+ py += id->base.y_increm; \
+ pu += id->base.uv_increm; \
+ pv += id->base.uv_increm; \
+ dst = (type *)((char *)dst + id->base.rgb_increm); \
+ DITHER(dithpos += id->base.dither_stride;) \
+ } while (--i); \
+}
+
+DECLARE_422 (rgb_c_32_422, uint32_t, 1, DST, SKIP)
+DECLARE_422 (rgb_c_24_rgb_422, uint8_t, 3, DSTRGB, SKIP)
+DECLARE_422 (rgb_c_24_bgr_422, uint8_t, 3, DSTBGR, SKIP)
+DECLARE_422 (rgb_c_16_422, uint16_t, 1, DST, SKIP)
+DECLARE_422 (rgb_c_8_422, uint8_t, 1, DSTDITHER, DO)
+
+#define DECLARE_444(func,type,num,DST,DITHER) \
+static void func (void * _id, uint8_t * const * src, \
+ unsigned int v_offset) \
+{ \
+ const convert_rgb_c_t * const id = (convert_rgb_c_t *) _id; \
+ type * dst; \
+ const uint8_t * py, * pu, * pv; \
+ int i; \
+ DITHER(uint8_t dithpos = id->base.dither_offset;) \
+ \
+ dst = (type *)(id->base.rgb_ptr + id->base.rgb_stride * v_offset); \
+ py = src[0]; pu = src[1]; pv = src[2]; \
+ \
+ i = 16; \
+ do { \
+ int j, U, V, Y; \
+ const type * r, * g, * b; \
+ DITHER(const uint8_t * const pd = dither + 2 * dithpos;) \
+ \
+ j = id->base.width; \
+ do { \
+ RGB (type, 0) \
+ DST (py, dst, 0, 0) \
+ RGB (type, 1) \
+ DST (py, dst, 1, 0) \
+ RGB (type, 2) \
+ DST (py, dst, 2, 0) \
+ RGB (type, 3) \
+ DST (py, dst, 3, 0) \
+ RGB (type, 4) \
+ DST (py, dst, 4, 0) \
+ RGB (type, 5) \
+ DST (py, dst, 5, 0) \
+ RGB (type, 6) \
+ DST (py, dst, 6, 0) \
+ RGB (type, 7) \
+ DST (py, dst, 7, 0) \
+ \
+ pu += 8; \
+ pv += 8; \
+ py += 8; \
+ dst += 8 * num; \
+ } while (--j); \
+ py += id->base.y_increm; \
+ pu += id->base.y_increm; \
+ pv += id->base.y_increm; \
+ dst = (type *)((char *)dst + id->base.rgb_increm); \
+ DITHER(dithpos += id->base.dither_stride;) \
+ } while (--i); \
+}
+
+DECLARE_444 (rgb_c_32_444, uint32_t, 1, DST, SKIP)
+DECLARE_444 (rgb_c_24_rgb_444, uint8_t, 3, DSTRGB, SKIP)
+DECLARE_444 (rgb_c_24_bgr_444, uint8_t, 3, DSTBGR, SKIP)
+DECLARE_444 (rgb_c_16_444, uint16_t, 1, DST, SKIP)
+DECLARE_444 (rgb_c_8_444, uint8_t, 1, DSTDITHER, DO)
+
+static void rgb_start (void * _id, const mpeg2_fbuf_t * fbuf,
+ const mpeg2_picture_t * picture,
+ const mpeg2_gop_t * gop)
+{
+ convert_rgb_t * id = (convert_rgb_t *) _id;
+ int uv_stride = id->uv_stride_frame;
+ id->y_stride = id->y_stride_frame;
+ id->rgb_ptr = fbuf->buf[0];
+ id->rgb_slice = id->rgb_stride = id->rgb_stride_frame;
+ id->dither_stride = 32;
+ id->dither_offset = dither_temporal[picture->temporal_reference & 63];
+ id->field = 0;
+ if ((picture->nb_fields == 1) ||
+ (id->chroma420 && !(picture->flags & PIC_FLAG_PROGRESSIVE_FRAME))) {
+ uv_stride <<= 1;
+ id->y_stride <<= 1;
+ id->rgb_stride <<= 1;
+ id->dither_stride <<= 1;
+ id->dither_offset += 16;
+ if (picture->nb_fields == 1) {
+ id->rgb_slice <<= 1;
+ if (!(picture->flags & PIC_FLAG_TOP_FIELD_FIRST)) {
+ id->rgb_ptr += id->rgb_stride_frame;
+ id->dither_offset += 32;
+ }
+ } else
+ id->field = 8 >> id->convert420;
+ }
+ id->y_increm = (id->y_stride << id->convert420) - id->y_stride_frame;
+ id->uv_increm = uv_stride - id->uv_stride_frame;
+ id->rgb_increm = (id->rgb_stride << id->convert420) - id->rgb_stride_min;
+ id->dither_stride <<= id->convert420;
+}
+
+static inline int div_round (int dividend, int divisor)
+{
+ if (dividend > 0)
+ return (dividend + (divisor>>1)) / divisor;
+ else
+ return -((-dividend + (divisor>>1)) / divisor);
+}
+
+static unsigned int rgb_c_init (convert_rgb_c_t * id,
+ mpeg2convert_rgb_order_t order,
+ unsigned int bpp)
+{
+ int i;
+ uint8_t table_Y[1024];
+ uint32_t * table_32 = 0;
+ uint16_t * table_16 = 0;
+ uint8_t * table_8 = 0;
+ uint8_t * table_332 = 0;
+ int entry_size = 0;
+ void * table_r = 0;
+ void * table_g = 0;
+ void * table_b = 0;
+
+ int crv = Inverse_Table_6_9[matrix_coefficients][0];
+ int cbu = Inverse_Table_6_9[matrix_coefficients][1];
+ int cgu = -Inverse_Table_6_9[matrix_coefficients][2];
+ int cgv = -Inverse_Table_6_9[matrix_coefficients][3];
+
+ for (i = 0; i < 1024; i++) {
+ int j;
+
+ j = (76309 * (i - 384 - 16) + 32768) >> 16;
+ table_Y[i] = (j < 0) ? 0 : ((j > 255) ? 255 : j);
+ }
+
+ switch (bpp) {
+ case 32:
+ if (!id)
+ return (197 + 2*682 + 256 + 132) * sizeof (uint32_t);
+ table_32 = (uint32_t *) (id + 1);
+ entry_size = sizeof (uint32_t);
+ table_r = table_32 + 197;
+ table_b = table_32 + 197 + 685;
+ table_g = table_32 + 197 + 2*682;
+
+ for (i = -197; i < 256+197; i++)
+ ((uint32_t *) table_r)[i] =
+ table_Y[i+384] << ((order == MPEG2CONVERT_RGB) ? 16 : 0);
+ for (i = -132; i < 256+132; i++)
+ ((uint32_t *) table_g)[i] = table_Y[i+384] << 8;
+ for (i = -232; i < 256+232; i++)
+ ((uint32_t *) table_b)[i] =
+ table_Y[i+384] << ((order == MPEG2CONVERT_RGB) ? 0 : 16);
+ break;
+
+ case 24:
+ if (!id)
+ return (256 + 2*232) * sizeof (uint8_t);
+ table_8 = (uint8_t *) (id + 1);
+ entry_size = sizeof (uint8_t);
+ table_r = table_g = table_b = table_8 + 232;
+
+ for (i = -232; i < 256+232; i++)
+ ((uint8_t * )table_b)[i] = table_Y[i+384];
+ break;
+
+ case 15:
+ case 16:
+ if (!id)
+ return (197 + 2*682 + 256 + 132) * sizeof (uint16_t);
+ table_16 = (uint16_t *) (id + 1);
+ entry_size = sizeof (uint16_t);
+ table_r = table_16 + 197;
+ table_b = table_16 + 197 + 685;
+ table_g = table_16 + 197 + 2*682;
+
+ for (i = -197; i < 256+197; i++) {
+ int j = table_Y[i+384] >> 3;
+
+ if (order == MPEG2CONVERT_RGB)
+ j <<= ((bpp==16) ? 11 : 10);
+
+ ((uint16_t *)table_r)[i] = j;
+ }
+ for (i = -132; i < 256+132; i++) {
+ int j = table_Y[i+384] >> ((bpp==16) ? 2 : 3);
+
+ ((uint16_t *)table_g)[i] = j << 5;
+ }
+ for (i = -232; i < 256+232; i++) {
+ int j = table_Y[i+384] >> 3;
+
+ if (order == MPEG2CONVERT_BGR)
+ j <<= ((bpp==16) ? 11 : 10);
+
+ ((uint16_t *)table_b)[i] = j;
+ }
+ break;
+
+ case 8:
+ if (!id)
+ return (197 + 2*682 + 256 + 232 + 71) * sizeof (uint8_t);
+ table_332 = (uint8_t *) (id + 1);
+ entry_size = sizeof (uint8_t);
+ table_r = table_332 + 197;
+ table_g = table_332 + 197 + 682 + 30;
+ table_b = table_332 + 197 + 2*682;
+
+ for (i = -197; i < 256+197+30; i++)
+ ((uint8_t *)table_r)[i] = ((table_Y[i+384] * 7 / 255) <<
+ (order == MPEG2CONVERT_RGB ? 5 : 0));
+ for (i = -132; i < 256+132+30; i++)
+ ((uint8_t *)table_g)[i-30] = ((table_Y[i+384] * 7 / 255) <<
+ (order == MPEG2CONVERT_RGB ? 2 : 3));
+ for (i = -232; i < 256+232+71; i++)
+ ((uint8_t *)table_b)[i] = ((table_Y[i+384] / 85) <<
+ (order == MPEG2CONVERT_RGB ? 0 : 6));
+ break;
+ }
+
+ for (i = 0; i < 256; i++) {
+ id->table_rV[i] = (((uint8_t *)table_r) +
+ entry_size * div_round (crv * (i-128), 76309));
+ id->table_gU[i] = (((uint8_t *)table_g) +
+ entry_size * div_round (cgu * (i-128), 76309));
+ id->table_gV[i] = entry_size * div_round (cgv * (i-128), 76309);
+ id->table_bU[i] = (((uint8_t *)table_b) +
+ entry_size * div_round (cbu * (i-128), 76309));
+ }
+
+ return 0;
+}
+
+static int rgb_internal (mpeg2convert_rgb_order_t order, unsigned int bpp,
+ int stage, void * _id, const mpeg2_sequence_t * seq,
+ int stride, uint32_t accel, void * arg,
+ mpeg2_convert_init_t * result)
+{
+ convert_rgb_t * id = (convert_rgb_t *) _id;
+ mpeg2convert_copy_t * copy = (mpeg2convert_copy_t *) 0;
+ unsigned int id_size = sizeof (convert_rgb_t);
+ int chroma420 = (seq->chroma_height < seq->height);
+ int convert420 = 0;
+ int rgb_stride_min = ((bpp + 7) >> 3) * seq->width;
+
+#ifdef ARCH_X86
+ if (!copy && (accel & MPEG2_ACCEL_X86_MMXEXT)) {
+ convert420 = 0;
+ copy = mpeg2convert_rgb_mmxext (order, bpp, seq);
+ }
+ if (!copy && (accel & MPEG2_ACCEL_X86_MMX)) {
+ convert420 = 0;
+ copy = mpeg2convert_rgb_mmx (order, bpp, seq);
+ }
+#endif
+#ifdef ARCH_SPARC
+ if (!copy && (accel & MPEG2_ACCEL_SPARC_VIS)) {
+ convert420 = chroma420;
+ copy = mpeg2convert_rgb_vis (order, bpp, seq);
+ }
+#endif
+ if (!copy) {
+ int src, dest;
+ static void (* rgb_c[3][5]) (void *, uint8_t * const *,
+ unsigned int) =
+ {{rgb_c_24_bgr_420, rgb_c_8_420, rgb_c_16_420,
+ rgb_c_24_rgb_420, rgb_c_32_420},
+ {rgb_c_24_bgr_422, rgb_c_8_422, rgb_c_16_422,
+ rgb_c_24_rgb_422, rgb_c_32_422},
+ {rgb_c_24_bgr_444, rgb_c_8_444, rgb_c_16_444,
+ rgb_c_24_rgb_444, rgb_c_32_444}};
+
+ convert420 = chroma420;
+ id_size = (sizeof (convert_rgb_c_t) +
+ rgb_c_init ((convert_rgb_c_t *) id, order, bpp));
+ src = ((seq->chroma_width == seq->width) +
+ (seq->chroma_height == seq->height));
+ dest = ((bpp == 24 && order == MPEG2CONVERT_BGR) ? 0 : (bpp + 7) >> 3);
+ copy = rgb_c[src][dest];
+ }
+
+ result->id_size = id_size;
+
+ if (stride < rgb_stride_min)
+ stride = rgb_stride_min;
+
+ if (stage == MPEG2_CONVERT_STRIDE)
+ return stride;
+ else if (stage == MPEG2_CONVERT_START) {
+ id->width = seq->width >> 3;
+ id->y_stride_frame = seq->width;
+ id->uv_stride_frame = seq->chroma_width;
+ id->rgb_stride_frame = stride;
+ id->rgb_stride_min = rgb_stride_min;
+ id->chroma420 = chroma420;
+ id->convert420 = convert420;
+ result->buf_size[0] = stride * seq->height;
+ result->buf_size[1] = result->buf_size[2] = 0;
+ result->start = rgb_start;
+ result->copy = copy;
+ }
+ return 0;
+}
+
+#define DECLARE(func,order,bpp) \
+int func (int stage, void * id, \
+ const mpeg2_sequence_t * sequence, int stride, \
+ uint32_t accel, void * arg, mpeg2_convert_init_t * result) \
+{ \
+ return rgb_internal (order, bpp, stage, id, sequence, stride, \
+ accel, arg, result); \
+}
+
+DECLARE (mpeg2convert_rgb32, MPEG2CONVERT_RGB, 32)
+DECLARE (mpeg2convert_rgb24, MPEG2CONVERT_RGB, 24)
+DECLARE (mpeg2convert_rgb16, MPEG2CONVERT_RGB, 16)
+DECLARE (mpeg2convert_rgb15, MPEG2CONVERT_RGB, 15)
+DECLARE (mpeg2convert_rgb8, MPEG2CONVERT_RGB, 8)
+DECLARE (mpeg2convert_bgr32, MPEG2CONVERT_BGR, 32)
+DECLARE (mpeg2convert_bgr24, MPEG2CONVERT_BGR, 24)
+DECLARE (mpeg2convert_bgr16, MPEG2CONVERT_BGR, 16)
+DECLARE (mpeg2convert_bgr15, MPEG2CONVERT_BGR, 15)
+DECLARE (mpeg2convert_bgr8, MPEG2CONVERT_BGR, 8)
+
+mpeg2_convert_t * mpeg2convert_rgb (mpeg2convert_rgb_order_t order,
+ unsigned int bpp)
+{
+ static mpeg2_convert_t * table[5][2] =
+ {{mpeg2convert_rgb15, mpeg2convert_bgr15},
+ {mpeg2convert_rgb8, mpeg2convert_bgr8},
+ {mpeg2convert_rgb16, mpeg2convert_bgr16},
+ {mpeg2convert_rgb24, mpeg2convert_bgr24},
+ {mpeg2convert_rgb32, mpeg2convert_bgr32}};
+
+ if (order == MPEG2CONVERT_RGB || order == MPEG2CONVERT_BGR) {
+ if (bpp == 15)
+ return table[0][order == MPEG2CONVERT_BGR];
+ else if (bpp >= 8 && bpp <= 32 && (bpp & 7) == 0)
+ return table[bpp >> 3][order == MPEG2CONVERT_BGR];
+ }
+ return (mpeg2_convert_t *) 0;
+}
diff --git a/libmpeg2/convert/rgb_mmx.c b/libmpeg2/convert/rgb_mmx.c
new file mode 100644
index 0000000..912291c
--- /dev/null
+++ b/libmpeg2/convert/rgb_mmx.c
@@ -0,0 +1,321 @@
+/*
+ * rgb_mmx.c
+ * Copyright (C) 2000-2003 Silicon Integrated System Corp.
+ * All Rights Reserved.
+ *
+ * Author: Olie Lho <ollie@sis.com.tw>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_X86
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2convert.h"
+#include "convert_internal.h"
+#include "attributes.h"
+#include "mmx.h"
+
+#define CPU_MMXEXT 0
+#define CPU_MMX 1
+
+/* CPU_MMXEXT/CPU_MMX adaptation layer */
+
+#define movntq(src,dest) \
+do { \
+ if (cpu == CPU_MMXEXT) \
+ movntq_r2m (src, dest); \
+ else \
+ movq_r2m (src, dest); \
+} while (0)
+
+static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv)
+{
+ static mmx_t mmx_80w = {0x0080008000800080LL};
+ static mmx_t mmx_U_green = {0xf37df37df37df37dLL};
+ static mmx_t mmx_U_blue = {0x4093409340934093LL};
+ static mmx_t mmx_V_red = {0x3312331233123312LL};
+ static mmx_t mmx_V_green = {0xe5fce5fce5fce5fcLL};
+ static mmx_t mmx_10w = {0x1010101010101010LL};
+ static mmx_t mmx_00ffw = {0x00ff00ff00ff00ffLL};
+ static mmx_t mmx_Y_coeff = {0x253f253f253f253fLL};
+
+ movd_m2r (*pu, mm0); /* mm0 = 00 00 00 00 u3 u2 u1 u0 */
+ movd_m2r (*pv, mm1); /* mm1 = 00 00 00 00 v3 v2 v1 v0 */
+ movq_m2r (*py, mm6); /* mm6 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
+ pxor_r2r (mm4, mm4); /* mm4 = 0 */
+ /* XXX might do cache preload for image here */
+
+ /*
+ * Do the multiply part of the conversion for even and odd pixels
+ * register usage:
+ * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels
+ * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels
+ * mm6 -> Y even, mm7 -> Y odd
+ */
+
+ punpcklbw_r2r (mm4, mm0); /* mm0 = u3 u2 u1 u0 */
+ punpcklbw_r2r (mm4, mm1); /* mm1 = v3 v2 v1 v0 */
+ psubsw_m2r (mmx_80w, mm0); /* u -= 128 */
+ psubsw_m2r (mmx_80w, mm1); /* v -= 128 */
+ psllw_i2r (3, mm0); /* promote precision */
+ psllw_i2r (3, mm1); /* promote precision */
+ movq_r2r (mm0, mm2); /* mm2 = u3 u2 u1 u0 */
+ movq_r2r (mm1, mm3); /* mm3 = v3 v2 v1 v0 */
+ pmulhw_m2r (mmx_U_green, mm2); /* mm2 = u * u_green */
+ pmulhw_m2r (mmx_V_green, mm3); /* mm3 = v * v_green */
+ pmulhw_m2r (mmx_U_blue, mm0); /* mm0 = chroma_b */
+ pmulhw_m2r (mmx_V_red, mm1); /* mm1 = chroma_r */
+ paddsw_r2r (mm3, mm2); /* mm2 = chroma_g */
+
+ psubusb_m2r (mmx_10w, mm6); /* Y -= 16 */
+ movq_r2r (mm6, mm7); /* mm7 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
+ pand_m2r (mmx_00ffw, mm6); /* mm6 = Y6 Y4 Y2 Y0 */
+ psrlw_i2r (8, mm7); /* mm7 = Y7 Y5 Y3 Y1 */
+ psllw_i2r (3, mm6); /* promote precision */
+ psllw_i2r (3, mm7); /* promote precision */
+ pmulhw_m2r (mmx_Y_coeff, mm6); /* mm6 = luma_rgb even */
+ pmulhw_m2r (mmx_Y_coeff, mm7); /* mm7 = luma_rgb odd */
+
+ /*
+ * Do the addition part of the conversion for even and odd pixels
+ * register usage:
+ * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels
+ * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels
+ * mm6 -> Y even, mm7 -> Y odd
+ */
+
+ movq_r2r (mm0, mm3); /* mm3 = chroma_b */
+ movq_r2r (mm1, mm4); /* mm4 = chroma_r */
+ movq_r2r (mm2, mm5); /* mm5 = chroma_g */
+ paddsw_r2r (mm6, mm0); /* mm0 = B6 B4 B2 B0 */
+ paddsw_r2r (mm7, mm3); /* mm3 = B7 B5 B3 B1 */
+ paddsw_r2r (mm6, mm1); /* mm1 = R6 R4 R2 R0 */
+ paddsw_r2r (mm7, mm4); /* mm4 = R7 R5 R3 R1 */
+ paddsw_r2r (mm6, mm2); /* mm2 = G6 G4 G2 G0 */
+ paddsw_r2r (mm7, mm5); /* mm5 = G7 G5 G3 G1 */
+ packuswb_r2r (mm0, mm0); /* saturate to 0-255 */
+ packuswb_r2r (mm1, mm1); /* saturate to 0-255 */
+ packuswb_r2r (mm2, mm2); /* saturate to 0-255 */
+ packuswb_r2r (mm3, mm3); /* saturate to 0-255 */
+ packuswb_r2r (mm4, mm4); /* saturate to 0-255 */
+ packuswb_r2r (mm5, mm5); /* saturate to 0-255 */
+ punpcklbw_r2r (mm3, mm0); /* mm0 = B7 B6 B5 B4 B3 B2 B1 B0 */
+ punpcklbw_r2r (mm4, mm1); /* mm1 = R7 R6 R5 R4 R3 R2 R1 R0 */
+ punpcklbw_r2r (mm5, mm2); /* mm2 = G7 G6 G5 G4 G3 G2 G1 G0 */
+}
+
+static inline void mmx_unpack_16rgb (uint8_t * image, const int cpu)
+{
+ static mmx_t mmx_bluemask = {0xf8f8f8f8f8f8f8f8LL};
+ static mmx_t mmx_greenmask = {0xfcfcfcfcfcfcfcfcLL};
+ static mmx_t mmx_redmask = {0xf8f8f8f8f8f8f8f8LL};
+
+ /*
+ * convert RGB plane to RGB 16 bits
+ * mm0 -> B, mm1 -> R, mm2 -> G
+ * mm4 -> GB, mm5 -> AR pixel 4-7
+ * mm6 -> GB, mm7 -> AR pixel 0-3
+ */
+
+ pand_m2r (mmx_bluemask, mm0); /* mm0 = b7b6b5b4b3______ */
+ pand_m2r (mmx_greenmask, mm2); /* mm2 = g7g6g5g4g3g2____ */
+ pand_m2r (mmx_redmask, mm1); /* mm1 = r7r6r5r4r3______ */
+ psrlq_i2r (3, mm0); /* mm0 = ______b7b6b5b4b3 */
+ pxor_r2r (mm4, mm4); /* mm4 = 0 */
+ movq_r2r (mm0, mm5); /* mm5 = ______b7b6b5b4b3 */
+ movq_r2r (mm2, mm7); /* mm7 = g7g6g5g4g3g2____ */
+
+ punpcklbw_r2r (mm4, mm2);
+ punpcklbw_r2r (mm1, mm0);
+ psllq_i2r (3, mm2);
+ por_r2r (mm2, mm0);
+ movntq (mm0, *image);
+
+ punpckhbw_r2r (mm4, mm7);
+ punpckhbw_r2r (mm1, mm5);
+ psllq_i2r (3, mm7);
+ por_r2r (mm7, mm5);
+ movntq (mm5, *(image+8));
+}
+
+static inline void mmx_unpack_32rgb (uint8_t * image, const int cpu)
+{
+ /*
+ * convert RGB plane to RGB packed format,
+ * mm0 -> B, mm1 -> R, mm2 -> G, mm3 -> 0,
+ * mm4 -> GB, mm5 -> AR pixel 4-7,
+ * mm6 -> GB, mm7 -> AR pixel 0-3
+ */
+
+ pxor_r2r (mm3, mm3);
+ movq_r2r (mm0, mm6);
+ movq_r2r (mm1, mm7);
+ movq_r2r (mm0, mm4);
+ movq_r2r (mm1, mm5);
+ punpcklbw_r2r (mm2, mm6);
+ punpcklbw_r2r (mm3, mm7);
+ punpcklwd_r2r (mm7, mm6);
+ movntq (mm6, *image);
+ movq_r2r (mm0, mm6);
+ punpcklbw_r2r (mm2, mm6);
+ punpckhwd_r2r (mm7, mm6);
+ movntq (mm6, *(image+8));
+ punpckhbw_r2r (mm2, mm4);
+ punpckhbw_r2r (mm3, mm5);
+ punpcklwd_r2r (mm5, mm4);
+ movntq (mm4, *(image+16));
+ movq_r2r (mm0, mm4);
+ punpckhbw_r2r (mm2, mm4);
+ punpckhwd_r2r (mm5, mm4);
+ movntq (mm4, *(image+24));
+}
+
+static inline void rgb16 (void * const _id, uint8_t * const * src,
+ const unsigned int v_offset, const int cpu)
+{
+ convert_rgb_t * const id = (convert_rgb_t *) _id;
+ uint8_t * dst;
+ uint8_t * py, * pu, * pv;
+ int i, j;
+
+ dst = id->rgb_ptr + id->rgb_slice * v_offset;
+ py = src[0]; pu = src[1]; pv = src[2];
+
+ i = 16;
+ do {
+ j = id->width;
+ do {
+ mmx_yuv2rgb (py, pu, pv);
+ mmx_unpack_16rgb (dst, cpu);
+ py += 8;
+ pu += 4;
+ pv += 4;
+ dst += 16;
+ } while (--j);
+
+ dst += id->rgb_increm;
+ py += id->y_increm;
+ if (--i == id->field) {
+ dst = id->rgb_ptr + id->rgb_slice * (v_offset + 1);
+ py = src[0] + id->y_stride_frame;
+ pu = src[1] + id->uv_stride_frame;
+ pv = src[2] + id->uv_stride_frame;
+ } else if (! (i & id->chroma420)) {
+ pu += id->uv_increm;
+ pv += id->uv_increm;
+ } else {
+ pu -= id->uv_stride_frame;
+ pv -= id->uv_stride_frame;
+ }
+ } while (i);
+}
+
+static inline void argb32 (void * const _id, uint8_t * const * src,
+ const unsigned int v_offset, const int cpu)
+{
+ convert_rgb_t * const id = (convert_rgb_t *) _id;
+ uint8_t * dst;
+ uint8_t * py, * pu, * pv;
+ int i, j;
+
+ dst = id->rgb_ptr + id->rgb_slice * v_offset;
+ py = src[0]; pu = src[1]; pv = src[2];
+
+ i = 16;
+ do {
+ j = id->width;
+ do {
+ mmx_yuv2rgb (py, pu, pv);
+ mmx_unpack_32rgb (dst, cpu);
+ py += 8;
+ pu += 4;
+ pv += 4;
+ dst += 32;
+ } while (--j);
+
+ dst += id->rgb_increm;
+ py += id->y_increm;
+ if (--i == id->field) {
+ dst = id->rgb_ptr + id->rgb_slice * (v_offset + 1);
+ py = src[0] + id->y_stride_frame;
+ pu = src[1] + id->uv_stride_frame;
+ pv = src[2] + id->uv_stride_frame;
+ } else if (! (i & id->chroma420)) {
+ pu += id->uv_increm;
+ pv += id->uv_increm;
+ } else {
+ pu -= id->uv_stride_frame;
+ pv -= id->uv_stride_frame;
+ }
+ } while (i);
+}
+
+static void mmxext_rgb16 (void * id, uint8_t * const * src,
+ unsigned int v_offset)
+{
+ rgb16 (id, src, v_offset, CPU_MMXEXT);
+}
+
+static void mmxext_argb32 (void * id, uint8_t * const * src,
+ unsigned int v_offset)
+{
+ argb32 (id, src, v_offset, CPU_MMXEXT);
+}
+
+static void mmx_rgb16 (void * id, uint8_t * const * src, unsigned int v_offset)
+{
+ rgb16 (id, src, v_offset, CPU_MMX);
+}
+
+static void mmx_argb32 (void * id, uint8_t * const * src,
+ unsigned int v_offset)
+{
+ argb32 (id, src, v_offset, CPU_MMX);
+}
+
+mpeg2convert_copy_t * mpeg2convert_rgb_mmxext (int order, int bpp,
+ const mpeg2_sequence_t * seq)
+{
+ if (order == MPEG2CONVERT_RGB && seq->chroma_width < seq->width) {
+ if (bpp == 16)
+ return mmxext_rgb16;
+ else if (bpp == 32)
+ return mmxext_argb32;
+ }
+ return NULL; /* Fallback to C */
+}
+
+mpeg2convert_copy_t * mpeg2convert_rgb_mmx (int order, int bpp,
+ const mpeg2_sequence_t * seq)
+{
+ if (order == MPEG2CONVERT_RGB && seq->chroma_width < seq->width) {
+ if (bpp == 16)
+ return mmx_rgb16;
+ else if (bpp == 32)
+ return mmx_argb32;
+ }
+ return NULL; /* Fallback to C */
+}
+#endif
diff --git a/libmpeg2/convert/rgb_vis.c b/libmpeg2/convert/rgb_vis.c
new file mode 100644
index 0000000..49d8d1d
--- /dev/null
+++ b/libmpeg2/convert/rgb_vis.c
@@ -0,0 +1,384 @@
+/*
+ * rgb_vis.c
+ * Copyright (C) 2003 David S. Miller <davem@redhat.com>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_SPARC
+
+#include <stddef.h>
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2convert.h"
+#include "convert_internal.h"
+#include "attributes.h"
+#include "vis.h"
+
+/* Based partially upon the MMX yuv2rgb code, see there for credits.
+ *
+ * The difference here is that since we have enough registers we
+ * process both even and odd scanlines in one pass.
+ */
+
+static const uint16_t const_2048[] ATTR_ALIGN(8) = {2048, 2048, 2048, 2048};
+static const uint16_t const_1024[] ATTR_ALIGN(8) = {1024, 1024, 1024, 1024};
+static const uint16_t const_128[] ATTR_ALIGN(8) = {128, 128, 128, 128};
+static const uint8_t const_Ugreen[] ATTR_ALIGN(8) =
+ {0xf3, 0x00, 0xf3, 0x00, 0xf3, 0x00, 0xf3, 0x00};
+static const uint8_t const_Vgreen[] ATTR_ALIGN(8) =
+ {0xe6, 0x00, 0xe6, 0x00, 0xe6, 0x00, 0xe6, 0x00};
+static const uint8_t const_Ublue_Vred[] ATTR_ALIGN(8) =
+ {0x41, 0x41, 0x41, 0x41, 0x33, 0x33, 0x33, 0x33};
+static const uint8_t const_Ycoeff[] ATTR_ALIGN(4) = {0x25, 0x25, 0x25, 0x25};
+
+#define TMP0 0
+#define TMP1 1
+#define TMP2 2
+#define TMP3 3
+#define TMP4 4
+#define TMP5 5
+#define TMP6 6
+#define TMP7 7
+#define TMP8 8
+#define TMP9 9
+#define TMP10 10
+#define TMP11 11
+#define TMP12 12
+#define TMP13 13
+
+#define CONST_UBLUE 14
+#define CONST_VRED 15
+#define CONST_2048 16
+
+#define BLUE8_EVEN 18
+#define BLUE8_ODD 19
+#define RED8_EVEN 20
+#define RED8_ODD 21
+#define GREEN8_EVEN 22
+#define GREEN8_ODD 23
+
+#define BLUE8_2_EVEN 24
+#define BLUE8_2_ODD 25
+#define RED8_2_EVEN 26
+#define RED8_2_ODD 27
+#define GREEN8_2_EVEN 28
+#define GREEN8_2_ODD 29
+
+#define CONST_YCOEFF 30
+#define ZEROS 31
+
+#define PU_0 32
+#define PU_2 34
+#define PV_0 36
+#define PV_2 38
+#define PY_0 40
+#define PY_2 42
+#define PY_4 44
+#define PY_6 46
+
+#define CONST_128 56
+#define CONST_1024 58
+#define CONST_VGREEN 60
+#define CONST_UGREEN 62
+
+static inline void vis_init_consts(void)
+{
+ vis_set_gsr(7 << VIS_GSR_SCALEFACT_SHIFT);
+
+ vis_ld64(const_2048[0], CONST_2048);
+ vis_ld64(const_1024[0], CONST_1024);
+ vis_ld64(const_Ugreen[0], CONST_UGREEN);
+ vis_ld64(const_Vgreen[0], CONST_VGREEN);
+ vis_fzeros(ZEROS);
+ vis_ld64(const_Ublue_Vred[0], CONST_UBLUE);
+ vis_ld32(const_Ycoeff[0], CONST_YCOEFF);
+ vis_ld64(const_128[0], CONST_128);
+}
+
+static inline void vis_yuv2rgb(uint8_t *py, uint8_t *pu, uint8_t *pv,
+ int y_stride)
+{
+ vis_ld32(pu[0], TMP0);
+
+ vis_ld32(pv[0], TMP2);
+
+ vis_ld64(py[0], TMP4);
+ vis_mul8x16au(TMP0, CONST_2048, PU_0);
+
+ vis_ld64_2(py, y_stride, TMP8);
+ vis_mul8x16au(TMP2, CONST_2048, PV_0);
+
+ vis_pmerge(TMP4, TMP5, TMP6);
+
+ vis_pmerge(TMP6, TMP7, TMP4);
+
+ vis_pmerge(TMP8, TMP9, TMP10);
+
+ vis_pmerge(TMP10, TMP11, TMP8);
+ vis_mul8x16au(TMP4, CONST_2048, PY_0);
+
+ vis_psub16(PU_0, CONST_1024, PU_0);
+ vis_mul8x16au(TMP5, CONST_2048, PY_2);
+
+ vis_psub16(PV_0, CONST_1024, PV_0);
+ vis_mul8x16au(TMP8, CONST_2048, PY_4);
+
+ vis_psub16(PY_0, CONST_128, PY_0);
+ vis_mul8x16au(TMP9, CONST_2048, PY_6);
+
+ vis_psub16(PY_2, CONST_128, PY_2);
+ vis_mul8x16(CONST_YCOEFF, PY_0, PY_0);
+
+ vis_psub16(PY_4, CONST_128, PY_4);
+ vis_mul8x16(CONST_YCOEFF, PY_2, PY_2);
+
+ vis_psub16(PY_6, CONST_128, PY_6);
+ vis_mul8x16(CONST_YCOEFF, PY_4, PY_4);
+
+ vis_mul8x16(CONST_YCOEFF, PY_6, PY_6);
+
+ vis_mul8sux16(CONST_UGREEN, PU_0, TMP0);
+
+ vis_mul8sux16(CONST_VGREEN, PV_0, TMP2);
+
+ vis_mul8x16(CONST_UBLUE, PU_0, TMP4);
+
+ vis_mul8x16(CONST_VRED, PV_0, TMP6);
+ vis_padd16(TMP0, TMP2, TMP10);
+
+ vis_padd16(PY_0, TMP4, TMP0);
+
+ vis_padd16(PY_2, TMP4, TMP2);
+ vis_pack16(TMP0, BLUE8_EVEN);
+
+ vis_padd16(PY_4, TMP4, TMP0);
+ vis_pack16(TMP2, BLUE8_ODD);
+
+ vis_padd16(PY_6, TMP4, TMP2);
+ vis_pack16(TMP0, BLUE8_2_EVEN);
+
+ vis_padd16(PY_0, TMP6, TMP0);
+ vis_pack16(TMP2, BLUE8_2_ODD);
+
+ vis_padd16(PY_2, TMP6, TMP2);
+ vis_pack16(TMP0, RED8_EVEN);
+
+ vis_padd16(PY_4, TMP6, TMP0);
+ vis_pack16(TMP2, RED8_ODD);
+
+ vis_padd16(PY_6, TMP6, TMP2);
+ vis_pack16(TMP0, RED8_2_EVEN);
+
+ vis_padd16(PY_0, TMP10, TMP0);
+ vis_pack16(TMP2, RED8_2_ODD);
+
+ vis_padd16(PY_2, TMP10, TMP2);
+ vis_pack16(TMP0, GREEN8_EVEN);
+
+ vis_padd16(PY_4, TMP10, TMP0);
+ vis_pack16(TMP2, GREEN8_ODD);
+
+ vis_padd16(PY_6, TMP10, TMP2);
+ vis_pack16(TMP0, GREEN8_2_EVEN);
+
+ vis_pack16(TMP2, GREEN8_2_ODD);
+ vis_pmerge(BLUE8_EVEN, BLUE8_ODD, BLUE8_EVEN);
+
+ vis_pmerge(BLUE8_2_EVEN, BLUE8_2_ODD, BLUE8_2_EVEN);
+
+ vis_pmerge(RED8_EVEN, RED8_ODD, RED8_EVEN);
+
+ vis_pmerge(RED8_2_EVEN, RED8_2_ODD, RED8_2_EVEN);
+
+ vis_pmerge(GREEN8_EVEN, GREEN8_ODD, GREEN8_EVEN);
+
+ vis_pmerge(GREEN8_2_EVEN, GREEN8_2_ODD, GREEN8_2_EVEN);
+}
+
+static inline void vis_unpack_32rgb(uint8_t *image, int stride)
+{
+ vis_pmerge(ZEROS, GREEN8_EVEN, TMP0);
+ vis_pmerge(RED8_EVEN, BLUE8_EVEN, TMP2);
+
+ vis_pmerge(TMP0, TMP2, TMP4);
+ vis_st64(TMP4, image[0]);
+
+ vis_pmerge(TMP1, TMP3, TMP6);
+ vis_st64_2(TMP6, image, 8);
+
+ vis_pmerge(ZEROS, GREEN8_ODD, TMP8);
+ vis_pmerge(RED8_ODD, BLUE8_ODD, TMP10);
+
+ vis_pmerge(TMP8, TMP10, TMP0);
+ vis_st64_2(TMP0, image, 16);
+
+ vis_pmerge(TMP9, TMP11, TMP2);
+ vis_st64_2(TMP2, image, 24);
+
+ image += stride;
+
+ vis_pmerge(ZEROS, GREEN8_2_EVEN, TMP0);
+ vis_pmerge(RED8_2_EVEN, BLUE8_2_EVEN, TMP2);
+
+ vis_pmerge(TMP0, TMP2, TMP4);
+ vis_st64(TMP4, image[0]);
+
+ vis_pmerge(TMP1, TMP3, TMP6);
+ vis_st64_2(TMP6, image, 8);
+
+ vis_pmerge(ZEROS, GREEN8_2_ODD, TMP8);
+ vis_pmerge(RED8_2_ODD, BLUE8_2_ODD, TMP10);
+
+ vis_pmerge(TMP8, TMP10, TMP0);
+ vis_st64_2(TMP0, image, 16);
+
+ vis_pmerge(TMP9, TMP11, TMP2);
+ vis_st64_2(TMP2, image, 24);
+}
+
+static inline void vis_unpack_32bgr(uint8_t *image, int stride)
+{
+ vis_pmerge(ZEROS, GREEN8_EVEN, TMP0);
+ vis_pmerge(BLUE8_EVEN, RED8_EVEN, TMP2);
+
+ vis_pmerge(TMP0, TMP2, TMP4);
+ vis_st64(TMP4, image[0]);
+
+ vis_pmerge(TMP1, TMP3, TMP6);
+ vis_st64_2(TMP6, image, 8);
+
+ vis_pmerge(ZEROS, GREEN8_ODD, TMP8);
+ vis_pmerge(BLUE8_ODD, RED8_ODD, TMP10);
+
+ vis_pmerge(TMP8, TMP10, TMP0);
+ vis_st64_2(TMP0, image, 16);
+
+ vis_pmerge(TMP9, TMP11, TMP2);
+ vis_st64_2(TMP2, image, 24);
+
+ image += stride;
+
+ vis_pmerge(ZEROS, GREEN8_2_EVEN, TMP0);
+ vis_pmerge(BLUE8_2_EVEN, RED8_2_EVEN, TMP2);
+
+ vis_pmerge(TMP0, TMP2, TMP4);
+ vis_st64(TMP4, image[0]);
+
+ vis_pmerge(TMP1, TMP3, TMP6);
+ vis_st64_2(TMP6, image, 8);
+
+ vis_pmerge(ZEROS, GREEN8_2_ODD, TMP8);
+ vis_pmerge(BLUE8_2_ODD, RED8_2_ODD, TMP10);
+
+ vis_pmerge(TMP8, TMP10, TMP0);
+ vis_st64_2(TMP0, image, 16);
+
+ vis_pmerge(TMP9, TMP11, TMP2);
+ vis_st64_2(TMP2, image, 24);
+}
+
+static inline void vis_yuv420_argb32(uint8_t *image,
+ uint8_t *py, uint8_t *pu, uint8_t *pv,
+ int width, int height, int rgb_stride,
+ int y_stride, int uv_stride)
+{
+ height >>= 1;
+ uv_stride -= width >> 1;
+ do {
+ int i = width >> 3;
+ do {
+ vis_yuv2rgb(py, pu, pv, y_stride);
+ vis_unpack_32rgb(image, rgb_stride);
+ py += 8;
+ pu += 4;
+ pv += 4;
+ image += 32;
+ } while (--i);
+
+ py += (y_stride << 1) - width;
+ image += (rgb_stride << 1) - 4 * width;
+ pu += uv_stride;
+ pv += uv_stride;
+ } while (--height);
+}
+
+static inline void vis_yuv420_abgr32(uint8_t *image,
+ uint8_t *py, uint8_t *pu, uint8_t *pv,
+ int width, int height, int rgb_stride,
+ int y_stride, int uv_stride)
+{
+ height >>= 1;
+ uv_stride -= width >> 1;
+ do {
+ int i = width >> 3;
+ do {
+ vis_yuv2rgb(py, pu, pv, y_stride);
+ vis_unpack_32bgr(image, rgb_stride);
+ py += 8;
+ pu += 4;
+ pv += 4;
+ image += 32;
+ } while (--i);
+
+ py += (y_stride << 1) - width;
+ image += (rgb_stride << 1) - 4 * width;
+ pu += uv_stride;
+ pv += uv_stride;
+ } while (--height);
+}
+
+static void vis_argb32(void *_id, uint8_t * const *src,
+ unsigned int v_offset)
+{
+ convert_rgb_t *id = (convert_rgb_t *) _id;
+
+ vis_init_consts();
+ vis_yuv420_argb32(id->rgb_ptr + id->rgb_stride * v_offset,
+ src[0], src[1], src[2], id->width, 16,
+ id->rgb_stride, id->y_stride, id->y_stride >> 1);
+}
+
+static void vis_abgr32(void *_id, uint8_t * const *src,
+ unsigned int v_offset)
+{
+ convert_rgb_t *id = (convert_rgb_t *) _id;
+
+ vis_init_consts();
+ vis_yuv420_abgr32(id->rgb_ptr + id->rgb_stride * v_offset,
+ src[0], src[1], src[2], id->width, 16,
+ id->rgb_stride, id->y_stride, id->y_stride >> 1);
+}
+
+mpeg2convert_copy_t *mpeg2convert_rgb_vis(int order, int bpp,
+ const mpeg2_sequence_t * seq)
+{
+ if (bpp == 32 && seq->chroma_height < seq->height) {
+ if (order == MPEG2CONVERT_RGB)
+ return vis_argb32;
+ if (order == MPEG2CONVERT_BGR)
+ return vis_abgr32;
+ }
+
+ return NULL; /* Fallback to C */
+}
+
+#endif /* ARCH_SPARC */
diff --git a/libmpeg2/convert/uyvy.c b/libmpeg2/convert/uyvy.c
new file mode 100644
index 0000000..7f107ff
--- /dev/null
+++ b/libmpeg2/convert/uyvy.c
@@ -0,0 +1,123 @@
+/*
+ * uyvy.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 2003 Regis Duchesne <hpreg@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "mpeg2convert.h"
+
+typedef struct {
+ int width;
+ int stride;
+ int chroma420;
+ uint8_t * out;
+} convert_uyvy_t;
+
+static void uyvy_start (void * _id, const mpeg2_fbuf_t * fbuf,
+ const mpeg2_picture_t * picture,
+ const mpeg2_gop_t * gop)
+{
+ convert_uyvy_t * instance = (convert_uyvy_t *) _id;
+
+ instance->out = fbuf->buf[0];
+ instance->stride = instance->width;
+ if (picture->nb_fields == 1) {
+ if (! (picture->flags & PIC_FLAG_TOP_FIELD_FIRST))
+ instance->out += 2 * instance->stride;
+ instance->stride <<= 1;
+ }
+}
+
+#ifdef WORDS_BIGENDIAN
+#define PACK(a,b,c,d) (((a) << 24) | ((b) << 16) | ((c) << 8) | (d))
+#else
+#define PACK(a,b,c,d) (((d) << 24) | ((c) << 16) | ((b) << 8) | (a))
+#endif
+
+static void uyvy_copy (void * const _id, uint8_t * const * src,
+ const unsigned int v_offset)
+{
+ const convert_uyvy_t * const id = (convert_uyvy_t *) _id;
+ uint8_t * _dst;
+ uint8_t * py, * pu, * pv;
+ int i, j;
+
+ _dst = id->out + 2 * id->stride * v_offset;
+ py = src[0]; pu = src[1]; pv = src[2];
+
+ i = 16;
+ do {
+ uint32_t * dst = (uint32_t *) _dst;
+
+ j = id->width >> 4;
+ do {
+ dst[0] = PACK (pu[0], py[0], pv[0], py[1]);
+ dst[1] = PACK (pu[1], py[2], pv[1], py[3]);
+ dst[2] = PACK (pu[2], py[4], pv[2], py[5]);
+ dst[3] = PACK (pu[3], py[6], pv[3], py[7]);
+ dst[4] = PACK (pu[4], py[8], pv[4], py[9]);
+ dst[5] = PACK (pu[5], py[10], pv[5], py[11]);
+ dst[6] = PACK (pu[6], py[12], pv[6], py[13]);
+ dst[7] = PACK (pu[7], py[14], pv[7], py[15]);
+ py += 16;
+ pu += 8;
+ pv += 8;
+ dst += 8;
+ } while (--j);
+ py -= id->width;
+ pu -= id->width >> 1;
+ pv -= id->width >> 1;
+ _dst += 2 * id->stride;
+ py += id->stride;
+ if (! (--i & id->chroma420)) {
+ pu += id->stride >> 1;
+ pv += id->stride >> 1;
+ }
+ } while (i);
+}
+
+int mpeg2convert_uyvy (int stage, void * _id, const mpeg2_sequence_t * seq,
+ int stride, uint32_t accel, void * arg,
+ mpeg2_convert_init_t * result)
+{
+ convert_uyvy_t * instance = (convert_uyvy_t *) _id;
+
+ if (seq->chroma_width == seq->width)
+ return 1;
+
+ if (instance) {
+ instance->width = seq->width;
+ instance->chroma420 = (seq->chroma_height < seq->height);
+ result->buf_size[0] = seq->width * seq->height * 2;
+ result->buf_size[1] = result->buf_size[2] = 0;
+ result->start = uyvy_start;
+ result->copy = uyvy_copy;
+ } else {
+ result->id_size = sizeof (convert_uyvy_t);
+ }
+
+ return 0;
+}
diff --git a/libmpeg2/cpu_accel.c b/libmpeg2/cpu_accel.c
new file mode 100644
index 0000000..9b24610
--- /dev/null
+++ b/libmpeg2/cpu_accel.c
@@ -0,0 +1,260 @@
+/*
+ * cpu_accel.c
+ * Copyright (C) 2000-2004 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "attributes.h"
+#include "mpeg2_internal.h"
+
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
+static inline uint32_t arch_accel (uint32_t accel)
+{
+ if (accel & (MPEG2_ACCEL_X86_3DNOW | MPEG2_ACCEL_X86_MMXEXT))
+ accel |= MPEG2_ACCEL_X86_MMX;
+
+ if (accel & (MPEG2_ACCEL_X86_SSE2 | MPEG2_ACCEL_X86_SSE3))
+ accel |= MPEG2_ACCEL_X86_MMXEXT;
+
+ if (accel & (MPEG2_ACCEL_X86_SSE3))
+ accel |= MPEG2_ACCEL_X86_SSE2;
+
+#ifdef ACCEL_DETECT
+ if (accel & MPEG2_ACCEL_DETECT) {
+ uint32_t eax, ebx, ecx, edx;
+ int AMD;
+
+#if defined(__x86_64__) || (!defined(PIC) && !defined(__PIC__))
+#define cpuid(op,eax,ebx,ecx,edx) \
+ __asm__ ("cpuid" \
+ : "=a" (eax), \
+ "=b" (ebx), \
+ "=c" (ecx), \
+ "=d" (edx) \
+ : "a" (op) \
+ : "cc")
+#else /* PIC version : save ebx (not needed on x86_64) */
+#define cpuid(op,eax,ebx,ecx,edx) \
+ __asm__ ("pushl %%ebx\n\t" \
+ "cpuid\n\t" \
+ "movl %%ebx,%1\n\t" \
+ "popl %%ebx" \
+ : "=a" (eax), \
+ "=r" (ebx), \
+ "=c" (ecx), \
+ "=d" (edx) \
+ : "a" (op) \
+ : "cc")
+#endif
+
+#ifndef __x86_64__ /* x86_64 supports the cpuid op */
+ __asm__ ("pushf\n\t"
+ "pushf\n\t"
+ "pop %0\n\t"
+ "movl %0,%1\n\t"
+ "xorl $0x200000,%0\n\t"
+ "push %0\n\t"
+ "popf\n\t"
+ "pushf\n\t"
+ "pop %0\n\t"
+ "popf"
+ : "=r" (eax),
+ "=r" (ebx)
+ :
+ : "cc");
+
+ if (eax == ebx) /* no cpuid */
+ return accel;
+#endif
+
+ cpuid (0x00000000, eax, ebx, ecx, edx);
+ if (!eax) /* vendor string only */
+ return accel;
+
+ AMD = (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65);
+
+ cpuid (0x00000001, eax, ebx, ecx, edx);
+ if (! (edx & 0x00800000)) /* no MMX */
+ return accel;
+
+ accel |= MPEG2_ACCEL_X86_MMX;
+ if (edx & 0x02000000) /* SSE - identical to AMD MMX ext. */
+ accel |= MPEG2_ACCEL_X86_MMXEXT;
+
+ if (edx & 0x04000000) /* SSE2 */
+ accel |= MPEG2_ACCEL_X86_SSE2;
+
+ if (ecx & 0x00000001) /* SSE3 */
+ accel |= MPEG2_ACCEL_X86_SSE3;
+
+ cpuid (0x80000000, eax, ebx, ecx, edx);
+ if (eax < 0x80000001) /* no extended capabilities */
+ return accel;
+
+ cpuid (0x80000001, eax, ebx, ecx, edx);
+
+ if (edx & 0x80000000)
+ accel |= MPEG2_ACCEL_X86_3DNOW;
+
+ if (AMD && (edx & 0x00400000)) /* AMD MMX extensions */
+ accel |= MPEG2_ACCEL_X86_MMXEXT;
+ }
+#endif /* ACCEL_DETECT */
+
+ return accel;
+}
+#endif /* ARCH_X86 || ARCH_X86_64 */
+
+#if defined(ACCEL_DETECT) && (defined(ARCH_PPC) || defined(ARCH_SPARC))
+#include <signal.h>
+#include <setjmp.h>
+
+static sigjmp_buf jmpbuf;
+static volatile sig_atomic_t canjump = 0;
+
+static RETSIGTYPE sigill_handler (int sig)
+{
+ if (!canjump) {
+ signal (sig, SIG_DFL);
+ raise (sig);
+ }
+
+ canjump = 0;
+ siglongjmp (jmpbuf, 1);
+}
+#endif /* ACCEL_DETECT && (ARCH_PPC || ARCH_SPARC) */
+
+#ifdef ARCH_PPC
+static uint32_t arch_accel (uint32_t accel)
+{
+#ifdef ACCEL_DETECT
+ if ((accel & (MPEG2_ACCEL_PPC_ALTIVEC | MPEG2_ACCEL_DETECT)) ==
+ MPEG2_ACCEL_DETECT) {
+ static RETSIGTYPE (* oldsig) (int);
+
+ oldsig = signal (SIGILL, sigill_handler);
+ if (sigsetjmp (jmpbuf, 1)) {
+ signal (SIGILL, oldsig);
+ return accel;
+ }
+
+ canjump = 1;
+
+#if defined(__APPLE_CC__) /* apple */
+#define VAND(a,b,c) "vand v" #a ",v" #b ",v" #c "\n\t"
+#else /* gnu */
+#define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t"
+#endif
+ asm volatile ("mtspr 256, %0\n\t"
+ VAND (0, 0, 0)
+ :
+ : "r" (-1));
+
+ canjump = 0;
+ accel |= MPEG2_ACCEL_PPC_ALTIVEC;
+
+ signal (SIGILL, oldsig);
+ }
+#endif /* ACCEL_DETECT */
+
+ return accel;
+}
+#endif /* ARCH_PPC */
+
+#ifdef ARCH_SPARC
+static uint32_t arch_accel (uint32_t accel)
+{
+ if (accel & MPEG2_ACCEL_SPARC_VIS2)
+ accel |= MPEG2_ACCEL_SPARC_VIS;
+
+#ifdef ACCEL_DETECT
+ if ((accel & (MPEG2_ACCEL_SPARC_VIS2 | MPEG2_ACCEL_DETECT)) ==
+ MPEG2_ACCEL_DETECT) {
+ static RETSIGTYPE (* oldsig) (int);
+
+ oldsig = signal (SIGILL, sigill_handler);
+ if (sigsetjmp (jmpbuf, 1)) {
+ signal (SIGILL, oldsig);
+ return accel;
+ }
+
+ canjump = 1;
+
+ /* pdist %f0, %f0, %f0 */
+ __asm__ __volatile__(".word\t0x81b007c0");
+
+ canjump = 0;
+ accel |= MPEG2_ACCEL_SPARC_VIS;
+
+ if (sigsetjmp (jmpbuf, 1)) {
+ signal (SIGILL, oldsig);
+ return accel;
+ }
+
+ canjump = 1;
+
+ /* edge8n %g0, %g0, %g0 */
+ __asm__ __volatile__(".word\t0x81b00020");
+
+ canjump = 0;
+ accel |= MPEG2_ACCEL_SPARC_VIS2;
+
+ signal (SIGILL, oldsig);
+ }
+#endif /* ACCEL_DETECT */
+
+ return accel;
+}
+#endif /* ARCH_SPARC */
+
+#ifdef ARCH_ALPHA
+static inline uint32_t arch_accel (uint32_t accel)
+{
+ if (accel & MPEG2_ACCEL_ALPHA_MVI)
+ accel |= MPEG2_ACCEL_ALPHA;
+
+#ifdef ACCEL_DETECT
+ if (accel & MPEG2_ACCEL_DETECT) {
+ uint64_t no_mvi;
+
+ asm volatile ("amask %1, %0"
+ : "=r" (no_mvi)
+ : "rI" (256)); /* AMASK_MVI */
+ accel |= no_mvi ? MPEG2_ACCEL_ALPHA : (MPEG2_ACCEL_ALPHA |
+ MPEG2_ACCEL_ALPHA_MVI);
+ }
+#endif /* ACCEL_DETECT */
+
+ return accel;
+}
+#endif /* ARCH_ALPHA */
+
+uint32_t mpeg2_detect_accel (uint32_t accel)
+{
+#if defined (ARCH_X86) || defined (ARCH_X86_64) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC)
+ accel = arch_accel (accel);
+#endif
+ return accel;
+}
diff --git a/libmpeg2/cpu_state.c b/libmpeg2/cpu_state.c
new file mode 100644
index 0000000..2f2f64a
--- /dev/null
+++ b/libmpeg2/cpu_state.c
@@ -0,0 +1,129 @@
+/*
+ * cpu_state.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "attributes.h"
+#include "mpeg2_internal.h"
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
+#include "mmx.h"
+#endif
+
+void (* mpeg2_cpu_state_save) (cpu_state_t * state) = NULL;
+void (* mpeg2_cpu_state_restore) (cpu_state_t * state) = NULL;
+
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
+static void state_restore_mmx (cpu_state_t * state)
+{
+ emms ();
+}
+#endif
+
+#ifdef ARCH_PPC
+#if defined(__APPLE_CC__) /* apple */
+#define LI(a,b) "li r" #a "," #b "\n\t"
+#define STVX0(a,b,c) "stvx v" #a ",0,r" #c "\n\t"
+#define STVX(a,b,c) "stvx v" #a ",r" #b ",r" #c "\n\t"
+#define LVX0(a,b,c) "lvx v" #a ",0,r" #c "\n\t"
+#define LVX(a,b,c) "lvx v" #a ",r" #b ",r" #c "\n\t"
+#else /* gnu */
+#define LI(a,b) "li " #a "," #b "\n\t"
+#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t"
+#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t"
+#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t"
+#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t"
+#endif
+
+static void state_save_altivec (cpu_state_t * state)
+{
+ asm (LI (9, 16)
+ STVX0 (20, 0, 3)
+ LI (11, 32)
+ STVX (21, 9, 3)
+ LI (9, 48)
+ STVX (22, 11, 3)
+ LI (11, 64)
+ STVX (23, 9, 3)
+ LI (9, 80)
+ STVX (24, 11, 3)
+ LI (11, 96)
+ STVX (25, 9, 3)
+ LI (9, 112)
+ STVX (26, 11, 3)
+ LI (11, 128)
+ STVX (27, 9, 3)
+ LI (9, 144)
+ STVX (28, 11, 3)
+ LI (11, 160)
+ STVX (29, 9, 3)
+ LI (9, 176)
+ STVX (30, 11, 3)
+ STVX (31, 9, 3));
+}
+
+static void state_restore_altivec (cpu_state_t * state)
+{
+ asm (LI (9, 16)
+ LVX0 (20, 0, 3)
+ LI (11, 32)
+ LVX (21, 9, 3)
+ LI (9, 48)
+ LVX (22, 11, 3)
+ LI (11, 64)
+ LVX (23, 9, 3)
+ LI (9, 80)
+ LVX (24, 11, 3)
+ LI (11, 96)
+ LVX (25, 9, 3)
+ LI (9, 112)
+ LVX (26, 11, 3)
+ LI (11, 128)
+ LVX (27, 9, 3)
+ LI (9, 144)
+ LVX (28, 11, 3)
+ LI (11, 160)
+ LVX (29, 9, 3)
+ LI (9, 176)
+ LVX (30, 11, 3)
+ LVX (31, 9, 3));
+}
+#endif
+
+void mpeg2_cpu_state_init (uint32_t accel)
+{
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
+ if (accel & MPEG2_ACCEL_X86_MMX) {
+ mpeg2_cpu_state_restore = state_restore_mmx;
+ }
+#endif
+#ifdef ARCH_PPC
+ if (accel & MPEG2_ACCEL_PPC_ALTIVEC) {
+ mpeg2_cpu_state_save = state_save_altivec;
+ mpeg2_cpu_state_restore = state_restore_altivec;
+ }
+#endif
+}
diff --git a/libmpeg2/decode.c b/libmpeg2/decode.c
new file mode 100644
index 0000000..3bfa27a
--- /dev/null
+++ b/libmpeg2/decode.c
@@ -0,0 +1,439 @@
+/*
+ * decode.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#include <string.h> /* memcmp/memset, try to remove */
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "attributes.h"
+#include "mpeg2_internal.h"
+
+static int mpeg2_accels = 0;
+
+#define BUFFER_SIZE (1194 * 1024)
+
+const mpeg2_info_t * mpeg2_info (mpeg2dec_t * mpeg2dec)
+{
+ return &(mpeg2dec->info);
+}
+
+static inline int skip_chunk (mpeg2dec_t * mpeg2dec, int bytes)
+{
+ uint8_t * current;
+ uint32_t shift;
+ uint8_t * limit;
+ uint8_t byte;
+
+ if (!bytes)
+ return 0;
+
+ current = mpeg2dec->buf_start;
+ shift = mpeg2dec->shift;
+ limit = current + bytes;
+
+ do {
+ byte = *current++;
+ if (shift == 0x00000100) {
+ int skipped;
+
+ mpeg2dec->shift = 0xffffff00;
+ skipped = current - mpeg2dec->buf_start;
+ mpeg2dec->buf_start = current;
+ return skipped;
+ }
+ shift = (shift | byte) << 8;
+ } while (current < limit);
+
+ mpeg2dec->shift = shift;
+ mpeg2dec->buf_start = current;
+ return 0;
+}
+
+static inline int copy_chunk (mpeg2dec_t * mpeg2dec, int bytes)
+{
+ uint8_t * current;
+ uint32_t shift;
+ uint8_t * chunk_ptr;
+ uint8_t * limit;
+ uint8_t byte;
+
+ if (!bytes)
+ return 0;
+
+ current = mpeg2dec->buf_start;
+ shift = mpeg2dec->shift;
+ chunk_ptr = mpeg2dec->chunk_ptr;
+ limit = current + bytes;
+
+ do {
+ byte = *current++;
+ if (shift == 0x00000100) {
+ int copied;
+
+ mpeg2dec->shift = 0xffffff00;
+ mpeg2dec->chunk_ptr = chunk_ptr + 1;
+ copied = current - mpeg2dec->buf_start;
+ mpeg2dec->buf_start = current;
+ return copied;
+ }
+ shift = (shift | byte) << 8;
+ *chunk_ptr++ = byte;
+ } while (current < limit);
+
+ mpeg2dec->shift = shift;
+ mpeg2dec->buf_start = current;
+ return 0;
+}
+
+void mpeg2_buffer (mpeg2dec_t * mpeg2dec, uint8_t * start, uint8_t * end)
+{
+ mpeg2dec->buf_start = start;
+ mpeg2dec->buf_end = end;
+}
+
+int mpeg2_getpos (mpeg2dec_t * mpeg2dec)
+{
+ return mpeg2dec->buf_end - mpeg2dec->buf_start;
+}
+
+static inline mpeg2_state_t seek_chunk (mpeg2dec_t * mpeg2dec)
+{
+ int size, skipped;
+
+ size = mpeg2dec->buf_end - mpeg2dec->buf_start;
+ skipped = skip_chunk (mpeg2dec, size);
+ if (!skipped) {
+ mpeg2dec->bytes_since_tag += size;
+ return STATE_BUFFER;
+ }
+ mpeg2dec->bytes_since_tag += skipped;
+ mpeg2dec->code = mpeg2dec->buf_start[-1];
+ return STATE_INTERNAL_NORETURN;
+}
+
+mpeg2_state_t mpeg2_seek_header (mpeg2dec_t * mpeg2dec)
+{
+ while (!(mpeg2dec->code == 0xb3 ||
+ ((mpeg2dec->code == 0xb7 || mpeg2dec->code == 0xb8 ||
+ !mpeg2dec->code) && mpeg2dec->sequence.width != (unsigned)-1)))
+ if (seek_chunk (mpeg2dec) == STATE_BUFFER)
+ return STATE_BUFFER;
+ mpeg2dec->chunk_start = mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer;
+ mpeg2dec->user_data_len = 0;
+ return ((mpeg2dec->code == 0xb7) ?
+ mpeg2_header_end (mpeg2dec) : mpeg2_parse_header (mpeg2dec));
+}
+
+#define RECEIVED(code,state) (((state) << 8) + (code))
+
+mpeg2_state_t mpeg2_parse (mpeg2dec_t * mpeg2dec)
+{
+ int size_buffer, size_chunk, copied;
+
+ if (mpeg2dec->action) {
+ mpeg2_state_t state;
+
+ state = mpeg2dec->action (mpeg2dec);
+ if ((int)state > (int)STATE_INTERNAL_NORETURN)
+ return state;
+ }
+
+ while (1) {
+ while ((unsigned) (mpeg2dec->code - mpeg2dec->first_decode_slice) <
+ mpeg2dec->nb_decode_slices) {
+ size_buffer = mpeg2dec->buf_end - mpeg2dec->buf_start;
+ size_chunk = (mpeg2dec->chunk_buffer + BUFFER_SIZE -
+ mpeg2dec->chunk_ptr);
+ if (size_buffer <= size_chunk) {
+ copied = copy_chunk (mpeg2dec, size_buffer);
+ if (!copied) {
+ mpeg2dec->bytes_since_tag += size_buffer;
+ mpeg2dec->chunk_ptr += size_buffer;
+ return STATE_BUFFER;
+ }
+ } else {
+ copied = copy_chunk (mpeg2dec, size_chunk);
+ if (!copied) {
+ /* filled the chunk buffer without finding a start code */
+ mpeg2dec->bytes_since_tag += size_chunk;
+ mpeg2dec->action = seek_chunk;
+ return STATE_INVALID;
+ }
+ }
+ mpeg2dec->bytes_since_tag += copied;
+
+ mpeg2_slice (&(mpeg2dec->decoder), mpeg2dec->code,
+ mpeg2dec->chunk_start);
+ mpeg2dec->code = mpeg2dec->buf_start[-1];
+ mpeg2dec->chunk_ptr = mpeg2dec->chunk_start;
+ }
+ if ((unsigned) (mpeg2dec->code - 1) >= 0xb0 - 1)
+ break;
+ if (seek_chunk (mpeg2dec) == STATE_BUFFER)
+ return STATE_BUFFER;
+ }
+
+ mpeg2dec->action = mpeg2_seek_header;
+ switch (mpeg2dec->code) {
+ case 0x00:
+ return mpeg2dec->state;
+ case 0xb3:
+ case 0xb7:
+ case 0xb8:
+ return (mpeg2dec->state == STATE_SLICE) ? STATE_SLICE : STATE_INVALID;
+ default:
+ mpeg2dec->action = seek_chunk;
+ return STATE_INVALID;
+ }
+}
+
+mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec)
+{
+ static int (* process_header[]) (mpeg2dec_t * mpeg2dec) = {
+ mpeg2_header_picture, mpeg2_header_extension, mpeg2_header_user_data,
+ mpeg2_header_sequence, NULL, NULL, NULL, NULL, mpeg2_header_gop
+ };
+ int size_buffer, size_chunk, copied;
+
+ mpeg2dec->action = mpeg2_parse_header;
+ mpeg2dec->info.user_data = NULL; mpeg2dec->info.user_data_len = 0;
+ while (1) {
+ size_buffer = mpeg2dec->buf_end - mpeg2dec->buf_start;
+ size_chunk = (mpeg2dec->chunk_buffer + BUFFER_SIZE -
+ mpeg2dec->chunk_ptr);
+ if (size_buffer <= size_chunk) {
+ copied = copy_chunk (mpeg2dec, size_buffer);
+ if (!copied) {
+ mpeg2dec->bytes_since_tag += size_buffer;
+ mpeg2dec->chunk_ptr += size_buffer;
+ return STATE_BUFFER;
+ }
+ } else {
+ copied = copy_chunk (mpeg2dec, size_chunk);
+ if (!copied) {
+ /* filled the chunk buffer without finding a start code */
+ mpeg2dec->bytes_since_tag += size_chunk;
+ mpeg2dec->code = 0xb4;
+ mpeg2dec->action = mpeg2_seek_header;
+ return STATE_INVALID;
+ }
+ }
+ mpeg2dec->bytes_since_tag += copied;
+
+ if (process_header[mpeg2dec->code & 0x0b] (mpeg2dec)) {
+ mpeg2dec->code = mpeg2dec->buf_start[-1];
+ mpeg2dec->action = mpeg2_seek_header;
+ return STATE_INVALID;
+ }
+
+ mpeg2dec->code = mpeg2dec->buf_start[-1];
+ switch (RECEIVED (mpeg2dec->code, mpeg2dec->state)) {
+
+ /* state transition after a sequence header */
+ case RECEIVED (0x00, STATE_SEQUENCE):
+ case RECEIVED (0xb8, STATE_SEQUENCE):
+ mpeg2_header_sequence_finalize (mpeg2dec);
+ break;
+
+ /* other legal state transitions */
+ case RECEIVED (0x00, STATE_GOP):
+ mpeg2_header_gop_finalize (mpeg2dec);
+ break;
+ case RECEIVED (0x01, STATE_PICTURE):
+ case RECEIVED (0x01, STATE_PICTURE_2ND):
+ mpeg2_header_picture_finalize (mpeg2dec, mpeg2_accels);
+ mpeg2dec->action = mpeg2_header_slice_start;
+ break;
+
+ /* legal headers within a given state */
+ case RECEIVED (0xb2, STATE_SEQUENCE):
+ case RECEIVED (0xb2, STATE_GOP):
+ case RECEIVED (0xb2, STATE_PICTURE):
+ case RECEIVED (0xb2, STATE_PICTURE_2ND):
+ case RECEIVED (0xb5, STATE_SEQUENCE):
+ case RECEIVED (0xb5, STATE_PICTURE):
+ case RECEIVED (0xb5, STATE_PICTURE_2ND):
+ mpeg2dec->chunk_ptr = mpeg2dec->chunk_start;
+ continue;
+
+ default:
+ mpeg2dec->action = mpeg2_seek_header;
+ return STATE_INVALID;
+ }
+
+ mpeg2dec->chunk_start = mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer;
+ mpeg2dec->user_data_len = 0;
+ return mpeg2dec->state;
+ }
+}
+
+int mpeg2_convert (mpeg2dec_t * mpeg2dec, mpeg2_convert_t convert, void * arg)
+{
+ mpeg2_convert_init_t convert_init;
+ int error;
+
+ error = convert (MPEG2_CONVERT_SET, NULL, &(mpeg2dec->sequence), 0,
+ mpeg2_accels, arg, &convert_init);
+ if (!error) {
+ mpeg2dec->convert = convert;
+ mpeg2dec->convert_arg = arg;
+ mpeg2dec->convert_id_size = convert_init.id_size;
+ mpeg2dec->convert_stride = 0;
+ }
+ return error;
+}
+
+int mpeg2_stride (mpeg2dec_t * mpeg2dec, int stride)
+{
+ if (!mpeg2dec->convert) {
+ if (stride < (int) mpeg2dec->sequence.width)
+ stride = mpeg2dec->sequence.width;
+ mpeg2dec->decoder.stride_frame = stride;
+ } else {
+ mpeg2_convert_init_t convert_init;
+
+ stride = mpeg2dec->convert (MPEG2_CONVERT_STRIDE, NULL,
+ &(mpeg2dec->sequence), stride,
+ mpeg2_accels, mpeg2dec->convert_arg,
+ &convert_init);
+ mpeg2dec->convert_id_size = convert_init.id_size;
+ mpeg2dec->convert_stride = stride;
+ }
+ return stride;
+}
+
+void mpeg2_set_buf (mpeg2dec_t * mpeg2dec, uint8_t * buf[3], void * id)
+{
+ mpeg2_fbuf_t * fbuf;
+
+ if (mpeg2dec->custom_fbuf) {
+ if (mpeg2dec->state == STATE_SEQUENCE) {
+ mpeg2dec->fbuf[2] = mpeg2dec->fbuf[1];
+ mpeg2dec->fbuf[1] = mpeg2dec->fbuf[0];
+ }
+ mpeg2_set_fbuf (mpeg2dec, (mpeg2dec->decoder.coding_type ==
+ PIC_FLAG_CODING_TYPE_B));
+ fbuf = mpeg2dec->fbuf[0];
+ } else {
+ fbuf = &(mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index].fbuf);
+ mpeg2dec->alloc_index_user = ++mpeg2dec->alloc_index;
+ }
+ fbuf->buf[0] = buf[0];
+ fbuf->buf[1] = buf[1];
+ fbuf->buf[2] = buf[2];
+ fbuf->id = id;
+}
+
+void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf)
+{
+ mpeg2dec->custom_fbuf = custom_fbuf;
+}
+
+void mpeg2_skip (mpeg2dec_t * mpeg2dec, int skip)
+{
+ mpeg2dec->first_decode_slice = 1;
+ mpeg2dec->nb_decode_slices = skip ? 0 : (0xb0 - 1);
+}
+
+void mpeg2_slice_region (mpeg2dec_t * mpeg2dec, int start, int end)
+{
+ start = (start < 1) ? 1 : (start > 0xb0) ? 0xb0 : start;
+ end = (end < start) ? start : (end > 0xb0) ? 0xb0 : end;
+ mpeg2dec->first_decode_slice = start;
+ mpeg2dec->nb_decode_slices = end - start;
+}
+
+void mpeg2_tag_picture (mpeg2dec_t * mpeg2dec, uint32_t tag, uint32_t tag2)
+{
+ mpeg2dec->tag_previous = mpeg2dec->tag_current;
+ mpeg2dec->tag2_previous = mpeg2dec->tag2_current;
+ mpeg2dec->tag_current = tag;
+ mpeg2dec->tag2_current = tag2;
+ mpeg2dec->num_tags++;
+ mpeg2dec->bytes_since_tag = 0;
+}
+
+uint32_t mpeg2_accel (uint32_t accel)
+{
+ if (!mpeg2_accels) {
+ mpeg2_accels = mpeg2_detect_accel (accel) | MPEG2_ACCEL_DETECT;
+ mpeg2_cpu_state_init (mpeg2_accels);
+ mpeg2_idct_init (mpeg2_accels);
+ mpeg2_mc_init (mpeg2_accels);
+ }
+ return mpeg2_accels & ~MPEG2_ACCEL_DETECT;
+}
+
+void mpeg2_reset (mpeg2dec_t * mpeg2dec, int full_reset)
+{
+ mpeg2dec->buf_start = mpeg2dec->buf_end = NULL;
+ mpeg2dec->num_tags = 0;
+ mpeg2dec->shift = 0xffffff00;
+ mpeg2dec->code = 0xb4;
+ mpeg2dec->action = mpeg2_seek_header;
+ mpeg2dec->state = STATE_INVALID;
+ mpeg2dec->first = 1;
+
+ mpeg2_reset_info(&(mpeg2dec->info));
+ mpeg2dec->info.gop = NULL;
+ mpeg2dec->info.user_data = NULL;
+ mpeg2dec->info.user_data_len = 0;
+ if (full_reset) {
+ mpeg2dec->info.sequence = NULL;
+ mpeg2_header_state_init (mpeg2dec);
+ }
+
+}
+
+mpeg2dec_t * mpeg2_init (void)
+{
+ mpeg2dec_t * mpeg2dec;
+
+ mpeg2_accel (MPEG2_ACCEL_DETECT);
+
+ mpeg2dec = (mpeg2dec_t *) mpeg2_malloc (sizeof (mpeg2dec_t),
+ MPEG2_ALLOC_MPEG2DEC);
+ if (mpeg2dec == NULL)
+ return NULL;
+
+ memset (mpeg2dec->decoder.DCTblock, 0, 64 * sizeof (int16_t));
+ memset (mpeg2dec->quantizer_matrix, 0, 4 * 64 * sizeof (uint8_t));
+
+ mpeg2dec->chunk_buffer = (uint8_t *) mpeg2_malloc (BUFFER_SIZE + 4,
+ MPEG2_ALLOC_CHUNK);
+
+ mpeg2dec->sequence.width = (unsigned)-1;
+ mpeg2_reset (mpeg2dec, 1);
+
+ return mpeg2dec;
+}
+
+void mpeg2_close (mpeg2dec_t * mpeg2dec)
+{
+ mpeg2_header_state_init (mpeg2dec);
+ mpeg2_free (mpeg2dec->chunk_buffer);
+ mpeg2_free (mpeg2dec);
+}
diff --git a/libmpeg2/header.c b/libmpeg2/header.c
new file mode 100644
index 0000000..6e286d6
--- /dev/null
+++ b/libmpeg2/header.c
@@ -0,0 +1,964 @@
+/*
+ * header.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 2003 Regis Duchesne <hpreg@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#include <inttypes.h>
+#include <stdlib.h> /* defines NULL */
+#include <string.h> /* memcmp */
+
+#include "mpeg2.h"
+#include "attributes.h"
+#include "mpeg2_internal.h"
+
+#define SEQ_EXT 2
+#define SEQ_DISPLAY_EXT 4
+#define QUANT_MATRIX_EXT 8
+#define COPYRIGHT_EXT 0x10
+#define PIC_DISPLAY_EXT 0x80
+#define PIC_CODING_EXT 0x100
+
+/* default intra quant matrix, in zig-zag order */
+static const uint8_t default_intra_quantizer_matrix[64] ATTR_ALIGN(16) = {
+ 8,
+ 16, 16,
+ 19, 16, 19,
+ 22, 22, 22, 22,
+ 22, 22, 26, 24, 26,
+ 27, 27, 27, 26, 26, 26,
+ 26, 27, 27, 27, 29, 29, 29,
+ 34, 34, 34, 29, 29, 29, 27, 27,
+ 29, 29, 32, 32, 34, 34, 37,
+ 38, 37, 35, 35, 34, 35,
+ 38, 38, 40, 40, 40,
+ 48, 48, 46, 46,
+ 56, 56, 58,
+ 69, 69,
+ 83
+};
+
+uint8_t mpeg2_scan_norm[64] ATTR_ALIGN(16) = {
+ /* Zig-Zag scan pattern */
+ 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
+};
+
+uint8_t mpeg2_scan_alt[64] ATTR_ALIGN(16) = {
+ /* Alternate scan pattern */
+ 0, 8, 16, 24, 1, 9, 2, 10, 17, 25, 32, 40, 48, 56, 57, 49,
+ 41, 33, 26, 18, 3, 11, 4, 12, 19, 27, 34, 42, 50, 58, 35, 43,
+ 51, 59, 20, 28, 5, 13, 6, 14, 21, 29, 36, 44, 52, 60, 37, 45,
+ 53, 61, 22, 30, 7, 15, 23, 31, 38, 46, 54, 62, 39, 47, 55, 63
+};
+
+/* Release all per-sequence allocations (when a sequence was active, i.e.
+ * sequence.width != -1) and reset the header-parsing state to its
+ * just-initialized defaults.  Used from mpeg2_init/mpeg2_close/mpeg2_reset
+ * and when a new, incompatible sequence header is encountered. */
+void mpeg2_header_state_init (mpeg2dec_t * mpeg2dec)
+{
+ if (mpeg2dec->sequence.width != (unsigned)-1) {
+ int i;
+
+ mpeg2dec->sequence.width = (unsigned)-1;
+ /* library-allocated frame buffers only; user-provided ones
+ * (custom_fbuf / alloc_index_user) are the caller's to free */
+ if (!mpeg2dec->custom_fbuf)
+ for (i = mpeg2dec->alloc_index_user;
+ i < mpeg2dec->alloc_index; i++) {
+ mpeg2_free (mpeg2dec->fbuf_alloc[i].fbuf.buf[0]);
+ mpeg2_free (mpeg2dec->fbuf_alloc[i].fbuf.buf[1]);
+ mpeg2_free (mpeg2dec->fbuf_alloc[i].fbuf.buf[2]);
+ }
+ /* intermediate yuv buffers only exist when color conversion was set up */
+ if (mpeg2dec->convert_start)
+ for (i = 0; i < 3; i++) {
+ mpeg2_free (mpeg2dec->yuv_buf[i][0]);
+ mpeg2_free (mpeg2dec->yuv_buf[i][1]);
+ mpeg2_free (mpeg2dec->yuv_buf[i][2]);
+ }
+ if (mpeg2dec->decoder.convert_id)
+ mpeg2_free (mpeg2dec->decoder.convert_id);
+ }
+ mpeg2dec->decoder.coding_type = I_TYPE;
+ mpeg2dec->decoder.convert = NULL;
+ mpeg2dec->decoder.convert_id = NULL;
+ mpeg2dec->picture = mpeg2dec->pictures;
+ mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[0].fbuf;
+ mpeg2dec->fbuf[1] = &mpeg2dec->fbuf_alloc[1].fbuf;
+ mpeg2dec->fbuf[2] = &mpeg2dec->fbuf_alloc[2].fbuf;
+ mpeg2dec->first = 1;
+ mpeg2dec->alloc_index = 0;
+ mpeg2dec->alloc_index_user = 0;
+ mpeg2dec->first_decode_slice = 1;
+ /* 0xb0 - 1 = 175: slice start codes run 0x01..0xaf, so decode all */
+ mpeg2dec->nb_decode_slices = 0xb0 - 1;
+ mpeg2dec->convert = NULL;
+ mpeg2dec->convert_start = NULL;
+ mpeg2dec->custom_fbuf = 0;
+ mpeg2dec->yuv_index = 0;
+}
+
+/* Clear every per-picture pointer exported through the public info
+ * struct, so callers never see stale picture or frame-buffer handles. */
+void mpeg2_reset_info (mpeg2_info_t * info)
+{
+ info->current_picture = NULL;
+ info->current_picture_2nd = NULL;
+ info->display_picture = NULL;
+ info->display_picture_2nd = NULL;
+ info->current_fbuf = NULL;
+ info->display_fbuf = NULL;
+ info->discard_fbuf = NULL;
+}
+
+/* Export any accumulated user_data bytes through the public info struct.
+ * The "- 3" trims trailing bytes off the accumulated length - presumably
+ * the next start-code prefix copied along by the chunk parser; verify
+ * against mpeg2_header_user_data / the chunk copier. */
+static void info_user_data (mpeg2dec_t * mpeg2dec)
+{
+ if (mpeg2dec->user_data_len) {
+ mpeg2dec->info.user_data = mpeg2dec->chunk_buffer;
+ mpeg2dec->info.user_data_len = mpeg2dec->user_data_len - 3;
+ }
+}
+
+/* Parse an MPEG-1/2 sequence header from the current chunk into
+ * new_sequence and new_quantizer_matrix.  Returns 0 on success, 1 on a
+ * malformed header (missing marker bit or zero picture dimensions). */
+int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec)
+{
+ uint8_t * buffer = mpeg2dec->chunk_start;
+ mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence);
+ /* frame period (in 27 MHz ticks, judging by 1125000 for 24 fps -
+ * TODO confirm unit) indexed by the 4-bit frame_rate_code */
+ static unsigned int frame_period[16] = {
+ 0, 1126125, 1125000, 1080000, 900900, 900000, 540000, 450450, 450000,
+ /* unofficial: xing 15 fps */
+ 1800000,
+ /* unofficial: libmpeg3 "Unofficial economy rates" 5/10/12/15 fps */
+ 5400000, 2700000, 2250000, 1800000, 0, 0
+ };
+ int i;
+
+ if ((buffer[6] & 0x20) != 0x20) /* missing marker_bit */
+ return 1;
+
+ /* 12-bit horizontal_size followed by 12-bit vertical_size */
+ i = (buffer[0] << 16) | (buffer[1] << 8) | buffer[2];
+ if (! (sequence->display_width = sequence->picture_width = i >> 12))
+ return 1;
+ if (! (sequence->display_height = sequence->picture_height = i & 0xfff))
+ return 1;
+ /* coded dimensions are rounded up to whole macroblocks */
+ sequence->width = (sequence->picture_width + 15) & ~15;
+ sequence->height = (sequence->picture_height + 15) & ~15;
+ sequence->chroma_width = sequence->width >> 1;
+ sequence->chroma_height = sequence->height >> 1;
+
+ sequence->flags = (SEQ_FLAG_PROGRESSIVE_SEQUENCE |
+ SEQ_VIDEO_FORMAT_UNSPECIFIED);
+
+ sequence->pixel_width = buffer[3] >> 4; /* aspect ratio */
+ sequence->frame_period = frame_period[buffer[3] & 15];
+
+ /* 18-bit bit_rate field; scaled to bytes/s later in finalize_sequence */
+ sequence->byte_rate = (buffer[4]<<10) | (buffer[5]<<2) | (buffer[6]>>6);
+
+ sequence->vbv_buffer_size = ((buffer[6]<<16)|(buffer[7]<<8))&0x1ff800;
+
+ if (buffer[7] & 4)
+ sequence->flags |= SEQ_FLAG_CONSTRAINED_PARAMETERS;
+
+ mpeg2dec->copy_matrix = 3; /* both intra and non-intra matrices changed */
+ if (buffer[7] & 2) { /* load_intra_quantizer_matrix */
+ /* matrix bits straddle byte boundaries, hence the 7/1 bit splice */
+ for (i = 0; i < 64; i++)
+ mpeg2dec->new_quantizer_matrix[0][mpeg2_scan_norm[i]] =
+ (buffer[i+7] << 7) | (buffer[i+8] >> 1);
+ buffer += 64;
+ } else
+ for (i = 0; i < 64; i++)
+ mpeg2dec->new_quantizer_matrix[0][mpeg2_scan_norm[i]] =
+ default_intra_quantizer_matrix[i];
+
+ if (buffer[7] & 1) /* load_non_intra_quantizer_matrix */
+ for (i = 0; i < 64; i++)
+ mpeg2dec->new_quantizer_matrix[1][mpeg2_scan_norm[i]] =
+ buffer[i+8];
+ else
+ memset (mpeg2dec->new_quantizer_matrix[1], 16, 64);
+
+ /* MPEG-1 defaults; overwritten by sequence_ext for MPEG-2 streams */
+ sequence->profile_level_id = 0x80;
+ sequence->colour_primaries = 0;
+ sequence->transfer_characteristics = 0;
+ sequence->matrix_coefficients = 0;
+
+ mpeg2dec->ext_state = SEQ_EXT;
+ mpeg2dec->state = STATE_SEQUENCE;
+ mpeg2dec->display_offset_x = mpeg2dec->display_offset_y = 0;
+
+ return 0;
+}
+
+/* Parse a sequence_extension (MPEG-2 only): widens the size fields with
+ * their 2 extension bits, sets profile/level, chroma format, low-delay
+ * and progressive flags, and refines bit rate / vbv size / frame period.
+ * Returns 0 on success, 1 on a malformed extension. */
+static int sequence_ext (mpeg2dec_t * mpeg2dec)
+{
+ uint8_t * buffer = mpeg2dec->chunk_start;
+ mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence);
+ uint32_t flags;
+
+ if (!(buffer[3] & 1)) /* missing marker_bit */
+ return 1;
+
+ sequence->profile_level_id = (buffer[0] << 4) | (buffer[1] >> 4);
+
+ /* add the 2 horizontal/vertical size extension bits (<< 12) */
+ sequence->display_width = sequence->picture_width +=
+ ((buffer[1] << 13) | (buffer[2] << 5)) & 0x3000;
+ sequence->display_height = sequence->picture_height +=
+ (buffer[2] << 7) & 0x3000;
+ sequence->width = (sequence->picture_width + 15) & ~15;
+ sequence->height = (sequence->picture_height + 15) & ~15;
+ flags = sequence->flags | SEQ_FLAG_MPEG2;
+ if (!(buffer[1] & 8)) {
+ flags &= ~SEQ_FLAG_PROGRESSIVE_SEQUENCE;
+ /* interlaced: coded height rounds up to 32-line macroblock pairs */
+ sequence->height = (sequence->height + 31) & ~31;
+ }
+ if (buffer[5] & 0x80)
+ flags |= SEQ_FLAG_LOW_DELAY;
+ sequence->flags = flags;
+ sequence->chroma_width = sequence->width;
+ sequence->chroma_height = sequence->height;
+ switch (buffer[1] & 6) { /* chroma_format; fall-throughs intentional */
+ case 0: /* invalid */
+ return 1;
+ case 2: /* 4:2:0 */
+ sequence->chroma_height >>= 1;
+ case 4: /* 4:2:2 */ /* (4:2:0 falls through: halves width too) */
+ sequence->chroma_width >>= 1;
+ }
+
+ /* 12 bit_rate_extension bits extend the 18-bit base value */
+ sequence->byte_rate += ((buffer[2]<<25) | (buffer[3]<<17)) & 0x3ffc0000;
+
+ sequence->vbv_buffer_size |= buffer[4] << 21;
+
+ /* frame_rate_extension_n / _d scale the base frame period */
+ sequence->frame_period =
+ sequence->frame_period * ((buffer[5]&31)+1) / (((buffer[5]>>5)&3)+1);
+
+ mpeg2dec->ext_state = SEQ_DISPLAY_EXT;
+
+ return 0;
+}
+
+/* Parse a sequence_display_extension: video format, optional colour
+ * description, and the display size (which overrides the defaults set
+ * from the picture size when non-zero).  Returns 0 on success. */
+static int sequence_display_ext (mpeg2dec_t * mpeg2dec)
+{
+ uint8_t * buffer = mpeg2dec->chunk_start;
+ mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence);
+
+ sequence->flags = ((sequence->flags & ~SEQ_MASK_VIDEO_FORMAT) |
+ ((buffer[0]<<4) & SEQ_MASK_VIDEO_FORMAT));
+ if (buffer[0] & 1) { /* colour_description present */
+ sequence->flags |= SEQ_FLAG_COLOUR_DESCRIPTION;
+ sequence->colour_primaries = buffer[1];
+ sequence->transfer_characteristics = buffer[2];
+ sequence->matrix_coefficients = buffer[3];
+ buffer += 3; /* realign so the size fields below index the same way */
+ }
+
+ if (!(buffer[2] & 2)) /* missing marker_bit */
+ return 1;
+
+ /* a zero display size means "keep the picture size" */
+ if( (buffer[1] << 6) | (buffer[2] >> 2) )
+ sequence->display_width = (buffer[1] << 6) | (buffer[2] >> 2);
+ if( ((buffer[2]& 1 ) << 13) | (buffer[3] << 5) | (buffer[4] >> 3) )
+ sequence->display_height =
+ ((buffer[2]& 1 ) << 13) | (buffer[3] << 5) | (buffer[4] >> 3);
+
+ return 0;
+}
+
+/* Reduce the fraction *u / *v to lowest terms in place, using Euclid's
+ * algorithm to find the greatest common divisor. */
+static inline void simplify (unsigned int * u, unsigned int * v)
+{
+ unsigned int gcd, rem;
+
+ rem = *u;
+ gcd = *v;
+ while (rem != 0) {
+ unsigned int t = rem;
+ rem = gcd % rem;
+ gcd = t;
+ }
+ *u /= gcd;
+ *v /= gcd;
+}
+
+/* Post-process a fully parsed sequence header: convert byte_rate from
+ * 50-byte units to bytes/s and turn the aspect_ratio_information code
+ * into a reduced pixel_width:pixel_height sample aspect ratio.  MPEG-2
+ * codes describe the display aspect ratio; MPEG-1 codes describe the
+ * pixel aspect ratio directly. */
+static inline void finalize_sequence (mpeg2_sequence_t * sequence)
+{
+ int width;
+ int height;
+
+ sequence->byte_rate *= 50;
+
+ if (sequence->flags & SEQ_FLAG_MPEG2) {
+ switch (sequence->pixel_width) {
+ case 1: /* square pixels */
+ sequence->pixel_width = sequence->pixel_height = 1; return;
+ case 2: /* 4:3 aspect ratio */
+ width = 4; height = 3; break;
+ case 3: /* 16:9 aspect ratio */
+ width = 16; height = 9; break;
+ case 4: /* 2.21:1 aspect ratio */
+ width = 221; height = 100; break;
+ default: /* illegal */
+ sequence->pixel_width = sequence->pixel_height = 0; return;
+ }
+ /* display aspect ratio -> pixel aspect ratio via the display size */
+ width *= sequence->display_height;
+ height *= sequence->display_width;
+
+ } else {
+ /* all-ones 18-bit bit_rate means variable bit rate in MPEG-1 */
+ if (sequence->byte_rate == 50 * 0x3ffff)
+ sequence->byte_rate = 0; /* mpeg-1 VBR */
+
+ switch (sequence->pixel_width) {
+ case 0: case 15: /* illegal */
+ sequence->pixel_width = sequence->pixel_height = 0; return;
+ case 1: /* square pixels */
+ sequence->pixel_width = sequence->pixel_height = 1; return;
+ case 3: /* 720x576 16:9 */
+ sequence->pixel_width = 64; sequence->pixel_height = 45; return;
+ case 6: /* 720x480 16:9 */
+ sequence->pixel_width = 32; sequence->pixel_height = 27; return;
+ case 8: /* BT.601 625 lines 4:3 */
+ sequence->pixel_width = 59; sequence->pixel_height = 54; return;
+ case 12: /* BT.601 525 lines 4:3 */
+ sequence->pixel_width = 10; sequence->pixel_height = 11; return;
+ default:
+ /* approximate the remaining table entries linearly -
+ * NOTE(review): presumably a fit to the 13818-2 table; confirm */
+ height = 88 * sequence->pixel_width + 1171;
+ width = 2000;
+ }
+ }
+
+ sequence->pixel_width = width;
+ sequence->pixel_height = height;
+ simplify (&sequence->pixel_width, &sequence->pixel_height);
+}
+
+/* Heuristically guess a "sane" pixel aspect ratio for streams whose
+ * headers lie, by matching the picture size against a table of known TV
+ * video modes.  Writes the guess into *pixel_width / *pixel_height and
+ * returns 0 when no guess is made (values fall back to the header's),
+ * 1 for a 625-line (576-high) guess and 2 for a 525-line guess. */
+int mpeg2_guess_aspect (const mpeg2_sequence_t * sequence,
+ unsigned int * pixel_width,
+ unsigned int * pixel_height)
+{
+ static struct {
+ unsigned int width, height;
+ } video_modes[] = {
+ {720, 576}, /* 625 lines, 13.5 MHz (D1, DV, DVB, DVD) */
+ {704, 576}, /* 625 lines, 13.5 MHz (1/1 D1, DVB, DVD, 4CIF) */
+ {544, 576}, /* 625 lines, 10.125 MHz (DVB, laserdisc) */
+ {528, 576}, /* 625 lines, 10.125 MHz (3/4 D1, DVB, laserdisc) */
+ {480, 576}, /* 625 lines, 9 MHz (2/3 D1, DVB, SVCD) */
+ {352, 576}, /* 625 lines, 6.75 MHz (D2, 1/2 D1, CVD, DVB, DVD) */
+ {352, 288}, /* 625 lines, 6.75 MHz, 1 field (D4, VCD, DVB, DVD, CIF) */
+ {176, 144}, /* 625 lines, 3.375 MHz, half field (QCIF) */
+ {720, 486}, /* 525 lines, 13.5 MHz (D1) */
+ {704, 486}, /* 525 lines, 13.5 MHz */
+ {720, 480}, /* 525 lines, 13.5 MHz (DV, DSS, DVD) */
+ {704, 480}, /* 525 lines, 13.5 MHz (1/1 D1, ATSC, DVD) */
+ {544, 480}, /* 525 lines. 10.125 MHz (DSS, laserdisc) */
+ {528, 480}, /* 525 lines. 10.125 MHz (3/4 D1, laserdisc) */
+ {480, 480}, /* 525 lines, 9 MHz (2/3 D1, SVCD) */
+ {352, 480}, /* 525 lines, 6.75 MHz (D2, 1/2 D1, CVD, DVD) */
+ {352, 240} /* 525 lines. 6.75 MHz, 1 field (D4, VCD, DSS, DVD) */
+ };
+ unsigned int width, height, pix_width, pix_height, i, DAR_16_9;
+
+ /* default to the header-declared ratio */
+ *pixel_width = sequence->pixel_width;
+ *pixel_height = sequence->pixel_height;
+ width = sequence->picture_width;
+ height = sequence->picture_height;
+ for (i = 0; i < sizeof (video_modes) / sizeof (video_modes[0]); i++)
+ if (width == video_modes[i].width && height == video_modes[i].height)
+ break;
+ /* only guess for known modes whose display size matches and whose
+ * header does not already claim square pixels */
+ if (i == sizeof (video_modes) / sizeof (video_modes[0]) ||
+ (sequence->pixel_width == 1 && sequence->pixel_height == 1) ||
+ width != sequence->display_width || height != sequence->display_height)
+ return 0;
+
+ /* scale sub-sampled modes up to full-resolution equivalents */
+ for (pix_height = 1; height * pix_height < 480; pix_height <<= 1);
+ height *= pix_height;
+ for (pix_width = 1; width * pix_width <= 352; pix_width <<= 1);
+ width *= pix_width;
+
+ if (! (sequence->flags & SEQ_FLAG_MPEG2)) {
+ /* MPEG-1 streams: accept only the exact BT.601 pixel heights */
+ static unsigned int mpeg1_check[2][2] = {{11, 54}, {27, 45}};
+ DAR_16_9 = (sequence->pixel_height == 27 ||
+ sequence->pixel_height == 45);
+ if (width < 704 ||
+ sequence->pixel_height != mpeg1_check[DAR_16_9][height == 576])
+ return 0;
+ } else {
+ DAR_16_9 = (3 * sequence->picture_width * sequence->pixel_width >
+ 4 * sequence->picture_height * sequence->pixel_height);
+ switch (width) {
+ case 528: case 544: pix_width *= 4; pix_height *= 3; break;
+ case 480: pix_width *= 3; pix_height *= 2; break;
+ }
+ }
+ if (DAR_16_9) {
+ pix_width *= 4; pix_height *= 3;
+ }
+ /* apply the BT.601 base pixel aspect for 625- vs 525-line systems */
+ if (height == 576) {
+ pix_width *= 59; pix_height *= 54;
+ } else {
+ pix_width *= 10; pix_height *= 11;
+ }
+ *pixel_width = pix_width;
+ *pixel_height = pix_height;
+ simplify (pixel_width, pixel_height);
+ return (height == 576) ? 1 : 2;
+}
+
+/* Commit new_quantizer_matrix[idx] into quantizer_matrix[idx] when it
+ * differs, and invalidate the cached prescale tables (scaled[idx] = -1)
+ * so prescale() rebuilds them on the next slice. */
+static void copy_matrix (mpeg2dec_t * mpeg2dec, int idx)
+{
+ if (memcmp (mpeg2dec->quantizer_matrix[idx],
+ mpeg2dec->new_quantizer_matrix[idx], 64)) {
+ memcpy (mpeg2dec->quantizer_matrix[idx],
+ mpeg2dec->new_quantizer_matrix[idx], 64);
+ mpeg2dec->scaled[idx] = -1;
+ }
+}
+
+/* Apply pending quantizer-matrix changes.  copy_matrix bits 0/1 flag new
+ * intra/non-intra (luma) matrices; bits 2/3 flag separate chroma
+ * matrices.  When a chroma matrix was loaded and differs from the luma
+ * one, chroma_quantizer[i] is pointed at its own prescale table;
+ * otherwise it aliases the luma table.
+ * NOTE(review): the memcmp compares quantizer_matrix[i] against
+ * new_quantizer_matrix[i+2] (old luma vs new chroma) and the final test
+ * uses (5 << i) = bits {0,2} or {1,3} - both look intentional but
+ * subtle; verify against upstream before touching. */
+static void finalize_matrix (mpeg2dec_t * mpeg2dec)
+{
+ mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ if (mpeg2dec->copy_matrix & (1 << i))
+ copy_matrix (mpeg2dec, i);
+ if ((mpeg2dec->copy_matrix & (4 << i)) &&
+ memcmp (mpeg2dec->quantizer_matrix[i],
+ mpeg2dec->new_quantizer_matrix[i+2], 64)) {
+ copy_matrix (mpeg2dec, i + 2);
+ decoder->chroma_quantizer[i] = decoder->quantizer_prescale[i+2];
+ } else if (mpeg2dec->copy_matrix & (5 << i))
+ decoder->chroma_quantizer[i] = decoder->quantizer_prescale[i];
+ }
+}
+
+/* Action run after STATE_INVALID_END: tear down the old sequence state,
+ * promote the already-parsed new sequence header, and report
+ * STATE_SEQUENCE so decoding restarts with the new parameters. */
+static mpeg2_state_t invalid_end_action (mpeg2dec_t * mpeg2dec)
+{
+ mpeg2_reset_info (&(mpeg2dec->info));
+ mpeg2dec->info.gop = NULL;
+ info_user_data (mpeg2dec);
+ mpeg2_header_state_init (mpeg2dec);
+ mpeg2dec->sequence = mpeg2dec->new_sequence;
+ mpeg2dec->action = mpeg2_seek_header;
+ mpeg2dec->state = STATE_SEQUENCE;
+ return STATE_SEQUENCE;
+}
+
+/* Finish sequence-header processing once all its extensions have been
+ * parsed: derive decoder parameters, then decide whether this header is
+ * new, repeated, modified, or incompatible with the current sequence
+ * (the latter forcing an end-of-stream / restart cycle). */
+void mpeg2_header_sequence_finalize (mpeg2dec_t * mpeg2dec)
+{
+ mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence);
+ mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
+
+ finalize_sequence (sequence);
+ finalize_matrix (mpeg2dec);
+
+ decoder->mpeg1 = !(sequence->flags & SEQ_FLAG_MPEG2);
+ decoder->width = sequence->width;
+ decoder->height = sequence->height;
+ decoder->vertical_position_extension = (sequence->picture_height > 2800);
+ /* 2 = 4:4:4, 1 = 4:2:2, 0 = 4:2:0, derived from subsampling */
+ decoder->chroma_format = ((sequence->chroma_width == sequence->width) +
+ (sequence->chroma_height == sequence->height));
+
+ if (mpeg2dec->sequence.width != (unsigned)-1) {
+ /*
+ * According to 6.1.1.6, repeat sequence headers should be
+ * identical to the original. However some encoders do not
+ * respect that and change various fields (including bitrate
+ * and aspect ratio) in the repeat sequence headers. So we
+ * choose to be as conservative as possible and only restart
+ * the decoder if the width, height, chroma_width,
+ * chroma_height or low_delay flag are modified.
+ */
+ if (sequence->width != mpeg2dec->sequence.width ||
+ sequence->height != mpeg2dec->sequence.height ||
+ sequence->chroma_width != mpeg2dec->sequence.chroma_width ||
+ sequence->chroma_height != mpeg2dec->sequence.chroma_height ||
+ ((sequence->flags ^ mpeg2dec->sequence.flags) &
+ SEQ_FLAG_LOW_DELAY)) {
+ decoder->stride_frame = sequence->width;
+ mpeg2_header_end (mpeg2dec);
+ mpeg2dec->action = invalid_end_action;
+ mpeg2dec->state = STATE_INVALID_END;
+ return;
+ }
+ /* compatible header: distinguish exact repeats from tweaks */
+ mpeg2dec->state = (memcmp (&(mpeg2dec->sequence), sequence,
+ sizeof (mpeg2_sequence_t)) ?
+ STATE_SEQUENCE_MODIFIED : STATE_SEQUENCE_REPEATED);
+ } else
+ decoder->stride_frame = sequence->width;
+ mpeg2dec->sequence = *sequence;
+ mpeg2_reset_info (&(mpeg2dec->info));
+ mpeg2dec->info.sequence = &(mpeg2dec->sequence);
+ mpeg2dec->info.gop = NULL;
+ info_user_data (mpeg2dec);
+}
+
+/* Parse a group_of_pictures header: time code (hours/minutes/seconds/
+ * pictures) plus the drop_frame, closed_gop and broken_link flags packed
+ * into gop->flags.  Returns 0 on success, 1 if the time-code marker bit
+ * is missing. */
+int mpeg2_header_gop (mpeg2dec_t * mpeg2dec)
+{
+ uint8_t * buffer = mpeg2dec->chunk_start;
+ mpeg2_gop_t * gop = &(mpeg2dec->new_gop);
+
+ if (! (buffer[1] & 8)) /* missing marker_bit */
+ return 1;
+ gop->hours = (buffer[0] >> 2) & 31;
+ gop->minutes = ((buffer[0] << 4) | (buffer[1] >> 4)) & 63;
+ gop->seconds = ((buffer[1] << 3) | (buffer[2] >> 5)) & 63;
+ gop->pictures = ((buffer[2] << 1) | (buffer[3] >> 7)) & 63;
+ /* bit 0: drop_frame, bits 1-2: closed_gop / broken_link */
+ gop->flags = (buffer[0] >> 7) | ((buffer[3] >> 4) & 6);
+ mpeg2dec->state = STATE_GOP;
+ return 0;
+}
+
+/* Commit the parsed GOP header and expose it (plus any user data)
+ * through the public info struct. */
+void mpeg2_header_gop_finalize (mpeg2dec_t * mpeg2dec)
+{
+ mpeg2dec->gop = mpeg2dec->new_gop;
+ mpeg2_reset_info (&(mpeg2dec->info));
+ mpeg2dec->info.gop = &(mpeg2dec->gop);
+ info_user_data (mpeg2dec);
+}
+
+/* Pick the frame buffer for the current picture: the first of the three
+ * internal fbufs not currently in use as a reference (fbuf[1]/fbuf[2]).
+ * For B pictures, or in low-delay mode, the chosen buffer is also the
+ * one to display (and to discard, for B or converted output). */
+void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int b_type)
+{
+ int i;
+
+ for (i = 0; i < 3; i++)
+ if (mpeg2dec->fbuf[1] != &mpeg2dec->fbuf_alloc[i].fbuf &&
+ mpeg2dec->fbuf[2] != &mpeg2dec->fbuf_alloc[i].fbuf) {
+ mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[i].fbuf;
+ mpeg2dec->info.current_fbuf = mpeg2dec->fbuf[0];
+ if (b_type || (mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY)) {
+ if (b_type || mpeg2dec->convert)
+ mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[0];
+ mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[0];
+ }
+ break;
+ }
+}
+
+/* Parse a picture header into new_picture and set MPEG-1 defaults for
+ * the fields that picture_coding_extension would otherwise provide.
+ * Also matches any pending mpeg2_tag_picture() tags against this picture
+ * using the byte position recorded in bytes_since_tag.  Always returns
+ * 0 (errors are caught at extension/slice level). */
+int mpeg2_header_picture (mpeg2dec_t * mpeg2dec)
+{
+ uint8_t * buffer = mpeg2dec->chunk_start;
+ mpeg2_picture_t * picture = &(mpeg2dec->new_picture);
+ mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
+ int type;
+
+ /* a picture header right after a first field starts the second field */
+ mpeg2dec->state = ((mpeg2dec->state != STATE_SLICE_1ST) ?
+ STATE_PICTURE : STATE_PICTURE_2ND);
+ mpeg2dec->ext_state = PIC_CODING_EXT;
+
+ picture->temporal_reference = (buffer[0] << 2) | (buffer[1] >> 6);
+
+ type = (buffer [1] >> 3) & 7;
+ if (type == PIC_FLAG_CODING_TYPE_P || type == PIC_FLAG_CODING_TYPE_B) {
+ /* forward_f_code and backward_f_code - used in mpeg1 only */
+ decoder->f_motion.f_code[1] = (buffer[3] >> 2) & 1;
+ decoder->f_motion.f_code[0] =
+ (((buffer[3] << 1) | (buffer[4] >> 7)) & 7) - 1;
+ decoder->b_motion.f_code[1] = (buffer[4] >> 6) & 1;
+ decoder->b_motion.f_code[0] = ((buffer[4] >> 3) & 7) - 1;
+ }
+
+ picture->flags = PIC_FLAG_PROGRESSIVE_FRAME | type;
+ picture->tag = picture->tag2 = 0;
+ if (mpeg2dec->num_tags) {
+ /* was the newest tag supplied at or before this picture's start? */
+ if (mpeg2dec->bytes_since_tag >= mpeg2dec->chunk_ptr - buffer + 4) {
+ mpeg2dec->num_tags = 0;
+ picture->tag = mpeg2dec->tag_current;
+ picture->tag2 = mpeg2dec->tag2_current;
+ picture->flags |= PIC_FLAG_TAGS;
+ } else if (mpeg2dec->num_tags > 1) {
+ /* newest tag is for a later picture; use the previous pair */
+ mpeg2dec->num_tags = 1;
+ picture->tag = mpeg2dec->tag_previous;
+ picture->tag2 = mpeg2dec->tag2_previous;
+ picture->flags |= PIC_FLAG_TAGS;
+ }
+ }
+ picture->nb_fields = 2;
+ picture->display_offset[0].x = picture->display_offset[1].x =
+ picture->display_offset[2].x = mpeg2dec->display_offset_x;
+ picture->display_offset[0].y = picture->display_offset[1].y =
+ picture->display_offset[2].y = mpeg2dec->display_offset_y;
+
+ /* XXXXXX decode extra_information_picture as well */
+
+ /* MPEG-1 defaults; overridden by picture_coding_ext for MPEG-2 */
+ decoder->q_scale_type = 0;
+ decoder->intra_dc_precision = 7;
+ decoder->frame_pred_frame_dct = 1;
+ decoder->concealment_motion_vectors = 0;
+ decoder->scan = mpeg2_scan_norm;
+ decoder->picture_structure = FRAME_PICTURE;
+ mpeg2dec->copy_matrix = 0;
+
+ return 0;
+}
+
+/* Parse a picture_coding_extension (MPEG-2): f_codes, DC precision,
+ * picture structure, field/frame flags, scan order and composite
+ * display data.  Returns 0 on success, 1 on an invalid picture
+ * structure. */
+static int picture_coding_ext (mpeg2dec_t * mpeg2dec)
+{
+ uint8_t * buffer = mpeg2dec->chunk_start;
+ mpeg2_picture_t * picture = &(mpeg2dec->new_picture);
+ mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
+ uint32_t flags;
+
+ /* pre subtract 1 for use later in compute_motion_vector */
+ decoder->f_motion.f_code[0] = (buffer[0] & 15) - 1;
+ decoder->f_motion.f_code[1] = (buffer[1] >> 4) - 1;
+ decoder->b_motion.f_code[0] = (buffer[1] & 15) - 1;
+ decoder->b_motion.f_code[1] = (buffer[2] >> 4) - 1;
+
+ flags = picture->flags;
+ decoder->intra_dc_precision = 7 - ((buffer[2] >> 2) & 3);
+ decoder->picture_structure = buffer[2] & 3;
+ switch (decoder->picture_structure) {
+ case TOP_FIELD:
+ flags |= PIC_FLAG_TOP_FIELD_FIRST;
+ /* fall through: both field types are single-field pictures */
+ case BOTTOM_FIELD:
+ picture->nb_fields = 1;
+ break;
+ case FRAME_PICTURE:
+ if (!(mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)) {
+ /* repeat_first_field gives 3 display fields instead of 2 */
+ picture->nb_fields = (buffer[3] & 2) ? 3 : 2;
+ flags |= (buffer[3] & 128) ? PIC_FLAG_TOP_FIELD_FIRST : 0;
+ flags |= (buffer[3] & 2) ? PIC_FLAG_REPEAT_FIRST_FIELD : 0;
+ } else
+ /* progressive sequence: repeat/TFF select 2, 4 or 6 fields */
+ picture->nb_fields = (buffer[3]&2) ? ((buffer[3]&128) ? 6 : 4) : 2;
+ break;
+ default: /* picture_structure == 0 is reserved */
+ return 1;
+ }
+ decoder->top_field_first = buffer[3] >> 7;
+ decoder->frame_pred_frame_dct = (buffer[3] >> 6) & 1;
+ decoder->concealment_motion_vectors = (buffer[3] >> 5) & 1;
+ decoder->q_scale_type = buffer[3] & 16; /* nonzero = non-linear scale */
+ decoder->intra_vlc_format = (buffer[3] >> 3) & 1;
+ decoder->scan = (buffer[3] & 4) ? mpeg2_scan_alt : mpeg2_scan_norm;
+ if (!(buffer[4] & 0x80))
+ flags &= ~PIC_FLAG_PROGRESSIVE_FRAME;
+ if (buffer[4] & 0x40) /* composite_display_flag */
+ flags |= (((buffer[4]<<26) | (buffer[5]<<18) | (buffer[6]<<10)) &
+ PIC_MASK_COMPOSITE_DISPLAY) | PIC_FLAG_COMPOSITE_DISPLAY;
+ picture->flags = flags;
+
+ mpeg2dec->ext_state = PIC_DISPLAY_EXT | COPYRIGHT_EXT | QUANT_MATRIX_EXT;
+
+ return 0;
+}
+
+/* Parse a picture_display_extension: one (x, y) pan-scan offset per
+ * display field (half as many for progressive sequences).  Offsets are
+ * 16-bit signed values each followed by a marker bit, packed back to
+ * back - the shifting below extracts pair i and checks both markers at
+ * once via (x & y & 1).  Remaining slots inherit the last offset.
+ * Returns 0 on success, 1 on a missing marker bit. */
+static int picture_display_ext (mpeg2dec_t * mpeg2dec)
+{
+ uint8_t * buffer = mpeg2dec->chunk_start;
+ mpeg2_picture_t * picture = &(mpeg2dec->new_picture);
+ int i, nb_pos;
+
+ nb_pos = picture->nb_fields;
+ if (mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)
+ nb_pos >>= 1;
+
+ for (i = 0; i < nb_pos; i++) {
+ int x, y;
+
+ x = ((buffer[4*i] << 24) | (buffer[4*i+1] << 16) |
+ (buffer[4*i+2] << 8) | buffer[4*i+3]) >> (11-2*i);
+ y = ((buffer[4*i+2] << 24) | (buffer[4*i+3] << 16) |
+ (buffer[4*i+4] << 8) | buffer[4*i+5]) >> (10-2*i);
+ if (! (x & y & 1)) /* both marker bits must be set */
+ return 1;
+ picture->display_offset[i].x = mpeg2dec->display_offset_x = x >> 1;
+ picture->display_offset[i].y = mpeg2dec->display_offset_y = y >> 1;
+ }
+ for (; i < 3; i++) {
+ picture->display_offset[i].x = mpeg2dec->display_offset_x;
+ picture->display_offset[i].y = mpeg2dec->display_offset_y;
+ }
+ return 0;
+}
+
+/* Commit the parsed picture header.  For a first (or frame) picture this
+ * selects the picture slot and frame buffers, updates the exported info
+ * struct (reference pictures are displayed one frame late unless
+ * low_delay), and lazily sets up color conversion and allocates frame
+ * buffers on first use.  For a second field it only attaches the new
+ * field to the current picture pair. */
+void mpeg2_header_picture_finalize (mpeg2dec_t * mpeg2dec, uint32_t accels)
+{
+ mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
+ int old_type_b = (decoder->coding_type == B_TYPE);
+ int low_delay = mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY;
+
+ finalize_matrix (mpeg2dec);
+ decoder->coding_type = mpeg2dec->new_picture.flags & PIC_MASK_CODING_TYPE;
+
+ if (mpeg2dec->state == STATE_PICTURE) {
+ mpeg2_picture_t * picture;
+ mpeg2_picture_t * other;
+
+ decoder->second_field = 0;
+
+ /* pictures[] holds two pairs (each pair = frame or two fields);
+ * alternate pairs, except consecutive B pictures reuse the same */
+ picture = other = mpeg2dec->pictures;
+ if (old_type_b ^ (mpeg2dec->picture < mpeg2dec->pictures + 2))
+ picture += 2;
+ else
+ other += 2;
+ mpeg2dec->picture = picture;
+ *picture = mpeg2dec->new_picture;
+
+ /* after a reference picture, rotate the forward/backward refs */
+ if (!old_type_b) {
+ mpeg2dec->fbuf[2] = mpeg2dec->fbuf[1];
+ mpeg2dec->fbuf[1] = mpeg2dec->fbuf[0];
+ }
+ mpeg2dec->fbuf[0] = NULL;
+ mpeg2_reset_info (&(mpeg2dec->info));
+ mpeg2dec->info.current_picture = picture;
+ mpeg2dec->info.display_picture = picture;
+ if (decoder->coding_type != B_TYPE) {
+ /* reference picture: display the previous reference instead,
+ * except the very first one (nothing to show yet) */
+ if (!low_delay) {
+ if (mpeg2dec->first) {
+ mpeg2dec->info.display_picture = NULL;
+ mpeg2dec->first = 0;
+ } else {
+ mpeg2dec->info.display_picture = other;
+ if (other->nb_fields == 1)
+ mpeg2dec->info.display_picture_2nd = other + 1;
+ mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[1];
+ }
+ }
+ if (!low_delay + !mpeg2dec->convert)
+ mpeg2dec->info.discard_fbuf =
+ mpeg2dec->fbuf[!low_delay + !mpeg2dec->convert];
+ }
+ if (mpeg2dec->convert) {
+ mpeg2_convert_init_t convert_init;
+ /* one-time conversion setup: query the converter, then
+ * allocate the internal yuv work buffers */
+ if (!mpeg2dec->convert_start) {
+ int y_size, uv_size;
+
+ mpeg2dec->decoder.convert_id =
+ mpeg2_malloc (mpeg2dec->convert_id_size,
+ MPEG2_ALLOC_CONVERT_ID);
+ mpeg2dec->convert (MPEG2_CONVERT_START,
+ mpeg2dec->decoder.convert_id,
+ &(mpeg2dec->sequence),
+ mpeg2dec->convert_stride, accels,
+ mpeg2dec->convert_arg, &convert_init);
+ mpeg2dec->convert_start = convert_init.start;
+ mpeg2dec->decoder.convert = convert_init.copy;
+
+ y_size = decoder->stride_frame * mpeg2dec->sequence.height;
+ uv_size = y_size >> (2 - mpeg2dec->decoder.chroma_format);
+ mpeg2dec->yuv_buf[0][0] =
+ (uint8_t *) mpeg2_malloc (y_size, MPEG2_ALLOC_YUV);
+ mpeg2dec->yuv_buf[0][1] =
+ (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV);
+ mpeg2dec->yuv_buf[0][2] =
+ (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV);
+ mpeg2dec->yuv_buf[1][0] =
+ (uint8_t *) mpeg2_malloc (y_size, MPEG2_ALLOC_YUV);
+ mpeg2dec->yuv_buf[1][1] =
+ (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV);
+ mpeg2dec->yuv_buf[1][2] =
+ (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV);
+ /* third buffer is only a slice-high (32 lines) scratch area */
+ y_size = decoder->stride_frame * 32;
+ uv_size = y_size >> (2 - mpeg2dec->decoder.chroma_format);
+ mpeg2dec->yuv_buf[2][0] =
+ (uint8_t *) mpeg2_malloc (y_size, MPEG2_ALLOC_YUV);
+ mpeg2dec->yuv_buf[2][1] =
+ (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV);
+ mpeg2dec->yuv_buf[2][2] =
+ (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV);
+ }
+ if (!mpeg2dec->custom_fbuf) {
+ /* NOTE(review): convert_init is only filled in the branch
+ * above; presumably alloc_index is already 3 whenever
+ * convert_start was set on a previous call - confirm */
+ while (mpeg2dec->alloc_index < 3) {
+ mpeg2_fbuf_t * fbuf;
+
+ fbuf = &mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index++].fbuf;
+ fbuf->id = NULL;
+ fbuf->buf[0] =
+ (uint8_t *) mpeg2_malloc (convert_init.buf_size[0],
+ MPEG2_ALLOC_CONVERTED);
+ fbuf->buf[1] =
+ (uint8_t *) mpeg2_malloc (convert_init.buf_size[1],
+ MPEG2_ALLOC_CONVERTED);
+ fbuf->buf[2] =
+ (uint8_t *) mpeg2_malloc (convert_init.buf_size[2],
+ MPEG2_ALLOC_CONVERTED);
+ }
+ mpeg2_set_fbuf (mpeg2dec, (decoder->coding_type == B_TYPE));
+ }
+ } else if (!mpeg2dec->custom_fbuf) {
+ /* no conversion: frame buffers hold raw planar YUV */
+ while (mpeg2dec->alloc_index < 3) {
+ mpeg2_fbuf_t * fbuf;
+ int y_size, uv_size;
+
+ fbuf = &(mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index++].fbuf);
+ fbuf->id = NULL;
+ y_size = decoder->stride_frame * mpeg2dec->sequence.height;
+ uv_size = y_size >> (2 - decoder->chroma_format);
+ fbuf->buf[0] = (uint8_t *) mpeg2_malloc (y_size,
+ MPEG2_ALLOC_YUV);
+ fbuf->buf[1] = (uint8_t *) mpeg2_malloc (uv_size,
+ MPEG2_ALLOC_YUV);
+ fbuf->buf[2] = (uint8_t *) mpeg2_malloc (uv_size,
+ MPEG2_ALLOC_YUV);
+ }
+ mpeg2_set_fbuf (mpeg2dec, (decoder->coding_type == B_TYPE));
+ }
+ } else {
+ decoder->second_field = 1;
+ mpeg2dec->picture++; /* second field picture */
+ *(mpeg2dec->picture) = mpeg2dec->new_picture;
+ mpeg2dec->info.current_picture_2nd = mpeg2dec->picture;
+ if (low_delay || decoder->coding_type == B_TYPE)
+ mpeg2dec->info.display_picture_2nd = mpeg2dec->picture;
+ }
+
+ info_user_data (mpeg2dec);
+}
+
+/* Copyright extensions are recognized but their contents are ignored;
+ * returning 0 keeps the extension dispatcher happy. */
+static int copyright_ext (mpeg2dec_t * mpeg2dec)
+{
+ return 0;
+}
+
+/* Parse a quant_matrix_extension.  Up to four matrices (intra/non-intra
+ * luma, then chroma) may be present, each preceded by a 1-bit load flag.
+ * The flags and 512-bit matrices are packed back to back, so after each
+ * loaded matrix the buffer pointer advances 64 bytes and the next load
+ * flag appears one bit lower (8 >> i) in the new buffer[0].  Matrix
+ * values straddle byte boundaries because of the leading flag bits,
+ * hence the shift/splice.  Always returns 0. */
+static int quant_matrix_ext (mpeg2dec_t * mpeg2dec)
+{
+ uint8_t * buffer = mpeg2dec->chunk_start;
+ int i, j;
+
+ for (i = 0; i < 4; i++)
+ if (buffer[0] & (8 >> i)) {
+ for (j = 0; j < 64; j++)
+ mpeg2dec->new_quantizer_matrix[i][mpeg2_scan_norm[j]] =
+ (buffer[j] << (i+5)) | (buffer[j+1] >> (3-i));
+ mpeg2dec->copy_matrix |= 1 << i;
+ buffer += 64;
+ }
+
+ return 0;
+}
+
+/* Dispatch an extension_start_code chunk to its parser based on the
+ * 4-bit extension id.  ext_state is a bitmask of the extensions legal
+ * in the current parsing context; extensions that are not expected here
+ * are silently ignored, and each one is accepted at most once. */
+int mpeg2_header_extension (mpeg2dec_t * mpeg2dec)
+{
+ static int (* parser[]) (mpeg2dec_t *) = {
+ 0, sequence_ext, sequence_display_ext, quant_matrix_ext,
+ copyright_ext, 0, 0, picture_display_ext, picture_coding_ext
+ };
+ int ext, ext_bit;
+
+ ext = mpeg2dec->chunk_start[0] >> 4;
+ ext_bit = 1 << ext;
+
+ if (!(mpeg2dec->ext_state & ext_bit))
+ return 0; /* ignore illegal extensions */
+ mpeg2dec->ext_state &= ~ext_bit;
+ return parser[ext] (mpeg2dec);
+}
+
+/* Accumulate a user_data chunk: extend user_data_len by the bytes of
+ * this chunk (chunk_ptr - 1 excludes the byte past the chunk) and move
+ * chunk_start forward so consecutive user_data chunks concatenate in
+ * the chunk buffer.  Always returns 0. */
+int mpeg2_header_user_data (mpeg2dec_t * mpeg2dec)
+{
+ mpeg2dec->user_data_len += mpeg2dec->chunk_ptr - 1 - mpeg2dec->chunk_start;
+ mpeg2dec->chunk_start = mpeg2dec->chunk_ptr - 1;
+
+ return 0;
+}
+
+/* Build quantizer_prescale[idx]: for each of the 32 quantizer_scale_code
+ * values, pre-multiply the whole quantizer matrix by the effective scale
+ * (linear 2*i, or the non-linear table when q_scale_type is set).  The
+ * result is cached; scaled[idx] records which q_scale_type it was built
+ * for (and is set to -1 by copy_matrix to force a rebuild). */
+static void prescale (mpeg2dec_t * mpeg2dec, int idx)
+{
+ /* non-linear quantizer_scale mapping (q_scale_type = 1) */
+ static int non_linear_scale [] = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 10, 12, 14, 16, 18, 20, 22,
+ 24, 28, 32, 36, 40, 44, 48, 52,
+ 56, 64, 72, 80, 88, 96, 104, 112
+ };
+ int i, j, k;
+ mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
+
+ if (mpeg2dec->scaled[idx] != decoder->q_scale_type) {
+ mpeg2dec->scaled[idx] = decoder->q_scale_type;
+ for (i = 0; i < 32; i++) {
+ k = decoder->q_scale_type ? non_linear_scale[i] : (i << 1);
+ for (j = 0; j < 64; j++)
+ decoder->quantizer_prescale[idx][i][j] =
+ k * mpeg2dec->quantizer_matrix[idx][j];
+ }
+ }
+}
+
+/* Prepare for slice decoding of the current picture: refresh the
+ * prescaled quantizer tables actually in use, honour the decode-slices
+ * window (marking the picture skipped when empty), start the color
+ * converter if one is configured, and point the decoder at the right
+ * set of current/forward/backward buffers. */
+mpeg2_state_t mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec)
+{
+ mpeg2_decoder_t * decoder = &(mpeg2dec->decoder);
+
+ mpeg2dec->info.user_data = NULL; mpeg2dec->info.user_data_len = 0;
+ /* field pictures need a second STATE_SLICE_1ST pass */
+ mpeg2dec->state = ((mpeg2dec->picture->nb_fields > 1 ||
+ mpeg2dec->state == STATE_PICTURE_2ND) ?
+ STATE_SLICE : STATE_SLICE_1ST);
+
+ if (mpeg2dec->decoder.coding_type != D_TYPE) {
+ prescale (mpeg2dec, 0);
+ /* only rebuild the chroma tables if they are separate (see
+ * finalize_matrix: chroma_quantizer may alias the luma table) */
+ if (decoder->chroma_quantizer[0] == decoder->quantizer_prescale[2])
+ prescale (mpeg2dec, 2);
+ if (mpeg2dec->decoder.coding_type != I_TYPE) {
+ prescale (mpeg2dec, 1);
+ if (decoder->chroma_quantizer[1] == decoder->quantizer_prescale[3])
+ prescale (mpeg2dec, 3);
+ }
+ }
+
+ if (!(mpeg2dec->nb_decode_slices))
+ mpeg2dec->picture->flags |= PIC_FLAG_SKIP;
+ else if (mpeg2dec->convert_start) {
+ mpeg2dec->convert_start (decoder->convert_id, mpeg2dec->fbuf[0],
+ mpeg2dec->picture, mpeg2dec->info.gop);
+
+ /* decode into the internal yuv buffers; B pictures use the
+ * slice-high scratch buffer as "current" */
+ if (mpeg2dec->decoder.coding_type == B_TYPE)
+ mpeg2_init_fbuf (&(mpeg2dec->decoder), mpeg2dec->yuv_buf[2],
+ mpeg2dec->yuv_buf[mpeg2dec->yuv_index ^ 1],
+ mpeg2dec->yuv_buf[mpeg2dec->yuv_index]);
+ else {
+ mpeg2_init_fbuf (&(mpeg2dec->decoder),
+ mpeg2dec->yuv_buf[mpeg2dec->yuv_index ^ 1],
+ mpeg2dec->yuv_buf[mpeg2dec->yuv_index],
+ mpeg2dec->yuv_buf[mpeg2dec->yuv_index]);
+ if (mpeg2dec->state == STATE_SLICE)
+ mpeg2dec->yuv_index ^= 1;
+ }
+ } else {
+ int b_type;
+
+ b_type = (mpeg2dec->decoder.coding_type == B_TYPE);
+ mpeg2_init_fbuf (&(mpeg2dec->decoder), mpeg2dec->fbuf[0]->buf,
+ mpeg2dec->fbuf[b_type + 1]->buf,
+ mpeg2dec->fbuf[b_type]->buf);
+ }
+ mpeg2dec->action = NULL;
+ return STATE_INTERNAL_NORETURN;
+}
+
+/* Action installed after end-of-stream: forget all sequence state and
+ * resume scanning for the next sequence header. */
+static mpeg2_state_t seek_sequence (mpeg2dec_t * mpeg2dec)
+{
+ mpeg2_reset_info (&(mpeg2dec->info));
+ mpeg2dec->info.sequence = NULL;
+ mpeg2dec->info.gop = NULL;
+ mpeg2_header_state_init (mpeg2dec);
+ mpeg2dec->action = mpeg2_seek_header;
+ return mpeg2_seek_header (mpeg2dec);
+}
+
+/* Handle end of stream: flush the last reference picture that is still
+ * pending display (unless in low-delay mode, where nothing is pending),
+ * mark the buffers to discard, and arrange for seek_sequence to run
+ * next.  Returns STATE_END. */
+mpeg2_state_t mpeg2_header_end (mpeg2dec_t * mpeg2dec)
+{
+ mpeg2_picture_t * picture;
+ int b_type;
+
+ b_type = (mpeg2dec->decoder.coding_type == B_TYPE);
+ /* find the pair holding the most recent reference picture */
+ picture = mpeg2dec->pictures;
+ if ((mpeg2dec->picture >= picture + 2) ^ b_type)
+ picture = mpeg2dec->pictures + 2;
+
+ mpeg2_reset_info (&(mpeg2dec->info));
+ if (!(mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY)) {
+ mpeg2dec->info.display_picture = picture;
+ if (picture->nb_fields == 1)
+ mpeg2dec->info.display_picture_2nd = picture + 1;
+ mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[b_type];
+ if (!mpeg2dec->convert)
+ mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[b_type + 1];
+ } else if (!mpeg2dec->convert)
+ mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[b_type];
+ mpeg2dec->action = seek_sequence;
+ return STATE_END;
+}
diff --git a/libmpeg2/idct.c b/libmpeg2/idct.c
new file mode 100644
index 0000000..81c57e0
--- /dev/null
+++ b/libmpeg2/idct.c
@@ -0,0 +1,289 @@
+/*
+ * idct.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "attributes.h"
+#include "mpeg2_internal.h"
+
+#define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */
+#define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */
+#define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */
+#define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */
+#define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */
+#define W7 565 /* 2048 * sqrt (2) * cos (7 * pi / 16) */
+
+/* idct main entry point */
+void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride);
+void (* mpeg2_idct_add) (int last, int16_t * block,
+ uint8_t * dest, int stride);
+
+/*
+ * In legal streams, the IDCT output should be between -384 and +384.
+ * In corrupted streams, it is possible to force the IDCT output to go
+ * to +-3826 - this is the worst case for a column IDCT where the
+ * column inputs are 16-bit values.
+ */
+uint8_t mpeg2_clip[3840 * 2 + 256];
+#define CLIP(i) ((mpeg2_clip + 3840)[i])
+
+#if 0
+#define BUTTERFLY(t0,t1,W0,W1,d0,d1) \
+do { \
+ t0 = W0 * d0 + W1 * d1; \
+ t1 = W0 * d1 - W1 * d0; \
+} while (0)
+#else
+#define BUTTERFLY(t0,t1,W0,W1,d0,d1) \
+do { \
+ int tmp = W0 * (d0 + d1); \
+ t0 = tmp + (W1 - W0) * d1; \
+ t1 = tmp - (W1 + W0) * d0; \
+} while (0)
+#endif
+
+/* 1-D 8-point row IDCT, in place.  Fixed point: even inputs are
+ * pre-scaled by 2^11 to match the W1..W7 constants (2048*sqrt(2)*cos),
+ * and results are shifted back down by 12 bits (the +2048 rounds). */
+static inline void idct_row (int16_t * const block)
+{
+    int d0, d1, d2, d3;
+    int a0, a1, a2, a3, b0, b1, b2, b3;
+    int t0, t1, t2, t3;
+
+    /* shortcut: a row with only a DC coefficient transforms to a
+     * constant; coefficients 1..7 are tested with block[1] plus three
+     * 32-bit loads */
+    if (likely (!(block[1] | ((int32_t *)block)[1] | ((int32_t *)block)[2] |
+		  ((int32_t *)block)[3]))) {
+	uint32_t tmp = (uint16_t) (block[0] >> 1);
+	tmp |= tmp << 16;
+	((int32_t *)block)[0] = tmp;
+	((int32_t *)block)[1] = tmp;
+	((int32_t *)block)[2] = tmp;
+	((int32_t *)block)[3] = tmp;
+	return;
+    }
+
+    d0 = (block[0] << 11) + 2048;	/* +2048 rounds the >> 12 below */
+    d1 = block[1];
+    d2 = block[2] << 11;
+    d3 = block[3];
+    t0 = d0 + d2;
+    t1 = d0 - d2;
+    BUTTERFLY (t2, t3, W6, W2, d3, d1);
+    a0 = t0 + t2;
+    a1 = t1 + t3;
+    a2 = t1 - t3;
+    a3 = t0 - t2;
+
+    d0 = block[4];
+    d1 = block[5];
+    d2 = block[6];
+    d3 = block[7];
+    BUTTERFLY (t0, t1, W7, W1, d3, d0);
+    BUTTERFLY (t2, t3, W3, W5, d1, d2);
+    b0 = t0 + t2;
+    b3 = t1 + t3;
+    t0 -= t2;
+    t1 -= t3;
+    b1 = ((t0 + t1) >> 8) * 181;	/* 181/256 approximates 1/sqrt(2) */
+    b2 = ((t0 - t1) >> 8) * 181;
+
+    block[0] = (a0 + b0) >> 12;
+    block[1] = (a1 + b1) >> 12;
+    block[2] = (a2 + b2) >> 12;
+    block[3] = (a3 + b3) >> 12;
+    block[4] = (a3 - b3) >> 12;
+    block[5] = (a2 - b2) >> 12;
+    block[6] = (a1 - b1) >> 12;
+    block[7] = (a0 - b0) >> 12;
+}
+
+/* 1-D 8-point column IDCT, in place on every 8th element (one column of
+ * the 8x8 block).  Same butterfly structure as idct_row but with a
+ * 17-bit downshift (+65536 rounds it); no DC shortcut, since the row
+ * pass has already been applied. */
+static inline void idct_col (int16_t * const block)
+{
+    int d0, d1, d2, d3;
+    int a0, a1, a2, a3, b0, b1, b2, b3;
+    int t0, t1, t2, t3;
+
+    d0 = (block[8*0] << 11) + 65536;	/* +65536 rounds the >> 17 below */
+    d1 = block[8*1];
+    d2 = block[8*2] << 11;
+    d3 = block[8*3];
+    t0 = d0 + d2;
+    t1 = d0 - d2;
+    BUTTERFLY (t2, t3, W6, W2, d3, d1);
+    a0 = t0 + t2;
+    a1 = t1 + t3;
+    a2 = t1 - t3;
+    a3 = t0 - t2;
+
+    d0 = block[8*4];
+    d1 = block[8*5];
+    d2 = block[8*6];
+    d3 = block[8*7];
+    BUTTERFLY (t0, t1, W7, W1, d3, d0);
+    BUTTERFLY (t2, t3, W3, W5, d1, d2);
+    b0 = t0 + t2;
+    b3 = t1 + t3;
+    t0 -= t2;
+    t1 -= t3;
+    b1 = ((t0 + t1) >> 8) * 181;	/* 181/256 approximates 1/sqrt(2) */
+    b2 = ((t0 - t1) >> 8) * 181;
+
+    block[8*0] = (a0 + b0) >> 17;
+    block[8*1] = (a1 + b1) >> 17;
+    block[8*2] = (a2 + b2) >> 17;
+    block[8*3] = (a3 + b3) >> 17;
+    block[8*4] = (a3 - b3) >> 17;
+    block[8*5] = (a2 - b2) >> 17;
+    block[8*6] = (a1 - b1) >> 17;
+    block[8*7] = (a0 - b0) >> 17;
+}
+
+/* Reference 2-D IDCT of one 8x8 coefficient block: 8 row passes, 8
+ * column passes, then each line is clamped to 0..255 via the shared
+ * CLIP table and written to dest (advancing by stride).  The
+ * coefficient block is zeroed as it is consumed, ready for reuse. */
+static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest,
+			       const int stride)
+{
+    int i;
+
+    for (i = 0; i < 8; i++)
+	idct_row (block + 8 * i);
+    for (i = 0; i < 8; i++)
+	idct_col (block + i);
+    /* i is 8 after the loops above; reused as the line counter below */
+    do {
+	dest[0] = CLIP (block[0]);
+	dest[1] = CLIP (block[1]);
+	dest[2] = CLIP (block[2]);
+	dest[3] = CLIP (block[3]);
+	dest[4] = CLIP (block[4]);
+	dest[5] = CLIP (block[5]);
+	dest[6] = CLIP (block[6]);
+	dest[7] = CLIP (block[7]);
+
+	/* clear the just-consumed row (16 bytes) with 32-bit stores */
+	((int32_t *)block)[0] = 0; ((int32_t *)block)[1] = 0;
+	((int32_t *)block)[2] = 0; ((int32_t *)block)[3] = 0;
+
+	dest += stride;
+	block += 8;
+    } while (--i);
+}
+
+/* Reference 2-D IDCT with saturating add into dest (prediction error
+ * path).  last is related to the last nonzero coefficient; 129 acts as
+ * a sentinel for a DC-only block (NOTE(review): presumably set by the
+ * slice parser — confirm against slice.c).  The extra test on bits
+ * 4..6 of block[0] forces the full IDCT for DC values where the
+ * shortcut's rounding would diverge (NOTE(review): confirm intent). */
+static void mpeg2_idct_add_c (const int last, int16_t * block,
+			      uint8_t * dest, const int stride)
+{
+    int i;
+
+    if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) {
+	for (i = 0; i < 8; i++)
+	    idct_row (block + 8 * i);
+	for (i = 0; i < 8; i++)
+	    idct_col (block + i);
+	/* i is 8 here; reused as the line counter */
+	do {
+	    dest[0] = CLIP (block[0] + dest[0]);
+	    dest[1] = CLIP (block[1] + dest[1]);
+	    dest[2] = CLIP (block[2] + dest[2]);
+	    dest[3] = CLIP (block[3] + dest[3]);
+	    dest[4] = CLIP (block[4] + dest[4]);
+	    dest[5] = CLIP (block[5] + dest[5]);
+	    dest[6] = CLIP (block[6] + dest[6]);
+	    dest[7] = CLIP (block[7] + dest[7]);
+
+	    ((int32_t *)block)[0] = 0; ((int32_t *)block)[1] = 0;
+	    ((int32_t *)block)[2] = 0; ((int32_t *)block)[3] = 0;
+
+	    dest += stride;
+	    block += 8;
+	} while (--i);
+    } else {
+	int DC;
+
+	/* DC-only fast path: the whole block adds a constant (rounded
+	 * DC >> 7); only block[0] and the mismatch coefficient block[63]
+	 * need clearing */
+	DC = (block[0] + 64) >> 7;
+	block[0] = block[63] = 0;
+	i = 8;
+	do {
+	    dest[0] = CLIP (DC + dest[0]);
+	    dest[1] = CLIP (DC + dest[1]);
+	    dest[2] = CLIP (DC + dest[2]);
+	    dest[3] = CLIP (DC + dest[3]);
+	    dest[4] = CLIP (DC + dest[4]);
+	    dest[5] = CLIP (DC + dest[5]);
+	    dest[6] = CLIP (DC + dest[6]);
+	    dest[7] = CLIP (DC + dest[7]);
+	    dest += stride;
+	} while (--i);
+    }
+}
+
+/* Install the IDCT implementation pair (copy/add) matching the accel
+ * flags and run its one-time setup: accelerated backends get their own
+ * init, the plain-Alpha and C fallbacks also fill the shared clip table,
+ * and the C fallback additionally permutes the scan tables into the
+ * coefficient order its row/column passes expect.
+ * NOTE(review): idct_mmx.c is compiled for ARCH_X86 || ARCH_X86_64 but
+ * this dispatcher only tests ARCH_X86 — confirm that x86-64 builds also
+ * define ARCH_X86. */
+void mpeg2_idct_init (uint32_t accel)
+{
+#ifdef ARCH_X86
+    if (accel & MPEG2_ACCEL_X86_SSE2) {
+	mpeg2_idct_copy = mpeg2_idct_copy_sse2;
+	mpeg2_idct_add = mpeg2_idct_add_sse2;
+	mpeg2_idct_mmx_init ();
+    } else if (accel & MPEG2_ACCEL_X86_MMXEXT) {
+	mpeg2_idct_copy = mpeg2_idct_copy_mmxext;
+	mpeg2_idct_add = mpeg2_idct_add_mmxext;
+	mpeg2_idct_mmx_init ();
+    } else if (accel & MPEG2_ACCEL_X86_MMX) {
+	mpeg2_idct_copy = mpeg2_idct_copy_mmx;
+	mpeg2_idct_add = mpeg2_idct_add_mmx;
+	mpeg2_idct_mmx_init ();
+    } else
+#endif
+#ifdef ARCH_PPC
+    if (accel & MPEG2_ACCEL_PPC_ALTIVEC) {
+	mpeg2_idct_copy = mpeg2_idct_copy_altivec;
+	mpeg2_idct_add = mpeg2_idct_add_altivec;
+	mpeg2_idct_altivec_init ();
+    } else
+#endif
+#ifdef ARCH_ALPHA
+    if (accel & MPEG2_ACCEL_ALPHA_MVI) {
+	mpeg2_idct_copy = mpeg2_idct_copy_mvi;
+	mpeg2_idct_add = mpeg2_idct_add_mvi;
+	mpeg2_idct_alpha_init ();
+    } else if (accel & MPEG2_ACCEL_ALPHA) {
+	int i;
+
+	/* plain Alpha uses the C-style CLIP table, so fill it here */
+	mpeg2_idct_copy = mpeg2_idct_copy_alpha;
+	mpeg2_idct_add = mpeg2_idct_add_alpha;
+	mpeg2_idct_alpha_init ();
+	for (i = -3840; i < 3840 + 256; i++)
+	    CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 255 : i);
+    } else
+#endif
+    {
+	int i, j;
+
+	mpeg2_idct_copy = mpeg2_idct_copy_c;
+	mpeg2_idct_add = mpeg2_idct_add_c;
+	for (i = -3840; i < 3840 + 256; i++)
+	    CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 255 : i);
+	/* permute the zigzag scan tables to match this IDCT's
+	 * coefficient ordering (same bit shuffle as the Alpha init) */
+	for (i = 0; i < 64; i++) {
+	    j = mpeg2_scan_norm[i];
+	    mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
+	    j = mpeg2_scan_alt[i];
+	    mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
+	}
+    }
+}
diff --git a/libmpeg2/idct_alpha.c b/libmpeg2/idct_alpha.c
new file mode 100644
index 0000000..bc2f886
--- /dev/null
+++ b/libmpeg2/idct_alpha.c
@@ -0,0 +1,377 @@
+/*
+ * idct_alpha.c
+ * Copyright (C) 2002-2003 Falk Hueffner <falk@debian.org>
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_ALPHA
+
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "attributes.h"
+#include "mpeg2_internal.h"
+#include "alpha_asm.h"
+
+#define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */
+#define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */
+#define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */
+#define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */
+#define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */
+#define W7 565 /* 2048 * sqrt (2) * cos (7 * pi / 16) */
+
+extern uint8_t mpeg2_clip[3840 * 2 + 256];
+#define CLIP(i) ((mpeg2_clip + 3840)[i])
+
+#if 0
+#define BUTTERFLY(t0,t1,W0,W1,d0,d1) \
+do { \
+ t0 = W0 * d0 + W1 * d1; \
+ t1 = W0 * d1 - W1 * d0; \
+} while (0)
+#else
+#define BUTTERFLY(t0,t1,W0,W1,d0,d1) \
+do { \
+ int_fast32_t tmp = W0 * (d0 + d1); \
+ t0 = tmp + (W1 - W0) * d1; \
+ t1 = tmp - (W1 + W0) * d0; \
+} while (0)
+#endif
+
+/* Alpha variant of the 8-point row IDCT: loads the whole row as two
+ * 64-bit words and extracts sign-extended 16-bit coefficients with
+ * extwl/sextw.  Same fixed-point scheme as the C version (12-bit
+ * downshift, +2048 rounding). */
+static inline void idct_row (int16_t * const block)
+{
+    uint64_t l, r;
+    int_fast32_t d0, d1, d2, d3;
+    int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3;
+    int_fast32_t t0, t1, t2, t3;
+
+    l = ldq (block);
+    r = ldq (block + 4);
+
+    /* shortcut: DC-only row — everything above the low 16 bits of l,
+     * and all of r, must be zero */
+    if (likely (!((l & ~0xffffUL) | r))) {
+	uint64_t tmp = (uint16_t) (l >> 1);
+	tmp |= tmp << 16;
+	tmp |= tmp << 32;
+	((int32_t *)block)[0] = tmp;
+	((int32_t *)block)[1] = tmp;
+	((int32_t *)block)[2] = tmp;
+	((int32_t *)block)[3] = tmp;
+	return;
+    }
+
+    d0 = (sextw (l) << 11) + 2048;	/* +2048 rounds the >> 12 below */
+    d1 = sextw (extwl (l, 2));
+    d2 = sextw (extwl (l, 4)) << 11;
+    d3 = sextw (extwl (l, 6));
+    t0 = d0 + d2;
+    t1 = d0 - d2;
+    BUTTERFLY (t2, t3, W6, W2, d3, d1);
+    a0 = t0 + t2;
+    a1 = t1 + t3;
+    a2 = t1 - t3;
+    a3 = t0 - t2;
+
+    d0 = sextw (r);
+    d1 = sextw (extwl (r, 2));
+    d2 = sextw (extwl (r, 4));
+    d3 = sextw (extwl (r, 6));
+    BUTTERFLY (t0, t1, W7, W1, d3, d0);
+    BUTTERFLY (t2, t3, W3, W5, d1, d2);
+    b0 = t0 + t2;
+    b3 = t1 + t3;
+    t0 -= t2;
+    t1 -= t3;
+    b1 = ((t0 + t1) >> 8) * 181;	/* 181/256 approximates 1/sqrt(2) */
+    b2 = ((t0 - t1) >> 8) * 181;
+
+    block[0] = (a0 + b0) >> 12;
+    block[1] = (a1 + b1) >> 12;
+    block[2] = (a2 + b2) >> 12;
+    block[3] = (a3 + b3) >> 12;
+    block[4] = (a3 - b3) >> 12;
+    block[5] = (a2 - b2) >> 12;
+    block[6] = (a1 - b1) >> 12;
+    block[7] = (a0 - b0) >> 12;
+}
+
+/* Alpha variant of the 8-point column IDCT (identical math to the C
+ * idct_col, with int_fast32_t intermediates): 17-bit downshift,
+ * +65536 rounding, no DC shortcut. */
+static inline void idct_col (int16_t * const block)
+{
+    int_fast32_t d0, d1, d2, d3;
+    int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3;
+    int_fast32_t t0, t1, t2, t3;
+
+    d0 = (block[8*0] << 11) + 65536;	/* +65536 rounds the >> 17 below */
+    d1 = block[8*1];
+    d2 = block[8*2] << 11;
+    d3 = block[8*3];
+    t0 = d0 + d2;
+    t1 = d0 - d2;
+    BUTTERFLY (t2, t3, W6, W2, d3, d1);
+    a0 = t0 + t2;
+    a1 = t1 + t3;
+    a2 = t1 - t3;
+    a3 = t0 - t2;
+
+    d0 = block[8*4];
+    d1 = block[8*5];
+    d2 = block[8*6];
+    d3 = block[8*7];
+    BUTTERFLY (t0, t1, W7, W1, d3, d0);
+    BUTTERFLY (t2, t3, W3, W5, d1, d2);
+    b0 = t0 + t2;
+    b3 = t1 + t3;
+    t0 -= t2;
+    t1 -= t3;
+    b1 = ((t0 + t1) >> 8) * 181;	/* 181/256 approximates 1/sqrt(2) */
+    b2 = ((t0 - t1) >> 8) * 181;
+
+    block[8*0] = (a0 + b0) >> 17;
+    block[8*1] = (a1 + b1) >> 17;
+    block[8*2] = (a2 + b2) >> 17;
+    block[8*3] = (a3 + b3) >> 17;
+    block[8*4] = (a3 - b3) >> 17;
+    block[8*5] = (a2 - b2) >> 17;
+    block[8*6] = (a1 - b1) >> 17;
+    block[8*7] = (a0 - b0) >> 17;
+}
+
+/* MVI (Alpha motion-video extension) copy: full 2-D IDCT, then clamp
+ * four 16-bit results at a time to 0..255 with maxsw4/minsw4 and pack
+ * them to bytes with pkwb — no clip table needed.  The coefficient
+ * block is zeroed as it is consumed. */
+void mpeg2_idct_copy_mvi (int16_t * block, uint8_t * dest, const int stride)
+{
+    uint64_t clampmask;
+    int i;
+
+    for (i = 0; i < 8; i++)
+	idct_row (block + 8 * i);
+
+    for (i = 0; i < 8; i++)
+	idct_col (block + i);
+
+    /* i is 8 here; reused as the line counter below */
+    clampmask = zap (-1, 0xaa);	/* 0x00ff00ff00ff00ff */
+    do {
+	uint64_t shorts0, shorts1;
+
+	shorts0 = ldq (block);
+	shorts0 = maxsw4 (shorts0, 0);		/* clamp low end to 0 */
+	shorts0 = minsw4 (shorts0, clampmask);	/* clamp high end to 255 */
+	stl (pkwb (shorts0), dest);
+
+	shorts1 = ldq (block + 4);
+	shorts1 = maxsw4 (shorts1, 0);
+	shorts1 = minsw4 (shorts1, clampmask);
+	stl (pkwb (shorts1), dest + 4);
+
+	stq (0, block);
+	stq (0, block + 4);
+
+	dest += stride;
+	block += 8;
+    } while (--i);
+}
+
+/* MVI add: 2-D IDCT and saturating add into dest.  The general path
+ * emulates a signed subword add (MMX paddw) with a sign-bit mask trick,
+ * then clamps with maxsw4/minsw4.  The DC-only fast path (last == 129
+ * sentinel, same heuristic as mpeg2_idct_add_c) applies the DC offset
+ * to 8 bytes at a time: minub8(DCs, ~p) computes min(DC, 255 - pixel),
+ * giving an unsigned saturated add (and symmetrically for subtract). */
+void mpeg2_idct_add_mvi (const int last, int16_t * block,
+			 uint8_t * dest, const int stride)
+{
+    uint64_t clampmask;
+    uint64_t signmask;
+    int i;
+
+    if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) {
+	for (i = 0; i < 8; i++)
+	    idct_row (block + 8 * i);
+	for (i = 0; i < 8; i++)
+	    idct_col (block + i);
+	/* i is 8 here; reused as the line counter below */
+	clampmask = zap (-1, 0xaa);	/* 0x00ff00ff00ff00ff */
+	signmask = zap (-1, 0x33);
+	signmask ^= signmask >> 1;	/* 0x8000800080008000 */
+
+	do {
+	    uint64_t shorts0, pix0, signs0;
+	    uint64_t shorts1, pix1, signs1;
+
+	    shorts0 = ldq (block);
+	    shorts1 = ldq (block + 4);
+
+	    pix0 = unpkbw (ldl (dest));	/* expand 4 pixels to words */
+	    /* signed subword add (MMX paddw). */
+	    signs0 = shorts0 & signmask;
+	    shorts0 &= ~signmask;
+	    shorts0 += pix0;
+	    shorts0 ^= signs0;
+	    /* clamp. */
+	    shorts0 = maxsw4 (shorts0, 0);
+	    shorts0 = minsw4 (shorts0, clampmask);
+
+	    /* next 4. */
+	    pix1 = unpkbw (ldl (dest + 4));
+	    signs1 = shorts1 & signmask;
+	    shorts1 &= ~signmask;
+	    shorts1 += pix1;
+	    shorts1 ^= signs1;
+	    shorts1 = maxsw4 (shorts1, 0);
+	    shorts1 = minsw4 (shorts1, clampmask);
+
+	    stl (pkwb (shorts0), dest);
+	    stl (pkwb (shorts1), dest + 4);
+	    stq (0, block);		/* clear the consumed row */
+	    stq (0, block + 4);
+
+	    dest += stride;
+	    block += 8;
+	} while (--i);
+    } else {
+	int DC;
+	uint64_t p0, p1, p2, p3, p4, p5, p6, p7;
+	uint64_t DCs;
+
+	DC = (block[0] + 64) >> 7;	/* rounded DC value */
+	block[0] = block[63] = 0;	/* also clears mismatch coeff 63 */
+
+	p0 = ldq (dest + 0 * stride);
+	p1 = ldq (dest + 1 * stride);
+	p2 = ldq (dest + 2 * stride);
+	p3 = ldq (dest + 3 * stride);
+	p4 = ldq (dest + 4 * stride);
+	p5 = ldq (dest + 5 * stride);
+	p6 = ldq (dest + 6 * stride);
+	p7 = ldq (dest + 7 * stride);
+
+	if (DC > 0) {
+	    /* saturated add: each byte gains min(DC, headroom to 255) */
+	    DCs = BYTE_VEC (likely (DC <= 255) ? DC : 255);
+	    p0 += minub8 (DCs, ~p0);
+	    p1 += minub8 (DCs, ~p1);
+	    p2 += minub8 (DCs, ~p2);
+	    p3 += minub8 (DCs, ~p3);
+	    p4 += minub8 (DCs, ~p4);
+	    p5 += minub8 (DCs, ~p5);
+	    p6 += minub8 (DCs, ~p6);
+	    p7 += minub8 (DCs, ~p7);
+	} else {
+	    /* saturated subtract: each byte loses min(-DC, pixel value) */
+	    DCs = BYTE_VEC (likely (-DC <= 255) ? -DC : 255);
+	    p0 -= minub8 (DCs, p0);
+	    p1 -= minub8 (DCs, p1);
+	    p2 -= minub8 (DCs, p2);
+	    p3 -= minub8 (DCs, p3);
+	    p4 -= minub8 (DCs, p4);
+	    p5 -= minub8 (DCs, p5);
+	    p6 -= minub8 (DCs, p6);
+	    p7 -= minub8 (DCs, p7);
+	}
+
+	stq (p0, dest + 0 * stride);
+	stq (p1, dest + 1 * stride);
+	stq (p2, dest + 2 * stride);
+	stq (p3, dest + 3 * stride);
+	stq (p4, dest + 4 * stride);
+	stq (p5, dest + 5 * stride);
+	stq (p6, dest + 6 * stride);
+	stq (p7, dest + 7 * stride);
+    }
+}
+
+/* Plain Alpha (no MVI) copy: same structure as mpeg2_idct_copy_c, using
+ * the shared CLIP table for 0..255 clamping and 64-bit stq stores to
+ * zero the consumed coefficient rows. */
+void mpeg2_idct_copy_alpha (int16_t * block, uint8_t * dest, const int stride)
+{
+    int i;
+
+    for (i = 0; i < 8; i++)
+	idct_row (block + 8 * i);
+    for (i = 0; i < 8; i++)
+	idct_col (block + i);
+    /* i is 8 here; reused as the line counter below */
+    do {
+	dest[0] = CLIP (block[0]);
+	dest[1] = CLIP (block[1]);
+	dest[2] = CLIP (block[2]);
+	dest[3] = CLIP (block[3]);
+	dest[4] = CLIP (block[4]);
+	dest[5] = CLIP (block[5]);
+	dest[6] = CLIP (block[6]);
+	dest[7] = CLIP (block[7]);
+
+	stq(0, block);
+	stq(0, block + 4);
+
+	dest += stride;
+	block += 8;
+    } while (--i);
+}
+
+/* Plain Alpha (no MVI) add: same logic as mpeg2_idct_add_c, including
+ * the last == 129 DC-only fast path, with stq used to zero consumed
+ * coefficient rows.  Relies on the CLIP table filled by mpeg2_idct_init. */
+void mpeg2_idct_add_alpha (const int last, int16_t * block,
+			   uint8_t * dest, const int stride)
+{
+    int i;
+
+    if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) {
+	for (i = 0; i < 8; i++)
+	    idct_row (block + 8 * i);
+	for (i = 0; i < 8; i++)
+	    idct_col (block + i);
+	/* i is 8 here; reused as the line counter below */
+	do {
+	    dest[0] = CLIP (block[0] + dest[0]);
+	    dest[1] = CLIP (block[1] + dest[1]);
+	    dest[2] = CLIP (block[2] + dest[2]);
+	    dest[3] = CLIP (block[3] + dest[3]);
+	    dest[4] = CLIP (block[4] + dest[4]);
+	    dest[5] = CLIP (block[5] + dest[5]);
+	    dest[6] = CLIP (block[6] + dest[6]);
+	    dest[7] = CLIP (block[7] + dest[7]);
+
+	    stq(0, block);
+	    stq(0, block + 4);
+
+	    dest += stride;
+	    block += 8;
+	} while (--i);
+    } else {
+	int DC;
+
+	/* DC-only: add a constant; clear DC and mismatch coeff 63 */
+	DC = (block[0] + 64) >> 7;
+	block[0] = block[63] = 0;
+	i = 8;
+	do {
+	    dest[0] = CLIP (DC + dest[0]);
+	    dest[1] = CLIP (DC + dest[1]);
+	    dest[2] = CLIP (DC + dest[2]);
+	    dest[3] = CLIP (DC + dest[3]);
+	    dest[4] = CLIP (DC + dest[4]);
+	    dest[5] = CLIP (DC + dest[5]);
+	    dest[6] = CLIP (DC + dest[6]);
+	    dest[7] = CLIP (DC + dest[7]);
+	    dest += stride;
+	} while (--i);
+    }
+}
+
+/* Permute the zigzag scan tables into the coefficient order used by the
+ * Alpha row/column IDCT (same bit shuffle as the C fallback in idct.c).
+ * Note: the CLIP table needed by the non-MVI variants is filled by
+ * mpeg2_idct_init, not here. */
+void mpeg2_idct_alpha_init (void)
+{
+    int i, j;
+
+    for (i = 0; i < 64; i++) {
+	j = mpeg2_scan_norm[i];
+	mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
+	j = mpeg2_scan_alt[i];
+	mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
+    }
+}
+
+#endif /* ARCH_ALPHA */
diff --git a/libmpeg2/idct_altivec.c b/libmpeg2/idct_altivec.c
new file mode 100644
index 0000000..b5b395a
--- /dev/null
+++ b/libmpeg2/idct_altivec.c
@@ -0,0 +1,286 @@
+/*
+ * idct_altivec.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_PPC
+
+#ifdef HAVE_ALTIVEC_H
+#include <altivec.h>
+#endif
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "attributes.h"
+#include "mpeg2_internal.h"
+
+typedef vector signed char vector_s8_t;
+typedef vector unsigned char vector_u8_t;
+typedef vector signed short vector_s16_t;
+typedef vector unsigned short vector_u16_t;
+typedef vector signed int vector_s32_t;
+typedef vector unsigned int vector_u32_t;
+
+#if defined(HAVE_ALTIVEC_H) && !defined(__APPLE_CC__) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303)
+/* work around gcc <3.3 vec_mergel bug */
+/* Replacement for vec_mergel on 16-bit elements, implemented with
+ * vec_perm and an explicit index vector; substituted via #define below
+ * to dodge the gcc < 3.3 vec_mergel code-generation bug. */
+static inline vector_s16_t my_vec_mergel (vector_s16_t const A,
+					  vector_s16_t const B)
+{
+    /* byte indices interleaving the low halves of A (0x08..) and B (0x18..) */
+    static const vector_u8_t mergel = {
+	0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b,
+	0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f
+    };
+    return vec_perm (A, B, mergel);
+}
+#undef vec_mergel
+#define vec_mergel my_vec_mergel
+#endif
+
+#ifdef HAVE_ALTIVEC_H /* gnu */
+#define VEC_S16(a,b,c,d,e,f,g,h) {a, b, c, d, e, f, g, h}
+#else /* apple */
+#define VEC_S16(a,b,c,d,e,f,g,h) (vector_s16_t) (a, b, c, d, e, f, g, h)
+#endif
+
+static const vector_s16_t constants ATTR_ALIGN(16) =
+ VEC_S16 (23170, 13573, 6518, 21895, -23170, -21895, 32, 31);
+static const vector_s16_t constants_1 ATTR_ALIGN(16) =
+ VEC_S16 (16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725);
+static const vector_s16_t constants_2 ATTR_ALIGN(16) =
+ VEC_S16 (16069, 22289, 20995, 18895, 16069, 18895, 20995, 22289);
+static const vector_s16_t constants_3 ATTR_ALIGN(16) =
+ VEC_S16 (21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692);
+static const vector_s16_t constants_4 ATTR_ALIGN(16) =
+ VEC_S16 (13623, 18895, 17799, 16019, 13623, 16019, 17799, 18895);
+
+/* Full 2-D 8x8 AltiVec IDCT, expanded inline into the copy/add entry
+ * points below.  Declares its own locals; expects `block` to point at 8
+ * vector_s16_t rows in the transposed-scan order established by
+ * mpeg2_idct_altivec_init.  Performs a first 1-D pass, an 8x8 transpose
+ * via two rounds of merge operations, a second 1-D pass (with the `bias`
+ * rounding term folded into vx0), and a final >> 6, leaving the results
+ * in vx0..vx7.  vec_mradds(a, b, c) = high half of a*b*2 rounded, + c. */
+#define IDCT \
+    vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \
+    vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \
+    vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias; \
+    vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8; \
+    vector_u16_t shift; \
+ \
+    c4 = vec_splat (constants, 0); \
+    a0 = vec_splat (constants, 1); \
+    a1 = vec_splat (constants, 2); \
+    a2 = vec_splat (constants, 3); \
+    mc4 = vec_splat (constants, 4); \
+    ma2 = vec_splat (constants, 5); \
+    bias = (vector_s16_t)vec_splat ((vector_s32_t)constants, 3); \
+ \
+    zero = vec_splat_s16 (0); \
+ \
+    vx0 = vec_adds (block[0], block[4]); \
+    vx4 = vec_subs (block[0], block[4]); \
+    t5 = vec_mradds (vx0, constants_1, zero); \
+    t0 = vec_mradds (vx4, constants_1, zero); \
+ \
+    vx1 = vec_mradds (a1, block[7], block[1]); \
+    vx7 = vec_mradds (a1, block[1], vec_subs (zero, block[7])); \
+    t1 = vec_mradds (vx1, constants_2, zero); \
+    t8 = vec_mradds (vx7, constants_2, zero); \
+ \
+    vx2 = vec_mradds (a0, block[6], block[2]); \
+    vx6 = vec_mradds (a0, block[2], vec_subs (zero, block[6])); \
+    t2 = vec_mradds (vx2, constants_3, zero); \
+    t4 = vec_mradds (vx6, constants_3, zero); \
+ \
+    vx3 = vec_mradds (block[3], constants_4, zero); \
+    vx5 = vec_mradds (block[5], constants_4, zero); \
+    t7 = vec_mradds (a2, vx5, vx3); \
+    t3 = vec_mradds (ma2, vx3, vx5); \
+ \
+    t6 = vec_adds (t8, t3); \
+    t3 = vec_subs (t8, t3); \
+    t8 = vec_subs (t1, t7); \
+    t1 = vec_adds (t1, t7); \
+    t6 = vec_mradds (a0, t6, t6); /* a0+1 == 2*c4 */ \
+    t1 = vec_mradds (a0, t1, t1); /* a0+1 == 2*c4 */ \
+ \
+    t7 = vec_adds (t5, t2); \
+    t2 = vec_subs (t5, t2); \
+    t5 = vec_adds (t0, t4); \
+    t0 = vec_subs (t0, t4); \
+    t4 = vec_subs (t8, t3); \
+    t3 = vec_adds (t8, t3); \
+ \
+    vy0 = vec_adds (t7, t1); \
+    vy7 = vec_subs (t7, t1); \
+    vy1 = vec_adds (t5, t3); \
+    vy6 = vec_subs (t5, t3); \
+    vy2 = vec_adds (t0, t4); \
+    vy5 = vec_subs (t0, t4); \
+    vy3 = vec_adds (t2, t6); \
+    vy4 = vec_subs (t2, t6); \
+ \
+    vx0 = vec_mergeh (vy0, vy4); \
+    vx1 = vec_mergel (vy0, vy4); \
+    vx2 = vec_mergeh (vy1, vy5); \
+    vx3 = vec_mergel (vy1, vy5); \
+    vx4 = vec_mergeh (vy2, vy6); \
+    vx5 = vec_mergel (vy2, vy6); \
+    vx6 = vec_mergeh (vy3, vy7); \
+    vx7 = vec_mergel (vy3, vy7); \
+ \
+    vy0 = vec_mergeh (vx0, vx4); \
+    vy1 = vec_mergel (vx0, vx4); \
+    vy2 = vec_mergeh (vx1, vx5); \
+    vy3 = vec_mergel (vx1, vx5); \
+    vy4 = vec_mergeh (vx2, vx6); \
+    vy5 = vec_mergel (vx2, vx6); \
+    vy6 = vec_mergeh (vx3, vx7); \
+    vy7 = vec_mergel (vx3, vx7); \
+ \
+    vx0 = vec_mergeh (vy0, vy4); \
+    vx1 = vec_mergel (vy0, vy4); \
+    vx2 = vec_mergeh (vy1, vy5); \
+    vx3 = vec_mergel (vy1, vy5); \
+    vx4 = vec_mergeh (vy2, vy6); \
+    vx5 = vec_mergel (vy2, vy6); \
+    vx6 = vec_mergeh (vy3, vy7); \
+    vx7 = vec_mergel (vy3, vy7); \
+ \
+    vx0 = vec_adds (vx0, bias); \
+    t5 = vec_adds (vx0, vx4); \
+    t0 = vec_subs (vx0, vx4); \
+ \
+    t1 = vec_mradds (a1, vx7, vx1); \
+    t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \
+ \
+    t2 = vec_mradds (a0, vx6, vx2); \
+    t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6)); \
+ \
+    t7 = vec_mradds (a2, vx5, vx3); \
+    t3 = vec_mradds (ma2, vx3, vx5); \
+ \
+    t6 = vec_adds (t8, t3); \
+    t3 = vec_subs (t8, t3); \
+    t8 = vec_subs (t1, t7); \
+    t1 = vec_adds (t1, t7); \
+ \
+    t7 = vec_adds (t5, t2); \
+    t2 = vec_subs (t5, t2); \
+    t5 = vec_adds (t0, t4); \
+    t0 = vec_subs (t0, t4); \
+    t4 = vec_subs (t8, t3); \
+    t3 = vec_adds (t8, t3); \
+ \
+    vy0 = vec_adds (t7, t1); \
+    vy7 = vec_subs (t7, t1); \
+    vy1 = vec_mradds (c4, t3, t5); \
+    vy6 = vec_mradds (mc4, t3, t5); \
+    vy2 = vec_mradds (c4, t4, t0); \
+    vy5 = vec_mradds (mc4, t4, t0); \
+    vy3 = vec_adds (t2, t6); \
+    vy4 = vec_subs (t2, t6); \
+ \
+    shift = vec_splat_u16 (6); \
+    vx0 = vec_sra (vy0, shift); \
+    vx1 = vec_sra (vy1, shift); \
+    vx2 = vec_sra (vy2, shift); \
+    vx3 = vec_sra (vy3, shift); \
+    vx4 = vec_sra (vy4, shift); \
+    vx5 = vec_sra (vy5, shift); \
+    vx6 = vec_sra (vy6, shift); \
+    vx7 = vec_sra (vy7, shift);
+
+/* AltiVec copy: run the IDCT macro (which declares zero/vx0..vx7 as
+ * locals), then for each row saturate-pack to unsigned bytes and store
+ * the 8 result bytes as two 32-bit vec_ste element stores (dest is only
+ * 8 bytes wide, so a full 16-byte vector store cannot be used).
+ * Finally zero the coefficient block for reuse. */
+void mpeg2_idct_copy_altivec (int16_t * const _block, uint8_t * dest,
+			      const int stride)
+{
+    vector_s16_t * const block = (vector_s16_t *)_block;
+    vector_u8_t tmp;
+
+    IDCT
+
+#define COPY(dest,src) \
+    tmp = vec_packsu (src, src); \
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+
+    COPY (dest, vx0) dest += stride;
+    COPY (dest, vx1) dest += stride;
+    COPY (dest, vx2) dest += stride;
+    COPY (dest, vx3) dest += stride;
+    COPY (dest, vx4) dest += stride;
+    COPY (dest, vx5) dest += stride;
+    COPY (dest, vx6) dest += stride;
+    COPY (dest, vx7)
+
+    block[0] = block[1] = block[2] = block[3] = zero;
+    block[4] = block[5] = block[6] = block[7] = zero;
+}
+
+/* AltiVec add: IDCT, then per row load the destination pixels, widen
+ * them to 16-bit via vec_perm (perm0/perm1 interleave a 0xff index —
+ * which selects a byte from the zero vector — with the alignment
+ * permutation from vec_lvsl, handling unaligned dest; NOTE(review):
+ * relies on vec_perm masking indices to 5 bits — confirm), do a
+ * saturating add, pack back to bytes and store with vec_ste as in the
+ * copy variant.  Two permutations are prepared because odd/even rows
+ * may have different alignment when stride is not a multiple of 16. */
+void mpeg2_idct_add_altivec (const int last, int16_t * const _block,
+			     uint8_t * dest, const int stride)
+{
+    vector_s16_t * const block = (vector_s16_t *)_block;
+    vector_u8_t tmp;
+    vector_s16_t tmp2, tmp3;
+    vector_u8_t perm0;
+    vector_u8_t perm1;
+    vector_u8_t p0, p1, p;
+
+    IDCT
+
+    p0 = vec_lvsl (0, dest);
+    p1 = vec_lvsl (stride, dest);
+    p = vec_splat_u8 (-1);
+    perm0 = vec_mergeh (p, p0);
+    perm1 = vec_mergeh (p, p1);
+
+#define ADD(dest,src,perm) \
+    /* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \
+    tmp = vec_ld (0, dest); \
+    tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm); \
+    tmp3 = vec_adds (tmp2, src); \
+    tmp = vec_packsu (tmp3, tmp3); \
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+
+    ADD (dest, vx0, perm0)	dest += stride;
+    ADD (dest, vx1, perm1)	dest += stride;
+    ADD (dest, vx2, perm0)	dest += stride;
+    ADD (dest, vx3, perm1)	dest += stride;
+    ADD (dest, vx4, perm0)	dest += stride;
+    ADD (dest, vx5, perm1)	dest += stride;
+    ADD (dest, vx6, perm0)	dest += stride;
+    ADD (dest, vx7, perm1)
+
+    block[0] = block[1] = block[2] = block[3] = zero;
+    block[4] = block[5] = block[6] = block[7] = zero;
+}
+
+/* One-time setup for the AltiVec IDCT: transpose the zigzag scan tables
+ * (swap the row and column index bits of every entry) so the slice
+ * parser deposits coefficients in the transposed layout this IDCT
+ * expects. */
+void mpeg2_idct_altivec_init (void)
+{
+    int i, j;
+
+    /* the altivec idct uses a transposed input, so we patch scan tables */
+    for (i = 0; i < 64; i++) {
+	j = mpeg2_scan_norm[i];
+	mpeg2_scan_norm[i] = (j >> 3) | ((j & 7) << 3);
+	j = mpeg2_scan_alt[i];
+	mpeg2_scan_alt[i] = (j >> 3) | ((j & 7) << 3);
+    }
+}
+
+#endif
diff --git a/libmpeg2/idct_mmx.c b/libmpeg2/idct_mmx.c
new file mode 100644
index 0000000..50f8f18
--- /dev/null
+++ b/libmpeg2/idct_mmx.c
@@ -0,0 +1,1305 @@
+/*
+ * idct_mmx.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
+
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "attributes.h"
+#include "mpeg2_internal.h"
+#include "mmx.h"
+
+#define ROW_SHIFT 15
+#define COL_SHIFT 6
+
+#define round(bias) ((int)(((bias)+0.5) * (1<<ROW_SHIFT)))
+#define rounder(bias) {round (bias), round (bias)}
+#define rounder_sse2(bias) {round (bias), round (bias), round (bias), round (bias)}
+
+
+#if 0
+/* C row IDCT - it is just here to document the MMXEXT and MMX versions */
+static inline void idct_row (int16_t * row, int offset,
+ int16_t * table, int32_t * rounder)
+{
+ int C1, C2, C3, C4, C5, C6, C7;
+ int a0, a1, a2, a3, b0, b1, b2, b3;
+
+ row += offset;
+
+ C1 = table[1];
+ C2 = table[2];
+ C3 = table[3];
+ C4 = table[4];
+ C5 = table[5];
+ C6 = table[6];
+ C7 = table[7];
+
+ a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + *rounder;
+ a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + *rounder;
+ a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + *rounder;
+ a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + *rounder;
+
+ b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
+ b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
+ b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
+ b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
+
+ row[0] = (a0 + b0) >> ROW_SHIFT;
+ row[1] = (a1 + b1) >> ROW_SHIFT;
+ row[2] = (a2 + b2) >> ROW_SHIFT;
+ row[3] = (a3 + b3) >> ROW_SHIFT;
+ row[4] = (a3 - b3) >> ROW_SHIFT;
+ row[5] = (a2 - b2) >> ROW_SHIFT;
+ row[6] = (a1 - b1) >> ROW_SHIFT;
+ row[7] = (a0 - b0) >> ROW_SHIFT;
+}
+#endif
+
+
+/* SSE2 row IDCT */
+#define sse2_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \
+ c4, -c6, c4, -c2, \
+ c4, c6, -c4, -c2, \
+ -c4, c2, c4, -c6, \
+ c1, c3, c3, -c7, \
+ c5, -c1, c7, -c5, \
+ c5, c7, -c1, -c5, \
+ c7, c3, c3, -c1 }
+
+#define SSE2_IDCT_2ROW(table, row1, row2, round1, round2) do { \
+ /* no scheduling: trust in out of order execution */ \
+ /* based on Intel AP-945 */ \
+ /* (http://cache-www.intel.com/cd/00/00/01/76/17680_w_idct.pdf) */ \
+ \
+ /* input */ /* 1: row1= x7 x5 x3 x1 x6 x4 x2 x0 */ \
+ pshufd_r2r (row1, xmm1, 0); /* 1: xmm1= x2 x0 x2 x0 x2 x0 x2 x0 */ \
+ pmaddwd_m2r (table[0], xmm1); /* 1: xmm1= x2*C + x0*C ... */ \
+ pshufd_r2r (row1, xmm3, 0xaa); /* 1: xmm3= x3 x1 x3 x1 x3 x1 x3 x1 */ \
+ pmaddwd_m2r (table[2*8], xmm3); /* 1: xmm3= x3*C + x1*C ... */ \
+ pshufd_r2r (row1, xmm2, 0x55); /* 1: xmm2= x6 x4 x6 x4 x6 x4 x6 x4 */ \
+ pshufd_r2r (row1, row1, 0xff); /* 1: row1= x7 x5 x7 x5 x7 x5 x7 x5 */ \
+ pmaddwd_m2r (table[1*8], xmm2); /* 1: xmm2= x6*C + x4*C ... */ \
+ paddd_m2r (round1, xmm1); /* 1: xmm1= x2*C + x0*C + round ... */ \
+ pmaddwd_m2r (table[3*8], row1); /* 1: row1= x7*C + x5*C ... */ \
+ pshufd_r2r (row2, xmm5, 0); /* 2: */ \
+ pshufd_r2r (row2, xmm6, 0x55); /* 2: */ \
+ pmaddwd_m2r (table[0], xmm5); /* 2: */ \
+ paddd_r2r (xmm2, xmm1); /* 1: xmm1= a[] */ \
+ movdqa_r2r (xmm1, xmm2); /* 1: xmm2= a[] */ \
+ pshufd_r2r (row2, xmm7, 0xaa); /* 2: */ \
+ pmaddwd_m2r (table[1*8], xmm6); /* 2: */ \
+ paddd_r2r (xmm3, row1); /* 1: row1= b[]= 7*C+5*C+3*C+1*C ... */ \
+ pshufd_r2r (row2, row2, 0xff); /* 2: */ \
+ psubd_r2r (row1, xmm2); /* 1: xmm2= a[] - b[] */ \
+ pmaddwd_m2r (table[2*8], xmm7); /* 2: */ \
+ paddd_r2r (xmm1, row1); /* 1: row1= a[] + b[] */ \
+ psrad_i2r (ROW_SHIFT, xmm2); /* 1: xmm2= result 4...7 */ \
+ paddd_m2r (round2, xmm5); /* 2: */ \
+ pmaddwd_m2r (table[3*8], row2); /* 2: */ \
+ paddd_r2r (xmm6, xmm5); /* 2: */ \
+ movdqa_r2r (xmm5, xmm6); /* 2: */ \
+    psrad_i2r (ROW_SHIFT, row1);    /* 1: row1= result 0...3        */ \
+ pshufd_r2r (xmm2, xmm2, 0x1b); /* 1: [0 1 2 3] -> [3 2 1 0] */ \
+ packssdw_r2r (xmm2, row1); /* 1: row1= result[] */ \
+ paddd_r2r (xmm7, row2); /* 2: */ \
+ psubd_r2r (row2, xmm6); /* 2: */ \
+ paddd_r2r (xmm5, row2); /* 2: */ \
+ psrad_i2r (ROW_SHIFT, xmm6); /* 2: */ \
+ psrad_i2r (ROW_SHIFT, row2); /* 2: */ \
+ pshufd_r2r (xmm6, xmm6, 0x1b); /* 2: */ \
+ packssdw_r2r (xmm6, row2); /* 2: */ \
+} while (0)
+
+
+/* MMXEXT row IDCT */
+
+#define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \
+ c4, c6, c4, c6, \
+ c1, c3, -c1, -c5, \
+ c5, c7, c3, -c7, \
+ c4, -c6, c4, -c6, \
+ -c4, c2, c4, -c2, \
+ c5, -c1, c3, -c1, \
+ c7, c3, c7, -c5 }
+
+static inline void mmxext_row_head (int16_t * const row, const int offset,
+ const int16_t * const table)
+{
+ movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */
+
+ movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */
+ movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */
+
+ movq_m2r (*table, mm3); /* mm3 = -C2 -C4 C2 C4 */
+ movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */
+
+ movq_m2r (*(table+4), mm4); /* mm4 = C6 C4 C6 C4 */
+ pmaddwd_r2r (mm0, mm3); /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */
+
+ pshufw_r2r (mm2, mm2, 0x4e); /* mm2 = x2 x0 x6 x4 */
+}
+
+static inline void mmxext_row (const int16_t * const table,
+ const int32_t * const rounder)
+{
+ movq_m2r (*(table+8), mm1); /* mm1 = -C5 -C1 C3 C1 */
+ pmaddwd_r2r (mm2, mm4); /* mm4 = C4*x0+C6*x2 C4*x4+C6*x6 */
+
+ pmaddwd_m2r (*(table+16), mm0); /* mm0 = C4*x4-C6*x6 C4*x0-C6*x2 */
+ pshufw_r2r (mm6, mm6, 0x4e); /* mm6 = x3 x1 x7 x5 */
+
+ movq_m2r (*(table+12), mm7); /* mm7 = -C7 C3 C7 C5 */
+ pmaddwd_r2r (mm5, mm1); /* mm1 = -C1*x5-C5*x7 C1*x1+C3*x3 */
+
+ paddd_m2r (*rounder, mm3); /* mm3 += rounder */
+ pmaddwd_r2r (mm6, mm7); /* mm7 = C3*x1-C7*x3 C5*x5+C7*x7 */
+
+ pmaddwd_m2r (*(table+20), mm2); /* mm2 = C4*x0-C2*x2 -C4*x4+C2*x6 */
+ paddd_r2r (mm4, mm3); /* mm3 = a1 a0 + rounder */
+
+ pmaddwd_m2r (*(table+24), mm5); /* mm5 = C3*x5-C1*x7 C5*x1-C1*x3 */
+ movq_r2r (mm3, mm4); /* mm4 = a1 a0 + rounder */
+
+ pmaddwd_m2r (*(table+28), mm6); /* mm6 = C7*x1-C5*x3 C7*x5+C3*x7 */
+ paddd_r2r (mm7, mm1); /* mm1 = b1 b0 */
+
+ paddd_m2r (*rounder, mm0); /* mm0 += rounder */
+ psubd_r2r (mm1, mm3); /* mm3 = a1-b1 a0-b0 + rounder */
+
+ psrad_i2r (ROW_SHIFT, mm3); /* mm3 = y6 y7 */
+ paddd_r2r (mm4, mm1); /* mm1 = a1+b1 a0+b0 + rounder */
+
+ paddd_r2r (mm2, mm0); /* mm0 = a3 a2 + rounder */
+ psrad_i2r (ROW_SHIFT, mm1); /* mm1 = y1 y0 */
+
+ paddd_r2r (mm6, mm5); /* mm5 = b3 b2 */
+ movq_r2r (mm0, mm4); /* mm4 = a3 a2 + rounder */
+
+ paddd_r2r (mm5, mm0); /* mm0 = a3+b3 a2+b2 + rounder */
+ psubd_r2r (mm5, mm4); /* mm4 = a3-b3 a2-b2 + rounder */
+}
+
+static inline void mmxext_row_tail (int16_t * const row, const int store)
+{
+ psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */
+
+ psrad_i2r (ROW_SHIFT, mm4); /* mm4 = y4 y5 */
+
+ packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */
+
+ packssdw_r2r (mm3, mm4); /* mm4 = y6 y7 y4 y5 */
+
+ movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */
+ pshufw_r2r (mm4, mm4, 0xb1); /* mm4 = y7 y6 y5 y4 */
+
+ /* slot */
+
+ movq_r2m (mm4, *(row+store+4)); /* save y7 y6 y5 y4 */
+}
+
+static inline void mmxext_row_mid (int16_t * const row, const int store,
+ const int offset,
+ const int16_t * const table)
+{
+ movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */
+ psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */
+
+ movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */
+ psrad_i2r (ROW_SHIFT, mm4); /* mm4 = y4 y5 */
+
+ packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */
+ movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */
+
+ packssdw_r2r (mm3, mm4); /* mm4 = y6 y7 y4 y5 */
+ movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */
+
+ movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */
+ pshufw_r2r (mm4, mm4, 0xb1); /* mm4 = y7 y6 y5 y4 */
+
+ movq_m2r (*table, mm3); /* mm3 = -C2 -C4 C2 C4 */
+ movq_r2m (mm4, *(row+store+4)); /* save y7 y6 y5 y4 */
+
+ pmaddwd_r2r (mm0, mm3); /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */
+
+ movq_m2r (*(table+4), mm4); /* mm4 = C6 C4 C6 C4 */
+ pshufw_r2r (mm2, mm2, 0x4e); /* mm2 = x2 x0 x6 x4 */
+}
+
+
+/* MMX row IDCT */
+
+#define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \
+ c4, c6, -c4, -c2, \
+ c1, c3, c3, -c7, \
+ c5, c7, -c1, -c5, \
+ c4, -c6, c4, -c2, \
+ -c4, c2, c4, -c6, \
+ c5, -c1, c7, -c5, \
+ c7, c3, c3, -c1 }
+
+static inline void mmx_row_head (int16_t * const row, const int offset,
+ const int16_t * const table)
+{
+ movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */
+
+ movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */
+ movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */
+
+ movq_m2r (*table, mm3); /* mm3 = C6 C4 C2 C4 */
+ movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */
+
+ punpckldq_r2r (mm0, mm0); /* mm0 = x2 x0 x2 x0 */
+
+ movq_m2r (*(table+4), mm4); /* mm4 = -C2 -C4 C6 C4 */
+ pmaddwd_r2r (mm0, mm3); /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */
+
+ movq_m2r (*(table+8), mm1); /* mm1 = -C7 C3 C3 C1 */
+ punpckhdq_r2r (mm2, mm2); /* mm2 = x6 x4 x6 x4 */
+}
+
+static inline void mmx_row (const int16_t * const table,
+ const int32_t * const rounder)
+{
+ pmaddwd_r2r (mm2, mm4); /* mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 */
+ punpckldq_r2r (mm5, mm5); /* mm5 = x3 x1 x3 x1 */
+
+ pmaddwd_m2r (*(table+16), mm0); /* mm0 = C4*x0-C2*x2 C4*x0-C6*x2 */
+ punpckhdq_r2r (mm6, mm6); /* mm6 = x7 x5 x7 x5 */
+
+ movq_m2r (*(table+12), mm7); /* mm7 = -C5 -C1 C7 C5 */
+ pmaddwd_r2r (mm5, mm1); /* mm1 = C3*x1-C7*x3 C1*x1+C3*x3 */
+
+ paddd_m2r (*rounder, mm3); /* mm3 += rounder */
+ pmaddwd_r2r (mm6, mm7); /* mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 */
+
+ pmaddwd_m2r (*(table+20), mm2); /* mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 */
+ paddd_r2r (mm4, mm3); /* mm3 = a1 a0 + rounder */
+
+ pmaddwd_m2r (*(table+24), mm5); /* mm5 = C7*x1-C5*x3 C5*x1-C1*x3 */
+ movq_r2r (mm3, mm4); /* mm4 = a1 a0 + rounder */
+
+ pmaddwd_m2r (*(table+28), mm6); /* mm6 = C3*x5-C1*x7 C7*x5+C3*x7 */
+ paddd_r2r (mm7, mm1); /* mm1 = b1 b0 */
+
+ paddd_m2r (*rounder, mm0); /* mm0 += rounder */
+ psubd_r2r (mm1, mm3); /* mm3 = a1-b1 a0-b0 + rounder */
+
+ psrad_i2r (ROW_SHIFT, mm3); /* mm3 = y6 y7 */
+ paddd_r2r (mm4, mm1); /* mm1 = a1+b1 a0+b0 + rounder */
+
+ paddd_r2r (mm2, mm0); /* mm0 = a3 a2 + rounder */
+ psrad_i2r (ROW_SHIFT, mm1); /* mm1 = y1 y0 */
+
+ paddd_r2r (mm6, mm5); /* mm5 = b3 b2 */
+ movq_r2r (mm0, mm7); /* mm7 = a3 a2 + rounder */
+
+ paddd_r2r (mm5, mm0); /* mm0 = a3+b3 a2+b2 + rounder */
+ psubd_r2r (mm5, mm7); /* mm7 = a3-b3 a2-b2 + rounder */
+}
+
+static inline void mmx_row_tail (int16_t * const row, const int store)
+{
+ psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */
+
+ psrad_i2r (ROW_SHIFT, mm7); /* mm7 = y4 y5 */
+
+ packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */
+
+ packssdw_r2r (mm3, mm7); /* mm7 = y6 y7 y4 y5 */
+
+ movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */
+ movq_r2r (mm7, mm4); /* mm4 = y6 y7 y4 y5 */
+
+ pslld_i2r (16, mm7); /* mm7 = y7 0 y5 0 */
+
+ psrld_i2r (16, mm4); /* mm4 = 0 y6 0 y4 */
+
+ por_r2r (mm4, mm7); /* mm7 = y7 y6 y5 y4 */
+
+ /* slot */
+
+ movq_r2m (mm7, *(row+store+4)); /* save y7 y6 y5 y4 */
+}
+
+static inline void mmx_row_mid (int16_t * const row, const int store,
+ const int offset, const int16_t * const table)
+{
+ movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */
+ psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */
+
+ movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */
+ psrad_i2r (ROW_SHIFT, mm7); /* mm7 = y4 y5 */
+
+ packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */
+ movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */
+
+ packssdw_r2r (mm3, mm7); /* mm7 = y6 y7 y4 y5 */
+ movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */
+
+ movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */
+ movq_r2r (mm7, mm1); /* mm1 = y6 y7 y4 y5 */
+
+ punpckldq_r2r (mm0, mm0); /* mm0 = x2 x0 x2 x0 */
+ psrld_i2r (16, mm7); /* mm7 = 0 y6 0 y4 */
+
+ movq_m2r (*table, mm3); /* mm3 = C6 C4 C2 C4 */
+ pslld_i2r (16, mm1); /* mm1 = y7 0 y5 0 */
+
+ movq_m2r (*(table+4), mm4); /* mm4 = -C2 -C4 C6 C4 */
+ por_r2r (mm1, mm7); /* mm7 = y7 y6 y5 y4 */
+
+ movq_m2r (*(table+8), mm1); /* mm1 = -C7 C3 C3 C1 */
+ punpckhdq_r2r (mm2, mm2); /* mm2 = x6 x4 x6 x4 */
+
+ movq_r2m (mm7, *(row+store+4)); /* save y7 y6 y5 y4 */
+ pmaddwd_r2r (mm0, mm3); /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */
+}
+
+
+#if 0
+/* C column IDCT - it is just here to document the MMXEXT and MMX versions */
+static inline void idct_col (int16_t * col, int offset)
+{
+/* multiplication - as implemented on mmx */
+#define F(c,x) (((c) * (x)) >> 16)
+
+/* saturation - it helps us handle torture test cases */
+#define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? -32768 : (x))
+
+ int16_t x0, x1, x2, x3, x4, x5, x6, x7;
+ int16_t y0, y1, y2, y3, y4, y5, y6, y7;
+ int16_t a0, a1, a2, a3, b0, b1, b2, b3;
+ int16_t u04, v04, u26, v26, u17, v17, u35, v35, u12, v12;
+
+ col += offset;
+
+ x0 = col[0*8];
+ x1 = col[1*8];
+ x2 = col[2*8];
+ x3 = col[3*8];
+ x4 = col[4*8];
+ x5 = col[5*8];
+ x6 = col[6*8];
+ x7 = col[7*8];
+
+ u04 = S (x0 + x4);
+ v04 = S (x0 - x4);
+ u26 = S (F (T2, x6) + x2);
+ v26 = S (F (T2, x2) - x6);
+
+ a0 = S (u04 + u26);
+ a1 = S (v04 + v26);
+ a2 = S (v04 - v26);
+ a3 = S (u04 - u26);
+
+ u17 = S (F (T1, x7) + x1);
+ v17 = S (F (T1, x1) - x7);
+ u35 = S (F (T3, x5) + x3);
+ v35 = S (F (T3, x3) - x5);
+
+ b0 = S (u17 + u35);
+ b3 = S (v17 - v35);
+ u12 = S (u17 - u35);
+ v12 = S (v17 + v35);
+ u12 = S (2 * F (C4, u12));
+ v12 = S (2 * F (C4, v12));
+ b1 = S (u12 + v12);
+ b2 = S (u12 - v12);
+
+ y0 = S (a0 + b0) >> COL_SHIFT;
+ y1 = S (a1 + b1) >> COL_SHIFT;
+ y2 = S (a2 + b2) >> COL_SHIFT;
+ y3 = S (a3 + b3) >> COL_SHIFT;
+
+ y4 = S (a3 - b3) >> COL_SHIFT;
+ y5 = S (a2 - b2) >> COL_SHIFT;
+ y6 = S (a1 - b1) >> COL_SHIFT;
+ y7 = S (a0 - b0) >> COL_SHIFT;
+
+ col[0*8] = y0;
+ col[1*8] = y1;
+ col[2*8] = y2;
+ col[3*8] = y3;
+ col[4*8] = y4;
+ col[5*8] = y5;
+ col[6*8] = y6;
+ col[7*8] = y7;
+}
+#endif
+
+
+#define T1 13036
+#define T2 27146
+#define T3 43790
+#define C4 23170
+
+
+/* SSE2 column IDCT */
+static inline void sse2_idct_col (int16_t * const col)
+{
+ /* Almost identical to mmxext version: */
+    /* just do both 4x8 columns in parallel */
+
+ static const short t1_vector[] ATTR_ALIGN(16) = {T1,T1,T1,T1,T1,T1,T1,T1};
+ static const short t2_vector[] ATTR_ALIGN(16) = {T2,T2,T2,T2,T2,T2,T2,T2};
+ static const short t3_vector[] ATTR_ALIGN(16) = {T3,T3,T3,T3,T3,T3,T3,T3};
+ static const short c4_vector[] ATTR_ALIGN(16) = {C4,C4,C4,C4,C4,C4,C4,C4};
+
+#if defined(__x86_64__)
+
+ /* INPUT: block in xmm8 ... xmm15 */
+
+ movdqa_m2r (*t1_vector, xmm0); /* xmm0 = T1 */
+ movdqa_r2r (xmm9, xmm1); /* xmm1 = x1 */
+
+ movdqa_r2r (xmm0, xmm2); /* xmm2 = T1 */
+ pmulhw_r2r (xmm1, xmm0); /* xmm0 = T1*x1 */
+
+ movdqa_m2r (*t3_vector, xmm5); /* xmm5 = T3 */
+ pmulhw_r2r (xmm15, xmm2); /* xmm2 = T1*x7 */
+
+ movdqa_r2r (xmm5, xmm7); /* xmm7 = T3-1 */
+ psubsw_r2r (xmm15, xmm0); /* xmm0 = v17 */
+
+ movdqa_m2r (*t2_vector, xmm9); /* xmm9 = T2 */
+ pmulhw_r2r (xmm11, xmm5); /* xmm5 = (T3-1)*x3 */
+
+ paddsw_r2r (xmm2, xmm1); /* xmm1 = u17 */
+ pmulhw_r2r (xmm13, xmm7); /* xmm7 = (T3-1)*x5 */
+
+ movdqa_r2r (xmm9, xmm2); /* xmm2 = T2 */
+ paddsw_r2r (xmm11, xmm5); /* xmm5 = T3*x3 */
+
+ pmulhw_r2r (xmm10, xmm9); /* xmm9 = T2*x2 */
+ paddsw_r2r (xmm13, xmm7); /* xmm7 = T3*x5 */
+
+ psubsw_r2r (xmm13, xmm5); /* xmm5 = v35 */
+ paddsw_r2r (xmm11, xmm7); /* xmm7 = u35 */
+
+ movdqa_r2r (xmm0, xmm6); /* xmm6 = v17 */
+ pmulhw_r2r (xmm14, xmm2); /* xmm2 = T2*x6 */
+
+ psubsw_r2r (xmm5, xmm0); /* xmm0 = b3 */
+ psubsw_r2r (xmm14, xmm9); /* xmm9 = v26 */
+
+ paddsw_r2r (xmm6, xmm5); /* xmm5 = v12 */
+ movdqa_r2r (xmm0, xmm11); /* xmm11 = b3 */
+
+ movdqa_r2r (xmm1, xmm6); /* xmm6 = u17 */
+ paddsw_r2r (xmm10, xmm2); /* xmm2 = u26 */
+
+ paddsw_r2r (xmm7, xmm6); /* xmm6 = b0 */
+ psubsw_r2r (xmm7, xmm1); /* xmm1 = u12 */
+
+ movdqa_r2r (xmm1, xmm7); /* xmm7 = u12 */
+ paddsw_r2r (xmm5, xmm1); /* xmm1 = u12+v12 */
+
+ movdqa_m2r (*c4_vector, xmm0); /* xmm0 = C4/2 */
+ psubsw_r2r (xmm5, xmm7); /* xmm7 = u12-v12 */
+
+ movdqa_r2r (xmm6, xmm4); /* xmm4 = b0 */
+ pmulhw_r2r (xmm0, xmm1); /* xmm1 = b1/2 */
+
+ movdqa_r2r (xmm9, xmm6); /* xmm6 = v26 */
+ pmulhw_r2r (xmm0, xmm7); /* xmm7 = b2/2 */
+
+ movdqa_r2r (xmm8, xmm10); /* xmm10 = x0 */
+ movdqa_r2r (xmm8, xmm0); /* xmm0 = x0 */
+
+ psubsw_r2r (xmm12, xmm10); /* xmm10 = v04 */
+ paddsw_r2r (xmm12, xmm0); /* xmm0 = u04 */
+
+ paddsw_r2r (xmm10, xmm9); /* xmm9 = a1 */
+ movdqa_r2r (xmm0, xmm8); /* xmm8 = u04 */
+
+ psubsw_r2r (xmm6, xmm10); /* xmm10 = a2 */
+    paddsw_r2r (xmm2, xmm8);            /* xmm8 = a0 */
+
+ paddsw_r2r (xmm1, xmm1); /* xmm1 = b1 */
+ psubsw_r2r (xmm2, xmm0); /* xmm0 = a3 */
+
+ paddsw_r2r (xmm7, xmm7); /* xmm7 = b2 */
+ movdqa_r2r (xmm10, xmm13); /* xmm13 = a2 */
+
+ movdqa_r2r (xmm9, xmm14); /* xmm14 = a1 */
+ paddsw_r2r (xmm7, xmm10); /* xmm10 = a2+b2 */
+
+ psraw_i2r (COL_SHIFT,xmm10); /* xmm10 = y2 */
+ paddsw_r2r (xmm1, xmm9); /* xmm9 = a1+b1 */
+
+ psraw_i2r (COL_SHIFT, xmm9); /* xmm9 = y1 */
+ psubsw_r2r (xmm1, xmm14); /* xmm14 = a1-b1 */
+
+ psubsw_r2r (xmm7, xmm13); /* xmm13 = a2-b2 */
+ psraw_i2r (COL_SHIFT,xmm14); /* xmm14 = y6 */
+
+ movdqa_r2r (xmm8, xmm15); /* xmm15 = a0 */
+ psraw_i2r (COL_SHIFT,xmm13); /* xmm13 = y5 */
+
+ paddsw_r2r (xmm4, xmm8); /* xmm8 = a0+b0 */
+ psubsw_r2r (xmm4, xmm15); /* xmm15 = a0-b0 */
+
+ psraw_i2r (COL_SHIFT, xmm8); /* xmm8 = y0 */
+ movdqa_r2r (xmm0, xmm12); /* xmm12 = a3 */
+
+ psubsw_r2r (xmm11, xmm12); /* xmm12 = a3-b3 */
+ psraw_i2r (COL_SHIFT,xmm15); /* xmm15 = y7 */
+
+ paddsw_r2r (xmm0, xmm11); /* xmm11 = a3+b3 */
+ psraw_i2r (COL_SHIFT,xmm12); /* xmm12 = y4 */
+
+ psraw_i2r (COL_SHIFT,xmm11); /* xmm11 = y3 */
+
+ /* OUTPUT: block in xmm8 ... xmm15 */
+
+#else
+ movdqa_m2r (*t1_vector, xmm0); /* xmm0 = T1 */
+
+ movdqa_m2r (*(col+1*8), xmm1); /* xmm1 = x1 */
+ movdqa_r2r (xmm0, xmm2); /* xmm2 = T1 */
+
+ movdqa_m2r (*(col+7*8), xmm4); /* xmm4 = x7 */
+ pmulhw_r2r (xmm1, xmm0); /* xmm0 = T1*x1 */
+
+ movdqa_m2r (*t3_vector, xmm5); /* xmm5 = T3 */
+ pmulhw_r2r (xmm4, xmm2); /* xmm2 = T1*x7 */
+
+ movdqa_m2r (*(col+5*8), xmm6); /* xmm6 = x5 */
+ movdqa_r2r (xmm5, xmm7); /* xmm7 = T3-1 */
+
+ movdqa_m2r (*(col+3*8), xmm3); /* xmm3 = x3 */
+ psubsw_r2r (xmm4, xmm0); /* xmm0 = v17 */
+
+ movdqa_m2r (*t2_vector, xmm4); /* xmm4 = T2 */
+ pmulhw_r2r (xmm3, xmm5); /* xmm5 = (T3-1)*x3 */
+
+ paddsw_r2r (xmm2, xmm1); /* xmm1 = u17 */
+ pmulhw_r2r (xmm6, xmm7); /* xmm7 = (T3-1)*x5 */
+
+ /* slot */
+
+ movdqa_r2r (xmm4, xmm2); /* xmm2 = T2 */
+ paddsw_r2r (xmm3, xmm5); /* xmm5 = T3*x3 */
+
+ pmulhw_m2r (*(col+2*8), xmm4); /* xmm4 = T2*x2 */
+ paddsw_r2r (xmm6, xmm7); /* xmm7 = T3*x5 */
+
+ psubsw_r2r (xmm6, xmm5); /* xmm5 = v35 */
+ paddsw_r2r (xmm3, xmm7); /* xmm7 = u35 */
+
+ movdqa_m2r (*(col+6*8), xmm3); /* xmm3 = x6 */
+ movdqa_r2r (xmm0, xmm6); /* xmm6 = v17 */
+
+ pmulhw_r2r (xmm3, xmm2); /* xmm2 = T2*x6 */
+ psubsw_r2r (xmm5, xmm0); /* xmm0 = b3 */
+
+ psubsw_r2r (xmm3, xmm4); /* xmm4 = v26 */
+ paddsw_r2r (xmm6, xmm5); /* xmm5 = v12 */
+
+ movdqa_r2m (xmm0, *(col+3*8)); /* save b3 in scratch0 */
+ movdqa_r2r (xmm1, xmm6); /* xmm6 = u17 */
+
+ paddsw_m2r (*(col+2*8), xmm2); /* xmm2 = u26 */
+ paddsw_r2r (xmm7, xmm6); /* xmm6 = b0 */
+
+ psubsw_r2r (xmm7, xmm1); /* xmm1 = u12 */
+ movdqa_r2r (xmm1, xmm7); /* xmm7 = u12 */
+
+ movdqa_m2r (*(col+0*8), xmm3); /* xmm3 = x0 */
+ paddsw_r2r (xmm5, xmm1); /* xmm1 = u12+v12 */
+
+ movdqa_m2r (*c4_vector, xmm0); /* xmm0 = C4/2 */
+ psubsw_r2r (xmm5, xmm7); /* xmm7 = u12-v12 */
+
+ movdqa_r2m (xmm6, *(col+5*8)); /* save b0 in scratch1 */
+ pmulhw_r2r (xmm0, xmm1); /* xmm1 = b1/2 */
+
+ movdqa_r2r (xmm4, xmm6); /* xmm6 = v26 */
+ pmulhw_r2r (xmm0, xmm7); /* xmm7 = b2/2 */
+
+ movdqa_m2r (*(col+4*8), xmm5); /* xmm5 = x4 */
+ movdqa_r2r (xmm3, xmm0); /* xmm0 = x0 */
+
+ psubsw_r2r (xmm5, xmm3); /* xmm3 = v04 */
+ paddsw_r2r (xmm5, xmm0); /* xmm0 = u04 */
+
+ paddsw_r2r (xmm3, xmm4); /* xmm4 = a1 */
+ movdqa_r2r (xmm0, xmm5); /* xmm5 = u04 */
+
+ psubsw_r2r (xmm6, xmm3); /* xmm3 = a2 */
+ paddsw_r2r (xmm2, xmm5); /* xmm5 = a0 */
+
+ paddsw_r2r (xmm1, xmm1); /* xmm1 = b1 */
+ psubsw_r2r (xmm2, xmm0); /* xmm0 = a3 */
+
+ paddsw_r2r (xmm7, xmm7); /* xmm7 = b2 */
+ movdqa_r2r (xmm3, xmm2); /* xmm2 = a2 */
+
+ movdqa_r2r (xmm4, xmm6); /* xmm6 = a1 */
+ paddsw_r2r (xmm7, xmm3); /* xmm3 = a2+b2 */
+
+ psraw_i2r (COL_SHIFT, xmm3); /* xmm3 = y2 */
+ paddsw_r2r (xmm1, xmm4); /* xmm4 = a1+b1 */
+
+ psraw_i2r (COL_SHIFT, xmm4); /* xmm4 = y1 */
+ psubsw_r2r (xmm1, xmm6); /* xmm6 = a1-b1 */
+
+ movdqa_m2r (*(col+5*8), xmm1); /* xmm1 = b0 */
+ psubsw_r2r (xmm7, xmm2); /* xmm2 = a2-b2 */
+
+ psraw_i2r (COL_SHIFT, xmm6); /* xmm6 = y6 */
+ movdqa_r2r (xmm5, xmm7); /* xmm7 = a0 */
+
+ movdqa_r2m (xmm4, *(col+1*8)); /* save y1 */
+ psraw_i2r (COL_SHIFT, xmm2); /* xmm2 = y5 */
+
+ movdqa_r2m (xmm3, *(col+2*8)); /* save y2 */
+ paddsw_r2r (xmm1, xmm5); /* xmm5 = a0+b0 */
+
+ movdqa_m2r (*(col+3*8), xmm4); /* xmm4 = b3 */
+ psubsw_r2r (xmm1, xmm7); /* xmm7 = a0-b0 */
+
+ psraw_i2r (COL_SHIFT, xmm5); /* xmm5 = y0 */
+ movdqa_r2r (xmm0, xmm3); /* xmm3 = a3 */
+
+ movdqa_r2m (xmm2, *(col+5*8)); /* save y5 */
+ psubsw_r2r (xmm4, xmm3); /* xmm3 = a3-b3 */
+
+ psraw_i2r (COL_SHIFT, xmm7); /* xmm7 = y7 */
+ paddsw_r2r (xmm0, xmm4); /* xmm4 = a3+b3 */
+
+ movdqa_r2m (xmm5, *(col+0*8)); /* save y0 */
+ psraw_i2r (COL_SHIFT, xmm3); /* xmm3 = y4 */
+
+ movdqa_r2m (xmm6, *(col+6*8)); /* save y6 */
+ psraw_i2r (COL_SHIFT, xmm4); /* xmm4 = y3 */
+
+ movdqa_r2m (xmm7, *(col+7*8)); /* save y7 */
+
+ movdqa_r2m (xmm3, *(col+4*8)); /* save y4 */
+
+ movdqa_r2m (xmm4, *(col+3*8)); /* save y3 */
+#endif
+}
+
+
+/* MMX column IDCT */
+static inline void idct_col (int16_t * const col, const int offset)
+{
+ static const short t1_vector[] ATTR_ALIGN(8) = {T1,T1,T1,T1};
+ static const short t2_vector[] ATTR_ALIGN(8) = {T2,T2,T2,T2};
+ static const short t3_vector[] ATTR_ALIGN(8) = {T3,T3,T3,T3};
+ static const short c4_vector[] ATTR_ALIGN(8) = {C4,C4,C4,C4};
+
+ /* column code adapted from peter gubanov */
+ /* http://www.elecard.com/peter/idct.shtml */
+
+ movq_m2r (*t1_vector, mm0); /* mm0 = T1 */
+
+ movq_m2r (*(col+offset+1*8), mm1); /* mm1 = x1 */
+ movq_r2r (mm0, mm2); /* mm2 = T1 */
+
+ movq_m2r (*(col+offset+7*8), mm4); /* mm4 = x7 */
+ pmulhw_r2r (mm1, mm0); /* mm0 = T1*x1 */
+
+ movq_m2r (*t3_vector, mm5); /* mm5 = T3 */
+ pmulhw_r2r (mm4, mm2); /* mm2 = T1*x7 */
+
+ movq_m2r (*(col+offset+5*8), mm6); /* mm6 = x5 */
+ movq_r2r (mm5, mm7); /* mm7 = T3-1 */
+
+ movq_m2r (*(col+offset+3*8), mm3); /* mm3 = x3 */
+ psubsw_r2r (mm4, mm0); /* mm0 = v17 */
+
+ movq_m2r (*t2_vector, mm4); /* mm4 = T2 */
+ pmulhw_r2r (mm3, mm5); /* mm5 = (T3-1)*x3 */
+
+ paddsw_r2r (mm2, mm1); /* mm1 = u17 */
+ pmulhw_r2r (mm6, mm7); /* mm7 = (T3-1)*x5 */
+
+ /* slot */
+
+ movq_r2r (mm4, mm2); /* mm2 = T2 */
+ paddsw_r2r (mm3, mm5); /* mm5 = T3*x3 */
+
+ pmulhw_m2r (*(col+offset+2*8), mm4);/* mm4 = T2*x2 */
+ paddsw_r2r (mm6, mm7); /* mm7 = T3*x5 */
+
+ psubsw_r2r (mm6, mm5); /* mm5 = v35 */
+ paddsw_r2r (mm3, mm7); /* mm7 = u35 */
+
+ movq_m2r (*(col+offset+6*8), mm3); /* mm3 = x6 */
+ movq_r2r (mm0, mm6); /* mm6 = v17 */
+
+ pmulhw_r2r (mm3, mm2); /* mm2 = T2*x6 */
+ psubsw_r2r (mm5, mm0); /* mm0 = b3 */
+
+ psubsw_r2r (mm3, mm4); /* mm4 = v26 */
+ paddsw_r2r (mm6, mm5); /* mm5 = v12 */
+
+ movq_r2m (mm0, *(col+offset+3*8)); /* save b3 in scratch0 */
+ movq_r2r (mm1, mm6); /* mm6 = u17 */
+
+ paddsw_m2r (*(col+offset+2*8), mm2);/* mm2 = u26 */
+ paddsw_r2r (mm7, mm6); /* mm6 = b0 */
+
+ psubsw_r2r (mm7, mm1); /* mm1 = u12 */
+ movq_r2r (mm1, mm7); /* mm7 = u12 */
+
+ movq_m2r (*(col+offset+0*8), mm3); /* mm3 = x0 */
+ paddsw_r2r (mm5, mm1); /* mm1 = u12+v12 */
+
+ movq_m2r (*c4_vector, mm0); /* mm0 = C4/2 */
+ psubsw_r2r (mm5, mm7); /* mm7 = u12-v12 */
+
+ movq_r2m (mm6, *(col+offset+5*8)); /* save b0 in scratch1 */
+ pmulhw_r2r (mm0, mm1); /* mm1 = b1/2 */
+
+ movq_r2r (mm4, mm6); /* mm6 = v26 */
+ pmulhw_r2r (mm0, mm7); /* mm7 = b2/2 */
+
+ movq_m2r (*(col+offset+4*8), mm5); /* mm5 = x4 */
+ movq_r2r (mm3, mm0); /* mm0 = x0 */
+
+ psubsw_r2r (mm5, mm3); /* mm3 = v04 */
+ paddsw_r2r (mm5, mm0); /* mm0 = u04 */
+
+ paddsw_r2r (mm3, mm4); /* mm4 = a1 */
+ movq_r2r (mm0, mm5); /* mm5 = u04 */
+
+ psubsw_r2r (mm6, mm3); /* mm3 = a2 */
+ paddsw_r2r (mm2, mm5); /* mm5 = a0 */
+
+ paddsw_r2r (mm1, mm1); /* mm1 = b1 */
+ psubsw_r2r (mm2, mm0); /* mm0 = a3 */
+
+ paddsw_r2r (mm7, mm7); /* mm7 = b2 */
+ movq_r2r (mm3, mm2); /* mm2 = a2 */
+
+ movq_r2r (mm4, mm6); /* mm6 = a1 */
+ paddsw_r2r (mm7, mm3); /* mm3 = a2+b2 */
+
+ psraw_i2r (COL_SHIFT, mm3); /* mm3 = y2 */
+ paddsw_r2r (mm1, mm4); /* mm4 = a1+b1 */
+
+ psraw_i2r (COL_SHIFT, mm4); /* mm4 = y1 */
+ psubsw_r2r (mm1, mm6); /* mm6 = a1-b1 */
+
+ movq_m2r (*(col+offset+5*8), mm1); /* mm1 = b0 */
+ psubsw_r2r (mm7, mm2); /* mm2 = a2-b2 */
+
+ psraw_i2r (COL_SHIFT, mm6); /* mm6 = y6 */
+ movq_r2r (mm5, mm7); /* mm7 = a0 */
+
+ movq_r2m (mm4, *(col+offset+1*8)); /* save y1 */
+ psraw_i2r (COL_SHIFT, mm2); /* mm2 = y5 */
+
+ movq_r2m (mm3, *(col+offset+2*8)); /* save y2 */
+ paddsw_r2r (mm1, mm5); /* mm5 = a0+b0 */
+
+ movq_m2r (*(col+offset+3*8), mm4); /* mm4 = b3 */
+ psubsw_r2r (mm1, mm7); /* mm7 = a0-b0 */
+
+ psraw_i2r (COL_SHIFT, mm5); /* mm5 = y0 */
+ movq_r2r (mm0, mm3); /* mm3 = a3 */
+
+ movq_r2m (mm2, *(col+offset+5*8)); /* save y5 */
+ psubsw_r2r (mm4, mm3); /* mm3 = a3-b3 */
+
+ psraw_i2r (COL_SHIFT, mm7); /* mm7 = y7 */
+ paddsw_r2r (mm0, mm4); /* mm4 = a3+b3 */
+
+ movq_r2m (mm5, *(col+offset+0*8)); /* save y0 */
+ psraw_i2r (COL_SHIFT, mm3); /* mm3 = y4 */
+
+ movq_r2m (mm6, *(col+offset+6*8)); /* save y6 */
+ psraw_i2r (COL_SHIFT, mm4); /* mm4 = y3 */
+
+ movq_r2m (mm7, *(col+offset+7*8)); /* save y7 */
+
+ movq_r2m (mm3, *(col+offset+4*8)); /* save y4 */
+
+ movq_r2m (mm4, *(col+offset+3*8)); /* save y3 */
+}
+
+
+static const int32_t rounder0[] ATTR_ALIGN(8) =
+ rounder ((1 << (COL_SHIFT - 1)) - 0.5);
+static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0);
+static const int32_t rounder1[] ATTR_ALIGN(8) =
+ rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */
+static const int32_t rounder7[] ATTR_ALIGN(8) =
+ rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */
+static const int32_t rounder2[] ATTR_ALIGN(8) =
+ rounder (0.60355339059); /* C2 * (C6+C2)/2 */
+static const int32_t rounder6[] ATTR_ALIGN(8) =
+ rounder (-0.25); /* C2 * (C6-C2)/2 */
+static const int32_t rounder3[] ATTR_ALIGN(8) =
+ rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */
+static const int32_t rounder5[] ATTR_ALIGN(8) =
+ rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */
+
+
+#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
+static inline void idct (int16_t * const block) \
+{ \
+ static const int16_t table04[] ATTR_ALIGN(16) = \
+ table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \
+ static const int16_t table17[] ATTR_ALIGN(16) = \
+ table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \
+ static const int16_t table26[] ATTR_ALIGN(16) = \
+ table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \
+ static const int16_t table35[] ATTR_ALIGN(16) = \
+ table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \
+ \
+ idct_row_head (block, 0*8, table04); \
+ idct_row (table04, rounder0); \
+ idct_row_mid (block, 0*8, 4*8, table04); \
+ idct_row (table04, rounder4); \
+ idct_row_mid (block, 4*8, 1*8, table17); \
+ idct_row (table17, rounder1); \
+ idct_row_mid (block, 1*8, 7*8, table17); \
+ idct_row (table17, rounder7); \
+ idct_row_mid (block, 7*8, 2*8, table26); \
+ idct_row (table26, rounder2); \
+ idct_row_mid (block, 2*8, 6*8, table26); \
+ idct_row (table26, rounder6); \
+ idct_row_mid (block, 6*8, 3*8, table35); \
+ idct_row (table35, rounder3); \
+ idct_row_mid (block, 3*8, 5*8, table35); \
+ idct_row (table35, rounder5); \
+ idct_row_tail (block, 5*8); \
+ \
+ idct_col (block, 0); \
+ idct_col (block, 4); \
+}
+
+static inline void sse2_idct (int16_t * const block)
+{
+ static const int16_t table04[] ATTR_ALIGN(16) =
+ sse2_table (22725, 21407, 19266, 16384, 12873, 8867, 4520);
+ static const int16_t table17[] ATTR_ALIGN(16) =
+ sse2_table (31521, 29692, 26722, 22725, 17855, 12299, 6270);
+ static const int16_t table26[] ATTR_ALIGN(16) =
+ sse2_table (29692, 27969, 25172, 21407, 16819, 11585, 5906);
+ static const int16_t table35[] ATTR_ALIGN(16) =
+ sse2_table (26722, 25172, 22654, 19266, 15137, 10426, 5315);
+
+ static const int32_t rounder0_128[] ATTR_ALIGN(16) =
+ rounder_sse2 ((1 << (COL_SHIFT - 1)) - 0.5);
+ static const int32_t rounder4_128[] ATTR_ALIGN(16) = rounder_sse2 (0);
+ static const int32_t rounder1_128[] ATTR_ALIGN(16) =
+ rounder_sse2 (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */
+ static const int32_t rounder7_128[] ATTR_ALIGN(16) =
+ rounder_sse2 (-0.25); /* C1*(C7/C4+C7-C1)/2 */
+ static const int32_t rounder2_128[] ATTR_ALIGN(16) =
+ rounder_sse2 (0.60355339059); /* C2 * (C6+C2)/2 */
+ static const int32_t rounder6_128[] ATTR_ALIGN(16) =
+ rounder_sse2 (-0.25); /* C2 * (C6-C2)/2 */
+ static const int32_t rounder3_128[] ATTR_ALIGN(16) =
+ rounder_sse2 (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */
+ static const int32_t rounder5_128[] ATTR_ALIGN(16) =
+ rounder_sse2 (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */
+
+#if defined(__x86_64__)
+ movdqa_m2r (block[0*8], xmm8);
+ movdqa_m2r (block[4*8], xmm12);
+ SSE2_IDCT_2ROW (table04, xmm8, xmm12, *rounder0_128, *rounder4_128);
+
+ movdqa_m2r (block[1*8], xmm9);
+ movdqa_m2r (block[7*8], xmm15);
+ SSE2_IDCT_2ROW (table17, xmm9, xmm15, *rounder1_128, *rounder7_128);
+
+ movdqa_m2r (block[2*8], xmm10);
+ movdqa_m2r (block[6*8], xmm14);
+ SSE2_IDCT_2ROW (table26, xmm10, xmm14, *rounder2_128, *rounder6_128);
+
+ movdqa_m2r (block[3*8], xmm11);
+ movdqa_m2r (block[5*8], xmm13);
+ SSE2_IDCT_2ROW (table35, xmm11, xmm13, *rounder3_128, *rounder5_128);
+
+ /* OUTPUT: block in xmm8 ... xmm15 */
+
+#else
+ movdqa_m2r (block[0*8], xmm0);
+ movdqa_m2r (block[4*8], xmm4);
+ SSE2_IDCT_2ROW (table04, xmm0, xmm4, *rounder0_128, *rounder4_128);
+ movdqa_r2m (xmm0, block[0*8]);
+ movdqa_r2m (xmm4, block[4*8]);
+
+ movdqa_m2r (block[1*8], xmm0);
+ movdqa_m2r (block[7*8], xmm4);
+ SSE2_IDCT_2ROW (table17, xmm0, xmm4, *rounder1_128, *rounder7_128);
+ movdqa_r2m (xmm0, block[1*8]);
+ movdqa_r2m (xmm4, block[7*8]);
+
+ movdqa_m2r (block[2*8], xmm0);
+ movdqa_m2r (block[6*8], xmm4);
+ SSE2_IDCT_2ROW (table26, xmm0, xmm4, *rounder2_128, *rounder6_128);
+ movdqa_r2m (xmm0, block[2*8]);
+ movdqa_r2m (xmm4, block[6*8]);
+
+ movdqa_m2r (block[3*8], xmm0);
+ movdqa_m2r (block[5*8], xmm4);
+ SSE2_IDCT_2ROW (table35, xmm0, xmm4, *rounder3_128, *rounder5_128);
+ movdqa_r2m (xmm0, block[3*8]);
+ movdqa_r2m (xmm4, block[5*8]);
+#endif
+
+ sse2_idct_col (block);
+}
+
+/* Store an 8x8 block of 16-bit IDCT coefficients to dest as unsigned bytes,
+ * one row per stride, saturating each value to 0..255 via packuswb.
+ * On x86-64 the rows are taken from xmm8..xmm15, where sse2_idct left them;
+ * on 32-bit x86 they are reloaded from the block[] array. */
+static void sse2_block_copy (int16_t * const block, uint8_t * dest,
+                             const int stride)
+{
+#if defined(__x86_64__)
+    /* INPUT: block in xmm8 ... xmm15 */
+    packuswb_r2r (xmm8, xmm8);
+    packuswb_r2r (xmm9, xmm9);
+    movq_r2m (xmm8, *(dest+0*stride));
+    packuswb_r2r (xmm10, xmm10);
+    movq_r2m (xmm9, *(dest+1*stride));
+    packuswb_r2r (xmm11, xmm11);
+    movq_r2m (xmm10, *(dest+2*stride));
+    packuswb_r2r (xmm12, xmm12);
+    movq_r2m (xmm11, *(dest+3*stride));
+    packuswb_r2r (xmm13, xmm13);
+    movq_r2m (xmm12, *(dest+4*stride));
+    packuswb_r2r (xmm14, xmm14);
+    movq_r2m (xmm13, *(dest+5*stride));
+    packuswb_r2r (xmm15, xmm15);
+    movq_r2m (xmm14, *(dest+6*stride));
+    movq_r2m (xmm15, *(dest+7*stride));
+#else
+    /* loads, packs and stores are interleaved to hide latency */
+    movdqa_m2r (*(block+0*8), xmm0);
+    movdqa_m2r (*(block+1*8), xmm1);
+    movdqa_m2r (*(block+2*8), xmm2);
+    packuswb_r2r (xmm0, xmm0);
+    movdqa_m2r (*(block+3*8), xmm3);
+    packuswb_r2r (xmm1, xmm1);
+    movdqa_m2r (*(block+4*8), xmm4);
+    packuswb_r2r (xmm2, xmm2);
+    movdqa_m2r (*(block+5*8), xmm5);
+    packuswb_r2r (xmm3, xmm3);
+    movdqa_m2r (*(block+6*8), xmm6);
+    packuswb_r2r (xmm4, xmm4);
+    movdqa_m2r (*(block+7*8), xmm7);
+    movq_r2m (xmm0, *(dest+0*stride));
+    packuswb_r2r (xmm5, xmm5);
+    movq_r2m (xmm1, *(dest+1*stride));
+    packuswb_r2r (xmm6, xmm6);
+    movq_r2m (xmm2, *(dest+2*stride));
+    packuswb_r2r (xmm7, xmm7);
+    movq_r2m (xmm3, *(dest+3*stride));
+    movq_r2m (xmm4, *(dest+4*stride));
+    movq_r2m (xmm5, *(dest+5*stride));
+    movq_r2m (xmm6, *(dest+6*stride));
+    movq_r2m (xmm7, *(dest+7*stride));
+#endif
+}
+
+/* Pipelined MMX copy step: loads the row at block+offset into r0:r1 while
+ * storing the previously packed row r2 to *dest (dest is advanced first).
+ * Registers rotate across successive invocations in block_copy below. */
+#define COPY_MMX(offset,r0,r1,r2) \
+do { \
+    movq_m2r (*(block+offset), r0); \
+    dest += stride; \
+    movq_m2r (*(block+offset+4), r1); \
+    movq_r2m (r2, *dest); \
+    packuswb_r2r (r1, r0); \
+} while (0)
+
+/* MMX version of sse2_block_copy: store the 8x8 16-bit block to dest as
+ * saturated unsigned bytes, one row per stride.  The first two rows are
+ * handled inline; the rest go through COPY_MMX with rotating registers. */
+static inline void block_copy (int16_t * const block, uint8_t * dest,
+                               const int stride)
+{
+    movq_m2r (*(block+0*8), mm0);
+    movq_m2r (*(block+0*8+4), mm1);
+    movq_m2r (*(block+1*8), mm2);
+    packuswb_r2r (mm1, mm0);
+    movq_m2r (*(block+1*8+4), mm3);
+    movq_r2m (mm0, *dest);
+    packuswb_r2r (mm3, mm2);
+    COPY_MMX (2*8, mm0, mm1, mm2);
+    COPY_MMX (3*8, mm2, mm3, mm0);
+    COPY_MMX (4*8, mm0, mm1, mm2);
+    COPY_MMX (5*8, mm2, mm3, mm0);
+    COPY_MMX (6*8, mm0, mm1, mm2);
+    COPY_MMX (7*8, mm2, mm3, mm0);
+    /* final row was packed by the last COPY_MMX but not yet stored */
+    movq_r2m (mm2, *(dest+stride));
+}
+
+/* Add two rows of IDCT coefficients (block0, block1 — registers or memory,
+ * selected by op = r2r/m2r) to two destination rows, with unsigned byte
+ * saturation.  xmm0 must be zero (used to widen dest bytes to words).
+ * Advances dest by two rows. */
+#define ADD_SSE2_2ROW(op, block0, block1)\
+do { \
+    movq_m2r (*(dest), xmm1); \
+    movq_m2r (*(dest+stride), xmm2); \
+    punpcklbw_r2r (xmm0, xmm1); \
+    punpcklbw_r2r (xmm0, xmm2); \
+    paddsw_##op (block0, xmm1); \
+    paddsw_##op (block1, xmm2); \
+    packuswb_r2r (xmm1, xmm1); \
+    packuswb_r2r (xmm2, xmm2); \
+    movq_r2m (xmm1, *(dest)); \
+    movq_r2m (xmm2, *(dest+stride)); \
+    dest += 2*stride; \
+} while (0)
+
+/* Add the 8x8 IDCT result to the prediction already in dest (saturating).
+ * On x86-64 the coefficients are still in xmm8..xmm15 from sse2_idct;
+ * on 32-bit x86 they are read back from block[]. */
+static void sse2_block_add (int16_t * const block, uint8_t * dest,
+                            const int stride)
+{
+    pxor_r2r(xmm0, xmm0);	/* zero for byte->word unpacking */
+#if defined(__x86_64__)
+    /* INPUT: block in xmm8 ... xmm15 */
+    ADD_SSE2_2ROW(r2r, xmm8, xmm9);
+    ADD_SSE2_2ROW(r2r, xmm10, xmm11);
+    ADD_SSE2_2ROW(r2r, xmm12, xmm13);
+    ADD_SSE2_2ROW(r2r, xmm14, xmm15);
+#else
+    ADD_SSE2_2ROW(m2r, *(block+0*8), *(block+1*8));
+    ADD_SSE2_2ROW(m2r, *(block+2*8), *(block+3*8));
+    ADD_SSE2_2ROW(m2r, *(block+4*8), *(block+5*8));
+    ADD_SSE2_2ROW(m2r, *(block+6*8), *(block+7*8));
+#endif
+}
+
+/* Pipelined MMX add step: stores the previously summed row (r3:r4, packed
+ * here) and begins the next row — loads dest+2*stride, widens it with the
+ * zero in mm0, and adds the coefficients at block+offset into r1:r2.
+ * mm0 must be zero; registers rotate across invocations in block_add. */
+#define ADD_MMX(offset,r1,r2,r3,r4) \
+do { \
+    movq_m2r (*(dest+2*stride), r1); \
+    packuswb_r2r (r4, r3); \
+    movq_r2r (r1, r2); \
+    dest += stride; \
+    movq_r2m (r3, *dest); \
+    punpcklbw_r2r (mm0, r1); \
+    paddsw_m2r (*(block+offset), r1); \
+    punpckhbw_r2r (mm0, r2); \
+    paddsw_m2r (*(block+offset+4), r2); \
+} while (0)
+
+/* MMX version of sse2_block_add: add the 8x8 IDCT coefficients in block[]
+ * to the prediction in dest, with unsigned byte saturation.  First two rows
+ * are done inline, then ADD_MMX with rotating register pairs. */
+static inline void block_add (int16_t * const block, uint8_t * dest,
+                              const int stride)
+{
+    movq_m2r (*dest, mm1);
+    pxor_r2r (mm0, mm0);	/* mm0 = 0, required by ADD_MMX */
+    movq_m2r (*(dest+stride), mm3);
+    movq_r2r (mm1, mm2);
+    punpcklbw_r2r (mm0, mm1);
+    movq_r2r (mm3, mm4);
+    paddsw_m2r (*(block+0*8), mm1);
+    punpckhbw_r2r (mm0, mm2);
+    paddsw_m2r (*(block+0*8+4), mm2);
+    punpcklbw_r2r (mm0, mm3);
+    paddsw_m2r (*(block+1*8), mm3);
+    packuswb_r2r (mm2, mm1);
+    punpckhbw_r2r (mm0, mm4);
+    movq_r2m (mm1, *dest);
+    paddsw_m2r (*(block+1*8+4), mm4);
+    ADD_MMX (2*8, mm1, mm2, mm3, mm4);
+    ADD_MMX (3*8, mm3, mm4, mm1, mm2);
+    ADD_MMX (4*8, mm1, mm2, mm3, mm4);
+    ADD_MMX (5*8, mm3, mm4, mm1, mm2);
+    ADD_MMX (6*8, mm1, mm2, mm3, mm4);
+    ADD_MMX (7*8, mm3, mm4, mm1, mm2);
+    /* last row summed by the final ADD_MMX still needs packing + store */
+    packuswb_r2r (mm4, mm3);
+    movq_r2m (mm3, *(dest+stride));
+}
+
+
+/* Clear the 8x8 coefficient block (128 bytes) with eight 16-byte stores,
+ * readying it for the next macroblock.  block must be 16-byte aligned. */
+static inline void sse2_block_zero (int16_t * const block)
+{
+    pxor_r2r (xmm0, xmm0);
+    movdqa_r2m (xmm0, *(block+0*8));
+    movdqa_r2m (xmm0, *(block+1*8));
+    movdqa_r2m (xmm0, *(block+2*8));
+    movdqa_r2m (xmm0, *(block+3*8));
+    movdqa_r2m (xmm0, *(block+4*8));
+    movdqa_r2m (xmm0, *(block+5*8));
+    movdqa_r2m (xmm0, *(block+6*8));
+    movdqa_r2m (xmm0, *(block+7*8));
+}
+
+/* MMX version of sse2_block_zero: clear the 128-byte coefficient block
+ * with sixteen 8-byte stores. */
+static inline void block_zero (int16_t * const block)
+{
+    pxor_r2r (mm0, mm0);
+    movq_r2m (mm0, *(block+0*4));
+    movq_r2m (mm0, *(block+1*4));
+    movq_r2m (mm0, *(block+2*4));
+    movq_r2m (mm0, *(block+3*4));
+    movq_r2m (mm0, *(block+4*4));
+    movq_r2m (mm0, *(block+5*4));
+    movq_r2m (mm0, *(block+6*4));
+    movq_r2m (mm0, *(block+7*4));
+    movq_r2m (mm0, *(block+8*4));
+    movq_r2m (mm0, *(block+9*4));
+    movq_r2m (mm0, *(block+10*4));
+    movq_r2m (mm0, *(block+11*4));
+    movq_r2m (mm0, *(block+12*4));
+    movq_r2m (mm0, *(block+13*4));
+    movq_r2m (mm0, *(block+14*4));
+    movq_r2m (mm0, *(block+15*4));
+}
+
+
+/* cpu selector values for block_add_DC below */
+#define CPU_MMXEXT 0
+#define CPU_MMX 1
+
+/* Broadcast the low 16-bit word of reg to all four words.  MMXEXT has
+ * pshufw for this; plain MMX needs a two-step unpack sequence. */
+#define dup4(reg) \
+do { \
+    if (cpu != CPU_MMXEXT) { \
+	punpcklwd_r2r (reg, reg); \
+	punpckldq_r2r (reg, reg); \
+    } else \
+	pshufw_r2r (reg, reg, 0x00); \
+} while (0)
+
+/* Fast path for DC-only blocks: add the single DC value (block[0], rounded
+ * and descaled by >>7) to all 64 destination pixels with unsigned
+ * saturation.  A positive DC is added via paddusb (mm0 holds the clamped
+ * positive part); a negative DC is applied via psubusb (mm1 holds the
+ * clamped negated part) — only one of the two has a nonzero effect.
+ * Also clears block[0] and block[63] (the only coefficients the caller
+ * may have set) so the block is zeroed for reuse. */
+static inline void block_add_DC (int16_t * const block, uint8_t * dest,
+                                 const int stride, const int cpu)
+{
+    movd_v2r ((block[0] + 64) >> 7, mm0);	/* round then descale DC */
+    pxor_r2r (mm1, mm1);
+    movq_m2r (*dest, mm2);
+    dup4 (mm0);
+    psubsw_r2r (mm0, mm1);	/* mm1 = -DC (signed saturated) */
+    packuswb_r2r (mm0, mm0);	/* mm0 = max(DC,0) in every byte */
+    paddusb_r2r (mm0, mm2);
+    packuswb_r2r (mm1, mm1);	/* mm1 = max(-DC,0) in every byte */
+    movq_m2r (*(dest + stride), mm3);
+    psubusb_r2r (mm1, mm2);
+    block[0] = 0;
+    paddusb_r2r (mm0, mm3);
+    movq_r2m (mm2, *dest);
+    psubusb_r2r (mm1, mm3);
+    movq_m2r (*(dest + 2*stride), mm2);
+    dest += stride;
+    movq_r2m (mm3, *dest);
+    paddusb_r2r (mm0, mm2);
+    movq_m2r (*(dest + 2*stride), mm3);
+    psubusb_r2r (mm1, mm2);
+    dest += stride;
+    paddusb_r2r (mm0, mm3);
+    movq_r2m (mm2, *dest);
+    psubusb_r2r (mm1, mm3);
+    movq_m2r (*(dest + 2*stride), mm2);
+    dest += stride;
+    movq_r2m (mm3, *dest);
+    paddusb_r2r (mm0, mm2);
+    movq_m2r (*(dest + 2*stride), mm3);
+    psubusb_r2r (mm1, mm2);
+    dest += stride;
+    paddusb_r2r (mm0, mm3);
+    movq_r2m (mm2, *dest);
+    psubusb_r2r (mm1, mm3);
+    movq_m2r (*(dest + 2*stride), mm2);
+    dest += stride;
+    movq_r2m (mm3, *dest);
+    paddusb_r2r (mm0, mm2);
+    movq_m2r (*(dest + 2*stride), mm3);
+    psubusb_r2r (mm1, mm2);
+    block[63] = 0;
+    paddusb_r2r (mm0, mm3);
+    movq_r2m (mm2, *(dest + stride));
+    psubusb_r2r (mm1, mm3);
+    movq_r2m (mm3, *(dest + 2*stride));
+}
+
+/* SSE2 entry point: inverse-transform block[] and write the result to dest
+ * (intra block — no prediction), then clear block[] for reuse. */
+void mpeg2_idct_copy_sse2 (int16_t * const block, uint8_t * const dest,
+                           const int stride)
+{
+    sse2_idct (block);
+    sse2_block_copy (block, dest, stride);
+    sse2_block_zero (block);
+}
+
+/* SSE2 entry point: inverse-transform block[] and add it to the prediction
+ * in dest.  The slice decoder signals a DC-only block with last == 129; the
+ * extra test on block[0] bits 4..6 rejects DC values for which the fast
+ * path's shortcut would not match the full IDCT (mismatch control). */
+void mpeg2_idct_add_sse2 (const int last, int16_t * const block,
+                          uint8_t * const dest, const int stride)
+{
+    if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) {
+	sse2_idct (block);
+	sse2_block_add (block, dest, stride);
+	sse2_block_zero (block);
+    } else
+	block_add_DC (block, dest, stride, CPU_MMXEXT);
+}
+
+
+/* Instantiate the shared IDCT template with the mmxext row transforms. */
+declare_idct (mmxext_idct, mmxext_table,
+	      mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)
+
+/* MMXEXT entry point: IDCT + store for intra blocks, then clear block[]. */
+void mpeg2_idct_copy_mmxext (int16_t * const block, uint8_t * const dest,
+                             const int stride)
+{
+    mmxext_idct (block);
+    block_copy (block, dest, stride);
+    block_zero (block);
+}
+
+/* MMXEXT entry point: IDCT + add to prediction; takes the DC-only fast
+ * path when the slice decoder flags it (last == 129, see sse2 variant). */
+void mpeg2_idct_add_mmxext (const int last, int16_t * const block,
+                            uint8_t * const dest, const int stride)
+{
+    if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) {
+	mmxext_idct (block);
+	block_add (block, dest, stride);
+	block_zero (block);
+    } else
+	block_add_DC (block, dest, stride, CPU_MMXEXT);
+}
+
+
+/* Instantiate the shared IDCT template with the plain-MMX row transforms. */
+declare_idct (mmx_idct, mmx_table,
+	      mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid)
+
+/* MMX entry point: IDCT + store for intra blocks, then clear block[]. */
+void mpeg2_idct_copy_mmx (int16_t * const block, uint8_t * const dest,
+                          const int stride)
+{
+    mmx_idct (block);
+    block_copy (block, dest, stride);
+    block_zero (block);
+}
+
+/* MMX entry point: IDCT + add to prediction; DC-only fast path uses the
+ * pshufw-free dup4 variant (CPU_MMX). */
+void mpeg2_idct_add_mmx (const int last, int16_t * const block,
+                         uint8_t * const dest, const int stride)
+{
+    if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) {
+	mmx_idct (block);
+	block_add (block, dest, stride);
+	block_zero (block);
+    } else
+	block_add_DC (block, dest, stride, CPU_MMX);
+}
+
+
+/* One-time init: the mmx/mmxext IDCT consumes coefficients in a permuted
+ * order (words within each quadword swapped: bit pattern abc -> cab in the
+ * low three bits of the scan position), so the zig-zag and alternate scan
+ * tables are permuted here once instead of reordering every block. */
+void mpeg2_idct_mmx_init (void)
+{
+    int i, j;
+
+    /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
+
+    for (i = 0; i < 64; i++) {
+	j = mpeg2_scan_norm[i];
+	mpeg2_scan_norm[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
+	j = mpeg2_scan_alt[i];
+	mpeg2_scan_alt[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
+    }
+}
+
+#endif
diff --git a/libmpeg2/libmpeg2.pc.in b/libmpeg2/libmpeg2.pc.in
new file mode 100644
index 0000000..9c852f1
--- /dev/null
+++ b/libmpeg2/libmpeg2.pc.in
@@ -0,0 +1,10 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: libmpeg2
+Description: MPEG-1 and MPEG-2 stream decoding library
+Version: @VERSION@
+Libs: -L${libdir} -lmpeg2
+Cflags: -I${includedir}/mpeg2dec
diff --git a/libmpeg2/motion_comp.c b/libmpeg2/motion_comp.c
new file mode 100644
index 0000000..7aed113
--- /dev/null
+++ b/libmpeg2/motion_comp.c
@@ -0,0 +1,135 @@
+/*
+ * motion_comp.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "attributes.h"
+#include "mpeg2_internal.h"
+
+/* Global motion-compensation dispatch table, filled in by mpeg2_mc_init. */
+mpeg2_mc_t mpeg2_mc;
+
+/* Select the motion-compensation implementation matching the accel flags.
+ * Each #ifdef'd chain ends in "else", so control falls through to the
+ * portable C implementation when no accelerated variant applies. */
+void mpeg2_mc_init (uint32_t accel)
+{
+#ifdef ARCH_X86
+    if (accel & MPEG2_ACCEL_X86_MMXEXT)
+	mpeg2_mc = mpeg2_mc_mmxext;
+    else if (accel & MPEG2_ACCEL_X86_3DNOW)
+	mpeg2_mc = mpeg2_mc_3dnow;
+    else if (accel & MPEG2_ACCEL_X86_MMX)
+	mpeg2_mc = mpeg2_mc_mmx;
+    else
+#endif
+#ifdef ARCH_PPC
+    if (accel & MPEG2_ACCEL_PPC_ALTIVEC)
+	mpeg2_mc = mpeg2_mc_altivec;
+    else
+#endif
+#ifdef ARCH_ALPHA
+    if (accel & MPEG2_ACCEL_ALPHA)
+	mpeg2_mc = mpeg2_mc_alpha;
+    else
+#endif
+#ifdef ARCH_SPARC
+    if (accel & MPEG2_ACCEL_SPARC_VIS)
+	mpeg2_mc = mpeg2_mc_vis;
+    else
+#endif
+#ifdef ARCH_ARM
+    if (accel & MPEG2_ACCEL_ARM) {
+	mpeg2_mc = mpeg2_mc_arm;
+    } else
+#endif
+	mpeg2_mc = mpeg2_mc_c;
+}
+
+/* Rounded averages of 2 and 4 pixels (MPEG-2 half-pel interpolation). */
+#define avg2(a,b) ((a+b+1)>>1)
+#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
+
+/* Predictors for the four half-pel cases: full-pel (o), horizontal
+ * half-pel (x), vertical half-pel (y), and both (xy). */
+#define predict_o(i) (ref[i])
+#define predict_x(i) (avg2 (ref[i], ref[i+1]))
+#define predict_y(i) (avg2 (ref[i], (ref+stride)[i]))
+#define predict_xy(i) (avg4 (ref[i], ref[i+1], \
+			     (ref+stride)[i], (ref+stride)[i+1]))
+
+/* Write modes: "put" overwrites dest, "avg" blends with existing dest
+ * (used for B-frame bidirectional prediction). */
+#define put(predictor,i) dest[i] = predictor (i)
+#define avg(predictor,i) dest[i] = avg2 (predictor (i), dest[i])
+
+/* mc function template */
+
+/* Template generating a pair of C motion-compensation functions,
+ * MC_<op>_<xy>_16_c and MC_<op>_<xy>_8_c, for 16- and 8-pixel-wide
+ * blocks.  The per-pixel loop is fully unrolled across the row; height
+ * rows are processed, advancing ref and dest by stride each row. */
+#define MC_FUNC(op,xy) \
+static void MC_##op##_##xy##_16_c (uint8_t * dest, const uint8_t * ref, \
+				   const int stride, int height) \
+{ \
+    do { \
+	op (predict_##xy, 0); \
+	op (predict_##xy, 1); \
+	op (predict_##xy, 2); \
+	op (predict_##xy, 3); \
+	op (predict_##xy, 4); \
+	op (predict_##xy, 5); \
+	op (predict_##xy, 6); \
+	op (predict_##xy, 7); \
+	op (predict_##xy, 8); \
+	op (predict_##xy, 9); \
+	op (predict_##xy, 10); \
+	op (predict_##xy, 11); \
+	op (predict_##xy, 12); \
+	op (predict_##xy, 13); \
+	op (predict_##xy, 14); \
+	op (predict_##xy, 15); \
+	ref += stride; \
+	dest += stride; \
+    } while (--height); \
+} \
+static void MC_##op##_##xy##_8_c (uint8_t * dest, const uint8_t * ref, \
+				  const int stride, int height) \
+{ \
+    do { \
+	op (predict_##xy, 0); \
+	op (predict_##xy, 1); \
+	op (predict_##xy, 2); \
+	op (predict_##xy, 3); \
+	op (predict_##xy, 4); \
+	op (predict_##xy, 5); \
+	op (predict_##xy, 6); \
+	op (predict_##xy, 7); \
+	ref += stride; \
+	dest += stride; \
+    } while (--height); \
+}
+
+/* definitions of the actual mc functions */
+
+/* Instantiate all 16 put/avg x full/half-pel combinations, then emit the
+ * mpeg2_mc_c dispatch table referencing them (MPEG2_MC_EXTERN expands to
+ * the table definition for the "_c" suffix). */
+MC_FUNC (put,o)
+MC_FUNC (avg,o)
+MC_FUNC (put,x)
+MC_FUNC (avg,x)
+MC_FUNC (put,y)
+MC_FUNC (avg,y)
+MC_FUNC (put,xy)
+MC_FUNC (avg,xy)
+
+MPEG2_MC_EXTERN (c)
diff --git a/libmpeg2/motion_comp_alpha.c b/libmpeg2/motion_comp_alpha.c
new file mode 100644
index 0000000..05cd550
--- /dev/null
+++ b/libmpeg2/motion_comp_alpha.c
@@ -0,0 +1,253 @@
+/*
+ * motion_comp_alpha.c
+ * Copyright (C) 2002-2003 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_ALPHA
+
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "attributes.h"
+#include "mpeg2_internal.h"
+#include "alpha_asm.h"
+
+/* Rounded byte-wise average of eight packed pixels without carry between
+ * bytes: (a|b) - (((a^b) & 0xfe..fe) >> 1) equals (a+b+1)>>1 per byte. */
+static inline uint64_t avg2 (uint64_t a, uint64_t b)
+{
+    return (a | b) - (((a ^ b) & BYTE_VEC (0xfe)) >> 1);
+}
+
+// Load two unaligned quadwords from addr. This macro only works if
+// addr is actually unaligned.
+// Load two unaligned quadwords from addr. This macro only works if
+// addr is actually unaligned (three ldq_u loads + extql/extqh merging).
+#define ULOAD16(ret_l,ret_r,addr) \
+    do { \
+	uint64_t _l = ldq_u (addr + 0); \
+	uint64_t _m = ldq_u (addr + 8); \
+	uint64_t _r = ldq_u (addr + 16); \
+	ret_l = extql (_l, addr) | extqh (_m, addr); \
+	ret_r = extql (_m, addr) | extqh (_r, addr); \
+    } while (0)
+
+// Load two aligned quadwords from addr.
+#define ALOAD16(ret_l,ret_r,addr) \
+    do { \
+	ret_l = ldq (addr); \
+	ret_r = ldq (addr + 8); \
+    } while (0)
+
+/* Row-loop bodies for each (width, half-pel) combination, parameterized by
+ * a single-quadword LOAD, a 16-byte LOAD16 and a STORE (put or avg).
+ * OP8/OP16: full-pel copy.  *_X2: horizontal half-pel (shift-merge the
+ * next byte, avg2).  *_Y2: vertical half-pel, software-pipelined so each
+ * source row is loaded once (line_size is zeroed on the last iteration to
+ * avoid reading past the end).  *_XY2: 4-pixel rounded average computed
+ * per byte as high-bits sum plus carried low 2 bits, also pipelined.
+ * All consume the variables pixels, block, line_size, h of MAKE_OP. */
+#define OP8(LOAD,LOAD16,STORE) \
+    do { \
+	STORE (LOAD (pixels), block); \
+	pixels += line_size; \
+	block += line_size; \
+    } while (--h)
+
+#define OP16(LOAD,LOAD16,STORE) \
+    do { \
+	uint64_t l, r; \
+	LOAD16 (l, r, pixels); \
+	STORE (l, block); \
+	STORE (r, block + 8); \
+	pixels += line_size; \
+	block += line_size; \
+    } while (--h)
+
+#define OP8_X2(LOAD,LOAD16,STORE) \
+    do { \
+	uint64_t p0, p1; \
+ \
+	p0 = LOAD (pixels); \
+	p1 = p0 >> 8 | ((uint64_t) pixels[8] << 56); \
+	STORE (avg2 (p0, p1), block); \
+	pixels += line_size; \
+	block += line_size; \
+    } while (--h)
+
+#define OP16_X2(LOAD,LOAD16,STORE) \
+    do { \
+	uint64_t p0, p1; \
+ \
+	LOAD16 (p0, p1, pixels); \
+	STORE (avg2(p0, p0 >> 8 | p1 << 56), block); \
+	STORE (avg2(p1, p1 >> 8 | (uint64_t) pixels[16] << 56), \
+	       block + 8); \
+	pixels += line_size; \
+	block += line_size; \
+    } while (--h)
+
+#define OP8_Y2(LOAD,LOAD16,STORE) \
+    do { \
+	uint64_t p0, p1; \
+	p0 = LOAD (pixels); \
+	pixels += line_size; \
+	p1 = LOAD (pixels); \
+	do { \
+	    uint64_t av = avg2 (p0, p1); \
+	    if (--h == 0) line_size = 0; \
+	    pixels += line_size; \
+	    p0 = p1; \
+	    p1 = LOAD (pixels); \
+	    STORE (av, block); \
+	    block += line_size; \
+	} while (h); \
+    } while (0)
+
+#define OP16_Y2(LOAD,LOAD16,STORE) \
+    do { \
+	uint64_t p0l, p0r, p1l, p1r; \
+	LOAD16 (p0l, p0r, pixels); \
+	pixels += line_size; \
+	LOAD16 (p1l, p1r, pixels); \
+	do { \
+	    uint64_t avl, avr; \
+	    if (--h == 0) line_size = 0; \
+	    avl = avg2 (p0l, p1l); \
+	    avr = avg2 (p0r, p1r); \
+	    p0l = p1l; \
+	    p0r = p1r; \
+	    pixels += line_size; \
+	    LOAD16 (p1l, p1r, pixels); \
+	    STORE (avl, block); \
+	    STORE (avr, block + 8); \
+	    block += line_size; \
+	} while (h); \
+    } while (0)
+
+#define OP8_XY2(LOAD,LOAD16,STORE) \
+    do { \
+	uint64_t pl, ph; \
+	uint64_t p1 = LOAD (pixels); \
+	uint64_t p2 = p1 >> 8 | ((uint64_t) pixels[8] << 56); \
+ \
+	ph = (((p1 & ~BYTE_VEC (0x03)) >> 2) + \
+	      ((p2 & ~BYTE_VEC (0x03)) >> 2)); \
+	pl = ((p1 & BYTE_VEC (0x03)) + \
+	      (p2 & BYTE_VEC (0x03))); \
+ \
+	do { \
+	    uint64_t npl, nph; \
+ \
+	    pixels += line_size; \
+	    p1 = LOAD (pixels); \
+	    p2 = (p1 >> 8) | ((uint64_t) pixels[8] << 56); \
+	    nph = (((p1 & ~BYTE_VEC (0x03)) >> 2) + \
+		   ((p2 & ~BYTE_VEC (0x03)) >> 2)); \
+	    npl = ((p1 & BYTE_VEC (0x03)) + \
+		   (p2 & BYTE_VEC (0x03))); \
+ \
+	    STORE (ph + nph + \
+		   (((pl + npl + BYTE_VEC (0x02)) >> 2) & \
+		    BYTE_VEC (0x03)), block); \
+ \
+	    block += line_size; \
+	    pl = npl; \
+	    ph = nph; \
+	} while (--h); \
+    } while (0)
+
+#define OP16_XY2(LOAD,LOAD16,STORE) \
+    do { \
+	uint64_t p0, p1, p2, p3, pl_l, ph_l, pl_r, ph_r; \
+	LOAD16 (p0, p2, pixels); \
+	p1 = p0 >> 8 | (p2 << 56); \
+	p3 = p2 >> 8 | ((uint64_t)pixels[16] << 56); \
+ \
+	ph_l = (((p0 & ~BYTE_VEC (0x03)) >> 2) + \
+		((p1 & ~BYTE_VEC (0x03)) >> 2)); \
+	pl_l = ((p0 & BYTE_VEC (0x03)) + \
+		(p1 & BYTE_VEC(0x03))); \
+	ph_r = (((p2 & ~BYTE_VEC (0x03)) >> 2) + \
+		((p3 & ~BYTE_VEC (0x03)) >> 2)); \
+	pl_r = ((p2 & BYTE_VEC (0x03)) + \
+		(p3 & BYTE_VEC (0x03))); \
+ \
+	do { \
+	    uint64_t npl_l, nph_l, npl_r, nph_r; \
+ \
+	    pixels += line_size; \
+	    LOAD16 (p0, p2, pixels); \
+	    p1 = p0 >> 8 | (p2 << 56); \
+	    p3 = p2 >> 8 | ((uint64_t)pixels[16] << 56); \
+	    nph_l = (((p0 & ~BYTE_VEC (0x03)) >> 2) + \
+		     ((p1 & ~BYTE_VEC (0x03)) >> 2)); \
+	    npl_l = ((p0 & BYTE_VEC (0x03)) + \
+		     (p1 & BYTE_VEC (0x03))); \
+	    nph_r = (((p2 & ~BYTE_VEC (0x03)) >> 2) + \
+		     ((p3 & ~BYTE_VEC (0x03)) >> 2)); \
+	    npl_r = ((p2 & BYTE_VEC (0x03)) + \
+		     (p3 & BYTE_VEC (0x03))); \
+ \
+	    STORE (ph_l + nph_l + \
+		   (((pl_l + npl_l + BYTE_VEC (0x02)) >> 2) & \
+		    BYTE_VEC(0x03)), block); \
+	    STORE (ph_r + nph_r + \
+		   (((pl_r + npl_r + BYTE_VEC (0x02)) >> 2) & \
+		    BYTE_VEC(0x03)), block + 8); \
+ \
+	    block += line_size; \
+	    pl_l = npl_l; \
+	    ph_l = nph_l; \
+	    pl_r = npl_r; \
+	    ph_r = nph_r; \
+	} while (--h); \
+    } while (0)
+
+/* Wrap an OP* body in a function MC_<op>_<suff>_<size>_alpha, dispatching
+ * at runtime on source alignment to the aligned (ldq/ALOAD16) or
+ * unaligned (uldq/ULOAD16) load variants. */
+#define MAKE_OP(OPNAME,SIZE,SUFF,OPKIND,STORE) \
+static void MC_ ## OPNAME ## _ ## SUFF ## _ ## SIZE ## _alpha \
+	(uint8_t *restrict block, const uint8_t *restrict pixels, \
+	 int line_size, int h) \
+{ \
+    if ((uint64_t) pixels & 0x7) { \
+	OPKIND (uldq, ULOAD16, STORE); \
+    } else { \
+	OPKIND (ldq, ALOAD16, STORE); \
+    } \
+}
+
+/* Instantiate all eight width/half-pel variants for one write mode. */
+#define PIXOP(OPNAME,STORE) \
+    MAKE_OP (OPNAME, 8, o, OP8, STORE); \
+    MAKE_OP (OPNAME, 8, x, OP8_X2, STORE); \
+    MAKE_OP (OPNAME, 8, y, OP8_Y2, STORE); \
+    MAKE_OP (OPNAME, 8, xy, OP8_XY2, STORE); \
+    MAKE_OP (OPNAME, 16, o, OP16, STORE); \
+    MAKE_OP (OPNAME, 16, x, OP16_X2, STORE); \
+    MAKE_OP (OPNAME, 16, y, OP16_Y2, STORE); \
+    MAKE_OP (OPNAME, 16, xy, OP16_XY2, STORE);
+
+/* "put" stores directly; "avg" blends with the existing destination. */
+#define STORE(l,b) stq (l, b)
+PIXOP (put, STORE);
+#undef STORE
+#define STORE(l,b) stq (avg2 (l, ldq (b)), b);
+PIXOP (avg, STORE);
+
+/* Dispatch table exported to mpeg2_mc_init: put then avg, each ordered
+ * o/x/y/xy at width 16 followed by width 8. */
+mpeg2_mc_t mpeg2_mc_alpha = {
+    { MC_put_o_16_alpha, MC_put_x_16_alpha,
+      MC_put_y_16_alpha, MC_put_xy_16_alpha,
+      MC_put_o_8_alpha, MC_put_x_8_alpha,
+      MC_put_y_8_alpha, MC_put_xy_8_alpha },
+    { MC_avg_o_16_alpha, MC_avg_x_16_alpha,
+      MC_avg_y_16_alpha, MC_avg_xy_16_alpha,
+      MC_avg_o_8_alpha, MC_avg_x_8_alpha,
+      MC_avg_y_8_alpha, MC_avg_xy_8_alpha }
+};
+
+#endif
diff --git a/libmpeg2/motion_comp_altivec.c b/libmpeg2/motion_comp_altivec.c
new file mode 100644
index 0000000..4356aa6
--- /dev/null
+++ b/libmpeg2/motion_comp_altivec.c
@@ -0,0 +1,1010 @@
+/*
+ * motion_comp_altivec.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_PPC
+
+#ifdef HAVE_ALTIVEC_H
+#include <altivec.h>
+#endif
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "attributes.h"
+#include "mpeg2_internal.h"
+
+/* Shorthand typedefs for the AltiVec 128-bit vector types used below. */
+typedef vector signed char vector_s8_t;
+typedef vector unsigned char vector_u8_t;
+typedef vector signed short vector_s16_t;
+typedef vector unsigned short vector_u16_t;
+typedef vector signed int vector_s32_t;
+typedef vector unsigned int vector_u32_t;
+
+#ifndef COFFEE_BREAK /* Workarounds for gcc suckage */
+
+/* Wrappers that pin down the argument/return types of vec_ld, vec_and and
+ * vec_avg, then shadow the generic builtins — works around overload
+ * resolution problems in some gcc versions (see COFFEE_BREAK guard). */
+static inline vector_u8_t my_vec_ld (int const A, const uint8_t * const B)
+{
+    return vec_ld (A, (uint8_t *)B);
+}
+#undef vec_ld
+#define vec_ld my_vec_ld
+
+static inline vector_u8_t my_vec_and (vector_u8_t const A, vector_u8_t const B)
+{
+    return vec_and (A, B);
+}
+#undef vec_and
+#define vec_and my_vec_and
+
+static inline vector_u8_t my_vec_avg (vector_u8_t const A, vector_u8_t const B)
+{
+    return vec_avg (A, B);
+}
+#undef vec_avg
+#define vec_avg my_vec_avg
+
+#endif
+
+/* Full-pel 16-wide copy: unaligned ref rows are realigned with a vec_lvsl
+ * permute of two overlapping loads; the loop is software-pipelined two
+ * rows per iteration, with the final two rows handled after the loop
+ * (hence height/2 - 1 iterations). */
+static void MC_put_o_16_altivec (uint8_t * dest, const uint8_t * ref,
+				 const int stride, int height)
+{
+    vector_u8_t perm, ref0, ref1, tmp;
+
+    perm = vec_lvsl (0, ref);
+
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);
+    ref += stride;
+    tmp = vec_perm (ref0, ref1, perm);
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (15, ref);
+	ref += stride;
+	vec_st (tmp, 0, dest);
+	tmp = vec_perm (ref0, ref1, perm);
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (15, ref);
+	ref += stride;
+	vec_st (tmp, stride, dest);
+	dest += 2*stride;
+	tmp = vec_perm (ref0, ref1, perm);
+    } while (--height);
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);
+    vec_st (tmp, 0, dest);
+    tmp = vec_perm (ref0, ref1, perm);
+    vec_st (tmp, stride, dest);
+}
+
+/* Full-pel 8-wide copy.  dest rows are written as two 4-byte vec_ste
+ * stores; separate permutes are precomputed for even/odd rows since ref's
+ * alignment alternates with stride.  Pipelined two rows per iteration. */
+static void MC_put_o_8_altivec (uint8_t * dest, const uint8_t * ref,
+				const int stride, int height)
+{
+    vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1;
+
+    tmp0 = vec_lvsl (0, ref);
+    tmp0 = vec_mergeh (tmp0, tmp0);
+    perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
+    tmp1 = vec_lvsl (stride, ref);
+    tmp1 = vec_mergeh (tmp1, tmp1);
+    perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
+
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    ref += stride;
+    tmp0 = vec_perm (ref0, ref1, perm0);
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (7, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp1 = vec_perm (ref0, ref1, perm1);
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (7, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp0 = vec_perm (ref0, ref1, perm0);
+    } while (--height);
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
+    dest += stride;
+    tmp1 = vec_perm (ref0, ref1, perm1);
+    vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+}
+
+/* Horizontal half-pel 16-wide: average each row with itself shifted one
+ * byte (permB = permA + 1), using vec_avg's built-in rounding.
+ * Pipelined two rows per iteration like MC_put_o_16. */
+static void MC_put_x_16_altivec (uint8_t * dest, const uint8_t * ref,
+				 const int stride, int height)
+{
+    vector_u8_t permA, permB, ref0, ref1, tmp;
+
+    permA = vec_lvsl (0, ref);
+    permB = vec_add (permA, vec_splat_u8 (1));
+
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);
+    ref += stride;
+    tmp = vec_avg (vec_perm (ref0, ref1, permA),
+		   vec_perm (ref0, ref1, permB));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (16, ref);
+	ref += stride;
+	vec_st (tmp, 0, dest);
+	tmp = vec_avg (vec_perm (ref0, ref1, permA),
+		       vec_perm (ref0, ref1, permB));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (16, ref);
+	ref += stride;
+	vec_st (tmp, stride, dest);
+	dest += 2*stride;
+	tmp = vec_avg (vec_perm (ref0, ref1, permA),
+		       vec_perm (ref0, ref1, permB));
+    } while (--height);
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);
+    vec_st (tmp, 0, dest);
+    tmp = vec_avg (vec_perm (ref0, ref1, permA),
+		   vec_perm (ref0, ref1, permB));
+    vec_st (tmp, stride, dest);
+}
+
+/* Horizontal half-pel 8-wide: like MC_put_x_16 but with per-row permute
+ * pairs (A and A+1) for the alternating alignment of even/odd rows, and
+ * 4-byte vec_ste stores. */
+static void MC_put_x_8_altivec (uint8_t * dest, const uint8_t * ref,
+				const int stride, int height)
+{
+    vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
+
+    ones = vec_splat_u8 (1);
+    tmp0 = vec_lvsl (0, ref);
+    tmp0 = vec_mergeh (tmp0, tmp0);
+    perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
+    perm0B = vec_add (perm0A, ones);
+    tmp1 = vec_lvsl (stride, ref);
+    tmp1 = vec_mergeh (tmp1, tmp1);
+    perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
+    perm1B = vec_add (perm1A, ones);
+
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);
+    ref += stride;
+    tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
+		    vec_perm (ref0, ref1, perm0B));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
+			vec_perm (ref0, ref1, perm1B));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A),
+			vec_perm (ref0, ref1, perm0B));
+    } while (--height);
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);
+    vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
+    dest += stride;
+    tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A),
+		    vec_perm (ref0, ref1, perm1B));
+    vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+}
+
+/* Vertical half-pel 16-wide: average each row with the next one.  The
+ * pipeline keeps the previous row in tmp0/tmp1 (alternating) so each ref
+ * row is loaded only once. */
+static void MC_put_y_16_altivec (uint8_t * dest, const uint8_t * ref,
+				 const int stride, int height)
+{
+    vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp;
+
+    perm = vec_lvsl (0, ref);
+
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);
+    ref += stride;
+    tmp0 = vec_perm (ref0, ref1, perm);
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);
+    ref += stride;
+    tmp1 = vec_perm (ref0, ref1, perm);
+    tmp = vec_avg (tmp0, tmp1);
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (15, ref);
+	ref += stride;
+	vec_st (tmp, 0, dest);
+	tmp0 = vec_perm (ref0, ref1, perm);
+	tmp = vec_avg (tmp0, tmp1);
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (15, ref);
+	ref += stride;
+	vec_st (tmp, stride, dest);
+	dest += 2*stride;
+	tmp1 = vec_perm (ref0, ref1, perm);
+	tmp = vec_avg (tmp0, tmp1);
+    } while (--height);
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (15, ref);
+    vec_st (tmp, 0, dest);
+    tmp0 = vec_perm (ref0, ref1, perm);
+    tmp = vec_avg (tmp0, tmp1);
+    vec_st (tmp, stride, dest);
+}
+
+/* Vertical half-pel 8-wide: as MC_put_y_16, with even/odd-row permutes
+ * and 4-byte vec_ste stores. */
+static void MC_put_y_8_altivec (uint8_t * dest, const uint8_t * ref,
+				const int stride, int height)
+{
+    vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1;
+
+    tmp0 = vec_lvsl (0, ref);
+    tmp0 = vec_mergeh (tmp0, tmp0);
+    perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
+    tmp1 = vec_lvsl (stride, ref);
+    tmp1 = vec_mergeh (tmp1, tmp1);
+    perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
+
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    ref += stride;
+    tmp0 = vec_perm (ref0, ref1, perm0);
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    ref += stride;
+    tmp1 = vec_perm (ref0, ref1, perm1);
+    tmp = vec_avg (tmp0, tmp1);
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (7, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp0 = vec_perm (ref0, ref1, perm0);
+	tmp = vec_avg (tmp0, tmp1);
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (7, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+	dest += stride;
+	tmp1 = vec_perm (ref0, ref1, perm1);
+	tmp = vec_avg (tmp0, tmp1);
+    } while (--height);
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (7, ref);
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+    dest += stride;
+    tmp0 = vec_perm (ref0, ref1, perm0);
+    tmp = vec_avg (tmp0, tmp1);
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+}
+
+/* Diagonal half-pel 16-wide: 4-pixel rounded average computed exactly as
+ * avg(avg(A0,B0), avg(A1,B1)) minus a 1-bit correction derived from the
+ * xor terms, matching (a+b+c+d+2)>>2.  Pipelined two rows per iteration
+ * with the previous row's avg/xor kept in avg1/xor1 (resp. avg0/xor0). */
+static void MC_put_xy_16_altivec (uint8_t * dest, const uint8_t * ref,
+				  const int stride, int height)
+{
+    vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
+    vector_u8_t ones;
+
+    ones = vec_splat_u8 (1);
+    permA = vec_lvsl (0, ref);
+    permB = vec_add (permA, ones);
+
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);
+    ref += stride;
+    A = vec_perm (ref0, ref1, permA);
+    B = vec_perm (ref0, ref1, permB);
+    avg0 = vec_avg (A, B);
+    xor0 = vec_xor (A, B);
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);
+    ref += stride;
+    A = vec_perm (ref0, ref1, permA);
+    B = vec_perm (ref0, ref1, permB);
+    avg1 = vec_avg (A, B);
+    xor1 = vec_xor (A, B);
+    tmp = vec_sub (vec_avg (avg0, avg1),
+		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
+			    vec_xor (avg0, avg1)));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (16, ref);
+	ref += stride;
+	vec_st (tmp, 0, dest);
+	A = vec_perm (ref0, ref1, permA);
+	B = vec_perm (ref0, ref1, permB);
+	avg0 = vec_avg (A, B);
+	xor0 = vec_xor (A, B);
+	tmp = vec_sub (vec_avg (avg0, avg1),
+		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
+				vec_xor (avg0, avg1)));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (16, ref);
+	ref += stride;
+	vec_st (tmp, stride, dest);
+	dest += 2*stride;
+	A = vec_perm (ref0, ref1, permA);
+	B = vec_perm (ref0, ref1, permB);
+	avg1 = vec_avg (A, B);
+	xor1 = vec_xor (A, B);
+	tmp = vec_sub (vec_avg (avg0, avg1),
+		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
+				vec_xor (avg0, avg1)));
+    } while (--height);
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (16, ref);
+    vec_st (tmp, 0, dest);
+    A = vec_perm (ref0, ref1, permA);
+    B = vec_perm (ref0, ref1, permB);
+    avg0 = vec_avg (A, B);
+    xor0 = vec_xor (A, B);
+    tmp = vec_sub (vec_avg (avg0, avg1),
+		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
+			    vec_xor (avg0, avg1)));
+    vec_st (tmp, stride, dest);
+}
+
+/* Diagonal half-pel 8-wide: same correction scheme as MC_put_xy_16, with
+ * even/odd-row permute pairs and 4-byte vec_ste stores. */
+static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
+				 const int stride, int height)
+{
+    vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
+    vector_u8_t avg0, avg1, xor0, xor1, tmp, ones;
+
+    ones = vec_splat_u8 (1);
+    perm0A = vec_lvsl (0, ref);
+    perm0A = vec_mergeh (perm0A, perm0A);
+    perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
+    perm0B = vec_add (perm0A, ones);
+    perm1A = vec_lvsl (stride, ref);
+    perm1A = vec_mergeh (perm1A, perm1A);
+    perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
+    perm1B = vec_add (perm1A, ones);
+
+    height = (height >> 1) - 1;
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);
+    ref += stride;
+    A = vec_perm (ref0, ref1, perm0A);
+    B = vec_perm (ref0, ref1, perm0B);
+    avg0 = vec_avg (A, B);
+    xor0 = vec_xor (A, B);
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);
+    ref += stride;
+    A = vec_perm (ref0, ref1, perm1A);
+    B = vec_perm (ref0, ref1, perm1B);
+    avg1 = vec_avg (A, B);
+    xor1 = vec_xor (A, B);
+    tmp = vec_sub (vec_avg (avg0, avg1),
+		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
+			    vec_xor (avg0, avg1)));
+
+    do {
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+	dest += stride;
+	A = vec_perm (ref0, ref1, perm0A);
+	B = vec_perm (ref0, ref1, perm0B);
+	avg0 = vec_avg (A, B);
+	xor0 = vec_xor (A, B);
+	tmp = vec_sub (vec_avg (avg0, avg1),
+		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
+				vec_xor (avg0, avg1)));
+
+	ref0 = vec_ld (0, ref);
+	ref1 = vec_ld (8, ref);
+	ref += stride;
+	vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+	vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+	dest += stride;
+	A = vec_perm (ref0, ref1, perm1A);
+	B = vec_perm (ref0, ref1, perm1B);
+	avg1 = vec_avg (A, B);
+	xor1 = vec_xor (A, B);
+	tmp = vec_sub (vec_avg (avg0, avg1),
+		       vec_and (vec_and (ones, vec_or (xor0, xor1)),
+				vec_xor (avg0, avg1)));
+    } while (--height);
+
+    ref0 = vec_ld (0, ref);
+    ref1 = vec_ld (8, ref);
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+    dest += stride;
+    A = vec_perm (ref0, ref1, perm0A);
+    B = vec_perm (ref0, ref1, perm0B);
+    avg0 = vec_avg (A, B);
+    xor0 = vec_xor (A, B);
+    tmp = vec_sub (vec_avg (avg0, avg1),
+		   vec_and (vec_and (ones, vec_or (xor0, xor1)),
+			    vec_xor (avg0, avg1)));
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+}
+
+#if 0
+static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{
+ vector_u8_t permA, permB, ref0, ref1, A, B, C, D, tmp, zero, ones;
+ vector_u16_t splat2, temp;
+
+ ones = vec_splat_u8 (1);
+ permA = vec_lvsl (0, ref);
+ permB = vec_add (permA, ones);
+
+ zero = vec_splat_u8 (0);
+ splat2 = vec_splat_u16 (2);
+
+ do {
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (8, ref);
+ ref += stride;
+ A = vec_perm (ref0, ref1, permA);
+ B = vec_perm (ref0, ref1, permB);
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (8, ref);
+ C = vec_perm (ref0, ref1, permA);
+ D = vec_perm (ref0, ref1, permB);
+
+ temp = vec_add (vec_add ((vector_u16_t)vec_mergeh (zero, A),
+ (vector_u16_t)vec_mergeh (zero, B)),
+ vec_add ((vector_u16_t)vec_mergeh (zero, C),
+ (vector_u16_t)vec_mergeh (zero, D)));
+ temp = vec_sr (vec_add (temp, splat2), splat2);
+ tmp = vec_pack (temp, temp);
+
+ vec_st (tmp, 0, dest);
+ dest += stride;
+ tmp = vec_avg (vec_perm (ref0, ref1, permA),
+ vec_perm (ref0, ref1, permB));
+ } while (--height);
+}
+#endif
+
+static void MC_avg_o_16_altivec (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{
+ vector_u8_t perm, ref0, ref1, tmp, prev;
+
+ perm = vec_lvsl (0, ref);
+
+ height = (height >> 1) - 1;
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (15, ref);
+ ref += stride;
+ prev = vec_ld (0, dest);
+ tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
+
+ do {
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (15, ref);
+ ref += stride;
+ prev = vec_ld (stride, dest);
+ vec_st (tmp, 0, dest);
+ tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (15, ref);
+ ref += stride;
+ prev = vec_ld (2*stride, dest);
+ vec_st (tmp, stride, dest);
+ dest += 2*stride;
+ tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
+ } while (--height);
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (15, ref);
+ prev = vec_ld (stride, dest);
+ vec_st (tmp, 0, dest);
+ tmp = vec_avg (prev, vec_perm (ref0, ref1, perm));
+ vec_st (tmp, stride, dest);
+}
+
+static void MC_avg_o_8_altivec (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{
+ vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev;
+
+ tmp0 = vec_lvsl (0, ref);
+ tmp0 = vec_mergeh (tmp0, tmp0);
+ perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
+ tmp1 = vec_lvsl (stride, ref);
+ tmp1 = vec_mergeh (tmp1, tmp1);
+ perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
+
+ height = (height >> 1) - 1;
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (7, ref);
+ ref += stride;
+ prev = vec_ld (0, dest);
+ tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
+
+ do {
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (7, ref);
+ ref += stride;
+ prev = vec_ld (stride, dest);
+ vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
+ vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
+ dest += stride;
+ tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (7, ref);
+ ref += stride;
+ prev = vec_ld (stride, dest);
+ vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+ vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+ dest += stride;
+ tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0));
+ } while (--height);
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (7, ref);
+ prev = vec_ld (stride, dest);
+ vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
+ vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
+ dest += stride;
+ tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1));
+ vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+ vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+}
+
+static void MC_avg_x_16_altivec (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{
+ vector_u8_t permA, permB, ref0, ref1, tmp, prev;
+
+ permA = vec_lvsl (0, ref);
+ permB = vec_add (permA, vec_splat_u8 (1));
+
+ height = (height >> 1) - 1;
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (16, ref);
+ prev = vec_ld (0, dest);
+ ref += stride;
+ tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
+ vec_perm (ref0, ref1, permB)));
+
+ do {
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (16, ref);
+ ref += stride;
+ prev = vec_ld (stride, dest);
+ vec_st (tmp, 0, dest);
+ tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
+ vec_perm (ref0, ref1, permB)));
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (16, ref);
+ ref += stride;
+ prev = vec_ld (2*stride, dest);
+ vec_st (tmp, stride, dest);
+ dest += 2*stride;
+ tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
+ vec_perm (ref0, ref1, permB)));
+ } while (--height);
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (16, ref);
+ prev = vec_ld (stride, dest);
+ vec_st (tmp, 0, dest);
+ tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA),
+ vec_perm (ref0, ref1, permB)));
+ vec_st (tmp, stride, dest);
+}
+
+static void MC_avg_x_8_altivec (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{
+ vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1;
+ vector_u8_t prev;
+
+ ones = vec_splat_u8 (1);
+ tmp0 = vec_lvsl (0, ref);
+ tmp0 = vec_mergeh (tmp0, tmp0);
+ perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
+ perm0B = vec_add (perm0A, ones);
+ tmp1 = vec_lvsl (stride, ref);
+ tmp1 = vec_mergeh (tmp1, tmp1);
+ perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
+ perm1B = vec_add (perm1A, ones);
+
+ height = (height >> 1) - 1;
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (8, ref);
+ prev = vec_ld (0, dest);
+ ref += stride;
+ tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
+ vec_perm (ref0, ref1, perm0B)));
+
+ do {
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (8, ref);
+ ref += stride;
+ prev = vec_ld (stride, dest);
+ vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
+ vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
+ dest += stride;
+ tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
+ vec_perm (ref0, ref1, perm1B)));
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (8, ref);
+ ref += stride;
+ prev = vec_ld (stride, dest);
+ vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+ vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+ dest += stride;
+ tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A),
+ vec_perm (ref0, ref1, perm0B)));
+ } while (--height);
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (8, ref);
+ prev = vec_ld (stride, dest);
+ vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest);
+ vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest);
+ dest += stride;
+ tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A),
+ vec_perm (ref0, ref1, perm1B)));
+ vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest);
+ vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest);
+}
+
+static void MC_avg_y_16_altivec (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{
+ vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev;
+
+ perm = vec_lvsl (0, ref);
+
+ height = (height >> 1) - 1;
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (15, ref);
+ ref += stride;
+ tmp0 = vec_perm (ref0, ref1, perm);
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (15, ref);
+ ref += stride;
+ prev = vec_ld (0, dest);
+ tmp1 = vec_perm (ref0, ref1, perm);
+ tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+
+ do {
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (15, ref);
+ ref += stride;
+ prev = vec_ld (stride, dest);
+ vec_st (tmp, 0, dest);
+ tmp0 = vec_perm (ref0, ref1, perm);
+ tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (15, ref);
+ ref += stride;
+ prev = vec_ld (2*stride, dest);
+ vec_st (tmp, stride, dest);
+ dest += 2*stride;
+ tmp1 = vec_perm (ref0, ref1, perm);
+ tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+ } while (--height);
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (15, ref);
+ prev = vec_ld (stride, dest);
+ vec_st (tmp, 0, dest);
+ tmp0 = vec_perm (ref0, ref1, perm);
+ tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+ vec_st (tmp, stride, dest);
+}
+
+static void MC_avg_y_8_altivec (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{
+ vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev;
+
+ tmp0 = vec_lvsl (0, ref);
+ tmp0 = vec_mergeh (tmp0, tmp0);
+ perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0);
+ tmp1 = vec_lvsl (stride, ref);
+ tmp1 = vec_mergeh (tmp1, tmp1);
+ perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1);
+
+ height = (height >> 1) - 1;
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (7, ref);
+ ref += stride;
+ tmp0 = vec_perm (ref0, ref1, perm0);
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (7, ref);
+ ref += stride;
+ prev = vec_ld (0, dest);
+ tmp1 = vec_perm (ref0, ref1, perm1);
+ tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+
+ do {
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (7, ref);
+ ref += stride;
+ prev = vec_ld (stride, dest);
+ vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+ vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+ dest += stride;
+ tmp0 = vec_perm (ref0, ref1, perm0);
+ tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (7, ref);
+ ref += stride;
+ prev = vec_ld (stride, dest);
+ vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+ vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+ dest += stride;
+ tmp1 = vec_perm (ref0, ref1, perm1);
+ tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+ } while (--height);
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (7, ref);
+ prev = vec_ld (stride, dest);
+ vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+ vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+ dest += stride;
+ tmp0 = vec_perm (ref0, ref1, perm0);
+ tmp = vec_avg (prev, vec_avg (tmp0, tmp1));
+ vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+ vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+}
+
+static void MC_avg_xy_16_altivec (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{
+ vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp;
+ vector_u8_t ones, prev;
+
+ ones = vec_splat_u8 (1);
+ permA = vec_lvsl (0, ref);
+ permB = vec_add (permA, ones);
+
+ height = (height >> 1) - 1;
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (16, ref);
+ ref += stride;
+ A = vec_perm (ref0, ref1, permA);
+ B = vec_perm (ref0, ref1, permB);
+ avg0 = vec_avg (A, B);
+ xor0 = vec_xor (A, B);
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (16, ref);
+ ref += stride;
+ prev = vec_ld (0, dest);
+ A = vec_perm (ref0, ref1, permA);
+ B = vec_perm (ref0, ref1, permB);
+ avg1 = vec_avg (A, B);
+ xor1 = vec_xor (A, B);
+ tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
+ vec_and (vec_and (ones, vec_or (xor0, xor1)),
+ vec_xor (avg0, avg1))));
+
+ do {
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (16, ref);
+ ref += stride;
+ prev = vec_ld (stride, dest);
+ vec_st (tmp, 0, dest);
+ A = vec_perm (ref0, ref1, permA);
+ B = vec_perm (ref0, ref1, permB);
+ avg0 = vec_avg (A, B);
+ xor0 = vec_xor (A, B);
+ tmp = vec_avg (prev,
+ vec_sub (vec_avg (avg0, avg1),
+ vec_and (vec_and (ones, vec_or (xor0, xor1)),
+ vec_xor (avg0, avg1))));
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (16, ref);
+ ref += stride;
+ prev = vec_ld (2*stride, dest);
+ vec_st (tmp, stride, dest);
+ dest += 2*stride;
+ A = vec_perm (ref0, ref1, permA);
+ B = vec_perm (ref0, ref1, permB);
+ avg1 = vec_avg (A, B);
+ xor1 = vec_xor (A, B);
+ tmp = vec_avg (prev,
+ vec_sub (vec_avg (avg0, avg1),
+ vec_and (vec_and (ones, vec_or (xor0, xor1)),
+ vec_xor (avg0, avg1))));
+ } while (--height);
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (16, ref);
+ prev = vec_ld (stride, dest);
+ vec_st (tmp, 0, dest);
+ A = vec_perm (ref0, ref1, permA);
+ B = vec_perm (ref0, ref1, permB);
+ avg0 = vec_avg (A, B);
+ xor0 = vec_xor (A, B);
+ tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
+ vec_and (vec_and (ones, vec_or (xor0, xor1)),
+ vec_xor (avg0, avg1))));
+ vec_st (tmp, stride, dest);
+}
+
+static void MC_avg_xy_8_altivec (uint8_t * dest, const uint8_t * ref,
+ const int stride, int height)
+{
+ vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B;
+ vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev;
+
+ ones = vec_splat_u8 (1);
+ perm0A = vec_lvsl (0, ref);
+ perm0A = vec_mergeh (perm0A, perm0A);
+ perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A);
+ perm0B = vec_add (perm0A, ones);
+ perm1A = vec_lvsl (stride, ref);
+ perm1A = vec_mergeh (perm1A, perm1A);
+ perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A);
+ perm1B = vec_add (perm1A, ones);
+
+ height = (height >> 1) - 1;
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (8, ref);
+ ref += stride;
+ A = vec_perm (ref0, ref1, perm0A);
+ B = vec_perm (ref0, ref1, perm0B);
+ avg0 = vec_avg (A, B);
+ xor0 = vec_xor (A, B);
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (8, ref);
+ ref += stride;
+ prev = vec_ld (0, dest);
+ A = vec_perm (ref0, ref1, perm1A);
+ B = vec_perm (ref0, ref1, perm1B);
+ avg1 = vec_avg (A, B);
+ xor1 = vec_xor (A, B);
+ tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
+ vec_and (vec_and (ones, vec_or (xor0, xor1)),
+ vec_xor (avg0, avg1))));
+
+ do {
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (8, ref);
+ ref += stride;
+ prev = vec_ld (stride, dest);
+ vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+ vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+ dest += stride;
+ A = vec_perm (ref0, ref1, perm0A);
+ B = vec_perm (ref0, ref1, perm0B);
+ avg0 = vec_avg (A, B);
+ xor0 = vec_xor (A, B);
+ tmp = vec_avg (prev,
+ vec_sub (vec_avg (avg0, avg1),
+ vec_and (vec_and (ones, vec_or (xor0, xor1)),
+ vec_xor (avg0, avg1))));
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (8, ref);
+ ref += stride;
+ prev = vec_ld (stride, dest);
+ vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+ vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+ dest += stride;
+ A = vec_perm (ref0, ref1, perm1A);
+ B = vec_perm (ref0, ref1, perm1B);
+ avg1 = vec_avg (A, B);
+ xor1 = vec_xor (A, B);
+ tmp = vec_avg (prev,
+ vec_sub (vec_avg (avg0, avg1),
+ vec_and (vec_and (ones, vec_or (xor0, xor1)),
+ vec_xor (avg0, avg1))));
+ } while (--height);
+
+ ref0 = vec_ld (0, ref);
+ ref1 = vec_ld (8, ref);
+ prev = vec_ld (stride, dest);
+ vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+ vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+ dest += stride;
+ A = vec_perm (ref0, ref1, perm0A);
+ B = vec_perm (ref0, ref1, perm0B);
+ avg0 = vec_avg (A, B);
+ xor0 = vec_xor (A, B);
+ tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1),
+ vec_and (vec_and (ones, vec_or (xor0, xor1)),
+ vec_xor (avg0, avg1))));
+ vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);
+ vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+}
+
+MPEG2_MC_EXTERN (altivec)
+
+#endif
diff --git a/libmpeg2/motion_comp_arm.c b/libmpeg2/motion_comp_arm.c
new file mode 100644
index 0000000..0111f7f
--- /dev/null
+++ b/libmpeg2/motion_comp_arm.c
@@ -0,0 +1,185 @@
+/*
+ * motion_comp_arm.c
+ * Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with mpeg2dec; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_ARM
+
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "attributes.h"
+#include "mpeg2_internal.h"
+
+#define avg2(a,b) ((a+b+1)>>1)
+#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
+
+#define predict_o(i) (ref[i])
+#define predict_x(i) (avg2 (ref[i], ref[i+1]))
+#define predict_y(i) (avg2 (ref[i], (ref+stride)[i]))
+#define predict_xy(i) (avg4 (ref[i], ref[i+1], \
+ (ref+stride)[i], (ref+stride)[i+1]))
+
+#define put(predictor,i) dest[i] = predictor (i)
+#define avg(predictor,i) dest[i] = avg2 (predictor (i), dest[i])
+
+/* mc function template */
+
+#define MC_FUNC(op,xy) \
+static void inline MC_##op##_##xy##_16_c (uint8_t * dest, const uint8_t * ref, \
+ const int stride, int height) \
+{ \
+ do { \
+ op (predict_##xy, 0); \
+ op (predict_##xy, 1); \
+ op (predict_##xy, 2); \
+ op (predict_##xy, 3); \
+ op (predict_##xy, 4); \
+ op (predict_##xy, 5); \
+ op (predict_##xy, 6); \
+ op (predict_##xy, 7); \
+ op (predict_##xy, 8); \
+ op (predict_##xy, 9); \
+ op (predict_##xy, 10); \
+ op (predict_##xy, 11); \
+ op (predict_##xy, 12); \
+ op (predict_##xy, 13); \
+ op (predict_##xy, 14); \
+ op (predict_##xy, 15); \
+ ref += stride; \
+ dest += stride; \
+ } while (--height); \
+} \
+static void MC_##op##_##xy##_8_c (uint8_t * dest, const uint8_t * ref, \
+ const int stride, int height) \
+{ \
+ do { \
+ op (predict_##xy, 0); \
+ op (predict_##xy, 1); \
+ op (predict_##xy, 2); \
+ op (predict_##xy, 3); \
+ op (predict_##xy, 4); \
+ op (predict_##xy, 5); \
+ op (predict_##xy, 6); \
+ op (predict_##xy, 7); \
+ ref += stride; \
+ dest += stride; \
+ } while (--height); \
+} \
+/* definitions of the actual mc functions */
+
+MC_FUNC (avg,o)
+MC_FUNC (avg,x)
+MC_FUNC (put,y)
+MC_FUNC (avg,y)
+MC_FUNC (put,xy)
+MC_FUNC (avg,xy)
+
+
+extern void MC_put_o_16_arm (uint8_t * dest, const uint8_t * ref,
+ int stride, int height);
+
+extern void MC_put_x_16_arm (uint8_t * dest, const uint8_t * ref,
+ int stride, int height);
+
+
+static void MC_put_y_16_arm (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put_y_16_c(dest, ref, stride, height);
+}
+
+static void MC_put_xy_16_arm (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put_xy_16_c(dest, ref, stride, height);
+}
+
+extern void MC_put_o_8_arm (uint8_t * dest, const uint8_t * ref,
+ int stride, int height);
+
+extern void MC_put_x_8_arm (uint8_t * dest, const uint8_t * ref,
+ int stride, int height);
+
+static void MC_put_y_8_arm (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put_y_8_c(dest, ref, stride, height);
+}
+
+static void MC_put_xy_8_arm (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put_xy_8_c(dest, ref, stride, height);
+}
+
+static void MC_avg_o_16_arm (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg_o_16_c(dest, ref, stride, height);
+}
+
+static void MC_avg_x_16_arm (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg_x_16_c(dest, ref, stride, height);
+}
+
+static void MC_avg_y_16_arm (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg_y_16_c(dest, ref, stride, height);
+}
+
+static void MC_avg_xy_16_arm (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg_xy_16_c(dest, ref, stride, height);
+}
+
+static void MC_avg_o_8_arm (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg_o_8_c(dest, ref, stride, height);
+}
+
+static void MC_avg_x_8_arm (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg_x_8_c(dest, ref, stride, height);
+}
+
+static void MC_avg_y_8_arm (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg_y_8_c(dest, ref, stride, height);
+}
+
+static void MC_avg_xy_8_arm (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg_xy_8_c(dest, ref, stride, height);
+}
+
+MPEG2_MC_EXTERN (arm)
+
+#endif
diff --git a/libmpeg2/motion_comp_arm_s.S b/libmpeg2/motion_comp_arm_s.S
new file mode 100644
index 0000000..f6c3d7d
--- /dev/null
+++ b/libmpeg2/motion_comp_arm_s.S
@@ -0,0 +1,323 @@
+@ motion_comp_arm_s.S
+@ Copyright (C) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
+@
+@ This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+@ See http://libmpeg2.sourceforge.net/ for updates.
+@
+@ mpeg2dec is free software; you can redistribute it and/or modify
+@ it under the terms of the GNU General Public License as published by
+@ the Free Software Foundation; either version 2 of the License, or
+@ (at your option) any later version.
+@
+@ mpeg2dec is distributed in the hope that it will be useful,
+@ but WITHOUT ANY WARRANTY; without even the implied warranty of
+@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+@ GNU General Public License for more details.
+@
+@ You should have received a copy of the GNU General Public License
+@ along with mpeg2dec; if not, write to the Free Software
+@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+ .text
+
+@ ----------------------------------------------------------------
+ .align
+ .global MC_put_o_16_arm
+MC_put_o_16_arm:
+ @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
+ pld [r1]
+ stmfd sp!, {r4-r11, lr} @ R14 is also called LR
+ and r4, r1, #3
+ adr r5, MC_put_o_16_arm_align_jt
+ add r5, r5, r4, lsl #2
+ ldr pc, [r5]
+
+MC_put_o_16_arm_align0:
+ ldmia r1, {r4-r7}
+ add r1, r1, r2
+ pld [r1]
+ stmia r0, {r4-r7}
+ subs r3, r3, #1
+ add r0, r0, r2
+ bne MC_put_o_16_arm_align0
+ ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
+
+.macro PROC shift
+ ldmia r1, {r4-r8}
+ add r1, r1, r2
+ mov r9, r4, lsr #(\shift)
+ pld [r1]
+ mov r10, r5, lsr #(\shift)
+ orr r9, r9, r5, lsl #(32-\shift)
+ mov r11, r6, lsr #(\shift)
+ orr r10, r10, r6, lsl #(32-\shift)
+ mov r12, r7, lsr #(\shift)
+ orr r11, r11, r7, lsl #(32-\shift)
+ orr r12, r12, r8, lsl #(32-\shift)
+ stmia r0, {r9-r12}
+ subs r3, r3, #1
+ add r0, r0, r2
+.endm
+
+MC_put_o_16_arm_align1:
+ and r1, r1, #0xFFFFFFFC
+1: PROC(8)
+ bne 1b
+ ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
+MC_put_o_16_arm_align2:
+ and r1, r1, #0xFFFFFFFC
+1: PROC(16)
+ bne 1b
+ ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
+MC_put_o_16_arm_align3:
+ and r1, r1, #0xFFFFFFFC
+1: PROC(24)
+ bne 1b
+ ldmfd sp!, {r4-r11, pc} @@ update PC with LR content.
+MC_put_o_16_arm_align_jt:
+ .word MC_put_o_16_arm_align0
+ .word MC_put_o_16_arm_align1
+ .word MC_put_o_16_arm_align2
+ .word MC_put_o_16_arm_align3
+
+@ ----------------------------------------------------------------
+ .align
+ .global MC_put_o_8_arm
+MC_put_o_8_arm:
+ @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
+ pld [r1]
+ stmfd sp!, {r4-r10, lr} @ R14 is also called LR
+ and r4, r1, #3
+ adr r5, MC_put_o_8_arm_align_jt
+ add r5, r5, r4, lsl #2
+ ldr pc, [r5]
+MC_put_o_8_arm_align0:
+ ldmia r1, {r4-r5}
+ add r1, r1, r2
+ pld [r1]
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ subs r3, r3, #1
+ bne MC_put_o_8_arm_align0
+ ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
+
+.macro PROC8 shift
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ mov r9, r4, lsr #(\shift)
+ pld [r1]
+ mov r10, r5, lsr #(\shift)
+ orr r9, r9, r5, lsl #(32-\shift)
+ orr r10, r10, r6, lsl #(32-\shift)
+ stmia r0, {r9-r10}
+ subs r3, r3, #1
+ add r0, r0, r2
+.endm
+
+MC_put_o_8_arm_align1:
+ and r1, r1, #0xFFFFFFFC
+1: PROC8(8)
+ bne 1b
+ ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
+
+MC_put_o_8_arm_align2:
+ and r1, r1, #0xFFFFFFFC
+1: PROC8(16)
+ bne 1b
+ ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
+
+MC_put_o_8_arm_align3:
+ and r1, r1, #0xFFFFFFFC
+1: PROC8(24)
+ bne 1b
+ ldmfd sp!, {r4-r10, pc} @@ update PC with LR content.
+
+MC_put_o_8_arm_align_jt:
+ .word MC_put_o_8_arm_align0
+ .word MC_put_o_8_arm_align1
+ .word MC_put_o_8_arm_align2
+ .word MC_put_o_8_arm_align3
+
+@ ----------------------------------------------------------------
+.macro AVG_PW rW1, rW2
+ mov \rW2, \rW2, lsl #24
+ orr \rW2, \rW2, \rW1, lsr #8
+ eor r9, \rW1, \rW2
+ and \rW2, \rW1, \rW2
+ and r10, r9, r12
+ add \rW2, \rW2, r10, lsr #1
+ and r10, r9, r11
+ add \rW2, \rW2, r10
+.endm
+
+ .align
+ .global MC_put_x_16_arm
+MC_put_x_16_arm:
+ @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
+ pld [r1]
+ stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+ and r4, r1, #3
+ adr r5, MC_put_x_16_arm_align_jt
+ ldr r11, [r5]
+ mvn r12, r11
+ add r5, r5, r4, lsl #2
+ ldr pc, [r5, #4]
+
+.macro ADJ_ALIGN_QW shift, R0, R1, R2, R3, R4
+ mov \R0, \R0, lsr #(\shift)
+ orr \R0, \R0, \R1, lsl #(32 - \shift)
+ mov \R1, \R1, lsr #(\shift)
+ orr \R1, \R1, \R2, lsl #(32 - \shift)
+ mov \R2, \R2, lsr #(\shift)
+ orr \R2, \R2, \R3, lsl #(32 - \shift)
+ mov \R3, \R3, lsr #(\shift)
+ orr \R3, \R3, \R4, lsl #(32 - \shift)
+ mov \R4, \R4, lsr #(\shift)
+@ and \R4, \R4, #0xFF
+.endm
+
+MC_put_x_16_arm_align0:
+ ldmia r1, {r4-r8}
+ add r1, r1, r2
+ pld [r1]
+ AVG_PW r7, r8
+ AVG_PW r6, r7
+ AVG_PW r5, r6
+ AVG_PW r4, r5
+ stmia r0, {r5-r8}
+ subs r3, r3, #1
+ add r0, r0, r2
+ bne MC_put_x_16_arm_align0
+ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
+MC_put_x_16_arm_align1:
+ and r1, r1, #0xFFFFFFFC
+1: ldmia r1, {r4-r8}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_QW 8, r4, r5, r6, r7, r8
+ AVG_PW r7, r8
+ AVG_PW r6, r7
+ AVG_PW r5, r6
+ AVG_PW r4, r5
+ stmia r0, {r5-r8}
+ subs r3, r3, #1
+ add r0, r0, r2
+ bne 1b
+ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
+MC_put_x_16_arm_align2:
+ and r1, r1, #0xFFFFFFFC
+1: ldmia r1, {r4-r8}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_QW 16, r4, r5, r6, r7, r8
+ AVG_PW r7, r8
+ AVG_PW r6, r7
+ AVG_PW r5, r6
+ AVG_PW r4, r5
+ stmia r0, {r5-r8}
+ subs r3, r3, #1
+ add r0, r0, r2
+ bne 1b
+ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
+MC_put_x_16_arm_align3:
+ and r1, r1, #0xFFFFFFFC
+1: ldmia r1, {r4-r8}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_QW 24, r4, r5, r6, r7, r8
+ AVG_PW r7, r8
+ AVG_PW r6, r7
+ AVG_PW r5, r6
+ AVG_PW r4, r5
+ stmia r0, {r5-r8}
+ subs r3, r3, #1
+ add r0, r0, r2
+ bne 1b
+ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
+MC_put_x_16_arm_align_jt:
+ .word 0x01010101
+ .word MC_put_x_16_arm_align0
+ .word MC_put_x_16_arm_align1
+ .word MC_put_x_16_arm_align2
+ .word MC_put_x_16_arm_align3
+
+@ ----------------------------------------------------------------
+ .align
+ .global MC_put_x_8_arm
+MC_put_x_8_arm:
+ @@ void func(uint8_t * dest, const uint8_t * ref, int stride, int height)
+ pld [r1]
+ stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+ and r4, r1, #3
+ adr r5, MC_put_x_8_arm_align_jt
+ ldr r11, [r5]
+ mvn r12, r11
+ add r5, r5, r4, lsl #2
+ ldr pc, [r5, #4]
+
+.macro ADJ_ALIGN_DW shift, R0, R1, R2
+ mov \R0, \R0, lsr #(\shift)
+ orr \R0, \R0, \R1, lsl #(32 - \shift)
+ mov \R1, \R1, lsr #(\shift)
+ orr \R1, \R1, \R2, lsl #(32 - \shift)
+ mov \R2, \R2, lsr #(\shift)
+@ and \R4, \R4, #0xFF
+.endm
+
+MC_put_x_8_arm_align0:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ AVG_PW r5, r6
+ AVG_PW r4, r5
+ stmia r0, {r5-r6}
+ subs r3, r3, #1
+ add r0, r0, r2
+ bne MC_put_x_8_arm_align0
+ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
+MC_put_x_8_arm_align1:
+ and r1, r1, #0xFFFFFFFC
+1: ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DW 8, r4, r5, r6
+ AVG_PW r5, r6
+ AVG_PW r4, r5
+ stmia r0, {r5-r6}
+ subs r3, r3, #1
+ add r0, r0, r2
+ bne 1b
+ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
+MC_put_x_8_arm_align2:
+ and r1, r1, #0xFFFFFFFC
+1: ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DW 16, r4, r5, r6
+ AVG_PW r5, r6
+ AVG_PW r4, r5
+ stmia r0, {r5-r6}
+ subs r3, r3, #1
+ add r0, r0, r2
+ bne 1b
+ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
+MC_put_x_8_arm_align3:
+ and r1, r1, #0xFFFFFFFC
+1: ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DW 24, r4, r5, r6
+ AVG_PW r5, r6
+ AVG_PW r4, r5
+ stmia r0, {r5-r6}
+ subs r3, r3, #1
+ add r0, r0, r2
+ bne 1b
+ ldmfd sp!, {r4-r11,pc} @@ update PC with LR content.
+MC_put_x_8_arm_align_jt:
+ .word 0x01010101
+ .word MC_put_x_8_arm_align0
+ .word MC_put_x_8_arm_align1
+ .word MC_put_x_8_arm_align2
+ .word MC_put_x_8_arm_align3
diff --git a/libmpeg2/motion_comp_mmx.c b/libmpeg2/motion_comp_mmx.c
new file mode 100644
index 0000000..fc265f4
--- /dev/null
+++ b/libmpeg2/motion_comp_mmx.c
@@ -0,0 +1,1005 @@
+/*
+ * motion_comp_mmx.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
+
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "attributes.h"
+#include "mpeg2_internal.h"
+#include "mmx.h"
+
+#define CPU_MMXEXT 0
+#define CPU_3DNOW 1
+
+
+/* MMX code - needs a rewrite */
+
+/*
+ * Motion Compensation frequently needs to average values using the
+ * formula (x+y+1)>>1. Both MMXEXT and 3Dnow include one instruction
+ * to compute this, but it's been left out of classic MMX.
+ *
+ * We need to be careful of overflows when doing this computation.
+ * Rather than unpacking data to 16-bits, which reduces parallelism,
+ * we use the following formulas:
+ *
+ * (x+y)>>1 == (x&y)+((x^y)>>1)
+ * (x+y+1)>>1 == (x|y)-((x^y)>>1)
+ */
+
+/* some rounding constants */
+static mmx_t mask1 = {0xfefefefefefefefeLL};
+static mmx_t round4 = {0x0002000200020002LL};
+
+/*
+ * This code should probably be compiled with loop unrolling
+ * (ie, -funroll-loops in gcc)becuase some of the loops
+ * use a small static number of iterations. This was written
+ * with the assumption the compiler knows best about when
+ * unrolling will help
+ */
+
+static inline void mmx_zero_reg (void)
+{
+ /* load 0 into mm0 */
+ pxor_r2r (mm0, mm0);
+}
+
+static inline void mmx_average_2_U8 (uint8_t * dest, const uint8_t * src1,
+ const uint8_t * src2)
+{
+ /* *dest = (*src1 + *src2 + 1)/ 2; */
+
+ movq_m2r (*src1, mm1); /* load 8 src1 bytes */
+ movq_r2r (mm1, mm2); /* copy 8 src1 bytes */
+
+ movq_m2r (*src2, mm3); /* load 8 src2 bytes */
+ movq_r2r (mm3, mm4); /* copy 8 src2 bytes */
+
+ pxor_r2r (mm1, mm3); /* xor src1 and src2 */
+ pand_m2r (mask1, mm3); /* mask lower bits */
+ psrlq_i2r (1, mm3); /* /2 */
+ por_r2r (mm2, mm4); /* or src1 and src2 */
+ psubb_r2r (mm3, mm4); /* subtract subresults */
+ movq_r2m (mm4, *dest); /* store result in dest */
+}
+
+static inline void mmx_interp_average_2_U8 (uint8_t * dest,
+ const uint8_t * src1,
+ const uint8_t * src2)
+{
+ /* *dest = (*dest + (*src1 + *src2 + 1)/ 2 + 1)/ 2; */
+
+ movq_m2r (*dest, mm1); /* load 8 dest bytes */
+ movq_r2r (mm1, mm2); /* copy 8 dest bytes */
+
+ movq_m2r (*src1, mm3); /* load 8 src1 bytes */
+ movq_r2r (mm3, mm4); /* copy 8 src1 bytes */
+
+ movq_m2r (*src2, mm5); /* load 8 src2 bytes */
+ movq_r2r (mm5, mm6); /* copy 8 src2 bytes */
+
+ pxor_r2r (mm3, mm5); /* xor src1 and src2 */
+ pand_m2r (mask1, mm5); /* mask lower bits */
+ psrlq_i2r (1, mm5); /* /2 */
+ por_r2r (mm4, mm6); /* or src1 and src2 */
+ psubb_r2r (mm5, mm6); /* subtract subresults */
+ movq_r2r (mm6, mm5); /* copy subresult */
+
+ pxor_r2r (mm1, mm5); /* xor srcavg and dest */
+ pand_m2r (mask1, mm5); /* mask lower bits */
+ psrlq_i2r (1, mm5); /* /2 */
+ por_r2r (mm2, mm6); /* or srcavg and dest */
+ psubb_r2r (mm5, mm6); /* subtract subresults */
+ movq_r2m (mm6, *dest); /* store result in dest */
+}
+
+static inline void mmx_average_4_U8 (uint8_t * dest, const uint8_t * src1,
+ const uint8_t * src2,
+ const uint8_t * src3,
+ const uint8_t * src4)
+{
+ /* *dest = (*src1 + *src2 + *src3 + *src4 + 2)/ 4; */
+
+ movq_m2r (*src1, mm1); /* load 8 src1 bytes */
+ movq_r2r (mm1, mm2); /* copy 8 src1 bytes */
+
+ punpcklbw_r2r (mm0, mm1); /* unpack low src1 bytes */
+ punpckhbw_r2r (mm0, mm2); /* unpack high src1 bytes */
+
+ movq_m2r (*src2, mm3); /* load 8 src2 bytes */
+ movq_r2r (mm3, mm4); /* copy 8 src2 bytes */
+
+ punpcklbw_r2r (mm0, mm3); /* unpack low src2 bytes */
+ punpckhbw_r2r (mm0, mm4); /* unpack high src2 bytes */
+
+ paddw_r2r (mm3, mm1); /* add lows */
+ paddw_r2r (mm4, mm2); /* add highs */
+
+ /* now have partials in mm1 and mm2 */
+
+ movq_m2r (*src3, mm3); /* load 8 src3 bytes */
+ movq_r2r (mm3, mm4); /* copy 8 src3 bytes */
+
+ punpcklbw_r2r (mm0, mm3); /* unpack low src3 bytes */
+ punpckhbw_r2r (mm0, mm4); /* unpack high src3 bytes */
+
+ paddw_r2r (mm3, mm1); /* add lows */
+ paddw_r2r (mm4, mm2); /* add highs */
+
+ movq_m2r (*src4, mm5); /* load 8 src4 bytes */
+ movq_r2r (mm5, mm6); /* copy 8 src4 bytes */
+
+ punpcklbw_r2r (mm0, mm5); /* unpack low src4 bytes */
+ punpckhbw_r2r (mm0, mm6); /* unpack high src4 bytes */
+
+ paddw_r2r (mm5, mm1); /* add lows */
+ paddw_r2r (mm6, mm2); /* add highs */
+
+ /* now have subtotal in mm1 and mm2 */
+
+ paddw_m2r (round4, mm1);
+ psraw_i2r (2, mm1); /* /4 */
+ paddw_m2r (round4, mm2);
+ psraw_i2r (2, mm2); /* /4 */
+
+ packuswb_r2r (mm2, mm1); /* pack (w/ saturation) */
+ movq_r2m (mm1, *dest); /* store result in dest */
+}
+
+static inline void mmx_interp_average_4_U8 (uint8_t * dest,
+ const uint8_t * src1,
+ const uint8_t * src2,
+ const uint8_t * src3,
+ const uint8_t * src4)
+{
+ /* *dest = (*dest + (*src1 + *src2 + *src3 + *src4 + 2)/ 4 + 1)/ 2; */
+
+ movq_m2r (*src1, mm1); /* load 8 src1 bytes */
+ movq_r2r (mm1, mm2); /* copy 8 src1 bytes */
+
+ punpcklbw_r2r (mm0, mm1); /* unpack low src1 bytes */
+ punpckhbw_r2r (mm0, mm2); /* unpack high src1 bytes */
+
+ movq_m2r (*src2, mm3); /* load 8 src2 bytes */
+ movq_r2r (mm3, mm4); /* copy 8 src2 bytes */
+
+ punpcklbw_r2r (mm0, mm3); /* unpack low src2 bytes */
+ punpckhbw_r2r (mm0, mm4); /* unpack high src2 bytes */
+
+ paddw_r2r (mm3, mm1); /* add lows */
+ paddw_r2r (mm4, mm2); /* add highs */
+
+ /* now have partials in mm1 and mm2 */
+
+ movq_m2r (*src3, mm3); /* load 8 src3 bytes */
+ movq_r2r (mm3, mm4); /* copy 8 src3 bytes */
+
+ punpcklbw_r2r (mm0, mm3); /* unpack low src3 bytes */
+ punpckhbw_r2r (mm0, mm4); /* unpack high src3 bytes */
+
+ paddw_r2r (mm3, mm1); /* add lows */
+ paddw_r2r (mm4, mm2); /* add highs */
+
+ movq_m2r (*src4, mm5); /* load 8 src4 bytes */
+ movq_r2r (mm5, mm6); /* copy 8 src4 bytes */
+
+ punpcklbw_r2r (mm0, mm5); /* unpack low src4 bytes */
+ punpckhbw_r2r (mm0, mm6); /* unpack high src4 bytes */
+
+ paddw_r2r (mm5, mm1); /* add lows */
+ paddw_r2r (mm6, mm2); /* add highs */
+
+ paddw_m2r (round4, mm1);
+ psraw_i2r (2, mm1); /* /4 */
+ paddw_m2r (round4, mm2);
+ psraw_i2r (2, mm2); /* /4 */
+
+ /* now have subtotal/4 in mm1 and mm2 */
+
+ movq_m2r (*dest, mm3); /* load 8 dest bytes */
+ movq_r2r (mm3, mm4); /* copy 8 dest bytes */
+
+ packuswb_r2r (mm2, mm1); /* pack (w/ saturation) */
+ movq_r2r (mm1,mm2); /* copy subresult */
+
+ pxor_r2r (mm1, mm3); /* xor srcavg and dest */
+ pand_m2r (mask1, mm3); /* mask lower bits */
+ psrlq_i2r (1, mm3); /* /2 */
+ por_r2r (mm2, mm4); /* or srcavg and dest */
+ psubb_r2r (mm3, mm4); /* subtract subresults */
+ movq_r2m (mm4, *dest); /* store result in dest */
+}
+
+/*-----------------------------------------------------------------------*/
+
+static inline void MC_avg_mmx (const int width, int height, uint8_t * dest,
+ const uint8_t * ref, const int stride)
+{
+ mmx_zero_reg ();
+
+ do {
+ mmx_average_2_U8 (dest, dest, ref);
+
+ if (width == 16)
+ mmx_average_2_U8 (dest+8, dest+8, ref+8);
+
+ dest += stride;
+ ref += stride;
+ } while (--height);
+}
+
+static void MC_avg_o_16_mmx (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_avg_o_8_mmx (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg_mmx (8, height, dest, ref, stride);
+}
+
+/*-----------------------------------------------------------------------*/
+
+static inline void MC_put_mmx (const int width, int height, uint8_t * dest,
+ const uint8_t * ref, const int stride)
+{
+ mmx_zero_reg ();
+
+ do {
+ movq_m2r (* ref, mm1); /* load 8 ref bytes */
+ movq_r2m (mm1,* dest); /* store 8 bytes at curr */
+
+ if (width == 16)
+ {
+ movq_m2r (* (ref+8), mm1); /* load 8 ref bytes */
+ movq_r2m (mm1,* (dest+8)); /* store 8 bytes at curr */
+ }
+
+ dest += stride;
+ ref += stride;
+ } while (--height);
+}
+
+static void MC_put_o_16_mmx (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_put_o_8_mmx (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put_mmx (8, height, dest, ref, stride);
+}
+
+/*-----------------------------------------------------------------------*/
+
+/* Half pixel interpolation in the x direction */
+static inline void MC_avg_x_mmx (const int width, int height, uint8_t * dest,
+ const uint8_t * ref, const int stride)
+{
+ mmx_zero_reg ();
+
+ do {
+ mmx_interp_average_2_U8 (dest, ref, ref+1);
+
+ if (width == 16)
+ mmx_interp_average_2_U8 (dest+8, ref+8, ref+9);
+
+ dest += stride;
+ ref += stride;
+ } while (--height);
+}
+
+static void MC_avg_x_16_mmx (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg_x_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_avg_x_8_mmx (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg_x_mmx (8, height, dest, ref, stride);
+}
+
+/*-----------------------------------------------------------------------*/
+
+static inline void MC_put_x_mmx (const int width, int height, uint8_t * dest,
+ const uint8_t * ref, const int stride)
+{
+ mmx_zero_reg ();
+
+ do {
+ mmx_average_2_U8 (dest, ref, ref+1);
+
+ if (width == 16)
+ mmx_average_2_U8 (dest+8, ref+8, ref+9);
+
+ dest += stride;
+ ref += stride;
+ } while (--height);
+}
+
+static void MC_put_x_16_mmx (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put_x_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_put_x_8_mmx (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put_x_mmx (8, height, dest, ref, stride);
+}
+
+/*-----------------------------------------------------------------------*/
+
+static inline void MC_avg_xy_mmx (const int width, int height, uint8_t * dest,
+ const uint8_t * ref, const int stride)
+{
+ const uint8_t * ref_next = ref + stride;
+
+ mmx_zero_reg ();
+
+ do {
+ mmx_interp_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1);
+
+ if (width == 16)
+ mmx_interp_average_4_U8 (dest+8, ref+8, ref+9,
+ ref_next+8, ref_next+9);
+
+ dest += stride;
+ ref += stride;
+ ref_next += stride;
+ } while (--height);
+}
+
+static void MC_avg_xy_16_mmx (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg_xy_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_avg_xy_8_mmx (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg_xy_mmx (8, height, dest, ref, stride);
+}
+
+/*-----------------------------------------------------------------------*/
+
+static inline void MC_put_xy_mmx (const int width, int height, uint8_t * dest,
+ const uint8_t * ref, const int stride)
+{
+ const uint8_t * ref_next = ref + stride;
+
+ mmx_zero_reg ();
+
+ do {
+ mmx_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1);
+
+ if (width == 16)
+ mmx_average_4_U8 (dest+8, ref+8, ref+9, ref_next+8, ref_next+9);
+
+ dest += stride;
+ ref += stride;
+ ref_next += stride;
+ } while (--height);
+}
+
+static void MC_put_xy_16_mmx (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put_xy_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_put_xy_8_mmx (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put_xy_mmx (8, height, dest, ref, stride);
+}
+
+/*-----------------------------------------------------------------------*/
+
+static inline void MC_avg_y_mmx (const int width, int height, uint8_t * dest,
+ const uint8_t * ref, const int stride)
+{
+ const uint8_t * ref_next = ref + stride;
+
+ mmx_zero_reg ();
+
+ do {
+ mmx_interp_average_2_U8 (dest, ref, ref_next);
+
+ if (width == 16)
+ mmx_interp_average_2_U8 (dest+8, ref+8, ref_next+8);
+
+ dest += stride;
+ ref += stride;
+ ref_next += stride;
+ } while (--height);
+}
+
+static void MC_avg_y_16_mmx (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg_y_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_avg_y_8_mmx (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg_y_mmx (8, height, dest, ref, stride);
+}
+
+/*-----------------------------------------------------------------------*/
+
+static inline void MC_put_y_mmx (const int width, int height, uint8_t * dest,
+ const uint8_t * ref, const int stride)
+{
+ const uint8_t * ref_next = ref + stride;
+
+ mmx_zero_reg ();
+
+ do {
+ mmx_average_2_U8 (dest, ref, ref_next);
+
+ if (width == 16)
+ mmx_average_2_U8 (dest+8, ref+8, ref_next+8);
+
+ dest += stride;
+ ref += stride;
+ ref_next += stride;
+ } while (--height);
+}
+
+static void MC_put_y_16_mmx (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put_y_mmx (16, height, dest, ref, stride);
+}
+
+static void MC_put_y_8_mmx (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put_y_mmx (8, height, dest, ref, stride);
+}
+
+
+MPEG2_MC_EXTERN (mmx)
+
+
+
+
+
+
+
+/* CPU_MMXEXT/CPU_3DNOW adaptation layer */
+
+#define pavg_r2r(src,dest) \
+do { \
+ if (cpu == CPU_MMXEXT) \
+ pavgb_r2r (src, dest); \
+ else \
+ pavgusb_r2r (src, dest); \
+} while (0)
+
+#define pavg_m2r(src,dest) \
+do { \
+ if (cpu == CPU_MMXEXT) \
+ pavgb_m2r (src, dest); \
+ else \
+ pavgusb_m2r (src, dest); \
+} while (0)
+
+
+/* CPU_MMXEXT code */
+
+
+static inline void MC_put1_8 (int height, uint8_t * dest, const uint8_t * ref,
+ const int stride)
+{
+ do {
+ movq_m2r (*ref, mm0);
+ movq_r2m (mm0, *dest);
+ ref += stride;
+ dest += stride;
+ } while (--height);
+}
+
+static inline void MC_put1_16 (int height, uint8_t * dest, const uint8_t * ref,
+ const int stride)
+{
+ do {
+ movq_m2r (*ref, mm0);
+ movq_m2r (*(ref+8), mm1);
+ ref += stride;
+ movq_r2m (mm0, *dest);
+ movq_r2m (mm1, *(dest+8));
+ dest += stride;
+ } while (--height);
+}
+
+static inline void MC_avg1_8 (int height, uint8_t * dest, const uint8_t * ref,
+ const int stride, const int cpu)
+{
+ do {
+ movq_m2r (*ref, mm0);
+ pavg_m2r (*dest, mm0);
+ ref += stride;
+ movq_r2m (mm0, *dest);
+ dest += stride;
+ } while (--height);
+}
+
+static inline void MC_avg1_16 (int height, uint8_t * dest, const uint8_t * ref,
+ const int stride, const int cpu)
+{
+ do {
+ movq_m2r (*ref, mm0);
+ movq_m2r (*(ref+8), mm1);
+ pavg_m2r (*dest, mm0);
+ pavg_m2r (*(dest+8), mm1);
+ movq_r2m (mm0, *dest);
+ ref += stride;
+ movq_r2m (mm1, *(dest+8));
+ dest += stride;
+ } while (--height);
+}
+
+static inline void MC_put2_8 (int height, uint8_t * dest, const uint8_t * ref,
+ const int stride, const int offset,
+ const int cpu)
+{
+ do {
+ movq_m2r (*ref, mm0);
+ pavg_m2r (*(ref+offset), mm0);
+ ref += stride;
+ movq_r2m (mm0, *dest);
+ dest += stride;
+ } while (--height);
+}
+
+static inline void MC_put2_16 (int height, uint8_t * dest, const uint8_t * ref,
+ const int stride, const int offset,
+ const int cpu)
+{
+ do {
+ movq_m2r (*ref, mm0);
+ movq_m2r (*(ref+8), mm1);
+ pavg_m2r (*(ref+offset), mm0);
+ pavg_m2r (*(ref+offset+8), mm1);
+ movq_r2m (mm0, *dest);
+ ref += stride;
+ movq_r2m (mm1, *(dest+8));
+ dest += stride;
+ } while (--height);
+}
+
+static inline void MC_avg2_8 (int height, uint8_t * dest, const uint8_t * ref,
+ const int stride, const int offset,
+ const int cpu)
+{
+ do {
+ movq_m2r (*ref, mm0);
+ pavg_m2r (*(ref+offset), mm0);
+ pavg_m2r (*dest, mm0);
+ ref += stride;
+ movq_r2m (mm0, *dest);
+ dest += stride;
+ } while (--height);
+}
+
+static inline void MC_avg2_16 (int height, uint8_t * dest, const uint8_t * ref,
+ const int stride, const int offset,
+ const int cpu)
+{
+ do {
+ movq_m2r (*ref, mm0);
+ movq_m2r (*(ref+8), mm1);
+ pavg_m2r (*(ref+offset), mm0);
+ pavg_m2r (*(ref+offset+8), mm1);
+ pavg_m2r (*dest, mm0);
+ pavg_m2r (*(dest+8), mm1);
+ ref += stride;
+ movq_r2m (mm0, *dest);
+ movq_r2m (mm1, *(dest+8));
+ dest += stride;
+ } while (--height);
+}
+
+static mmx_t mask_one = {0x0101010101010101LL};
+
+static inline void MC_put4_8 (int height, uint8_t * dest, const uint8_t * ref,
+ const int stride, const int cpu)
+{
+ movq_m2r (*ref, mm0);
+ movq_m2r (*(ref+1), mm1);
+ movq_r2r (mm0, mm7);
+ pxor_r2r (mm1, mm7);
+ pavg_r2r (mm1, mm0);
+ ref += stride;
+
+ do {
+ movq_m2r (*ref, mm2);
+ movq_r2r (mm0, mm5);
+
+ movq_m2r (*(ref+1), mm3);
+ movq_r2r (mm2, mm6);
+
+ pxor_r2r (mm3, mm6);
+ pavg_r2r (mm3, mm2);
+
+ por_r2r (mm6, mm7);
+ pxor_r2r (mm2, mm5);
+
+ pand_r2r (mm5, mm7);
+ pavg_r2r (mm2, mm0);
+
+ pand_m2r (mask_one, mm7);
+
+ psubusb_r2r (mm7, mm0);
+
+ ref += stride;
+ movq_r2m (mm0, *dest);
+ dest += stride;
+
+ movq_r2r (mm6, mm7); /* unroll ! */
+ movq_r2r (mm2, mm0); /* unroll ! */
+ } while (--height);
+}
+
+static inline void MC_put4_16 (int height, uint8_t * dest, const uint8_t * ref,
+ const int stride, const int cpu)
+{
+ do {
+ movq_m2r (*ref, mm0);
+ movq_m2r (*(ref+stride+1), mm1);
+ movq_r2r (mm0, mm7);
+ movq_m2r (*(ref+1), mm2);
+ pxor_r2r (mm1, mm7);
+ movq_m2r (*(ref+stride), mm3);
+ movq_r2r (mm2, mm6);
+ pxor_r2r (mm3, mm6);
+ pavg_r2r (mm1, mm0);
+ pavg_r2r (mm3, mm2);
+ por_r2r (mm6, mm7);
+ movq_r2r (mm0, mm6);
+ pxor_r2r (mm2, mm6);
+ pand_r2r (mm6, mm7);
+ pand_m2r (mask_one, mm7);
+ pavg_r2r (mm2, mm0);
+ psubusb_r2r (mm7, mm0);
+ movq_r2m (mm0, *dest);
+
+ movq_m2r (*(ref+8), mm0);
+ movq_m2r (*(ref+stride+9), mm1);
+ movq_r2r (mm0, mm7);
+ movq_m2r (*(ref+9), mm2);
+ pxor_r2r (mm1, mm7);
+ movq_m2r (*(ref+stride+8), mm3);
+ movq_r2r (mm2, mm6);
+ pxor_r2r (mm3, mm6);
+ pavg_r2r (mm1, mm0);
+ pavg_r2r (mm3, mm2);
+ por_r2r (mm6, mm7);
+ movq_r2r (mm0, mm6);
+ pxor_r2r (mm2, mm6);
+ pand_r2r (mm6, mm7);
+ pand_m2r (mask_one, mm7);
+ pavg_r2r (mm2, mm0);
+ psubusb_r2r (mm7, mm0);
+ ref += stride;
+ movq_r2m (mm0, *(dest+8));
+ dest += stride;
+ } while (--height);
+}
+
+static inline void MC_avg4_8 (int height, uint8_t * dest, const uint8_t * ref,
+ const int stride, const int cpu)
+{
+ do {
+ movq_m2r (*ref, mm0);
+ movq_m2r (*(ref+stride+1), mm1);
+ movq_r2r (mm0, mm7);
+ movq_m2r (*(ref+1), mm2);
+ pxor_r2r (mm1, mm7);
+ movq_m2r (*(ref+stride), mm3);
+ movq_r2r (mm2, mm6);
+ pxor_r2r (mm3, mm6);
+ pavg_r2r (mm1, mm0);
+ pavg_r2r (mm3, mm2);
+ por_r2r (mm6, mm7);
+ movq_r2r (mm0, mm6);
+ pxor_r2r (mm2, mm6);
+ pand_r2r (mm6, mm7);
+ pand_m2r (mask_one, mm7);
+ pavg_r2r (mm2, mm0);
+ psubusb_r2r (mm7, mm0);
+ movq_m2r (*dest, mm1);
+ pavg_r2r (mm1, mm0);
+ ref += stride;
+ movq_r2m (mm0, *dest);
+ dest += stride;
+ } while (--height);
+}
+
+static inline void MC_avg4_16 (int height, uint8_t * dest, const uint8_t * ref,
+ const int stride, const int cpu)
+{
+ do {
+ movq_m2r (*ref, mm0);
+ movq_m2r (*(ref+stride+1), mm1);
+ movq_r2r (mm0, mm7);
+ movq_m2r (*(ref+1), mm2);
+ pxor_r2r (mm1, mm7);
+ movq_m2r (*(ref+stride), mm3);
+ movq_r2r (mm2, mm6);
+ pxor_r2r (mm3, mm6);
+ pavg_r2r (mm1, mm0);
+ pavg_r2r (mm3, mm2);
+ por_r2r (mm6, mm7);
+ movq_r2r (mm0, mm6);
+ pxor_r2r (mm2, mm6);
+ pand_r2r (mm6, mm7);
+ pand_m2r (mask_one, mm7);
+ pavg_r2r (mm2, mm0);
+ psubusb_r2r (mm7, mm0);
+ movq_m2r (*dest, mm1);
+ pavg_r2r (mm1, mm0);
+ movq_r2m (mm0, *dest);
+
+ movq_m2r (*(ref+8), mm0);
+ movq_m2r (*(ref+stride+9), mm1);
+ movq_r2r (mm0, mm7);
+ movq_m2r (*(ref+9), mm2);
+ pxor_r2r (mm1, mm7);
+ movq_m2r (*(ref+stride+8), mm3);
+ movq_r2r (mm2, mm6);
+ pxor_r2r (mm3, mm6);
+ pavg_r2r (mm1, mm0);
+ pavg_r2r (mm3, mm2);
+ por_r2r (mm6, mm7);
+ movq_r2r (mm0, mm6);
+ pxor_r2r (mm2, mm6);
+ pand_r2r (mm6, mm7);
+ pand_m2r (mask_one, mm7);
+ pavg_r2r (mm2, mm0);
+ psubusb_r2r (mm7, mm0);
+ movq_m2r (*(dest+8), mm1);
+ pavg_r2r (mm1, mm0);
+ ref += stride;
+ movq_r2m (mm0, *(dest+8));
+ dest += stride;
+ } while (--height);
+}
+
+static void MC_avg_o_16_mmxext (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT);
+}
+
+static void MC_avg_o_8_mmxext (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT);
+}
+
+static void MC_put_o_16_mmxext (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put1_16 (height, dest, ref, stride);
+}
+
+static void MC_put_o_8_mmxext (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put1_8 (height, dest, ref, stride);
+}
+
+static void MC_avg_x_16_mmxext (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
+}
+
+static void MC_avg_x_8_mmxext (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
+}
+
+static void MC_put_x_16_mmxext (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT);
+}
+
+static void MC_put_x_8_mmxext (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT);
+}
+
+static void MC_avg_y_16_mmxext (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
+}
+
+static void MC_avg_y_8_mmxext (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
+}
+
+static void MC_put_y_16_mmxext (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT);
+}
+
+static void MC_put_y_8_mmxext (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT);
+}
+
+static void MC_avg_xy_16_mmxext (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT);
+}
+
+static void MC_avg_xy_8_mmxext (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT);
+}
+
+static void MC_put_xy_16_mmxext (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT);
+}
+
+static void MC_put_xy_8_mmxext (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT);
+}
+
+
+MPEG2_MC_EXTERN (mmxext)
+
+
+
+static void MC_avg_o_16_3dnow (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW);
+}
+
+static void MC_avg_o_8_3dnow (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW);
+}
+
+static void MC_put_o_16_3dnow (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put1_16 (height, dest, ref, stride);
+}
+
+static void MC_put_o_8_3dnow (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put1_8 (height, dest, ref, stride);
+}
+
+static void MC_avg_x_16_3dnow (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
+}
+
+static void MC_avg_x_8_3dnow (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
+}
+
+static void MC_put_x_16_3dnow (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
+}
+
+static void MC_put_x_8_3dnow (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
+}
+
+static void MC_avg_y_16_3dnow (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
+}
+
+static void MC_avg_y_8_3dnow (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
+}
+
+static void MC_put_y_16_3dnow (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
+}
+
+static void MC_put_y_8_3dnow (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
+}
+
+static void MC_avg_xy_16_3dnow (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg4_16 (height, dest, ref, stride, CPU_3DNOW);
+}
+
+static void MC_avg_xy_8_3dnow (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_avg4_8 (height, dest, ref, stride, CPU_3DNOW);
+}
+
+static void MC_put_xy_16_3dnow (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put4_16 (height, dest, ref, stride, CPU_3DNOW);
+}
+
+static void MC_put_xy_8_3dnow (uint8_t * dest, const uint8_t * ref,
+ int stride, int height)
+{
+ MC_put4_8 (height, dest, ref, stride, CPU_3DNOW);
+}
+
+
+MPEG2_MC_EXTERN (3dnow)
+
+#endif
diff --git a/libmpeg2/motion_comp_vis.c b/libmpeg2/motion_comp_vis.c
new file mode 100644
index 0000000..54c0f7e
--- /dev/null
+++ b/libmpeg2/motion_comp_vis.c
@@ -0,0 +1,2061 @@
+/*
+ * motion_comp_vis.c
+ * Copyright (C) 2003 David S. Miller <davem@redhat.com>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#ifdef ARCH_SPARC
+
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "attributes.h"
+#include "mpeg2_internal.h"
+#include "vis.h"
+
+/* The trick used in some of this file is the formula from the MMX
+ * motion comp code, which is:
+ *
+ * (x+y+1)>>1 == (x|y)-((x^y)>>1)
+ *
+ * This allows us to average 8 bytes at a time in a 64-bit FPU reg.
+ * We avoid overflows by masking before we do the shift, and we
+ * implement the shift by multiplying by 1/2 using mul8x16. So in
+ * VIS this is (assume 'x' is in f0, 'y' is in f2, a repeating mask
+ * of '0xfe' is in f4, a repeating mask of '0x7f' is in f6, and
+ * the value 0x80808080 is in f8):
+ *
+ * fxor f0, f2, f10
+ * fand f10, f4, f10
+ * fmul8x16 f8, f10, f10
+ * fand f10, f6, f10
+ * for f0, f2, f12
+ * fpsub16 f12, f10, f10
+ */
+
+#define DUP4(x) {x, x, x, x}
+#define DUP8(x) {x, x, x, x, x, x, x, x}
+static const int16_t constants1[] ATTR_ALIGN(8) = DUP4 (1);
+static const int16_t constants2[] ATTR_ALIGN(8) = DUP4 (2);
+static const int16_t constants3[] ATTR_ALIGN(8) = DUP4 (3);
+static const int16_t constants6[] ATTR_ALIGN(8) = DUP4 (6);
+static const int8_t constants_fe[] ATTR_ALIGN(8) = DUP8 (0xfe);
+static const int8_t constants_7f[] ATTR_ALIGN(8) = DUP8 (0x7f);
+static const int8_t constants128[] ATTR_ALIGN(8) = DUP8 (128);
+static const int16_t constants256_512[] ATTR_ALIGN(8) =
+ {256, 512, 256, 512};
+static const int16_t constants256_1024[] ATTR_ALIGN(8) =
+ {256, 1024, 256, 1024};
+
+#define REF_0 0
+#define REF_0_1 1
+#define REF_2 2
+#define REF_2_1 3
+#define REF_4 4
+#define REF_4_1 5
+#define REF_6 6
+#define REF_6_1 7
+#define REF_S0 8
+#define REF_S0_1 9
+#define REF_S2 10
+#define REF_S2_1 11
+#define REF_S4 12
+#define REF_S4_1 13
+#define REF_S6 14
+#define REF_S6_1 15
+#define DST_0 16
+#define DST_1 17
+#define DST_2 18
+#define DST_3 19
+#define CONST_1 20
+#define CONST_2 20
+#define CONST_3 20
+#define CONST_6 20
+#define MASK_fe 20
+#define CONST_128 22
+#define CONST_256 22
+#define CONST_512 22
+#define CONST_1024 22
+#define TMP0 24
+#define TMP1 25
+#define TMP2 26
+#define TMP3 27
+#define TMP4 28
+#define TMP5 29
+#define ZERO 30
+#define MASK_7f 30
+
+#define TMP6 32
+#define TMP8 34
+#define TMP10 36
+#define TMP12 38
+#define TMP14 40
+#define TMP16 42
+#define TMP18 44
+#define TMP20 46
+#define TMP22 48
+#define TMP24 50
+#define TMP26 52
+#define TMP28 54
+#define TMP30 56
+#define TMP32 58
+
+static void MC_put_o_16_vis (uint8_t * dest, const uint8_t * _ref,
+ const int stride, int height)
+{
+ uint8_t *ref = (uint8_t *) _ref;
+ int offset;
+
+ ref = vis_alignaddr(ref);
+ offset = (ref != _ref) ? 16 : 0;
+ do { /* 5 cycles */
+ vis_ld64(ref[0], TMP0);
+
+ vis_ld64_2(ref, 8, TMP2);
+
+ vis_ld64_2(ref, offset, TMP4);
+ ref += stride;
+
+ vis_faligndata(TMP0, TMP2, REF_0);
+ vis_st64(REF_0, dest[0]);
+
+ vis_faligndata(TMP2, TMP4, REF_2);
+ vis_st64_2(REF_2, dest, 8);
+ dest += stride;
+ } while (--height);
+}
+
+static void MC_put_o_8_vis (uint8_t * dest, const uint8_t * _ref,
+ const int stride, int height)
+{
+ uint8_t *ref = (uint8_t *) _ref;
+ int offset;
+
+ ref = vis_alignaddr(ref);
+ offset = (ref != _ref) ? 8 : 0;
+ do { /* 4 cycles */
+ vis_ld64(ref[0], TMP0);
+
+ vis_ld64_2(ref, offset, TMP2);
+ ref += stride;
+
+ /* stall */
+
+ vis_faligndata(TMP0, TMP2, REF_0);
+ vis_st64(REF_0, dest[0]);
+ dest += stride;
+ } while (--height);
+}
+
+
+/* MC_avg_o_16_vis: average a 16-pixel-wide, `height`-row reference block
+ * into `dest` (no sub-pel interpolation).  Each byte is combined with the
+ * bitwise rounding average  avg(a,b) = (a | b) - (((a ^ b) & 0xfe) >> 1);
+ * the per-byte >>1 is done with vis_mul8x16 against CONST_128.  Two rows
+ * are produced per loop pass; assumes height is even and >= 4 (the
+ * do/while runs at least once, last two rows handled by the epilogue). */
+static void MC_avg_o_16_vis (uint8_t * dest, const uint8_t * _ref,
+			     const int stride, int height)
+{
+    uint8_t *ref = (uint8_t *) _ref;
+    int stride_8 = stride + 8;
+    int offset;
+
+    ref = vis_alignaddr(ref);
+    /* if ref was unaligned, alignaddr rounded it down: a third 64-bit
+     * word (at +16) is then needed to faligndata a full 16-byte row */
+    offset = (ref != _ref) ? 16 : 0;
+
+    /* prologue: prime REF_0/REF_2 with the first source row and load the
+     * bit-trick constants */
+    vis_ld64(ref[0], TMP0);
+
+    vis_ld64(ref[8], TMP2);
+
+    vis_ld64_2(ref, offset, TMP4);
+
+    vis_ld64(dest[0], DST_0);
+
+    vis_ld64(dest[8], DST_2);
+
+    vis_ld64(constants_fe[0], MASK_fe);
+    vis_faligndata(TMP0, TMP2, REF_0);
+
+    vis_ld64(constants_7f[0], MASK_7f);
+    vis_faligndata(TMP2, TMP4, REF_2);
+
+    vis_ld64(constants128[0], CONST_128);
+
+    ref += stride;
+    height = (height >> 1) - 1;
+
+    do {	/* 24 cycles */
+	vis_ld64(ref[0], TMP0);
+	vis_xor(DST_0, REF_0, TMP6);
+
+	vis_ld64_2(ref, 8, TMP2);
+	vis_and(TMP6, MASK_fe, TMP6);
+
+	vis_ld64_2(ref, offset, TMP4);
+	ref += stride;
+	vis_mul8x16(CONST_128, TMP6, TMP6);
+	vis_xor(DST_2, REF_2, TMP8);
+
+	vis_and(TMP8, MASK_fe, TMP8);
+
+	vis_or(DST_0, REF_0, TMP10);
+	vis_ld64_2(dest, stride, DST_0);
+	vis_mul8x16(CONST_128, TMP8, TMP8);
+
+	vis_or(DST_2, REF_2, TMP12);
+	vis_ld64_2(dest, stride_8, DST_2);
+
+	vis_ld64(ref[0], TMP14);
+	vis_and(TMP6, MASK_7f, TMP6);
+
+	vis_and(TMP8, MASK_7f, TMP8);
+
+	vis_psub16(TMP10, TMP6, TMP6);
+	vis_st64(TMP6, dest[0]);
+
+	vis_psub16(TMP12, TMP8, TMP8);
+	vis_st64_2(TMP8, dest, 8);
+
+	/* second row of the pair */
+	dest += stride;
+	vis_ld64_2(ref, 8, TMP16);
+	vis_faligndata(TMP0, TMP2, REF_0);
+
+	vis_ld64_2(ref, offset, TMP18);
+	vis_faligndata(TMP2, TMP4, REF_2);
+	ref += stride;
+
+	vis_xor(DST_0, REF_0, TMP20);
+
+	vis_and(TMP20, MASK_fe, TMP20);
+
+	vis_xor(DST_2, REF_2, TMP22);
+	vis_mul8x16(CONST_128, TMP20, TMP20);
+
+	vis_and(TMP22, MASK_fe, TMP22);
+
+	vis_or(DST_0, REF_0, TMP24);
+	vis_mul8x16(CONST_128, TMP22, TMP22);
+
+	vis_or(DST_2, REF_2, TMP26);
+
+	vis_ld64_2(dest, stride, DST_0);
+	vis_faligndata(TMP14, TMP16, REF_0);
+
+	vis_ld64_2(dest, stride_8, DST_2);
+	vis_faligndata(TMP16, TMP18, REF_2);
+
+	vis_and(TMP20, MASK_7f, TMP20);
+
+	vis_and(TMP22, MASK_7f, TMP22);
+
+	vis_psub16(TMP24, TMP20, TMP20);
+	vis_st64(TMP20, dest[0]);
+
+	vis_psub16(TMP26, TMP22, TMP22);
+	vis_st64_2(TMP22, dest, 8);
+	dest += stride;
+    } while (--height);
+
+    /* epilogue: last two rows (no further loads past the block) */
+    vis_ld64(ref[0], TMP0);
+    vis_xor(DST_0, REF_0, TMP6);
+
+    vis_ld64_2(ref, 8, TMP2);
+    vis_and(TMP6, MASK_fe, TMP6);
+
+    vis_ld64_2(ref, offset, TMP4);
+    vis_mul8x16(CONST_128, TMP6, TMP6);
+    vis_xor(DST_2, REF_2, TMP8);
+
+    vis_and(TMP8, MASK_fe, TMP8);
+
+    vis_or(DST_0, REF_0, TMP10);
+    vis_ld64_2(dest, stride, DST_0);
+    vis_mul8x16(CONST_128, TMP8, TMP8);
+
+    vis_or(DST_2, REF_2, TMP12);
+    vis_ld64_2(dest, stride_8, DST_2);
+
+    vis_ld64(ref[0], TMP14);
+    vis_and(TMP6, MASK_7f, TMP6);
+
+    vis_and(TMP8, MASK_7f, TMP8);
+
+    vis_psub16(TMP10, TMP6, TMP6);
+    vis_st64(TMP6, dest[0]);
+
+    vis_psub16(TMP12, TMP8, TMP8);
+    vis_st64_2(TMP8, dest, 8);
+
+    dest += stride;
+    vis_faligndata(TMP0, TMP2, REF_0);
+
+    vis_faligndata(TMP2, TMP4, REF_2);
+
+    vis_xor(DST_0, REF_0, TMP20);
+
+    vis_and(TMP20, MASK_fe, TMP20);
+
+    vis_xor(DST_2, REF_2, TMP22);
+    vis_mul8x16(CONST_128, TMP20, TMP20);
+
+    vis_and(TMP22, MASK_fe, TMP22);
+
+    vis_or(DST_0, REF_0, TMP24);
+    vis_mul8x16(CONST_128, TMP22, TMP22);
+
+    vis_or(DST_2, REF_2, TMP26);
+
+    vis_and(TMP20, MASK_7f, TMP20);
+
+    vis_and(TMP22, MASK_7f, TMP22);
+
+    vis_psub16(TMP24, TMP20, TMP20);
+    vis_st64(TMP20, dest[0]);
+
+    vis_psub16(TMP26, TMP22, TMP22);
+    vis_st64_2(TMP22, dest, 8);
+}
+
+/* MC_avg_o_8_vis: 8-pixel-wide variant of MC_avg_o_16_vis.  Averages the
+ * reference block into `dest` with the same bitwise rounding-average
+ * trick ((a | b) - (((a ^ b) & 0xfe) >> 1)); two rows per loop pass,
+ * so height is assumed even and >= 4. */
+static void MC_avg_o_8_vis (uint8_t * dest, const uint8_t * _ref,
+			    const int stride, int height)
+{
+    uint8_t *ref = (uint8_t *) _ref;
+    int offset;
+
+    ref = vis_alignaddr(ref);
+    /* second 64-bit word is only needed when ref was unaligned */
+    offset = (ref != _ref) ? 8 : 0;
+
+    vis_ld64(ref[0], TMP0);
+
+    vis_ld64_2(ref, offset, TMP2);
+
+    vis_ld64(dest[0], DST_0);
+
+    vis_ld64(constants_fe[0], MASK_fe);
+
+    vis_ld64(constants_7f[0], MASK_7f);
+    vis_faligndata(TMP0, TMP2, REF_0);
+
+    vis_ld64(constants128[0], CONST_128);
+
+    ref += stride;
+    height = (height >> 1) - 1;
+
+    do {	/* 12 cycles */
+	vis_ld64(ref[0], TMP0);
+	vis_xor(DST_0, REF_0, TMP4);
+
+	vis_ld64_2(ref, offset, TMP2);
+	vis_and(TMP4, MASK_fe, TMP4);
+
+	vis_or(DST_0, REF_0, TMP6);
+	vis_ld64_2(dest, stride, DST_0);
+	ref += stride;
+	vis_mul8x16(CONST_128, TMP4, TMP4);
+
+	vis_ld64(ref[0], TMP12);
+	vis_faligndata(TMP0, TMP2, REF_0);
+
+	vis_ld64_2(ref, offset, TMP2);
+	vis_xor(DST_0, REF_0, TMP0);
+	ref += stride;
+
+	vis_and(TMP0, MASK_fe, TMP0);
+
+	vis_and(TMP4, MASK_7f, TMP4);
+
+	vis_psub16(TMP6, TMP4, TMP4);
+	vis_st64(TMP4, dest[0]);
+	dest += stride;
+	vis_mul8x16(CONST_128, TMP0, TMP0);
+
+	vis_or(DST_0, REF_0, TMP6);
+	vis_ld64_2(dest, stride, DST_0);
+
+	vis_faligndata(TMP12, TMP2, REF_0);
+
+	vis_and(TMP0, MASK_7f, TMP0);
+
+	vis_psub16(TMP6, TMP0, TMP4);
+	vis_st64(TMP4, dest[0]);
+	dest += stride;
+    } while (--height);
+
+    /* epilogue: last two rows */
+    vis_ld64(ref[0], TMP0);
+    vis_xor(DST_0, REF_0, TMP4);
+
+    vis_ld64_2(ref, offset, TMP2);
+    vis_and(TMP4, MASK_fe, TMP4);
+
+    vis_or(DST_0, REF_0, TMP6);
+    vis_ld64_2(dest, stride, DST_0);
+    vis_mul8x16(CONST_128, TMP4, TMP4);
+
+    vis_faligndata(TMP0, TMP2, REF_0);
+
+    vis_xor(DST_0, REF_0, TMP0);
+
+    vis_and(TMP0, MASK_fe, TMP0);
+
+    vis_and(TMP4, MASK_7f, TMP4);
+
+    vis_psub16(TMP6, TMP4, TMP4);
+    vis_st64(TMP4, dest[0]);
+    dest += stride;
+    vis_mul8x16(CONST_128, TMP0, TMP0);
+
+    vis_or(DST_0, REF_0, TMP6);
+
+    vis_and(TMP0, MASK_7f, TMP0);
+
+    vis_psub16(TMP6, TMP0, TMP4);
+    vis_st64(TMP4, dest[0]);
+}
+
+/* MC_put_x_16_vis: 16-wide horizontal half-pel prediction (put).  Each
+ * output byte is the rounding average of ref[x] and ref[x + 1], computed
+ * with the bitwise trick (a | b) - (((a ^ b) & 0xfe) >> 1).  REF_0/REF_4
+ * hold the row aligned at `off`, REF_2/REF_6 the same row shifted one
+ * byte (aligned at off + 1).  When off == 7 the +1 alignment would need
+ * an offset of 8, which alignaddr cannot express, so vis_src1 is used
+ * instead.  Two rows per loop pass; height assumed even and >= 4. */
+static void MC_put_x_16_vis (uint8_t * dest, const uint8_t * _ref,
+			     const int stride, int height)
+{
+    uint8_t *ref = (uint8_t *) _ref;
+    unsigned long off = (unsigned long) ref & 0x7;
+    unsigned long off_plus_1 = off + 1;
+
+    ref = vis_alignaddr(ref);
+
+    vis_ld64(ref[0], TMP0);
+
+    vis_ld64_2(ref, 8, TMP2);
+
+    vis_ld64_2(ref, 16, TMP4);
+
+    vis_ld64(constants_fe[0], MASK_fe);
+
+    vis_ld64(constants_7f[0], MASK_7f);
+    vis_faligndata(TMP0, TMP2, REF_0);
+
+    vis_ld64(constants128[0], CONST_128);
+    vis_faligndata(TMP2, TMP4, REF_4);
+
+    if (off != 0x7) {
+	vis_alignaddr_g0((void *)off_plus_1);
+	vis_faligndata(TMP0, TMP2, REF_2);
+	vis_faligndata(TMP2, TMP4, REF_6);
+    } else {
+	vis_src1(TMP2, REF_2);
+	vis_src1(TMP4, REF_6);
+    }
+
+    ref += stride;
+    height = (height >> 1) - 1;
+
+    do {	/* 34 cycles */
+	vis_ld64(ref[0], TMP0);
+	vis_xor(REF_0, REF_2, TMP6);
+
+	vis_ld64_2(ref, 8, TMP2);
+	vis_xor(REF_4, REF_6, TMP8);
+
+	vis_ld64_2(ref, 16, TMP4);
+	vis_and(TMP6, MASK_fe, TMP6);
+	ref += stride;
+
+	vis_ld64(ref[0], TMP14);
+	vis_mul8x16(CONST_128, TMP6, TMP6);
+	vis_and(TMP8, MASK_fe, TMP8);
+
+	vis_ld64_2(ref, 8, TMP16);
+	vis_mul8x16(CONST_128, TMP8, TMP8);
+	vis_or(REF_0, REF_2, TMP10);
+
+	vis_ld64_2(ref, 16, TMP18);
+	ref += stride;
+	vis_or(REF_4, REF_6, TMP12);
+
+	vis_alignaddr_g0((void *)off);
+
+	vis_faligndata(TMP0, TMP2, REF_0);
+
+	vis_faligndata(TMP2, TMP4, REF_4);
+
+	if (off != 0x7) {
+	    vis_alignaddr_g0((void *)off_plus_1);
+	    vis_faligndata(TMP0, TMP2, REF_2);
+	    vis_faligndata(TMP2, TMP4, REF_6);
+	} else {
+	    vis_src1(TMP2, REF_2);
+	    vis_src1(TMP4, REF_6);
+	}
+
+	vis_and(TMP6, MASK_7f, TMP6);
+
+	vis_and(TMP8, MASK_7f, TMP8);
+
+	vis_psub16(TMP10, TMP6, TMP6);
+	vis_st64(TMP6, dest[0]);
+
+	vis_psub16(TMP12, TMP8, TMP8);
+	vis_st64_2(TMP8, dest, 8);
+	dest += stride;
+
+	/* second row of the pair */
+	vis_xor(REF_0, REF_2, TMP6);
+
+	vis_xor(REF_4, REF_6, TMP8);
+
+	vis_and(TMP6, MASK_fe, TMP6);
+
+	vis_mul8x16(CONST_128, TMP6, TMP6);
+	vis_and(TMP8, MASK_fe, TMP8);
+
+	vis_mul8x16(CONST_128, TMP8, TMP8);
+	vis_or(REF_0, REF_2, TMP10);
+
+	vis_or(REF_4, REF_6, TMP12);
+
+	vis_alignaddr_g0((void *)off);
+
+	vis_faligndata(TMP14, TMP16, REF_0);
+
+	vis_faligndata(TMP16, TMP18, REF_4);
+
+	if (off != 0x7) {
+	    vis_alignaddr_g0((void *)off_plus_1);
+	    vis_faligndata(TMP14, TMP16, REF_2);
+	    vis_faligndata(TMP16, TMP18, REF_6);
+	} else {
+	    vis_src1(TMP16, REF_2);
+	    vis_src1(TMP18, REF_6);
+	}
+
+	vis_and(TMP6, MASK_7f, TMP6);
+
+	vis_and(TMP8, MASK_7f, TMP8);
+
+	vis_psub16(TMP10, TMP6, TMP6);
+	vis_st64(TMP6, dest[0]);
+
+	vis_psub16(TMP12, TMP8, TMP8);
+	vis_st64_2(TMP8, dest, 8);
+	dest += stride;
+    } while (--height);
+
+    /* epilogue: last two rows */
+    vis_ld64(ref[0], TMP0);
+    vis_xor(REF_0, REF_2, TMP6);
+
+    vis_ld64_2(ref, 8, TMP2);
+    vis_xor(REF_4, REF_6, TMP8);
+
+    vis_ld64_2(ref, 16, TMP4);
+    vis_and(TMP6, MASK_fe, TMP6);
+
+    vis_mul8x16(CONST_128, TMP6, TMP6);
+    vis_and(TMP8, MASK_fe, TMP8);
+
+    vis_mul8x16(CONST_128, TMP8, TMP8);
+    vis_or(REF_0, REF_2, TMP10);
+
+    vis_or(REF_4, REF_6, TMP12);
+
+    vis_alignaddr_g0((void *)off);
+
+    vis_faligndata(TMP0, TMP2, REF_0);
+
+    vis_faligndata(TMP2, TMP4, REF_4);
+
+    if (off != 0x7) {
+	vis_alignaddr_g0((void *)off_plus_1);
+	vis_faligndata(TMP0, TMP2, REF_2);
+	vis_faligndata(TMP2, TMP4, REF_6);
+    } else {
+	vis_src1(TMP2, REF_2);
+	vis_src1(TMP4, REF_6);
+    }
+
+    vis_and(TMP6, MASK_7f, TMP6);
+
+    vis_and(TMP8, MASK_7f, TMP8);
+
+    vis_psub16(TMP10, TMP6, TMP6);
+    vis_st64(TMP6, dest[0]);
+
+    vis_psub16(TMP12, TMP8, TMP8);
+    vis_st64_2(TMP8, dest, 8);
+    dest += stride;
+
+    vis_xor(REF_0, REF_2, TMP6);
+
+    vis_xor(REF_4, REF_6, TMP8);
+
+    vis_and(TMP6, MASK_fe, TMP6);
+
+    vis_mul8x16(CONST_128, TMP6, TMP6);
+    vis_and(TMP8, MASK_fe, TMP8);
+
+    vis_mul8x16(CONST_128, TMP8, TMP8);
+    vis_or(REF_0, REF_2, TMP10);
+
+    vis_or(REF_4, REF_6, TMP12);
+
+    vis_and(TMP6, MASK_7f, TMP6);
+
+    vis_and(TMP8, MASK_7f, TMP8);
+
+    vis_psub16(TMP10, TMP6, TMP6);
+    vis_st64(TMP6, dest[0]);
+
+    vis_psub16(TMP12, TMP8, TMP8);
+    vis_st64_2(TMP8, dest, 8);
+}
+
+/* MC_put_x_8_vis: 8-wide horizontal half-pel prediction (put).  Same
+ * scheme as MC_put_x_16_vis: REF_0 is the row aligned at `off`, REF_2 the
+ * row shifted one byte (off == 7 falls back to vis_src1), and the output
+ * is the bitwise rounding average of the two.  Two rows per loop pass;
+ * height assumed even and >= 4. */
+static void MC_put_x_8_vis (uint8_t * dest, const uint8_t * _ref,
+			    const int stride, int height)
+{
+    uint8_t *ref = (uint8_t *) _ref;
+    unsigned long off = (unsigned long) ref & 0x7;
+    unsigned long off_plus_1 = off + 1;
+
+    ref = vis_alignaddr(ref);
+
+    vis_ld64(ref[0], TMP0);
+
+    vis_ld64(ref[8], TMP2);
+
+    vis_ld64(constants_fe[0], MASK_fe);
+
+    vis_ld64(constants_7f[0], MASK_7f);
+
+    vis_ld64(constants128[0], CONST_128);
+    vis_faligndata(TMP0, TMP2, REF_0);
+
+    if (off != 0x7) {
+	vis_alignaddr_g0((void *)off_plus_1);
+	vis_faligndata(TMP0, TMP2, REF_2);
+    } else {
+	vis_src1(TMP2, REF_2);
+    }
+
+    ref += stride;
+    height = (height >> 1) - 1;
+
+    do {	/* 20 cycles */
+	vis_ld64(ref[0], TMP0);
+	vis_xor(REF_0, REF_2, TMP4);
+
+	vis_ld64_2(ref, 8, TMP2);
+	vis_and(TMP4, MASK_fe, TMP4);
+	ref += stride;
+
+	vis_ld64(ref[0], TMP8);
+	vis_or(REF_0, REF_2, TMP6);
+	vis_mul8x16(CONST_128, TMP4, TMP4);
+
+	vis_alignaddr_g0((void *)off);
+
+	vis_ld64_2(ref, 8, TMP10);
+	ref += stride;
+	vis_faligndata(TMP0, TMP2, REF_0);
+
+	if (off != 0x7) {
+	    vis_alignaddr_g0((void *)off_plus_1);
+	    vis_faligndata(TMP0, TMP2, REF_2);
+	} else {
+	    vis_src1(TMP2, REF_2);
+	}
+
+	vis_and(TMP4, MASK_7f, TMP4);
+
+	vis_psub16(TMP6, TMP4, DST_0);
+	vis_st64(DST_0, dest[0]);
+	dest += stride;
+
+	/* second row of the pair */
+	vis_xor(REF_0, REF_2, TMP12);
+
+	vis_and(TMP12, MASK_fe, TMP12);
+
+	vis_or(REF_0, REF_2, TMP14);
+	vis_mul8x16(CONST_128, TMP12, TMP12);
+
+	vis_alignaddr_g0((void *)off);
+	vis_faligndata(TMP8, TMP10, REF_0);
+	if (off != 0x7) {
+	    vis_alignaddr_g0((void *)off_plus_1);
+	    vis_faligndata(TMP8, TMP10, REF_2);
+	} else {
+	    vis_src1(TMP10, REF_2);
+	}
+
+	vis_and(TMP12, MASK_7f, TMP12);
+
+	vis_psub16(TMP14, TMP12, DST_0);
+	vis_st64(DST_0, dest[0]);
+	dest += stride;
+    } while (--height);
+
+    /* epilogue: last two rows */
+    vis_ld64(ref[0], TMP0);
+    vis_xor(REF_0, REF_2, TMP4);
+
+    vis_ld64_2(ref, 8, TMP2);
+    vis_and(TMP4, MASK_fe, TMP4);
+
+    vis_or(REF_0, REF_2, TMP6);
+    vis_mul8x16(CONST_128, TMP4, TMP4);
+
+    vis_alignaddr_g0((void *)off);
+
+    vis_faligndata(TMP0, TMP2, REF_0);
+
+    if (off != 0x7) {
+	vis_alignaddr_g0((void *)off_plus_1);
+	vis_faligndata(TMP0, TMP2, REF_2);
+    } else {
+	vis_src1(TMP2, REF_2);
+    }
+
+    vis_and(TMP4, MASK_7f, TMP4);
+
+    vis_psub16(TMP6, TMP4, DST_0);
+    vis_st64(DST_0, dest[0]);
+    dest += stride;
+
+    vis_xor(REF_0, REF_2, TMP12);
+
+    vis_and(TMP12, MASK_fe, TMP12);
+
+    vis_or(REF_0, REF_2, TMP14);
+    vis_mul8x16(CONST_128, TMP12, TMP12);
+
+    vis_and(TMP12, MASK_7f, TMP12);
+
+    vis_psub16(TMP14, TMP12, DST_0);
+    vis_st64(DST_0, dest[0]);
+    dest += stride;
+}
+
+/* MC_avg_x_16_vis: 16-wide horizontal half-pel prediction averaged into
+ * `dest`.  Unlike the put variants this works in 16-bit precision: bytes
+ * are widened with vis_mul8x16au/al against CONST_256/CONST_512, summed
+ * with the +1-shifted pixels and the existing dest, rounded by adding
+ * CONST_3, and repacked with vis_pack16 under a GSR scale factor of 5.
+ * One row per loop pass. */
+static void MC_avg_x_16_vis (uint8_t * dest, const uint8_t * _ref,
+			     const int stride, int height)
+{
+    uint8_t *ref = (uint8_t *) _ref;
+    unsigned long off = (unsigned long) ref & 0x7;
+    unsigned long off_plus_1 = off + 1;
+
+    vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+
+    vis_ld64(constants3[0], CONST_3);
+    vis_fzero(ZERO);
+    vis_ld64(constants256_512[0], CONST_256);
+
+    ref = vis_alignaddr(ref);
+    do {	/* 26 cycles */
+	vis_ld64(ref[0], TMP0);
+
+	vis_ld64(ref[8], TMP2);
+
+	vis_alignaddr_g0((void *)off);
+
+	vis_ld64(ref[16], TMP4);
+
+	vis_ld64(dest[0], DST_0);
+	vis_faligndata(TMP0, TMP2, REF_0);
+
+	vis_ld64(dest[8], DST_2);
+	vis_faligndata(TMP2, TMP4, REF_4);
+
+	/* REF_2/REF_6 = row shifted one byte; off == 7 needs vis_src1 */
+	if (off != 0x7) {
+	    vis_alignaddr_g0((void *)off_plus_1);
+	    vis_faligndata(TMP0, TMP2, REF_2);
+	    vis_faligndata(TMP2, TMP4, REF_6);
+	} else {
+	    vis_src1(TMP2, REF_2);
+	    vis_src1(TMP4, REF_6);
+	}
+
+	vis_mul8x16au(REF_0, CONST_256, TMP0);
+
+	vis_pmerge(ZERO, REF_2, TMP4);
+	vis_mul8x16au(REF_0_1, CONST_256, TMP2);
+
+	vis_pmerge(ZERO, REF_2_1, TMP6);
+
+	vis_padd16(TMP0, TMP4, TMP0);
+
+	vis_mul8x16al(DST_0, CONST_512, TMP4);
+	vis_padd16(TMP2, TMP6, TMP2);
+
+	vis_mul8x16al(DST_1, CONST_512, TMP6);
+
+	vis_mul8x16au(REF_6, CONST_256, TMP12);
+
+	vis_padd16(TMP0, TMP4, TMP0);
+	vis_mul8x16au(REF_6_1, CONST_256, TMP14);
+
+	vis_padd16(TMP2, TMP6, TMP2);
+	vis_mul8x16au(REF_4, CONST_256, TMP16);
+
+	vis_padd16(TMP0, CONST_3, TMP8);
+	vis_mul8x16au(REF_4_1, CONST_256, TMP18);
+
+	vis_padd16(TMP2, CONST_3, TMP10);
+	vis_pack16(TMP8, DST_0);
+
+	vis_pack16(TMP10, DST_1);
+	vis_padd16(TMP16, TMP12, TMP0);
+
+	vis_st64(DST_0, dest[0]);
+	vis_mul8x16al(DST_2, CONST_512, TMP4);
+	vis_padd16(TMP18, TMP14, TMP2);
+
+	vis_mul8x16al(DST_3, CONST_512, TMP6);
+	vis_padd16(TMP0, CONST_3, TMP0);
+
+	vis_padd16(TMP2, CONST_3, TMP2);
+
+	vis_padd16(TMP0, TMP4, TMP0);
+
+	vis_padd16(TMP2, TMP6, TMP2);
+	vis_pack16(TMP0, DST_2);
+
+	vis_pack16(TMP2, DST_3);
+	vis_st64(DST_2, dest[8]);
+
+	ref += stride;
+	dest += stride;
+    } while (--height);
+}
+
+/* MC_avg_x_8_vis: 8-wide horizontal half-pel prediction averaged into
+ * `dest`, in 16-bit precision (mul8x16au/al widening, CONST_3 rounding,
+ * pack16 with GSR scale factor 5).  Four rows are processed per loop
+ * pass (height >>= 2), with TMP4/TMP5 doubling as the DST_2/DST_3 input
+ * for the last row pair; assumes height is a multiple of 4. */
+static void MC_avg_x_8_vis (uint8_t * dest, const uint8_t * _ref,
+			    const int stride, int height)
+{
+    uint8_t *ref = (uint8_t *) _ref;
+    unsigned long off = (unsigned long) ref & 0x7;
+    unsigned long off_plus_1 = off + 1;
+    int stride_times_2 = stride << 1;
+
+    vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+
+    vis_ld64(constants3[0], CONST_3);
+    vis_fzero(ZERO);
+    vis_ld64(constants256_512[0], CONST_256);
+
+    ref = vis_alignaddr(ref);
+    height >>= 2;
+    do {	/* 47 cycles */
+	vis_ld64(ref[0], TMP0);
+
+	vis_ld64_2(ref, 8, TMP2);
+	ref += stride;
+
+	vis_alignaddr_g0((void *)off);
+
+	vis_ld64(ref[0], TMP4);
+	vis_faligndata(TMP0, TMP2, REF_0);
+
+	vis_ld64_2(ref, 8, TMP6);
+	ref += stride;
+
+	vis_ld64(ref[0], TMP8);
+
+	vis_ld64_2(ref, 8, TMP10);
+	ref += stride;
+	vis_faligndata(TMP4, TMP6, REF_4);
+
+	vis_ld64(ref[0], TMP12);
+
+	vis_ld64_2(ref, 8, TMP14);
+	ref += stride;
+	vis_faligndata(TMP8, TMP10, REF_S0);
+
+	vis_faligndata(TMP12, TMP14, REF_S4);
+
+	if (off != 0x7) {
+	    vis_alignaddr_g0((void *)off_plus_1);
+
+	    vis_ld64(dest[0], DST_0);
+	    vis_faligndata(TMP0, TMP2, REF_2);
+
+	    vis_ld64_2(dest, stride, DST_2);
+	    vis_faligndata(TMP4, TMP6, REF_6);
+
+	    vis_faligndata(TMP8, TMP10, REF_S2);
+
+	    vis_faligndata(TMP12, TMP14, REF_S6);
+	} else {
+	    vis_ld64(dest[0], DST_0);
+	    vis_src1(TMP2, REF_2);
+
+	    vis_ld64_2(dest, stride, DST_2);
+	    vis_src1(TMP6, REF_6);
+
+	    vis_src1(TMP10, REF_S2);
+
+	    vis_src1(TMP14, REF_S6);
+	}
+
+	/* rows 0 and 1 */
+	vis_pmerge(ZERO, REF_0, TMP0);
+	vis_mul8x16au(REF_0_1, CONST_256, TMP2);
+
+	vis_pmerge(ZERO, REF_2, TMP4);
+	vis_mul8x16au(REF_2_1, CONST_256, TMP6);
+
+	vis_padd16(TMP0, CONST_3, TMP0);
+	vis_mul8x16al(DST_0, CONST_512, TMP16);
+
+	vis_padd16(TMP2, CONST_3, TMP2);
+	vis_mul8x16al(DST_1, CONST_512, TMP18);
+
+	vis_padd16(TMP0, TMP4, TMP0);
+	vis_mul8x16au(REF_4, CONST_256, TMP8);
+
+	vis_padd16(TMP2, TMP6, TMP2);
+	vis_mul8x16au(REF_4_1, CONST_256, TMP10);
+
+	vis_padd16(TMP0, TMP16, TMP0);
+	vis_mul8x16au(REF_6, CONST_256, TMP12);
+
+	vis_padd16(TMP2, TMP18, TMP2);
+	vis_mul8x16au(REF_6_1, CONST_256, TMP14);
+
+	vis_padd16(TMP8, CONST_3, TMP8);
+	vis_mul8x16al(DST_2, CONST_512, TMP16);
+
+	vis_padd16(TMP8, TMP12, TMP8);
+	vis_mul8x16al(DST_3, CONST_512, TMP18);
+
+	vis_padd16(TMP10, TMP14, TMP10);
+	vis_pack16(TMP0, DST_0);
+
+	vis_pack16(TMP2, DST_1);
+	vis_st64(DST_0, dest[0]);
+	dest += stride;
+	vis_padd16(TMP10, CONST_3, TMP10);
+
+	vis_ld64_2(dest, stride, DST_0);
+	vis_padd16(TMP8, TMP16, TMP8);
+
+	vis_ld64_2(dest, stride_times_2, TMP4/*DST_2*/);
+	vis_padd16(TMP10, TMP18, TMP10);
+	vis_pack16(TMP8, DST_2);
+
+	vis_pack16(TMP10, DST_3);
+	vis_st64(DST_2, dest[0]);
+	dest += stride;
+
+	/* rows 2 and 3 (REF_S*) */
+	vis_mul8x16au(REF_S0_1, CONST_256, TMP2);
+	vis_pmerge(ZERO, REF_S0, TMP0);
+
+	vis_pmerge(ZERO, REF_S2, TMP24);
+	vis_mul8x16au(REF_S2_1, CONST_256, TMP6);
+
+	vis_padd16(TMP0, CONST_3, TMP0);
+	vis_mul8x16au(REF_S4, CONST_256, TMP8);
+
+	vis_padd16(TMP2, CONST_3, TMP2);
+	vis_mul8x16au(REF_S4_1, CONST_256, TMP10);
+
+	vis_padd16(TMP0, TMP24, TMP0);
+	vis_mul8x16au(REF_S6, CONST_256, TMP12);
+
+	vis_padd16(TMP2, TMP6, TMP2);
+	vis_mul8x16au(REF_S6_1, CONST_256, TMP14);
+
+	vis_padd16(TMP8, CONST_3, TMP8);
+	vis_mul8x16al(DST_0, CONST_512, TMP16);
+
+	vis_padd16(TMP10, CONST_3, TMP10);
+	vis_mul8x16al(DST_1, CONST_512, TMP18);
+
+	vis_padd16(TMP8, TMP12, TMP8);
+	vis_mul8x16al(TMP4/*DST_2*/, CONST_512, TMP20);
+
+	vis_mul8x16al(TMP5/*DST_3*/, CONST_512, TMP22);
+	vis_padd16(TMP0, TMP16, TMP0);
+
+	vis_padd16(TMP2, TMP18, TMP2);
+	vis_pack16(TMP0, DST_0);
+
+	vis_padd16(TMP10, TMP14, TMP10);
+	vis_pack16(TMP2, DST_1);
+	vis_st64(DST_0, dest[0]);
+	dest += stride;
+
+	vis_padd16(TMP8, TMP20, TMP8);
+
+	vis_padd16(TMP10, TMP22, TMP10);
+	vis_pack16(TMP8, DST_2);
+
+	vis_pack16(TMP10, DST_3);
+	vis_st64(DST_2, dest[0]);
+	dest += stride;
+    } while (--height);
+}
+
+/* MC_put_y_16_vis: 16-wide vertical half-pel prediction (put).  Each
+ * output byte is the rounding average of the pixel and the one directly
+ * below it (REF_0/REF_4 = current row, REF_2/REF_6 = next row), using the
+ * bitwise trick (a | b) - (((a ^ b) & 0xfe) >> 1).  Two output rows per
+ * loop pass; height assumed even and >= 4. */
+static void MC_put_y_16_vis (uint8_t * dest, const uint8_t * _ref,
+			     const int stride, int height)
+{
+    uint8_t *ref = (uint8_t *) _ref;
+    int offset;
+
+    ref = vis_alignaddr(ref);
+    /* extra word at +16 only needed when ref was unaligned */
+    offset = (ref != _ref) ? 16 : 0;
+
+    /* prologue: prime REF_0/REF_4 (row 0) and REF_2/REF_6 (row 1) */
+    vis_ld64(ref[0], TMP0);
+
+    vis_ld64_2(ref, 8, TMP2);
+
+    vis_ld64_2(ref, offset, TMP4);
+    ref += stride;
+
+    vis_ld64(ref[0], TMP6);
+    vis_faligndata(TMP0, TMP2, REF_0);
+
+    vis_ld64_2(ref, 8, TMP8);
+    vis_faligndata(TMP2, TMP4, REF_4);
+
+    vis_ld64_2(ref, offset, TMP10);
+    ref += stride;
+
+    vis_ld64(constants_fe[0], MASK_fe);
+    vis_faligndata(TMP6, TMP8, REF_2);
+
+    vis_ld64(constants_7f[0], MASK_7f);
+    vis_faligndata(TMP8, TMP10, REF_6);
+
+    vis_ld64(constants128[0], CONST_128);
+    height = (height >> 1) - 1;
+    do {	/* 24 cycles */
+	vis_ld64(ref[0], TMP0);
+	vis_xor(REF_0, REF_2, TMP12);
+
+	vis_ld64_2(ref, 8, TMP2);
+	vis_xor(REF_4, REF_6, TMP16);
+
+	vis_ld64_2(ref, offset, TMP4);
+	ref += stride;
+	vis_or(REF_0, REF_2, TMP14);
+
+	vis_ld64(ref[0], TMP6);
+	vis_or(REF_4, REF_6, TMP18);
+
+	vis_ld64_2(ref, 8, TMP8);
+	vis_faligndata(TMP0, TMP2, REF_0);
+
+	vis_ld64_2(ref, offset, TMP10);
+	ref += stride;
+	vis_faligndata(TMP2, TMP4, REF_4);
+
+	vis_and(TMP12, MASK_fe, TMP12);
+
+	vis_and(TMP16, MASK_fe, TMP16);
+	vis_mul8x16(CONST_128, TMP12, TMP12);
+
+	vis_mul8x16(CONST_128, TMP16, TMP16);
+	vis_xor(REF_0, REF_2, TMP0);
+
+	vis_xor(REF_4, REF_6, TMP2);
+
+	vis_or(REF_0, REF_2, TMP20);
+
+	vis_and(TMP12, MASK_7f, TMP12);
+
+	vis_and(TMP16, MASK_7f, TMP16);
+
+	vis_psub16(TMP14, TMP12, TMP12);
+	vis_st64(TMP12, dest[0]);
+
+	vis_psub16(TMP18, TMP16, TMP16);
+	vis_st64_2(TMP16, dest, 8);
+	dest += stride;
+
+	vis_or(REF_4, REF_6, TMP18);
+
+	vis_and(TMP0, MASK_fe, TMP0);
+
+	vis_and(TMP2, MASK_fe, TMP2);
+	vis_mul8x16(CONST_128, TMP0, TMP0);
+
+	vis_faligndata(TMP6, TMP8, REF_2);
+	vis_mul8x16(CONST_128, TMP2, TMP2);
+
+	vis_faligndata(TMP8, TMP10, REF_6);
+
+	vis_and(TMP0, MASK_7f, TMP0);
+
+	vis_and(TMP2, MASK_7f, TMP2);
+
+	vis_psub16(TMP20, TMP0, TMP0);
+	vis_st64(TMP0, dest[0]);
+
+	vis_psub16(TMP18, TMP2, TMP2);
+	vis_st64_2(TMP2, dest, 8);
+	dest += stride;
+    } while (--height);
+
+    /* epilogue: last two output rows */
+    vis_ld64(ref[0], TMP0);
+    vis_xor(REF_0, REF_2, TMP12);
+
+    vis_ld64_2(ref, 8, TMP2);
+    vis_xor(REF_4, REF_6, TMP16);
+
+    vis_ld64_2(ref, offset, TMP4);
+    vis_or(REF_0, REF_2, TMP14);
+
+    vis_or(REF_4, REF_6, TMP18);
+
+    vis_faligndata(TMP0, TMP2, REF_0);
+
+    vis_faligndata(TMP2, TMP4, REF_4);
+
+    vis_and(TMP12, MASK_fe, TMP12);
+
+    vis_and(TMP16, MASK_fe, TMP16);
+    vis_mul8x16(CONST_128, TMP12, TMP12);
+
+    vis_mul8x16(CONST_128, TMP16, TMP16);
+    vis_xor(REF_0, REF_2, TMP0);
+
+    vis_xor(REF_4, REF_6, TMP2);
+
+    vis_or(REF_0, REF_2, TMP20);
+
+    vis_and(TMP12, MASK_7f, TMP12);
+
+    vis_and(TMP16, MASK_7f, TMP16);
+
+    vis_psub16(TMP14, TMP12, TMP12);
+    vis_st64(TMP12, dest[0]);
+
+    vis_psub16(TMP18, TMP16, TMP16);
+    vis_st64_2(TMP16, dest, 8);
+    dest += stride;
+
+    vis_or(REF_4, REF_6, TMP18);
+
+    vis_and(TMP0, MASK_fe, TMP0);
+
+    vis_and(TMP2, MASK_fe, TMP2);
+    vis_mul8x16(CONST_128, TMP0, TMP0);
+
+    vis_mul8x16(CONST_128, TMP2, TMP2);
+
+    vis_and(TMP0, MASK_7f, TMP0);
+
+    vis_and(TMP2, MASK_7f, TMP2);
+
+    vis_psub16(TMP20, TMP0, TMP0);
+    vis_st64(TMP0, dest[0]);
+
+    vis_psub16(TMP18, TMP2, TMP2);
+    vis_st64_2(TMP2, dest, 8);
+}
+
+/* MC_put_y_8_vis: 8-wide vertical half-pel prediction (put) — the
+ * rounding average of each row with the row below it, via the bitwise
+ * trick (a | b) - (((a ^ b) & 0xfe) >> 1).  Two output rows per loop
+ * pass; height assumed even and >= 4. */
+static void MC_put_y_8_vis (uint8_t * dest, const uint8_t * _ref,
+			    const int stride, int height)
+{
+    uint8_t *ref = (uint8_t *) _ref;
+    int offset;
+
+    ref = vis_alignaddr(ref);
+    offset = (ref != _ref) ? 8 : 0;
+
+    /* prologue: REF_0 = row 0, REF_2 = row 1 */
+    vis_ld64(ref[0], TMP0);
+
+    vis_ld64_2(ref, offset, TMP2);
+    ref += stride;
+
+    vis_ld64(ref[0], TMP4);
+
+    vis_ld64_2(ref, offset, TMP6);
+    ref += stride;
+
+    vis_ld64(constants_fe[0], MASK_fe);
+    vis_faligndata(TMP0, TMP2, REF_0);
+
+    vis_ld64(constants_7f[0], MASK_7f);
+    vis_faligndata(TMP4, TMP6, REF_2);
+
+    vis_ld64(constants128[0], CONST_128);
+    height = (height >> 1) - 1;
+    do {	/* 12 cycles */
+	vis_ld64(ref[0], TMP0);
+	vis_xor(REF_0, REF_2, TMP4);
+
+	vis_ld64_2(ref, offset, TMP2);
+	ref += stride;
+	vis_and(TMP4, MASK_fe, TMP4);
+
+	vis_or(REF_0, REF_2, TMP6);
+	vis_mul8x16(CONST_128, TMP4, TMP4);
+
+	vis_faligndata(TMP0, TMP2, REF_0);
+	vis_ld64(ref[0], TMP0);
+
+	vis_ld64_2(ref, offset, TMP2);
+	ref += stride;
+	vis_xor(REF_0, REF_2, TMP12);
+
+	vis_and(TMP4, MASK_7f, TMP4);
+
+	vis_and(TMP12, MASK_fe, TMP12);
+
+	vis_mul8x16(CONST_128, TMP12, TMP12);
+	vis_or(REF_0, REF_2, TMP14);
+
+	vis_psub16(TMP6, TMP4, DST_0);
+	vis_st64(DST_0, dest[0]);
+	dest += stride;
+
+	vis_faligndata(TMP0, TMP2, REF_2);
+
+	vis_and(TMP12, MASK_7f, TMP12);
+
+	vis_psub16(TMP14, TMP12, DST_0);
+	vis_st64(DST_0, dest[0]);
+	dest += stride;
+    } while (--height);
+
+    /* epilogue: last two output rows */
+    vis_ld64(ref[0], TMP0);
+    vis_xor(REF_0, REF_2, TMP4);
+
+    vis_ld64_2(ref, offset, TMP2);
+    vis_and(TMP4, MASK_fe, TMP4);
+
+    vis_or(REF_0, REF_2, TMP6);
+    vis_mul8x16(CONST_128, TMP4, TMP4);
+
+    vis_faligndata(TMP0, TMP2, REF_0);
+
+    vis_xor(REF_0, REF_2, TMP12);
+
+    vis_and(TMP4, MASK_7f, TMP4);
+
+    vis_and(TMP12, MASK_fe, TMP12);
+
+    vis_mul8x16(CONST_128, TMP12, TMP12);
+    vis_or(REF_0, REF_2, TMP14);
+
+    vis_psub16(TMP6, TMP4, DST_0);
+    vis_st64(DST_0, dest[0]);
+    dest += stride;
+
+    vis_and(TMP12, MASK_7f, TMP12);
+
+    vis_psub16(TMP14, TMP12, DST_0);
+    vis_st64(DST_0, dest[0]);
+}
+
+/* MC_avg_y_16_vis: 16-wide vertical half-pel prediction averaged into
+ * `dest`, in 16-bit precision: adjacent rows are widened with
+ * vis_mul8x16au, the dest pixels with vis_mul8x16al against CONST_512,
+ * CONST_3 is added for rounding, and the sums are repacked with
+ * vis_pack16 under a GSR scale factor of 5.  REF_S0/REF_S2 are reused
+ * as extra dest registers (DST_4/DST_6).  Two rows per loop pass
+ * (height >>= 1). */
+static void MC_avg_y_16_vis (uint8_t * dest, const uint8_t * _ref,
+			     const int stride, int height)
+{
+    uint8_t *ref = (uint8_t *) _ref;
+    int stride_8 = stride + 8;
+    int stride_16;
+    int offset;
+
+    vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+
+    ref = vis_alignaddr(ref);
+    /* extra word at +16 only needed when ref was unaligned */
+    offset = (ref != _ref) ? 16 : 0;
+
+    vis_ld64(ref[ 0], TMP0);
+    vis_fzero(ZERO);
+
+    vis_ld64(ref[ 8], TMP2);
+
+    vis_ld64_2(ref, offset, TMP4);
+    stride_16 = stride + offset;
+
+    vis_ld64(constants3[0], CONST_3);
+    vis_faligndata(TMP0, TMP2, REF_2);
+
+    vis_ld64(constants256_512[0], CONST_256);
+    vis_faligndata(TMP2, TMP4, REF_6);
+    height >>= 1;
+
+    do {	/* 31 cycles */
+	vis_ld64_2(ref, stride, TMP0);
+	vis_pmerge(ZERO, REF_2, TMP12);
+	vis_mul8x16au(REF_2_1, CONST_256, TMP14);
+
+	vis_ld64_2(ref, stride_8, TMP2);
+	vis_pmerge(ZERO, REF_6, TMP16);
+	vis_mul8x16au(REF_6_1, CONST_256, TMP18);
+
+	vis_ld64_2(ref, stride_16, TMP4);
+	ref += stride;
+
+	vis_ld64(dest[0], DST_0);
+	vis_faligndata(TMP0, TMP2, REF_0);
+
+	vis_ld64_2(dest, 8, DST_2);
+	vis_faligndata(TMP2, TMP4, REF_4);
+
+	vis_ld64_2(ref, stride, TMP6);
+	vis_pmerge(ZERO, REF_0, TMP0);
+	vis_mul8x16au(REF_0_1, CONST_256, TMP2);
+
+	vis_ld64_2(ref, stride_8, TMP8);
+	vis_pmerge(ZERO, REF_4, TMP4);
+
+	vis_ld64_2(ref, stride_16, TMP10);
+	ref += stride;
+
+	vis_ld64_2(dest, stride, REF_S0/*DST_4*/);
+	vis_faligndata(TMP6, TMP8, REF_2);
+	vis_mul8x16au(REF_4_1, CONST_256, TMP6);
+
+	vis_ld64_2(dest, stride_8, REF_S2/*DST_6*/);
+	vis_faligndata(TMP8, TMP10, REF_6);
+	vis_mul8x16al(DST_0, CONST_512, TMP20);
+
+	vis_padd16(TMP0, CONST_3, TMP0);
+	vis_mul8x16al(DST_1, CONST_512, TMP22);
+
+	vis_padd16(TMP2, CONST_3, TMP2);
+	vis_mul8x16al(DST_2, CONST_512, TMP24);
+
+	vis_padd16(TMP4, CONST_3, TMP4);
+	vis_mul8x16al(DST_3, CONST_512, TMP26);
+
+	vis_padd16(TMP6, CONST_3, TMP6);
+
+	vis_padd16(TMP12, TMP20, TMP12);
+	vis_mul8x16al(REF_S0, CONST_512, TMP20);
+
+	vis_padd16(TMP14, TMP22, TMP14);
+	vis_mul8x16al(REF_S0_1, CONST_512, TMP22);
+
+	vis_padd16(TMP16, TMP24, TMP16);
+	vis_mul8x16al(REF_S2, CONST_512, TMP24);
+
+	vis_padd16(TMP18, TMP26, TMP18);
+	vis_mul8x16al(REF_S2_1, CONST_512, TMP26);
+
+	vis_padd16(TMP12, TMP0, TMP12);
+	vis_mul8x16au(REF_2, CONST_256, TMP28);
+
+	vis_padd16(TMP14, TMP2, TMP14);
+	vis_mul8x16au(REF_2_1, CONST_256, TMP30);
+
+	vis_padd16(TMP16, TMP4, TMP16);
+	vis_mul8x16au(REF_6, CONST_256, REF_S4);
+
+	vis_padd16(TMP18, TMP6, TMP18);
+	vis_mul8x16au(REF_6_1, CONST_256, REF_S6);
+
+	vis_pack16(TMP12, DST_0);
+	vis_padd16(TMP28, TMP0, TMP12);
+
+	vis_pack16(TMP14, DST_1);
+	vis_st64(DST_0, dest[0]);
+	vis_padd16(TMP30, TMP2, TMP14);
+
+	vis_pack16(TMP16, DST_2);
+	vis_padd16(REF_S4, TMP4, TMP16);
+
+	vis_pack16(TMP18, DST_3);
+	vis_st64_2(DST_2, dest, 8);
+	dest += stride;
+	vis_padd16(REF_S6, TMP6, TMP18);
+
+	vis_padd16(TMP12, TMP20, TMP12);
+
+	vis_padd16(TMP14, TMP22, TMP14);
+	vis_pack16(TMP12, DST_0);
+
+	vis_padd16(TMP16, TMP24, TMP16);
+	vis_pack16(TMP14, DST_1);
+	vis_st64(DST_0, dest[0]);
+
+	vis_padd16(TMP18, TMP26, TMP18);
+	vis_pack16(TMP16, DST_2);
+
+	vis_pack16(TMP18, DST_3);
+	vis_st64_2(DST_2, dest, 8);
+	dest += stride;
+    } while (--height);
+}
+
+/* MC_avg_y_8_vis: 8-wide vertical half-pel prediction averaged into
+ * `dest`, in 16-bit precision (mul8x16au/al widening, CONST_3 rounding,
+ * pack16 with GSR scale factor 5).  Two rows per loop pass
+ * (height >>= 1). */
+static void MC_avg_y_8_vis (uint8_t * dest, const uint8_t * _ref,
+			    const int stride, int height)
+{
+    uint8_t *ref = (uint8_t *) _ref;
+    int stride_8;
+    int offset;
+
+    vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+
+    ref = vis_alignaddr(ref);
+    /* extra word only needed when ref was unaligned */
+    offset = (ref != _ref) ? 8 : 0;
+
+    vis_ld64(ref[ 0], TMP0);
+    vis_fzero(ZERO);
+
+    vis_ld64_2(ref, offset, TMP2);
+    stride_8 = stride + offset;
+
+    vis_ld64(constants3[0], CONST_3);
+    vis_faligndata(TMP0, TMP2, REF_2);
+
+    vis_ld64(constants256_512[0], CONST_256);
+
+    height >>= 1;
+    do {	/* 20 cycles */
+	vis_ld64_2(ref, stride, TMP0);
+	vis_pmerge(ZERO, REF_2, TMP8);
+	vis_mul8x16au(REF_2_1, CONST_256, TMP10);
+
+	vis_ld64_2(ref, stride_8, TMP2);
+	ref += stride;
+
+	vis_ld64(dest[0], DST_0);
+
+	vis_ld64_2(dest, stride, DST_2);
+	vis_faligndata(TMP0, TMP2, REF_0);
+
+	vis_ld64_2(ref, stride, TMP4);
+	vis_mul8x16al(DST_0, CONST_512, TMP16);
+	vis_pmerge(ZERO, REF_0, TMP12);
+
+	vis_ld64_2(ref, stride_8, TMP6);
+	ref += stride;
+	vis_mul8x16al(DST_1, CONST_512, TMP18);
+	vis_pmerge(ZERO, REF_0_1, TMP14);
+
+	vis_padd16(TMP12, CONST_3, TMP12);
+	vis_mul8x16al(DST_2, CONST_512, TMP24);
+
+	vis_padd16(TMP14, CONST_3, TMP14);
+	vis_mul8x16al(DST_3, CONST_512, TMP26);
+
+	vis_faligndata(TMP4, TMP6, REF_2);
+
+	vis_padd16(TMP8, TMP12, TMP8);
+
+	vis_padd16(TMP10, TMP14, TMP10);
+	vis_mul8x16au(REF_2, CONST_256, TMP20);
+
+	vis_padd16(TMP8, TMP16, TMP0);
+	vis_mul8x16au(REF_2_1, CONST_256, TMP22);
+
+	vis_padd16(TMP10, TMP18, TMP2);
+	vis_pack16(TMP0, DST_0);
+
+	vis_pack16(TMP2, DST_1);
+	vis_st64(DST_0, dest[0]);
+	dest += stride;
+	vis_padd16(TMP12, TMP20, TMP12);
+
+	vis_padd16(TMP14, TMP22, TMP14);
+
+	vis_padd16(TMP12, TMP24, TMP0);
+
+	vis_padd16(TMP14, TMP26, TMP2);
+	vis_pack16(TMP0, DST_2);
+
+	vis_pack16(TMP2, DST_3);
+	vis_st64(DST_2, dest[0]);
+	dest += stride;
+    } while (--height);
+}
+
+/* MC_put_xy_16_vis: 16-wide diagonal (x+y) half-pel prediction (put).
+ * Sums the four neighbouring pixels of two adjacent rows (REF_S* = the
+ * row above, REF_0.. = the row below, each with its one-byte-shifted
+ * copy) in 16-bit precision, adds CONST_2 for rounding, and repacks with
+ * vis_pack16 under a GSR scale factor of 5.  off == 7 uses vis_src1 for
+ * the +1-byte shift as in the x variants.  Two rows per loop pass
+ * (height >>= 1). */
+static void MC_put_xy_16_vis (uint8_t * dest, const uint8_t * _ref,
+			      const int stride, int height)
+{
+    uint8_t *ref = (uint8_t *) _ref;
+    unsigned long off = (unsigned long) ref & 0x7;
+    unsigned long off_plus_1 = off + 1;
+    int stride_8 = stride + 8;
+    int stride_16 = stride + 16;
+
+    vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+
+    ref = vis_alignaddr(ref);
+
+    /* prologue: REF_S0/REF_S4 = row 0, REF_S2/REF_S6 = row 0 shifted */
+    vis_ld64(ref[ 0], TMP0);
+    vis_fzero(ZERO);
+
+    vis_ld64(ref[ 8], TMP2);
+
+    vis_ld64(ref[16], TMP4);
+
+    vis_ld64(constants2[0], CONST_2);
+    vis_faligndata(TMP0, TMP2, REF_S0);
+
+    vis_ld64(constants256_512[0], CONST_256);
+    vis_faligndata(TMP2, TMP4, REF_S4);
+
+    if (off != 0x7) {
+	vis_alignaddr_g0((void *)off_plus_1);
+	vis_faligndata(TMP0, TMP2, REF_S2);
+	vis_faligndata(TMP2, TMP4, REF_S6);
+    } else {
+	vis_src1(TMP2, REF_S2);
+	vis_src1(TMP4, REF_S6);
+    }
+
+    height >>= 1;
+    do {
+	vis_ld64_2(ref, stride, TMP0);
+	vis_mul8x16au(REF_S0, CONST_256, TMP12);
+	vis_pmerge(ZERO, REF_S0_1, TMP14);
+
+	vis_alignaddr_g0((void *)off);
+
+	vis_ld64_2(ref, stride_8, TMP2);
+	vis_mul8x16au(REF_S2, CONST_256, TMP16);
+	vis_pmerge(ZERO, REF_S2_1, TMP18);
+
+	vis_ld64_2(ref, stride_16, TMP4);
+	ref += stride;
+	vis_mul8x16au(REF_S4, CONST_256, TMP20);
+	vis_pmerge(ZERO, REF_S4_1, TMP22);
+
+	vis_ld64_2(ref, stride, TMP6);
+	vis_mul8x16au(REF_S6, CONST_256, TMP24);
+	vis_pmerge(ZERO, REF_S6_1, TMP26);
+
+	vis_ld64_2(ref, stride_8, TMP8);
+	vis_faligndata(TMP0, TMP2, REF_0);
+
+	vis_ld64_2(ref, stride_16, TMP10);
+	ref += stride;
+	vis_faligndata(TMP2, TMP4, REF_4);
+
+	vis_faligndata(TMP6, TMP8, REF_S0);
+
+	vis_faligndata(TMP8, TMP10, REF_S4);
+
+	if (off != 0x7) {
+	    vis_alignaddr_g0((void *)off_plus_1);
+	    vis_faligndata(TMP0, TMP2, REF_2);
+	    vis_faligndata(TMP2, TMP4, REF_6);
+	    vis_faligndata(TMP6, TMP8, REF_S2);
+	    vis_faligndata(TMP8, TMP10, REF_S6);
+	} else {
+	    vis_src1(TMP2, REF_2);
+	    vis_src1(TMP4, REF_6);
+	    vis_src1(TMP8, REF_S2);
+	    vis_src1(TMP10, REF_S6);
+	}
+
+	vis_mul8x16au(REF_0, CONST_256, TMP0);
+	vis_pmerge(ZERO, REF_0_1, TMP2);
+
+	vis_mul8x16au(REF_2, CONST_256, TMP4);
+	vis_pmerge(ZERO, REF_2_1, TMP6);
+
+	vis_padd16(TMP0, CONST_2, TMP8);
+	vis_mul8x16au(REF_4, CONST_256, TMP0);
+
+	vis_padd16(TMP2, CONST_2, TMP10);
+	vis_mul8x16au(REF_4_1, CONST_256, TMP2);
+
+	vis_padd16(TMP8, TMP4, TMP8);
+	vis_mul8x16au(REF_6, CONST_256, TMP4);
+
+	vis_padd16(TMP10, TMP6, TMP10);
+	vis_mul8x16au(REF_6_1, CONST_256, TMP6);
+
+	vis_padd16(TMP12, TMP8, TMP12);
+
+	vis_padd16(TMP14, TMP10, TMP14);
+
+	vis_padd16(TMP12, TMP16, TMP12);
+
+	vis_padd16(TMP14, TMP18, TMP14);
+	vis_pack16(TMP12, DST_0);
+
+	vis_pack16(TMP14, DST_1);
+	vis_st64(DST_0, dest[0]);
+	vis_padd16(TMP0, CONST_2, TMP12);
+
+	vis_mul8x16au(REF_S0, CONST_256, TMP0);
+	vis_padd16(TMP2, CONST_2, TMP14);
+
+	vis_mul8x16au(REF_S0_1, CONST_256, TMP2);
+	vis_padd16(TMP12, TMP4, TMP12);
+
+	vis_mul8x16au(REF_S2, CONST_256, TMP4);
+	vis_padd16(TMP14, TMP6, TMP14);
+
+	vis_mul8x16au(REF_S2_1, CONST_256, TMP6);
+	vis_padd16(TMP20, TMP12, TMP20);
+
+	vis_padd16(TMP22, TMP14, TMP22);
+
+	vis_padd16(TMP20, TMP24, TMP20);
+
+	vis_padd16(TMP22, TMP26, TMP22);
+	vis_pack16(TMP20, DST_2);
+
+	vis_pack16(TMP22, DST_3);
+	vis_st64_2(DST_2, dest, 8);
+	dest += stride;
+	vis_padd16(TMP0, TMP4, TMP24);
+
+	vis_mul8x16au(REF_S4, CONST_256, TMP0);
+	vis_padd16(TMP2, TMP6, TMP26);
+
+	vis_mul8x16au(REF_S4_1, CONST_256, TMP2);
+	vis_padd16(TMP24, TMP8, TMP24);
+
+	vis_padd16(TMP26, TMP10, TMP26);
+	vis_pack16(TMP24, DST_0);
+
+	vis_pack16(TMP26, DST_1);
+	vis_st64(DST_0, dest[0]);
+	vis_pmerge(ZERO, REF_S6, TMP4);
+
+	vis_pmerge(ZERO, REF_S6_1, TMP6);
+
+	vis_padd16(TMP0, TMP4, TMP0);
+
+	vis_padd16(TMP2, TMP6, TMP2);
+
+	vis_padd16(TMP0, TMP12, TMP0);
+
+	vis_padd16(TMP2, TMP14, TMP2);
+	vis_pack16(TMP0, DST_2);
+
+	vis_pack16(TMP2, DST_3);
+	vis_st64_2(DST_2, dest, 8);
+	dest += stride;
+    } while (--height);
+}
+
+/* MC_put_xy_8_vis: 8-wide diagonal (x+y) half-pel prediction (put).
+ * Same scheme as MC_put_xy_16_vis: four-neighbour sums in 16-bit
+ * precision with CONST_2 rounding, repacked with vis_pack16 (GSR scale
+ * factor 5); off == 7 uses vis_src1 for the one-byte shift.  Two rows
+ * per loop pass (height >>= 1). */
+static void MC_put_xy_8_vis (uint8_t * dest, const uint8_t * _ref,
+			     const int stride, int height)
+{
+    uint8_t *ref = (uint8_t *) _ref;
+    unsigned long off = (unsigned long) ref & 0x7;
+    unsigned long off_plus_1 = off + 1;
+    int stride_8 = stride + 8;
+
+    vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+
+    ref = vis_alignaddr(ref);
+
+    /* prologue: REF_S0 = row 0, REF_S2 = row 0 shifted one byte */
+    vis_ld64(ref[ 0], TMP0);
+    vis_fzero(ZERO);
+
+    vis_ld64(ref[ 8], TMP2);
+
+    vis_ld64(constants2[0], CONST_2);
+
+    vis_ld64(constants256_512[0], CONST_256);
+    vis_faligndata(TMP0, TMP2, REF_S0);
+
+    if (off != 0x7) {
+	vis_alignaddr_g0((void *)off_plus_1);
+	vis_faligndata(TMP0, TMP2, REF_S2);
+    } else {
+	vis_src1(TMP2, REF_S2);
+    }
+
+    height >>= 1;
+    do {	/* 26 cycles */
+	vis_ld64_2(ref, stride, TMP0);
+	vis_mul8x16au(REF_S0, CONST_256, TMP8);
+	vis_pmerge(ZERO, REF_S2, TMP12);
+
+	vis_alignaddr_g0((void *)off);
+
+	vis_ld64_2(ref, stride_8, TMP2);
+	ref += stride;
+	vis_mul8x16au(REF_S0_1, CONST_256, TMP10);
+	vis_pmerge(ZERO, REF_S2_1, TMP14);
+
+	vis_ld64_2(ref, stride, TMP4);
+
+	vis_ld64_2(ref, stride_8, TMP6);
+	ref += stride;
+	vis_faligndata(TMP0, TMP2, REF_S4);
+
+	vis_pmerge(ZERO, REF_S4, TMP18);
+
+	vis_pmerge(ZERO, REF_S4_1, TMP20);
+
+	vis_faligndata(TMP4, TMP6, REF_S0);
+
+	if (off != 0x7) {
+	    vis_alignaddr_g0((void *)off_plus_1);
+	    vis_faligndata(TMP0, TMP2, REF_S6);
+	    vis_faligndata(TMP4, TMP6, REF_S2);
+	} else {
+	    vis_src1(TMP2, REF_S6);
+	    vis_src1(TMP6, REF_S2);
+	}
+
+	vis_padd16(TMP18, CONST_2, TMP18);
+	vis_mul8x16au(REF_S6, CONST_256, TMP22);
+
+	vis_padd16(TMP20, CONST_2, TMP20);
+	vis_mul8x16au(REF_S6_1, CONST_256, TMP24);
+
+	vis_mul8x16au(REF_S0, CONST_256, TMP26);
+	vis_pmerge(ZERO, REF_S0_1, TMP28);
+
+	vis_mul8x16au(REF_S2, CONST_256, TMP30);
+	vis_padd16(TMP18, TMP22, TMP18);
+
+	vis_mul8x16au(REF_S2_1, CONST_256, TMP32);
+	vis_padd16(TMP20, TMP24, TMP20);
+
+	vis_padd16(TMP8, TMP18, TMP8);
+
+	vis_padd16(TMP10, TMP20, TMP10);
+
+	vis_padd16(TMP8, TMP12, TMP8);
+
+	vis_padd16(TMP10, TMP14, TMP10);
+	vis_pack16(TMP8, DST_0);
+
+	vis_pack16(TMP10, DST_1);
+	vis_st64(DST_0, dest[0]);
+	dest += stride;
+	vis_padd16(TMP18, TMP26, TMP18);
+
+	vis_padd16(TMP20, TMP28, TMP20);
+
+	vis_padd16(TMP18, TMP30, TMP18);
+
+	vis_padd16(TMP20, TMP32, TMP20);
+	vis_pack16(TMP18, DST_2);
+
+	vis_pack16(TMP20, DST_3);
+	vis_st64(DST_2, dest[0]);
+	dest += stride;
+    } while (--height);
+}
+
/* 16-byte-wide "avg" with half-pel interpolation in both x and y:
 * the interpolated prediction is additionally blended with the bytes
 * already in dest (dest is both read — vis_ld64(dest[...]) — and
 * written), as required for B-frame / averaged prediction.
 * Two 8-byte halves per row, two rows per iteration (height even).
 * The instruction interleaving is deliberate software pipelining for
 * UltraSPARC ("55 cycles" per iteration); do not reorder. */
static void MC_avg_xy_16_vis (uint8_t * dest, const uint8_t * _ref,
                              const int stride, int height)
{
    uint8_t *ref = (uint8_t *) _ref;
    unsigned long off = (unsigned long) ref & 0x7;
    unsigned long off_plus_1 = off + 1;
    int stride_8 = stride + 8;
    int stride_16 = stride + 16;

    /* scale factor 4: larger divisor than the put case because the
       existing dest contribution is folded in as well */
    vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT);

    ref = vis_alignaddr(ref);

    /* prime REF_S0/S2/S4/S6 with the first row (16 bytes, two windows) */
    vis_ld64(ref[ 0], TMP0);
    vis_fzero(ZERO);

    vis_ld64(ref[ 8], TMP2);

    vis_ld64(ref[16], TMP4);

    vis_ld64(constants6[0], CONST_6);        /* rounding constant */
    vis_faligndata(TMP0, TMP2, REF_S0);

    vis_ld64(constants256_1024[0], CONST_256);
    vis_faligndata(TMP2, TMP4, REF_S4);

    if (off != 0x7) {
        vis_alignaddr_g0((void *)off_plus_1);
        vis_faligndata(TMP0, TMP2, REF_S2);
        vis_faligndata(TMP2, TMP4, REF_S6);
    } else {
        /* off == 7: shifted window is the next aligned doubleword */
        vis_src1(TMP2, REF_S2);
        vis_src1(TMP4, REF_S6);
    }

    height >>= 1;
    do {    /* 55 cycles */
        vis_ld64_2(ref, stride, TMP0);
        vis_mul8x16au(REF_S0, CONST_256, TMP12);
        vis_pmerge(ZERO, REF_S0_1, TMP14);

        vis_alignaddr_g0((void *)off);

        vis_ld64_2(ref, stride_8, TMP2);
        vis_mul8x16au(REF_S2, CONST_256, TMP16);
        vis_pmerge(ZERO, REF_S2_1, TMP18);

        vis_ld64_2(ref, stride_16, TMP4);
        ref += stride;
        vis_mul8x16au(REF_S4, CONST_256, TMP20);
        vis_pmerge(ZERO, REF_S4_1, TMP22);

        vis_ld64_2(ref, stride, TMP6);
        vis_mul8x16au(REF_S6, CONST_256, TMP24);
        vis_pmerge(ZERO, REF_S6_1, TMP26);

        vis_ld64_2(ref, stride_8, TMP8);
        vis_faligndata(TMP0, TMP2, REF_0);

        vis_ld64_2(ref, stride_16, TMP10);
        ref += stride;
        vis_faligndata(TMP2, TMP4, REF_4);

        /* current destination row, to be blended in */
        vis_ld64(dest[0], DST_0);
        vis_faligndata(TMP6, TMP8, REF_S0);

        vis_ld64_2(dest, 8, DST_2);
        vis_faligndata(TMP8, TMP10, REF_S4);

        if (off != 0x7) {
            vis_alignaddr_g0((void *)off_plus_1);
            vis_faligndata(TMP0, TMP2, REF_2);
            vis_faligndata(TMP2, TMP4, REF_6);
            vis_faligndata(TMP6, TMP8, REF_S2);
            vis_faligndata(TMP8, TMP10, REF_S6);
        } else {
            vis_src1(TMP2, REF_2);
            vis_src1(TMP4, REF_6);
            vis_src1(TMP8, REF_S2);
            vis_src1(TMP10, REF_S6);
        }

        /* dest contribution is weighted by CONST_1024 (mul8x16al) */
        vis_mul8x16al(DST_0, CONST_1024, TMP30);
        vis_pmerge(ZERO, REF_0, TMP0);

        vis_mul8x16al(DST_1, CONST_1024, TMP32);
        vis_pmerge(ZERO, REF_0_1, TMP2);

        vis_mul8x16au(REF_2, CONST_256, TMP4);
        vis_pmerge(ZERO, REF_2_1, TMP6);

        vis_mul8x16al(DST_2, CONST_1024, REF_0);
        vis_padd16(TMP0, CONST_6, TMP0);

        vis_mul8x16al(DST_3, CONST_1024, REF_2);
        vis_padd16(TMP2, CONST_6, TMP2);

        vis_padd16(TMP0, TMP4, TMP0);
        vis_mul8x16au(REF_4, CONST_256, TMP4);

        vis_padd16(TMP2, TMP6, TMP2);
        vis_mul8x16au(REF_4_1, CONST_256, TMP6);

        vis_padd16(TMP12, TMP0, TMP12);
        vis_mul8x16au(REF_6, CONST_256, TMP8);

        vis_padd16(TMP14, TMP2, TMP14);
        vis_mul8x16au(REF_6_1, CONST_256, TMP10);

        vis_padd16(TMP12, TMP16, TMP12);
        vis_mul8x16au(REF_S0, CONST_256, REF_4);

        vis_padd16(TMP14, TMP18, TMP14);
        vis_mul8x16au(REF_S0_1, CONST_256, REF_6);

        vis_padd16(TMP12, TMP30, TMP12);

        vis_padd16(TMP14, TMP32, TMP14);
        vis_pack16(TMP12, DST_0);

        vis_pack16(TMP14, DST_1);
        vis_st64(DST_0, dest[0]);
        vis_padd16(TMP4, CONST_6, TMP4);

        vis_ld64_2(dest, stride, DST_0);
        vis_padd16(TMP6, CONST_6, TMP6);
        vis_mul8x16au(REF_S2, CONST_256, TMP12);

        vis_padd16(TMP4, TMP8, TMP4);
        vis_mul8x16au(REF_S2_1, CONST_256, TMP14);

        vis_padd16(TMP6, TMP10, TMP6);

        vis_padd16(TMP20, TMP4, TMP20);

        vis_padd16(TMP22, TMP6, TMP22);

        vis_padd16(TMP20, TMP24, TMP20);

        vis_padd16(TMP22, TMP26, TMP22);

        vis_padd16(TMP20, REF_0, TMP20);
        vis_mul8x16au(REF_S4, CONST_256, REF_0);

        vis_padd16(TMP22, REF_2, TMP22);
        vis_pack16(TMP20, DST_2);

        vis_pack16(TMP22, DST_3);
        vis_st64_2(DST_2, dest, 8);
        dest += stride;

        /* second output row of the pair */
        vis_ld64_2(dest, 8, DST_2);
        vis_mul8x16al(DST_0, CONST_1024, TMP30);
        vis_pmerge(ZERO, REF_S4_1, REF_2);

        vis_mul8x16al(DST_1, CONST_1024, TMP32);
        vis_padd16(REF_4, TMP0, TMP8);

        vis_mul8x16au(REF_S6, CONST_256, REF_4);
        vis_padd16(REF_6, TMP2, TMP10);

        vis_mul8x16au(REF_S6_1, CONST_256, REF_6);
        vis_padd16(TMP8, TMP12, TMP8);

        vis_padd16(TMP10, TMP14, TMP10);

        vis_padd16(TMP8, TMP30, TMP8);

        vis_padd16(TMP10, TMP32, TMP10);
        vis_pack16(TMP8, DST_0);

        vis_pack16(TMP10, DST_1);
        vis_st64(DST_0, dest[0]);

        vis_padd16(REF_0, TMP4, REF_0);

        vis_mul8x16al(DST_2, CONST_1024, TMP30);
        vis_padd16(REF_2, TMP6, REF_2);

        vis_mul8x16al(DST_3, CONST_1024, TMP32);
        vis_padd16(REF_0, REF_4, REF_0);

        vis_padd16(REF_2, REF_6, REF_2);

        vis_padd16(REF_0, TMP30, REF_0);

        /* stall */

        vis_padd16(REF_2, TMP32, REF_2);
        vis_pack16(REF_0, DST_2);

        vis_pack16(REF_2, DST_3);
        vis_st64_2(DST_2, dest, 8);
        dest += stride;
    } while (--height);
}
+
/* 8-byte-wide "avg" with half-pel interpolation in both x and y:
 * like MC_put_xy_8_vis but the existing destination bytes are loaded
 * (vis_ld64(dest[0], ...)) and blended into the interpolated result.
 * Two rows per iteration; height must be even.  Instruction order is
 * hand-scheduled ("31 cycles"); do not reorder. */
static void MC_avg_xy_8_vis (uint8_t * dest, const uint8_t * _ref,
                             const int stride, int height)
{
    uint8_t *ref = (uint8_t *) _ref;
    unsigned long off = (unsigned long) ref & 0x7;
    unsigned long off_plus_1 = off + 1;
    int stride_8 = stride + 8;

    vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT);

    ref = vis_alignaddr(ref);

    /* prime REF_S0/REF_S2 with the first source row */
    vis_ld64(ref[0], TMP0);
    vis_fzero(ZERO);

    vis_ld64_2(ref, 8, TMP2);

    vis_ld64(constants6[0], CONST_6);        /* rounding constant */

    vis_ld64(constants256_1024[0], CONST_256);
    vis_faligndata(TMP0, TMP2, REF_S0);

    if (off != 0x7) {
        vis_alignaddr_g0((void *)off_plus_1);
        vis_faligndata(TMP0, TMP2, REF_S2);
    } else {
        vis_src1(TMP2, REF_S2);
    }

    height >>= 1;
    do {    /* 31 cycles */
        vis_ld64_2(ref, stride, TMP0);
        vis_mul8x16au(REF_S0, CONST_256, TMP8);
        vis_pmerge(ZERO, REF_S0_1, TMP10);

        vis_ld64_2(ref, stride_8, TMP2);
        ref += stride;
        vis_mul8x16au(REF_S2, CONST_256, TMP12);
        vis_pmerge(ZERO, REF_S2_1, TMP14);

        vis_alignaddr_g0((void *)off);

        vis_ld64_2(ref, stride, TMP4);
        vis_faligndata(TMP0, TMP2, REF_S4);

        vis_ld64_2(ref, stride_8, TMP6);
        ref += stride;

        /* both destination rows of this pair, to be blended in */
        vis_ld64(dest[0], DST_0);
        vis_faligndata(TMP4, TMP6, REF_S0);

        vis_ld64_2(dest, stride, DST_2);

        if (off != 0x7) {
            vis_alignaddr_g0((void *)off_plus_1);
            vis_faligndata(TMP0, TMP2, REF_S6);
            vis_faligndata(TMP4, TMP6, REF_S2);
        } else {
            vis_src1(TMP2, REF_S6);
            vis_src1(TMP6, REF_S2);
        }

        /* dest contribution weighted via CONST_1024 (mul8x16al) */
        vis_mul8x16al(DST_0, CONST_1024, TMP30);
        vis_pmerge(ZERO, REF_S4, TMP22);

        vis_mul8x16al(DST_1, CONST_1024, TMP32);
        vis_pmerge(ZERO, REF_S4_1, TMP24);

        vis_mul8x16au(REF_S6, CONST_256, TMP26);
        vis_pmerge(ZERO, REF_S6_1, TMP28);

        vis_mul8x16au(REF_S0, CONST_256, REF_S4);
        vis_padd16(TMP22, CONST_6, TMP22);

        vis_mul8x16au(REF_S0_1, CONST_256, REF_S6);
        vis_padd16(TMP24, CONST_6, TMP24);

        vis_mul8x16al(DST_2, CONST_1024, REF_0);
        vis_padd16(TMP22, TMP26, TMP22);

        vis_mul8x16al(DST_3, CONST_1024, REF_2);
        vis_padd16(TMP24, TMP28, TMP24);

        vis_mul8x16au(REF_S2, CONST_256, TMP26);
        vis_padd16(TMP8, TMP22, TMP8);

        vis_mul8x16au(REF_S2_1, CONST_256, TMP28);
        vis_padd16(TMP10, TMP24, TMP10);

        vis_padd16(TMP8, TMP12, TMP8);

        vis_padd16(TMP10, TMP14, TMP10);

        vis_padd16(TMP8, TMP30, TMP8);

        vis_padd16(TMP10, TMP32, TMP10);
        vis_pack16(TMP8, DST_0);

        vis_pack16(TMP10, DST_1);
        vis_st64(DST_0, dest[0]);
        dest += stride;

        /* second row reuses the middle source row accumulated above */
        vis_padd16(REF_S4, TMP22, TMP12);

        vis_padd16(REF_S6, TMP24, TMP14);

        vis_padd16(TMP12, TMP26, TMP12);

        vis_padd16(TMP14, TMP28, TMP14);

        vis_padd16(TMP12, REF_0, TMP12);

        vis_padd16(TMP14, REF_2, TMP14);
        vis_pack16(TMP12, DST_2);

        vis_pack16(TMP14, DST_3);
        vis_st64(DST_2, dest[0]);
        dest += stride;
    } while (--height);
}
+
/* Instantiate the VIS motion-compensation dispatch table (mpeg2_mc_vis),
 * wiring up the sixteen MC_{put,avg}_{o,x,y,xy}_{16,8}_vis kernels. */
MPEG2_MC_EXTERN(vis);
+
+#endif /* !(ARCH_SPARC) */
diff --git a/libmpeg2/mpeg2_internal.h b/libmpeg2/mpeg2_internal.h
new file mode 100644
index 0000000..2e0f261
--- /dev/null
+++ b/libmpeg2/mpeg2_internal.h
@@ -0,0 +1,317 @@
+/*
+ * mpeg2_internal.h
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
#ifndef LIBMPEG2_MPEG2_INTERNAL_H
#define LIBMPEG2_MPEG2_INTERNAL_H

/* Internal-only sentinel state value; distinct from every public
 * mpeg2_state_t value.  NOTE(review): exact use inferred from the name —
 * confirm against decode.c. */
#define STATE_INTERNAL_NORETURN ((mpeg2_state_t)-1)

/* macroblock modes (bitmask flags combined in one int) */
#define MACROBLOCK_INTRA 1
#define MACROBLOCK_PATTERN 2
#define MACROBLOCK_MOTION_BACKWARD 4
#define MACROBLOCK_MOTION_FORWARD 8
#define MACROBLOCK_QUANT 16
#define DCT_TYPE_INTERLACED 32
/* motion_type, stored in the same int at bits >= MOTION_TYPE_SHIFT */
#define MOTION_TYPE_SHIFT 6
#define MC_FIELD 1
#define MC_FRAME 2
#define MC_16X8 2    /* same code as MC_FRAME; interpretation depends on
                        picture_structure (frame vs. field picture) */
#define MC_DMV 3

/* picture structure */
#define TOP_FIELD 1
#define BOTTOM_FIELD 2
#define FRAME_PICTURE 3

/* picture coding type */
#define I_TYPE 1
#define P_TYPE 2
#define B_TYPE 3
#define D_TYPE 4

/* motion-compensation kernel: (dest, ref, stride, height) */
typedef void mpeg2_mc_fct (uint8_t *, const uint8_t *, int, int);

/* per-direction (forward/backward) motion prediction state */
typedef struct {
    uint8_t * ref[2][3];    /* reference planes, [field][Y/U/V] */
    uint8_t ** ref2[2];
    int pmv[2][2];          /* motion vector predictors */
    int f_code[2];          /* horizontal/vertical f_code */
} motion_t;

typedef void motion_parser_t (mpeg2_decoder_t * decoder,
                              motion_t * motion,
                              mpeg2_mc_fct * const * table);

struct mpeg2_decoder_s {
    /* first, state that carries information from one macroblock to the */
    /* next inside a slice, and is never used outside of mpeg2_slice() */

    /* bit parsing stuff */
    uint32_t bitstream_buf;          /* current 32 bit working set */
    int bitstream_bits;              /* used bits in working set */
    const uint8_t * bitstream_ptr;   /* buffer with stream data */

    uint8_t * dest[3];               /* current output planes (Y, U, V) */

    int offset;
    int stride;
    int uv_stride;
    int slice_stride;
    int slice_uv_stride;
    int stride_frame;
    unsigned int limit_x;
    unsigned int limit_y_16;
    unsigned int limit_y_8;
    unsigned int limit_y;

    /* Motion vectors */
    /* The f_ and b_ correspond to the forward and backward motion */
    /* predictors */
    motion_t b_motion;
    motion_t f_motion;
    motion_parser_t * motion_parser[5];

    /* predictor for DC coefficients in intra blocks */
    int16_t dc_dct_pred[3];

    /* DCT coefficients, aligned for the SIMD IDCT implementations */
    int16_t DCTblock[64] ATTR_ALIGN(64);

    uint8_t * picture_dest[3];
    void (* convert) (void * convert_id, uint8_t * const * src,
                      unsigned int v_offset);
    void * convert_id;

    int dmv_offset;
    unsigned int v_offset;

    /* now non-slice-specific information */

    /* sequence header stuff */
    uint16_t * quantizer_matrix[4];
    uint16_t (* chroma_quantizer[2])[64];
    uint16_t quantizer_prescale[4][32][64];

    /* The width and height of the picture snapped to macroblock units */
    int width;
    int height;
    int vertical_position_extension;
    int chroma_format;

    /* picture header stuff */

    /* what type of picture this is (I, P, B, D) */
    int coding_type;

    /* picture coding extension stuff */

    /* quantization factor for intra dc coefficients */
    int intra_dc_precision;
    /* top/bottom/both fields */
    int picture_structure;
    /* bool to indicate all predictions are frame based */
    int frame_pred_frame_dct;
    /* bool to indicate whether intra blocks have motion vectors */
    /* (for concealment) */
    int concealment_motion_vectors;
    /* bool to use different vlc tables */
    int intra_vlc_format;
    /* used for DMV MC */
    int top_field_first;

    /* stuff derived from bitstream */

    /* pointer to the zigzag scan we're supposed to be using */
    const uint8_t * scan;

    int second_field;

    int mpeg1;

    /* XXX: field added for the xine fork; not part of upstream layout */
    int8_t q_scale_type;
};

typedef struct {
    mpeg2_fbuf_t fbuf;
} fbuf_alloc_t;

struct mpeg2dec_s {
    mpeg2_decoder_t decoder;

    mpeg2_info_t info;

    uint32_t shift;
    int is_display_initialized;
    mpeg2_state_t (* action) (struct mpeg2dec_s * mpeg2dec);
    mpeg2_state_t state;
    uint32_t ext_state;

    /* allocated in init - gcc has problems allocating such big structures */
    uint8_t * chunk_buffer;
    /* pointer to start of the current chunk */
    uint8_t * chunk_start;
    /* pointer to current position in chunk_buffer */
    uint8_t * chunk_ptr;
    /* last start code ? */
    uint8_t code;

    /* picture tags */
    uint32_t tag_current, tag2_current, tag_previous, tag2_previous;
    int num_tags;
    int bytes_since_tag;

    int first;
    int alloc_index_user;
    int alloc_index;
    uint8_t first_decode_slice;
    uint8_t nb_decode_slices;

    unsigned int user_data_len;

    mpeg2_sequence_t new_sequence;
    mpeg2_sequence_t sequence;
    mpeg2_gop_t new_gop;
    mpeg2_gop_t gop;
    mpeg2_picture_t new_picture;
    mpeg2_picture_t pictures[4];
    mpeg2_picture_t * picture;
    /*const*/ mpeg2_fbuf_t * fbuf[3];   /* 0: current fbuf, 1-2: prediction fbufs */

    fbuf_alloc_t fbuf_alloc[3];
    int custom_fbuf;

    uint8_t * yuv_buf[3][3];
    int yuv_index;
    mpeg2_convert_t * convert;
    void * convert_arg;
    unsigned int convert_id_size;
    int convert_stride;
    void (* convert_start) (void * id, const mpeg2_fbuf_t * fbuf,
                            const mpeg2_picture_t * picture,
                            const mpeg2_gop_t * gop);

    uint8_t * buf_start;
    uint8_t * buf_end;

    int16_t display_offset_x, display_offset_y;

    int copy_matrix;
    int8_t scaled[4];   /* XXX: MOVED — q_scale_type now lives in
                           mpeg2_decoder_s; original declaration was
                           "int8_t q_scale_type, scaled[4];" */
    uint8_t quantizer_matrix[4][64];
    uint8_t new_quantizer_matrix[4][64];
};

/* opaque CPU register save area for mpeg2_cpu_state_save/restore */
typedef struct {
#ifdef ARCH_PPC
    uint8_t regv[12*16];   /* room for AltiVec vector registers */
#endif
    int dummy;
} cpu_state_t;

/* cpu_accel.c */
uint32_t mpeg2_detect_accel (uint32_t accel);

/* cpu_state.c */
void mpeg2_cpu_state_init (uint32_t accel);

/* decode.c */
mpeg2_state_t mpeg2_seek_header (mpeg2dec_t * mpeg2dec);
mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec);

/* header.c */
void mpeg2_header_state_init (mpeg2dec_t * mpeg2dec);
void mpeg2_reset_info (mpeg2_info_t * info);
int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec);
int mpeg2_header_gop (mpeg2dec_t * mpeg2dec);
mpeg2_state_t mpeg2_header_picture_start (mpeg2dec_t * mpeg2dec);
int mpeg2_header_picture (mpeg2dec_t * mpeg2dec);
int mpeg2_header_extension (mpeg2dec_t * mpeg2dec);
int mpeg2_header_user_data (mpeg2dec_t * mpeg2dec);
void mpeg2_header_sequence_finalize (mpeg2dec_t * mpeg2dec);
void mpeg2_header_gop_finalize (mpeg2dec_t * mpeg2dec);
void mpeg2_header_picture_finalize (mpeg2dec_t * mpeg2dec, uint32_t accels);
mpeg2_state_t mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec);
mpeg2_state_t mpeg2_header_end (mpeg2dec_t * mpeg2dec);
void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int b_type);

/* idct.c */
extern void mpeg2_idct_init (uint32_t accel);
extern uint8_t mpeg2_scan_norm[64];
extern uint8_t mpeg2_scan_alt[64];

/* idct_mmx.c */
void mpeg2_idct_copy_sse2 (int16_t * block, uint8_t * dest, int stride);
void mpeg2_idct_add_sse2 (int last, int16_t * block,
                          uint8_t * dest, int stride);
void mpeg2_idct_copy_mmxext (int16_t * block, uint8_t * dest, int stride);
void mpeg2_idct_add_mmxext (int last, int16_t * block,
                            uint8_t * dest, int stride);
void mpeg2_idct_copy_mmx (int16_t * block, uint8_t * dest, int stride);
void mpeg2_idct_add_mmx (int last, int16_t * block,
                         uint8_t * dest, int stride);
void mpeg2_idct_mmx_init (void);

/* idct_altivec.c */
void mpeg2_idct_copy_altivec (int16_t * block, uint8_t * dest, int stride);
void mpeg2_idct_add_altivec (int last, int16_t * block,
                             uint8_t * dest, int stride);
void mpeg2_idct_altivec_init (void);

/* idct_alpha.c */
void mpeg2_idct_copy_mvi (int16_t * block, uint8_t * dest, int stride);
void mpeg2_idct_add_mvi (int last, int16_t * block,
                         uint8_t * dest, int stride);
void mpeg2_idct_copy_alpha (int16_t * block, uint8_t * dest, int stride);
void mpeg2_idct_add_alpha (int last, int16_t * block,
                           uint8_t * dest, int stride);
void mpeg2_idct_alpha_init (void);

/* motion_comp.c */
void mpeg2_mc_init (uint32_t accel);

/* motion-compensation dispatch table: 8 put + 8 avg kernels, indexed by
 * (xy<<?)/size — order fixed by MPEG2_MC_EXTERN below */
typedef struct {
    mpeg2_mc_fct * put [8];
    mpeg2_mc_fct * avg [8];
} mpeg2_mc_t;

/* Instantiate a full mpeg2_mc_t for one architecture suffix, e.g.
 * MPEG2_MC_EXTERN(mmx) defines mpeg2_mc_mmx from MC_*_mmx kernels. */
#define MPEG2_MC_EXTERN(x) mpeg2_mc_t mpeg2_mc_##x = {			  \
    {MC_put_o_16_##x, MC_put_x_16_##x, MC_put_y_16_##x, MC_put_xy_16_##x, \
     MC_put_o_8_##x,  MC_put_x_8_##x,  MC_put_y_8_##x,  MC_put_xy_8_##x}, \
    {MC_avg_o_16_##x, MC_avg_x_16_##x, MC_avg_y_16_##x, MC_avg_xy_16_##x, \
     MC_avg_o_8_##x,  MC_avg_x_8_##x,  MC_avg_y_8_##x,  MC_avg_xy_8_##x}  \
};

extern mpeg2_mc_t mpeg2_mc_c;
extern mpeg2_mc_t mpeg2_mc_mmx;
extern mpeg2_mc_t mpeg2_mc_mmxext;
extern mpeg2_mc_t mpeg2_mc_3dnow;
extern mpeg2_mc_t mpeg2_mc_altivec;
extern mpeg2_mc_t mpeg2_mc_alpha;
extern mpeg2_mc_t mpeg2_mc_vis;
extern mpeg2_mc_t mpeg2_mc_arm;

#endif /* LIBMPEG2_MPEG2_INTERNAL_H */
diff --git a/libmpeg2/slice.c b/libmpeg2/slice.c
new file mode 100644
index 0000000..99ba205
--- /dev/null
+++ b/libmpeg2/slice.c
@@ -0,0 +1,2078 @@
+/*
+ * slice.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 2003 Peter Gubanov <peter@elecard.net.ru>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "config.h"
+
+#include <inttypes.h>
+
+#include "mpeg2.h"
+#include "attributes.h"
+#include "mpeg2_internal.h"
+
+extern mpeg2_mc_t mpeg2_mc;
+extern void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride);
+extern void (* mpeg2_idct_add) (int last, int16_t * block,
+ uint8_t * dest, int stride);
+extern void (* mpeg2_cpu_state_save) (cpu_state_t * state);
+extern void (* mpeg2_cpu_state_restore) (cpu_state_t * state);
+
+#include "vlc.h"
+
/* Decode the macroblock_modes() syntax element for the current picture
 * coding type.  Returns a bitmask of MACROBLOCK_* flags, possibly ORed
 * with DCT_TYPE_INTERLACED and with the motion type shifted up by
 * MOTION_TYPE_SHIFT.  Uses the MB_I/MB_P/MB_B VLC tables from vlc.h;
 * bit_buf/bits/bit_ptr alias the decoder's bitstream state. */
static inline int get_macroblock_modes (mpeg2_decoder_t * const decoder)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
#define bit_ptr (decoder->bitstream_ptr)
    int macroblock_modes;
    const MBtab * tab;

    switch (decoder->coding_type) {
    case I_TYPE:

	tab = MB_I + UBITS (bit_buf, 1);
	DUMPBITS (bit_buf, bits, tab->len);
	macroblock_modes = tab->modes;

	/* dct_type flag is only coded for frame pictures without
	   frame_pred_frame_dct */
	if ((! (decoder->frame_pred_frame_dct)) &&
	    (decoder->picture_structure == FRAME_PICTURE)) {
	    macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED;
	    DUMPBITS (bit_buf, bits, 1);
	}

	return macroblock_modes;

    case P_TYPE:

	tab = MB_P + UBITS (bit_buf, 5);
	DUMPBITS (bit_buf, bits, tab->len);
	macroblock_modes = tab->modes;

	if (decoder->picture_structure != FRAME_PICTURE) {
	    /* field picture: explicit 2-bit motion type */
	    if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) {
		macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT;
		DUMPBITS (bit_buf, bits, 2);
	    }
	    /* P macroblocks always use forward prediction (possibly with
	       a zero vector when no vector is coded) */
	    return macroblock_modes | MACROBLOCK_MOTION_FORWARD;
	} else if (decoder->frame_pred_frame_dct) {
	    /* frame_pred_frame_dct: motion type is implicitly MC_FRAME */
	    if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
		macroblock_modes |= MC_FRAME << MOTION_TYPE_SHIFT;
	    return macroblock_modes | MACROBLOCK_MOTION_FORWARD;
	} else {
	    if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) {
		macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT;
		DUMPBITS (bit_buf, bits, 2);
	    }
	    if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) {
		macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED;
		DUMPBITS (bit_buf, bits, 1);
	    }
	    return macroblock_modes | MACROBLOCK_MOTION_FORWARD;
	}

    case B_TYPE:

	tab = MB_B + UBITS (bit_buf, 6);
	DUMPBITS (bit_buf, bits, tab->len);
	macroblock_modes = tab->modes;

	if (decoder->picture_structure != FRAME_PICTURE) {
	    if (! (macroblock_modes & MACROBLOCK_INTRA)) {
		macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT;
		DUMPBITS (bit_buf, bits, 2);
	    }
	    return macroblock_modes;
	} else if (decoder->frame_pred_frame_dct) {
	    /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */
	    macroblock_modes |= MC_FRAME << MOTION_TYPE_SHIFT;
	    return macroblock_modes;
	} else {
	    if (macroblock_modes & MACROBLOCK_INTRA)
		goto intra;
	    macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT;
	    DUMPBITS (bit_buf, bits, 2);
	    if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) {
	    intra:
		macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED;
		DUMPBITS (bit_buf, bits, 1);
	    }
	    return macroblock_modes;
	}

    case D_TYPE:

	/* D pictures: single marker bit, always intra */
	DUMPBITS (bit_buf, bits, 1);
	return MACROBLOCK_INTRA;

    default:
	return 0;
    }
#undef bit_buf
#undef bits
#undef bit_ptr
}
+
/* Read the 5-bit quantizer_scale_code and repoint the four active
 * quantizer matrices (intra/non-intra luma, intra/non-intra chroma)
 * at the tables prescaled for that code in mpeg2_header.c. */
static inline void get_quantizer_scale (mpeg2_decoder_t * const decoder)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
#define bit_ptr (decoder->bitstream_ptr)

    int quantizer_scale_code;

    quantizer_scale_code = UBITS (bit_buf, 5);
    DUMPBITS (bit_buf, bits, 5);

    decoder->quantizer_matrix[0] =
	decoder->quantizer_prescale[0][quantizer_scale_code];
    decoder->quantizer_matrix[1] =
	decoder->quantizer_prescale[1][quantizer_scale_code];
    decoder->quantizer_matrix[2] =
	decoder->chroma_quantizer[0][quantizer_scale_code];
    decoder->quantizer_matrix[3] =
	decoder->chroma_quantizer[1][quantizer_scale_code];
#undef bit_buf
#undef bits
#undef bit_ptr
}
+
/* Decode one signed motion vector differential: the motion_code VLC
 * (tables MV_4/MV_10), a sign bit, and f_code residual bits.
 * Returns the signed delta, already scaled by 1 << f_code. */
static inline int get_motion_delta (mpeg2_decoder_t * const decoder,
				    const int f_code)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
#define bit_ptr (decoder->bitstream_ptr)

    int delta;
    int sign;
    const MVtab * tab;

    if (bit_buf & 0x80000000) {
	/* '1' = motion_code 0, no sign or residual follows */
	DUMPBITS (bit_buf, bits, 1);
	return 0;
    } else if (bit_buf >= 0x0c000000) {

	/* short codes: VLC + sign + residual all fit in the current
	   32-bit window, so bits is bumped once for the whole field */
	tab = MV_4 + UBITS (bit_buf, 4);
	delta = (tab->delta << f_code) + 1;
	bits += tab->len + f_code + 1;
	bit_buf <<= tab->len;

	sign = SBITS (bit_buf, 1);   /* 0 or -1 */
	bit_buf <<= 1;

	if (f_code)
	    delta += UBITS (bit_buf, f_code);
	bit_buf <<= f_code;

	/* conditional negate: (x ^ -1) - (-1) == -x */
	return (delta ^ sign) - sign;

    } else {

	/* long codes: residual may straddle the window, so refill
	   (NEEDBITS) before reading it */
	tab = MV_10 + UBITS (bit_buf, 10);
	delta = (tab->delta << f_code) + 1;
	bits += tab->len + 1;
	bit_buf <<= tab->len;

	sign = SBITS (bit_buf, 1);
	bit_buf <<= 1;

	if (f_code) {
	    NEEDBITS (bit_buf, bits, bit_ptr);
	    delta += UBITS (bit_buf, f_code);
	    DUMPBITS (bit_buf, bits, f_code);
	}

	return (delta ^ sign) - sign;

    }
#undef bit_buf
#undef bits
#undef bit_ptr
}
+
/* Wrap a motion-vector predictor into the range representable with the
 * given f_code: keep the low (f_code + 5) bits and sign-extend, i.e.
 * map vector into [-(16 << f_code), (16 << f_code) - 1].
 *
 * The left shift is done on an unsigned value: left-shifting a negative
 * signed integer is undefined behavior in C, whereas the unsigned shift
 * followed by an arithmetic right shift of the reinterpreted int32_t
 * yields the intended sign extension (identical results on the
 * two's-complement targets libmpeg2 supports). */
static inline int bound_motion_vector (const int vector, const int f_code)
{
    return ((int32_t) ((uint32_t) vector << (27 - f_code))) >> (27 - f_code);
}
+
/* Decode one dual-prime differential motion vector component via the
 * 2-entry-index DMV_2 VLC table; returns -1, 0 or +1 (per tab->dmv). */
static inline int get_dmv (mpeg2_decoder_t * const decoder)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
#define bit_ptr (decoder->bitstream_ptr)

    const DMVtab * tab;

    tab = DMV_2 + UBITS (bit_buf, 2);
    DUMPBITS (bit_buf, bits, tab->len);
    return tab->dmv;
#undef bit_buf
#undef bits
#undef bit_ptr
}
+
/* Decode coded_block_pattern (which of the 6 blocks of a macroblock
 * carry coefficients).  Short codes (top 3 bits nonzero) come from the
 * 7-bit table CBP_7, the long tail from the 9-bit table CBP_9. */
static inline int get_coded_block_pattern (mpeg2_decoder_t * const decoder)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
#define bit_ptr (decoder->bitstream_ptr)

    const CBPtab * tab;

    NEEDBITS (bit_buf, bits, bit_ptr);

    if (bit_buf >= 0x20000000) {

	tab = CBP_7 + (UBITS (bit_buf, 7) - 16);
	DUMPBITS (bit_buf, bits, tab->len);
	return tab->cbp;

    } else {

	tab = CBP_9 + UBITS (bit_buf, 9);
	DUMPBITS (bit_buf, bits, tab->len);
	return tab->cbp;
    }

#undef bit_buf
#undef bits
#undef bit_ptr
}
+
/* Decode the intra-luma DC differential: dct_dc_size VLC (DC_lum_5 for
 * short codes, DC_long for the long tail) followed by `size` bits of
 * differential, sign-corrected via the standard (val - max) trick, then
 * scaled by intra_dc_precision. */
static inline int get_luma_dc_dct_diff (mpeg2_decoder_t * const decoder)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
#define bit_ptr (decoder->bitstream_ptr)
    const DCtab * tab;
    int size;
    int dc_diff;

    if (bit_buf < 0xf8000000) {
	tab = DC_lum_5 + UBITS (bit_buf, 5);
	size = tab->size;
	if (size) {
	    bits += tab->len + size;
	    bit_buf <<= tab->len;
	    /* if the MSB of the differential is 0 the value is negative:
	       subtract (2^size - 1) via the SBITS mask */
	    dc_diff =
		UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
	    bit_buf <<= size;
	    return dc_diff << decoder->intra_dc_precision;
	} else {
	    /* size 0: code '100' (3 bits), differential is zero */
	    DUMPBITS (bit_buf, bits, 3);
	    return 0;
	}
    } else {
	tab = DC_long + (UBITS (bit_buf, 9) - 0x1e0);
	size = tab->size;
	DUMPBITS (bit_buf, bits, tab->len);
	NEEDBITS (bit_buf, bits, bit_ptr);
	dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
	DUMPBITS (bit_buf, bits, size);
	return dc_diff << decoder->intra_dc_precision;
    }
#undef bit_buf
#undef bits
#undef bit_ptr
}
+
/* Decode the intra-chroma DC differential; same structure as the luma
 * variant but with the chroma VLC (DC_chrom_5, one bit longer codes). */
static inline int get_chroma_dc_dct_diff (mpeg2_decoder_t * const decoder)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
#define bit_ptr (decoder->bitstream_ptr)
    const DCtab * tab;
    int size;
    int dc_diff;

    if (bit_buf < 0xf8000000) {
	tab = DC_chrom_5 + UBITS (bit_buf, 5);
	size = tab->size;
	if (size) {
	    bits += tab->len + size;
	    bit_buf <<= tab->len;
	    dc_diff =
		UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
	    bit_buf <<= size;
	    return dc_diff << decoder->intra_dc_precision;
	} else {
	    /* size 0: 2-bit code, differential is zero */
	    DUMPBITS (bit_buf, bits, 2);
	    return 0;
	}
    } else {
	tab = DC_long + (UBITS (bit_buf, 10) - 0x3e0);
	size = tab->size;
	DUMPBITS (bit_buf, bits, tab->len + 1);   /* chroma codes are 1 bit longer */
	NEEDBITS (bit_buf, bits, bit_ptr);
	dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
	DUMPBITS (bit_buf, bits, size);
	return dc_diff << decoder->intra_dc_precision;
    }
#undef bit_buf
#undef bits
#undef bit_ptr
}
+
/* Scale a reconstructed coefficient by 16 (<< 4, the fixed-point format
 * the IDCT expects) and saturate: if the scaled value no longer fits in
 * int16_t, clamp to the extreme of matching sign.  SBITS(val,1) is 0 for
 * positive and -1 for negative, so (sign ^ 2047) is 2047 or -2048. */
#define SATURATE(val)				\
do {						\
    val <<= 4;					\
    if (unlikely (val != (int16_t) val))	\
	val = (SBITS (val, 1) ^ 2047) << 4;	\
} while (0)
+
/* Decode the AC coefficients of an intra block using VLC table B-14,
 * dequantizing with quant_matrix into decoder->DCTblock in zigzag order
 * (decoder->scan).  Tracks the IEEE-1180 mismatch-control parity and
 * toggles bit 4 of dest[63] at the end.  Run/level tables are chosen by
 * progressively longer prefixes of bit_buf; `i >= 64` exits on either a
 * legitimate end-of-block or an out-of-range (illegal) run. */
static void get_intra_block_B14 (mpeg2_decoder_t * const decoder,
				 const uint16_t * const quant_matrix)
{
    int i;
    int j;
    int val;
    const uint8_t * const scan = decoder->scan;
    int mismatch;
    const DCTtab * tab;
    uint32_t bit_buf;
    int bits;
    const uint8_t * bit_ptr;
    int16_t * const dest = decoder->DCTblock;

    i = 0;
    /* DC coefficient was stored before this call; seed parity from it */
    mismatch = ~dest[0];

    /* cache bitstream state in locals for the duration of the block */
    bit_buf = decoder->bitstream_buf;
    bits = decoder->bitstream_bits;
    bit_ptr = decoder->bitstream_ptr;

    NEEDBITS (bit_buf, bits, bit_ptr);

    while (1) {
	if (bit_buf >= 0x28000000) {

	    tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);

	    i += tab->run;
	    if (i >= 64)
		break;	/* end of block */

	normal_code:
	    j = scan[i];
	    bit_buf <<= tab->len;
	    bits += tab->len + 1;
	    val = (tab->level * quant_matrix[j]) >> 4;

	    /* if (bitstream_get (1)) val = -val; */
	    val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);

	    SATURATE (val);
	    dest[j] = val;
	    mismatch ^= val;

	    bit_buf <<= 1;
	    NEEDBITS (bit_buf, bits, bit_ptr);

	    continue;

	} else if (bit_buf >= 0x04000000) {

	    tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);

	    i += tab->run;
	    if (i < 64)
		goto normal_code;

	    /* escape code: 6-bit run + 12-bit signed level follow */

	    i += UBITS (bit_buf << 6, 6) - 64;
	    if (i >= 64)
		break;	/* illegal, check needed to avoid buffer overflow */

	    j = scan[i];

	    DUMPBITS (bit_buf, bits, 12);
	    NEEDBITS (bit_buf, bits, bit_ptr);
	    val = (SBITS (bit_buf, 12) * quant_matrix[j]) / 16;

	    SATURATE (val);
	    dest[j] = val;
	    mismatch ^= val;

	    DUMPBITS (bit_buf, bits, 12);
	    NEEDBITS (bit_buf, bits, bit_ptr);

	    continue;

	} else if (bit_buf >= 0x02000000) {
	    tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
	    i += tab->run;
	    if (i < 64)
		goto normal_code;
	} else if (bit_buf >= 0x00800000) {
	    tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
	    i += tab->run;
	    if (i < 64)
		goto normal_code;
	} else if (bit_buf >= 0x00200000) {
	    tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
	    i += tab->run;
	    if (i < 64)
		goto normal_code;
	} else {
	    tab = DCT_16 + UBITS (bit_buf, 16);
	    bit_buf <<= 16;
	    GETWORD (bit_buf, bits + 16, bit_ptr);
	    i += tab->run;
	    if (i < 64)
		goto normal_code;
	}
	break;	/* illegal, check needed to avoid buffer overflow */
    }
    /* mismatch control: force the parity of the reconstructed block */
    dest[63] ^= mismatch & 16;
    DUMPBITS (bit_buf, bits, tab->len);	/* dump end of block code */
    /* write the cached bitstream state back to the decoder */
    decoder->bitstream_buf = bit_buf;
    decoder->bitstream_bits = bits;
    decoder->bitstream_ptr = bit_ptr;
}
+
/* Decode the AC coefficients of an intra block using VLC table B-15
 * (selected when intra_vlc_format is set).  Identical structure to
 * get_intra_block_B14 but with the B-15 run/level tables, and the
 * end-of-block / escape handling folded into the first branch. */
static void get_intra_block_B15 (mpeg2_decoder_t * const decoder,
				 const uint16_t * const quant_matrix)
{
    int i;
    int j;
    int val;
    const uint8_t * const scan = decoder->scan;
    int mismatch;
    const DCTtab * tab;
    uint32_t bit_buf;
    int bits;
    const uint8_t * bit_ptr;
    int16_t * const dest = decoder->DCTblock;

    i = 0;
    mismatch = ~dest[0];

    bit_buf = decoder->bitstream_buf;
    bits = decoder->bitstream_bits;
    bit_ptr = decoder->bitstream_ptr;

    NEEDBITS (bit_buf, bits, bit_ptr);

    while (1) {
	if (bit_buf >= 0x04000000) {

	    tab = DCT_B15_8 + (UBITS (bit_buf, 8) - 4);

	    i += tab->run;
	    if (i < 64) {

	    normal_code:
		j = scan[i];
		bit_buf <<= tab->len;
		bits += tab->len + 1;
		val = (tab->level * quant_matrix[j]) >> 4;

		/* if (bitstream_get (1)) val = -val; */
		val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);

		SATURATE (val);
		dest[j] = val;
		mismatch ^= val;

		bit_buf <<= 1;
		NEEDBITS (bit_buf, bits, bit_ptr);

		continue;

	    } else {

		/* end of block. I commented out this code because if we */
		/* do not exit here we will still exit at the later test :) */

		/* if (i >= 128) break; */	/* end of block */

		/* escape code: 6-bit run + 12-bit signed level follow */

		i += UBITS (bit_buf << 6, 6) - 64;
		if (i >= 64)
		    break;	/* illegal, check against buffer overflow */

		j = scan[i];

		DUMPBITS (bit_buf, bits, 12);
		NEEDBITS (bit_buf, bits, bit_ptr);
		val = (SBITS (bit_buf, 12) * quant_matrix[j]) / 16;

		SATURATE (val);
		dest[j] = val;
		mismatch ^= val;

		DUMPBITS (bit_buf, bits, 12);
		NEEDBITS (bit_buf, bits, bit_ptr);

		continue;

	    }
	} else if (bit_buf >= 0x02000000) {
	    tab = DCT_B15_10 + (UBITS (bit_buf, 10) - 8);
	    i += tab->run;
	    if (i < 64)
		goto normal_code;
	} else if (bit_buf >= 0x00800000) {
	    tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
	    i += tab->run;
	    if (i < 64)
		goto normal_code;
	} else if (bit_buf >= 0x00200000) {
	    tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
	    i += tab->run;
	    if (i < 64)
		goto normal_code;
	} else {
	    tab = DCT_16 + UBITS (bit_buf, 16);
	    bit_buf <<= 16;
	    GETWORD (bit_buf, bits + 16, bit_ptr);
	    i += tab->run;
	    if (i < 64)
		goto normal_code;
	}
	break;	/* illegal, check needed to avoid buffer overflow */
    }
    /* mismatch control parity, as in get_intra_block_B14 */
    dest[63] ^= mismatch & 16;
    DUMPBITS (bit_buf, bits, tab->len);	/* dump end of block code */
    decoder->bitstream_buf = bit_buf;
    decoder->bitstream_bits = bits;
    decoder->bitstream_ptr = bit_ptr;
}
+
+/* Decode one MPEG-2 non-intra (predicted) block of DCT coefficients.
+ * Run/level VLCs are read from the decoder's bitstream, each level is
+ * inverse quantized with quant_matrix (the (2*level+1) form, >> 5),
+ * saturated, and stored into decoder->DCTblock at positions given by
+ * decoder->scan (zig-zag / alternate scan).  The XOR "mismatch"
+ * accumulator implements MPEG-2 mismatch control: its bit 4 is folded
+ * into dest[63] at the end.  Returns the index of the last coefficient
+ * written, which the caller feeds to mpeg2_idct_add. */
+static int get_non_intra_block (mpeg2_decoder_t * const decoder,
+				const uint16_t * const quant_matrix)
+{
+    int i;
+    int j;
+    int val;
+    const uint8_t * const scan = decoder->scan;
+    int mismatch;
+    const DCTtab * tab;
+    uint32_t bit_buf;
+    int bits;
+    const uint8_t * bit_ptr;
+    int16_t * const dest = decoder->DCTblock;
+
+    /* i starts at -1 so the first run lands on coefficient 0;
+     * mismatch starts inverted so the final "& 16" toggle matches the
+     * standard's parity rule. */
+    i = -1;
+    mismatch = -1;
+
+    /* cache bitstream state in locals for the duration of the parse */
+    bit_buf = decoder->bitstream_buf;
+    bits = decoder->bitstream_bits;
+    bit_ptr = decoder->bitstream_ptr;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    /* the first (DC) coefficient uses a different VLC table (B14DC) */
+    if (bit_buf >= 0x28000000) {
+	tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5);
+	goto entry_1;
+    } else
+	goto entry_2;
+
+    while (1) {
+	if (bit_buf >= 0x28000000) {
+
+	    tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
+
+	entry_1:
+	    i += tab->run;
+	    if (i >= 64)
+		break;	/* end of block */
+
+	normal_code:
+	    j = scan[i];
+	    bit_buf <<= tab->len;
+	    bits += tab->len + 1;
+	    val = ((2 * tab->level + 1) * quant_matrix[j]) >> 5;
+
+	    /* if (bitstream_get (1)) val = -val; */
+	    val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
+
+	    SATURATE (val);
+	    dest[j] = val;
+	    mismatch ^= val;
+
+	    bit_buf <<= 1;
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+
+	    continue;
+
+	}
+
+    entry_2:
+	if (bit_buf >= 0x04000000) {
+
+	    tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
+
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+
+	    /* escape code: explicit 6-bit run + 12-bit signed level */
+
+	    i += UBITS (bit_buf << 6, 6) - 64;
+	    if (i >= 64)
+		break;	/* illegal, check needed to avoid buffer overflow */
+
+	    j = scan[i];
+
+	    DUMPBITS (bit_buf, bits, 12);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+	    val = 2 * (SBITS (bit_buf, 12) + SBITS (bit_buf, 1)) + 1;
+	    val = (val * quant_matrix[j]) / 32;
+
+	    SATURATE (val);
+	    dest[j] = val;
+	    mismatch ^= val;
+
+	    DUMPBITS (bit_buf, bits, 12);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+
+	    continue;
+
+	} else if (bit_buf >= 0x02000000) {
+	    tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00800000) {
+	    tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00200000) {
+	    tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else {
+	    tab = DCT_16 + UBITS (bit_buf, 16);
+	    bit_buf <<= 16;
+	    GETWORD (bit_buf, bits + 16, bit_ptr);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	}
+	break;	/* illegal, check needed to avoid buffer overflow */
+    }
+    /* mismatch control: flip bit 4 of the last coefficient */
+    dest[63] ^= mismatch & 16;
+    DUMPBITS (bit_buf, bits, tab->len);	/* dump end of block code */
+    /* write the cached bitstream state back to the decoder */
+    decoder->bitstream_buf = bit_buf;
+    decoder->bitstream_bits = bits;
+    decoder->bitstream_ptr = bit_ptr;
+    return i;
+}
+
+/* Decode the AC coefficients of an MPEG-1 intra block (the DC value has
+ * already been written to DCTblock[0] by slice_intra_DCT, hence i starts
+ * at 0).  Levels are inverse quantized with the intra quantizer matrix
+ * and forced odd ("oddification") as MPEG-1 requires; MPEG-1 has no
+ * mismatch control.  Results go to decoder->DCTblock in decoder->scan
+ * order. */
+static void get_mpeg1_intra_block (mpeg2_decoder_t * const decoder)
+{
+    int i;
+    int j;
+    int val;
+    const uint8_t * const scan = decoder->scan;
+    const uint16_t * const quant_matrix = decoder->quantizer_matrix[0];
+    const DCTtab * tab;
+    uint32_t bit_buf;
+    int bits;
+    const uint8_t * bit_ptr;
+    int16_t * const dest = decoder->DCTblock;
+
+    i = 0;
+
+    /* cache bitstream state in locals for the duration of the parse */
+    bit_buf = decoder->bitstream_buf;
+    bits = decoder->bitstream_bits;
+    bit_ptr = decoder->bitstream_ptr;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+
+    while (1) {
+	if (bit_buf >= 0x28000000) {
+
+	    tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
+
+	    i += tab->run;
+	    if (i >= 64)
+		break;	/* end of block */
+
+	normal_code:
+	    j = scan[i];
+	    bit_buf <<= tab->len;
+	    bits += tab->len + 1;
+	    val = (tab->level * quant_matrix[j]) >> 4;
+
+	    /* oddification */
+	    val = (val - 1) | 1;
+
+	    /* if (bitstream_get (1)) val = -val; */
+	    val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
+
+	    SATURATE (val);
+	    dest[j] = val;
+
+	    bit_buf <<= 1;
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+
+	    continue;
+
+	} else if (bit_buf >= 0x04000000) {
+
+	    tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
+
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+
+	    /* escape code: 6-bit run, then MPEG-1's 8-bit (or 16-bit
+	     * extended) level encoding */
+
+	    i += UBITS (bit_buf << 6, 6) - 64;
+	    if (i >= 64)
+		break;	/* illegal, check needed to avoid buffer overflow */
+
+	    j = scan[i];
+
+	    DUMPBITS (bit_buf, bits, 12);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+	    val = SBITS (bit_buf, 8);
+	    if (! (val & 0x7f)) {
+		/* level was -128 or 0: read the 8-bit extension */
+		DUMPBITS (bit_buf, bits, 8);
+		val = UBITS (bit_buf, 8) + 2 * val;
+	    }
+	    val = (val * quant_matrix[j]) / 16;
+
+	    /* oddification */
+	    val = (val + ~SBITS (val, 1)) | 1;
+
+	    SATURATE (val);
+	    dest[j] = val;
+
+	    DUMPBITS (bit_buf, bits, 8);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+
+	    continue;
+
+	} else if (bit_buf >= 0x02000000) {
+	    tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00800000) {
+	    tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00200000) {
+	    tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else {
+	    tab = DCT_16 + UBITS (bit_buf, 16);
+	    bit_buf <<= 16;
+	    GETWORD (bit_buf, bits + 16, bit_ptr);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	}
+	break;	/* illegal, check needed to avoid buffer overflow */
+    }
+    DUMPBITS (bit_buf, bits, tab->len);	/* dump end of block code */
+    /* write the cached bitstream state back to the decoder */
+    decoder->bitstream_buf = bit_buf;
+    decoder->bitstream_bits = bits;
+    decoder->bitstream_ptr = bit_ptr;
+}
+
+/* Decode one MPEG-1 non-intra block.  Same VLC structure as the MPEG-2
+ * non-intra decoder (entry_1 handles the first/DC coefficient with table
+ * B14DC, entry_2 the rest), but with MPEG-1's 8/16-bit escape level
+ * encoding and mandatory oddification, and without mismatch control.
+ * Returns the index of the last coefficient written (for
+ * mpeg2_idct_add). */
+static int get_mpeg1_non_intra_block (mpeg2_decoder_t * const decoder)
+{
+    int i;
+    int j;
+    int val;
+    const uint8_t * const scan = decoder->scan;
+    const uint16_t * const quant_matrix = decoder->quantizer_matrix[1];
+    const DCTtab * tab;
+    uint32_t bit_buf;
+    int bits;
+    const uint8_t * bit_ptr;
+    int16_t * const dest = decoder->DCTblock;
+
+    /* i = -1 so the first run lands on coefficient 0 */
+    i = -1;
+
+    /* cache bitstream state in locals for the duration of the parse */
+    bit_buf = decoder->bitstream_buf;
+    bits = decoder->bitstream_bits;
+    bit_ptr = decoder->bitstream_ptr;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    /* the first (DC) coefficient uses a different VLC table (B14DC) */
+    if (bit_buf >= 0x28000000) {
+	tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5);
+	goto entry_1;
+    } else
+	goto entry_2;
+
+    while (1) {
+	if (bit_buf >= 0x28000000) {
+
+	    tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
+
+	entry_1:
+	    i += tab->run;
+	    if (i >= 64)
+		break;	/* end of block */
+
+	normal_code:
+	    j = scan[i];
+	    bit_buf <<= tab->len;
+	    bits += tab->len + 1;
+	    val = ((2 * tab->level + 1) * quant_matrix[j]) >> 5;
+
+	    /* oddification */
+	    val = (val - 1) | 1;
+
+	    /* if (bitstream_get (1)) val = -val; */
+	    val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);
+
+	    SATURATE (val);
+	    dest[j] = val;
+
+	    bit_buf <<= 1;
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+
+	    continue;
+
+	}
+
+    entry_2:
+	if (bit_buf >= 0x04000000) {
+
+	    tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
+
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+
+	    /* escape code: 6-bit run, then MPEG-1's 8-bit (or 16-bit
+	     * extended) level encoding */
+
+	    i += UBITS (bit_buf << 6, 6) - 64;
+	    if (i >= 64)
+		break;	/* illegal, check needed to avoid buffer overflow */
+
+	    j = scan[i];
+
+	    DUMPBITS (bit_buf, bits, 12);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+	    val = SBITS (bit_buf, 8);
+	    if (! (val & 0x7f)) {
+		/* level was -128 or 0: read the 8-bit extension */
+		DUMPBITS (bit_buf, bits, 8);
+		val = UBITS (bit_buf, 8) + 2 * val;
+	    }
+	    val = 2 * (val + SBITS (val, 1)) + 1;
+	    val = (val * quant_matrix[j]) / 32;
+
+	    /* oddification */
+	    val = (val + ~SBITS (val, 1)) | 1;
+
+	    SATURATE (val);
+	    dest[j] = val;
+
+	    DUMPBITS (bit_buf, bits, 8);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+
+	    continue;
+
+	} else if (bit_buf >= 0x02000000) {
+	    tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00800000) {
+	    tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else if (bit_buf >= 0x00200000) {
+	    tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	} else {
+	    tab = DCT_16 + UBITS (bit_buf, 16);
+	    bit_buf <<= 16;
+	    GETWORD (bit_buf, bits + 16, bit_ptr);
+	    i += tab->run;
+	    if (i < 64)
+		goto normal_code;
+	}
+	break;	/* illegal, check needed to avoid buffer overflow */
+    }
+    DUMPBITS (bit_buf, bits, tab->len);	/* dump end of block code */
+    /* write the cached bitstream state back to the decoder */
+    decoder->bitstream_buf = bit_buf;
+    decoder->bitstream_bits = bits;
+    decoder->bitstream_ptr = bit_ptr;
+    return i;
+}
+
+/* Decode one intra-coded block and write it to the picture.  First reads
+ * the differential DC coefficient (luma or chroma predictor, selected by
+ * colour component cc) into DCTblock[0], then the AC coefficients via the
+ * decoder matching the stream type (MPEG-1, or MPEG-2 table B15/B14 per
+ * intra_vlc_format), and finally runs the copy-variant IDCT into dest.
+ * The local #defines alias the decoder's bitstream state for the
+ * NEEDBITS macro and are undone at the end of the function. */
+static inline void slice_intra_DCT (mpeg2_decoder_t * const decoder,
+				    const int cc,
+				    uint8_t * const dest, const int stride)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    /* Get the intra DC coefficient and inverse quantize it */
+    if (cc == 0)
+	decoder->DCTblock[0] =
+	    decoder->dc_dct_pred[0] += get_luma_dc_dct_diff (decoder);
+    else
+	decoder->DCTblock[0] =
+	    decoder->dc_dct_pred[cc] += get_chroma_dc_dct_diff (decoder);
+
+    if (decoder->mpeg1) {
+	/* D-pictures carry only DC coefficients, so skip the AC parse */
+	if (decoder->coding_type != D_TYPE)
+	    get_mpeg1_intra_block (decoder);
+    } else if (decoder->intra_vlc_format)
+	get_intra_block_B15 (decoder, decoder->quantizer_matrix[cc ? 2 : 0]);
+    else
+	get_intra_block_B14 (decoder, decoder->quantizer_matrix[cc ? 2 : 0]);
+    mpeg2_idct_copy (decoder->DCTblock, dest, stride);
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+
+/* Decode one non-intra block into decoder->DCTblock and accumulate it
+ * into the picture with the add-variant IDCT.  MPEG-1 streams use their
+ * own run/level decoder; MPEG-2 streams select the chroma or luma
+ * non-intra quantizer matrix according to the colour component cc. */
+static inline void slice_non_intra_DCT (mpeg2_decoder_t * const decoder,
+					const int cc,
+					uint8_t * const dest, const int stride)
+{
+    const int last_coeff = decoder->mpeg1 ?
+	get_mpeg1_non_intra_block (decoder) :
+	get_non_intra_block (decoder, decoder->quantizer_matrix[cc ? 3 : 1]);
+
+    mpeg2_idct_add (last_coeff, decoder->DCTblock, dest, stride);
+}
+
+/* MOTION_420: frame prediction for 4:2:0 chroma.  Clamps the half-pel
+ * position against limit_x / limit_y_<size>, predicts luma, then halves
+ * the vector and predicts both chroma planes at half resolution. */
+#define MOTION_420(table,ref,motion_x,motion_y,size,y)			      \
+    pos_x = 2 * decoder->offset + motion_x;				      \
+    pos_y = 2 * decoder->v_offset + motion_y + 2 * y;			      \
+    if (unlikely (pos_x > decoder->limit_x)) {				      \
+	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
+	motion_x = pos_x - 2 * decoder->offset;				      \
+    }									      \
+    if (unlikely (pos_y > decoder->limit_y_ ## size)) {			      \
+	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y_ ## size;	      \
+	motion_y = pos_y - 2 * decoder->v_offset - 2 * y;		      \
+    }									      \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
+    table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \
+		    ref[0] + (pos_x >> 1) + (pos_y >> 1) * decoder->stride,   \
+		    decoder->stride, size);				      \
+    motion_x /= 2;	motion_y /= 2;					      \
+    xy_half = ((motion_y & 1) << 1) | (motion_x & 1);			      \
+    offset = (((decoder->offset + motion_x) >> 1) +			      \
+	      ((((decoder->v_offset + motion_y) >> 1) + y/2) *		      \
+	       decoder->uv_stride));					      \
+    table[4+xy_half] (decoder->dest[1] + y/2 * decoder->uv_stride +	      \
+		      (decoder->offset >> 1), ref[1] + offset,		      \
+		      decoder->uv_stride, size/2);			      \
+    table[4+xy_half] (decoder->dest[2] + y/2 * decoder->uv_stride +	      \
+		      (decoder->offset >> 1), ref[2] + offset,		      \
+		      decoder->uv_stride, size/2)
+
+/* MOTION_FIELD_420: field prediction inside a frame picture (4:2:0).
+ * src_field selects the source field, dest_field the destination field;
+ * strides are doubled to step over the interleaved field lines. */
+#define MOTION_FIELD_420(table,ref,motion_x,motion_y,dest_field,op,src_field) \
+    pos_x = 2 * decoder->offset + motion_x;				      \
+    pos_y = decoder->v_offset + motion_y;				      \
+    if (unlikely (pos_x > decoder->limit_x)) {				      \
+	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
+	motion_x = pos_x - 2 * decoder->offset;				      \
+    }									      \
+    if (unlikely (pos_y > decoder->limit_y)) {				      \
+	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;		      \
+	motion_y = pos_y - decoder->v_offset;				      \
+    }									      \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
+    table[xy_half] (decoder->dest[0] + dest_field * decoder->stride +	      \
+		    decoder->offset,					      \
+		    (ref[0] + (pos_x >> 1) +				      \
+		     ((pos_y op) + src_field) * decoder->stride),	      \
+		    2 * decoder->stride, 8);				      \
+    motion_x /= 2;	motion_y /= 2;					      \
+    xy_half = ((motion_y & 1) << 1) | (motion_x & 1);			      \
+    offset = (((decoder->offset + motion_x) >> 1) +			      \
+	      (((decoder->v_offset >> 1) + (motion_y op) + src_field) *	      \
+	       decoder->uv_stride));					      \
+    table[4+xy_half] (decoder->dest[1] + dest_field * decoder->uv_stride +    \
+		      (decoder->offset >> 1), ref[1] + offset,		      \
+		      2 * decoder->uv_stride, 4);			      \
+    table[4+xy_half] (decoder->dest[2] + dest_field * decoder->uv_stride +    \
+		      (decoder->offset >> 1), ref[2] + offset,		      \
+		      2 * decoder->uv_stride, 4)
+
+/* MOTION_DMV_420: dual-prime prediction (4:2:0) — predicts both fields
+ * of the destination from one vector, two interleaved 8-line calls per
+ * plane. */
+#define MOTION_DMV_420(table,ref,motion_x,motion_y)			      \
+    pos_x = 2 * decoder->offset + motion_x;				      \
+    pos_y = decoder->v_offset + motion_y;				      \
+    if (unlikely (pos_x > decoder->limit_x)) {				      \
+	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
+	motion_x = pos_x - 2 * decoder->offset;				      \
+    }									      \
+    if (unlikely (pos_y > decoder->limit_y)) {				      \
+	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;		      \
+	motion_y = pos_y - decoder->v_offset;				      \
+    }									      \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
+    offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride;		      \
+    table[xy_half] (decoder->dest[0] + decoder->offset,			      \
+		    ref[0] + offset, 2 * decoder->stride, 8);		      \
+    table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset,     \
+		    ref[0] + decoder->stride + offset,			      \
+		    2 * decoder->stride, 8);				      \
+    motion_x /= 2;	motion_y /= 2;					      \
+    xy_half = ((motion_y & 1) << 1) | (motion_x & 1);			      \
+    offset = (((decoder->offset + motion_x) >> 1) +			      \
+	      (((decoder->v_offset >> 1) + (motion_y & ~1)) *		      \
+	       decoder->uv_stride));					      \
+    table[4+xy_half] (decoder->dest[1] + (decoder->offset >> 1),	      \
+		      ref[1] + offset, 2 * decoder->uv_stride, 4);	      \
+    table[4+xy_half] (decoder->dest[1] + decoder->uv_stride +		      \
+		      (decoder->offset >> 1),				      \
+		      ref[1] + decoder->uv_stride + offset,		      \
+		      2 * decoder->uv_stride, 4);			      \
+    table[4+xy_half] (decoder->dest[2] + (decoder->offset >> 1),	      \
+		      ref[2] + offset, 2 * decoder->uv_stride, 4);	      \
+    table[4+xy_half] (decoder->dest[2] + decoder->uv_stride +		      \
+		      (decoder->offset >> 1),				      \
+		      ref[2] + decoder->uv_stride + offset,		      \
+		      2 * decoder->uv_stride, 4)
+
+/* MOTION_ZERO_420: zero-vector prediction (4:2:0), full-pel copy/avg of
+ * the co-located macroblock. */
+#define MOTION_ZERO_420(table,ref)					      \
+    table[0] (decoder->dest[0] + decoder->offset,			      \
+	      (ref[0] + decoder->offset +				      \
+	       decoder->v_offset * decoder->stride), decoder->stride, 16);    \
+    offset = ((decoder->offset >> 1) +					      \
+	      (decoder->v_offset >> 1) * decoder->uv_stride);		      \
+    table[4] (decoder->dest[1] + (decoder->offset >> 1),		      \
+	      ref[1] + offset, decoder->uv_stride, 8);			      \
+    table[4] (decoder->dest[2] + (decoder->offset >> 1),		      \
+	      ref[2] + offset, decoder->uv_stride, 8)
+
+/* MOTION_422: frame prediction for 4:2:2 chroma — chroma is subsampled
+ * horizontally only, so only the x component is halved; the
+ * "(motion_x & (motion_x < 0))" term adjusts rounding for negative
+ * horizontal vectors before halving the offset. */
+#define MOTION_422(table,ref,motion_x,motion_y,size,y)			      \
+    pos_x = 2 * decoder->offset + motion_x;				      \
+    pos_y = 2 * decoder->v_offset + motion_y + 2 * y;			      \
+    if (unlikely (pos_x > decoder->limit_x)) {				      \
+	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
+	motion_x = pos_x - 2 * decoder->offset;				      \
+    }									      \
+    if (unlikely (pos_y > decoder->limit_y_ ## size)) {			      \
+	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y_ ## size;	      \
+	motion_y = pos_y - 2 * decoder->v_offset - 2 * y;		      \
+    }									      \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
+    offset = (pos_x >> 1) + (pos_y >> 1) * decoder->stride;		      \
+    table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \
+		    ref[0] + offset, decoder->stride, size);		      \
+    offset = (offset + (motion_x & (motion_x < 0))) >> 1;		      \
+    motion_x /= 2;							      \
+    xy_half = ((pos_y & 1) << 1) | (motion_x & 1);			      \
+    table[4+xy_half] (decoder->dest[1] + y * decoder->uv_stride +	      \
+		      (decoder->offset >> 1), ref[1] + offset,		      \
+		      decoder->uv_stride, size);			      \
+    table[4+xy_half] (decoder->dest[2] + y * decoder->uv_stride +	      \
+		      (decoder->offset >> 1), ref[2] + offset,		      \
+		      decoder->uv_stride, size)
+
+/* MOTION_FIELD_422: field prediction inside a frame picture (4:2:2);
+ * chroma keeps full vertical resolution (8-line calls, doubled stride). */
+#define MOTION_FIELD_422(table,ref,motion_x,motion_y,dest_field,op,src_field) \
+    pos_x = 2 * decoder->offset + motion_x;				      \
+    pos_y = decoder->v_offset + motion_y;				      \
+    if (unlikely (pos_x > decoder->limit_x)) {				      \
+	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
+	motion_x = pos_x - 2 * decoder->offset;				      \
+    }									      \
+    if (unlikely (pos_y > decoder->limit_y)) {				      \
+	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;		      \
+	motion_y = pos_y - decoder->v_offset;				      \
+    }									      \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
+    offset = (pos_x >> 1) + ((pos_y op) + src_field) * decoder->stride;	      \
+    table[xy_half] (decoder->dest[0] + dest_field * decoder->stride +	      \
+		    decoder->offset, ref[0] + offset,			      \
+		    2 * decoder->stride, 8);				      \
+    offset = (offset + (motion_x & (motion_x < 0))) >> 1;		      \
+    motion_x /= 2;							      \
+    xy_half = ((pos_y & 1) << 1) | (motion_x & 1);			      \
+    table[4+xy_half] (decoder->dest[1] + dest_field * decoder->uv_stride +    \
+		      (decoder->offset >> 1), ref[1] + offset,		      \
+		      2 * decoder->uv_stride, 8);			      \
+    table[4+xy_half] (decoder->dest[2] + dest_field * decoder->uv_stride +    \
+		      (decoder->offset >> 1), ref[2] + offset,		      \
+		      2 * decoder->uv_stride, 8)
+
+/* MOTION_DMV_422: dual-prime prediction (4:2:2) — both fields per plane,
+ * chroma at full vertical resolution. */
+#define MOTION_DMV_422(table,ref,motion_x,motion_y)			      \
+    pos_x = 2 * decoder->offset + motion_x;				      \
+    pos_y = decoder->v_offset + motion_y;				      \
+    if (unlikely (pos_x > decoder->limit_x)) {				      \
+	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
+	motion_x = pos_x - 2 * decoder->offset;				      \
+    }									      \
+    if (unlikely (pos_y > decoder->limit_y)) {				      \
+	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;		      \
+	motion_y = pos_y - decoder->v_offset;				      \
+    }									      \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
+    offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride;		      \
+    table[xy_half] (decoder->dest[0] + decoder->offset,			      \
+		    ref[0] + offset, 2 * decoder->stride, 8);		      \
+    table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset,     \
+		    ref[0] + decoder->stride + offset,			      \
+		    2 * decoder->stride, 8);				      \
+    offset = (offset + (motion_x & (motion_x < 0))) >> 1;		      \
+    motion_x /= 2;							      \
+    xy_half = ((pos_y & 1) << 1) | (motion_x & 1);			      \
+    table[4+xy_half] (decoder->dest[1] + (decoder->offset >> 1),	      \
+		      ref[1] + offset, 2 * decoder->uv_stride, 8);	      \
+    table[4+xy_half] (decoder->dest[1] + decoder->uv_stride +		      \
+		      (decoder->offset >> 1),				      \
+		      ref[1] + decoder->uv_stride + offset,		      \
+		      2 * decoder->uv_stride, 8);			      \
+    table[4+xy_half] (decoder->dest[2] + (decoder->offset >> 1),	      \
+		      ref[2] + offset, 2 * decoder->uv_stride, 8);	      \
+    table[4+xy_half] (decoder->dest[2] + decoder->uv_stride +		      \
+		      (decoder->offset >> 1),				      \
+		      ref[2] + decoder->uv_stride + offset,		      \
+		      2 * decoder->uv_stride, 8)
+
+/* MOTION_ZERO_422: zero-vector prediction (4:2:2); chroma offset is the
+ * halved luma offset, 16-line chroma calls. */
+#define MOTION_ZERO_422(table,ref)					      \
+    offset = decoder->offset + decoder->v_offset * decoder->stride;	      \
+    table[0] (decoder->dest[0] + decoder->offset,			      \
+	      ref[0] + offset, decoder->stride, 16);			      \
+    offset >>= 1;							      \
+    table[4] (decoder->dest[1] + (decoder->offset >> 1),		      \
+	      ref[1] + offset, decoder->uv_stride, 16);			      \
+    table[4] (decoder->dest[2] + (decoder->offset >> 1),		      \
+	      ref[2] + offset, decoder->uv_stride, 16)
+
+/* MOTION_444: frame prediction for 4:4:4 chroma — all three planes are
+ * full resolution, so the same vector, offset and stride are reused for
+ * each plane. */
+#define MOTION_444(table,ref,motion_x,motion_y,size,y)			      \
+    pos_x = 2 * decoder->offset + motion_x;				      \
+    pos_y = 2 * decoder->v_offset + motion_y + 2 * y;			      \
+    if (unlikely (pos_x > decoder->limit_x)) {				      \
+	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
+	motion_x = pos_x - 2 * decoder->offset;				      \
+    }									      \
+    if (unlikely (pos_y > decoder->limit_y_ ## size)) {			      \
+	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y_ ## size;	      \
+	motion_y = pos_y - 2 * decoder->v_offset - 2 * y;		      \
+    }									      \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
+    offset = (pos_x >> 1) + (pos_y >> 1) * decoder->stride;		      \
+    table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \
+		    ref[0] + offset, decoder->stride, size);		      \
+    table[xy_half] (decoder->dest[1] + y * decoder->stride + decoder->offset, \
+		    ref[1] + offset, decoder->stride, size);		      \
+    table[xy_half] (decoder->dest[2] + y * decoder->stride + decoder->offset, \
+		    ref[2] + offset, decoder->stride, size)
+
+/* MOTION_FIELD_444: field prediction inside a frame picture (4:4:4);
+ * identical call on each full-resolution plane. */
+#define MOTION_FIELD_444(table,ref,motion_x,motion_y,dest_field,op,src_field) \
+    pos_x = 2 * decoder->offset + motion_x;				      \
+    pos_y = decoder->v_offset + motion_y;				      \
+    if (unlikely (pos_x > decoder->limit_x)) {				      \
+	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
+	motion_x = pos_x - 2 * decoder->offset;				      \
+    }									      \
+    if (unlikely (pos_y > decoder->limit_y)) {				      \
+	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;		      \
+	motion_y = pos_y - decoder->v_offset;				      \
+    }									      \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
+    offset = (pos_x >> 1) + ((pos_y op) + src_field) * decoder->stride;	      \
+    table[xy_half] (decoder->dest[0] + dest_field * decoder->stride +	      \
+		    decoder->offset, ref[0] + offset,			      \
+		    2 * decoder->stride, 8);				      \
+    table[xy_half] (decoder->dest[1] + dest_field * decoder->stride +	      \
+		    decoder->offset, ref[1] + offset,			      \
+		    2 * decoder->stride, 8);				      \
+    table[xy_half] (decoder->dest[2] + dest_field * decoder->stride +	      \
+		    decoder->offset, ref[2] + offset,			      \
+		    2 * decoder->stride, 8)
+
+/* MOTION_DMV_444: dual-prime prediction (4:4:4) — both fields for each
+ * of the three full-resolution planes. */
+#define MOTION_DMV_444(table,ref,motion_x,motion_y)			      \
+    pos_x = 2 * decoder->offset + motion_x;				      \
+    pos_y = decoder->v_offset + motion_y;				      \
+    if (unlikely (pos_x > decoder->limit_x)) {				      \
+	pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x;		      \
+	motion_x = pos_x - 2 * decoder->offset;				      \
+    }									      \
+    if (unlikely (pos_y > decoder->limit_y)) {				      \
+	pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y;		      \
+	motion_y = pos_y - decoder->v_offset;				      \
+    }									      \
+    xy_half = ((pos_y & 1) << 1) | (pos_x & 1);				      \
+    offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride;		      \
+    table[xy_half] (decoder->dest[0] + decoder->offset,			      \
+		    ref[0] + offset, 2 * decoder->stride, 8);		      \
+    table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset,     \
+		    ref[0] + decoder->stride + offset,			      \
+		    2 * decoder->stride, 8);				      \
+    table[xy_half] (decoder->dest[1] + decoder->offset,			      \
+		    ref[1] + offset, 2 * decoder->stride, 8);		      \
+    table[xy_half] (decoder->dest[1] + decoder->stride + decoder->offset,     \
+		    ref[1] + decoder->stride + offset,			      \
+		    2 * decoder->stride, 8);				      \
+    table[xy_half] (decoder->dest[2] + decoder->offset,			      \
+		    ref[2] + offset, 2 * decoder->stride, 8);		      \
+    table[xy_half] (decoder->dest[2] + decoder->stride + decoder->offset,     \
+		    ref[2] + decoder->stride + offset,			      \
+		    2 * decoder->stride, 8)
+
+/* MOTION_ZERO_444: zero-vector prediction (4:4:4). */
+#define MOTION_ZERO_444(table,ref)					      \
+    offset = decoder->offset + decoder->v_offset * decoder->stride;	      \
+    table[0] (decoder->dest[0] + decoder->offset,			      \
+	      ref[0] + offset, decoder->stride, 16);			      \
+    table[4] (decoder->dest[1] + decoder->offset,			      \
+	      ref[1] + offset, decoder->stride, 16);			      \
+    table[4] (decoder->dest[2] + decoder->offset,			      \
+	      ref[2] + offset, decoder->stride, 16)
+
+/* Alias the decoder's bitstream state for the NEEDBITS/DUMPBITS macros
+ * used by all the motion-vector parsers below; these aliases are removed
+ * with #undef after the conceal functions. */
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+
+/* Parse and apply an MPEG-1 frame motion vector.  The decoded delta is
+ * shifted left by f_code[1] and the bound uses f_code[0] + f_code[1]
+ * (f_code[1] presumably holds the MPEG-1 full-pel scale set up by the
+ * header parser — NOTE(review): confirm against header.c). */
+static void motion_mp1 (mpeg2_decoder_t * const decoder,
+			motion_t * const motion,
+			mpeg2_mc_fct * const * const table)
+{
+    int motion_x, motion_y;
+    unsigned int pos_x, pos_y, xy_half, offset;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    motion_x = (motion->pmv[0][0] +
+		(get_motion_delta (decoder,
+				   motion->f_code[0]) << motion->f_code[1]));
+    motion_x = bound_motion_vector (motion_x,
+				    motion->f_code[0] + motion->f_code[1]);
+    motion->pmv[0][0] = motion_x;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    motion_y = (motion->pmv[0][1] +
+		(get_motion_delta (decoder,
+				   motion->f_code[0]) << motion->f_code[1]));
+    motion_y = bound_motion_vector (motion_y,
+				    motion->f_code[0] + motion->f_code[1]);
+    motion->pmv[0][1] = motion_y;
+
+    MOTION_420 (table, motion->ref[0], motion_x, motion_y, 16, 0);
+}
+
+/* MOTION_FUNCTIONS generates the full family of motion-vector parsers
+ * for one chroma FORMAT (420/422/444): frame, frame-field, dual-prime
+ * (dmv), reuse, zero, and the field-picture variants (fi_field, fi_16x8,
+ * fi_dmv).  Each function reads deltas with get_motion_delta(), clamps
+ * them with bound_motion_vector(), updates the pmv predictors, and
+ * invokes the matching MOTION* macro from above.  The bodies rely on the
+ * bit_buf/bits/bit_ptr aliases defined before motion_mp1. */
+#define MOTION_FUNCTIONS(FORMAT,MOTION,MOTION_FIELD,MOTION_DMV,MOTION_ZERO) \
+									      \
+static void motion_fr_frame_##FORMAT (mpeg2_decoder_t * const decoder,	      \
+				      motion_t * const motion,		      \
+				      mpeg2_mc_fct * const * const table)     \
+{									      \
+    int motion_x, motion_y;						      \
+    unsigned int pos_x, pos_y, xy_half, offset;				      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,		      \
+						     motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
+    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;			      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,		      \
+						     motion->f_code[1]);      \
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);	      \
+    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;			      \
+									      \
+    MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0);		      \
+}									      \
+									      \
+static void motion_fr_field_##FORMAT (mpeg2_decoder_t * const decoder,	      \
+				      motion_t * const motion,		      \
+				      mpeg2_mc_fct * const * const table)     \
+{									      \
+    int motion_x, motion_y, field;					      \
+    unsigned int pos_x, pos_y, xy_half, offset;				      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    field = UBITS (bit_buf, 1);						      \
+    DUMPBITS (bit_buf, bits, 1);					      \
+									      \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,		      \
+						     motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
+    motion->pmv[0][0] = motion_x;					      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    motion_y = ((motion->pmv[0][1] >> 1) +				      \
+		get_motion_delta (decoder, motion->f_code[1]));		      \
+    /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */	      \
+    motion->pmv[0][1] = motion_y << 1;					      \
+									      \
+    MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 0, & ~1, field); \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    field = UBITS (bit_buf, 1);						      \
+    DUMPBITS (bit_buf, bits, 1);					      \
+									      \
+    motion_x = motion->pmv[1][0] + get_motion_delta (decoder,		      \
+						     motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
+    motion->pmv[1][0] = motion_x;					      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    motion_y = ((motion->pmv[1][1] >> 1) +				      \
+		get_motion_delta (decoder, motion->f_code[1]));		      \
+    /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */	      \
+    motion->pmv[1][1] = motion_y << 1;					      \
+									      \
+    MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 1, & ~1, field); \
+}									      \
+									      \
+static void motion_fr_dmv_##FORMAT (mpeg2_decoder_t * const decoder,	      \
+				    motion_t * const motion,		      \
+				    mpeg2_mc_fct * const * const table)	      \
+{									      \
+    int motion_x, motion_y, dmv_x, dmv_y, m, other_x, other_y;		      \
+    unsigned int pos_x, pos_y, xy_half, offset;				      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,		      \
+						     motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
+    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;			      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    dmv_x = get_dmv (decoder);						      \
+									      \
+    motion_y = ((motion->pmv[0][1] >> 1) +				      \
+		get_motion_delta (decoder, motion->f_code[1]));		      \
+    /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */	      \
+    motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1;		      \
+    dmv_y = get_dmv (decoder);						      \
+									      \
+    m = decoder->top_field_first ? 1 : 3;				      \
+    other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x;		      \
+    other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y - 1;	      \
+    MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 0, | 1, 0); \
+									      \
+    m = decoder->top_field_first ? 3 : 1;				      \
+    other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x;		      \
+    other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y + 1;	      \
+    MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 1, & ~1, 0);\
+									      \
+    MOTION_DMV (mpeg2_mc.avg, motion->ref[0], motion_x, motion_y);	      \
+}									      \
+									      \
+static void motion_reuse_##FORMAT (mpeg2_decoder_t * const decoder,	      \
+				   motion_t * const motion,		      \
+				   mpeg2_mc_fct * const * const table)	      \
+{									      \
+    int motion_x, motion_y;						      \
+    unsigned int pos_x, pos_y, xy_half, offset;				      \
+									      \
+    motion_x = motion->pmv[0][0];					      \
+    motion_y = motion->pmv[0][1];					      \
+									      \
+    MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0);		      \
+}									      \
+									      \
+static void motion_zero_##FORMAT (mpeg2_decoder_t * const decoder,	      \
+				  motion_t * const motion,		      \
+				  mpeg2_mc_fct * const * const table)	      \
+{									      \
+    unsigned int offset;						      \
+									      \
+    motion->pmv[0][0] = motion->pmv[0][1] = 0;				      \
+    motion->pmv[1][0] = motion->pmv[1][1] = 0;				      \
+									      \
+    MOTION_ZERO (table, motion->ref[0]);				      \
+}									      \
+									      \
+static void motion_fi_field_##FORMAT (mpeg2_decoder_t * const decoder,	      \
+				      motion_t * const motion,		      \
+				      mpeg2_mc_fct * const * const table)     \
+{									      \
+    int motion_x, motion_y;						      \
+    uint8_t ** ref_field;						      \
+    unsigned int pos_x, pos_y, xy_half, offset;				      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    ref_field = motion->ref2[UBITS (bit_buf, 1)];			      \
+    DUMPBITS (bit_buf, bits, 1);					      \
+									      \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,		      \
+						     motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
+    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;			      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,		      \
+						     motion->f_code[1]);      \
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);	      \
+    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;			      \
+									      \
+    MOTION (table, ref_field, motion_x, motion_y, 16, 0);		      \
+}									      \
+									      \
+static void motion_fi_16x8_##FORMAT (mpeg2_decoder_t * const decoder,	      \
+				     motion_t * const motion,		      \
+				     mpeg2_mc_fct * const * const table)      \
+{									      \
+    int motion_x, motion_y;						      \
+    uint8_t ** ref_field;						      \
+    unsigned int pos_x, pos_y, xy_half, offset;				      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    ref_field = motion->ref2[UBITS (bit_buf, 1)];			      \
+    DUMPBITS (bit_buf, bits, 1);					      \
+									      \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,		      \
+						     motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
+    motion->pmv[0][0] = motion_x;					      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,		      \
+						     motion->f_code[1]);      \
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);	      \
+    motion->pmv[0][1] = motion_y;					      \
+									      \
+    MOTION (table, ref_field, motion_x, motion_y, 8, 0);		      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    ref_field = motion->ref2[UBITS (bit_buf, 1)];			      \
+    DUMPBITS (bit_buf, bits, 1);					      \
+									      \
+    motion_x = motion->pmv[1][0] + get_motion_delta (decoder,		      \
+						     motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
+    motion->pmv[1][0] = motion_x;					      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    motion_y = motion->pmv[1][1] + get_motion_delta (decoder,		      \
+						     motion->f_code[1]);      \
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);	      \
+    motion->pmv[1][1] = motion_y;					      \
+									      \
+    MOTION (table, ref_field, motion_x, motion_y, 8, 8);		      \
+}									      \
+									      \
+static void motion_fi_dmv_##FORMAT (mpeg2_decoder_t * const decoder,	      \
+				    motion_t * const motion,		      \
+				    mpeg2_mc_fct * const * const table)	      \
+{									      \
+    int motion_x, motion_y, other_x, other_y;				      \
+    unsigned int pos_x, pos_y, xy_half, offset;				      \
+									      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    motion_x = motion->pmv[0][0] + get_motion_delta (decoder,		      \
+						     motion->f_code[0]);      \
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);	      \
+    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;			      \
+    NEEDBITS (bit_buf, bits, bit_ptr);					      \
+    other_x = ((motion_x + (motion_x > 0)) >> 1) + get_dmv (decoder);	      \
+									      \
+    motion_y = motion->pmv[0][1] + get_motion_delta (decoder,		      \
+						     motion->f_code[1]);      \
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);	      \
+    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;			      \
+    other_y = (((motion_y + (motion_y > 0)) >> 1) + get_dmv (decoder) +	      \
+	       decoder->dmv_offset);					      \
+									      \
+    MOTION (mpeg2_mc.put, motion->ref[0], motion_x, motion_y, 16, 0);	      \
+    MOTION (mpeg2_mc.avg, motion->ref[1], other_x, other_y, 16, 0);	      \
+}									      \
+
+/* Instantiate the motion-vector parser family for each chroma format. */
+MOTION_FUNCTIONS (420, MOTION_420, MOTION_FIELD_420, MOTION_DMV_420,
+		  MOTION_ZERO_420)
+MOTION_FUNCTIONS (422, MOTION_422, MOTION_FIELD_422, MOTION_DMV_422,
+		  MOTION_ZERO_422)
+MOTION_FUNCTIONS (444, MOTION_444, MOTION_FIELD_444, MOTION_DMV_444,
+		  MOTION_ZERO_444)
+
+/* Frame-picture concealment: parse a forward motion vector (to keep the
+ * bitstream and pmv predictors in sync) without performing any motion
+ * compensation, then drop the trailing marker bit. */
+static void motion_fr_conceal (mpeg2_decoder_t * const decoder)
+{
+    motion_t * const fm = &decoder->f_motion;
+    int axis;
+
+    for (axis = 0; axis < 2; axis++) {	/* 0: horizontal, 1: vertical */
+	int mv;
+
+	NEEDBITS (bit_buf, bits, bit_ptr);
+	mv = fm->pmv[0][axis] + get_motion_delta (decoder, fm->f_code[axis]);
+	mv = bound_motion_vector (mv, fm->f_code[axis]);
+	fm->pmv[1][axis] = fm->pmv[0][axis] = mv;
+    }
+
+    DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */
+}
+
+/* Field-picture concealment: discard the field_select bit, parse a
+ * forward motion vector to keep the bitstream and pmv predictors in
+ * sync (no motion compensation is performed), then drop the trailing
+ * marker bit. */
+static void motion_fi_conceal (mpeg2_decoder_t * const decoder)
+{
+    motion_t * const fm = &decoder->f_motion;
+    int axis;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    DUMPBITS (bit_buf, bits, 1); /* remove field_select */
+
+    for (axis = 0; axis < 2; axis++) {	/* 0: horizontal, 1: vertical */
+	int mv;
+
+	if (axis)	/* the first axis was refilled above already */
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+	mv = fm->pmv[0][axis] + get_motion_delta (decoder, fm->f_code[axis]);
+	mv = bound_motion_vector (mv, fm->f_code[axis]);
+	fm->pmv[1][axis] = fm->pmv[0][axis] = mv;
+    }
+
+    DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */
+}
+
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+
+/* Invoke a motion parser for the directions present in the macroblock:
+ * forward prediction writes with mpeg2_mc.put; backward prediction
+ * averages on top of it (mpeg2_mc.avg) when forward was also done,
+ * otherwise it writes directly with put. */
+#define MOTION_CALL(routine,direction)				\
+do {								\
+    if ((direction) & MACROBLOCK_MOTION_FORWARD)		\
+	routine (decoder, &(decoder->f_motion), mpeg2_mc.put);	\
+    if ((direction) & MACROBLOCK_MOTION_BACKWARD)		\
+	routine (decoder, &(decoder->b_motion),			\
+		 ((direction) & MACROBLOCK_MOTION_FORWARD ?	\
+		  mpeg2_mc.avg : mpeg2_mc.put));		\
+} while (0)
+
+/* Advance to the next macroblock position.  At the end of a 16-pixel
+ * row: hand the completed row to the convert callback (if any), bump
+ * the destination pointers one slice down (skipped for B pictures when
+ * converting, since those are rendered straight into the convert
+ * buffer), and return from the caller once v_offset passes limit_y,
+ * restoring saved CPU state (e.g. MMX) first.  Note this macro can
+ * execute a `return' on behalf of the enclosing function. */
+#define NEXT_MACROBLOCK							\
+do {									\
+    decoder->offset += 16;						\
+    if (decoder->offset == decoder->width) {				\
+	do { /* just so we can use the break statement */		\
+	    if (decoder->convert) {					\
+		decoder->convert (decoder->convert_id, decoder->dest,	\
+				  decoder->v_offset);			\
+		if (decoder->coding_type == B_TYPE)			\
+		    break;						\
+	    }								\
+	    decoder->dest[0] += decoder->slice_stride;			\
+	    decoder->dest[1] += decoder->slice_uv_stride;		\
+	    decoder->dest[2] += decoder->slice_uv_stride;		\
+	} while (0);							\
+	decoder->v_offset += 16;					\
+	if (decoder->v_offset > decoder->limit_y) {			\
+	    if (mpeg2_cpu_state_restore)				\
+		mpeg2_cpu_state_restore (&cpu_state);			\
+	    return;							\
+	}								\
+	decoder->offset = 0;						\
+    }									\
+} while (0)
+
+/**
+ * Dummy motion decoding function, to avoid calling NULL in
+ * case of malformed streams.
+ */
+/* Installed in motion_parser[] slots that are invalid for the current
+ * picture type (see mpeg2_init_fbuf); intentionally does nothing. */
+static void motion_dummy (mpeg2_decoder_t * const decoder,
+                          motion_t * const motion,
+                          mpeg2_mc_fct * const * const table)
+{
+}
+
+/* Prepare the decoder for a new picture: set up destination and
+ * reference plane pointers for the current/forward/backward frame
+ * buffers, derive stride and clipping limits, and select the motion
+ * parser table matching mpeg1/picture-structure/chroma-format.
+ *
+ * For a bottom field picture the planes start one line down (offset =
+ * stride for luma, stride/2 for chroma).  In field pictures ref[0] is
+ * the same-parity field and ref[1] the opposite-parity field; stride is
+ * doubled and height halved so all later arithmetic works per field. */
+void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3],
+		      uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3])
+{
+    int offset, stride, height, bottom_field;
+
+    stride = decoder->stride_frame;
+    bottom_field = (decoder->picture_structure == BOTTOM_FIELD);
+    offset = bottom_field ? stride : 0;
+    height = decoder->height;
+
+    decoder->picture_dest[0] = current_fbuf[0] + offset;
+    decoder->picture_dest[1] = current_fbuf[1] + (offset >> 1);
+    decoder->picture_dest[2] = current_fbuf[2] + (offset >> 1);
+
+    decoder->f_motion.ref[0][0] = forward_fbuf[0] + offset;
+    decoder->f_motion.ref[0][1] = forward_fbuf[1] + (offset >> 1);
+    decoder->f_motion.ref[0][2] = forward_fbuf[2] + (offset >> 1);
+
+    decoder->b_motion.ref[0][0] = backward_fbuf[0] + offset;
+    decoder->b_motion.ref[0][1] = backward_fbuf[1] + (offset >> 1);
+    decoder->b_motion.ref[0][2] = backward_fbuf[2] + (offset >> 1);
+
+    if (decoder->picture_structure != FRAME_PICTURE) {
+	/* field picture: ref2[] maps dmv prediction to same/opposite
+	 * parity fields; dmv_offset is +/-1 depending on field parity */
+	decoder->dmv_offset = bottom_field ? 1 : -1;
+	decoder->f_motion.ref2[0] = decoder->f_motion.ref[bottom_field];
+	decoder->f_motion.ref2[1] = decoder->f_motion.ref[!bottom_field];
+	decoder->b_motion.ref2[0] = decoder->b_motion.ref[bottom_field];
+	decoder->b_motion.ref2[1] = decoder->b_motion.ref[!bottom_field];
+	offset = stride - offset;
+
+	/* the second field of a P (or I) picture predicts from the
+	 * first field of the same frame */
+	if (decoder->second_field && (decoder->coding_type != B_TYPE))
+	    forward_fbuf = current_fbuf;
+
+	decoder->f_motion.ref[1][0] = forward_fbuf[0] + offset;
+	decoder->f_motion.ref[1][1] = forward_fbuf[1] + (offset >> 1);
+	decoder->f_motion.ref[1][2] = forward_fbuf[2] + (offset >> 1);
+
+	decoder->b_motion.ref[1][0] = backward_fbuf[0] + offset;
+	decoder->b_motion.ref[1][1] = backward_fbuf[1] + (offset >> 1);
+	decoder->b_motion.ref[1][2] = backward_fbuf[2] + (offset >> 1);
+
+	stride <<= 1;
+	height >>= 1;
+    }
+
+    decoder->stride = stride;
+    decoder->uv_stride = stride >> 1;
+    decoder->slice_stride = 16 * stride;
+    decoder->slice_uv_stride =
+	decoder->slice_stride >> (2 - decoder->chroma_format);
+    /* limits are used by the motion code to clip vectors; x/y_16/y_8
+     * are in half-pel units, limit_y in full pels */
+    decoder->limit_x = 2 * decoder->width - 32;
+    decoder->limit_y_16 = 2 * height - 32;
+    decoder->limit_y_8 = 2 * height - 16;
+    decoder->limit_y = height - 16;
+
+    /* select motion parsers: slot 0 = no-MC (skipped/zero), slot 4 =
+     * reuse previous vectors; MC_FIELD/MC_FRAME/MC_16X8/MC_DMV as per
+     * the macroblock motion_type; chroma_format: 0=4:2:0 1=4:2:2
+     * else 4:4:4 */
+    if (decoder->mpeg1) {
+	decoder->motion_parser[0] = motion_zero_420;
+	decoder->motion_parser[MC_FIELD] = motion_dummy;
+	decoder->motion_parser[MC_FRAME] = motion_mp1;
+	decoder->motion_parser[MC_DMV] = motion_dummy;
+	decoder->motion_parser[4] = motion_reuse_420;
+    } else if (decoder->picture_structure == FRAME_PICTURE) {
+	if (decoder->chroma_format == 0) {
+	    decoder->motion_parser[0] = motion_zero_420;
+	    decoder->motion_parser[MC_FIELD] = motion_fr_field_420;
+	    decoder->motion_parser[MC_FRAME] = motion_fr_frame_420;
+	    decoder->motion_parser[MC_DMV] = motion_fr_dmv_420;
+	    decoder->motion_parser[4] = motion_reuse_420;
+	} else if (decoder->chroma_format == 1) {
+	    decoder->motion_parser[0] = motion_zero_422;
+	    decoder->motion_parser[MC_FIELD] = motion_fr_field_422;
+	    decoder->motion_parser[MC_FRAME] = motion_fr_frame_422;
+	    decoder->motion_parser[MC_DMV] = motion_fr_dmv_422;
+	    decoder->motion_parser[4] = motion_reuse_422;
+	} else {
+	    decoder->motion_parser[0] = motion_zero_444;
+	    decoder->motion_parser[MC_FIELD] = motion_fr_field_444;
+	    decoder->motion_parser[MC_FRAME] = motion_fr_frame_444;
+	    decoder->motion_parser[MC_DMV] = motion_fr_dmv_444;
+	    decoder->motion_parser[4] = motion_reuse_444;
+	}
+    } else {
+	if (decoder->chroma_format == 0) {
+	    decoder->motion_parser[0] = motion_zero_420;
+	    decoder->motion_parser[MC_FIELD] = motion_fi_field_420;
+	    decoder->motion_parser[MC_16X8] = motion_fi_16x8_420;
+	    decoder->motion_parser[MC_DMV] = motion_fi_dmv_420;
+	    decoder->motion_parser[4] = motion_reuse_420;
+	} else if (decoder->chroma_format == 1) {
+	    decoder->motion_parser[0] = motion_zero_422;
+	    decoder->motion_parser[MC_FIELD] = motion_fi_field_422;
+	    decoder->motion_parser[MC_16X8] = motion_fi_16x8_422;
+	    decoder->motion_parser[MC_DMV] = motion_fi_dmv_422;
+	    decoder->motion_parser[4] = motion_reuse_422;
+	} else {
+	    decoder->motion_parser[0] = motion_zero_444;
+	    decoder->motion_parser[MC_FIELD] = motion_fi_field_444;
+	    decoder->motion_parser[MC_16X8] = motion_fi_16x8_444;
+	    decoder->motion_parser[MC_DMV] = motion_fi_dmv_444;
+	    decoder->motion_parser[4] = motion_reuse_444;
+	}
+    }
+}
+
+/* Parse the slice header and position the decoder at the slice's first
+ * macroblock: resets DC and motion-vector predictors, applies the
+ * vertical position extension (for pictures taller than 2800 lines),
+ * reads the quantizer scale, skips intra_slice/extra data, and decodes
+ * the initial macroblock_address_increment.  Returns nonzero on a
+ * malformed header (bad VLC or slice beyond the picture). */
+static inline int slice_init (mpeg2_decoder_t * const decoder, int code)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    int offset;
+    const MBAtab * mba;
+
+    /* reset intra DC predictors to mid-range */
+    decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] =
+	decoder->dc_dct_pred[2] = 16384;
+
+    decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0;
+    decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0;
+    decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0;
+    decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0;
+
+    if (decoder->vertical_position_extension) {
+	code += UBITS (bit_buf, 3) << 7;
+	DUMPBITS (bit_buf, bits, 3);
+    }
+    decoder->v_offset = (code - 1) * 16;
+    offset = 0;
+    /* B pictures rendered via convert write into a fixed buffer, so
+     * dest is not advanced per slice in that case */
+    if (!(decoder->convert) || decoder->coding_type != B_TYPE)
+	offset = (code - 1) * decoder->slice_stride;
+
+    decoder->dest[0] = decoder->picture_dest[0] + offset;
+    offset >>= (2 - decoder->chroma_format);
+    decoder->dest[1] = decoder->picture_dest[1] + offset;
+    decoder->dest[2] = decoder->picture_dest[2] + offset;
+
+    get_quantizer_scale (decoder);
+
+    /* ignore intra_slice and all the extra data */
+    while (bit_buf & 0x80000000) {
+	DUMPBITS (bit_buf, bits, 9);
+	NEEDBITS (bit_buf, bits, bit_ptr);
+    }
+
+    /* decode initial macroblock address increment */
+    offset = 0;
+    while (1) {
+	if (bit_buf >= 0x08000000) {
+	    mba = MBA_5 + (UBITS (bit_buf, 6) - 2);
+	    break;
+	} else if (bit_buf >= 0x01800000) {
+	    mba = MBA_11 + (UBITS (bit_buf, 12) - 24);
+	    break;
+	} else switch (UBITS (bit_buf, 12)) {
+	case 8:		/* macroblock_escape */
+	    offset += 33;
+	    DUMPBITS (bit_buf, bits, 11);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+	    continue;
+	case 15:	/* macroblock_stuffing (MPEG1 only) */
+	    bit_buf &= 0xfffff;
+	    DUMPBITS (bit_buf, bits, 11);
+	    NEEDBITS (bit_buf, bits, bit_ptr);
+	    continue;
+	default:	/* error */
+	    return 1;
+	}
+    }
+    DUMPBITS (bit_buf, bits, mba->len + 1);
+    decoder->offset = (offset + mba->mba) << 4;
+
+    /* wrap the horizontal offset into rows (an initial increment may
+     * span several macroblock rows) */
+    while (decoder->offset - decoder->width >= 0) {
+	decoder->offset -= decoder->width;
+	if (!(decoder->convert) || decoder->coding_type != B_TYPE) {
+	    decoder->dest[0] += decoder->slice_stride;
+	    decoder->dest[1] += decoder->slice_uv_stride;
+	    decoder->dest[2] += decoder->slice_uv_stride;
+	}
+	decoder->v_offset += 16;
+    }
+    if (decoder->v_offset > decoder->limit_y)
+	return 1;
+
+    return 0;
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+
+/* Decode one complete slice: initialize the bitstream from `buffer',
+ * run slice_init, then loop decoding macroblocks (intra DCT blocks or
+ * motion compensation plus optional coded-block-pattern residuals)
+ * until the end-of-slice VLC or the bottom of the picture is reached.
+ * `code' is the slice vertical position from the start code. */
+void mpeg2_slice (mpeg2_decoder_t * const decoder, const int code,
+		  const uint8_t * const buffer)
+{
+#define bit_buf (decoder->bitstream_buf)
+#define bits (decoder->bitstream_bits)
+#define bit_ptr (decoder->bitstream_ptr)
+    cpu_state_t cpu_state;
+
+    bitstream_init (decoder, buffer);
+
+    if (slice_init (decoder, code))
+	return;
+
+    /* save CPU state (e.g. FPU/MMX registers) around accelerated code */
+    if (mpeg2_cpu_state_save)
+	mpeg2_cpu_state_save (&cpu_state);
+
+    while (1) {
+	int macroblock_modes;
+	int mba_inc;
+	const MBAtab * mba;
+
+	NEEDBITS (bit_buf, bits, bit_ptr);
+
+	macroblock_modes = get_macroblock_modes (decoder);
+
+	/* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */
+	if (macroblock_modes & MACROBLOCK_QUANT)
+	    get_quantizer_scale (decoder);
+
+	if (macroblock_modes & MACROBLOCK_INTRA) {
+
+	    int DCT_offset, DCT_stride;
+	    int offset;
+	    uint8_t * dest_y;
+
+	    /* concealment vectors are parsed (to keep predictors in
+	     * sync) but never used for prediction in intra blocks */
+	    if (decoder->concealment_motion_vectors) {
+		if (decoder->picture_structure == FRAME_PICTURE)
+		    motion_fr_conceal (decoder);
+		else
+		    motion_fi_conceal (decoder);
+	    } else {
+		decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0;
+		decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0;
+		decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0;
+		decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0;
+	    }
+
+	    /* interlaced DCT interleaves the two fields line by line */
+	    if (macroblock_modes & DCT_TYPE_INTERLACED) {
+		DCT_offset = decoder->stride;
+		DCT_stride = decoder->stride * 2;
+	    } else {
+		DCT_offset = decoder->stride * 8;
+		DCT_stride = decoder->stride;
+	    }
+
+	    /* four 8x8 luma blocks, then chroma per chroma_format
+	     * (0 = 4:2:0, 1 = 4:2:2, else 4:4:4) */
+	    offset = decoder->offset;
+	    dest_y = decoder->dest[0] + offset;
+	    slice_intra_DCT (decoder, 0, dest_y, DCT_stride);
+	    slice_intra_DCT (decoder, 0, dest_y + 8, DCT_stride);
+	    slice_intra_DCT (decoder, 0, dest_y + DCT_offset, DCT_stride);
+	    slice_intra_DCT (decoder, 0, dest_y + DCT_offset + 8, DCT_stride);
+	    if (likely (decoder->chroma_format == 0)) {
+		slice_intra_DCT (decoder, 1, decoder->dest[1] + (offset >> 1),
+				 decoder->uv_stride);
+		slice_intra_DCT (decoder, 2, decoder->dest[2] + (offset >> 1),
+				 decoder->uv_stride);
+		/* D pictures carry an extra end-of-macroblock bit */
+		if (decoder->coding_type == D_TYPE) {
+		    NEEDBITS (bit_buf, bits, bit_ptr);
+		    DUMPBITS (bit_buf, bits, 1);
+		}
+	    } else if (likely (decoder->chroma_format == 1)) {
+		uint8_t * dest_u = decoder->dest[1] + (offset >> 1);
+		uint8_t * dest_v = decoder->dest[2] + (offset >> 1);
+		DCT_stride >>= 1;
+		DCT_offset >>= 1;
+		slice_intra_DCT (decoder, 1, dest_u, DCT_stride);
+		slice_intra_DCT (decoder, 2, dest_v, DCT_stride);
+		slice_intra_DCT (decoder, 1, dest_u + DCT_offset, DCT_stride);
+		slice_intra_DCT (decoder, 2, dest_v + DCT_offset, DCT_stride);
+	    } else {
+		uint8_t * dest_u = decoder->dest[1] + offset;
+		uint8_t * dest_v = decoder->dest[2] + offset;
+		slice_intra_DCT (decoder, 1, dest_u, DCT_stride);
+		slice_intra_DCT (decoder, 2, dest_v, DCT_stride);
+		slice_intra_DCT (decoder, 1, dest_u + DCT_offset, DCT_stride);
+		slice_intra_DCT (decoder, 2, dest_v + DCT_offset, DCT_stride);
+		slice_intra_DCT (decoder, 1, dest_u + 8, DCT_stride);
+		slice_intra_DCT (decoder, 2, dest_v + 8, DCT_stride);
+		slice_intra_DCT (decoder, 1, dest_u + DCT_offset + 8,
+				 DCT_stride);
+		slice_intra_DCT (decoder, 2, dest_v + DCT_offset + 8,
+				 DCT_stride);
+	    }
+	} else {
+
+	    motion_parser_t * parser;
+
+	    /* bounds-check the motion_type index so a malformed stream
+	     * cannot index outside motion_parser[] */
+	    if ( ((macroblock_modes >> MOTION_TYPE_SHIFT) < 0)
+              || ((macroblock_modes >> MOTION_TYPE_SHIFT) >=
+                  (int)(sizeof(decoder->motion_parser) 
+                        / sizeof(decoder->motion_parser[0])))
+               ) {
+		break; // Illegal !
+	    }
+
+	    parser =
+		decoder->motion_parser[macroblock_modes >> MOTION_TYPE_SHIFT];
+	    MOTION_CALL (parser, macroblock_modes);
+
+	    if (macroblock_modes & MACROBLOCK_PATTERN) {
+		int coded_block_pattern;
+		int DCT_offset, DCT_stride;
+
+		if (macroblock_modes & DCT_TYPE_INTERLACED) {
+		    DCT_offset = decoder->stride;
+		    DCT_stride = decoder->stride * 2;
+		} else {
+		    DCT_offset = decoder->stride * 8;
+		    DCT_stride = decoder->stride;
+		}
+
+		coded_block_pattern = get_coded_block_pattern (decoder);
+
+		/* add residual blocks for each coded block; chroma
+		 * formats beyond 4:2:0 carry extra cbp bits read here */
+		if (likely (decoder->chroma_format == 0)) {
+		    int offset = decoder->offset;
+		    uint8_t * dest_y = decoder->dest[0] + offset;
+		    if (coded_block_pattern & 1)
+			slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride);
+		    if (coded_block_pattern & 2)
+			slice_non_intra_DCT (decoder, 0, dest_y + 8,
+					     DCT_stride);
+		    if (coded_block_pattern & 4)
+			slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset,
+					     DCT_stride);
+		    if (coded_block_pattern & 8)
+			slice_non_intra_DCT (decoder, 0,
+					     dest_y + DCT_offset + 8,
+					     DCT_stride);
+		    if (coded_block_pattern & 16)
+			slice_non_intra_DCT (decoder, 1,
+					     decoder->dest[1] + (offset >> 1),
+					     decoder->uv_stride);
+		    if (coded_block_pattern & 32)
+			slice_non_intra_DCT (decoder, 2,
+					     decoder->dest[2] + (offset >> 1),
+					     decoder->uv_stride);
+		} else if (likely (decoder->chroma_format == 1)) {
+		    int offset;
+		    uint8_t * dest_y;
+
+		    /* 4:2:2 - two extra cbp bits for the lower chroma */
+		    coded_block_pattern |= bit_buf & (3 << 30);
+		    DUMPBITS (bit_buf, bits, 2);
+
+		    offset = decoder->offset;
+		    dest_y = decoder->dest[0] + offset;
+		    if (coded_block_pattern & 1)
+			slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride);
+		    if (coded_block_pattern & 2)
+			slice_non_intra_DCT (decoder, 0, dest_y + 8,
+					     DCT_stride);
+		    if (coded_block_pattern & 4)
+			slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset,
+					     DCT_stride);
+		    if (coded_block_pattern & 8)
+			slice_non_intra_DCT (decoder, 0,
+					     dest_y + DCT_offset + 8,
+					     DCT_stride);
+
+		    DCT_stride >>= 1;
+		    DCT_offset = (DCT_offset + offset) >> 1;
+		    if (coded_block_pattern & 16)
+			slice_non_intra_DCT (decoder, 1,
+					     decoder->dest[1] + (offset >> 1),
+					     DCT_stride);
+		    if (coded_block_pattern & 32)
+			slice_non_intra_DCT (decoder, 2,
+					     decoder->dest[2] + (offset >> 1),
+					     DCT_stride);
+		    if (coded_block_pattern & (2 << 30))
+			slice_non_intra_DCT (decoder, 1,
+					     decoder->dest[1] + DCT_offset,
+					     DCT_stride);
+		    if (coded_block_pattern & (1 << 30))
+			slice_non_intra_DCT (decoder, 2,
+					     decoder->dest[2] + DCT_offset,
+					     DCT_stride);
+		} else {
+		    int offset;
+		    uint8_t * dest_y, * dest_u, * dest_v;
+
+		    /* 4:4:4 - six extra cbp bits */
+		    coded_block_pattern |= bit_buf & (63 << 26);
+		    DUMPBITS (bit_buf, bits, 6);
+
+		    offset = decoder->offset;
+		    dest_y = decoder->dest[0] + offset;
+		    dest_u = decoder->dest[1] + offset;
+		    dest_v = decoder->dest[2] + offset;
+
+		    if (coded_block_pattern & 1)
+			slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride);
+		    if (coded_block_pattern & 2)
+			slice_non_intra_DCT (decoder, 0, dest_y + 8,
+					     DCT_stride);
+		    if (coded_block_pattern & 4)
+			slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset,
+					     DCT_stride);
+		    if (coded_block_pattern & 8)
+			slice_non_intra_DCT (decoder, 0,
+					     dest_y + DCT_offset + 8,
+					     DCT_stride);
+
+		    if (coded_block_pattern & 16)
+			slice_non_intra_DCT (decoder, 1, dest_u, DCT_stride);
+		    if (coded_block_pattern & 32)
+			slice_non_intra_DCT (decoder, 2, dest_v, DCT_stride);
+		    if (coded_block_pattern & (32 << 26))
+			slice_non_intra_DCT (decoder, 1, dest_u + DCT_offset,
+					     DCT_stride);
+		    if (coded_block_pattern & (16 << 26))
+			slice_non_intra_DCT (decoder, 2, dest_v + DCT_offset,
+					     DCT_stride);
+		    if (coded_block_pattern & (8 << 26))
+			slice_non_intra_DCT (decoder, 1, dest_u + 8,
+					     DCT_stride);
+		    if (coded_block_pattern & (4 << 26))
+			slice_non_intra_DCT (decoder, 2, dest_v + 8,
+					     DCT_stride);
+		    if (coded_block_pattern & (2 << 26))
+			slice_non_intra_DCT (decoder, 1,
+					     dest_u + DCT_offset + 8,
+					     DCT_stride);
+		    if (coded_block_pattern & (1 << 26))
+			slice_non_intra_DCT (decoder, 2,
+					     dest_v + DCT_offset + 8,
+					     DCT_stride);
+		}
+	    }
+
+	    decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] =
+		decoder->dc_dct_pred[2] = 16384;
+	}
+
+	NEXT_MACROBLOCK;
+
+	/* decode macroblock_address_increment for the next macroblock;
+	 * an unrecognized code is the slice terminator */
+	NEEDBITS (bit_buf, bits, bit_ptr);
+	mba_inc = 0;
+	while (1) {
+	    if (bit_buf >= 0x10000000) {
+		mba = MBA_5 + (UBITS (bit_buf, 5) - 2);
+		break;
+	    } else if (bit_buf >= 0x03000000) {
+		mba = MBA_11 + (UBITS (bit_buf, 11) - 24);
+		break;
+	    } else switch (UBITS (bit_buf, 11)) {
+	    case 8:		/* macroblock_escape */
+		mba_inc += 33;
+		/* pass through */
+	    case 15:	/* macroblock_stuffing (MPEG1 only) */
+		DUMPBITS (bit_buf, bits, 11);
+		NEEDBITS (bit_buf, bits, bit_ptr);
+		continue;
+	    default:	/* end of slice, or error */
+		if (mpeg2_cpu_state_restore)
+		    mpeg2_cpu_state_restore (&cpu_state);
+		return;
+	    }
+	}
+	DUMPBITS (bit_buf, bits, mba->len);
+	mba_inc += mba->mba;
+
+	/* skipped macroblocks: reset DC predictors and run the zero-
+	 * motion parser (P) or reuse previous vectors (B) */
+	if (mba_inc) {
+	    decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] =
+		decoder->dc_dct_pred[2] = 16384;
+
+	    if (decoder->coding_type == P_TYPE) {
+		do {
+		    MOTION_CALL (decoder->motion_parser[0],
+				 MACROBLOCK_MOTION_FORWARD);
+		    NEXT_MACROBLOCK;
+		} while (--mba_inc);
+	    } else {
+		do {
+		    MOTION_CALL (decoder->motion_parser[4], macroblock_modes);
+		    NEXT_MACROBLOCK;
+		} while (--mba_inc);
+	    }
+	}
+    }
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
diff --git a/libmpeg2/vlc.h b/libmpeg2/vlc.h
new file mode 100644
index 0000000..b3a868e
--- /dev/null
+++ b/libmpeg2/vlc.h
@@ -0,0 +1,434 @@
+/*
+ * vlc.h
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef LIBMPEG2_VLC_H
+#define LIBMPEG2_VLC_H
+
+/* Fetch the next 16 bits (big-endian) from the byte stream and merge
+ * them into bit_buf at position `shift', advancing the read pointer. */
+#define GETWORD(bit_buf,shift,bit_ptr)				\
+do {								\
+    bit_buf |= ((bit_ptr[0] << 8) | bit_ptr[1]) << (shift);	\
+    bit_ptr += 2;						\
+} while (0)
+
+/* Prime the 32-bit bitstream window from `start'.  bitstream_bits
+ * counts how many bits of the window have been consumed, biased by
+ * -16 so NEEDBITS refills one 16-bit word at a time. */
+static inline void bitstream_init (mpeg2_decoder_t * decoder,
+				   const uint8_t * start)
+{
+    decoder->bitstream_buf =
+	(start[0] << 24) | (start[1] << 16) | (start[2] << 8) | start[3];
+    decoder->bitstream_ptr = start + 4;
+    decoder->bitstream_bits = -16;
+}
+
+/* make sure that there are at least 16 valid bits in bit_buf */
+#define NEEDBITS(bit_buf,bits,bit_ptr)		\
+do {						\
+    if (unlikely (bits > 0)) {			\
+	GETWORD (bit_buf, bits, bit_ptr);	\
+	bits -= 16;				\
+    }						\
+} while (0)
+
+/* remove num valid bits from bit_buf */
+/* bits are consumed MSB-first: shifting left discards them */
+#define DUMPBITS(bit_buf,bits,num)	\
+do {					\
+    bit_buf <<= (num);			\
+    bits += (num);			\
+} while (0)
+
+/* take num bits from the high part of bit_buf and zero extend them */
+#define UBITS(bit_buf,num) (((uint32_t)(bit_buf)) >> (32 - (num)))
+
+/* take num bits from the high part of bit_buf and sign extend them */
+#define SBITS(bit_buf,num) (((int32_t)(bit_buf)) >> (32 - (num)))
+
+/* Lookup-table entry types for the VLC decoders below.  Each `len'
+ * field is the number of bits actually consumed by the matched code;
+ * tables are indexed by peeking a fixed number of bits with UBITS,
+ * so shorter codes appear in multiple consecutive entries. */
+
+/* macroblock type: MACROBLOCK_* mode flags */
+typedef struct {
+    uint8_t modes;
+    uint8_t len;
+} MBtab;
+
+/* motion vector magnitude */
+typedef struct {
+    uint8_t delta;
+    uint8_t len;
+} MVtab;
+
+/* dual-prime differential motion vector (signed) */
+typedef struct {
+    int8_t dmv;
+    uint8_t len;
+} DMVtab;
+
+/* coded block pattern */
+typedef struct {
+    uint8_t cbp;
+    uint8_t len;
+} CBPtab;
+
+/* intra DC coefficient size class */
+typedef struct {
+    uint8_t size;
+    uint8_t len;
+} DCtab;
+
+/* AC run/level pair */
+typedef struct {
+    uint8_t run;
+    uint8_t level;
+    uint8_t len;
+} DCTtab;
+
+/* macroblock address increment */
+typedef struct {
+    uint8_t mba;
+    uint8_t len;
+} MBAtab;
+
+
+#define INTRA MACROBLOCK_INTRA
+#define QUANT MACROBLOCK_QUANT
+
+/* macroblock_type VLC for I pictures (ISO/IEC 13818-2 table B-2) */
+static const MBtab MB_I [] = {
+    {INTRA|QUANT, 2}, {INTRA, 1}
+};
+
+#define MC MACROBLOCK_MOTION_FORWARD
+#define CODED MACROBLOCK_PATTERN
+
+/* macroblock_type VLC for P pictures (table B-3) */
+static const MBtab MB_P [] = {
+    {INTRA|QUANT, 6}, {CODED|QUANT, 5}, {MC|CODED|QUANT, 5}, {INTRA, 5},
+    {MC, 3}, {MC, 3}, {MC, 3}, {MC, 3},
+    {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2},
+    {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2},
+    {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1},
+    {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1},
+    {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1},
+    {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}
+};
+
+#define FWD MACROBLOCK_MOTION_FORWARD
+#define BWD MACROBLOCK_MOTION_BACKWARD
+#define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD
+
+/* macroblock_type VLC for B pictures (table B-4); entry 0 is invalid */
+static const MBtab MB_B [] = {
+    {0, 6}, {INTRA|QUANT, 6},
+    {BWD|CODED|QUANT, 6}, {FWD|CODED|QUANT, 6},
+    {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5},
+    {INTRA, 5}, {INTRA, 5},
+    {FWD, 4}, {FWD, 4}, {FWD, 4}, {FWD, 4},
+    {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4},
+    {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3},
+    {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3},
+    {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3},
+    {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3},
+    {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2},
+    {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2},
+    {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2},
+    {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2},
+    {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
+    {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
+    {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
+    {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}
+};
+
+#undef INTRA
+#undef QUANT
+#undef MC
+#undef CODED
+#undef FWD
+#undef BWD
+#undef INTER
+
+
+/* motion_code VLC, short codes indexed by 4 peeked bits (table B-10) */
+static const MVtab MV_4 [] = {
+    { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}
+};
+
+/* motion_code VLC, long codes indexed by 10 peeked bits */
+static const MVtab MV_10 [] = {
+    { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10},
+    { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10},
+    {11,10}, {10,10}, { 9, 9}, { 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9},
+    { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7},
+    { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7},
+    { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}
+};
+
+/* dmvector VLC: 0, +1 or -1 (table B-11) */
+static const DMVtab DMV_2 [] = {
+    { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2}
+};
+
+
+/* coded_block_pattern VLC, codes up to 7 bits, indexed by 7 peeked
+ * bits (ISO/IEC 13818-2 table B-9) */
+static const CBPtab CBP_7 [] = {
+    {0x11, 7}, {0x12, 7}, {0x14, 7}, {0x18, 7},
+    {0x21, 7}, {0x22, 7}, {0x24, 7}, {0x28, 7},
+    {0x3f, 6}, {0x3f, 6}, {0x30, 6}, {0x30, 6},
+    {0x09, 6}, {0x09, 6}, {0x06, 6}, {0x06, 6},
+    {0x1f, 5}, {0x1f, 5}, {0x1f, 5}, {0x1f, 5},
+    {0x10, 5}, {0x10, 5}, {0x10, 5}, {0x10, 5},
+    {0x2f, 5}, {0x2f, 5}, {0x2f, 5}, {0x2f, 5},
+    {0x20, 5}, {0x20, 5}, {0x20, 5}, {0x20, 5},
+    {0x07, 5}, {0x07, 5}, {0x07, 5}, {0x07, 5},
+    {0x0b, 5}, {0x0b, 5}, {0x0b, 5}, {0x0b, 5},
+    {0x0d, 5}, {0x0d, 5}, {0x0d, 5}, {0x0d, 5},
+    {0x0e, 5}, {0x0e, 5}, {0x0e, 5}, {0x0e, 5},
+    {0x05, 5}, {0x05, 5}, {0x05, 5}, {0x05, 5},
+    {0x0a, 5}, {0x0a, 5}, {0x0a, 5}, {0x0a, 5},
+    {0x03, 5}, {0x03, 5}, {0x03, 5}, {0x03, 5},
+    {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5},
+    {0x01, 4}, {0x01, 4}, {0x01, 4}, {0x01, 4},
+    {0x01, 4}, {0x01, 4}, {0x01, 4}, {0x01, 4},
+    {0x02, 4}, {0x02, 4}, {0x02, 4}, {0x02, 4},
+    {0x02, 4}, {0x02, 4}, {0x02, 4}, {0x02, 4},
+    {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4},
+    {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4},
+    {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4},
+    {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4},
+    {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3},
+    {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3},
+    {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3},
+    {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3}
+};
+
+/* coded_block_pattern VLC, 8/9-bit codes (second-level table; the
+ * first {0, 9} entry corresponds to an invalid all-zero code) */
+static const CBPtab CBP_9 [] = {
+    {0,    9}, {0x00, 9}, {0x39, 9}, {0x36, 9},
+    {0x37, 9}, {0x3b, 9}, {0x3d, 9}, {0x3e, 9},
+    {0x17, 8}, {0x17, 8}, {0x1b, 8}, {0x1b, 8},
+    {0x1d, 8}, {0x1d, 8}, {0x1e, 8}, {0x1e, 8},
+    {0x27, 8}, {0x27, 8}, {0x2b, 8}, {0x2b, 8},
+    {0x2d, 8}, {0x2d, 8}, {0x2e, 8}, {0x2e, 8},
+    {0x19, 8}, {0x19, 8}, {0x16, 8}, {0x16, 8},
+    {0x29, 8}, {0x29, 8}, {0x26, 8}, {0x26, 8},
+    {0x35, 8}, {0x35, 8}, {0x3a, 8}, {0x3a, 8},
+    {0x33, 8}, {0x33, 8}, {0x3c, 8}, {0x3c, 8},
+    {0x15, 8}, {0x15, 8}, {0x1a, 8}, {0x1a, 8},
+    {0x13, 8}, {0x13, 8}, {0x1c, 8}, {0x1c, 8},
+    {0x25, 8}, {0x25, 8}, {0x2a, 8}, {0x2a, 8},
+    {0x23, 8}, {0x23, 8}, {0x2c, 8}, {0x2c, 8},
+    {0x31, 8}, {0x31, 8}, {0x32, 8}, {0x32, 8},
+    {0x34, 8}, {0x34, 8}, {0x38, 8}, {0x38, 8}
+};
+
+
+/* dct_dc_size VLCs (ISO/IEC 13818-2 tables B-12/B-13): the decoded
+ * `size' is the bit count of the DC differential that follows */
+
+/* luma dct_dc_size, codes up to 5 bits */
+static const DCtab DC_lum_5 [] = {
+    {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
+    {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+    {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3},
+    {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}
+};
+
+/* chroma dct_dc_size, codes up to 5 bits */
+static const DCtab DC_chrom_5 [] = {
+    {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2},
+    {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
+    {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+    {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}
+};
+
+/* shared long dct_dc_size codes (5-9 bits) */
+static const DCtab DC_long [] = {
+    {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5},
+    {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5},
+    {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6},
+    {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9}
+};
+
+
+/* DCT coefficient run/level VLC tables (ISO/IEC 13818-2 table B-14).
+ * run==129 marks end-of-block, run==65 marks the escape code; the
+ * level value then has a sign bit or fixed-length value following. */
+
+/* 16-bit codes */
+static const DCTtab DCT_16 [] = {
+    {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
+    {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
+    {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
+    {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
+    {  2,18, 0}, {  2,17, 0}, {  2,16, 0}, {  2,15, 0},
+    {  7, 3, 0}, { 17, 2, 0}, { 16, 2, 0}, { 15, 2, 0},
+    { 14, 2, 0}, { 13, 2, 0}, { 12, 2, 0}, { 32, 1, 0},
+    { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0}
+};
+
+/* 15-bit codes */
+static const DCTtab DCT_15 [] = {
+    {  1,40,15}, {  1,39,15}, {  1,38,15}, {  1,37,15},
+    {  1,36,15}, {  1,35,15}, {  1,34,15}, {  1,33,15},
+    {  1,32,15}, {  2,14,15}, {  2,13,15}, {  2,12,15},
+    {  2,11,15}, {  2,10,15}, {  2, 9,15}, {  2, 8,15},
+    {  1,31,14}, {  1,31,14}, {  1,30,14}, {  1,30,14},
+    {  1,29,14}, {  1,29,14}, {  1,28,14}, {  1,28,14},
+    {  1,27,14}, {  1,27,14}, {  1,26,14}, {  1,26,14},
+    {  1,25,14}, {  1,25,14}, {  1,24,14}, {  1,24,14},
+    {  1,23,14}, {  1,23,14}, {  1,22,14}, {  1,22,14},
+    {  1,21,14}, {  1,21,14}, {  1,20,14}, {  1,20,14},
+    {  1,19,14}, {  1,19,14}, {  1,18,14}, {  1,18,14},
+    {  1,17,14}, {  1,17,14}, {  1,16,14}, {  1,16,14}
+};
+
+/* 12/13-bit codes */
+static const DCTtab DCT_13 [] = {
+    { 11, 2,13}, { 10, 2,13}, {  6, 3,13}, {  4, 4,13},
+    {  3, 5,13}, {  2, 7,13}, {  2, 6,13}, {  1,15,13},
+    {  1,14,13}, {  1,13,13}, {  1,12,13}, { 27, 1,13},
+    { 26, 1,13}, { 25, 1,13}, { 24, 1,13}, { 23, 1,13},
+    {  1,11,12}, {  1,11,12}, {  9, 2,12}, {  9, 2,12},
+    {  5, 3,12}, {  5, 3,12}, {  1,10,12}, {  1,10,12},
+    {  3, 4,12}, {  3, 4,12}, {  8, 2,12}, {  8, 2,12},
+    { 22, 1,12}, { 22, 1,12}, { 21, 1,12}, { 21, 1,12},
+    {  1, 9,12}, {  1, 9,12}, { 20, 1,12}, { 20, 1,12},
+    { 19, 1,12}, { 19, 1,12}, {  2, 5,12}, {  2, 5,12},
+    {  4, 3,12}, {  4, 3,12}, {  1, 8,12}, {  1, 8,12},
+    {  7, 2,12}, {  7, 2,12}, { 18, 1,12}, { 18, 1,12}
+};
+
+/* table B-14, 10-bit codes */
+static const DCTtab DCT_B14_10 [] = {
+    { 17, 1,10}, {  6, 2,10}, {  1, 7,10}, {  3, 3,10},
+    {  2, 4,10}, { 16, 1,10}, { 15, 1,10}, {  5, 2,10}
+};
+
+/* table B-14, 6-8 bit codes (run 65 at len 12 = escape) */
+static const DCTtab DCT_B14_8 [] = {
+    { 65, 0,12}, { 65, 0,12}, { 65, 0,12}, { 65, 0,12},
+    {  3, 2, 7}, {  3, 2, 7}, { 10, 1, 7}, { 10, 1, 7},
+    {  1, 4, 7}, {  1, 4, 7}, {  9, 1, 7}, {  9, 1, 7},
+    {  8, 1, 6}, {  8, 1, 6}, {  8, 1, 6}, {  8, 1, 6},
+    {  7, 1, 6}, {  7, 1, 6}, {  7, 1, 6}, {  7, 1, 6},
+    {  2, 2, 6}, {  2, 2, 6}, {  2, 2, 6}, {  2, 2, 6},
+    {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6},
+    { 14, 1, 8}, {  1, 6, 8}, { 13, 1, 8}, { 12, 1, 8},
+    {  4, 2, 8}, {  2, 3, 8}, {  1, 5, 8}, { 11, 1, 8}
+};
+
+/* table B-14, short codes, AC coefficients (run 129 = end of block) */
+static const DCTtab DCT_B14AC_5 [] = {
+		  {  1, 3, 5}, {  5, 1, 5}, {  4, 1, 5},
+    {  1, 2, 4}, {  1, 2, 4}, {  3, 1, 4}, {  3, 1, 4},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2},
+    {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}
+};
+
+/* table B-14, short codes, first (DC-position) coefficient: the
+ * leading run-0/level-1 code is 1 bit instead of 2 */
+static const DCTtab DCT_B14DC_5 [] = {
+		  {  1, 3, 5}, {  5, 1, 5}, {  4, 1, 5},
+    {  1, 2, 4}, {  1, 2, 4}, {  3, 1, 4}, {  3, 1, 4},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
+    {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
+    {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
+    {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}
+};
+
+/* table B-15 (intra_vlc_format = 1), 9/10-bit codes */
+static const DCTtab DCT_B15_10 [] = {
+    {  6, 2, 9}, {  6, 2, 9}, { 15, 1, 9}, { 15, 1, 9},
+    {  3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9}
+};
+
+/* table B-15, 2-8 bit codes (run 65 = escape, run 129 = end of block) */
+static const DCTtab DCT_B15_8 [] = {
+    { 65, 0,12}, { 65, 0,12}, { 65, 0,12}, { 65, 0,12},
+    {  8, 1, 7}, {  8, 1, 7}, {  9, 1, 7}, {  9, 1, 7},
+    {  7, 1, 7}, {  7, 1, 7}, {  3, 2, 7}, {  3, 2, 7},
+    {  1, 7, 6}, {  1, 7, 6}, {  1, 7, 6}, {  1, 7, 6},
+    {  1, 6, 6}, {  1, 6, 6}, {  1, 6, 6}, {  1, 6, 6},
+    {  5, 1, 6}, {  5, 1, 6}, {  5, 1, 6}, {  5, 1, 6},
+    {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6},
+    {  2, 5, 8}, { 12, 1, 8}, {  1,11, 8}, {  1,10, 8},
+    { 14, 1, 8}, { 13, 1, 8}, {  4, 2, 8}, {  2, 4, 8},
+    {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5},
+    {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5},
+    {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5},
+    {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5},
+    {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5},
+    {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+    {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
+    {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
+    {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
+    {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
+    {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
+    {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
+    {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
+    {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+    {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5},
+    {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5},
+    {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5},
+    {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5},
+    { 10, 1, 7}, { 10, 1, 7}, {  2, 3, 7}, {  2, 3, 7},
+    { 11, 1, 7}, { 11, 1, 7}, {  1, 8, 7}, {  1, 8, 7},
+    {  1, 9, 7}, {  1, 9, 7}, {  1,12, 8}, {  1,13, 8},
+    {  3, 3, 8}, {  5, 2, 8}, {  1,14, 8}, {  1,15, 8}
+};
+
+
+/* macroblock_address_increment VLC (table B-1); the stored mba value
+ * is the increment minus one, codes up to 5 bits */
+static const MBAtab MBA_5 [] = {
+		    {6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4},
+    {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3},
+    {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1},
+    {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}
+};
+
+/* macroblock_address_increment VLC, 7-11 bit codes */
+static const MBAtab MBA_11 [] = {
+    {32, 11}, {31, 11}, {30, 11}, {29, 11},
+    {28, 11}, {27, 11}, {26, 11}, {25, 11},
+    {24, 11}, {23, 11}, {22, 11}, {21, 11},
+    {20, 10}, {20, 10}, {19, 10}, {19, 10},
+    {18, 10}, {18, 10}, {17, 10}, {17, 10},
+    {16, 10}, {16, 10}, {15, 10}, {15, 10},
+    {14,  8}, {14,  8}, {14,  8}, {14,  8},
+    {14,  8}, {14,  8}, {14,  8}, {14,  8},
+    {13,  8}, {13,  8}, {13,  8}, {13,  8},
+    {13,  8}, {13,  8}, {13,  8}, {13,  8},
+    {12,  8}, {12,  8}, {12,  8}, {12,  8},
+    {12,  8}, {12,  8}, {12,  8}, {12,  8},
+    {11,  8}, {11,  8}, {11,  8}, {11,  8},
+    {11,  8}, {11,  8}, {11,  8}, {11,  8},
+    {10,  8}, {10,  8}, {10,  8}, {10,  8},
+    {10,  8}, {10,  8}, {10,  8}, {10,  8},
+    { 9,  8}, { 9,  8}, { 9,  8}, { 9,  8},
+    { 9,  8}, { 9,  8}, { 9,  8}, { 9,  8},
+    { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
+    { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
+    { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
+    { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
+    { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
+    { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
+    { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
+    { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7}
+};
+
+#endif /* LIBMPEG2_VLC_H */