diff options
Diffstat (limited to 'media/libjpeg/jcphuff.c')
-rw-r--r-- | media/libjpeg/jcphuff.c | 1114 |
1 files changed, 1114 insertions, 0 deletions
diff --git a/media/libjpeg/jcphuff.c b/media/libjpeg/jcphuff.c new file mode 100644 index 0000000000..872e570bff --- /dev/null +++ b/media/libjpeg/jcphuff.c @@ -0,0 +1,1114 @@ +/* + * jcphuff.c + * + * This file was part of the Independent JPEG Group's software: + * Copyright (C) 1995-1997, Thomas G. Lane. + * libjpeg-turbo Modifications: + * Copyright (C) 2011, 2015, 2018, 2021-2022, D. R. Commander. + * Copyright (C) 2016, 2018, Matthieu Darbois. + * Copyright (C) 2020, Arm Limited. + * Copyright (C) 2021, Alex Richardson. + * For conditions of distribution and use, see the accompanying README.ijg + * file. + * + * This file contains Huffman entropy encoding routines for progressive JPEG. + * + * We do not support output suspension in this module, since the library + * currently does not allow multiple-scan files to be written with output + * suspension. + */ + +#define JPEG_INTERNALS +#include "jinclude.h" +#include "jpeglib.h" +#include "jsimd.h" +#include "jconfigint.h" +#include <limits.h> + +#ifdef HAVE_INTRIN_H +#include <intrin.h> +#ifdef _MSC_VER +#ifdef HAVE_BITSCANFORWARD64 +#pragma intrinsic(_BitScanForward64) +#endif +#ifdef HAVE_BITSCANFORWARD +#pragma intrinsic(_BitScanForward) +#endif +#endif +#endif + +#ifdef C_PROGRESSIVE_SUPPORTED + +/* + * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be + * used for bit counting rather than the lookup table. This will reduce the + * memory footprint by 64k, which is important for some mobile applications + * that create many isolated instances of libjpeg-turbo (web browsers, for + * instance.) This may improve performance on some mobile platforms as well. + * This feature is enabled by default only on Arm processors, because some x86 + * chips have a slow implementation of bsr, and the use of clz/bsr cannot be + * shown to have a significant performance impact even on the x86 chips that + * have a fast implementation of it. When building for Armv6, you can + * explicitly disable the use of clz/bsr by adding -mthumb to the compiler + * flags (this defines __thumb__). + */ + +/* NOTE: Both GCC and Clang define __GNUC__ */ +#if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \ + defined(_M_ARM) || defined(_M_ARM64) +#if !defined(__thumb__) || defined(__thumb2__) +#define USE_CLZ_INTRINSIC +#endif +#endif + +#ifdef USE_CLZ_INTRINSIC +#if defined(_MSC_VER) && !defined(__clang__) +#define JPEG_NBITS_NONZERO(x) (32 - _CountLeadingZeros(x)) +#else +#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x)) +#endif +#define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0) +#else +#include "jpeg_nbits_table.h" +#define JPEG_NBITS(x) (jpeg_nbits_table[x]) +#define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x) +#endif + + +/* Expanded entropy encoder object for progressive Huffman encoding. */ + +typedef struct { + struct jpeg_entropy_encoder pub; /* public fields */ + + /* Pointer to routine to prepare data for encode_mcu_AC_first() */ + void (*AC_first_prepare) (const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *values, size_t *zerobits); + /* Pointer to routine to prepare data for encode_mcu_AC_refine() */ + int (*AC_refine_prepare) (const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *absvalues, size_t *bits); + + /* Mode flag: TRUE for optimization, FALSE for actual data output */ + boolean gather_statistics; + + /* Bit-level coding status. + * next_output_byte/free_in_buffer are local copies of cinfo->dest fields. + */ + JOCTET *next_output_byte; /* => next byte to write in buffer */ + size_t free_in_buffer; /* # of byte spaces remaining in buffer */ + size_t put_buffer; /* current bit-accumulation buffer */ + int put_bits; /* # of bits now in it */ + j_compress_ptr cinfo; /* link to cinfo (needed for dump_buffer) */ + + /* Coding status for DC components */ + int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ + + /* Coding status for AC components */ + int ac_tbl_no; /* the table number of the single component */ + unsigned int EOBRUN; /* run length of EOBs */ + unsigned int BE; /* # of buffered correction bits before MCU */ + char *bit_buffer; /* buffer for correction bits (1 per char) */ + /* packing correction bits tightly would save some space but cost time... */ + + unsigned int restarts_to_go; /* MCUs left in this restart interval */ + int next_restart_num; /* next restart number to write (0-7) */ + + /* Pointers to derived tables (these workspaces have image lifespan). + * Since any one scan codes only DC or only AC, we only need one set + * of tables, not one for DC and one for AC. + */ + c_derived_tbl *derived_tbls[NUM_HUFF_TBLS]; + + /* Statistics tables for optimization; again, one set is enough */ + long *count_ptrs[NUM_HUFF_TBLS]; +} phuff_entropy_encoder; + +typedef phuff_entropy_encoder *phuff_entropy_ptr; + +/* MAX_CORR_BITS is the number of bits the AC refinement correction-bit + * buffer can hold. Larger sizes may slightly improve compression, but + * 1000 is already well into the realm of overkill. + * The minimum safe size is 64 bits. + */ + +#define MAX_CORR_BITS 1000 /* Max # of correction bits I can buffer */ + +/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than JLONG. + * We assume that int right shift is unsigned if JLONG right shift is, + * which should be safe. + */ + +#ifdef RIGHT_SHIFT_IS_UNSIGNED +#define ISHIFT_TEMPS int ishift_temp; +#define IRIGHT_SHIFT(x, shft) \ + ((ishift_temp = (x)) < 0 ? \ + (ishift_temp >> (shft)) | ((~0) << (16 - (shft))) : \ + (ishift_temp >> (shft))) +#else +#define ISHIFT_TEMPS +#define IRIGHT_SHIFT(x, shft) ((x) >> (shft)) +#endif + +#define PAD(v, p) ((v + (p) - 1) & (~((p) - 1))) + +/* Forward declarations */ +METHODDEF(boolean) encode_mcu_DC_first(j_compress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(void) encode_mcu_AC_first_prepare + (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, + JCOEF *values, size_t *zerobits); +METHODDEF(boolean) encode_mcu_AC_first(j_compress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(boolean) encode_mcu_DC_refine(j_compress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(int) encode_mcu_AC_refine_prepare + (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, + JCOEF *absvalues, size_t *bits); +METHODDEF(boolean) encode_mcu_AC_refine(j_compress_ptr cinfo, + JBLOCKROW *MCU_data); +METHODDEF(void) finish_pass_phuff(j_compress_ptr cinfo); +METHODDEF(void) finish_pass_gather_phuff(j_compress_ptr cinfo); + + +/* Count bit loop zeroes */ +INLINE +METHODDEF(int) +count_zeroes(size_t *x) +{ +#if defined(HAVE_BUILTIN_CTZL) + int result; + result = __builtin_ctzl(*x); + *x >>= result; +#elif defined(HAVE_BITSCANFORWARD64) + unsigned long result; + _BitScanForward64(&result, *x); + *x >>= result; +#elif defined(HAVE_BITSCANFORWARD) + unsigned long result; + _BitScanForward(&result, *x); + *x >>= result; +#else + int result = 0; + while ((*x & 1) == 0) { + ++result; + *x >>= 1; + } +#endif + return (int)result; +} + + +/* + * Initialize for a Huffman-compressed scan using progressive JPEG. + */ + +METHODDEF(void) +start_pass_phuff(j_compress_ptr cinfo, boolean gather_statistics) +{ + phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; + boolean is_DC_band; + int ci, tbl; + jpeg_component_info *compptr; + + entropy->cinfo = cinfo; + entropy->gather_statistics = gather_statistics; + + is_DC_band = (cinfo->Ss == 0); + + /* We assume jcmaster.c already validated the scan parameters. */ + + /* Select execution routines */ + if (cinfo->Ah == 0) { + if (is_DC_band) + entropy->pub.encode_mcu = encode_mcu_DC_first; + else + entropy->pub.encode_mcu = encode_mcu_AC_first; + if (jsimd_can_encode_mcu_AC_first_prepare()) + entropy->AC_first_prepare = jsimd_encode_mcu_AC_first_prepare; + else + entropy->AC_first_prepare = encode_mcu_AC_first_prepare; + } else { + if (is_DC_band) + entropy->pub.encode_mcu = encode_mcu_DC_refine; + else { + entropy->pub.encode_mcu = encode_mcu_AC_refine; + if (jsimd_can_encode_mcu_AC_refine_prepare()) + entropy->AC_refine_prepare = jsimd_encode_mcu_AC_refine_prepare; + else + entropy->AC_refine_prepare = encode_mcu_AC_refine_prepare; + /* AC refinement needs a correction bit buffer */ + if (entropy->bit_buffer == NULL) + entropy->bit_buffer = (char *) + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + MAX_CORR_BITS * sizeof(char)); + } + } + if (gather_statistics) + entropy->pub.finish_pass = finish_pass_gather_phuff; + else + entropy->pub.finish_pass = finish_pass_phuff; + + /* Only DC coefficients may be interleaved, so cinfo->comps_in_scan = 1 + * for AC coefficients. + */ + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { + compptr = cinfo->cur_comp_info[ci]; + /* Initialize DC predictions to 0 */ + entropy->last_dc_val[ci] = 0; + /* Get table index */ + if (is_DC_band) { + if (cinfo->Ah != 0) /* DC refinement needs no table */ + continue; + tbl = compptr->dc_tbl_no; + } else { + entropy->ac_tbl_no = tbl = compptr->ac_tbl_no; + } + if (gather_statistics) { + /* Check for invalid table index */ + /* (make_c_derived_tbl does this in the other path) */ + if (tbl < 0 || tbl >= NUM_HUFF_TBLS) + ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl); + /* Allocate and zero the statistics tables */ + /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */ + if (entropy->count_ptrs[tbl] == NULL) + entropy->count_ptrs[tbl] = (long *) + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + 257 * sizeof(long)); + memset(entropy->count_ptrs[tbl], 0, 257 * sizeof(long)); + } else { + /* Compute derived values for Huffman table */ + /* We may do this more than once for a table, but it's not expensive */ + jpeg_make_c_derived_tbl(cinfo, is_DC_band, tbl, + &entropy->derived_tbls[tbl]); + } + } + + /* Initialize AC stuff */ + entropy->EOBRUN = 0; + entropy->BE = 0; + + /* Initialize bit buffer to empty */ + entropy->put_buffer = 0; + entropy->put_bits = 0; + + /* Initialize restart stuff */ + entropy->restarts_to_go = cinfo->restart_interval; + entropy->next_restart_num = 0; +} + + +/* Outputting bytes to the file. + * NB: these must be called only when actually outputting, + * that is, entropy->gather_statistics == FALSE. + */ + +/* Emit a byte */ +#define emit_byte(entropy, val) { \ + *(entropy)->next_output_byte++ = (JOCTET)(val); \ + if (--(entropy)->free_in_buffer == 0) \ + dump_buffer(entropy); \ +} + + +LOCAL(void) +dump_buffer(phuff_entropy_ptr entropy) +/* Empty the output buffer; we do not support suspension in this module. */ +{ + struct jpeg_destination_mgr *dest = entropy->cinfo->dest; + + if (!(*dest->empty_output_buffer) (entropy->cinfo)) + ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND); + /* After a successful buffer dump, must reset buffer pointers */ + entropy->next_output_byte = dest->next_output_byte; + entropy->free_in_buffer = dest->free_in_buffer; +} + + +/* Outputting bits to the file */ + +/* Only the right 24 bits of put_buffer are used; the valid bits are + * left-justified in this part. At most 16 bits can be passed to emit_bits + * in one call, and we never retain more than 7 bits in put_buffer + * between calls, so 24 bits are sufficient. + */ + +LOCAL(void) +emit_bits(phuff_entropy_ptr entropy, unsigned int code, int size) +/* Emit some bits, unless we are in gather mode */ +{ + /* This routine is heavily used, so it's worth coding tightly. */ + register size_t put_buffer = (size_t)code; + register int put_bits = entropy->put_bits; + + /* if size is 0, caller used an invalid Huffman table entry */ + if (size == 0) + ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE); + + if (entropy->gather_statistics) + return; /* do nothing if we're only getting stats */ + + put_buffer &= (((size_t)1) << size) - 1; /* mask off any extra bits in code */ + + put_bits += size; /* new number of bits in buffer */ + + put_buffer <<= 24 - put_bits; /* align incoming bits */ + + put_buffer |= entropy->put_buffer; /* and merge with old buffer contents */ + + while (put_bits >= 8) { + int c = (int)((put_buffer >> 16) & 0xFF); + + emit_byte(entropy, c); + if (c == 0xFF) { /* need to stuff a zero byte? */ + emit_byte(entropy, 0); + } + put_buffer <<= 8; + put_bits -= 8; + } + + entropy->put_buffer = put_buffer; /* update variables */ + entropy->put_bits = put_bits; +} + + +LOCAL(void) +flush_bits(phuff_entropy_ptr entropy) +{ + emit_bits(entropy, 0x7F, 7); /* fill any partial byte with ones */ + entropy->put_buffer = 0; /* and reset bit-buffer to empty */ + entropy->put_bits = 0; +} + + +/* + * Emit (or just count) a Huffman symbol. + */ + +LOCAL(void) +emit_symbol(phuff_entropy_ptr entropy, int tbl_no, int symbol) +{ + if (entropy->gather_statistics) + entropy->count_ptrs[tbl_no][symbol]++; + else { + c_derived_tbl *tbl = entropy->derived_tbls[tbl_no]; + emit_bits(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]); + } +} + + +/* + * Emit bits from a correction bit buffer. + */ + +LOCAL(void) +emit_buffered_bits(phuff_entropy_ptr entropy, char *bufstart, + unsigned int nbits) +{ + if (entropy->gather_statistics) + return; /* no real work */ + + while (nbits > 0) { + emit_bits(entropy, (unsigned int)(*bufstart), 1); + bufstart++; + nbits--; + } +} + + +/* + * Emit any pending EOBRUN symbol. + */ + +LOCAL(void) +emit_eobrun(phuff_entropy_ptr entropy) +{ + register int temp, nbits; + + if (entropy->EOBRUN > 0) { /* if there is any pending EOBRUN */ + temp = entropy->EOBRUN; + nbits = JPEG_NBITS_NONZERO(temp) - 1; + /* safety check: shouldn't happen given limited correction-bit buffer */ + if (nbits > 14) + ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE); + + emit_symbol(entropy, entropy->ac_tbl_no, nbits << 4); + if (nbits) + emit_bits(entropy, entropy->EOBRUN, nbits); + + entropy->EOBRUN = 0; + + /* Emit any buffered correction bits */ + emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE); + entropy->BE = 0; + } +} + + +/* + * Emit a restart marker & resynchronize predictions. + */ + +LOCAL(void) +emit_restart(phuff_entropy_ptr entropy, int restart_num) +{ + int ci; + + emit_eobrun(entropy); + + if (!entropy->gather_statistics) { + flush_bits(entropy); + emit_byte(entropy, 0xFF); + emit_byte(entropy, JPEG_RST0 + restart_num); + } + + if (entropy->cinfo->Ss == 0) { + /* Re-initialize DC predictions to 0 */ + for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++) + entropy->last_dc_val[ci] = 0; + } else { + /* Re-initialize all AC-related fields to 0 */ + entropy->EOBRUN = 0; + entropy->BE = 0; + } +} + + +/* + * MCU encoding for DC initial scan (either spectral selection, + * or first pass of successive approximation). + */ + +METHODDEF(boolean) +encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data) +{ + phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; + register int temp, temp2, temp3; + register int nbits; + int blkn, ci; + int Al = cinfo->Al; + JBLOCKROW block; + jpeg_component_info *compptr; + ISHIFT_TEMPS + + entropy->next_output_byte = cinfo->dest->next_output_byte; + entropy->free_in_buffer = cinfo->dest->free_in_buffer; + + /* Emit restart marker if needed */ + if (cinfo->restart_interval) + if (entropy->restarts_to_go == 0) + emit_restart(entropy, entropy->next_restart_num); + + /* Encode the MCU data blocks */ + for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { + block = MCU_data[blkn]; + ci = cinfo->MCU_membership[blkn]; + compptr = cinfo->cur_comp_info[ci]; + + /* Compute the DC value after the required point transform by Al. + * This is simply an arithmetic right shift. + */ + temp2 = IRIGHT_SHIFT((int)((*block)[0]), Al); + + /* DC differences are figured on the point-transformed values. */ + temp = temp2 - entropy->last_dc_val[ci]; + entropy->last_dc_val[ci] = temp2; + + /* Encode the DC coefficient difference per section G.1.2.1 */ + + /* This is a well-known technique for obtaining the absolute value without + * a branch. It is derived from an assembly language technique presented + * in "How to Optimize for the Pentium Processors", Copyright (c) 1996, + * 1997 by Agner Fog. + */ + temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); + temp ^= temp3; + temp -= temp3; /* temp is abs value of input */ + /* For a negative input, want temp2 = bitwise complement of abs(input) */ + temp2 = temp ^ temp3; + + /* Find the number of bits needed for the magnitude of the coefficient */ + nbits = JPEG_NBITS(temp); + /* Check for out-of-range coefficient values. + * Since we're encoding a difference, the range limit is twice as much. + */ + if (nbits > MAX_COEF_BITS + 1) + ERREXIT(cinfo, JERR_BAD_DCT_COEF); + + /* Count/emit the Huffman-coded symbol for the number of bits */ + emit_symbol(entropy, compptr->dc_tbl_no, nbits); + + /* Emit that number of bits of the value, if positive, */ + /* or the complement of its magnitude, if negative. */ + if (nbits) /* emit_bits rejects calls with size 0 */ + emit_bits(entropy, (unsigned int)temp2, nbits); + } + + cinfo->dest->next_output_byte = entropy->next_output_byte; + cinfo->dest->free_in_buffer = entropy->free_in_buffer; + + /* Update restart-interval state too */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) { + entropy->restarts_to_go = cinfo->restart_interval; + entropy->next_restart_num++; + entropy->next_restart_num &= 7; + } + entropy->restarts_to_go--; + } + + return TRUE; +} + + +/* + * Data preparation for encode_mcu_AC_first(). + */ + +#define COMPUTE_ABSVALUES_AC_FIRST(Sl) { \ + for (k = 0; k < Sl; k++) { \ + temp = block[jpeg_natural_order_start[k]]; \ + if (temp == 0) \ + continue; \ + /* We must apply the point transform by Al. For AC coefficients this \ + * is an integer division with rounding towards 0. To do this portably \ + * in C, we shift after obtaining the absolute value; so the code is \ + * interwoven with finding the abs value (temp) and output bits (temp2). \ + */ \ + temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \ + temp ^= temp2; \ + temp -= temp2; /* temp is abs value of input */ \ + temp >>= Al; /* apply the point transform */ \ + /* Watch out for case that nonzero coef is zero after point transform */ \ + if (temp == 0) \ + continue; \ + /* For a negative coef, want temp2 = bitwise complement of abs(coef) */ \ + temp2 ^= temp; \ + values[k] = (JCOEF)temp; \ + values[k + DCTSIZE2] = (JCOEF)temp2; \ + zerobits |= ((size_t)1U) << k; \ + } \ +} + +METHODDEF(void) +encode_mcu_AC_first_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *values, size_t *bits) +{ + register int k, temp, temp2; + size_t zerobits = 0U; + int Sl0 = Sl; + +#if SIZEOF_SIZE_T == 4 + if (Sl0 > 32) + Sl0 = 32; +#endif + + COMPUTE_ABSVALUES_AC_FIRST(Sl0); + + bits[0] = zerobits; +#if SIZEOF_SIZE_T == 4 + zerobits = 0U; + + if (Sl > 32) { + Sl -= 32; + jpeg_natural_order_start += 32; + values += 32; + + COMPUTE_ABSVALUES_AC_FIRST(Sl); + } + bits[1] = zerobits; +#endif +} + +/* + * MCU encoding for AC initial scan (either spectral selection, + * or first pass of successive approximation). + */ + +#define ENCODE_COEFS_AC_FIRST(label) { \ + while (zerobits) { \ + r = count_zeroes(&zerobits); \ + cvalue += r; \ +label \ + temp = cvalue[0]; \ + temp2 = cvalue[DCTSIZE2]; \ + \ + /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \ + while (r > 15) { \ + emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \ + r -= 16; \ + } \ + \ + /* Find the number of bits needed for the magnitude of the coefficient */ \ + nbits = JPEG_NBITS_NONZERO(temp); /* there must be at least one 1 bit */ \ + /* Check for out-of-range coefficient values */ \ + if (nbits > MAX_COEF_BITS) \ + ERREXIT(cinfo, JERR_BAD_DCT_COEF); \ + \ + /* Count/emit Huffman symbol for run length / number of bits */ \ + emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits); \ + \ + /* Emit that number of bits of the value, if positive, */ \ + /* or the complement of its magnitude, if negative. */ \ + emit_bits(entropy, (unsigned int)temp2, nbits); \ + \ + cvalue++; \ + zerobits >>= 1; \ + } \ +} + +METHODDEF(boolean) +encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data) +{ + phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; + register int temp, temp2; + register int nbits, r; + int Sl = cinfo->Se - cinfo->Ss + 1; + int Al = cinfo->Al; + JCOEF values_unaligned[2 * DCTSIZE2 + 15]; + JCOEF *values; + const JCOEF *cvalue; + size_t zerobits; + size_t bits[8 / SIZEOF_SIZE_T]; + + entropy->next_output_byte = cinfo->dest->next_output_byte; + entropy->free_in_buffer = cinfo->dest->free_in_buffer; + + /* Emit restart marker if needed */ + if (cinfo->restart_interval) + if (entropy->restarts_to_go == 0) + emit_restart(entropy, entropy->next_restart_num); + +#ifdef WITH_SIMD + cvalue = values = (JCOEF *)PAD((JUINTPTR)values_unaligned, 16); +#else + /* Not using SIMD, so alignment is not needed */ + cvalue = values = values_unaligned; +#endif + + /* Prepare data */ + entropy->AC_first_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss, + Sl, Al, values, bits); + + zerobits = bits[0]; +#if SIZEOF_SIZE_T == 4 + zerobits |= bits[1]; +#endif + + /* Emit any pending EOBRUN */ + if (zerobits && (entropy->EOBRUN > 0)) + emit_eobrun(entropy); + +#if SIZEOF_SIZE_T == 4 + zerobits = bits[0]; +#endif + + /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */ + + ENCODE_COEFS_AC_FIRST((void)0;); + +#if SIZEOF_SIZE_T == 4 + zerobits = bits[1]; + if (zerobits) { + int diff = ((values + DCTSIZE2 / 2) - cvalue); + r = count_zeroes(&zerobits); + r += diff; + cvalue += r; + goto first_iter_ac_first; + } + + ENCODE_COEFS_AC_FIRST(first_iter_ac_first:); +#endif + + if (cvalue < (values + Sl)) { /* If there are trailing zeroes, */ + entropy->EOBRUN++; /* count an EOB */ + if (entropy->EOBRUN == 0x7FFF) + emit_eobrun(entropy); /* force it out to avoid overflow */ + } + + cinfo->dest->next_output_byte = entropy->next_output_byte; + cinfo->dest->free_in_buffer = entropy->free_in_buffer; + + /* Update restart-interval state too */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) { + entropy->restarts_to_go = cinfo->restart_interval; + entropy->next_restart_num++; + entropy->next_restart_num &= 7; + } + entropy->restarts_to_go--; + } + + return TRUE; +} + + +/* + * MCU encoding for DC successive approximation refinement scan. + * Note: we assume such scans can be multi-component, although the spec + * is not very clear on the point. + */ + +METHODDEF(boolean) +encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data) +{ + phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; + register int temp; + int blkn; + int Al = cinfo->Al; + JBLOCKROW block; + + entropy->next_output_byte = cinfo->dest->next_output_byte; + entropy->free_in_buffer = cinfo->dest->free_in_buffer; + + /* Emit restart marker if needed */ + if (cinfo->restart_interval) + if (entropy->restarts_to_go == 0) + emit_restart(entropy, entropy->next_restart_num); + + /* Encode the MCU data blocks */ + for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { + block = MCU_data[blkn]; + + /* We simply emit the Al'th bit of the DC coefficient value. */ + temp = (*block)[0]; + emit_bits(entropy, (unsigned int)(temp >> Al), 1); + } + + cinfo->dest->next_output_byte = entropy->next_output_byte; + cinfo->dest->free_in_buffer = entropy->free_in_buffer; + + /* Update restart-interval state too */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) { + entropy->restarts_to_go = cinfo->restart_interval; + entropy->next_restart_num++; + entropy->next_restart_num &= 7; + } + entropy->restarts_to_go--; + } + + return TRUE; +} + + +/* + * Data preparation for encode_mcu_AC_refine(). + */ + +#define COMPUTE_ABSVALUES_AC_REFINE(Sl, koffset) { \ + /* It is convenient to make a pre-pass to determine the transformed \ + * coefficients' absolute values and the EOB position. \ + */ \ + for (k = 0; k < Sl; k++) { \ + temp = block[jpeg_natural_order_start[k]]; \ + /* We must apply the point transform by Al. For AC coefficients this \ + * is an integer division with rounding towards 0. To do this portably \ + * in C, we shift after obtaining the absolute value. \ + */ \ + temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \ + temp ^= temp2; \ + temp -= temp2; /* temp is abs value of input */ \ + temp >>= Al; /* apply the point transform */ \ + if (temp != 0) { \ + zerobits |= ((size_t)1U) << k; \ + signbits |= ((size_t)(temp2 + 1)) << k; \ + } \ + absvalues[k] = (JCOEF)temp; /* save abs value for main pass */ \ + if (temp == 1) \ + EOB = k + koffset; /* EOB = index of last newly-nonzero coef */ \ + } \ +} + +METHODDEF(int) +encode_mcu_AC_refine_prepare(const JCOEF *block, + const int *jpeg_natural_order_start, int Sl, + int Al, JCOEF *absvalues, size_t *bits) +{ + register int k, temp, temp2; + int EOB = 0; + size_t zerobits = 0U, signbits = 0U; + int Sl0 = Sl; + +#if SIZEOF_SIZE_T == 4 + if (Sl0 > 32) + Sl0 = 32; +#endif + + COMPUTE_ABSVALUES_AC_REFINE(Sl0, 0); + + bits[0] = zerobits; +#if SIZEOF_SIZE_T == 8 + bits[1] = signbits; +#else + bits[2] = signbits; + + zerobits = 0U; + signbits = 0U; + + if (Sl > 32) { + Sl -= 32; + jpeg_natural_order_start += 32; + absvalues += 32; + + COMPUTE_ABSVALUES_AC_REFINE(Sl, 32); + } + + bits[1] = zerobits; + bits[3] = signbits; +#endif + + return EOB; +} + + +/* + * MCU encoding for AC successive approximation refinement scan. + */ + +#define ENCODE_COEFS_AC_REFINE(label) { \ + while (zerobits) { \ + idx = count_zeroes(&zerobits); \ + r += idx; \ + cabsvalue += idx; \ + signbits >>= idx; \ +label \ + /* Emit any required ZRLs, but not if they can be folded into EOB */ \ + while (r > 15 && (cabsvalue <= EOBPTR)) { \ + /* emit any pending EOBRUN and the BE correction bits */ \ + emit_eobrun(entropy); \ + /* Emit ZRL */ \ + emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \ + r -= 16; \ + /* Emit buffered correction bits that must be associated with ZRL */ \ + emit_buffered_bits(entropy, BR_buffer, BR); \ + BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \ + BR = 0; \ + } \ + \ + temp = *cabsvalue++; \ + \ + /* If the coef was previously nonzero, it only needs a correction bit. \ + * NOTE: a straight translation of the spec's figure G.7 would suggest \ + * that we also need to test r > 15. But if r > 15, we can only get here \ + * if k > EOB, which implies that this coefficient is not 1. \ + */ \ + if (temp > 1) { \ + /* The correction bit is the next bit of the absolute value. */ \ + BR_buffer[BR++] = (char)(temp & 1); \ + signbits >>= 1; \ + zerobits >>= 1; \ + continue; \ + } \ + \ + /* Emit any pending EOBRUN and the BE correction bits */ \ + emit_eobrun(entropy); \ + \ + /* Count/emit Huffman symbol for run length / number of bits */ \ + emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1); \ + \ + /* Emit output bit for newly-nonzero coef */ \ + temp = signbits & 1; /* ((*block)[jpeg_natural_order_start[k]] < 0) ? 0 : 1 */ \ + emit_bits(entropy, (unsigned int)temp, 1); \ + \ + /* Emit buffered correction bits that must be associated with this code */ \ + emit_buffered_bits(entropy, BR_buffer, BR); \ + BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \ + BR = 0; \ + r = 0; /* reset zero run length */ \ + signbits >>= 1; \ + zerobits >>= 1; \ + } \ +} + +METHODDEF(boolean) +encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data) +{ + phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; + register int temp, r, idx; + char *BR_buffer; + unsigned int BR; + int Sl = cinfo->Se - cinfo->Ss + 1; + int Al = cinfo->Al; + JCOEF absvalues_unaligned[DCTSIZE2 + 15]; + JCOEF *absvalues; + const JCOEF *cabsvalue, *EOBPTR; + size_t zerobits, signbits; + size_t bits[16 / SIZEOF_SIZE_T]; + + entropy->next_output_byte = cinfo->dest->next_output_byte; + entropy->free_in_buffer = cinfo->dest->free_in_buffer; + + /* Emit restart marker if needed */ + if (cinfo->restart_interval) + if (entropy->restarts_to_go == 0) + emit_restart(entropy, entropy->next_restart_num); + +#ifdef WITH_SIMD + cabsvalue = absvalues = (JCOEF *)PAD((JUINTPTR)absvalues_unaligned, 16); +#else + /* Not using SIMD, so alignment is not needed */ + cabsvalue = absvalues = absvalues_unaligned; +#endif + + /* Prepare data */ + EOBPTR = absvalues + + entropy->AC_refine_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss, + Sl, Al, absvalues, bits); + + /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */ + + r = 0; /* r = run length of zeros */ + BR = 0; /* BR = count of buffered bits added now */ + BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */ + + zerobits = bits[0]; +#if SIZEOF_SIZE_T == 8 + signbits = bits[1]; +#else + signbits = bits[2]; +#endif + ENCODE_COEFS_AC_REFINE((void)0;); + +#if SIZEOF_SIZE_T == 4 + zerobits = bits[1]; + signbits = bits[3]; + + if (zerobits) { + int diff = ((absvalues + DCTSIZE2 / 2) - cabsvalue); + idx = count_zeroes(&zerobits); + signbits >>= idx; + idx += diff; + r += idx; + cabsvalue += idx; + goto first_iter_ac_refine; + } + + ENCODE_COEFS_AC_REFINE(first_iter_ac_refine:); +#endif + + r |= (int)((absvalues + Sl) - cabsvalue); + + if (r > 0 || BR > 0) { /* If there are trailing zeroes, */ + entropy->EOBRUN++; /* count an EOB */ + entropy->BE += BR; /* concat my correction bits to older ones */ + /* We force out the EOB if we risk either: + * 1. overflow of the EOB counter; + * 2. overflow of the correction bit buffer during the next MCU. + */ + if (entropy->EOBRUN == 0x7FFF || + entropy->BE > (MAX_CORR_BITS - DCTSIZE2 + 1)) + emit_eobrun(entropy); + } + + cinfo->dest->next_output_byte = entropy->next_output_byte; + cinfo->dest->free_in_buffer = entropy->free_in_buffer; + + /* Update restart-interval state too */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) { + entropy->restarts_to_go = cinfo->restart_interval; + entropy->next_restart_num++; + entropy->next_restart_num &= 7; + } + entropy->restarts_to_go--; + } + + return TRUE; +} + + +/* + * Finish up at the end of a Huffman-compressed progressive scan. + */ + +METHODDEF(void) +finish_pass_phuff(j_compress_ptr cinfo) +{ + phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; + + entropy->next_output_byte = cinfo->dest->next_output_byte; + entropy->free_in_buffer = cinfo->dest->free_in_buffer; + + /* Flush out any buffered data */ + emit_eobrun(entropy); + flush_bits(entropy); + + cinfo->dest->next_output_byte = entropy->next_output_byte; + cinfo->dest->free_in_buffer = entropy->free_in_buffer; +} + + +/* + * Finish up a statistics-gathering pass and create the new Huffman tables. + */ + +METHODDEF(void) +finish_pass_gather_phuff(j_compress_ptr cinfo) +{ + phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; + boolean is_DC_band; + int ci, tbl; + jpeg_component_info *compptr; + JHUFF_TBL **htblptr; + boolean did[NUM_HUFF_TBLS]; + + /* Flush out buffered data (all we care about is counting the EOB symbol) */ + emit_eobrun(entropy); + + is_DC_band = (cinfo->Ss == 0); + + /* It's important not to apply jpeg_gen_optimal_table more than once + * per table, because it clobbers the input frequency counts! + */ + memset(did, 0, sizeof(did)); + + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { + compptr = cinfo->cur_comp_info[ci]; + if (is_DC_band) { + if (cinfo->Ah != 0) /* DC refinement needs no table */ + continue; + tbl = compptr->dc_tbl_no; + } else { + tbl = compptr->ac_tbl_no; + } + if (!did[tbl]) { + if (is_DC_band) + htblptr = &cinfo->dc_huff_tbl_ptrs[tbl]; + else + htblptr = &cinfo->ac_huff_tbl_ptrs[tbl]; + if (*htblptr == NULL) + *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo); + jpeg_gen_optimal_table(cinfo, *htblptr, entropy->count_ptrs[tbl]); + did[tbl] = TRUE; + } + } +} + + +/* + * Module initialization routine for progressive Huffman entropy encoding. + */ + +GLOBAL(void) +jinit_phuff_encoder(j_compress_ptr cinfo) +{ + phuff_entropy_ptr entropy; + int i; + + entropy = (phuff_entropy_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, + sizeof(phuff_entropy_encoder)); + cinfo->entropy = (struct jpeg_entropy_encoder *)entropy; + entropy->pub.start_pass = start_pass_phuff; + + /* Mark tables unallocated */ + for (i = 0; i < NUM_HUFF_TBLS; i++) { + entropy->derived_tbls[i] = NULL; + entropy->count_ptrs[i] = NULL; + } + entropy->bit_buffer = NULL; /* needed only in AC refinement scan */ +} + +#endif /* C_PROGRESSIVE_SUPPORTED */ |