/******************************************************************** * * * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 * * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: last mod: $Id$ ********************************************************************/ #include #include #include #include "huffdec.h" #include "decint.h" /*Instead of storing every branching in the tree, subtrees can be collapsed into one node, with a table of size 1<>8). @ARTICLE{Hash95, author="Reza Hashemian", title="Memory Efficient and High-Speed Search {Huffman} Coding", journal="{IEEE} Transactions on Communications", volume=43, number=10, pages="2576--2581", month=Oct, year=1995 }*/ /*The map from external spec-defined tokens to internal tokens. This is constructed so that any extra bits read with the original token value can be masked off the least significant bits of its internal token index. In addition, all of the tokens which require additional extra bits are placed at the start of the list, and grouped by type. OC_DCT_REPEAT_RUN3_TOKEN is placed first, as it is an extra-special case, so giving it index 0 may simplify comparisons on some architectures. These requirements require some substantial reordering.*/ static const unsigned char OC_DCT_TOKEN_MAP[TH_NDCT_TOKENS]={ /*OC_DCT_EOB1_TOKEN (0 extra bits)*/ 15, /*OC_DCT_EOB2_TOKEN (0 extra bits)*/ 16, /*OC_DCT_EOB3_TOKEN (0 extra bits)*/ 17, /*OC_DCT_REPEAT_RUN0_TOKEN (2 extra bits)*/ 88, /*OC_DCT_REPEAT_RUN1_TOKEN (3 extra bits)*/ 80, /*OC_DCT_REPEAT_RUN2_TOKEN (4 extra bits)*/ 1, /*OC_DCT_REPEAT_RUN3_TOKEN (12 extra bits)*/ 0, /*OC_DCT_SHORT_ZRL_TOKEN (3 extra bits)*/ 48, /*OC_DCT_ZRL_TOKEN (6 extra bits)*/ 14, /*OC_ONE_TOKEN (0 extra bits)*/ 56, /*OC_MINUS_ONE_TOKEN (0 extra bits)*/ 57, /*OC_TWO_TOKEN (0 extra bits)*/ 58, /*OC_MINUS_TWO_TOKEN (0 extra bits)*/ 59, /*OC_DCT_VAL_CAT2 (1 extra bit)*/ 60, 62, 64, 66, /*OC_DCT_VAL_CAT3 (2 extra bits)*/ 68, /*OC_DCT_VAL_CAT4 (3 extra bits)*/ 72, /*OC_DCT_VAL_CAT5 (4 extra bits)*/ 2, /*OC_DCT_VAL_CAT6 (5 extra bits)*/ 4, /*OC_DCT_VAL_CAT7 (6 extra bits)*/ 6, /*OC_DCT_VAL_CAT8 (10 extra bits)*/ 8, /*OC_DCT_RUN_CAT1A (1 extra bit)*/ 18, 20, 22, 24, 26, /*OC_DCT_RUN_CAT1B (3 extra bits)*/ 32, /*OC_DCT_RUN_CAT1C (4 extra bits)*/ 12, /*OC_DCT_RUN_CAT2A (2 extra bits)*/ 28, /*OC_DCT_RUN_CAT2B (3 extra bits)*/ 40 }; /*The log base 2 of number of internal tokens associated with each of the spec tokens (i.e., how many of the extra bits are folded into the token value). Increasing the maximum value beyond 3 will enlarge the amount of stack required for tree construction.*/ static const unsigned char OC_DCT_TOKEN_MAP_LOG_NENTRIES[TH_NDCT_TOKENS]={ 0,0,0,2,3,0,0,3,0,0,0,0,0,1,1,1,1,2,3,1,1,1,2,1,1,1,1,1,3,1,2,3 }; /*The size a lookup table is allowed to grow to relative to the number of unique nodes it contains. E.g., if OC_HUFF_SLUSH is 4, then at most 75% of the space in the tree is wasted (1/4 of the space must be used). Larger numbers can decode tokens with fewer read operations, while smaller numbers may save more space. With a sample file: 32233473 read calls are required when no tree collapsing is done (100.0%). 19269269 read calls are required when OC_HUFF_SLUSH is 1 (59.8%). 11144969 read calls are required when OC_HUFF_SLUSH is 2 (34.6%). 10538563 read calls are required when OC_HUFF_SLUSH is 4 (32.7%). 10192578 read calls are required when OC_HUFF_SLUSH is 8 (31.6%). Since a value of 2 gets us the vast majority of the speed-up with only a small amount of wasted memory, this is what we use. This value must be less than 128, or you could create a tree with more than 32767 entries, which would overflow the 16-bit words used to index it.*/ #define OC_HUFF_SLUSH (2) /*The root of the tree is on the fast path, and a larger value here is more beneficial than elsewhere in the tree. 7 appears to give the best performance, trading off between increased use of the single-read fast path and cache footprint for the tables, though obviously this will depend on your cache size. Using 7 here, the VP3 tables are about twice as large compared to using 2.*/ #define OC_ROOT_HUFF_SLUSH (7) /*Unpacks a Huffman codebook. _opb: The buffer to unpack from. _tokens: Stores a list of internal tokens, in the order they were found in the codebook, and the lengths of their corresponding codewords. This is enough to completely define the codebook, while minimizing stack usage and avoiding temporary allocations (for platforms where free() is a no-op). Return: The number of internal tokens in the codebook, or a negative value on error.*/ int oc_huff_tree_unpack(oc_pack_buf *_opb,unsigned char _tokens[256][2]){ ogg_uint32_t code; int len; int ntokens; int nleaves; code=0; len=ntokens=nleaves=0; for(;;){ long bits; bits=oc_pack_read1(_opb); /*Only process nodes so long as there's more bits in the buffer.*/ if(oc_pack_bytes_left(_opb)<0)return TH_EBADHEADER; /*Read an internal node:*/ if(!bits){ len++; /*Don't allow codewords longer than 32 bits.*/ if(len>32)return TH_EBADHEADER; } /*Read a leaf node:*/ else{ ogg_uint32_t code_bit; int neb; int nentries; int token; /*Don't allow more than 32 spec-tokens per codebook.*/ if(++nleaves>32)return TH_EBADHEADER; bits=oc_pack_read(_opb,OC_NDCT_TOKEN_BITS); neb=OC_DCT_TOKEN_MAP_LOG_NENTRIES[bits]; token=OC_DCT_TOKEN_MAP[bits]; nentries=1<0){ _tokens[ntokens][0]=(unsigned char)token++; _tokens[ntokens][1]=(unsigned char)(len+neb); ntokens++; } code_bit=0x80000000U>>len-1; while(len>0&&(code&code_bit)){ code^=code_bit; code_bit<<=1; len--; } if(len<=0)break; code|=code_bit; } } return ntokens; } /*Count how many tokens would be required to fill a subtree at depth _depth. _tokens: A list of internal tokens, in the order they are found in the codebook, and the lengths of their corresponding codewords. _depth: The depth of the desired node in the corresponding tree structure. Return: The number of tokens that belong to that subtree.*/ static int oc_huff_subtree_tokens(unsigned char _tokens[][2],int _depth){ ogg_uint32_t code; int ti; code=0; ti=0; do{ if(_tokens[ti][1]-_depth<32)code+=0x80000000U>>_tokens[ti++][1]-_depth; else{ /*Because of the expanded internal tokens, we can have codewords as long as 35 bits. A single recursion here is enough to advance past them.*/ code++; ti+=oc_huff_subtree_tokens(_tokens+ti,_depth+31); } } while(code<0x80000000U); return ti; } /*Compute the number of bits to use for a collapsed tree node at the given depth. _tokens: A list of internal tokens, in the order they are found in the codebook, and the lengths of their corresponding codewords. _ntokens: The number of tokens corresponding to this tree node. _depth: The depth of this tree node. Return: The number of bits to use for a collapsed tree node rooted here. This is always at least one, even if this was a leaf node.*/ static int oc_huff_tree_collapse_depth(unsigned char _tokens[][2], int _ntokens,int _depth){ int got_leaves; int loccupancy; int occupancy; int slush; int nbits; int best_nbits; slush=_depth>0?OC_HUFF_SLUSH:OC_ROOT_HUFF_SLUSH; /*It's legal to have a tree with just a single node, which requires no bits to decode and always returns the same token. However, no encoder actually does this (yet). To avoid a special case in oc_huff_token_decode(), we force the number of lookahead bits to be at least one. This will produce a tree that looks ahead one bit and then advances the stream zero bits.*/ nbits=1; occupancy=2; got_leaves=1; do{ int ti; if(got_leaves)best_nbits=nbits; nbits++; got_leaves=0; loccupancy=occupancy; for(occupancy=ti=0;ti<_ntokens;occupancy++){ if(_tokens[ti][1]<_depth+nbits)ti++; else if(_tokens[ti][1]==_depth+nbits){ got_leaves=1; ti++; } else ti+=oc_huff_subtree_tokens(_tokens+ti,_depth+nbits); } } while(occupancy>loccupancy&&occupancy*slush>=1<0)_tree[node[l]++]=leaf; } ti++; } if(ti<=last[l]){ /*We need to recurse*/ depth[l+1]=(unsigned char)(depth[l]+nbits); if(_tree!=NULL)_tree[node[l]++]=(ogg_int16_t)ntree; l++; last[l]= (unsigned char)(ti+oc_huff_subtree_tokens(_tokens+ti,depth[l])-1); break; } /*Pop back up a level of recursion.*/ else if(l-->0)nbits=depth[l+1]-depth[l]; } while(l>=0); } while(l>=0); return ntree; } /*Unpacks a set of Huffman trees, and reduces them to a collapsed representation. _opb: The buffer to unpack the trees from. _nodes: The table to fill with the Huffman trees. Return: 0 on success, or a negative value on error. The caller is responsible for cleaning up any partially initialized _nodes on failure.*/ int oc_huff_trees_unpack(oc_pack_buf *_opb, ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){ int i; for(i=0;i32767)return TH_EIMPL; tree=(ogg_int16_t *)_ogg_malloc(size*sizeof(*tree)); if(tree==NULL)return TH_EFAULT; /*Construct the collapsed the tree.*/ oc_huff_tree_collapse(tree,tokens,ntokens); _nodes[i]=tree; } return 0; } /*Determines the size in words of a Huffman subtree. _tree: The complete Huffman tree. _node: The index of the root of the desired subtree. Return: The number of words required to store the tree.*/ static size_t oc_huff_tree_size(const ogg_int16_t *_tree,int _node){ size_t size; int nchildren; int n; int i; n=_tree[_node]; size=oc_huff_node_size(n); nchildren=1<>8); else{ size+=oc_huff_tree_size(_tree,child); i++; } } while(i0)_ogg_free(_dst[i]); return TH_EFAULT; } memcpy(_dst[i],_src[i],size*sizeof(*_dst[i])); } return 0; } /*Frees the memory used by a set of Huffman trees. _nodes: The array of trees to free.*/ void oc_huff_trees_clear(ogg_int16_t *_nodes[TH_NHUFFMAN_TABLES]){ int i; for(i=0;iptr; window=_opb->window; stop=_opb->stop; available=_opb->bits; node=0; for(;;){ n=_tree[node]; if(n>available){ unsigned shift; shift=OC_PB_WINDOW_SIZE-available; do{ /*We don't bother setting eof because we won't check for it after we've started decoding DCT tokens.*/ if(ptr>=stop){ shift=(unsigned)-OC_LOTS_OF_BITS; break; } shift-=8; window|=(oc_pb_window)*ptr++<=8); /*Note: We never request more than 24 bits, so there's no need to fill in the last partial byte here.*/ available=OC_PB_WINDOW_SIZE-shift; } bits=window>>OC_PB_WINDOW_SIZE-n; node=_tree[node+1+bits]; if(node<=0)break; window<<=n; available-=n; } node=-node; n=node>>8; window<<=n; available-=n; _opb->ptr=ptr; _opb->window=window; _opb->bits=available; return node&255; }