gamut.codecs.jpegload source code

1 // jpgd.h - C++ class for JPEG decompression.
2 // Rich Geldreich <richgel99@gmail.com>
3 // Alex Evans: Linear memory allocator (taken from jpge.h).
4 // v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings (all looked harmless)
5 // D translation by Ketmar // Invisible Vector
6 //
7 // This is free and unencumbered software released into the public domain.
8 //
9 // Anyone is free to copy, modify, publish, use, compile, sell, or
10 // distribute this software, either in source code form or as a compiled
11 // binary, for any purpose, commercial or non-commercial, and by any
12 // means.
13 //
14 // In jurisdictions that recognize copyright laws, the author or authors
15 // of this software dedicate any and all copyright interest in the
16 // software to the public domain. We make this dedication for the benefit
17 // of the public at large and to the detriment of our heirs and
18 // successors. We intend this dedication to be an overt act of
19 // relinquishment in perpetuity of all present and future rights to this
20 // software under copyright law.
21 //
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25 // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 // OTHER DEALINGS IN THE SOFTWARE.
29 //
30 // For more information, please refer to <http://unlicense.org/>
31 //
32 // Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2.
33 //
34 // Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling.
35 // Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain"
36 // http://vision.ai.uiuc.edu/~dugad/research/dct/index.html
37 /**
38  * Loads a JPEG image from a memory buffer or a file.
39  * req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA).
40  * On return, width/height will be set to the image's dimensions, and actual_comps will be set to either 1 (grayscale) or 3 (RGB).
41  * Requesting an 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp.
42  */
43 /// JPEG image loading.
44 module gamut.codecs.jpegload;
45 
46 import gamut.types;
47 import gamut.internals.binop;
48 import core.stdc.string : memcpy, memset;
49 import core.stdc.stdlib : malloc, free;
50 import inteli.emmintrin;
51 
52 version(decodeJPEG):
53 
54 nothrow:
55 @nogc:
56 
57 // Define this version identifier to enable freq. domain chroma upsampling on images using H2V2 subsampling (leave it undefined for faster nearest neighbor sampling).
58 // This is slower, but results in higher quality on images with highly saturated colors.
59 version = JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING;
60 
61 /// Input stream interface.
62 /// This callback is invoked when the decoder's internal input buffer is empty.
63 /// Parameters:
64 ///   pBuf - destination buffer to fill with stream bytes
65 ///   max_bytes_to_read - maximum number of bytes that may be written to pBuf
66 ///   pEOF_flag - set `*pEOF_flag` to true once the end of the stream is reached (no more bytes remaining)
67 ///   userData - user-supplied context pointer, so that a plain function can be used like a closure.
68 ///   Returns: -1 on error, otherwise the number of bytes actually written to the buffer (which may be 0).
69 ///   Notes: This callback is a function pointer (not a delegate); it will be called in a loop until you set *pEOF_flag to true or the internal buffer is full.
70 alias JpegStreamReadFunc = int function(void* pBuf, int max_bytes_to_read, bool* pEOF_flag, void* userData);
71 
72 
73 // ////////////////////////////////////////////////////////////////////////// //
74 private:
75 
/// Allocates `nSize` bytes with the C runtime's `malloc` and returns its result unchanged
/// (so the pointer is whatever `malloc` reports, including a failed allocation).
void* jpgd_malloc (size_t nSize)
{
    void* block = malloc(nSize);
    return block;
}
80 
/// Hands `p` to the C runtime's `free`; the counterpart of `jpgd_malloc`.
void jpgd_free (void* p)
{
    free(p);
}
85 
86 // Success/failure error codes.
// JPGD_SUCCESS, JPGD_FAILED and JPGD_DONE are the values returned by the public decode calls.
// The detailed codes start at JPGD_BAD_DHT_COUNTS = -256 and are auto-numbered upward from there
// (one per member, in declaration order), so all of them stay negative. Do not reorder the members:
// their numeric values depend on their position.
87 alias jpgd_status = int;
88 enum /*jpgd_status*/ {
89   JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1,
90   JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE,
91   JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS,
92   JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH,
93   JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER,
94   JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS,
95   JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE,
96   JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER, JPGD_ASSERTION_ERROR,
97   JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM,
98 }
99 
// Decoder size limits. These constants size the fixed arrays inside jpeg_decoder
// (input buffer, per-component tables, per-MCU block tables).
100 enum {
101   JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4,
102   JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 8192, JPGD_MAX_HEIGHT = 16384, JPGD_MAX_WIDTH = 16384,
103 }
104 
105 // DCT coefficients are stored in this sequence.
// g_ZAG[k] is the position inside the 8x8 block (row*8 + column) of the k-th coefficient in zigzag order.
106 static immutable int[64] g_ZAG = [  0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 ];
107 
// JPEG marker codes (the byte that follows 0xFF in the stream).
// M_ERROR (0x100) lies outside the one-byte range, so it cannot collide with a real marker byte.
// RST0 has the same value as M_RST0 (0xD0).
108 alias JPEG_MARKER = int;
109 enum /*JPEG_MARKER*/ {
110   M_SOF0  = 0xC0, M_SOF1  = 0xC1, M_SOF2  = 0xC2, M_SOF3  = 0xC3, M_SOF5  = 0xC5, M_SOF6  = 0xC6, M_SOF7  = 0xC7, M_JPG   = 0xC8,
111   M_SOF9  = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT   = 0xC4, M_DAC   = 0xCC,
112   M_RST0  = 0xD0, M_RST1  = 0xD1, M_RST2  = 0xD2, M_RST3  = 0xD3, M_RST4  = 0xD4, M_RST5  = 0xD5, M_RST6  = 0xD6, M_RST7  = 0xD7,
113   M_SOI   = 0xD8, M_EOI   = 0xD9, M_SOS   = 0xDA, M_DQT   = 0xDB, M_DNL   = 0xDC, M_DRI   = 0xDD, M_DHP   = 0xDE, M_EXP   = 0xDF,
114   M_APP0  = 0xE0, M_APP15 = 0xEF, M_JPG0  = 0xF0, M_JPG13 = 0xFD, M_COM   = 0xFE, M_TEM   = 0x01, M_ERROR = 0x100, RST0   = 0xD0,
115 }
116 
// Scan layouts recognised by the decoder; the values are auto-numbered 0..4 and are
// what jpeg_decoder.decode() switches on through m_scan_type.
117 alias JPEG_SUBSAMPLING = int;
118 enum /*JPEG_SUBSAMPLING*/ { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 };
119 
// Fixed-point configuration of the IDCT.
// CONST_BITS: number of fractional bits carried by the FIX_* multipliers.
// PASS1_BITS: extra fractional bits kept in the output of the row pass.
// SCALEDONE:  the integer 1, shifted left to build rounding terms.
enum int CONST_BITS = 13;
enum int PASS1_BITS = 2;
enum int SCALEDONE  = 1;

// Each FIX_a_b constant is the real number a.b multiplied by 2^CONST_BITS (8192)
// and rounded to the nearest integer.
enum int FIX_0_298631336 = 2446;
enum int FIX_0_390180644 = 3196;
enum int FIX_0_541196100 = 4433;
enum int FIX_0_765366865 = 6270;
enum int FIX_0_899976223 = 7373;
enum int FIX_1_175875602 = 9633;
enum int FIX_1_501321110 = 12299;
enum int FIX_1_847759065 = 15137;
enum int FIX_1_961570560 = 16069;
enum int FIX_2_053119869 = 16819;
enum int FIX_2_562915447 = 20995;
enum int FIX_3_072711026 = 25172;
135 enum FIX_3_072711026 = cast(int)25172; /* FIX(3.072711026) */
136 
/// Drops `n` fractional bits from `x` with rounding: half of 2^n is added
/// before the (signed) right shift by `n`.
int DESCALE() (int x, int n)
{
    immutable int half = SCALEDONE << (n - 1);
    return (x + half) >> n;
}
141 
/// Like DESCALE, but also adds 128 to the descaled result: the term `128 << n`
/// is folded into the sum before the right shift by `n`.
int DESCALE_ZEROSHIFT() (int x, int n)
{
    pragma(inline, true);
    immutable int levelShift = 128 << n;
    immutable int half = SCALEDONE << (n - 1);
    return (x + levelShift + half) >> n;
}
146 
/// Saturates `i` to the range 0..255 and returns it as a byte.
ubyte CLAMP() (int i)
{
    if (i < 0) return 0;
    if (i > 255) return 255;
    return cast(ubyte)i;
}
153 
154 
155 // Compiler creates a fast path 1D IDCT for X non-zero columns
// Row!(N).idct is the row pass of the 2D inverse DCT: it transforms one row of 8 coefficients.
// N (NONZERO_COLS) is a compile-time promise that only the first N coefficients of the row
// can be non-zero; coefficients at index >= N are never read and are treated as 0.
156 struct Row(int NONZERO_COLS) {
157 pure nothrow @trusted @nogc:
  // pTemp: destination for the 8 results of this row (indices 0..7).
  // pSrc:  the row's coefficients; only indices below NONZERO_COLS are read.
158   static void idct(int* pTemp, const(jpeg_decoder.jpgd_block_t)* pSrc) {
159     static if (NONZERO_COLS == 0) {
      // All-zero row: pTemp is left untouched.
160       // nothing
161     } else static if (NONZERO_COLS == 1) {
      // Only the first coefficient is present: all eight outputs are that value,
      // scaled up by PASS1_BITS to match the scaling of the general path.
162       immutable int dcval = (pSrc[0] << PASS1_BITS);
163       pTemp[0] = dcval;
164       pTemp[1] = dcval;
165       pTemp[2] = dcval;
166       pTemp[3] = dcval;
167       pTemp[4] = dcval;
168       pTemp[5] = dcval;
169       pTemp[6] = dcval;
170       pTemp[7] = dcval;
171     } else {
172       // ACCESS_COL() will be optimized at compile time to either an array access, or 0.
173       //#define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0)
      // ACCESS_COL!x yields a source string that is mixed in below: a read of pSrc[x] or the literal 0.
174       template ACCESS_COL(int x) {
175         static if (x < NONZERO_COLS) enum ACCESS_COL = "cast(int)pSrc["~x.stringof~"]"; else enum ACCESS_COL = "0";
176       }
177 
      // Even part: built from coefficients 0, 2, 4 and 6.
178       immutable int z2 = mixin(ACCESS_COL!2), z3 = mixin(ACCESS_COL!6);
179 
180       immutable int z1 = (z2 + z3)*FIX_0_541196100;
181       immutable int tmp2 = z1 + z3*(-FIX_1_847759065);
182       immutable int tmp3 = z1 + z2*FIX_0_765366865;
183 
      // Coefficients 0 and 4 are shifted by CONST_BITS so they share the scale of the FIX_* products.
184       immutable int tmp0 = (mixin(ACCESS_COL!0) + mixin(ACCESS_COL!4)) << CONST_BITS;
185       immutable int tmp1 = (mixin(ACCESS_COL!0) - mixin(ACCESS_COL!4)) << CONST_BITS;
186 
187       immutable int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
188 
      // Odd part: built from coefficients 7, 5, 3 and 1.
189       immutable int atmp0 = mixin(ACCESS_COL!7), atmp1 = mixin(ACCESS_COL!5), atmp2 = mixin(ACCESS_COL!3), atmp3 = mixin(ACCESS_COL!1);
190 
191       immutable int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
192       immutable int bz5 = (bz3 + bz4)*FIX_1_175875602;
193 
194       immutable int az1 = bz1*(-FIX_0_899976223);
195       immutable int az2 = bz2*(-FIX_2_562915447);
196       immutable int az3 = bz3*(-FIX_1_961570560) + bz5;
197       immutable int az4 = bz4*(-FIX_0_390180644) + bz5;
198 
199       immutable int btmp0 = atmp0*FIX_0_298631336 + az1 + az3;
200       immutable int btmp1 = atmp1*FIX_2_053119869 + az2 + az4;
201       immutable int btmp2 = atmp2*FIX_3_072711026 + az2 + az3;
202       immutable int btmp3 = atmp3*FIX_1_501321110 + az1 + az4;
203 
      // Combine even and odd parts. Descaling by CONST_BITS-PASS1_BITS (instead of CONST_BITS)
      // leaves PASS1_BITS extra fractional bits in the results.
204       pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS-PASS1_BITS);
205       pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS-PASS1_BITS);
206       pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS-PASS1_BITS);
207       pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS-PASS1_BITS);
208       pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS-PASS1_BITS);
209       pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS-PASS1_BITS);
210       pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS-PASS1_BITS);
211       pTemp[4] = DESCALE(tmp13 - btmp0, CONST_BITS-PASS1_BITS);
212     }
213   }
214 }
215 
216 
217 // Compiler creates a fast path 1D IDCT for X non-zero rows
// Col!(N).idct is the column pass of the 2D inverse DCT: it transforms one column of the
// row-pass output and writes 8 clamped bytes. N (NONZERO_ROWS) is a compile-time promise that
// only the first N rows of the intermediate data can be non-zero; rows >= N are never read
// and are treated as 0.
218 struct Col (int NONZERO_ROWS) {
219 pure nothrow @trusted @nogc:
  // pDst_ptr: top sample of the output column; the 8 results are written at a stride of 8 bytes.
  // pTemp:    top entry of the input column; entries are read at a stride of 8 ints.
220   static void idct(ubyte* pDst_ptr, const(int)* pTemp) {
    // There is no zero-row specialization for the column pass.
221     static assert(NONZERO_ROWS > 0);
222     static if (NONZERO_ROWS == 1) {
      // Only the first row is present: every sample of the column gets the same value.
      // PASS1_BITS+3 fractional bits are removed; DESCALE_ZEROSHIFT also adds the +128 offset.
223       int dcval = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS+3);
224       immutable ubyte dcval_clamped = cast(ubyte)CLAMP(dcval);
225       pDst_ptr[0*8] = dcval_clamped;
226       pDst_ptr[1*8] = dcval_clamped;
227       pDst_ptr[2*8] = dcval_clamped;
228       pDst_ptr[3*8] = dcval_clamped;
229       pDst_ptr[4*8] = dcval_clamped;
230       pDst_ptr[5*8] = dcval_clamped;
231       pDst_ptr[6*8] = dcval_clamped;
232       pDst_ptr[7*8] = dcval_clamped;
233     } else {
234       // ACCESS_ROW() will be optimized at compile time to either an array access, or 0.
235       //#define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? pTemp[x * 8] : 0)
      // ACCESS_ROW!x yields a source string that is mixed in below: a read of pTemp[x*8] or the literal 0.
236       template ACCESS_ROW(int x) {
237         static if (x < NONZERO_ROWS) enum ACCESS_ROW = "pTemp["~(x*8).stringof~"]"; else enum ACCESS_ROW = "0";
238       }
239 
      // Even part: built from rows 0, 2, 4 and 6.
240       immutable int z2 = mixin(ACCESS_ROW!2);
241       immutable int z3 = mixin(ACCESS_ROW!6);
242 
243       immutable int z1 = (z2 + z3)*FIX_0_541196100;
244       immutable int tmp2 = z1 + z3*(-FIX_1_847759065);
245       immutable int tmp3 = z1 + z2*FIX_0_765366865;
246 
247       immutable int tmp0 = (mixin(ACCESS_ROW!0) + mixin(ACCESS_ROW!4)) << CONST_BITS;
248       immutable int tmp1 = (mixin(ACCESS_ROW!0) - mixin(ACCESS_ROW!4)) << CONST_BITS;
249 
250       immutable int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
251 
      // Odd part: built from rows 7, 5, 3 and 1.
252       immutable int atmp0 = mixin(ACCESS_ROW!7), atmp1 = mixin(ACCESS_ROW!5), atmp2 = mixin(ACCESS_ROW!3), atmp3 = mixin(ACCESS_ROW!1);
253 
254       immutable int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
255       immutable int bz5 = (bz3 + bz4)*FIX_1_175875602;
256 
257       immutable int az1 = bz1*(-FIX_0_899976223);
258       immutable int az2 = bz2*(-FIX_2_562915447);
259       immutable int az3 = bz3*(-FIX_1_961570560) + bz5;
260       immutable int az4 = bz4*(-FIX_0_390180644) + bz5;
261 
262       immutable int btmp0 = atmp0*FIX_0_298631336 + az1 + az3;
263       immutable int btmp1 = atmp1*FIX_2_053119869 + az2 + az4;
264       immutable int btmp2 = atmp2*FIX_3_072711026 + az2 + az3;
265       immutable int btmp3 = atmp3*FIX_1_501321110 + az1 + az4;
266 
      // Combine even and odd parts. Each result drops CONST_BITS+PASS1_BITS+3 fractional bits,
      // gets the +128 offset from DESCALE_ZEROSHIFT, and is clamped to 0..255 before being stored.
267       int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS+PASS1_BITS+3);
268       pDst_ptr[8*0] = cast(ubyte)CLAMP(i);
269 
270       i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS+PASS1_BITS+3);
271       pDst_ptr[8*7] = cast(ubyte)CLAMP(i);
272 
273       i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS+PASS1_BITS+3);
274       pDst_ptr[8*1] = cast(ubyte)CLAMP(i);
275 
276       i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS+PASS1_BITS+3);
277       pDst_ptr[8*6] = cast(ubyte)CLAMP(i);
278 
279       i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS+PASS1_BITS+3);
280       pDst_ptr[8*2] = cast(ubyte)CLAMP(i);
281 
282       i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS+PASS1_BITS+3);
283       pDst_ptr[8*5] = cast(ubyte)CLAMP(i);
284 
285       i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS+PASS1_BITS+3);
286       pDst_ptr[8*3] = cast(ubyte)CLAMP(i);
287 
288       i = DESCALE_ZEROSHIFT(tmp13 - btmp0, CONST_BITS+PASS1_BITS+3);
289       pDst_ptr[8*4] = cast(ubyte)CLAMP(i);
290     }
291   }
292 }
293 
294 
// Row-pass dispatch table used by idct(): 64 groups of 8 entries. The group is selected by
// (block_max_zag - 1); entry r of the group is the N of the Row!(N) specialization that
// idct() runs for row r of the block (0 means the row is skipped).
295 static immutable ubyte[512] s_idct_row_table = [
296   1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0,
297   4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0,
298   6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0,
299   6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0,
300   8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2,
301   8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2,
302   8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4,
303   8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8,
304 ];
305 
// Column-pass dispatch table used by idct(): entry (block_max_zag - 1) is the N of the
// Col!(N) specialization that idct() runs for every column of the block.
306 static immutable ubyte[64] s_idct_col_table = [ 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 ];
307 
/// 2D inverse DCT of one 8x8 block.
///
/// Params:
///   pSrc_ptr      = the block's 64 coefficients, row by row.
///   pDst_ptr      = receives the 64 output samples (8 rows of 8 bytes, contiguous).
///   block_max_zag = value in 1..64 (asserted) that selects, through s_idct_row_table and
///                   s_idct_col_table, which Row!/Col! specializations are run.
void idct() (const(jpeg_decoder.jpgd_block_t)* pSrc_ptr, ubyte* pDst_ptr, int block_max_zag) {
  assert(block_max_zag >= 1);
  assert(block_max_zag <= 64);

  // Single-coefficient block: every output sample has the same value, so compute it once,
  // replicate the byte into all four bytes of an int, and store two ints per row.
  if (block_max_zag <= 1) {
    int fill = ((pSrc_ptr[0] + 4) >> 3) + 128;
    fill = CLAMP(fill);
    fill |= fill << 8;
    fill |= fill << 16;

    foreach (immutable row; 0 .. 8) {
      int* pRowOut = cast(int*)(pDst_ptr + row * 8);
      pRowOut[0] = fill;
      pRowOut[1] = fill;
    }
    return;
  }

  int[64] workspace;

  // Row pass: one table entry per row picks the specialization for that row.
  immutable int tabBase = (block_max_zag - 1) * 8;
  foreach (immutable row; 0 .. 8) {
    const(jpeg_decoder.jpgd_block_t)* pIn = pSrc_ptr + row * 8;
    int* pOut = workspace.ptr + row * 8;

    switch (s_idct_row_table.ptr[tabBase + row]) {
      case 0: Row!(0).idct(pOut, pIn); break;
      case 1: Row!(1).idct(pOut, pIn); break;
      case 2: Row!(2).idct(pOut, pIn); break;
      case 3: Row!(3).idct(pOut, pIn); break;
      case 4: Row!(4).idct(pOut, pIn); break;
      case 5: Row!(5).idct(pOut, pIn); break;
      case 6: Row!(6).idct(pOut, pIn); break;
      case 7: Row!(7).idct(pOut, pIn); break;
      case 8: Row!(8).idct(pOut, pIn); break;
      default: assert(0);
    }
  }

  // Column pass: the same specialization is used for all eight columns.
  immutable int nonzero_rows = s_idct_col_table.ptr[block_max_zag - 1];
  foreach (immutable col; 0 .. 8) {
    const(int)* pIn = workspace.ptr + col;
    ubyte* pOut = pDst_ptr + col;

    switch (nonzero_rows) {
      case 1: Col!(1).idct(pOut, pIn); break;
      case 2: Col!(2).idct(pOut, pIn); break;
      case 3: Col!(3).idct(pOut, pIn); break;
      case 4: Col!(4).idct(pOut, pIn); break;
      case 5: Col!(5).idct(pOut, pIn); break;
      case 6: Col!(6).idct(pOut, pIn); break;
      case 7: Col!(7).idct(pOut, pIn); break;
      case 8: Col!(8).idct(pOut, pIn); break;
      default: assert(0);
    }
  }
}
377 
/// Inverse DCT that uses only the top-left 4x4 coefficients of the block:
/// Row!(4) is run on the first four rows, then Col!(4) on all eight columns,
/// producing a full 8x8 block of output samples at pDst_ptr.
void idct_4x4() (const(jpeg_decoder.jpgd_block_t)* pSrc_ptr, ubyte* pDst_ptr) {
  int[64] workspace;

  // Row pass over rows 0..3 (Col!(4) never reads the remaining rows of the workspace).
  foreach (immutable row; 0 .. 4)
    Row!(4).idct(workspace.ptr + row * 8, pSrc_ptr + row * 8);

  // Column pass over all eight columns.
  foreach (immutable col; 0 .. 8)
    Col!(4).idct(pDst_ptr + col, workspace.ptr + col);
}
398 
399 
400 // ////////////////////////////////////////////////////////////////////////// //
401 struct jpeg_decoder {
402 nothrow:
403 @nogc:
404 
405 private:
406   static auto JPGD_MIN(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); return (a < b ? a : b); }
407   static auto JPGD_MAX(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); return (a > b ? a : b); }
408 
409   alias jpgd_quant_t = short;
410   alias jpgd_block_t = short;
411   alias pDecode_block_func = void function (ref jpeg_decoder, int, int, int);
412 
413   static struct huff_tables {
    // Decoding tables for one Huffman table, as consumed by the huff_decode() overloads.
    // NOTE(review): how ac_table is used is not visible in this part of the file.
414     bool ac_table;
    // Indexed by the top 8 bits of the bit buffer; a value that is negative when read as int
    // means the code is longer than 8 bits and `tree` must be walked.
415     uint[256] look_up;
    // Same indexing as look_up, used by the huff_decode overload that also returns extra bits:
    // low 8 bits = symbol, bits 8..12 = number of bits to consume, bit 15 = extra bits are
    // already packed into bits 16 and up.
416     uint[256] look_up2;
    // Code length in bits for each symbol value.
417     ubyte[256] code_size;
    // Nodes for codes longer than 8 bits; indexed with the negated current value plus the next bit.
418     uint[512] tree;
419   }
420 
421   static struct coeff_buf {
    // Descriptor of a coefficient buffer (jpeg_decoder keeps one per component in
    // m_dc_coeffs / m_ac_coeffs).
    // NOTE(review): the field meanings below are inferred from the names only; confirm against
    // the code that allocates and indexes these buffers, which is not visible here.
422     ubyte* pData;                   // presumably the start of the coefficient storage
423     int block_num_x, block_num_y;   // presumably the number of blocks in each direction
424     int block_len_x, block_len_y;   // presumably the dimensions of one block
425     int block_size;                 // presumably the size in bytes of one block
426   }
427 
428   static struct mem_block {
    // Header of one allocation owned by the decoder; jpeg_decoder.m_pMem_blocks points at
    // a mem_block, and the destructor calls free_all_blocks().
429     mem_block* m_pNext;     // link to another mem_block
    // NOTE(review): presumably bytes in use and capacity of the payload; confirm against
    // the allocator, which is not visible here.
430     size_t m_used_count;
431     size_t m_size;
    // NOTE(review): looks like a one-element placeholder marking where the payload starts
    // (payload allocated together with the header); confirm against the allocator.
432     char[1] m_data;
433   }
434 
435   mem_block* m_pMem_blocks;
436   int m_image_x_size;
437   int m_image_y_size;
438   JpegStreamReadFunc readfn;
439   void* userData;
440   int m_progressive_flag;
441   ubyte[JPGD_MAX_HUFF_TABLES] m_huff_ac;
442   ubyte*[JPGD_MAX_HUFF_TABLES] m_huff_num;      // pointer to number of Huffman codes per bit size
443   ubyte*[JPGD_MAX_HUFF_TABLES] m_huff_val;      // pointer to Huffman codes per bit size
444   jpgd_quant_t*[JPGD_MAX_QUANT_TABLES] m_quant; // pointer to quantization tables
445   int m_scan_type;                              // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported)
446   int m_comps_in_frame;                         // # of components in frame
447   int[JPGD_MAX_COMPONENTS] m_comp_h_samp;       // component's horizontal sampling factor
448   int[JPGD_MAX_COMPONENTS] m_comp_v_samp;       // component's vertical sampling factor
449   int[JPGD_MAX_COMPONENTS] m_comp_quant;        // component's quantization table selector
450   int[JPGD_MAX_COMPONENTS] m_comp_ident;        // component's ID
451   int[JPGD_MAX_COMPONENTS] m_comp_h_blocks;
452   int[JPGD_MAX_COMPONENTS] m_comp_v_blocks;
453   int m_comps_in_scan;                          // # of components in scan
454   int[JPGD_MAX_COMPS_IN_SCAN] m_comp_list;      // components in this scan
455   int[JPGD_MAX_COMPONENTS] m_comp_dc_tab;       // component's DC Huffman coding table selector
456   int[JPGD_MAX_COMPONENTS] m_comp_ac_tab;       // component's AC Huffman coding table selector
457   int m_spectral_start;                         // spectral selection start
458   int m_spectral_end;                           // spectral selection end
459   int m_successive_low;                         // successive approximation low
460   int m_successive_high;                        // successive approximation high
461   int m_max_mcu_x_size;                         // MCU's max. X size in pixels
462   int m_max_mcu_y_size;                         // MCU's max. Y size in pixels
463   int m_blocks_per_mcu;
464   int m_max_blocks_per_row;
465   int m_mcus_per_row, m_mcus_per_col;
466   int[JPGD_MAX_BLOCKS_PER_MCU] m_mcu_org;
467   int m_total_lines_left;                       // total # lines left in image
468   int m_mcu_lines_left;                         // total # lines left in this MCU
469   int m_real_dest_bytes_per_scan_line;
470   int m_dest_bytes_per_scan_line;               // rounded up
471   int m_dest_bytes_per_pixel;                   // 4 (RGB) or 1 (Y)
472   huff_tables*[JPGD_MAX_HUFF_TABLES] m_pHuff_tabs;
473   coeff_buf*[JPGD_MAX_COMPONENTS] m_dc_coeffs;
474   coeff_buf*[JPGD_MAX_COMPONENTS] m_ac_coeffs;
475   int m_eob_run;
476   int[JPGD_MAX_COMPONENTS] m_block_y_mcu;
477   ubyte* m_pIn_buf_ofs;
478   int m_in_buf_left;
479   int m_tem_flag;
480   bool m_eof_flag;
481   ubyte[128] m_in_buf_pad_start;
482   ubyte[JPGD_IN_BUF_SIZE+128] m_in_buf;
483   ubyte[128] m_in_buf_pad_end;
484   int m_bits_left;
485   uint m_bit_buf;
486   int m_restart_interval;
487   int m_restarts_left;
488   int m_next_restart_num;
489   int m_max_mcus_per_row;
490   int m_max_blocks_per_mcu;
491   int m_expanded_blocks_per_mcu;
492   int m_expanded_blocks_per_row;
493   int m_expanded_blocks_per_component;
494   bool m_freq_domain_chroma_upsample;
495   int m_max_mcus_per_col;
496   uint[JPGD_MAX_COMPONENTS] m_last_dc_val;
497   jpgd_block_t* m_pMCU_coefficients;
498   int[JPGD_MAX_BLOCKS_PER_MCU] m_mcu_block_max_zag;
499   ubyte* m_pSample_buf;
500   int[256] m_crr;
501   int[256] m_cbb;
502   int[256] m_crg;
503   int[256] m_cbg;
504   ubyte* m_pScan_line_0;
505   ubyte* m_pScan_line_1;
506   jpgd_status m_error_code;
507   bool m_ready_flag;
508   int m_total_bytes_read;
509 
510   float m_pixelsPerInchX;
511   float m_pixelsPerInchY; // -1 if not available
512   float m_pixelAspectRatio; // -1 if not available
513 
514 public:
515   // Inspect `error_code` after constructing to determine if the stream is valid or not. You may look at the `width`, `height`, etc.
516   // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline.
517   this (JpegStreamReadFunc rfn, void* userData) 
518   { 
519     decode_init(rfn, userData); 
520   }
521 
522   ~this () { free_all_blocks(); }
523 
524   @disable this (this); // no copies
525 
526   // Call this method after constructing the object to begin decompression.
527   // If JPGD_SUCCESS is returned you may then call decode() on each scanline.
528   int begin_decoding () {
529     if (m_ready_flag) return JPGD_SUCCESS;
530     if (m_error_code) return JPGD_FAILED;
531 
532     decode_start();
533     m_ready_flag = true;
534     return JPGD_SUCCESS;
535   }
536 
537   // Returns the next scan line.
538   // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (`bytes_per_pixel` will return 1).
539   // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and `bytes_per_pixel` will return 4).
540   // Returns JPGD_SUCCESS if a scan line has been returned.
541   // Returns JPGD_DONE if all scan lines have been returned.
542   // Returns JPGD_FAILED if an error occurred. Inspect `error_code` for a more info.
543   int decode (/*const void** */void** pScan_line, uint* pScan_line_len) {
544     if (m_error_code || !m_ready_flag) return JPGD_FAILED;
545     if (m_total_lines_left == 0) return JPGD_DONE;
546 
547       if (m_mcu_lines_left == 0) {
548         if (m_progressive_flag) load_next_row(); else decode_next_row();
549         // Find the EOI marker if that was the last row.
550         if (m_total_lines_left <= m_max_mcu_y_size) find_eoi();
551         m_mcu_lines_left = m_max_mcu_y_size;
552       }
553       if (m_freq_domain_chroma_upsample) {
554         expanded_convert();
555         *pScan_line = m_pScan_line_0;
556       } else {
557         switch (m_scan_type) {
558           case JPGD_YH2V2:
559             if ((m_mcu_lines_left & 1) == 0) {
560               H2V2Convert();
561               *pScan_line = m_pScan_line_0;
562             } else {
563               *pScan_line = m_pScan_line_1;
564             }
565             break;
566           case JPGD_YH2V1:
567             H2V1Convert();
568             *pScan_line = m_pScan_line_0;
569             break;
570           case JPGD_YH1V2:
571             if ((m_mcu_lines_left & 1) == 0) {
572               H1V2Convert();
573               *pScan_line = m_pScan_line_0;
574             } else {
575               *pScan_line = m_pScan_line_1;
576             }
577             break;
578           case JPGD_YH1V1:
579             H1V1Convert();
580             *pScan_line = m_pScan_line_0;
581             break;
582           case JPGD_GRAYSCALE:
583             gray_convert();
584             *pScan_line = m_pScan_line_0;
585             break;
586           default:
587         }
588       }
589       *pScan_line_len = m_real_dest_bytes_per_scan_line;
590       --m_mcu_lines_left;
591       --m_total_lines_left;
592       return JPGD_SUCCESS;
593   }
594 
595   @property const pure nothrow @safe @nogc {
596     jpgd_status error_code () { return m_error_code; }
597 
598     int width () { return m_image_x_size; }
599     int height () { return m_image_y_size; }
600 
601     int num_components () { return m_comps_in_frame; }
602 
603     int bytes_per_pixel () { return m_dest_bytes_per_pixel; }
604     int bytes_per_scan_line () { return m_image_x_size * bytes_per_pixel(); }
605 
606     // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file).
607     int total_bytes_read () { return m_total_bytes_read; }
608   }
609 
610 private:
611   // Retrieve one character from the input stream.
612   uint get_char () {
613     // Any bytes remaining in buffer?
614     if (!m_in_buf_left) {
615       // Try to get more bytes.
616       prep_in_buffer();
617       // Still nothing to get?
618       if (!m_in_buf_left) {
619         // Pad the end of the stream with 0xFF 0xD9 (EOI marker)
620         int t = m_tem_flag;
621         m_tem_flag ^= 1;
622         return (t ? 0xD9 : 0xFF);
623       }
624     }
625     uint c = *m_pIn_buf_ofs++;
626     --m_in_buf_left;
627     return c;
628   }
629 
630   // Same as previous method, except can indicate if the character is a pad character or not.
631   uint get_char (bool* pPadding_flag) {
632     if (!m_in_buf_left) {
633       prep_in_buffer();
634       if (!m_in_buf_left) {
635         *pPadding_flag = true;
636         int t = m_tem_flag;
637         m_tem_flag ^= 1;
638         return (t ? 0xD9 : 0xFF);
639       }
640     }
641     *pPadding_flag = false;
642     uint c = *m_pIn_buf_ofs++;
643     --m_in_buf_left;
644     return c;
645   }
646 
647   // Inserts a previously retrieved character back into the input buffer.
648   void stuff_char (ubyte q) {
649     *(--m_pIn_buf_ofs) = q;
650     m_in_buf_left++;
651   }
652 
653   // Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered.
654   ubyte get_octet () {
655     bool padding_flag;
656     int c = get_char(&padding_flag);
657     if (c == 0xFF) {
658       if (padding_flag) return 0xFF;
659       c = get_char(&padding_flag);
660       if (padding_flag) { stuff_char(0xFF); return 0xFF; }
661       if (c == 0x00) return 0xFF;
662       stuff_char(cast(ubyte)(c));
663       stuff_char(0xFF);
664       return 0xFF;
665     }
666     return cast(ubyte)(c);
667   }
668 
669   // Retrieves a variable number of bits from the input stream. Does not recognize markers.
670   uint get_bits (int num_bits) {
671     if (!num_bits) return 0;
672     uint i = m_bit_buf >> (32 - num_bits);
673     if ((m_bits_left -= num_bits) <= 0) {
674       m_bit_buf <<= (num_bits += m_bits_left);
675       uint c1 = get_char();
676       uint c2 = get_char();
677       m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2;
678       m_bit_buf <<= -m_bits_left;
679       m_bits_left += 16;
680       assert(m_bits_left >= 0);
681     } else {
682       m_bit_buf <<= num_bits;
683     }
684     return i;
685   }
686 
687   // Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered.
688   uint get_bits_no_markers (int num_bits) {
689     if (!num_bits) return 0;
690     uint i = m_bit_buf >> (32 - num_bits);
691     if ((m_bits_left -= num_bits) <= 0) {
692       m_bit_buf <<= (num_bits += m_bits_left);
693       if (m_in_buf_left < 2 || m_pIn_buf_ofs[0] == 0xFF || m_pIn_buf_ofs[1] == 0xFF) {
694         uint c1 = get_octet();
695         uint c2 = get_octet();
696         m_bit_buf |= (c1 << 8) | c2;
697       } else {
698         m_bit_buf |= (cast(uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1];
699         m_in_buf_left -= 2;
700         m_pIn_buf_ofs += 2;
701       }
702       m_bit_buf <<= -m_bits_left;
703       m_bits_left += 16;
704       assert(m_bits_left >= 0);
705     } else {
706       m_bit_buf <<= num_bits;
707     }
708     return i;
709   }
710 
711   // Decodes a Huffman encoded symbol.
712   int huff_decode (huff_tables *pH) {
713     int symbol;
714     // Check first 8-bits: do we have a complete symbol?
715     if ((symbol = pH.look_up.ptr[m_bit_buf >> 24]) < 0) {
716       // Decode more bits, use a tree traversal to find symbol.
717       int ofs = 23;
718       do {
719         symbol = pH.tree.ptr[-cast(int)(symbol + ((m_bit_buf >> ofs) & 1))];
720         --ofs;
721       } while (symbol < 0);
722       get_bits_no_markers(8 + (23 - ofs));
723     } else {
724       get_bits_no_markers(pH.code_size.ptr[symbol]);
725     }
726     return symbol;
727   }
728 
729   // Decodes a Huffman encoded symbol.
730   int huff_decode (huff_tables *pH, ref int extra_bits) {
731     int symbol;
732     // Check first 8-bits: do we have a complete symbol?
733     if ((symbol = pH.look_up2.ptr[m_bit_buf >> 24]) < 0) {
734       // Use a tree traversal to find symbol.
735       int ofs = 23;
736       do {
737         symbol = pH.tree.ptr[-cast(int)(symbol + ((m_bit_buf >> ofs) & 1))];
738         --ofs;
739       } while (symbol < 0);
740       get_bits_no_markers(8 + (23 - ofs));
741       extra_bits = get_bits_no_markers(symbol & 0xF);
742     } else {
743       assert(((symbol >> 8) & 31) == pH.code_size.ptr[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0));
744       if (symbol & 0x8000) {
745         get_bits_no_markers((symbol >> 8) & 31);
746         extra_bits = symbol >> 16;
747       } else {
748         int code_size = (symbol >> 8) & 31;
749         int num_extra_bits = symbol & 0xF;
750         int bits = code_size + num_extra_bits;
751         if (bits <= (m_bits_left + 16)) {
752           extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1);
753         } else {
754           get_bits_no_markers(code_size);
755           extra_bits = get_bits_no_markers(num_extra_bits);
756         }
757       }
758       symbol &= 0xFF;
759     }
760     return symbol;
761   }
762 
763   // Tables and macro used to fully decode the DPCM differences.
764   static immutable int[16] s_extend_test = [ 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 ];
765   static immutable int[16] s_extend_offset = [ 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1, ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1, ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1, ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 ];
766   
767   static int JPGD_HUFF_EXTEND (int x, int s) nothrow @trusted @nogc 
768   { 
769       return (((x) < s_extend_test.ptr[s]) ? ((x) + s_extend_offset.ptr[s]) : (x)); 
770   }
771 
  // Clamps a value between 0-255.
  // `clamp` is only a local alias; the implementation is CLAMP, defined outside this section.
  alias clamp = CLAMP;
774 
775   static struct DCT_Upsample {
776   static:
777     static struct Matrix44 {
778     pure nothrow @trusted @nogc:
779       alias Element_Type = int;
780       enum { NUM_ROWS = 4, NUM_COLS = 4 }
781 
782       Element_Type[NUM_COLS][NUM_ROWS] v;
783 
784       this() (in Matrix44 m) {
785         foreach (immutable r; 0..NUM_ROWS) v[r][] = m.v[r][];
786       }
787 
788       ref inout(Element_Type) at (int r, int c) inout { pragma(inline, true); return v.ptr[r].ptr[c]; }
789 
790       ref Matrix44 opOpAssign(string op:"+") (in Matrix44 a) {
791         foreach (int r; 0..NUM_ROWS) {
792           at(r, 0) += a.at(r, 0);
793           at(r, 1) += a.at(r, 1);
794           at(r, 2) += a.at(r, 2);
795           at(r, 3) += a.at(r, 3);
796         }
797         return this;
798       }
799 
800       ref Matrix44 opOpAssign(string op:"-") (in Matrix44 a) {
801         foreach (int r; 0..NUM_ROWS) {
802           at(r, 0) -= a.at(r, 0);
803           at(r, 1) -= a.at(r, 1);
804           at(r, 2) -= a.at(r, 2);
805           at(r, 3) -= a.at(r, 3);
806         }
807         return this;
808       }
809 
810       Matrix44 opBinary(string op:"+") (in Matrix44 b) const {
811         alias a = this;
812         Matrix44 ret;
813         foreach (int r; 0..NUM_ROWS) {
814           ret.at(r, 0) = a.at(r, 0) + b.at(r, 0);
815           ret.at(r, 1) = a.at(r, 1) + b.at(r, 1);
816           ret.at(r, 2) = a.at(r, 2) + b.at(r, 2);
817           ret.at(r, 3) = a.at(r, 3) + b.at(r, 3);
818         }
819         return ret;
820       }
821 
822       Matrix44 opBinary(string op:"-") (in Matrix44 b) const {
823         alias a = this;
824         Matrix44 ret;
825         foreach (int r; 0..NUM_ROWS) {
826           ret.at(r, 0) = a.at(r, 0) - b.at(r, 0);
827           ret.at(r, 1) = a.at(r, 1) - b.at(r, 1);
828           ret.at(r, 2) = a.at(r, 2) - b.at(r, 2);
829           ret.at(r, 3) = a.at(r, 3) - b.at(r, 3);
830         }
831         return ret;
832       }
833 
834       static void add_and_store() (jpgd_block_t* pDst, in Matrix44 a, in Matrix44 b) {
835         foreach (int r; 0..4) {
836           pDst[0*8 + r] = cast(jpgd_block_t)(a.at(r, 0) + b.at(r, 0));
837           pDst[1*8 + r] = cast(jpgd_block_t)(a.at(r, 1) + b.at(r, 1));
838           pDst[2*8 + r] = cast(jpgd_block_t)(a.at(r, 2) + b.at(r, 2));
839           pDst[3*8 + r] = cast(jpgd_block_t)(a.at(r, 3) + b.at(r, 3));
840         }
841       }
842 
843       static void sub_and_store() (jpgd_block_t* pDst, in Matrix44 a, in Matrix44 b) {
844         foreach (int r; 0..4) {
845           pDst[0*8 + r] = cast(jpgd_block_t)(a.at(r, 0) - b.at(r, 0));
846           pDst[1*8 + r] = cast(jpgd_block_t)(a.at(r, 1) - b.at(r, 1));
847           pDst[2*8 + r] = cast(jpgd_block_t)(a.at(r, 2) - b.at(r, 2));
848           pDst[3*8 + r] = cast(jpgd_block_t)(a.at(r, 3) - b.at(r, 3));
849         }
850       }
851     }
852 
    // Fixed-point format of the upsampling coefficients: FRACT_BITS fractional bits,
    // so SCALE represents 1.0.
    enum FRACT_BITS = 10;
    enum SCALE = 1 << FRACT_BITS;

    // Integer type of the intermediate X... values in P_Q.calc / R_S.calc.
    alias Temp_Type = int;

    // Descale: adds half of SCALE and shifts right by FRACT_BITS, turning a
    // SCALE-multiplied sum back into a plain integer.
    static int D(T) (T i) { pragma(inline, true); return (((i) + (SCALE >> 1)) >> FRACT_BITS); }
    // Compile-time conversion of a float coefficient to fixed point:
    // i * SCALE + 0.5, converted to int by the cast.
    enum F(float i) = (cast(int)((i) * SCALE + 0.5f));
860 
    // Computes the 4x4 matrices P and Q from an 8x8 coefficient block.
    // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix
    // Coefficients at column >= NUM_COLS or row >= NUM_ROWS are replaced by the
    // literal 0 at compile time (see AT below), so they are never read from pSrc.
    static struct P_Q(int NUM_ROWS, int NUM_COLS) {
      // P, Q: output matrices; every element of both is overwritten.
      // pSrc: source block, read as pSrc[c + r*8] for column c and row r.
      static void calc (ref Matrix44 P, ref Matrix44 Q, const(jpgd_block_t)* pSrc) {
        //auto AT (int c, int r) nothrow @trusted @nogc { return (c >= NUM_COLS || r >= NUM_ROWS ? 0 : pSrc[c+r*8]); }
        // AT!(c, r) yields source text for mixin(): either "0" (outside the
        // NUM_COLS x NUM_ROWS region) or the expression "pSrc[c+r*8]".
        template AT(int c, int r) {
          static if (c >= NUM_COLS || r >= NUM_ROWS) enum AT = "0"; else enum AT = "pSrc["~c.stringof~"+"~r.stringof~"*8]";
        }
        // First stage. Naming: X0<i><r> is intermediate row i (0..3) for source row r (0..7).
        // Rows 0 and 2 copy source columns 0 and 4; rows 1 and 3 are descaled
        // weighted sums of the odd source columns 1, 3, 5, 7.
        // 4x8 = 4x8 times 8x8, matrix 0 is constant
        immutable Temp_Type X000 = mixin(AT!(0, 0));
        immutable Temp_Type X001 = mixin(AT!(0, 1));
        immutable Temp_Type X002 = mixin(AT!(0, 2));
        immutable Temp_Type X003 = mixin(AT!(0, 3));
        immutable Temp_Type X004 = mixin(AT!(0, 4));
        immutable Temp_Type X005 = mixin(AT!(0, 5));
        immutable Temp_Type X006 = mixin(AT!(0, 6));
        immutable Temp_Type X007 = mixin(AT!(0, 7));
        immutable Temp_Type X010 = D(F!(0.415735f) * mixin(AT!(1, 0)) + F!(0.791065f) * mixin(AT!(3, 0)) + F!(-0.352443f) * mixin(AT!(5, 0)) + F!(0.277785f) * mixin(AT!(7, 0)));
        immutable Temp_Type X011 = D(F!(0.415735f) * mixin(AT!(1, 1)) + F!(0.791065f) * mixin(AT!(3, 1)) + F!(-0.352443f) * mixin(AT!(5, 1)) + F!(0.277785f) * mixin(AT!(7, 1)));
        immutable Temp_Type X012 = D(F!(0.415735f) * mixin(AT!(1, 2)) + F!(0.791065f) * mixin(AT!(3, 2)) + F!(-0.352443f) * mixin(AT!(5, 2)) + F!(0.277785f) * mixin(AT!(7, 2)));
        immutable Temp_Type X013 = D(F!(0.415735f) * mixin(AT!(1, 3)) + F!(0.791065f) * mixin(AT!(3, 3)) + F!(-0.352443f) * mixin(AT!(5, 3)) + F!(0.277785f) * mixin(AT!(7, 3)));
        immutable Temp_Type X014 = D(F!(0.415735f) * mixin(AT!(1, 4)) + F!(0.791065f) * mixin(AT!(3, 4)) + F!(-0.352443f) * mixin(AT!(5, 4)) + F!(0.277785f) * mixin(AT!(7, 4)));
        immutable Temp_Type X015 = D(F!(0.415735f) * mixin(AT!(1, 5)) + F!(0.791065f) * mixin(AT!(3, 5)) + F!(-0.352443f) * mixin(AT!(5, 5)) + F!(0.277785f) * mixin(AT!(7, 5)));
        immutable Temp_Type X016 = D(F!(0.415735f) * mixin(AT!(1, 6)) + F!(0.791065f) * mixin(AT!(3, 6)) + F!(-0.352443f) * mixin(AT!(5, 6)) + F!(0.277785f) * mixin(AT!(7, 6)));
        immutable Temp_Type X017 = D(F!(0.415735f) * mixin(AT!(1, 7)) + F!(0.791065f) * mixin(AT!(3, 7)) + F!(-0.352443f) * mixin(AT!(5, 7)) + F!(0.277785f) * mixin(AT!(7, 7)));
        immutable Temp_Type X020 = mixin(AT!(4, 0));
        immutable Temp_Type X021 = mixin(AT!(4, 1));
        immutable Temp_Type X022 = mixin(AT!(4, 2));
        immutable Temp_Type X023 = mixin(AT!(4, 3));
        immutable Temp_Type X024 = mixin(AT!(4, 4));
        immutable Temp_Type X025 = mixin(AT!(4, 5));
        immutable Temp_Type X026 = mixin(AT!(4, 6));
        immutable Temp_Type X027 = mixin(AT!(4, 7));
        immutable Temp_Type X030 = D(F!(0.022887f) * mixin(AT!(1, 0)) + F!(-0.097545f) * mixin(AT!(3, 0)) + F!(0.490393f) * mixin(AT!(5, 0)) + F!(0.865723f) * mixin(AT!(7, 0)));
        immutable Temp_Type X031 = D(F!(0.022887f) * mixin(AT!(1, 1)) + F!(-0.097545f) * mixin(AT!(3, 1)) + F!(0.490393f) * mixin(AT!(5, 1)) + F!(0.865723f) * mixin(AT!(7, 1)));
        immutable Temp_Type X032 = D(F!(0.022887f) * mixin(AT!(1, 2)) + F!(-0.097545f) * mixin(AT!(3, 2)) + F!(0.490393f) * mixin(AT!(5, 2)) + F!(0.865723f) * mixin(AT!(7, 2)));
        immutable Temp_Type X033 = D(F!(0.022887f) * mixin(AT!(1, 3)) + F!(-0.097545f) * mixin(AT!(3, 3)) + F!(0.490393f) * mixin(AT!(5, 3)) + F!(0.865723f) * mixin(AT!(7, 3)));
        immutable Temp_Type X034 = D(F!(0.022887f) * mixin(AT!(1, 4)) + F!(-0.097545f) * mixin(AT!(3, 4)) + F!(0.490393f) * mixin(AT!(5, 4)) + F!(0.865723f) * mixin(AT!(7, 4)));
        immutable Temp_Type X035 = D(F!(0.022887f) * mixin(AT!(1, 5)) + F!(-0.097545f) * mixin(AT!(3, 5)) + F!(0.490393f) * mixin(AT!(5, 5)) + F!(0.865723f) * mixin(AT!(7, 5)));
        immutable Temp_Type X036 = D(F!(0.022887f) * mixin(AT!(1, 6)) + F!(-0.097545f) * mixin(AT!(3, 6)) + F!(0.490393f) * mixin(AT!(5, 6)) + F!(0.865723f) * mixin(AT!(7, 6)));
        immutable Temp_Type X037 = D(F!(0.022887f) * mixin(AT!(1, 7)) + F!(-0.097545f) * mixin(AT!(3, 7)) + F!(0.490393f) * mixin(AT!(5, 7)) + F!(0.865723f) * mixin(AT!(7, 7)));

        // Second stage for P: columns 0 and 2 copy the r=0 and r=4 intermediates;
        // columns 1 and 3 are descaled weighted sums of the odd-r intermediates.
        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        P.at(0, 0) = X000;
        P.at(0, 1) = D(X001 * F!(0.415735f) + X003 * F!(0.791065f) + X005 * F!(-0.352443f) + X007 * F!(0.277785f));
        P.at(0, 2) = X004;
        P.at(0, 3) = D(X001 * F!(0.022887f) + X003 * F!(-0.097545f) + X005 * F!(0.490393f) + X007 * F!(0.865723f));
        P.at(1, 0) = X010;
        P.at(1, 1) = D(X011 * F!(0.415735f) + X013 * F!(0.791065f) + X015 * F!(-0.352443f) + X017 * F!(0.277785f));
        P.at(1, 2) = X014;
        P.at(1, 3) = D(X011 * F!(0.022887f) + X013 * F!(-0.097545f) + X015 * F!(0.490393f) + X017 * F!(0.865723f));
        P.at(2, 0) = X020;
        P.at(2, 1) = D(X021 * F!(0.415735f) + X023 * F!(0.791065f) + X025 * F!(-0.352443f) + X027 * F!(0.277785f));
        P.at(2, 2) = X024;
        P.at(2, 3) = D(X021 * F!(0.022887f) + X023 * F!(-0.097545f) + X025 * F!(0.490393f) + X027 * F!(0.865723f));
        P.at(3, 0) = X030;
        P.at(3, 1) = D(X031 * F!(0.415735f) + X033 * F!(0.791065f) + X035 * F!(-0.352443f) + X037 * F!(0.277785f));
        P.at(3, 2) = X034;
        P.at(3, 3) = D(X031 * F!(0.022887f) + X033 * F!(-0.097545f) + X035 * F!(0.490393f) + X037 * F!(0.865723f));
        // 40 muls 24 adds

        // Second stage for Q: columns 1 and 3 copy the r=2 and r=6 intermediates;
        // columns 0 and 2 are descaled weighted sums of the odd-r intermediates.
        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        Q.at(0, 0) = D(X001 * F!(0.906127f) + X003 * F!(-0.318190f) + X005 * F!(0.212608f) + X007 * F!(-0.180240f));
        Q.at(0, 1) = X002;
        Q.at(0, 2) = D(X001 * F!(-0.074658f) + X003 * F!(0.513280f) + X005 * F!(0.768178f) + X007 * F!(-0.375330f));
        Q.at(0, 3) = X006;
        Q.at(1, 0) = D(X011 * F!(0.906127f) + X013 * F!(-0.318190f) + X015 * F!(0.212608f) + X017 * F!(-0.180240f));
        Q.at(1, 1) = X012;
        Q.at(1, 2) = D(X011 * F!(-0.074658f) + X013 * F!(0.513280f) + X015 * F!(0.768178f) + X017 * F!(-0.375330f));
        Q.at(1, 3) = X016;
        Q.at(2, 0) = D(X021 * F!(0.906127f) + X023 * F!(-0.318190f) + X025 * F!(0.212608f) + X027 * F!(-0.180240f));
        Q.at(2, 1) = X022;
        Q.at(2, 2) = D(X021 * F!(-0.074658f) + X023 * F!(0.513280f) + X025 * F!(0.768178f) + X027 * F!(-0.375330f));
        Q.at(2, 3) = X026;
        Q.at(3, 0) = D(X031 * F!(0.906127f) + X033 * F!(-0.318190f) + X035 * F!(0.212608f) + X037 * F!(-0.180240f));
        Q.at(3, 1) = X032;
        Q.at(3, 2) = D(X031 * F!(-0.074658f) + X033 * F!(0.513280f) + X035 * F!(0.768178f) + X037 * F!(-0.375330f));
        Q.at(3, 3) = X036;
        // 40 muls 24 adds
      }
    }
941 
    // Computes the 4x4 matrices R and S from an 8x8 coefficient block.
    // NUM_ROWS/NUM_COLS bound the region of the block that is actually read:
    // coefficients at column >= NUM_COLS or row >= NUM_ROWS are replaced by the
    // literal 0 at compile time (see AT below).
    static struct R_S(int NUM_ROWS, int NUM_COLS) {
      // R, S: output matrices; every element of both is overwritten.
      // pSrc: source block, read as pSrc[c + r*8] for column c and row r.
      static void calc(ref Matrix44 R, ref Matrix44 S, const(jpgd_block_t)* pSrc) {
        //auto AT (int c, int r) nothrow @trusted @nogc { return (c >= NUM_COLS || r >= NUM_ROWS ? 0 : pSrc[c+r*8]); }
        // AT!(c, r) yields source text for mixin(): either "0" (outside the
        // NUM_COLS x NUM_ROWS region) or the expression "pSrc[c+r*8]".
        template AT(int c, int r) {
          static if (c >= NUM_COLS || r >= NUM_ROWS) enum AT = "0"; else enum AT = "pSrc["~c.stringof~"+"~r.stringof~"*8]";
        }
        // First stage. Naming: X1<i><r> is intermediate row i (0..3) for source row r (0..7).
        // Rows 1 and 3 copy source columns 2 and 6; rows 0 and 2 are descaled
        // weighted sums of the odd source columns 1, 3, 5, 7.
        // 4x8 = 4x8 times 8x8, matrix 0 is constant
        immutable Temp_Type X100 = D(F!(0.906127f) * mixin(AT!(1, 0)) + F!(-0.318190f) * mixin(AT!(3, 0)) + F!(0.212608f) * mixin(AT!(5, 0)) + F!(-0.180240f) * mixin(AT!(7, 0)));
        immutable Temp_Type X101 = D(F!(0.906127f) * mixin(AT!(1, 1)) + F!(-0.318190f) * mixin(AT!(3, 1)) + F!(0.212608f) * mixin(AT!(5, 1)) + F!(-0.180240f) * mixin(AT!(7, 1)));
        immutable Temp_Type X102 = D(F!(0.906127f) * mixin(AT!(1, 2)) + F!(-0.318190f) * mixin(AT!(3, 2)) + F!(0.212608f) * mixin(AT!(5, 2)) + F!(-0.180240f) * mixin(AT!(7, 2)));
        immutable Temp_Type X103 = D(F!(0.906127f) * mixin(AT!(1, 3)) + F!(-0.318190f) * mixin(AT!(3, 3)) + F!(0.212608f) * mixin(AT!(5, 3)) + F!(-0.180240f) * mixin(AT!(7, 3)));
        immutable Temp_Type X104 = D(F!(0.906127f) * mixin(AT!(1, 4)) + F!(-0.318190f) * mixin(AT!(3, 4)) + F!(0.212608f) * mixin(AT!(5, 4)) + F!(-0.180240f) * mixin(AT!(7, 4)));
        immutable Temp_Type X105 = D(F!(0.906127f) * mixin(AT!(1, 5)) + F!(-0.318190f) * mixin(AT!(3, 5)) + F!(0.212608f) * mixin(AT!(5, 5)) + F!(-0.180240f) * mixin(AT!(7, 5)));
        immutable Temp_Type X106 = D(F!(0.906127f) * mixin(AT!(1, 6)) + F!(-0.318190f) * mixin(AT!(3, 6)) + F!(0.212608f) * mixin(AT!(5, 6)) + F!(-0.180240f) * mixin(AT!(7, 6)));
        immutable Temp_Type X107 = D(F!(0.906127f) * mixin(AT!(1, 7)) + F!(-0.318190f) * mixin(AT!(3, 7)) + F!(0.212608f) * mixin(AT!(5, 7)) + F!(-0.180240f) * mixin(AT!(7, 7)));
        immutable Temp_Type X110 = mixin(AT!(2, 0));
        immutable Temp_Type X111 = mixin(AT!(2, 1));
        immutable Temp_Type X112 = mixin(AT!(2, 2));
        immutable Temp_Type X113 = mixin(AT!(2, 3));
        immutable Temp_Type X114 = mixin(AT!(2, 4));
        immutable Temp_Type X115 = mixin(AT!(2, 5));
        immutable Temp_Type X116 = mixin(AT!(2, 6));
        immutable Temp_Type X117 = mixin(AT!(2, 7));
        immutable Temp_Type X120 = D(F!(-0.074658f) * mixin(AT!(1, 0)) + F!(0.513280f) * mixin(AT!(3, 0)) + F!(0.768178f) * mixin(AT!(5, 0)) + F!(-0.375330f) * mixin(AT!(7, 0)));
        immutable Temp_Type X121 = D(F!(-0.074658f) * mixin(AT!(1, 1)) + F!(0.513280f) * mixin(AT!(3, 1)) + F!(0.768178f) * mixin(AT!(5, 1)) + F!(-0.375330f) * mixin(AT!(7, 1)));
        immutable Temp_Type X122 = D(F!(-0.074658f) * mixin(AT!(1, 2)) + F!(0.513280f) * mixin(AT!(3, 2)) + F!(0.768178f) * mixin(AT!(5, 2)) + F!(-0.375330f) * mixin(AT!(7, 2)));
        immutable Temp_Type X123 = D(F!(-0.074658f) * mixin(AT!(1, 3)) + F!(0.513280f) * mixin(AT!(3, 3)) + F!(0.768178f) * mixin(AT!(5, 3)) + F!(-0.375330f) * mixin(AT!(7, 3)));
        immutable Temp_Type X124 = D(F!(-0.074658f) * mixin(AT!(1, 4)) + F!(0.513280f) * mixin(AT!(3, 4)) + F!(0.768178f) * mixin(AT!(5, 4)) + F!(-0.375330f) * mixin(AT!(7, 4)));
        immutable Temp_Type X125 = D(F!(-0.074658f) * mixin(AT!(1, 5)) + F!(0.513280f) * mixin(AT!(3, 5)) + F!(0.768178f) * mixin(AT!(5, 5)) + F!(-0.375330f) * mixin(AT!(7, 5)));
        immutable Temp_Type X126 = D(F!(-0.074658f) * mixin(AT!(1, 6)) + F!(0.513280f) * mixin(AT!(3, 6)) + F!(0.768178f) * mixin(AT!(5, 6)) + F!(-0.375330f) * mixin(AT!(7, 6)));
        immutable Temp_Type X127 = D(F!(-0.074658f) * mixin(AT!(1, 7)) + F!(0.513280f) * mixin(AT!(3, 7)) + F!(0.768178f) * mixin(AT!(5, 7)) + F!(-0.375330f) * mixin(AT!(7, 7)));
        immutable Temp_Type X130 = mixin(AT!(6, 0));
        immutable Temp_Type X131 = mixin(AT!(6, 1));
        immutable Temp_Type X132 = mixin(AT!(6, 2));
        immutable Temp_Type X133 = mixin(AT!(6, 3));
        immutable Temp_Type X134 = mixin(AT!(6, 4));
        immutable Temp_Type X135 = mixin(AT!(6, 5));
        immutable Temp_Type X136 = mixin(AT!(6, 6));
        immutable Temp_Type X137 = mixin(AT!(6, 7));
        // 80 muls 48 adds

        // Second stage for R: columns 0 and 2 copy the r=0 and r=4 intermediates;
        // columns 1 and 3 are descaled weighted sums of the odd-r intermediates.
        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        R.at(0, 0) = X100;
        R.at(0, 1) = D(X101 * F!(0.415735f) + X103 * F!(0.791065f) + X105 * F!(-0.352443f) + X107 * F!(0.277785f));
        R.at(0, 2) = X104;
        R.at(0, 3) = D(X101 * F!(0.022887f) + X103 * F!(-0.097545f) + X105 * F!(0.490393f) + X107 * F!(0.865723f));
        R.at(1, 0) = X110;
        R.at(1, 1) = D(X111 * F!(0.415735f) + X113 * F!(0.791065f) + X115 * F!(-0.352443f) + X117 * F!(0.277785f));
        R.at(1, 2) = X114;
        R.at(1, 3) = D(X111 * F!(0.022887f) + X113 * F!(-0.097545f) + X115 * F!(0.490393f) + X117 * F!(0.865723f));
        R.at(2, 0) = X120;
        R.at(2, 1) = D(X121 * F!(0.415735f) + X123 * F!(0.791065f) + X125 * F!(-0.352443f) + X127 * F!(0.277785f));
        R.at(2, 2) = X124;
        R.at(2, 3) = D(X121 * F!(0.022887f) + X123 * F!(-0.097545f) + X125 * F!(0.490393f) + X127 * F!(0.865723f));
        R.at(3, 0) = X130;
        R.at(3, 1) = D(X131 * F!(0.415735f) + X133 * F!(0.791065f) + X135 * F!(-0.352443f) + X137 * F!(0.277785f));
        R.at(3, 2) = X134;
        R.at(3, 3) = D(X131 * F!(0.022887f) + X133 * F!(-0.097545f) + X135 * F!(0.490393f) + X137 * F!(0.865723f));
        // 40 muls 24 adds
        // Second stage for S: columns 1 and 3 copy the r=2 and r=6 intermediates;
        // columns 0 and 2 are descaled weighted sums of the odd-r intermediates.
        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        S.at(0, 0) = D(X101 * F!(0.906127f) + X103 * F!(-0.318190f) + X105 * F!(0.212608f) + X107 * F!(-0.180240f));
        S.at(0, 1) = X102;
        S.at(0, 2) = D(X101 * F!(-0.074658f) + X103 * F!(0.513280f) + X105 * F!(0.768178f) + X107 * F!(-0.375330f));
        S.at(0, 3) = X106;
        S.at(1, 0) = D(X111 * F!(0.906127f) + X113 * F!(-0.318190f) + X115 * F!(0.212608f) + X117 * F!(-0.180240f));
        S.at(1, 1) = X112;
        S.at(1, 2) = D(X111 * F!(-0.074658f) + X113 * F!(0.513280f) + X115 * F!(0.768178f) + X117 * F!(-0.375330f));
        S.at(1, 3) = X116;
        S.at(2, 0) = D(X121 * F!(0.906127f) + X123 * F!(-0.318190f) + X125 * F!(0.212608f) + X127 * F!(-0.180240f));
        S.at(2, 1) = X122;
        S.at(2, 2) = D(X121 * F!(-0.074658f) + X123 * F!(0.513280f) + X125 * F!(0.768178f) + X127 * F!(-0.375330f));
        S.at(2, 3) = X126;
        S.at(3, 0) = D(X131 * F!(0.906127f) + X133 * F!(-0.318190f) + X135 * F!(0.212608f) + X137 * F!(-0.180240f));
        S.at(3, 1) = X132;
        S.at(3, 2) = D(X131 * F!(-0.074658f) + X133 * F!(0.513280f) + X135 * F!(0.768178f) + X137 * F!(-0.375330f));
        S.at(3, 3) = X136;
        // 40 muls 24 adds
      }
    }
1021   } // end namespace DCT_Upsample
1022 
1023   // Unconditionally frees all allocated m_blocks.
1024   void free_all_blocks () {
1025     //m_pStream = null;
1026     readfn = null;
1027     for (mem_block *b = m_pMem_blocks; b; ) {
1028       mem_block* n = b.m_pNext;
1029       jpgd_free(b);
1030       b = n;
1031     }
1032     m_pMem_blocks = null;
1033   }
1034 
1035   // This method handles all errors. It will never return.
1036   // It could easily be changed to use C++ exceptions.
1037   /*JPGD_NORETURN*/ void stop_decoding (jpgd_status status) {
1038     m_error_code = status;
1039     free_all_blocks();
1040     //longjmp(m_jmp_state, status);
1041     assert(false, "jpeg decoding error");
1042   }
1043 
1044   void* alloc (size_t nSize, bool zero=false) {
1045     nSize = (JPGD_MAX(nSize, 1) + 3) & ~3;
1046     char *rv = null;
1047     for (mem_block *b = m_pMem_blocks; b; b = b.m_pNext)
1048     {
1049       if ((b.m_used_count + nSize) <= b.m_size)
1050       {
1051         rv = b.m_data.ptr + b.m_used_count;
1052         b.m_used_count += nSize;
1053         break;
1054       }
1055     }
1056     if (!rv)
1057     {
1058       int capacity = cast(int) JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047);
1059       mem_block *b = cast(mem_block*)jpgd_malloc(mem_block.sizeof + capacity);
1060       if (!b) { stop_decoding(JPGD_NOTENOUGHMEM); }
1061       b.m_pNext = m_pMem_blocks; m_pMem_blocks = b;
1062       b.m_used_count = nSize;
1063       b.m_size = capacity;
1064       rv = b.m_data.ptr;
1065     }
1066     if (zero) memset(rv, 0, nSize);
1067     return rv;
1068   }
1069 
1070   void word_clear (void *p, ushort c, uint n) {
1071     ubyte *pD = cast(ubyte*)p;
1072     immutable ubyte l = c & 0xFF, h = (c >> 8) & 0xFF;
1073     while (n)
1074     {
1075       pD[0] = l; pD[1] = h; pD += 2;
1076       n--;
1077     }
1078   }
1079 
1080   // Refill the input buffer.
1081   // This method will sit in a loop until (A) the buffer is full or (B)
1082   // the stream's read() method reports and end of file condition.
1083   void prep_in_buffer () {
1084     m_in_buf_left = 0;
1085     m_pIn_buf_ofs = m_in_buf.ptr;
1086 
1087     if (m_eof_flag)
1088       return;
1089 
1090     do
1091     {
1092       int bytes_read = readfn(m_in_buf.ptr + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag, userData);
1093       if (bytes_read == -1)
1094         stop_decoding(JPGD_STREAM_READ);
1095 
1096       m_in_buf_left += bytes_read;
1097     } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag));
1098 
1099     m_total_bytes_read += m_in_buf_left;
1100 
1101     // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid).
1102     // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.)
1103     word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
1104   }
1105 
1106   // Read a Huffman code table.
1107   void read_dht_marker () {
1108     int i, index, count;
1109     ubyte[17] huff_num;
1110     ubyte[256] huff_val;
1111 
1112     uint num_left = get_bits(16);
1113 
1114     if (num_left < 2)
1115       stop_decoding(JPGD_BAD_DHT_MARKER);
1116 
1117     num_left -= 2;
1118 
1119     while (num_left)
1120     {
1121       index = get_bits(8);
1122 
1123       huff_num.ptr[0] = 0;
1124 
1125       count = 0;
1126 
1127       for (i = 1; i <= 16; i++)
1128       {
1129         huff_num.ptr[i] = cast(ubyte)(get_bits(8));
1130         count += huff_num.ptr[i];
1131       }
1132 
1133       if (count > 255)
1134         stop_decoding(JPGD_BAD_DHT_COUNTS);
1135 
1136       for (i = 0; i < count; i++)
1137         huff_val.ptr[i] = cast(ubyte)(get_bits(8));
1138 
1139       i = 1 + 16 + count;
1140 
1141       if (num_left < cast(uint)i)
1142         stop_decoding(JPGD_BAD_DHT_MARKER);
1143 
1144       num_left -= i;
1145 
1146       if ((index & 0x10) > 0x10)
1147         stop_decoding(JPGD_BAD_DHT_INDEX);
1148 
1149       index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1);
1150 
1151       if (index >= JPGD_MAX_HUFF_TABLES)
1152         stop_decoding(JPGD_BAD_DHT_INDEX);
1153 
1154       if (!m_huff_num.ptr[index])
1155         m_huff_num.ptr[index] = cast(ubyte*)alloc(17);
1156 
1157       if (!m_huff_val.ptr[index])
1158         m_huff_val.ptr[index] = cast(ubyte*)alloc(256);
1159 
1160       m_huff_ac.ptr[index] = (index & 0x10) != 0;
1161       memcpy(m_huff_num.ptr[index], huff_num.ptr, 17);
1162       memcpy(m_huff_val.ptr[index], huff_val.ptr, 256);
1163     }
1164   }
1165 
1166   // Read a quantization table.
1167   void read_dqt_marker () {
1168     int n, i, prec;
1169     uint num_left;
1170     uint temp;
1171 
1172     num_left = get_bits(16);
1173 
1174     if (num_left < 2)
1175       stop_decoding(JPGD_BAD_DQT_MARKER);
1176 
1177     num_left -= 2;
1178 
1179     while (num_left)
1180     {
1181       n = get_bits(8);
1182       prec = n >> 4;
1183       n &= 0x0F;
1184 
1185       if (n >= JPGD_MAX_QUANT_TABLES)
1186         stop_decoding(JPGD_BAD_DQT_TABLE);
1187 
1188       if (!m_quant.ptr[n])
1189         m_quant.ptr[n] = cast(jpgd_quant_t*)alloc(64 * jpgd_quant_t.sizeof);
1190 
1191       // read quantization entries, in zag order
1192       for (i = 0; i < 64; i++)
1193       {
1194         temp = get_bits(8);
1195 
1196         if (prec)
1197           temp = (temp << 8) + get_bits(8);
1198 
1199         m_quant.ptr[n][i] = cast(jpgd_quant_t)(temp);
1200       }
1201 
1202       i = 64 + 1;
1203 
1204       if (prec)
1205         i += 64;
1206 
1207       if (num_left < cast(uint)i)
1208         stop_decoding(JPGD_BAD_DQT_LENGTH);
1209 
1210       num_left -= i;
1211     }
1212   }
1213 
1214   // Read the start of frame (SOF) marker.
1215   void read_sof_marker () {
1216     int i;
1217     uint num_left;
1218 
1219     num_left = get_bits(16);
1220 
1221     if (get_bits(8) != 8)   /* precision: sorry, only 8-bit precision is supported right now */
1222       stop_decoding(JPGD_BAD_PRECISION);
1223 
1224     m_image_y_size = get_bits(16);
1225 
1226     if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT))
1227       stop_decoding(JPGD_BAD_HEIGHT);
1228 
1229     m_image_x_size = get_bits(16);
1230 
1231     if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH))
1232       stop_decoding(JPGD_BAD_WIDTH);
1233 
1234     m_comps_in_frame = get_bits(8);
1235 
1236     if (m_comps_in_frame > JPGD_MAX_COMPONENTS)
1237       stop_decoding(JPGD_TOO_MANY_COMPONENTS);
1238 
1239     if (num_left != cast(uint)(m_comps_in_frame * 3 + 8))
1240       stop_decoding(JPGD_BAD_SOF_LENGTH);
1241 
1242     for (i = 0; i < m_comps_in_frame; i++)
1243     {
1244       m_comp_ident.ptr[i]  = get_bits(8);
1245       m_comp_h_samp.ptr[i] = get_bits(4);
1246       m_comp_v_samp.ptr[i] = get_bits(4);
1247       m_comp_quant.ptr[i]  = get_bits(8);
1248     }
1249   }
1250 
1251   // Used to skip unrecognized markers.
1252   void skip_variable_marker () {
1253     uint num_left;
1254 
1255     num_left = get_bits(16);
1256 
1257     if (num_left < 2)
1258       stop_decoding(JPGD_BAD_VARIABLE_MARKER);
1259 
1260     num_left -= 2;
1261 
1262     while (num_left)
1263     {
1264       get_bits(8);
1265       num_left--;
1266     }
1267   }
1268 
1269   // Read a define restart interval (DRI) marker.
1270   void read_dri_marker () {
1271     if (get_bits(16) != 4)
1272       stop_decoding(JPGD_BAD_DRI_LENGTH);
1273 
1274     m_restart_interval = get_bits(16);
1275   }
1276 
1277   // Read a start of scan (SOS) marker.
1278   void read_sos_marker () {
1279     uint num_left;
1280     int i, ci, n, c, cc;
1281 
1282     num_left = get_bits(16);
1283 
1284     n = get_bits(8);
1285 
1286     m_comps_in_scan = n;
1287 
1288     num_left -= 3;
1289 
1290     if ( (num_left != cast(uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN) )
1291       stop_decoding(JPGD_BAD_SOS_LENGTH);
1292 
1293     for (i = 0; i < n; i++)
1294     {
1295       cc = get_bits(8);
1296       c = get_bits(8);
1297       num_left -= 2;
1298 
1299       for (ci = 0; ci < m_comps_in_frame; ci++)
1300         if (cc == m_comp_ident.ptr[ci])
1301           break;
1302 
1303       if (ci >= m_comps_in_frame)
1304         stop_decoding(JPGD_BAD_SOS_COMP_ID);
1305 
1306       m_comp_list.ptr[i]    = ci;
1307       m_comp_dc_tab.ptr[ci] = (c >> 4) & 15;
1308       m_comp_ac_tab.ptr[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1);
1309     }
1310 
1311     m_spectral_start  = get_bits(8);
1312     m_spectral_end    = get_bits(8);
1313     m_successive_high = get_bits(4);
1314     m_successive_low  = get_bits(4);
1315 
1316     if (!m_progressive_flag)
1317     {
1318       m_spectral_start = 0;
1319       m_spectral_end = 63;
1320     }
1321 
1322     num_left -= 3;
1323 
1324     /* read past whatever is num_left */
1325     while (num_left)
1326     {
1327       get_bits(8);
1328       num_left--;
1329     }
1330   }
1331 
1332   // Finds the next marker.
1333   int next_marker () {
1334     uint c, bytes;
1335 
1336     bytes = 0;
1337 
1338     do
1339     {
1340       do
1341       {
1342         bytes++;
1343         c = get_bits(8);
1344       } while (c != 0xFF);
1345 
1346       do
1347       {
1348         c = get_bits(8);
1349       } while (c == 0xFF);
1350 
1351     } while (c == 0);
1352 
1353     // If bytes > 0 here, there where extra bytes before the marker (not good).
1354 
1355     return c;
1356   }
1357 
1358   // Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is
1359   // encountered.
1360   int process_markers () {
1361     int c;
1362 
1363     for ( ; ; ) {
1364       c = next_marker();
1365 
1366       switch (c)
1367       {
1368         case M_SOF0:
1369         case M_SOF1:
1370         case M_SOF2:
1371         case M_SOF3:
1372         case M_SOF5:
1373         case M_SOF6:
1374         case M_SOF7:
1375         //case M_JPG:
1376         case M_SOF9:
1377         case M_SOF10:
1378         case M_SOF11:
1379         case M_SOF13:
1380         case M_SOF14:
1381         case M_SOF15:
1382         case M_SOI:
1383         case M_EOI:
1384         case M_SOS:
1385           return c;
1386         case M_DHT:
1387           read_dht_marker();
1388           break;
1389         // No arithmitic support - dumb patents!
1390         case M_DAC:
1391           stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1392           break;
1393         case M_DQT:
1394           read_dqt_marker();
1395           break;
1396         case M_DRI:
1397           read_dri_marker();
1398           break;
1399 
1400         case M_APP0:
1401             uint num_left;
1402 
1403             num_left = get_bits(16);
1404 
1405             if (num_left < 7)
1406                 stop_decoding(JPGD_BAD_VARIABLE_MARKER);
1407 
1408             num_left -= 2;
1409 
1410             ubyte[5] jfif_id;
1411             foreach(i; 0..5)
1412                 jfif_id[i] = cast(ubyte) get_bits(8);
1413 
1414             num_left -= 5;
1415             static immutable ubyte[5] JFIF = [0x4A, 0x46, 0x49, 0x46, 0x00];
1416             if (jfif_id == JFIF && num_left >= 7)
1417             {
1418                 // skip version
1419                 get_bits(16);
1420                 uint units = get_bits(8);
1421                 int Xdensity = get_bits(16);
1422                 int Ydensity = get_bits(16);
1423                 num_left -= 7;
1424 
1425                 m_pixelAspectRatio = (Xdensity/cast(double)Ydensity);
1426 
1427                 switch (units)
1428                 {
1429                     case 0: // no units, just a ratio
1430                         m_pixelsPerInchX = -1;
1431                         m_pixelsPerInchY = -1;
1432                         break;
1433 
1434                     case 1: // dot per inch
1435                         m_pixelsPerInchX = Xdensity;
1436                         m_pixelsPerInchY = Ydensity;
1437                         break;
1438 
1439                     case 2: // dot per cm
1440                         m_pixelsPerInchX = convertInchesToMeters(Xdensity * 100.0f);
1441                         m_pixelsPerInchY = convertInchesToMeters(Ydensity * 100.0f);
1442                         break;
1443                     default:
1444                 }
1445             }
1446 
1447             // skip rests of chunk
1448 
1449             while (num_left)
1450             {
1451                 get_bits(8);
1452                 num_left--;
1453             }
1454             break;
1455 
1456         case M_APP0+1: // possibly EXIF data
1457          
1458             uint num_left;
1459             num_left = get_bits(16);
1460 
1461             if (num_left < 2)
1462                 stop_decoding(JPGD_BAD_VARIABLE_MARKER);
1463             num_left -= 2;
1464 
1465             ubyte[] exifData = (cast(ubyte*) malloc(num_left))[0..num_left];
1466             scope(exit) free(exifData.ptr);
1467 
1468             foreach(i; 0..num_left)
1469                 exifData[i] = cast(ubyte)(get_bits(8));
1470 
1471             const(ubyte)* s = exifData.ptr;
1472 
1473             ubyte[6] exif_id;
1474             foreach(i; 0..6)
1475                 exif_id[i] = read_ubyte(s);
1476 
1477             const(ubyte)* remainExifData = s;
1478 
1479             static immutable ubyte[6] ExifIdentifierCode = [0x45, 0x78, 0x69, 0x66, 0x00, 0x00]; // "Exif\0\0"
1480             if (exif_id == ExifIdentifierCode)
1481             {
1482                 // See EXIF specification: http://www.cipa.jp/std/documents/e/DC-008-2012_E.pdf
1483 
1484                 const(ubyte)* tiffFile = s; // save exif chunk from "start of TIFF file"
1485 
1486                 ushort byteOrder = read_ushort_BE(s);
1487                 if (byteOrder != 0x4949 && byteOrder != 0x4D4D)
1488                     stop_decoding(JPGD_DECODE_ERROR);
1489                 bool littleEndian = (byteOrder == 0x4949);
1490 
1491                 ushort version_ = littleEndian ? read_ushort_LE(s) : read_ushort_BE(s);
1492                 if (version_ != 42)
1493                     stop_decoding(JPGD_DECODE_ERROR);
1494 
1495                 uint offset = littleEndian ? read_uint_LE(s) : read_uint_BE(s);
1496 
1497                 double resolutionX = 72;
1498                 double resolutionY = 72;
1499                 int unit = 2;
1500 
1501                 // parse all IFDs
1502                 while(offset != 0)
1503                 {
1504                     if (offset > exifData.length)
1505                         stop_decoding(JPGD_DECODE_ERROR);
1506                     const(ubyte)* pIFD = tiffFile + offset;
1507                     ushort numEntries = littleEndian ? read_ushort_LE(pIFD) : read_ushort_BE(pIFD);
1508 
1509                     foreach(entry; 0..numEntries)
1510                     {
1511                         ushort tag = littleEndian ? read_ushort_LE(pIFD) : read_ushort_BE(pIFD);
1512                         ushort type = littleEndian ? read_ushort_LE(pIFD) : read_ushort_BE(pIFD);
1513                         uint count = littleEndian ? read_uint_LE(pIFD) : read_uint_BE(pIFD);
1514                         uint valueOffset = littleEndian ? read_uint_LE(pIFD) : read_uint_BE(pIFD);
1515 
1516                         if (tag == 282 || tag == 283) // XResolution
1517                         {
1518                             const(ubyte)* tagData = tiffFile + valueOffset;
1519                             double num = littleEndian ? read_uint_LE(tagData) : read_uint_BE(tagData);
1520                             double denom = littleEndian ? read_uint_LE(tagData) : read_uint_BE(tagData);
1521                             double frac = num / denom;
1522                             if (tag == 282)
1523                                 resolutionX = frac;
1524                             else
1525                                 resolutionY = frac;
1526                         }
1527 
1528                         if (tag == 296) // unit
1529                             unit = valueOffset;
1530                     }
1531                     offset = littleEndian ? read_uint_LE(pIFD) : read_uint_BE(pIFD);
1532                 }
1533 
1534                 if (unit == 2) // inches
1535                 {
1536                     m_pixelsPerInchX = resolutionX;
1537                     m_pixelsPerInchY = resolutionY;
1538                     m_pixelAspectRatio = resolutionX / resolutionY;
1539                 }
1540                 else if (unit == 3) // dots per cm
1541                 {
1542                     m_pixelsPerInchX = convertInchesToMeters(resolutionX * 100);
1543                     m_pixelsPerInchY = convertInchesToMeters(resolutionY * 100);
1544                     m_pixelAspectRatio = resolutionX / resolutionY;
1545                 }
1546             }
1547             break;
1548 
1549         case M_JPG:
1550         case M_RST0:    /* no parameters */
1551         case M_RST1:
1552         case M_RST2:
1553         case M_RST3:
1554         case M_RST4:
1555         case M_RST5:
1556         case M_RST6:
1557         case M_RST7:
1558         case M_TEM:
1559           stop_decoding(JPGD_UNEXPECTED_MARKER);
1560           break;
1561         default:    /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */
1562           skip_variable_marker();
1563           break;
1564       }
1565     }
1566   }
1567 
1568   // Finds the start of image (SOI) marker.
1569   // This code is rather defensive: it only checks the first 512 bytes to avoid
1570   // false positives.
1571   void locate_soi_marker () {
1572     uint lastchar, thischar;
1573     uint bytesleft;
1574 
1575     lastchar = get_bits(8);
1576 
1577     thischar = get_bits(8);
1578 
1579     /* ok if it's a normal JPEG file without a special header */
1580 
1581     if ((lastchar == 0xFF) && (thischar == M_SOI))
1582       return;
1583 
1584     bytesleft = 4096; //512;
1585 
1586     for ( ; ; )
1587     {
1588       if (--bytesleft == 0)
1589         stop_decoding(JPGD_NOT_JPEG);
1590 
1591       lastchar = thischar;
1592 
1593       thischar = get_bits(8);
1594 
1595       if (lastchar == 0xFF)
1596       {
1597         if (thischar == M_SOI)
1598           break;
1599         else if (thischar == M_EOI) // get_bits will keep returning M_EOI if we read past the end
1600           stop_decoding(JPGD_NOT_JPEG);
1601       }
1602     }
1603 
1604     // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad.
1605     thischar = (m_bit_buf >> 24) & 0xFF;
1606 
1607     if (thischar != 0xFF)
1608       stop_decoding(JPGD_NOT_JPEG);
1609   }
1610 
1611   // Find a start of frame (SOF) marker.
1612   void locate_sof_marker () {
1613     locate_soi_marker();
1614 
1615     int c = process_markers();
1616 
1617     switch (c)
1618     {
1619       case M_SOF2:
1620         m_progressive_flag = true;
1621         goto case;
1622       case M_SOF0:  /* baseline DCT */
1623       case M_SOF1:  /* extended sequential DCT */
1624         read_sof_marker();
1625         break;
1626       case M_SOF9:  /* Arithmitic coding */
1627         stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1628         break;
1629       default:
1630         stop_decoding(JPGD_UNSUPPORTED_MARKER);
1631         break;
1632     }
1633   }
1634 
1635   // Find a start of scan (SOS) marker.
1636   int locate_sos_marker () {
1637     int c;
1638 
1639     c = process_markers();
1640 
1641     if (c == M_EOI)
1642       return false;
1643     else if (c != M_SOS)
1644       stop_decoding(JPGD_UNEXPECTED_MARKER);
1645 
1646     read_sos_marker();
1647 
1648     return true;
1649   }
1650 
1651   // Reset everything to default/uninitialized state.
1652   void initit (JpegStreamReadFunc rfn, void* userData) 
1653   {
1654     m_pMem_blocks = null;
1655     m_error_code = JPGD_SUCCESS;
1656     m_ready_flag = false;
1657     m_image_x_size = m_image_y_size = 0;
1658     readfn = rfn;
1659     this.userData = userData;
1660     m_progressive_flag = false;
1661 
1662     memset(m_huff_ac.ptr, 0, m_huff_ac.sizeof);
1663     memset(m_huff_num.ptr, 0, m_huff_num.sizeof);
1664     memset(m_huff_val.ptr, 0, m_huff_val.sizeof);
1665     memset(m_quant.ptr, 0, m_quant.sizeof);
1666 
1667     m_scan_type = 0;
1668     m_comps_in_frame = 0;
1669 
1670     memset(m_comp_h_samp.ptr, 0, m_comp_h_samp.sizeof);
1671     memset(m_comp_v_samp.ptr, 0, m_comp_v_samp.sizeof);
1672     memset(m_comp_quant.ptr, 0, m_comp_quant.sizeof);
1673     memset(m_comp_ident.ptr, 0, m_comp_ident.sizeof);
1674     memset(m_comp_h_blocks.ptr, 0, m_comp_h_blocks.sizeof);
1675     memset(m_comp_v_blocks.ptr, 0, m_comp_v_blocks.sizeof);
1676 
1677     m_comps_in_scan = 0;
1678     memset(m_comp_list.ptr, 0, m_comp_list.sizeof);
1679     memset(m_comp_dc_tab.ptr, 0, m_comp_dc_tab.sizeof);
1680     memset(m_comp_ac_tab.ptr, 0, m_comp_ac_tab.sizeof);
1681 
1682     m_spectral_start = 0;
1683     m_spectral_end = 0;
1684     m_successive_low = 0;
1685     m_successive_high = 0;
1686     m_max_mcu_x_size = 0;
1687     m_max_mcu_y_size = 0;
1688     m_blocks_per_mcu = 0;
1689     m_max_blocks_per_row = 0;
1690     m_mcus_per_row = 0;
1691     m_mcus_per_col = 0;
1692     m_expanded_blocks_per_component = 0;
1693     m_expanded_blocks_per_mcu = 0;
1694     m_expanded_blocks_per_row = 0;
1695     m_freq_domain_chroma_upsample = false;
1696 
1697     memset(m_mcu_org.ptr, 0, m_mcu_org.sizeof);
1698 
1699     m_total_lines_left = 0;
1700     m_mcu_lines_left = 0;
1701     m_real_dest_bytes_per_scan_line = 0;
1702     m_dest_bytes_per_scan_line = 0;
1703     m_dest_bytes_per_pixel = 0;
1704 
1705     memset(m_pHuff_tabs.ptr, 0, m_pHuff_tabs.sizeof);
1706 
1707     memset(m_dc_coeffs.ptr, 0, m_dc_coeffs.sizeof);
1708     memset(m_ac_coeffs.ptr, 0, m_ac_coeffs.sizeof);
1709     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
1710 
1711     m_eob_run = 0;
1712 
1713     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
1714 
1715     m_pIn_buf_ofs = m_in_buf.ptr;
1716     m_in_buf_left = 0;
1717     m_eof_flag = false;
1718     m_tem_flag = 0;
1719 
1720     memset(m_in_buf_pad_start.ptr, 0, m_in_buf_pad_start.sizeof);
1721     memset(m_in_buf.ptr, 0, m_in_buf.sizeof);
1722     memset(m_in_buf_pad_end.ptr, 0, m_in_buf_pad_end.sizeof);
1723 
1724     m_restart_interval = 0;
1725     m_restarts_left    = 0;
1726     m_next_restart_num = 0;
1727 
1728     m_max_mcus_per_row = 0;
1729     m_max_blocks_per_mcu = 0;
1730     m_max_mcus_per_col = 0;
1731 
1732     memset(m_last_dc_val.ptr, 0, m_last_dc_val.sizeof);
1733     m_pMCU_coefficients = null;
1734     m_pSample_buf = null;
1735 
1736     m_total_bytes_read = 0;
1737 
1738     m_pScan_line_0 = null;
1739     m_pScan_line_1 = null;
1740 
1741     // Ready the input buffer.
1742     prep_in_buffer();
1743 
1744     // Prime the bit buffer.
1745     m_bits_left = 16;
1746     m_bit_buf = 0;
1747 
1748     get_bits(16);
1749     get_bits(16);
1750 
1751     for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++)
1752       m_mcu_block_max_zag.ptr[i] = 64;
1753   }
1754 
1755   enum SCALEBITS = 16;
1756   enum ONE_HALF = (cast(int) 1 << (SCALEBITS-1));
1757   enum FIX(float x) = (cast(int)((x) * (1L<<SCALEBITS) + 0.5f));
1758 
1759   // Create a few tables that allow us to quickly convert YCbCr to RGB.
1760   void create_look_ups () {
1761     for (int i = 0; i <= 255; i++)
1762     {
1763       int k = i - 128;
1764       m_crr.ptr[i] = ( FIX!(1.40200f)  * k + ONE_HALF) >> SCALEBITS;
1765       m_cbb.ptr[i] = ( FIX!(1.77200f)  * k + ONE_HALF) >> SCALEBITS;
1766       m_crg.ptr[i] = (-FIX!(0.71414f)) * k;
1767       m_cbg.ptr[i] = (-FIX!(0.34414f)) * k + ONE_HALF;
1768     }
1769   }
1770 
1771   // This method throws back into the stream any bytes that where read
1772   // into the bit buffer during initial marker scanning.
1773   void fix_in_buffer () {
1774     // In case any 0xFF's where pulled into the buffer during marker scanning.
1775     assert((m_bits_left & 7) == 0);
1776 
1777     if (m_bits_left == 16)
1778       stuff_char(cast(ubyte)(m_bit_buf & 0xFF));
1779 
1780     if (m_bits_left >= 8)
1781       stuff_char(cast(ubyte)((m_bit_buf >> 8) & 0xFF));
1782 
1783     stuff_char(cast(ubyte)((m_bit_buf >> 16) & 0xFF));
1784     stuff_char(cast(ubyte)((m_bit_buf >> 24) & 0xFF));
1785 
1786     m_bits_left = 16;
1787     get_bits_no_markers(16);
1788     get_bits_no_markers(16);
1789   }
1790 
1791   void transform_mcu (int mcu_row) {
1792     jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1793     ubyte* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;
1794 
1795     for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1796     {
1797       idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag.ptr[mcu_block]);
1798       pSrc_ptr += 64;
1799       pDst_ptr += 64;
1800     }
1801   }
1802 
1803   static immutable ubyte[64] s_max_rc = [
1804     17, 18, 34, 50, 50, 51, 52, 52, 52, 68, 84, 84, 84, 84, 85, 86, 86, 86, 86, 86,
1805     102, 118, 118, 118, 118, 118, 118, 119, 120, 120, 120, 120, 120, 120, 120, 136,
1806     136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136,
1807     136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136
1808   ];
1809 
1810   void transform_mcu_expand (int mcu_row) {
1811     jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1812     ubyte* pDst_ptr = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64;
1813 
1814     // Y IDCT
1815     int mcu_block;
1816     for (mcu_block = 0; mcu_block < m_expanded_blocks_per_component; mcu_block++)
1817     {
1818       idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag.ptr[mcu_block]);
1819       pSrc_ptr += 64;
1820       pDst_ptr += 64;
1821     }
1822 
1823     // Chroma IDCT, with upsampling
1824     jpgd_block_t[64] temp_block;
1825 
1826     for (int i = 0; i < 2; i++)
1827     {
1828       DCT_Upsample.Matrix44 P, Q, R, S;
1829 
1830       assert(m_mcu_block_max_zag.ptr[mcu_block] >= 1);
1831       assert(m_mcu_block_max_zag.ptr[mcu_block] <= 64);
1832 
1833       int max_zag = m_mcu_block_max_zag.ptr[mcu_block++] - 1;
1834       if (max_zag <= 0) max_zag = 0; // should never happen, only here to shut up static analysis
1835       switch (s_max_rc.ptr[max_zag])
1836       {
1837       case 1*16+1:
1838         DCT_Upsample.P_Q!(1, 1).calc(P, Q, pSrc_ptr);
1839         DCT_Upsample.R_S!(1, 1).calc(R, S, pSrc_ptr);
1840         break;
1841       case 1*16+2:
1842         DCT_Upsample.P_Q!(1, 2).calc(P, Q, pSrc_ptr);
1843         DCT_Upsample.R_S!(1, 2).calc(R, S, pSrc_ptr);
1844         break;
1845       case 2*16+2:
1846         DCT_Upsample.P_Q!(2, 2).calc(P, Q, pSrc_ptr);
1847         DCT_Upsample.R_S!(2, 2).calc(R, S, pSrc_ptr);
1848         break;
1849       case 3*16+2:
1850         DCT_Upsample.P_Q!(3, 2).calc(P, Q, pSrc_ptr);
1851         DCT_Upsample.R_S!(3, 2).calc(R, S, pSrc_ptr);
1852         break;
1853       case 3*16+3:
1854         DCT_Upsample.P_Q!(3, 3).calc(P, Q, pSrc_ptr);
1855         DCT_Upsample.R_S!(3, 3).calc(R, S, pSrc_ptr);
1856         break;
1857       case 3*16+4:
1858         DCT_Upsample.P_Q!(3, 4).calc(P, Q, pSrc_ptr);
1859         DCT_Upsample.R_S!(3, 4).calc(R, S, pSrc_ptr);
1860         break;
1861       case 4*16+4:
1862         DCT_Upsample.P_Q!(4, 4).calc(P, Q, pSrc_ptr);
1863         DCT_Upsample.R_S!(4, 4).calc(R, S, pSrc_ptr);
1864         break;
1865       case 5*16+4:
1866         DCT_Upsample.P_Q!(5, 4).calc(P, Q, pSrc_ptr);
1867         DCT_Upsample.R_S!(5, 4).calc(R, S, pSrc_ptr);
1868         break;
1869       case 5*16+5:
1870         DCT_Upsample.P_Q!(5, 5).calc(P, Q, pSrc_ptr);
1871         DCT_Upsample.R_S!(5, 5).calc(R, S, pSrc_ptr);
1872         break;
1873       case 5*16+6:
1874         DCT_Upsample.P_Q!(5, 6).calc(P, Q, pSrc_ptr);
1875         DCT_Upsample.R_S!(5, 6).calc(R, S, pSrc_ptr);
1876         break;
1877       case 6*16+6:
1878         DCT_Upsample.P_Q!(6, 6).calc(P, Q, pSrc_ptr);
1879         DCT_Upsample.R_S!(6, 6).calc(R, S, pSrc_ptr);
1880         break;
1881       case 7*16+6:
1882         DCT_Upsample.P_Q!(7, 6).calc(P, Q, pSrc_ptr);
1883         DCT_Upsample.R_S!(7, 6).calc(R, S, pSrc_ptr);
1884         break;
1885       case 7*16+7:
1886         DCT_Upsample.P_Q!(7, 7).calc(P, Q, pSrc_ptr);
1887         DCT_Upsample.R_S!(7, 7).calc(R, S, pSrc_ptr);
1888         break;
1889       case 7*16+8:
1890         DCT_Upsample.P_Q!(7, 8).calc(P, Q, pSrc_ptr);
1891         DCT_Upsample.R_S!(7, 8).calc(R, S, pSrc_ptr);
1892         break;
1893       case 8*16+8:
1894         DCT_Upsample.P_Q!(8, 8).calc(P, Q, pSrc_ptr);
1895         DCT_Upsample.R_S!(8, 8).calc(R, S, pSrc_ptr);
1896         break;
1897       default:
1898         assert(false);
1899       }
1900 
1901       auto a = DCT_Upsample.Matrix44(P + Q);
1902       P -= Q;
1903       DCT_Upsample.Matrix44* b = &P;
1904       auto c = DCT_Upsample.Matrix44(R + S);
1905       R -= S;
1906       DCT_Upsample.Matrix44* d = &R;
1907 
1908       DCT_Upsample.Matrix44.add_and_store(temp_block.ptr, a, c);
1909       idct_4x4(temp_block.ptr, pDst_ptr);
1910       pDst_ptr += 64;
1911 
1912       DCT_Upsample.Matrix44.sub_and_store(temp_block.ptr, a, c);
1913       idct_4x4(temp_block.ptr, pDst_ptr);
1914       pDst_ptr += 64;
1915 
1916       DCT_Upsample.Matrix44.add_and_store(temp_block.ptr, *b, *d);
1917       idct_4x4(temp_block.ptr, pDst_ptr);
1918       pDst_ptr += 64;
1919 
1920       DCT_Upsample.Matrix44.sub_and_store(temp_block.ptr, *b, *d);
1921       idct_4x4(temp_block.ptr, pDst_ptr);
1922       pDst_ptr += 64;
1923 
1924       pSrc_ptr += 64;
1925     }
1926   }
1927 
1928   // Loads and dequantizes the next row of (already decoded) coefficients.
1929   // Progressive images only.
1930   void load_next_row () {
1931     int i;
1932     jpgd_block_t *p;
1933     jpgd_quant_t *q;
1934     int mcu_row, mcu_block, row_block = 0;
1935     int component_num, component_id;
1936     int[JPGD_MAX_COMPONENTS] block_x_mcu;
1937 
1938     memset(block_x_mcu.ptr, 0, JPGD_MAX_COMPONENTS * int.sizeof);
1939 
1940     for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
1941     {
1942       int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
1943 
1944       for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1945       {
1946         component_id = m_mcu_org.ptr[mcu_block];
1947         q = m_quant.ptr[m_comp_quant.ptr[component_id]];
1948 
1949         p = m_pMCU_coefficients + 64 * mcu_block;
1950 
1951         jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs.ptr[component_id], block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
1952         jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs.ptr[component_id], block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
1953         p[0] = pDC[0];
1954         memcpy(&p[1], &pAC[1], 63 * jpgd_block_t.sizeof);
1955 
1956         for (i = 63; i > 0; i--)
1957           if (p[g_ZAG[i]])
1958             break;
1959 
1960         m_mcu_block_max_zag.ptr[mcu_block] = i + 1;
1961 
1962         for ( ; i >= 0; i--)
1963           if (p[g_ZAG[i]])
1964             p[g_ZAG[i]] = cast(jpgd_block_t)(p[g_ZAG[i]] * q[i]);
1965 
1966         row_block++;
1967 
1968         if (m_comps_in_scan == 1)
1969           block_x_mcu.ptr[component_id]++;
1970         else
1971         {
1972           if (++block_x_mcu_ofs == m_comp_h_samp.ptr[component_id])
1973           {
1974             block_x_mcu_ofs = 0;
1975 
1976             if (++block_y_mcu_ofs == m_comp_v_samp.ptr[component_id])
1977             {
1978               block_y_mcu_ofs = 0;
1979 
1980               block_x_mcu.ptr[component_id] += m_comp_h_samp.ptr[component_id];
1981             }
1982           }
1983         }
1984       }
1985 
1986       if (m_freq_domain_chroma_upsample)
1987         transform_mcu_expand(mcu_row);
1988       else
1989         transform_mcu(mcu_row);
1990     }
1991 
1992     if (m_comps_in_scan == 1)
1993       m_block_y_mcu.ptr[m_comp_list.ptr[0]]++;
1994     else
1995     {
1996       for (component_num = 0; component_num < m_comps_in_scan; component_num++)
1997       {
1998         component_id = m_comp_list.ptr[component_num];
1999 
2000         m_block_y_mcu.ptr[component_id] += m_comp_v_samp.ptr[component_id];
2001       }
2002     }
2003   }
2004 
2005   // Restart interval processing.
2006   void process_restart () {
2007     int i;
2008     int c = 0;
2009 
2010     // Align to a byte boundry
2011     // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers!
2012     //get_bits_no_markers(m_bits_left & 7);
2013 
2014     // Let's scan a little bit to find the marker, but not _too_ far.
2015     // 1536 is a "fudge factor" that determines how much to scan.
2016     for (i = 1536; i > 0; i--)
2017       if (get_char() == 0xFF)
2018         break;
2019 
2020     if (i == 0)
2021       stop_decoding(JPGD_BAD_RESTART_MARKER);
2022 
2023     for ( ; i > 0; i--)
2024       if ((c = get_char()) != 0xFF)
2025         break;
2026 
2027     if (i == 0)
2028       stop_decoding(JPGD_BAD_RESTART_MARKER);
2029 
2030     // Is it the expected marker? If not, something bad happened.
2031     if (c != (m_next_restart_num + M_RST0))
2032       stop_decoding(JPGD_BAD_RESTART_MARKER);
2033 
2034     // Reset each component's DC prediction values.
2035     memset(&m_last_dc_val, 0, m_comps_in_frame * uint.sizeof);
2036 
2037     m_eob_run = 0;
2038 
2039     m_restarts_left = m_restart_interval;
2040 
2041     m_next_restart_num = (m_next_restart_num + 1) & 7;
2042 
2043     // Get the bit buffer going again...
2044 
2045     m_bits_left = 16;
2046     get_bits_no_markers(16);
2047     get_bits_no_markers(16);
2048   }
2049 
2050   // Decodes and dequantizes the next row of coefficients.
2051   void decode_next_row () {
2052     int row_block = 0;
2053 
2054     for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
2055     {
2056       if ((m_restart_interval) && (m_restarts_left == 0))
2057         process_restart();
2058 
2059       jpgd_block_t* p = m_pMCU_coefficients;
2060       for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64)
2061       {
2062         int component_id = m_mcu_org.ptr[mcu_block];
2063         jpgd_quant_t* q = m_quant.ptr[m_comp_quant.ptr[component_id]];
2064 
2065         int r, s;
2066         s = huff_decode(m_pHuff_tabs.ptr[m_comp_dc_tab.ptr[component_id]], r);
2067         s = JPGD_HUFF_EXTEND(r, s);
2068 
2069         m_last_dc_val.ptr[component_id] = (s += m_last_dc_val.ptr[component_id]);
2070 
2071         p[0] = cast(jpgd_block_t)(s * q[0]);
2072 
2073         int prev_num_set = m_mcu_block_max_zag.ptr[mcu_block];
2074 
2075         huff_tables *pH = m_pHuff_tabs.ptr[m_comp_ac_tab.ptr[component_id]];
2076 
2077         int k;
2078         for (k = 1; k < 64; k++)
2079         {
2080           int extra_bits;
2081           s = huff_decode(pH, extra_bits);
2082 
2083           r = s >> 4;
2084           s &= 15;
2085 
2086           if (s)
2087           {
2088             if (r)
2089             {
2090               if ((k + r) > 63)
2091                 stop_decoding(JPGD_DECODE_ERROR);
2092 
2093               if (k < prev_num_set)
2094               {
2095                 int n = JPGD_MIN(r, prev_num_set - k);
2096                 int kt = k;
2097                 while (n--)
2098                   p[g_ZAG[kt++]] = 0;
2099               }
2100 
2101               k += r;
2102             }
2103 
2104             s = JPGD_HUFF_EXTEND(extra_bits, s);
2105 
2106             assert(k < 64);
2107 
2108             p[g_ZAG[k]] = cast(jpgd_block_t)( s * q[k] ); // dequantize
2109           }
2110           else
2111           {
2112             if (r == 15)
2113             {
2114               if ((k + 16) > 64)
2115                 stop_decoding(JPGD_DECODE_ERROR);
2116 
2117               if (k < prev_num_set)
2118               {
2119                 int n = JPGD_MIN(16, prev_num_set - k);
2120                 int kt = k;
2121                 while (n--)
2122                 {
2123                   assert(kt <= 63);
2124                   p[g_ZAG[kt++]] = 0;
2125                 }
2126               }
2127 
2128               k += 16 - 1; // - 1 because the loop counter is k
2129               assert(p[g_ZAG[k]] == 0);
2130             }
2131             else
2132               break;
2133           }
2134         }
2135 
2136         if (k < prev_num_set)
2137         {
2138           int kt = k;
2139           while (kt < prev_num_set)
2140             p[g_ZAG[kt++]] = 0;
2141         }
2142 
2143         m_mcu_block_max_zag.ptr[mcu_block] = k;
2144 
2145         row_block++;
2146       }
2147 
2148       if (m_freq_domain_chroma_upsample)
2149         transform_mcu_expand(mcu_row);
2150       else
2151         transform_mcu(mcu_row);
2152 
2153       m_restarts_left--;
2154     }
2155   }
2156 
2157   // YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB
2158   void H1V1Convert () {
2159     int row = m_max_mcu_y_size - m_mcu_lines_left;
2160     ubyte *d = m_pScan_line_0;
2161     ubyte *s = m_pSample_buf + row * 8;
2162 
2163     for (int i = m_max_mcus_per_row; i > 0; i--)
2164     {
2165       for (int j = 0; j < 8; j++)
2166       {
2167         int y = s[j];
2168         int cb = s[64+j];
2169         int cr = s[128+j];
2170 
2171 
2172         __m128i zero = _mm_setzero_si128();
2173         __m128i A = _mm_setr_epi32(y + m_crr.ptr[cr], 
2174                                    y + ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16),
2175                                    y + m_cbb.ptr[cb],
2176                                    255);
2177         A = _mm_packs_epi32(A, zero);
2178         A = _mm_packus_epi16(A, zero);
2179         _mm_storeu_si32(&d[0], A);
2180         d += 4;
2181       }
2182 
2183       s += 64*3;
2184     }
2185   }
2186 
2187   // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB
2188   void H2V1Convert () 
2189   {
2190     int row = m_max_mcu_y_size - m_mcu_lines_left;
2191     ubyte *d0 = m_pScan_line_0;
2192     ubyte *y = m_pSample_buf + row * 8;
2193     ubyte *c = m_pSample_buf + 2*64 + row * 8;
2194 
2195     for (int i = m_max_mcus_per_row; i > 0; i--)
2196     {
2197       for (int l = 0; l < 2; l++)
2198       {
2199         for (int j = 0; j < 4; j++)
2200         {
2201           int cb = c[0];
2202           int cr = c[64];
2203 
2204           int rc = m_crr.ptr[cr];
2205           int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2206           int bc = m_cbb.ptr[cb];
2207 
2208           int yy = y[j<<1];
2209           d0[0] = clamp(yy+rc);
2210           d0[1] = clamp(yy+gc);
2211           d0[2] = clamp(yy+bc);
2212           d0[3] = 255;
2213 
2214           yy = y[(j<<1)+1];
2215           d0[4] = clamp(yy+rc);
2216           d0[5] = clamp(yy+gc);
2217           d0[6] = clamp(yy+bc);
2218           d0[7] = 255;
2219 
2220           d0 += 8;
2221 
2222           c++;
2223         }
2224         y += 64;
2225       }
2226 
2227       y += 64*4 - 64*2;
2228       c += 64*4 - 8;
2229     }
2230   }
2231 
2232   // YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB
2233   void H1V2Convert () {
2234     int row = m_max_mcu_y_size - m_mcu_lines_left;
2235     ubyte *d0 = m_pScan_line_0;
2236     ubyte *d1 = m_pScan_line_1;
2237     ubyte *y;
2238     ubyte *c;
2239 
2240     if (row < 8)
2241       y = m_pSample_buf + row * 8;
2242     else
2243       y = m_pSample_buf + 64*1 + (row & 7) * 8;
2244 
2245     c = m_pSample_buf + 64*2 + (row >> 1) * 8;
2246 
2247     for (int i = m_max_mcus_per_row; i > 0; i--)
2248     {
2249       for (int j = 0; j < 8; j++)
2250       {
2251         int cb = c[0+j];
2252         int cr = c[64+j];
2253 
2254         int rc = m_crr.ptr[cr];
2255         int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2256         int bc = m_cbb.ptr[cb];
2257 
2258         int yy = y[j];
2259         d0[0] = clamp(yy+rc);
2260         d0[1] = clamp(yy+gc);
2261         d0[2] = clamp(yy+bc);
2262         d0[3] = 255;
2263 
2264         yy = y[8+j];
2265         d1[0] = clamp(yy+rc);
2266         d1[1] = clamp(yy+gc);
2267         d1[2] = clamp(yy+bc);
2268         d1[3] = 255;
2269 
2270         d0 += 4;
2271         d1 += 4;
2272       }
2273 
2274       y += 64*4;
2275       c += 64*4;
2276     }
2277   }
2278 
2279   // YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB
2280   void H2V2Convert () {
2281     int row = m_max_mcu_y_size - m_mcu_lines_left;
2282     ubyte *d0 = m_pScan_line_0;
2283     ubyte *d1 = m_pScan_line_1;
2284     ubyte *y;
2285     ubyte *c;
2286 
2287     if (row < 8)
2288       y = m_pSample_buf + row * 8;
2289     else
2290       y = m_pSample_buf + 64*2 + (row & 7) * 8;
2291 
2292     c = m_pSample_buf + 64*4 + (row >> 1) * 8;
2293 
2294     for (int i = m_max_mcus_per_row; i > 0; i--)
2295     {
2296       for (int l = 0; l < 2; l++)
2297       {
2298         for (int j = 0; j < 8; j += 2)
2299         {
2300           int cb = c[0];
2301           int cr = c[64];
2302 
2303           int rc = m_crr.ptr[cr];
2304           int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2305           int bc = m_cbb.ptr[cb];
2306 
2307           int yy = y[j];
2308           d0[0] = clamp(yy+rc);
2309           d0[1] = clamp(yy+gc);
2310           d0[2] = clamp(yy+bc);
2311           d0[3] = 255;
2312 
2313           yy = y[j+1];
2314           d0[4] = clamp(yy+rc);
2315           d0[5] = clamp(yy+gc);
2316           d0[6] = clamp(yy+bc);
2317           d0[7] = 255;
2318 
2319           yy = y[j+8];
2320           d1[0] = clamp(yy+rc);
2321           d1[1] = clamp(yy+gc);
2322           d1[2] = clamp(yy+bc);
2323           d1[3] = 255;
2324 
2325           yy = y[j+8+1];
2326           d1[4] = clamp(yy+rc);
2327           d1[5] = clamp(yy+gc);
2328           d1[6] = clamp(yy+bc);
2329           d1[7] = 255;
2330 
2331           d0 += 8;
2332           d1 += 8;
2333 
2334           c++;
2335         }
2336         y += 64;
2337       }
2338 
2339       y += 64*6 - 64*2;
2340       c += 64*6 - 8;
2341     }
2342   }
2343 
2344   // Y (1 block per MCU) to 8-bit grayscale
2345   void gray_convert () {
2346     int row = m_max_mcu_y_size - m_mcu_lines_left;
2347     ubyte *d = m_pScan_line_0;
2348     ubyte *s = m_pSample_buf + row * 8;
2349 
2350     for (int i = m_max_mcus_per_row; i > 0; i--)
2351     {
2352       *cast(uint*)d = *cast(uint*)s;
2353       *cast(uint*)(&d[4]) = *cast(uint*)(&s[4]);
2354 
2355       s += 64;
2356       d += 8;
2357     }
2358   }
2359 
2360 
2361   void expanded_convert () 
2362   {
2363     int row = m_max_mcu_y_size - m_mcu_lines_left;
2364 
2365     ubyte* Py = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp.ptr[0] + (row & 7) * 8;
2366 
2367     ubyte* d = m_pScan_line_0;
2368 
2369     for (int i = m_max_mcus_per_row; i > 0; i--)
2370     {
2371       for (int k = 0; k < m_max_mcu_x_size; k += 8)
2372       {
2373         immutable int Y_ofs = k * 8;
2374         immutable int Cb_ofs = Y_ofs + 64 * m_expanded_blocks_per_component;
2375         immutable int Cr_ofs = Y_ofs + 64 * m_expanded_blocks_per_component * 2;
2376         for (int j = 0; j < 8; j++)
2377         {
2378           int y = Py[Y_ofs + j];
2379           int cb = Py[Cb_ofs + j];
2380           int cr = Py[Cr_ofs + j];
2381 
2382           __m128i zero = _mm_setzero_si128();
2383           __m128i A = _mm_setr_epi32(y + m_crr.ptr[cr], 
2384                                      y + ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16),
2385                                      y + m_cbb.ptr[cb],
2386                                      255);
2387           A = _mm_packs_epi32(A, zero);
2388           A = _mm_packus_epi16(A, zero);
2389           _mm_storeu_si32(&d[0], A);
2390           d += 4;
2391         }
2392       }
2393 
2394       Py += 64 * m_expanded_blocks_per_mcu;
2395     }
2396   }
2397 
2398   // Find end of image (EOI) marker, so we can return to the user the exact size of the input stream.
2399   void find_eoi () {
2400     if (!m_progressive_flag)
2401     {
2402       // Attempt to read the EOI marker.
2403       //get_bits_no_markers(m_bits_left & 7);
2404 
2405       // Prime the bit buffer
2406       m_bits_left = 16;
2407       get_bits(16);
2408       get_bits(16);
2409 
2410       // The next marker _should_ be EOI
2411       process_markers();
2412     }
2413 
2414     m_total_bytes_read -= m_in_buf_left;
2415   }
2416 
2417   // Creates the tables needed for efficient Huffman decoding.
2418   void make_huff_table (int index, huff_tables *pH) {
2419     int p, i, l, si;
2420     ubyte[257] huffsize;
2421     uint[257] huffcode;
2422     uint code;
2423     uint subtree;
2424     int code_size;
2425     int lastp;
2426     int nextfreeentry;
2427     int currententry;
2428 
2429     pH.ac_table = m_huff_ac.ptr[index] != 0;
2430 
2431     p = 0;
2432 
2433     for (l = 1; l <= 16; l++)
2434     {
2435       for (i = 1; i <= m_huff_num.ptr[index][l]; i++)
2436         huffsize.ptr[p++] = cast(ubyte)(l);
2437     }
2438 
2439     huffsize.ptr[p] = 0;
2440 
2441     lastp = p;
2442 
2443     code = 0;
2444     si = huffsize.ptr[0];
2445     p = 0;
2446 
2447     while (huffsize.ptr[p])
2448     {
2449       while (huffsize.ptr[p] == si)
2450       {
2451         huffcode.ptr[p++] = code;
2452         code++;
2453       }
2454 
2455       code <<= 1;
2456       si++;
2457     }
2458 
2459     memset(pH.look_up.ptr, 0, pH.look_up.sizeof);
2460     memset(pH.look_up2.ptr, 0, pH.look_up2.sizeof);
2461     memset(pH.tree.ptr, 0, pH.tree.sizeof);
2462     memset(pH.code_size.ptr, 0, pH.code_size.sizeof);
2463 
2464     nextfreeentry = -1;
2465 
2466     p = 0;
2467 
2468     while (p < lastp)
2469     {
2470       i = m_huff_val.ptr[index][p];
2471       code = huffcode.ptr[p];
2472       code_size = huffsize.ptr[p];
2473 
2474       pH.code_size.ptr[i] = cast(ubyte)(code_size);
2475 
2476       if (code_size <= 8)
2477       {
2478         code <<= (8 - code_size);
2479 
2480         for (l = 1 << (8 - code_size); l > 0; l--)
2481         {
2482           assert(i < 256);
2483 
2484           pH.look_up.ptr[code] = i;
2485 
2486           bool has_extrabits = false;
2487           int extra_bits = 0;
2488           int num_extra_bits = i & 15;
2489 
2490           int bits_to_fetch = code_size;
2491           if (num_extra_bits)
2492           {
2493             int total_codesize = code_size + num_extra_bits;
2494             if (total_codesize <= 8)
2495             {
2496               has_extrabits = true;
2497               extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize));
2498               assert(extra_bits <= 0x7FFF);
2499               bits_to_fetch += num_extra_bits;
2500             }
2501           }
2502 
2503           if (!has_extrabits)
2504             pH.look_up2.ptr[code] = i | (bits_to_fetch << 8);
2505           else
2506             pH.look_up2.ptr[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8);
2507 
2508           code++;
2509         }
2510       }
2511       else
2512       {
2513         subtree = (code >> (code_size - 8)) & 0xFF;
2514 
2515         currententry = pH.look_up.ptr[subtree];
2516 
2517         if (currententry == 0)
2518         {
2519           pH.look_up.ptr[subtree] = currententry = nextfreeentry;
2520           pH.look_up2.ptr[subtree] = currententry = nextfreeentry;
2521 
2522           nextfreeentry -= 2;
2523         }
2524 
2525         code <<= (16 - (code_size - 8));
2526 
2527         for (l = code_size; l > 9; l--)
2528         {
2529           if ((code & 0x8000) == 0)
2530             currententry--;
2531 
2532           if (pH.tree.ptr[-currententry - 1] == 0)
2533           {
2534             pH.tree.ptr[-currententry - 1] = nextfreeentry;
2535 
2536             currententry = nextfreeentry;
2537 
2538             nextfreeentry -= 2;
2539           }
2540           else
2541             currententry = pH.tree.ptr[-currententry - 1];
2542 
2543           code <<= 1;
2544         }
2545 
2546         if ((code & 0x8000) == 0)
2547           currententry--;
2548 
2549         pH.tree.ptr[-currententry - 1] = i;
2550       }
2551 
2552       p++;
2553     }
2554   }
2555 
2556   // Verifies the quantization tables needed for this scan are available.
2557   void check_quant_tables () {
2558     for (int i = 0; i < m_comps_in_scan; i++)
2559       if (m_quant.ptr[m_comp_quant.ptr[m_comp_list.ptr[i]]] == null)
2560         stop_decoding(JPGD_UNDEFINED_QUANT_TABLE);
2561   }
2562 
2563   // Verifies that all the Huffman tables needed for this scan are available.
2564   void check_huff_tables () {
2565     for (int i = 0; i < m_comps_in_scan; i++)
2566     {
2567       if ((m_spectral_start == 0) && (m_huff_num.ptr[m_comp_dc_tab.ptr[m_comp_list.ptr[i]]] == null))
2568         stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2569 
2570       if ((m_spectral_end > 0) && (m_huff_num.ptr[m_comp_ac_tab.ptr[m_comp_list.ptr[i]]] == null))
2571         stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2572     }
2573 
2574     for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++)
2575       if (m_huff_num.ptr[i])
2576       {
2577         if (!m_pHuff_tabs.ptr[i])
2578           m_pHuff_tabs.ptr[i] = cast(huff_tables*)alloc(huff_tables.sizeof);
2579 
2580         make_huff_table(i, m_pHuff_tabs.ptr[i]);
2581       }
2582   }
2583 
2584   // Determines the component order inside each MCU.
2585   // Also calcs how many MCU's are on each row, etc.
2586   void calc_mcu_block_order () {
2587     int component_num, component_id;
2588     int max_h_samp = 0, max_v_samp = 0;
2589 
2590     for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2591     {
2592       if (m_comp_h_samp.ptr[component_id] > max_h_samp)
2593         max_h_samp = m_comp_h_samp.ptr[component_id];
2594 
2595       if (m_comp_v_samp.ptr[component_id] > max_v_samp)
2596         max_v_samp = m_comp_v_samp.ptr[component_id];
2597     }
2598 
2599     for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2600     {
2601       m_comp_h_blocks.ptr[component_id] = ((((m_image_x_size * m_comp_h_samp.ptr[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8;
2602       m_comp_v_blocks.ptr[component_id] = ((((m_image_y_size * m_comp_v_samp.ptr[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8;
2603     }
2604 
2605     if (m_comps_in_scan == 1)
2606     {
2607       m_mcus_per_row = m_comp_h_blocks.ptr[m_comp_list.ptr[0]];
2608       m_mcus_per_col = m_comp_v_blocks.ptr[m_comp_list.ptr[0]];
2609     }
2610     else
2611     {
2612       m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp;
2613       m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp;
2614     }
2615 
2616     if (m_comps_in_scan == 1)
2617     {
2618       m_mcu_org.ptr[0] = m_comp_list.ptr[0];
2619 
2620       m_blocks_per_mcu = 1;
2621     }
2622     else
2623     {
2624       m_blocks_per_mcu = 0;
2625 
2626       for (component_num = 0; component_num < m_comps_in_scan; component_num++)
2627       {
2628         int num_blocks;
2629 
2630         component_id = m_comp_list.ptr[component_num];
2631 
2632         num_blocks = m_comp_h_samp.ptr[component_id] * m_comp_v_samp.ptr[component_id];
2633 
2634         while (num_blocks--)
2635           m_mcu_org.ptr[m_blocks_per_mcu++] = component_id;
2636       }
2637     }
2638   }
2639 
2640   // Starts a new scan.
2641   int init_scan () {
2642     if (!locate_sos_marker())
2643       return false;
2644 
2645     calc_mcu_block_order();
2646 
2647     check_huff_tables();
2648 
2649     check_quant_tables();
2650 
2651     memset(m_last_dc_val.ptr, 0, m_comps_in_frame * uint.sizeof);
2652 
2653     m_eob_run = 0;
2654 
2655     if (m_restart_interval)
2656     {
2657       m_restarts_left = m_restart_interval;
2658       m_next_restart_num = 0;
2659     }
2660 
2661     fix_in_buffer();
2662 
2663     return true;
2664   }
2665 
2666   // Starts a frame. Determines if the number of components or sampling factors
2667   // are supported.
2668   void init_frame () {
2669     int i;
2670 
2671     if (m_comps_in_frame == 1)
2672     {
2673       if ((m_comp_h_samp.ptr[0] != 1) || (m_comp_v_samp.ptr[0] != 1))
2674         stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2675 
2676       m_scan_type = JPGD_GRAYSCALE;
2677       m_max_blocks_per_mcu = 1;
2678       m_max_mcu_x_size = 8;
2679       m_max_mcu_y_size = 8;
2680     }
2681     else if (m_comps_in_frame == 3)
2682     {
2683       if ( ((m_comp_h_samp.ptr[1] != 1) || (m_comp_v_samp.ptr[1] != 1)) ||
2684            ((m_comp_h_samp.ptr[2] != 1) || (m_comp_v_samp.ptr[2] != 1)) )
2685         stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2686 
2687       if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 1))
2688       {
2689         m_scan_type = JPGD_YH1V1;
2690 
2691         m_max_blocks_per_mcu = 3;
2692         m_max_mcu_x_size = 8;
2693         m_max_mcu_y_size = 8;
2694       }
2695       else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 1))
2696       {
2697         m_scan_type = JPGD_YH2V1;
2698         m_max_blocks_per_mcu = 4;
2699         m_max_mcu_x_size = 16;
2700         m_max_mcu_y_size = 8;
2701       }
2702       else if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 2))
2703       {
2704         m_scan_type = JPGD_YH1V2;
2705         m_max_blocks_per_mcu = 4;
2706         m_max_mcu_x_size = 8;
2707         m_max_mcu_y_size = 16;
2708       }
2709       else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 2))
2710       {
2711         m_scan_type = JPGD_YH2V2;
2712         m_max_blocks_per_mcu = 6;
2713         m_max_mcu_x_size = 16;
2714         m_max_mcu_y_size = 16;
2715       }
2716       else
2717         stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2718     }
2719     else
2720       stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
2721 
2722     m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size;
2723     m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size;
2724 
2725     // These values are for the *destination* pixels: after conversion.
2726     if (m_scan_type == JPGD_GRAYSCALE)
2727       m_dest_bytes_per_pixel = 1;
2728     else
2729       m_dest_bytes_per_pixel = 4;
2730 
2731     m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel;
2732 
2733     m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);
2734 
2735     // Initialize two scan line buffers.
2736     m_pScan_line_0 = cast(ubyte*)alloc(m_dest_bytes_per_scan_line, true);
2737     if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2))
2738       m_pScan_line_1 = cast(ubyte*)alloc(m_dest_bytes_per_scan_line, true);
2739 
2740     m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;
2741 
2742     // Should never happen
2743     if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW)
2744       stop_decoding(JPGD_ASSERTION_ERROR);
2745 
2746     // Allocate the coefficient buffer, enough for one MCU
2747     m_pMCU_coefficients = cast(jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * jpgd_block_t.sizeof);
2748 
2749     for (i = 0; i < m_max_blocks_per_mcu; i++)
2750       m_mcu_block_max_zag.ptr[i] = 64;
2751 
2752     m_expanded_blocks_per_component = m_comp_h_samp.ptr[0] * m_comp_v_samp.ptr[0];
2753     m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame;
2754     m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu;
2755     // Freq. domain chroma upsampling is only supported for H2V2 subsampling factor (the most common one I've seen).
2756     m_freq_domain_chroma_upsample = false;
2757     version(JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING) {
2758       m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4*3);
2759     }
2760 
2761     if (m_freq_domain_chroma_upsample)
2762       m_pSample_buf = cast(ubyte*)alloc(m_expanded_blocks_per_row * 64);
2763     else
2764       m_pSample_buf = cast(ubyte*)alloc(m_max_blocks_per_row * 64);
2765 
2766     m_total_lines_left = m_image_y_size;
2767 
2768     m_mcu_lines_left = 0;
2769 
2770     create_look_ups();
2771   }
2772 
2773   // The coeff_buf series of methods originally stored the coefficients
2774   // into a "virtual" file which was located in EMS, XMS, or a disk file. A cache
2775   // was used to make this process more efficient. Now, we can store the entire
2776   // thing in RAM.
2777   coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y) {
2778     coeff_buf* cb = cast(coeff_buf*)alloc(coeff_buf.sizeof);
2779 
2780     cb.block_num_x = block_num_x;
2781     cb.block_num_y = block_num_y;
2782     cb.block_len_x = block_len_x;
2783     cb.block_len_y = block_len_y;
2784     cb.block_size = (block_len_x * block_len_y) * cast(int)(jpgd_block_t.sizeof);
2785     cb.pData = cast(ubyte*)alloc(cb.block_size * block_num_x * block_num_y, true);
2786     return cb;
2787   }
2788 
2789   jpgd_block_t* coeff_buf_getp (coeff_buf *cb, int block_x, int block_y) {
2790     assert((block_x < cb.block_num_x) && (block_y < cb.block_num_y));
2791     return cast(jpgd_block_t*)(cb.pData + block_x * cb.block_size + block_y * (cb.block_size * cb.block_num_x));
2792   }
2793 
2794   // The following methods decode the various types of m_blocks encountered
2795   // in progressively encoded images.
2796   static void decode_block_dc_first (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2797     int s, r;
2798     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_dc_coeffs.ptr[component_id], block_x, block_y);
2799 
2800     if ((s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_dc_tab.ptr[component_id]])) != 0)
2801     {
2802       r = pD.get_bits_no_markers(s);
2803       s = JPGD_HUFF_EXTEND(r, s);
2804     }
2805 
2806     pD.m_last_dc_val.ptr[component_id] = (s += pD.m_last_dc_val.ptr[component_id]);
2807 
2808     p[0] = cast(jpgd_block_t)(s << pD.m_successive_low);
2809   }
2810 
2811   static void decode_block_dc_refine (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2812     if (pD.get_bits_no_markers(1))
2813     {
2814       jpgd_block_t *p = pD.coeff_buf_getp(pD.m_dc_coeffs.ptr[component_id], block_x, block_y);
2815 
2816       p[0] |= (1 << pD.m_successive_low);
2817     }
2818   }
2819 
2820   static void decode_block_ac_first (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2821     int k, s, r;
2822 
2823     if (pD.m_eob_run)
2824     {
2825       pD.m_eob_run--;
2826       return;
2827     }
2828 
2829     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_ac_coeffs.ptr[component_id], block_x, block_y);
2830 
2831     for (k = pD.m_spectral_start; k <= pD.m_spectral_end; k++)
2832     {
2833       s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_ac_tab.ptr[component_id]]);
2834 
2835       r = s >> 4;
2836       s &= 15;
2837 
2838       if (s)
2839       {
2840         if ((k += r) > 63)
2841           pD.stop_decoding(JPGD_DECODE_ERROR);
2842 
2843         r = pD.get_bits_no_markers(s);
2844         s = JPGD_HUFF_EXTEND(r, s);
2845 
2846         p[g_ZAG[k]] = cast(jpgd_block_t)(s << pD.m_successive_low);
2847       }
2848       else
2849       {
2850         if (r == 15)
2851         {
2852           if ((k += 15) > 63)
2853             pD.stop_decoding(JPGD_DECODE_ERROR);
2854         }
2855         else
2856         {
2857           pD.m_eob_run = 1 << r;
2858 
2859           if (r)
2860             pD.m_eob_run += pD.get_bits_no_markers(r);
2861 
2862           pD.m_eob_run--;
2863 
2864           break;
2865         }
2866       }
2867     }
2868   }
2869 
2870   static void decode_block_ac_refine (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2871     int s, k, r;
2872     int p1 = 1 << pD.m_successive_low;
2873     int m1 = (-1) << pD.m_successive_low;
2874     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_ac_coeffs.ptr[component_id], block_x, block_y);
2875 
2876     assert(pD.m_spectral_end <= 63);
2877 
2878     k = pD.m_spectral_start;
2879 
2880     if (pD.m_eob_run == 0)
2881     {
2882       for ( ; k <= pD.m_spectral_end; k++)
2883       {
2884         s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_ac_tab.ptr[component_id]]);
2885 
2886         r = s >> 4;
2887         s &= 15;
2888 
2889         if (s)
2890         {
2891           if (s != 1)
2892             pD.stop_decoding(JPGD_DECODE_ERROR);
2893 
2894           if (pD.get_bits_no_markers(1))
2895             s = p1;
2896           else
2897             s = m1;
2898         }
2899         else
2900         {
2901           if (r != 15)
2902           {
2903             pD.m_eob_run = 1 << r;
2904 
2905             if (r)
2906               pD.m_eob_run += pD.get_bits_no_markers(r);
2907 
2908             break;
2909           }
2910         }
2911 
2912         do
2913         {
2914           jpgd_block_t *this_coef = p + g_ZAG[k & 63];
2915 
2916           if (*this_coef != 0)
2917           {
2918             if (pD.get_bits_no_markers(1))
2919             {
2920               if ((*this_coef & p1) == 0)
2921               {
2922                 if (*this_coef >= 0)
2923                   *this_coef = cast(jpgd_block_t)(*this_coef + p1);
2924                 else
2925                   *this_coef = cast(jpgd_block_t)(*this_coef + m1);
2926               }
2927             }
2928           }
2929           else
2930           {
2931             if (--r < 0)
2932               break;
2933           }
2934 
2935           k++;
2936 
2937         } while (k <= pD.m_spectral_end);
2938 
2939         if ((s) && (k < 64))
2940         {
2941           p[g_ZAG[k]] = cast(jpgd_block_t)(s);
2942         }
2943       }
2944     }
2945 
2946     if (pD.m_eob_run > 0)
2947     {
2948       for ( ; k <= pD.m_spectral_end; k++)
2949       {
2950         jpgd_block_t *this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis
2951 
2952         if (*this_coef != 0)
2953         {
2954           if (pD.get_bits_no_markers(1))
2955           {
2956             if ((*this_coef & p1) == 0)
2957             {
2958               if (*this_coef >= 0)
2959                 *this_coef = cast(jpgd_block_t)(*this_coef + p1);
2960               else
2961                 *this_coef = cast(jpgd_block_t)(*this_coef + m1);
2962             }
2963           }
2964         }
2965       }
2966 
2967       pD.m_eob_run--;
2968     }
2969   }
2970 
2971   // Decode a scan in a progressively encoded image.
2972   void decode_scan (pDecode_block_func decode_block_func) {
2973     int mcu_row, mcu_col, mcu_block;
2974     int[JPGD_MAX_COMPONENTS] block_x_mcu;
2975     int[JPGD_MAX_COMPONENTS] m_block_y_mcu;
2976 
2977     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
2978 
2979     for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++)
2980     {
2981       int component_num, component_id;
2982 
2983       memset(block_x_mcu.ptr, 0, block_x_mcu.sizeof);
2984 
2985       for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
2986       {
2987         int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
2988 
2989         if ((m_restart_interval) && (m_restarts_left == 0))
2990           process_restart();
2991 
2992         for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
2993         {
2994           component_id = m_mcu_org.ptr[mcu_block];
2995 
2996           decode_block_func(this, component_id, block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
2997 
2998           if (m_comps_in_scan == 1)
2999             block_x_mcu.ptr[component_id]++;
3000           else
3001           {
3002             if (++block_x_mcu_ofs == m_comp_h_samp.ptr[component_id])
3003             {
3004               block_x_mcu_ofs = 0;
3005 
3006               if (++block_y_mcu_ofs == m_comp_v_samp.ptr[component_id])
3007               {
3008                 block_y_mcu_ofs = 0;
3009                 block_x_mcu.ptr[component_id] += m_comp_h_samp.ptr[component_id];
3010               }
3011             }
3012           }
3013         }
3014 
3015         m_restarts_left--;
3016       }
3017 
3018       if (m_comps_in_scan == 1)
3019         m_block_y_mcu.ptr[m_comp_list.ptr[0]]++;
3020       else
3021       {
3022         for (component_num = 0; component_num < m_comps_in_scan; component_num++)
3023         {
3024           component_id = m_comp_list.ptr[component_num];
3025           m_block_y_mcu.ptr[component_id] += m_comp_v_samp.ptr[component_id];
3026         }
3027       }
3028     }
3029   }
3030 
3031   // Decode a progressively encoded image.
3032   void init_progressive () {
3033     int i;
3034 
3035     if (m_comps_in_frame == 4)
3036       stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
3037 
3038     // Allocate the coefficient buffers.
3039     for (i = 0; i < m_comps_in_frame; i++)
3040     {
3041       m_dc_coeffs.ptr[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp.ptr[i], m_max_mcus_per_col * m_comp_v_samp.ptr[i], 1, 1);
3042       m_ac_coeffs.ptr[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp.ptr[i], m_max_mcus_per_col * m_comp_v_samp.ptr[i], 8, 8);
3043     }
3044 
3045     for ( ; ; )
3046     {
3047       int dc_only_scan, refinement_scan;
3048       pDecode_block_func decode_block_func;
3049 
3050       if (!init_scan())
3051         break;
3052 
3053       dc_only_scan = (m_spectral_start == 0);
3054       refinement_scan = (m_successive_high != 0);
3055 
3056       if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63))
3057         stop_decoding(JPGD_BAD_SOS_SPECTRAL);
3058 
3059       if (dc_only_scan)
3060       {
3061         if (m_spectral_end)
3062           stop_decoding(JPGD_BAD_SOS_SPECTRAL);
3063       }
3064       else if (m_comps_in_scan != 1)  /* AC scans can only contain one component */
3065         stop_decoding(JPGD_BAD_SOS_SPECTRAL);
3066 
3067       if ((refinement_scan) && (m_successive_low != m_successive_high - 1))
3068         stop_decoding(JPGD_BAD_SOS_SUCCESSIVE);
3069 
3070       if (dc_only_scan)
3071       {
3072         if (refinement_scan)
3073           decode_block_func = &decode_block_dc_refine;
3074         else
3075           decode_block_func = &decode_block_dc_first;
3076       }
3077       else
3078       {
3079         if (refinement_scan)
3080           decode_block_func = &decode_block_ac_refine;
3081         else
3082           decode_block_func = &decode_block_ac_first;
3083       }
3084 
3085       decode_scan(decode_block_func);
3086 
3087       m_bits_left = 16;
3088       get_bits(16);
3089       get_bits(16);
3090     }
3091 
3092     m_comps_in_scan = m_comps_in_frame;
3093 
3094     for (i = 0; i < m_comps_in_frame; i++)
3095       m_comp_list.ptr[i] = i;
3096 
3097     calc_mcu_block_order();
3098   }
3099 
3100   void init_sequential () {
3101     if (!init_scan())
3102       stop_decoding(JPGD_UNEXPECTED_MARKER);
3103   }
3104 
3105   void decode_start () {
3106     init_frame();
3107 
3108     if (m_progressive_flag)
3109       init_progressive();
3110     else
3111       init_sequential();
3112   }
3113 
3114   void decode_init (JpegStreamReadFunc rfn, void* userData) {
3115     initit(rfn, userData);
3116     locate_sof_marker();
3117   }
3118 }
3119 
3120 // ////////////////////////////////////////////////////////////////////////// //
/// Decompresses a JPEG image that is read through the callback `rfn`.
///
/// Params:
///   rfn = stream read callback; when `null`, the function returns `null`.
///   userData = opaque pointer handed to the decoder together with `rfn`.
///   width = receives the image width in pixels.
///   height = receives the image height in pixels.
///   actual_comps = receives the number of components stored in the file.
///   pixelAspectRatio = receives the decoder's pixel aspect ratio; -1 if not
///                      available or if decoding fails.
///   dotsPerInchY = receives the decoder's vertical resolution; -1 if not
///                  available or if decoding fails.
///   req_comps = number of components wanted per output pixel: 1 (grey),
///               3 (RGB) or 4 (RGBA); -1 (the default) uses the file's own
///               component count. Any other value makes the function return `null`.
///
/// Returns: the pixels, `width * req_comps` bytes per row with no padding,
/// or `null` on failure. The slice refers to memory obtained from
/// `jpgd_malloc`; the caller owns it (presumably to be released with
/// `jpgd_free` -- confirm against the allocator's contract).
public ubyte[] decompress_jpeg_image_from_stream(scope JpegStreamReadFunc rfn, void* userData,
                                                 out int width, out int height, out int actual_comps, 
                                                 out float pixelAspectRatio, out float dotsPerInchY,
                                                 int req_comps=-1) {

  // `out float` parameters start out as NaN; publish the documented
  // "not available" value before any early return can happen.
  pixelAspectRatio = -1;
  dotsPerInchY = -1;

  if (rfn is null) return null;
  if (req_comps != -1 && req_comps != 1 && req_comps != 3 && req_comps != 4) return null;

  auto decoder = jpeg_decoder(rfn, userData);
  if (decoder.error_code != JPGD_SUCCESS) return null;
  version(jpegd_test) scope(exit) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }

  immutable int image_width = decoder.width;
  immutable int image_height = decoder.height;
  width = image_width;
  height = image_height;
  actual_comps = decoder.num_components;
  if (req_comps < 0) req_comps = decoder.num_components;

  if (decoder.begin_decoding() != JPGD_SUCCESS) return null;

  // Sizes and offsets are computed in size_t so that large images cannot
  // overflow 32-bit int arithmetic.
  immutable size_t dst_bpl = cast(size_t)image_width * cast(size_t)req_comps;
  immutable size_t dst_size = dst_bpl * cast(size_t)image_height;

  ubyte* pImage_data = cast(ubyte*)jpgd_malloc(dst_size);
  if (pImage_data is null) return null;
  auto idata = pImage_data[0..dst_size];

  for (int y = 0; y < image_height; ++y) {
    const(ubyte)* pScan_line;
    uint scan_line_len;
    if (decoder.decode(/*(const void**)*/cast(void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS) {
      // Do not leak the partially filled image.
      jpgd_free(pImage_data);
      return null;
    }

    ubyte* pDst = pImage_data + cast(size_t)y * dst_bpl;

    // The decoder's scan lines hold 1 byte per pixel for single-component
    // images and 4 bytes per pixel for three-component images.
    if ((req_comps == 1 && decoder.num_components == 1) || (req_comps == 4 && decoder.num_components == 3)) {
      // Requested layout equals the decoder's: copy the row as-is.
      memcpy(pDst, pScan_line, dst_bpl);
    } else if (decoder.num_components == 1) {
      if (req_comps == 3) {
        // Grey -> RGB: replicate the luma value.
        for (int x = 0; x < image_width; ++x) {
          ubyte luma = pScan_line[x];
          pDst[0] = luma;
          pDst[1] = luma;
          pDst[2] = luma;
          pDst += 3;
        }
      } else {
        // Grey -> RGBA: replicate the luma value, opaque alpha.
        for (int x = 0; x < image_width; ++x) {
          ubyte luma = pScan_line[x];
          pDst[0] = luma;
          pDst[1] = luma;
          pDst[2] = luma;
          pDst[3] = 255;
          pDst += 4;
        }
      }
    } else if (decoder.num_components == 3) {
      if (req_comps == 1) {
        // Colour -> grey: 16.16 fixed-point weights (they sum to 65536),
        // with +32768 for rounding.
        immutable int YR = 19595, YG = 38470, YB = 7471;
        for (int x = 0; x < image_width; ++x) {
          int r = pScan_line[x*4+0];
          int g = pScan_line[x*4+1];
          int b = pScan_line[x*4+2];
          *pDst++ = cast(ubyte)((r * YR + g * YG + b * YB + 32768) >> 16);
        }
      } else {
        // Colour -> RGB: drop the fourth byte of every pixel.
        for (int x = 0; x < image_width; ++x) {
          pDst[0] = pScan_line[x*4+0];
          pDst[1] = pScan_line[x*4+1];
          pDst[2] = pScan_line[x*4+2];
          pDst += 3;
        }
      }
    }
  }

  pixelAspectRatio = decoder.m_pixelAspectRatio;
  dotsPerInchY = decoder.m_pixelsPerInchY;

  return idata;
}