OpenRaider  0.1.4-dev
Open Source Tomb Raider Game Engine implementation
stb_image.h
Go to the documentation of this file.
1 /* stb_image - v2.00b - public domain image loader - http://nothings.org/stb_image.h
2  no warranty implied; use at your own risk
3 
4  Do this:
5  #define STB_IMAGE_IMPLEMENTATION
6  before you include this file in *one* C or C++ file to create the implementation.
7 
8  // i.e. it should look like this:
9  #include ...
10  #include ...
11  #include ...
12  #define STB_IMAGE_IMPLEMENTATION
13  #include "stb_image.h"
14 
15  You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
16  And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
17 
18 
19  QUICK NOTES:
20  Primarily of interest to game developers and other people who can
21  avoid problematic images and only need the trivial interface
22 
23  JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
24  PNG 1/2/4/8-bit-per-channel (16 bpc not supported)
25 
26  TGA (not sure what subset, if a subset)
27  BMP non-1bpp, non-RLE
28  PSD (composited view only, no extra channels)
29 
30  GIF (*comp always reports as 4-channel)
31  HDR (radiance rgbE format)
32  PIC (Softimage PIC)
33  PNM (PPM and PGM binary only)
34 
35  - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
36  - decode from arbitrary I/O callbacks
37  - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
38 
39  Full documentation under "DOCUMENTATION" below.
40 
41 
42  Revision 2.00 release notes:
43 
44  - Progressive JPEG is now supported.
45 
46  - PPM and PGM binary formats are now supported, thanks to Ken Miller.
47 
48  - x86 platforms now make use of SSE2 SIMD instructions for
49  JPEG decoding, and ARM platforms can use NEON SIMD if requested.
50  This work was done by Fabian "ryg" Giesen. SSE2 is used by
51  default, but NEON must be enabled explicitly; see docs.
52 
53  With other JPEG optimizations included in this version, we see
54  2x speedup on a JPEG on an x86 machine, and a 1.5x speedup
55  on a JPEG on an ARM machine, relative to previous versions of this
56  library. The same results will not obtain for all JPGs and for all
57  x86/ARM machines. (Note that progressive JPEGs are significantly
58  slower to decode than regular JPEGs.) This doesn't mean that this
59  is the fastest JPEG decoder in the land; rather, it brings it
60  closer to parity with standard libraries. If you want the fastest
61  decode, look elsewhere. (See "Philosophy" section of docs below.)
62 
63  See final bullet items below for more info on SIMD.
64 
65  - Added STBI_MALLOC, STBI_REALLOC, and STBI_FREE macros for replacing
66  the memory allocator. Unlike other STBI libraries, these macros don't
67  support a context parameter, so if you need to pass a context in to
68  the allocator, you'll have to store it in a global or a thread-local
69  variable.
70 
71  - Split existing STBI_NO_HDR flag into two flags, STBI_NO_HDR and
72  STBI_NO_LINEAR.
73  STBI_NO_HDR: suppress implementation of .hdr reader format
74  STBI_NO_LINEAR: suppress high-dynamic-range light-linear float API
75 
76  - You can suppress implementation of any of the decoders to reduce
77  your code footprint by #defining one or more of the following
78  symbols before creating the implementation.
79 
80  STBI_NO_JPEG
81  STBI_NO_PNG
82  STBI_NO_BMP
83  STBI_NO_PSD
84  STBI_NO_TGA
85  STBI_NO_GIF
86  STBI_NO_HDR
87  STBI_NO_PIC
88  STBI_NO_PNM (.ppm and .pgm)
89 
90  - You can request *only* certain decoders and suppress all other ones
91  (this will be more forward-compatible, as addition of new decoders
92  doesn't require you to disable them explicitly):
93 
94  STBI_ONLY_JPEG
95  STBI_ONLY_PNG
96  STBI_ONLY_BMP
97  STBI_ONLY_PSD
98  STBI_ONLY_TGA
99  STBI_ONLY_GIF
100  STBI_ONLY_HDR
101  STBI_ONLY_PIC
102  STBI_ONLY_PNM (.ppm and .pgm)
103 
104  Note that you can define multiples of these, and you will get all
105  of them ("only x" and "only y" is interpreted to mean "only x&y").
106 
107  - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
108  want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
109 
110  - Compilation of all SIMD code can be suppressed with
111  #define STBI_NO_SIMD
112  It should not be necessary to disable SIMD unless you have issues
113  compiling (e.g. using an x86 compiler which doesn't support SSE
114  intrinsics or that doesn't support the method used to detect
115  SSE2 support at run-time), and even those can be reported as
116  bugs so I can refine the built-in compile-time checking to be
117  smarter.
118 
119  - The old STBI_SIMD system which allowed installing a user-defined
120  IDCT etc. has been removed. If you need this, don't upgrade. My
121  assumption is that almost nobody was doing this, and those who
122  were will find the built-in SIMD more satisfactory anyway.
123 
124  - RGB values computed for JPEG images are slightly different from
125  previous versions of stb_image. (This is due to using less
126  integer precision in SIMD.) The C code has been adjusted so
127  that the same RGB values will be computed regardless of whether
128  SIMD support is available, so your app should always produce
129  consistent results. But these results are slightly different from
130  previous versions. (Specifically, about 3% of available YCbCr values
131  will compute different RGB results from pre-1.49 versions by +-1;
132  most of the deviating values are one smaller in the G channel.)
133 
134  - If you must produce consistent results with previous versions of
135  stb_image, #define STBI_JPEG_OLD and you will get the same results
136  you used to; however, you will not get the SIMD speedups for
137  the YCbCr-to-RGB conversion step (although you should still see
138  significant JPEG speedup from the other changes).
139 
140  Please note that STBI_JPEG_OLD is a temporary feature; it will be
141  removed in future versions of the library. It is only intended for
142  near-term back-compatibility use.
143 
144 
145  Latest revision history:
146  2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
147  2.00 (2014-12-25) optimize JPEG, including x86 SSE2 & ARM NEON SIMD
148  progressive JPEG
149  PGM/PPM support
150  STBI_MALLOC,STBI_REALLOC,STBI_FREE
151  STBI_NO_*, STBI_ONLY_*
152  GIF bugfix
153  1.48 (2014-12-14) fix incorrectly-named assert()
154  1.47 (2014-12-14) 1/2/4-bit PNG support (both grayscale and paletted)
155  optimize PNG
156  fix bug in interlaced PNG with user-specified channel count
157  1.46 (2014-08-26) fix broken tRNS chunk in non-paletted PNG
158  1.45 (2014-08-16) workaround MSVC-ARM internal compiler error by wrapping malloc
159 
160  See end of file for full revision history.
161 
162 
163  ============================ Contributors =========================
164 
165  Image formats Bug fixes & warning fixes
166  Sean Barrett (jpeg, png, bmp) Marc LeBlanc
167  Nicolas Schulz (hdr, psd) Christpher Lloyd
168  Jonathan Dummer (tga) Dave Moore
169  Jean-Marc Lienher (gif) Won Chun
170  Tom Seddon (pic) the Horde3D community
171  Thatcher Ulrich (psd) Janez Zemva
172  Ken Miller (pgm, ppm) Jonathan Blow
173  Laurent Gomila
174  Aruelien Pocheville
175  Extensions, features Ryamond Barbiero
176  Jetro Lauha (stbi_info) David Woo
177  Martin "SpartanJ" Golini (stbi_info) Martin Golini
178  James "moose2000" Brown (iPhone PNG) Roy Eltham
179  Ben "Disch" Wenger (io callbacks) Luke Graham
180  Omar Cornut (1/2/4-bit PNG) Thomas Ruf
181  John Bartholomew
182  Ken Hamada
183  Optimizations & bugfixes Cort Stratton
184  Fabian "ryg" Giesen Blazej Dariusz Roszkowski
185  Arseny Kapoulkine Thibault Reuille
186  Paul Du Bois
187  Guillaume George
188  If your name should be here but Jerry Jansson
189  isn't, let Sean know. Hayaki Saito
190  Johan Duparc
191  Ronny Chevalier
192  Michal Cichon
193  Tero Hanninen
194 
195 License:
196  This software is in the public domain. Where that dedication is not
197  recognized, you are granted a perpetual, irrevocable license to copy
198  and modify this file however you want.
199 
200 */
201 
202 #ifndef STBI_INCLUDE_STB_IMAGE_H
203 #define STBI_INCLUDE_STB_IMAGE_H
204 
205 // DOCUMENTATION
206 //
207 // Limitations:
208 // - no 16-bit-per-channel PNG
209 // - no 12-bit-per-channel JPEG
210 // - no JPEGs with arithmetic coding
211 // - no 1-bit BMP
212 // - GIF always returns *comp=4
213 //
214 // Basic usage (see HDR discussion below for HDR usage):
215 // int x,y,n;
216 // unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
217 // // ... process data if not NULL ...
218 // // ... x = width, y = height, n = # 8-bit components per pixel ...
219 // // ... replace '0' with '1'..'4' to force that many components per pixel
220 // // ... but 'n' will always be the number that it would have been if you said 0
221 // stbi_image_free(data)
222 //
223 // Standard parameters:
224 // int *x -- outputs image width in pixels
225 // int *y -- outputs image height in pixels
226 // int *comp -- outputs # of image components in image file
227 // int req_comp -- if non-zero, # of image components requested in result
228 //
229 // The return value from an image loader is an 'unsigned char *' which points
230 // to the pixel data, or NULL on an allocation failure or if the image is
231 // corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
232 // with each pixel consisting of N interleaved 8-bit components; the first
233 // pixel pointed to is top-left-most in the image. There is no padding between
234 // image scanlines or between pixels, regardless of format. The number of
235 // components N is 'req_comp' if req_comp is non-zero, or *comp otherwise.
236 // If req_comp is non-zero, *comp has the number of components that _would_
237 // have been output otherwise. E.g. if you set req_comp to 4, you will always
238 // get RGBA output, but you can check *comp to see if it's trivially opaque
239 // because e.g. there were only 3 channels in the source image.
240 //
241 // An output image with N components has the following components interleaved
242 // in this order in each pixel:
243 //
244 // N=#comp components
245 // 1 grey
246 // 2 grey, alpha
247 // 3 red, green, blue
248 // 4 red, green, blue, alpha
249 //
250 // If image loading fails for any reason, the return value will be NULL,
251 // and *x, *y, *comp will be unchanged. The function stbi_failure_reason()
252 // can be queried for an extremely brief, end-user unfriendly explanation
253 // of why the load failed. Define STBI_NO_FAILURE_STRINGS to avoid
254 // compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
255 // more user-friendly ones.
256 //
257 // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
258 //
259 // ===========================================================================
260 //
261 // Philosophy
262 //
263 // stb libraries are designed with the following priorities:
264 //
265 // 1. easy to use
266 // 2. easy to maintain
267 // 3. good performance
268 //
269 // Sometimes I let "good performance" creep up in priority over "easy to maintain",
270 // and for best performance I may provide less-easy-to-use APIs that give higher
271 // performance, in addition to the easy to use ones. Nevertheless, it's important
272 // to keep in mind that from the standpoint of you, a client of this library,
273 // all you care about is #1 and #3, and stb libraries do not emphasize #3 above all.
274 //
275 // Some secondary priorities arise directly from the first two, some of which
276 // make more explicit reasons why performance can't be emphasized.
277 //
278 // - Portable ("ease of use")
279 // - Small footprint ("easy to maintain")
280 // - No dependencies ("ease of use")
281 //
282 // ===========================================================================
283 //
284 // I/O callbacks
285 //
286 // I/O callbacks allow you to read from arbitrary sources, like packaged
287 // files or some other source. Data read from callbacks are processed
288 // through a small internal buffer (currently 128 bytes) to try to reduce
289 // overhead.
290 //
291 // The three functions you must define are "read" (reads some bytes of data),
292 // "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
293 //
294 // ===========================================================================
295 //
296 // SIMD support
297 //
298 // The JPEG decoder will try to automatically use SIMD kernels on x86 when
299 // supported by the compiler. For ARM Neon support, you must explicitly
300 // request it.
301 //
302 // (The old do-it-yourself SIMD API is no longer supported in the current
303 // code.)
304 //
305 // On x86, SSE2 will automatically be used when available based on a run-time
306 // test; if not, the generic C versions are used as a fall-back. On ARM targets,
307 // the typical path is to have separate builds for NEON and non-NEON devices
308 // (at least this is true for iOS and Android). Therefore, the NEON support is
309 // toggled by a build flag: define STBI_NEON to get NEON loops.
310 //
311 // The output of the JPEG decoder is slightly different from versions released
312 // before SIMD support was introduced (that is, versions before 1.49). The
313 // difference is only +-1 in the 8-bit RGB channels, and only on a small
314 // fraction of pixels. You can force the pre-1.49 behavior by defining
315 // STBI_JPEG_OLD, but this will disable some of the SIMD decoding path
316 // and hence cost some performance.
317 //
318 // If for some reason you do not want to use any of SIMD code, or if
319 // you have issues compiling it, you can disable it entirely by
320 // defining STBI_NO_SIMD.
321 //
322 // ===========================================================================
323 //
324 // HDR image support (disable by defining STBI_NO_HDR)
325 //
326 // stb_image now supports loading HDR images in general, and currently
327 // the Radiance .HDR file format, although the support is provided
328 // generically. You can still load any file through the existing interface;
329 // if you attempt to load an HDR file, it will be automatically remapped to
330 // LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
331 // both of these constants can be reconfigured through this interface:
332 //
333 // stbi_hdr_to_ldr_gamma(2.2f);
334 // stbi_hdr_to_ldr_scale(1.0f);
335 //
336 // (note, do not use _inverse_ constants; stb_image will invert them
337 // appropriately).
338 //
339 // Additionally, there is a new, parallel interface for loading files as
340 // (linear) floats to preserve the full dynamic range:
341 //
342 // float *data = stbi_loadf(filename, &x, &y, &n, 0);
343 //
344 // If you load LDR images through this interface, those images will
345 // be promoted to floating point values, run through the inverse of
346 // constants corresponding to the above:
347 //
348 // stbi_ldr_to_hdr_scale(1.0f);
349 // stbi_ldr_to_hdr_gamma(2.2f);
350 //
351 // Finally, given a filename (or an open file or memory block--see header
352 // file for details) containing image data, you can query for the "most
353 // appropriate" interface to use (that is, whether the image is HDR or
354 // not), using:
355 //
356 // stbi_is_hdr(char *filename);
357 //
358 // ===========================================================================
359 //
360 // iPhone PNG support:
361 //
362 // By default we convert iphone-formatted PNGs back to RGB, even though
363 // they are internally encoded differently. You can disable this conversion
364 // by calling stbi_convert_iphone_png_to_rgb(0), in which case
365 // you will always just get the native iphone "format" through (which
366 // is BGR stored in RGB).
367 //
368 // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
369 // pixel to remove any premultiplied alpha *only* if the image file explicitly
370 // says there's premultiplied data (currently only happens in iPhone images,
371 // and only if iPhone convert-to-rgb processing is on).
372 //
373 
374 
375 #ifndef STBI_NO_STDIO
376 #include <stdio.h>
377 #endif // STBI_NO_STDIO
378 
379 #define STBI_VERSION 1
380 
// Component-count constants accepted as 'req_comp'. Each value equals the
// number of interleaved 8-bit components per pixel (1 = grey, 2 = grey+alpha,
// 3 = RGB, 4 = RGBA), matching the "N=#comp" table in the documentation.
enum
{
   STBI_default = 0, // only used for req_comp

   STBI_grey       = 1,
   STBI_grey_alpha = 2,
   STBI_rgb        = 3,
   STBI_rgb_alpha  = 4
};
390 
391 typedef unsigned char stbi_uc;
392 
393 #ifdef __cplusplus
394 extern "C" {
395 #endif
396 
397 #ifdef STB_IMAGE_STATIC
398 #define STBIDEF static
399 #else
400 #define STBIDEF extern
401 #endif
402 
404 //
405 // PRIMARY API - works on images of any type
406 //
407 
408 //
409 // load image by filename, open file, or memory buffer
410 //
411 
// User-supplied I/O hooks for decoding from an arbitrary data source.
// Each hook receives the opaque 'user' pointer that was passed to the
// *_from_callbacks entry point.
typedef struct
{
   int  (*read) (void *user, char *data, int size); // fill 'data' with 'size' bytes. return number of bytes actually read
   void (*skip) (void *user, int n);                // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
   int  (*eof)  (void *user);                       // returns nonzero if we are at end of file/data
} stbi_io_callbacks;
418 
419 STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *comp, int req_comp);
420 STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *comp, int req_comp);
421 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *comp, int req_comp);
422 
423 #ifndef STBI_NO_STDIO
424 STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp);
425 // for stbi_load_from_file, file pointer is left pointing immediately after image
426 #endif
427 
428 #ifndef STBI_NO_LINEAR
429  STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *comp, int req_comp);
430  STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
431  STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp);
432 
433  #ifndef STBI_NO_STDIO
434  STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *comp, int req_comp);
435  #endif
436 #endif
437 
438 #ifndef STBI_NO_HDR
439  STBIDEF void stbi_hdr_to_ldr_gamma(float gamma);
440  STBIDEF void stbi_hdr_to_ldr_scale(float scale);
441 #endif
442 
443 #ifndef STBI_NO_LINEAR
444  STBIDEF void stbi_ldr_to_hdr_gamma(float gamma);
445  STBIDEF void stbi_ldr_to_hdr_scale(float scale);
445 #endif // STBI_NO_LINEAR
447 
448 // stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
449 STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
450 STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
451 #ifndef STBI_NO_STDIO
452 STBIDEF int stbi_is_hdr (char const *filename);
453 STBIDEF int stbi_is_hdr_from_file(FILE *f);
454 #endif // STBI_NO_STDIO
455 
456 
457 // get a VERY brief reason for failure
458 // NOT THREADSAFE
459 STBIDEF const char *stbi_failure_reason (void);
460 
461 // free the loaded image -- this is just free()
462 STBIDEF void stbi_image_free (void *retval_from_stbi_load);
463 
464 // get image dimensions & components without fully decoding
465 STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
466 STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
467 
468 #ifndef STBI_NO_STDIO
469 STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp);
470 STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp);
471 
472 #endif
473 
474 
475 
476 // for image formats that explicitly notate that they have premultiplied alpha,
477 // we just return the colors as stored in the file. set this flag to force
478 // unpremultiplication. results are undefined if the unpremultiply overflows.
479 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
480 
481 // indicate whether we should process iphone images back to canonical format,
482 // or just pass them through "as-is"
483 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
484 
485 
486 // ZLIB client - used by PNG, available for other purposes
487 
488 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
489 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
490 STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
491 STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
492 
493 STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
494 STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
495 
496 
497 #ifdef __cplusplus
498 }
499 #endif
500 
501 //
502 //
504 #endif // STBI_INCLUDE_STB_IMAGE_H
505 
506 #ifdef STB_IMAGE_IMPLEMENTATION
507 
508 #if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
509  || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
510  || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
511  || defined(STBI_ONLY_ZLIB)
512  #ifndef STBI_ONLY_JPEG
513  #define STBI_NO_JPEG
514  #endif
515  #ifndef STBI_ONLY_PNG
516  #define STBI_NO_PNG
517  #endif
518  #ifndef STBI_ONLY_BMP
519  #define STBI_NO_BMP
520  #endif
521  #ifndef STBI_ONLY_PSD
522  #define STBI_NO_PSD
523  #endif
524  #ifndef STBI_ONLY_TGA
525  #define STBI_NO_TGA
526  #endif
527  #ifndef STBI_ONLY_GIF
528  #define STBI_NO_GIF
529  #endif
530  #ifndef STBI_ONLY_HDR
531  #define STBI_NO_HDR
532  #endif
533  #ifndef STBI_ONLY_PIC
534  #define STBI_NO_PIC
535  #endif
536  #ifndef STBI_ONLY_PNM
537  #define STBI_NO_PNM
538  #endif
539 #endif
540 
541 #if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
542 #define STBI_NO_ZLIB
543 #endif
544 
545 
546 #include <stdarg.h>
547 #include <stddef.h> // ptrdiff_t on osx
548 #include <stdlib.h>
549 #include <string.h>
550 
551 #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
552 #include <math.h> // ldexp
553 #endif
554 
555 #ifndef STBI_NO_STDIO
556 #include <stdio.h>
557 #endif
558 
559 #ifndef STBI_ASSERT
560 #include <assert.h>
561 #define STBI_ASSERT(x) assert(x)
562 #endif
563 
564 
565 #ifndef _MSC_VER
566  #ifdef __cplusplus
567  #define stbi_inline inline
568  #else
569  #define stbi_inline
570  #endif
571 #else
572  #define stbi_inline __forceinline
573 #endif
574 
575 
576 #ifdef _MSC_VER
577 typedef unsigned short stbi__uint16;
578 typedef signed short stbi__int16;
579 typedef unsigned int stbi__uint32;
580 typedef signed int stbi__int32;
581 #else
582 #include <stdint.h>
583 typedef uint16_t stbi__uint16;
584 typedef int16_t stbi__int16;
585 typedef uint32_t stbi__uint32;
586 typedef int32_t stbi__int32;
587 #endif
588 
589 // should produce compiler error if size is wrong
590 typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
591 
592 #ifdef _MSC_VER
593 #define STBI_NOTUSED(v) (void)(v)
594 #else
595 #define STBI_NOTUSED(v) (void)sizeof(v)
596 #endif
597 
598 #ifdef _MSC_VER
599 #define STBI_HAS_LROTL
600 #endif
601 
602 #ifdef STBI_HAS_LROTL
603  #define stbi_lrot(x,y) _lrotl(x,y)
604 #else
605  #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (32 - (y))))
606 #endif
607 
608 #if defined(STBI_MALLOC) && defined(STBI_FREE) && defined(STBI_REALLOC)
609 // ok
610 #elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC)
611 // ok
612 #else
613 #error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC."
614 #endif
615 
616 #ifndef STBI_MALLOC
617 #define STBI_MALLOC(sz) malloc(sz)
618 #define STBI_REALLOC(p,sz) realloc(p,sz)
619 #define STBI_FREE(p) free(p)
620 #endif
621 
622 #if !defined(STBI_NO_SIMD) && (defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86))
623 #define STBI_SSE2
624 #include <emmintrin.h>
625 
626 #ifdef _MSC_VER
627 
628 #if _MSC_VER >= 1400 // not VC6
629 #include <intrin.h> // __cpuid
// Queries CPUID function 1 through the MSVC __cpuid intrinsic and returns
// the fourth output slot (the EDX value, as in the inline-asm variant of
// this function).
static int stbi__cpuid3(void)
{
   int regs[4];

   __cpuid(regs, 1);
   return regs[3];
}
636 #else
637 static int stbi__cpuid3(void)
638 {
639  int res;
640  __asm {
641  mov eax,1
642  cpuid
643  mov res,edx
644  }
645  return res;
646 }
647 #endif
648 
649 #define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
650 
// Reports whether SSE2 is advertised by the CPU: tests bit 26 of the value
// returned by stbi__cpuid3(). Returns 1 when the bit is set, 0 otherwise.
// Declared with (void) so it is a real prototype in C, consistent with
// stbi__cpuid3(void).
static int stbi__sse2_available(void)
{
   int info3 = stbi__cpuid3();
   return ((info3 >> 26) & 1) != 0;
}
656 #else // assume GCC-style if not VC++
657 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
658 
// Reports whether SSE2 can be used at run time on non-MSVC compilers.
// Returns nonzero if supported; returns 0 when no detection method is
// available. Declared with (void) so it is a real prototype in C.
static int stbi__sse2_available(void)
{
#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 // GCC 4.8 or later
   // GCC 4.8+ has a nice way to do this
   return __builtin_cpu_supports("sse2");
#else
   // portable way to do this, preferably without using GCC inline ASM?
   // just bail for now.
   return 0;
#endif
}
670 #endif
671 #endif
672 
673 // ARM NEON
674 #if defined(STBI_NO_SIMD) && defined(STBI_NEON)
675 #undef STBI_NEON
676 #endif
677 
678 #ifdef STBI_NEON
679 #include <arm_neon.h>
680 // assume GCC or Clang on ARM targets
681 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
682 #endif
683 
684 #ifndef STBI_SIMD_ALIGN
685 #define STBI_SIMD_ALIGN(type, name) type name
686 #endif
687 
689 //
690 // stbi__context struct and start_xxx functions
691 
692 // stbi__context structure is our basic context used by all images, so it
693 // contains all the IO context, plus some basic image information
694 typedef struct
695 {
696  stbi__uint32 img_x, img_y;
697  int img_n, img_out_n;
698 
700  void *io_user_data;
701 
702  int read_from_callbacks;
703  int buflen;
704  stbi_uc buffer_start[128];
705 
706  stbi_uc *img_buffer, *img_buffer_end;
707  stbi_uc *img_buffer_original;
708 } stbi__context;
709 
710 
711 static void stbi__refill_buffer(stbi__context *s);
712 
713 // initialize a memory-decode context
714 static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
715 {
716  s->io.read = NULL;
717  s->read_from_callbacks = 0;
718  s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
719  s->img_buffer_end = (stbi_uc *) buffer+len;
720 }
721 
722 // initialize a callback-based context
723 static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
724 {
725  s->io = *c;
726  s->io_user_data = user;
727  s->buflen = sizeof(s->buffer_start);
728  s->read_from_callbacks = 1;
729  s->img_buffer_original = s->buffer_start;
730  stbi__refill_buffer(s);
731 }
732 
733 #ifndef STBI_NO_STDIO
734 
// stdio 'read' callback: 'user' is the FILE* to read from. Reads up to
// 'size' bytes into 'data' and returns how many bytes fread delivered.
static int stbi__stdio_read(void *user, char *data, int size)
{
   FILE *stream = (FILE *) user;
   size_t got = fread(data, 1, size, stream);
   return (int) got;
}
739 
// stdio 'skip' callback: moves the FILE* in 'user' by 'n' bytes relative to
// its current position (negative 'n' moves backwards).
static void stbi__stdio_skip(void *user, int n)
{
   FILE *stream = (FILE *) user;
   fseek(stream, n, SEEK_CUR);
}
744 
// stdio 'eof' callback: returns feof() for the FILE* passed as 'user'
// (nonzero once the end-of-file indicator is set).
static int stbi__stdio_eof(void *user)
{
   FILE *stream = (FILE *) user;
   return feof(stream);
}
749 
750 static stbi_io_callbacks stbi__stdio_callbacks =
751 {
752  stbi__stdio_read,
753  stbi__stdio_skip,
754  stbi__stdio_eof,
755 };
756 
757 static void stbi__start_file(stbi__context *s, FILE *f)
758 {
759  stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f);
760 }
761 
762 //static void stop_file(stbi__context *s) { }
763 
764 #endif // !STBI_NO_STDIO
765 
766 static void stbi__rewind(stbi__context *s)
767 {
768  // conceptually rewind SHOULD rewind to the beginning of the stream,
769  // but we just rewind to the beginning of the initial buffer, because
770  // we only use it after doing 'test', which only ever looks at at most 92 bytes
771  s->img_buffer = s->img_buffer_original;
772 }
773 
774 #ifndef STBI_NO_JPEG
775 static int stbi__jpeg_test(stbi__context *s);
776 static stbi_uc *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
777 static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
778 #endif
779 
780 #ifndef STBI_NO_PNG
781 static int stbi__png_test(stbi__context *s);
782 static stbi_uc *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
783 static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
784 #endif
785 
786 #ifndef STBI_NO_BMP
787 static int stbi__bmp_test(stbi__context *s);
788 static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
789 static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
790 #endif
791 
792 #ifndef STBI_NO_TGA
793 static int stbi__tga_test(stbi__context *s);
794 static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
795 static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
796 #endif
797 
798 #ifndef STBI_NO_PSD
799 static int stbi__psd_test(stbi__context *s);
800 static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
801 static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
802 #endif
803 
804 #ifndef STBI_NO_HDR
805 static int stbi__hdr_test(stbi__context *s);
806 static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
807 static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
808 #endif
809 
810 #ifndef STBI_NO_PIC
811 static int stbi__pic_test(stbi__context *s);
812 static stbi_uc *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
813 static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
814 #endif
815 
816 #ifndef STBI_NO_GIF
817 static int stbi__gif_test(stbi__context *s);
818 static stbi_uc *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
819 static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
820 #endif
821 
822 #ifndef STBI_NO_PNM
823 static int stbi__pnm_test(stbi__context *s);
824 static stbi_uc *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp);
825 static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
826 #endif
827 
828 // this is not threadsafe
829 static const char *stbi__g_failure_reason;
830 
831 STBIDEF const char *stbi_failure_reason(void)
832 {
833  return stbi__g_failure_reason;
834 }
835 
836 static int stbi__err(const char *str)
837 {
838  stbi__g_failure_reason = str;
839  return 0;
840 }
841 
// Allocation wrapper: forwards 'size' to the STBI_MALLOC macro and returns
// its result unchanged.
static void *stbi__malloc(size_t size)
{
   void *block = STBI_MALLOC(size);
   return block;
}
846 
847 // stbi__err - error
848 // stbi__errpf - error returning pointer to float
849 // stbi__errpuc - error returning pointer to unsigned char
850 
// Two-argument error macros: 'x' is the terse message, 'y' the friendlier
// one. The build flags choose which (if any) reaches the stbi__err function.
#if defined(STBI_NO_FAILURE_STRINGS)
   // no message is recorded at all
   #define stbi__err(x,y)  0
#elif defined(STBI_FAILURE_USERMSG)
   // record the user-friendly message
   #define stbi__err(x,y)  stbi__err(y)
#else
   // record the terse message
   #define stbi__err(x,y)  stbi__err(x)
#endif

// Pointer-typed variants: evaluate stbi__err(x,y), then yield NULL.
#define stbi__errpf(x,y)   ((float *) (stbi__err(x,y)?NULL:NULL))
#define stbi__errpuc(x,y)  ((unsigned char *) (stbi__err(x,y)?NULL:NULL))
861 
862 STBIDEF void stbi_image_free(void *retval_from_stbi_load)
863 {
864  STBI_FREE(retval_from_stbi_load);
865 }
866 
867 #ifndef STBI_NO_LINEAR
868 static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
869 #endif
870 
871 #ifndef STBI_NO_HDR
872 static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp);
873 #endif
874 
875 static unsigned char *stbi_load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
876 {
877  #ifndef STBI_NO_JPEG
878  if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp);
879  #endif
880  #ifndef STBI_NO_PNG
881  if (stbi__png_test(s)) return stbi__png_load(s,x,y,comp,req_comp);
882  #endif
883  #ifndef STBI_NO_BMP
884  if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp);
885  #endif
886  #ifndef STBI_NO_GIF
887  if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp);
888  #endif
889  #ifndef STBI_NO_PSD
890  if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp);
891  #endif
892  #ifndef STBI_NO_PIC
893  if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp);
894  #endif
895  #ifndef STBI_NO_PNM
896  if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp);
897  #endif
898 
899  #ifndef STBI_NO_HDR
900  if (stbi__hdr_test(s)) {
901  float *hdr = stbi__hdr_load(s, x,y,comp,req_comp);
902  return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
903  }
904  #endif
905 
906  #ifndef STBI_NO_TGA
907  // test tga last because it's a crappy test!
908  if (stbi__tga_test(s))
909  return stbi__tga_load(s,x,y,comp,req_comp);
910  #endif
911 
912  return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
913 }
914 
915 #ifndef STBI_NO_STDIO
916 
// Open `filename` with `mode`, returning NULL on failure. Uses fopen_s on
// MSVC 2005 and later, plain fopen everywhere else.
static FILE *stbi__fopen(char const *filename, char const *mode)
{
#if defined(_MSC_VER) && _MSC_VER >= 1400
   FILE *handle = NULL;
   if (fopen_s(&handle, filename, mode) != 0)
      handle = NULL;
   return handle;
#else
   return fopen(filename, mode);
#endif
}
928 
929 
930 STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
931 {
932  FILE *f = stbi__fopen(filename, "rb");
933  unsigned char *result;
934  if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
935  result = stbi_load_from_file(f,x,y,comp,req_comp);
936  fclose(f);
937  return result;
938 }
939 
940 STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
941 {
942  unsigned char *result;
943  stbi__context s;
944  stbi__start_file(&s,f);
945  result = stbi_load_main(&s,x,y,comp,req_comp);
946  if (result) {
947  // need to 'unget' all the characters in the IO buffer
948  fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
949  }
950  return result;
951 }
952 #endif
953 
954 STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
955 {
956  stbi__context s;
957  stbi__start_mem(&s,buffer,len);
958  return stbi_load_main(&s,x,y,comp,req_comp);
959 }
960 
961 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
962 {
963  stbi__context s;
964  stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
965  return stbi_load_main(&s,x,y,comp,req_comp);
966 }
967 
968 #ifndef STBI_NO_LINEAR
969 static float *stbi_loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
970 {
971  unsigned char *data;
972  #ifndef STBI_NO_HDR
973  if (stbi__hdr_test(s))
974  return stbi__hdr_load(s,x,y,comp,req_comp);
975  #endif
976  data = stbi_load_main(s, x, y, comp, req_comp);
977  if (data)
978  return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
979  return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
980 }
981 
982 STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
983 {
984  stbi__context s;
985  stbi__start_mem(&s,buffer,len);
986  return stbi_loadf_main(&s,x,y,comp,req_comp);
987 }
988 
989 STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
990 {
991  stbi__context s;
992  stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
993  return stbi_loadf_main(&s,x,y,comp,req_comp);
994 }
995 
996 #ifndef STBI_NO_STDIO
997 STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
998 {
999  float *result;
1000  FILE *f = stbi__fopen(filename, "rb");
1001  if (!f) return stbi__errpf("can't fopen", "Unable to open file");
1002  result = stbi_loadf_from_file(f,x,y,comp,req_comp);
1003  fclose(f);
1004  return result;
1005 }
1006 
1007 STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1008 {
1009  stbi__context s;
1010  stbi__start_file(&s,f);
1011  return stbi_loadf_main(&s,x,y,comp,req_comp);
1012 }
1013 #endif // !STBI_NO_STDIO
1014 
1015 #endif // !STBI_NO_LINEAR
1016 
1017 // these is-hdr-or-not functions are defined regardless of whether
1018 // STBI_NO_LINEAR is defined, for API simplicity; if STBI_NO_HDR is defined,
1019 // they always report false!
1020 
1021 STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
1022 {
1023  #ifndef STBI_NO_HDR
1024  stbi__context s;
1025  stbi__start_mem(&s,buffer,len);
1026  return stbi__hdr_test(&s);
1027  #else
1028  STBI_NOTUSED(buffer);
1029  STBI_NOTUSED(len);
1030  return 0;
1031  #endif
1032 }
1033 
1034 #ifndef STBI_NO_STDIO
1035 STBIDEF int stbi_is_hdr (char const *filename)
1036 {
1037  FILE *f = stbi__fopen(filename, "rb");
1038  int result=0;
1039  if (f) {
1040  result = stbi_is_hdr_from_file(f);
1041  fclose(f);
1042  }
1043  return result;
1044 }
1045 
1046 STBIDEF int stbi_is_hdr_from_file(FILE *f)
1047 {
1048  #ifndef STBI_NO_HDR
1049  stbi__context s;
1050  stbi__start_file(&s,f);
1051  return stbi__hdr_test(&s);
1052  #else
1053  return 0;
1054  #endif
1055 }
1056 #endif // !STBI_NO_STDIO
1057 
1058 STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
1059 {
1060  #ifndef STBI_NO_HDR
1061  stbi__context s;
1062  stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1063  return stbi__hdr_test(&s);
1064  #else
1065  return 0;
1066  #endif
1067 }
1068 
1069 static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;
1070 static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;
1071 
1072 #ifndef STBI_NO_LINEAR
1073 STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
1074 STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
1075 #endif
1076 
1077 STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
1078 STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
1079 
1080 
1082 //
1083 // Common code used by all image loaders
1084 //
1085 
// Scan modes. NOTE(review): presumably passed to the per-format parsers to
// choose between a full decode, a type probe and a header-only read; the
// users are outside this part of the file.
enum
{
   STBI__SCAN_load   = 0,
   STBI__SCAN_type   = 1,
   STBI__SCAN_header = 2
};
1092 
1093 static void stbi__refill_buffer(stbi__context *s)
1094 {
1095  int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
1096  if (n == 0) {
1097  // at end of file, treat same as if from memory, but need to handle case
1098  // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
1099  s->read_from_callbacks = 0;
1100  s->img_buffer = s->buffer_start;
1101  s->img_buffer_end = s->buffer_start+1;
1102  *s->img_buffer = 0;
1103  } else {
1104  s->img_buffer = s->buffer_start;
1105  s->img_buffer_end = s->buffer_start + n;
1106  }
1107 }
1108 
1109 stbi_inline static stbi_uc stbi__get8(stbi__context *s)
1110 {
1111  if (s->img_buffer < s->img_buffer_end)
1112  return *s->img_buffer++;
1113  if (s->read_from_callbacks) {
1114  stbi__refill_buffer(s);
1115  return *s->img_buffer++;
1116  }
1117  return 0;
1118 }
1119 
1120 stbi_inline static int stbi__at_eof(stbi__context *s)
1121 {
1122  if (s->io.read) {
1123  if (!(s->io.eof)(s->io_user_data)) return 0;
1124  // if feof() is true, check if buffer = end
1125  // special case: we've only got the special 0 character at the end
1126  if (s->read_from_callbacks == 0) return 1;
1127  }
1128 
1129  return s->img_buffer >= s->img_buffer_end;
1130 }
1131 
1132 static void stbi__skip(stbi__context *s, int n)
1133 {
1134  if (s->io.read) {
1135  int blen = (int) (s->img_buffer_end - s->img_buffer);
1136  if (blen < n) {
1137  s->img_buffer = s->img_buffer_end;
1138  (s->io.skip)(s->io_user_data, n - blen);
1139  return;
1140  }
1141  }
1142  s->img_buffer += n;
1143 }
1144 
1145 static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
1146 {
1147  if (s->io.read) {
1148  int blen = (int) (s->img_buffer_end - s->img_buffer);
1149  if (blen < n) {
1150  int res, count;
1151 
1152  memcpy(buffer, s->img_buffer, blen);
1153 
1154  count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen);
1155  res = (count == (n-blen));
1156  s->img_buffer = s->img_buffer_end;
1157  return res;
1158  }
1159  }
1160 
1161  if (s->img_buffer+n <= s->img_buffer_end) {
1162  memcpy(buffer, s->img_buffer, n);
1163  s->img_buffer += n;
1164  return 1;
1165  } else
1166  return 0;
1167 }
1168 
1169 static int stbi__get16be(stbi__context *s)
1170 {
1171  int z = stbi__get8(s);
1172  return (z << 8) + stbi__get8(s);
1173 }
1174 
1175 static stbi__uint32 stbi__get32be(stbi__context *s)
1176 {
1177  stbi__uint32 z = stbi__get16be(s);
1178  return (z << 16) + stbi__get16be(s);
1179 }
1180 
1181 static int stbi__get16le(stbi__context *s)
1182 {
1183  int z = stbi__get8(s);
1184  return z + (stbi__get8(s) << 8);
1185 }
1186 
1187 static stbi__uint32 stbi__get32le(stbi__context *s)
1188 {
1189  stbi__uint32 z = stbi__get16le(s);
1190  return z + (stbi__get16le(s) << 16);
1191 }
1192 
// Keep only the low 8 bits of an int and cast to a byte (avoids truncation warnings).
#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 0xff))
1194 
1195 
1197 //
1198 // generic converter from built-in img_n to req_comp
1199 // individual types do this automatically as much as possible (e.g. jpeg
1200 // does all cases internally since it needs to colorspace convert anyway,
1201 // and it never has alpha, so very few cases ). png can automatically
1202 // interleave an alpha=255 channel, but falls back to this for other cases
1203 //
1204 // assume data buffer is malloced, so malloc a new one and free that one
1205 // only failure mode is malloc failing
1206 
1207 static stbi_uc stbi__compute_y(int r, int g, int b)
1208 {
1209  return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8);
1210 }
1211 
// Convert an x-by-y image with img_n interleaved channels into one with
// req_comp channels.
//
//   data     - source pixels; ownership passes to this function
//   img_n    - channel count of `data`
//   req_comp - wanted channel count, asserted to be 1..4
//   x, y     - image dimensions in pixels
//
// Returns `data` itself when no conversion is needed. Otherwise returns a
// newly allocated buffer and frees `data`. If the allocation fails, `data` is
// freed and NULL is returned with the failure reason "outofmem".
1212 static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
1213 {
1214  int i,j;
1215  unsigned char *good;
1216 
// nothing to do: the caller keeps using the buffer it passed in
1217  if (req_comp == img_n) return data;
1218  STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
1219 
1220  good = (unsigned char *) stbi__malloc(req_comp * x * y);
1221  if (good == NULL) {
1222  STBI_FREE(data);
1223  return stbi__errpuc("outofmem", "Out of memory");
1224  }
1225 
// one pass per scanline; src/dest walk the row pixel by pixel
1226  for (j=0; j < (int) y; ++j) {
1227  unsigned char *src = data + j * x * img_n ;
1228  unsigned char *dest = good + j * x * req_comp;
1229 
// COMBO packs (from,to) channel counts into one switch key. CASE expands to
// the case label plus a loop over the x pixels of the row that advances src by
// `a` and dest by `b` bytes per pixel; the statement after CASE(...) is the
// loop body.
// NOTE(review): CASE is #undef'd below but COMBO is left defined.
1230  #define COMBO(a,b) ((a)*8+(b))
1231  #define CASE(a,b) case COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
1232  // convert source image with img_n components to one with req_comp components;
1233  // avoid switch per pixel, so use switch per scanline and massive macros
// Rules visible below: a missing alpha becomes 255, grey is replicated into
// R, G and B, and RGB is reduced to grey with stbi__compute_y().
1234  switch (COMBO(img_n, req_comp)) {
1235  CASE(1,2) dest[0]=src[0], dest[1]=255; break;
1236  CASE(1,3) dest[0]=dest[1]=dest[2]=src[0]; break;
1237  CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; break;
1238  CASE(2,1) dest[0]=src[0]; break;
1239  CASE(2,3) dest[0]=dest[1]=dest[2]=src[0]; break;
1240  CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; break;
1241  CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; break;
1242  CASE(3,1) dest[0]=stbi__compute_y(src[0],src[1],src[2]); break;
1243  CASE(3,2) dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255; break;
1244  CASE(4,1) dest[0]=stbi__compute_y(src[0],src[1],src[2]); break;
1245  CASE(4,2) dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; break;
1246  CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; break;
1247  default: STBI_ASSERT(0);
1248  }
1249  #undef CASE
1250  }
1251 
// the source buffer is consumed; the caller now owns `good`
1252  STBI_FREE(data);
1253  return good;
1254 }
1255 
1256 #ifndef STBI_NO_LINEAR
1257 static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
1258 {
1259  int i,k,n;
1260  float *output = (float *) stbi__malloc(x * y * comp * sizeof(float));
1261  if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
1262  // compute number of non-alpha components
1263  if (comp & 1) n = comp; else n = comp-1;
1264  for (i=0; i < x*y; ++i) {
1265  for (k=0; k < n; ++k) {
1266  output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
1267  }
1268  if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f;
1269  }
1270  STBI_FREE(data);
1271  return output;
1272 }
1273 #endif
1274 
1275 #ifndef STBI_NO_HDR
1276 #define stbi__float2int(x) ((int) (x))
1277 static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp)
1278 {
1279  int i,k,n;
1280  stbi_uc *output = (stbi_uc *) stbi__malloc(x * y * comp);
1281  if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
1282  // compute number of non-alpha components
1283  if (comp & 1) n = comp; else n = comp-1;
1284  for (i=0; i < x*y; ++i) {
1285  for (k=0; k < n; ++k) {
1286  float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
1287  if (z < 0) z = 0;
1288  if (z > 255) z = 255;
1289  output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1290  }
1291  if (k < comp) {
1292  float z = data[i*comp+k] * 255 + 0.5f;
1293  if (z < 0) z = 0;
1294  if (z > 255) z = 255;
1295  output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1296  }
1297  }
1298  STBI_FREE(data);
1299  return output;
1300 }
1301 #endif
1302 
1304 //
1305 // "baseline" JPEG/JFIF decoder
1306 //
1307 // simple implementation
1308 // - doesn't support delayed output of y-dimension
1309 // - simple interface (only one output format: 8-bit interleaved RGB)
1310 // - doesn't try to recover corrupt jpegs
1311 // - doesn't allow partial loading, loading multiple at once
1312 // - still fast on x86 (copying globals into locals doesn't help x86)
1313 // - allocates lots of intermediate memory (full size of all components)
1314 // - non-interleaved case requires this anyway
1315 // - allows good upsampling (see next)
1316 // high-quality
1317 // - upsampled channels are bilinearly interpolated, even across blocks
1318 // - quality integer IDCT derived from IJG's 'slow'
1319 // performance
1320 // - fast huffman; reasonable integer IDCT
1321 // - some SIMD kernels for common paths on targets with SSE2/NEON
1322 // - uses a lot of intermediate memory, could cache poorly
1323 
1324 #ifndef STBI_NO_JPEG
1325 
1326 // huffman decoding acceleration
1327 #define FAST_BITS 9 // larger handles more cases; smaller stomps less cache
1328 
// One JPEG Huffman table in decoder-friendly form. size/code/maxcode/delta/fast
// are filled by stbi__build_huffman(); values[] is not written by that function.
1329 typedef struct
1330 {
// indexed by the next FAST_BITS bits of input: symbol index for codes no
// longer than FAST_BITS, or 255 for "not accelerated"
1331  stbi_uc fast[1 << FAST_BITS];
1332  // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
// code[i]: the Huffman code assigned to symbol index i
1333  stbi__uint16 code[256];
// values[i]: the decoded value returned for symbol index i
1334  stbi_uc values[256];
// size[i]: code length in bits of symbol index i; the list ends with a 0 entry
1335  stbi_uc size[257];
// maxcode[j]: largest code of length j plus one, preshifted to 16 bits;
// the entry after the last length is set to 0xffffffff as a sentinel
1336  unsigned int maxcode[18];
1337  int delta[17]; // old 'firstsymbol' - old 'firstcode'
1338 } stbi__huffman;
1339 
// Complete state of one JPEG decode: the input context, Huffman and
// dequantization tables, per-component bookkeeping, the entropy-decoder bit
// buffer, progressive-scan parameters and the selected kernels.
1340 typedef struct
1341 {
1342  stbi__context *s;
// NOTE(review): four table slots each for DC and AC - presumably indexed by
// the table id from the stream; confirm in the marker parser.
1343  stbi__huffman huff_dc[4];
1344  stbi__huffman huff_ac[4];
1345  stbi_uc dequant[4][64];
// per-AC-table fast lookup, as produced by stbi__build_fast_ac()
1346  stbi__int16 fast_ac[4][1 << FAST_BITS];
1347 
1348 // sizes for components, interleaved MCUs
1349  int img_h_max, img_v_max;
1350  int img_mcu_x, img_mcu_y;
1351  int img_mcu_w, img_mcu_h;
1352 
1353 // definition of jpeg image component
1354  struct
1355  {
1356  int id;
1357  int h,v;
1358  int tq;
1359  int hd,ha;
// running DC predictor: each decoded DC difference is added to it
1360  int dc_pred;
1361 
1362  int x,y,w2,h2;
1363  stbi_uc *data;
1364  void *raw_data, *raw_coeff;
1365  stbi_uc *linebuf;
1366  short *coeff; // progressive only
1367  int coeff_w, coeff_h; // number of 8x8 coefficient blocks
1368  } img_comp[4];
1369 
// bit buffer: valid bits are kept at the most significant end of code_buffer
1370  stbi__uint32 code_buffer; // jpeg entropy-coded buffer
1371  int code_bits; // number of valid bits
1372  unsigned char marker; // marker seen while filling entropy buffer
1373  int nomore; // flag if we saw a marker so must stop
1374 
// progressive-scan parameters read by the stbi__jpeg_decode_block_prog_*
// functions: spectral range, successive-approximation high/low, and the
// number of remaining blocks in an end-of-band run
1375  int progressive;
1376  int spec_start;
1377  int spec_end;
1378  int succ_high;
1379  int succ_low;
1380  int eob_run;
1381 
1382  int scan_n, order[4];
1383  int restart_interval, todo;
1384 
1385 // kernels
1386  void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
1387  void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
1388  stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
1389 } stbi__jpeg;
1390 
1391 static int stbi__build_huffman(stbi__huffman *h, int *count)
1392 {
1393  int i,j,k=0,code;
1394  // build size list for each symbol (from JPEG spec)
1395  for (i=0; i < 16; ++i)
1396  for (j=0; j < count[i]; ++j)
1397  h->size[k++] = (stbi_uc) (i+1);
1398  h->size[k] = 0;
1399 
1400  // compute actual symbols (from jpeg spec)
1401  code = 0;
1402  k = 0;
1403  for(j=1; j <= 16; ++j) {
1404  // compute delta to add to code to compute symbol id
1405  h->delta[j] = k - code;
1406  if (h->size[k] == j) {
1407  while (h->size[k] == j)
1408  h->code[k++] = (stbi__uint16) (code++);
1409  if (code-1 >= (1 << j)) return stbi__err("bad code lengths","Corrupt JPEG");
1410  }
1411  // compute largest code + 1 for this size, preshifted as needed later
1412  h->maxcode[j] = code << (16-j);
1413  code <<= 1;
1414  }
1415  h->maxcode[j] = 0xffffffff;
1416 
1417  // build non-spec acceleration table; 255 is flag for not-accelerated
1418  memset(h->fast, 255, 1 << FAST_BITS);
1419  for (i=0; i < k; ++i) {
1420  int s = h->size[i];
1421  if (s <= FAST_BITS) {
1422  int c = h->code[i] << (FAST_BITS-s);
1423  int m = 1 << (FAST_BITS-s);
1424  for (j=0; j < m; ++j) {
1425  h->fast[c+j] = (stbi_uc) i;
1426  }
1427  }
1428  }
1429  return 1;
1430 }
1431 
1432 // build a table that decodes both magnitude and value of small ACs in
1433 // one go.
1434 static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
1435 {
1436  int i;
1437  for (i=0; i < (1 << FAST_BITS); ++i) {
1438  stbi_uc fast = h->fast[i];
1439  fast_ac[i] = 0;
1440  if (fast < 255) {
1441  int rs = h->values[fast];
1442  int run = (rs >> 4) & 15;
1443  int magbits = rs & 15;
1444  int len = h->size[fast];
1445 
1446  if (magbits && len + magbits <= FAST_BITS) {
1447  // magnitude code followed by receive_extend code
1448  int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
1449  int m = 1 << (magbits - 1);
1450  if (k < m) k += (-1 << magbits) + 1;
1451  // if the result is small enough, we can fit it in fast_ac table
1452  if (k >= -128 && k <= 127)
1453  fast_ac[i] = (stbi__int16) ((k << 8) + (run << 4) + (len + magbits));
1454  }
1455  }
1456  }
1457 }
1458 
1459 static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
1460 {
1461  do {
1462  int b = j->nomore ? 0 : stbi__get8(j->s);
1463  if (b == 0xff) {
1464  int c = stbi__get8(j->s);
1465  if (c != 0) {
1466  j->marker = (unsigned char) c;
1467  j->nomore = 1;
1468  return;
1469  }
1470  }
1471  j->code_buffer |= b << (24 - j->code_bits);
1472  j->code_bits += 8;
1473  } while (j->code_bits <= 24);
1474 }
1475 
1476 // (1 << n) - 1
1477 static stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
1478 
1479 // decode a jpeg huffman value from the bitstream
// Returns the decoded value from h->values, or -1 when no code matches or the
// matching code is longer than the number of valid bits in the buffer.
1480 stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
1481 {
1482  unsigned int temp;
1483  int c,k;
1484 
1485  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1486 
1487  // look at the top FAST_BITS and determine what symbol ID it is,
1488  // if the code is <= FAST_BITS
1489  c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1490  k = h->fast[c];
// 255 in fast[] means "not accelerated"; anything else is a symbol index
1491  if (k < 255) {
1492  int s = h->size[k];
// the code claims more bits than the buffer actually holds
1493  if (s > j->code_bits)
1494  return -1;
1495  j->code_buffer <<= s;
1496  j->code_bits -= s;
1497  return h->values[k];
1498  }
1499 
1500  // naive test is to shift the code_buffer down so k bits are
1501  // valid, then test against maxcode. To speed this up, we've
1502  // preshifted maxcode left so that it has (16-k) 0s at the
1503  // end; in other words, regardless of the number of bits, it
1504  // wants to be compared against something shifted to have 16;
1505  // that way we don't need to shift inside the loop.
1506  temp = j->code_buffer >> 16;
// the loop has no bound of its own: stbi__build_huffman() stores 0xffffffff
// in maxcode[17], so the comparison succeeds at k == 17 at the latest
1507  for (k=FAST_BITS+1 ; ; ++k)
1508  if (temp < h->maxcode[k])
1509  break;
1510  if (k == 17) {
1511  // error! code not found
1512  j->code_bits -= 16;
1513  return -1;
1514  }
1515 
1516  if (k > j->code_bits)
1517  return -1;
1518 
1519  // convert the huffman code to the symbol id
1520  c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
1521  STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
1522 
1523  // convert the id to a symbol
1524  j->code_bits -= k;
1525  j->code_buffer <<= k;
1526  return h->values[c];
1527 }
1528 
// stbi__jbias[n] == 1 - (1 << n): the offset added by stbi__extend_receive()
// to an n-bit field whose leading bit is 0
static int const stbi__jbias[16] = {
       0,     -1,     -3,     -7,
     -15,    -31,    -63,   -127,
    -255,   -511,  -1023,  -2047,
   -4095,  -8191, -16383, -32767
};
1531 
1532 // combined JPEG 'receive' and JPEG 'extend', since baseline
1533 // always extends everything it receives.
// Consumes n bits from the bit buffer and returns them as a signed value: if
// the first of those bits is 1 the bits are returned as-is, otherwise
// stbi__jbias[n] is added, giving a negative result.
// n indexes stbi__bmask (17 entries) and stbi__jbias (16 entries) without a
// range check, so callers must pass a value those tables cover.
1534 stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
1535 {
1536  unsigned int k;
1537  int sgn;
1538  if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1539 
// NOTE(review): relies on >> of a negative signed value being an arithmetic
// shift, so sgn is all ones when the top bit is set and 0 otherwise.
1540  sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
// stbi_lrot is defined elsewhere - presumably a 32-bit left rotate, which
// brings the top n bits down to the bottom of k
1541  k = stbi_lrot(j->code_buffer, n);
// keep the unread bits in the buffer, keep the n read bits in k
1542  j->code_buffer = k & ~stbi__bmask[n];
1543  k &= stbi__bmask[n];
1544  j->code_bits -= n;
// the bias is masked away when the leading bit was 1 (~sgn == 0)
1545  return k + (stbi__jbias[n] & ~sgn);
1546 }
1547 
1548 // get some unsigned bits
1549 stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
1550 {
1551  unsigned int k;
1552  if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1553  k = stbi_lrot(j->code_buffer, n);
1554  j->code_buffer = k & ~stbi__bmask[n];
1555  k &= stbi__bmask[n];
1556  j->code_bits -= n;
1557  return k;
1558 }
1559 
1560 stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
1561 {
1562  unsigned int k;
1563  if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
1564  k = j->code_buffer;
1565  j->code_buffer <<= 1;
1566  --j->code_bits;
1567  return k & 0x80000000;
1568 }
1569 
1570 // given a value that's at position X in the zigzag stream,
1571 // where does it appear in the 8x8 matrix coded as row-major?
1572 static stbi_uc stbi__jpeg_dezigzag[64+15] =
1573 {
1574  0, 1, 8, 16, 9, 2, 3, 10,
1575  17, 24, 32, 25, 18, 11, 4, 5,
1576  12, 19, 26, 33, 40, 48, 41, 34,
1577  27, 20, 13, 6, 7, 14, 21, 28,
1578  35, 42, 49, 56, 57, 50, 43, 36,
1579  29, 22, 15, 23, 30, 37, 44, 51,
1580  58, 59, 52, 45, 38, 31, 39, 46,
1581  53, 60, 61, 54, 47, 55, 62, 63,
1582  // let corrupt input sample past end
1583  63, 63, 63, 63, 63, 63, 63, 63,
1584  63, 63, 63, 63, 63, 63, 63
1585 };
1586 
1587 // decode one 64-entry block--
1588 static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi_uc *dequant)
1589 {
1590  int diff,dc,k;
1591  int t;
1592 
1593  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1594  t = stbi__jpeg_huff_decode(j, hdc);
1595  if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1596 
1597  // 0 all the ac values now so we can do it 32-bits at a time
1598  memset(data,0,64*sizeof(data[0]));
1599 
1600  diff = t ? stbi__extend_receive(j, t) : 0;
1601  dc = j->img_comp[b].dc_pred + diff;
1602  j->img_comp[b].dc_pred = dc;
1603  data[0] = (short) (dc * dequant[0]);
1604 
1605  // decode AC components, see JPEG spec
1606  k = 1;
1607  do {
1608  unsigned int zig;
1609  int c,r,s;
1610  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1611  c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1612  r = fac[c];
1613  if (r) { // fast-AC path
1614  k += (r >> 4) & 15; // run
1615  s = r & 15; // combined length
1616  j->code_buffer <<= s;
1617  j->code_bits -= s;
1618  // decode into unzigzag'd location
1619  zig = stbi__jpeg_dezigzag[k++];
1620  data[zig] = (short) ((r >> 8) * dequant[zig]);
1621  } else {
1622  int rs = stbi__jpeg_huff_decode(j, hac);
1623  if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1624  s = rs & 15;
1625  r = rs >> 4;
1626  if (s == 0) {
1627  if (rs != 0xf0) break; // end block
1628  k += 16;
1629  } else {
1630  k += r;
1631  // decode into unzigzag'd location
1632  zig = stbi__jpeg_dezigzag[k++];
1633  data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
1634  }
1635  }
1636  } while (k < 64);
1637  return 1;
1638 }
1639 
1640 static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
1641 {
1642  int diff,dc;
1643  int t;
1644  if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
1645 
1646  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1647 
1648  if (j->succ_high == 0) {
1649  // first scan for DC coefficient, must be first
1650  memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
1651  t = stbi__jpeg_huff_decode(j, hdc);
1652  diff = t ? stbi__extend_receive(j, t) : 0;
1653 
1654  dc = j->img_comp[b].dc_pred + diff;
1655  j->img_comp[b].dc_pred = dc;
1656  data[0] = (short) (dc << j->succ_low);
1657  } else {
1658  // refinement scan for DC coefficient
1659  if (stbi__jpeg_get_bit(j))
1660  data[0] += (short) (1 << j->succ_low);
1661  }
1662  return 1;
1663 }
1664 
// Decode the AC coefficients of one block for a progressive scan covering
// zigzag positions spec_start..spec_end.
//
//   j    - decoder state; spec_start must be non-zero for an AC scan
//   data - the block's 64 coefficients, updated in place
//   hac  - AC Huffman table
//   fac  - fast AC table matching `hac` (used on the first pass only)
//
// succ_high == 0 selects the first pass for this band, anything else a
// refinement pass. j->eob_run counts blocks that are still covered by a
// previously decoded end-of-band run.
//
// Returns 1 on success, 0 with a failure reason on corrupt input.
1665 // @OPTIMIZE: store non-zigzagged during the decode passes,
1666 // and only de-zigzag when dequantizing
1667 static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
1668 {
1669  int k;
1670  if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
1671 
1672  if (j->succ_high == 0) {
// first pass: coefficients are stored shifted left by succ_low
1673  int shift = j->succ_low;
1674 
// this block lies inside an end-of-band run: nothing to decode for it
1675  if (j->eob_run) {
1676  --j->eob_run;
1677  return 1;
1678  }
1679 
1680  k = j->spec_start;
1681  do {
1682  unsigned int zig;
1683  int c,r,s;
1684  if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1685  c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1686  r = fac[c];
1687  if (r) { // fast-AC path
1688  k += (r >> 4) & 15; // run
1689  s = r & 15; // combined length
1690  j->code_buffer <<= s;
1691  j->code_bits -= s;
1692  zig = stbi__jpeg_dezigzag[k++];
1693  data[zig] = (short) ((r >> 8) << shift);
1694  } else {
1695  int rs = stbi__jpeg_huff_decode(j, hac);
1696  if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1697  s = rs & 15;
1698  r = rs >> 4;
1699  if (s == 0) {
// size 0 with run < 15 starts an end-of-band run of 2^r plus r extra bits;
// the current block uses up one of them
1700  if (r < 15) {
1701  j->eob_run = (1 << r);
1702  if (r)
1703  j->eob_run += stbi__jpeg_get_bits(j, r);
1704  --j->eob_run;
1705  break;
1706  }
// size 0 with run == 15: skip sixteen zero coefficients
1707  k += 16;
1708  } else {
1709  k += r;
1710  zig = stbi__jpeg_dezigzag[k++];
1711  data[zig] = (short) (stbi__extend_receive(j,s) << shift);
1712  }
1713  }
1714  } while (k <= j->spec_end);
1715  } else {
1716  // refinement scan for these AC coefficients
1717 
1718  short bit = (short) (1 << j->succ_low);
1719 
1720  if (j->eob_run) {
// inside an end-of-band run: only already-nonzero coefficients are touched,
// each reading one correction bit
1721  --j->eob_run;
1722  for (k = j->spec_start; k <= j->spec_end; ++k) {
1723  short *p = &data[stbi__jpeg_dezigzag[k]];
// the else below pairs with the innermost if (*p > 0): `bit` is added to
// positive coefficients and subtracted from negative ones
1724  if (*p != 0)
1725  if (stbi__jpeg_get_bit(j))
1726  if ((*p & bit)==0)
1727  if (*p > 0)
1728  *p += bit;
1729  else
1730  *p -= bit;
1731  }
1732  } else {
1733  k = j->spec_start;
1734  do {
1735  int r,s;
1736  int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
1737  if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1738  s = rs & 15;
1739  r = rs >> 4;
1740  if (s == 0) {
1741  if (r < 15) {
1742  j->eob_run = (1 << r) - 1;
1743  if (r)
1744  j->eob_run += stbi__jpeg_get_bits(j, r);
1745  r = 64; // force end of block
1746  } else
1747  r = 16; // r=15 is the code for 16 0s
1748  } else {
// in a refinement pass a newly nonzero coefficient can only be +bit or -bit
1749  if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
1750  // sign bit
1751  if (stbi__jpeg_get_bit(j))
1752  s = bit;
1753  else
1754  s = -bit;
1755  }
1756 
1757  // advance by r
// r counts zero-valued coefficients to step over; nonzero ones met on the way
// do not count but each consumes one correction bit
1758  while (k <= j->spec_end) {
1759  short *p = &data[stbi__jpeg_dezigzag[k]];
1760  if (*p != 0) {
1761  if (stbi__jpeg_get_bit(j))
1762  if ((*p & bit)==0)
1763  if (*p > 0)
1764  *p += bit;
1765  else
1766  *p -= bit;
1767  ++k;
1768  } else {
1769  if (r == 0) {
// run exhausted on a zero coefficient: this is where the new value goes
1770  if (s)
1771  data[stbi__jpeg_dezigzag[k++]] = s;
1772  break;
1773  }
1774  --r;
1775  ++k;
1776  }
1777  }
1778  } while (k <= j->spec_end);
1779  }
1780  }
1781  return 1;
1782 }
1783 
1784 // take a -128..127 value and stbi__clamp it and convert to 0..255
1785 stbi_inline static stbi_uc stbi__clamp(int x)
1786 {
1787  // trick to use a single test to catch both cases
1788  if ((unsigned int) x > 255) {
1789  if (x < 0) return 0;
1790  if (x > 255) return 255;
1791  }
1792  return (stbi_uc) x;
1793 }
1794 
// Fixed-point helpers for the integer IDCT (12 fractional bits).
// stbi__f2f converts a floating-point constant to fixed point, adding 0.5
// before the truncating cast.
// stbi__fsh scales an integer to fixed point; it multiplies rather than
// shifts because the operand can be negative.
#define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5)))
#define stbi__fsh(x)  ((x) * 4096)
1797 
// One 8-point inverse DCT pass over the inputs s0..s7, in fixed point with
// constants scaled by 4096 (see stbi__f2f / stbi__fsh).
// The macro declares its own locals, so it can be used once per scope. On
// exit the even part is in x0..x3 and the odd part in t0..t3; the user adds
// rounding and forms the outputs as x0+t3, x0-t3, x1+t2, x1-t2, x2+t1, x2-t1,
// x3+t0 and x3-t0, as stbi__idct_block() does.
1798 // derived from jidctint -- DCT_ISLOW
1799 #define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
1800  int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
1801  p2 = s2; \
1802  p3 = s6; \
1803  p1 = (p2+p3) * stbi__f2f(0.5411961f); \
1804  t2 = p1 + p3*stbi__f2f(-1.847759065f); \
1805  t3 = p1 + p2*stbi__f2f( 0.765366865f); \
1806  p2 = s0; \
1807  p3 = s4; \
1808  t0 = stbi__fsh(p2+p3); \
1809  t1 = stbi__fsh(p2-p3); \
1810  x0 = t0+t3; \
1811  x3 = t0-t3; \
1812  x1 = t1+t2; \
1813  x2 = t1-t2; \
1814  t0 = s7; \
1815  t1 = s5; \
1816  t2 = s3; \
1817  t3 = s1; \
1818  p3 = t0+t2; \
1819  p4 = t1+t3; \
1820  p1 = t0+t3; \
1821  p2 = t1+t2; \
1822  p5 = (p3+p4)*stbi__f2f( 1.175875602f); \
1823  t0 = t0*stbi__f2f( 0.298631336f); \
1824  t1 = t1*stbi__f2f( 2.053119869f); \
1825  t2 = t2*stbi__f2f( 3.072711026f); \
1826  t3 = t3*stbi__f2f( 1.501321110f); \
1827  p1 = p5 + p1*stbi__f2f(-0.899976223f); \
1828  p2 = p5 + p2*stbi__f2f(-2.562915447f); \
1829  p3 = p3*stbi__f2f(-1.961570560f); \
1830  p4 = p4*stbi__f2f(-0.390180644f); \
1831  t3 += p1+p4; \
1832  t2 += p2+p3; \
1833  t1 += p2+p4; \
1834  t0 += p1+p3;
1835 
// Generic C inverse DCT of one 8x8 coefficient block.
//
//   out        - destination for the 8x8 block of 8-bit samples
//   out_stride - distance in bytes between consecutive output rows
//   data       - 64 coefficients in row-major order
//
// Works in two passes: columns of `data` into the temporary val[], then rows
// of val[] into `out`, each output clamped to 0..255.
1836 static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
1837 {
1838  int i,val[64],*v=val;
1839  stbi_uc *o;
1840  short *d = data;
1841 
1842  // columns
1843  for (i=0; i < 8; ++i,++d, ++v) {
1844  // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
1845  if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
1846  && d[40]==0 && d[48]==0 && d[56]==0) {
1847  // no shortcut 0 seconds
1848  // (1|2|3|4|5|6|7)==0 0 seconds
1849  // all separate -0.047 seconds
1850  // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds
// only the DC term is present: every output of this column is the same value,
// scaled by 4 to match the 2 extra bits the general path keeps
1851  int dcterm = d[0] << 2;
1852  v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
1853  } else {
1854  STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
1855  // constants scaled things up by 1<<12; let's bring them back
1856  // down, but keep 2 extra bits of precision
// 512 is half of 1<<10, i.e. rounding for the shift below
1857  x0 += 512; x1 += 512; x2 += 512; x3 += 512;
1858  v[ 0] = (x0+t3) >> 10;
1859  v[56] = (x0-t3) >> 10;
1860  v[ 8] = (x1+t2) >> 10;
1861  v[48] = (x1-t2) >> 10;
1862  v[16] = (x2+t1) >> 10;
1863  v[40] = (x2-t1) >> 10;
1864  v[24] = (x3+t0) >> 10;
1865  v[32] = (x3-t0) >> 10;
1866  }
1867  }
1868 
// rows: one output scanline per iteration
1869  for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
1870  // no fast case since the first 1D IDCT spread components out
1871  STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
1872  // constants scaled things up by 1<<12, plus we had 1<<2 from first
1873  // loop, plus horizontal and vertical each scale by sqrt(8) so together
1874  // we've got an extra 1<<3, so 1<<17 total we need to remove.
1875  // so we want to round that, which means adding 0.5 * 1<<17,
1876  // aka 65536. Also, we'll end up with -128 to 127 that we want
1877  // to encode as 0..255 by adding 128, so we'll add that before the shift
1878  x0 += 65536 + (128<<17);
1879  x1 += 65536 + (128<<17);
1880  x2 += 65536 + (128<<17);
1881  x3 += 65536 + (128<<17);
1882  // tried computing the shifts into temps, or'ing the temps to see
1883  // if any were out of range, but that was slower
1884  o[0] = stbi__clamp((x0+t3) >> 17);
1885  o[7] = stbi__clamp((x0-t3) >> 17);
1886  o[1] = stbi__clamp((x1+t2) >> 17);
1887  o[6] = stbi__clamp((x1-t2) >> 17);
1888  o[2] = stbi__clamp((x2+t1) >> 17);
1889  o[5] = stbi__clamp((x2-t1) >> 17);
1890  o[3] = stbi__clamp((x3+t0) >> 17);
1891  o[4] = stbi__clamp((x3-t0) >> 17);
1892  }
1893 }
1894 
1895 #ifdef STBI_SSE2
// SSE2 implementation of the 8x8 inverse DCT.
//
// Parameters:
//   out        - destination for the 8x8 block of 8-bit samples; each of the
//                eight rows is written with a single 8-byte store
//   out_stride - distance in bytes between the starts of consecutive output rows
//   data       - 64 coefficients, read as eight rows of eight shorts using
//                aligned 128-bit loads (_mm_load_si128), so the buffer has to be
//                16-byte aligned; callers in this file declare it with
//                STBI_SIMD_ALIGN
//
1896 // sse2 integer IDCT. not the fastest possible implementation but it
1897 // produces bit-identical results to the generic C version so it's
1898 // fully "transparent".
1899 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
1900 {
1901  // This is constructed to match our regular (generic) integer IDCT exactly.
1902  __m128i row0, row1, row2, row3, row4, row5, row6, row7;
1903  __m128i tmp;
1904 
1905  // dot product constant: even elems=x, odd elems=y
1906  #define dct_const(x,y) _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))
1907 
1908  // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit)
1909  // out(1) = c1[even]*x + c1[odd]*y
 // x and y are interleaved so that one _mm_madd_epi16 against a dct_const
 // pair yields the 32-bit sum of both products. The macro declares the
 // temporaries c0##lo / c0##hi, so every expansion inside one scope needs a
 // distinct c0 argument.
1910  #define dct_rot(out0,out1, x,y,c0,c1) \
1911  __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
1912  __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
1913  __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
1914  __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
1915  __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
1916  __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
1917 
1918  // out = in << 12 (in 16-bit, out 32-bit)
 // interleaving with zero places each input in the upper half of a 32-bit
 // lane (in << 16); the arithmetic shift right by 4 then leaves in << 12
 // with the sign preserved.
1919  #define dct_widen(out, in) \
1920  __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
1921  __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
1922 
1923  // wide add
 // (a "wide" value is a pair of 32-bit vectors named <name>_l and <name>_h)
1924  #define dct_wadd(out, a, b) \
1925  __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
1926  __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
1927 
1928  // wide sub
1929  #define dct_wsub(out, a, b) \
1930  __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
1931  __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
1932 
1933  // butterfly a/b, add bias, then shift by "s" and pack
 // out0 = (a + bias + b) >> s, out1 = (a + bias - b) >> s; the 32-bit results
 // are narrowed back to 16 bits with signed saturation (_mm_packs_epi32).
1934  #define dct_bfly32o(out0, out1, a,b,bias,s) \
1935  { \
1936  __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
1937  __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
1938  dct_wadd(sum, abiased, b); \
1939  dct_wsub(dif, abiased, b); \
1940  out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
1941  out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
1942  }
1943 
1944  // 8-bit interleave step (for transposes)
 // uses the function-level scratch register "tmp"
1945  #define dct_interleave8(a, b) \
1946  tmp = a; \
1947  a = _mm_unpacklo_epi8(a, b); \
1948  b = _mm_unpackhi_epi8(tmp, b)
1949 
1950  // 16-bit interleave step (for transposes)
1951  #define dct_interleave16(a, b) \
1952  tmp = a; \
1953  a = _mm_unpacklo_epi16(a, b); \
1954  b = _mm_unpackhi_epi16(tmp, b)
1955 
 // one 1D IDCT over all eight vectors at once: reads row0..row7 and the
 // rotN_M constants from the enclosing scope and writes the results back
 // into row0..row7. All temporaries live in the macro's own brace scope, so
 // the macro can be expanded twice in this function.
1956  #define dct_pass(bias,shift) \
1957  { \
1958  /* even part */ \
1959  dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
1960  __m128i sum04 = _mm_add_epi16(row0, row4); \
1961  __m128i dif04 = _mm_sub_epi16(row0, row4); \
1962  dct_widen(t0e, sum04); \
1963  dct_widen(t1e, dif04); \
1964  dct_wadd(x0, t0e, t3e); \
1965  dct_wsub(x3, t0e, t3e); \
1966  dct_wadd(x1, t1e, t2e); \
1967  dct_wsub(x2, t1e, t2e); \
1968  /* odd part */ \
1969  dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
1970  dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
1971  __m128i sum17 = _mm_add_epi16(row1, row7); \
1972  __m128i sum35 = _mm_add_epi16(row3, row5); \
1973  dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
1974  dct_wadd(x4, y0o, y4o); \
1975  dct_wadd(x5, y1o, y5o); \
1976  dct_wadd(x6, y2o, y5o); \
1977  dct_wadd(x7, y3o, y4o); \
1978  dct_bfly32o(row0,row7, x0,x7,bias,shift); \
1979  dct_bfly32o(row1,row6, x1,x6,bias,shift); \
1980  dct_bfly32o(row2,row5, x2,x5,bias,shift); \
1981  dct_bfly32o(row3,row4, x3,x4,bias,shift); \
1982  }
1983 
 // fixed-point rotation constants, pre-combined into (even,odd) pairs so a
 // single multiply-add in dct_rot produces each rotated term
1984  __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
1985  __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
1986  __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
1987  __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
1988  __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
1989  __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
1990  __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
1991  __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));
1992 
1993  // rounding biases in column/row passes, see stbi__idct_block for explanation.
1994  __m128i bias_0 = _mm_set1_epi32(512);
1995  __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));
1996 
1997  // load
 // aligned loads: data must sit on a 16-byte boundary
1998  row0 = _mm_load_si128((const __m128i *) (data + 0*8));
1999  row1 = _mm_load_si128((const __m128i *) (data + 1*8));
2000  row2 = _mm_load_si128((const __m128i *) (data + 2*8));
2001  row3 = _mm_load_si128((const __m128i *) (data + 3*8));
2002  row4 = _mm_load_si128((const __m128i *) (data + 4*8));
2003  row5 = _mm_load_si128((const __m128i *) (data + 5*8));
2004  row6 = _mm_load_si128((const __m128i *) (data + 6*8));
2005  row7 = _mm_load_si128((const __m128i *) (data + 7*8));
2006 
2007  // column pass
2008  dct_pass(bias_0, 10);
2009 
 // transpose so that the second dct_pass works along the other dimension
2010  {
2011  // 16bit 8x8 transpose pass 1
2012  dct_interleave16(row0, row4);
2013  dct_interleave16(row1, row5);
2014  dct_interleave16(row2, row6);
2015  dct_interleave16(row3, row7);
2016 
2017  // transpose pass 2
2018  dct_interleave16(row0, row2);
2019  dct_interleave16(row1, row3);
2020  dct_interleave16(row4, row6);
2021  dct_interleave16(row5, row7);
2022 
2023  // transpose pass 3
2024  dct_interleave16(row0, row1);
2025  dct_interleave16(row2, row3);
2026  dct_interleave16(row4, row5);
2027  dct_interleave16(row6, row7);
2028  }
2029 
2030  // row pass
2031  dct_pass(bias_1, 17);
2032 
2033  {
2034  // pack
 // _mm_packus_epi16 narrows to bytes with unsigned saturation, which
 // performs the clamp to 0..255
2035  __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
2036  __m128i p1 = _mm_packus_epi16(row2, row3);
2037  __m128i p2 = _mm_packus_epi16(row4, row5);
2038  __m128i p3 = _mm_packus_epi16(row6, row7);
2039 
2040  // 8bit 8x8 transpose pass 1
2041  dct_interleave8(p0, p2); // a0e0a1e1...
2042  dct_interleave8(p1, p3); // c0g0c1g1...
2043 
2044  // transpose pass 2
2045  dct_interleave8(p0, p1); // a0c0e0g0...
2046  dct_interleave8(p2, p3); // b0d0f0h0...
2047 
2048  // transpose pass 3
2049  dct_interleave8(p0, p2); // a0b0c0d0...
2050  dct_interleave8(p1, p3); // a4b4c4d4...
2051 
2052  // store
 // each register holds two output rows; _mm_storel_epi64 writes the low
 // 8 bytes, and the 0x4e shuffle swaps the 64-bit halves so the second
 // row can be written the same way
2053  _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
2054  _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
2055  _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
2056  _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
2057  _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
2058  _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
2059  _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
2060  _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
2061  }
2062 
2063 #undef dct_const
2064 #undef dct_rot
2065 #undef dct_widen
2066 #undef dct_wadd
2067 #undef dct_wsub
2068 #undef dct_bfly32o
2069 #undef dct_interleave8
2070 #undef dct_interleave16
2071 #undef dct_pass
2072 }
2073 
2074 #endif // STBI_SSE2
2075 
2076 #ifdef STBI_NEON
2077 
// NEON implementation of the 8x8 inverse DCT.
//
// Parameters:
//   out        - destination for the 8x8 block of 8-bit samples; each of the
//                eight rows is written with one 8-byte vst1_u8 store
//   out_stride - distance in bytes between the starts of consecutive output rows
//   data       - 64 coefficients, loaded as eight rows of eight shorts
//
2078 // NEON integer IDCT. should produce bit-identical
2079 // results to the generic C version.
2080 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2081 {
2082  int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
2083 
 // fixed-point rotation constants, each broadcast to four 16-bit lanes so
 // they can feed the widening multiplies below
2084  int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
2085  int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
2086  int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
2087  int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
2088  int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
2089  int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
2090  int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
2091  int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
2092  int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
2093  int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
2094  int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
2095  int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));
2096 
// out = inq * coeff, widened to 32 bits (low and high halves kept as out_l / out_h)
2097 #define dct_long_mul(out, inq, coeff) \
2098  int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
2099  int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
2100 
// out = acc + inq * coeff (32-bit accumulate of a widening multiply)
2101 #define dct_long_mac(out, acc, inq, coeff) \
2102  int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
2103  int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
2104 
// out = inq << 12 (inq 16-bit, out 32-bit)
2105 #define dct_widen(out, inq) \
2106  int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
2107  int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
2108 
2109 // wide add
2110 #define dct_wadd(out, a, b) \
2111  int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
2112  int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
2113 
2114 // wide sub
2115 #define dct_wsub(out, a, b) \
2116  int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
2117  int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
2118 
2119 // butterfly a/b, then shift using "shiftop" by "s" and pack
2120 #define dct_bfly32o(out0,out1, a,b,shiftop,s) \
2121  { \
2122  dct_wadd(sum, a, b); \
2123  dct_wsub(dif, a, b); \
2124  out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
2125  out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
2126  }
2127 
// one 1D IDCT over all eight vectors at once: reads row0..row7 and the
// rotN_M constants from the enclosing scope and writes the results back into
// row0..row7. Temporaries live in the macro's own brace scope, so the macro
// can be expanded twice in this function.
2128 #define dct_pass(shiftop, shift) \
2129  { \
2130  /* even part */ \
2131  int16x8_t sum26 = vaddq_s16(row2, row6); \
2132  dct_long_mul(p1e, sum26, rot0_0); \
2133  dct_long_mac(t2e, p1e, row6, rot0_1); \
2134  dct_long_mac(t3e, p1e, row2, rot0_2); \
2135  int16x8_t sum04 = vaddq_s16(row0, row4); \
2136  int16x8_t dif04 = vsubq_s16(row0, row4); \
2137  dct_widen(t0e, sum04); \
2138  dct_widen(t1e, dif04); \
2139  dct_wadd(x0, t0e, t3e); \
2140  dct_wsub(x3, t0e, t3e); \
2141  dct_wadd(x1, t1e, t2e); \
2142  dct_wsub(x2, t1e, t2e); \
2143  /* odd part */ \
2144  int16x8_t sum15 = vaddq_s16(row1, row5); \
2145  int16x8_t sum17 = vaddq_s16(row1, row7); \
2146  int16x8_t sum35 = vaddq_s16(row3, row5); \
2147  int16x8_t sum37 = vaddq_s16(row3, row7); \
2148  int16x8_t sumodd = vaddq_s16(sum17, sum35); \
2149  dct_long_mul(p5o, sumodd, rot1_0); \
2150  dct_long_mac(p1o, p5o, sum17, rot1_1); \
2151  dct_long_mac(p2o, p5o, sum35, rot1_2); \
2152  dct_long_mul(p3o, sum37, rot2_0); \
2153  dct_long_mul(p4o, sum15, rot2_1); \
2154  dct_wadd(sump13o, p1o, p3o); \
2155  dct_wadd(sump24o, p2o, p4o); \
2156  dct_wadd(sump23o, p2o, p3o); \
2157  dct_wadd(sump14o, p1o, p4o); \
2158  dct_long_mac(x4, sump13o, row7, rot3_0); \
2159  dct_long_mac(x5, sump24o, row5, rot3_1); \
2160  dct_long_mac(x6, sump23o, row3, rot3_2); \
2161  dct_long_mac(x7, sump14o, row1, rot3_3); \
2162  dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
2163  dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
2164  dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
2165  dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
2166  }
2167 
2168  // load
2169  row0 = vld1q_s16(data + 0*8);
2170  row1 = vld1q_s16(data + 1*8);
2171  row2 = vld1q_s16(data + 2*8);
2172  row3 = vld1q_s16(data + 3*8);
2173  row4 = vld1q_s16(data + 4*8);
2174  row5 = vld1q_s16(data + 5*8);
2175  row6 = vld1q_s16(data + 6*8);
2176  row7 = vld1q_s16(data + 7*8);
2177 
2178  // add DC bias
 // 1024 is added to the DC coefficient only (lane 0 of row0). Carried through
 // both passes (<<12, >>10, <<12) it becomes 128<<17 in every output sample,
 // i.e. the same +128 level shift the other implementations add as an
 // explicit bias before the final shift.
2179  row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
2180 
2181  // column pass
 // the rounding narrowing shift supplies the +512 rounding term that the
 // other implementations add explicitly before shifting by 10
2182  dct_pass(vrshrn_n_s32, 10);
2183 
2184  // 16bit 8x8 transpose
2185  {
2186 // these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
2187 // whether compilers actually get this is another story, sadly.
2188 #define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
2189 #define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
2190 #define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }
2191 
2192  // pass 1
2193  dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
2194  dct_trn16(row2, row3);
2195  dct_trn16(row4, row5);
2196  dct_trn16(row6, row7);
2197 
2198  // pass 2
2199  dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
2200  dct_trn32(row1, row3);
2201  dct_trn32(row4, row6);
2202  dct_trn32(row5, row7);
2203 
2204  // pass 3
2205  dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
2206  dct_trn64(row1, row5);
2207  dct_trn64(row2, row6);
2208  dct_trn64(row3, row7);
2209 
2210 #undef dct_trn16
2211 #undef dct_trn32
2212 #undef dct_trn64
2213  }
2214 
2215  // row pass
2216  // vrshrn_n_s32 only supports shifts up to 16, we need
2217  // 17. so do a non-rounding shift of 16 first then follow
2218  // up with a rounding shift by 1.
2219  dct_pass(vshrn_n_s32, 16);
2220 
2221  {
2222  // pack and round
 // vqrshrun_n_s16 does the remaining rounding shift by 1 and narrows to
 // unsigned 8-bit with saturation, which performs the clamp to 0..255
2223  uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
2224  uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
2225  uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
2226  uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
2227  uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
2228  uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
2229  uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
2230  uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
2231 
2232  // again, these can translate into one instruction, but often don't.
2233 #define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
2234 #define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
2235 #define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }
2236 
2237  // sadly can't use interleaved stores here since we only write
2238  // 8 bytes to each scan line!
2239 
2240  // 8x8 8-bit transpose pass 1
2241  dct_trn8_8(p0, p1);
2242  dct_trn8_8(p2, p3);
2243  dct_trn8_8(p4, p5);
2244  dct_trn8_8(p6, p7);
2245 
2246  // pass 2
2247  dct_trn8_16(p0, p2);
2248  dct_trn8_16(p1, p3);
2249  dct_trn8_16(p4, p6);
2250  dct_trn8_16(p5, p7);
2251 
2252  // pass 3
2253  dct_trn8_32(p0, p4);
2254  dct_trn8_32(p1, p5);
2255  dct_trn8_32(p2, p6);
2256  dct_trn8_32(p3, p7);
2257 
2258  // store
 // one 8-byte row per store, advancing by out_stride between rows
2259  vst1_u8(out, p0); out += out_stride;
2260  vst1_u8(out, p1); out += out_stride;
2261  vst1_u8(out, p2); out += out_stride;
2262  vst1_u8(out, p3); out += out_stride;
2263  vst1_u8(out, p4); out += out_stride;
2264  vst1_u8(out, p5); out += out_stride;
2265  vst1_u8(out, p6); out += out_stride;
2266  vst1_u8(out, p7);
2267 
2268 #undef dct_trn8_8
2269 #undef dct_trn8_16
2270 #undef dct_trn8_32
2271  }
2272 
2273 #undef dct_long_mul
2274 #undef dct_long_mac
2275 #undef dct_widen
2276 #undef dct_wadd
2277 #undef dct_wsub
2278 #undef dct_bfly32o
2279 #undef dct_pass
2280 }
2281 
2282 #endif // STBI_NEON
2283 
2284 #define STBI__MARKER_none 0xff
2285 // if there's a pending marker from the entropy stream, return that
2286 // otherwise, fetch from the stream and get a marker. if there's no
2287 // marker, return 0xff, which is never a valid marker value
2288 static stbi_uc stbi__get_marker(stbi__jpeg *j)
2289 {
2290  stbi_uc x;
2291  if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
2292  x = stbi__get8(j->s);
2293  if (x != 0xff) return STBI__MARKER_none;
2294  while (x == 0xff)
2295  x = stbi__get8(j->s);
2296  return x;
2297 }
2298 
2299 // in each scan, we'll have scan_n components, and the order
2300 // of the components is specified by order[]
2301 #define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7)
2302 
2303 // after a restart interval, stbi__jpeg_reset the entropy decoder and
2304 // the dc prediction
2305 static void stbi__jpeg_reset(stbi__jpeg *j)
2306 {
2307  j->code_bits = 0;
2308  j->code_buffer = 0;
2309  j->nomore = 0;
2310  j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0;
2311  j->marker = STBI__MARKER_none;
2312  j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
2313  j->eob_run = 0;
2314  // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
2315  // since we don't even allow 1<<30 pixels
2316 }
2317 
2318 static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
2319 {
2320  stbi__jpeg_reset(z);
2321  if (!z->progressive) {
2322  if (z->scan_n == 1) {
2323  int i,j;
2324  STBI_SIMD_ALIGN(short, data[64]);
2325  int n = z->order[0];
2326  // non-interleaved data, we just need to process one block at a time,
2327  // in trivial scanline order
2328  // number of blocks to do just depends on how many actual "pixels" this
2329  // component has, independent of interleaved MCU blocking and such
2330  int w = (z->img_comp[n].x+7) >> 3;
2331  int h = (z->img_comp[n].y+7) >> 3;
2332  for (j=0; j < h; ++j) {
2333  for (i=0; i < w; ++i) {
2334  int ha = z->img_comp[n].ha;
2335  if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
2336  z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
2337  // every data block is an MCU, so countdown the restart interval
2338  if (--z->todo <= 0) {
2339  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2340  // if it's NOT a restart, then just bail, so we get corrupt data
2341  // rather than no data
2342  if (!STBI__RESTART(z->marker)) return 1;
2343  stbi__jpeg_reset(z);
2344  }
2345  }
2346  }
2347  return 1;
2348  } else { // interleaved
2349  int i,j,k,x,y;
2350  STBI_SIMD_ALIGN(short, data[64]);
2351  for (j=0; j < z->img_mcu_y; ++j) {
2352  for (i=0; i < z->img_mcu_x; ++i) {
2353  // scan an interleaved mcu... process scan_n components in order
2354  for (k=0; k < z->scan_n; ++k) {
2355  int n = z->order[k];
2356  // scan out an mcu's worth of this component; that's just determined
2357  // by the basic H and V specified for the component
2358  for (y=0; y < z->img_comp[n].v; ++y) {
2359  for (x=0; x < z->img_comp[n].h; ++x) {
2360  int x2 = (i*z->img_comp[n].h + x)*8;
2361  int y2 = (j*z->img_comp[n].v + y)*8;
2362  int ha = z->img_comp[n].ha;
2363  if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
2364  z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
2365  }
2366  }
2367  }
2368  // after all interleaved components, that's an interleaved MCU,
2369  // so now count down the restart interval
2370  if (--z->todo <= 0) {
2371  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2372  if (!STBI__RESTART(z->marker)) return 1;
2373  stbi__jpeg_reset(z);
2374  }
2375  }
2376  }
2377  return 1;
2378  }
2379  } else {
2380  if (z->scan_n == 1) {
2381  int i,j;
2382  int n = z->order[0];
2383  // non-interleaved data, we just need to process one block at a time,
2384  // in trivial scanline order
2385  // number of blocks to do just depends on how many actual "pixels" this
2386  // component has, independent of interleaved MCU blocking and such
2387  int w = (z->img_comp[n].x+7) >> 3;
2388  int h = (z->img_comp[n].y+7) >> 3;
2389  for (j=0; j < h; ++j) {
2390  for (i=0; i < w; ++i) {
2391  short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
2392  if (z->spec_start == 0) {
2393  if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
2394  return 0;
2395  } else {
2396  int ha = z->img_comp[n].ha;
2397  if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
2398  return 0;
2399  }
2400  // every data block is an MCU, so countdown the restart interval
2401  if (--z->todo <= 0) {
2402  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2403  if (!STBI__RESTART(z->marker)) return 1;
2404  stbi__jpeg_reset(z);
2405  }
2406  }
2407  }
2408  return 1;
2409  } else { // interleaved
2410  int i,j,k,x,y;
2411  for (j=0; j < z->img_mcu_y; ++j) {
2412  for (i=0; i < z->img_mcu_x; ++i) {
2413  // scan an interleaved mcu... process scan_n components in order
2414  for (k=0; k < z->scan_n; ++k) {
2415  int n = z->order[k];
2416  // scan out an mcu's worth of this component; that's just determined
2417  // by the basic H and V specified for the component
2418  for (y=0; y < z->img_comp[n].v; ++y) {
2419  for (x=0; x < z->img_comp[n].h; ++x) {
2420  int x2 = (i*z->img_comp[n].h + x);
2421  int y2 = (j*z->img_comp[n].v + y);
2422  int ha = z->img_comp[n].ha;
2423  short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
2424  if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
2425  return 0;
2426  }
2427  }
2428  }
2429  // after all interleaved components, that's an interleaved MCU,
2430  // so now count down the restart interval
2431  if (--z->todo <= 0) {
2432  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2433  if (!STBI__RESTART(z->marker)) return 1;
2434  stbi__jpeg_reset(z);
2435  }
2436  }
2437  }
2438  return 1;
2439  }
2440  }
2441 }
2442 
2443 static void stbi__jpeg_dequantize(short *data, stbi_uc *dequant)
2444 {
2445  int i;
2446  for (i=0; i < 64; ++i)
2447  data[i] *= dequant[i];
2448 }
2449 
2450 static void stbi__jpeg_finish(stbi__jpeg *z)
2451 {
2452  if (z->progressive) {
2453  // dequantize and idct the data
2454  int i,j,n;
2455  for (n=0; n < z->s->img_n; ++n) {
2456  int w = (z->img_comp[n].x+7) >> 3;
2457  int h = (z->img_comp[n].y+7) >> 3;
2458  for (j=0; j < h; ++j) {
2459  for (i=0; i < w; ++i) {
2460  short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
2461  stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
2462  z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
2463  }
2464  }
2465  }
2466  }
2467 }
2468 
2469 static int stbi__process_marker(stbi__jpeg *z, int m)
2470 {
2471  int L;
2472  switch (m) {
2473  case STBI__MARKER_none: // no marker found
2474  return stbi__err("expected marker","Corrupt JPEG");
2475 
2476  case 0xDD: // DRI - specify restart interval
2477  if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
2478  z->restart_interval = stbi__get16be(z->s);
2479  return 1;
2480 
2481  case 0xDB: // DQT - define quantization table
2482  L = stbi__get16be(z->s)-2;
2483  while (L > 0) {
2484  int q = stbi__get8(z->s);
2485  int p = q >> 4;
2486  int t = q & 15,i;
2487  if (p != 0) return stbi__err("bad DQT type","Corrupt JPEG");
2488  if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");
2489  for (i=0; i < 64; ++i)
2490  z->dequant[t][stbi__jpeg_dezigzag[i]] = stbi__get8(z->s);
2491  L -= 65;
2492  }
2493  return L==0;
2494 
2495  case 0xC4: // DHT - define huffman table
2496  L = stbi__get16be(z->s)-2;
2497  while (L > 0) {
2498  stbi_uc *v;
2499  int sizes[16],i,n=0;
2500  int q = stbi__get8(z->s);
2501  int tc = q >> 4;
2502  int th = q & 15;
2503  if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
2504  for (i=0; i < 16; ++i) {
2505  sizes[i] = stbi__get8(z->s);
2506  n += sizes[i];
2507  }
2508  L -= 17;
2509  if (tc == 0) {
2510  if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
2511  v = z->huff_dc[th].values;
2512  } else {
2513  if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
2514  v = z->huff_ac[th].values;
2515  }
2516  for (i=0; i < n; ++i)
2517  v[i] = stbi__get8(z->s);
2518  if (tc != 0)
2519  stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
2520  L -= n;
2521  }
2522  return L==0;
2523  }
2524  // check for comment block or APP blocks
2525  if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
2526  stbi__skip(z->s, stbi__get16be(z->s)-2);
2527  return 1;
2528  }
2529  return 0;
2530 }
2531 
2532 // after we see SOS
2533 static int stbi__process_scan_header(stbi__jpeg *z)
2534 {
2535  int i;
2536  int Ls = stbi__get16be(z->s);
2537  z->scan_n = stbi__get8(z->s);
2538  if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
2539  if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
2540  for (i=0; i < z->scan_n; ++i) {
2541  int id = stbi__get8(z->s), which;
2542  int q = stbi__get8(z->s);
2543  for (which = 0; which < z->s->img_n; ++which)
2544  if (z->img_comp[which].id == id)
2545  break;
2546  if (which == z->s->img_n) return 0; // no match
2547  z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
2548  z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
2549  z->order[i] = which;
2550  }
2551 
2552  {
2553  int aa;
2554  z->spec_start = stbi__get8(z->s);
2555  z->spec_end = stbi__get8(z->s); // should be 63, but might be 0
2556  aa = stbi__get8(z->s);
2557  z->succ_high = (aa >> 4);
2558  z->succ_low = (aa & 15);
2559  if (z->progressive) {
2560  if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
2561  return stbi__err("bad SOS", "Corrupt JPEG");
2562  } else {
2563  if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
2564  if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
2565  z->spec_end = 63;
2566  }
2567  }
2568 
2569  return 1;
2570 }
2571 
2572 static int stbi__process_frame_header(stbi__jpeg *z, int scan)
2573 {
2574  stbi__context *s = z->s;
2575  int Lf,p,i,q, h_max=1,v_max=1,c;
2576  Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
2577  p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
2578  s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
2579  s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
2580  c = stbi__get8(s);
2581  if (c != 3 && c != 1) return stbi__err("bad component count","Corrupt JPEG"); // JFIF requires
2582  s->img_n = c;
2583  for (i=0; i < c; ++i) {
2584  z->img_comp[i].data = NULL;
2585  z->img_comp[i].linebuf = NULL;
2586  }
2587 
2588  if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");
2589 
2590  for (i=0; i < s->img_n; ++i) {
2591  z->img_comp[i].id = stbi__get8(s);
2592  if (z->img_comp[i].id != i+1) // JFIF requires
2593  if (z->img_comp[i].id != i) // some version of jpegtran outputs non-JFIF-compliant files!
2594  return stbi__err("bad component ID","Corrupt JPEG");
2595  q = stbi__get8(s);
2596  z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
2597  z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
2598  z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
2599  }
2600 
2601  if (scan != STBI__SCAN_load) return 1;
2602 
2603  if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
2604 
2605  for (i=0; i < s->img_n; ++i) {
2606  if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
2607  if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
2608  }
2609 
2610  // compute interleaved mcu info
2611  z->img_h_max = h_max;
2612  z->img_v_max = v_max;
2613  z->img_mcu_w = h_max * 8;
2614  z->img_mcu_h = v_max * 8;
2615  z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
2616  z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;
2617 
2618  for (i=0; i < s->img_n; ++i) {
2619  // number of effective pixels (e.g. for non-interleaved MCU)
2620  z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
2621  z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
2622  // to simplify generation, we'll allocate enough memory to decode
2623  // the bogus oversized data from using interleaved MCUs and their
2624  // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
2625  // discard the extra data until colorspace conversion
2626  z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
2627  z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
2628  z->img_comp[i].raw_data = stbi__malloc(z->img_comp[i].w2 * z->img_comp[i].h2+15);
2629 
2630  if (z->img_comp[i].raw_data == NULL) {
2631  for(--i; i >= 0; --i) {
2632  STBI_FREE(z->img_comp[i].raw_data);
2633  z->img_comp[i].data = NULL;
2634  }
2635  return stbi__err("outofmem", "Out of memory");
2636  }
2637  // align blocks for idct using mmx/sse
2638  z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
2639  z->img_comp[i].linebuf = NULL;
2640  if (z->progressive) {
2641  z->img_comp[i].coeff_w = (z->img_comp[i].w2 + 7) >> 3;
2642  z->img_comp[i].coeff_h = (z->img_comp[i].h2 + 7) >> 3;
2643  z->img_comp[i].raw_coeff = STBI_MALLOC(z->img_comp[i].coeff_w * z->img_comp[i].coeff_h * 64 * sizeof(short) + 15);
2644  z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
2645  } else {
2646  z->img_comp[i].coeff = 0;
2647  z->img_comp[i].raw_coeff = 0;
2648  }
2649  }
2650 
2651  return 1;
2652 }
2653 
2654 // use comparisons since in some cases we handle more than one case (e.g. SOF)
2655 #define stbi__DNL(x) ((x) == 0xdc)
2656 #define stbi__SOI(x) ((x) == 0xd8)
2657 #define stbi__EOI(x) ((x) == 0xd9)
2658 #define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
2659 #define stbi__SOS(x) ((x) == 0xda)
2660 
2661 #define stbi__SOF_progressive(x) ((x) == 0xc2)
2662 
2663 static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
2664 {
2665  int m;
2666  z->marker = STBI__MARKER_none; // initialize cached marker to empty
2667  m = stbi__get_marker(z);
2668  if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG");
2669  if (scan == STBI__SCAN_type) return 1;
2670  m = stbi__get_marker(z);
2671  while (!stbi__SOF(m)) {
2672  if (!stbi__process_marker(z,m)) return 0;
2673  m = stbi__get_marker(z);
2674  while (m == STBI__MARKER_none) {
2675  // some files have extra padding after their blocks, so ok, we'll scan
2676  if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG");
2677  m = stbi__get_marker(z);
2678  }
2679  }
2680  z->progressive = stbi__SOF_progressive(m);
2681  if (!stbi__process_frame_header(z, scan)) return 0;
2682  return 1;
2683 }
2684 
2685 // decode image to YCbCr format
2686 static int stbi__decode_jpeg_image(stbi__jpeg *j)
2687 {
2688  int m;
2689  j->restart_interval = 0;
2690  if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
2691  m = stbi__get_marker(j);
2692  while (!stbi__EOI(m)) {
2693  if (stbi__SOS(m)) {
2694  if (!stbi__process_scan_header(j)) return 0;
2695  if (!stbi__parse_entropy_coded_data(j)) return 0;
2696  if (j->marker == STBI__MARKER_none ) {
2697  // handle 0s at the end of image data from IP Kamera 9060
2698  while (!stbi__at_eof(j->s)) {
2699  int x = stbi__get8(j->s);
2700  if (x == 255) {
2701  j->marker = stbi__get8(j->s);
2702  break;
2703  } else if (x != 0) {
2704  return stbi__err("junk before marker", "Corrupt JPEG");
2705  }
2706  }
2707  // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
2708  }
2709  } else {
2710  if (!stbi__process_marker(j, m)) return 0;
2711  }
2712  m = stbi__get_marker(j);
2713  }
2714  if (j->progressive)
2715  stbi__jpeg_finish(j);
2716  return 1;
2717 }
2718 
2719 // static jfif-centered resampling (across block boundaries)
2720 
2721 typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
2722  int w, int hs);
2723 
2724 #define stbi__div4(x) ((stbi_uc) ((x) >> 2))
2725 
2726 static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
2727 {
2728  STBI_NOTUSED(out);
2729  STBI_NOTUSED(in_far);
2730  STBI_NOTUSED(w);
2731  STBI_NOTUSED(hs);
2732  return in_near;
2733 }
2734 
2735 static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
2736 {
2737  // need to generate two samples vertically for every one in input
2738  int i;
2739  STBI_NOTUSED(hs);
2740  for (i=0; i < w; ++i)
2741  out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2);
2742  return out;
2743 }
2744 
2745 static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
2746 {
2747  // need to generate two samples horizontally for every one in input
2748  int i;
2749  stbi_uc *input = in_near;
2750 
2751  if (w == 1) {
2752  // if only one sample, can't do any interpolation
2753  out[0] = out[1] = input[0];
2754  return out;
2755  }
2756 
2757  out[0] = input[0];
2758  out[1] = stbi__div4(input[0]*3 + input[1] + 2);
2759  for (i=1; i < w-1; ++i) {
2760  int n = 3*input[i]+2;
2761  out[i*2+0] = stbi__div4(n+input[i-1]);
2762  out[i*2+1] = stbi__div4(n+input[i+1]);
2763  }
2764  out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2);
2765  out[i*2+1] = input[w-1];
2766 
2767  STBI_NOTUSED(in_far);
2768  STBI_NOTUSED(hs);
2769 
2770  return out;
2771 }
2772 
2773 #define stbi__div16(x) ((stbi_uc) ((x) >> 4))
2774 
2775 static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
2776 {
2777  // need to generate 2x2 samples for every one in input
2778  int i,t0,t1;
2779  if (w == 1) {
2780  out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
2781  return out;
2782  }
2783 
2784  t1 = 3*in_near[0] + in_far[0];
2785  out[0] = stbi__div4(t1+2);
2786  for (i=1; i < w; ++i) {
2787  t0 = t1;
2788  t1 = 3*in_near[i]+in_far[i];
2789  out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
2790  out[i*2 ] = stbi__div16(3*t1 + t0 + 8);
2791  }
2792  out[w*2-1] = stbi__div4(t1+2);
2793 
2794  STBI_NOTUSED(hs);
2795 
2796  return out;
2797 }
2798 
2799 #if defined(STBI_SSE2) || defined(STBI_NEON)
2800 static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
2801 {
2802  // need to generate 2x2 samples for every one in input
2803  int i=0,t0,t1;
2804 
2805  if (w == 1) {
2806  out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
2807  return out;
2808  }
2809 
2810  t1 = 3*in_near[0] + in_far[0];
2811  // process groups of 8 pixels for as long as we can.
2812  // note we can't handle the last pixel in a row in this loop
2813  // because we need to handle the filter boundary conditions.
2814  for (; i < ((w-1) & ~7); i += 8) {
2815 #if defined(STBI_SSE2)
2816  // load and perform the vertical filtering pass
2817  // this uses 3*x + y = 4*x + (y - x)
2818  __m128i zero = _mm_setzero_si128();
2819  __m128i farb = _mm_loadl_epi64((__m128i *) (in_far + i));
2820  __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
2821  __m128i farw = _mm_unpacklo_epi8(farb, zero);
2822  __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
2823  __m128i diff = _mm_sub_epi16(farw, nearw);
2824  __m128i nears = _mm_slli_epi16(nearw, 2);
2825  __m128i curr = _mm_add_epi16(nears, diff); // current row
2826 
2827  // horizontal filter works the same based on shifted vers of current
2828  // row. "prev" is current row shifted right by 1 pixel; we need to
2829  // insert the previous pixel value (from t1).
2830  // "next" is current row shifted left by 1 pixel, with first pixel
2831  // of next block of 8 pixels added in.
2832  __m128i prv0 = _mm_slli_si128(curr, 2);
2833  __m128i nxt0 = _mm_srli_si128(curr, 2);
2834  __m128i prev = _mm_insert_epi16(prv0, t1, 0);
2835  __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7);
2836 
2837  // horizontal filter, polyphase implementation since it's convenient:
2838  // even pixels = 3*cur + prev = cur*4 + (prev - cur)
2839  // odd pixels = 3*cur + next = cur*4 + (next - cur)
2840  // note the shared term.
2841  __m128i bias = _mm_set1_epi16(8);
2842  __m128i curs = _mm_slli_epi16(curr, 2);
2843  __m128i prvd = _mm_sub_epi16(prev, curr);
2844  __m128i nxtd = _mm_sub_epi16(next, curr);
2845  __m128i curb = _mm_add_epi16(curs, bias);
2846  __m128i even = _mm_add_epi16(prvd, curb);
2847  __m128i odd = _mm_add_epi16(nxtd, curb);
2848 
2849  // interleave even and odd pixels, then undo scaling.
2850  __m128i int0 = _mm_unpacklo_epi16(even, odd);
2851  __m128i int1 = _mm_unpackhi_epi16(even, odd);
2852  __m128i de0 = _mm_srli_epi16(int0, 4);
2853  __m128i de1 = _mm_srli_epi16(int1, 4);
2854 
2855  // pack and write output
2856  __m128i outv = _mm_packus_epi16(de0, de1);
2857  _mm_storeu_si128((__m128i *) (out + i*2), outv);
2858 #elif defined(STBI_NEON)
2859  // load and perform the vertical filtering pass
2860  // this uses 3*x + y = 4*x + (y - x)
2861  uint8x8_t farb = vld1_u8(in_far + i);
2862  uint8x8_t nearb = vld1_u8(in_near + i);
2863  int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
2864  int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
2865  int16x8_t curr = vaddq_s16(nears, diff); // current row
2866 
2867  // horizontal filter works the same based on shifted vers of current
2868  // row. "prev" is current row shifted right by 1 pixel; we need to
2869  // insert the previous pixel value (from t1).
2870  // "next" is current row shifted left by 1 pixel, with first pixel
2871  // of next block of 8 pixels added in.
2872  int16x8_t prv0 = vextq_s16(curr, curr, 7);
2873  int16x8_t nxt0 = vextq_s16(curr, curr, 1);
2874  int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
2875  int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7);
2876 
2877  // horizontal filter, polyphase implementation since it's convenient:
2878  // even pixels = 3*cur + prev = cur*4 + (prev - cur)
2879  // odd pixels = 3*cur + next = cur*4 + (next - cur)
2880  // note the shared term.
2881  int16x8_t curs = vshlq_n_s16(curr, 2);
2882  int16x8_t prvd = vsubq_s16(prev, curr);
2883  int16x8_t nxtd = vsubq_s16(next, curr);
2884  int16x8_t even = vaddq_s16(curs, prvd);
2885  int16x8_t odd = vaddq_s16(curs, nxtd);
2886 
2887  // undo scaling and round, then store with even/odd phases interleaved
2888  uint8x8x2_t o;
2889  o.val[0] = vqrshrun_n_s16(even, 4);
2890  o.val[1] = vqrshrun_n_s16(odd, 4);
2891  vst2_u8(out + i*2, o);
2892 #endif
2893 
2894  // "previous" value for next iter
2895  t1 = 3*in_near[i+7] + in_far[i+7];
2896  }
2897 
2898  t0 = t1;
2899  t1 = 3*in_near[i] + in_far[i];
2900  out[i*2] = stbi__div16(3*t1 + t0 + 8);
2901 
2902  for (++i; i < w; ++i) {
2903  t0 = t1;
2904  t1 = 3*in_near[i]+in_far[i];
2905  out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
2906  out[i*2 ] = stbi__div16(3*t1 + t0 + 8);
2907  }
2908  out[w*2-1] = stbi__div4(t1+2);
2909 
2910  STBI_NOTUSED(hs);
2911 
2912  return out;
2913 }
2914 #endif
2915 
2916 static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
2917 {
2918  // resample with nearest-neighbor
2919  int i,j;
2920  STBI_NOTUSED(in_far);
2921  for (i=0; i < w; ++i)
2922  for (j=0; j < hs; ++j)
2923  out[i*hs+j] = in_near[i];
2924  return out;
2925 }
2926 
2927 #ifdef STBI_JPEG_OLD
2928 // this is the same YCbCr-to-RGB calculation that stb_image has used
2929 // historically before the algorithm changes in 1.49
2930 #define float2fixed(x) ((int) ((x) * 65536 + 0.5))
2931 static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
2932 {
2933  int i;
2934  for (i=0; i < count; ++i) {
2935  int y_fixed = (y[i] << 16) + 32768; // rounding
2936  int r,g,b;
2937  int cr = pcr[i] - 128;
2938  int cb = pcb[i] - 128;
2939  r = y_fixed + cr*float2fixed(1.40200f);
2940  g = y_fixed - cr*float2fixed(0.71414f) - cb*float2fixed(0.34414f);
2941  b = y_fixed + cb*float2fixed(1.77200f);
2942  r >>= 16;
2943  g >>= 16;
2944  b >>= 16;
2945  if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
2946  if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
2947  if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
2948  out[0] = (stbi_uc)r;
2949  out[1] = (stbi_uc)g;
2950  out[2] = (stbi_uc)b;
2951  out[3] = 255;
2952  out += step;
2953  }
2954 }
2955 #else
2956 // this is a reduced-precision calculation of YCbCr-to-RGB introduced
2957 // to make sure the code produces the same results in both SIMD and scalar
2958 #define float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8)
2959 static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
2960 {
2961  int i;
2962  for (i=0; i < count; ++i) {
2963  int y_fixed = (y[i] << 20) + (1<<19); // rounding
2964  int r,g,b;
2965  int cr = pcr[i] - 128;
2966  int cb = pcb[i] - 128;
2967  r = y_fixed + cr* float2fixed(1.40200f);
2968  g = y_fixed + (cr*-float2fixed(0.71414f)) + ((cb*-float2fixed(0.34414f)) & 0xffff0000);
2969  b = y_fixed + cb* float2fixed(1.77200f);
2970  r >>= 20;
2971  g >>= 20;
2972  b >>= 20;
2973  if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
2974  if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
2975  if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
2976  out[0] = (stbi_uc)r;
2977  out[1] = (stbi_uc)g;
2978  out[2] = (stbi_uc)b;
2979  out[3] = 255;
2980  out += step;
2981  }
2982 }
2983 #endif
2984 
2985 #if defined(STBI_SSE2) || defined(STBI_NEON)
2986 static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
2987 {
2988  int i = 0;
2989 
2990 #ifdef STBI_SSE2
2991  // step == 3 is pretty ugly on the final interleave, and i'm not convinced
2992  // it's useful in practice (you wouldn't use it for textures, for example).
2993  // so just accelerate step == 4 case.
2994  if (step == 4) {
2995  // this is a fairly straightforward implementation and not super-optimized.
2996  __m128i signflip = _mm_set1_epi8(-0x80);
2997  __m128i cr_const0 = _mm_set1_epi16( (short) ( 1.40200f*4096.0f+0.5f));
2998  __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
2999  __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
3000  __m128i cb_const1 = _mm_set1_epi16( (short) ( 1.77200f*4096.0f+0.5f));
3001  __m128i y_bias = _mm_set1_epi8((char) 128);
3002  __m128i xw = _mm_set1_epi16(255); // alpha channel
3003 
3004  for (; i+7 < count; i += 8) {
3005  // load
3006  __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
3007  __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
3008  __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
3009  __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
3010  __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128
3011 
3012  // unpack to short (and left-shift cr, cb by 8)
3013  __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes);
3014  __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
3015  __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);
3016 
3017  // color transform
3018  __m128i yws = _mm_srli_epi16(yw, 4);
3019  __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
3020  __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
3021  __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
3022  __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
3023  __m128i rws = _mm_add_epi16(cr0, yws);
3024  __m128i gwt = _mm_add_epi16(cb0, yws);
3025  __m128i bws = _mm_add_epi16(yws, cb1);
3026  __m128i gws = _mm_add_epi16(gwt, cr1);
3027 
3028  // descale
3029  __m128i rw = _mm_srai_epi16(rws, 4);
3030  __m128i bw = _mm_srai_epi16(bws, 4);
3031  __m128i gw = _mm_srai_epi16(gws, 4);
3032 
3033  // back to byte, set up for transpose
3034  __m128i brb = _mm_packus_epi16(rw, bw);
3035  __m128i gxb = _mm_packus_epi16(gw, xw);
3036 
3037  // transpose to interleave channels
3038  __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
3039  __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
3040  __m128i o0 = _mm_unpacklo_epi16(t0, t1);
3041  __m128i o1 = _mm_unpackhi_epi16(t0, t1);
3042 
3043  // store
3044  _mm_storeu_si128((__m128i *) (out + 0), o0);
3045  _mm_storeu_si128((__m128i *) (out + 16), o1);
3046  out += 32;
3047  }
3048  }
3049 #endif
3050 
3051 #ifdef STBI_NEON
3052  // in this version, step=3 support would be easy to add. but is there demand?
3053  if (step == 4) {
3054  // this is a fairly straightforward implementation and not super-optimized.
3055  uint8x8_t signflip = vdup_n_u8(0x80);
3056  int16x8_t cr_const0 = vdupq_n_s16( (short) ( 1.40200f*4096.0f+0.5f));
3057  int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
3058  int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
3059  int16x8_t cb_const1 = vdupq_n_s16( (short) ( 1.77200f*4096.0f+0.5f));
3060 
3061  for (; i+7 < count; i += 8) {
3062  // load
3063  uint8x8_t y_bytes = vld1_u8(y + i);
3064  uint8x8_t cr_bytes = vld1_u8(pcr + i);
3065  uint8x8_t cb_bytes = vld1_u8(pcb + i);
3066  int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
3067  int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));
3068 
3069  // expand to s16
3070  int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
3071  int16x8_t crw = vshll_n_s8(cr_biased, 7);
3072  int16x8_t cbw = vshll_n_s8(cb_biased, 7);
3073 
3074  // color transform
3075  int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
3076  int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
3077  int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
3078  int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
3079  int16x8_t rws = vaddq_s16(yws, cr0);
3080  int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
3081  int16x8_t bws = vaddq_s16(yws, cb1);
3082 
3083  // undo scaling, round, convert to byte
3084  uint8x8x4_t o;
3085  o.val[0] = vqrshrun_n_s16(rws, 4);
3086  o.val[1] = vqrshrun_n_s16(gws, 4);
3087  o.val[2] = vqrshrun_n_s16(bws, 4);
3088  o.val[3] = vdup_n_u8(255);
3089 
3090  // store, interleaving r/g/b/a
3091  vst4_u8(out, o);
3092  out += 8*4;
3093  }
3094  }
3095 #endif
3096 
3097  for (; i < count; ++i) {
3098  int y_fixed = (y[i] << 20) + (1<<19); // rounding
3099  int r,g,b;
3100  int cr = pcr[i] - 128;
3101  int cb = pcb[i] - 128;
3102  r = y_fixed + cr* float2fixed(1.40200f);
3103  g = y_fixed + cr*-float2fixed(0.71414f) + ((cb*-float2fixed(0.34414f)) & 0xffff0000);
3104  b = y_fixed + cb* float2fixed(1.77200f);
3105  r >>= 20;
3106  g >>= 20;
3107  b >>= 20;
3108  if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
3109  if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
3110  if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
3111  out[0] = (stbi_uc)r;
3112  out[1] = (stbi_uc)g;
3113  out[2] = (stbi_uc)b;
3114  out[3] = 255;
3115  out += step;
3116  }
3117 }
3118 #endif
3119 
3120 // set up the kernels
3121 static void stbi__setup_jpeg(stbi__jpeg *j)
3122 {
3123  j->idct_block_kernel = stbi__idct_block;
3124  j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
3125  j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
3126 
3127 #ifdef STBI_SSE2
3128  if (stbi__sse2_available()) {
3129  j->idct_block_kernel = stbi__idct_simd;
3130  #ifndef STBI_JPEG_OLD
3131  j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3132  #endif
3133  j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3134  }
3135 #endif
3136 
3137 #ifdef STBI_NEON
3138  j->idct_block_kernel = stbi__idct_simd;
3139  #ifndef STBI_JPEG_OLD
3140  j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3141  #endif
3142  j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3143 #endif
3144 }
3145 
3146 // clean up the temporary component buffers
3147 static void stbi__cleanup_jpeg(stbi__jpeg *j)
3148 {
3149  int i;
3150  for (i=0; i < j->s->img_n; ++i) {
3151  if (j->img_comp[i].raw_data) {
3152  STBI_FREE(j->img_comp[i].raw_data);
3153  j->img_comp[i].raw_data = NULL;
3154  j->img_comp[i].data = NULL;
3155  }
3156  if (j->img_comp[i].raw_coeff) {
3157  STBI_FREE(j->img_comp[i].raw_coeff);
3158  j->img_comp[i].raw_coeff = 0;
3159  j->img_comp[i].coeff = 0;
3160  }
3161  if (j->img_comp[i].linebuf) {
3162  STBI_FREE(j->img_comp[i].linebuf);
3163  j->img_comp[i].linebuf = NULL;
3164  }
3165  }
3166 }
3167 
3168 typedef struct
3169 {
3170  resample_row_func resample;
3171  stbi_uc *line0,*line1;
3172  int hs,vs; // expansion factor in each axis
3173  int w_lores; // horizontal pixels pre-expansion
3174  int ystep; // how far through vertical expansion we are
3175  int ypos; // which pre-expansion row we're on
3176 } stbi__resample;
3177 
3178 static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
3179 {
3180  int n, decode_n;
3181  z->s->img_n = 0; // make stbi__cleanup_jpeg safe
3182 
3183  // validate req_comp
3184  if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
3185 
3186  // load a jpeg image from whichever source, but leave in YCbCr format
3187  if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
3188 
3189  // determine actual number of components to generate
3190  n = req_comp ? req_comp : z->s->img_n;
3191 
3192  if (z->s->img_n == 3 && n < 3)
3193  decode_n = 1;
3194  else
3195  decode_n = z->s->img_n;
3196 
3197  // resample and color-convert
3198  {
3199  int k;
3200  unsigned int i,j;
3201  stbi_uc *output;
3202  stbi_uc *coutput[4];
3203 
3204  stbi__resample res_comp[4];
3205 
3206  for (k=0; k < decode_n; ++k) {
3207  stbi__resample *r = &res_comp[k];
3208 
3209  // allocate line buffer big enough for upsampling off the edges
3210  // with upsample factor of 4
3211  z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
3212  if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3213 
3214  r->hs = z->img_h_max / z->img_comp[k].h;
3215  r->vs = z->img_v_max / z->img_comp[k].v;
3216  r->ystep = r->vs >> 1;
3217  r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
3218  r->ypos = 0;
3219  r->line0 = r->line1 = z->img_comp[k].data;
3220 
3221  if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
3222  else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
3223  else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
3224  else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
3225  else r->resample = stbi__resample_row_generic;
3226  }
3227 
3228  // can't error after this so, this is safe
3229  output = (stbi_uc *) stbi__malloc(n * z->s->img_x * z->s->img_y + 1);
3230  if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3231 
3232  // now go ahead and resample
3233  for (j=0; j < z->s->img_y; ++j) {
3234  stbi_uc *out = output + n * z->s->img_x * j;
3235  for (k=0; k < decode_n; ++k) {
3236  stbi__resample *r = &res_comp[k];
3237  int y_bot = r->ystep >= (r->vs >> 1);
3238  coutput[k] = r->resample(z->img_comp[k].linebuf,
3239  y_bot ? r->line1 : r->line0,
3240  y_bot ? r->line0 : r->line1,
3241  r->w_lores, r->hs);
3242  if (++r->ystep >= r->vs) {
3243  r->ystep = 0;
3244  r->line0 = r->line1;
3245  if (++r->ypos < z->img_comp[k].y)
3246  r->line1 += z->img_comp[k].w2;
3247  }
3248  }
3249  if (n >= 3) {
3250  stbi_uc *y = coutput[0];
3251  if (z->s->img_n == 3) {
3252  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3253  } else
3254  for (i=0; i < z->s->img_x; ++i) {
3255  out[0] = out[1] = out[2] = y[i];
3256  out[3] = 255; // not used if n==3
3257  out += n;
3258  }
3259  } else {
3260  stbi_uc *y = coutput[0];
3261  if (n == 1)
3262  for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
3263  else
3264  for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255;
3265  }
3266  }
3267  stbi__cleanup_jpeg(z);
3268  *out_x = z->s->img_x;
3269  *out_y = z->s->img_y;
3270  if (comp) *comp = z->s->img_n; // report original components, not output
3271  return output;
3272  }
3273 }
3274 
3275 static unsigned char *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
3276 {
3277  stbi__jpeg j;
3278  j.s = s;
3279  stbi__setup_jpeg(&j);
3280  return load_jpeg_image(&j, x,y,comp,req_comp);
3281 }
3282 
3283 static int stbi__jpeg_test(stbi__context *s)
3284 {
3285  int r;
3286  stbi__jpeg j;
3287  j.s = s;
3288  stbi__setup_jpeg(&j);
3289  r = stbi__decode_jpeg_header(&j, STBI__SCAN_type);
3290  stbi__rewind(s);
3291  return r;
3292 }
3293 
3294 static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
3295 {
3296  if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
3297  stbi__rewind( j->s );
3298  return 0;
3299  }
3300  if (x) *x = j->s->img_x;
3301  if (y) *y = j->s->img_y;
3302  if (comp) *comp = j->s->img_n;
3303  return 1;
3304 }
3305 
3306 static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
3307 {
3308  stbi__jpeg j;
3309  j.s = s;
3310  return stbi__jpeg_info_raw(&j, x, y, comp);
3311 }
3312 #endif
3313 
3314 // public domain zlib decode v0.2 Sean Barrett 2006-11-18
3315 // simple implementation
3316 // - all input must be provided in an upfront buffer
3317 // - all output is written to a single output buffer (can malloc/realloc)
3318 // performance
3319 // - fast huffman
3320 
3321 #ifndef STBI_NO_ZLIB
3322 
// fast-way is faster to check than jpeg huffman, but slow way is slower

// number of input bits used to index the fast lookup table;
// 9 bits accelerate all cases in default tables
#define STBI__ZFAST_BITS  9
// mask selecting the low STBI__ZFAST_BITS bits of the bit buffer
#define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)
3326 
3327 // zlib-style huffman encoding
3328 // (jpegs packs from left, zlib from right, so can't share code)
3329 typedef struct
3330 {
3331  stbi__uint16 fast[1 << STBI__ZFAST_BITS];
3332  stbi__uint16 firstcode[16];
3333  int maxcode[17];
3334  stbi__uint16 firstsymbol[16];
3335  stbi_uc size[288];
3336  stbi__uint16 value[288];
3337 } stbi__zhuffman;
3338 
3339 stbi_inline static int stbi__bitreverse16(int n)
3340 {
3341  n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);
3342  n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);
3343  n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);
3344  n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
3345  return n;
3346 }
3347 
3348 stbi_inline static int stbi__bit_reverse(int v, int bits)
3349 {
3350  STBI_ASSERT(bits <= 16);
3351  // to bit reverse n bits, reverse 16 and shift
3352  // e.g. 11 bits, bit reverse and shift away 5
3353  return stbi__bitreverse16(v) >> (16-bits);
3354 }
3355 
3356 static int stbi__zbuild_huffman(stbi__zhuffman *z, stbi_uc *sizelist, int num)
3357 {
3358  int i,k=0;
3359  int code, next_code[16], sizes[17];
3360 
3361  // DEFLATE spec for generating codes
3362  memset(sizes, 0, sizeof(sizes));
3363  memset(z->fast, 0, sizeof(z->fast));
3364  for (i=0; i < num; ++i)
3365  ++sizes[sizelist[i]];
3366  sizes[0] = 0;
3367  for (i=1; i < 16; ++i)
3368  STBI_ASSERT(sizes[i] <= (1 << i));
3369  code = 0;
3370  for (i=1; i < 16; ++i) {
3371  next_code[i] = code;
3372  z->firstcode[i] = (stbi__uint16) code;
3373  z->firstsymbol[i] = (stbi__uint16) k;
3374  code = (code + sizes[i]);
3375  if (sizes[i])
3376  if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt JPEG");
3377  z->maxcode[i] = code << (16-i); // preshift for inner loop
3378  code <<= 1;
3379  k += sizes[i];
3380  }
3381  z->maxcode[16] = 0x10000; // sentinel
3382  for (i=0; i < num; ++i) {
3383  int s = sizelist[i];
3384  if (s) {
3385  int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
3386  stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i);
3387  z->size [c] = (stbi_uc ) s;
3388  z->value[c] = (stbi__uint16) i;
3389  if (s <= STBI__ZFAST_BITS) {
3390  int k = stbi__bit_reverse(next_code[s],s);
3391  while (k < (1 << STBI__ZFAST_BITS)) {
3392  z->fast[k] = fastv;
3393  k += (1 << s);
3394  }
3395  }
3396  ++next_code[s];
3397  }
3398  }
3399  return 1;
3400 }
3401 
3402 // zlib-from-memory implementation for PNG reading
3403 // because PNG allows splitting the zlib stream arbitrarily,
3404 // and it's annoying structurally to have PNG call ZLIB call PNG,
3405 // we require PNG read all the IDATs and combine them into a single
3406 // memory buffer
3407 
3408 typedef struct
3409 {
3410  stbi_uc *zbuffer, *zbuffer_end;
3411  int num_bits;
3412  stbi__uint32 code_buffer;
3413 
3414  char *zout;
3415  char *zout_start;
3416  char *zout_end;
3417  int z_expandable;
3418 
3419  stbi__zhuffman z_length, z_distance;
3420 } stbi__zbuf;
3421 
3422 stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
3423 {
3424  if (z->zbuffer >= z->zbuffer_end) return 0;
3425  return *z->zbuffer++;
3426 }
3427 
3428 static void stbi__fill_bits(stbi__zbuf *z)
3429 {
3430  do {
3431  STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
3432  z->code_buffer |= stbi__zget8(z) << z->num_bits;
3433  z->num_bits += 8;
3434  } while (z->num_bits <= 24);
3435 }
3436 
3437 stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
3438 {
3439  unsigned int k;
3440  if (z->num_bits < n) stbi__fill_bits(z);
3441  k = z->code_buffer & ((1 << n) - 1);
3442  z->code_buffer >>= n;
3443  z->num_bits -= n;
3444  return k;
3445 }
3446 
3447 static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
3448 {
3449  int b,s,k;
3450  // not resolved by fast table, so compute it the slow way
3451  // use jpeg approach, which requires MSbits at top
3452  k = stbi__bit_reverse(a->code_buffer, 16);
3453  for (s=STBI__ZFAST_BITS+1; ; ++s)
3454  if (k < z->maxcode[s])
3455  break;
3456  if (s == 16) return -1; // invalid code!
3457  // code size is s, so:
3458  b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
3459  STBI_ASSERT(z->size[b] == s);
3460  a->code_buffer >>= s;
3461  a->num_bits -= s;
3462  return z->value[b];
3463 }
3464 
3465 stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
3466 {
3467  int b,s;
3468  if (a->num_bits < 16) stbi__fill_bits(a);
3469  b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
3470  if (b) {
3471  s = b >> 9;
3472  a->code_buffer >>= s;
3473  a->num_bits -= s;
3474  return b & 511;
3475  }
3476  return stbi__zhuffman_decode_slowpath(a, z);
3477 }
3478 
3479 static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes
3480 {
3481  char *q;
3482  int cur, limit;
3483  z->zout = zout;
3484  if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
3485  cur = (int) (z->zout - z->zout_start);
3486  limit = (int) (z->zout_end - z->zout_start);
3487  while (cur + n > limit)
3488  limit *= 2;
3489  q = (char *) STBI_REALLOC(z->zout_start, limit);
3490  if (q == NULL) return stbi__err("outofmem", "Out of memory");
3491  z->zout_start = q;
3492  z->zout = q + cur;
3493  z->zout_end = q + limit;
3494  return 1;
3495 }
3496 
// Match-length and distance decoding tables, indexed by (length symbol - 257)
// and by distance symbol respectively. "base" is the smallest value a symbol
// stands for, "extra" the number of additional bits read and added to it.
// The trailing zero entries pad the tables beyond the last real symbol.
static int stbi__zlength_base[31] = {
     3,   4,   5,   6,   7,   8,   9,  10,
    11,  13,  15,  17,  19,  23,  27,  31,
    35,  43,  51,  59,  67,  83,  99, 115,
   131, 163, 195, 227, 258,
     0,   0
};

static int stbi__zlength_extra[31] = {
   0, 0, 0, 0, 0, 0, 0, 0,
   1, 1, 1, 1, 2, 2, 2, 2,
   3, 3, 3, 3, 4, 4, 4, 4,
   5, 5, 5, 5, 0,
   0, 0
};

static int stbi__zdist_base[32] = {
       1,     2,     3,     4,     5,     7,     9,    13,
      17,    25,    33,    49,    65,    97,   129,   193,
     257,   385,   513,   769,  1025,  1537,  2049,  3073,
    4097,  6145,  8193, 12289, 16385, 24577,
       0,     0
};

static int stbi__zdist_extra[32] = {
    0,  0,  0,  0,  1,  1,  2,  2,
    3,  3,  4,  4,  5,  5,  6,  6,
    7,  7,  8,  8,  9,  9, 10, 10,
   11, 11, 12, 12, 13, 13,
    0,  0
};
3510 
3511 static int stbi__parse_huffman_block(stbi__zbuf *a)
3512 {
3513  char *zout = a->zout;
3514  for(;;) {
3515  int z = stbi__zhuffman_decode(a, &a->z_length);
3516  if (z < 256) {
3517  if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
3518  if (zout >= a->zout_end) {
3519  if (!stbi__zexpand(a, zout, 1)) return 0;
3520  zout = a->zout;
3521  }
3522  *zout++ = (char) z;
3523  } else {
3524  stbi_uc *p;
3525  int len,dist;
3526  if (z == 256) {
3527  a->zout = zout;
3528  return 1;
3529  }
3530  z -= 257;
3531  len = stbi__zlength_base[z];
3532  if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
3533  z = stbi__zhuffman_decode(a, &a->z_distance);
3534  if (z < 0) return stbi__err("bad huffman code","Corrupt PNG");
3535  dist = stbi__zdist_base[z];
3536  if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
3537  if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
3538  if (zout + len > a->zout_end) {
3539  if (!stbi__zexpand(a, zout, len)) return 0;
3540  zout = a->zout;
3541  }
3542  p = (stbi_uc *) (zout - dist);
3543  if (dist == 1) { // run of one byte; common in images.
3544  stbi_uc v = *p;
3545  do *zout++ = v; while (--len);
3546  } else {
3547  do *zout++ = *p++; while (--len);
3548  }
3549  }
3550  }
3551 }
3552 
3553 static int stbi__compute_huffman_codes(stbi__zbuf *a)
3554 {
3555  static stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
3556  stbi__zhuffman z_codelength;
3557  stbi_uc lencodes[286+32+137];//padding for maximum single op
3558  stbi_uc codelength_sizes[19];
3559  int i,n;
3560 
3561  int hlit = stbi__zreceive(a,5) + 257;
3562  int hdist = stbi__zreceive(a,5) + 1;
3563  int hclen = stbi__zreceive(a,4) + 4;
3564 
3565  memset(codelength_sizes, 0, sizeof(codelength_sizes));
3566  for (i=0; i < hclen; ++i) {
3567  int s = stbi__zreceive(a,3);
3568  codelength_sizes[length_dezigzag[i]] = (stbi_uc) s;
3569  }
3570  if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
3571 
3572  n = 0;
3573  while (n < hlit + hdist) {
3574  int c = stbi__zhuffman_decode(a, &z_codelength);
3575  STBI_ASSERT(c >= 0 && c < 19);
3576  if (c < 16)
3577  lencodes[n++] = (stbi_uc) c;
3578  else if (c == 16) {
3579  c = stbi__zreceive(a,2)+3;
3580  memset(lencodes+n, lencodes[n-1], c);
3581  n += c;
3582  } else if (c == 17) {
3583  c = stbi__zreceive(a,3)+3;
3584  memset(lencodes+n, 0, c);
3585  n += c;
3586  } else {
3587  STBI_ASSERT(c == 18);
3588  c = stbi__zreceive(a,7)+11;
3589  memset(lencodes+n, 0, c);
3590  n += c;
3591  }
3592  }
3593  if (n != hlit+hdist) return stbi__err("bad codelengths","Corrupt PNG");
3594  if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
3595  if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
3596  return 1;
3597 }
3598 
3599 static int stbi__parse_uncomperssed_block(stbi__zbuf *a)
3600 {
3601  stbi_uc header[4];
3602  int len,nlen,k;
3603  if (a->num_bits & 7)
3604  stbi__zreceive(a, a->num_bits & 7); // discard
3605  // drain the bit-packed data into header
3606  k = 0;
3607  while (a->num_bits > 0) {
3608  header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
3609  a->code_buffer >>= 8;
3610  a->num_bits -= 8;
3611  }
3612  STBI_ASSERT(a->num_bits == 0);
3613  // now fill header the normal way
3614  while (k < 4)
3615  header[k++] = stbi__zget8(a);
3616  len = header[1] * 256 + header[0];
3617  nlen = header[3] * 256 + header[2];
3618  if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
3619  if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG");
3620  if (a->zout + len > a->zout_end)
3621  if (!stbi__zexpand(a, a->zout, len)) return 0;
3622  memcpy(a->zout, a->zbuffer, len);
3623  a->zbuffer += len;
3624  a->zout += len;
3625  return 1;
3626 }
3627 
3628 static int stbi__parse_zlib_header(stbi__zbuf *a)
3629 {
3630  int cmf = stbi__zget8(a);
3631  int cm = cmf & 15;
3632  /* int cinfo = cmf >> 4; */
3633  int flg = stbi__zget8(a);
3634  if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
3635  if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
3636  if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
3637  // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
3638  return 1;
3639 }
3640 
3641 // @TODO: should statically initialize these for optimal thread safety
3642 static stbi_uc stbi__zdefault_length[288], stbi__zdefault_distance[32];
3643 static void stbi__init_zdefaults(void)
3644 {
3645  int i; // use <= to match clearly with spec
3646  for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8;
3647  for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9;
3648  for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7;
3649  for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8;
3650 
3651  for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5;
3652 }
3653 
3654 static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
3655 {
3656  int final, type;
3657  if (parse_header)
3658  if (!stbi__parse_zlib_header(a)) return 0;
3659  a->num_bits = 0;
3660  a->code_buffer = 0;
3661  do {
3662  final = stbi__zreceive(a,1);
3663  type = stbi__zreceive(a,2);
3664  if (type == 0) {
3665  if (!stbi__parse_uncomperssed_block(a)) return 0;
3666  } else if (type == 3) {
3667  return 0;
3668  } else {
3669  if (type == 1) {
3670  // use fixed code lengths
3671  if (!stbi__zdefault_distance[31]) stbi__init_zdefaults();
3672  if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , 288)) return 0;
3673  if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0;
3674  } else {
3675  if (!stbi__compute_huffman_codes(a)) return 0;
3676  }
3677  if (!stbi__parse_huffman_block(a)) return 0;
3678  }
3679  } while (!final);
3680  return 1;
3681 }
3682 
3683 static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
3684 {
3685  a->zout_start = obuf;
3686  a->zout = obuf;
3687  a->zout_end = obuf + olen;
3688  a->z_expandable = exp;
3689 
3690  return stbi__parse_zlib(a, parse_header);
3691 }
3692 
3693 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
3694 {
3695  stbi__zbuf a;
3696  char *p = (char *) stbi__malloc(initial_size);
3697  if (p == NULL) return NULL;
3698  a.zbuffer = (stbi_uc *) buffer;
3699  a.zbuffer_end = (stbi_uc *) buffer + len;
3700  if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
3701  if (outlen) *outlen = (int) (a.zout - a.zout_start);
3702  return a.zout_start;
3703  } else {
3704  STBI_FREE(a.zout_start);
3705  return NULL;
3706  }
3707 }
3708 
3709 STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
3710 {
3711  return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
3712 }
3713 
3714 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
3715 {
3716  stbi__zbuf a;
3717  char *p = (char *) stbi__malloc(initial_size);
3718  if (p == NULL) return NULL;
3719  a.zbuffer = (stbi_uc *) buffer;
3720  a.zbuffer_end = (stbi_uc *) buffer + len;
3721  if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
3722  if (outlen) *outlen = (int) (a.zout - a.zout_start);
3723  return a.zout_start;
3724  } else {
3725  STBI_FREE(a.zout_start);
3726  return NULL;
3727  }
3728 }
3729 
3730 STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
3731 {
3732  stbi__zbuf a;
3733  a.zbuffer = (stbi_uc *) ibuffer;
3734  a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
3735  if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
3736  return (int) (a.zout - a.zout_start);
3737  else
3738  return -1;
3739 }
3740 
3741 STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
3742 {
3743  stbi__zbuf a;
3744  char *p = (char *) stbi__malloc(16384);
3745  if (p == NULL) return NULL;
3746  a.zbuffer = (stbi_uc *) buffer;
3747  a.zbuffer_end = (stbi_uc *) buffer+len;
3748  if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
3749  if (outlen) *outlen = (int) (a.zout - a.zout_start);
3750  return a.zout_start;
3751  } else {
3752  STBI_FREE(a.zout_start);
3753  return NULL;
3754  }
3755 }
3756 
3757 STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
3758 {
3759  stbi__zbuf a;
3760  a.zbuffer = (stbi_uc *) ibuffer;
3761  a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
3762  if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
3763  return (int) (a.zout - a.zout_start);
3764  else
3765  return -1;
3766 }
3767 #endif
3768 
3769 // public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18
3770 // simple implementation
3771 // - only 8-bit samples
3772 // - no CRC checking
3773 // - allocates lots of intermediate memory
3774 // - avoids problem of streaming data between subsystems
3775 // - avoids explicit window management
3776 // performance
3777 // - uses stb_zlib, a PD zlib implementation with fast huffman decoding
3778 
3779 #ifndef STBI_NO_PNG
3780 typedef struct
3781 {
3782  stbi__uint32 length;
3783  stbi__uint32 type;
3784 } stbi__pngchunk;
3785 
3786 static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
3787 {
3788  stbi__pngchunk c;
3789  c.length = stbi__get32be(s);
3790  c.type = stbi__get32be(s);
3791  return c;
3792 }
3793 
3794 static int stbi__check_png_header(stbi__context *s)
3795 {
3796  static stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
3797  int i;
3798  for (i=0; i < 8; ++i)
3799  if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG");
3800  return 1;
3801 }
3802 
3803 typedef struct
3804 {
3805  stbi__context *s;
3806  stbi_uc *idata, *expanded, *out;
3807 } stbi__png;
3808 
3809 
// Scanline filter identifiers; 0..4 are the values stored in the PNG data.
enum {
   STBI__F_none        = 0,
   STBI__F_sub         = 1,
   STBI__F_up          = 2,
   STBI__F_avg         = 3,
   STBI__F_paeth       = 4,
   // synthetic filters used for first scanline to avoid needing a dummy row of 0s
   STBI__F_avg_first   = 5,
   STBI__F_paeth_first = 6
};
3820 
3821 static stbi_uc first_row_filter[5] =
3822 {
3823  STBI__F_none,
3824  STBI__F_sub,
3825  STBI__F_none,
3826  STBI__F_avg_first,
3827  STBI__F_paeth_first
3828 };
3829 
// Paeth predictor: returns whichever of a, b, c is closest to a + b - c,
// preferring a, then b, then c when distances tie.
static int stbi__paeth(int a, int b, int c)
{
   int estimate = a + b - c;
   int dist_a = abs(estimate - a);
   int dist_b = abs(estimate - b);
   int dist_c = abs(estimate - c);

   if (dist_a > dist_b || dist_a > dist_c)
      return (dist_b <= dist_c) ? b : c;
   return a;
}
3840 
3841 static stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
3842 
3843 // create the png data from post-deflated data
3844 static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
3845 {
3846  stbi__context *s = a->s;
3847  stbi__uint32 i,j,stride = x*out_n;
3848  stbi__uint32 img_len, img_width_bytes;
3849  int k;
3850  int img_n = s->img_n; // copy it into a local for later
3851 
3852  STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
3853  a->out = (stbi_uc *) stbi__malloc(x * y * out_n); // extra bytes to write off the end into
3854  if (!a->out) return stbi__err("outofmem", "Out of memory");
3855 
3856  img_width_bytes = (((img_n * x * depth) + 7) >> 3);
3857  img_len = (img_width_bytes + 1) * y;
3858  if (s->img_x == x && s->img_y == y) {
3859  if (raw_len != img_len) return stbi__err("not enough pixels","Corrupt PNG");
3860  } else { // interlaced:
3861  if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");
3862  }
3863 
3864  for (j=0; j < y; ++j) {
3865  stbi_uc *cur = a->out + stride*j;
3866  stbi_uc *prior = cur - stride;
3867  int filter = *raw++;
3868  int filter_bytes = img_n;
3869  int width = x;
3870  if (filter > 4)
3871  return stbi__err("invalid filter","Corrupt PNG");
3872 
3873  if (depth < 8) {
3874  STBI_ASSERT(img_width_bytes <= x);
3875  cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
3876  filter_bytes = 1;
3877  width = img_width_bytes;
3878  }
3879 
3880  // if first row, use special filter that doesn't sample previous row
3881  if (j == 0) filter = first_row_filter[filter];
3882 
3883  // handle first byte explicitly
3884  for (k=0; k < filter_bytes; ++k) {
3885  switch (filter) {
3886  case STBI__F_none : cur[k] = raw[k]; break;
3887  case STBI__F_sub : cur[k] = raw[k]; break;
3888  case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
3889  case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break;
3890  case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break;
3891  case STBI__F_avg_first : cur[k] = raw[k]; break;
3892  case STBI__F_paeth_first: cur[k] = raw[k]; break;
3893  }
3894  }
3895 
3896  if (depth == 8) {
3897  if (img_n != out_n)
3898  cur[img_n] = 255; // first pixel
3899  raw += img_n;
3900  cur += out_n;
3901  prior += out_n;
3902  } else {
3903  raw += 1;
3904  cur += 1;
3905  prior += 1;
3906  }
3907 
3908  // this is a little gross, so that we don't switch per-pixel or per-component
3909  if (depth < 8 || img_n == out_n) {
3910  int nk = (width - 1)*img_n;
3911  #define CASE(f) \
3912  case f: \
3913  for (k=0; k < nk; ++k)
3914  switch (filter) {
3915  // "none" filter turns into a memcpy here; make that explicit.
3916  case STBI__F_none: memcpy(cur, raw, nk); break;
3917  CASE(STBI__F_sub) cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); break;
3918  CASE(STBI__F_up) cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
3919  CASE(STBI__F_avg) cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); break;
3920  CASE(STBI__F_paeth) cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); break;
3921  CASE(STBI__F_avg_first) cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); break;
3922  CASE(STBI__F_paeth_first) cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); break;
3923  }
3924  #undef CASE
3925  raw += nk;
3926  } else {
3927  STBI_ASSERT(img_n+1 == out_n);
3928  #define CASE(f) \
3929  case f: \
3930  for (i=x-1; i >= 1; --i, cur[img_n]=255,raw+=img_n,cur+=out_n,prior+=out_n) \
3931  for (k=0; k < img_n; ++k)
3932  switch (filter) {
3933  CASE(STBI__F_none) cur[k] = raw[k]; break;
3934  CASE(STBI__F_sub) cur[k] = STBI__BYTECAST(raw[k] + cur[k-out_n]); break;
3935  CASE(STBI__F_up) cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
3936  CASE(STBI__F_avg) cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-out_n])>>1)); break;
3937  CASE(STBI__F_paeth) cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-out_n],prior[k],prior[k-out_n])); break;
3938  CASE(STBI__F_avg_first) cur[k] = STBI__BYTECAST(raw[k] + (cur[k-out_n] >> 1)); break;
3939  CASE(STBI__F_paeth_first) cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-out_n],0,0)); break;
3940  }
3941  #undef CASE
3942  }
3943  }
3944 
3945  // we make a separate pass to expand bits to pixels; for performance,
3946  // this could run two scanlines behind the above code, so it won't
3947  // intefere with filtering but will still be in the cache.
3948  if (depth < 8) {
3949  for (j=0; j < y; ++j) {
3950  stbi_uc *cur = a->out + stride*j;
3951  stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes;
3952  // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
3953  // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
3954  stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
3955 
3956  // note that the final byte might overshoot and write more data than desired.
3957  // we can allocate enough data that this never writes out of memory, but it
3958  // could also overwrite the next scanline. can it overwrite non-empty data
3959  // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
3960  // so we need to explicitly clamp the final ones
3961 
3962  if (depth == 4) {
3963  for (k=x*img_n; k >= 2; k-=2, ++in) {
3964  *cur++ = scale * ((*in >> 4) );
3965  *cur++ = scale * ((*in ) & 0x0f);
3966  }
3967  if (k > 0) *cur++ = scale * ((*in >> 4) );
3968  } else if (depth == 2) {
3969  for (k=x*img_n; k >= 4; k-=4, ++in) {
3970  *cur++ = scale * ((*in >> 6) );
3971  *cur++ = scale * ((*in >> 4) & 0x03);
3972  *cur++ = scale * ((*in >> 2) & 0x03);
3973  *cur++ = scale * ((*in ) & 0x03);
3974  }
3975  if (k > 0) *cur++ = scale * ((*in >> 6) );
3976  if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
3977  if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
3978  } else if (depth == 1) {
3979  for (k=x*img_n; k >= 8; k-=8, ++in) {
3980  *cur++ = scale * ((*in >> 7) );
3981  *cur++ = scale * ((*in >> 6) & 0x01);
3982  *cur++ = scale * ((*in >> 5) & 0x01);
3983  *cur++ = scale * ((*in >> 4) & 0x01);
3984  *cur++ = scale * ((*in >> 3) & 0x01);
3985  *cur++ = scale * ((*in >> 2) & 0x01);
3986  *cur++ = scale * ((*in >> 1) & 0x01);
3987  *cur++ = scale * ((*in ) & 0x01);
3988  }
3989  if (k > 0) *cur++ = scale * ((*in >> 7) );
3990  if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
3991  if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
3992  if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
3993  if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
3994  if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
3995  if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
3996  }
3997  if (img_n != out_n) {
3998  // insert alpha = 255
3999  stbi_uc *cur = a->out + stride*j;
4000  int i;
4001  if (img_n == 1) {
4002  for (i=x-1; i >= 0; --i) {
4003  cur[i*2+1] = 255;
4004  cur[i*2+0] = cur[i];
4005  }
4006  } else {
4007  assert(img_n == 3);
4008  for (i=x-1; i >= 0; --i) {
4009  cur[i*4+3] = 255;
4010  cur[i*4+2] = cur[i*3+2];
4011  cur[i*4+1] = cur[i*3+1];
4012  cur[i*4+0] = cur[i*3+0];
4013  }
4014  }
4015  }
4016  }
4017  }
4018 
4019  return 1;
4020 }
4021 
4022 static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
4023 {
4024  stbi_uc *final;
4025  int p;
4026  if (!interlaced)
4027  return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
4028 
4029  // de-interlacing
4030  final = (stbi_uc *) stbi__malloc(a->s->img_x * a->s->img_y * out_n);
4031  for (p=0; p < 7; ++p) {
4032  int xorig[] = { 0,4,0,2,0,1,0 };
4033  int yorig[] = { 0,0,4,0,2,0,1 };
4034  int xspc[] = { 8,8,4,4,2,2,1 };
4035  int yspc[] = { 8,8,8,4,4,2,2 };
4036  int i,j,x,y;
4037  // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
4038  x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
4039  y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
4040  if (x && y) {
4041  stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
4042  if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
4043  STBI_FREE(final);
4044  return 0;
4045  }
4046  for (j=0; j < y; ++j) {
4047  for (i=0; i < x; ++i) {
4048  int out_y = j*yspc[p]+yorig[p];
4049  int out_x = i*xspc[p]+xorig[p];
4050  memcpy(final + out_y*a->s->img_x*out_n + out_x*out_n,
4051  a->out + (j*x+i)*out_n, out_n);
4052  }
4053  }
4054  STBI_FREE(a->out);
4055  image_data += img_len;
4056  image_data_len -= img_len;
4057  }
4058  }
4059  a->out = final;
4060 
4061  return 1;
4062 }
4063 
4064 static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
4065 {
4066  stbi__context *s = z->s;
4067  stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4068  stbi_uc *p = z->out;
4069 
4070  // compute color-based transparency, assuming we've
4071  // already got 255 as the alpha value in the output
4072  STBI_ASSERT(out_n == 2 || out_n == 4);
4073 
4074  if (out_n == 2) {
4075  for (i=0; i < pixel_count; ++i) {
4076  p[1] = (p[0] == tc[0] ? 0 : 255);
4077  p += 2;
4078  }
4079  } else {
4080  for (i=0; i < pixel_count; ++i) {
4081  if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
4082  p[3] = 0;
4083  p += 4;
4084  }
4085  }
4086  return 1;
4087 }
4088 
4089 static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
4090 {
4091  stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
4092  stbi_uc *p, *temp_out, *orig = a->out;
4093 
4094  p = (stbi_uc *) stbi__malloc(pixel_count * pal_img_n);
4095  if (p == NULL) return stbi__err("outofmem", "Out of memory");
4096 
4097  // between here and free(out) below, exitting would leak
4098  temp_out = p;
4099 
4100  if (pal_img_n == 3) {
4101  for (i=0; i < pixel_count; ++i) {
4102  int n = orig[i]*4;
4103  p[0] = palette[n ];
4104  p[1] = palette[n+1];
4105  p[2] = palette[n+2];
4106  p += 3;
4107  }
4108  } else {
4109  for (i=0; i < pixel_count; ++i) {
4110  int n = orig[i]*4;
4111  p[0] = palette[n ];
4112  p[1] = palette[n+1];
4113  p[2] = palette[n+2];
4114  p[3] = palette[n+3];
4115  p += 4;
4116  }
4117  }
4118  STBI_FREE(a->out);
4119  a->out = temp_out;
4120 
4121  STBI_NOTUSED(len);
4122 
4123  return 1;
4124 }
4125 
4126 static int stbi__unpremultiply_on_load = 0;
4127 static int stbi__de_iphone_flag = 0;
4128 
4129 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
4130 {
4131  stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
4132 }
4133 
4134 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
4135 {
4136  stbi__de_iphone_flag = flag_true_if_should_convert;
4137 }
4138 
4139 static void stbi__de_iphone(stbi__png *z)
4140 {
4141  stbi__context *s = z->s;
4142  stbi__uint32 i, pixel_count = s->img_x * s->img_y;
4143  stbi_uc *p = z->out;
4144 
4145  if (s->img_out_n == 3) { // convert bgr to rgb
4146  for (i=0; i < pixel_count; ++i) {
4147  stbi_uc t = p[0];
4148  p[0] = p[2];
4149  p[2] = t;
4150  p += 3;
4151  }
4152  } else {
4153  STBI_ASSERT(s->img_out_n == 4);
4154  if (stbi__unpremultiply_on_load) {
4155  // convert bgr to rgb and unpremultiply
4156  for (i=0; i < pixel_count; ++i) {
4157  stbi_uc a = p[3];
4158  stbi_uc t = p[0];
4159  if (a) {
4160  p[0] = p[2] * 255 / a;
4161  p[1] = p[1] * 255 / a;
4162  p[2] = t * 255 / a;
4163  } else {
4164  p[0] = p[2];
4165  p[2] = t;
4166  }
4167  p += 4;
4168  }
4169  } else {
4170  // convert bgr to rgb
4171  for (i=0; i < pixel_count; ++i) {
4172  stbi_uc t = p[0];
4173  p[0] = p[2];
4174  p[2] = t;
4175  p += 4;
4176  }
4177  }
4178  }
4179 }
4180 
4181 #define STBI__PNG_TYPE(a,b,c,d) (((a) << 24) + ((b) << 16) + ((c) << 8) + (d))
4182 
4183 static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
4184 {
4185  stbi_uc palette[1024], pal_img_n=0;
4186  stbi_uc has_trans=0, tc[3];
4187  stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
4188  int first=1,k,interlace=0, color=0, depth=0, is_iphone=0;
4189  stbi__context *s = z->s;
4190 
4191  z->expanded = NULL;
4192  z->idata = NULL;
4193  z->out = NULL;
4194 
4195  if (!stbi__check_png_header(s)) return 0;
4196 
4197  if (scan == STBI__SCAN_type) return 1;
4198 
4199  for (;;) {
4200  stbi__pngchunk c = stbi__get_chunk_header(s);
4201  switch (c.type) {
4202  case STBI__PNG_TYPE('C','g','B','I'):
4203  is_iphone = 1;
4204  stbi__skip(s, c.length);
4205  break;
4206  case STBI__PNG_TYPE('I','H','D','R'): {
4207  int comp,filter;
4208  if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
4209  first = 0;
4210  if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
4211  s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
4212  s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
4213  depth = stbi__get8(s); if (depth != 1 && depth != 2 && depth != 4 && depth != 8) return stbi__err("1/2/4/8-bit only","PNG not supported: 1/2/4/8-bit only");
4214  color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG");
4215  if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
4216  comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG");
4217  filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG");
4218  interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
4219  if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
4220  if (!pal_img_n) {
4221  s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
4222  if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
4223  if (scan == STBI__SCAN_header) return 1;
4224  } else {
4225  // if paletted, then pal_n is our final components, and
4226  // img_n is # components to decompress/filter.
4227  s->img_n = 1;
4228  if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
4229  // if SCAN_header, have to scan to see if we have a tRNS
4230  }
4231  break;
4232  }
4233 
4234  case STBI__PNG_TYPE('P','L','T','E'): {
4235  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4236  if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
4237  pal_len = c.length / 3;
4238  if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG");
4239  for (i=0; i < pal_len; ++i) {
4240  palette[i*4+0] = stbi__get8(s);
4241  palette[i*4+1] = stbi__get8(s);
4242  palette[i*4+2] = stbi__get8(s);
4243  palette[i*4+3] = 255;
4244  }
4245  break;
4246  }
4247 
4248  case STBI__PNG_TYPE('t','R','N','S'): {
4249  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4250  if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
4251  if (pal_img_n) {
4252  if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
4253  if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
4254  if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
4255  pal_img_n = 4;
4256  for (i=0; i < c.length; ++i)
4257  palette[i*4+3] = stbi__get8(s);
4258  } else {
4259  if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
4260  if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
4261  has_trans = 1;
4262  for (k=0; k < s->img_n; ++k)
4263  tc[k] = (stbi_uc) (stbi__get16be(s) & 255) * stbi__depth_scale_table[depth]; // non 8-bit images will be larger
4264  }
4265  break;
4266  }
4267 
4268  case STBI__PNG_TYPE('I','D','A','T'): {
4269  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4270  if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
4271  if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
4272  if (ioff + c.length > idata_limit) {
4273  stbi_uc *p;
4274  if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
4275  while (ioff + c.length > idata_limit)
4276  idata_limit *= 2;
4277  p = (stbi_uc *) STBI_REALLOC(z->idata, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
4278  z->idata = p;
4279  }
4280  if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
4281  ioff += c.length;
4282  break;
4283  }
4284 
4285  case STBI__PNG_TYPE('I','E','N','D'): {
4286  stbi__uint32 raw_len, bpl;
4287  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4288  if (scan != STBI__SCAN_load) return 1;
4289  if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
4290  // initial guess for decoded data size to avoid unnecessary reallocs
4291  bpl = (s->img_x * depth + 7) / 8; // bytes per line, per component
4292  raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
4293  z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
4294  if (z->expanded == NULL) return 0; // zlib should set error
4295  STBI_FREE(z->idata); z->idata = NULL;
4296  if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
4297  s->img_out_n = s->img_n+1;
4298  else
4299  s->img_out_n = s->img_n;
4300  if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, depth, color, interlace)) return 0;
4301  if (has_trans)
4302  if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
4303  if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
4304  stbi__de_iphone(z);
4305  if (pal_img_n) {
4306  // pal_img_n == 3 or 4
4307  s->img_n = pal_img_n; // record the actual colors we had
4308  s->img_out_n = pal_img_n;
4309  if (req_comp >= 3) s->img_out_n = req_comp;
4310  if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
4311  return 0;
4312  }
4313  STBI_FREE(z->expanded); z->expanded = NULL;
4314  return 1;
4315  }
4316 
4317  default:
4318  // if critical, fail
4319  if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4320  if ((c.type & (1 << 29)) == 0) {
4321  #ifndef STBI_NO_FAILURE_STRINGS
4322  // not threadsafe
4323  static char invalid_chunk[] = "XXXX PNG chunk not known";
4324  invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
4325  invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
4326  invalid_chunk[2] = STBI__BYTECAST(c.type >> 8);
4327  invalid_chunk[3] = STBI__BYTECAST(c.type >> 0);
4328  #endif
4329  return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
4330  }
4331  stbi__skip(s, c.length);
4332  break;
4333  }
4334  // end of PNG chunk, read and skip CRC
4335  stbi__get32be(s);
4336  }
4337 }
4338 
4339 static unsigned char *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp)
4340 {
4341  unsigned char *result=NULL;
4342  if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
4343  if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
4344  result = p->out;
4345  p->out = NULL;
4346  if (req_comp && req_comp != p->s->img_out_n) {
4347  result = stbi__convert_format(result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
4348  p->s->img_out_n = req_comp;
4349  if (result == NULL) return result;
4350  }
4351  *x = p->s->img_x;
4352  *y = p->s->img_y;
4353  if (n) *n = p->s->img_out_n;
4354  }
4355  STBI_FREE(p->out); p->out = NULL;
4356  STBI_FREE(p->expanded); p->expanded = NULL;
4357  STBI_FREE(p->idata); p->idata = NULL;
4358 
4359  return result;
4360 }
4361 
4362 static unsigned char *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
4363 {
4364  stbi__png p;
4365  p.s = s;
4366  return stbi__do_png(&p, x,y,comp,req_comp);
4367 }
4368 
4369 static int stbi__png_test(stbi__context *s)
4370 {
4371  int r;
4372  r = stbi__check_png_header(s);
4373  stbi__rewind(s);
4374  return r;
4375 }
4376 
4377 static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
4378 {
4379  if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
4380  stbi__rewind( p->s );
4381  return 0;
4382  }
4383  if (x) *x = p->s->img_x;
4384  if (y) *y = p->s->img_y;
4385  if (comp) *comp = p->s->img_n;
4386  return 1;
4387 }
4388 
4389 static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
4390 {
4391  stbi__png p;
4392  p.s = s;
4393  return stbi__png_info_raw(&p, x, y, comp);
4394 }
4395 #endif
4396 
4397 // Microsoft/Windows BMP image
4398 
4399 #ifndef STBI_NO_BMP
4400 static int stbi__bmp_test_raw(stbi__context *s)
4401 {
4402  int r;
4403  int sz;
4404  if (stbi__get8(s) != 'B') return 0;
4405  if (stbi__get8(s) != 'M') return 0;
4406  stbi__get32le(s); // discard filesize
4407  stbi__get16le(s); // discard reserved
4408  stbi__get16le(s); // discard reserved
4409  stbi__get32le(s); // discard data offset
4410  sz = stbi__get32le(s);
4411  r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124);
4412  return r;
4413 }
4414 
4415 static int stbi__bmp_test(stbi__context *s)
4416 {
4417  int r = stbi__bmp_test_raw(s);
4418  stbi__rewind(s);
4419  return r;
4420 }
4421 
4422 
4423 // returns 0..31 for the highest set bit
// Returns the index (0..31) of the highest set bit of z, or -1 when z is 0.
static int stbi__high_bit(unsigned int z)
{
   int n = 0;
   int step;

   if (z == 0) return -1;

   // binary search: halve the window (16, 8, 4, 2, 1) each round
   for (step = 16; step > 0; step >>= 1) {
      if ((z >> step) != 0) {
         n += step;
         z >>= step;
      }
   }
   return n;
}
4435 
// Returns the number of set bits among the low 32 bits of a.
static int stbi__bitcount(unsigned int a)
{
   int count = 0;

   a &= 0xffffffffu; // only the low 32 bits take part in the count
   while (a != 0) {
      a &= a - 1; // clears the lowest set bit
      ++count;
   }
   return count;
}
4445 
// Shifts v right by 'shift' (left when shift is negative), then adds copies
// of that value shifted right by bits, 2*bits, ... while the offset is below 8.
// NOTE(review): bits <= 0 would never terminate; callers presumably pass a
// positive bit count -- confirm.
static int stbi__shiftsigned(int v, int shift, int bits)
{
   int acc;
   int covered;

   v = (shift < 0) ? (v << -shift) : (v >> shift);
   acc = v;

   for (covered = bits; covered < 8; covered += bits)
      acc += v >> covered;

   return acc;
}
4462 
4463 static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
4464 {
4465  stbi_uc *out;
4466  unsigned int mr=0,mg=0,mb=0,ma=0, fake_a=0;
4467  stbi_uc pal[256][4];
4468  int psize=0,i,j,compress=0,width;
4469  int bpp, flip_vertically, pad, target, offset, hsz;
4470  if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
4471  stbi__get32le(s); // discard filesize
4472  stbi__get16le(s); // discard reserved
4473  stbi__get16le(s); // discard reserved
4474  offset = stbi__get32le(s);
4475  hsz = stbi__get32le(s);
4476  if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
4477  if (hsz == 12) {
4478  s->img_x = stbi__get16le(s);
4479  s->img_y = stbi__get16le(s);
4480  } else {
4481  s->img_x = stbi__get32le(s);
4482  s->img_y = stbi__get32le(s);
4483  }
4484  if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
4485  bpp = stbi__get16le(s);
4486  if (bpp == 1) return stbi__errpuc("monochrome", "BMP type not supported: 1-bit");
4487  flip_vertically = ((int) s->img_y) > 0;
4488  s->img_y = abs((int) s->img_y);
4489  if (hsz == 12) {
4490  if (bpp < 24)
4491  psize = (offset - 14 - 24) / 3;
4492  } else {
4493  compress = stbi__get32le(s);
4494  if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
4495  stbi__get32le(s); // discard sizeof
4496  stbi__get32le(s); // discard hres
4497  stbi__get32le(s); // discard vres
4498  stbi__get32le(s); // discard colorsused
4499  stbi__get32le(s); // discard max important
4500  if (hsz == 40 || hsz == 56) {
4501  if (hsz == 56) {
4502  stbi__get32le(s);
4503  stbi__get32le(s);
4504  stbi__get32le(s);
4505  stbi__get32le(s);
4506  }
4507  if (bpp == 16 || bpp == 32) {
4508  mr = mg = mb = 0;
4509  if (compress == 0) {
4510  if (bpp == 32) {
4511  mr = 0xffu << 16;
4512  mg = 0xffu << 8;
4513  mb = 0xffu << 0;
4514  ma = 0xffu << 24;
4515  fake_a = 1; // @TODO: check for cases like alpha value is all 0 and switch it to 255
4516  STBI_NOTUSED(fake_a);
4517  } else {
4518  mr = 31u << 10;
4519  mg = 31u << 5;
4520  mb = 31u << 0;
4521  }
4522  } else if (compress == 3) {
4523  mr = stbi__get32le(s);
4524  mg = stbi__get32le(s);
4525  mb = stbi__get32le(s);
4526  // not documented, but generated by photoshop and handled by mspaint
4527  if (mr == mg && mg == mb) {
4528  // ?!?!?
4529  return stbi__errpuc("bad BMP", "bad BMP");
4530  }
4531  } else
4532  return stbi__errpuc("bad BMP", "bad BMP");
4533  }
4534  } else {
4535  STBI_ASSERT(hsz == 108 || hsz == 124);
4536  mr = stbi__get32le(s);
4537  mg = stbi__get32le(s);
4538  mb = stbi__get32le(s);
4539  ma = stbi__get32le(s);
4540  stbi__get32le(s); // discard color space
4541  for (i=0; i < 12; ++i)
4542  stbi__get32le(s); // discard color space parameters
4543  if (hsz == 124) {
4544  stbi__get32le(s); // discard rendering intent
4545  stbi__get32le(s); // discard offset of profile data
4546  stbi__get32le(s); // discard size of profile data
4547  stbi__get32le(s); // discard reserved
4548  }
4549  }
4550  if (bpp < 16)
4551  psize = (offset - 14 - hsz) >> 2;
4552  }
4553  s->img_n = ma ? 4 : 3;
4554  if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
4555  target = req_comp;
4556  else
4557  target = s->img_n; // if they want monochrome, we'll post-convert
4558  out = (stbi_uc *) stbi__malloc(target * s->img_x * s->img_y);
4559  if (!out) return stbi__errpuc("outofmem", "Out of memory");
4560  if (bpp < 16) {
4561  int z=0;
4562  if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
4563  for (i=0; i < psize; ++i) {
4564  pal[i][2] = stbi__get8(s);
4565  pal[i][1] = stbi__get8(s);
4566  pal[i][0] = stbi__get8(s);
4567  if (hsz != 12) stbi__get8(s);
4568  pal[i][3] = 255;
4569  }
4570  stbi__skip(s, offset - 14 - hsz - psize * (hsz == 12 ? 3 : 4));
4571  if (bpp == 4) width = (s->img_x + 1) >> 1;
4572  else if (bpp == 8) width = s->img_x;
4573  else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
4574  pad = (-width)&3;
4575  for (j=0; j < (int) s->img_y; ++j) {
4576  for (i=0; i < (int) s->img_x; i += 2) {
4577  int v=stbi__get8(s),v2=0;
4578  if (bpp == 4) {
4579  v2 = v & 15;
4580  v >>= 4;
4581  }
4582  out[z++] = pal[v][0];
4583  out[z++] = pal[v][1];
4584  out[z++] = pal[v][2];
4585  if (target == 4) out[z++] = 255;
4586  if (i+1 == (int) s->img_x) break;
4587  v = (bpp == 8) ? stbi__get8(s) : v2;
4588  out[z++] = pal[v][0];
4589  out[z++] = pal[v][1];
4590  out[z++] = pal[v][2];
4591  if (target == 4) out[z++] = 255;
4592  }
4593  stbi__skip(s, pad);
4594  }
4595  } else {
4596  int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
4597  int z = 0;
4598  int easy=0;
4599  stbi__skip(s, offset - 14 - hsz);
4600  if (bpp == 24) width = 3 * s->img_x;
4601  else if (bpp == 16) width = 2*s->img_x;
4602  else /* bpp = 32 and pad = 0 */ width=0;
4603  pad = (-width) & 3;
4604  if (bpp == 24) {
4605  easy = 1;
4606  } else if (bpp == 32) {
4607  if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
4608  easy = 2;
4609  }
4610  if (!easy) {
4611  if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
4612  // right shift amt to put high bit in position #7
4613  rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
4614  gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
4615  bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
4616  ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
4617  }
4618  for (j=0; j < (int) s->img_y; ++j) {
4619  if (easy) {
4620  for (i=0; i < (int) s->img_x; ++i) {
4621  unsigned char a;
4622  out[z+2] = stbi__get8(s);
4623  out[z+1] = stbi__get8(s);
4624  out[z+0] = stbi__get8(s);
4625  z += 3;
4626  a = (easy == 2 ? stbi__get8(s) : 255);
4627  if (target == 4) out[z++] = a;
4628  }
4629  } else {
4630  for (i=0; i < (int) s->img_x; ++i) {
4631  stbi__uint32 v = (stbi__uint32) (bpp == 16 ? stbi__get16le(s) : stbi__get32le(s));
4632  int a;
4633  out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
4634  out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
4635  out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
4636  a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
4637  if (target == 4) out[z++] = STBI__BYTECAST(a);
4638  }
4639  }
4640  stbi__skip(s, pad);
4641  }
4642  }
4643  if (flip_vertically) {
4644  stbi_uc t;
4645  for (j=0; j < (int) s->img_y>>1; ++j) {
4646  stbi_uc *p1 = out + j *s->img_x*target;
4647  stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
4648  for (i=0; i < (int) s->img_x*target; ++i) {
4649  t = p1[i], p1[i] = p2[i], p2[i] = t;
4650  }
4651  }
4652  }
4653 
4654  if (req_comp && req_comp != target) {
4655  out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
4656  if (out == NULL) return out; // stbi__convert_format frees input on failure
4657  }
4658 
4659  *x = s->img_x;
4660  *y = s->img_y;
4661  if (comp) *comp = s->img_n;
4662  return out;
4663 }
4664 #endif
4665 
4666 // Targa Truevision - TGA
4667 // by Jonathan Dummer
4668 #ifndef STBI_NO_TGA
4669 static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
4670 {
4671  int tga_w, tga_h, tga_comp;
4672  int sz;
4673  stbi__get8(s); // discard Offset
4674  sz = stbi__get8(s); // color type
4675  if( sz > 1 ) {
4676  stbi__rewind(s);
4677  return 0; // only RGB or indexed allowed
4678  }
4679  sz = stbi__get8(s); // image type
4680  // only RGB or grey allowed, +/- RLE
4681  if ((sz != 1) && (sz != 2) && (sz != 3) && (sz != 9) && (sz != 10) && (sz != 11)) return 0;
4682  stbi__skip(s,9);
4683  tga_w = stbi__get16le(s);
4684  if( tga_w < 1 ) {
4685  stbi__rewind(s);
4686  return 0; // test width
4687  }
4688  tga_h = stbi__get16le(s);
4689  if( tga_h < 1 ) {
4690  stbi__rewind(s);
4691  return 0; // test height
4692  }
4693  sz = stbi__get8(s); // bits per pixel
4694  // only RGB or RGBA or grey allowed
4695  if ((sz != 8) && (sz != 16) && (sz != 24) && (sz != 32)) {
4696  stbi__rewind(s);
4697  return 0;
4698  }
4699  tga_comp = sz;
4700  if (x) *x = tga_w;
4701  if (y) *y = tga_h;
4702  if (comp) *comp = tga_comp / 8;
4703  return 1; // seems to have passed everything
4704 }
4705 
4706 static int stbi__tga_test(stbi__context *s)
4707 {
4708  int res;
4709  int sz;
4710  stbi__get8(s); // discard Offset
4711  sz = stbi__get8(s); // color type
4712  if ( sz > 1 ) return 0; // only RGB or indexed allowed
4713  sz = stbi__get8(s); // image type
4714  if ( (sz != 1) && (sz != 2) && (sz != 3) && (sz != 9) && (sz != 10) && (sz != 11) ) return 0; // only RGB or grey allowed, +/- RLE
4715  stbi__get16be(s); // discard palette start
4716  stbi__get16be(s); // discard palette length
4717  stbi__get8(s); // discard bits per palette color entry
4718  stbi__get16be(s); // discard x origin
4719  stbi__get16be(s); // discard y origin
4720  if ( stbi__get16be(s) < 1 ) return 0; // test width
4721  if ( stbi__get16be(s) < 1 ) return 0; // test height
4722  sz = stbi__get8(s); // bits per pixel
4723  if ( (sz != 8) && (sz != 16) && (sz != 24) && (sz != 32) )
4724  res = 0;
4725  else
4726  res = 1;
4727  stbi__rewind(s);
4728  return res;
4729 }
4730 
4731 static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
4732 {
4733  // read in the TGA header stuff
4734  int tga_offset = stbi__get8(s);
4735  int tga_indexed = stbi__get8(s);
4736  int tga_image_type = stbi__get8(s);
4737  int tga_is_RLE = 0;
4738  int tga_palette_start = stbi__get16le(s);
4739  int tga_palette_len = stbi__get16le(s);
4740  int tga_palette_bits = stbi__get8(s);
4741  int tga_x_origin = stbi__get16le(s);
4742  int tga_y_origin = stbi__get16le(s);
4743  int tga_width = stbi__get16le(s);
4744  int tga_height = stbi__get16le(s);
4745  int tga_bits_per_pixel = stbi__get8(s);
4746  int tga_comp = tga_bits_per_pixel / 8;
4747  int tga_inverted = stbi__get8(s);
4748  // image data
4749  unsigned char *tga_data;
4750  unsigned char *tga_palette = NULL;
4751  int i, j;
4752  unsigned char raw_data[4];
4753  int RLE_count = 0;
4754  int RLE_repeating = 0;
4755  int read_next_pixel = 1;
4756 
4757  // do a tiny bit of precessing
4758  if ( tga_image_type >= 8 )
4759  {
4760  tga_image_type -= 8;
4761  tga_is_RLE = 1;
4762  }
4763  /* int tga_alpha_bits = tga_inverted & 15; */
4764  tga_inverted = 1 - ((tga_inverted >> 5) & 1);
4765 
4766  // error check
4767  if ( //(tga_indexed) ||
4768  (tga_width < 1) || (tga_height < 1) ||
4769  (tga_image_type < 1) || (tga_image_type > 3) ||
4770  ((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16) &&
4771  (tga_bits_per_pixel != 24) && (tga_bits_per_pixel != 32))
4772  )
4773  {
4774  return NULL; // we don't report this as a bad TGA because we don't even know if it's TGA
4775  }
4776 
4777  // If I'm paletted, then I'll use the number of bits from the palette
4778  if ( tga_indexed )
4779  {
4780  tga_comp = tga_palette_bits / 8;
4781  }
4782 
4783  // tga info
4784  *x = tga_width;
4785  *y = tga_height;
4786  if (comp) *comp = tga_comp;
4787 
4788  tga_data = (unsigned char*)stbi__malloc( tga_width * tga_height * tga_comp );
4789  if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");
4790 
4791  // skip to the data's starting position (offset usually = 0)
4792  stbi__skip(s, tga_offset );
4793 
4794  if ( !tga_indexed && !tga_is_RLE) {
4795  for (i=0; i < tga_height; ++i) {
4796  int y = tga_inverted ? tga_height -i - 1 : i;
4797  stbi_uc *tga_row = tga_data + y*tga_width*tga_comp;
4798  stbi__getn(s, tga_row, tga_width * tga_comp);
4799  }
4800  } else {
4801  // do I need to load a palette?
4802  if ( tga_indexed)
4803  {
4804  // any data to skip? (offset usually = 0)
4805  stbi__skip(s, tga_palette_start );
4806  // load the palette
4807  tga_palette = (unsigned char*)stbi__malloc( tga_palette_len * tga_palette_bits / 8 );
4808  if (!tga_palette) {
4809  STBI_FREE(tga_data);
4810  return stbi__errpuc("outofmem", "Out of memory");
4811  }
4812  if (!stbi__getn(s, tga_palette, tga_palette_len * tga_palette_bits / 8 )) {
4813  STBI_FREE(tga_data);
4814  STBI_FREE(tga_palette);
4815  return stbi__errpuc("bad palette", "Corrupt TGA");
4816  }
4817  }
4818  // load the data
4819  for (i=0; i < tga_width * tga_height; ++i)
4820  {
4821  // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk?
4822  if ( tga_is_RLE )
4823  {
4824  if ( RLE_count == 0 )
4825  {
4826  // yep, get the next byte as a RLE command
4827  int RLE_cmd = stbi__get8(s);
4828  RLE_count = 1 + (RLE_cmd & 127);
4829  RLE_repeating = RLE_cmd >> 7;
4830  read_next_pixel = 1;
4831  } else if ( !RLE_repeating )
4832  {
4833  read_next_pixel = 1;
4834  }
4835  } else
4836  {
4837  read_next_pixel = 1;
4838  }
4839  // OK, if I need to read a pixel, do it now
4840  if ( read_next_pixel )
4841  {
4842  // load however much data we did have
4843  if ( tga_indexed )
4844  {
4845  // read in 1 byte, then perform the lookup
4846  int pal_idx = stbi__get8(s);
4847  if ( pal_idx >= tga_palette_len )
4848  {
4849  // invalid index
4850  pal_idx = 0;
4851  }
4852  pal_idx *= tga_bits_per_pixel / 8;
4853  for (j = 0; j*8 < tga_bits_per_pixel; ++j)
4854  {
4855  raw_data[j] = tga_palette[pal_idx+j];
4856  }
4857  } else
4858  {
4859  // read in the data raw
4860  for (j = 0; j*8 < tga_bits_per_pixel; ++j)
4861  {
4862  raw_data[j] = stbi__get8(s);
4863  }
4864  }
4865  // clear the reading flag for the next pixel
4866  read_next_pixel = 0;
4867  } // end of reading a pixel
4868 
4869  // copy data
4870  for (j = 0; j < tga_comp; ++j)
4871  tga_data[i*tga_comp+j] = raw_data[j];
4872 
4873  // in case we're in RLE mode, keep counting down
4874  --RLE_count;
4875  }
4876  // do I need to invert the image?
4877  if ( tga_inverted )
4878  {
4879  for (j = 0; j*2 < tga_height; ++j)
4880  {
4881  int index1 = j * tga_width * tga_comp;
4882  int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
4883  for (i = tga_width * tga_comp; i > 0; --i)
4884  {
4885  unsigned char temp = tga_data[index1];
4886  tga_data[index1] = tga_data[index2];
4887  tga_data[index2] = temp;
4888  ++index1;
4889  ++index2;
4890  }
4891  }
4892  }
4893  // clear my palette, if I had one
4894  if ( tga_palette != NULL )
4895  {
4896  STBI_FREE( tga_palette );
4897  }
4898  }
4899 
4900  // swap RGB
4901  if (tga_comp >= 3)
4902  {
4903  unsigned char* tga_pixel = tga_data;
4904  for (i=0; i < tga_width * tga_height; ++i)
4905  {
4906  unsigned char temp = tga_pixel[0];
4907  tga_pixel[0] = tga_pixel[2];
4908  tga_pixel[2] = temp;
4909  tga_pixel += tga_comp;
4910  }
4911  }
4912 
4913  // convert to target component count
4914  if (req_comp && req_comp != tga_comp)
4915  tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);
4916 
4917  // the things I do to get rid of an error message, and yet keep
4918  // Microsoft's C compilers happy... [8^(
4919  tga_palette_start = tga_palette_len = tga_palette_bits =
4920  tga_x_origin = tga_y_origin = 0;
4921  // OK, done
4922  return tga_data;
4923 }
4924 #endif
4925 
4926 // *************************************************************************************************
4927 // Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
4928 
4929 #ifndef STBI_NO_PSD
4930 static int stbi__psd_test(stbi__context *s)
4931 {
4932  int r = (stbi__get32be(s) == 0x38425053);
4933  stbi__rewind(s);
4934  return r;
4935 }
4936 
4937 static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
4938 {
4939  int pixelCount;
4940  int channelCount, compression;
4941  int channel, i, count, len;
4942  int w,h;
4943  stbi_uc *out;
4944 
4945  // Check identifier
4946  if (stbi__get32be(s) != 0x38425053) // "8BPS"
4947  return stbi__errpuc("not PSD", "Corrupt PSD image");
4948 
4949  // Check file type version.
4950  if (stbi__get16be(s) != 1)
4951  return stbi__errpuc("wrong version", "Unsupported version of PSD image");
4952 
4953  // Skip 6 reserved bytes.
4954  stbi__skip(s, 6 );
4955 
4956  // Read the number of channels (R, G, B, A, etc).
4957  channelCount = stbi__get16be(s);
4958  if (channelCount < 0 || channelCount > 16)
4959  return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");
4960 
4961  // Read the rows and columns of the image.
4962  h = stbi__get32be(s);
4963  w = stbi__get32be(s);
4964 
4965  // Make sure the depth is 8 bits.
4966  if (stbi__get16be(s) != 8)
4967  return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 bit");
4968 
4969  // Make sure the color mode is RGB.
4970  // Valid options are:
4971  // 0: Bitmap
4972  // 1: Grayscale
4973  // 2: Indexed color
4974  // 3: RGB color
4975  // 4: CMYK color
4976  // 7: Multichannel
4977  // 8: Duotone
4978  // 9: Lab color
4979  if (stbi__get16be(s) != 3)
4980  return stbi__errpuc("wrong color format", "PSD is not in RGB color format");
4981 
4982  // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.)
4983  stbi__skip(s,stbi__get32be(s) );
4984 
4985  // Skip the image resources. (resolution, pen tool paths, etc)
4986  stbi__skip(s, stbi__get32be(s) );
4987 
4988  // Skip the reserved data.
4989  stbi__skip(s, stbi__get32be(s) );
4990 
4991  // Find out if the data is compressed.
4992  // Known values:
4993  // 0: no compression
4994  // 1: RLE compressed
4995  compression = stbi__get16be(s);
4996  if (compression > 1)
4997  return stbi__errpuc("bad compression", "PSD has an unknown compression format");
4998 
4999  // Create the destination image.
5000  out = (stbi_uc *) stbi__malloc(4 * w*h);
5001  if (!out) return stbi__errpuc("outofmem", "Out of memory");
5002  pixelCount = w*h;
5003 
5004  // Initialize the data to zero.
5005  //memset( out, 0, pixelCount * 4 );
5006 
5007  // Finally, the image data.
5008  if (compression) {
5009  // RLE as used by .PSD and .TIFF
5010  // Loop until you get the number of unpacked bytes you are expecting:
5011  // Read the next source byte into n.
5012  // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
5013  // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
5014  // Else if n is 128, noop.
5015  // Endloop
5016 
5017  // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data,
5018  // which we're going to just skip.
5019  stbi__skip(s, h * channelCount * 2 );
5020 
5021  // Read the RLE data by channel.
5022  for (channel = 0; channel < 4; channel++) {
5023  stbi_uc *p;
5024 
5025  p = out+channel;
5026  if (channel >= channelCount) {
5027  // Fill this channel with default data.
5028  for (i = 0; i < pixelCount; i++) *p = (channel == 3 ? 255 : 0), p += 4;
5029  } else {
5030  // Read the RLE data.
5031  count = 0;
5032  while (count < pixelCount) {
5033  len = stbi__get8(s);
5034  if (len == 128) {
5035  // No-op.
5036  } else if (len < 128) {
5037  // Copy next len+1 bytes literally.
5038  len++;
5039  count += len;
5040  while (len) {
5041  *p = stbi__get8(s);
5042  p += 4;
5043  len--;
5044  }
5045  } else if (len > 128) {
5046  stbi_uc val;
5047  // Next -len+1 bytes in the dest are replicated from next source byte.
5048  // (Interpret len as a negative 8-bit int.)
5049  len ^= 0x0FF;
5050  len += 2;
5051  val = stbi__get8(s);
5052  count += len;
5053  while (len) {
5054  *p = val;
5055  p += 4;
5056  len--;
5057  }
5058  }
5059  }
5060  }
5061  }
5062 
5063  } else {
5064  // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...)
5065  // where each channel consists of an 8-bit value for each pixel in the image.
5066 
5067  // Read the data by channel.
5068  for (channel = 0; channel < 4; channel++) {
5069  stbi_uc *p;
5070 
5071  p = out + channel;
5072  if (channel > channelCount) {
5073  // Fill this channel with default data.
5074  for (i = 0; i < pixelCount; i++) *p = channel == 3 ? 255 : 0, p += 4;
5075  } else {
5076  // Read the data.
5077  for (i = 0; i < pixelCount; i++)
5078  *p = stbi__get8(s), p += 4;
5079  }
5080  }
5081  }
5082 
5083  if (req_comp && req_comp != 4) {
5084  out = stbi__convert_format(out, 4, req_comp, w, h);
5085  if (out == NULL) return out; // stbi__convert_format frees input on failure
5086  }
5087 
5088  if (comp) *comp = channelCount;
5089  *y = h;
5090  *x = w;
5091 
5092  return out;
5093 }
5094 #endif
5095 
5096 // *************************************************************************************************
5097 // Softimage PIC loader
5098 // by Tom Seddon
5099 //
5100 // See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
5101 // See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
5102 
5103 #ifndef STBI_NO_PIC
5104 static int stbi__pic_is4(stbi__context *s,const char *str)
5105 {
5106  int i;
5107  for (i=0; i<4; ++i)
5108  if (stbi__get8(s) != (stbi_uc)str[i])
5109  return 0;
5110 
5111  return 1;
5112 }
5113 
5114 static int stbi__pic_test_core(stbi__context *s)
5115 {
5116  int i;
5117 
5118  if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
5119  return 0;
5120 
5121  for(i=0;i<84;++i)
5122  stbi__get8(s);
5123 
5124  if (!stbi__pic_is4(s,"PICT"))
5125  return 0;
5126 
5127  return 1;
5128 }
5129 
5130 typedef struct
5131 {
5132  stbi_uc size,type,channel;
5133 } stbi__pic_packet;
5134 
5135 static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
5136 {
5137  int mask=0x80, i;
5138 
5139  for (i=0; i<4; ++i, mask>>=1) {
5140  if (channel & mask) {
5141  if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
5142  dest[i]=stbi__get8(s);
5143  }
5144  }
5145 
5146  return dest;
5147 }
5148 
5149 static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
5150 {
5151  int mask=0x80,i;
5152 
5153  for (i=0;i<4; ++i, mask>>=1)
5154  if (channel&mask)
5155  dest[i]=src[i];
5156 }
5157 
5158 static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
5159 {
5160  int act_comp=0,num_packets=0,y,chained;
5161  stbi__pic_packet packets[10];
5162 
5163  // this will (should...) cater for even some bizarre stuff like having data
5164  // for the same channel in multiple packets.
5165  do {
5166  stbi__pic_packet *packet;
5167 
5168  if (num_packets==sizeof(packets)/sizeof(packets[0]))
5169  return stbi__errpuc("bad format","too many packets");
5170 
5171  packet = &packets[num_packets++];
5172 
5173  chained = stbi__get8(s);
5174  packet->size = stbi__get8(s);
5175  packet->type = stbi__get8(s);
5176  packet->channel = stbi__get8(s);
5177 
5178  act_comp |= packet->channel;
5179 
5180  if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (reading packets)");
5181  if (packet->size != 8) return stbi__errpuc("bad format","packet isn't 8bpp");
5182  } while (chained);
5183 
5184  *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
5185 
5186  for(y=0; y<height; ++y) {
5187  int packet_idx;
5188 
5189  for(packet_idx=0; packet_idx < num_packets; ++packet_idx) {
5190  stbi__pic_packet *packet = &packets[packet_idx];
5191  stbi_uc *dest = result+y*width*4;
5192 
5193  switch (packet->type) {
5194  default:
5195  return stbi__errpuc("bad format","packet has bad compression type");
5196 
5197  case 0: {//uncompressed
5198  int x;
5199 
5200  for(x=0;x<width;++x, dest+=4)
5201  if (!stbi__readval(s,packet->channel,dest))
5202  return 0;
5203  break;
5204  }
5205 
5206  case 1://Pure RLE
5207  {
5208  int left=width, i;
5209 
5210  while (left>0) {
5211  stbi_uc count,value[4];
5212 
5213  count=stbi__get8(s);
5214  if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pure read count)");
5215 
5216  if (count > left)
5217  count = (stbi_uc) left;
5218 
5219  if (!stbi__readval(s,packet->channel,value)) return 0;
5220 
5221  for(i=0; i<count; ++i,dest+=4)
5222  stbi__copyval(packet->channel,dest,value);
5223  left -= count;
5224  }
5225  }
5226  break;
5227 
5228  case 2: {//Mixed RLE
5229  int left=width;
5230  while (left>0) {
5231  int count = stbi__get8(s), i;
5232  if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (mixed read count)");
5233 
5234  if (count >= 128) { // Repeated
5235  stbi_uc value[4];
5236  int i;
5237 
5238  if (count==128)
5239  count = stbi__get16be(s);
5240  else
5241  count -= 127;
5242  if (count > left)
5243  return stbi__errpuc("bad file","scanline overrun");
5244 
5245  if (!stbi__readval(s,packet->channel,value))
5246  return 0;
5247 
5248  for(i=0;i<count;++i, dest += 4)
5249  stbi__copyval(packet->channel,dest,value);
5250  } else { // Raw
5251  ++count;
5252  if (count>left) return stbi__errpuc("bad file","scanline overrun");
5253 
5254  for(i=0;i<count;++i, dest+=4)
5255  if (!stbi__readval(s,packet->channel,dest))
5256  return 0;
5257  }
5258  left-=count;
5259  }
5260  break;
5261  }
5262  }
5263  }
5264  }
5265 
5266  return result;
5267 }
5268 
5269 static stbi_uc *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp)
5270 {
5271  stbi_uc *result;
5272  int i, x,y;
5273 
5274  for (i=0; i<92; ++i)
5275  stbi__get8(s);
5276 
5277  x = stbi__get16be(s);
5278  y = stbi__get16be(s);
5279  if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)");
5280  if ((1 << 28) / x < y) return stbi__errpuc("too large", "Image too large to decode");
5281 
5282  stbi__get32be(s); //skip `ratio'
5283  stbi__get16be(s); //skip `fields'
5284  stbi__get16be(s); //skip `pad'
5285 
5286  // intermediate buffer is RGBA
5287  result = (stbi_uc *) stbi__malloc(x*y*4);
5288  memset(result, 0xff, x*y*4);
5289 
5290  if (!stbi__pic_load_core(s,x,y,comp, result)) {
5291  STBI_FREE(result);
5292  result=0;
5293  }
5294  *px = x;
5295  *py = y;
5296  if (req_comp == 0) req_comp = *comp;
5297  result=stbi__convert_format(result,4,req_comp,x,y);
5298 
5299  return result;
5300 }
5301 
5302 static int stbi__pic_test(stbi__context *s)
5303 {
5304  int r = stbi__pic_test_core(s);
5305  stbi__rewind(s);
5306  return r;
5307 }
5308 #endif
5309 
5310 // *************************************************************************************************
5311 // GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
5312 
5313 #ifndef STBI_NO_GIF
5314 typedef struct
5315 {
5316  stbi__int16 prefix;
5317  stbi_uc first;
5318  stbi_uc suffix;
5319 } stbi__gif_lzw;
5320 
5321 typedef struct
5322 {
5323  int w,h;
5324  stbi_uc *out; // output buffer (always 4 components)
5325  int flags, bgindex, ratio, transparent, eflags;
5326  stbi_uc pal[256][4];
5327  stbi_uc lpal[256][4];
5328  stbi__gif_lzw codes[4096];
5329  stbi_uc *color_table;
5330  int parse, step;
5331  int lflags;
5332  int start_x, start_y;
5333  int max_x, max_y;
5334  int cur_x, cur_y;
5335  int line_size;
5336 } stbi__gif;
5337 
5338 static int stbi__gif_test_raw(stbi__context *s)
5339 {
5340  int sz;
5341  if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0;
5342  sz = stbi__get8(s);
5343  if (sz != '9' && sz != '7') return 0;
5344  if (stbi__get8(s) != 'a') return 0;
5345  return 1;
5346 }
5347 
5348 static int stbi__gif_test(stbi__context *s)
5349 {
5350  int r = stbi__gif_test_raw(s);
5351  stbi__rewind(s);
5352  return r;
5353 }
5354 
5355 static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
5356 {
5357  int i;
5358  for (i=0; i < num_entries; ++i) {
5359  pal[i][2] = stbi__get8(s);
5360  pal[i][1] = stbi__get8(s);
5361  pal[i][0] = stbi__get8(s);
5362  pal[i][3] = transp == i ? 0 : 255;
5363  }
5364 }
5365 
5366 static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
5367 {
5368  stbi_uc version;
5369  if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')
5370  return stbi__err("not GIF", "Corrupt GIF");
5371 
5372  version = stbi__get8(s);
5373  if (version != '7' && version != '9') return stbi__err("not GIF", "Corrupt GIF");
5374  if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF");
5375 
5376  stbi__g_failure_reason = "";
5377  g->w = stbi__get16le(s);
5378  g->h = stbi__get16le(s);
5379  g->flags = stbi__get8(s);
5380  g->bgindex = stbi__get8(s);
5381  g->ratio = stbi__get8(s);
5382  g->transparent = -1;
5383 
5384  if (comp != 0) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments
5385 
5386  if (is_info) return 1;
5387 
5388  if (g->flags & 0x80)
5389  stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);
5390 
5391  return 1;
5392 }
5393 
5394 static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
5395 {
5396  stbi__gif g;
5397  if (!stbi__gif_header(s, &g, comp, 1)) {
5398  stbi__rewind( s );
5399  return 0;
5400  }
5401  if (x) *x = g.w;
5402  if (y) *y = g.h;
5403  return 1;
5404 }
5405 
5406 static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
5407 {
5408  stbi_uc *p, *c;
5409 
5410  // recurse to decode the prefixes, since the linked-list is backwards,
5411  // and working backwards through an interleaved image would be nasty
5412  if (g->codes[code].prefix >= 0)
5413  stbi__out_gif_code(g, g->codes[code].prefix);
5414 
5415  if (g->cur_y >= g->max_y) return;
5416 
5417  p = &g->out[g->cur_x + g->cur_y];
5418  c = &g->color_table[g->codes[code].suffix * 4];
5419 
5420  if (c[3] >= 128) {
5421  p[0] = c[2];
5422  p[1] = c[1];
5423  p[2] = c[0];
5424  p[3] = c[3];
5425  }
5426  g->cur_x += 4;
5427 
5428  if (g->cur_x >= g->max_x) {
5429  g->cur_x = g->start_x;
5430  g->cur_y += g->step;
5431 
5432  while (g->cur_y >= g->max_y && g->parse > 0) {
5433  g->step = (1 << g->parse) * g->line_size;
5434  g->cur_y = g->start_y + (g->step >> 1);
5435  --g->parse;
5436  }
5437  }
5438 }
5439 
5440 static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
5441 {
5442  stbi_uc lzw_cs;
5443  stbi__int32 len, code;
5444  stbi__uint32 first;
5445  stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
5446  stbi__gif_lzw *p;
5447 
5448  lzw_cs = stbi__get8(s);
5449  clear = 1 << lzw_cs;
5450  first = 1;
5451  codesize = lzw_cs + 1;
5452  codemask = (1 << codesize) - 1;
5453  bits = 0;
5454  valid_bits = 0;
5455  for (code = 0; code < clear; code++) {
5456  g->codes[code].prefix = -1;
5457  g->codes[code].first = (stbi_uc) code;
5458  g->codes[code].suffix = (stbi_uc) code;
5459  }
5460 
5461  // support no starting clear code
5462  avail = clear+2;
5463  oldcode = -1;
5464 
5465  len = 0;
5466  for(;;) {
5467  if (valid_bits < codesize) {
5468  if (len == 0) {
5469  len = stbi__get8(s); // start new block
5470  if (len == 0)
5471  return g->out;
5472  }
5473  --len;
5474  bits |= (stbi__int32) stbi__get8(s) << valid_bits;
5475  valid_bits += 8;
5476  } else {
5477  stbi__int32 code = bits & codemask;
5478  bits >>= codesize;
5479  valid_bits -= codesize;
5480  // @OPTIMIZE: is there some way we can accelerate the non-clear path?
5481  if (code == clear) { // clear code
5482  codesize = lzw_cs + 1;
5483  codemask = (1 << codesize) - 1;
5484  avail = clear + 2;
5485  oldcode = -1;
5486  first = 0;
5487  } else if (code == clear + 1) { // end of stream code
5488  stbi__skip(s, len);
5489  while ((len = stbi__get8(s)) > 0)
5490  stbi__skip(s,len);
5491  return g->out;
5492  } else if (code <= avail) {
5493  if (first) return stbi__errpuc("no clear code", "Corrupt GIF");
5494 
5495  if (oldcode >= 0) {
5496  p = &g->codes[avail++];
5497  if (avail > 4096) return stbi__errpuc("too many codes", "Corrupt GIF");
5498  p->prefix = (stbi__int16) oldcode;
5499  p->first = g->codes[oldcode].first;
5500  p->suffix = (code == avail) ? p->first : g->codes[code].first;
5501  } else if (code == avail)
5502  return stbi__errpuc("illegal code in raster", "Corrupt GIF");
5503 
5504  stbi__out_gif_code(g, (stbi__uint16) code);
5505 
5506  if ((avail & codemask) == 0 && avail <= 0x0FFF) {
5507  codesize++;
5508  codemask = (1 << codesize) - 1;
5509  }
5510 
5511  oldcode = code;
5512  } else {
5513  return stbi__errpuc("illegal code in raster", "Corrupt GIF");
5514  }
5515  }
5516  }
5517 }
5518 
5519 static void stbi__fill_gif_background(stbi__gif *g)
5520 {
5521  int i;
5522  stbi_uc *c = g->pal[g->bgindex];
5523  // @OPTIMIZE: write a dword at a time
5524  for (i = 0; i < g->w * g->h * 4; i += 4) {
5525  stbi_uc *p = &g->out[i];
5526  p[0] = c[2];
5527  p[1] = c[1];
5528  p[2] = c[0];
5529  p[3] = c[3];
5530  }
5531 }
5532 
5533 // this function is designed to support animated gifs, although stb_image doesn't support it
5534 static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp)
5535 {
5536  int i;
5537  stbi_uc *old_out = 0;
5538 
5539  if (g->out == 0) {
5540  if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header
5541  g->out = (stbi_uc *) stbi__malloc(4 * g->w * g->h);
5542  if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory");
5543  stbi__fill_gif_background(g);
5544  } else {
5545  // animated-gif-only path
5546  if (((g->eflags & 0x1C) >> 2) == 3) {
5547  old_out = g->out;
5548  g->out = (stbi_uc *) stbi__malloc(4 * g->w * g->h);
5549  if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory");
5550  memcpy(g->out, old_out, g->w*g->h*4);
5551  }
5552  }
5553 
5554  for (;;) {
5555  switch (stbi__get8(s)) {
5556  case 0x2C: /* Image Descriptor */
5557  {
5558  stbi__int32 x, y, w, h;
5559  stbi_uc *o;
5560 
5561  x = stbi__get16le(s);
5562  y = stbi__get16le(s);
5563  w = stbi__get16le(s);
5564  h = stbi__get16le(s);
5565  if (((x + w) > (g->w)) || ((y + h) > (g->h)))
5566  return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
5567 
5568  g->line_size = g->w * 4;
5569  g->start_x = x * 4;
5570  g->start_y = y * g->line_size;
5571  g->max_x = g->start_x + w * 4;
5572  g->max_y = g->start_y + h * g->line_size;
5573  g->cur_x = g->start_x;
5574  g->cur_y = g->start_y;
5575 
5576  g->lflags = stbi__get8(s);
5577 
5578  if (g->lflags & 0x40) {
5579  g->step = 8 * g->line_size; // first interlaced spacing
5580  g->parse = 3;
5581  } else {
5582  g->step = g->line_size;
5583  g->parse = 0;
5584  }
5585 
5586  if (g->lflags & 0x80) {
5587  stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
5588  g->color_table = (stbi_uc *) g->lpal;
5589  } else if (g->flags & 0x80) {
5590  for (i=0; i < 256; ++i) // @OPTIMIZE: stbi__jpeg_reset only the previous transparent
5591  g->pal[i][3] = 255;
5592  if (g->transparent >= 0 && (g->eflags & 0x01))
5593  g->pal[g->transparent][3] = 0;
5594  g->color_table = (stbi_uc *) g->pal;
5595  } else
5596  return stbi__errpuc("missing color table", "Corrupt GIF");
5597 
5598  o = stbi__process_gif_raster(s, g);
5599  if (o == NULL) return NULL;
5600 
5601  if (req_comp && req_comp != 4)
5602  o = stbi__convert_format(o, 4, req_comp, g->w, g->h);
5603  return o;
5604  }
5605 
5606  case 0x21: // Comment Extension.
5607  {
5608  int len;
5609  if (stbi__get8(s) == 0xF9) { // Graphic Control Extension.
5610  len = stbi__get8(s);
5611  if (len == 4) {
5612  g->eflags = stbi__get8(s);
5613  stbi__get16le(s); // delay
5614  g->transparent = stbi__get8(s);
5615  } else {
5616  stbi__skip(s, len);
5617  break;
5618  }
5619  }
5620  while ((len = stbi__get8(s)) != 0)
5621  stbi__skip(s, len);
5622  break;
5623  }
5624 
5625  case 0x3B: // gif stream termination code
5626  return (stbi_uc *) s; // using '1' causes warning on some compilers
5627 
5628  default:
5629  return stbi__errpuc("unknown code", "Corrupt GIF");
5630  }
5631  }
5632 }
5633 
5634 static stbi_uc *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
5635 {
5636  stbi_uc *u = 0;
5637  stbi__gif g;
5638  memset(&g, 0, sizeof(g));
5639 
5640  u = stbi__gif_load_next(s, &g, comp, req_comp);
5641  if (u == (stbi_uc *) s) u = 0; // end of animated gif marker
5642  if (u) {
5643  *x = g.w;
5644  *y = g.h;
5645  }
5646 
5647  return u;
5648 }
5649 
5650 static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
5651 {
5652  return stbi__gif_info_raw(s,x,y,comp);
5653 }
5654 #endif
5655 
5656 // *************************************************************************************************
5657 // Radiance RGBE HDR loader
5658 // originally by Nicolas Schulz
5659 #ifndef STBI_NO_HDR
5660 static int stbi__hdr_test_core(stbi__context *s)
5661 {
5662  const char *signature = "#?RADIANCE\n";
5663  int i;
5664  for (i=0; signature[i]; ++i)
5665  if (stbi__get8(s) != signature[i])
5666  return 0;
5667  return 1;
5668 }
5669 
5670 static int stbi__hdr_test(stbi__context* s)
5671 {
5672  int r = stbi__hdr_test_core(s);
5673  stbi__rewind(s);
5674  return r;
5675 }
5676 
5677 #define STBI__HDR_BUFLEN 1024
5678 static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
5679 {
5680  int len=0;
5681  char c = '\0';
5682 
5683  c = (char) stbi__get8(z);
5684 
5685  while (!stbi__at_eof(z) && c != '\n') {
5686  buffer[len++] = c;
5687  if (len == STBI__HDR_BUFLEN-1) {
5688  // flush to end of line
5689  while (!stbi__at_eof(z) && stbi__get8(z) != '\n')
5690  ;
5691  break;
5692  }
5693  c = (char) stbi__get8(z);
5694  }
5695 
5696  buffer[len] = 0;
5697  return buffer;
5698 }
5699 
5700 static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
5701 {
5702  if ( input[3] != 0 ) {
5703  float f1;
5704  // Exponent
5705  f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8));
5706  if (req_comp <= 2)
5707  output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
5708  else {
5709  output[0] = input[0] * f1;
5710  output[1] = input[1] * f1;
5711  output[2] = input[2] * f1;
5712  }
5713  if (req_comp == 2) output[1] = 1;
5714  if (req_comp == 4) output[3] = 1;
5715  } else {
5716  switch (req_comp) {
5717  case 4: output[3] = 1; /* fallthrough */
5718  case 3: output[0] = output[1] = output[2] = 0;
5719  break;
5720  case 2: output[1] = 1; /* fallthrough */
5721  case 1: output[0] = 0;
5722  break;
5723  }
5724  }
5725 }
5726 
5727 static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
5728 {
5729  char buffer[STBI__HDR_BUFLEN];
5730  char *token;
5731  int valid = 0;
5732  int width, height;
5733  stbi_uc *scanline;
5734  float *hdr_data;
5735  int len;
5736  unsigned char count, value;
5737  int i, j, k, c1,c2, z;
5738 
5739 
5740  // Check identifier
5741  if (strcmp(stbi__hdr_gettoken(s,buffer), "#?RADIANCE") != 0)
5742  return stbi__errpf("not HDR", "Corrupt HDR image");
5743 
5744  // Parse header
5745  for(;;) {
5746  token = stbi__hdr_gettoken(s,buffer);
5747  if (token[0] == 0) break;
5748  if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
5749  }
5750 
5751  if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format");
5752 
5753  // Parse width and height
5754  // can't use sscanf() if we're not using stdio!
5755  token = stbi__hdr_gettoken(s,buffer);
5756  if (strncmp(token, "-Y ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format");
5757  token += 3;
5758  height = (int) strtol(token, &token, 10);
5759  while (*token == ' ') ++token;
5760  if (strncmp(token, "+X ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format");
5761  token += 3;
5762  width = (int) strtol(token, NULL, 10);
5763 
5764  *x = width;
5765  *y = height;
5766 
5767  if (comp) *comp = 3;
5768  if (req_comp == 0) req_comp = 3;
5769 
5770  // Read data
5771  hdr_data = (float *) stbi__malloc(height * width * req_comp * sizeof(float));
5772 
5773  // Load image data
5774  // image data is stored as some number of sca
5775  if ( width < 8 || width >= 32768) {
5776  // Read flat data
5777  for (j=0; j < height; ++j) {
5778  for (i=0; i < width; ++i) {
5779  stbi_uc rgbe[4];
5780  main_decode_loop:
5781  stbi__getn(s, rgbe, 4);
5782  stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
5783  }
5784  }
5785  } else {
5786  // Read RLE-encoded data
5787  scanline = NULL;
5788 
5789  for (j = 0; j < height; ++j) {
5790  c1 = stbi__get8(s);
5791  c2 = stbi__get8(s);
5792  len = stbi__get8(s);
5793  if (c1 != 2 || c2 != 2 || (len & 0x80)) {
5794  // not run-length encoded, so we have to actually use THIS data as a decoded
5795  // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
5796  stbi_uc rgbe[4];
5797  rgbe[0] = (stbi_uc) c1;
5798  rgbe[1] = (stbi_uc) c2;
5799  rgbe[2] = (stbi_uc) len;
5800  rgbe[3] = (stbi_uc) stbi__get8(s);
5801  stbi__hdr_convert(hdr_data, rgbe, req_comp);
5802  i = 1;
5803  j = 0;
5804  STBI_FREE(scanline);
5805  goto main_decode_loop; // yes, this makes no sense
5806  }
5807  len <<= 8;
5808  len |= stbi__get8(s);
5809  if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
5810  if (scanline == NULL) scanline = (stbi_uc *) stbi__malloc(width * 4);
5811 
5812  for (k = 0; k < 4; ++k) {
5813  i = 0;
5814  while (i < width) {
5815  count = stbi__get8(s);
5816  if (count > 128) {
5817  // Run
5818  value = stbi__get8(s);
5819  count -= 128;
5820  for (z = 0; z < count; ++z)
5821  scanline[i++ * 4 + k] = value;
5822  } else {
5823  // Dump
5824  for (z = 0; z < count; ++z)
5825  scanline[i++ * 4 + k] = stbi__get8(s);
5826  }
5827  }
5828  }
5829  for (i=0; i < width; ++i)
5830  stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
5831  }
5832  STBI_FREE(scanline);
5833  }
5834 
5835  return hdr_data;
5836 }
5837 
5838 static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
5839 {
5840  char buffer[STBI__HDR_BUFLEN];
5841  char *token;
5842  int valid = 0;
5843 
5844  if (strcmp(stbi__hdr_gettoken(s,buffer), "#?RADIANCE") != 0) {
5845  stbi__rewind( s );
5846  return 0;
5847  }
5848 
5849  for(;;) {
5850  token = stbi__hdr_gettoken(s,buffer);
5851  if (token[0] == 0) break;
5852  if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
5853  }
5854 
5855  if (!valid) {
5856  stbi__rewind( s );
5857  return 0;
5858  }
5859  token = stbi__hdr_gettoken(s,buffer);
5860  if (strncmp(token, "-Y ", 3)) {
5861  stbi__rewind( s );
5862  return 0;
5863  }
5864  token += 3;
5865  *y = (int) strtol(token, &token, 10);
5866  while (*token == ' ') ++token;
5867  if (strncmp(token, "+X ", 3)) {
5868  stbi__rewind( s );
5869  return 0;
5870  }
5871  token += 3;
5872  *x = (int) strtol(token, NULL, 10);
5873  *comp = 3;
5874  return 1;
5875 }
5876 #endif // STBI_NO_HDR
5877 
5878 #ifndef STBI_NO_BMP
5879 static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
5880 {
5881  int hsz;
5882  if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') {
5883  stbi__rewind( s );
5884  return 0;
5885  }
5886  stbi__skip(s,12);
5887  hsz = stbi__get32le(s);
5888  if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) {
5889  stbi__rewind( s );
5890  return 0;
5891  }
5892  if (hsz == 12) {
5893  *x = stbi__get16le(s);
5894  *y = stbi__get16le(s);
5895  } else {
5896  *x = stbi__get32le(s);
5897  *y = stbi__get32le(s);
5898  }
5899  if (stbi__get16le(s) != 1) {
5900  stbi__rewind( s );
5901  return 0;
5902  }
5903  *comp = stbi__get16le(s) / 8;
5904  return 1;
5905 }
5906 #endif
5907 
5908 #ifndef STBI_NO_PSD
5909 static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
5910 {
5911  int channelCount;
5912  if (stbi__get32be(s) != 0x38425053) {
5913  stbi__rewind( s );
5914  return 0;
5915  }
5916  if (stbi__get16be(s) != 1) {
5917  stbi__rewind( s );
5918  return 0;
5919  }
5920  stbi__skip(s, 6);
5921  channelCount = stbi__get16be(s);
5922  if (channelCount < 0 || channelCount > 16) {
5923  stbi__rewind( s );
5924  return 0;
5925  }
5926  *y = stbi__get32be(s);
5927  *x = stbi__get32be(s);
5928  if (stbi__get16be(s) != 8) {
5929  stbi__rewind( s );
5930  return 0;
5931  }
5932  if (stbi__get16be(s) != 3) {
5933  stbi__rewind( s );
5934  return 0;
5935  }
5936  *comp = 4;
5937  return 1;
5938 }
5939 #endif
5940 
5941 #ifndef STBI_NO_PIC
5942 static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
5943 {
5944  int act_comp=0,num_packets=0,chained;
5945  stbi__pic_packet packets[10];
5946 
5947  stbi__skip(s, 92);
5948 
5949  *x = stbi__get16be(s);
5950  *y = stbi__get16be(s);
5951  if (stbi__at_eof(s)) return 0;
5952  if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) {
5953  stbi__rewind( s );
5954  return 0;
5955  }
5956 
5957  stbi__skip(s, 8);
5958 
5959  do {
5960  stbi__pic_packet *packet;
5961 
5962  if (num_packets==sizeof(packets)/sizeof(packets[0]))
5963  return 0;
5964 
5965  packet = &packets[num_packets++];
5966  chained = stbi__get8(s);
5967  packet->size = stbi__get8(s);
5968  packet->type = stbi__get8(s);
5969  packet->channel = stbi__get8(s);
5970  act_comp |= packet->channel;
5971 
5972  if (stbi__at_eof(s)) {
5973  stbi__rewind( s );
5974  return 0;
5975  }
5976  if (packet->size != 8) {
5977  stbi__rewind( s );
5978  return 0;
5979  }
5980  } while (chained);
5981 
5982  *comp = (act_comp & 0x10 ? 4 : 3);
5983 
5984  return 1;
5985 }
5986 #endif
5987 
5988 // *************************************************************************************************
5989 // Portable Gray Map and Portable Pixel Map loader
5990 // by Ken Miller
5991 //
5992 // PGM: http://netpbm.sourceforge.net/doc/pgm.html
5993 // PPM: http://netpbm.sourceforge.net/doc/ppm.html
5994 //
5995 // Known limitations:
5996 // Does not support comments in the header section
5997 // Does not support ASCII image data (formats P2 and P3)
5998 // Does not support 16-bit-per-channel
5999 
6000 #ifndef STBI_NO_PNM
6001 
6002 static int stbi__pnm_test(stbi__context *s)
6003 {
6004  char p, t;
6005  p = (char) stbi__get8(s);
6006  t = (char) stbi__get8(s);
6007  if (p != 'P' || (t != '5' && t != '6')) {
6008  stbi__rewind( s );
6009  return 0;
6010  }
6011  return 1;
6012 }
6013 
6014 static stbi_uc *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp)
6015 {
6016  stbi_uc *out;
6017  if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
6018  return 0;
6019  *x = s->img_x;
6020  *y = s->img_y;
6021  *comp = s->img_n;
6022 
6023  out = (stbi_uc *) stbi__malloc(s->img_n * s->img_x * s->img_y);
6024  if (!out) return stbi__errpuc("outofmem", "Out of memory");
6025  stbi__getn(s, out, s->img_n * s->img_x * s->img_y);
6026 
6027  if (req_comp && req_comp != s->img_n) {
6028  out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
6029  if (out == NULL) return out; // stbi__convert_format frees input on failure
6030  }
6031  return out;
6032 }
6033 
// Returns 1 if `c` is one of the six ASCII whitespace characters
// (space, \t, \n, \v, \f, \r), 0 otherwise.
static int stbi__pnm_isspace(char c)
{
   switch (c) {
      case ' ':
      case '\t':
      case '\n':
      case '\v':
      case '\f':
      case '\r':
         return 1;
      default:
         return 0;
   }
}
6038 
6039 static void stbi__pnm_skip_whitespace(stbi__context *s, char *c)
6040 {
6041  while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
6042  *c = (char) stbi__get8(s);
6043 }
6044 
// Returns 1 if `c` is an ASCII decimal digit '0'..'9', 0 otherwise.
static int stbi__pnm_isdigit(char c)
{
   // a single unsigned comparison covers both bounds: anything below '0'
   // wraps around to a large value
   unsigned int offset = (unsigned int) (c - '0');
   return offset < 10u;
}
6049 
6050 static int stbi__pnm_getinteger(stbi__context *s, char *c)
6051 {
6052  int value = 0;
6053 
6054  while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
6055  value = value*10 + (*c - '0');
6056  *c = (char) stbi__get8(s);
6057  }
6058 
6059  return value;
6060 }
6061 
6062 static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
6063 {
6064  int maxv;
6065  char c, p, t;
6066 
6067  stbi__rewind( s );
6068 
6069  // Get identifier
6070  p = (char) stbi__get8(s);
6071  t = (char) stbi__get8(s);
6072  if (p != 'P' || (t != '5' && t != '6')) {
6073  stbi__rewind( s );
6074  return 0;
6075  }
6076 
6077  *comp = (t == '6') ? 3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm
6078 
6079  c = (char) stbi__get8(s);
6080  stbi__pnm_skip_whitespace(s, &c);
6081 
6082  *x = stbi__pnm_getinteger(s, &c); // read width
6083  stbi__pnm_skip_whitespace(s, &c);
6084 
6085  *y = stbi__pnm_getinteger(s, &c); // read height
6086  stbi__pnm_skip_whitespace(s, &c);
6087 
6088  maxv = stbi__pnm_getinteger(s, &c); // read max value
6089 
6090  if (maxv > 255)
6091  return stbi__err("max value > 255", "PPM image not 8-bit");
6092  else
6093  return 1;
6094 }
6095 #endif
6096 
6097 static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
6098 {
6099  #ifndef STBI_NO_JPEG
6100  if (stbi__jpeg_info(s, x, y, comp)) return 1;
6101  #endif
6102 
6103  #ifndef STBI_NO_PNG
6104  if (stbi__png_info(s, x, y, comp)) return 1;
6105  #endif
6106 
6107  #ifndef STBI_NO_GIF
6108  if (stbi__gif_info(s, x, y, comp)) return 1;
6109  #endif
6110 
6111  #ifndef STBI_NO_BMP
6112  if (stbi__bmp_info(s, x, y, comp)) return 1;
6113  #endif
6114 
6115  #ifndef STBI_NO_PSD
6116  if (stbi__psd_info(s, x, y, comp)) return 1;
6117  #endif
6118 
6119  #ifndef STBI_NO_PIC
6120  if (stbi__pic_info(s, x, y, comp)) return 1;
6121  #endif
6122 
6123  #ifndef STBI_NO_PNM
6124  if (stbi__pnm_info(s, x, y, comp)) return 1;
6125  #endif
6126 
6127  #ifndef STBI_NO_HDR
6128  if (stbi__hdr_info(s, x, y, comp)) return 1;
6129  #endif
6130 
6131  // test tga last because it's a crappy test!
6132  #ifndef STBI_NO_TGA
6133  if (stbi__tga_info(s, x, y, comp))
6134  return 1;
6135  #endif
6136  return stbi__err("unknown image type", "Image not of any known type, or corrupt");
6137 }
6138 
6139 #ifndef STBI_NO_STDIO
6140 STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
6141 {
6142  FILE *f = stbi__fopen(filename, "rb");
6143  int result;
6144  if (!f) return stbi__err("can't fopen", "Unable to open file");
6145  result = stbi_info_from_file(f, x, y, comp);
6146  fclose(f);
6147  return result;
6148 }
6149 
6150 STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
6151 {
6152  int r;
6153  stbi__context s;
6154  long pos = ftell(f);
6155  stbi__start_file(&s, f);
6156  r = stbi__info_main(&s,x,y,comp);
6157  fseek(f,pos,SEEK_SET);
6158  return r;
6159 }
6160 #endif // !STBI_NO_STDIO
6161 
6162 STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
6163 {
6164  stbi__context s;
6165  stbi__start_mem(&s,buffer,len);
6166  return stbi__info_main(&s,x,y,comp);
6167 }
6168 
6169 STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
6170 {
6171  stbi__context s;
6172  stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
6173  return stbi__info_main(&s,x,y,comp);
6174 }
6175 
6176 #endif // STB_IMAGE_IMPLEMENTATION
6177 
6178 /*
6179  revision history:
6180  2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
6181  2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
6182  progressive JPEG (stb)
6183  PGM/PPM support (Ken Miller)
6184  STBI_MALLOC,STBI_REALLOC,STBI_FREE
6185  GIF bugfix -- seemingly never worked
6186  STBI_NO_*, STBI_ONLY_*
6187  1.48 (2014-12-14) fix incorrectly-named assert()
6188  1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
6189  optimize PNG (ryg)
6190  fix bug in interlaced PNG with user-specified channel count (stb)
6191  1.46 (2014-08-26)
6192  fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
6193  1.45 (2014-08-16)
6194  fix MSVC-ARM internal compiler error by wrapping malloc
6195  1.44 (2014-08-07)
6196  various warning fixes from Ronny Chevalier
6197  1.43 (2014-07-15)
6198  fix MSVC-only compiler problem in code changed in 1.42
6199  1.42 (2014-07-09)
6200  don't define _CRT_SECURE_NO_WARNINGS (affects user code)
6201  fixes to stbi__cleanup_jpeg path
6202  added STBI_ASSERT to avoid requiring assert.h
6203  1.41 (2014-06-25)
6204  fix search&replace from 1.36 that messed up comments/error messages
6205  1.40 (2014-06-22)
6206  fix gcc struct-initialization warning
6207  1.39 (2014-06-15)
6208  fix to TGA optimization when req_comp != number of components in TGA;
6209  fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
6210  add support for BMP version 5 (more ignored fields)
6211  1.38 (2014-06-06)
6212  suppress MSVC warnings on integer casts truncating values
6213  fix accidental rename of 'skip' field of I/O
6214  1.37 (2014-06-04)
6215  remove duplicate typedef
6216  1.36 (2014-06-03)
6217  convert to header file single-file library
6218  if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
6219  1.35 (2014-05-27)
6220  various warnings
6221  fix broken STBI_SIMD path
6222  fix bug where stbi_load_from_file no longer left file pointer in correct place
6223  fix broken non-easy path for 32-bit BMP (possibly never used)
6224  TGA optimization by Arseny Kapoulkine
6225  1.34 (unknown)
6226  use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
6227  1.33 (2011-07-14)
6228  make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
6229  1.32 (2011-07-13)
6230  support for "info" function for all supported filetypes (SpartanJ)
6231  1.31 (2011-06-20)
6232  a few more leak fixes, bug in PNG handling (SpartanJ)
6233  1.30 (2011-06-11)
6234  added ability to load files via callbacks to accommodate custom input streams (Ben Wenger)
6235  removed deprecated format-specific test/load functions
6236  removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
6237  error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
6238  fix inefficiency in decoding 32-bit BMP (David Woo)
6239  1.29 (2010-08-16)
6240  various warning fixes from Aurelien Pocheville
6241  1.28 (2010-08-01)
6242  fix bug in GIF palette transparency (SpartanJ)
6243  1.27 (2010-08-01)
6244  cast-to-stbi_uc to fix warnings
6245  1.26 (2010-07-24)
6246  fix bug in file buffering for PNG reported by SpartanJ
6247  1.25 (2010-07-17)
6248  refix trans_data warning (Won Chun)
6249  1.24 (2010-07-12)
6250  perf improvements reading from files on platforms with lock-heavy fgetc()
6251  minor perf improvements for jpeg
6252  deprecated type-specific functions so we'll get feedback if they're needed
6253  attempt to fix trans_data warning (Won Chun)
6254  1.23 fixed bug in iPhone support
6255  1.22 (2010-07-10)
6256  removed image *writing* support
6257  stbi_info support from Jetro Lauha
6258  GIF support from Jean-Marc Lienher
6259  iPhone PNG-extensions from James Brown
6260  warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez (U+017D)emva)
6261  1.21 fix use of 'stbi_uc' in header (reported by jon blow)
6262  1.20 added support for Softimage PIC, by Tom Seddon
6263  1.19 bug in interlaced PNG corruption check (found by ryg)
6264  1.18 2008-08-02
6265  fix a threading bug (local mutable static)
6266  1.17 support interlaced PNG
6267  1.16 major bugfix - stbi__convert_format converted one too many pixels
6268  1.15 initialize some fields for thread safety
6269  1.14 fix threadsafe conversion bug
6270  header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
6271  1.13 threadsafe
6272  1.12 const qualifiers in the API
6273  1.11 Support installable IDCT, colorspace conversion routines
6274  1.10 Fixes for 64-bit (don't use "unsigned long")
6275  optimized upsampling by Fabian "ryg" Giesen
6276  1.09 Fix format-conversion for PSD code (bad global variables!)
6277  1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz
6278  1.07 attempt to fix C++ warning/errors again
6279  1.06 attempt to fix C++ warning/errors again
6280  1.05 fix TGA loading to return correct *comp and use good luminance calc
6281  1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free
6282  1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR
6283  1.02 support for (subset of) HDR files, float interface for preferred access to them
6284  1.01 fix bug: possible bug in handling right-side up bmps... not sure
6285  fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
6286  1.00 interface to zlib that skips zlib header
6287  0.99 correct handling of alpha in palette
6288  0.98 TGA loader by lonesock; dynamically add loaders (untested)
6289  0.97 jpeg errors on too large a file; also catch another malloc failure
6290  0.96 fix detection of invalid v value - particleman@mollyrocket forum
6291  0.95 during header scan, seek to markers in case of padding
6292  0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same
6293  0.93 handle jpegtran output; verbose errors
6294  0.92 read 4,8,16,24,32-bit BMP files of several formats
6295  0.91 output 24-bit Windows 3.0 BMP files
6296  0.90 fix a few more warnings; bump version number to approach 1.0
6297  0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd
6298  0.60 fix compiling as c++
6299  0.59 fix warnings: merge Dave Moore's -Wall fixes
6300  0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian
6301  0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
6302  0.56 fix bug: zlib uncompressed mode len vs. nlen
6303  0.55 fix bug: restart_interval not initialized to 0
6304  0.54 allow NULL for 'int *comp'
6305  0.53 fix bug in png 3->4; speedup png decoding
6306  0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments
6307  0.51 obey req_comp requests, 1-component jpegs return as 1-component,
6308  on 'test' only check type, not whether we support this variant
6309  0.50 first released version
6310 */
#define STBIDEF
Definition: stb_image.h:400
STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
STBIDEF const char * stbi_failure_reason(void)
STBIDEF void stbi_hdr_to_ldr_scale(float scale)
STBIDEF float * stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
STBIDEF stbi_uc * stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
STBIDEF float * stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
STBIDEF float * stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
STBIDEF void stbi_ldr_to_hdr_gamma(float gamma)
#define STBI_ASSERT(x)
Definition: stb.cpp:11
STBIDEF stbi_uc * stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
STBIDEF void stbi_hdr_to_ldr_gamma(float gamma)
STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
STBIDEF void stbi_ldr_to_hdr_scale(float scale)
#define assert(x)
Definition: global.h:124
STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
STBIDEF int stbi_is_hdr_from_file(FILE *f)
STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
static float f1
Definition: binary.cpp:29
STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp)
STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
STBIDEF int stbi_is_hdr(char const *filename)
unsigned char stbi_uc
Definition: stb_image.h:391
STBIDEF stbi_uc * stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
STBIDEF float * stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
STBIDEF void stbi_image_free(void *retval_from_stbi_load)
static const float scale
Definition: Sprite.cpp:15
STBIDEF char * stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen)
STBIDEF char * stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
STBIDEF char * stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen)
STBIDEF stbi_uc * stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
STBIDEF char * stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)