Browse Source

cherry-pick 157601b9b9 from pdfium (#28101)

Pedro Pontes 4 years ago
parent
commit
df5ff97144
3 changed files with 8887 additions and 1 deletion
  1. +3 -1
      patches/config.json
  2. +1 -0
      patches/pdfium/.patches
  3. +8883 -0
      patches/pdfium/m89_upgrade_openjpeg_to_2_4_0.patch

+ 3 - 1
patches/config.json

@@ -19,5 +19,7 @@
   
   "src/electron/patches/skia": "src/third_party/skia",
 
-  "src/electron/patches/usrsctp": "src/third_party/usrsctp/usrsctplib"
+  "src/electron/patches/usrsctp": "src/third_party/usrsctp/usrsctplib",
+
+  "src/electron/patches/pdfium": "src/third_party/pdfium"
 }

+ 1 - 0
patches/pdfium/.patches

@@ -0,0 +1 @@
+m89_upgrade_openjpeg_to_2_4_0.patch

+ 8883 - 0
patches/pdfium/m89_upgrade_openjpeg_to_2_4_0.patch

@@ -0,0 +1,8883 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Daniel Hosseinian <[email protected]>
+Date: Mon, 22 Feb 2021 19:30:49 +0000
+Subject: M89: Upgrade OpenJPEG to 2.4.0
+
+Upgrade OpenJPEG by copying the files from 2.4.0 and then applying
+patches. Patch files that are no longer relevant are deleted.
+
+Some parts of patch 3 are no longer applicable.
+
+The bug from patch 36 was fixed by upstream commit
+024b8407392cb0b82b04b58ed256094ed5799e04.
+
+Add a new patch 39 to remove the unused opj_mqc_renorme() function.
+
+Fixed: pdfium:1634
+Change-Id: Iaf5e208ea1f32a84aedb09744e0df084621f73dd
+Bug: pdfium:1634, chromium:1177875
+Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/78050
+Reviewed-by: Tom Sepez <[email protected]>
+Commit-Queue: Daniel Hosseinian <[email protected]>
+(cherry picked from commit a81ff7286463b41d1055353a1e5ed6a2501a8b63)
+Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/78170
+Auto-Submit: Daniel Hosseinian <[email protected]>
+
+diff --git a/third_party/libopenjpeg20/0003-dwt-decode.patch b/third_party/libopenjpeg20/0003-dwt-decode.patch
+index 94d4b41a75fc439ca76d1a43f612388008324a96..99c7a8cda7d374f33eb2d897a11083e67469c14a 100644
+--- a/third_party/libopenjpeg20/0003-dwt-decode.patch
++++ b/third_party/libopenjpeg20/0003-dwt-decode.patch
+@@ -1,5 +1,5 @@
+ diff --git a/third_party/libopenjpeg20/dwt.c b/third_party/libopenjpeg20/dwt.c
+-index 5930d1c71..6512b1e4c 100644
++index 4164ba090..a36b7ed10 100644
+ --- a/third_party/libopenjpeg20/dwt.c
+ +++ b/third_party/libopenjpeg20/dwt.c
+ @@ -63,9 +63,6 @@
+@@ -20,25 +20,7 @@ index 5930d1c71..6512b1e4c 100644
+      OPJ_INT32 dn;   /* number of elements in high pass band */
+      OPJ_INT32 sn;   /* number of elements in low pass band */
+      OPJ_INT32 cas;  /* 0 = start on even coord, 1 = start on odd coord */
+-@@ -133,13 +131,13 @@ static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn,
+- /**
+- Forward 5-3 wavelet transform in 1-D
+- */
+--static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
+--                             OPJ_INT32 cas);
+-+static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn,
+-+    OPJ_INT32 sn, OPJ_INT32 cas);
+- /**
+- Forward 9-7 wavelet transform in 1-D
+- */
+--static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
+--                                  OPJ_INT32 cas);
+-+static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_SIZE_T a_count,
+-+    OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas);
+- /**
+- Explicit calculation of the Quantization Stepsizes
+- */
+-@@ -149,14 +147,14 @@ static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps,
++@@ -140,7 +138,7 @@ static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps,
+  Inverse wavelet transform in 2-D.
+  */
+  static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
+@@ -47,16 +29,7 @@ index 5930d1c71..6512b1e4c 100644
+  
+  static OPJ_BOOL opj_dwt_decode_partial_tile(
+      opj_tcd_tilecomp_t* tilec,
+-     OPJ_UINT32 numres);
+- 
+--static OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec,
+--        void (*p_function)(OPJ_INT32 *, OPJ_INT32, OPJ_INT32, OPJ_INT32));
+-+static OPJ_BOOL opj_dwt_encode_procedure(const opj_tcd_tilecomp_t * tilec,
+-+        void(*p_function)(OPJ_INT32 *, OPJ_SIZE_T, OPJ_INT32, OPJ_INT32, OPJ_INT32));
+- 
+- static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* OPJ_RESTRICT r,
+-         OPJ_UINT32 i);
+-@@ -205,13 +203,20 @@ static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w,
++@@ -181,13 +179,20 @@ static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* OPJ_RESTRICT r,
+  
+  /*@}*/
+  
+@@ -83,18 +56,7 @@ index 5930d1c71..6512b1e4c 100644
+  
+  /* <summary>                                                              */
+  /* This table contains the norms of the 5-3 wavelets for different bands. */
+-@@ -344,8 +349,8 @@ static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x)
+- /* <summary>                            */
+- /* Forward 5-3 wavelet transform in 1-D. */
+- /* </summary>                           */
+--static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
+--                             OPJ_INT32 cas)
+-+static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn,
+-+                             OPJ_INT32 sn, OPJ_INT32 cas)
+- {
+-     OPJ_INT32 i;
+- 
+-@@ -376,8 +381,8 @@ static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
++@@ -296,8 +301,8 @@ static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x)
+  /* <summary>                            */
+  /* Inverse 5-3 wavelet transform in 1-D. */
+  /* </summary>                           */
+@@ -105,7 +67,7 @@ index 5930d1c71..6512b1e4c 100644
+  {
+      OPJ_INT32 i;
+  
+-@@ -406,7 +411,7 @@ static void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
++@@ -326,7 +331,7 @@ static void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
+  
+  static void opj_dwt_decode_1(const opj_dwt_t *v)
+  {
+@@ -114,72 +76,7 @@ index 5930d1c71..6512b1e4c 100644
+  }
+  
+  #endif /* STANDARD_SLOW_VERSION */
+-@@ -1037,8 +1042,8 @@ static void opj_idwt53_v(const opj_dwt_t *dwt,
+- /* <summary>                             */
+- /* Forward 9-7 wavelet transform in 1-D. */
+- /* </summary>                            */
+--static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
+--                                  OPJ_INT32 cas)
+-+static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_SIZE_T a_count,
+-+                                  OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas)
+- {
+-     OPJ_INT32 i;
+-     if (!cas) {
+-@@ -1106,8 +1111,8 @@ static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps,
+- /* <summary>                            */
+- /* Forward 5-3 wavelet transform in 2-D. */
+- /* </summary>                           */
+--static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec,
+--        void (*p_function)(OPJ_INT32 *, OPJ_INT32, OPJ_INT32, OPJ_INT32))
+-+static INLINE OPJ_BOOL opj_dwt_encode_procedure(const opj_tcd_tilecomp_t * tilec,
+-+        void(*p_function)(OPJ_INT32 *, OPJ_SIZE_T, OPJ_INT32, OPJ_INT32, OPJ_INT32))
+- {
+-     OPJ_INT32 i, j, k;
+-     OPJ_INT32 *a = 00;
+-@@ -1117,6 +1122,7 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec,
+- 
+-     OPJ_INT32 rw;           /* width of the resolution level computed   */
+-     OPJ_INT32 rh;           /* height of the resolution level computed  */
+-+    OPJ_SIZE_T l_data_count;
+-     OPJ_SIZE_T l_data_size;
+- 
+-     opj_tcd_resolution_t * l_cur_res = 0;
+-@@ -1129,13 +1135,13 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec,
+-     l_cur_res = tilec->resolutions + l;
+-     l_last_res = l_cur_res - 1;
+- 
+--    l_data_size = opj_dwt_max_resolution(tilec->resolutions, tilec->numresolutions);
+-+    l_data_count = opj_dwt_max_resolution(tilec->resolutions, tilec->numresolutions);
+-     /* overflow check */
+--    if (l_data_size > (SIZE_MAX / sizeof(OPJ_INT32))) {
+-+    if (l_data_count > (SIZE_MAX / sizeof(OPJ_INT32))) {
+-         /* FIXME event manager error callback */
+-         return OPJ_FALSE;
+-     }
+--    l_data_size *= sizeof(OPJ_INT32);
+-+    l_data_size = l_data_count * sizeof(OPJ_INT32);
+-     bj = (OPJ_INT32*)opj_malloc(l_data_size);
+-     /* l_data_size is equal to 0 when numresolutions == 1 but bj is not used */
+-     /* in that case, so do not error out */
+-@@ -1167,7 +1173,7 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec,
+-                 bj[k] = aj[k * w];
+-             }
+- 
+--            (*p_function)(bj, dn, sn, cas_col);
+-+            (*p_function) (bj, l_data_count, dn, sn, cas_col);
+- 
+-             opj_dwt_deinterleave_v(bj, aj, dn, sn, w, cas_col);
+-         }
+-@@ -1180,7 +1186,7 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec,
+-             for (k = 0; k < rw; k++) {
+-                 bj[k] = aj[k];
+-             }
+--            (*p_function)(bj, dn, sn, cas_row);
+-+            (*p_function) (bj, l_data_count, dn, sn, cas_row);
+-             opj_dwt_deinterleave_h(bj, aj, dn, sn, cas_row);
+-         }
+- 
+-@@ -1379,7 +1385,7 @@ static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls)
++@@ -2062,7 +2067,7 @@ static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls)
+  /* Inverse wavelet transform in 2-D.    */
+  /* </summary>                           */
+  static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
+@@ -188,7 +85,7 @@ index 5930d1c71..6512b1e4c 100644
+  {
+      opj_dwt_t h;
+      opj_dwt_t v;
+-@@ -1401,22 +1407,23 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
++@@ -2084,22 +2089,23 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
+          return OPJ_TRUE;
+      }
+      num_threads = opj_thread_pool_get_thread_count(tp);
+@@ -215,7 +112,7 @@ index 5930d1c71..6512b1e4c 100644
+      v.mem = h.mem;
+  
+      while (--numres) {
+-@@ -1594,7 +1601,8 @@ static void opj_dwt_interleave_partial_v(OPJ_INT32 *dest,
++@@ -2277,7 +2283,8 @@ static void opj_dwt_interleave_partial_v(OPJ_INT32 *dest,
+      OPJ_UNUSED(ret);
+  }
+  
+@@ -225,7 +122,7 @@ index 5930d1c71..6512b1e4c 100644
+                                       OPJ_INT32 cas,
+                                       OPJ_INT32 win_l_x0,
+                                       OPJ_INT32 win_l_x1,
+-@@ -1974,16 +1982,16 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(
++@@ -2657,16 +2664,16 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(
+          opj_sparse_array_int32_free(sa);
+          return OPJ_TRUE;
+      }
+@@ -245,7 +142,7 @@ index 5930d1c71..6512b1e4c 100644
+      h.mem = (OPJ_INT32*)opj_aligned_32_malloc(h_mem_size);
+      if (! h.mem) {
+          /* FIXME event manager error callback */
+-@@ -1991,6 +1999,7 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(
++@@ -2674,6 +2681,7 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(
+          return OPJ_FALSE;
+      }
+  
+@@ -253,7 +150,7 @@ index 5930d1c71..6512b1e4c 100644
+      v.mem = h.mem;
+  
+      for (resno = 1; resno < numres; resno ++) {
+-@@ -2101,7 +2110,7 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(
++@@ -2784,7 +2792,7 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(
+                                               win_ll_x1,
+                                               win_hl_x0,
+                                               win_hl_x1);
+diff --git a/third_party/libopenjpeg20/0005-jp2_apply_pclr.patch b/third_party/libopenjpeg20/0005-jp2_apply_pclr.patch
+index 2d450172a5887b8a5197d7a2a5f2c911bf9049a7..cbc5a8f86a4b845809b7bcb26e70cdfbec9f25b4 100644
+--- a/third_party/libopenjpeg20/0005-jp2_apply_pclr.patch
++++ b/third_party/libopenjpeg20/0005-jp2_apply_pclr.patch
+@@ -1,8 +1,8 @@
+ diff --git a/third_party/libopenjpeg20/jp2.c b/third_party/libopenjpeg20/jp2.c
+-index 8dc1ecbe6..61b3f5821 100644
++index 7c065ba74..a5790b267 100644
+ --- a/third_party/libopenjpeg20/jp2.c
+ +++ b/third_party/libopenjpeg20/jp2.c
+-@@ -1073,8 +1073,8 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image,
++@@ -1079,8 +1079,8 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image,
+              assert(pcol == 0);
+              new_comps[i] = old_comps[cmp];
+          } else {
+@@ -13,7 +13,7 @@ index 8dc1ecbe6..61b3f5821 100644
+          }
+  
+          /* Palette mapping: */
+-@@ -1102,7 +1102,7 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image,
++@@ -1108,7 +1108,7 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image,
+          pcol = cmap[i].pcol;
+          src = old_comps[cmp].data;
+          assert(src); /* verified above */
+@@ -22,7 +22,7 @@ index 8dc1ecbe6..61b3f5821 100644
+  
+          /* Direct use: */
+          if (cmap[i].mtyp == 0) {
+-@@ -1112,8 +1112,8 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image,
++@@ -1118,8 +1118,8 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image,
+                  dst[j] = src[j];
+              }
+          } else {
+diff --git a/third_party/libopenjpeg20/0006-tcd_init_tile.patch b/third_party/libopenjpeg20/0006-tcd_init_tile.patch
+index 8c37fc2733d685c99b60acf7544fa580d6a0c774..409fe7b927fccc67cb53bc8d7d96a0ac28044ef0 100644
+--- a/third_party/libopenjpeg20/0006-tcd_init_tile.patch
++++ b/third_party/libopenjpeg20/0006-tcd_init_tile.patch
+@@ -1,8 +1,8 @@
+ diff --git a/third_party/libopenjpeg20/tcd.c b/third_party/libopenjpeg20/tcd.c
+-index 1dd15405d..acc28dd55 100644
++index 6442669d6..4c728d4c6 100644
+ --- a/third_party/libopenjpeg20/tcd.c
+ +++ b/third_party/libopenjpeg20/tcd.c
+-@@ -818,6 +818,11 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
++@@ -819,6 +819,11 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
+          if (isEncoder) {
+              OPJ_SIZE_T l_tile_data_size;
+  
+diff --git a/third_party/libopenjpeg20/0007-jp2_read_cmap.patch b/third_party/libopenjpeg20/0007-jp2_read_cmap.patch
+index 4cc434099ff0f14414a8000ee8fcb28d68948215..781f2c6d0f37217c15e6a71772aade4c0b1ad919 100644
+--- a/third_party/libopenjpeg20/0007-jp2_read_cmap.patch
++++ b/third_party/libopenjpeg20/0007-jp2_read_cmap.patch
+@@ -1,13 +1,13 @@
+ diff --git a/third_party/libopenjpeg20/jp2.c b/third_party/libopenjpeg20/jp2.c
+-index 3ace09654..7ef7c9139 100644
++index a5790b267..a0f639d8e 100644
+ --- a/third_party/libopenjpeg20/jp2.c
+ +++ b/third_party/libopenjpeg20/jp2.c
+-@@ -1296,7 +1296,7 @@ static OPJ_BOOL opj_jp2_read_cmap(opj_jp2_t * jp2,
++@@ -1308,7 +1308,7 @@ static OPJ_BOOL opj_jp2_read_cmap(opj_jp2_t * jp2,
+  
+  
+      for (i = 0; i < nr_channels; ++i) {
+ -        opj_read_bytes(p_cmap_header_data, &l_value, 2);            /* CMP^i */
+-+        opj_read_bytes_BE(p_cmap_header_data, &l_value, 2);     /* CMP^i */
+++        opj_read_bytes_BE(p_cmap_header_data, &l_value, 2);         /* CMP^i */
+          p_cmap_header_data += 2;
+          cmap[i].cmp = (OPJ_UINT16) l_value;
+  
+diff --git a/third_party/libopenjpeg20/0009-opj_pi_next.patch b/third_party/libopenjpeg20/0009-opj_pi_next.patch
+index 99f17d313836b18d52d6bb49bea4c2061fa29766..ed0332fe0701348fe4a27088f6f60482df14e72f 100644
+--- a/third_party/libopenjpeg20/0009-opj_pi_next.patch
++++ b/third_party/libopenjpeg20/0009-opj_pi_next.patch
+@@ -1,31 +1,31 @@
+ diff --git a/third_party/libopenjpeg20/pi.c b/third_party/libopenjpeg20/pi.c
+-index 91642ee4e..256fe37a1 100644
++index 4f7dd50f1..1430d12a9 100644
+ --- a/third_party/libopenjpeg20/pi.c
+ +++ b/third_party/libopenjpeg20/pi.c
+-@@ -445,6 +445,9 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi)
+-                                                 (OPJ_INT32)(comp->dy << levelno)), (OPJ_INT32)res->pdy)
+-                            - opj_int_floordivpow2(try0, (OPJ_INT32)res->pdy);
+-                     pi->precno = (OPJ_UINT32)(prci + prcj * (OPJ_INT32)res->pw);
++@@ -464,6 +464,9 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi)
++                                                  (comp->dy << levelno)), res->pdy)
++                            - opj_uint_floordivpow2(try0, res->pdy);
++                     pi->precno = prci + prcj * res->pw;
+ +                    if (pi->precno >= res->pw * res->ph) {
+ +                      return OPJ_FALSE;
+ +                    }
+                      for (pi->layno = pi->poc.layno0; pi->layno < pi->poc.layno1; pi->layno++) {
+                          index = pi->layno * pi->step_l + pi->resno * pi->step_r + pi->compno *
+                                  pi->step_c + pi->precno * pi->step_p;
+-@@ -576,6 +579,9 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi)
+-                                                 (OPJ_INT32)(comp->dy << levelno)), (OPJ_INT32)res->pdy)
+-                            - opj_int_floordivpow2(try0, (OPJ_INT32)res->pdy);
+-                     pi->precno = (OPJ_UINT32)(prci + prcj * (OPJ_INT32)res->pw);
++@@ -602,6 +605,9 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi)
++                                                  (comp->dy << levelno)), res->pdy)
++                            - opj_uint_floordivpow2(try0, res->pdy);
++                     pi->precno = prci + prcj * res->pw;
+ +                    if (pi->precno >= res->pw * res->ph) {
+ +                      return OPJ_FALSE;
+ +                    }
+                      for (pi->layno = pi->poc.layno0; pi->layno < pi->poc.layno1; pi->layno++) {
+                          index = pi->layno * pi->step_l + pi->resno * pi->step_r + pi->compno *
+                                  pi->step_c + pi->precno * pi->step_p;
+-@@ -704,6 +710,9 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi)
+-                                                 (OPJ_INT32)(comp->dy << levelno)), (OPJ_INT32)res->pdy)
+-                            - opj_int_floordivpow2(try0, (OPJ_INT32)res->pdy);
+-                     pi->precno = (OPJ_UINT32)(prci + prcj * (OPJ_INT32)res->pw);
++@@ -737,6 +743,9 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi)
++                                                  (comp->dy << levelno)), res->pdy)
++                            - opj_uint_floordivpow2(try0, res->pdy);
++                     pi->precno = (OPJ_UINT32)(prci + prcj * res->pw);
+ +                    if (pi->precno >= res->pw * res->ph) {
+ +                      return OPJ_FALSE;
+ +                    }
+diff --git a/third_party/libopenjpeg20/0011-j2k_update_image_data.patch b/third_party/libopenjpeg20/0011-j2k_update_image_data.patch
+index b61324a6b4d967ff369442824331b6e6c96ecb61..1402129b7fca35b2e9f90d68a68c81b2692aaad7 100644
+--- a/third_party/libopenjpeg20/0011-j2k_update_image_data.patch
++++ b/third_party/libopenjpeg20/0011-j2k_update_image_data.patch
+@@ -1,8 +1,8 @@
+ diff --git a/third_party/libopenjpeg20/j2k.c b/third_party/libopenjpeg20/j2k.c
+-index ad6e1b86f..bf1cb4f36 100644
++index 8e343ab2e..5e1494394 100644
+ --- a/third_party/libopenjpeg20/j2k.c
+ +++ b/third_party/libopenjpeg20/j2k.c
+-@@ -9086,6 +9086,12 @@ static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd,
++@@ -9882,6 +9882,12 @@ static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd,
+           * */
+          assert(res_x0 >= 0);
+          assert(res_x1 >= 0);
+diff --git a/third_party/libopenjpeg20/0012-mct_sse.patch b/third_party/libopenjpeg20/0012-mct_sse.patch
+index 9bc2e6f0dc6d551580e0235e23d2c6b02ffd50cb..812ff01df5fe3d528fffeb75f35326b94babcc6e 100644
+--- a/third_party/libopenjpeg20/0012-mct_sse.patch
++++ b/third_party/libopenjpeg20/0012-mct_sse.patch
+@@ -1,8 +1,8 @@
+ diff --git a/third_party/libopenjpeg20/mct.c b/third_party/libopenjpeg20/mct.c
+-index b79d4b87c..81ec223d8 100644
++index 88c8f4092..c4c2e732e 100644
+ --- a/third_party/libopenjpeg20/mct.c
+ +++ b/third_party/libopenjpeg20/mct.c
+-@@ -37,13 +37,16 @@
++@@ -37,13 +37,15 @@
+   * POSSIBILITY OF SUCH DAMAGE.
+   */
+  
+@@ -18,11 +18,10 @@ index b79d4b87c..81ec223d8 100644
+  #endif
+ -#ifdef __SSE4_1__
+ +#if defined(__SSE4_1__) && !defined(_M_IX86) && !defined(__i386)
+-+#define USE_SSE4
+  #include <smmintrin.h>
+  #endif
+  
+-@@ -72,7 +75,7 @@ const OPJ_FLOAT64 * opj_mct_get_mct_norms_real()
++@@ -72,7 +74,7 @@ const OPJ_FLOAT64 * opj_mct_get_mct_norms_real()
+  /* <summary> */
+  /* Forward reversible MCT. */
+  /* </summary> */
+@@ -31,7 +30,7 @@ index b79d4b87c..81ec223d8 100644
+  void opj_mct_encode(
+      OPJ_INT32* OPJ_RESTRICT c0,
+      OPJ_INT32* OPJ_RESTRICT c1,
+-@@ -141,7 +144,7 @@ void opj_mct_encode(
++@@ -141,7 +143,7 @@ void opj_mct_encode(
+  /* <summary> */
+  /* Inverse reversible MCT. */
+  /* </summary> */
+@@ -40,19 +39,19 @@ index b79d4b87c..81ec223d8 100644
+  void opj_mct_decode(
+      OPJ_INT32* OPJ_RESTRICT c0,
+      OPJ_INT32* OPJ_RESTRICT c1,
+-@@ -209,7 +212,7 @@ OPJ_FLOAT64 opj_mct_getnorm(OPJ_UINT32 compno)
+- /* <summary> */
+- /* Forward irreversible MCT. */
+- /* </summary> */
+--#ifdef __SSE4_1__
+-+#ifdef USE_SSE4
+- void opj_mct_encode_real(
+-     OPJ_INT32* OPJ_RESTRICT c0,
+-     OPJ_INT32* OPJ_RESTRICT c1,
+-@@ -389,7 +392,7 @@ void opj_mct_decode_real(
++@@ -216,7 +218,7 @@ void opj_mct_encode_real(
++     OPJ_SIZE_T n)
++ {
++     OPJ_SIZE_T i;
++-#ifdef __SSE__
+++#ifdef USE_SSE
++     const __m128 YR = _mm_set1_ps(0.299f);
++     const __m128 YG = _mm_set1_ps(0.587f);
++     const __m128 YB = _mm_set1_ps(0.114f);
++@@ -286,7 +288,7 @@ void opj_mct_decode_real(
+      OPJ_SIZE_T n)
+  {
+-     OPJ_UINT32 i;
++     OPJ_SIZE_T i;
+ -#ifdef __SSE__
+ +#ifdef USE_SSE
+      __m128 vrv, vgu, vgv, vbu;
+diff --git a/third_party/libopenjpeg20/0014-opj_jp2_read_ihdr_leak.patch b/third_party/libopenjpeg20/0014-opj_jp2_read_ihdr_leak.patch
+index 0ae0cfcb1b80560a9d21cbc57233f4f45bdc3927..ab0da52677f50c4e24a3d9280b54f047fb429bca 100644
+--- a/third_party/libopenjpeg20/0014-opj_jp2_read_ihdr_leak.patch
++++ b/third_party/libopenjpeg20/0014-opj_jp2_read_ihdr_leak.patch
+@@ -1,8 +1,8 @@
+ diff --git a/third_party/libopenjpeg20/jp2.c b/third_party/libopenjpeg20/jp2.c
+-index 7ef7c9139..1fa607d66 100644
++index a0f639d8e..38715b80f 100644
+ --- a/third_party/libopenjpeg20/jp2.c
+ +++ b/third_party/libopenjpeg20/jp2.c
+-@@ -593,6 +593,7 @@ static OPJ_BOOL opj_jp2_read_ihdr(opj_jp2_t *jp2,
++@@ -599,6 +599,7 @@ static OPJ_BOOL opj_jp2_read_ihdr(opj_jp2_t *jp2,
+      }
+  
+      /* allocate memory for components */
+@@ -10,7 +10,7 @@ index 7ef7c9139..1fa607d66 100644
+      jp2->comps = (opj_jp2_comps_t*) opj_calloc(jp2->numcomps,
+                   sizeof(opj_jp2_comps_t));
+      if (jp2->comps == 0) {
+-@@ -1882,6 +1883,7 @@ void opj_jp2_setup_decoder(opj_jp2_t *jp2, opj_dparameters_t *parameters)
++@@ -1897,6 +1898,7 @@ void opj_jp2_setup_decoder(opj_jp2_t *jp2, opj_dparameters_t *parameters)
+  
+      /* further JP2 initializations go here */
+      jp2->color.jp2_has_colr = 0;
+diff --git a/third_party/libopenjpeg20/0015-read_SPCod_SPCoc_overflow.patch b/third_party/libopenjpeg20/0015-read_SPCod_SPCoc_overflow.patch
+index 760ed7462bdda26055cb370b7e8a8a73ab75d9fc..11e22fbd1a9153d93dd13a691bb07032ebd9e655 100644
+--- a/third_party/libopenjpeg20/0015-read_SPCod_SPCoc_overflow.patch
++++ b/third_party/libopenjpeg20/0015-read_SPCod_SPCoc_overflow.patch
+@@ -1,8 +1,8 @@
+ diff --git a/third_party/libopenjpeg20/j2k.c b/third_party/libopenjpeg20/j2k.c
+-index 8e35b33ee..d95963a5c 100644
++index 5e1494394..413dbdd9f 100644
+ --- a/third_party/libopenjpeg20/j2k.c
+ +++ b/third_party/libopenjpeg20/j2k.c
+-@@ -9527,6 +9527,10 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k,
++@@ -10537,6 +10537,10 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k,
+              p_j2k->m_specific_param.m_decoder.m_default_tcp;
+  
+      /* precondition again */
+diff --git a/third_party/libopenjpeg20/0016-read_SQcd_SQcc_overflow.patch b/third_party/libopenjpeg20/0016-read_SQcd_SQcc_overflow.patch
+index d7e06ead8fce73e6f71cb9f6b0b5542e6b00589c..7a63b9ba47e88d35c4e604aeb2de96ce88335a63 100644
+--- a/third_party/libopenjpeg20/0016-read_SQcd_SQcc_overflow.patch
++++ b/third_party/libopenjpeg20/0016-read_SQcd_SQcc_overflow.patch
+@@ -1,8 +1,8 @@
+ diff --git a/third_party/libopenjpeg20/j2k.c b/third_party/libopenjpeg20/j2k.c
+-index d95963a5c..ed449684f 100644
++index 413dbdd9f..1932fe20c 100644
+ --- a/third_party/libopenjpeg20/j2k.c
+ +++ b/third_party/libopenjpeg20/j2k.c
+-@@ -9864,7 +9864,9 @@ static OPJ_BOOL opj_j2k_read_SQcd_SQcc(opj_j2k_t *p_j2k,
++@@ -10885,7 +10885,9 @@ static OPJ_BOOL opj_j2k_read_SQcd_SQcc(opj_j2k_t *p_j2k,
+              p_j2k->m_specific_param.m_decoder.m_default_tcp;
+  
+      /* precondition again*/
+diff --git a/third_party/libopenjpeg20/0019-tcd_init_tile.patch b/third_party/libopenjpeg20/0019-tcd_init_tile.patch
+index 8746eace207e94cbd16dafb167282387dd7589d3..bf16596f0242d6291e19aefcc9a11fcaaa0588a4 100644
+--- a/third_party/libopenjpeg20/0019-tcd_init_tile.patch
++++ b/third_party/libopenjpeg20/0019-tcd_init_tile.patch
+@@ -1,8 +1,8 @@
+ diff --git a/third_party/libopenjpeg20/tcd.c b/third_party/libopenjpeg20/tcd.c
+-index be3b84363..5757fd401 100644
++index 4c728d4c6..b9f571410 100644
+ --- a/third_party/libopenjpeg20/tcd.c
+ +++ b/third_party/libopenjpeg20/tcd.c
+-@@ -1065,6 +1065,9 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
++@@ -1094,6 +1094,9 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
+                      l_current_precinct->ch = (OPJ_UINT32)((brcblkyend - tlcblkystart) >>
+                                                            cblkheightexpn);
+  
+diff --git a/third_party/libopenjpeg20/0022-jp2_apply_pclr_overflow.patch b/third_party/libopenjpeg20/0022-jp2_apply_pclr_overflow.patch
+index c1773d48fe176ffb77164ec3e944ba790879de87..1546b956944c16e7562155d12b6df5e35063b9f3 100644
+--- a/third_party/libopenjpeg20/0022-jp2_apply_pclr_overflow.patch
++++ b/third_party/libopenjpeg20/0022-jp2_apply_pclr_overflow.patch
+@@ -1,8 +1,8 @@
+ diff --git a/third_party/libopenjpeg20/jp2.c b/third_party/libopenjpeg20/jp2.c
+-index 8dc2b977f..3e23bc363 100644
++index 38715b80f..dcaf3872c 100644
+ --- a/third_party/libopenjpeg20/jp2.c
+ +++ b/third_party/libopenjpeg20/jp2.c
+-@@ -1058,6 +1058,14 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image,
++@@ -1064,6 +1064,14 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image,
+      }
+  
+      old_comps = image->comps;
+@@ -17,7 +17,7 @@ index 8dc2b977f..3e23bc363 100644
+      new_comps = (opj_image_comp_t*)
+                  opj_malloc(nr_channels * sizeof(opj_image_comp_t));
+      if (!new_comps) {
+-@@ -1102,20 +1110,26 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image,
++@@ -1108,20 +1116,26 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image,
+          cmp = cmap[i].cmp;
+          pcol = cmap[i].pcol;
+          src = old_comps[cmp].data;
+diff --git a/third_party/libopenjpeg20/0023-opj_j2k_read_mct_records.patch b/third_party/libopenjpeg20/0023-opj_j2k_read_mct_records.patch
+index c8415ae4eee6f82d5c61865121ab1fb96c42099c..607d9f117f0b8e6c3537f47f82cd7b1ac201bbe5 100644
+--- a/third_party/libopenjpeg20/0023-opj_j2k_read_mct_records.patch
++++ b/third_party/libopenjpeg20/0023-opj_j2k_read_mct_records.patch
+@@ -1,8 +1,8 @@
+ diff --git a/third_party/libopenjpeg20/j2k.c b/third_party/libopenjpeg20/j2k.c
+-index ed449684f..c5f9dd53e 100644
++index 1932fe20c..d24564cc2 100644
+ --- a/third_party/libopenjpeg20/j2k.c
+ +++ b/third_party/libopenjpeg20/j2k.c
+-@@ -5553,6 +5553,7 @@ static OPJ_BOOL opj_j2k_read_mct(opj_j2k_t *p_j2k,
++@@ -5749,6 +5749,7 @@ static OPJ_BOOL opj_j2k_read_mct(opj_j2k_t *p_j2k,
+      OPJ_UINT32 l_tmp;
+      OPJ_UINT32 l_indix;
+      opj_mct_data_t * l_mct_data;
+@@ -10,7 +10,7 @@ index ed449684f..c5f9dd53e 100644
+  
+      /* preconditions */
+      assert(p_header_data != 00);
+-@@ -5640,7 +5641,7 @@ static OPJ_BOOL opj_j2k_read_mct(opj_j2k_t *p_j2k,
++@@ -5836,7 +5837,7 @@ static OPJ_BOOL opj_j2k_read_mct(opj_j2k_t *p_j2k,
+          }
+  
+          l_mct_data = l_tcp->m_mct_records + l_tcp->m_nb_mct_records;
+@@ -19,7 +19,7 @@ index ed449684f..c5f9dd53e 100644
+      }
+  
+      if (l_mct_data->m_data) {
+-@@ -5672,6 +5673,9 @@ static OPJ_BOOL opj_j2k_read_mct(opj_j2k_t *p_j2k,
++@@ -5868,6 +5869,9 @@ static OPJ_BOOL opj_j2k_read_mct(opj_j2k_t *p_j2k,
+  
+      l_mct_data->m_data_size = p_header_size;
+  
+diff --git a/third_party/libopenjpeg20/0025-opj_j2k_add_mct_null_data.patch b/third_party/libopenjpeg20/0025-opj_j2k_add_mct_null_data.patch
+index b2e7cdc75aa4ba8b0d3541ccceb37f4a0b90c34d..50679061c037b2f1e8dc3e0cef383a2e32e7ce96 100644
+--- a/third_party/libopenjpeg20/0025-opj_j2k_add_mct_null_data.patch
++++ b/third_party/libopenjpeg20/0025-opj_j2k_add_mct_null_data.patch
+@@ -1,8 +1,8 @@
+ diff --git a/third_party/libopenjpeg20/j2k.c b/third_party/libopenjpeg20/j2k.c
+-index c5f9dd53e..d31eb29a7 100644
++index d24564cc2..889c2cfc8 100644
+ --- a/third_party/libopenjpeg20/j2k.c
+ +++ b/third_party/libopenjpeg20/j2k.c
+-@@ -6201,7 +6201,7 @@ static OPJ_BOOL opj_j2k_add_mct(opj_tcp_t * p_tcp, opj_image_t * p_image,
++@@ -6397,7 +6397,7 @@ static OPJ_BOOL opj_j2k_add_mct(opj_tcp_t * p_tcp, opj_image_t * p_image,
+      if (l_deco_array) {
+          l_data_size = MCT_ELEMENT_SIZE[l_deco_array->m_element_type] * p_image->numcomps
+                        * p_image->numcomps;
+@@ -11,7 +11,7 @@ index c5f9dd53e..d31eb29a7 100644
+              return OPJ_FALSE;
+          }
+  
+-@@ -6222,7 +6222,7 @@ static OPJ_BOOL opj_j2k_add_mct(opj_tcp_t * p_tcp, opj_image_t * p_image,
++@@ -6418,7 +6418,7 @@ static OPJ_BOOL opj_j2k_add_mct(opj_tcp_t * p_tcp, opj_image_t * p_image,
+      if (l_offset_array) {
+          l_data_size = MCT_ELEMENT_SIZE[l_offset_array->m_element_type] *
+                        p_image->numcomps;
+diff --git a/third_party/libopenjpeg20/0026-use_opj_uint_ceildiv.patch b/third_party/libopenjpeg20/0026-use_opj_uint_ceildiv.patch
+index 038fb90ff4611dedf005410d485edd628fb4694a..0eb365453b3862c83628f7ebaa9d6866956932c9 100644
+--- a/third_party/libopenjpeg20/0026-use_opj_uint_ceildiv.patch
++++ b/third_party/libopenjpeg20/0026-use_opj_uint_ceildiv.patch
+@@ -1,8 +1,8 @@
+ diff --git a/third_party/libopenjpeg20/j2k.c b/third_party/libopenjpeg20/j2k.c
+-index c5f9dd53e..1869833f7 100644
++index 889c2cfc8..711dd73e8 100644
+ --- a/third_party/libopenjpeg20/j2k.c
+ +++ b/third_party/libopenjpeg20/j2k.c
+-@@ -2311,10 +2311,8 @@ static OPJ_BOOL opj_j2k_read_siz(opj_j2k_t *p_j2k,
++@@ -2299,10 +2299,8 @@ static OPJ_BOOL opj_j2k_read_siz(opj_j2k_t *p_j2k,
+      }
+  
+      /* Compute the number of tiles */
+@@ -15,7 +15,7 @@ index c5f9dd53e..1869833f7 100644
+  
+      /* Check that the number of tiles is valid */
+      if (l_cp->tw == 0 || l_cp->th == 0 || l_cp->tw > 65535 / l_cp->th) {
+-@@ -2331,12 +2329,10 @@ static OPJ_BOOL opj_j2k_read_siz(opj_j2k_t *p_j2k,
++@@ -2319,12 +2317,10 @@ static OPJ_BOOL opj_j2k_read_siz(opj_j2k_t *p_j2k,
+              (p_j2k->m_specific_param.m_decoder.m_start_tile_x - l_cp->tx0) / l_cp->tdx;
+          p_j2k->m_specific_param.m_decoder.m_start_tile_y =
+              (p_j2k->m_specific_param.m_decoder.m_start_tile_y - l_cp->ty0) / l_cp->tdy;
+@@ -32,10 +32,10 @@ index c5f9dd53e..1869833f7 100644
+      } else {
+          p_j2k->m_specific_param.m_decoder.m_start_tile_x = 0;
+          p_j2k->m_specific_param.m_decoder.m_start_tile_y = 0;
+-@@ -6922,10 +6918,8 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k,
+-     */
+- 
+-     if (parameters->tile_size_on) {
++@@ -7839,10 +7835,8 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k,
++             opj_event_msg(p_manager, EVT_ERROR, "Invalid tile height\n");
++             return OPJ_FALSE;
++         }
+ -        cp->tw = (OPJ_UINT32)opj_int_ceildiv((OPJ_INT32)(image->x1 - cp->tx0),
+ -                                             (OPJ_INT32)cp->tdx);
+ -        cp->th = (OPJ_UINT32)opj_int_ceildiv((OPJ_INT32)(image->y1 - cp->ty0),
+@@ -45,9 +45,9 @@ index c5f9dd53e..1869833f7 100644
+      } else {
+          cp->tdx = image->x1 - cp->tx0;
+          cp->tdy = image->y1 - cp->ty0;
+-@@ -9237,10 +9231,8 @@ OPJ_BOOL opj_j2k_set_decode_area(opj_j2k_t *p_j2k,
+-     for (it_comp = 0; it_comp < p_image->numcomps; ++it_comp) {
+-         OPJ_INT32 l_h, l_w;
++@@ -10035,10 +10029,8 @@ static OPJ_BOOL opj_j2k_update_image_dimensions(opj_image_t* p_image,
++             return OPJ_FALSE;
++         }
+  
+ -        l_img_comp->x0 = (OPJ_UINT32)opj_int_ceildiv((OPJ_INT32)p_image->x0,
+ -                         (OPJ_INT32)l_img_comp->dx);
+@@ -58,7 +58,7 @@ index c5f9dd53e..1869833f7 100644
+          l_comp_x1 = opj_int_ceildiv((OPJ_INT32)p_image->x1, (OPJ_INT32)l_img_comp->dx);
+          l_comp_y1 = opj_int_ceildiv((OPJ_INT32)p_image->y1, (OPJ_INT32)l_img_comp->dy);
+  
+-@@ -10848,10 +10840,8 @@ OPJ_BOOL opj_j2k_get_tile(opj_j2k_t *p_j2k,
++@@ -11950,10 +11942,8 @@ OPJ_BOOL opj_j2k_get_tile(opj_j2k_t *p_j2k,
+  
+          l_img_comp->factor = p_j2k->m_private_image->comps[compno].factor;
+  
+@@ -71,7 +71,7 @@ index c5f9dd53e..1869833f7 100644
+          l_comp_x1 = opj_int_ceildiv((OPJ_INT32)p_image->x1, (OPJ_INT32)l_img_comp->dx);
+          l_comp_y1 = opj_int_ceildiv((OPJ_INT32)p_image->y1, (OPJ_INT32)l_img_comp->dy);
+  
+-@@ -11161,10 +11151,8 @@ static void opj_get_tile_dimensions(opj_image_t * l_image,
++@@ -12304,10 +12294,8 @@ static void opj_get_tile_dimensions(opj_image_t * l_image,
+  
+      *l_width  = (OPJ_UINT32)(l_tilec->x1 - l_tilec->x0);
+      *l_height = (OPJ_UINT32)(l_tilec->y1 - l_tilec->y0);
+diff --git a/third_party/libopenjpeg20/0035-opj_image_data_free.patch b/third_party/libopenjpeg20/0035-opj_image_data_free.patch
+index bc674da378f2277784223255f21ab975d590eb35..f0cdd8a3b806c808e51f6fc9fa5198c9c3db4319 100644
+--- a/third_party/libopenjpeg20/0035-opj_image_data_free.patch
++++ b/third_party/libopenjpeg20/0035-opj_image_data_free.patch
+@@ -1,8 +1,8 @@
+ diff --git a/third_party/libopenjpeg20/jp2.c b/third_party/libopenjpeg20/jp2.c
+-index 298648a77..2374d459f 100644
++index dcaf3872c..02f3d04c7 100644
+ --- a/third_party/libopenjpeg20/jp2.c
+ +++ b/third_party/libopenjpeg20/jp2.c
+-@@ -1116,7 +1116,7 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image,
++@@ -1122,7 +1122,7 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image,
+          /* Prevent null pointer access */
+          if (!src || !dst) {
+            for (j = 0; j < nr_channels; ++j) {
+diff --git a/third_party/libopenjpeg20/0036-opj_j2k_update_image_dimensions.patch b/third_party/libopenjpeg20/0036-opj_j2k_update_image_dimensions.patch
+deleted file mode 100644
+index b918c0586e0f22ae080bcd041f984f3526419983..0000000000000000000000000000000000000000
+--- a/third_party/libopenjpeg20/0036-opj_j2k_update_image_dimensions.patch
++++ /dev/null
+@@ -1,49 +0,0 @@
+-diff --git a/third_party/libopenjpeg20/j2k.c b/third_party/libopenjpeg20/j2k.c
+-index 784a0620a..cea614709 100644
+---- a/third_party/libopenjpeg20/j2k.c
+-+++ b/third_party/libopenjpeg20/j2k.c
+-@@ -9223,32 +9223,30 @@ static OPJ_BOOL opj_j2k_update_image_dimensions(opj_image_t* p_image,
+- 
+-     l_img_comp = p_image->comps;
+-     for (it_comp = 0; it_comp < p_image->numcomps; ++it_comp) {
+--        OPJ_INT32 l_h, l_w;
+--
+-         l_img_comp->x0 = opj_uint_ceildiv(p_image->x0, l_img_comp->dx);
+-         l_img_comp->y0 = opj_uint_ceildiv(p_image->y0, l_img_comp->dy);
+-         l_comp_x1 = opj_int_ceildiv((OPJ_INT32)p_image->x1, (OPJ_INT32)l_img_comp->dx);
+-         l_comp_y1 = opj_int_ceildiv((OPJ_INT32)p_image->y1, (OPJ_INT32)l_img_comp->dy);
+- 
+--        l_w = opj_int_ceildivpow2(l_comp_x1, (OPJ_INT32)l_img_comp->factor)
+--              - opj_int_ceildivpow2((OPJ_INT32)l_img_comp->x0, (OPJ_INT32)l_img_comp->factor);
+--        if (l_w < 0) {
+-+        OPJ_INT32 l_1 = opj_int_ceildivpow2(l_comp_x1, (OPJ_INT32)l_img_comp->factor);
+-+        OPJ_INT32 l_2 = opj_int_ceildivpow2((OPJ_INT32)l_img_comp->x0, (OPJ_INT32)l_img_comp->factor);
+-+        if (l_1 < l_2) {
+-             opj_event_msg(p_manager, EVT_ERROR,
+--                          "Size x of the decoded component image is incorrect (comp[%d].w=%d).\n",
+--                          it_comp, l_w);
+-+                          "Size x of the decoded component image is incorrect (comp[%d].w<0).\n",
+-+                          it_comp);
+-             return OPJ_FALSE;
+-         }
+--        l_img_comp->w = (OPJ_UINT32)l_w;
+-+        l_img_comp->w = (OPJ_UINT32)(l_1-l_2);
+- 
+--        l_h = opj_int_ceildivpow2(l_comp_y1, (OPJ_INT32)l_img_comp->factor)
+--              - opj_int_ceildivpow2((OPJ_INT32)l_img_comp->y0, (OPJ_INT32)l_img_comp->factor);
+--        if (l_h < 0) {
+-+        l_1 = opj_int_ceildivpow2(l_comp_y1, (OPJ_INT32)l_img_comp->factor);
+-+        l_2 = opj_int_ceildivpow2((OPJ_INT32)l_img_comp->y0, (OPJ_INT32)l_img_comp->factor);
+-+        if (l_1 < l_2) {
+-             opj_event_msg(p_manager, EVT_ERROR,
+--                          "Size y of the decoded component image is incorrect (comp[%d].h=%d).\n",
+--                          it_comp, l_h);
+-+                          "Size y of the decoded component image is incorrect (comp[%d].h<0).\n",
+-+                          it_comp);
+-             return OPJ_FALSE;
+-         }
+--        l_img_comp->h = (OPJ_UINT32)l_h;
+-+        l_img_comp->h = (OPJ_UINT32)(l_1-l_2);
+- 
+-         l_img_comp++;
+-     }
+diff --git a/third_party/libopenjpeg20/0037-tcd_init_tile.patch b/third_party/libopenjpeg20/0037-tcd_init_tile.patch
+deleted file mode 100644
+index e38a7ec87122697f2dba052df8551207e3355750..0000000000000000000000000000000000000000
+--- a/third_party/libopenjpeg20/0037-tcd_init_tile.patch
++++ /dev/null
+@@ -1,31 +0,0 @@
+-diff --git a/third_party/libopenjpeg20/tcd.c b/third_party/libopenjpeg20/tcd.c
+-index 2ae211ef4..9e98f04ab 100644
+---- a/third_party/libopenjpeg20/tcd.c
+-+++ b/third_party/libopenjpeg20/tcd.c
+-@@ -910,8 +910,24 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
+-             /* p. 64, B.6, ISO/IEC FDIS15444-1 : 2000 (18 august 2000)  */
+-             l_tl_prc_x_start = opj_int_floordivpow2(l_res->x0, (OPJ_INT32)l_pdx) << l_pdx;
+-             l_tl_prc_y_start = opj_int_floordivpow2(l_res->y0, (OPJ_INT32)l_pdy) << l_pdy;
+--            l_br_prc_x_end = opj_int_ceildivpow2(l_res->x1, (OPJ_INT32)l_pdx) << l_pdx;
+--            l_br_prc_y_end = opj_int_ceildivpow2(l_res->y1, (OPJ_INT32)l_pdy) << l_pdy;
+-+            {
+-+                OPJ_UINT32 tmp = ((OPJ_UINT32)opj_int_ceildivpow2(l_res->x1,
+-+                                  (OPJ_INT32)l_pdx)) << l_pdx;
+-+                if (tmp > (OPJ_UINT32)INT_MAX) {
+-+                    opj_event_msg(manager, EVT_ERROR, "Integer overflow\n");
+-+                    return OPJ_FALSE;
+-+                }
+-+                l_br_prc_x_end = (OPJ_INT32)tmp;
+-+            }
+-+            {
+-+                OPJ_UINT32 tmp = ((OPJ_UINT32)opj_int_ceildivpow2(l_res->y1,
+-+                                  (OPJ_INT32)l_pdy)) << l_pdy;
+-+                if (tmp > (OPJ_UINT32)INT_MAX) {
+-+                    opj_event_msg(manager, EVT_ERROR, "Integer overflow\n");
+-+                    return OPJ_FALSE;
+-+                }
+-+                l_br_prc_y_end = (OPJ_INT32)tmp;
+-+            }
+-             /*fprintf(stderr, "\t\t\tprc_x_start=%d, prc_y_start=%d, br_prc_x_end=%d, br_prc_y_end=%d \n", l_tl_prc_x_start, l_tl_prc_y_start, l_br_prc_x_end ,l_br_prc_y_end );*/
+- 
+-             l_res->pw = (l_res->x0 == l_res->x1) ? 0U : (OPJ_UINT32)((
+diff --git a/third_party/libopenjpeg20/0038-opj_j2k_validate_param.patch b/third_party/libopenjpeg20/0038-opj_j2k_validate_param.patch
+deleted file mode 100644
+index 9431d821883a780559da1ec8686a0e108c12f4bd..0000000000000000000000000000000000000000
+--- a/third_party/libopenjpeg20/0038-opj_j2k_validate_param.patch
++++ /dev/null
+@@ -1,30 +0,0 @@
+-diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c
+-index 59b2bbb7..43be7677 100644
+---- a/src/lib/openjp2/j2k.c
+-+++ b/src/lib/openjp2/j2k.c
+-@@ -2698,6 +2698,12 @@ static OPJ_BOOL opj_j2k_read_cod(opj_j2k_t *p_j2k,
+-     opj_read_bytes(p_header_data, &l_tcp->mct, 1);          /* SGcod (C) */
+-     ++p_header_data;
+- 
+-+    if (l_tcp->mct > 1) {
+-+        opj_event_msg(p_manager, EVT_ERROR,
+-+                      "Invalid multiple component transformation\n");
+-+        return OPJ_FALSE;
+-+    }
+-+
+-     p_header_size -= 5;
+-     for (i = 0; i < l_image->numcomps; ++i) {
+-         l_tcp->tccps[i].csty = l_tcp->csty & J2K_CCP_CSTY_PRT;
+-@@ -9792,6 +9798,12 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k,
+-     opj_read_bytes(l_current_ptr, &l_tccp->qmfbid, 1);
+-     ++l_current_ptr;
+- 
+-+    if (l_tccp->qmfbid > 1) {
+-+        opj_event_msg(p_manager, EVT_ERROR,
+-+                      "Error reading SPCod SPCoc element, Invalid transformation found\n");
+-+        return OPJ_FALSE;
+-+    }
+-+
+-     *p_header_size = *p_header_size - 5;
+- 
+-     /* use custom precinct size ? */
+diff --git a/third_party/libopenjpeg20/0039-opj_mqc_renorme.patch b/third_party/libopenjpeg20/0039-opj_mqc_renorme.patch
+new file mode 100644
+index 0000000000000000000000000000000000000000..07e8a801bf0f5e2919a43a9710918bf83c4dfe73
+--- /dev/null
++++ b/third_party/libopenjpeg20/0039-opj_mqc_renorme.patch
+@@ -0,0 +1,16 @@
++diff --git a/third_party/libopenjpeg20/mqc.c b/third_party/libopenjpeg20/mqc.c
++index 4cbfabd03..3caab9e7c 100644
++--- a/third_party/libopenjpeg20/mqc.c
+++++ b/third_party/libopenjpeg20/mqc.c
++@@ -370,11 +370,6 @@ void opj_mqc_erterm_enc(opj_mqc_t *mqc)
++     }
++ }
++ 
++-static INLINE void opj_mqc_renorme(opj_mqc_t *mqc)
++-{
++-    opj_mqc_renorme_macro(mqc, mqc->a, mqc->c, mqc->ct);
++-}
++-
++ /**
++ Encode the most probable symbol
++ @param mqc MQC handle
+diff --git a/third_party/libopenjpeg20/README.pdfium b/third_party/libopenjpeg20/README.pdfium
+index 08f60079f5951cf120f77c7933125c939bd0392b..c62780c3d6c275a7db7e10d91f38fc3bf55f9291 100644
+--- a/third_party/libopenjpeg20/README.pdfium
++++ b/third_party/libopenjpeg20/README.pdfium
+@@ -1,8 +1,9 @@
+ Name: OpenJPEG
+ URL: http://www.openjpeg.org/
+-Version: 2.3.1 (also update in opj_config*)
++Version: 2.4.0 (also update in opj_config*)
+ Security Critical: yes
+ License: 2-clause BSD
++CPEPrefix: cpe:/a:uclouvain:openjpeg:2.4.0
+ 
+ Description:
+ JPEG 2000 library.
+@@ -24,9 +25,6 @@ Local Modifications:
+ 0023-opj_j2k_read_mct_records.patch: Fix opj_j2k_read to prevent heap-use-after-free.
+ 0025-opj_j2k_add_mct_null_data.patch: Check m_data != null before trying to read from it.
+ 0026-use_opj_uint_ceildiv.patch: Remove (OPJ_UINT32)opj_int_ceildiv((OPJ_INT32)a, (OPJ_INT32) b).
+-0033-undefined-shift-opj_t1_dec_clnpass.patch: fix undefined shifts originated from opj_t1_decode_cblk.
+ 0034-opj_malloc.patch: PDFium changes in opj_malloc.
+ 0035-opj_image_data_free.patch: Use the right free function in opj_jp2_apply_pclr.
+-0036-opj_j2k_update_image_dimensions.patch: fix integer overflow.
+-0037-tcd_init_tile.patch: Avoid integer overflow in opj_tcd_init_tile().
+-0038-opj_j2k_validate_param.patch: Validate all SGcod/SPcod/SPcoc parameter values.
++0039-opj_mqc_renorme.patch: Remove unused opj_mqc_renorme().
+diff --git a/third_party/libopenjpeg20/dwt.c b/third_party/libopenjpeg20/dwt.c
+index 6512b1e4cee6bef02d623cab8efb469ef10b7f70..a36b7ed10b8738fb0bc47bb9585502b7f7da5f65 100644
+--- a/third_party/libopenjpeg20/dwt.c
++++ b/third_party/libopenjpeg20/dwt.c
+@@ -85,12 +85,14 @@ typedef struct dwt_local {
+     OPJ_INT32 cas;  /* 0 = start on even coord, 1 = start on odd coord */
+ } opj_dwt_t;
+ 
++#define NB_ELTS_V8  8
++
+ typedef union {
+-    OPJ_FLOAT32 f[4];
+-} opj_v4_t;
++    OPJ_FLOAT32 f[NB_ELTS_V8];
++} opj_v8_t;
+ 
+-typedef struct v4dwt_local {
+-    opj_v4_t*   wavelet ;
++typedef struct v8dwt_local {
++    opj_v8_t*   wavelet ;
+     OPJ_INT32       dn ;  /* number of elements in high pass band */
+     OPJ_INT32       sn ;  /* number of elements in low pass band */
+     OPJ_INT32       cas ; /* 0 = start on even coord, 1 = start on odd coord */
+@@ -98,46 +100,35 @@ typedef struct v4dwt_local {
+     OPJ_UINT32      win_l_x1; /* end coord in low pass band */
+     OPJ_UINT32      win_h_x0; /* start coord in high pass band */
+     OPJ_UINT32      win_h_x1; /* end coord in high pass band */
+-} opj_v4dwt_t ;
++} opj_v8dwt_t ;
+ 
+-static const OPJ_FLOAT32 opj_dwt_alpha =  1.586134342f; /*  12994 */
+-static const OPJ_FLOAT32 opj_dwt_beta  =  0.052980118f; /*    434 */
+-static const OPJ_FLOAT32 opj_dwt_gamma = -0.882911075f; /*  -7233 */
+-static const OPJ_FLOAT32 opj_dwt_delta = -0.443506852f; /*  -3633 */
++/* From table F.4 from the standard */
++static const OPJ_FLOAT32 opj_dwt_alpha =  -1.586134342f;
++static const OPJ_FLOAT32 opj_dwt_beta  =  -0.052980118f;
++static const OPJ_FLOAT32 opj_dwt_gamma = 0.882911075f;
++static const OPJ_FLOAT32 opj_dwt_delta = 0.443506852f;
+ 
+-static const OPJ_FLOAT32 opj_K      = 1.230174105f; /*  10078 */
+-static const OPJ_FLOAT32 opj_c13318 = 1.625732422f;
++static const OPJ_FLOAT32 opj_K      = 1.230174105f;
++static const OPJ_FLOAT32 opj_invK   = (OPJ_FLOAT32)(1.0 / 1.230174105);
+ 
+ /*@}*/
+ 
+-/**
+-Virtual function type for wavelet transform in 1-D
+-*/
+-typedef void (*DWT1DFN)(const opj_dwt_t* v);
+-
+ /** @name Local static functions */
+ /*@{*/
+ 
+ /**
+ Forward lazy transform (horizontal)
+ */
+-static void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn,
++static void opj_dwt_deinterleave_h(const OPJ_INT32 * OPJ_RESTRICT a,
++                                   OPJ_INT32 * OPJ_RESTRICT b,
++                                   OPJ_INT32 dn,
+                                    OPJ_INT32 sn, OPJ_INT32 cas);
+-/**
+-Forward lazy transform (vertical)
+-*/
+-static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn,
+-                                   OPJ_INT32 sn, OPJ_INT32 x, OPJ_INT32 cas);
+-/**
+-Forward 5-3 wavelet transform in 1-D
+-*/
+-static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn,
+-    OPJ_INT32 sn, OPJ_INT32 cas);
++
+ /**
+ Forward 9-7 wavelet transform in 1-D
+ */
+-static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_SIZE_T a_count,
+-    OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas);
++static void opj_dwt_encode_1_real(void *a, OPJ_INT32 dn, OPJ_INT32 sn,
++                                  OPJ_INT32 cas);
+ /**
+ Explicit calculation of the Quantization Stepsizes
+ */
+@@ -153,8 +144,29 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(
+     opj_tcd_tilecomp_t* tilec,
+     OPJ_UINT32 numres);
+ 
+-static OPJ_BOOL opj_dwt_encode_procedure(const opj_tcd_tilecomp_t * tilec,
+-        void(*p_function)(OPJ_INT32 *, OPJ_SIZE_T, OPJ_INT32, OPJ_INT32, OPJ_INT32));
++/* Forward transform, for the vertical pass, processing cols columns */
++/* where cols <= NB_ELTS_V8 */
++/* Where void* is a OPJ_INT32* for 5x3 and OPJ_FLOAT32* for 9x7 */
++typedef void (*opj_encode_and_deinterleave_v_fnptr_type)(
++    void *array,
++    void *tmp,
++    OPJ_UINT32 height,
++    OPJ_BOOL even,
++    OPJ_UINT32 stride_width,
++    OPJ_UINT32 cols);
++
++/* Where void* is a OPJ_INT32* for 5x3 and OPJ_FLOAT32* for 9x7 */
++typedef void (*opj_encode_and_deinterleave_h_one_row_fnptr_type)(
++    void *row,
++    void *tmp,
++    OPJ_UINT32 width,
++    OPJ_BOOL even);
++
++static OPJ_BOOL opj_dwt_encode_procedure(opj_thread_pool_t* tp,
++        opj_tcd_tilecomp_t * tilec,
++        opj_encode_and_deinterleave_v_fnptr_type p_encode_and_deinterleave_v,
++        opj_encode_and_deinterleave_h_one_row_fnptr_type
++        p_encode_and_deinterleave_h_one_row);
+ 
+ static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* OPJ_RESTRICT r,
+         OPJ_UINT32 i);
+@@ -162,42 +174,6 @@ static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* OPJ_RESTRICT r,
+ /* <summary>                             */
+ /* Inverse 9-7 wavelet transform in 1-D. */
+ /* </summary>                            */
+-static void opj_v4dwt_decode(opj_v4dwt_t* OPJ_RESTRICT dwt);
+-
+-static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT dwt,
+-                                   OPJ_FLOAT32* OPJ_RESTRICT a,
+-                                   OPJ_UINT32 width,
+-                                   OPJ_UINT32 remaining_height);
+-
+-static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT dwt,
+-                                   OPJ_FLOAT32* OPJ_RESTRICT a,
+-                                   OPJ_UINT32 width,
+-                                   OPJ_UINT32 nb_elts_read);
+-
+-#ifdef __SSE__
+-static void opj_v4dwt_decode_step1_sse(opj_v4_t* w,
+-                                       OPJ_UINT32 start,
+-                                       OPJ_UINT32 end,
+-                                       const __m128 c);
+-
+-static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w,
+-                                       OPJ_UINT32 start,
+-                                       OPJ_UINT32 end,
+-                                       OPJ_UINT32 m, __m128 c);
+-
+-#else
+-static void opj_v4dwt_decode_step1(opj_v4_t* w,
+-                                   OPJ_UINT32 start,
+-                                   OPJ_UINT32 end,
+-                                   const OPJ_FLOAT32 c);
+-
+-static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w,
+-                                   OPJ_UINT32 start,
+-                                   OPJ_UINT32 end,
+-                                   OPJ_UINT32 m,
+-                                   OPJ_FLOAT32 c);
+-
+-#endif
+ 
+ /*@}*/
+ 
+@@ -251,12 +227,14 @@ static const OPJ_FLOAT64 opj_dwt_norms_real[4][10] = {
+ /* <summary>                             */
+ /* Forward lazy transform (horizontal).  */
+ /* </summary>                            */
+-static void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn,
++static void opj_dwt_deinterleave_h(const OPJ_INT32 * OPJ_RESTRICT a,
++                                   OPJ_INT32 * OPJ_RESTRICT b,
++                                   OPJ_INT32 dn,
+                                    OPJ_INT32 sn, OPJ_INT32 cas)
+ {
+     OPJ_INT32 i;
+-    OPJ_INT32 * l_dest = b;
+-    OPJ_INT32 * l_src = a + cas;
++    OPJ_INT32 * OPJ_RESTRICT l_dest = b;
++    const OPJ_INT32 * OPJ_RESTRICT l_src = a + cas;
+ 
+     for (i = 0; i < sn; ++i) {
+         *l_dest++ = *l_src;
+@@ -272,40 +250,13 @@ static void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn,
+     }
+ }
+ 
+-/* <summary>                             */
+-/* Forward lazy transform (vertical).    */
+-/* </summary>                            */
+-static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn,
+-                                   OPJ_INT32 sn, OPJ_INT32 x, OPJ_INT32 cas)
+-{
+-    OPJ_INT32 i = sn;
+-    OPJ_INT32 * l_dest = b;
+-    OPJ_INT32 * l_src = a + cas;
+-
+-    while (i--) {
+-        *l_dest = *l_src;
+-        l_dest += x;
+-        l_src += 2;
+-    } /* b[i*x]=a[2*i+cas]; */
+-
+-    l_dest = b + (OPJ_SIZE_T)sn * (OPJ_SIZE_T)x;
+-    l_src = a + 1 - cas;
+-
+-    i = dn;
+-    while (i--) {
+-        *l_dest = *l_src;
+-        l_dest += x;
+-        l_src += 2;
+-    } /*b[(sn+i)*x]=a[(2*i+1-cas)];*/
+-}
+-
+ #ifdef STANDARD_SLOW_VERSION
+ /* <summary>                             */
+ /* Inverse lazy transform (horizontal).  */
+ /* </summary>                            */
+ static void opj_dwt_interleave_h(const opj_dwt_t* h, OPJ_INT32 *a)
+ {
+-    OPJ_INT32 *ai = a;
++    const OPJ_INT32 *ai = a;
+     OPJ_INT32 *bi = h->mem + h->cas;
+     OPJ_INT32  i    = h->sn;
+     while (i--) {
+@@ -326,7 +277,7 @@ static void opj_dwt_interleave_h(const opj_dwt_t* h, OPJ_INT32 *a)
+ /* </summary>                            */
+ static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x)
+ {
+-    OPJ_INT32 *ai = a;
++    const OPJ_INT32 *ai = a;
+     OPJ_INT32 *bi = v->mem + v->cas;
+     OPJ_INT32  i = v->sn;
+     while (i--) {
+@@ -346,37 +297,6 @@ static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x)
+ 
+ #endif /* STANDARD_SLOW_VERSION */
+ 
+-/* <summary>                            */
+-/* Forward 5-3 wavelet transform in 1-D. */
+-/* </summary>                           */
+-static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn,
+-                             OPJ_INT32 sn, OPJ_INT32 cas)
+-{
+-    OPJ_INT32 i;
+-
+-    if (!cas) {
+-        if ((dn > 0) || (sn > 1)) { /* NEW :  CASE ONE ELEMENT */
+-            for (i = 0; i < dn; i++) {
+-                OPJ_D(i) -= (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1;
+-            }
+-            for (i = 0; i < sn; i++) {
+-                OPJ_S(i) += (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2;
+-            }
+-        }
+-    } else {
+-        if (!sn && dn == 1) {       /* NEW :  CASE ONE ELEMENT */
+-            OPJ_S(0) *= 2;
+-        } else {
+-            for (i = 0; i < dn; i++) {
+-                OPJ_S(i) -= (OPJ_DD_(i) + OPJ_DD_(i - 1)) >> 1;
+-            }
+-            for (i = 0; i < sn; i++) {
+-                OPJ_D(i) += (OPJ_SS_(i) + OPJ_SS_(i + 1) + 2) >> 2;
+-            }
+-        }
+-    }
+-}
+-
+ #ifdef STANDARD_SLOW_VERSION
+ /* <summary>                            */
+ /* Inverse 5-3 wavelet transform in 1-D. */
+@@ -1038,111 +958,799 @@ static void opj_idwt53_v(const opj_dwt_t *dwt,
+ #endif
+ }
+ 
++#if 0
++static void opj_dwt_encode_step1(OPJ_FLOAT32* fw,
++                                 OPJ_UINT32 end,
++                                 const OPJ_FLOAT32 c)
++{
++    OPJ_UINT32 i = 0;
++    for (; i < end; ++i) {
++        fw[0] *= c;
++        fw += 2;
++    }
++}
++#else
++static void opj_dwt_encode_step1_combined(OPJ_FLOAT32* fw,
++        OPJ_UINT32 iters_c1,
++        OPJ_UINT32 iters_c2,
++        const OPJ_FLOAT32 c1,
++        const OPJ_FLOAT32 c2)
++{
++    OPJ_UINT32 i = 0;
++    const OPJ_UINT32 iters_common =  opj_uint_min(iters_c1, iters_c2);
++    assert((((OPJ_SIZE_T)fw) & 0xf) == 0);
++    assert(opj_int_abs((OPJ_INT32)iters_c1 - (OPJ_INT32)iters_c2) <= 1);
++    for (; i + 3 < iters_common; i += 4) {
++#ifdef __SSE__
++        const __m128 vcst = _mm_set_ps(c2, c1, c2, c1);
++        *(__m128*)fw = _mm_mul_ps(*(__m128*)fw, vcst);
++        *(__m128*)(fw + 4) = _mm_mul_ps(*(__m128*)(fw + 4), vcst);
++#else
++        fw[0] *= c1;
++        fw[1] *= c2;
++        fw[2] *= c1;
++        fw[3] *= c2;
++        fw[4] *= c1;
++        fw[5] *= c2;
++        fw[6] *= c1;
++        fw[7] *= c2;
++#endif
++        fw += 8;
++    }
++    for (; i < iters_common; i++) {
++        fw[0] *= c1;
++        fw[1] *= c2;
++        fw += 2;
++    }
++    if (i < iters_c1) {
++        fw[0] *= c1;
++    } else if (i < iters_c2) {
++        fw[1] *= c2;
++    }
++}
+ 
+-/* <summary>                             */
+-/* Forward 9-7 wavelet transform in 1-D. */
+-/* </summary>                            */
+-static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_SIZE_T a_count,
+-                                  OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas)
++#endif
++
++static void opj_dwt_encode_step2(OPJ_FLOAT32* fl, OPJ_FLOAT32* fw,
++                                 OPJ_UINT32 end,
++                                 OPJ_UINT32 m,
++                                 OPJ_FLOAT32 c)
+ {
+-    OPJ_INT32 i;
+-    if (!cas) {
+-        if ((dn > 0) || (sn > 1)) { /* NEW :  CASE ONE ELEMENT */
+-            for (i = 0; i < dn; i++) {
+-                OPJ_D(i) -= opj_int_fix_mul(OPJ_S_(i) + OPJ_S_(i + 1), 12993);
++    OPJ_UINT32 i;
++    OPJ_UINT32 imax = opj_uint_min(end, m);
++    if (imax > 0) {
++        fw[-1] += (fl[0] + fw[0]) * c;
++        fw += 2;
++        i = 1;
++        for (; i + 3 < imax; i += 4) {
++            fw[-1] += (fw[-2] + fw[0]) * c;
++            fw[1] += (fw[0] + fw[2]) * c;
++            fw[3] += (fw[2] + fw[4]) * c;
++            fw[5] += (fw[4] + fw[6]) * c;
++            fw += 8;
++        }
++        for (; i < imax; ++i) {
++            fw[-1] += (fw[-2] + fw[0]) * c;
++            fw += 2;
++        }
++    }
++    if (m < end) {
++        assert(m + 1 == end);
++        fw[-1] += (2 * fw[-2]) * c;
++    }
++}
++
++static void opj_dwt_encode_1_real(void *aIn, OPJ_INT32 dn, OPJ_INT32 sn,
++                                  OPJ_INT32 cas)
++{
++    OPJ_FLOAT32* w = (OPJ_FLOAT32*)aIn;
++    OPJ_INT32 a, b;
++    assert(dn + sn > 1);
++    if (cas == 0) {
++        a = 0;
++        b = 1;
++    } else {
++        a = 1;
++        b = 0;
++    }
++    opj_dwt_encode_step2(w + a, w + b + 1,
++                         (OPJ_UINT32)dn,
++                         (OPJ_UINT32)opj_int_min(dn, sn - b),
++                         opj_dwt_alpha);
++    opj_dwt_encode_step2(w + b, w + a + 1,
++                         (OPJ_UINT32)sn,
++                         (OPJ_UINT32)opj_int_min(sn, dn - a),
++                         opj_dwt_beta);
++    opj_dwt_encode_step2(w + a, w + b + 1,
++                         (OPJ_UINT32)dn,
++                         (OPJ_UINT32)opj_int_min(dn, sn - b),
++                         opj_dwt_gamma);
++    opj_dwt_encode_step2(w + b, w + a + 1,
++                         (OPJ_UINT32)sn,
++                         (OPJ_UINT32)opj_int_min(sn, dn - a),
++                         opj_dwt_delta);
++#if 0
++    opj_dwt_encode_step1(w + b, (OPJ_UINT32)dn,
++                         opj_K);
++    opj_dwt_encode_step1(w + a, (OPJ_UINT32)sn,
++                         opj_invK);
++#else
++    if (a == 0) {
++        opj_dwt_encode_step1_combined(w,
++                                      (OPJ_UINT32)sn,
++                                      (OPJ_UINT32)dn,
++                                      opj_invK,
++                                      opj_K);
++    } else {
++        opj_dwt_encode_step1_combined(w,
++                                      (OPJ_UINT32)dn,
++                                      (OPJ_UINT32)sn,
++                                      opj_K,
++                                      opj_invK);
++    }
++#endif
++}
++
++static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps,
++                                    opj_stepsize_t *bandno_stepsize)
++{
++    OPJ_INT32 p, n;
++    p = opj_int_floorlog2(stepsize) - 13;
++    n = 11 - opj_int_floorlog2(stepsize);
++    bandno_stepsize->mant = (n < 0 ? stepsize >> -n : stepsize << n) & 0x7ff;
++    bandno_stepsize->expn = numbps - p;
++}
++
++/*
++==========================================================
++   DWT interface
++==========================================================
++*/
++
++/** Process one line for the horizontal pass of the 5x3 forward transform */
++static
++void opj_dwt_encode_and_deinterleave_h_one_row(void* rowIn,
++        void* tmpIn,
++        OPJ_UINT32 width,
++        OPJ_BOOL even)
++{
++    OPJ_INT32* OPJ_RESTRICT row = (OPJ_INT32*)rowIn;
++    OPJ_INT32* OPJ_RESTRICT tmp = (OPJ_INT32*)tmpIn;
++    const OPJ_INT32 sn = (OPJ_INT32)((width + (even ? 1 : 0)) >> 1);
++    const OPJ_INT32 dn = (OPJ_INT32)(width - (OPJ_UINT32)sn);
++
++    if (even) {
++        if (width > 1) {
++            OPJ_INT32 i;
++            for (i = 0; i < sn - 1; i++) {
++                tmp[sn + i] = row[2 * i + 1] - ((row[(i) * 2] + row[(i + 1) * 2]) >> 1);
+             }
+-            for (i = 0; i < sn; i++) {
+-                OPJ_S(i) -= opj_int_fix_mul(OPJ_D_(i - 1) + OPJ_D_(i), 434);
++            if ((width % 2) == 0) {
++                tmp[sn + i] = row[2 * i + 1] - row[(i) * 2];
+             }
+-            for (i = 0; i < dn; i++) {
+-                OPJ_D(i) += opj_int_fix_mul(OPJ_S_(i) + OPJ_S_(i + 1), 7233);
++            row[0] += (tmp[sn] + tmp[sn] + 2) >> 2;
++            for (i = 1; i < dn; i++) {
++                row[i] = row[2 * i] + ((tmp[sn + (i - 1)] + tmp[sn + i] + 2) >> 2);
+             }
+-            for (i = 0; i < sn; i++) {
+-                OPJ_S(i) += opj_int_fix_mul(OPJ_D_(i - 1) + OPJ_D_(i), 3633);
++            if ((width % 2) == 1) {
++                row[i] = row[2 * i] + ((tmp[sn + (i - 1)] + tmp[sn + (i - 1)] + 2) >> 2);
+             }
+-            for (i = 0; i < dn; i++) {
+-                OPJ_D(i) = opj_int_fix_mul(OPJ_D(i), 5038);    /*5038 */
++            memcpy(row + sn, tmp + sn, (OPJ_SIZE_T)dn * sizeof(OPJ_INT32));
++        }
++    } else {
++        if (width == 1) {
++            row[0] *= 2;
++        } else {
++            OPJ_INT32 i;
++            tmp[sn + 0] = row[0] - row[1];
++            for (i = 1; i < sn; i++) {
++                tmp[sn + i] = row[2 * i] - ((row[2 * i + 1] + row[2 * (i - 1) + 1]) >> 1);
+             }
+-            for (i = 0; i < sn; i++) {
+-                OPJ_S(i) = opj_int_fix_mul(OPJ_S(i), 6659);    /*6660 */
++            if ((width % 2) == 1) {
++                tmp[sn + i] = row[2 * i] - row[2 * (i - 1) + 1];
++            }
++
++            for (i = 0; i < dn - 1; i++) {
++                row[i] = row[2 * i + 1] + ((tmp[sn + i] + tmp[sn + i + 1] + 2) >> 2);
++            }
++            if ((width % 2) == 0) {
++                row[i] = row[2 * i + 1] + ((tmp[sn + i] + tmp[sn + i] + 2) >> 2);
++            }
++            memcpy(row + sn, tmp + sn, (OPJ_SIZE_T)dn * sizeof(OPJ_INT32));
++        }
++    }
++}
++
++/** Process one line for the horizontal pass of the 9x7 forward transform */
++static
++void opj_dwt_encode_and_deinterleave_h_one_row_real(void* rowIn,
++        void* tmpIn,
++        OPJ_UINT32 width,
++        OPJ_BOOL even)
++{
++    OPJ_FLOAT32* OPJ_RESTRICT row = (OPJ_FLOAT32*)rowIn;
++    OPJ_FLOAT32* OPJ_RESTRICT tmp = (OPJ_FLOAT32*)tmpIn;
++    const OPJ_INT32 sn = (OPJ_INT32)((width + (even ? 1 : 0)) >> 1);
++    const OPJ_INT32 dn = (OPJ_INT32)(width - (OPJ_UINT32)sn);
++    if (width == 1) {
++        return;
++    }
++    memcpy(tmp, row, width * sizeof(OPJ_FLOAT32));
++    opj_dwt_encode_1_real(tmp, dn, sn, even ? 0 : 1);
++    opj_dwt_deinterleave_h((OPJ_INT32 * OPJ_RESTRICT)tmp,
++                           (OPJ_INT32 * OPJ_RESTRICT)row,
++                           dn, sn, even ? 0 : 1);
++}
++
++typedef struct {
++    opj_dwt_t h;
++    OPJ_UINT32 rw; /* Width of the resolution to process */
++    OPJ_UINT32 w; /* Width of tiledp */
++    OPJ_INT32 * OPJ_RESTRICT tiledp;
++    OPJ_UINT32 min_j;
++    OPJ_UINT32 max_j;
++    opj_encode_and_deinterleave_h_one_row_fnptr_type p_function;
++} opj_dwt_encode_h_job_t;
++
++static void opj_dwt_encode_h_func(void* user_data, opj_tls_t* tls)
++{
++    OPJ_UINT32 j;
++    opj_dwt_encode_h_job_t* job;
++    (void)tls;
++
++    job = (opj_dwt_encode_h_job_t*)user_data;
++    for (j = job->min_j; j < job->max_j; j++) {
++        OPJ_INT32* OPJ_RESTRICT aj = job->tiledp + j * job->w;
++        (*job->p_function)(aj, job->h.mem, job->rw,
++                           job->h.cas == 0 ? OPJ_TRUE : OPJ_FALSE);
++    }
++
++    opj_aligned_free(job->h.mem);
++    opj_free(job);
++}
++
++typedef struct {
++    opj_dwt_t v;
++    OPJ_UINT32 rh;
++    OPJ_UINT32 w;
++    OPJ_INT32 * OPJ_RESTRICT tiledp;
++    OPJ_UINT32 min_j;
++    OPJ_UINT32 max_j;
++    opj_encode_and_deinterleave_v_fnptr_type p_encode_and_deinterleave_v;
++} opj_dwt_encode_v_job_t;
++
++static void opj_dwt_encode_v_func(void* user_data, opj_tls_t* tls)
++{ /* Thread-pool job body: transforms columns [min_j, max_j) in groups of NB_ELTS_V8, then frees the job */
++    OPJ_UINT32 j;
++    opj_dwt_encode_v_job_t* job;
++    (void)tls; /* unused */
++
++    job = (opj_dwt_encode_v_job_t*)user_data;
++    for (j = job->min_j; j + NB_ELTS_V8 - 1 < job->max_j; j += NB_ELTS_V8) { /* full groups of NB_ELTS_V8 columns */
++        (*job->p_encode_and_deinterleave_v)(job->tiledp + j,
++                                            job->v.mem,
++                                            job->rh,
++                                            job->v.cas == 0,
++                                            job->w,
++                                            NB_ELTS_V8);
++    }
++    if (j < job->max_j) { /* leftover group of fewer than NB_ELTS_V8 columns */
++        (*job->p_encode_and_deinterleave_v)(job->tiledp + j,
++                                            job->v.mem,
++                                            job->rh,
++                                            job->v.cas == 0,
++                                            job->w,
++                                            job->max_j - j);
++    }
++
++    opj_aligned_free(job->v.mem); /* the scratch buffer and the job descriptor are both released here */
++    opj_free(job);
++}
++
++/** Fetch cols (<= NB_ELTS_V8) values from each of the height lines of arrayIn */
++/* (lines are stride_width elements apart) into tmpOut, NB_ELTS_V8 values per line. */
++static void opj_dwt_fetch_cols_vertical_pass(const void *arrayIn,
++        void *tmpOut,
++        OPJ_UINT32 height,
++        OPJ_UINT32 stride_width,
++        OPJ_UINT32 cols)
++{
++    const OPJ_INT32* OPJ_RESTRICT array = (const OPJ_INT32 * OPJ_RESTRICT)arrayIn; /* elements are moved as OPJ_INT32 words */
++    OPJ_INT32* OPJ_RESTRICT tmp = (OPJ_INT32 * OPJ_RESTRICT)tmpOut;
++    if (cols == NB_ELTS_V8) { /* full group: one memcpy per line */
++        OPJ_UINT32 k;
++        for (k = 0; k < height; ++k) {
++            memcpy(tmp + NB_ELTS_V8 * k,
++                   array + k * stride_width,
++                   NB_ELTS_V8 * sizeof(OPJ_INT32));
++        }
++    } else { /* partial group: copy cols values, then zero-fill the rest of the line */
++        OPJ_UINT32 k;
++        for (k = 0; k < height; ++k) {
++            OPJ_UINT32 c;
++            for (c = 0; c < cols; c++) {
++                tmp[NB_ELTS_V8 * k + c] = array[c + k * stride_width];
++            }
++            for (; c < NB_ELTS_V8; c++) { /* pad so every tmp line holds NB_ELTS_V8 defined values */
++                tmp[NB_ELTS_V8 * k + c] = 0;
++            }
++        }
++    }
++}
++
++/* Deinterleave result of forward transform, where cols <= NB_ELTS_V8 */
++/* and src contains NB_ELTS_V8 consecutive values for up to NB_ELTS_V8 */
++/* columns. Writes sn lines at dst, then dn lines at dst + sn * stride_width. */
++static INLINE void opj_dwt_deinterleave_v_cols(
++    const OPJ_INT32 * OPJ_RESTRICT src,
++    OPJ_INT32 * OPJ_RESTRICT dst,
++    OPJ_INT32 dn,
++    OPJ_INT32 sn,
++    OPJ_UINT32 stride_width,
++    OPJ_INT32 cas,
++    OPJ_UINT32 cols)
++{
++    OPJ_INT32 k;
++    OPJ_INT32 i = sn; /* first pass copies sn lines */
++    OPJ_INT32 * OPJ_RESTRICT l_dest = dst;
++    const OPJ_INT32 * OPJ_RESTRICT l_src = src + cas * NB_ELTS_V8; /* first pass starts at src line cas */
++    OPJ_UINT32 c;
++
++    for (k = 0; k < 2; k++) { /* k == 0: sn lines from src line cas; k == 1: dn lines from src line 1 - cas */
++        while (i--) {
++            if (cols == NB_ELTS_V8) {
++                memcpy(l_dest, l_src, NB_ELTS_V8 * sizeof(OPJ_INT32));
++            } else {
++                c = 0;
++                switch (cols) { /* cases fall through on purpose: copies cols values for cols in 1..7 */
++                case 7:
++                    l_dest[c] = l_src[c];
++                    c++; /* fallthru */
++                case 6:
++                    l_dest[c] = l_src[c];
++                    c++; /* fallthru */
++                case 5:
++                    l_dest[c] = l_src[c];
++                    c++; /* fallthru */
++                case 4:
++                    l_dest[c] = l_src[c];
++                    c++; /* fallthru */
++                case 3:
++                    l_dest[c] = l_src[c];
++                    c++; /* fallthru */
++                case 2:
++                    l_dest[c] = l_src[c];
++                    c++; /* fallthru */
++                default: /* also reached for any cols value not listed above; copies one value */
++                    l_dest[c] = l_src[c];
++                    break;
++                }
++            }
++            l_dest += stride_width; /* next output line */
++            l_src += 2 * NB_ELTS_V8; /* every second src line */
++        }
++
++        l_dest = dst + (OPJ_SIZE_T)sn * (OPJ_SIZE_T)stride_width; /* set up the second pass */
++        l_src = src + (1 - cas) * NB_ELTS_V8;
++        i = dn;
++    }
++}
++
++
++/* Forward 5-3 transform, for the vertical pass, processing cols columns */
++/* where cols <= NB_ELTS_V8 */
++static void opj_dwt_encode_and_deinterleave_v(
++    void *arrayIn,
++    void *tmpIn,
++    OPJ_UINT32 height,
++    OPJ_BOOL even,
++    OPJ_UINT32 stride_width,
++    OPJ_UINT32 cols)
++{
++    OPJ_INT32* OPJ_RESTRICT array = (OPJ_INT32 * OPJ_RESTRICT)arrayIn;
++    OPJ_INT32* OPJ_RESTRICT tmp = (OPJ_INT32 * OPJ_RESTRICT)tmpIn;
++    const OPJ_UINT32 sn = (height + (even ? 1 : 0)) >> 1; /* lines written first by the deinterleave call at the end */
++    const OPJ_UINT32 dn = height - sn; /* lines written after the sn lines */
++
++    opj_dwt_fetch_cols_vertical_pass(arrayIn, tmpIn, height, stride_width, cols); /* gather into tmp, NB_ELTS_V8 values per line */
++
++#define OPJ_Sc(i) tmp[(i)*2* NB_ELTS_V8 + c]
++#define OPJ_Dc(i) tmp[((1+(i)*2))* NB_ELTS_V8 + c]
++
++#ifdef __SSE2__
++    if (height == 1) { /* single line: only the !even case modifies it (doubling) */
++        if (!even) {
++            OPJ_UINT32 c;
++            for (c = 0; c < NB_ELTS_V8; c++) {
++                tmp[c] *= 2;
++            }
++        }
++    } else if (even) {
++        OPJ_UINT32 c;
++        OPJ_UINT32 i;
++        i = 0;
++        if (i + 1 < sn) {
++            __m128i xmm_Si_0 = *(const __m128i*)(tmp + 4 * 0);
++            __m128i xmm_Si_1 = *(const __m128i*)(tmp + 4 * 1);
++            for (; i + 1 < sn; i++) {
++                __m128i xmm_Sip1_0 = *(const __m128i*)(tmp +
++                                                       (i + 1) * 2 * NB_ELTS_V8 + 4 * 0);
++                __m128i xmm_Sip1_1 = *(const __m128i*)(tmp +
++                                                       (i + 1) * 2 * NB_ELTS_V8 + 4 * 1);
++                __m128i xmm_Di_0 = *(const __m128i*)(tmp +
++                                                     (1 + i * 2) * NB_ELTS_V8 + 4 * 0);
++                __m128i xmm_Di_1 = *(const __m128i*)(tmp +
++                                                     (1 + i * 2) * NB_ELTS_V8 + 4 * 1);
++                xmm_Di_0 = _mm_sub_epi32(xmm_Di_0,
++                                         _mm_srai_epi32(_mm_add_epi32(xmm_Si_0, xmm_Sip1_0), 1));
++                xmm_Di_1 = _mm_sub_epi32(xmm_Di_1,
++                                         _mm_srai_epi32(_mm_add_epi32(xmm_Si_1, xmm_Sip1_1), 1));
++                *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 0) =  xmm_Di_0;
++                *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 1) =  xmm_Di_1;
++                xmm_Si_0 = xmm_Sip1_0;
++                xmm_Si_1 = xmm_Sip1_1;
+             }
+         }
+-    } else {
+-        if ((sn > 0) || (dn > 1)) { /* NEW :  CASE ONE ELEMENT */
+-            for (i = 0; i < dn; i++) {
+-                OPJ_S(i) -= opj_int_fix_mul(OPJ_DD_(i) + OPJ_DD_(i - 1), 12993);
+-            }
+-            for (i = 0; i < sn; i++) {
+-                OPJ_D(i) -= opj_int_fix_mul(OPJ_SS_(i) + OPJ_SS_(i + 1), 434);
+-            }
+-            for (i = 0; i < dn; i++) {
+-                OPJ_S(i) += opj_int_fix_mul(OPJ_DD_(i) + OPJ_DD_(i - 1), 7233);
+-            }
+-            for (i = 0; i < sn; i++) {
+-                OPJ_D(i) += opj_int_fix_mul(OPJ_SS_(i) + OPJ_SS_(i + 1), 3633);
+-            }
+-            for (i = 0; i < dn; i++) {
+-                OPJ_S(i) = opj_int_fix_mul(OPJ_S(i), 5038);    /*5038 */
+-            }
+-            for (i = 0; i < sn; i++) {
+-                OPJ_D(i) = opj_int_fix_mul(OPJ_D(i), 6659);    /*6660 */
++        if (((height) % 2) == 0) { /* i is left at the last index by the loop above */
++            for (c = 0; c < NB_ELTS_V8; c++) {
++                OPJ_Dc(i) -= OPJ_Sc(i);
++            }
++        }
++        for (c = 0; c < NB_ELTS_V8; c++) {
++            OPJ_Sc(0) += (OPJ_Dc(0) + OPJ_Dc(0) + 2) >> 2;
++        }
++        i = 1;
++        if (i < dn) {
++            __m128i xmm_Dim1_0 = *(const __m128i*)(tmp + (1 +
++                                                   (i - 1) * 2) * NB_ELTS_V8 + 4 * 0);
++            __m128i xmm_Dim1_1 = *(const __m128i*)(tmp + (1 +
++                                                   (i - 1) * 2) * NB_ELTS_V8 + 4 * 1);
++            const __m128i xmm_two = _mm_set1_epi32(2);
++            for (; i < dn; i++) {
++                __m128i xmm_Di_0 = *(const __m128i*)(tmp +
++                                                     (1 + i * 2) * NB_ELTS_V8 + 4 * 0);
++                __m128i xmm_Di_1 = *(const __m128i*)(tmp +
++                                                     (1 + i * 2) * NB_ELTS_V8 + 4 * 1);
++                __m128i xmm_Si_0 = *(const __m128i*)(tmp +
++                                                     (i * 2) * NB_ELTS_V8 + 4 * 0);
++                __m128i xmm_Si_1 = *(const __m128i*)(tmp +
++                                                     (i * 2) * NB_ELTS_V8 + 4 * 1);
++                xmm_Si_0 = _mm_add_epi32(xmm_Si_0,
++                                         _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Dim1_0, xmm_Di_0), xmm_two), 2));
++                xmm_Si_1 = _mm_add_epi32(xmm_Si_1,
++                                         _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Dim1_1, xmm_Di_1), xmm_two), 2));
++                *(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 0) = xmm_Si_0;
++                *(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 1) = xmm_Si_1;
++                xmm_Dim1_0 = xmm_Di_0;
++                xmm_Dim1_1 = xmm_Di_1;
++            }
++        }
++        if (((height) % 2) == 1) {
++            for (c = 0; c < NB_ELTS_V8; c++) {
++                OPJ_Sc(i) += (OPJ_Dc(i - 1) + OPJ_Dc(i - 1) + 2) >> 2;
++            }
++        }
++    } else {
++        OPJ_UINT32 c;
++        OPJ_UINT32 i;
++        for (c = 0; c < NB_ELTS_V8; c++) {
++            OPJ_Sc(0) -= OPJ_Dc(0);
++        }
++        i = 1;
++        if (i < sn) {
++            __m128i xmm_Dim1_0 = *(const __m128i*)(tmp + (1 +
++                                                   (i - 1) * 2) * NB_ELTS_V8 + 4 * 0);
++            __m128i xmm_Dim1_1 = *(const __m128i*)(tmp + (1 +
++                                                   (i - 1) * 2) * NB_ELTS_V8 + 4 * 1);
++            for (; i < sn; i++) {
++                __m128i xmm_Di_0 = *(const __m128i*)(tmp +
++                                                     (1 + i * 2) * NB_ELTS_V8 + 4 * 0);
++                __m128i xmm_Di_1 = *(const __m128i*)(tmp +
++                                                     (1 + i * 2) * NB_ELTS_V8 + 4 * 1);
++                __m128i xmm_Si_0 = *(const __m128i*)(tmp +
++                                                     (i * 2) * NB_ELTS_V8 + 4 * 0);
++                __m128i xmm_Si_1 = *(const __m128i*)(tmp +
++                                                     (i * 2) * NB_ELTS_V8 + 4 * 1);
++                xmm_Si_0 = _mm_sub_epi32(xmm_Si_0,
++                                         _mm_srai_epi32(_mm_add_epi32(xmm_Di_0, xmm_Dim1_0), 1));
++                xmm_Si_1 = _mm_sub_epi32(xmm_Si_1,
++                                         _mm_srai_epi32(_mm_add_epi32(xmm_Di_1, xmm_Dim1_1), 1));
++                *(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 0) = xmm_Si_0;
++                *(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 1) = xmm_Si_1;
++                xmm_Dim1_0 = xmm_Di_0;
++                xmm_Dim1_1 = xmm_Di_1;
++            }
++        }
++        if (((height) % 2) == 1) {
++            for (c = 0; c < NB_ELTS_V8; c++) {
++                OPJ_Sc(i) -= OPJ_Dc(i - 1);
++            }
++        }
++        i = 0;
++        if (i + 1 < dn) {
++            __m128i xmm_Si_0 = *((const __m128i*)(tmp + 4 * 0));
++            __m128i xmm_Si_1 = *((const __m128i*)(tmp + 4 * 1));
++            const __m128i xmm_two = _mm_set1_epi32(2);
++            for (; i + 1 < dn; i++) {
++                __m128i xmm_Sip1_0 = *(const __m128i*)(tmp +
++                                                       (i + 1) * 2 * NB_ELTS_V8 + 4 * 0);
++                __m128i xmm_Sip1_1 = *(const __m128i*)(tmp +
++                                                       (i + 1) * 2 * NB_ELTS_V8 + 4 * 1);
++                __m128i xmm_Di_0 = *(const __m128i*)(tmp +
++                                                     (1 + i * 2) * NB_ELTS_V8 + 4 * 0);
++                __m128i xmm_Di_1 = *(const __m128i*)(tmp +
++                                                     (1 + i * 2) * NB_ELTS_V8 + 4 * 1);
++                xmm_Di_0 = _mm_add_epi32(xmm_Di_0,
++                                         _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Si_0, xmm_Sip1_0), xmm_two), 2));
++                xmm_Di_1 = _mm_add_epi32(xmm_Di_1,
++                                         _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Si_1, xmm_Sip1_1), xmm_two), 2));
++                *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 0) = xmm_Di_0;
++                *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 1) = xmm_Di_1;
++                xmm_Si_0 = xmm_Sip1_0;
++                xmm_Si_1 = xmm_Sip1_1;
++            }
++        }
++        if (((height) % 2) == 0) {
++            for (c = 0; c < NB_ELTS_V8; c++) {
++                OPJ_Dc(i) += (OPJ_Sc(i) + OPJ_Sc(i) + 2) >> 2;
++            }
++        }
++    }
++#else /* !__SSE2__: scalar code path */
++    if (even) {
++        OPJ_UINT32 c;
++        if (height > 1) {
++            OPJ_UINT32 i;
++            for (i = 0; i + 1 < sn; i++) {
++                for (c = 0; c < NB_ELTS_V8; c++) {
++                    OPJ_Dc(i) -= (OPJ_Sc(i) + OPJ_Sc(i + 1)) >> 1;
++                }
++            }
++            if (((height) % 2) == 0) {
++                for (c = 0; c < NB_ELTS_V8; c++) {
++                    OPJ_Dc(i) -= OPJ_Sc(i);
++                }
++            }
++            for (c = 0; c < NB_ELTS_V8; c++) {
++                OPJ_Sc(0) += (OPJ_Dc(0) + OPJ_Dc(0) + 2) >> 2;
++            }
++            for (i = 1; i < dn; i++) {
++                for (c = 0; c < NB_ELTS_V8; c++) {
++                    OPJ_Sc(i) += (OPJ_Dc(i - 1) + OPJ_Dc(i) + 2) >> 2;
++                }
++            }
++            if (((height) % 2) == 1) {
++                for (c = 0; c < NB_ELTS_V8; c++) {
++                    OPJ_Sc(i) += (OPJ_Dc(i - 1) + OPJ_Dc(i - 1) + 2) >> 2;
++                }
++            }
++        }
++    } else {
++        OPJ_UINT32 c;
++        if (height == 1) {
++            for (c = 0; c < NB_ELTS_V8; c++) {
++                OPJ_Sc(0) *= 2;
++            }
++        } else {
++            OPJ_UINT32 i;
++            for (c = 0; c < NB_ELTS_V8; c++) {
++                OPJ_Sc(0) -= OPJ_Dc(0);
++            }
++            for (i = 1; i < sn; i++) {
++                for (c = 0; c < NB_ELTS_V8; c++) {
++                    OPJ_Sc(i) -= (OPJ_Dc(i) + OPJ_Dc(i - 1)) >> 1;
++                }
++            }
++            if (((height) % 2) == 1) {
++                for (c = 0; c < NB_ELTS_V8; c++) {
++                    OPJ_Sc(i) -= OPJ_Dc(i - 1);
++                }
++            }
++            for (i = 0; i + 1 < dn; i++) {
++                for (c = 0; c < NB_ELTS_V8; c++) {
++                    OPJ_Dc(i) += (OPJ_Sc(i) + OPJ_Sc(i + 1) + 2) >> 2;
++                }
++            }
++            if (((height) % 2) == 0) {
++                for (c = 0; c < NB_ELTS_V8; c++) {
++                    OPJ_Dc(i) += (OPJ_Sc(i) + OPJ_Sc(i) + 2) >> 2;
++                }
++            }
++        }
++    }
++#endif /* __SSE2__ */
++
++    if (cols == NB_ELTS_V8) { /* same call with a constant column count; presumably lets the INLINE copy specialise - TODO confirm */
++        opj_dwt_deinterleave_v_cols(tmp, array, (OPJ_INT32)dn, (OPJ_INT32)sn,
++                                    stride_width, even ? 0 : 1, NB_ELTS_V8);
++    } else {
++        opj_dwt_deinterleave_v_cols(tmp, array, (OPJ_INT32)dn, (OPJ_INT32)sn,
++                                    stride_width, even ? 0 : 1, cols);
++    }
++}
++
++static void opj_v8dwt_encode_step1(OPJ_FLOAT32* fw,
++                                   OPJ_UINT32 end,
++                                   const OPJ_FLOAT32 cst)
++{ /* Multiplies `end` lines of floats by cst, visiting every second NB_ELTS_V8-wide line starting at fw */
++    OPJ_UINT32 i;
++#ifdef __SSE__
++    __m128* vw = (__m128*) fw;
++    const __m128 vcst = _mm_set1_ps(cst);
++    for (i = 0; i < end; ++i) {
++        vw[0] = _mm_mul_ps(vw[0], vcst);
++        vw[1] = _mm_mul_ps(vw[1], vcst);
++        vw += 2 * (NB_ELTS_V8 * sizeof(OPJ_FLOAT32) / sizeof(__m128)); /* advance by two lines */
++    }
++#else /* !__SSE__: scalar code path */
++    OPJ_UINT32 c;
++    for (i = 0; i < end; ++i) {
++        for (c = 0; c < NB_ELTS_V8; c++) {
++            fw[i * 2 * NB_ELTS_V8 + c] *= cst;
++        }
++    }
++#endif /* __SSE__ */
++}
++
++static void opj_v8dwt_encode_step2(OPJ_FLOAT32* fl, OPJ_FLOAT32* fw,
++                                   OPJ_UINT32 end,
++                                   OPJ_UINT32 m,
++                                   OPJ_FLOAT32 cst)
++{ /* Adds cst * (sum of the two neighbouring lines) to the line just before fw, then to every second line after it */
++    OPJ_UINT32 i;
++    OPJ_UINT32 imax = opj_uint_min(end, m); /* number of updates that have both neighbours */
++#ifdef __SSE__
++    __m128* vw = (__m128*) fw;
++    __m128 vcst = _mm_set1_ps(cst);
++    if (imax > 0) {
++        __m128* vl = (__m128*) fl;
++        vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(_mm_add_ps(vl[0], vw[0]), vcst)); /* first update: fl is the left neighbour */
++        vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(_mm_add_ps(vl[1], vw[1]), vcst));
++        vw += 2 * (NB_ELTS_V8 * sizeof(OPJ_FLOAT32) / sizeof(__m128));
++        i = 1;
++
++        for (; i < imax; ++i) {
++            vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(_mm_add_ps(vw[-4], vw[0]), vcst));
++            vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(_mm_add_ps(vw[-3], vw[1]), vcst));
++            vw += 2 * (NB_ELTS_V8 * sizeof(OPJ_FLOAT32) / sizeof(__m128));
++        }
++    }
++    if (m < end) { /* final update: the left neighbour is counted twice instead of using a right neighbour */
++        assert(m + 1 == end);
++        vcst = _mm_add_ps(vcst, vcst);
++        vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(vw[-4], vcst));
++        vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(vw[-3], vcst));
++    }
++#else /* !__SSE__: scalar code path */
++    OPJ_INT32 c;
++    if (imax > 0) {
++        for (c = 0; c < NB_ELTS_V8; c++) {
++            fw[-1 * NB_ELTS_V8 + c] += (fl[0 * NB_ELTS_V8 + c] + fw[0 * NB_ELTS_V8 + c]) *
++                                       cst;
++        }
++        fw += 2 * NB_ELTS_V8;
++        i = 1;
++        for (; i < imax; ++i) {
++            for (c = 0; c < NB_ELTS_V8; c++) {
++                fw[-1 * NB_ELTS_V8 + c] += (fw[-2 * NB_ELTS_V8 + c] + fw[0 * NB_ELTS_V8 + c]) *
++                                           cst;
+             }
++            fw += 2 * NB_ELTS_V8;
++        }
++    }
++    if (m < end) { /* final update: the left neighbour is counted twice instead of using a right neighbour */
++        assert(m + 1 == end);
++        for (c = 0; c < NB_ELTS_V8; c++) {
++            fw[-1 * NB_ELTS_V8 + c] += (2 * fw[-2 * NB_ELTS_V8 + c]) * cst;
+         }
+     }
++#endif /* __SSE__ */
+ }
+ 
+-static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps,
+-                                    opj_stepsize_t *bandno_stepsize)
++/* Forward 9-7 transform, for the vertical pass, processing cols columns */
++/* where cols <= NB_ELTS_V8. Data is handled as OPJ_FLOAT32. */
++static void opj_dwt_encode_and_deinterleave_v_real(
++    void *arrayIn,
++    void *tmpIn,
++    OPJ_UINT32 height,
++    OPJ_BOOL even,
++    OPJ_UINT32 stride_width,
++    OPJ_UINT32 cols)
+ {
+-    OPJ_INT32 p, n;
+-    p = opj_int_floorlog2(stepsize) - 13;
+-    n = 11 - opj_int_floorlog2(stepsize);
+-    bandno_stepsize->mant = (n < 0 ? stepsize >> -n : stepsize << n) & 0x7ff;
+-    bandno_stepsize->expn = numbps - p;
+-}
++    OPJ_FLOAT32* OPJ_RESTRICT array = (OPJ_FLOAT32 * OPJ_RESTRICT)arrayIn;
++    OPJ_FLOAT32* OPJ_RESTRICT tmp = (OPJ_FLOAT32 * OPJ_RESTRICT)tmpIn;
++    const OPJ_INT32 sn = (OPJ_INT32)((height + (even ? 1 : 0)) >> 1); /* lines written first by the deinterleave call at the end */
++    const OPJ_INT32 dn = (OPJ_INT32)(height - (OPJ_UINT32)sn); /* lines written after the sn lines */
++    OPJ_INT32 a, b; /* line offsets into tmp of the two interleaved bands; swapped when !even */
++
++    if (height == 1) { /* single line: the data is left untouched */
++        return;
++    }
++
++    opj_dwt_fetch_cols_vertical_pass(arrayIn, tmpIn, height, stride_width, cols); /* gather into tmp, NB_ELTS_V8 values per line */
++
++    if (even) {
++        a = 0;
++        b = 1;
++    } else {
++        a = 1;
++        b = 0;
++    }
++    opj_v8dwt_encode_step2(tmp + a * NB_ELTS_V8, /* four step2 passes (alpha, beta, gamma, delta), alternating between the bands */
++                           tmp + (b + 1) * NB_ELTS_V8,
++                           (OPJ_UINT32)dn,
++                           (OPJ_UINT32)opj_int_min(dn, sn - b),
++                           opj_dwt_alpha);
++    opj_v8dwt_encode_step2(tmp + b * NB_ELTS_V8,
++                           tmp + (a + 1) * NB_ELTS_V8,
++                           (OPJ_UINT32)sn,
++                           (OPJ_UINT32)opj_int_min(sn, dn - a),
++                           opj_dwt_beta);
++    opj_v8dwt_encode_step2(tmp + a * NB_ELTS_V8,
++                           tmp + (b + 1) * NB_ELTS_V8,
++                           (OPJ_UINT32)dn,
++                           (OPJ_UINT32)opj_int_min(dn, sn - b),
++                           opj_dwt_gamma);
++    opj_v8dwt_encode_step2(tmp + b * NB_ELTS_V8,
++                           tmp + (a + 1) * NB_ELTS_V8,
++                           (OPJ_UINT32)sn,
++                           (OPJ_UINT32)opj_int_min(sn, dn - a),
++                           opj_dwt_delta);
++    opj_v8dwt_encode_step1(tmp + b * NB_ELTS_V8, (OPJ_UINT32)dn, /* scale dn lines starting at line b by opj_K */
++                           opj_K);
++    opj_v8dwt_encode_step1(tmp + a * NB_ELTS_V8, (OPJ_UINT32)sn, /* scale sn lines starting at line a by opj_invK */
++                           opj_invK);
+ 
+-/*
+-==========================================================
+-   DWT interface
+-==========================================================
+-*/
++
++    if (cols == NB_ELTS_V8) { /* same call with a constant column count; presumably lets the INLINE copy specialise - TODO confirm */
++        opj_dwt_deinterleave_v_cols((OPJ_INT32*)tmp,
++                                    (OPJ_INT32*)array,
++                                    (OPJ_INT32)dn, (OPJ_INT32)sn,
++                                    stride_width, even ? 0 : 1, NB_ELTS_V8);
++    } else {
++        opj_dwt_deinterleave_v_cols((OPJ_INT32*)tmp,
++                                    (OPJ_INT32*)array,
++                                    (OPJ_INT32)dn, (OPJ_INT32)sn,
++                                    stride_width, even ? 0 : 1, cols);
++    }
++}
+ 
+ 
+ /* <summary>                            */
+ /* Forward 5-3 wavelet transform in 2-D. */
+ /* </summary>                           */
+-static INLINE OPJ_BOOL opj_dwt_encode_procedure(const opj_tcd_tilecomp_t * tilec,
+-        void(*p_function)(OPJ_INT32 *, OPJ_SIZE_T, OPJ_INT32, OPJ_INT32, OPJ_INT32))
++static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_thread_pool_t* tp,
++        opj_tcd_tilecomp_t * tilec,
++        opj_encode_and_deinterleave_v_fnptr_type p_encode_and_deinterleave_v,
++        opj_encode_and_deinterleave_h_one_row_fnptr_type
++        p_encode_and_deinterleave_h_one_row)
+ {
+-    OPJ_INT32 i, j, k;
+-    OPJ_INT32 *a = 00;
+-    OPJ_INT32 *aj = 00;
++    OPJ_INT32 i;
+     OPJ_INT32 *bj = 00;
+-    OPJ_INT32 w, l;
++    OPJ_UINT32 w;
++    OPJ_INT32 l;
+ 
+-    OPJ_INT32 rw;           /* width of the resolution level computed   */
+-    OPJ_INT32 rh;           /* height of the resolution level computed  */
+-    OPJ_SIZE_T l_data_count;
+     OPJ_SIZE_T l_data_size;
+ 
+     opj_tcd_resolution_t * l_cur_res = 0;
+     opj_tcd_resolution_t * l_last_res = 0;
++    const int num_threads = opj_thread_pool_get_thread_count(tp);
++    OPJ_INT32 * OPJ_RESTRICT tiledp = tilec->data;
+ 
+-    w = tilec->x1 - tilec->x0;
++    w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
+     l = (OPJ_INT32)tilec->numresolutions - 1;
+-    a = tilec->data;
+ 
+     l_cur_res = tilec->resolutions + l;
+     l_last_res = l_cur_res - 1;
+ 
+-    l_data_count = opj_dwt_max_resolution(tilec->resolutions, tilec->numresolutions);
++    l_data_size = opj_dwt_max_resolution(tilec->resolutions, tilec->numresolutions);
+     /* overflow check */
+-    if (l_data_count > (SIZE_MAX / sizeof(OPJ_INT32))) {
++    if (l_data_size > (SIZE_MAX / (NB_ELTS_V8 * sizeof(OPJ_INT32)))) {
+         /* FIXME event manager error callback */
+         return OPJ_FALSE;
+     }
+-    l_data_size = l_data_count * sizeof(OPJ_INT32);
+-    bj = (OPJ_INT32*)opj_malloc(l_data_size);
++    l_data_size *= NB_ELTS_V8 * sizeof(OPJ_INT32);
++    bj = (OPJ_INT32*)opj_aligned_32_malloc(l_data_size);
+     /* l_data_size is equal to 0 when numresolutions == 1 but bj is not used */
+     /* in that case, so do not error out */
+     if (l_data_size != 0 && ! bj) {
+@@ -1151,43 +1759,135 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(const opj_tcd_tilecomp_t * tilec
+     i = l;
+ 
+     while (i--) {
+-        OPJ_INT32 rw1;      /* width of the resolution level once lower than computed one                                       */
+-        OPJ_INT32 rh1;      /* height of the resolution level once lower than computed one                                      */
++        OPJ_UINT32 j;
++        OPJ_UINT32 rw;           /* width of the resolution level computed   */
++        OPJ_UINT32 rh;           /* height of the resolution level computed  */
++        OPJ_UINT32
++        rw1;      /* width of the resolution level once lower than computed one                                       */
++        OPJ_UINT32
++        rh1;      /* height of the resolution level once lower than computed one                                      */
+         OPJ_INT32 cas_col;  /* 0 = non inversion on horizontal filtering 1 = inversion between low-pass and high-pass filtering */
+         OPJ_INT32 cas_row;  /* 0 = non inversion on vertical filtering 1 = inversion between low-pass and high-pass filtering   */
+         OPJ_INT32 dn, sn;
+ 
+-        rw  = l_cur_res->x1 - l_cur_res->x0;
+-        rh  = l_cur_res->y1 - l_cur_res->y0;
+-        rw1 = l_last_res->x1 - l_last_res->x0;
+-        rh1 = l_last_res->y1 - l_last_res->y0;
++        rw  = (OPJ_UINT32)(l_cur_res->x1 - l_cur_res->x0);
++        rh  = (OPJ_UINT32)(l_cur_res->y1 - l_cur_res->y0);
++        rw1 = (OPJ_UINT32)(l_last_res->x1 - l_last_res->x0);
++        rh1 = (OPJ_UINT32)(l_last_res->y1 - l_last_res->y0);
+ 
+         cas_row = l_cur_res->x0 & 1;
+         cas_col = l_cur_res->y0 & 1;
+ 
+-        sn = rh1;
+-        dn = rh - rh1;
+-        for (j = 0; j < rw; ++j) {
+-            aj = a + j;
+-            for (k = 0; k < rh; ++k) {
+-                bj[k] = aj[k * w];
++        sn = (OPJ_INT32)rh1;
++        dn = (OPJ_INT32)(rh - rh1);
++
++        /* Perform vertical pass */
++        if (num_threads <= 1 || rw < 2 * NB_ELTS_V8) {
++            for (j = 0; j + NB_ELTS_V8 - 1 < rw; j += NB_ELTS_V8) {
++                p_encode_and_deinterleave_v(tiledp + j,
++                                            bj,
++                                            rh,
++                                            cas_col == 0,
++                                            w,
++                                            NB_ELTS_V8);
++            }
++            if (j < rw) {
++                p_encode_and_deinterleave_v(tiledp + j,
++                                            bj,
++                                            rh,
++                                            cas_col == 0,
++                                            w,
++                                            rw - j);
++            }
++        }  else {
++            OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads;
++            OPJ_UINT32 step_j;
++
++            if (rw < num_jobs) {
++                num_jobs = rw;
+             }
++            step_j = ((rw / num_jobs) / NB_ELTS_V8) * NB_ELTS_V8;
+ 
+-            (*p_function) (bj, l_data_count, dn, sn, cas_col);
++            for (j = 0; j < num_jobs; j++) {
++                opj_dwt_encode_v_job_t* job;
+ 
+-            opj_dwt_deinterleave_v(bj, aj, dn, sn, w, cas_col);
++                job = (opj_dwt_encode_v_job_t*) opj_malloc(sizeof(opj_dwt_encode_v_job_t));
++                if (!job) {
++                    opj_thread_pool_wait_completion(tp, 0);
++                    opj_aligned_free(bj);
++                    return OPJ_FALSE;
++                }
++                job->v.mem = (OPJ_INT32*)opj_aligned_32_malloc(l_data_size);
++                if (!job->v.mem) {
++                    opj_thread_pool_wait_completion(tp, 0);
++                    opj_free(job);
++                    opj_aligned_free(bj);
++                    return OPJ_FALSE;
++                }
++                job->v.dn = dn;
++                job->v.sn = sn;
++                job->v.cas = cas_col;
++                job->rh = rh;
++                job->w = w;
++                job->tiledp = tiledp;
++                job->min_j = j * step_j;
++                job->max_j = (j + 1 == num_jobs) ? rw : (j + 1) * step_j;
++                job->p_encode_and_deinterleave_v = p_encode_and_deinterleave_v;
++                opj_thread_pool_submit_job(tp, opj_dwt_encode_v_func, job);
++            }
++            opj_thread_pool_wait_completion(tp, 0);
+         }
+ 
+-        sn = rw1;
+-        dn = rw - rw1;
++        sn = (OPJ_INT32)rw1;
++        dn = (OPJ_INT32)(rw - rw1);
+ 
+-        for (j = 0; j < rh; j++) {
+-            aj = a + j * w;
+-            for (k = 0; k < rw; k++) {
+-                bj[k] = aj[k];
++        /* Perform horizontal pass */
++        if (num_threads <= 1 || rh <= 1) {
++            for (j = 0; j < rh; j++) {
++                OPJ_INT32* OPJ_RESTRICT aj = tiledp + j * w;
++                (*p_encode_and_deinterleave_h_one_row)(aj, bj, rw,
++                                                       cas_row == 0 ? OPJ_TRUE : OPJ_FALSE);
++            }
++        }  else {
++            OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads;
++            OPJ_UINT32 step_j;
++
++            if (rh < num_jobs) {
++                num_jobs = rh;
++            }
++            step_j = (rh / num_jobs);
++
++            for (j = 0; j < num_jobs; j++) {
++                opj_dwt_encode_h_job_t* job;
++
++                job = (opj_dwt_encode_h_job_t*) opj_malloc(sizeof(opj_dwt_encode_h_job_t));
++                if (!job) {
++                    opj_thread_pool_wait_completion(tp, 0);
++                    opj_aligned_free(bj);
++                    return OPJ_FALSE;
++                }
++                job->h.mem = (OPJ_INT32*)opj_aligned_32_malloc(l_data_size);
++                if (!job->h.mem) {
++                    opj_thread_pool_wait_completion(tp, 0);
++                    opj_free(job);
++                    opj_aligned_free(bj);
++                    return OPJ_FALSE;
++                }
++                job->h.dn = dn;
++                job->h.sn = sn;
++                job->h.cas = cas_row;
++                job->rw = rw;
++                job->w = w;
++                job->tiledp = tiledp;
++                job->min_j = j * step_j;
++                job->max_j = (j + 1U) * step_j; /* this can overflow */
++                if (j == (num_jobs - 1U)) {  /* this will take care of the overflow */
++                    job->max_j = rh;
++                }
++                job->p_function = p_encode_and_deinterleave_h_one_row;
++                opj_thread_pool_submit_job(tp, opj_dwt_encode_h_func, job);
+             }
+-            (*p_function) (bj, l_data_count, dn, sn, cas_row);
+-            opj_dwt_deinterleave_h(bj, aj, dn, sn, cas_row);
++            opj_thread_pool_wait_completion(tp, 0);
+         }
+ 
+         l_cur_res = l_last_res;
+@@ -1195,15 +1895,18 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(const opj_tcd_tilecomp_t * tilec
+         --l_last_res;
+     }
+ 
+-    opj_free(bj);
++    opj_aligned_free(bj);
+     return OPJ_TRUE;
+ }
+ 
+ /* Forward 5-3 wavelet transform in 2-D. */
+ /* </summary>                           */
+-OPJ_BOOL opj_dwt_encode(opj_tcd_tilecomp_t * tilec)
++OPJ_BOOL opj_dwt_encode(opj_tcd_t *p_tcd, /* supplies the thread pool used by the encode procedure */
++                        opj_tcd_tilecomp_t * tilec)
+ {
+-    return opj_dwt_encode_procedure(tilec, opj_dwt_encode_1);
++    return opj_dwt_encode_procedure(p_tcd->thread_pool, tilec,
++                                    opj_dwt_encode_and_deinterleave_v, /* 5-3 vertical pass */
++                                    opj_dwt_encode_and_deinterleave_h_one_row); /* per-row horizontal pass */
+ }
+ 
+ /* <summary>                            */
+@@ -1219,21 +1922,6 @@ OPJ_BOOL opj_dwt_decode(opj_tcd_t *p_tcd, opj_tcd_tilecomp_t* tilec,
+     }
+ }
+ 
+-
+-/* <summary>                          */
+-/* Get gain of 5-3 wavelet transform. */
+-/* </summary>                         */
+-OPJ_UINT32 opj_dwt_getgain(OPJ_UINT32 orient)
+-{
+-    if (orient == 0) {
+-        return 0;
+-    }
+-    if (orient == 1 || orient == 2) {
+-        return 1;
+-    }
+-    return 2;
+-}
+-
+ /* <summary>                */
+ /* Get norm of 5-3 wavelet. */
+ /* </summary>               */
+@@ -1253,18 +1941,12 @@ OPJ_FLOAT64 opj_dwt_getnorm(OPJ_UINT32 level, OPJ_UINT32 orient)
+ /* <summary>                             */
+ /* Forward 9-7 wavelet transform in 2-D. */
+ /* </summary>                            */
+-OPJ_BOOL opj_dwt_encode_real(opj_tcd_tilecomp_t * tilec)
+-{
+-    return opj_dwt_encode_procedure(tilec, opj_dwt_encode_1_real);
+-}
+-
+-/* <summary>                          */
+-/* Get gain of 9-7 wavelet transform. */
+-/* </summary>                         */
+-OPJ_UINT32 opj_dwt_getgain_real(OPJ_UINT32 orient)
++OPJ_BOOL opj_dwt_encode_real(opj_tcd_t *p_tcd, /* supplies the thread pool used by the encode procedure */
++                             opj_tcd_tilecomp_t * tilec)
+ {
+-    (void)orient;
+-    return 0;
++    return opj_dwt_encode_procedure(p_tcd->thread_pool, tilec,
++                                    opj_dwt_encode_and_deinterleave_v_real, /* 9-7 vertical pass */
++                                    opj_dwt_encode_and_deinterleave_h_one_row_real); /* per-row horizontal pass */
+ }
+ 
+ /* <summary>                */
+@@ -1299,7 +1981,7 @@ void opj_dwt_calc_explicit_stepsizes(opj_tccp_t * tccp, OPJ_UINT32 prec)
+         if (tccp->qntsty == J2K_CCP_QNTSTY_NOQNT) {
+             stepsize = 1.0;
+         } else {
+-            OPJ_FLOAT64 norm = opj_dwt_norms_real[orient][level];
++            OPJ_FLOAT64 norm = opj_dwt_getnorm_real(level, orient);
+             stepsize = (1 << (gain)) / norm;
+         }
+         opj_dwt_encode_stepsize((OPJ_INT32) floor(stepsize * 8192.0),
+@@ -1334,15 +2016,15 @@ typedef struct {
+     OPJ_INT32 * OPJ_RESTRICT tiledp;
+     OPJ_UINT32 min_j;
+     OPJ_UINT32 max_j;
+-} opj_dwd_decode_h_job_t;
++} opj_dwt_decode_h_job_t;
+ 
+ static void opj_dwt_decode_h_func(void* user_data, opj_tls_t* tls)
+ {
+     OPJ_UINT32 j;
+-    opj_dwd_decode_h_job_t* job;
++    opj_dwt_decode_h_job_t* job;
+     (void)tls;
+ 
+-    job = (opj_dwd_decode_h_job_t*)user_data;
++    job = (opj_dwt_decode_h_job_t*)user_data;
+     for (j = job->min_j; j < job->max_j; j++) {
+         opj_idwt53_h(&job->h, &job->tiledp[j * job->w]);
+     }
+@@ -1358,15 +2040,15 @@ typedef struct {
+     OPJ_INT32 * OPJ_RESTRICT tiledp;
+     OPJ_UINT32 min_j;
+     OPJ_UINT32 max_j;
+-} opj_dwd_decode_v_job_t;
++} opj_dwt_decode_v_job_t;
+ 
+ static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls)
+ {
+     OPJ_UINT32 j;
+-    opj_dwd_decode_v_job_t* job;
++    opj_dwt_decode_v_job_t* job;
+     (void)tls;
+ 
+-    job = (opj_dwd_decode_v_job_t*)user_data;
++    job = (opj_dwt_decode_v_job_t*)user_data;
+     for (j = job->min_j; j + PARALLEL_COLS_53 <= job->max_j;
+             j += PARALLEL_COLS_53) {
+         opj_idwt53_v(&job->v, &job->tiledp[j], (OPJ_SIZE_T)job->w,
+@@ -1454,9 +2136,9 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
+             step_j = (rh / num_jobs);
+ 
+             for (j = 0; j < num_jobs; j++) {
+-                opj_dwd_decode_h_job_t* job;
++                opj_dwt_decode_h_job_t* job;
+ 
+-                job = (opj_dwd_decode_h_job_t*) opj_malloc(sizeof(opj_dwd_decode_h_job_t));
++                job = (opj_dwt_decode_h_job_t*) opj_malloc(sizeof(opj_dwt_decode_h_job_t));
+                 if (!job) {
+                     /* It would be nice to fallback to single thread case, but */
+                     /* unfortunately some jobs may be launched and have modified */
+@@ -1509,9 +2191,9 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp,
+             step_j = (rw / num_jobs);
+ 
+             for (j = 0; j < num_jobs; j++) {
+-                opj_dwd_decode_v_job_t* job;
++                opj_dwt_decode_v_job_t* job;
+ 
+-                job = (opj_dwd_decode_v_job_t*) opj_malloc(sizeof(opj_dwd_decode_v_job_t));
++                job = (opj_dwt_decode_v_job_t*) opj_malloc(sizeof(opj_dwt_decode_v_job_t));
+                 if (!job) {
+                     /* It would be nice to fallback to single thread case, but */
+                     /* unfortunately some jobs may be launched and have modified */
+@@ -2177,7 +2859,7 @@ static OPJ_BOOL opj_dwt_decode_partial_tile(
+     return OPJ_TRUE;
+ }
+ 
+-static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT dwt,
++static void opj_v8dwt_interleave_h(opj_v8dwt_t* OPJ_RESTRICT dwt,
+                                    OPJ_FLOAT32* OPJ_RESTRICT a,
+                                    OPJ_UINT32 width,
+                                    OPJ_UINT32 remaining_height)
+@@ -2188,39 +2870,69 @@ static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT dwt,
+     OPJ_UINT32 x1 = dwt->win_l_x1;
+ 
+     for (k = 0; k < 2; ++k) {
+-        if (remaining_height >= 4 && ((OPJ_SIZE_T) a & 0x0f) == 0 &&
+-                ((OPJ_SIZE_T) bi & 0x0f) == 0 && (width & 0x0f) == 0) {
++        if (remaining_height >= NB_ELTS_V8 && ((OPJ_SIZE_T) a & 0x0f) == 0 &&
++                ((OPJ_SIZE_T) bi & 0x0f) == 0) {
+             /* Fast code path */
+             for (i = x0; i < x1; ++i) {
+                 OPJ_UINT32 j = i;
+-                bi[i * 8    ] = a[j];
++                OPJ_FLOAT32* OPJ_RESTRICT dst = bi + i * 2 * NB_ELTS_V8;
++                dst[0] = a[j];
+                 j += width;
+-                bi[i * 8 + 1] = a[j];
++                dst[1] = a[j];
+                 j += width;
+-                bi[i * 8 + 2] = a[j];
++                dst[2] = a[j];
+                 j += width;
+-                bi[i * 8 + 3] = a[j];
++                dst[3] = a[j];
++                j += width;
++                dst[4] = a[j];
++                j += width;
++                dst[5] = a[j];
++                j += width;
++                dst[6] = a[j];
++                j += width;
++                dst[7] = a[j];
+             }
+         } else {
+             /* Slow code path */
+             for (i = x0; i < x1; ++i) {
+                 OPJ_UINT32 j = i;
+-                bi[i * 8    ] = a[j];
++                OPJ_FLOAT32* OPJ_RESTRICT dst = bi + i * 2 * NB_ELTS_V8;
++                dst[0] = a[j];
+                 j += width;
+                 if (remaining_height == 1) {
+                     continue;
+                 }
+-                bi[i * 8 + 1] = a[j];
++                dst[1] = a[j];
+                 j += width;
+                 if (remaining_height == 2) {
+                     continue;
+                 }
+-                bi[i * 8 + 2] = a[j];
++                dst[2] = a[j];
+                 j += width;
+                 if (remaining_height == 3) {
+                     continue;
+                 }
+-                bi[i * 8 + 3] = a[j]; /* This one*/
++                dst[3] = a[j];
++                j += width;
++                if (remaining_height == 4) {
++                    continue;
++                }
++                dst[4] = a[j];
++                j += width;
++                if (remaining_height == 5) {
++                    continue;
++                }
++                dst[5] = a[j];
++                j += width;
++                if (remaining_height == 6) {
++                    continue;
++                }
++                dst[6] = a[j];
++                j += width;
++                if (remaining_height == 7) {
++                    continue;
++                }
++                dst[7] = a[j];
+             }
+         }
+ 
+@@ -2231,7 +2943,7 @@ static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT dwt,
+     }
+ }
+ 
+-static void opj_v4dwt_interleave_partial_h(opj_v4dwt_t* dwt,
++static void opj_v8dwt_interleave_partial_h(opj_v8dwt_t* dwt,
+         opj_sparse_array_int32_t* sa,
+         OPJ_UINT32 sa_line,
+         OPJ_UINT32 remaining_height)
+@@ -2244,25 +2956,25 @@ static void opj_v4dwt_interleave_partial_h(opj_v4dwt_t* dwt,
+                                           dwt->win_l_x1, sa_line + i + 1,
+                                           /* Nasty cast from float* to int32* */
+                                           (OPJ_INT32*)(dwt->wavelet + dwt->cas + 2 * dwt->win_l_x0) + i,
+-                                          8, 0, OPJ_TRUE);
++                                          2 * NB_ELTS_V8, 0, OPJ_TRUE);
+         assert(ret);
+         ret = opj_sparse_array_int32_read(sa,
+                                           (OPJ_UINT32)dwt->sn + dwt->win_h_x0, sa_line + i,
+                                           (OPJ_UINT32)dwt->sn + dwt->win_h_x1, sa_line + i + 1,
+                                           /* Nasty cast from float* to int32* */
+                                           (OPJ_INT32*)(dwt->wavelet + 1 - dwt->cas + 2 * dwt->win_h_x0) + i,
+-                                          8, 0, OPJ_TRUE);
++                                          2 * NB_ELTS_V8, 0, OPJ_TRUE);
+         assert(ret);
+         OPJ_UNUSED(ret);
+     }
+ }
+ 
+-static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT dwt,
+-                                   OPJ_FLOAT32* OPJ_RESTRICT a,
+-                                   OPJ_UINT32 width,
+-                                   OPJ_UINT32 nb_elts_read)
++static INLINE void opj_v8dwt_interleave_v(opj_v8dwt_t* OPJ_RESTRICT dwt,
++        OPJ_FLOAT32* OPJ_RESTRICT a,
++        OPJ_UINT32 width,
++        OPJ_UINT32 nb_elts_read)
+ {
+-    opj_v4_t* OPJ_RESTRICT bi = dwt->wavelet + dwt->cas;
++    opj_v8_t* OPJ_RESTRICT bi = dwt->wavelet + dwt->cas;
+     OPJ_UINT32 i;
+ 
+     for (i = dwt->win_l_x0; i < dwt->win_l_x1; ++i) {
+@@ -2279,7 +2991,7 @@ static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT dwt,
+     }
+ }
+ 
+-static void opj_v4dwt_interleave_partial_v(opj_v4dwt_t* OPJ_RESTRICT dwt,
++static void opj_v8dwt_interleave_partial_v(opj_v8dwt_t* OPJ_RESTRICT dwt,
+         opj_sparse_array_int32_t* sa,
+         OPJ_UINT32 sa_col,
+         OPJ_UINT32 nb_elts_read)
+@@ -2289,44 +3001,36 @@ static void opj_v4dwt_interleave_partial_v(opj_v4dwt_t* OPJ_RESTRICT dwt,
+                                       sa_col, dwt->win_l_x0,
+                                       sa_col + nb_elts_read, dwt->win_l_x1,
+                                       (OPJ_INT32*)(dwt->wavelet + dwt->cas + 2 * dwt->win_l_x0),
+-                                      1, 8, OPJ_TRUE);
++                                      1, 2 * NB_ELTS_V8, OPJ_TRUE);
+     assert(ret);
+     ret = opj_sparse_array_int32_read(sa,
+                                       sa_col, (OPJ_UINT32)dwt->sn + dwt->win_h_x0,
+                                       sa_col + nb_elts_read, (OPJ_UINT32)dwt->sn + dwt->win_h_x1,
+                                       (OPJ_INT32*)(dwt->wavelet + 1 - dwt->cas + 2 * dwt->win_h_x0),
+-                                      1, 8, OPJ_TRUE);
++                                      1, 2 * NB_ELTS_V8, OPJ_TRUE);
+     assert(ret);
+     OPJ_UNUSED(ret);
+ }
+ 
+ #ifdef __SSE__
+ 
+-static void opj_v4dwt_decode_step1_sse(opj_v4_t* w,
++static void opj_v8dwt_decode_step1_sse(opj_v8_t* w,
+                                        OPJ_UINT32 start,
+                                        OPJ_UINT32 end,
+                                        const __m128 c)
+ {
+     __m128* OPJ_RESTRICT vw = (__m128*) w;
+-    OPJ_UINT32 i;
+-    /* 4x unrolled loop */
+-    vw += 2 * start;
+-    for (i = start; i + 3 < end; i += 4, vw += 8) {
+-        __m128 xmm0 = _mm_mul_ps(vw[0], c);
+-        __m128 xmm2 = _mm_mul_ps(vw[2], c);
+-        __m128 xmm4 = _mm_mul_ps(vw[4], c);
+-        __m128 xmm6 = _mm_mul_ps(vw[6], c);
+-        vw[0] = xmm0;
+-        vw[2] = xmm2;
+-        vw[4] = xmm4;
+-        vw[6] = xmm6;
+-    }
+-    for (; i < end; ++i, vw += 2) {
++    OPJ_UINT32 i = start;
++    /* To be adapted if NB_ELTS_V8 changes */
++    vw += 4 * start;
++    /* Note: attempt at loop unrolling x2 doesn't help */
++    for (; i < end; ++i, vw += 4) {
+         vw[0] = _mm_mul_ps(vw[0], c);
++        vw[1] = _mm_mul_ps(vw[1], c);
+     }
+ }
+ 
+-static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w,
++static void opj_v8dwt_decode_step2_sse(opj_v8_t* l, opj_v8_t* w,
+                                        OPJ_UINT32 start,
+                                        OPJ_UINT32 end,
+                                        OPJ_UINT32 m,
+@@ -2334,74 +3038,58 @@ static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w,
+ {
+     __m128* OPJ_RESTRICT vl = (__m128*) l;
+     __m128* OPJ_RESTRICT vw = (__m128*) w;
++    /* To be adapted if NB_ELTS_V8 changes */
+     OPJ_UINT32 i;
+     OPJ_UINT32 imax = opj_uint_min(end, m);
+-    __m128 tmp1, tmp2, tmp3;
+     if (start == 0) {
+-        tmp1 = vl[0];
++        if (imax >= 1) {
++            vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(_mm_add_ps(vl[0], vw[0]), c));
++            vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(_mm_add_ps(vl[1], vw[1]), c));
++            vw += 4;
++            start = 1;
++        }
+     } else {
+-        vw += start * 2;
+-        tmp1 = vw[-3];
++        vw += start * 4;
+     }
+ 
+     i = start;
+-
+-    /* 4x loop unrolling */
+-    for (; i + 3 < imax; i += 4) {
+-        __m128 tmp4, tmp5, tmp6, tmp7, tmp8, tmp9;
+-        tmp2 = vw[-1];
+-        tmp3 = vw[ 0];
+-        tmp4 = vw[ 1];
+-        tmp5 = vw[ 2];
+-        tmp6 = vw[ 3];
+-        tmp7 = vw[ 4];
+-        tmp8 = vw[ 5];
+-        tmp9 = vw[ 6];
+-        vw[-1] = _mm_add_ps(tmp2, _mm_mul_ps(_mm_add_ps(tmp1, tmp3), c));
+-        vw[ 1] = _mm_add_ps(tmp4, _mm_mul_ps(_mm_add_ps(tmp3, tmp5), c));
+-        vw[ 3] = _mm_add_ps(tmp6, _mm_mul_ps(_mm_add_ps(tmp5, tmp7), c));
+-        vw[ 5] = _mm_add_ps(tmp8, _mm_mul_ps(_mm_add_ps(tmp7, tmp9), c));
+-        tmp1 = tmp9;
+-        vw += 8;
+-    }
+-
++    /* Note: attempt at loop unrolling x2 doesn't help */
+     for (; i < imax; ++i) {
+-        tmp2 = vw[-1];
+-        tmp3 = vw[ 0];
+-        vw[-1] = _mm_add_ps(tmp2, _mm_mul_ps(_mm_add_ps(tmp1, tmp3), c));
+-        tmp1 = tmp3;
+-        vw += 2;
++        vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(_mm_add_ps(vw[-4], vw[0]), c));
++        vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(_mm_add_ps(vw[-3], vw[1]), c));
++        vw += 4;
+     }
+     if (m < end) {
+         assert(m + 1 == end);
+         c = _mm_add_ps(c, c);
+-        c = _mm_mul_ps(c, vw[-2]);
+-        vw[-1] = _mm_add_ps(vw[-1], c);
++        vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(c, vw[-4]));
++        vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(c, vw[-3]));
+     }
+ }
+ 
+ #else
+ 
+-static void opj_v4dwt_decode_step1(opj_v4_t* w,
++static void opj_v8dwt_decode_step1(opj_v8_t* w,
+                                    OPJ_UINT32 start,
+                                    OPJ_UINT32 end,
+                                    const OPJ_FLOAT32 c)
+ {
+     OPJ_FLOAT32* OPJ_RESTRICT fw = (OPJ_FLOAT32*) w;
+     OPJ_UINT32 i;
++    /* To be adapted if NB_ELTS_V8 changes */
+     for (i = start; i < end; ++i) {
+-        OPJ_FLOAT32 tmp1 = fw[i * 8    ];
+-        OPJ_FLOAT32 tmp2 = fw[i * 8 + 1];
+-        OPJ_FLOAT32 tmp3 = fw[i * 8 + 2];
+-        OPJ_FLOAT32 tmp4 = fw[i * 8 + 3];
+-        fw[i * 8    ] = tmp1 * c;
+-        fw[i * 8 + 1] = tmp2 * c;
+-        fw[i * 8 + 2] = tmp3 * c;
+-        fw[i * 8 + 3] = tmp4 * c;
++        fw[i * 2 * 8    ] = fw[i * 2 * 8    ] * c;
++        fw[i * 2 * 8 + 1] = fw[i * 2 * 8 + 1] * c;
++        fw[i * 2 * 8 + 2] = fw[i * 2 * 8 + 2] * c;
++        fw[i * 2 * 8 + 3] = fw[i * 2 * 8 + 3] * c;
++        fw[i * 2 * 8 + 4] = fw[i * 2 * 8 + 4] * c;
++        fw[i * 2 * 8 + 5] = fw[i * 2 * 8 + 5] * c;
++        fw[i * 2 * 8 + 6] = fw[i * 2 * 8 + 6] * c;
++        fw[i * 2 * 8 + 7] = fw[i * 2 * 8 + 7] * c;
+     }
+ }
+ 
+-static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w,
++static void opj_v8dwt_decode_step2(opj_v8_t* l, opj_v8_t* w,
+                                    OPJ_UINT32 start,
+                                    OPJ_UINT32 end,
+                                    OPJ_UINT32 m,
+@@ -2412,36 +3100,33 @@ static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w,
+     OPJ_UINT32 i;
+     OPJ_UINT32 imax = opj_uint_min(end, m);
+     if (start > 0) {
+-        fw += 8 * start;
+-        fl = fw - 8;
++        fw += 2 * NB_ELTS_V8 * start;
++        fl = fw - 2 * NB_ELTS_V8;
+     }
++    /* To be adapted if NB_ELTS_V8 changes */
+     for (i = start; i < imax; ++i) {
+-        OPJ_FLOAT32 tmp1_1 = fl[0];
+-        OPJ_FLOAT32 tmp1_2 = fl[1];
+-        OPJ_FLOAT32 tmp1_3 = fl[2];
+-        OPJ_FLOAT32 tmp1_4 = fl[3];
+-        OPJ_FLOAT32 tmp2_1 = fw[-4];
+-        OPJ_FLOAT32 tmp2_2 = fw[-3];
+-        OPJ_FLOAT32 tmp2_3 = fw[-2];
+-        OPJ_FLOAT32 tmp2_4 = fw[-1];
+-        OPJ_FLOAT32 tmp3_1 = fw[0];
+-        OPJ_FLOAT32 tmp3_2 = fw[1];
+-        OPJ_FLOAT32 tmp3_3 = fw[2];
+-        OPJ_FLOAT32 tmp3_4 = fw[3];
+-        fw[-4] = tmp2_1 + ((tmp1_1 + tmp3_1) * c);
+-        fw[-3] = tmp2_2 + ((tmp1_2 + tmp3_2) * c);
+-        fw[-2] = tmp2_3 + ((tmp1_3 + tmp3_3) * c);
+-        fw[-1] = tmp2_4 + ((tmp1_4 + tmp3_4) * c);
++        fw[-8] = fw[-8] + ((fl[0] + fw[0]) * c);
++        fw[-7] = fw[-7] + ((fl[1] + fw[1]) * c);
++        fw[-6] = fw[-6] + ((fl[2] + fw[2]) * c);
++        fw[-5] = fw[-5] + ((fl[3] + fw[3]) * c);
++        fw[-4] = fw[-4] + ((fl[4] + fw[4]) * c);
++        fw[-3] = fw[-3] + ((fl[5] + fw[5]) * c);
++        fw[-2] = fw[-2] + ((fl[6] + fw[6]) * c);
++        fw[-1] = fw[-1] + ((fl[7] + fw[7]) * c);
+         fl = fw;
+-        fw += 8;
++        fw += 2 * NB_ELTS_V8;
+     }
+     if (m < end) {
+         assert(m + 1 == end);
+         c += c;
+-        fw[-4] = fw[-4] + fl[0] * c;
+-        fw[-3] = fw[-3] + fl[1] * c;
+-        fw[-2] = fw[-2] + fl[2] * c;
+-        fw[-1] = fw[-1] + fl[3] * c;
++        fw[-8] = fw[-8] + fl[0] * c;
++        fw[-7] = fw[-7] + fl[1] * c;
++        fw[-6] = fw[-6] + fl[2] * c;
++        fw[-5] = fw[-5] + fl[3] * c;
++        fw[-4] = fw[-4] + fl[4] * c;
++        fw[-3] = fw[-3] + fl[5] * c;
++        fw[-2] = fw[-2] + fl[6] * c;
++        fw[-1] = fw[-1] + fl[7] * c;
+     }
+ }
+ 
+@@ -2450,9 +3135,17 @@ static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w,
+ /* <summary>                             */
+ /* Inverse 9-7 wavelet transform in 1-D. */
+ /* </summary>                            */
+-static void opj_v4dwt_decode(opj_v4dwt_t* OPJ_RESTRICT dwt)
++static void opj_v8dwt_decode(opj_v8dwt_t* OPJ_RESTRICT dwt)
+ {
+     OPJ_INT32 a, b;
++    /* BUG_WEIRD_TWO_INVK (look for this identifier in tcd.c) */
++    /* Historic value for 2 / opj_invK */
++    /* Normally, we should use invK, but if we do so, we have failures in the */
++    /* conformance test, due to MSE and peak errors significantly higher than */
++    /* accepted value */
++    /* Due to using two_invK instead of invK, we have to compensate in tcd.c */
++    /* the computation of the stepsize for the non LL subbands */
++    const float two_invK = 1.625732422f;
+     if (dwt->cas == 0) {
+         if (!((dwt->dn > 0) || (dwt->sn > 1))) {
+             return;
+@@ -2467,60 +3160,147 @@ static void opj_v4dwt_decode(opj_v4dwt_t* OPJ_RESTRICT dwt)
+         b = 0;
+     }
+ #ifdef __SSE__
+-    opj_v4dwt_decode_step1_sse(dwt->wavelet + a, dwt->win_l_x0, dwt->win_l_x1,
++    opj_v8dwt_decode_step1_sse(dwt->wavelet + a, dwt->win_l_x0, dwt->win_l_x1,
+                                _mm_set1_ps(opj_K));
+-    opj_v4dwt_decode_step1_sse(dwt->wavelet + b, dwt->win_h_x0, dwt->win_h_x1,
+-                               _mm_set1_ps(opj_c13318));
+-    opj_v4dwt_decode_step2_sse(dwt->wavelet + b, dwt->wavelet + a + 1,
++    opj_v8dwt_decode_step1_sse(dwt->wavelet + b, dwt->win_h_x0, dwt->win_h_x1,
++                               _mm_set1_ps(two_invK));
++    opj_v8dwt_decode_step2_sse(dwt->wavelet + b, dwt->wavelet + a + 1,
+                                dwt->win_l_x0, dwt->win_l_x1,
+                                (OPJ_UINT32)opj_int_min(dwt->sn, dwt->dn - a),
+-                               _mm_set1_ps(opj_dwt_delta));
+-    opj_v4dwt_decode_step2_sse(dwt->wavelet + a, dwt->wavelet + b + 1,
++                               _mm_set1_ps(-opj_dwt_delta));
++    opj_v8dwt_decode_step2_sse(dwt->wavelet + a, dwt->wavelet + b + 1,
+                                dwt->win_h_x0, dwt->win_h_x1,
+                                (OPJ_UINT32)opj_int_min(dwt->dn, dwt->sn - b),
+-                               _mm_set1_ps(opj_dwt_gamma));
+-    opj_v4dwt_decode_step2_sse(dwt->wavelet + b, dwt->wavelet + a + 1,
++                               _mm_set1_ps(-opj_dwt_gamma));
++    opj_v8dwt_decode_step2_sse(dwt->wavelet + b, dwt->wavelet + a + 1,
+                                dwt->win_l_x0, dwt->win_l_x1,
+                                (OPJ_UINT32)opj_int_min(dwt->sn, dwt->dn - a),
+-                               _mm_set1_ps(opj_dwt_beta));
+-    opj_v4dwt_decode_step2_sse(dwt->wavelet + a, dwt->wavelet + b + 1,
++                               _mm_set1_ps(-opj_dwt_beta));
++    opj_v8dwt_decode_step2_sse(dwt->wavelet + a, dwt->wavelet + b + 1,
+                                dwt->win_h_x0, dwt->win_h_x1,
+                                (OPJ_UINT32)opj_int_min(dwt->dn, dwt->sn - b),
+-                               _mm_set1_ps(opj_dwt_alpha));
++                               _mm_set1_ps(-opj_dwt_alpha));
+ #else
+-    opj_v4dwt_decode_step1(dwt->wavelet + a, dwt->win_l_x0, dwt->win_l_x1,
++    opj_v8dwt_decode_step1(dwt->wavelet + a, dwt->win_l_x0, dwt->win_l_x1,
+                            opj_K);
+-    opj_v4dwt_decode_step1(dwt->wavelet + b, dwt->win_h_x0, dwt->win_h_x1,
+-                           opj_c13318);
+-    opj_v4dwt_decode_step2(dwt->wavelet + b, dwt->wavelet + a + 1,
++    opj_v8dwt_decode_step1(dwt->wavelet + b, dwt->win_h_x0, dwt->win_h_x1,
++                           two_invK);
++    opj_v8dwt_decode_step2(dwt->wavelet + b, dwt->wavelet + a + 1,
+                            dwt->win_l_x0, dwt->win_l_x1,
+                            (OPJ_UINT32)opj_int_min(dwt->sn, dwt->dn - a),
+-                           opj_dwt_delta);
+-    opj_v4dwt_decode_step2(dwt->wavelet + a, dwt->wavelet + b + 1,
++                           -opj_dwt_delta);
++    opj_v8dwt_decode_step2(dwt->wavelet + a, dwt->wavelet + b + 1,
+                            dwt->win_h_x0, dwt->win_h_x1,
+                            (OPJ_UINT32)opj_int_min(dwt->dn, dwt->sn - b),
+-                           opj_dwt_gamma);
+-    opj_v4dwt_decode_step2(dwt->wavelet + b, dwt->wavelet + a + 1,
++                           -opj_dwt_gamma);
++    opj_v8dwt_decode_step2(dwt->wavelet + b, dwt->wavelet + a + 1,
+                            dwt->win_l_x0, dwt->win_l_x1,
+                            (OPJ_UINT32)opj_int_min(dwt->sn, dwt->dn - a),
+-                           opj_dwt_beta);
+-    opj_v4dwt_decode_step2(dwt->wavelet + a, dwt->wavelet + b + 1,
++                           -opj_dwt_beta);
++    opj_v8dwt_decode_step2(dwt->wavelet + a, dwt->wavelet + b + 1,
+                            dwt->win_h_x0, dwt->win_h_x1,
+                            (OPJ_UINT32)opj_int_min(dwt->dn, dwt->sn - b),
+-                           opj_dwt_alpha);
++                           -opj_dwt_alpha);
+ #endif
+ }
+ 
++typedef struct {
++    opj_v8dwt_t h;
++    OPJ_UINT32 rw;
++    OPJ_UINT32 w;
++    OPJ_FLOAT32 * OPJ_RESTRICT aj;
++    OPJ_UINT32 nb_rows;
++} opj_dwt97_decode_h_job_t;
++
++static void opj_dwt97_decode_h_func(void* user_data, opj_tls_t* tls)
++{
++    OPJ_UINT32 j;
++    opj_dwt97_decode_h_job_t* job;
++    OPJ_FLOAT32 * OPJ_RESTRICT aj;
++    OPJ_UINT32 w;
++    (void)tls;
++
++    job = (opj_dwt97_decode_h_job_t*)user_data;
++    w = job->w;
++
++    assert((job->nb_rows % NB_ELTS_V8) == 0);
++
++    aj = job->aj;
++    for (j = 0; j + NB_ELTS_V8 <= job->nb_rows; j += NB_ELTS_V8) {
++        OPJ_UINT32 k;
++        opj_v8dwt_interleave_h(&job->h, aj, job->w, NB_ELTS_V8);
++        opj_v8dwt_decode(&job->h);
++
++        /* To be adapted if NB_ELTS_V8 changes */
++        for (k = 0; k < job->rw; k++) {
++            aj[k      ] = job->h.wavelet[k].f[0];
++            aj[k + (OPJ_SIZE_T)w  ] = job->h.wavelet[k].f[1];
++            aj[k + (OPJ_SIZE_T)w * 2] = job->h.wavelet[k].f[2];
++            aj[k + (OPJ_SIZE_T)w * 3] = job->h.wavelet[k].f[3];
++        }
++        for (k = 0; k < job->rw; k++) {
++            aj[k + (OPJ_SIZE_T)w * 4] = job->h.wavelet[k].f[4];
++            aj[k + (OPJ_SIZE_T)w * 5] = job->h.wavelet[k].f[5];
++            aj[k + (OPJ_SIZE_T)w * 6] = job->h.wavelet[k].f[6];
++            aj[k + (OPJ_SIZE_T)w * 7] = job->h.wavelet[k].f[7];
++        }
++
++        aj += w * NB_ELTS_V8;
++    }
++
++    opj_aligned_free(job->h.wavelet);
++    opj_free(job);
++}
++
++
++typedef struct {
++    opj_v8dwt_t v;
++    OPJ_UINT32 rh;
++    OPJ_UINT32 w;
++    OPJ_FLOAT32 * OPJ_RESTRICT aj;
++    OPJ_UINT32 nb_columns;
++} opj_dwt97_decode_v_job_t;
++
++static void opj_dwt97_decode_v_func(void* user_data, opj_tls_t* tls)
++{
++    OPJ_UINT32 j;
++    opj_dwt97_decode_v_job_t* job;
++    OPJ_FLOAT32 * OPJ_RESTRICT aj;
++    (void)tls;
++
++    job = (opj_dwt97_decode_v_job_t*)user_data;
++
++    assert((job->nb_columns % NB_ELTS_V8) == 0);
++
++    aj = job->aj;
++    for (j = 0; j + NB_ELTS_V8 <= job->nb_columns; j += NB_ELTS_V8) {
++        OPJ_UINT32 k;
++
++        opj_v8dwt_interleave_v(&job->v, aj, job->w, NB_ELTS_V8);
++        opj_v8dwt_decode(&job->v);
++
++        for (k = 0; k < job->rh; ++k) {
++            memcpy(&aj[k * (OPJ_SIZE_T)job->w], &job->v.wavelet[k],
++                   NB_ELTS_V8 * sizeof(OPJ_FLOAT32));
++        }
++        aj += NB_ELTS_V8;
++    }
++
++    opj_aligned_free(job->v.wavelet);
++    opj_free(job);
++}
++
+ 
+ /* <summary>                             */
+ /* Inverse 9-7 wavelet transform in 2-D. */
+ /* </summary>                            */
+ static
+-OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
++OPJ_BOOL opj_dwt_decode_tile_97(opj_thread_pool_t* tp,
++                                opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
+                                 OPJ_UINT32 numres)
+ {
+-    opj_v4dwt_t h;
+-    opj_v4dwt_t v;
++    opj_v8dwt_t h;
++    opj_v8dwt_t v;
+ 
+     opj_tcd_resolution_t* res = tilec->resolutions;
+ 
+@@ -2534,20 +3314,19 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
+                                 tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);
+ 
+     OPJ_SIZE_T l_data_size;
++    const int num_threads = opj_thread_pool_get_thread_count(tp);
+ 
+-    l_data_size = opj_dwt_max_resolution(res, numres);
+-    /* overflow check */
+-    if (l_data_size > (SIZE_MAX - 5U)) {
+-        /* FIXME event manager error callback */
+-        return OPJ_FALSE;
++    if (numres == 1) {
++        return OPJ_TRUE;
+     }
+-    l_data_size += 5U;
++
++    l_data_size = opj_dwt_max_resolution(res, numres);
+     /* overflow check */
+-    if (l_data_size > (SIZE_MAX / sizeof(opj_v4_t))) {
++    if (l_data_size > (SIZE_MAX / sizeof(opj_v8_t))) {
+         /* FIXME event manager error callback */
+         return OPJ_FALSE;
+     }
+-    h.wavelet = (opj_v4_t*) opj_aligned_malloc(l_data_size * sizeof(opj_v4_t));
++    h.wavelet = (opj_v8_t*) opj_aligned_malloc(l_data_size * sizeof(opj_v8_t));
+     if (!h.wavelet) {
+         /* FIXME event manager error callback */
+         return OPJ_FALSE;
+@@ -2575,35 +3354,80 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
+         h.win_l_x1 = (OPJ_UINT32)h.sn;
+         h.win_h_x0 = 0;
+         h.win_h_x1 = (OPJ_UINT32)h.dn;
+-        for (j = 0; j + 3 < rh; j += 4) {
+-            OPJ_UINT32 k;
+-            opj_v4dwt_interleave_h(&h, aj, w, rh - j);
+-            opj_v4dwt_decode(&h);
+ 
+-            for (k = 0; k < rw; k++) {
+-                aj[k      ] = h.wavelet[k].f[0];
+-                aj[k + (OPJ_SIZE_T)w  ] = h.wavelet[k].f[1];
+-                aj[k + (OPJ_SIZE_T)w * 2] = h.wavelet[k].f[2];
+-                aj[k + (OPJ_SIZE_T)w * 3] = h.wavelet[k].f[3];
++        if (num_threads <= 1 || rh < 2 * NB_ELTS_V8) {
++            for (j = 0; j + (NB_ELTS_V8 - 1) < rh; j += NB_ELTS_V8) {
++                OPJ_UINT32 k;
++                opj_v8dwt_interleave_h(&h, aj, w, NB_ELTS_V8);
++                opj_v8dwt_decode(&h);
++
++                /* To be adapted if NB_ELTS_V8 changes */
++                for (k = 0; k < rw; k++) {
++                    aj[k      ] = h.wavelet[k].f[0];
++                    aj[k + (OPJ_SIZE_T)w  ] = h.wavelet[k].f[1];
++                    aj[k + (OPJ_SIZE_T)w * 2] = h.wavelet[k].f[2];
++                    aj[k + (OPJ_SIZE_T)w * 3] = h.wavelet[k].f[3];
++                }
++                for (k = 0; k < rw; k++) {
++                    aj[k + (OPJ_SIZE_T)w * 4] = h.wavelet[k].f[4];
++                    aj[k + (OPJ_SIZE_T)w * 5] = h.wavelet[k].f[5];
++                    aj[k + (OPJ_SIZE_T)w * 6] = h.wavelet[k].f[6];
++                    aj[k + (OPJ_SIZE_T)w * 7] = h.wavelet[k].f[7];
++                }
++
++                aj += w * NB_ELTS_V8;
++            }
++        } else {
++            OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads;
++            OPJ_UINT32 step_j;
++
++            if ((rh / NB_ELTS_V8) < num_jobs) {
++                num_jobs = rh / NB_ELTS_V8;
+             }
++            step_j = ((rh / num_jobs) / NB_ELTS_V8) * NB_ELTS_V8;
++            for (j = 0; j < num_jobs; j++) {
++                opj_dwt97_decode_h_job_t* job;
+ 
+-            aj += w * 4;
++                job = (opj_dwt97_decode_h_job_t*) opj_malloc(sizeof(opj_dwt97_decode_h_job_t));
++                if (!job) {
++                    opj_thread_pool_wait_completion(tp, 0);
++                    opj_aligned_free(h.wavelet);
++                    return OPJ_FALSE;
++                }
++                job->h.wavelet = (opj_v8_t*)opj_aligned_malloc(l_data_size * sizeof(opj_v8_t));
++                if (!job->h.wavelet) {
++                    opj_thread_pool_wait_completion(tp, 0);
++                    opj_free(job);
++                    opj_aligned_free(h.wavelet);
++                    return OPJ_FALSE;
++                }
++                job->h.dn = h.dn;
++                job->h.sn = h.sn;
++                job->h.cas = h.cas;
++                job->h.win_l_x0 = h.win_l_x0;
++                job->h.win_l_x1 = h.win_l_x1;
++                job->h.win_h_x0 = h.win_h_x0;
++                job->h.win_h_x1 = h.win_h_x1;
++                job->rw = rw;
++                job->w = w;
++                job->aj = aj;
++                job->nb_rows = (j + 1 == num_jobs) ? (rh & (OPJ_UINT32)~
++                                                      (NB_ELTS_V8 - 1)) - j * step_j : step_j;
++                aj += w * job->nb_rows;
++                opj_thread_pool_submit_job(tp, opj_dwt97_decode_h_func, job);
++            }
++            opj_thread_pool_wait_completion(tp, 0);
++            j = rh & (OPJ_UINT32)~(NB_ELTS_V8 - 1);
+         }
+ 
+         if (j < rh) {
+             OPJ_UINT32 k;
+-            opj_v4dwt_interleave_h(&h, aj, w, rh - j);
+-            opj_v4dwt_decode(&h);
++            opj_v8dwt_interleave_h(&h, aj, w, rh - j);
++            opj_v8dwt_decode(&h);
+             for (k = 0; k < rw; k++) {
+-                switch (rh - j) {
+-                case 3:
+-                    aj[k + (OPJ_SIZE_T)w * 2] = h.wavelet[k].f[2];
+-                /* FALLTHRU */
+-                case 2:
+-                    aj[k + (OPJ_SIZE_T)w  ] = h.wavelet[k].f[1];
+-                /* FALLTHRU */
+-                case 1:
+-                    aj[k] = h.wavelet[k].f[0];
++                OPJ_UINT32 l;
++                for (l = 0; l < rh - j; l++) {
++                    aj[k + (OPJ_SIZE_T)w  * l ] = h.wavelet[k].f[l];
+                 }
+             }
+         }
+@@ -2616,25 +3440,71 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
+         v.win_h_x1 = (OPJ_UINT32)v.dn;
+ 
+         aj = (OPJ_FLOAT32*) tilec->data;
+-        for (j = rw; j > 3; j -= 4) {
+-            OPJ_UINT32 k;
++        if (num_threads <= 1 || rw < 2 * NB_ELTS_V8) {
++            for (j = rw; j > (NB_ELTS_V8 - 1); j -= NB_ELTS_V8) {
++                OPJ_UINT32 k;
+ 
+-            opj_v4dwt_interleave_v(&v, aj, w, 4);
+-            opj_v4dwt_decode(&v);
++                opj_v8dwt_interleave_v(&v, aj, w, NB_ELTS_V8);
++                opj_v8dwt_decode(&v);
+ 
+-            for (k = 0; k < rh; ++k) {
+-                memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k], 4 * sizeof(OPJ_FLOAT32));
++                for (k = 0; k < rh; ++k) {
++                    memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k], NB_ELTS_V8 * sizeof(OPJ_FLOAT32));
++                }
++                aj += NB_ELTS_V8;
++            }
++        } else {
++            /* "bench_dwt -I" shows that scaling is poor, likely due to RAM
++                transfer being the limiting factor. So limit the number of
++                threads.
++             */
++            OPJ_UINT32 num_jobs = opj_uint_max((OPJ_UINT32)num_threads / 2, 2U);
++            OPJ_UINT32 step_j;
++
++            if ((rw / NB_ELTS_V8) < num_jobs) {
++                num_jobs = rw / NB_ELTS_V8;
++            }
++            step_j = ((rw / num_jobs) / NB_ELTS_V8) * NB_ELTS_V8;
++            for (j = 0; j < num_jobs; j++) {
++                opj_dwt97_decode_v_job_t* job;
++
++                job = (opj_dwt97_decode_v_job_t*) opj_malloc(sizeof(opj_dwt97_decode_v_job_t));
++                if (!job) {
++                    opj_thread_pool_wait_completion(tp, 0);
++                    opj_aligned_free(h.wavelet);
++                    return OPJ_FALSE;
++                }
++                job->v.wavelet = (opj_v8_t*)opj_aligned_malloc(l_data_size * sizeof(opj_v8_t));
++                if (!job->v.wavelet) {
++                    opj_thread_pool_wait_completion(tp, 0);
++                    opj_free(job);
++                    opj_aligned_free(h.wavelet);
++                    return OPJ_FALSE;
++                }
++                job->v.dn = v.dn;
++                job->v.sn = v.sn;
++                job->v.cas = v.cas;
++                job->v.win_l_x0 = v.win_l_x0;
++                job->v.win_l_x1 = v.win_l_x1;
++                job->v.win_h_x0 = v.win_h_x0;
++                job->v.win_h_x1 = v.win_h_x1;
++                job->rh = rh;
++                job->w = w;
++                job->aj = aj;
++                job->nb_columns = (j + 1 == num_jobs) ? (rw & (OPJ_UINT32)~
++                                  (NB_ELTS_V8 - 1)) - j * step_j : step_j;
++                aj += job->nb_columns;
++                opj_thread_pool_submit_job(tp, opj_dwt97_decode_v_func, job);
+             }
+-            aj += 4;
++            opj_thread_pool_wait_completion(tp, 0);
+         }
+ 
+-        if (rw & 0x03) {
++        if (rw & (NB_ELTS_V8 - 1)) {
+             OPJ_UINT32 k;
+ 
+-            j = rw & 0x03;
++            j = rw & (NB_ELTS_V8 - 1);
+ 
+-            opj_v4dwt_interleave_v(&v, aj, w, j);
+-            opj_v4dwt_decode(&v);
++            opj_v8dwt_interleave_v(&v, aj, w, j);
++            opj_v8dwt_decode(&v);
+ 
+             for (k = 0; k < rh; ++k) {
+                 memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k],
+@@ -2652,8 +3522,8 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
+                                    OPJ_UINT32 numres)
+ {
+     opj_sparse_array_int32_t* sa;
+-    opj_v4dwt_t h;
+-    opj_v4dwt_t v;
++    opj_v8dwt_t h;
++    opj_v8dwt_t v;
+     OPJ_UINT32 resno;
+     /* This value matches the maximum left/right extension given in tables */
+     /* F.2 and F.3 of the standard. Note: in opj_tcd_is_subband_area_of_interest() */
+@@ -2703,19 +3573,12 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
+ 
+     l_data_size = opj_dwt_max_resolution(tr, numres);
+     /* overflow check */
+-    if (l_data_size > (SIZE_MAX - 5U)) {
+-        /* FIXME event manager error callback */
+-        opj_sparse_array_int32_free(sa);
+-        return OPJ_FALSE;
+-    }
+-    l_data_size += 5U;
+-    /* overflow check */
+-    if (l_data_size > (SIZE_MAX / sizeof(opj_v4_t))) {
++    if (l_data_size > (SIZE_MAX / sizeof(opj_v8_t))) {
+         /* FIXME event manager error callback */
+         opj_sparse_array_int32_free(sa);
+         return OPJ_FALSE;
+     }
+-    h.wavelet = (opj_v4_t*) opj_aligned_malloc(l_data_size * sizeof(opj_v4_t));
++    h.wavelet = (opj_v8_t*) opj_aligned_malloc(l_data_size * sizeof(opj_v8_t));
+     if (!h.wavelet) {
+         /* FIXME event manager error callback */
+         opj_sparse_array_int32_free(sa);
+@@ -2810,17 +3673,17 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
+         h.win_l_x1 = win_ll_x1;
+         h.win_h_x0 = win_hl_x0;
+         h.win_h_x1 = win_hl_x1;
+-        for (j = 0; j + 3 < rh; j += 4) {
+-            if ((j + 3 >= win_ll_y0 && j < win_ll_y1) ||
+-                    (j + 3 >= win_lh_y0 + (OPJ_UINT32)v.sn &&
++        for (j = 0; j + (NB_ELTS_V8 - 1) < rh; j += NB_ELTS_V8) {
++            if ((j + (NB_ELTS_V8 - 1) >= win_ll_y0 && j < win_ll_y1) ||
++                    (j + (NB_ELTS_V8 - 1) >= win_lh_y0 + (OPJ_UINT32)v.sn &&
+                      j < win_lh_y1 + (OPJ_UINT32)v.sn)) {
+-                opj_v4dwt_interleave_partial_h(&h, sa, j, opj_uint_min(4U, rh - j));
+-                opj_v4dwt_decode(&h);
++                opj_v8dwt_interleave_partial_h(&h, sa, j, opj_uint_min(NB_ELTS_V8, rh - j));
++                opj_v8dwt_decode(&h);
+                 if (!opj_sparse_array_int32_write(sa,
+                                                   win_tr_x0, j,
+-                                                  win_tr_x1, j + 4,
++                                                  win_tr_x1, j + NB_ELTS_V8,
+                                                   (OPJ_INT32*)&h.wavelet[win_tr_x0].f[0],
+-                                                  4, 1, OPJ_TRUE)) {
++                                                  NB_ELTS_V8, 1, OPJ_TRUE)) {
+                     /* FIXME event manager error callback */
+                     opj_sparse_array_int32_free(sa);
+                     opj_aligned_free(h.wavelet);
+@@ -2830,16 +3693,16 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
+         }
+ 
+         if (j < rh &&
+-                ((j + 3 >= win_ll_y0 && j < win_ll_y1) ||
+-                 (j + 3 >= win_lh_y0 + (OPJ_UINT32)v.sn &&
++                ((j + (NB_ELTS_V8 - 1) >= win_ll_y0 && j < win_ll_y1) ||
++                 (j + (NB_ELTS_V8 - 1) >= win_lh_y0 + (OPJ_UINT32)v.sn &&
+                   j < win_lh_y1 + (OPJ_UINT32)v.sn))) {
+-            opj_v4dwt_interleave_partial_h(&h, sa, j, rh - j);
+-            opj_v4dwt_decode(&h);
++            opj_v8dwt_interleave_partial_h(&h, sa, j, rh - j);
++            opj_v8dwt_decode(&h);
+             if (!opj_sparse_array_int32_write(sa,
+                                               win_tr_x0, j,
+                                               win_tr_x1, rh,
+                                               (OPJ_INT32*)&h.wavelet[win_tr_x0].f[0],
+-                                              4, 1, OPJ_TRUE)) {
++                                              NB_ELTS_V8, 1, OPJ_TRUE)) {
+                 /* FIXME event manager error callback */
+                 opj_sparse_array_int32_free(sa);
+                 opj_aligned_free(h.wavelet);
+@@ -2851,17 +3714,17 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
+         v.win_l_x1 = win_ll_y1;
+         v.win_h_x0 = win_lh_y0;
+         v.win_h_x1 = win_lh_y1;
+-        for (j = win_tr_x0; j < win_tr_x1; j += 4) {
+-            OPJ_UINT32 nb_elts = opj_uint_min(4U, win_tr_x1 - j);
++        for (j = win_tr_x0; j < win_tr_x1; j += NB_ELTS_V8) {
++            OPJ_UINT32 nb_elts = opj_uint_min(NB_ELTS_V8, win_tr_x1 - j);
+ 
+-            opj_v4dwt_interleave_partial_v(&v, sa, j, nb_elts);
+-            opj_v4dwt_decode(&v);
++            opj_v8dwt_interleave_partial_v(&v, sa, j, nb_elts);
++            opj_v8dwt_decode(&v);
+ 
+             if (!opj_sparse_array_int32_write(sa,
+                                               j, win_tr_y0,
+                                               j + nb_elts, win_tr_y1,
+                                               (OPJ_INT32*)&h.wavelet[win_tr_y0].f[0],
+-                                              1, 4, OPJ_TRUE)) {
++                                              1, NB_ELTS_V8, OPJ_TRUE)) {
+                 /* FIXME event manager error callback */
+                 opj_sparse_array_int32_free(sa);
+                 opj_aligned_free(h.wavelet);
+@@ -2894,7 +3757,7 @@ OPJ_BOOL opj_dwt_decode_real(opj_tcd_t *p_tcd,
+                              OPJ_UINT32 numres)
+ {
+     if (p_tcd->whole_tile_decoding) {
+-        return opj_dwt_decode_tile_97(tilec, numres);
++        return opj_dwt_decode_tile_97(p_tcd->thread_pool, tilec, numres);
+     } else {
+         return opj_dwt_decode_partial_97(tilec, numres);
+     }
+diff --git a/third_party/libopenjpeg20/dwt.h b/third_party/libopenjpeg20/dwt.h
+index 4f63e524a60fd75577e5b579438990cfbf6d540f..215061e6b9cf010da87b652b9a5f65f212e7f84b 100644
+--- a/third_party/libopenjpeg20/dwt.h
++++ b/third_party/libopenjpeg20/dwt.h
+@@ -56,9 +56,11 @@ DWT.C are used by some function in TCD.C.
+ /**
+ Forward 5-3 wavelet transform in 2-D.
+ Apply a reversible DWT transform to a component of an image.
++@param p_tcd TCD handle
+ @param tilec Tile component information (current tile)
+ */
+-OPJ_BOOL opj_dwt_encode(opj_tcd_tilecomp_t * tilec);
++OPJ_BOOL opj_dwt_encode(opj_tcd_t *p_tcd,
++                        opj_tcd_tilecomp_t * tilec);
+ 
+ /**
+ Inverse 5-3 wavelet transform in 2-D.
+@@ -71,12 +73,6 @@ OPJ_BOOL opj_dwt_decode(opj_tcd_t *p_tcd,
+                         opj_tcd_tilecomp_t* tilec,
+                         OPJ_UINT32 numres);
+ 
+-/**
+-Get the gain of a subband for the reversible 5-3 DWT.
+-@param orient Number that identifies the subband (0->LL, 1->HL, 2->LH, 3->HH)
+-@return Returns 0 if orient = 0, returns 1 if orient = 1 or 2, returns 2 otherwise
+-*/
+-OPJ_UINT32 opj_dwt_getgain(OPJ_UINT32 orient) ;
+ /**
+ Get the norm of a wavelet function of a subband at a specified level for the reversible 5-3 DWT.
+ @param level Level of the wavelet function
+@@ -87,9 +83,11 @@ OPJ_FLOAT64 opj_dwt_getnorm(OPJ_UINT32 level, OPJ_UINT32 orient);
+ /**
+ Forward 9-7 wavelet transform in 2-D.
+ Apply an irreversible DWT transform to a component of an image.
++@param p_tcd TCD handle
+ @param tilec Tile component information (current tile)
+ */
+-OPJ_BOOL opj_dwt_encode_real(opj_tcd_tilecomp_t * tilec);
++OPJ_BOOL opj_dwt_encode_real(opj_tcd_t *p_tcd,
++                             opj_tcd_tilecomp_t * tilec);
+ /**
+ Inverse 9-7 wavelet transform in 2-D.
+ Apply an irreversible inverse DWT transform to a component of an image.
+@@ -101,12 +99,6 @@ OPJ_BOOL opj_dwt_decode_real(opj_tcd_t *p_tcd,
+                              opj_tcd_tilecomp_t* OPJ_RESTRICT tilec,
+                              OPJ_UINT32 numres);
+ 
+-/**
+-Get the gain of a subband for the irreversible 9-7 DWT.
+-@param orient Number that identifies the subband (0->LL, 1->HL, 2->LH, 3->HH)
+-@return Returns the gain of the 9-7 wavelet transform
+-*/
+-OPJ_UINT32 opj_dwt_getgain_real(OPJ_UINT32 orient);
+ /**
+ Get the norm of a wavelet function of a subband at a specified level for the irreversible 9-7 DWT
+ @param level Level of the wavelet function
+diff --git a/third_party/libopenjpeg20/j2k.c b/third_party/libopenjpeg20/j2k.c
+index c6473743df69839a1404adb59af4ca18035e02c6..711dd73e87d7a6888353a9790eb0741b6b06cc8d 100644
+--- a/third_party/libopenjpeg20/j2k.c
++++ b/third_party/libopenjpeg20/j2k.c
+@@ -400,14 +400,14 @@ static OPJ_BOOL opj_j2k_setup_header_writing(opj_j2k_t *p_j2k,
+ static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k,
+         OPJ_BYTE * p_data,
+         OPJ_UINT32 * p_data_written,
+-        OPJ_UINT32 p_total_data_size,
++        OPJ_UINT32 total_data_size,
+         opj_stream_private_t *p_stream,
+         struct opj_event_mgr * p_manager);
+ 
+ static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k,
+         OPJ_BYTE * p_data,
+         OPJ_UINT32 * p_data_written,
+-        OPJ_UINT32 p_total_data_size,
++        OPJ_UINT32 total_data_size,
+         opj_stream_private_t *p_stream,
+         struct opj_event_mgr * p_manager);
+ 
+@@ -832,14 +832,14 @@ static OPJ_BOOL opj_j2k_write_tlm(opj_j2k_t *p_j2k,
+  *
+  * @param       p_j2k            J2K codec.
+  * @param       p_data           Output buffer
+- * @param       p_total_data_size Output buffer size
++ * @param       total_data_size  Output buffer size
+  * @param       p_data_written   Number of bytes written into stream
+  * @param       p_stream         the stream to write data to.
+  * @param       p_manager        the user event manager.
+ */
+ static OPJ_BOOL opj_j2k_write_sot(opj_j2k_t *p_j2k,
+                                   OPJ_BYTE * p_data,
+-                                  OPJ_UINT32 p_total_data_size,
++                                  OPJ_UINT32 total_data_size,
+                                   OPJ_UINT32 * p_data_written,
+                                   const opj_stream_private_t *p_stream,
+                                   opj_event_mgr_t * p_manager);
+@@ -879,11 +879,13 @@ static OPJ_BOOL opj_j2k_read_sot(opj_j2k_t *p_j2k,
+ /**
+  * Writes the SOD marker (Start of data)
+  *
++ * This also writes optional PLT markers (before SOD)
++ *
+  * @param       p_j2k               J2K codec.
+  * @param       p_tile_coder        FIXME DOC
+  * @param       p_data              FIXME DOC
+  * @param       p_data_written      FIXME DOC
+- * @param       p_total_data_size   FIXME DOC
++ * @param       total_data_size     Number of bytes available in the output buffer
+  * @param       p_stream            the stream to write data to.
+  * @param       p_manager           the user event manager.
+ */
+@@ -891,7 +893,7 @@ static OPJ_BOOL opj_j2k_write_sod(opj_j2k_t *p_j2k,
+                                   opj_tcd_t * p_tile_coder,
+                                   OPJ_BYTE * p_data,
+                                   OPJ_UINT32 * p_data_written,
+-                                  OPJ_UINT32 p_total_data_size,
++                                  OPJ_UINT32 total_data_size,
+                                   const opj_stream_private_t *p_stream,
+                                   opj_event_mgr_t * p_manager);
+ 
+@@ -1219,6 +1221,7 @@ static OPJ_BOOL opj_j2k_write_epc(opj_j2k_t *p_j2k,
+  * A nice message is outputted at errors.
+  *
+  * @param       p_pocs                  the progression order changes.
++ * @param       tileno                  the tile number of interest
+  * @param       p_nb_pocs               the number of progression order changes.
+  * @param       p_nb_resolutions        the number of resolutions.
+  * @param       numcomps                the number of components
+@@ -1228,6 +1231,7 @@ static OPJ_BOOL opj_j2k_write_epc(opj_j2k_t *p_j2k,
+  * @return      true if the pocs are valid.
+  */
+ static OPJ_BOOL opj_j2k_check_poc_val(const opj_poc_t *p_pocs,
++                                      OPJ_UINT32 tileno,
+                                       OPJ_UINT32 p_nb_pocs,
+                                       OPJ_UINT32 p_nb_resolutions,
+                                       OPJ_UINT32 numcomps,
+@@ -1282,6 +1286,13 @@ static void opj_j2k_set_cinema_parameters(opj_cparameters_t *parameters,
+ static OPJ_BOOL opj_j2k_is_cinema_compliant(opj_image_t *image, OPJ_UINT16 rsiz,
+         opj_event_mgr_t *p_manager);
+ 
++static void opj_j2k_set_imf_parameters(opj_cparameters_t *parameters,
++                                       opj_image_t *image, opj_event_mgr_t *p_manager);
++
++static OPJ_BOOL opj_j2k_is_imf_compliant(opj_cparameters_t *parameters,
++        opj_image_t *image,
++        opj_event_mgr_t *p_manager);
++
+ /**
+  * Checks for invalid number of tile-parts in SOT marker (TPsot==TNsot). See issue 254.
+  *
+@@ -1615,6 +1626,7 @@ const char *opj_j2k_convert_progression_order(OPJ_PROG_ORDER prg_order)
+ }
+ 
+ static OPJ_BOOL opj_j2k_check_poc_val(const opj_poc_t *p_pocs,
++                                      OPJ_UINT32 tileno,
+                                       OPJ_UINT32 p_nb_pocs,
+                                       OPJ_UINT32 p_nb_resolutions,
+                                       OPJ_UINT32 p_num_comps,
+@@ -1628,7 +1640,8 @@ static OPJ_BOOL opj_j2k_check_poc_val(const opj_poc_t *p_pocs,
+     OPJ_UINT32 step_r = p_num_comps * step_c;
+     OPJ_UINT32 step_l = p_nb_resolutions * step_r;
+     OPJ_BOOL loss = OPJ_FALSE;
+-    OPJ_UINT32 layno0 = 0;
++
++    assert(p_nb_pocs > 0);
+ 
+     packet_array = (OPJ_UINT32*) opj_calloc(step_l * p_num_layers,
+                                             sizeof(OPJ_UINT32));
+@@ -1638,63 +1651,37 @@ static OPJ_BOOL opj_j2k_check_poc_val(const opj_poc_t *p_pocs,
+         return OPJ_FALSE;
+     }
+ 
+-    if (p_nb_pocs == 0) {
+-        opj_free(packet_array);
+-        return OPJ_TRUE;
+-    }
++    /* iterate through all the pocs that match our tile of interest. */
++    for (i = 0; i < p_nb_pocs; ++i) {
++        const opj_poc_t *poc = &p_pocs[i];
++        if (tileno + 1 == poc->tile) {
++            index = step_r * poc->resno0;
+ 
+-    index = step_r * p_pocs->resno0;
+-    /* take each resolution for each poc */
+-    for (resno = p_pocs->resno0 ; resno < p_pocs->resno1 ; ++resno) {
+-        OPJ_UINT32 res_index = index + p_pocs->compno0 * step_c;
++            /* take each resolution for each poc */
++            for (resno = poc->resno0 ;
++                    resno < opj_uint_min(poc->resno1, p_nb_resolutions); ++resno) {
++                OPJ_UINT32 res_index = index + poc->compno0 * step_c;
+ 
+-        /* take each comp of each resolution for each poc */
+-        for (compno = p_pocs->compno0 ; compno < p_pocs->compno1 ; ++compno) {
+-            OPJ_UINT32 comp_index = res_index + layno0 * step_l;
+-
+-            /* and finally take each layer of each res of ... */
+-            for (layno = layno0; layno < p_pocs->layno1 ; ++layno) {
+-                /*index = step_r * resno + step_c * compno + step_l * layno;*/
+-                packet_array[comp_index] = 1;
+-                comp_index += step_l;
+-            }
+-
+-            res_index += step_c;
+-        }
+-
+-        index += step_r;
+-    }
+-    ++p_pocs;
++                /* take each comp of each resolution for each poc */
++                for (compno = poc->compno0 ;
++                        compno < opj_uint_min(poc->compno1, p_num_comps); ++compno) {
++                    /* The layer index always starts at zero for every progression. */
++                    const OPJ_UINT32 layno0 = 0;
++                    OPJ_UINT32 comp_index = res_index + layno0 * step_l;
+ 
+-    /* iterate through all the pocs */
+-    for (i = 1; i < p_nb_pocs ; ++i) {
+-        OPJ_UINT32 l_last_layno1 = (p_pocs - 1)->layno1 ;
+-
+-        layno0 = (p_pocs->layno1 > l_last_layno1) ? l_last_layno1 : 0;
+-        index = step_r * p_pocs->resno0;
+-
+-        /* take each resolution for each poc */
+-        for (resno = p_pocs->resno0 ; resno < p_pocs->resno1 ; ++resno) {
+-            OPJ_UINT32 res_index = index + p_pocs->compno0 * step_c;
+-
+-            /* take each comp of each resolution for each poc */
+-            for (compno = p_pocs->compno0 ; compno < p_pocs->compno1 ; ++compno) {
+-                OPJ_UINT32 comp_index = res_index + layno0 * step_l;
++                    /* and finally take each layer of each res of ... */
++                    for (layno = layno0; layno < opj_uint_min(poc->layno1, p_num_layers);
++                            ++layno) {
++                        packet_array[comp_index] = 1;
++                        comp_index += step_l;
++                    }
+ 
+-                /* and finally take each layer of each res of ... */
+-                for (layno = layno0; layno < p_pocs->layno1 ; ++layno) {
+-                    /*index = step_r * resno + step_c * compno + step_l * layno;*/
+-                    packet_array[comp_index] = 1;
+-                    comp_index += step_l;
++                    res_index += step_c;
+                 }
+ 
+-                res_index += step_c;
++                index += step_r;
+             }
+-
+-            index += step_r;
+         }
+-
+-        ++p_pocs;
+     }
+ 
+     index = 0;
+@@ -1702,7 +1689,13 @@ static OPJ_BOOL opj_j2k_check_poc_val(const opj_poc_t *p_pocs,
+         for (resno = 0; resno < p_nb_resolutions; ++resno) {
+             for (compno = 0; compno < p_num_comps; ++compno) {
+                 loss |= (packet_array[index] != 1);
+-                /*index = step_r * resno + step_c * compno + step_l * layno;*/
++#ifdef DEBUG_VERBOSE
++                if (packet_array[index] != 1) {
++                    fprintf(stderr,
++                            "Missing packet in POC: layno=%d resno=%d compno=%d\n",
++                            layno, resno, compno);
++                }
++#endif
+                 index += step_c;
+             }
+         }
+@@ -3454,6 +3447,28 @@ static OPJ_UINT32 opj_j2k_get_specific_header_sizes(opj_j2k_t *p_j2k)
+ 
+     l_nb_bytes += opj_j2k_get_max_poc_size(p_j2k);
+ 
++    if (p_j2k->m_specific_param.m_encoder.m_PLT) {
++        /* Reserve space for PLT markers */
++
++        OPJ_UINT32 i;
++        const opj_cp_t * l_cp = &(p_j2k->m_cp);
++        OPJ_UINT32 l_max_packet_count = 0;
++        for (i = 0; i < l_cp->th * l_cp->tw; ++i) {
++            l_max_packet_count = opj_uint_max(l_max_packet_count,
++                                              opj_get_encoding_packet_count(p_j2k->m_private_image, l_cp, i));
++        }
++        /* Minimum 6 bytes per PLT marker, and at a minimum (taking a pessimistic */
++        /* estimate of 4 bytes for a packet size), one can write */
++        /* (65536-6) / 4 = 16382 packet sizes per PLT marker */
++        p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT =
++            6 * opj_uint_ceildiv(l_max_packet_count, 16382);
++        /* Maximum 5 bytes per packet to encode a full UINT32 */
++        p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT +=
++            l_nb_bytes += 5 * l_max_packet_count;
++        p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT += 1;
++        l_nb_bytes += p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT;
++    }
++
+     /*** DEVELOPER CORNER, Add room for your headers ***/
+ 
+     return l_nb_bytes;
+@@ -4207,7 +4222,7 @@ static OPJ_BOOL opj_j2k_write_tlm(opj_j2k_t *p_j2k,
+ 
+ static OPJ_BOOL opj_j2k_write_sot(opj_j2k_t *p_j2k,
+                                   OPJ_BYTE * p_data,
+-                                  OPJ_UINT32 p_total_data_size,
++                                  OPJ_UINT32 total_data_size,
+                                   OPJ_UINT32 * p_data_written,
+                                   const opj_stream_private_t *p_stream,
+                                   opj_event_mgr_t * p_manager
+@@ -4220,7 +4235,7 @@ static OPJ_BOOL opj_j2k_write_sot(opj_j2k_t *p_j2k,
+ 
+     OPJ_UNUSED(p_stream);
+ 
+-    if (p_total_data_size < 12) {
++    if (total_data_size < 12) {
+         opj_event_msg(p_manager, EVT_ERROR,
+                       "Not enough bytes in output buffer to write SOT marker\n");
+         return OPJ_FALSE;
+@@ -4613,17 +4628,105 @@ static OPJ_BOOL opj_j2k_read_sot(opj_j2k_t *p_j2k,
+     return OPJ_TRUE;
+ }
+ 
++/** Write one or more PLT markers in the provided buffer (p_data).
++ * On success, *p_data_written is set to the total number of bytes written.
++ * Returns OPJ_FALSE if a marker index (Zplt) above 255 would be needed. */
++static OPJ_BOOL opj_j2k_write_plt_in_memory(opj_j2k_t *p_j2k,
++        opj_tcd_marker_info_t* marker_info,
++        OPJ_BYTE * p_data,
++        OPJ_UINT32 * p_data_written,
++        opj_event_mgr_t * p_manager)
++{
++    OPJ_BYTE Zplt = 0;
++    OPJ_UINT16 Lplt;
++    OPJ_BYTE* p_data_start = p_data;
++    OPJ_BYTE* p_data_Lplt = p_data + 2;
++    OPJ_UINT32 i;
++
++    OPJ_UNUSED(p_j2k);
++
++    opj_write_bytes(p_data, J2K_MS_PLT, 2);
++    p_data += 2;
++
++    /* Reserve space for Lplt (patched once the segment length is known) */
++    p_data += 2;
++
++    opj_write_bytes(p_data, Zplt, 1);
++    p_data += 1;
++
++    Lplt = 3; /* 2 bytes for Lplt itself + 1 byte for Zplt */
++
++    for (i = 0; i < marker_info->packet_count; i++) {
++        OPJ_BYTE var_bytes[5]; /* 7 payload bits per byte: 5 bytes hold a UINT32 */
++        OPJ_UINT8 var_bytes_size = 0;
++        OPJ_UINT32 packet_size = marker_info->p_packet_size[i];
++
++        /* Split into 7-bit groups, LSB first; bit 0x80 marks all but the lowest group */
++        var_bytes[var_bytes_size] = (OPJ_BYTE)(packet_size & 0x7f);
++        var_bytes_size ++;
++        packet_size >>= 7;
++        while (packet_size > 0) {
++            var_bytes[var_bytes_size] = (OPJ_BYTE)((packet_size & 0x7f) | 0x80);
++            var_bytes_size ++;
++            packet_size >>= 7;
++        }
++
++        /* Check if that can fit in the current PLT marker (Lplt is 16 bits wide). */
++        /* If not, finish the current one, and start a new one */
++        if (Lplt + var_bytes_size > 65535) {
++            if (Zplt == 255) {
++                opj_event_msg(p_manager, EVT_ERROR,
++                              "More than 255 PLT markers would be needed for current tile-part !\n");
++                return OPJ_FALSE;
++            }
++
++            /* Patch Lplt of the segment being closed */
++            opj_write_bytes(p_data_Lplt, Lplt, 2);
++
++            /* Start new segment */
++            opj_write_bytes(p_data, J2K_MS_PLT, 2);
++            p_data += 2;
++
++            /* Reserve space for Lplt and remember where to patch it */
++            p_data_Lplt = p_data;
++            p_data += 2;
++
++            Zplt ++;
++            opj_write_bytes(p_data, Zplt, 1);
++            p_data += 1;
++
++            Lplt = 3;
++        }
++
++        Lplt = (OPJ_UINT16)(Lplt + var_bytes_size);
++
++        /* Serialize the groups most significant first; the last byte has 0x80 clear */
++        for (; var_bytes_size > 0; --var_bytes_size) {
++            opj_write_bytes(p_data, var_bytes[var_bytes_size - 1], 1);
++            p_data += 1;
++        }
++    }
++
++    *p_data_written = (OPJ_UINT32)(p_data - p_data_start);
++
++    /* Patch Lplt of the last segment */
++    opj_write_bytes(p_data_Lplt, Lplt, 2);
++
++    return OPJ_TRUE;
++}
++
+ static OPJ_BOOL opj_j2k_write_sod(opj_j2k_t *p_j2k,
+                                   opj_tcd_t * p_tile_coder,
+                                   OPJ_BYTE * p_data,
+                                   OPJ_UINT32 * p_data_written,
+-                                  OPJ_UINT32 p_total_data_size,
++                                  OPJ_UINT32 total_data_size,
+                                   const opj_stream_private_t *p_stream,
+                                   opj_event_mgr_t * p_manager
+                                  )
+ {
+     opj_codestream_info_t *l_cstr_info = 00;
+     OPJ_UINT32 l_remaining_data;
++    opj_tcd_marker_info_t* marker_info = NULL;
+ 
+     /* preconditions */
+     assert(p_j2k != 00);
+@@ -4632,7 +4735,7 @@ static OPJ_BOOL opj_j2k_write_sod(opj_j2k_t *p_j2k,
+ 
+     OPJ_UNUSED(p_stream);
+ 
+-    if (p_total_data_size < 4) {
++    if (total_data_size < 4) {
+         opj_event_msg(p_manager, EVT_ERROR,
+                       "Not enough bytes in output buffer to write SOD marker\n");
+         return OPJ_FALSE;
+@@ -4640,10 +4743,9 @@ static OPJ_BOOL opj_j2k_write_sod(opj_j2k_t *p_j2k,
+ 
+     opj_write_bytes(p_data, J2K_MS_SOD,
+                     2);                                 /* SOD */
+-    p_data += 2;
+ 
+     /* make room for the EOF marker */
+-    l_remaining_data =  p_total_data_size - 4;
++    l_remaining_data =  total_data_size - 4;
+ 
+     /* update tile coder */
+     p_tile_coder->tp_num =
+@@ -4690,15 +4792,69 @@ static OPJ_BOOL opj_j2k_write_sod(opj_j2k_t *p_j2k,
+ 
+     *p_data_written = 0;
+ 
+-    if (! opj_tcd_encode_tile(p_tile_coder, p_j2k->m_current_tile_number, p_data,
++    if (p_j2k->m_specific_param.m_encoder.m_PLT) {
++        marker_info = opj_tcd_marker_info_create(
++                          p_j2k->m_specific_param.m_encoder.m_PLT);
++        if (marker_info == NULL) {
++            opj_event_msg(p_manager, EVT_ERROR,
++                          "Cannot encode tile: opj_tcd_marker_info_create() failed\n");
++            return OPJ_FALSE;
++        }
++    }
++
++    if (l_remaining_data <
++            p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT) {
++        opj_event_msg(p_manager, EVT_ERROR,
++                      "Not enough bytes in output buffer to write SOD marker\n");
++        opj_tcd_marker_info_destroy(marker_info);
++        return OPJ_FALSE;
++    }
++    l_remaining_data -= p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT;
++
++    if (! opj_tcd_encode_tile(p_tile_coder, p_j2k->m_current_tile_number,
++                              p_data + 2,
+                               p_data_written, l_remaining_data, l_cstr_info,
++                              marker_info,
+                               p_manager)) {
+         opj_event_msg(p_manager, EVT_ERROR, "Cannot encode tile\n");
++        opj_tcd_marker_info_destroy(marker_info);
+         return OPJ_FALSE;
+     }
+ 
++    /* For SOD */
+     *p_data_written += 2;
+ 
++    if (p_j2k->m_specific_param.m_encoder.m_PLT) {
++        OPJ_UINT32 l_data_written_PLT = 0;
++        OPJ_BYTE* p_PLT_buffer = (OPJ_BYTE*)opj_malloc(
++                                     p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT);
++        if (!p_PLT_buffer) {
++            opj_event_msg(p_manager, EVT_ERROR, "Cannot allocate memory\n");
++            opj_tcd_marker_info_destroy(marker_info);
++            return OPJ_FALSE;
++        }
++        if (!opj_j2k_write_plt_in_memory(p_j2k,
++                                         marker_info,
++                                         p_PLT_buffer,
++                                         &l_data_written_PLT,
++                                         p_manager)) {
++            opj_tcd_marker_info_destroy(marker_info);
++            opj_free(p_PLT_buffer);
++            return OPJ_FALSE;
++        }
++
++        assert(l_data_written_PLT <=
++               p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT);
++
++        /* Move PLT marker(s) before SOD */
++        memmove(p_data + l_data_written_PLT, p_data, *p_data_written);
++        memcpy(p_data, p_PLT_buffer, l_data_written_PLT);
++        opj_free(p_PLT_buffer);
++        *p_data_written += l_data_written_PLT;
++    }
++
++    opj_tcd_marker_info_destroy(marker_info);
++
+     return OPJ_TRUE;
+ }
+ 
+@@ -5048,7 +5204,7 @@ static OPJ_BOOL opj_j2k_update_rates(opj_j2k_t *p_j2k,
+     OPJ_FLOAT32 * l_rates = 0;
+     OPJ_FLOAT32 l_sot_remove;
+     OPJ_UINT32 l_bits_empty, l_size_pixel;
+-    OPJ_UINT32 l_tile_size = 0;
++    OPJ_UINT64 l_tile_size = 0;
+     OPJ_UINT32 l_last_res;
+     OPJ_FLOAT32(* l_tp_stride_func)(opj_tcp_t *) = 00;
+ 
+@@ -5092,25 +5248,12 @@ static OPJ_BOOL opj_j2k_update_rates(opj_j2k_t *p_j2k,
+             l_rates = l_tcp->rates;
+ 
+             /* Modification of the RATE >> */
+-            if (*l_rates > 0.0f) {
+-                *l_rates = (((OPJ_FLOAT32)(l_size_pixel * (OPJ_UINT32)(l_x1 - l_x0) *
+-                                           (OPJ_UINT32)(l_y1 - l_y0)))
+-                            /
+-                            ((*l_rates) * (OPJ_FLOAT32)l_bits_empty)
+-                           )
+-                           -
+-                           l_offset;
+-            }
+-
+-            ++l_rates;
+-
+-            for (k = 1; k < l_tcp->numlayers; ++k) {
++            for (k = 0; k < l_tcp->numlayers; ++k) {
+                 if (*l_rates > 0.0f) {
+-                    *l_rates = (((OPJ_FLOAT32)(l_size_pixel * (OPJ_UINT32)(l_x1 - l_x0) *
+-                                               (OPJ_UINT32)(l_y1 - l_y0)))
+-                                /
+-                                ((*l_rates) * (OPJ_FLOAT32)l_bits_empty)
+-                               )
++                    *l_rates = (OPJ_FLOAT32)(((OPJ_FLOAT64)l_size_pixel * (OPJ_UINT32)(
++                                                  l_x1 - l_x0) *
++                                              (OPJ_UINT32)(l_y1 - l_y0))
++                                             / ((*l_rates) * (OPJ_FLOAT32)l_bits_empty))
+                                -
+                                l_offset;
+                 }
+@@ -5170,12 +5313,11 @@ static OPJ_BOOL opj_j2k_update_rates(opj_j2k_t *p_j2k,
+     l_tile_size = 0;
+ 
+     for (i = 0; i < l_image->numcomps; ++i) {
+-        l_tile_size += (opj_uint_ceildiv(l_cp->tdx, l_img_comp->dx)
+-                        *
+-                        opj_uint_ceildiv(l_cp->tdy, l_img_comp->dy)
+-                        *
+-                        l_img_comp->prec
+-                       );
++        l_tile_size += (OPJ_UINT64)opj_uint_ceildiv(l_cp->tdx, l_img_comp->dx)
++                       *
++                       opj_uint_ceildiv(l_cp->tdy, l_img_comp->dy)
++                       *
++                       l_img_comp->prec;
+ 
+         ++l_img_comp;
+     }
+@@ -5186,7 +5328,7 @@ static OPJ_BOOL opj_j2k_update_rates(opj_j2k_t *p_j2k,
+     /* bin/test_tile_encoder 1 256 256 32 32 8 0 reversible_with_precinct.j2k 4 4 3 0 0 1 16 16 */
+     /* TODO revise this to take into account the overhead linked to the */
+     /* number of packets and number of code blocks in packets */
+-    l_tile_size = (OPJ_UINT32)(l_tile_size * 1.4 / 8);
++    l_tile_size = (OPJ_UINT64)((double)l_tile_size * 1.4 / 8);
+ 
+     /* Arbitrary amount to make the following work: */
+     /* bin/test_tile_encoder 1 256 256 17 16 8 0 reversible_no_precinct.j2k 4 4 3 0 0 1 */
+@@ -5194,14 +5336,21 @@ static OPJ_BOOL opj_j2k_update_rates(opj_j2k_t *p_j2k,
+ 
+     l_tile_size += opj_j2k_get_specific_header_sizes(p_j2k);
+ 
+-    p_j2k->m_specific_param.m_encoder.m_encoded_tile_size = l_tile_size;
++    if (l_tile_size > UINT_MAX) {
++        l_tile_size = UINT_MAX;
++    }
++
++    p_j2k->m_specific_param.m_encoder.m_encoded_tile_size = (OPJ_UINT32)l_tile_size;
+     p_j2k->m_specific_param.m_encoder.m_encoded_tile_data =
+         (OPJ_BYTE *) opj_malloc(p_j2k->m_specific_param.m_encoder.m_encoded_tile_size);
+     if (p_j2k->m_specific_param.m_encoder.m_encoded_tile_data == 00) {
++        opj_event_msg(p_manager, EVT_ERROR,
++                      "Not enough memory to allocate m_encoded_tile_data. %u MB required\n",
++                      (OPJ_UINT32)(l_tile_size / 1024 / 1024));
+         return OPJ_FALSE;
+     }
+ 
+-    if (OPJ_IS_CINEMA(l_cp->rsiz)) {
++    if (OPJ_IS_CINEMA(l_cp->rsiz) || OPJ_IS_IMF(l_cp->rsiz)) {
+         p_j2k->m_specific_param.m_encoder.m_tlm_sot_offsets_buffer =
+             (OPJ_BYTE *) opj_malloc(5 *
+                                     p_j2k->m_specific_param.m_encoder.m_total_tile_parts);
+@@ -6633,7 +6782,7 @@ static void opj_j2k_set_cinema_parameters(opj_cparameters_t *parameters,
+     }
+ 
+     /* Precincts */
+-    parameters->csty |= 0x01;
++    parameters->csty |= J2K_CP_CSTY_PRT;
+     if (parameters->numresolution == 1) {
+         parameters->res_spec = 1;
+         parameters->prcw_init[0] = 128;
+@@ -6759,6 +6908,589 @@ static OPJ_BOOL opj_j2k_is_cinema_compliant(opj_image_t *image, OPJ_UINT16 rsiz,
+     return OPJ_TRUE;
+ }
+ 
++static int opj_j2k_get_imf_max_NL(opj_cparameters_t *parameters,
++                                  opj_image_t *image)
++{
++    /* Returns the maximum number of decomposition levels for the IMF profile in rsiz, or -1 if none is set here */
++    const OPJ_UINT16 rsiz = parameters->rsiz;
++    const OPJ_UINT16 profile = OPJ_GET_IMF_PROFILE(rsiz);
++    const OPJ_UINT32 XTsiz = parameters->tile_size_on ? (OPJ_UINT32)
++                             parameters->cp_tdx : image->x1; /* cp_tdx when tiling is on, else image->x1 */
++    switch (profile) {
++    case OPJ_PROFILE_IMF_2K: /* fixed limit, independent of XTsiz */
++        return 5;
++    case OPJ_PROFILE_IMF_4K: /* fixed limit, independent of XTsiz */
++        return 6;
++    case OPJ_PROFILE_IMF_8K: /* fixed limit, independent of XTsiz */
++        return 7;
++    case OPJ_PROFILE_IMF_2K_R: { /* limit depends on XTsiz */
++        if (XTsiz >= 2048) {
++            return 5;
++        } else if (XTsiz >= 1024) {
++            return 4;
++        }
++        break; /* XTsiz < 1024: no limit here, -1 is returned below */
++    }
++    case OPJ_PROFILE_IMF_4K_R: { /* limit depends on XTsiz */
++        if (XTsiz >= 4096) {
++            return 6;
++        } else if (XTsiz >= 2048) {
++            return 5;
++        } else if (XTsiz >= 1024) {
++            return 4;
++        }
++        break; /* XTsiz < 1024: no limit here, -1 is returned below */
++    }
++    case OPJ_PROFILE_IMF_8K_R: { /* limit depends on XTsiz */
++        if (XTsiz >= 8192) {
++            return 7;
++        } else if (XTsiz >= 4096) {
++            return 6;
++        } else if (XTsiz >= 2048) {
++            return 5;
++        } else if (XTsiz >= 1024) {
++            return 4;
++        }
++        break; /* XTsiz < 1024: no limit here, -1 is returned below */
++    }
++    default: /* any other profile value */
++        break;
++    }
++    return -1; /* negative result: opj_j2k_set_imf_parameters only applies values >= 0 */
++}
++
++static void opj_j2k_set_imf_parameters(opj_cparameters_t *parameters,
++                                       opj_image_t *image, opj_event_mgr_t *p_manager)
++{
++    const OPJ_UINT16 rsiz = parameters->rsiz;
++    const OPJ_UINT16 profile = OPJ_GET_IMF_PROFILE(rsiz);
++    /* Adjusts *parameters for IMF encoding; most fields are only changed while still at their default value. */
++    OPJ_UNUSED(p_manager);
++
++    /* Override defaults set by opj_set_default_encoder_parameters (only if both dimensions are still default) */
++    if (parameters->cblockw_init == OPJ_COMP_PARAM_DEFAULT_CBLOCKW &&
++            parameters->cblockh_init == OPJ_COMP_PARAM_DEFAULT_CBLOCKH) {
++        parameters->cblockw_init = 32;
++        parameters->cblockh_init = 32;
++    }
++
++    /* One tile part for each component (set unconditionally) */
++    parameters->tp_flag = 'C';
++    parameters->tp_on = 1;
++
++    if (parameters->prog_order == OPJ_COMP_PARAM_DEFAULT_PROG_ORDER) {
++        parameters->prog_order = OPJ_CPRL; /* opj_j2k_is_imf_compliant warns on any other order */
++    }
++
++    if (profile == OPJ_PROFILE_IMF_2K ||
++            profile == OPJ_PROFILE_IMF_4K ||
++            profile == OPJ_PROFILE_IMF_8K) {
++        /* 9-7 transform (forced for these three profiles) */
++        parameters->irreversible = 1;
++    }
++
++    /* Adjust the number of resolutions if set to its default and the image origin is (0,0) */
++    if (parameters->numresolution == OPJ_COMP_PARAM_DEFAULT_NUMRESOLUTION &&
++            image->x0 == 0 &&
++            image->y0 == 0) {
++        const int max_NL = opj_j2k_get_imf_max_NL(parameters, image);
++        if (max_NL >= 0 && parameters->numresolution > max_NL) {
++            parameters->numresolution = max_NL + 1; /* max_NL levels plus one */
++        }
++
++        /* Note: below is generic logic */
++        if (!parameters->tile_size_on) {
++            while (parameters->numresolution > 0) { /* shrink until x1 and y1 are both >= 2^(numresolution-1) */
++                if (image->x1 < (1U << ((OPJ_UINT32)parameters->numresolution - 1U))) {
++                    parameters->numresolution --;
++                    continue;
++                }
++                if (image->y1 < (1U << ((OPJ_UINT32)parameters->numresolution - 1U))) {
++                    parameters->numresolution --;
++                    continue;
++                }
++                break;
++            }
++        }
++    }
++
++    /* Set default precincts (only when no coding-style flag has been set) */
++    if (parameters->csty == 0) {
++        parameters->csty |= J2K_CP_CSTY_PRT;
++        if (parameters->numresolution == 1) {
++            parameters->res_spec = 1;
++            parameters->prcw_init[0] = 128;
++            parameters->prch_init[0] = 128;
++        } else {
++            int i;
++            parameters->res_spec = parameters->numresolution - 1;
++            for (i = 0; i < parameters->res_spec; i++) { /* 256x256 for each of the res_spec entries */
++                parameters->prcw_init[i] = 256;
++                parameters->prch_init[i] = 256;
++            }
++        }
++    }
++}
++
++/* Table A.53 from JPEG2000 standard: maximum sublevel, indexed by IMF mainlevel */
++static const OPJ_UINT16 tabMaxSubLevelFromMainLevel[] = {
++    15, /* mainlevel 0: unspecified */
++    1, /* mainlevel 1 */
++    1, /* mainlevel 2 */
++    1, /* mainlevel 3 */
++    2, /* mainlevel 4 */
++    3, /* mainlevel 5 */
++    4, /* mainlevel 6 */
++    5, /* mainlevel 7 */
++    6, /* mainlevel 8 */
++    7, /* mainlevel 9 */
++    8, /* mainlevel 10 */
++    9 /* mainlevel 11 */
++};
++
++/**
++ * Checks compression parameters and image against the IMF profile in rsiz.
++ * Every violation is reported through p_manager as an EVT_WARNING; checks
++ * continue after a failure so that all violations get reported.
++ * @return OPJ_TRUE if no violation was found, OPJ_FALSE otherwise (also
++ *         when the profile is not one of the six IMF profiles handled here).
++ */
++static OPJ_BOOL opj_j2k_is_imf_compliant(opj_cparameters_t *parameters,
++        opj_image_t *image,
++        opj_event_mgr_t *p_manager)
++{
++    OPJ_UINT32 i;
++    const OPJ_UINT16 rsiz = parameters->rsiz;
++    const OPJ_UINT16 profile = OPJ_GET_IMF_PROFILE(rsiz);
++    const OPJ_UINT16 mainlevel = OPJ_GET_IMF_MAINLEVEL(rsiz);
++    const OPJ_UINT16 sublevel = OPJ_GET_IMF_SUBLEVEL(rsiz);
++    const int NL = parameters->numresolution - 1;
++    const OPJ_UINT32 XTsiz = parameters->tile_size_on ? (OPJ_UINT32)
++                             parameters->cp_tdx : image->x1;
++    OPJ_BOOL ret = OPJ_TRUE;
++
++    /* Validate mainlevel */
++    if (mainlevel > OPJ_IMF_MAINLEVEL_MAX) {
++        opj_event_msg(p_manager, EVT_WARNING,
++                      "IMF profile require mainlevel <= 11.\n"
++                      "-> %d is thus not compliant\n"
++                      "-> Non-IMF codestream will be generated\n",
++                      mainlevel);
++        ret = OPJ_FALSE;
++    }
++
++    /* Validate sublevel; skipped for an invalid mainlevel, which would index past the table */
++    assert(sizeof(tabMaxSubLevelFromMainLevel) ==
++           (OPJ_IMF_MAINLEVEL_MAX + 1) * sizeof(tabMaxSubLevelFromMainLevel[0]));
++    if (mainlevel <= OPJ_IMF_MAINLEVEL_MAX &&
++            sublevel > tabMaxSubLevelFromMainLevel[mainlevel]) {
++        opj_event_msg(p_manager, EVT_WARNING,
++                      "IMF profile require sublevel <= %d for mainlevel = %d.\n"
++                      "-> %d is thus not compliant\n"
++                      "-> Non-IMF codestream will be generated\n",
++                      tabMaxSubLevelFromMainLevel[mainlevel],
++                      mainlevel,
++                      sublevel);
++        ret = OPJ_FALSE;
++    }
++
++    /* Number of components */
++    if (image->numcomps > 3) {
++        opj_event_msg(p_manager, EVT_WARNING,
++                      "IMF profiles require at most 3 components.\n"
++                      "-> Number of components of input image (%d) is not compliant\n"
++                      "-> Non-IMF codestream will be generated\n",
++                      image->numcomps);
++        ret = OPJ_FALSE;
++    }
++
++    if (image->x0 != 0 || image->y0 != 0) {
++        opj_event_msg(p_manager, EVT_WARNING,
++                      "IMF profiles require image origin to be at 0,0.\n"
++                      "-> %d,%d is not compliant\n"
++                      "-> Non-IMF codestream will be generated\n",
++                      image->x0, image->y0);
++        ret = OPJ_FALSE;
++    }
++
++    if (parameters->cp_tx0 != 0 || parameters->cp_ty0 != 0) {
++        opj_event_msg(p_manager, EVT_WARNING,
++                      "IMF profiles require tile origin to be at 0,0.\n"
++                      "-> %d,%d is not compliant\n"
++                      "-> Non-IMF codestream will be generated\n",
++                      parameters->cp_tx0, parameters->cp_ty0);
++        ret = OPJ_FALSE;
++    }
++
++    if (parameters->tile_size_on) {
++        if (profile == OPJ_PROFILE_IMF_2K ||
++                profile == OPJ_PROFILE_IMF_4K ||
++                profile == OPJ_PROFILE_IMF_8K) {
++            if ((OPJ_UINT32)parameters->cp_tdx < image->x1 ||
++                    (OPJ_UINT32)parameters->cp_tdy < image->y1) {
++                opj_event_msg(p_manager, EVT_WARNING,
++                              "IMF 2K/4K/8K single tile profiles require tile to be greater or equal to image size.\n"
++                              "-> %d,%d is lesser than %d,%d\n"
++                              "-> Non-IMF codestream will be generated\n",
++                              parameters->cp_tdx, parameters->cp_tdy,
++                              image->x1, image->y1);
++                ret = OPJ_FALSE;
++            }
++        } else {
++            if ((OPJ_UINT32)parameters->cp_tdx >= image->x1 &&
++                    (OPJ_UINT32)parameters->cp_tdy >= image->y1) {
++                /* ok */
++            } else if (parameters->cp_tdx == 1024 &&
++                       parameters->cp_tdy == 1024) {
++                /* ok */
++            } else if (parameters->cp_tdx == 2048 &&
++                       parameters->cp_tdy == 2048 &&
++                       (profile == OPJ_PROFILE_IMF_4K ||
++                        profile == OPJ_PROFILE_IMF_8K)) {
++                /* ok */
++            } else if (parameters->cp_tdx == 4096 &&
++                       parameters->cp_tdy == 4096 &&
++                       profile == OPJ_PROFILE_IMF_8K) {
++                /* ok */
++            } else {
++                opj_event_msg(p_manager, EVT_WARNING,
++                              "IMF 2K_R/4K_R/8K_R single/multiple tile profiles "
++                              "require tile to be greater or equal to image size,\n"
++                              "or to be (1024,1024), or (2048,2048) for 4K_R/8K_R "
++                              "or (4096,4096) for 8K_R.\n"
++                              "-> %d,%d is non conformant\n"
++                              "-> Non-IMF codestream will be generated\n",
++                              parameters->cp_tdx, parameters->cp_tdy);
++                ret = OPJ_FALSE;
++            }
++        }
++    }
++
++    /* Bitdepth */
++    for (i = 0; i < image->numcomps; i++) {
++        if (!(image->comps[i].bpp >= 8 && image->comps[i].bpp <= 16) ||
++                (image->comps[i].sgnd)) {
++            const char *tmp_str = image->comps[i].sgnd ? "signed" : "unsigned";
++            opj_event_msg(p_manager, EVT_WARNING,
++                          "IMF profiles require precision of each component to be in [8-16] bits unsigned.\n"
++                          "-> At least component %d of input image (%d bits, %s) is not compliant\n"
++                          "-> Non-IMF codestream will be generated\n",
++                          i, image->comps[i].bpp, tmp_str);
++            ret = OPJ_FALSE;
++        }
++    }
++
++    /* Sub-sampling */
++    for (i = 0; i < image->numcomps; i++) {
++        if (i == 0 && image->comps[i].dx != 1) {
++            opj_event_msg(p_manager, EVT_WARNING,
++                          "IMF profiles require XRSiz1 == 1. Here it is set to %d.\n"
++                          "-> Non-IMF codestream will be generated\n",
++                          image->comps[i].dx);
++            ret = OPJ_FALSE;
++        }
++        if (i == 1 && image->comps[i].dx != 1 && image->comps[i].dx != 2) {
++            opj_event_msg(p_manager, EVT_WARNING,
++                          "IMF profiles require XRSiz2 == 1 or 2. Here it is set to %d.\n"
++                          "-> Non-IMF codestream will be generated\n",
++                          image->comps[i].dx);
++            ret = OPJ_FALSE;
++        }
++        if (i > 1 && image->comps[i].dx != image->comps[i - 1].dx) {
++            opj_event_msg(p_manager, EVT_WARNING,
++                          "IMF profiles require XRSiz%d to be the same as XRSiz2. "
++                          "Here it is set to %d instead of %d.\n"
++                          "-> Non-IMF codestream will be generated\n",
++                          i + 1, image->comps[i].dx, image->comps[i - 1].dx);
++            ret = OPJ_FALSE;
++        }
++        if (image->comps[i].dy != 1) {
++            opj_event_msg(p_manager, EVT_WARNING,
++                          "IMF profiles require YRsiz == 1. "
++                          "Here it is set to %d for component %d.\n"
++                          "-> Non-IMF codestream will be generated\n",
++                          image->comps[i].dy, i);
++            ret = OPJ_FALSE;
++        }
++    }
++
++    /* Image size */
++    switch (profile) {
++    case OPJ_PROFILE_IMF_2K:
++    case OPJ_PROFILE_IMF_2K_R:
++        if (((image->comps[0].w > 2048) | (image->comps[0].h > 1556))) {
++            opj_event_msg(p_manager, EVT_WARNING,
++                          "IMF 2K/2K_R profile require:\n"
++                          "width <= 2048 and height <= 1556\n"
++                          "-> Input image size %d x %d is not compliant\n"
++                          "-> Non-IMF codestream will be generated\n",
++                          image->comps[0].w, image->comps[0].h);
++            ret = OPJ_FALSE;
++        }
++        break;
++    case OPJ_PROFILE_IMF_4K:
++    case OPJ_PROFILE_IMF_4K_R:
++        if (((image->comps[0].w > 4096) | (image->comps[0].h > 3112))) {
++            opj_event_msg(p_manager, EVT_WARNING,
++                          "IMF 4K/4K_R profile require:\n"
++                          "width <= 4096 and height <= 3112\n"
++                          "-> Input image size %d x %d is not compliant\n"
++                          "-> Non-IMF codestream will be generated\n",
++                          image->comps[0].w, image->comps[0].h);
++            ret = OPJ_FALSE;
++        }
++        break;
++    case OPJ_PROFILE_IMF_8K:
++    case OPJ_PROFILE_IMF_8K_R:
++        if (((image->comps[0].w > 8192) | (image->comps[0].h > 6224))) {
++            opj_event_msg(p_manager, EVT_WARNING,
++                          "IMF 8K/8K_R profile require:\n"
++                          "width <= 8192 and height <= 6224\n"
++                          "-> Input image size %d x %d is not compliant\n"
++                          "-> Non-IMF codestream will be generated\n",
++                          image->comps[0].w, image->comps[0].h);
++            ret = OPJ_FALSE;
++        }
++        break;
++    default : /* profile is none of the six IMF profiles above: stop checking */
++        assert(0);
++        return OPJ_FALSE;
++    }
++
++    if (parameters->roi_compno != -1) {
++        opj_event_msg(p_manager, EVT_WARNING,
++                      "IMF profile forbid RGN / region of interest marker.\n"
++                      "-> Compression parameters specify a ROI\n"
++                      "-> Non-IMF codestream will be generated\n");
++        ret = OPJ_FALSE;
++    }
++
++    if (parameters->cblockw_init != 32 || parameters->cblockh_init != 32) {
++        opj_event_msg(p_manager, EVT_WARNING,
++                      "IMF profile require code block size to be 32x32.\n"
++                      "-> Compression parameters set it to %dx%d.\n"
++                      "-> Non-IMF codestream will be generated\n",
++                      parameters->cblockw_init, parameters->cblockh_init);
++        ret = OPJ_FALSE;
++    }
++
++    if (parameters->prog_order != OPJ_CPRL) {
++        opj_event_msg(p_manager, EVT_WARNING,
++                      "IMF profile require progression order to be CPRL.\n"
++                      "-> Compression parameters set it to %d.\n"
++                      "-> Non-IMF codestream will be generated\n",
++                      parameters->prog_order);
++        ret = OPJ_FALSE;
++    }
++
++    if (parameters->numpocs != 0) {
++        opj_event_msg(p_manager, EVT_WARNING,
++                      "IMF profile forbid POC markers.\n"
++                      "-> Compression parameters set %d POC.\n"
++                      "-> Non-IMF codestream will be generated\n",
++                      parameters->numpocs);
++        ret = OPJ_FALSE;
++    }
++
++    /* Codeblock style: no mode switch enabled */
++    if (parameters->mode != 0) {
++        opj_event_msg(p_manager, EVT_WARNING,
++                      "IMF profile forbid mode switch in code block style.\n"
++                      "-> Compression parameters set code block style to %d.\n"
++                      "-> Non-IMF codestream will be generated\n",
++                      parameters->mode);
++        ret = OPJ_FALSE;
++    }
++
++    if (profile == OPJ_PROFILE_IMF_2K ||
++            profile == OPJ_PROFILE_IMF_4K ||
++            profile == OPJ_PROFILE_IMF_8K) {
++        /* Expect 9-7 transform */
++        if (parameters->irreversible != 1) {
++            opj_event_msg(p_manager, EVT_WARNING,
++                          "IMF 2K/4K/8K profiles require 9-7 Irreversible Transform.\n"
++                          "-> Compression parameters set it to reversible.\n"
++                          "-> Non-IMF codestream will be generated\n");
++            ret = OPJ_FALSE;
++        }
++    } else {
++        /* Expect 5-3 transform */
++        if (parameters->irreversible != 0) {
++            opj_event_msg(p_manager, EVT_WARNING,
++                          "IMF 2K_R/4K_R/8K_R profiles require 5-3 reversible Transform.\n"
++                          "-> Compression parameters set it to irreversible.\n"
++                          "-> Non-IMF codestream will be generated\n");
++            ret = OPJ_FALSE;
++        }
++    }
++
++    /* Number of layers */
++    if (parameters->tcp_numlayers != 1) {
++        opj_event_msg(p_manager, EVT_WARNING,
++                      "IMF 2K/4K/8K profiles require 1 single quality layer.\n"
++                      "-> Number of layers is %d.\n"
++                      "-> Non-IMF codestream will be generated\n",
++                      parameters->tcp_numlayers);
++        ret = OPJ_FALSE;
++    }
++
++    /* Decomposition levels */
++    switch (profile) {
++    case OPJ_PROFILE_IMF_2K:
++        if (!(NL >= 1 && NL <= 5)) {
++            opj_event_msg(p_manager, EVT_WARNING,
++                          "IMF 2K profile requires 1 <= NL <= 5:\n"
++                          "-> Number of decomposition levels is %d.\n"
++                          "-> Non-IMF codestream will be generated\n",
++                          NL);
++            ret = OPJ_FALSE;
++        }
++        break;
++    case OPJ_PROFILE_IMF_4K:
++        if (!(NL >= 1 && NL <= 6)) {
++            opj_event_msg(p_manager, EVT_WARNING,
++                          "IMF 4K profile requires 1 <= NL <= 6:\n"
++                          "-> Number of decomposition levels is %d.\n"
++                          "-> Non-IMF codestream will be generated\n",
++                          NL);
++            ret = OPJ_FALSE;
++        }
++        break;
++    case OPJ_PROFILE_IMF_8K:
++        if (!(NL >= 1 && NL <= 7)) {
++            opj_event_msg(p_manager, EVT_WARNING,
++                          "IMF 8K profile requires 1 <= NL <= 7:\n"
++                          "-> Number of decomposition levels is %d.\n"
++                          "-> Non-IMF codestream will be generated\n",
++                          NL);
++            ret = OPJ_FALSE;
++        }
++        break;
++    case OPJ_PROFILE_IMF_2K_R: {
++        if (XTsiz >= 2048) {
++            if (!(NL >= 1 && NL <= 5)) {
++                opj_event_msg(p_manager, EVT_WARNING,
++                              "IMF 2K_R profile requires 1 <= NL <= 5 for XTsiz >= 2048:\n"
++                              "-> Number of decomposition levels is %d.\n"
++                              "-> Non-IMF codestream will be generated\n",
++                              NL);
++                ret = OPJ_FALSE;
++            }
++        } else if (XTsiz >= 1024) {
++            if (!(NL >= 1 && NL <= 4)) {
++                opj_event_msg(p_manager, EVT_WARNING,
++                              "IMF 2K_R profile requires 1 <= NL <= 4 for XTsiz in [1024,2048[:\n"
++                              "-> Number of decomposition levels is %d.\n"
++                              "-> Non-IMF codestream will be generated\n",
++                              NL);
++                ret = OPJ_FALSE;
++            }
++        }
++        break;
++    }
++    case OPJ_PROFILE_IMF_4K_R: {
++        if (XTsiz >= 4096) {
++            if (!(NL >= 1 && NL <= 6)) {
++                opj_event_msg(p_manager, EVT_WARNING,
++                              "IMF 4K_R profile requires 1 <= NL <= 6 for XTsiz >= 4096:\n"
++                              "-> Number of decomposition levels is %d.\n"
++                              "-> Non-IMF codestream will be generated\n",
++                              NL);
++                ret = OPJ_FALSE;
++            }
++        } else if (XTsiz >= 2048) {
++            if (!(NL >= 1 && NL <= 5)) {
++                opj_event_msg(p_manager, EVT_WARNING,
++                              "IMF 4K_R profile requires 1 <= NL <= 5 for XTsiz in [2048,4096[:\n"
++                              "-> Number of decomposition levels is %d.\n"
++                              "-> Non-IMF codestream will be generated\n",
++                              NL);
++                ret = OPJ_FALSE;
++            }
++        } else if (XTsiz >= 1024) {
++            if (!(NL >= 1 && NL <= 4)) {
++                opj_event_msg(p_manager, EVT_WARNING,
++                              "IMF 4K_R profile requires 1 <= NL <= 4 for XTsiz in [1024,2048[:\n"
++                              "-> Number of decomposition levels is %d.\n"
++                              "-> Non-IMF codestream will be generated\n",
++                              NL);
++                ret = OPJ_FALSE;
++            }
++        }
++        break;
++    }
++    case OPJ_PROFILE_IMF_8K_R: {
++        if (XTsiz >= 8192) {
++            if (!(NL >= 1 && NL <= 7)) {
++                opj_event_msg(p_manager, EVT_WARNING,
++                              "IMF 8K_R profile requires 1 <= NL <= 7 for XTsiz >= 8192:\n"
++                              "-> Number of decomposition levels is %d.\n"
++                              "-> Non-IMF codestream will be generated\n",
++                              NL);
++                ret = OPJ_FALSE;
++            }
++        } else if (XTsiz >= 4096) {
++            if (!(NL >= 1 && NL <= 6)) {
++                opj_event_msg(p_manager, EVT_WARNING,
++                              "IMF 8K_R profile requires 1 <= NL <= 6 for XTsiz in [4096,8192[:\n"
++                              "-> Number of decomposition levels is %d.\n"
++                              "-> Non-IMF codestream will be generated\n",
++                              NL);
++                ret = OPJ_FALSE;
++            }
++        } else if (XTsiz >= 2048) {
++            if (!(NL >= 1 && NL <= 5)) {
++                opj_event_msg(p_manager, EVT_WARNING,
++                              "IMF 8K_R profile requires 1 <= NL <= 5 for XTsiz in [2048,4096[:\n"
++                              "-> Number of decomposition levels is %d.\n"
++                              "-> Non-IMF codestream will be generated\n",
++                              NL);
++                ret = OPJ_FALSE;
++            }
++        } else if (XTsiz >= 1024) {
++            if (!(NL >= 1 && NL <= 4)) {
++                opj_event_msg(p_manager, EVT_WARNING,
++                              "IMF 8K_R profile requires 1 <= NL <= 4 for XTsiz in [1024,2048[:\n"
++                              "-> Number of decomposition levels is %d.\n"
++                              "-> Non-IMF codestream will be generated\n",
++                              NL);
++                ret = OPJ_FALSE;
++            }
++        }
++        break;
++    }
++    default:
++        break;
++    }
++
++    if (parameters->numresolution == 1) {
++        if (parameters->res_spec != 1 ||
++                parameters->prcw_init[0] != 128 ||
++                parameters->prch_init[0] != 128) {
++            opj_event_msg(p_manager, EVT_WARNING,
++                          "IMF profiles require PPx = PPy = 7 for NLLL band, else 8.\n"
++                          "-> Supplied values are different from that.\n"
++                          "-> Non-IMF codestream will be generated\n");
++            ret = OPJ_FALSE;
++        }
++    } else {
++        int i; /* signed on purpose: compared against the int res_spec */
++        for (i = 0; i < parameters->res_spec; i++) {
++            if (parameters->prcw_init[i] != 256 ||
++                    parameters->prch_init[i] != 256) {
++                opj_event_msg(p_manager, EVT_WARNING,
++                              "IMF profiles require PPx = PPy = 7 for NLLL band, else 8.\n"
++                              "-> Supplied values are different from that.\n"
++                              "-> Non-IMF codestream will be generated\n");
++                ret = OPJ_FALSE;
++            }
++        }
++    }
++
++    return ret;
++}
++
++
+ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k,
+                                opj_cparameters_t *parameters,
+                                opj_image_t *image,
+@@ -6951,6 +7683,15 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k,
+     } else {
+         OPJ_FLOAT32 temp_rate;
+         OPJ_BOOL cap = OPJ_FALSE;
++
++        if (OPJ_IS_IMF(parameters->rsiz) && parameters->max_cs_size > 0 &&
++                parameters->tcp_numlayers == 1 && parameters->tcp_rates[0] == 0) {
++            parameters->tcp_rates[0] = (OPJ_FLOAT32)(image->numcomps * image->comps[0].w *
++                                       image->comps[0].h * image->comps[0].prec) /
++                                       (OPJ_FLOAT32)(((OPJ_UINT32)parameters->max_cs_size) * 8 * image->comps[0].dx *
++                                               image->comps[0].dy);
++        }
++
+         temp_rate = (OPJ_FLOAT32)(((double)image->numcomps * image->comps[0].w *
+                                    image->comps[0].h * image->comps[0].prec) /
+                                   (((double)parameters->max_cs_size) * 8 * image->comps[0].dx *
+@@ -6991,9 +7732,10 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k,
+                       "JPEG 2000 Broadcast profiles not yet supported\n");
+         parameters->rsiz = OPJ_PROFILE_NONE;
+     } else if (OPJ_IS_IMF(parameters->rsiz)) {
+-        opj_event_msg(p_manager, EVT_WARNING,
+-                      "JPEG 2000 IMF profiles not yet supported\n");
+-        parameters->rsiz = OPJ_PROFILE_NONE;
++        opj_j2k_set_imf_parameters(parameters, image, p_manager);
++        if (!opj_j2k_is_imf_compliant(parameters, image, p_manager)) {
++            parameters->rsiz = OPJ_PROFILE_NONE;
++        }
+     } else if (OPJ_IS_PART2(parameters->rsiz)) {
+         if (parameters->rsiz == ((OPJ_PROFILE_PART2) | (OPJ_EXTENSION_NONE))) {
+             opj_event_msg(p_manager, EVT_WARNING,
+@@ -7085,6 +7827,14 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k,
+     */
+ 
+     if (parameters->tile_size_on) {
++        if (cp->tdx == 0) {
++            opj_event_msg(p_manager, EVT_ERROR, "Invalid tile width\n");
++            return OPJ_FALSE;
++        }
++        if (cp->tdy == 0) {
++            opj_event_msg(p_manager, EVT_ERROR, "Invalid tile height\n");
++            return OPJ_FALSE;
++        }
+         cp->tw = opj_uint_ceildiv(image->x1 - cp->tx0, cp->tdx);
+         cp->th = opj_uint_ceildiv(image->y1 - cp->ty0, cp->tdy);
+     } else {
+@@ -7161,20 +7911,13 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k,
+                       "Not enough memory to allocate tile coding parameters\n");
+         return OPJ_FALSE;
+     }
+-    if (parameters->numpocs) {
+-        /* initialisation of POC */
+-        opj_j2k_check_poc_val(parameters->POC, parameters->numpocs,
+-                              (OPJ_UINT32)parameters->numresolution, image->numcomps,
+-                              (OPJ_UINT32)parameters->tcp_numlayers, p_manager);
+-        /* TODO MSD use the return value*/
+-    }
+ 
+     for (tileno = 0; tileno < cp->tw * cp->th; tileno++) {
+         opj_tcp_t *tcp = &cp->tcps[tileno];
+         tcp->numlayers = (OPJ_UINT32)parameters->tcp_numlayers;
+ 
+         for (j = 0; j < tcp->numlayers; j++) {
+-            if (OPJ_IS_CINEMA(cp->rsiz)) {
++            if (OPJ_IS_CINEMA(cp->rsiz) || OPJ_IS_IMF(cp->rsiz)) {
+                 if (cp->m_specific_param.m_enc.m_fixed_quality) {
+                     tcp->distoratio[j] = parameters->tcp_distoratio[j];
+                 }
+@@ -7201,16 +7944,22 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k,
+ 
+         if (parameters->numpocs) {
+             /* initialisation of POC */
+-            tcp->POC = 1;
+             for (i = 0; i < parameters->numpocs; i++) {
+                 if (tileno + 1 == parameters->POC[i].tile)  {
+                     opj_poc_t *tcp_poc = &tcp->pocs[numpocs_tile];
+ 
++                    if (parameters->POC[numpocs_tile].compno0 >= image->numcomps) {
++                        opj_event_msg(p_manager, EVT_ERROR,
++                                      "Invalid compno0 for POC %d\n", i);
++                        return OPJ_FALSE;
++                    }
++
+                     tcp_poc->resno0         = parameters->POC[numpocs_tile].resno0;
+                     tcp_poc->compno0        = parameters->POC[numpocs_tile].compno0;
+                     tcp_poc->layno1         = parameters->POC[numpocs_tile].layno1;
+                     tcp_poc->resno1         = parameters->POC[numpocs_tile].resno1;
+-                    tcp_poc->compno1        = parameters->POC[numpocs_tile].compno1;
++                    tcp_poc->compno1        = opj_uint_min(parameters->POC[numpocs_tile].compno1,
++                                                           image->numcomps);
+                     tcp_poc->prg1           = parameters->POC[numpocs_tile].prg1;
+                     tcp_poc->tile           = parameters->POC[numpocs_tile].tile;
+ 
+@@ -7218,7 +7967,16 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k,
+                 }
+             }
+ 
+-            tcp->numpocs = numpocs_tile - 1 ;
++            if (numpocs_tile) {
++
++                /* TODO MSD use the return value*/
++                opj_j2k_check_poc_val(parameters->POC, tileno, parameters->numpocs,
++                                      (OPJ_UINT32)parameters->numresolution, image->numcomps,
++                                      (OPJ_UINT32)parameters->tcp_numlayers, p_manager);
++
++                tcp->POC = 1;
++                tcp->numpocs = numpocs_tile - 1 ;
++            }
+         } else {
+             tcp->numpocs = 0;
+         }
+@@ -7546,6 +8304,8 @@ OPJ_BOOL opj_j2k_read_header(opj_stream_private_t *p_stream,
+ 
+     /*Allocate and initialize some elements of codestrem index*/
+     if (!opj_j2k_allocate_tile_element_cstr_index(p_j2k)) {
++        opj_image_destroy(*p_image);
++        *p_image = NULL;
+         return OPJ_FALSE;
+     }
+ 
+@@ -8632,6 +9392,7 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k,
+     OPJ_UINT32 l_marker_size;
+     const opj_dec_memory_marker_handler_t * l_marker_handler = 00;
+     opj_tcp_t * l_tcp = NULL;
++    const OPJ_UINT32 l_nb_tiles = p_j2k->m_cp.tw * p_j2k->m_cp.th;
+ 
+     /* preconditions */
+     assert(p_stream != 00);
+@@ -8807,7 +9568,6 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k,
+                     return OPJ_FALSE;
+                 }
+                 if (l_correction_needed) {
+-                    OPJ_UINT32 l_nb_tiles = p_j2k->m_cp.tw * p_j2k->m_cp.th;
+                     OPJ_UINT32 l_tile_no;
+ 
+                     p_j2k->m_specific_param.m_decoder.m_can_decode = 0;
+@@ -8822,27 +9582,42 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k,
+                                   "Non conformant codestream TPsot==TNsot.\n");
+                 }
+             }
+-            if (! p_j2k->m_specific_param.m_decoder.m_can_decode) {
+-                /* Try to read 2 bytes (the next marker ID) from stream and copy them into the buffer */
+-                if (opj_stream_read_data(p_stream,
+-                                         p_j2k->m_specific_param.m_decoder.m_header_data, 2, p_manager) != 2) {
+-                    opj_event_msg(p_manager, EVT_ERROR, "Stream too short\n");
+-                    return OPJ_FALSE;
+-                }
+-
+-                /* Read 2 bytes from buffer as the new marker ID */
+-                opj_read_bytes(p_j2k->m_specific_param.m_decoder.m_header_data,
+-                               &l_current_marker, 2);
+-            }
+         } else {
+             /* Indicate we will try to read a new tile-part header*/
+             p_j2k->m_specific_param.m_decoder.m_skip_data = 0;
+             p_j2k->m_specific_param.m_decoder.m_can_decode = 0;
+             p_j2k->m_specific_param.m_decoder.m_state = J2K_STATE_TPHSOT;
++        }
+ 
++        if (! p_j2k->m_specific_param.m_decoder.m_can_decode) {
+             /* Try to read 2 bytes (the next marker ID) from stream and copy them into the buffer */
+             if (opj_stream_read_data(p_stream,
+                                      p_j2k->m_specific_param.m_decoder.m_header_data, 2, p_manager) != 2) {
++
++                /* Deal with likely non conformant SPOT6 files, where the last */
++                /* row of tiles have TPsot == 0 and TNsot == 0, and missing EOC, */
++                /* but no other tile-parts were found. */
++                if (p_j2k->m_current_tile_number + 1 == l_nb_tiles) {
++                    OPJ_UINT32 l_tile_no;
++                    for (l_tile_no = 0U; l_tile_no < l_nb_tiles; ++l_tile_no) {
++                        if (p_j2k->m_cp.tcps[l_tile_no].m_current_tile_part_number == 0 &&
++                                p_j2k->m_cp.tcps[l_tile_no].m_nb_tile_parts == 0) {
++                            break;
++                        }
++                    }
++                    if (l_tile_no < l_nb_tiles) {
++                        opj_event_msg(p_manager, EVT_INFO,
++                                      "Tile %u has TPsot == 0 and TNsot == 0, "
++                                      "but no other tile-parts were found. "
++                                      "EOC is also missing.\n",
++                                      l_tile_no);
++                        p_j2k->m_current_tile_number = l_tile_no;
++                        l_current_marker = J2K_MS_EOC;
++                        p_j2k->m_specific_param.m_decoder.m_state = J2K_STATE_EOC;
++                        break;
++                    }
++                }
++
+                 opj_event_msg(p_manager, EVT_ERROR, "Stream too short\n");
+                 return OPJ_FALSE;
+             }
+@@ -8861,9 +9636,8 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k,
+         }
+     }
+ 
+-    /* FIXME DOC ???*/
++    /* Deal with tiles that have a single tile-part with TPsot == 0 and TNsot == 0 */
+     if (! p_j2k->m_specific_param.m_decoder.m_can_decode) {
+-        OPJ_UINT32 l_nb_tiles = p_j2k->m_cp.th * p_j2k->m_cp.tw;
+         l_tcp = p_j2k->m_cp.tcps + p_j2k->m_current_tile_number;
+ 
+         while ((p_j2k->m_current_tile_number < l_nb_tiles) && (l_tcp->m_data == 00)) {
+@@ -9245,30 +10019,40 @@ static OPJ_BOOL opj_j2k_update_image_dimensions(opj_image_t* p_image,
+ 
+     l_img_comp = p_image->comps;
+     for (it_comp = 0; it_comp < p_image->numcomps; ++it_comp) {
++        OPJ_INT32 l_h, l_w;
++        if (p_image->x0 > (OPJ_UINT32)INT_MAX ||
++                p_image->y0 > (OPJ_UINT32)INT_MAX ||
++                p_image->x1 > (OPJ_UINT32)INT_MAX ||
++                p_image->y1 > (OPJ_UINT32)INT_MAX) {
++            opj_event_msg(p_manager, EVT_ERROR,
++                          "Image coordinates above INT_MAX are not supported\n");
++            return OPJ_FALSE;
++        }
++
+         l_img_comp->x0 = opj_uint_ceildiv(p_image->x0, l_img_comp->dx);
+         l_img_comp->y0 = opj_uint_ceildiv(p_image->y0, l_img_comp->dy);
+         l_comp_x1 = opj_int_ceildiv((OPJ_INT32)p_image->x1, (OPJ_INT32)l_img_comp->dx);
+         l_comp_y1 = opj_int_ceildiv((OPJ_INT32)p_image->y1, (OPJ_INT32)l_img_comp->dy);
+ 
+-        OPJ_INT32 l_1 = opj_int_ceildivpow2(l_comp_x1, (OPJ_INT32)l_img_comp->factor);
+-        OPJ_INT32 l_2 = opj_int_ceildivpow2((OPJ_INT32)l_img_comp->x0, (OPJ_INT32)l_img_comp->factor);
+-        if (l_1 < l_2) {
++        l_w = opj_int_ceildivpow2(l_comp_x1, (OPJ_INT32)l_img_comp->factor)
++              - opj_int_ceildivpow2((OPJ_INT32)l_img_comp->x0, (OPJ_INT32)l_img_comp->factor);
++        if (l_w < 0) {
+             opj_event_msg(p_manager, EVT_ERROR,
+-                          "Size x of the decoded component image is incorrect (comp[%d].w<0).\n",
+-                          it_comp);
++                          "Size x of the decoded component image is incorrect (comp[%d].w=%d).\n",
++                          it_comp, l_w);
+             return OPJ_FALSE;
+         }
+-        l_img_comp->w = (OPJ_UINT32)(l_1-l_2);
++        l_img_comp->w = (OPJ_UINT32)l_w;
+ 
+-        l_1 = opj_int_ceildivpow2(l_comp_y1, (OPJ_INT32)l_img_comp->factor);
+-        l_2 = opj_int_ceildivpow2((OPJ_INT32)l_img_comp->y0, (OPJ_INT32)l_img_comp->factor);
+-        if (l_1 < l_2) {
++        l_h = opj_int_ceildivpow2(l_comp_y1, (OPJ_INT32)l_img_comp->factor)
++              - opj_int_ceildivpow2((OPJ_INT32)l_img_comp->y0, (OPJ_INT32)l_img_comp->factor);
++        if (l_h < 0) {
+             opj_event_msg(p_manager, EVT_ERROR,
+-                          "Size y of the decoded component image is incorrect (comp[%d].h<0).\n",
+-                          it_comp);
++                          "Size y of the decoded component image is incorrect (comp[%d].h=%d).\n",
++                          it_comp, l_h);
+             return OPJ_FALSE;
+         }
+-        l_img_comp->h = (OPJ_UINT32)(l_1-l_2);
++        l_img_comp->h = (OPJ_UINT32)l_h;
+ 
+         l_img_comp++;
+     }
+@@ -9764,9 +10548,9 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k,
+         return OPJ_FALSE;
+     }
+ 
+-    opj_read_bytes(l_current_ptr, &l_tccp->numresolutions,
+-                   1);              /* SPcox (D) */
+-    ++l_tccp->numresolutions;                                                                               /* tccp->numresolutions = read() + 1 */
++    /* SPcod (D) / SPcoc (A) */
++    opj_read_bytes(l_current_ptr, &l_tccp->numresolutions, 1);
++    ++l_tccp->numresolutions;  /* tccp->numresolutions = read() + 1 */
+     if (l_tccp->numresolutions > OPJ_J2K_MAXRLVLS) {
+         opj_event_msg(p_manager, EVT_ERROR,
+                       "Invalid value for numresolutions : %d, max value is set in openjpeg.h at %d\n",
+@@ -9787,11 +10571,13 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k,
+         return OPJ_FALSE;
+     }
+ 
+-    opj_read_bytes(l_current_ptr, &l_tccp->cblkw, 1);               /* SPcoc (E) */
++    /* SPcod (E) / SPcoc (B) */
++    opj_read_bytes(l_current_ptr, &l_tccp->cblkw, 1);
+     ++l_current_ptr;
+     l_tccp->cblkw += 2;
+ 
+-    opj_read_bytes(l_current_ptr, &l_tccp->cblkh, 1);               /* SPcoc (F) */
++    /* SPcod (F) / SPcoc (C) */
++    opj_read_bytes(l_current_ptr, &l_tccp->cblkh, 1);
+     ++l_current_ptr;
+     l_tccp->cblkh += 2;
+ 
+@@ -9802,8 +10588,8 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k,
+         return OPJ_FALSE;
+     }
+ 
+-
+-    opj_read_bytes(l_current_ptr, &l_tccp->cblksty, 1);             /* SPcoc (G) */
++    /* SPcod (G) / SPcoc (D) */
++    opj_read_bytes(l_current_ptr, &l_tccp->cblksty, 1);
+     ++l_current_ptr;
+     if (l_tccp->cblksty & 0xC0U) { /* 2 msb are reserved, assume we can't read */
+         opj_event_msg(p_manager, EVT_ERROR,
+@@ -9811,7 +10597,8 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k,
+         return OPJ_FALSE;
+     }
+ 
+-    opj_read_bytes(l_current_ptr, &l_tccp->qmfbid, 1);              /* SPcoc (H) */
++    /* SPcod (H) / SPcoc (E) */
++    opj_read_bytes(l_current_ptr, &l_tccp->qmfbid, 1);
+     ++l_current_ptr;
+ 
+     if (l_tccp->qmfbid > 1) {
+@@ -9829,8 +10616,9 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k,
+             return OPJ_FALSE;
+         }
+ 
++        /* SPcod (I_i) / SPcoc (F_i) */
+         for (i = 0; i < l_tccp->numresolutions; ++i) {
+-            opj_read_bytes(l_current_ptr, &l_tmp, 1);               /* SPcoc (I_i) */
++            opj_read_bytes(l_current_ptr, &l_tmp, 1);
+             ++l_current_ptr;
+             /* Precinct exponent 0 is only allowed for lowest resolution level (Table A.21) */
+             if ((i != 0) && (((l_tmp & 0xf) == 0) || ((l_tmp >> 4) == 0))) {
+@@ -10675,6 +11463,42 @@ static OPJ_BOOL opj_j2k_allocate_tile_element_cstr_index(opj_j2k_t *p_j2k)
+     return OPJ_TRUE;
+ }
+ 
++static OPJ_BOOL opj_j2k_are_all_used_components_decoded(opj_j2k_t *p_j2k,
++        opj_event_mgr_t * p_manager)
++{
++    OPJ_UINT32 compno;
++    OPJ_BOOL decoded_all_used_components = OPJ_TRUE;
++
++    if (p_j2k->m_specific_param.m_decoder.m_numcomps_to_decode) {
++        for (compno = 0;
++                compno < p_j2k->m_specific_param.m_decoder.m_numcomps_to_decode; compno++) {
++            OPJ_UINT32 dec_compno =
++                p_j2k->m_specific_param.m_decoder.m_comps_indices_to_decode[compno];
++            if (p_j2k->m_output_image->comps[dec_compno].data == NULL) {
++                opj_event_msg(p_manager, EVT_WARNING, "Failed to decode component %d\n",
++                              dec_compno);
++                decoded_all_used_components = OPJ_FALSE;
++            }
++        }
++    } else {
++        for (compno = 0; compno < p_j2k->m_output_image->numcomps; compno++) {
++            if (p_j2k->m_output_image->comps[compno].data == NULL) {
++                opj_event_msg(p_manager, EVT_WARNING, "Failed to decode component %d\n",
++                              compno);
++                decoded_all_used_components = OPJ_FALSE;
++            }
++        }
++    }
++
++    if (decoded_all_used_components == OPJ_FALSE) {
++        opj_event_msg(p_manager, EVT_ERROR, "Failed to decode all used components\n");
++        return OPJ_FALSE;
++    }
++
++    return OPJ_TRUE;
++}
++
++
+ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k,
+                                      opj_stream_private_t *p_stream,
+                                      opj_event_mgr_t * p_manager)
+@@ -10786,6 +11610,10 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k,
+         }
+     }
+ 
++    if (! opj_j2k_are_all_used_components_decoded(p_j2k, p_manager)) {
++        return OPJ_FALSE;
++    }
++
+     return OPJ_TRUE;
+ }
+ 
+@@ -10914,6 +11742,10 @@ static OPJ_BOOL opj_j2k_decode_one_tile(opj_j2k_t *p_j2k,
+ 
+     }
+ 
++    if (! opj_j2k_are_all_used_components_decoded(p_j2k, p_manager)) {
++        return OPJ_FALSE;
++    }
++
+     return OPJ_TRUE;
+ }
+ 
+@@ -11198,6 +12030,42 @@ OPJ_BOOL opj_j2k_set_decoded_resolution_factor(opj_j2k_t *p_j2k,
+     return OPJ_FALSE;
+ }
+ 
++/* ----------------------------------------------------------------------- */
++
++OPJ_BOOL opj_j2k_encoder_set_extra_options(
++    opj_j2k_t *p_j2k,
++    const char* const* p_options,
++    opj_event_mgr_t * p_manager)
++{
++    const char* const* p_option_iter;
++
++    if (p_options == NULL) {
++        return OPJ_TRUE;
++    }
++
++    for (p_option_iter = p_options; *p_option_iter != NULL; ++p_option_iter) {
++        if (strncmp(*p_option_iter, "PLT=", 4) == 0) {
++            if (strcmp(*p_option_iter, "PLT=YES") == 0) {
++                p_j2k->m_specific_param.m_encoder.m_PLT = OPJ_TRUE;
++            } else if (strcmp(*p_option_iter, "PLT=NO") == 0) {
++                p_j2k->m_specific_param.m_encoder.m_PLT = OPJ_FALSE;
++            } else {
++                opj_event_msg(p_manager, EVT_ERROR,
++                              "Invalid value for option: %s.\n", *p_option_iter);
++                return OPJ_FALSE;
++            }
++        } else {
++            opj_event_msg(p_manager, EVT_ERROR,
++                          "Invalid option: %s.\n", *p_option_iter);
++            return OPJ_FALSE;
++        }
++    }
++
++    return OPJ_TRUE;
++}
++
++/* ----------------------------------------------------------------------- */
++
+ OPJ_BOOL opj_j2k_encode(opj_j2k_t * p_j2k,
+                         opj_stream_private_t *p_stream,
+                         opj_event_mgr_t * p_manager)
+@@ -11255,7 +12123,7 @@ OPJ_BOOL opj_j2k_encode(opj_j2k_t * p_j2k,
+                 }
+             }
+         }
+-        l_current_tile_size = opj_tcd_get_encoded_tile_size(p_j2k->m_tcd);
++        l_current_tile_size = opj_tcd_get_encoder_input_buffer_size(p_j2k->m_tcd);
+         if (!l_reuse_data) {
+             if (l_current_tile_size > l_max_tile_size) {
+                 OPJ_BYTE *l_new_current_data = (OPJ_BYTE *) opj_realloc(l_current_data,
+@@ -11581,7 +12449,7 @@ static OPJ_BOOL opj_j2k_setup_end_compress(opj_j2k_t *p_j2k,
+         return OPJ_FALSE;
+     }
+ 
+-    if (OPJ_IS_CINEMA(p_j2k->m_cp.rsiz)) {
++    if (OPJ_IS_CINEMA(p_j2k->m_cp.rsiz) || OPJ_IS_IMF(p_j2k->m_cp.rsiz)) {
+         if (! opj_procedure_list_add_procedure(p_j2k->m_procedure_list,
+                                                (opj_procedure)opj_j2k_write_updated_tlm, p_manager)) {
+             return OPJ_FALSE;
+@@ -11664,7 +12532,7 @@ static OPJ_BOOL opj_j2k_setup_header_writing(opj_j2k_t *p_j2k,
+         return OPJ_FALSE;
+     }
+ 
+-    if (OPJ_IS_CINEMA(p_j2k->m_cp.rsiz)) {
++    if (OPJ_IS_CINEMA(p_j2k->m_cp.rsiz) || OPJ_IS_IMF(p_j2k->m_cp.rsiz)) {
+         if (! opj_procedure_list_add_procedure(p_j2k->m_procedure_list,
+                                                (opj_procedure)opj_j2k_write_tlm, p_manager)) {
+             return OPJ_FALSE;
+@@ -11691,7 +12559,8 @@ static OPJ_BOOL opj_j2k_setup_header_writing(opj_j2k_t *p_j2k,
+     }
+ 
+     /* DEVELOPER CORNER, insert your custom procedures */
+-    if (p_j2k->m_cp.rsiz & OPJ_EXTENSION_MCT) {
++    if ((p_j2k->m_cp.rsiz & (OPJ_PROFILE_PART2 | OPJ_EXTENSION_MCT)) ==
++            (OPJ_PROFILE_PART2 | OPJ_EXTENSION_MCT)) {
+         if (! opj_procedure_list_add_procedure(p_j2k->m_procedure_list,
+                                                (opj_procedure)opj_j2k_write_mct_data_group, p_manager)) {
+             return OPJ_FALSE;
+@@ -11721,7 +12590,7 @@ static OPJ_BOOL opj_j2k_setup_header_writing(opj_j2k_t *p_j2k,
+ static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k,
+         OPJ_BYTE * p_data,
+         OPJ_UINT32 * p_data_written,
+-        OPJ_UINT32 p_total_data_size,
++        OPJ_UINT32 total_data_size,
+         opj_stream_private_t *p_stream,
+         struct opj_event_mgr * p_manager)
+ {
+@@ -11745,7 +12614,7 @@ static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k,
+ 
+     l_current_nb_bytes_written = 0;
+     l_begin_data = p_data;
+-    if (! opj_j2k_write_sot(p_j2k, p_data, p_total_data_size,
++    if (! opj_j2k_write_sot(p_j2k, p_data, total_data_size,
+                             &l_current_nb_bytes_written, p_stream,
+                             p_manager)) {
+         return OPJ_FALSE;
+@@ -11753,7 +12622,7 @@ static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k,
+ 
+     l_nb_bytes_written += l_current_nb_bytes_written;
+     p_data += l_current_nb_bytes_written;
+-    p_total_data_size -= l_current_nb_bytes_written;
++    total_data_size -= l_current_nb_bytes_written;
+ 
+     if (!OPJ_IS_CINEMA(l_cp->rsiz)) {
+ #if 0
+@@ -11763,29 +12632,29 @@ static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k,
+                                         p_manager);
+             l_nb_bytes_written += l_current_nb_bytes_written;
+             p_data += l_current_nb_bytes_written;
+-            p_total_data_size -= l_current_nb_bytes_written;
++            total_data_size -= l_current_nb_bytes_written;
+ 
+             l_current_nb_bytes_written = 0;
+             opj_j2k_write_qcc_in_memory(p_j2k, compno, p_data, &l_current_nb_bytes_written,
+                                         p_manager);
+             l_nb_bytes_written += l_current_nb_bytes_written;
+             p_data += l_current_nb_bytes_written;
+-            p_total_data_size -= l_current_nb_bytes_written;
++            total_data_size -= l_current_nb_bytes_written;
+         }
+ #endif
+-        if (l_cp->tcps[p_j2k->m_current_tile_number].numpocs) {
++        if (l_cp->tcps[p_j2k->m_current_tile_number].POC) {
+             l_current_nb_bytes_written = 0;
+             opj_j2k_write_poc_in_memory(p_j2k, p_data, &l_current_nb_bytes_written,
+                                         p_manager);
+             l_nb_bytes_written += l_current_nb_bytes_written;
+             p_data += l_current_nb_bytes_written;
+-            p_total_data_size -= l_current_nb_bytes_written;
++            total_data_size -= l_current_nb_bytes_written;
+         }
+     }
+ 
+     l_current_nb_bytes_written = 0;
+     if (! opj_j2k_write_sod(p_j2k, l_tcd, p_data, &l_current_nb_bytes_written,
+-                            p_total_data_size, p_stream, p_manager)) {
++                            total_data_size, p_stream, p_manager)) {
+         return OPJ_FALSE;
+     }
+ 
+@@ -11796,7 +12665,7 @@ static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k,
+     opj_write_bytes(l_begin_data + 6, l_nb_bytes_written,
+                     4);                                 /* PSOT */
+ 
+-    if (OPJ_IS_CINEMA(l_cp->rsiz)) {
++    if (OPJ_IS_CINEMA(l_cp->rsiz) || OPJ_IS_IMF(l_cp->rsiz)) {
+         opj_j2k_update_tlm(p_j2k, l_nb_bytes_written);
+     }
+ 
+@@ -11806,7 +12675,7 @@ static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k,
+ static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k,
+         OPJ_BYTE * p_data,
+         OPJ_UINT32 * p_data_written,
+-        OPJ_UINT32 p_total_data_size,
++        OPJ_UINT32 total_data_size,
+         opj_stream_private_t *p_stream,
+         struct opj_event_mgr * p_manager
+                                             )
+@@ -11839,7 +12708,7 @@ static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k,
+         l_begin_data = p_data;
+ 
+         if (! opj_j2k_write_sot(p_j2k, p_data,
+-                                p_total_data_size,
++                                total_data_size,
+                                 &l_current_nb_bytes_written,
+                                 p_stream,
+                                 p_manager)) {
+@@ -11848,25 +12717,25 @@ static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k,
+ 
+         l_nb_bytes_written += l_current_nb_bytes_written;
+         p_data += l_current_nb_bytes_written;
+-        p_total_data_size -= l_current_nb_bytes_written;
++        total_data_size -= l_current_nb_bytes_written;
+         l_part_tile_size += l_current_nb_bytes_written;
+ 
+         l_current_nb_bytes_written = 0;
+         if (! opj_j2k_write_sod(p_j2k, l_tcd, p_data, &l_current_nb_bytes_written,
+-                                p_total_data_size, p_stream, p_manager)) {
++                                total_data_size, p_stream, p_manager)) {
+             return OPJ_FALSE;
+         }
+ 
+         p_data += l_current_nb_bytes_written;
+         l_nb_bytes_written += l_current_nb_bytes_written;
+-        p_total_data_size -= l_current_nb_bytes_written;
++        total_data_size -= l_current_nb_bytes_written;
+         l_part_tile_size += l_current_nb_bytes_written;
+ 
+         /* Writing Psot in SOT marker */
+         opj_write_bytes(l_begin_data + 6, l_part_tile_size,
+                         4);                                   /* PSOT */
+ 
+-        if (OPJ_IS_CINEMA(l_cp->rsiz)) {
++        if (OPJ_IS_CINEMA(l_cp->rsiz) || OPJ_IS_IMF(l_cp->rsiz)) {
+             opj_j2k_update_tlm(p_j2k, l_part_tile_size);
+         }
+ 
+@@ -11885,7 +12754,7 @@ static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k,
+             l_begin_data = p_data;
+ 
+             if (! opj_j2k_write_sot(p_j2k, p_data,
+-                                    p_total_data_size,
++                                    total_data_size,
+                                     &l_current_nb_bytes_written, p_stream,
+                                     p_manager)) {
+                 return OPJ_FALSE;
+@@ -11893,26 +12762,26 @@ static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k,
+ 
+             l_nb_bytes_written += l_current_nb_bytes_written;
+             p_data += l_current_nb_bytes_written;
+-            p_total_data_size -= l_current_nb_bytes_written;
++            total_data_size -= l_current_nb_bytes_written;
+             l_part_tile_size += l_current_nb_bytes_written;
+ 
+             l_current_nb_bytes_written = 0;
+ 
+             if (! opj_j2k_write_sod(p_j2k, l_tcd, p_data, &l_current_nb_bytes_written,
+-                                    p_total_data_size, p_stream, p_manager)) {
++                                    total_data_size, p_stream, p_manager)) {
+                 return OPJ_FALSE;
+             }
+ 
+             l_nb_bytes_written += l_current_nb_bytes_written;
+             p_data += l_current_nb_bytes_written;
+-            p_total_data_size -= l_current_nb_bytes_written;
++            total_data_size -= l_current_nb_bytes_written;
+             l_part_tile_size += l_current_nb_bytes_written;
+ 
+             /* Writing Psot in SOT marker */
+             opj_write_bytes(l_begin_data + 6, l_part_tile_size,
+                             4);                                   /* PSOT */
+ 
+-            if (OPJ_IS_CINEMA(l_cp->rsiz)) {
++            if (OPJ_IS_CINEMA(l_cp->rsiz) || OPJ_IS_IMF(l_cp->rsiz)) {
+                 opj_j2k_update_tlm(p_j2k, l_part_tile_size);
+             }
+ 
+diff --git a/third_party/libopenjpeg20/j2k.h b/third_party/libopenjpeg20/j2k.h
+index 5d393c98130f27af8ee128175e504e580e4e4e13..9eb50b50da6977500a95d7a64d20b675c1754b50 100644
+--- a/third_party/libopenjpeg20/j2k.h
++++ b/third_party/libopenjpeg20/j2k.h
+@@ -531,8 +531,14 @@ typedef struct opj_j2k_enc {
+     OPJ_BYTE * m_header_tile_data;
+ 
+     /* size of the encoded_data */
++
+     OPJ_UINT32 m_header_tile_data_size;
+ 
++    /* whether to generate PLT markers */
++    OPJ_BOOL   m_PLT;
++
++    /* reserved bytes in m_encoded_tile_size for PLT markers */
++    OPJ_UINT32 m_reserved_bytes_for_PLT;
+ 
+ } opj_j2k_enc_t;
+ 
+@@ -577,15 +583,16 @@ typedef struct opj_j2k {
+     /** the current tile coder/decoder **/
+     struct opj_tcd *    m_tcd;
+ 
+-    /** Number of threads to use */
+-    int m_num_threads;
+-
+     /** Thread pool */
+     opj_thread_pool_t* m_tp;
+ 
++    /** Image width coming from JP2 IHDR box. 0 from a pure codestream */
+     OPJ_UINT32 ihdr_w;
++
++    /** Image height coming from JP2 IHDR box. 0 from a pure codestream */
+     OPJ_UINT32 ihdr_h;
+-    OPJ_UINT32 enumcs;
++
++    /** Set to 1 by the decoder initialization if OPJ_DPARAMETERS_DUMP_FLAG is set */
+     unsigned int dump_state;
+ }
+ opj_j2k_t;
+@@ -827,6 +834,19 @@ OPJ_BOOL opj_j2k_set_decoded_resolution_factor(opj_j2k_t *p_j2k,
+         OPJ_UINT32 res_factor,
+         opj_event_mgr_t * p_manager);
+ 
++/**
++ * Specify extra options for the encoder.
++ *
++ * @param  p_j2k        the jpeg2000 codec.
++ * @param  p_options    options
++ * @param  p_manager    the user event manager
++ *
++ * @see opj_encoder_set_extra_options() for more details.
++ */
++OPJ_BOOL opj_j2k_encoder_set_extra_options(
++    opj_j2k_t *p_j2k,
++    const char* const* p_options,
++    opj_event_mgr_t * p_manager);
+ 
+ /**
+  * Writes a tile.
+diff --git a/third_party/libopenjpeg20/jp2.c b/third_party/libopenjpeg20/jp2.c
+index 1f61a23e6bc4ecc06cbff0666efc1ac476bbf87e..02f3d04c747833283f476912c7dfdc3b13b6bfa3 100644
+--- a/third_party/libopenjpeg20/jp2.c
++++ b/third_party/libopenjpeg20/jp2.c
+@@ -586,6 +586,12 @@ static OPJ_BOOL opj_jp2_read_ihdr(opj_jp2_t *jp2,
+     opj_read_bytes(p_image_header_data, &(jp2->numcomps), 2);   /* NC */
+     p_image_header_data += 2;
+ 
++    if (jp2->h < 1 || jp2->w < 1 || jp2->numcomps < 1) {
++        opj_event_msg(p_manager, EVT_ERROR,
++                      "Wrong values for: w(%d) h(%d) numcomps(%d) (ihdr)\n",
++                      jp2->w, jp2->h, jp2->numcomps);
++        return OPJ_FALSE;
++    }
+     if ((jp2->numcomps - 1U) >=
+             16384U) { /* unsigned underflow is well defined: 1U <= jp2->numcomps <= 16384U */
+         opj_event_msg(p_manager, EVT_ERROR, "Invalid number of components (ihdr)\n");
+@@ -1317,7 +1323,7 @@ static OPJ_BOOL opj_jp2_read_cmap(opj_jp2_t * jp2,
+ 
+ 
+     for (i = 0; i < nr_channels; ++i) {
+-        opj_read_bytes_BE(p_cmap_header_data, &l_value, 2);     /* CMP^i */
++        opj_read_bytes_BE(p_cmap_header_data, &l_value, 2);         /* CMP^i */
+         p_cmap_header_data += 2;
+         cmap[i].cmp = (OPJ_UINT16) l_value;
+ 
+@@ -1599,9 +1605,7 @@ static OPJ_BOOL opj_jp2_read_colr(opj_jp2_t *jp2,
+                       "COLR BOX meth value is not a regular value (%d), "
+                       "so we will ignore the entire Colour Specification box. \n", jp2->meth);
+     }
+-    if (jp2->color.jp2_has_colr) {
+-        jp2->j2k->enumcs = jp2->enumcs;
+-    }
++
+     return OPJ_TRUE;
+ }
+ 
+@@ -3252,6 +3256,18 @@ OPJ_BOOL opj_jp2_set_decoded_resolution_factor(opj_jp2_t *p_jp2,
+     return opj_j2k_set_decoded_resolution_factor(p_jp2->j2k, res_factor, p_manager);
+ }
+ 
++/* ----------------------------------------------------------------------- */
++
++OPJ_BOOL opj_jp2_encoder_set_extra_options(
++    opj_jp2_t *p_jp2,
++    const char* const* p_options,
++    opj_event_mgr_t * p_manager)
++{
++    return opj_j2k_encoder_set_extra_options(p_jp2->j2k, p_options, p_manager);
++}
++
++/* ----------------------------------------------------------------------- */
++
+ /* JPIP specific */
+ 
+ #ifdef USE_JPIP
+diff --git a/third_party/libopenjpeg20/jp2.h b/third_party/libopenjpeg20/jp2.h
+index 34abd5118e3740a02c5692ee92aba3f1a7004431..9e7fa56674cd45d8133518b2b6ebffb0e0a7b348 100644
+--- a/third_party/libopenjpeg20/jp2.h
++++ b/third_party/libopenjpeg20/jp2.h
+@@ -459,6 +459,20 @@ OPJ_BOOL opj_jp2_set_decoded_resolution_factor(opj_jp2_t *p_jp2,
+         OPJ_UINT32 res_factor,
+         opj_event_mgr_t * p_manager);
+ 
++/**
++ * Specify extra options for the encoder.
++ *
++ * @param  p_jp2        the jpeg2000 codec.
++ * @param  p_options    options
++ * @param  p_manager    the user event manager
++ *
++ * @see opj_encoder_set_extra_options() for more details.
++ */
++OPJ_BOOL opj_jp2_encoder_set_extra_options(
++    opj_jp2_t *p_jp2,
++    const char* const* p_options,
++    opj_event_mgr_t * p_manager);
++
+ 
+ /* TODO MSD: clean these 3 functions */
+ /**
+diff --git a/third_party/libopenjpeg20/mct.c b/third_party/libopenjpeg20/mct.c
+index 81ec223d85a755807ddbba281c3dfc4e14a21a78..c4c2e732e6897d46998e5a666b62df1d49f19445 100644
+--- a/third_party/libopenjpeg20/mct.c
++++ b/third_party/libopenjpeg20/mct.c
+@@ -46,7 +46,6 @@
+ #include <emmintrin.h>
+ #endif
+ #if defined(__SSE4_1__) && !defined(_M_IX86) && !defined(__i386)
+-#define USE_SSE4
+ #include <smmintrin.h>
+ #endif
+ 
+@@ -186,7 +185,7 @@ void opj_mct_decode(
+     OPJ_INT32* OPJ_RESTRICT c2,
+     OPJ_SIZE_T n)
+ {
+-    OPJ_UINT32 i;
++    OPJ_SIZE_T i;
+     for (i = 0; i < n; ++i) {
+         OPJ_INT32 y = c0[i];
+         OPJ_INT32 u = c1[i];
+@@ -212,175 +211,72 @@ OPJ_FLOAT64 opj_mct_getnorm(OPJ_UINT32 compno)
+ /* <summary> */
+ /* Forward irreversible MCT. */
+ /* </summary> */
+-#ifdef USE_SSE4
+ void opj_mct_encode_real(
+-    OPJ_INT32* OPJ_RESTRICT c0,
+-    OPJ_INT32* OPJ_RESTRICT c1,
+-    OPJ_INT32* OPJ_RESTRICT c2,
++    OPJ_FLOAT32* OPJ_RESTRICT c0,
++    OPJ_FLOAT32* OPJ_RESTRICT c1,
++    OPJ_FLOAT32* OPJ_RESTRICT c2,
+     OPJ_SIZE_T n)
+ {
+     OPJ_SIZE_T i;
+-    const OPJ_SIZE_T len = n;
+-
+-    const __m128i ry = _mm_set1_epi32(2449);
+-    const __m128i gy = _mm_set1_epi32(4809);
+-    const __m128i by = _mm_set1_epi32(934);
+-    const __m128i ru = _mm_set1_epi32(1382);
+-    const __m128i gu = _mm_set1_epi32(2714);
+-    /* const __m128i bu = _mm_set1_epi32(4096); */
+-    /* const __m128i rv = _mm_set1_epi32(4096); */
+-    const __m128i gv = _mm_set1_epi32(3430);
+-    const __m128i bv = _mm_set1_epi32(666);
+-    const __m128i mulround = _mm_shuffle_epi32(_mm_cvtsi32_si128(4096),
+-                             _MM_SHUFFLE(1, 0, 1, 0));
+-
+-    for (i = 0; i < (len & ~3U); i += 4) {
+-        __m128i lo, hi;
+-        __m128i y, u, v;
+-        __m128i r = _mm_load_si128((const __m128i *) & (c0[i]));
+-        __m128i g = _mm_load_si128((const __m128i *) & (c1[i]));
+-        __m128i b = _mm_load_si128((const __m128i *) & (c2[i]));
+-
+-        lo = r;
+-        hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
+-        lo = _mm_mul_epi32(lo, ry);
+-        hi = _mm_mul_epi32(hi, ry);
+-        lo = _mm_add_epi64(lo, mulround);
+-        hi = _mm_add_epi64(hi, mulround);
+-        lo = _mm_srli_epi64(lo, 13);
+-        hi = _mm_slli_epi64(hi, 32 - 13);
+-        y = _mm_blend_epi16(lo, hi, 0xCC);
+-
+-        lo = g;
+-        hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
+-        lo = _mm_mul_epi32(lo, gy);
+-        hi = _mm_mul_epi32(hi, gy);
+-        lo = _mm_add_epi64(lo, mulround);
+-        hi = _mm_add_epi64(hi, mulround);
+-        lo = _mm_srli_epi64(lo, 13);
+-        hi = _mm_slli_epi64(hi, 32 - 13);
+-        y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC));
+-
+-        lo = b;
+-        hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
+-        lo = _mm_mul_epi32(lo, by);
+-        hi = _mm_mul_epi32(hi, by);
+-        lo = _mm_add_epi64(lo, mulround);
+-        hi = _mm_add_epi64(hi, mulround);
+-        lo = _mm_srli_epi64(lo, 13);
+-        hi = _mm_slli_epi64(hi, 32 - 13);
+-        y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC));
+-        _mm_store_si128((__m128i *) & (c0[i]), y);
+-
+-        /*lo = b;
+-        hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
+-        lo = _mm_mul_epi32(lo, mulround);
+-        hi = _mm_mul_epi32(hi, mulround);*/
+-        lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 2, 0)));
+-        hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 3, 1)));
+-        lo = _mm_slli_epi64(lo, 12);
+-        hi = _mm_slli_epi64(hi, 12);
+-        lo = _mm_add_epi64(lo, mulround);
+-        hi = _mm_add_epi64(hi, mulround);
+-        lo = _mm_srli_epi64(lo, 13);
+-        hi = _mm_slli_epi64(hi, 32 - 13);
+-        u = _mm_blend_epi16(lo, hi, 0xCC);
+-
+-        lo = r;
+-        hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
+-        lo = _mm_mul_epi32(lo, ru);
+-        hi = _mm_mul_epi32(hi, ru);
+-        lo = _mm_add_epi64(lo, mulround);
+-        hi = _mm_add_epi64(hi, mulround);
+-        lo = _mm_srli_epi64(lo, 13);
+-        hi = _mm_slli_epi64(hi, 32 - 13);
+-        u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC));
+-
+-        lo = g;
+-        hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
+-        lo = _mm_mul_epi32(lo, gu);
+-        hi = _mm_mul_epi32(hi, gu);
+-        lo = _mm_add_epi64(lo, mulround);
+-        hi = _mm_add_epi64(hi, mulround);
+-        lo = _mm_srli_epi64(lo, 13);
+-        hi = _mm_slli_epi64(hi, 32 - 13);
+-        u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC));
+-        _mm_store_si128((__m128i *) & (c1[i]), u);
++#ifdef USE_SSE
++    const __m128 YR = _mm_set1_ps(0.299f);
++    const __m128 YG = _mm_set1_ps(0.587f);
++    const __m128 YB = _mm_set1_ps(0.114f);
++    const __m128 UR = _mm_set1_ps(-0.16875f);
++    const __m128 UG = _mm_set1_ps(-0.331260f);
++    const __m128 UB = _mm_set1_ps(0.5f);
++    const __m128 VR = _mm_set1_ps(0.5f);
++    const __m128 VG = _mm_set1_ps(-0.41869f);
++    const __m128 VB = _mm_set1_ps(-0.08131f);
++    for (i = 0; i < (n >> 3); i ++) {
++        __m128 r, g, b, y, u, v;
++
++        r = _mm_load_ps(c0);
++        g = _mm_load_ps(c1);
++        b = _mm_load_ps(c2);
++        y = _mm_add_ps(_mm_add_ps(_mm_mul_ps(r, YR), _mm_mul_ps(g, YG)),
++                       _mm_mul_ps(b, YB));
++        u = _mm_add_ps(_mm_add_ps(_mm_mul_ps(r, UR), _mm_mul_ps(g, UG)),
++                       _mm_mul_ps(b, UB));
++        v = _mm_add_ps(_mm_add_ps(_mm_mul_ps(r, VR), _mm_mul_ps(g, VG)),
++                       _mm_mul_ps(b, VB));
++        _mm_store_ps(c0, y);
++        _mm_store_ps(c1, u);
++        _mm_store_ps(c2, v);
++        c0 += 4;
++        c1 += 4;
++        c2 += 4;
+ 
+-        /*lo = r;
+-        hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
+-        lo = _mm_mul_epi32(lo, mulround);
+-        hi = _mm_mul_epi32(hi, mulround);*/
+-        lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 2, 0)));
+-        hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 3, 1)));
+-        lo = _mm_slli_epi64(lo, 12);
+-        hi = _mm_slli_epi64(hi, 12);
+-        lo = _mm_add_epi64(lo, mulround);
+-        hi = _mm_add_epi64(hi, mulround);
+-        lo = _mm_srli_epi64(lo, 13);
+-        hi = _mm_slli_epi64(hi, 32 - 13);
+-        v = _mm_blend_epi16(lo, hi, 0xCC);
+-
+-        lo = g;
+-        hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
+-        lo = _mm_mul_epi32(lo, gv);
+-        hi = _mm_mul_epi32(hi, gv);
+-        lo = _mm_add_epi64(lo, mulround);
+-        hi = _mm_add_epi64(hi, mulround);
+-        lo = _mm_srli_epi64(lo, 13);
+-        hi = _mm_slli_epi64(hi, 32 - 13);
+-        v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC));
+-
+-        lo = b;
+-        hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
+-        lo = _mm_mul_epi32(lo, bv);
+-        hi = _mm_mul_epi32(hi, bv);
+-        lo = _mm_add_epi64(lo, mulround);
+-        hi = _mm_add_epi64(hi, mulround);
+-        lo = _mm_srli_epi64(lo, 13);
+-        hi = _mm_slli_epi64(hi, 32 - 13);
+-        v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC));
+-        _mm_store_si128((__m128i *) & (c2[i]), v);
+-    }
+-    for (; i < len; ++i) {
+-        OPJ_INT32 r = c0[i];
+-        OPJ_INT32 g = c1[i];
+-        OPJ_INT32 b = c2[i];
+-        OPJ_INT32 y =  opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g,
+-                       4809) + opj_int_fix_mul(b, 934);
+-        OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g,
+-                      2714) + opj_int_fix_mul(b, 4096);
+-        OPJ_INT32 v =  opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g,
+-                       3430) - opj_int_fix_mul(b, 666);
+-        c0[i] = y;
+-        c1[i] = u;
+-        c2[i] = v;
++        r = _mm_load_ps(c0);
++        g = _mm_load_ps(c1);
++        b = _mm_load_ps(c2);
++        y = _mm_add_ps(_mm_add_ps(_mm_mul_ps(r, YR), _mm_mul_ps(g, YG)),
++                       _mm_mul_ps(b, YB));
++        u = _mm_add_ps(_mm_add_ps(_mm_mul_ps(r, UR), _mm_mul_ps(g, UG)),
++                       _mm_mul_ps(b, UB));
++        v = _mm_add_ps(_mm_add_ps(_mm_mul_ps(r, VR), _mm_mul_ps(g, VG)),
++                       _mm_mul_ps(b, VB));
++        _mm_store_ps(c0, y);
++        _mm_store_ps(c1, u);
++        _mm_store_ps(c2, v);
++        c0 += 4;
++        c1 += 4;
++        c2 += 4;
+     }
+-}
+-#else
+-void opj_mct_encode_real(
+-    OPJ_INT32* OPJ_RESTRICT c0,
+-    OPJ_INT32* OPJ_RESTRICT c1,
+-    OPJ_INT32* OPJ_RESTRICT c2,
+-    OPJ_SIZE_T n)
+-{
+-    OPJ_UINT32 i;
++    n &= 7;
++#endif
+     for (i = 0; i < n; ++i) {
+-        OPJ_INT32 r = c0[i];
+-        OPJ_INT32 g = c1[i];
+-        OPJ_INT32 b = c2[i];
+-        OPJ_INT32 y =  opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g,
+-                       4809) + opj_int_fix_mul(b, 934);
+-        OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g,
+-                      2714) + opj_int_fix_mul(b, 4096);
+-        OPJ_INT32 v =  opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g,
+-                       3430) - opj_int_fix_mul(b, 666);
++        OPJ_FLOAT32 r = c0[i];
++        OPJ_FLOAT32 g = c1[i];
++        OPJ_FLOAT32 b = c2[i];
++        OPJ_FLOAT32 y = 0.299f * r + 0.587f * g + 0.114f * b;
++        OPJ_FLOAT32 u = -0.16875f * r - 0.331260f * g + 0.5f * b;
++        OPJ_FLOAT32 v = 0.5f * r - 0.41869f * g - 0.08131f * b;
+         c0[i] = y;
+         c1[i] = u;
+         c2[i] = v;
+     }
+ }
+-#endif
+ 
+ /* <summary> */
+ /* Inverse irreversible MCT. */
+@@ -391,7 +287,7 @@ void opj_mct_decode_real(
+     OPJ_FLOAT32* OPJ_RESTRICT c2,
+     OPJ_SIZE_T n)
+ {
+-    OPJ_UINT32 i;
++    OPJ_SIZE_T i;
+ #ifdef USE_SSE
+     __m128 vrv, vgu, vgv, vbu;
+     vrv = _mm_set1_ps(1.402f);
+diff --git a/third_party/libopenjpeg20/mct.h b/third_party/libopenjpeg20/mct.h
+index 2e37ce7333f85083d683e510e2107b92b73f5d06..3e1f5e4946cf798b8f2e5c6ea098afb8df7dc23f 100644
+--- a/third_party/libopenjpeg20/mct.h
++++ b/third_party/libopenjpeg20/mct.h
+@@ -85,8 +85,9 @@ Apply an irreversible multi-component transform to an image
+ @param c2 Samples blue component
+ @param n Number of samples for each component
+ */
+-void opj_mct_encode_real(OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1,
+-                         OPJ_INT32* OPJ_RESTRICT c2, OPJ_SIZE_T n);
++void opj_mct_encode_real(OPJ_FLOAT32* OPJ_RESTRICT c0,
++                         OPJ_FLOAT32* OPJ_RESTRICT c1,
++                         OPJ_FLOAT32* OPJ_RESTRICT c2, OPJ_SIZE_T n);
+ /**
+ Apply an irreversible multi-component inverse transform to an image
+ @param c0 Samples for luminance component
+diff --git a/third_party/libopenjpeg20/mqc.c b/third_party/libopenjpeg20/mqc.c
+index 6299b171d8788ffb32bd13c36027eb3f10b6983a..3caab9e7c4411d6dc21cd3908f44ca1f84b8c8b1 100644
+--- a/third_party/libopenjpeg20/mqc.c
++++ b/third_party/libopenjpeg20/mqc.c
+@@ -46,27 +46,6 @@
+ /** @name Local static functions */
+ /*@{*/
+ 
+-/**
+-Output a byte, doing bit-stuffing if necessary.
+-After a 0xff byte, the next byte must be smaller than 0x90.
+-@param mqc MQC handle
+-*/
+-static void opj_mqc_byteout(opj_mqc_t *mqc);
+-/**
+-Renormalize mqc->a and mqc->c while encoding, so that mqc->a stays between 0x8000 and 0x10000
+-@param mqc MQC handle
+-*/
+-static void opj_mqc_renorme(opj_mqc_t *mqc);
+-/**
+-Encode the most probable symbol
+-@param mqc MQC handle
+-*/
+-static void opj_mqc_codemps(opj_mqc_t *mqc);
+-/**
+-Encode the most least symbol
+-@param mqc MQC handle
+-*/
+-static void opj_mqc_codelps(opj_mqc_t *mqc);
+ /**
+ Fill mqc->c with 1's for flushing
+ @param mqc MQC handle
+@@ -182,80 +161,6 @@ static const opj_mqc_state_t mqc_states[47 * 2] = {
+ ==========================================================
+ */
+ 
+-static void opj_mqc_byteout(opj_mqc_t *mqc)
+-{
+-    /* bp is initialized to start - 1 in opj_mqc_init_enc() */
+-    /* but this is safe, see opj_tcd_code_block_enc_allocate_data() */
+-    assert(mqc->bp >= mqc->start - 1);
+-    if (*mqc->bp == 0xff) {
+-        mqc->bp++;
+-        *mqc->bp = (OPJ_BYTE)(mqc->c >> 20);
+-        mqc->c &= 0xfffff;
+-        mqc->ct = 7;
+-    } else {
+-        if ((mqc->c & 0x8000000) == 0) {
+-            mqc->bp++;
+-            *mqc->bp = (OPJ_BYTE)(mqc->c >> 19);
+-            mqc->c &= 0x7ffff;
+-            mqc->ct = 8;
+-        } else {
+-            (*mqc->bp)++;
+-            if (*mqc->bp == 0xff) {
+-                mqc->c &= 0x7ffffff;
+-                mqc->bp++;
+-                *mqc->bp = (OPJ_BYTE)(mqc->c >> 20);
+-                mqc->c &= 0xfffff;
+-                mqc->ct = 7;
+-            } else {
+-                mqc->bp++;
+-                *mqc->bp = (OPJ_BYTE)(mqc->c >> 19);
+-                mqc->c &= 0x7ffff;
+-                mqc->ct = 8;
+-            }
+-        }
+-    }
+-}
+-
+-static void opj_mqc_renorme(opj_mqc_t *mqc)
+-{
+-    do {
+-        mqc->a <<= 1;
+-        mqc->c <<= 1;
+-        mqc->ct--;
+-        if (mqc->ct == 0) {
+-            opj_mqc_byteout(mqc);
+-        }
+-    } while ((mqc->a & 0x8000) == 0);
+-}
+-
+-static void opj_mqc_codemps(opj_mqc_t *mqc)
+-{
+-    mqc->a -= (*mqc->curctx)->qeval;
+-    if ((mqc->a & 0x8000) == 0) {
+-        if (mqc->a < (*mqc->curctx)->qeval) {
+-            mqc->a = (*mqc->curctx)->qeval;
+-        } else {
+-            mqc->c += (*mqc->curctx)->qeval;
+-        }
+-        *mqc->curctx = (*mqc->curctx)->nmps;
+-        opj_mqc_renorme(mqc);
+-    } else {
+-        mqc->c += (*mqc->curctx)->qeval;
+-    }
+-}
+-
+-static void opj_mqc_codelps(opj_mqc_t *mqc)
+-{
+-    mqc->a -= (*mqc->curctx)->qeval;
+-    if (mqc->a < (*mqc->curctx)->qeval) {
+-        mqc->c += (*mqc->curctx)->qeval;
+-    } else {
+-        mqc->a = (*mqc->curctx)->qeval;
+-    }
+-    *mqc->curctx = (*mqc->curctx)->nlps;
+-    opj_mqc_renorme(mqc);
+-}
+-
+ static void opj_mqc_setbits(opj_mqc_t *mqc)
+ {
+     OPJ_UINT32 tempc = mqc->c + mqc->a;
+@@ -303,14 +208,6 @@ void opj_mqc_init_enc(opj_mqc_t *mqc, OPJ_BYTE *bp)
+     mqc->end_of_byte_stream_counter = 0;
+ }
+ 
+-void opj_mqc_encode(opj_mqc_t *mqc, OPJ_UINT32 d)
+-{
+-    if ((*mqc->curctx)->mps == d) {
+-        opj_mqc_codemps(mqc);
+-    } else {
+-        opj_mqc_codelps(mqc);
+-    }
+-}
+ 
+ void opj_mqc_flush(opj_mqc_t *mqc)
+ {
+@@ -329,8 +226,6 @@ void opj_mqc_flush(opj_mqc_t *mqc)
+     }
+ }
+ 
+-#define BYPASS_CT_INIT  0xDEADBEEF
+-
+ void opj_mqc_bypass_init_enc(opj_mqc_t *mqc)
+ {
+     /* This function is normally called after at least one opj_mqc_flush() */
+@@ -475,6 +370,38 @@ void opj_mqc_erterm_enc(opj_mqc_t *mqc)
+     }
+ }
+ 
++/**
++Encode the most probable symbol
++@param mqc MQC handle
++*/
++static INLINE void opj_mqc_codemps(opj_mqc_t *mqc)
++{
++    opj_mqc_codemps_macro(mqc, mqc->curctx, mqc->a, mqc->c, mqc->ct);
++}
++
++/**
++Encode the least probable symbol
++@param mqc MQC handle
++*/
++static INLINE void opj_mqc_codelps(opj_mqc_t *mqc)
++{
++    opj_mqc_codelps_macro(mqc, mqc->curctx, mqc->a, mqc->c, mqc->ct);
++}
++
++/**
++Encode a symbol using the MQ-coder
++@param mqc MQC handle
++@param d The symbol to be encoded (0 or 1)
++*/
++static INLINE void opj_mqc_encode(opj_mqc_t *mqc, OPJ_UINT32 d)
++{
++    if ((*mqc->curctx)->mps == d) {
++        opj_mqc_codemps(mqc);
++    } else {
++        opj_mqc_codelps(mqc);
++    }
++}
++
+ void opj_mqc_segmark_enc(opj_mqc_t *mqc)
+ {
+     OPJ_UINT32 i;
+@@ -557,4 +484,36 @@ void opj_mqc_setstate(opj_mqc_t *mqc, OPJ_UINT32 ctxno, OPJ_UINT32 msb,
+     mqc->ctxs[ctxno] = &mqc_states[msb + (OPJ_UINT32)(prob << 1)];
+ }
+ 
+-
++void opj_mqc_byteout(opj_mqc_t *mqc)
++{
++    /* bp is initialized to start - 1 in opj_mqc_init_enc() */
++    /* but this is safe, see opj_tcd_code_block_enc_allocate_data() */
++    assert(mqc->bp >= mqc->start - 1);
++    if (*mqc->bp == 0xff) {
++        mqc->bp++;
++        *mqc->bp = (OPJ_BYTE)(mqc->c >> 20);
++        mqc->c &= 0xfffff;
++        mqc->ct = 7;
++    } else {
++        if ((mqc->c & 0x8000000) == 0) {
++            mqc->bp++;
++            *mqc->bp = (OPJ_BYTE)(mqc->c >> 19);
++            mqc->c &= 0x7ffff;
++            mqc->ct = 8;
++        } else {
++            (*mqc->bp)++;
++            if (*mqc->bp == 0xff) {
++                mqc->c &= 0x7ffffff;
++                mqc->bp++;
++                *mqc->bp = (OPJ_BYTE)(mqc->c >> 20);
++                mqc->c &= 0xfffff;
++                mqc->ct = 7;
++            } else {
++                mqc->bp++;
++                *mqc->bp = (OPJ_BYTE)(mqc->c >> 19);
++                mqc->c &= 0x7ffff;
++                mqc->ct = 8;
++            }
++        }
++    }
++}
+\ No newline at end of file
+diff --git a/third_party/libopenjpeg20/mqc.h b/third_party/libopenjpeg20/mqc.h
+index 69a2a79dc06d68b1a973e9aac915d13c6b0f566f..9850fed03161701cb2abee0d130b14186b94dd66 100644
+--- a/third_party/libopenjpeg20/mqc.h
++++ b/third_party/libopenjpeg20/mqc.h
+@@ -96,6 +96,8 @@ typedef struct opj_mqc {
+     OPJ_BYTE backup[OPJ_COMMON_CBLK_DATA_EXTRA];
+ } opj_mqc_t;
+ 
++#define BYPASS_CT_INIT  0xDEADBEEF
++
+ #include "mqc_inl.h"
+ 
+ /** @name Exported functions */
+@@ -135,12 +137,7 @@ Set the current context used for coding/decoding
+ @param ctxno Number that identifies the context
+ */
+ #define opj_mqc_setcurctx(mqc, ctxno)   (mqc)->curctx = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)]
+-/**
+-Encode a symbol using the MQ-coder
+-@param mqc MQC handle
+-@param d The symbol to be encoded (0 or 1)
+-*/
+-void opj_mqc_encode(opj_mqc_t *mqc, OPJ_UINT32 d);
++
+ /**
+ Flush the encoder, so that all remaining data is written
+ @param mqc MQC handle
+diff --git a/third_party/libopenjpeg20/mqc_inl.h b/third_party/libopenjpeg20/mqc_inl.h
+index 310a3287fd918dce3ab66e49e9f4e2706ccfa2de..0031b94be318aef6d34dcbe9b2c936169007204a 100644
+--- a/third_party/libopenjpeg20/mqc_inl.h
++++ b/third_party/libopenjpeg20/mqc_inl.h
+@@ -156,13 +156,13 @@ static INLINE OPJ_UINT32 opj_mqc_raw_decode(opj_mqc_t *mqc)
+     } \
+ }
+ 
+-#define DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct) \
++#define DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct) \
+         register const opj_mqc_state_t **curctx = mqc->curctx; \
+         register OPJ_UINT32 c = mqc->c; \
+         register OPJ_UINT32 a = mqc->a; \
+         register OPJ_UINT32 ct = mqc->ct
+ 
+-#define UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct) \
++#define UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct) \
+         mqc->curctx = curctx; \
+         mqc->c = c; \
+         mqc->a = a; \
+@@ -193,4 +193,90 @@ Decode a symbol
+ #define opj_mqc_decode(d, mqc) \
+     opj_mqc_decode_macro(d, mqc, mqc->curctx, mqc->a, mqc->c, mqc->ct)
+ 
++/**
++Output a byte, doing bit-stuffing if necessary.
++After a 0xff byte, the next byte must be smaller than 0x90.
++@param mqc MQC handle
++*/
++void opj_mqc_byteout(opj_mqc_t *mqc);
++
++/**
++Renormalize mqc->a and mqc->c while encoding, so that mqc->a stays between 0x8000 and 0x10000
++@param mqc MQC handle
++@param a_ value of mqc->a
++@param c_ value of mqc->c
++@param ct_ value of mqc->ct
++*/
++#define opj_mqc_renorme_macro(mqc, a_, c_, ct_) \
++{ \
++    do { \
++        a_ <<= 1; \
++        c_ <<= 1; \
++        ct_--; \
++        if (ct_ == 0) { \
++            mqc->c = c_; \
++            opj_mqc_byteout(mqc); \
++            c_ = mqc->c; \
++            ct_ = mqc->ct; \
++        } \
++    } while( (a_ & 0x8000) == 0); \
++}
++
++#define opj_mqc_codemps_macro(mqc, curctx, a, c, ct) \
++{ \
++    a -= (*curctx)->qeval; \
++    if ((a & 0x8000) == 0) { \
++        if (a < (*curctx)->qeval) { \
++            a = (*curctx)->qeval; \
++        } else { \
++            c += (*curctx)->qeval; \
++        } \
++        *curctx = (*curctx)->nmps; \
++        opj_mqc_renorme_macro(mqc, a, c, ct); \
++    } else { \
++        c += (*curctx)->qeval; \
++    } \
++}
++
++#define opj_mqc_codelps_macro(mqc, curctx, a, c, ct) \
++{ \
++    a -= (*curctx)->qeval; \
++    if (a < (*curctx)->qeval) { \
++        c += (*curctx)->qeval; \
++    } else { \
++        a = (*curctx)->qeval; \
++    } \
++    *curctx = (*curctx)->nlps; \
++    opj_mqc_renorme_macro(mqc, a, c, ct); \
++}
++
++#define opj_mqc_encode_macro(mqc, curctx, a, c, ct, d) \
++{ \
++    if ((*curctx)->mps == (d)) { \
++        opj_mqc_codemps_macro(mqc, curctx, a, c, ct); \
++    } else { \
++        opj_mqc_codelps_macro(mqc, curctx, a, c, ct); \
++    } \
++}
++
++
++#define opj_mqc_bypass_enc_macro(mqc, c, ct, d) \
++{\
++    if (ct == BYPASS_CT_INIT) {\
++        ct = 8;\
++    }\
++    ct--;\
++    c = c + ((d) << ct);\
++    if (ct == 0) {\
++        *mqc->bp = (OPJ_BYTE)c;\
++        ct = 8;\
++        /* If the previous byte was 0xff, make sure that the next msb is 0 */ \
++        if (*mqc->bp == 0xff) {\
++            ct = 7;\
++        }\
++        mqc->bp++;\
++        c = 0;\
++    }\
++}
++
+ #endif /* OPJ_MQC_INL_H */
+diff --git a/third_party/libopenjpeg20/openjpeg.c b/third_party/libopenjpeg20/openjpeg.c
+index 7b12303423b7ad79cf50b6f259a682dcdab25bb4..9c9b6eb0c0ac24f51646993840512190ed443ca6 100644
+--- a/third_party/libopenjpeg20/openjpeg.c
++++ b/third_party/libopenjpeg20/openjpeg.c
+@@ -652,6 +652,14 @@ opj_codec_t* OPJ_CALLCONV opj_create_compress(OPJ_CODEC_FORMAT p_format)
+                 struct opj_image *,
+                 struct opj_event_mgr *)) opj_j2k_setup_encoder;
+ 
++        l_codec->m_codec_data.m_compression.opj_encoder_set_extra_options = (OPJ_BOOL(
++                    *)(void *,
++                       const char* const*,
++                       struct opj_event_mgr *)) opj_j2k_encoder_set_extra_options;
++
++        l_codec->opj_set_threads =
++            (OPJ_BOOL(*)(void * p_codec, OPJ_UINT32 num_threads)) opj_j2k_set_threads;
++
+         l_codec->m_codec = opj_j2k_create_compress();
+         if (! l_codec->m_codec) {
+             opj_free(l_codec);
+@@ -690,6 +698,14 @@ opj_codec_t* OPJ_CALLCONV opj_create_compress(OPJ_CODEC_FORMAT p_format)
+                 struct opj_image *,
+                 struct opj_event_mgr *)) opj_jp2_setup_encoder;
+ 
++        l_codec->m_codec_data.m_compression.opj_encoder_set_extra_options = (OPJ_BOOL(
++                    *)(void *,
++                       const char* const*,
++                       struct opj_event_mgr *)) opj_jp2_encoder_set_extra_options;
++
++        l_codec->opj_set_threads =
++            (OPJ_BOOL(*)(void * p_codec, OPJ_UINT32 num_threads)) opj_jp2_set_threads;
++
+         l_codec->m_codec = opj_jp2_create(OPJ_FALSE);
+         if (! l_codec->m_codec) {
+             opj_free(l_codec);
+@@ -718,11 +734,11 @@ void OPJ_CALLCONV opj_set_default_encoder_parameters(opj_cparameters_t
+         parameters->cp_cinema = OPJ_OFF; /* DEPRECATED */
+         parameters->rsiz = OPJ_PROFILE_NONE;
+         parameters->max_comp_size = 0;
+-        parameters->numresolution = 6;
++        parameters->numresolution = OPJ_COMP_PARAM_DEFAULT_NUMRESOLUTION;
+         parameters->cp_rsiz = OPJ_STD_RSIZ; /* DEPRECATED */
+-        parameters->cblockw_init = 64;
+-        parameters->cblockh_init = 64;
+-        parameters->prog_order = OPJ_LRCP;
++        parameters->cblockw_init = OPJ_COMP_PARAM_DEFAULT_CBLOCKW;
++        parameters->cblockh_init = OPJ_COMP_PARAM_DEFAULT_CBLOCKH;
++        parameters->prog_order = OPJ_COMP_PARAM_DEFAULT_PROG_ORDER;
+         parameters->roi_compno = -1;        /* no ROI */
+         parameters->subsampling_dx = 1;
+         parameters->subsampling_dy = 1;
+@@ -788,6 +804,27 @@ OPJ_BOOL OPJ_CALLCONV opj_setup_encoder(opj_codec_t *p_codec,
+     return OPJ_FALSE;
+ }
+ 
++/* ----------------------------------------------------------------------- */
++
++OPJ_BOOL OPJ_CALLCONV opj_encoder_set_extra_options(opj_codec_t *p_codec,
++        const char* const* options)
++{
++    if (p_codec) {
++        opj_codec_private_t * l_codec = (opj_codec_private_t *) p_codec;
++
++        if (! l_codec->is_decompressor) {
++            return l_codec->m_codec_data.m_compression.opj_encoder_set_extra_options(
++                       l_codec->m_codec,
++                       options,
++                       &(l_codec->m_event_mgr));
++        }
++    }
++
++    return OPJ_FALSE;
++}
++
++/* ----------------------------------------------------------------------- */
++
+ OPJ_BOOL OPJ_CALLCONV opj_start_compress(opj_codec_t *p_codec,
+         opj_image_t * p_image,
+         opj_stream_t *p_stream)
+diff --git a/third_party/libopenjpeg20/openjpeg.h b/third_party/libopenjpeg20/openjpeg.h
+index 53a0e10c54b3312cd6722b57ad66b93f69905f67..269ac329ae097fc938b54f8469cf3bd7eeb93a57 100644
+--- a/third_party/libopenjpeg20/openjpeg.h
++++ b/third_party/libopenjpeg20/openjpeg.h
+@@ -78,7 +78,7 @@ Most compilers implement their own version of this keyword ...
+ 
+ #if defined(OPJ_STATIC) || !defined(_WIN32)
+ /* http://gcc.gnu.org/wiki/Visibility */
+-#   if __GNUC__ >= 4
++#   if !defined(_WIN32) && __GNUC__ >= 4
+ #       if defined(OPJ_STATIC) /* static library uses "hidden" */
+ #           define OPJ_API    __attribute__ ((visibility ("hidden")))
+ #       else
+@@ -204,11 +204,11 @@ typedef size_t   OPJ_SIZE_T;
+ #define OPJ_PROFILE_BC_MULTI    0x0200 /** Multi Tile Broadcast profile defined in 15444-1 AMD3 */
+ #define OPJ_PROFILE_BC_MULTI_R  0x0300 /** Multi Tile Reversible Broadcast profile defined in 15444-1 AMD3 */
+ #define OPJ_PROFILE_IMF_2K      0x0400 /** 2K Single Tile Lossy IMF profile defined in 15444-1 AMD 8 */
+-#define OPJ_PROFILE_IMF_4K      0x0401 /** 4K Single Tile Lossy IMF profile defined in 15444-1 AMD 8 */
+-#define OPJ_PROFILE_IMF_8K      0x0402 /** 8K Single Tile Lossy IMF profile defined in 15444-1 AMD 8 */
+-#define OPJ_PROFILE_IMF_2K_R    0x0403 /** 2K Single/Multi Tile Reversible IMF profile defined in 15444-1 AMD 8 */
++#define OPJ_PROFILE_IMF_4K      0x0500 /** 4K Single Tile Lossy IMF profile defined in 15444-1 AMD 8 */
++#define OPJ_PROFILE_IMF_8K      0x0600 /** 8K Single Tile Lossy IMF profile defined in 15444-1 AMD 8 */
++#define OPJ_PROFILE_IMF_2K_R    0x0700 /** 2K Single/Multi Tile Reversible IMF profile defined in 15444-1 AMD 8 */
+ #define OPJ_PROFILE_IMF_4K_R    0x0800 /** 4K Single/Multi Tile Reversible IMF profile defined in 15444-1 AMD 8 */
+-#define OPJ_PROFILE_IMF_8K_R    0x0801  /** 8K Single/Multi Tile Reversible IMF profile defined in 15444-1 AMD 8 */
++#define OPJ_PROFILE_IMF_8K_R    0x0900 /** 8K Single/Multi Tile Reversible IMF profile defined in 15444-1 AMD 8 */
+ 
+ /**
+  * JPEG 2000 Part-2 extensions
+@@ -225,6 +225,36 @@ typedef size_t   OPJ_SIZE_T;
+ #define OPJ_IS_IMF(v)        (((v) >= OPJ_PROFILE_IMF_2K)&&((v) <= ((OPJ_PROFILE_IMF_8K_R) | (0x009b))))
+ #define OPJ_IS_PART2(v)      ((v) & OPJ_PROFILE_PART2)
+ 
++#define OPJ_GET_IMF_PROFILE(v)   ((v) & 0xff00)      /** Extract IMF profile without mainlevel/sublevel */
++#define OPJ_GET_IMF_MAINLEVEL(v) ((v) & 0xf)         /** Extract IMF main level */
++#define OPJ_GET_IMF_SUBLEVEL(v)  (((v) >> 4) & 0xf)  /** Extract IMF sub level */
++
++#define OPJ_IMF_MAINLEVEL_MAX    11   /** Maximum main level */
++
++/** Max. Components Sampling Rate (MSamples/sec) per IMF main level */
++#define OPJ_IMF_MAINLEVEL_1_MSAMPLESEC   65      /** MSamples/sec for IMF main level 1 */
++#define OPJ_IMF_MAINLEVEL_2_MSAMPLESEC   130     /** MSamples/sec for IMF main level 2 */
++#define OPJ_IMF_MAINLEVEL_3_MSAMPLESEC   195     /** MSamples/sec for IMF main level 3 */
++#define OPJ_IMF_MAINLEVEL_4_MSAMPLESEC   260     /** MSamples/sec for IMF main level 4 */
++#define OPJ_IMF_MAINLEVEL_5_MSAMPLESEC   520     /** MSamples/sec for IMF main level 5 */
++#define OPJ_IMF_MAINLEVEL_6_MSAMPLESEC   1200    /** MSamples/sec for IMF main level 6 */
++#define OPJ_IMF_MAINLEVEL_7_MSAMPLESEC   2400    /** MSamples/sec for IMF main level 7 */
++#define OPJ_IMF_MAINLEVEL_8_MSAMPLESEC   4800    /** MSamples/sec for IMF main level 8 */
++#define OPJ_IMF_MAINLEVEL_9_MSAMPLESEC   9600    /** MSamples/sec for IMF main level 9 */
++#define OPJ_IMF_MAINLEVEL_10_MSAMPLESEC  19200   /** MSamples/sec for IMF main level 10 */
++#define OPJ_IMF_MAINLEVEL_11_MSAMPLESEC  38400   /** MSamples/sec for IMF main level 11 */
++
++/** Max. compressed Bit Rate (Mbits/s) per IMF sub level */
++#define OPJ_IMF_SUBLEVEL_1_MBITSSEC      200     /** Mbits/s for IMF sub level 1 */
++#define OPJ_IMF_SUBLEVEL_2_MBITSSEC      400     /** Mbits/s for IMF sub level 2 */
++#define OPJ_IMF_SUBLEVEL_3_MBITSSEC      800     /** Mbits/s for IMF sub level 3 */
++#define OPJ_IMF_SUBLEVEL_4_MBITSSEC     1600     /** Mbits/s for IMF sub level 4 */
++#define OPJ_IMF_SUBLEVEL_5_MBITSSEC     3200     /** Mbits/s for IMF sub level 5 */
++#define OPJ_IMF_SUBLEVEL_6_MBITSSEC     6400     /** Mbits/s for IMF sub level 6 */
++#define OPJ_IMF_SUBLEVEL_7_MBITSSEC    12800     /** Mbits/s for IMF sub level 7 */
++#define OPJ_IMF_SUBLEVEL_8_MBITSSEC    25600     /** Mbits/s for IMF sub level 8 */
++#define OPJ_IMF_SUBLEVEL_9_MBITSSEC    51200     /** Mbits/s for IMF sub level 9 */
++
+ /**
+  * JPEG 2000 codestream and component size limits in cinema profiles
+  * */
+@@ -318,6 +348,10 @@ typedef void (*opj_msg_callback)(const char *msg, void *client_data);
+ ==========================================================
+ */
+ 
++#ifndef OPJ_UINT32_SEMANTICALLY_BUT_INT32
++#define OPJ_UINT32_SEMANTICALLY_BUT_INT32 OPJ_INT32
++#endif
++
+ /**
+  * Progression order changes
+  *
+@@ -333,10 +367,10 @@ typedef struct opj_poc {
+     OPJ_PROG_ORDER prg1, prg;
+     /** Progression order string*/
+     OPJ_CHAR progorder[5];
+-    /** Tile number */
++    /** Tile number (starting at 1) */
+     OPJ_UINT32 tile;
+     /** Start and end values for Tile width and height*/
+-    OPJ_INT32 tx0, tx1, ty0, ty1;
++    OPJ_UINT32_SEMANTICALLY_BUT_INT32 tx0, tx1, ty0, ty1;
+     /** Start value, initialised in pi_initialise_encode*/
+     OPJ_UINT32 layS, resS, compS, prcS;
+     /** End value, initialised in pi_initialise_encode */
+@@ -1314,15 +1348,14 @@ OPJ_API OPJ_BOOL OPJ_CALLCONV opj_setup_decoder(opj_codec_t *p_codec,
+  * number, or "ALL_CPUS". If OPJ_NUM_THREADS is set and this function is called,
+  * this function will override the behaviour of the environment variable.
+  *
+- * Currently this function must be called after opj_setup_decoder() and
+- * before opj_read_header().
+- *
+- * Note: currently only has effect on the decompressor.
++ * This function must be called after opj_setup_decoder() and
++ * before opj_read_header() for the decoding side, or after opj_setup_encoder()
++ * and before opj_start_compress() for the encoding side.
+  *
+- * @param p_codec       decompressor handler
++ * @param p_codec       decompressor or compressor handler
+  * @param num_threads   number of threads.
+  *
+- * @return OPJ_TRUE     if the decoder is correctly set
++ * @return OPJ_TRUE     if the function is successful.
+  */
+ OPJ_API OPJ_BOOL OPJ_CALLCONV opj_codec_set_threads(opj_codec_t *p_codec,
+         int num_threads);
+@@ -1546,6 +1579,33 @@ OPJ_API OPJ_BOOL OPJ_CALLCONV opj_setup_encoder(opj_codec_t *p_codec,
+         opj_cparameters_t *parameters,
+         opj_image_t *image);
+ 
++
++/**
++ * Specify extra options for the encoder.
++ *
++ * This may be called after opj_setup_encoder() and before opj_start_compress()
++ *
++ * This is the way to add new options in a fully ABI compatible way, without
++ * extending the opj_cparameters_t structure.
++ *
++ * Currently supported options are:
++ * <ul>
++ * <li>PLT=YES/NO. Defaults to NO. If set to YES, PLT marker segments,
++ *     indicating the length of each packet in the tile-part header, will be
++ *     written. Since 2.3.2</li>
++ * </ul>
++ *
++ * @param p_codec       Compressor handle
++ * @param p_options     Compression options. This should be a NULL terminated
++ *                      array of strings. Each string is of the form KEY=VALUE.
++ *
++ * @return OPJ_TRUE in case of success.
++ * @since 2.3.2
++ */
++OPJ_API OPJ_BOOL OPJ_CALLCONV opj_encoder_set_extra_options(
++    opj_codec_t *p_codec,
++    const char* const* p_options);
++
+ /**
+  * Start to compress the current image.
+  * @param p_codec       Compressor handle
+diff --git a/third_party/libopenjpeg20/opj_codec.h b/third_party/libopenjpeg20/opj_codec.h
+index b962b121633e1fb10c65a8b2bee5b5d99c8cdee3..8a8af9119e385033a404ad3a347814de39912589 100644
+--- a/third_party/libopenjpeg20/opj_codec.h
++++ b/third_party/libopenjpeg20/opj_codec.h
+@@ -148,6 +148,11 @@ typedef struct opj_codec_private {
+                                           opj_cparameters_t * p_param,
+                                           struct opj_image * p_image,
+                                           struct opj_event_mgr * p_manager);
++
++            OPJ_BOOL(* opj_encoder_set_extra_options)(void * p_codec,
++                    const char* const* p_options,
++                    struct opj_event_mgr * p_manager);
++
+         } m_compression;
+     } m_codec_data;
+     /** FIXME DOC*/
+diff --git a/third_party/libopenjpeg20/opj_common.h b/third_party/libopenjpeg20/opj_common.h
+index a051339154b69e295bddf24a0ca539cf7f773df8..ee8adf4725c4bcaf9e1b8489abdc33c2b04b1264 100644
+--- a/third_party/libopenjpeg20/opj_common.h
++++ b/third_party/libopenjpeg20/opj_common.h
+@@ -38,4 +38,10 @@
+ */
+ #define OPJ_COMMON_CBLK_DATA_EXTRA        2    /**< Margin for a fake FFFF marker */
+ 
++
++#define OPJ_COMP_PARAM_DEFAULT_CBLOCKW        64
++#define OPJ_COMP_PARAM_DEFAULT_CBLOCKH        64
++#define OPJ_COMP_PARAM_DEFAULT_PROG_ORDER     OPJ_LRCP
++#define OPJ_COMP_PARAM_DEFAULT_NUMRESOLUTION  6
++
+ #endif /* OPJ_COMMMON_H */
+diff --git a/third_party/libopenjpeg20/opj_config.h b/third_party/libopenjpeg20/opj_config.h
+index fda1f641308f11ee2a6fb7f5bbad3fae2d845fac..e5d82be4347e6c83c5aee7eaea481fc34d443138 100644
+--- a/third_party/libopenjpeg20/opj_config.h
++++ b/third_party/libopenjpeg20/opj_config.h
+@@ -12,5 +12,5 @@
+ 
+ /* Version number. */
+ #define OPJ_VERSION_MAJOR 2
+-#define OPJ_VERSION_MINOR 3
+-#define OPJ_VERSION_BUILD 1
++#define OPJ_VERSION_MINOR 4
++#define OPJ_VERSION_BUILD 0
+diff --git a/third_party/libopenjpeg20/opj_config_private.h b/third_party/libopenjpeg20/opj_config_private.h
+index b6986f9320cf4afdeab240dfb65c2e66d67c65f3..ee96ceefb407894b8e1f616cd70df87d71a0859b 100644
+--- a/third_party/libopenjpeg20/opj_config_private.h
++++ b/third_party/libopenjpeg20/opj_config_private.h
+@@ -7,7 +7,7 @@
+ /* create opj_config_private.h for CMake */
+ #define OPJ_HAVE_INTTYPES_H 	1
+ 
+-#define OPJ_PACKAGE_VERSION "2.3.1"
++#define OPJ_PACKAGE_VERSION "2.4.0"
+ 
+ /* Not used by openjp2*/
+ /*#define HAVE_MEMORY_H 1*/
+diff --git a/third_party/libopenjpeg20/opj_intmath.h b/third_party/libopenjpeg20/opj_intmath.h
+index 754b5512ff2cf64640359c21d23259076fab7159..afe69d90c072574aa735aa0e9a1947602f66a1d4 100644
+--- a/third_party/libopenjpeg20/opj_intmath.h
++++ b/third_party/libopenjpeg20/opj_intmath.h
+@@ -208,6 +208,16 @@ static INLINE OPJ_INT32 opj_int_floordivpow2(OPJ_INT32 a, OPJ_INT32 b)
+ {
+     return a >> b;
+ }
++
++/**
++Divide an unsigned integer by a power of 2 and round downwards
++@return Returns a divided by 2^b
++*/
++static INLINE OPJ_UINT32 opj_uint_floordivpow2(OPJ_UINT32 a, OPJ_UINT32 b)
++{
++    return a >> b;
++}
++
+ /**
+ Get logarithm of an integer and round downwards
+ @return Returns log2(a)
+diff --git a/third_party/libopenjpeg20/pi.c b/third_party/libopenjpeg20/pi.c
+index 5f3d9ec1222e7230d694a1cb35b22bedcae6b5ec..1430d12a9bf64a74f581bb9ad0df92e03ee538cf 100644
+--- a/third_party/libopenjpeg20/pi.c
++++ b/third_party/libopenjpeg20/pi.c
+@@ -36,6 +36,8 @@
+  * POSSIBILITY OF SUCH DAMAGE.
+  */
+ 
++#define OPJ_UINT32_SEMANTICALLY_BUT_INT32 OPJ_UINT32
++
+ #include "opj_includes.h"
+ 
+ /** @defgroup PI PI - Implementation of a packet iterator */
+@@ -91,10 +93,10 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi);
+  */
+ static void opj_pi_update_encode_poc_and_final(opj_cp_t *p_cp,
+         OPJ_UINT32 p_tileno,
+-        OPJ_INT32 p_tx0,
+-        OPJ_INT32 p_tx1,
+-        OPJ_INT32 p_ty0,
+-        OPJ_INT32 p_ty1,
++        OPJ_UINT32 p_tx0,
++        OPJ_UINT32 p_tx1,
++        OPJ_UINT32 p_ty0,
++        OPJ_UINT32 p_ty1,
+         OPJ_UINT32 p_max_prec,
+         OPJ_UINT32 p_max_res,
+         OPJ_UINT32 p_dx_min,
+@@ -118,10 +120,10 @@ static void opj_pi_update_encode_poc_and_final(opj_cp_t *p_cp,
+ static void opj_pi_update_encode_not_poc(opj_cp_t *p_cp,
+         OPJ_UINT32 p_num_comps,
+         OPJ_UINT32 p_tileno,
+-        OPJ_INT32 p_tx0,
+-        OPJ_INT32 p_tx1,
+-        OPJ_INT32 p_ty0,
+-        OPJ_INT32 p_ty1,
++        OPJ_UINT32 p_tx0,
++        OPJ_UINT32 p_tx1,
++        OPJ_UINT32 p_ty0,
++        OPJ_UINT32 p_ty1,
+         OPJ_UINT32 p_max_prec,
+         OPJ_UINT32 p_max_res,
+         OPJ_UINT32 p_dx_min,
+@@ -144,10 +146,10 @@ static void opj_pi_update_encode_not_poc(opj_cp_t *p_cp,
+ static void opj_get_encoding_parameters(const opj_image_t *p_image,
+                                         const opj_cp_t *p_cp,
+                                         OPJ_UINT32  tileno,
+-                                        OPJ_INT32  * p_tx0,
+-                                        OPJ_INT32 * p_tx1,
+-                                        OPJ_INT32 * p_ty0,
+-                                        OPJ_INT32 * p_ty1,
++                                        OPJ_UINT32 * p_tx0,
++                                        OPJ_UINT32 * p_tx1,
++                                        OPJ_UINT32 * p_ty0,
++                                        OPJ_UINT32 * p_ty1,
+                                         OPJ_UINT32 * p_dx_min,
+                                         OPJ_UINT32 * p_dy_min,
+                                         OPJ_UINT32 * p_max_prec,
+@@ -176,10 +178,10 @@ static void opj_get_encoding_parameters(const opj_image_t *p_image,
+ static void opj_get_all_encoding_parameters(const opj_image_t *p_image,
+         const opj_cp_t *p_cp,
+         OPJ_UINT32 tileno,
+-        OPJ_INT32 * p_tx0,
+-        OPJ_INT32 * p_tx1,
+-        OPJ_INT32 * p_ty0,
+-        OPJ_INT32 * p_ty1,
++        OPJ_UINT32 * p_tx0,
++        OPJ_UINT32 * p_tx1,
++        OPJ_UINT32 * p_ty0,
++        OPJ_UINT32 * p_ty1,
+         OPJ_UINT32 * p_dx_min,
+         OPJ_UINT32 * p_dy_min,
+         OPJ_UINT32 * p_max_prec,
+@@ -192,10 +194,12 @@ static void opj_get_all_encoding_parameters(const opj_image_t *p_image,
+  * @param   p_image     the image used to initialize the packet iterator (in fact only the number of components is relevant.
+  * @param   p_cp        the coding parameters.
+  * @param   tileno  the index of the tile from which creating the packet iterator.
++ * @param   manager Event manager
+  */
+ static opj_pi_iterator_t * opj_pi_create(const opj_image_t *p_image,
+         const opj_cp_t *p_cp,
+-        OPJ_UINT32 tileno);
++        OPJ_UINT32 tileno,
++        opj_event_mgr_t* manager);
+ /**
+  * FIXME DOC
+  */
+@@ -230,18 +234,19 @@ static OPJ_BOOL opj_pi_check_next_level(OPJ_INT32 pos,
+ ==========================================================
+ */
+ 
+-static void opj_pi_emit_error(opj_pi_iterator_t * pi, const char* msg)
+-{
+-    (void)pi;
+-    (void)msg;
+-}
+-
+ static OPJ_BOOL opj_pi_next_lrcp(opj_pi_iterator_t * pi)
+ {
+     opj_pi_comp_t *comp = NULL;
+     opj_pi_resolution_t *res = NULL;
+     OPJ_UINT32 index = 0;
+ 
++    if (pi->poc.compno0 >= pi->numcomps ||
++            pi->poc.compno1 >= pi->numcomps + 1) {
++        opj_event_msg(pi->manager, EVT_ERROR,
++                      "opj_pi_next_lrcp(): invalid compno0/compno1\n");
++        return OPJ_FALSE;
++    }
++
+     if (!pi->first) {
+         comp = &pi->comps[pi->compno];
+         res = &comp->resolutions[pi->resno];
+@@ -272,7 +277,7 @@ static OPJ_BOOL opj_pi_next_lrcp(opj_pi_iterator_t * pi)
+                     /* include should be resized when a POC arises, or */
+                     /* the POC should be rejected */
+                     if (index >= pi->include_size) {
+-                        opj_pi_emit_error(pi, "Invalid access to pi->include");
++                        opj_event_msg(pi->manager, EVT_ERROR, "Invalid access to pi->include");
+                         return OPJ_FALSE;
+                     }
+                     if (!pi->include[index]) {
+@@ -295,6 +300,13 @@ static OPJ_BOOL opj_pi_next_rlcp(opj_pi_iterator_t * pi)
+     opj_pi_resolution_t *res = NULL;
+     OPJ_UINT32 index = 0;
+ 
++    if (pi->poc.compno0 >= pi->numcomps ||
++            pi->poc.compno1 >= pi->numcomps + 1) {
++        opj_event_msg(pi->manager, EVT_ERROR,
++                      "opj_pi_next_rlcp(): invalid compno0/compno1\n");
++        return OPJ_FALSE;
++    }
++
+     if (!pi->first) {
+         comp = &pi->comps[pi->compno];
+         res = &comp->resolutions[pi->resno];
+@@ -318,7 +330,7 @@ static OPJ_BOOL opj_pi_next_rlcp(opj_pi_iterator_t * pi)
+                     index = pi->layno * pi->step_l + pi->resno * pi->step_r + pi->compno *
+                             pi->step_c + pi->precno * pi->step_p;
+                     if (index >= pi->include_size) {
+-                        opj_pi_emit_error(pi, "Invalid access to pi->include");
++                        opj_event_msg(pi->manager, EVT_ERROR, "Invalid access to pi->include");
+                         return OPJ_FALSE;
+                     }
+                     if (!pi->include[index]) {
+@@ -341,6 +353,13 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi)
+     opj_pi_resolution_t *res = NULL;
+     OPJ_UINT32 index = 0;
+ 
++    if (pi->poc.compno0 >= pi->numcomps ||
++            pi->poc.compno1 >= pi->numcomps + 1) {
++        opj_event_msg(pi->manager, EVT_ERROR,
++                      "opj_pi_next_rpcl(): invalid compno0/compno1\n");
++        return OPJ_FALSE;
++    }
++
+     if (!pi->first) {
+         goto LABEL_SKIP;
+     } else {
+@@ -376,16 +395,16 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi)
+         pi->poc.tx1 = pi->tx1;
+     }
+     for (pi->resno = pi->poc.resno0; pi->resno < pi->poc.resno1; pi->resno++) {
+-        for (pi->y = pi->poc.ty0; pi->y < pi->poc.ty1;
+-                pi->y += (OPJ_INT32)(pi->dy - (OPJ_UINT32)(pi->y % (OPJ_INT32)pi->dy))) {
+-            for (pi->x = pi->poc.tx0; pi->x < pi->poc.tx1;
+-                    pi->x += (OPJ_INT32)(pi->dx - (OPJ_UINT32)(pi->x % (OPJ_INT32)pi->dx))) {
++        for (pi->y = (OPJ_UINT32)pi->poc.ty0; pi->y < (OPJ_UINT32)pi->poc.ty1;
++                pi->y += (pi->dy - (pi->y % pi->dy))) {
++            for (pi->x = (OPJ_UINT32)pi->poc.tx0; pi->x < (OPJ_UINT32)pi->poc.tx1;
++                    pi->x += (pi->dx - (pi->x % pi->dx))) {
+                 for (pi->compno = pi->poc.compno0; pi->compno < pi->poc.compno1; pi->compno++) {
+                     OPJ_UINT32 levelno;
+-                    OPJ_INT32 trx0, try0;
+-                    OPJ_INT32  trx1, try1;
++                    OPJ_UINT32 trx0, try0;
++                    OPJ_UINT32  trx1, try1;
+                     OPJ_UINT32  rpx, rpy;
+-                    OPJ_INT32  prci, prcj;
++                    OPJ_UINT32  prci, prcj;
+                     comp = &pi->comps[pi->compno];
+                     if (pi->resno >= comp->numresolutions) {
+                         continue;
+@@ -404,10 +423,10 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi)
+                             (comp->dy << levelno) > INT_MAX) {
+                         continue;
+                     }
+-                    trx0 = opj_int_ceildiv(pi->tx0, (OPJ_INT32)(comp->dx << levelno));
+-                    try0 = opj_int_ceildiv(pi->ty0, (OPJ_INT32)(comp->dy << levelno));
+-                    trx1 = opj_int_ceildiv(pi->tx1, (OPJ_INT32)(comp->dx << levelno));
+-                    try1 = opj_int_ceildiv(pi->ty1, (OPJ_INT32)(comp->dy << levelno));
++                    trx0 = opj_uint_ceildiv(pi->tx0, (comp->dx << levelno));
++                    try0 = opj_uint_ceildiv(pi->ty0, (comp->dy << levelno));
++                    trx1 = opj_uint_ceildiv(pi->tx1, (comp->dx << levelno));
++                    try1 = opj_uint_ceildiv(pi->ty1, (comp->dy << levelno));
+                     rpx = res->pdx + levelno;
+                     rpy = res->pdy + levelno;
+ 
+@@ -421,12 +440,12 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi)
+                     }
+ 
+                     /* See ISO-15441. B.12.1.3 Resolution level-position-component-layer progression */
+-                    if (!((pi->y % (OPJ_INT32)(comp->dy << rpy) == 0) || ((pi->y == pi->ty0) &&
+-                            ((try0 << levelno) % (1 << rpy))))) {
++                    if (!((pi->y % (comp->dy << rpy) == 0) || ((pi->y == pi->ty0) &&
++                            ((try0 << levelno) % (1U << rpy))))) {
+                         continue;
+                     }
+-                    if (!((pi->x % (OPJ_INT32)(comp->dx << rpx) == 0) || ((pi->x == pi->tx0) &&
+-                            ((trx0 << levelno) % (1 << rpx))))) {
++                    if (!((pi->x % (comp->dx << rpx) == 0) || ((pi->x == pi->tx0) &&
++                            ((trx0 << levelno) % (1U << rpx))))) {
+                         continue;
+                     }
+ 
+@@ -438,13 +457,13 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi)
+                         continue;
+                     }
+ 
+-                    prci = opj_int_floordivpow2(opj_int_ceildiv(pi->x,
+-                                                (OPJ_INT32)(comp->dx << levelno)), (OPJ_INT32)res->pdx)
+-                           - opj_int_floordivpow2(trx0, (OPJ_INT32)res->pdx);
+-                    prcj = opj_int_floordivpow2(opj_int_ceildiv(pi->y,
+-                                                (OPJ_INT32)(comp->dy << levelno)), (OPJ_INT32)res->pdy)
+-                           - opj_int_floordivpow2(try0, (OPJ_INT32)res->pdy);
+-                    pi->precno = (OPJ_UINT32)(prci + prcj * (OPJ_INT32)res->pw);
++                    prci = opj_uint_floordivpow2(opj_uint_ceildiv(pi->x,
++                                                 (comp->dx << levelno)), res->pdx)
++                           - opj_uint_floordivpow2(trx0, res->pdx);
++                    prcj = opj_uint_floordivpow2(opj_uint_ceildiv(pi->y,
++                                                 (comp->dy << levelno)), res->pdy)
++                           - opj_uint_floordivpow2(try0, res->pdy);
++                    pi->precno = prci + prcj * res->pw;
+                     if (pi->precno >= res->pw * res->ph) {
+                       return OPJ_FALSE;
+                     }
+@@ -452,7 +471,7 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi)
+                         index = pi->layno * pi->step_l + pi->resno * pi->step_r + pi->compno *
+                                 pi->step_c + pi->precno * pi->step_p;
+                         if (index >= pi->include_size) {
+-                            opj_pi_emit_error(pi, "Invalid access to pi->include");
++                            opj_event_msg(pi->manager, EVT_ERROR, "Invalid access to pi->include");
+                             return OPJ_FALSE;
+                         }
+                         if (!pi->include[index]) {
+@@ -476,6 +495,13 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi)
+     opj_pi_resolution_t *res = NULL;
+     OPJ_UINT32 index = 0;
+ 
++    if (pi->poc.compno0 >= pi->numcomps ||
++            pi->poc.compno1 >= pi->numcomps + 1) {
++        opj_event_msg(pi->manager, EVT_ERROR,
++                      "opj_pi_next_pcrl(): invalid compno0/compno1\n");
++        return OPJ_FALSE;
++    }
++
+     if (!pi->first) {
+         comp = &pi->comps[pi->compno];
+         goto LABEL_SKIP;
+@@ -511,19 +537,19 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi)
+         pi->poc.ty1 = pi->ty1;
+         pi->poc.tx1 = pi->tx1;
+     }
+-    for (pi->y = pi->poc.ty0; pi->y < pi->poc.ty1;
+-            pi->y += (OPJ_INT32)(pi->dy - (OPJ_UINT32)(pi->y % (OPJ_INT32)pi->dy))) {
+-        for (pi->x = pi->poc.tx0; pi->x < pi->poc.tx1;
+-                pi->x += (OPJ_INT32)(pi->dx - (OPJ_UINT32)(pi->x % (OPJ_INT32)pi->dx))) {
++    for (pi->y = (OPJ_UINT32)pi->poc.ty0; pi->y < (OPJ_UINT32)pi->poc.ty1;
++            pi->y += (pi->dy - (pi->y % pi->dy))) {
++        for (pi->x = (OPJ_UINT32)pi->poc.tx0; pi->x < (OPJ_UINT32)pi->poc.tx1;
++                pi->x += (pi->dx - (pi->x % pi->dx))) {
+             for (pi->compno = pi->poc.compno0; pi->compno < pi->poc.compno1; pi->compno++) {
+                 comp = &pi->comps[pi->compno];
+                 for (pi->resno = pi->poc.resno0;
+                         pi->resno < opj_uint_min(pi->poc.resno1, comp->numresolutions); pi->resno++) {
+                     OPJ_UINT32 levelno;
+-                    OPJ_INT32 trx0, try0;
+-                    OPJ_INT32 trx1, try1;
++                    OPJ_UINT32 trx0, try0;
++                    OPJ_UINT32 trx1, try1;
+                     OPJ_UINT32 rpx, rpy;
+-                    OPJ_INT32 prci, prcj;
++                    OPJ_UINT32 prci, prcj;
+                     res = &comp->resolutions[pi->resno];
+                     levelno = comp->numresolutions - 1 - pi->resno;
+                     /* Avoids division by zero */
+@@ -538,10 +564,10 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi)
+                             (comp->dy << levelno) > INT_MAX) {
+                         continue;
+                     }
+-                    trx0 = opj_int_ceildiv(pi->tx0, (OPJ_INT32)(comp->dx << levelno));
+-                    try0 = opj_int_ceildiv(pi->ty0, (OPJ_INT32)(comp->dy << levelno));
+-                    trx1 = opj_int_ceildiv(pi->tx1, (OPJ_INT32)(comp->dx << levelno));
+-                    try1 = opj_int_ceildiv(pi->ty1, (OPJ_INT32)(comp->dy << levelno));
++                    trx0 = opj_uint_ceildiv(pi->tx0, (comp->dx << levelno));
++                    try0 = opj_uint_ceildiv(pi->ty0, (comp->dy << levelno));
++                    trx1 = opj_uint_ceildiv(pi->tx1, (comp->dx << levelno));
++                    try1 = opj_uint_ceildiv(pi->ty1, (comp->dy << levelno));
+                     rpx = res->pdx + levelno;
+                     rpy = res->pdy + levelno;
+ 
+@@ -555,12 +581,12 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi)
+                     }
+ 
+                     /* See ISO-15441. B.12.1.4 Position-component-resolution level-layer progression */
+-                    if (!((pi->y % (OPJ_INT32)(comp->dy << rpy) == 0) || ((pi->y == pi->ty0) &&
+-                            ((try0 << levelno) % (1 << rpy))))) {
++                    if (!((pi->y % (comp->dy << rpy) == 0) || ((pi->y == pi->ty0) &&
++                            ((try0 << levelno) % (1U << rpy))))) {
+                         continue;
+                     }
+-                    if (!((pi->x % (OPJ_INT32)(comp->dx << rpx) == 0) || ((pi->x == pi->tx0) &&
+-                            ((trx0 << levelno) % (1 << rpx))))) {
++                    if (!((pi->x % (comp->dx << rpx) == 0) || ((pi->x == pi->tx0) &&
++                            ((trx0 << levelno) % (1U << rpx))))) {
+                         continue;
+                     }
+ 
+@@ -572,13 +598,13 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi)
+                         continue;
+                     }
+ 
+-                    prci = opj_int_floordivpow2(opj_int_ceildiv(pi->x,
+-                                                (OPJ_INT32)(comp->dx << levelno)), (OPJ_INT32)res->pdx)
+-                           - opj_int_floordivpow2(trx0, (OPJ_INT32)res->pdx);
+-                    prcj = opj_int_floordivpow2(opj_int_ceildiv(pi->y,
+-                                                (OPJ_INT32)(comp->dy << levelno)), (OPJ_INT32)res->pdy)
+-                           - opj_int_floordivpow2(try0, (OPJ_INT32)res->pdy);
+-                    pi->precno = (OPJ_UINT32)(prci + prcj * (OPJ_INT32)res->pw);
++                    prci = opj_uint_floordivpow2(opj_uint_ceildiv(pi->x,
++                                                 (comp->dx << levelno)), res->pdx)
++                           - opj_uint_floordivpow2(trx0, res->pdx);
++                    prcj = opj_uint_floordivpow2(opj_uint_ceildiv(pi->y,
++                                                 (comp->dy << levelno)), res->pdy)
++                           - opj_uint_floordivpow2(try0, res->pdy);
++                    pi->precno = prci + prcj * res->pw;
+                     if (pi->precno >= res->pw * res->ph) {
+                       return OPJ_FALSE;
+                     }
+@@ -586,7 +612,7 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi)
+                         index = pi->layno * pi->step_l + pi->resno * pi->step_r + pi->compno *
+                                 pi->step_c + pi->precno * pi->step_p;
+                         if (index >= pi->include_size) {
+-                            opj_pi_emit_error(pi, "Invalid access to pi->include");
++                            opj_event_msg(pi->manager, EVT_ERROR, "Invalid access to pi->include");
+                             return OPJ_FALSE;
+                         }
+                         if (!pi->include[index]) {
+@@ -610,6 +636,13 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi)
+     opj_pi_resolution_t *res = NULL;
+     OPJ_UINT32 index = 0;
+ 
++    if (pi->poc.compno0 >= pi->numcomps ||
++            pi->poc.compno1 >= pi->numcomps + 1) {
++        opj_event_msg(pi->manager, EVT_ERROR,
++                      "opj_pi_next_cprl(): invalid compno0/compno1\n");
++        return OPJ_FALSE;
++    }
++
+     if (!pi->first) {
+         comp = &pi->comps[pi->compno];
+         goto LABEL_SKIP;
+@@ -645,17 +678,17 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi)
+             pi->poc.ty1 = pi->ty1;
+             pi->poc.tx1 = pi->tx1;
+         }
+-        for (pi->y = pi->poc.ty0; pi->y < pi->poc.ty1;
+-                pi->y += (OPJ_INT32)(pi->dy - (OPJ_UINT32)(pi->y % (OPJ_INT32)pi->dy))) {
+-            for (pi->x = pi->poc.tx0; pi->x < pi->poc.tx1;
+-                    pi->x += (OPJ_INT32)(pi->dx - (OPJ_UINT32)(pi->x % (OPJ_INT32)pi->dx))) {
++        for (pi->y = (OPJ_UINT32)pi->poc.ty0; pi->y < (OPJ_UINT32)pi->poc.ty1;
++                pi->y += (pi->dy - (pi->y % pi->dy))) {
++            for (pi->x = (OPJ_UINT32)pi->poc.tx0; pi->x < (OPJ_UINT32)pi->poc.tx1;
++                    pi->x += (pi->dx - (pi->x % pi->dx))) {
+                 for (pi->resno = pi->poc.resno0;
+                         pi->resno < opj_uint_min(pi->poc.resno1, comp->numresolutions); pi->resno++) {
+                     OPJ_UINT32 levelno;
+-                    OPJ_INT32 trx0, try0;
+-                    OPJ_INT32 trx1, try1;
++                    OPJ_UINT32 trx0, try0;
++                    OPJ_UINT32 trx1, try1;
+                     OPJ_UINT32 rpx, rpy;
+-                    OPJ_INT32 prci, prcj;
++                    OPJ_UINT32 prci, prcj;
+                     res = &comp->resolutions[pi->resno];
+                     levelno = comp->numresolutions - 1 - pi->resno;
+                     /* Avoids division by zero on id_000004,sig_06,src_000679,op_arith8,pos_49,val_-17 */
+@@ -669,10 +702,10 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi)
+                             (comp->dy << levelno) > INT_MAX) {
+                         continue;
+                     }
+-                    trx0 = opj_int_ceildiv(pi->tx0, (OPJ_INT32)(comp->dx << levelno));
+-                    try0 = opj_int_ceildiv(pi->ty0, (OPJ_INT32)(comp->dy << levelno));
+-                    trx1 = opj_int_ceildiv(pi->tx1, (OPJ_INT32)(comp->dx << levelno));
+-                    try1 = opj_int_ceildiv(pi->ty1, (OPJ_INT32)(comp->dy << levelno));
++                    trx0 = opj_uint_ceildiv(pi->tx0, (comp->dx << levelno));
++                    try0 = opj_uint_ceildiv(pi->ty0, (comp->dy << levelno));
++                    trx1 = opj_uint_ceildiv(pi->tx1, (comp->dx << levelno));
++                    try1 = opj_uint_ceildiv(pi->ty1, (comp->dy << levelno));
+                     rpx = res->pdx + levelno;
+                     rpy = res->pdy + levelno;
+ 
+@@ -686,12 +719,12 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi)
+                     }
+ 
+                     /* See ISO-15441. B.12.1.5 Component-position-resolution level-layer progression */
+-                    if (!((pi->y % (OPJ_INT32)(comp->dy << rpy) == 0) || ((pi->y == pi->ty0) &&
+-                            ((try0 << levelno) % (1 << rpy))))) {
++                    if (!((pi->y % (comp->dy << rpy) == 0) || ((pi->y == pi->ty0) &&
++                            ((try0 << levelno) % (1U << rpy))))) {
+                         continue;
+                     }
+-                    if (!((pi->x % (OPJ_INT32)(comp->dx << rpx) == 0) || ((pi->x == pi->tx0) &&
+-                            ((trx0 << levelno) % (1 << rpx))))) {
++                    if (!((pi->x % (comp->dx << rpx) == 0) || ((pi->x == pi->tx0) &&
++                            ((trx0 << levelno) % (1U << rpx))))) {
+                         continue;
+                     }
+ 
+@@ -703,13 +736,13 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi)
+                         continue;
+                     }
+ 
+-                    prci = opj_int_floordivpow2(opj_int_ceildiv(pi->x,
+-                                                (OPJ_INT32)(comp->dx << levelno)), (OPJ_INT32)res->pdx)
+-                           - opj_int_floordivpow2(trx0, (OPJ_INT32)res->pdx);
+-                    prcj = opj_int_floordivpow2(opj_int_ceildiv(pi->y,
+-                                                (OPJ_INT32)(comp->dy << levelno)), (OPJ_INT32)res->pdy)
+-                           - opj_int_floordivpow2(try0, (OPJ_INT32)res->pdy);
+-                    pi->precno = (OPJ_UINT32)(prci + prcj * (OPJ_INT32)res->pw);
++                    prci = opj_uint_floordivpow2(opj_uint_ceildiv(pi->x,
++                                                 (comp->dx << levelno)), res->pdx)
++                           - opj_uint_floordivpow2(trx0, res->pdx);
++                    prcj = opj_uint_floordivpow2(opj_uint_ceildiv(pi->y,
++                                                 (comp->dy << levelno)), res->pdy)
++                           - opj_uint_floordivpow2(try0, res->pdy);
++                    pi->precno = (OPJ_UINT32)(prci + prcj * res->pw);
+                     if (pi->precno >= res->pw * res->ph) {
+                       return OPJ_FALSE;
+                     }
+@@ -717,7 +750,7 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi)
+                         index = pi->layno * pi->step_l + pi->resno * pi->step_r + pi->compno *
+                                 pi->step_c + pi->precno * pi->step_p;
+                         if (index >= pi->include_size) {
+-                            opj_pi_emit_error(pi, "Invalid access to pi->include");
++                            opj_event_msg(pi->manager, EVT_ERROR, "Invalid access to pi->include");
+                             return OPJ_FALSE;
+                         }
+                         if (!pi->include[index]) {
+@@ -738,10 +771,10 @@ LABEL_SKIP:
+ static void opj_get_encoding_parameters(const opj_image_t *p_image,
+                                         const opj_cp_t *p_cp,
+                                         OPJ_UINT32 p_tileno,
+-                                        OPJ_INT32 * p_tx0,
+-                                        OPJ_INT32  * p_tx1,
+-                                        OPJ_INT32  * p_ty0,
+-                                        OPJ_INT32  * p_ty1,
++                                        OPJ_UINT32 * p_tx0,
++                                        OPJ_UINT32  * p_tx1,
++                                        OPJ_UINT32  * p_ty0,
++                                        OPJ_UINT32  * p_ty1,
+                                         OPJ_UINT32 * p_dx_min,
+                                         OPJ_UINT32 * p_dy_min,
+                                         OPJ_UINT32 * p_max_prec,
+@@ -777,12 +810,12 @@ static void opj_get_encoding_parameters(const opj_image_t *p_image,
+     /* find extent of tile */
+     l_tx0 = p_cp->tx0 + p *
+             p_cp->tdx; /* can't be greater than p_image->x1 so won't overflow */
+-    *p_tx0 = (OPJ_INT32)opj_uint_max(l_tx0, p_image->x0);
+-    *p_tx1 = (OPJ_INT32)opj_uint_min(opj_uint_adds(l_tx0, p_cp->tdx), p_image->x1);
++    *p_tx0 = opj_uint_max(l_tx0, p_image->x0);
++    *p_tx1 = opj_uint_min(opj_uint_adds(l_tx0, p_cp->tdx), p_image->x1);
+     l_ty0 = p_cp->ty0 + q *
+             p_cp->tdy; /* can't be greater than p_image->y1 so won't overflow */
+-    *p_ty0 = (OPJ_INT32)opj_uint_max(l_ty0, p_image->y0);
+-    *p_ty1 = (OPJ_INT32)opj_uint_min(opj_uint_adds(l_ty0, p_cp->tdy), p_image->y1);
++    *p_ty0 = opj_uint_max(l_ty0, p_image->y0);
++    *p_ty1 = opj_uint_min(opj_uint_adds(l_ty0, p_cp->tdy), p_image->y1);
+ 
+     /* max precision is 0 (can only grow) */
+     *p_max_prec = 0;
+@@ -795,17 +828,17 @@ static void opj_get_encoding_parameters(const opj_image_t *p_image,
+     for (compno = 0; compno < p_image->numcomps; ++compno) {
+         /* arithmetic variables to calculate */
+         OPJ_UINT32 l_level_no;
+-        OPJ_INT32 l_rx0, l_ry0, l_rx1, l_ry1;
+-        OPJ_INT32 l_px0, l_py0, l_px1, py1;
++        OPJ_UINT32 l_rx0, l_ry0, l_rx1, l_ry1;
++        OPJ_UINT32 l_px0, l_py0, l_px1, py1;
+         OPJ_UINT32 l_pdx, l_pdy;
+         OPJ_UINT32 l_pw, l_ph;
+         OPJ_UINT32 l_product;
+-        OPJ_INT32 l_tcx0, l_tcy0, l_tcx1, l_tcy1;
++        OPJ_UINT32 l_tcx0, l_tcy0, l_tcx1, l_tcy1;
+ 
+-        l_tcx0 = opj_int_ceildiv(*p_tx0, (OPJ_INT32)l_img_comp->dx);
+-        l_tcy0 = opj_int_ceildiv(*p_ty0, (OPJ_INT32)l_img_comp->dy);
+-        l_tcx1 = opj_int_ceildiv(*p_tx1, (OPJ_INT32)l_img_comp->dx);
+-        l_tcy1 = opj_int_ceildiv(*p_ty1, (OPJ_INT32)l_img_comp->dy);
++        l_tcx0 = opj_uint_ceildiv(*p_tx0, l_img_comp->dx);
++        l_tcy0 = opj_uint_ceildiv(*p_ty0, l_img_comp->dy);
++        l_tcx1 = opj_uint_ceildiv(*p_tx1, l_img_comp->dx);
++        l_tcy1 = opj_uint_ceildiv(*p_ty1, l_img_comp->dy);
+ 
+         if (l_tccp->numresolutions > *p_max_res) {
+             *p_max_res = l_tccp->numresolutions;
+@@ -829,19 +862,19 @@ static void opj_get_encoding_parameters(const opj_image_t *p_image,
+             /* various calculations of extents */
+             l_level_no = l_tccp->numresolutions - 1 - resno;
+ 
+-            l_rx0 = opj_int_ceildivpow2(l_tcx0, (OPJ_INT32)l_level_no);
+-            l_ry0 = opj_int_ceildivpow2(l_tcy0, (OPJ_INT32)l_level_no);
+-            l_rx1 = opj_int_ceildivpow2(l_tcx1, (OPJ_INT32)l_level_no);
+-            l_ry1 = opj_int_ceildivpow2(l_tcy1, (OPJ_INT32)l_level_no);
++            l_rx0 = opj_uint_ceildivpow2(l_tcx0, l_level_no);
++            l_ry0 = opj_uint_ceildivpow2(l_tcy0, l_level_no);
++            l_rx1 = opj_uint_ceildivpow2(l_tcx1, l_level_no);
++            l_ry1 = opj_uint_ceildivpow2(l_tcy1, l_level_no);
+ 
+-            l_px0 = opj_int_floordivpow2(l_rx0, (OPJ_INT32)l_pdx) << l_pdx;
+-            l_py0 = opj_int_floordivpow2(l_ry0, (OPJ_INT32)l_pdy) << l_pdy;
+-            l_px1 = opj_int_ceildivpow2(l_rx1, (OPJ_INT32)l_pdx) << l_pdx;
++            l_px0 = opj_uint_floordivpow2(l_rx0, l_pdx) << l_pdx;
++            l_py0 = opj_uint_floordivpow2(l_ry0, l_pdy) << l_pdy;
++            l_px1 = opj_uint_ceildivpow2(l_rx1, l_pdx) << l_pdx;
+ 
+-            py1 = opj_int_ceildivpow2(l_ry1, (OPJ_INT32)l_pdy) << l_pdy;
++            py1 = opj_uint_ceildivpow2(l_ry1, l_pdy) << l_pdy;
+ 
+-            l_pw = (l_rx0 == l_rx1) ? 0 : (OPJ_UINT32)((l_px1 - l_px0) >> l_pdx);
+-            l_ph = (l_ry0 == l_ry1) ? 0 : (OPJ_UINT32)((py1 - l_py0) >> l_pdy);
++            l_pw = (l_rx0 == l_rx1) ? 0 : ((l_px1 - l_px0) >> l_pdx);
++            l_ph = (l_ry0 == l_ry1) ? 0 : ((py1 - l_py0) >> l_pdy);
+ 
+             l_product = l_pw * l_ph;
+ 
+@@ -859,10 +892,10 @@ static void opj_get_encoding_parameters(const opj_image_t *p_image,
+ static void opj_get_all_encoding_parameters(const opj_image_t *p_image,
+         const opj_cp_t *p_cp,
+         OPJ_UINT32 tileno,
+-        OPJ_INT32 * p_tx0,
+-        OPJ_INT32 * p_tx1,
+-        OPJ_INT32 * p_ty0,
+-        OPJ_INT32 * p_ty1,
++        OPJ_UINT32 * p_tx0,
++        OPJ_UINT32 * p_tx1,
++        OPJ_UINT32 * p_ty0,
++        OPJ_UINT32 * p_ty1,
+         OPJ_UINT32 * p_dx_min,
+         OPJ_UINT32 * p_dy_min,
+         OPJ_UINT32 * p_max_prec,
+@@ -903,12 +936,12 @@ static void opj_get_all_encoding_parameters(const opj_image_t *p_image,
+     /* here calculation of tx0, tx1, ty0, ty1, maxprec, l_dx and l_dy */
+     l_tx0 = p_cp->tx0 + p *
+             p_cp->tdx; /* can't be greater than p_image->x1 so won't overflow */
+-    *p_tx0 = (OPJ_INT32)opj_uint_max(l_tx0, p_image->x0);
+-    *p_tx1 = (OPJ_INT32)opj_uint_min(opj_uint_adds(l_tx0, p_cp->tdx), p_image->x1);
++    *p_tx0 = opj_uint_max(l_tx0, p_image->x0);
++    *p_tx1 = opj_uint_min(opj_uint_adds(l_tx0, p_cp->tdx), p_image->x1);
+     l_ty0 = p_cp->ty0 + q *
+             p_cp->tdy; /* can't be greater than p_image->y1 so won't overflow */
+-    *p_ty0 = (OPJ_INT32)opj_uint_max(l_ty0, p_image->y0);
+-    *p_ty1 = (OPJ_INT32)opj_uint_min(opj_uint_adds(l_ty0, p_cp->tdy), p_image->y1);
++    *p_ty0 = opj_uint_max(l_ty0, p_image->y0);
++    *p_ty1 = opj_uint_min(opj_uint_adds(l_ty0, p_cp->tdy), p_image->y1);
+ 
+     /* max precision and resolution is 0 (can only grow)*/
+     *p_max_prec = 0;
+@@ -921,18 +954,18 @@ static void opj_get_all_encoding_parameters(const opj_image_t *p_image,
+     for (compno = 0; compno < p_image->numcomps; ++compno) {
+         /* aritmetic variables to calculate*/
+         OPJ_UINT32 l_level_no;
+-        OPJ_INT32 l_rx0, l_ry0, l_rx1, l_ry1;
+-        OPJ_INT32 l_px0, l_py0, l_px1, py1;
++        OPJ_UINT32 l_rx0, l_ry0, l_rx1, l_ry1;
++        OPJ_UINT32 l_px0, l_py0, l_px1, py1;
+         OPJ_UINT32 l_product;
+-        OPJ_INT32 l_tcx0, l_tcy0, l_tcx1, l_tcy1;
++        OPJ_UINT32 l_tcx0, l_tcy0, l_tcx1, l_tcy1;
+         OPJ_UINT32 l_pdx, l_pdy, l_pw, l_ph;
+ 
+-        lResolutionPtr = p_resolutions[compno];
++        lResolutionPtr = p_resolutions ? p_resolutions[compno] : NULL;
+ 
+-        l_tcx0 = opj_int_ceildiv(*p_tx0, (OPJ_INT32)l_img_comp->dx);
+-        l_tcy0 = opj_int_ceildiv(*p_ty0, (OPJ_INT32)l_img_comp->dy);
+-        l_tcx1 = opj_int_ceildiv(*p_tx1, (OPJ_INT32)l_img_comp->dx);
+-        l_tcy1 = opj_int_ceildiv(*p_ty1, (OPJ_INT32)l_img_comp->dy);
++        l_tcx0 = opj_uint_ceildiv(*p_tx0, l_img_comp->dx);
++        l_tcy0 = opj_uint_ceildiv(*p_ty0, l_img_comp->dy);
++        l_tcx1 = opj_uint_ceildiv(*p_tx1, l_img_comp->dx);
++        l_tcy1 = opj_uint_ceildiv(*p_ty1, l_img_comp->dy);
+ 
+         if (l_tccp->numresolutions > *p_max_res) {
+             *p_max_res = l_tccp->numresolutions;
+@@ -948,33 +981,37 @@ static void opj_get_all_encoding_parameters(const opj_image_t *p_image,
+             /* precinct width and height*/
+             l_pdx = l_tccp->prcw[resno];
+             l_pdy = l_tccp->prch[resno];
+-            *lResolutionPtr++ = l_pdx;
+-            *lResolutionPtr++ = l_pdy;
++            if (lResolutionPtr) {
++                *lResolutionPtr++ = l_pdx;
++                *lResolutionPtr++ = l_pdy;
++            }
+             if (l_pdx + l_level_no < 32 &&
+                     l_img_comp->dx <= UINT_MAX / (1u << (l_pdx + l_level_no))) {
+                 l_dx = l_img_comp->dx * (1u << (l_pdx + l_level_no));
+                 /* take the minimum size for l_dx for each comp and resolution*/
+-                *p_dx_min = (OPJ_UINT32)opj_int_min((OPJ_INT32) * p_dx_min, (OPJ_INT32)l_dx);
++                *p_dx_min = opj_uint_min(*p_dx_min, l_dx);
+             }
+             if (l_pdy + l_level_no < 32 &&
+                     l_img_comp->dy <= UINT_MAX / (1u << (l_pdy + l_level_no))) {
+                 l_dy = l_img_comp->dy * (1u << (l_pdy + l_level_no));
+-                *p_dy_min = (OPJ_UINT32)opj_int_min((OPJ_INT32) * p_dy_min, (OPJ_INT32)l_dy);
++                *p_dy_min = opj_uint_min(*p_dy_min, l_dy);
+             }
+ 
+             /* various calculations of extents*/
+-            l_rx0 = opj_int_ceildivpow2(l_tcx0, (OPJ_INT32)l_level_no);
+-            l_ry0 = opj_int_ceildivpow2(l_tcy0, (OPJ_INT32)l_level_no);
+-            l_rx1 = opj_int_ceildivpow2(l_tcx1, (OPJ_INT32)l_level_no);
+-            l_ry1 = opj_int_ceildivpow2(l_tcy1, (OPJ_INT32)l_level_no);
+-            l_px0 = opj_int_floordivpow2(l_rx0, (OPJ_INT32)l_pdx) << l_pdx;
+-            l_py0 = opj_int_floordivpow2(l_ry0, (OPJ_INT32)l_pdy) << l_pdy;
+-            l_px1 = opj_int_ceildivpow2(l_rx1, (OPJ_INT32)l_pdx) << l_pdx;
+-            py1 = opj_int_ceildivpow2(l_ry1, (OPJ_INT32)l_pdy) << l_pdy;
+-            l_pw = (l_rx0 == l_rx1) ? 0 : (OPJ_UINT32)((l_px1 - l_px0) >> l_pdx);
+-            l_ph = (l_ry0 == l_ry1) ? 0 : (OPJ_UINT32)((py1 - l_py0) >> l_pdy);
+-            *lResolutionPtr++ = l_pw;
+-            *lResolutionPtr++ = l_ph;
++            l_rx0 = opj_uint_ceildivpow2(l_tcx0, l_level_no);
++            l_ry0 = opj_uint_ceildivpow2(l_tcy0, l_level_no);
++            l_rx1 = opj_uint_ceildivpow2(l_tcx1, l_level_no);
++            l_ry1 = opj_uint_ceildivpow2(l_tcy1, l_level_no);
++            l_px0 = opj_uint_floordivpow2(l_rx0, l_pdx) << l_pdx;
++            l_py0 = opj_uint_floordivpow2(l_ry0, l_pdy) << l_pdy;
++            l_px1 = opj_uint_ceildivpow2(l_rx1, l_pdx) << l_pdx;
++            py1 = opj_uint_ceildivpow2(l_ry1, l_pdy) << l_pdy;
++            l_pw = (l_rx0 == l_rx1) ? 0 : ((l_px1 - l_px0) >> l_pdx);
++            l_ph = (l_ry0 == l_ry1) ? 0 : ((py1 - l_py0) >> l_pdy);
++            if (lResolutionPtr) {
++                *lResolutionPtr++ = l_pw;
++                *lResolutionPtr++ = l_ph;
++            }
+             l_product = l_pw * l_ph;
+ 
+             /* update precision*/
+@@ -990,7 +1027,8 @@ static void opj_get_all_encoding_parameters(const opj_image_t *p_image,
+ 
+ static opj_pi_iterator_t * opj_pi_create(const opj_image_t *image,
+         const opj_cp_t *cp,
+-        OPJ_UINT32 tileno)
++        OPJ_UINT32 tileno,
++        opj_event_mgr_t* manager)
+ {
+     /* loop*/
+     OPJ_UINT32 pino, compno;
+@@ -1024,6 +1062,8 @@ static opj_pi_iterator_t * opj_pi_create(const opj_image_t *image,
+     l_current_pi = l_pi;
+     for (pino = 0; pino < l_poc_bound ; ++pino) {
+ 
++        l_current_pi->manager = manager;
++
+         l_current_pi->comps = (opj_pi_comp_t*) opj_calloc(image->numcomps,
+                               sizeof(opj_pi_comp_t));
+         if (! l_current_pi->comps) {
+@@ -1054,10 +1094,10 @@ static opj_pi_iterator_t * opj_pi_create(const opj_image_t *image,
+ 
+ static void opj_pi_update_encode_poc_and_final(opj_cp_t *p_cp,
+         OPJ_UINT32 p_tileno,
+-        OPJ_INT32 p_tx0,
+-        OPJ_INT32 p_tx1,
+-        OPJ_INT32 p_ty0,
+-        OPJ_INT32 p_ty1,
++        OPJ_UINT32 p_tx0,
++        OPJ_UINT32 p_tx1,
++        OPJ_UINT32 p_ty0,
++        OPJ_UINT32 p_ty1,
+         OPJ_UINT32 p_max_prec,
+         OPJ_UINT32 p_max_res,
+         OPJ_UINT32 p_dx_min,
+@@ -1134,10 +1174,10 @@ static void opj_pi_update_encode_poc_and_final(opj_cp_t *p_cp,
+ static void opj_pi_update_encode_not_poc(opj_cp_t *p_cp,
+         OPJ_UINT32 p_num_comps,
+         OPJ_UINT32 p_tileno,
+-        OPJ_INT32 p_tx0,
+-        OPJ_INT32 p_tx1,
+-        OPJ_INT32 p_ty0,
+-        OPJ_INT32 p_ty1,
++        OPJ_UINT32 p_tx0,
++        OPJ_UINT32 p_tx1,
++        OPJ_UINT32 p_ty0,
++        OPJ_UINT32 p_ty1,
+         OPJ_UINT32 p_max_prec,
+         OPJ_UINT32 p_max_res,
+         OPJ_UINT32 p_dx_min,
+@@ -1176,10 +1216,10 @@ static void opj_pi_update_encode_not_poc(opj_cp_t *p_cp,
+         l_current_poc->prg  = l_tcp->prg;
+         l_current_poc->prcS = 0;
+         l_current_poc->prcE = p_max_prec;
+-        l_current_poc->txS = (OPJ_UINT32)p_tx0;
+-        l_current_poc->txE = (OPJ_UINT32)p_tx1;
+-        l_current_poc->tyS = (OPJ_UINT32)p_ty0;
+-        l_current_poc->tyE = (OPJ_UINT32)p_ty1;
++        l_current_poc->txS = p_tx0;
++        l_current_poc->txE = p_tx1;
++        l_current_poc->tyS = p_ty0;
++        l_current_poc->tyE = p_ty1;
+         l_current_poc->dx = p_dx_min;
+         l_current_poc->dy = p_dy_min;
+         ++ l_current_poc;
+@@ -1361,7 +1401,8 @@ static OPJ_BOOL opj_pi_check_next_level(OPJ_INT32 pos,
+ */
+ opj_pi_iterator_t *opj_pi_create_decode(opj_image_t *p_image,
+                                         opj_cp_t *p_cp,
+-                                        OPJ_UINT32 p_tile_no)
++                                        OPJ_UINT32 p_tile_no,
++                                        opj_event_mgr_t* manager)
+ {
+     OPJ_UINT32 numcomps = p_image->numcomps;
+ 
+@@ -1376,7 +1417,7 @@ opj_pi_iterator_t *opj_pi_create_decode(opj_image_t *p_image,
+     /* encoding prameters to set */
+     OPJ_UINT32 l_max_res;
+     OPJ_UINT32 l_max_prec;
+-    OPJ_INT32 l_tx0, l_tx1, l_ty0, l_ty1;
++    OPJ_UINT32 l_tx0, l_tx1, l_ty0, l_ty1;
+     OPJ_UINT32 l_dx_min, l_dy_min;
+     OPJ_UINT32 l_bound;
+     OPJ_UINT32 l_step_p, l_step_c, l_step_r, l_step_l ;
+@@ -1416,7 +1457,7 @@ opj_pi_iterator_t *opj_pi_create_decode(opj_image_t *p_image,
+     }
+ 
+     /* memory allocation for pi */
+-    l_pi = opj_pi_create(p_image, p_cp, p_tile_no);
++    l_pi = opj_pi_create(p_image, p_cp, p_tile_no, manager);
+     if (!l_pi) {
+         opj_free(l_tmp_data);
+         opj_free(l_tmp_ptr);
+@@ -1557,11 +1598,34 @@ opj_pi_iterator_t *opj_pi_create_decode(opj_image_t *p_image,
+ }
+ 
+ 
++OPJ_UINT32 opj_get_encoding_packet_count(const opj_image_t *p_image,
++        const opj_cp_t *p_cp,
++        OPJ_UINT32 p_tile_no)
++{
++    OPJ_UINT32 l_max_res;
++    OPJ_UINT32 l_max_prec;
++    OPJ_UINT32 l_tx0, l_tx1, l_ty0, l_ty1;
++    OPJ_UINT32 l_dx_min, l_dy_min;
++
++    /* debug-only checks: non-NULL inputs and a tile index inside the tw x th grid */
++    assert(p_cp != 00);
++    assert(p_image != 00);
++    assert(p_tile_no < p_cp->tw * p_cp->th);
++
++    /* NULL is passed for the per-resolution output: only the bounds, minima and maxima are needed */
++    opj_get_all_encoding_parameters(p_image, p_cp, p_tile_no, &l_tx0, &l_tx1,
++                                    &l_ty0, &l_ty1, &l_dx_min, &l_dy_min, &l_max_prec, &l_max_res, NULL);
++
++    return p_cp->tcps[p_tile_no].numlayers * l_max_prec * p_image->numcomps *
++           l_max_res;
++}
++
+ 
+ opj_pi_iterator_t *opj_pi_initialise_encode(const opj_image_t *p_image,
+         opj_cp_t *p_cp,
+         OPJ_UINT32 p_tile_no,
+-        J2K_T2_MODE p_t2_mode)
++        J2K_T2_MODE p_t2_mode,
++        opj_event_mgr_t* manager)
+ {
+     OPJ_UINT32 numcomps = p_image->numcomps;
+ 
+@@ -1576,7 +1640,7 @@ opj_pi_iterator_t *opj_pi_initialise_encode(const opj_image_t *p_image,
+     /* encoding prameters to set*/
+     OPJ_UINT32 l_max_res;
+     OPJ_UINT32 l_max_prec;
+-    OPJ_INT32 l_tx0, l_tx1, l_ty0, l_ty1;
++    OPJ_UINT32 l_tx0, l_tx1, l_ty0, l_ty1;
+     OPJ_UINT32 l_dx_min, l_dy_min;
+     OPJ_UINT32 l_bound;
+     OPJ_UINT32 l_step_p, l_step_c, l_step_r, l_step_l ;
+@@ -1615,7 +1679,7 @@ opj_pi_iterator_t *opj_pi_initialise_encode(const opj_image_t *p_image,
+     }
+ 
+     /* memory allocation for pi*/
+-    l_pi = opj_pi_create(p_image, p_cp, p_tile_no);
++    l_pi = opj_pi_create(p_image, p_cp, p_tile_no, manager);
+     if (!l_pi) {
+         opj_free(l_tmp_data);
+         opj_free(l_tmp_ptr);
+@@ -1770,7 +1834,8 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi,
+     pi[pino].poc.prg = tcp->prg;
+ 
+     if (!(cp->m_specific_param.m_enc.m_tp_on && ((!OPJ_IS_CINEMA(cp->rsiz) &&
+-            (t2_mode == FINAL_PASS)) || OPJ_IS_CINEMA(cp->rsiz)))) {
++            !OPJ_IS_IMF(cp->rsiz) &&
++            (t2_mode == FINAL_PASS)) || OPJ_IS_CINEMA(cp->rsiz) || OPJ_IS_IMF(cp->rsiz)))) {
+         pi[pino].poc.resno0 = tcp->resS;
+         pi[pino].poc.resno1 = tcp->resE;
+         pi[pino].poc.compno0 = tcp->compS;
+@@ -1779,10 +1844,10 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi,
+         pi[pino].poc.layno1 = tcp->layE;
+         pi[pino].poc.precno0 = tcp->prcS;
+         pi[pino].poc.precno1 = tcp->prcE;
+-        pi[pino].poc.tx0 = (OPJ_INT32)tcp->txS;
+-        pi[pino].poc.ty0 = (OPJ_INT32)tcp->tyS;
+-        pi[pino].poc.tx1 = (OPJ_INT32)tcp->txE;
+-        pi[pino].poc.ty1 = (OPJ_INT32)tcp->tyE;
++        pi[pino].poc.tx0 = tcp->txS;
++        pi[pino].poc.ty0 = tcp->tyS;
++        pi[pino].poc.tx1 = tcp->txE;
++        pi[pino].poc.ty1 = tcp->tyE;
+     } else {
+         for (i = tppos + 1; i < 4; i++) {
+             switch (prog[i]) {
+@@ -1806,10 +1871,10 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi,
+                     pi[pino].poc.precno1 = tcp->prcE;
+                     break;
+                 default:
+-                    pi[pino].poc.tx0 = (OPJ_INT32)tcp->txS;
+-                    pi[pino].poc.ty0 = (OPJ_INT32)tcp->tyS;
+-                    pi[pino].poc.tx1 = (OPJ_INT32)tcp->txE;
+-                    pi[pino].poc.ty1 = (OPJ_INT32)tcp->tyE;
++                    pi[pino].poc.tx0 = tcp->txS;
++                    pi[pino].poc.ty0 = tcp->tyS;
++                    pi[pino].poc.tx1 = tcp->txE;
++                    pi[pino].poc.ty1 = tcp->tyE;
+                     break;
+                 }
+                 break;
+@@ -1849,10 +1914,10 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi,
+                     default:
+                         tcp->tx0_t = tcp->txS;
+                         tcp->ty0_t = tcp->tyS;
+-                        pi[pino].poc.tx0 = (OPJ_INT32)tcp->tx0_t;
+-                        pi[pino].poc.tx1 = (OPJ_INT32)(tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx));
+-                        pi[pino].poc.ty0 = (OPJ_INT32)tcp->ty0_t;
+-                        pi[pino].poc.ty1 = (OPJ_INT32)(tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy));
++                        pi[pino].poc.tx0 = tcp->tx0_t;
++                        pi[pino].poc.tx1 = tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx);
++                        pi[pino].poc.ty0 = tcp->ty0_t;
++                        pi[pino].poc.ty1 = tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy);
+                         tcp->tx0_t = (OPJ_UINT32)pi[pino].poc.tx1;
+                         tcp->ty0_t = (OPJ_UINT32)pi[pino].poc.ty1;
+                         break;
+@@ -1884,10 +1949,10 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi,
+                         pi[pino].poc.precno1 = tcp->prc_t;
+                         break;
+                     default:
+-                        pi[pino].poc.tx0 = (OPJ_INT32)(tcp->tx0_t - tcp->dx - (tcp->tx0_t % tcp->dx));
+-                        pi[pino].poc.tx1 = (OPJ_INT32)tcp->tx0_t ;
+-                        pi[pino].poc.ty0 = (OPJ_INT32)(tcp->ty0_t - tcp->dy - (tcp->ty0_t % tcp->dy));
+-                        pi[pino].poc.ty1 = (OPJ_INT32)tcp->ty0_t ;
++                        pi[pino].poc.tx0 = tcp->tx0_t - tcp->dx - (tcp->tx0_t % tcp->dx);
++                        pi[pino].poc.tx1 = tcp->tx0_t ;
++                        pi[pino].poc.ty0 = tcp->ty0_t - tcp->dy - (tcp->ty0_t % tcp->dy);
++                        pi[pino].poc.ty1 = tcp->ty0_t ;
+                         break;
+                     }
+                     break;
+@@ -1974,8 +2039,8 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi,
+                                 if (tcp->ty0_t >= tcp->tyE) {
+                                     if (opj_pi_check_next_level(i - 1, cp, tileno, pino, prog)) {
+                                         tcp->ty0_t = tcp->tyS;
+-                                        pi[pino].poc.ty0 = (OPJ_INT32)tcp->ty0_t;
+-                                        pi[pino].poc.ty1 = (OPJ_INT32)(tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy));
++                                        pi[pino].poc.ty0 = tcp->ty0_t;
++                                        pi[pino].poc.ty1 = tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy);
+                                         tcp->ty0_t = (OPJ_UINT32)pi[pino].poc.ty1;
+                                         incr_top = 1;
+                                         resetX = 1;
+@@ -1984,21 +2049,21 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi,
+                                         resetX = 0;
+                                     }
+                                 } else {
+-                                    pi[pino].poc.ty0 = (OPJ_INT32)tcp->ty0_t;
+-                                    pi[pino].poc.ty1 = (OPJ_INT32)(tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy));
++                                    pi[pino].poc.ty0 = tcp->ty0_t;
++                                    pi[pino].poc.ty1 = tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy);
+                                     tcp->ty0_t = (OPJ_UINT32)pi[pino].poc.ty1;
+                                     incr_top = 0;
+                                     resetX = 1;
+                                 }
+                                 if (resetX == 1) {
+                                     tcp->tx0_t = tcp->txS;
+-                                    pi[pino].poc.tx0 = (OPJ_INT32)tcp->tx0_t;
+-                                    pi[pino].poc.tx1 = (OPJ_INT32)(tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx));
++                                    pi[pino].poc.tx0 = tcp->tx0_t;
++                                    pi[pino].poc.tx1 = tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx);
+                                     tcp->tx0_t = (OPJ_UINT32)pi[pino].poc.tx1;
+                                 }
+                             } else {
+-                                pi[pino].poc.tx0 = (OPJ_INT32)tcp->tx0_t;
+-                                pi[pino].poc.tx1 = (OPJ_INT32)(tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx));
++                                pi[pino].poc.tx0 = tcp->tx0_t;
++                                pi[pino].poc.tx1 = tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx);
+                                 tcp->tx0_t = (OPJ_UINT32)pi[pino].poc.tx1;
+                                 incr_top = 0;
+                             }
+@@ -2051,7 +2116,7 @@ void opj_pi_update_encoding_parameters(const opj_image_t *p_image,
+     /* encoding parameters to set */
+     OPJ_UINT32 l_max_res;
+     OPJ_UINT32 l_max_prec;
+-    OPJ_INT32 l_tx0, l_tx1, l_ty0, l_ty1;
++    OPJ_UINT32 l_tx0, l_tx1, l_ty0, l_ty1;
+     OPJ_UINT32 l_dx_min, l_dy_min;
+ 
+     /* pointers */
+diff --git a/third_party/libopenjpeg20/pi.h b/third_party/libopenjpeg20/pi.h
+index 8c0dc25c19dd962bf9e76994609a37ee41fc0cca..0320523b7693376d6e57d417ba86da358bcc7747 100644
+--- a/third_party/libopenjpeg20/pi.h
++++ b/third_party/libopenjpeg20/pi.h
+@@ -102,11 +102,13 @@ typedef struct opj_pi_iterator {
+     /** Components*/
+     opj_pi_comp_t *comps;
+     /** FIXME DOC*/
+-    OPJ_INT32 tx0, ty0, tx1, ty1;
++    OPJ_UINT32 tx0, ty0, tx1, ty1;
+     /** FIXME DOC*/
+-    OPJ_INT32 x, y;
++    OPJ_UINT32 x, y;
+     /** FIXME DOC*/
+     OPJ_UINT32 dx, dy;
++    /** event manager */
++    opj_event_mgr_t* manager;
+ } opj_pi_iterator_t;
+ 
+ /** @name Exported functions */
+@@ -119,13 +121,15 @@ typedef struct opj_pi_iterator {
+  * @param   cp      the coding parameters.
+  * @param   tileno  index of the tile being encoded.
+  * @param   t2_mode the type of pass for generating the packet iterator
++ * @param   manager Event manager
+  *
+  * @return  a list of packet iterator that points to the first packet of the tile (not true).
+ */
+ opj_pi_iterator_t *opj_pi_initialise_encode(const opj_image_t *image,
+         opj_cp_t *cp,
+         OPJ_UINT32 tileno,
+-        J2K_T2_MODE t2_mode);
++        J2K_T2_MODE t2_mode,
++        opj_event_mgr_t* manager);
+ 
+ /**
+  * Updates the encoding parameters of the codec.
+@@ -161,12 +165,14 @@ Create a packet iterator for Decoder
+ @param image Raw image for which the packets will be listed
+ @param cp Coding parameters
+ @param tileno Number that identifies the tile for which to list the packets
++@param manager Event manager
+ @return Returns a packet iterator that points to the first packet of the tile
+ @see opj_pi_destroy
+ */
+ opj_pi_iterator_t *opj_pi_create_decode(opj_image_t * image,
+                                         opj_cp_t * cp,
+-                                        OPJ_UINT32 tileno);
++                                        OPJ_UINT32 tileno,
++                                        opj_event_mgr_t* manager);
+ /**
+  * Destroys a packet iterator array.
+  *
+@@ -182,6 +188,17 @@ Modify the packet iterator to point to the next packet
+ @return Returns false if pi pointed to the last packet or else returns true
+ */
+ OPJ_BOOL opj_pi_next(opj_pi_iterator_t * pi);
++
++/**
++ * Return the number of packets in the tile.
++ * @param   p_image     the image being encoded.
++ * @param   p_cp        the coding parameters.
++ * @param   p_tile_no   number that identifies the tile.
++ */
++OPJ_UINT32 opj_get_encoding_packet_count(const opj_image_t *p_image,
++        const opj_cp_t *p_cp,
++        OPJ_UINT32 p_tile_no);
++
+ /* ----------------------------------------------------------------------- */
+ /*@}*/
+ 
+diff --git a/third_party/libopenjpeg20/t1.c b/third_party/libopenjpeg20/t1.c
+index f6f7671190cd5bc5a40a8ccac9b349abc0489e43..1bea54b0d518d04cb76c05a274ec040d0a2eeb9d 100644
+--- a/third_party/libopenjpeg20/t1.c
++++ b/third_party/libopenjpeg20/t1.c
+@@ -61,6 +61,13 @@
+ 
+ #define opj_t1_setcurctx(curctx, ctxno)  curctx = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)]
+ 
++/* Sign-magnitude (smr) helpers: bit 31 holds the sign, bits 0..30 the magnitude.
++ * opj_to_smr evaluates x more than once, so pass a side-effect-free expression. */
++#define opj_smr_abs(x)  (((OPJ_UINT32)(x)) & 0x7FFFFFFFU)
++#define opj_smr_sign(x) (((OPJ_UINT32)(x)) >> 31)
++#define opj_to_smr(x)   ((x) >= 0 ? (OPJ_UINT32)(x) : ((OPJ_UINT32)(-(x)) | 0x80000000U))
++
++
+ /** @name Local static functions */
+ /*@{*/
+ 
+@@ -177,18 +184,18 @@ static OPJ_FLOAT64 opj_t1_getwmsedec(
+     const OPJ_FLOAT64 * mct_norms,
+     OPJ_UINT32 mct_numcomps);
+ 
+-static void opj_t1_encode_cblk(opj_t1_t *t1,
+-                               opj_tcd_cblk_enc_t* cblk,
+-                               OPJ_UINT32 orient,
+-                               OPJ_UINT32 compno,
+-                               OPJ_UINT32 level,
+-                               OPJ_UINT32 qmfbid,
+-                               OPJ_FLOAT64 stepsize,
+-                               OPJ_UINT32 cblksty,
+-                               OPJ_UINT32 numcomps,
+-                               opj_tcd_tile_t * tile,
+-                               const OPJ_FLOAT64 * mct_norms,
+-                               OPJ_UINT32 mct_numcomps);
++/** Return "cumwmsedec" that should be used to increase tile->distotile */
++static double opj_t1_encode_cblk(opj_t1_t *t1,
++                                 opj_tcd_cblk_enc_t* cblk,
++                                 OPJ_UINT32 orient,
++                                 OPJ_UINT32 compno,
++                                 OPJ_UINT32 level,
++                                 OPJ_UINT32 qmfbid,
++                                 OPJ_FLOAT64 stepsize,
++                                 OPJ_UINT32 cblksty,
++                                 OPJ_UINT32 numcomps,
++                                 const OPJ_FLOAT64 * mct_norms,
++                                 OPJ_UINT32 mct_numcomps);
+ 
+ /**
+ Decode 1 code-block
+@@ -329,61 +336,53 @@ static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
+ /**
+ Encode significant pass
+ */
+-static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1,
+-        opj_flag_t *flagsp,
+-        OPJ_INT32 *datap,
+-        OPJ_INT32 bpno,
+-        OPJ_INT32 one,
+-        OPJ_INT32 *nmsedec,
+-        OPJ_BYTE type,
+-        OPJ_UINT32 ci,
+-        OPJ_UINT32 vsc)
+-{
+-    OPJ_UINT32 v;
+-
+-    opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
+-
+-    OPJ_UINT32 const flags = *flagsp;
+-
+-    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
+-            (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
+-        OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
+-        v = (opj_int_abs(*datap) & one) ? 1 : 0;
+-#ifdef DEBUG_ENC_SIG
+-        fprintf(stderr, "   ctxt1=%d\n", ctxt1);
+-#endif
+-        opj_mqc_setcurctx(mqc, ctxt1);
+-        if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */
+-            opj_mqc_bypass_enc(mqc, v);
+-        } else {
+-            opj_mqc_encode(mqc, v);
+-        }
+-        if (v) {
+-            OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index(
+-                                *flagsp,
+-                                flagsp[-1], flagsp[1],
+-                                ci);
+-            OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu);
+-            v = *datap < 0 ? 1U : 0U;
+-            *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap),
+-                                              (OPJ_UINT32)bpno);
+-#ifdef DEBUG_ENC_SIG
+-            fprintf(stderr, "   ctxt2=%d\n", ctxt2);
+-#endif
+-            opj_mqc_setcurctx(mqc, ctxt2);
+-            if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */
+-                opj_mqc_bypass_enc(mqc, v);
+-            } else {
+-                OPJ_UINT32 spb = opj_t1_getspb(lu);
+-#ifdef DEBUG_ENC_SIG
+-                fprintf(stderr, "   spb=%d\n", spb);
+-#endif
+-                opj_mqc_encode(mqc, v ^ spb);
+-            }
+-            opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
+-        }
+-        *flagsp |= T1_PI_THIS << (ci * 3U);
+-    }
++#define opj_t1_enc_sigpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, type, ciIn, vscIn) \
++{ \
++    OPJ_UINT32 v; \
++    const OPJ_UINT32 ci = (ciIn); \
++    const OPJ_UINT32 vsc = (vscIn); \
++    const OPJ_INT32* l_datap = (datapIn); \
++    opj_flag_t* flagsp = (flagspIn); \
++    OPJ_UINT32 const flags = *flagsp; \
++    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \
++            (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \
++        OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
++        v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 1 : 0; \
++/* #ifdef DEBUG_ENC_SIG */ \
++/*        fprintf(stderr, "   ctxt1=%d\n", ctxt1); */ \
++/* #endif */ \
++        opj_t1_setcurctx(curctx, ctxt1); \
++        if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */ \
++            opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
++        } else { \
++            opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
++        } \
++        if (v) { \
++            OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
++                                *flagsp, \
++                                flagsp[-1], flagsp[1], \
++                                ci); \
++            OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \
++            v = opj_smr_sign(*l_datap); \
++            *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \
++                                              (OPJ_UINT32)bpno); \
++/* #ifdef DEBUG_ENC_SIG */ \
++/*            fprintf(stderr, "   ctxt2=%d\n", ctxt2); */ \
++/* #endif */ \
++            opj_t1_setcurctx(curctx, ctxt2); \
++            if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */ \
++                opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
++            } else { \
++                OPJ_UINT32 spb = opj_t1_getspb(lu); \
++/* #ifdef DEBUG_ENC_SIG */ \
++/*                fprintf(stderr, "   spb=%d\n", spb); */ \
++/* #endif */ \
++                opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \
++            } \
++            opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc); \
++        } \
++        *flagsp |= T1_PI_THIS << (ci * 3U); \
++    } \
+ }
+ 
+ static INLINE void opj_t1_dec_sigpass_step_raw(
+@@ -464,63 +463,64 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1,
+     OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
+     opj_flag_t* f = &T1_FLAGS(0, 0);
+     OPJ_UINT32 const extra = 2;
++    opj_mqc_t* mqc = &(t1->mqc);
++    DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
++    const OPJ_INT32* datap = t1->data;
+ 
+     *nmsedec = 0;
+ #ifdef DEBUG_ENC_SIG
+     fprintf(stderr, "enc_sigpass: bpno=%d\n", bpno);
+ #endif
+-    for (k = 0; k < (t1->h & ~3U); k += 4) {
++    for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
++        const OPJ_UINT32 w = t1->w;
+ #ifdef DEBUG_ENC_SIG
+         fprintf(stderr, " k=%d\n", k);
+ #endif
+-        for (i = 0; i < t1->w; ++i) {
++        for (i = 0; i < w; ++i, ++f, datap += 4) {
+ #ifdef DEBUG_ENC_SIG
+             fprintf(stderr, " i=%d\n", i);
+ #endif
+             if (*f == 0U) {
+                 /* Nothing to do for any of the 4 data points */
+-                f++;
+                 continue;
+             }
+-            opj_t1_enc_sigpass_step(
+-                t1,
++            opj_t1_enc_sigpass_step_macro(
++                mqc, curctx, a, c, ct,
+                 f,
+-                &t1->data[((k + 0) * t1->data_stride) + i],
++                &datap[0],
+                 bpno,
+                 one,
+                 nmsedec,
+                 type,
+                 0, cblksty & J2K_CCP_CBLKSTY_VSC);
+-            opj_t1_enc_sigpass_step(
+-                t1,
++            opj_t1_enc_sigpass_step_macro(
++                mqc, curctx, a, c, ct,
+                 f,
+-                &t1->data[((k + 1) * t1->data_stride) + i],
++                &datap[1],
+                 bpno,
+                 one,
+                 nmsedec,
+                 type,
+                 1, 0);
+-            opj_t1_enc_sigpass_step(
+-                t1,
++            opj_t1_enc_sigpass_step_macro(
++                mqc, curctx, a, c, ct,
+                 f,
+-                &t1->data[((k + 2) * t1->data_stride) + i],
++                &datap[2],
+                 bpno,
+                 one,
+                 nmsedec,
+                 type,
+                 2, 0);
+-            opj_t1_enc_sigpass_step(
+-                t1,
++            opj_t1_enc_sigpass_step_macro(
++                mqc, curctx, a, c, ct,
+                 f,
+-                &t1->data[((k + 3) * t1->data_stride) + i],
++                &datap[3],
+                 bpno,
+                 one,
+                 nmsedec,
+                 type,
+                 3, 0);
+-            ++f;
+         }
+-        f += extra;
+     }
+ 
+     if (k < t1->h) {
+@@ -528,20 +528,20 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1,
+ #ifdef DEBUG_ENC_SIG
+         fprintf(stderr, " k=%d\n", k);
+ #endif
+-        for (i = 0; i < t1->w; ++i) {
++        for (i = 0; i < t1->w; ++i, ++f) {
+ #ifdef DEBUG_ENC_SIG
+             fprintf(stderr, " i=%d\n", i);
+ #endif
+             if (*f == 0U) {
+                 /* Nothing to do for any of the 4 data points */
+-                f++;
++                datap += (t1->h - k);
+                 continue;
+             }
+-            for (j = k; j < t1->h; ++j) {
+-                opj_t1_enc_sigpass_step(
+-                    t1,
++            for (j = k; j < t1->h; ++j, ++datap) {
++                opj_t1_enc_sigpass_step_macro(
++                    mqc, curctx, a, c, ct,
+                     f,
+-                    &t1->data[(j * t1->data_stride) + i],
++                    &datap[0],
+                     bpno,
+                     one,
+                     nmsedec,
+@@ -549,9 +549,10 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1,
+                     j - k,
+                     (j == k && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0));
+             }
+-            ++f;
+         }
+     }
++
++    UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
+ }
+ 
+ static void opj_t1_dec_sigpass_raw(
+@@ -626,7 +627,7 @@ static void opj_t1_dec_sigpass_raw(
+         register opj_flag_t *flagsp = &t1->flags[(flags_stride) + 1]; \
+         const OPJ_UINT32 l_w = w; \
+         opj_mqc_t* mqc = &(t1->mqc); \
+-        DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
++        DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
+         register OPJ_UINT32 v; \
+         one = 1 << bpno; \
+         half = one >> 1; \
+@@ -651,7 +652,7 @@ static void opj_t1_dec_sigpass_raw(
+                         } \
+                 } \
+         } \
+-        UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
++        UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
+         if( k < h ) { \
+             for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
+                 for (j = 0; j < h - k; ++j) { \
+@@ -715,38 +716,27 @@ static void opj_t1_dec_sigpass_mqc(
+ /**
+ Encode refinement pass step
+ */
+-static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1,
+-        opj_flag_t *flagsp,
+-        OPJ_INT32 *datap,
+-        OPJ_INT32 bpno,
+-        OPJ_INT32 one,
+-        OPJ_INT32 *nmsedec,
+-        OPJ_BYTE type,
+-        OPJ_UINT32 ci)
+-{
+-    OPJ_UINT32 v;
+-
+-    opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
+-
+-    OPJ_UINT32 const shift_flags =
+-        (*flagsp >> (ci * 3U));
+-
+-    if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == T1_SIGMA_THIS) {
+-        OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags);
+-        *nmsedec += opj_t1_getnmsedec_ref((OPJ_UINT32)opj_int_abs(*datap),
+-                                          (OPJ_UINT32)bpno);
+-        v = (opj_int_abs(*datap) & one) ? 1 : 0;
+-#ifdef DEBUG_ENC_REF
+-        fprintf(stderr, "  ctxt=%d\n", ctxt);
+-#endif
+-        opj_mqc_setcurctx(mqc, ctxt);
+-        if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */
+-            opj_mqc_bypass_enc(mqc, v);
+-        } else {
+-            opj_mqc_encode(mqc, v);
+-        }
+-        *flagsp |= T1_MU_THIS << (ci * 3U);
+-    }
++#define opj_t1_enc_refpass_step_macro(mqc, curctx, a, c, ct, flags, flagsUpdated, datap, bpno, one, nmsedec, type, ci) \
++{\
++    OPJ_UINT32 v; \
++    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == (T1_SIGMA_THIS << ((ci) * 3U))) { \
++        const OPJ_UINT32 shift_flags = (flags >> ((ci) * 3U)); \
++        OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags); \
++        OPJ_UINT32 abs_data = opj_smr_abs(*datap); \
++        *nmsedec += opj_t1_getnmsedec_ref(abs_data, \
++                                          (OPJ_UINT32)bpno); \
++        v = ((OPJ_INT32)abs_data & one) ? 1 : 0; \
++/* #ifdef DEBUG_ENC_REF */ \
++/*        fprintf(stderr, "  ctxt=%d\n", ctxt); */ \
++/* #endif */ \
++        opj_t1_setcurctx(curctx, ctxt); \
++        if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */ \
++            opj_mqc_bypass_enc_macro(mqc, c, ct, v); \
++        } else { \
++            opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
++        } \
++        flagsUpdated |= T1_MU_THIS << ((ci) * 3U); \
++    } \
+ }
+ 
+ 
+@@ -807,100 +797,104 @@ static void opj_t1_enc_refpass(
+     const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
+     opj_flag_t* f = &T1_FLAGS(0, 0);
+     const OPJ_UINT32 extra = 2U;
++    opj_mqc_t* mqc = &(t1->mqc);
++    DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
++    const OPJ_INT32* datap = t1->data;
+ 
+     *nmsedec = 0;
+ #ifdef DEBUG_ENC_REF
+     fprintf(stderr, "enc_refpass: bpno=%d\n", bpno);
+ #endif
+-    for (k = 0; k < (t1->h & ~3U); k += 4) {
++    for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
+ #ifdef DEBUG_ENC_REF
+         fprintf(stderr, " k=%d\n", k);
+ #endif
+-        for (i = 0; i < t1->w; ++i) {
++        for (i = 0; i < t1->w; ++i, f++, datap += 4) {
++            const OPJ_UINT32 flags = *f;
++            OPJ_UINT32 flagsUpdated = flags;
+ #ifdef DEBUG_ENC_REF
+             fprintf(stderr, " i=%d\n", i);
+ #endif
+-            if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
++            if ((flags & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
+                 /* none significant */
+-                f++;
+                 continue;
+             }
+-            if ((*f & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) ==
++            if ((flags & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) ==
+                     (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) {
+                 /* all processed by sigpass */
+-                f++;
+                 continue;
+             }
+ 
+-            opj_t1_enc_refpass_step(
+-                t1,
+-                f,
+-                &t1->data[((k + 0) * t1->data_stride) + i],
++            opj_t1_enc_refpass_step_macro(
++                mqc, curctx, a, c, ct,
++                flags, flagsUpdated,
++                &datap[0],
+                 bpno,
+                 one,
+                 nmsedec,
+                 type,
+                 0);
+-            opj_t1_enc_refpass_step(
+-                t1,
+-                f,
+-                &t1->data[((k + 1) * t1->data_stride) + i],
++            opj_t1_enc_refpass_step_macro(
++                mqc, curctx, a, c, ct,
++                flags, flagsUpdated,
++                &datap[1],
+                 bpno,
+                 one,
+                 nmsedec,
+                 type,
+                 1);
+-            opj_t1_enc_refpass_step(
+-                t1,
+-                f,
+-                &t1->data[((k + 2) * t1->data_stride) + i],
++            opj_t1_enc_refpass_step_macro(
++                mqc, curctx, a, c, ct,
++                flags, flagsUpdated,
++                &datap[2],
+                 bpno,
+                 one,
+                 nmsedec,
+                 type,
+                 2);
+-            opj_t1_enc_refpass_step(
+-                t1,
+-                f,
+-                &t1->data[((k + 3) * t1->data_stride) + i],
++            opj_t1_enc_refpass_step_macro(
++                mqc, curctx, a, c, ct,
++                flags, flagsUpdated,
++                &datap[3],
+                 bpno,
+                 one,
+                 nmsedec,
+                 type,
+                 3);
+-            ++f;
++            *f = flagsUpdated;
+         }
+-        f += extra;
+     }
+ 
+     if (k < t1->h) {
+         OPJ_UINT32 j;
++        const OPJ_UINT32 remaining_lines = t1->h - k;
+ #ifdef DEBUG_ENC_REF
+         fprintf(stderr, " k=%d\n", k);
+ #endif
+-        for (i = 0; i < t1->w; ++i) {
++        for (i = 0; i < t1->w; ++i, ++f) {
+ #ifdef DEBUG_ENC_REF
+             fprintf(stderr, " i=%d\n", i);
+ #endif
+             if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
+                 /* none significant */
+-                f++;
++                datap += remaining_lines;
+                 continue;
+             }
+-            for (j = k; j < t1->h; ++j) {
+-                opj_t1_enc_refpass_step(
+-                    t1,
+-                    f,
+-                    &t1->data[(j * t1->data_stride) + i],
++            for (j = 0; j < remaining_lines; ++j, datap ++) {
++                opj_t1_enc_refpass_step_macro(
++                    mqc, curctx, a, c, ct,
++                    *f, *f,
++                    &datap[0],
+                     bpno,
+                     one,
+                     nmsedec,
+                     type,
+-                    j - k);
++                    j);
+             }
+-            ++f;
+         }
+     }
++
++    UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
+ }
+ 
+ 
+@@ -968,7 +962,7 @@ static void opj_t1_dec_refpass_raw(
+         register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
+         const OPJ_UINT32 l_w = w; \
+         opj_mqc_t* mqc = &(t1->mqc); \
+-        DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
++        DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
+         register OPJ_UINT32 v; \
+         one = 1 << bpno; \
+         poshalf = one >> 1; \
+@@ -992,7 +986,7 @@ static void opj_t1_dec_refpass_raw(
+                         } \
+                 } \
+         } \
+-        UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
++        UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
+         if( k < h ) { \
+             for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
+                 for (j = 0; j < h - k; ++j) { \
+@@ -1030,86 +1024,71 @@ static void opj_t1_dec_refpass_mqc(
+ /**
+ Encode clean-up pass step
+ */
+-static void opj_t1_enc_clnpass_step(
+-    opj_t1_t *t1,
+-    opj_flag_t *flagsp,
+-    OPJ_INT32 *datap,
+-    OPJ_INT32 bpno,
+-    OPJ_INT32 one,
+-    OPJ_INT32 *nmsedec,
+-    OPJ_UINT32 agg,
+-    OPJ_UINT32 runlen,
+-    OPJ_UINT32 lim,
+-    OPJ_UINT32 cblksty)
+-{
+-    OPJ_UINT32 v;
+-    OPJ_UINT32 ci;
+-    opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
+-
+-    const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 |
+-                              T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
+-
+-    if ((*flagsp & check) == check) {
+-        if (runlen == 0) {
+-            *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
+-        } else if (runlen == 1) {
+-            *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3);
+-        } else if (runlen == 2) {
+-            *flagsp &= ~(T1_PI_2 | T1_PI_3);
+-        } else if (runlen == 3) {
+-            *flagsp &= ~(T1_PI_3);
+-        }
+-        return;
+-    }
+-
+-    for (ci = runlen; ci < lim; ++ci) {
+-        OPJ_UINT32 vsc;
+-        opj_flag_t flags;
+-        OPJ_UINT32 ctxt1;
+-
+-        flags = *flagsp;
+-
+-        if ((agg != 0) && (ci == runlen)) {
+-            goto LABEL_PARTIAL;
+-        }
+-
+-        if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {
+-            ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
+-#ifdef DEBUG_ENC_CLN
+-            printf("   ctxt1=%d\n", ctxt1);
+-#endif
+-            opj_mqc_setcurctx(mqc, ctxt1);
+-            v = (opj_int_abs(*datap) & one) ? 1 : 0;
+-            opj_mqc_encode(mqc, v);
+-            if (v) {
+-                OPJ_UINT32 ctxt2, spb;
+-                OPJ_UINT32 lu;
+-LABEL_PARTIAL:
+-                lu = opj_t1_getctxtno_sc_or_spb_index(
+-                         *flagsp,
+-                         flagsp[-1], flagsp[1],
+-                         ci);
+-                *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap),
+-                                                  (OPJ_UINT32)bpno);
+-                ctxt2 = opj_t1_getctxno_sc(lu);
+-#ifdef DEBUG_ENC_CLN
+-                printf("   ctxt2=%d\n", ctxt2);
+-#endif
+-                opj_mqc_setcurctx(mqc, ctxt2);
+-
+-                v = *datap < 0 ? 1U : 0U;
+-                spb = opj_t1_getspb(lu);
+-#ifdef DEBUG_ENC_CLN
+-                printf("   spb=%d\n", spb);
+-#endif
+-                opj_mqc_encode(mqc, v ^ spb);
+-                vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0;
+-                opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc);
+-            }
+-        }
+-        *flagsp &= ~(T1_PI_THIS << (3U * ci));
+-        datap += t1->data_stride;
+-    }
++#define opj_t1_enc_clnpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, agg, runlen, lim, cblksty) \
++{ \
++    OPJ_UINT32 v; \
++    OPJ_UINT32 ci; \
++    opj_flag_t* const flagsp = (flagspIn); \
++    const OPJ_INT32* l_datap = (datapIn); \
++    const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | \
++                              T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
++ \
++    if ((*flagsp & check) == check) { \
++        if (runlen == 0) { \
++            *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
++        } else if (runlen == 1) { \
++            *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3); \
++        } else if (runlen == 2) { \
++            *flagsp &= ~(T1_PI_2 | T1_PI_3); \
++        } else if (runlen == 3) { \
++            *flagsp &= ~(T1_PI_3); \
++        } \
++    } \
++    else \
++    for (ci = runlen; ci < lim; ++ci) { \
++        OPJ_BOOL goto_PARTIAL = OPJ_FALSE; \
++        if ((agg != 0) && (ci == runlen)) { \
++            goto_PARTIAL = OPJ_TRUE; \
++        } \
++        else if (!(*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { \
++            OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, *flagsp >> (ci * 3U)); \
++/* #ifdef DEBUG_ENC_CLN */ \
++/*            printf("   ctxt1=%d\n", ctxt1); */ \
++/* #endif */ \
++            opj_t1_setcurctx(curctx, ctxt1); \
++            v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 1 : 0; \
++            opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \
++            if (v) { \
++                goto_PARTIAL = OPJ_TRUE; \
++            } \
++        } \
++        if( goto_PARTIAL ) { \
++            OPJ_UINT32 vsc; \
++            OPJ_UINT32 ctxt2, spb; \
++            OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
++                        *flagsp, \
++                        flagsp[-1], flagsp[1], \
++                        ci); \
++            *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \
++                                                (OPJ_UINT32)bpno); \
++            ctxt2 = opj_t1_getctxno_sc(lu); \
++/* #ifdef DEBUG_ENC_CLN */ \
++/*           printf("   ctxt2=%d\n", ctxt2); */ \
++/* #endif */ \
++            opj_t1_setcurctx(curctx, ctxt2); \
++ \
++            v = opj_smr_sign(*l_datap); \
++            spb = opj_t1_getspb(lu); \
++/* #ifdef DEBUG_ENC_CLN */ \
++/*           printf("   spb=%d\n", spb); */\
++/* #endif */ \
++            opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \
++            vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0; \
++            opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc); \
++        } \
++        *flagsp &= ~(T1_PI_THIS << (3U * ci)); \
++        l_datap ++; \
++    } \
+ }
+ 
+ #define opj_t1_dec_clnpass_step_macro(check_flags, partial, \
+@@ -1165,47 +1144,50 @@ static void opj_t1_enc_clnpass(
+ {
+     OPJ_UINT32 i, k;
+     const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
+-    OPJ_UINT32 agg, runlen;
+-
+-    opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
++    opj_mqc_t* mqc = &(t1->mqc);
++    DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
++    const OPJ_INT32* datap = t1->data;
++    opj_flag_t *f = &T1_FLAGS(0, 0);
++    const OPJ_UINT32 extra = 2U;
+ 
+     *nmsedec = 0;
+ #ifdef DEBUG_ENC_CLN
+     printf("enc_clnpass: bpno=%d\n", bpno);
+ #endif
+-    for (k = 0; k < (t1->h & ~3U); k += 4) {
++    for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) {
+ #ifdef DEBUG_ENC_CLN
+         printf(" k=%d\n", k);
+ #endif
+-        for (i = 0; i < t1->w; ++i) {
++        for (i = 0; i < t1->w; ++i, f++) {
++            OPJ_UINT32 agg, runlen;
+ #ifdef DEBUG_ENC_CLN
+             printf("  i=%d\n", i);
+ #endif
+-            agg = !(T1_FLAGS(i, k));
++            agg = !*f;
+ #ifdef DEBUG_ENC_CLN
+             printf("   agg=%d\n", agg);
+ #endif
+             if (agg) {
+-                for (runlen = 0; runlen < 4; ++runlen) {
+-                    if (opj_int_abs(t1->data[((k + runlen)*t1->data_stride) + i]) & one) {
++                for (runlen = 0; runlen < 4; ++runlen, ++datap) {
++                    if (opj_smr_abs(*datap) & (OPJ_UINT32)one) {
+                         break;
+                     }
+                 }
+-                opj_mqc_setcurctx(mqc, T1_CTXNO_AGG);
+-                opj_mqc_encode(mqc, runlen != 4);
++                opj_t1_setcurctx(curctx, T1_CTXNO_AGG);
++                opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen != 4);
+                 if (runlen == 4) {
+                     continue;
+                 }
+-                opj_mqc_setcurctx(mqc, T1_CTXNO_UNI);
+-                opj_mqc_encode(mqc, runlen >> 1);
+-                opj_mqc_encode(mqc, runlen & 1);
++                opj_t1_setcurctx(curctx, T1_CTXNO_UNI);
++                opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen >> 1);
++                opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen & 1);
+             } else {
+                 runlen = 0;
+             }
+-            opj_t1_enc_clnpass_step(
+-                t1,
+-                &T1_FLAGS(i, k),
+-                &t1->data[((k + runlen) * t1->data_stride) + i],
++            opj_t1_enc_clnpass_step_macro(
++                mqc, curctx, a, c, ct,
++                f,
++                datap,
+                 bpno,
+                 one,
+                 nmsedec,
+@@ -1213,23 +1195,24 @@ static void opj_t1_enc_clnpass(
+                 runlen,
+                 4U,
+                 cblksty);
++            datap += 4 - runlen;
+         }
+     }
+     if (k < t1->h) {
+-        agg = 0;
+-        runlen = 0;
++        const OPJ_UINT32 agg = 0;
++        const OPJ_UINT32 runlen = 0;
+ #ifdef DEBUG_ENC_CLN
+         printf(" k=%d\n", k);
+ #endif
+-        for (i = 0; i < t1->w; ++i) {
++        for (i = 0; i < t1->w; ++i, f++) {
+ #ifdef DEBUG_ENC_CLN
+             printf("  i=%d\n", i);
+             printf("   agg=%d\n", agg);
+ #endif
+-            opj_t1_enc_clnpass_step(
+-                t1,
+-                &T1_FLAGS(i, k),
+-                &t1->data[((k + runlen) * t1->data_stride) + i],
++            opj_t1_enc_clnpass_step_macro(
++                mqc, curctx, a, c, ct,
++                f,
++                datap,
+                 bpno,
+                 one,
+                 nmsedec,
+@@ -1237,8 +1220,11 @@ static void opj_t1_enc_clnpass(
+                 runlen,
+                 t1->h - k,
+                 cblksty);
++            datap += t1->h - k;
+         }
+     }
++
++    UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct);
+ }
+ 
+ #define opj_t1_dec_clnpass_internal(t1, bpno, vsc, w, h, flags_stride) \
+@@ -1250,7 +1236,7 @@ static void opj_t1_enc_clnpass(
+     opj_mqc_t* mqc = &(t1->mqc); \
+     register OPJ_INT32 *data = t1->data; \
+     register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
+-    DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
++    DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
+     register OPJ_UINT32 v; \
+     one = 1 << bpno; \
+     half = one >> 1; \
+@@ -1319,7 +1305,7 @@ static void opj_t1_enc_clnpass(
+             *flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
+         } \
+     } \
+-    UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
++    UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \
+     if( k < h ) { \
+         for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \
+             for (j = 0; j < h - k; ++j) { \
+@@ -1426,7 +1412,11 @@ static OPJ_FLOAT64 opj_t1_getwmsedec(
+     if (qmfbid == 1) {
+         w2 = opj_dwt_getnorm(level, orient);
+     } else {    /* if (qmfbid == 0) */
++        const OPJ_INT32 log2_gain = (orient == 0) ? 0 :
++                                    (orient == 3) ? 2 : 1;
+         w2 = opj_dwt_getnorm_real(level, orient);
++        /* Not sure this is right. But preserves past behaviour */
++        stepsize /= (1 << log2_gain);
+     }
+ 
+     wmsedec = w1 * w2 * stepsize * (1 << bpno);
+@@ -1450,7 +1440,7 @@ static OPJ_BOOL opj_t1_allocate_buffers(
+     assert(w * h <= 4096);
+ 
+     /* encoder uses tile buffer, so no need to allocate */
+-    if (!t1->encoder) {
++    {
+         OPJ_UINT32 datasize = w * h;
+ 
+         if (datasize > t1->datasize) {
+@@ -1560,8 +1550,7 @@ void opj_t1_destroy(opj_t1_t *p_t1)
+         return;
+     }
+ 
+-    /* encoder uses tile buffer, so no need to free */
+-    if (!p_t1->encoder && p_t1->data) {
++    if (p_t1->data) {
+         opj_aligned_free(p_t1->data);
+         p_t1->data = 00;
+     }
+@@ -1658,7 +1647,21 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
+     t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
+     if (t1 == NULL) {
+         t1 = opj_t1_create(OPJ_FALSE);
+-        opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper);
++        if (t1 == NULL) {
++            opj_event_msg(job->p_manager, EVT_ERROR,
++                          "Cannot allocate Tier 1 handle\n");
++            *(job->pret) = OPJ_FALSE;
++            opj_free(job);
++            return;
++        }
++        if (!opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper)) {
++            opj_event_msg(job->p_manager, EVT_ERROR,
++                          "Unable to set t1 handle as TLS\n");
++            opj_t1_destroy(t1);
++            *(job->pret) = OPJ_FALSE;
++            opj_free(job);
++            return;
++        }
+     }
+     t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer;
+ 
+@@ -1725,10 +1728,11 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
+                 datap[i] /= 2;
+             }
+         } else {        /* if (tccp->qmfbid == 0) */
++            const float stepsize = 0.5f * band->stepsize;
+             i = 0;
+ #ifdef __SSE2__
+             {
+-                const __m128 xmm_stepsize = _mm_set1_ps(band->stepsize);
++                const __m128 xmm_stepsize = _mm_set1_ps(stepsize);
+                 for (; i < (cblk_size & ~15U); i += 16) {
+                     __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+                                                            datap + 0)));
+@@ -1747,7 +1751,7 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
+             }
+ #endif
+             for (; i < cblk_size; ++i) {
+-                OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * band->stepsize;
++                OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * stepsize;
+                 memcpy(datap, &tmp, sizeof(tmp));
+                 datap++;
+             }
+@@ -1773,12 +1777,13 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
+             }
+         }
+     } else {        /* if (tccp->qmfbid == 0) */
++        const float stepsize = 0.5f * band->stepsize;
+         OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y *
+                                                          tile_w + (OPJ_SIZE_T)x];
+         for (j = 0; j < cblk_h; ++j) {
+             OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp;
+             for (i = 0; i < cblk_w; ++i) {
+-                OPJ_FLOAT32 tmp = (OPJ_FLOAT32) * datap * band->stepsize;
++                OPJ_FLOAT32 tmp = (OPJ_FLOAT32) * datap * stepsize;
+                 *tiledp2 = tmp;
+                 datap++;
+                 tiledp2++;
+@@ -2100,124 +2105,232 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
+ }
+ 
+ 
++typedef struct {
++    OPJ_UINT32 compno;
++    OPJ_UINT32 resno;
++    opj_tcd_cblk_enc_t* cblk;
++    opj_tcd_tile_t *tile;
++    opj_tcd_band_t* band;
++    opj_tcd_tilecomp_t* tilec;
++    opj_tccp_t* tccp;
++    const OPJ_FLOAT64 * mct_norms;
++    OPJ_UINT32 mct_numcomps;
++    volatile OPJ_BOOL* pret;
++    opj_mutex_t* mutex;
++} opj_t1_cblk_encode_processing_job_t;
++
++/** Procedure to deal with a asynchronous code-block encoding job.
++ *
++ * @param user_data Pointer to a opj_t1_cblk_encode_processing_job_t* structure
++ * @param tls       TLS handle.
++ */
++static void opj_t1_cblk_encode_processor(void* user_data, opj_tls_t* tls)
++{
++    opj_t1_cblk_encode_processing_job_t* job =
++        (opj_t1_cblk_encode_processing_job_t*)user_data;
++    opj_tcd_cblk_enc_t* cblk = job->cblk;
++    const opj_tcd_band_t* band = job->band;
++    const opj_tcd_tilecomp_t* tilec = job->tilec;
++    const opj_tccp_t* tccp = job->tccp;
++    const OPJ_UINT32 resno = job->resno;
++    opj_t1_t* t1;
++    const OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
++
++    OPJ_INT32* OPJ_RESTRICT tiledp;
++    OPJ_UINT32 cblk_w;
++    OPJ_UINT32 cblk_h;
++    OPJ_UINT32 i, j;
++
++    OPJ_INT32 x = cblk->x0 - band->x0;
++    OPJ_INT32 y = cblk->y0 - band->y0;
++
++    if (!*(job->pret)) {
++        opj_free(job);
++        return;
++    }
++
++    t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
++    if (t1 == NULL) {
++        t1 = opj_t1_create(OPJ_TRUE); /* OPJ_TRUE == T1 for encoding */
++        opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper);
++    }
++
++    if (band->bandno & 1) {
++        opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
++        x += pres->x1 - pres->x0;
++    }
++    if (band->bandno & 2) {
++        opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
++        y += pres->y1 - pres->y0;
++    }
++
++    if (!opj_t1_allocate_buffers(
++                t1,
++                (OPJ_UINT32)(cblk->x1 - cblk->x0),
++                (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
++        *(job->pret) = OPJ_FALSE;
++        opj_free(job);
++        return;
++    }
++
++    cblk_w = t1->w;
++    cblk_h = t1->h;
++
++    tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x];
++
++    if (tccp->qmfbid == 1) {
++        /* Do multiplication on unsigned type, even if the
++            * underlying type is signed, to avoid potential
++            * int overflow on large value (the output will be
++            * incorrect in such situation, but whatever...)
++            * This assumes complement-to-2 signed integer
++            * representation
++            * Fixes https://github.com/uclouvain/openjpeg/issues/1053
++            */
++        OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp;
++        OPJ_UINT32* OPJ_RESTRICT t1data = (OPJ_UINT32*) t1->data;
++        /* Change from "natural" order to "zigzag" order of T1 passes */
++        for (j = 0; j < (cblk_h & ~3U); j += 4) {
++            for (i = 0; i < cblk_w; ++i) {
++                t1data[0] = tiledp_u[(j + 0) * tile_w + i] << T1_NMSEDEC_FRACBITS;
++                t1data[1] = tiledp_u[(j + 1) * tile_w + i] << T1_NMSEDEC_FRACBITS;
++                t1data[2] = tiledp_u[(j + 2) * tile_w + i] << T1_NMSEDEC_FRACBITS;
++                t1data[3] = tiledp_u[(j + 3) * tile_w + i] << T1_NMSEDEC_FRACBITS;
++                t1data += 4;
++            }
++        }
++        if (j < cblk_h) {
++            for (i = 0; i < cblk_w; ++i) {
++                OPJ_UINT32 k;
++                for (k = j; k < cblk_h; k++) {
++                    t1data[0] = tiledp_u[k * tile_w + i] << T1_NMSEDEC_FRACBITS;
++                    t1data ++;
++                }
++            }
++        }
++    } else {        /* if (tccp->qmfbid == 0) */
++        OPJ_FLOAT32* OPJ_RESTRICT tiledp_f = (OPJ_FLOAT32*) tiledp;
++        OPJ_INT32* OPJ_RESTRICT t1data = t1->data;
++        /* Change from "natural" order to "zigzag" order of T1 passes */
++        for (j = 0; j < (cblk_h & ~3U); j += 4) {
++            for (i = 0; i < cblk_w; ++i) {
++                t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 0) * tile_w + i] /
++                                                   band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
++                t1data[1] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 1) * tile_w + i] /
++                                                   band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
++                t1data[2] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 2) * tile_w + i] /
++                                                   band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
++                t1data[3] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 3) * tile_w + i] /
++                                                   band->stepsize) * (1 << T1_NMSEDEC_FRACBITS));
++                t1data += 4;
++            }
++        }
++        if (j < cblk_h) {
++            for (i = 0; i < cblk_w; ++i) {
++                OPJ_UINT32 k;
++                for (k = j; k < cblk_h; k++) {
++                    t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[k * tile_w + i] / band->stepsize)
++                                                      * (1 << T1_NMSEDEC_FRACBITS));
++                    t1data ++;
++                }
++            }
++        }
++    }
++
++    {
++        OPJ_FLOAT64 cumwmsedec =
++            opj_t1_encode_cblk(
++                t1,
++                cblk,
++                band->bandno,
++                job->compno,
++                tilec->numresolutions - 1 - resno,
++                tccp->qmfbid,
++                band->stepsize,
++                tccp->cblksty,
++                job->tile->numcomps,
++                job->mct_norms,
++                job->mct_numcomps);
++        if (job->mutex) {
++            opj_mutex_lock(job->mutex);
++        }
++        job->tile->distotile += cumwmsedec;
++        if (job->mutex) {
++            opj_mutex_unlock(job->mutex);
++        }
++    }
++
++    opj_free(job);
++}
+ 
+ 
+-OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1,
++OPJ_BOOL opj_t1_encode_cblks(opj_tcd_t* tcd,
+                              opj_tcd_tile_t *tile,
+                              opj_tcp_t *tcp,
+                              const OPJ_FLOAT64 * mct_norms,
+                              OPJ_UINT32 mct_numcomps
+                             )
+ {
++    volatile OPJ_BOOL ret = OPJ_TRUE;
++    opj_thread_pool_t* tp = tcd->thread_pool;
+     OPJ_UINT32 compno, resno, bandno, precno, cblkno;
++    opj_mutex_t* mutex = opj_mutex_create();
+ 
+     tile->distotile = 0;        /* fixed_quality */
+ 
+     for (compno = 0; compno < tile->numcomps; ++compno) {
+         opj_tcd_tilecomp_t* tilec = &tile->comps[compno];
+         opj_tccp_t* tccp = &tcp->tccps[compno];
+-        OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
+ 
+         for (resno = 0; resno < tilec->numresolutions; ++resno) {
+             opj_tcd_resolution_t *res = &tilec->resolutions[resno];
+ 
+             for (bandno = 0; bandno < res->numbands; ++bandno) {
+                 opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
+-                OPJ_INT32 bandconst;
+ 
+                 /* Skip empty bands */
+                 if (opj_tcd_is_band_empty(band)) {
+                     continue;
+                 }
+-
+-                bandconst = 8192 * 8192 / ((OPJ_INT32) floor(band->stepsize * 8192));
+                 for (precno = 0; precno < res->pw * res->ph; ++precno) {
+                     opj_tcd_precinct_t *prc = &band->precincts[precno];
+ 
+                     for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) {
+                         opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno];
+-                        OPJ_INT32* OPJ_RESTRICT tiledp;
+-                        OPJ_UINT32 cblk_w;
+-                        OPJ_UINT32 cblk_h;
+-                        OPJ_UINT32 i, j, tileLineAdvance;
+-                        OPJ_SIZE_T tileIndex = 0;
+-
+-                        OPJ_INT32 x = cblk->x0 - band->x0;
+-                        OPJ_INT32 y = cblk->y0 - band->y0;
+-                        if (band->bandno & 1) {
+-                            opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
+-                            x += pres->x1 - pres->x0;
+-                        }
+-                        if (band->bandno & 2) {
+-                            opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
+-                            y += pres->y1 - pres->y0;
+-                        }
+-
+-                        if (!opj_t1_allocate_buffers(
+-                                    t1,
+-                                    (OPJ_UINT32)(cblk->x1 - cblk->x0),
+-                                    (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
+-                            return OPJ_FALSE;
+-                        }
+ 
+-                        cblk_w = t1->w;
+-                        cblk_h = t1->h;
+-                        tileLineAdvance = tile_w - cblk_w;
+-
+-                        tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x];
+-                        t1->data = tiledp;
+-                        t1->data_stride = tile_w;
+-                        if (tccp->qmfbid == 1) {
+-                            /* Do multiplication on unsigned type, even if the
+-                             * underlying type is signed, to avoid potential
+-                             * int overflow on large value (the output will be
+-                             * incorrect in such situation, but whatever...)
+-                             * This assumes complement-to-2 signed integer
+-                             * representation
+-                             * Fixes https://github.com/uclouvain/openjpeg/issues/1053
+-                             */
+-                            OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp;
+-                            for (j = 0; j < cblk_h; ++j) {
+-                                for (i = 0; i < cblk_w; ++i) {
+-                                    tiledp_u[tileIndex] <<= T1_NMSEDEC_FRACBITS;
+-                                    tileIndex++;
+-                                }
+-                                tileIndex += tileLineAdvance;
+-                            }
+-                        } else {        /* if (tccp->qmfbid == 0) */
+-                            for (j = 0; j < cblk_h; ++j) {
+-                                for (i = 0; i < cblk_w; ++i) {
+-                                    OPJ_INT32 tmp = tiledp[tileIndex];
+-                                    tiledp[tileIndex] =
+-                                        opj_int_fix_mul_t1(
+-                                            tmp,
+-                                            bandconst);
+-                                    tileIndex++;
+-                                }
+-                                tileIndex += tileLineAdvance;
+-                            }
++                        opj_t1_cblk_encode_processing_job_t* job =
++                            (opj_t1_cblk_encode_processing_job_t*) opj_calloc(1,
++                                    sizeof(opj_t1_cblk_encode_processing_job_t));
++                        if (!job) {
++                            ret = OPJ_FALSE;
++                            goto end;
+                         }
+-
+-                        opj_t1_encode_cblk(
+-                            t1,
+-                            cblk,
+-                            band->bandno,
+-                            compno,
+-                            tilec->numresolutions - 1 - resno,
+-                            tccp->qmfbid,
+-                            band->stepsize,
+-                            tccp->cblksty,
+-                            tile->numcomps,
+-                            tile,
+-                            mct_norms,
+-                            mct_numcomps);
++                        job->compno = compno;
++                        job->tile = tile;
++                        job->resno = resno;
++                        job->cblk = cblk;
++                        job->band = band;
++                        job->tilec = tilec;
++                        job->tccp = tccp;
++                        job->mct_norms = mct_norms;
++                        job->mct_numcomps = mct_numcomps;
++                        job->pret = &ret;
++                        job->mutex = mutex;
++                        opj_thread_pool_submit_job(tp, opj_t1_cblk_encode_processor, job);
+ 
+                     } /* cblkno */
+                 } /* precno */
+             } /* bandno */
+         } /* resno  */
+     } /* compno  */
+-    return OPJ_TRUE;
++
++end:
++    opj_thread_pool_wait_completion(tcd->thread_pool, 0);
++    if (mutex) {
++        opj_mutex_destroy(mutex);
++    }
++
++    return ret;
+ }
+ 
+ /* Returns whether the pass (bpno, passtype) is terminated */
+@@ -2252,18 +2365,17 @@ static int opj_t1_enc_is_term_pass(opj_tcd_cblk_enc_t* cblk,
+ 
+ 
+ /** mod fixed_quality */
+-static void opj_t1_encode_cblk(opj_t1_t *t1,
+-                               opj_tcd_cblk_enc_t* cblk,
+-                               OPJ_UINT32 orient,
+-                               OPJ_UINT32 compno,
+-                               OPJ_UINT32 level,
+-                               OPJ_UINT32 qmfbid,
+-                               OPJ_FLOAT64 stepsize,
+-                               OPJ_UINT32 cblksty,
+-                               OPJ_UINT32 numcomps,
+-                               opj_tcd_tile_t * tile,
+-                               const OPJ_FLOAT64 * mct_norms,
+-                               OPJ_UINT32 mct_numcomps)
++static OPJ_FLOAT64 opj_t1_encode_cblk(opj_t1_t *t1,
++                                      opj_tcd_cblk_enc_t* cblk,
++                                      OPJ_UINT32 orient,
++                                      OPJ_UINT32 compno,
++                                      OPJ_UINT32 level,
++                                      OPJ_UINT32 qmfbid,
++                                      OPJ_FLOAT64 stepsize,
++                                      OPJ_UINT32 cblksty,
++                                      OPJ_UINT32 numcomps,
++                                      const OPJ_FLOAT64 * mct_norms,
++                                      OPJ_UINT32 mct_numcomps)
+ {
+     OPJ_FLOAT64 cumwmsedec = 0.0;
+ 
+@@ -2277,6 +2389,7 @@ static void opj_t1_encode_cblk(opj_t1_t *t1,
+     OPJ_UINT32 i, j;
+     OPJ_BYTE type = T1_TYPE_MQ;
+     OPJ_FLOAT64 tempwmsedec;
++    OPJ_INT32* datap;
+ 
+ #ifdef EXTRA_DEBUG
+     printf("encode_cblk(x=%d,y=%d,x1=%d,y1=%d,orient=%d,compno=%d,level=%d\n",
+@@ -2286,10 +2399,19 @@ static void opj_t1_encode_cblk(opj_t1_t *t1,
+     mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
+ 
+     max = 0;
+-    for (i = 0; i < t1->w; ++i) {
+-        for (j = 0; j < t1->h; ++j) {
+-            OPJ_INT32 tmp = abs(t1->data[i + j * t1->data_stride]);
+-            max = opj_int_max(max, tmp);
++    datap = t1->data;
++    for (j = 0; j < t1->h; ++j) {
++        const OPJ_UINT32 w = t1->w;
++        for (i = 0; i < w; ++i, ++datap) {
++            OPJ_INT32 tmp = *datap;
++            if (tmp < 0) {
++                OPJ_UINT32 tmp_unsigned;
++                max = opj_int_max(max, -tmp);
++                tmp_unsigned = opj_to_smr(tmp);
++                memcpy(datap, &tmp_unsigned, sizeof(OPJ_INT32));
++            } else {
++                max = opj_int_max(max, tmp);
++            }
+         }
+     }
+ 
+@@ -2297,7 +2419,7 @@ static void opj_t1_encode_cblk(opj_t1_t *t1,
+                                       T1_NMSEDEC_FRACBITS) : 0;
+     if (cblk->numbps == 0) {
+         cblk->totalpasses = 0;
+-        return;
++        return cumwmsedec;
+     }
+ 
+     bpno = (OPJ_INT32)(cblk->numbps - 1);
+@@ -2343,7 +2465,6 @@ static void opj_t1_encode_cblk(opj_t1_t *t1,
+         tempwmsedec = opj_t1_getwmsedec(nmsedec, compno, level, orient, bpno, qmfbid,
+                                         stepsize, numcomps, mct_norms, mct_numcomps) ;
+         cumwmsedec += tempwmsedec;
+-        tile->distotile += tempwmsedec;
+         pass->distortiondec = cumwmsedec;
+ 
+         if (opj_t1_enc_is_term_pass(cblk, cblksty, bpno, passtype)) {
+@@ -2425,4 +2546,6 @@ static void opj_t1_encode_cblk(opj_t1_t *t1,
+         }
+     }
+ #endif
++
++    return cumwmsedec;
+ }
+diff --git a/third_party/libopenjpeg20/t1.h b/third_party/libopenjpeg20/t1.h
+index 171dfb0a7ae57e5f874f74c6967d80b628b6316e..81ad0d00f17d11a7a33d6c1a02222d3ab47faf14 100644
+--- a/third_party/libopenjpeg20/t1.h
++++ b/third_party/libopenjpeg20/t1.h
+@@ -198,7 +198,6 @@ typedef struct opj_t1 {
+     OPJ_UINT32 h;
+     OPJ_UINT32 datasize;
+     OPJ_UINT32 flagssize;
+-    OPJ_UINT32 data_stride;
+     OPJ_BOOL   encoder;
+ 
+     /* Thre 3 variables below are only used by the decoder */
+@@ -216,13 +215,13 @@ typedef struct opj_t1 {
+ 
+ /**
+ Encode the code-blocks of a tile
+-@param t1 T1 handle
++@param tcd TCD handle
+ @param tile The tile to encode
+ @param tcp Tile coding parameters
+ @param mct_norms  FIXME DOC
+ @param mct_numcomps Number of components used for MCT
+ */
+-OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1,
++OPJ_BOOL opj_t1_encode_cblks(opj_tcd_t* tcd,
+                              opj_tcd_tile_t *tile,
+                              opj_tcp_t *tcp,
+                              const OPJ_FLOAT64 * mct_norms,
+diff --git a/third_party/libopenjpeg20/t2.c b/third_party/libopenjpeg20/t2.c
+index 9825118cfd7350d091a675522c165b66cff76b1d..1481e16f461968adca4ede901b7a3af6de162165 100644
+--- a/third_party/libopenjpeg20/t2.c
++++ b/third_party/libopenjpeg20/t2.c
+@@ -224,6 +224,7 @@ OPJ_BOOL opj_t2_encode_packets(opj_t2_t* p_t2,
+                                OPJ_UINT32 * p_data_written,
+                                OPJ_UINT32 p_max_len,
+                                opj_codestream_info_t *cstr_info,
++                               opj_tcd_marker_info_t* p_marker_info,
+                                OPJ_UINT32 p_tp_num,
+                                OPJ_INT32 p_tp_pos,
+                                OPJ_UINT32 p_pino,
+@@ -244,7 +245,7 @@ OPJ_BOOL opj_t2_encode_packets(opj_t2_t* p_t2,
+                             l_image->numcomps : 1;
+     OPJ_UINT32 l_nb_pocs = l_tcp->numpocs + 1;
+ 
+-    l_pi = opj_pi_initialise_encode(l_image, l_cp, p_tile_no, p_t2_mode);
++    l_pi = opj_pi_initialise_encode(l_image, l_cp, p_tile_no, p_t2_mode, p_manager);
+     if (!l_pi) {
+         return OPJ_FALSE;
+     }
+@@ -310,6 +311,20 @@ OPJ_BOOL opj_t2_encode_packets(opj_t2_t* p_t2,
+             opj_pi_destroy(l_pi, l_nb_pocs);
+             return OPJ_FALSE;
+         }
++
++        if (p_marker_info && p_marker_info->need_PLT) {
++            /* One time use intended */
++            assert(p_marker_info->packet_count == 0);
++            assert(p_marker_info->p_packet_size == NULL);
++
++            p_marker_info->p_packet_size = (OPJ_UINT32*) opj_malloc(
++                                               opj_get_encoding_packet_count(l_image, l_cp, p_tile_no) * sizeof(OPJ_UINT32));
++            if (p_marker_info->p_packet_size == NULL) {
++                opj_pi_destroy(l_pi, l_nb_pocs);
++                return OPJ_FALSE;
++            }
++        }
++
+         while (opj_pi_next(l_current_pi)) {
+             if (l_current_pi->layno < p_maxlayers) {
+                 l_nb_bytes = 0;
+@@ -326,6 +341,11 @@ OPJ_BOOL opj_t2_encode_packets(opj_t2_t* p_t2,
+ 
+                 * p_data_written += l_nb_bytes;
+ 
++                if (p_marker_info && p_marker_info->need_PLT) {
++                    p_marker_info->p_packet_size[p_marker_info->packet_count] = l_nb_bytes;
++                    p_marker_info->packet_count ++;
++                }
++
+                 /* INDEX >> */
+                 if (cstr_info) {
+                     if (cstr_info->index_write) {
+@@ -405,7 +425,7 @@ OPJ_BOOL opj_t2_decode_packets(opj_tcd_t* tcd,
+ #endif
+ 
+     /* create a packet iterator */
+-    l_pi = opj_pi_create_decode(l_image, l_cp, p_tile_no);
++    l_pi = opj_pi_create_decode(l_image, l_cp, p_tile_no, p_manager);
+     if (!l_pi) {
+         return OPJ_FALSE;
+     }
+@@ -673,6 +693,14 @@ static OPJ_BOOL opj_t2_encode_packet(OPJ_UINT32 tileno,
+     OPJ_BOOL packet_empty = OPJ_FALSE;
+ #endif
+ 
++#ifdef DEBUG_VERBOSE
++    if (p_t2_mode == FINAL_PASS) {
++        fprintf(stderr,
++                "encode packet compono=%d, resno=%d, precno=%d, layno=%d\n",
++                compno, resno, precno, layno);
++    }
++#endif
++
+     /* <SOP 0xff91> */
+     if (tcp->csty & J2K_CP_CSTY_SOP) {
+         if (length < 6) {
+@@ -711,6 +739,15 @@ static OPJ_BOOL opj_t2_encode_packet(OPJ_UINT32 tileno,
+                 continue;
+             }
+ 
++            /* Avoid out of bounds access of https://github.com/uclouvain/openjpeg/issues/1294 */
++            /* but likely not a proper fix. */
++            if (precno >= res->pw * res->ph) {
++                opj_event_msg(p_manager, EVT_ERROR,
++                              "opj_t2_encode_packet(): accessing precno=%u >= %u\n",
++                              precno, res->pw * res->ph);
++                return OPJ_FALSE;
++            }
++
+             prc = &band->precincts[precno];
+             opj_tgt_reset(prc->incltree);
+             opj_tgt_reset(prc->imsbtree);
+@@ -778,6 +815,15 @@ static OPJ_BOOL opj_t2_encode_packet(OPJ_UINT32 tileno,
+             continue;
+         }
+ 
++        /* Avoid out of bounds access of https://github.com/uclouvain/openjpeg/issues/1297 */
++        /* but likely not a proper fix. */
++        if (precno >= res->pw * res->ph) {
++            opj_event_msg(p_manager, EVT_ERROR,
++                          "opj_t2_encode_packet(): accessing precno=%u >= %u\n",
++                          precno, res->pw * res->ph);
++            return OPJ_FALSE;
++        }
++
+         prc = &band->precincts[precno];
+         l_nb_blocks = prc->cw * prc->ch;
+         cblk = prc->cblks.enc;
+diff --git a/third_party/libopenjpeg20/t2.h b/third_party/libopenjpeg20/t2.h
+index 66500b1699334d7752f9ad86eec672379028105b..becfa91a4deef924839953e0d1a2145e34f34bc1 100644
+--- a/third_party/libopenjpeg20/t2.h
++++ b/third_party/libopenjpeg20/t2.h
+@@ -73,6 +73,7 @@ Encode the packets of a tile to a destination buffer
+ @param p_data_written   FIXME DOC
+ @param len              the length of the destination buffer
+ @param cstr_info        Codestream information structure
++@param p_marker_info    Marker information structure
+ @param tpnum            Tile part number of the current tile
+ @param tppos            The position of the tile part flag in the progression order
+ @param pino             FIXME DOC
+@@ -87,6 +88,7 @@ OPJ_BOOL opj_t2_encode_packets(opj_t2_t* t2,
+                                OPJ_UINT32 * p_data_written,
+                                OPJ_UINT32 len,
+                                opj_codestream_info_t *cstr_info,
++                               opj_tcd_marker_info_t* p_marker_info,
+                                OPJ_UINT32 tpnum,
+                                OPJ_INT32 tppos,
+                                OPJ_UINT32 pino,
+diff --git a/third_party/libopenjpeg20/tcd.c b/third_party/libopenjpeg20/tcd.c
+index 9e98f04ab8bb8b008e812c9b1ef73ead49a49d7a..b9f571410b9ecd3f4c8b20c3144907f9d33d6f9e 100644
+--- a/third_party/libopenjpeg20/tcd.c
++++ b/third_party/libopenjpeg20/tcd.c
+@@ -112,7 +112,7 @@ void tcd_dump(FILE *fd, opj_tcd_t *tcd, opj_tcd_image_t * img)
+  * Initializes tile coding/decoding
+  */
+ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
+-        OPJ_BOOL isEncoder, OPJ_FLOAT32 fraction, OPJ_SIZE_T sizeof_block,
++        OPJ_BOOL isEncoder, OPJ_SIZE_T sizeof_block,
+         opj_event_mgr_t* manager);
+ 
+ /**
+@@ -182,6 +182,7 @@ static OPJ_BOOL opj_tcd_t2_encode(opj_tcd_t *p_tcd,
+                                   OPJ_UINT32 * p_data_written,
+                                   OPJ_UINT32 p_max_dest_size,
+                                   opj_codestream_info_t *p_cstr_info,
++                                  opj_tcd_marker_info_t* p_marker_info,
+                                   opj_event_mgr_t *p_manager);
+ 
+ static OPJ_BOOL opj_tcd_rate_allocate_encode(opj_tcd_t *p_tcd,
+@@ -573,9 +574,10 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd,
+                 opj_tcd_makelayer(tcd, layno, thresh, 0);
+ 
+                 if (cp->m_specific_param.m_enc.m_fixed_quality) {       /* fixed_quality */
+-                    if (OPJ_IS_CINEMA(cp->rsiz)) {
++                    if (OPJ_IS_CINEMA(cp->rsiz) || OPJ_IS_IMF(cp->rsiz)) {
+                         if (! opj_t2_encode_packets(t2, tcd->tcd_tileno, tcd_tile, layno + 1, dest,
+-                                                    p_data_written, maxlen, cstr_info, tcd->cur_tp_num, tcd->tp_pos, tcd->cur_pino,
++                                                    p_data_written, maxlen, cstr_info, NULL, tcd->cur_tp_num, tcd->tp_pos,
++                                                    tcd->cur_pino,
+                                                     THRESH_CALC, p_manager)) {
+ 
+                             lo = thresh;
+@@ -605,7 +607,8 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd,
+                     }
+                 } else {
+                     if (! opj_t2_encode_packets(t2, tcd->tcd_tileno, tcd_tile, layno + 1, dest,
+-                                                p_data_written, maxlen, cstr_info, tcd->cur_tp_num, tcd->tp_pos, tcd->cur_pino,
++                                                p_data_written, maxlen, cstr_info, NULL, tcd->cur_tp_num, tcd->tp_pos,
++                                                tcd->cur_pino,
+                                                 THRESH_CALC, p_manager)) {
+                         /* TODO: what to do with l ??? seek / tell ??? */
+                         /* opj_event_msg(tcd->cinfo, EVT_INFO, "rate alloc: len=%d, max=%d\n", l, maxlen); */
+@@ -718,10 +721,9 @@ OPJ_BOOL opj_alloc_tile_component_data(opj_tcd_tilecomp_t *l_tilec)
+ /* ----------------------------------------------------------------------- */
+ 
+ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
+-        OPJ_BOOL isEncoder, OPJ_FLOAT32 fraction, OPJ_SIZE_T sizeof_block,
++        OPJ_BOOL isEncoder, OPJ_SIZE_T sizeof_block,
+         opj_event_mgr_t* manager)
+ {
+-    OPJ_UINT32(*l_gain_ptr)(OPJ_UINT32) = 00;
+     OPJ_UINT32 compno, resno, bandno, precno, cblkno;
+     opj_tcp_t * l_tcp = 00;
+     opj_cp_t * l_cp = 00;
+@@ -737,7 +739,6 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
+     OPJ_UINT32 p, q;
+     OPJ_UINT32 l_level_no;
+     OPJ_UINT32 l_pdx, l_pdy;
+-    OPJ_UINT32 l_gain;
+     OPJ_INT32 l_x0b, l_y0b;
+     OPJ_UINT32 l_tx0, l_ty0;
+     /* extent of precincts , top left, bottom right**/
+@@ -881,11 +882,6 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
+         l_level_no = l_tilec->numresolutions;
+         l_res = l_tilec->resolutions;
+         l_step_size = l_tccp->stepsizes;
+-        if (l_tccp->qmfbid == 0) {
+-            l_gain_ptr = &opj_dwt_getgain_real;
+-        } else {
+-            l_gain_ptr  = &opj_dwt_getgain;
+-        }
+         /*fprintf(stderr, "\tlevel_no=%d\n",l_level_no);*/
+ 
+         for (resno = 0; resno < l_tilec->numresolutions; ++resno) {
+@@ -972,7 +968,6 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
+             l_band = l_res->bands;
+ 
+             for (bandno = 0; bandno < l_res->numbands; ++bandno, ++l_band, ++l_step_size) {
+-                OPJ_INT32 numbps;
+                 /*fprintf(stderr, "\t\t\tband_no=%d/%d\n", bandno, l_res->numbands );*/
+ 
+                 if (resno == 0) {
+@@ -1008,11 +1003,24 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
+                     }
+                 }
+ 
+-                /** avoid an if with storing function pointer */
+-                l_gain = (*l_gain_ptr)(l_band->bandno);
+-                numbps = (OPJ_INT32)(l_image_comp->prec + l_gain);
+-                l_band->stepsize = (OPJ_FLOAT32)(((1.0 + l_step_size->mant / 2048.0) * pow(2.0,
+-                                                  (OPJ_INT32)(numbps - l_step_size->expn)))) * fraction;
++                {
++                    /* Table E-1 - Sub-band gains */
++                    /* BUG_WEIRD_TWO_INVK (look for this identifier in dwt.c): */
++                    /* the test (!isEncoder && l_tccp->qmfbid == 0) is strongly */
++                    /* linked to the use of two_invK instead of invK */
++                    const OPJ_INT32 log2_gain = (!isEncoder &&
++                                                 l_tccp->qmfbid == 0) ? 0 : (l_band->bandno == 0) ? 0 :
++                                                (l_band->bandno == 3) ? 2 : 1;
++
++                    /* Nominal dynamic range. Equation E-4 */
++                    const OPJ_INT32 Rb = (OPJ_INT32)l_image_comp->prec + log2_gain;
++
++                    /* Delta_b value of Equation E-3 in "E.1 Inverse quantization
++                    * procedure" of the standard */
++                    l_band->stepsize = (OPJ_FLOAT32)(((1.0 + l_step_size->mant / 2048.0) * pow(2.0,
++                                                      (OPJ_INT32)(Rb - l_step_size->expn))));
++                }
++
+                 /* Mb value of Equation E-2 in "E.1 Inverse quantization
+                  * procedure" of the standard */
+                 l_band->numbps = l_step_size->expn + (OPJ_INT32)l_tccp->numgbits -
+@@ -1198,14 +1206,14 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
+ OPJ_BOOL opj_tcd_init_encode_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
+                                   opj_event_mgr_t* p_manager)
+ {
+-    return opj_tcd_init_tile(p_tcd, p_tile_no, OPJ_TRUE, 1.0F,
++    return opj_tcd_init_tile(p_tcd, p_tile_no, OPJ_TRUE,
+                              sizeof(opj_tcd_cblk_enc_t), p_manager);
+ }
+ 
+ OPJ_BOOL opj_tcd_init_decode_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
+                                   opj_event_mgr_t* p_manager)
+ {
+-    return opj_tcd_init_tile(p_tcd, p_tile_no, OPJ_FALSE, 0.5F,
++    return opj_tcd_init_tile(p_tcd, p_tile_no, OPJ_FALSE,
+                              sizeof(opj_tcd_cblk_dec_t), p_manager);
+ }
+ 
+@@ -1243,10 +1251,16 @@ static OPJ_BOOL opj_tcd_code_block_enc_allocate_data(opj_tcd_cblk_enc_t *
+ 
+     /* +1 is needed for https://github.com/uclouvain/openjpeg/issues/835 */
+     /* and actually +2 required for https://github.com/uclouvain/openjpeg/issues/982 */
++    /* and +7 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 3) */
++    /* and +26 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 7) */
++    /* and +28 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 44) */
++    /* and +33 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 4) */
++    /* and +63 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 4 -IMF 2K) */
++    /* and +74 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 4 -n 8 -s 7,7 -I) */
+     /* TODO: is there a theoretical upper-bound for the compressed code */
+     /* block size ? */
+-    l_data_size = 2 + (OPJ_UINT32)((p_code_block->x1 - p_code_block->x0) *
+-                                   (p_code_block->y1 - p_code_block->y0) * (OPJ_INT32)sizeof(OPJ_UINT32));
++    l_data_size = 74 + (OPJ_UINT32)((p_code_block->x1 - p_code_block->x0) *
++                                    (p_code_block->y1 - p_code_block->y0) * (OPJ_INT32)sizeof(OPJ_UINT32));
+ 
+     if (l_data_size > p_code_block->data_size) {
+         if (p_code_block->data) {
+@@ -1378,6 +1392,7 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd,
+                              OPJ_UINT32 * p_data_written,
+                              OPJ_UINT32 p_max_length,
+                              opj_codestream_info_t *p_cstr_info,
++                             opj_tcd_marker_info_t* p_marker_info,
+                              opj_event_mgr_t *p_manager)
+ {
+ 
+@@ -1457,7 +1472,7 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd,
+     /* FIXME _ProfStart(PGROUP_T2); */
+ 
+     if (! opj_tcd_t2_encode(p_tcd, p_dest, p_data_written, p_max_length,
+-                            p_cstr_info, p_manager)) {
++                            p_cstr_info, p_marker_info, p_manager)) {
+         return OPJ_FALSE;
+     }
+     /* FIXME _ProfStop(PGROUP_T2); */
+@@ -2041,7 +2056,8 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager)
+     opj_tcd_tile_t * l_tile = p_tcd->tcd_image->tiles;
+     opj_tcp_t * l_tcp = p_tcd->tcp;
+     opj_tcd_tilecomp_t * l_tile_comp = l_tile->comps;
+-    OPJ_UINT32 l_samples, i;
++    OPJ_SIZE_T l_samples;
++    OPJ_UINT32 i;
+ 
+     if (l_tcp->mct == 0 || p_tcd->used_component != NULL) {
+         return OPJ_TRUE;
+@@ -2054,8 +2070,8 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager)
+         /* A bit inefficient: we process more data than needed if */
+         /* resno_decoded < l_tile_comp->minimum_num_resolutions-1, */
+         /* but we would need to take into account a stride then */
+-        l_samples = (OPJ_UINT32)((res_comp0->x1 - res_comp0->x0) *
+-                                 (res_comp0->y1 - res_comp0->y0));
++        l_samples = (OPJ_SIZE_T)(res_comp0->x1 - res_comp0->x0) *
++                    (OPJ_SIZE_T)(res_comp0->y1 - res_comp0->y0);
+         if (l_tile->numcomps >= 3) {
+             if (l_tile_comp->minimum_num_resolutions !=
+                     l_tile->comps[1].minimum_num_resolutions ||
+@@ -2089,8 +2105,8 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager)
+         opj_tcd_resolution_t* res_comp0 = l_tile->comps[0].resolutions +
+                                           p_tcd->image->comps[0].resno_decoded;
+ 
+-        l_samples = (res_comp0->win_x1 - res_comp0->win_x0) *
+-                    (res_comp0->win_y1 - res_comp0->win_y0);
++        l_samples = (OPJ_SIZE_T)(res_comp0->win_x1 - res_comp0->win_x0) *
++                    (OPJ_SIZE_T)(res_comp0->win_y1 - res_comp0->win_y0);
+         if (l_tile->numcomps >= 3) {
+             opj_tcd_resolution_t* res_comp1 = l_tile->comps[1].resolutions +
+                                               p_tcd->image->comps[1].resno_decoded;
+@@ -2356,7 +2372,7 @@ static void opj_tcd_code_block_enc_deallocate(opj_tcd_precinct_t * p_precinct)
+     }
+ }
+ 
+-OPJ_SIZE_T opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd)
++OPJ_SIZE_T opj_tcd_get_encoder_input_buffer_size(opj_tcd_t *p_tcd)
+ {
+     OPJ_UINT32 i;
+     OPJ_SIZE_T l_data_size = 0;
+@@ -2414,7 +2430,8 @@ static OPJ_BOOL opj_tcd_dc_level_shift_encode(opj_tcd_t *p_tcd)
+             }
+         } else {
+             for (i = 0; i < l_nb_elem; ++i) {
+-                *l_current_ptr = (*l_current_ptr - l_tccp->m_dc_level_shift) * (1 << 11);
++                *((OPJ_FLOAT32 *) l_current_ptr) = (OPJ_FLOAT32)(*l_current_ptr -
++                                                   l_tccp->m_dc_level_shift);
+                 ++l_current_ptr;
+             }
+         }
+@@ -2472,8 +2489,11 @@ static OPJ_BOOL opj_tcd_mct_encode(opj_tcd_t *p_tcd)
+ 
+         opj_free(l_data);
+     } else if (l_tcp->tccps->qmfbid == 0) {
+-        opj_mct_encode_real(l_tile->comps[0].data, l_tile->comps[1].data,
+-                            l_tile->comps[2].data, samples);
++        opj_mct_encode_real(
++            (OPJ_FLOAT32*)l_tile->comps[0].data,
++            (OPJ_FLOAT32*)l_tile->comps[1].data,
++            (OPJ_FLOAT32*)l_tile->comps[2].data,
++            samples);
+     } else {
+         opj_mct_encode(l_tile->comps[0].data, l_tile->comps[1].data,
+                        l_tile->comps[2].data, samples);
+@@ -2491,11 +2511,11 @@ static OPJ_BOOL opj_tcd_dwt_encode(opj_tcd_t *p_tcd)
+ 
+     for (compno = 0; compno < l_tile->numcomps; ++compno) {
+         if (l_tccp->qmfbid == 1) {
+-            if (! opj_dwt_encode(l_tile_comp)) {
++            if (! opj_dwt_encode(p_tcd, l_tile_comp)) {
+                 return OPJ_FALSE;
+             }
+         } else if (l_tccp->qmfbid == 0) {
+-            if (! opj_dwt_encode_real(l_tile_comp)) {
++            if (! opj_dwt_encode_real(p_tcd, l_tile_comp)) {
+                 return OPJ_FALSE;
+             }
+         }
+@@ -2509,16 +2529,10 @@ static OPJ_BOOL opj_tcd_dwt_encode(opj_tcd_t *p_tcd)
+ 
+ static OPJ_BOOL opj_tcd_t1_encode(opj_tcd_t *p_tcd)
+ {
+-    opj_t1_t * l_t1;
+     const OPJ_FLOAT64 * l_mct_norms;
+     OPJ_UINT32 l_mct_numcomps = 0U;
+     opj_tcp_t * l_tcp = p_tcd->tcp;
+ 
+-    l_t1 = opj_t1_create(OPJ_TRUE);
+-    if (l_t1 == 00) {
+-        return OPJ_FALSE;
+-    }
+-
+     if (l_tcp->mct == 1) {
+         l_mct_numcomps = 3U;
+         /* irreversible encoding */
+@@ -2532,13 +2546,9 @@ static OPJ_BOOL opj_tcd_t1_encode(opj_tcd_t *p_tcd)
+         l_mct_norms = (const OPJ_FLOAT64 *)(l_tcp->mct_norms);
+     }
+ 
+-    if (! opj_t1_encode_cblks(l_t1, p_tcd->tcd_image->tiles, l_tcp, l_mct_norms,
+-                              l_mct_numcomps)) {
+-        opj_t1_destroy(l_t1);
+-        return OPJ_FALSE;
+-    }
+-
+-    opj_t1_destroy(l_t1);
++    return opj_t1_encode_cblks(p_tcd,
++                               p_tcd->tcd_image->tiles, l_tcp, l_mct_norms,
++                               l_mct_numcomps);
+ 
+     return OPJ_TRUE;
+ }
+@@ -2548,6 +2558,7 @@ static OPJ_BOOL opj_tcd_t2_encode(opj_tcd_t *p_tcd,
+                                   OPJ_UINT32 * p_data_written,
+                                   OPJ_UINT32 p_max_dest_size,
+                                   opj_codestream_info_t *p_cstr_info,
++                                  opj_tcd_marker_info_t* p_marker_info,
+                                   opj_event_mgr_t *p_manager)
+ {
+     opj_t2_t * l_t2;
+@@ -2566,6 +2577,7 @@ static OPJ_BOOL opj_tcd_t2_encode(opj_tcd_t *p_tcd,
+                 p_data_written,
+                 p_max_dest_size,
+                 p_cstr_info,
++                p_marker_info,
+                 p_tcd->tp_num,
+                 p_tcd->tp_pos,
+                 p_tcd->cur_pino,
+@@ -2624,7 +2636,7 @@ OPJ_BOOL opj_tcd_copy_tile_data(opj_tcd_t *p_tcd,
+     OPJ_UINT32 l_size_comp, l_remaining;
+     OPJ_SIZE_T l_nb_elem;
+ 
+-    l_data_size = opj_tcd_get_encoded_tile_size(p_tcd);
++    l_data_size = opj_tcd_get_encoder_input_buffer_size(p_tcd);
+     if (l_data_size != p_src_length) {
+         return OPJ_FALSE;
+     }
+@@ -2826,3 +2838,30 @@ static OPJ_BOOL opj_tcd_is_whole_tilecomp_decoding(opj_tcd_t *p_tcd,
+               (((OPJ_UINT32)tilec->x1 - tcx1) >> shift) == 0 &&
+               (((OPJ_UINT32)tilec->y1 - tcy1) >> shift) == 0)));
+ }
++
++/* ----------------------------------------------------------------------- */
++
++opj_tcd_marker_info_t* opj_tcd_marker_info_create(OPJ_BOOL need_PLT)
++{
++    opj_tcd_marker_info_t *l_tcd_marker_info =
++        (opj_tcd_marker_info_t*) opj_calloc(1, sizeof(opj_tcd_marker_info_t));
++    if (!l_tcd_marker_info) {
++        return NULL;
++    }
++
++    l_tcd_marker_info->need_PLT = need_PLT;
++
++    return l_tcd_marker_info;
++}
++
++/* ----------------------------------------------------------------------- */
++
++void opj_tcd_marker_info_destroy(opj_tcd_marker_info_t *p_tcd_marker_info)
++{
++    if (p_tcd_marker_info) {
++        opj_free(p_tcd_marker_info->p_packet_size);
++        opj_free(p_tcd_marker_info);
++    }
++}
++
++/* ----------------------------------------------------------------------- */
+diff --git a/third_party/libopenjpeg20/tcd.h b/third_party/libopenjpeg20/tcd.h
+index e3214c1d982ad9b4ce57d17d007d6bd562883fe2..f1b52b8dac6e7115cd65580ae89e387100d946d3 100644
+--- a/third_party/libopenjpeg20/tcd.h
++++ b/third_party/libopenjpeg20/tcd.h
+@@ -284,6 +284,22 @@ typedef struct opj_tcd {
+     OPJ_BOOL* used_component;
+ } opj_tcd_t;
+ 
++/**
++ * Structure to hold information needed to generate some markers.
++ * Used by encoder.
++ */
++typedef struct opj_tcd_marker_info {
++    /** In: Whether information to generate PLT markers is needed */
++    OPJ_BOOL    need_PLT;
++
++    /** OUT: Number of elements in p_packet_size[] array */
++    OPJ_UINT32  packet_count;
++
++    /** OUT: Array of size packet_count, such that p_packet_size[i] is
++     *       the size in bytes of the ith packet */
++    OPJ_UINT32* p_packet_size;
++} opj_tcd_marker_info_t;
++
+ /** @name Exported functions */
+ /*@{*/
+ /* ----------------------------------------------------------------------- */
+@@ -306,6 +322,21 @@ Destroy a previously created TCD handle
+ */
+ void opj_tcd_destroy(opj_tcd_t *tcd);
+ 
++
++/**
++ * Create a new opj_tcd_marker_info_t* structure
++ * @param need_PLT Whether information is needed to generate PLT markers.
++ */
++opj_tcd_marker_info_t* opj_tcd_marker_info_create(OPJ_BOOL need_PLT);
++
++
++/**
++Destroy a previously created opj_tcd_marker_info_t* structure
++@param p_tcd_marker_info Structure to destroy
++*/
++void opj_tcd_marker_info_destroy(opj_tcd_marker_info_t *p_tcd_marker_info);
++
++
+ /**
+  * Initialize the tile coder and may reuse some memory.
+  * @param   p_tcd       TCD handle.
+@@ -364,6 +395,7 @@ OPJ_UINT32 opj_tcd_get_decoded_tile_size(opj_tcd_t *p_tcd,
+  * @param   p_data_written  pointer to an int that is incremented by the number of bytes really written on p_dest
+  * @param   p_len           Maximum length of the destination buffer
+  * @param   p_cstr_info     Codestream information structure
++ * @param   p_marker_info   Marker information structure
+  * @param   p_manager       the user event manager
+  * @return  true if the coding is successful.
+ */
+@@ -373,6 +405,7 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd,
+                              OPJ_UINT32 * p_data_written,
+                              OPJ_UINT32 p_len,
+                              struct opj_codestream_info *p_cstr_info,
++                             opj_tcd_marker_info_t* p_marker_info,
+                              opj_event_mgr_t *p_manager);
+ 
+ 
+@@ -415,9 +448,11 @@ OPJ_BOOL opj_tcd_update_tile_data(opj_tcd_t *p_tcd,
+                                   OPJ_UINT32 p_dest_length);
+ 
+ /**
+- *
++ * Get the size in bytes of the input buffer provided before encoding.
++ * This must be the size provided to the p_src_length argument of
++ * opj_tcd_copy_tile_data()
+  */
+-OPJ_SIZE_T opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd);
++OPJ_SIZE_T opj_tcd_get_encoder_input_buffer_size(opj_tcd_t *p_tcd);
+ 
+ /**
+  * Initialize the tile coder and may reuse some meory.
+@@ -433,6 +468,8 @@ OPJ_BOOL opj_tcd_init_encode_tile(opj_tcd_t *p_tcd,
+ 
+ /**
+  * Copies tile data from the given memory block onto the system.
++ *
++ * p_src_length must be equal to opj_tcd_get_encoder_input_buffer_size()
+  */
+ OPJ_BOOL opj_tcd_copy_tile_data(opj_tcd_t *p_tcd,
+                                 OPJ_BYTE * p_src,