Browse Source

chore: cherry-pick 3 changes from 0-M133 (#45705)

chore: [33-x-y] cherry-pick 3 changes from 0-M133

* 51cd241b7b13 from chromium
* 1c7ff4d5477f from v8
* 8834c16acfcc from v8

Co-authored-by: Shelley Vohr <[email protected]>
Pedro Pontes 1 month ago
parent
commit
c29c430dd2

+ 0 - 1
.gitattributes

@@ -1,6 +1,5 @@
 # `git apply` and friends don't understand CRLF, even on windows. Force those
 # files to be checked out with LF endings even if core.autocrlf is true.
-*.patch text eol=lf
 DEPS text eol=lf
 yarn.lock text eol=lf
 script/zip_manifests/*.manifest text eol=lf

+ 1 - 0
patches/chromium/.patches

@@ -142,3 +142,4 @@ ignore_parse_errors_for_pkey_appusermodel_toastactivatorclsid.patch
 feat_add_signals_when_embedder_cleanup_callbacks_run_for.patch
 feat_separate_content_settings_callback_for_sync_and_async_clipboard.patch
 fix_osr_stutter_in_both_cpu_and_gpu_capture_when_page_has_animation.patch
+reland_lzma_sdk_update_to_24_09.patch

+ 17465 - 0
patches/chromium/reland_lzma_sdk_update_to_24_09.patch

@@ -0,0 +1,17465 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Joshua Pawlicki <[email protected]>
+Date: Fri, 17 Jan 2025 11:02:44 -0800
+Subject: Reland "lzma_sdk: Update to 24.09."
+
+This is a reland of commit 1d69891ae775c74724558585929c89438a6fda93
+
+Original change's description:
+> lzma_sdk: Update to 24.09.
+>
+> Files originate from https://7-zip.org/a/lzma2409.7z
+> SHA-256(./lzma2409.7z)=
+> 79b39f10b7b69eea293caa90c3e7ea07faf8f01f8ae9db1bb1b90c092375e5f3
+>
+> Fixed: 383772517, 40849176
+> Change-Id: I7ccefbcc0dc92f94ae43566fe9d8b962369ea8b8
+> Low-Coverage-Reason: OTHER - It's not clear how to exercise some of these conditionally-compiled alternatives.
+> Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6091038
+> Reviewed-by: Greg Thompson <[email protected]>
+> Commit-Queue: Will Harris <[email protected]>
+> Auto-Submit: Joshua Pawlicki <[email protected]>
+> Reviewed-by: Will Harris <[email protected]>
+> Cr-Commit-Position: refs/heads/main@{#1404227}
+
+(cherry picked from commit 968d9282d215058bc19a7c0cf22f2dcdea153740)
+
+Bug: 383772517, 388538957
+Change-Id: I3f36e233d52c0ba560a5d740111d1db62a32b8fc
+Low-Coverage-Reason: OTHER - It's not clear how to exercise some of these conditionally-compiled alternatives.
+Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6163689
+Commit-Queue: Joshua Pawlicki <[email protected]>
+Reviewed-by: Greg Thompson <[email protected]>
+Cr-Original-Commit-Position: refs/heads/main@{#1404814}
+Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6175641
+Auto-Submit: Joshua Pawlicki <[email protected]>
+Cr-Commit-Position: refs/branch-heads/6943@{#392}
+Cr-Branched-From: 72dd0b377c099e1e0230cc7345d5a5125b46ae7d-refs/heads/main@{#1402768}
+
+diff --git a/third_party/lzma_sdk/Asm/arm64/7zAsm.S b/third_party/lzma_sdk/Asm/arm64/7zAsm.S
+deleted file mode 100644
+index aa30a9ef8bf34ca51917983bcff7d873747d238c..0000000000000000000000000000000000000000
+--- a/third_party/lzma_sdk/Asm/arm64/7zAsm.S
++++ /dev/null
+@@ -1,194 +0,0 @@
+-// 7zAsm.S -- ASM macros for arm64
+-// 2021-04-25 : Igor Pavlov : Public domain
+-
+-#define  r0 x0
+-#define  r1 x1
+-#define  r2 x2
+-#define  r3 x3
+-#define  r4 x4
+-#define  r5 x5
+-#define  r6 x6
+-#define  r7 x7
+-#define  r8 x8
+-#define  r9 x9
+-#define  r10 x10
+-#define  r11 x11
+-#define  r12 x12
+-#define  r13 x13
+-#define  r14 x14
+-#define  r15 x15
+-#define  r16 x16
+-#define  r17 x17
+-#define  r18 x18
+-#define  r19 x19
+-#define  r20 x20
+-#define  r21 x21
+-#define  r22 x22
+-#define  r23 x23
+-#define  r24 x24
+-#define  r25 x25
+-#define  r26 x26
+-#define  r27 x27
+-#define  r28 x28
+-#define  r29 x29
+-#define  r30 x30
+-
+-#define  REG_ABI_PARAM_0 r0
+-#define  REG_ABI_PARAM_1 r1
+-#define  REG_ABI_PARAM_2 r2
+-
+-// The .note.gnu.property section is required because Chromium Android builds
+-// utilize the linker flag force-bti.
+-.pushsection .note.gnu.property, "a"
+-.balign 8
+-.long 4
+-.long 0x10
+-.long 0x5
+-.asciz "GNU"
+-.long 0xc0000000
+-.long 4
+-.long ((1 << 0 ) | (1 << 1))
+-.long 0
+-.popsection
+-
+-.macro p2_add reg:req, param:req
+-        add     \reg, \reg, \param
+-.endm
+-
+-.macro p2_sub reg:req, param:req
+-        sub     \reg, \reg, \param
+-.endm
+-
+-.macro p2_sub_s reg:req, param:req
+-        subs    \reg, \reg, \param
+-.endm
+-
+-.macro p2_and reg:req, param:req
+-        and     \reg, \reg, \param
+-.endm
+-
+-.macro xor reg:req, param:req
+-        eor     \reg, \reg, \param
+-.endm
+-
+-.macro or reg:req, param:req
+-        orr     \reg, \reg, \param
+-.endm
+-
+-.macro shl reg:req, param:req
+-        lsl     \reg, \reg, \param
+-.endm
+-
+-.macro shr reg:req, param:req
+-        lsr     \reg, \reg, \param
+-.endm
+-
+-.macro sar reg:req, param:req
+-        asr     \reg, \reg, \param
+-.endm
+-
+-.macro p1_neg reg:req
+-        neg     \reg, \reg
+-.endm
+-
+-.macro dec reg:req
+-        sub     \reg, \reg, 1
+-.endm
+-
+-.macro dec_s reg:req
+-        subs    \reg, \reg, 1
+-.endm
+-
+-.macro inc reg:req
+-        add     \reg, \reg, 1
+-.endm
+-
+-.macro inc_s reg:req
+-        adds    \reg, \reg, 1
+-.endm
+-
+-
+-.macro imul reg:req, param:req
+-        mul     \reg, \reg, \param
+-.endm
+-
+-/*
+-arm64 and arm use reverted c flag after subs/cmp instructions:
+-  arm64-arm   :     x86
+- b.lo / b.cc  :  jb  / jc
+- b.hs / b.cs  :  jae / jnc
+-*/ 
+-
+-.macro jmp lab:req
+-        b       \lab
+-.endm
+-
+-.macro je lab:req
+-        b.eq    \lab
+-.endm
+-
+-.macro jz lab:req
+-        b.eq    \lab
+-.endm
+-
+-.macro jnz lab:req
+-        b.ne    \lab
+-.endm
+-
+-.macro jne lab:req
+-        b.ne    \lab
+-.endm
+-
+-.macro jb lab:req
+-        b.lo    \lab
+-.endm
+-
+-.macro jbe lab:req
+-        b.ls    \lab
+-.endm
+-
+-.macro ja lab:req
+-        b.hi    \lab
+-.endm
+-
+-.macro jae lab:req
+-        b.hs    \lab
+-.endm
+-
+-
+-.macro cmove dest:req, srcTrue:req
+-        csel    \dest, \srcTrue, \dest, eq
+-.endm
+-
+-.macro cmovne dest:req, srcTrue:req
+-        csel    \dest, \srcTrue, \dest, ne
+-.endm
+-
+-.macro cmovs dest:req, srcTrue:req
+-        csel    \dest, \srcTrue, \dest, mi
+-.endm
+-
+-.macro cmovns dest:req, srcTrue:req
+-        csel    \dest, \srcTrue, \dest, pl
+-.endm
+-
+-.macro cmovb dest:req, srcTrue:req
+-        csel    \dest, \srcTrue, \dest, lo
+-.endm
+-
+-.macro cmovae dest:req, srcTrue:req
+-        csel    \dest, \srcTrue, \dest, hs
+-.endm
+-
+-
+-.macro MY_ALIGN_16 macro
+-	.p2align 4,, (1 << 4) - 1
+-.endm
+-
+-.macro MY_ALIGN_32 macro
+-        .p2align 5,, (1 << 5) - 1
+-.endm
+-
+-.macro MY_ALIGN_64 macro
+-        .p2align 6,, (1 << 6) - 1
+-.endm
+diff --git a/third_party/lzma_sdk/Asm/x86/7zAsm.asm b/third_party/lzma_sdk/Asm/x86/7zAsm.asm
+index a77edf25311d1a61ac627771d4d899041527cbfc..8b994a5e1f3c667c2e977d369b10bc7da5bd80fd 100644
+--- a/third_party/lzma_sdk/Asm/x86/7zAsm.asm
++++ b/third_party/lzma_sdk/Asm/x86/7zAsm.asm
+@@ -1,5 +1,5 @@
+ ; 7zAsm.asm -- ASM macros
+-; 2022-05-16 : Igor Pavlov : Public domain
++; 2023-12-08 : Igor Pavlov : Public domain
+ 
+ 
+ ; UASM can require these changes
+@@ -43,7 +43,7 @@ else
+ endif
+ endif
+ 
+-OPTION PROLOGUE:NONE 
++OPTION PROLOGUE:NONE
+ OPTION EPILOGUE:NONE
+ 
+ MY_ASM_START macro
+@@ -52,7 +52,7 @@ MY_ASM_START macro
+   else
+     .386
+     .model flat
+-    SEVENZ SEGMENT PARA PUBLIC 'CODE'
++    _TEXT$00 SEGMENT PARA PUBLIC 'CODE'
+   endif
+ endm
+ 
+@@ -121,10 +121,29 @@ endif
+   x2_H equ DH
+   x3_H equ BH
+ 
++;  r0_L equ AL
++;  r1_L equ CL
++;  r2_L equ DL
++;  r3_L equ BL
++
++;  r0_H equ AH
++;  r1_H equ CH
++;  r2_H equ DH
++;  r3_H equ BH
++
++
+ ifdef x64
+   x5_L equ BPL
+   x6_L equ SIL
+   x7_L equ DIL
++  x8_L equ r8b
++  x9_L equ r9b
++  x10_L equ r10b
++  x11_L equ r11b
++  x12_L equ r12b
++  x13_L equ r13b
++  x14_L equ r14b
++  x15_L equ r15b
+ 
+   r0 equ RAX
+   r1 equ RCX
+@@ -153,6 +172,22 @@ else
+   r7 equ x7
+ endif
+ 
++  x0_R equ r0
++  x1_R equ r1
++  x2_R equ r2
++  x3_R equ r3
++  x4_R equ r4
++  x5_R equ r5
++  x6_R equ r6
++  x7_R equ r7
++  x8_R equ r8
++  x9_R equ r9
++  x10_R equ r10
++  x11_R equ r11
++  x12_R equ r12
++  x13_R equ r13
++  x14_R equ r14
++  x15_R equ r15
+ 
+ ifdef x64
+ ifdef ABI_LINUX
+@@ -200,6 +235,14 @@ REG_ABI_PARAM_0   equ REG_PARAM_0
+ REG_ABI_PARAM_1_x equ REG_PARAM_1_x
+ REG_ABI_PARAM_1   equ REG_PARAM_1
+ 
++MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 macro
++        MY_PUSH_4_REGS
++endm
++
++MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 macro
++        MY_POP_4_REGS
++endm
++
+ else
+ ; x64
+ 
+@@ -261,12 +304,25 @@ endm
+ endif ; IS_LINUX
+ 
+ 
+-MY_PUSH_PRESERVED_ABI_REGS macro
++MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 macro
+     if  (IS_LINUX gt 0)
+         MY_PUSH_2_REGS
+     else
+         MY_PUSH_4_REGS
+     endif
++endm
++
++MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 macro
++    if  (IS_LINUX gt 0)
++        MY_POP_2_REGS
++    else
++        MY_POP_4_REGS
++    endif
++endm
++
++
++MY_PUSH_PRESERVED_ABI_REGS macro
++    MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
+         push    r12
+         push    r13
+         push    r14
+@@ -279,11 +335,7 @@ MY_POP_PRESERVED_ABI_REGS macro
+         pop     r14
+         pop     r13
+         pop     r12
+-    if  (IS_LINUX gt 0)
+-        MY_POP_2_REGS
+-    else
+-        MY_POP_4_REGS
+-    endif
++    MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
+ endm
+ 
+ endif ; x64
+diff --git a/third_party/lzma_sdk/Asm/x86/7zCrcOpt.asm b/third_party/lzma_sdk/Asm/x86/7zCrcOpt.asm
+deleted file mode 100644
+index 97a6b9aa80dd25742439c17426026472733209b1..0000000000000000000000000000000000000000
+--- a/third_party/lzma_sdk/Asm/x86/7zCrcOpt.asm
++++ /dev/null
+@@ -1,180 +0,0 @@
+-; 7zCrcOpt.asm -- CRC32 calculation : optimized version
+-; 2021-02-07 : Igor Pavlov : Public domain
+-
+-include 7zAsm.asm
+-
+-MY_ASM_START
+-
+-rD   equ  r2
+-rN   equ  r7
+-rT   equ  r5
+-
+-ifdef x64
+-    num_VAR     equ r8
+-    table_VAR   equ r9
+-else
+-  if (IS_CDECL gt 0)
+-    crc_OFFS    equ (REG_SIZE * 5)
+-    data_OFFS   equ (REG_SIZE + crc_OFFS)
+-    size_OFFS   equ (REG_SIZE + data_OFFS)
+-  else
+-    size_OFFS   equ (REG_SIZE * 5)
+-  endif
+-    table_OFFS  equ (REG_SIZE + size_OFFS)
+-    num_VAR     equ [r4 + size_OFFS]
+-    table_VAR   equ [r4 + table_OFFS]
+-endif
+-
+-SRCDAT  equ  rD + rN * 1 + 4 *
+-
+-CRC macro op:req, dest:req, src:req, t:req
+-    op      dest, DWORD PTR [rT + src * 4 + 0400h * t]
+-endm
+-
+-CRC_XOR macro dest:req, src:req, t:req
+-    CRC xor, dest, src, t
+-endm
+-
+-CRC_MOV macro dest:req, src:req, t:req
+-    CRC mov, dest, src, t
+-endm
+-
+-CRC1b macro
+-    movzx   x6, BYTE PTR [rD]
+-    inc     rD
+-    movzx   x3, x0_L
+-    xor     x6, x3
+-    shr     x0, 8
+-    CRC     xor, x0, r6, 0
+-    dec     rN
+-endm
+-
+-MY_PROLOG macro crc_end:req
+-
+-    ifdef x64
+-      if  (IS_LINUX gt 0)
+-        MY_PUSH_2_REGS
+-        mov     x0, REG_ABI_PARAM_0_x   ; x0 = x7
+-        mov     rT, REG_ABI_PARAM_3     ; r5 = r1
+-        mov     rN, REG_ABI_PARAM_2     ; r7 = r2
+-        mov     rD, REG_ABI_PARAM_1     ; r2 = r6
+-      else
+-        MY_PUSH_4_REGS
+-        mov     x0, REG_ABI_PARAM_0_x   ; x0 = x1
+-        mov     rT, REG_ABI_PARAM_3     ; r5 = r9
+-        mov     rN, REG_ABI_PARAM_2     ; r7 = r8
+-        ; mov     rD, REG_ABI_PARAM_1     ; r2 = r2
+-      endif
+-    else
+-        MY_PUSH_4_REGS
+-      if  (IS_CDECL gt 0)
+-        mov     x0, [r4 + crc_OFFS]
+-        mov     rD, [r4 + data_OFFS]
+-      else
+-        mov     x0, REG_ABI_PARAM_0_x
+-      endif
+-        mov     rN, num_VAR
+-        mov     rT, table_VAR
+-    endif
+-    
+-    test    rN, rN
+-    jz      crc_end
+-  @@:
+-    test    rD, 7
+-    jz      @F
+-    CRC1b
+-    jnz     @B
+-  @@:
+-    cmp     rN, 16
+-    jb      crc_end
+-    add     rN, rD
+-    mov     num_VAR, rN
+-    sub     rN, 8
+-    and     rN, NOT 7
+-    sub     rD, rN
+-    xor     x0, [SRCDAT 0]
+-endm
+-
+-MY_EPILOG macro crc_end:req
+-    xor     x0, [SRCDAT 0]
+-    mov     rD, rN
+-    mov     rN, num_VAR
+-    sub     rN, rD
+-  crc_end:
+-    test    rN, rN
+-    jz      @F
+-    CRC1b
+-    jmp     crc_end
+-  @@:
+-      if (IS_X64 gt 0) and (IS_LINUX gt 0)
+-        MY_POP_2_REGS
+-      else
+-        MY_POP_4_REGS
+-      endif
+-endm
+-
+-MY_PROC CrcUpdateT8, 4
+-    MY_PROLOG crc_end_8
+-    mov     x1, [SRCDAT 1]
+-    align 16
+-  main_loop_8:
+-    mov     x6, [SRCDAT 2]
+-    movzx   x3, x1_L
+-    CRC_XOR x6, r3, 3
+-    movzx   x3, x1_H
+-    CRC_XOR x6, r3, 2
+-    shr     x1, 16
+-    movzx   x3, x1_L
+-    movzx   x1, x1_H
+-    CRC_XOR x6, r3, 1
+-    movzx   x3, x0_L
+-    CRC_XOR x6, r1, 0
+-
+-    mov     x1, [SRCDAT 3]
+-    CRC_XOR x6, r3, 7
+-    movzx   x3, x0_H
+-    shr     x0, 16
+-    CRC_XOR x6, r3, 6
+-    movzx   x3, x0_L
+-    CRC_XOR x6, r3, 5
+-    movzx   x3, x0_H
+-    CRC_MOV x0, r3, 4
+-    xor     x0, x6
+-    add     rD, 8
+-    jnz     main_loop_8
+-
+-    MY_EPILOG crc_end_8
+-MY_ENDP
+-
+-MY_PROC CrcUpdateT4, 4
+-    MY_PROLOG crc_end_4
+-    align 16
+-  main_loop_4:
+-    movzx   x1, x0_L
+-    movzx   x3, x0_H
+-    shr     x0, 16
+-    movzx   x6, x0_H
+-    and     x0, 0FFh
+-    CRC_MOV x1, r1, 3
+-    xor     x1, [SRCDAT 1]
+-    CRC_XOR x1, r3, 2
+-    CRC_XOR x1, r6, 0
+-    CRC_XOR x1, r0, 1
+- 
+-    movzx   x0, x1_L
+-    movzx   x3, x1_H
+-    shr     x1, 16
+-    movzx   x6, x1_H
+-    and     x1, 0FFh
+-    CRC_MOV x0, r0, 3
+-    xor     x0, [SRCDAT 2]
+-    CRC_XOR x0, r3, 2
+-    CRC_XOR x0, r6, 0
+-    CRC_XOR x0, r1, 1
+-    add     rD, 8
+-    jnz     main_loop_4
+-
+-    MY_EPILOG crc_end_4
+-MY_ENDP
+-
+-end
+diff --git a/third_party/lzma_sdk/Asm/x86/LzmaDecOpt.asm b/third_party/lzma_sdk/Asm/x86/LzmaDecOpt.asm
+index ddbd88ffc2e955419128fb105c00bb9f442dfddb..f2dcceb12a846f3034e1395c1b2c5ac0a65ccd92 100644
+--- a/third_party/lzma_sdk/Asm/x86/LzmaDecOpt.asm
++++ b/third_party/lzma_sdk/Asm/x86/LzmaDecOpt.asm
+@@ -1,5 +1,5 @@
+ ; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function
+-; 2021-02-23: Igor Pavlov : Public domain
++; 2024-06-18: Igor Pavlov : Public domain
+ ;
+ ; 3 - is the code compatibility version of LzmaDec_DecodeReal_*()
+ ; function for check at link time.
+@@ -17,11 +17,43 @@ include 7zAsm.asm
+ 
+ MY_ASM_START
+ 
+-LZMADEC SEGMENT ALIGN(64) 'CODE'
++; if Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is     defined, we use additional SEGMENT with 64-byte alignment.
++; if Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is not defined, we use default SEGMENT (where default 16-byte alignment of segment is expected).
++; The performance is almost identical in our tests.
++; But the performance can depend on the position of lzmadec code inside the instruction cache
++; or micro-op cache line (depending on the low address bits in 32-byte/64-byte cache lines).
++; And 64-byte alignment provides a more consistent speed regardless
++; of the code's position in the executable.
++; But also it's possible that code without Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT can be
++; slightly faster than 64-byte aligned code in some cases, if the offset of lzmadec
++; code in a 64-byte block after compilation provides better speed for some reason.
++; Note that Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT adds an extra section to the ELF file.
++; If you don't want to get that extra section, do not define Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT.
++
++ifndef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
++if (IS_LINUX gt 0)
++  Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1
++else
++  Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1
++endif
++endif
+ 
++ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
++; Make this deterministic
++; _TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
++LZMADEC SEGMENT ALIGN(64) 'CODE'
+ MY_ALIGN macro num:req
+         align  num
++        ; align  16
+ endm
++else
++MY_ALIGN macro num:req
++        ; We expect that ".text" is aligned to 16 bytes.
++        ; So we don't need large alignment inside our function.
++        align  16
++endm
++endif
++
+ 
+ MY_ALIGN_16 macro
+         MY_ALIGN 16
+@@ -610,7 +642,11 @@ PARAM_lzma      equ REG_ABI_PARAM_0
+ PARAM_limit     equ REG_ABI_PARAM_1
+ PARAM_bufLimit  equ REG_ABI_PARAM_2
+ 
++ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
+ ; MY_ALIGN_64
++else
++  MY_ALIGN_16
++endif
+ MY_PROC LzmaDec_DecodeReal_3, 3
+ MY_PUSH_PRESERVED_ABI_REGS
+ 
+@@ -1298,6 +1334,8 @@ fin:
+ MY_POP_PRESERVED_ABI_REGS
+ MY_ENDP
+ 
++ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
+ LZMADEC ENDS
++endif
+ 
+ end
+diff --git a/third_party/lzma_sdk/Asm/x86/Sha256Opt.asm b/third_party/lzma_sdk/Asm/x86/Sha256Opt.asm
+index 116153b69e56f519fad9a117ecc1402e8d3ef64f..a07930287a6d3612badf2a6db2fd12cc2a49d034 100644
+--- a/third_party/lzma_sdk/Asm/x86/Sha256Opt.asm
++++ b/third_party/lzma_sdk/Asm/x86/Sha256Opt.asm
+@@ -1,5 +1,5 @@
+ ; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions
+-; 2022-04-17 : Igor Pavlov : Public domain
++; 2024-06-16 : Igor Pavlov : Public domain
+ 
+ include 7zAsm.asm
+ 
+@@ -20,7 +20,7 @@ endif
+ EXTRN   K_CONST:xmmword
+ @
+ 
+-CONST   SEGMENT
++CONST   SEGMENT READONLY
+ 
+ align 16
+ Reverse_Endian_Mask db 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12
+diff --git a/third_party/lzma_sdk/Asm/x86/XzCrc64Opt.asm b/third_party/lzma_sdk/Asm/x86/XzCrc64Opt.asm
+index 1c67037ba8a20bdb30f6521a841e7b72c6394282..de023f9f490bafbf8637e4fc116b8326a07c40d8 100644
+--- a/third_party/lzma_sdk/Asm/x86/XzCrc64Opt.asm
++++ b/third_party/lzma_sdk/Asm/x86/XzCrc64Opt.asm
+@@ -1,113 +1,231 @@
+ ; XzCrc64Opt.asm -- CRC64 calculation : optimized version
+-; 2021-02-06 : Igor Pavlov : Public domain
++; 2023-12-08 : Igor Pavlov : Public domain
+ 
+ include 7zAsm.asm
+ 
+ MY_ASM_START
+ 
++NUM_WORDS       equ     3
++
++if (NUM_WORDS lt 1) or (NUM_WORDS gt 64)
++.err <num_words_IS_INCORRECT>
++endif
++
++NUM_SKIP_BYTES  equ     ((NUM_WORDS - 2) * 4)
++
++
++MOVZXLO macro dest:req, src:req
++        movzx   dest, @CatStr(src, _L)
++endm
++
++MOVZXHI macro dest:req, src:req
++        movzx   dest, @CatStr(src, _H)
++endm
++
++
+ ifdef x64
+ 
+-rD      equ  r9
++rD      equ  r11
+ rN      equ  r10
+-rT      equ  r5
+-num_VAR equ  r8
+-
+-SRCDAT4 equ  dword ptr [rD + rN * 1]
++rT      equ  r9
++
++CRC_OP macro op:req, dest:req, src:req, t:req
++        op      dest, QWORD PTR [rT + @CatStr(src, _R) * 8 + 0800h * (t)]
++endm
+     
+ CRC_XOR macro dest:req, src:req, t:req
+-    xor     dest, QWORD PTR [rT + src * 8 + 0800h * t]
++        CRC_OP  xor, dest, src, t
++endm
++
++CRC_MOV macro dest:req, src:req, t:req
++        CRC_OP  mov, dest, src, t
+ endm
+ 
+ CRC1b macro
+-    movzx   x6, BYTE PTR [rD]
+-    inc     rD
+-    movzx   x3, x0_L
+-    xor     x6, x3
+-    shr     r0, 8
+-    CRC_XOR r0, r6, 0
+-    dec     rN
+-endm
+-
+-MY_PROLOG macro crc_end:req
+-  ifdef ABI_LINUX
+-    MY_PUSH_2_REGS
+-  else
+-    MY_PUSH_4_REGS
+-  endif
+-    mov     r0, REG_ABI_PARAM_0
+-    mov     rN, REG_ABI_PARAM_2
+-    mov     rT, REG_ABI_PARAM_3
+-    mov     rD, REG_ABI_PARAM_1
+-    test    rN, rN
+-    jz      crc_end
+-  @@:
+-    test    rD, 3
+-    jz      @F
+-    CRC1b
+-    jnz     @B
+-  @@:
+-    cmp     rN, 8
+-    jb      crc_end
+-    add     rN, rD
+-    mov     num_VAR, rN
+-    sub     rN, 4
+-    and     rN, NOT 3
+-    sub     rD, rN
+-    mov     x1, SRCDAT4
+-    xor     r0, r1
+-    add     rN, 4
+-endm
+-
+-MY_EPILOG macro crc_end:req
+-    sub     rN, 4
+-    mov     x1, SRCDAT4
+-    xor     r0, r1
+-    mov     rD, rN
+-    mov     rN, num_VAR
+-    sub     rN, rD
+-  crc_end:
+-    test    rN, rN
+-    jz      @F
+-    CRC1b
+-    jmp     crc_end
+-  @@:
+-  ifdef ABI_LINUX
+-    MY_POP_2_REGS
+-  else
+-    MY_POP_4_REGS
+-  endif
++        movzx   x6, BYTE PTR [rD]
++        inc     rD
++        MOVZXLO x3, x0
++        xor     x6, x3
++        shr     r0, 8
++        CRC_XOR r0, x6, 0
++        dec     rN
+ endm
+ 
+-MY_PROC XzCrc64UpdateT4, 4
+-    MY_PROLOG crc_end_4
+-    align 16
+-  main_loop_4:
+-    mov     x1, SRCDAT4
+-    movzx   x2, x0_L
+-    movzx   x3, x0_H
+-    shr     r0, 16
+-    movzx   x6, x0_L
+-    movzx   x7, x0_H
+-    shr     r0, 16
+-    CRC_XOR r1, r2, 3
+-    CRC_XOR r0, r3, 2
+-    CRC_XOR r1, r6, 1
+-    CRC_XOR r0, r7, 0
+-    xor     r0, r1
+-
+-    add     rD, 4
+-    jnz     main_loop_4
+-
+-    MY_EPILOG crc_end_4
++
++; ALIGN_MASK is 3 or 7 (address mask for 4-byte or 8-byte alignment of rD):
++ALIGN_MASK      equ  (7 - (NUM_WORDS and 1) * 4)
++
++if NUM_WORDS eq 1
++
++src_rN_offset   equ  4
++; + 4 for prefetching next 4-bytes after current iteration
++NUM_BYTES_LIMIT equ  (NUM_WORDS * 4 + 4)
++SRCDAT4         equ  DWORD PTR [rN + rD * 1]
++
++XOR_NEXT macro
++        mov     x1, [rD]
++        xor     r0, r1
++endm
++
++else ; NUM_WORDS > 1
++
++src_rN_offset   equ 8
++; + 8 for prefetching next 8-bytes after current iteration
++NUM_BYTES_LIMIT equ (NUM_WORDS * 4 + 8)
++
++XOR_NEXT macro
++        xor     r0, QWORD PTR [rD] ; 64-bit read, can be unaligned
++endm
++
++; 32-bit or 64-bit
++LOAD_SRC_MULT4 macro dest:req, word_index:req
++        mov     dest, [rN + rD * 1 + 4 * (word_index) - src_rN_offset];
++endm
++
++endif
++
++
++
++MY_PROC @CatStr(XzCrc64UpdateT, %(NUM_WORDS * 4)), 4
++        MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
++
++        mov     r0, REG_ABI_PARAM_0   ; r0  <- r1 / r7
++        mov     rD, REG_ABI_PARAM_1   ; r11 <- r2 / r6
++        mov     rN, REG_ABI_PARAM_2   ; r10 <- r8 / r2
++if  (IS_LINUX gt 0)
++        mov     rT, REG_ABI_PARAM_3   ; r9  <- r9 / r1
++endif
++
++        cmp     rN, NUM_BYTES_LIMIT + ALIGN_MASK
++        jb      crc_end
++@@:
++        test    rD, ALIGN_MASK
++        jz      @F
++        CRC1b
++        jmp     @B
++@@:
++        XOR_NEXT
++        lea     rN, [rD + rN * 1 - (NUM_BYTES_LIMIT - 1)]
++        sub     rD, rN
++        add     rN, src_rN_offset
++
++align 16
++@@:
++
++if NUM_WORDS eq 1
++
++        mov     x1, x0
++        shr     x1, 8
++        MOVZXLO x3, x1
++        MOVZXLO x2, x0
++        shr     x1, 8
++        shr     r0, 32
++        xor     x0, SRCDAT4
++        CRC_XOR r0, x2, 3
++        CRC_XOR r0, x3, 2
++        MOVZXLO x2, x1
++        shr     x1, 8
++        CRC_XOR r0, x2, 1
++        CRC_XOR r0, x1, 0
++
++else ; NUM_WORDS > 1
++
++if NUM_WORDS ne 2
++  k = 2
++  while k lt NUM_WORDS
++
++        LOAD_SRC_MULT4  x1, k
++    crc_op1  textequ <xor>
++
++    if k eq 2
++      if (NUM_WORDS and 1)
++        LOAD_SRC_MULT4  x7, NUM_WORDS       ; aligned 32-bit
++        LOAD_SRC_MULT4  x6, NUM_WORDS + 1   ; aligned 32-bit
++        shl     r6, 32
++      else
++        LOAD_SRC_MULT4  r6, NUM_WORDS       ; aligned 64-bit
++        crc_op1  textequ <mov>
++      endif
++    endif
++        table = 4 * (NUM_WORDS - 1 - k)
++        MOVZXLO x3, x1
++        CRC_OP crc_op1, r7, x3, 3 + table
++        MOVZXHI x3, x1
++        shr     x1, 16
++        CRC_XOR r6, x3, 2 + table
++        MOVZXLO x3, x1
++        shr     x1, 8
++        CRC_XOR r7, x3, 1 + table
++        CRC_XOR r6, x1, 0 + table
++        k = k + 1
++  endm
++        crc_op2  textequ <xor>
++
++else ; NUM_WORDS == 2
++        LOAD_SRC_MULT4  r6, NUM_WORDS       ; aligned 64-bit
++        crc_op2  textequ <mov>
++endif ; NUM_WORDS == 2
++
++        MOVZXHI x3, x0
++        MOVZXLO x2, x0
++        mov     r1, r0
++        shr     r1, 32
++        shr     x0, 16
++        CRC_XOR r6, x2, NUM_SKIP_BYTES + 7
++        CRC_OP  crc_op2, r7, x3, NUM_SKIP_BYTES + 6
++        MOVZXLO x2, x0
++        MOVZXHI x5, x1
++        MOVZXLO x3, x1
++        shr     x0, 8
++        shr     x1, 16
++        CRC_XOR r7, x2, NUM_SKIP_BYTES + 5
++        CRC_XOR r6, x3, NUM_SKIP_BYTES + 3
++        CRC_XOR r7, x0, NUM_SKIP_BYTES + 4
++        CRC_XOR r6, x5, NUM_SKIP_BYTES + 2
++        MOVZXLO x2, x1
++        shr     x1, 8
++        CRC_XOR r7, x2, NUM_SKIP_BYTES + 1
++        CRC_MOV r0, x1, NUM_SKIP_BYTES + 0
++        xor     r0, r6
++        xor     r0, r7
++
++endif ; NUM_WORDS > 1
++        add     rD, NUM_WORDS * 4
++        jnc     @B
++
++        sub     rN, src_rN_offset
++        add     rD, rN
++        XOR_NEXT
++        add     rN, NUM_BYTES_LIMIT - 1
++        sub     rN, rD
++
++crc_end:
++        test    rN, rN
++        jz      func_end
++@@:
++        CRC1b
++        jnz      @B
++func_end:
++        MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
+ MY_ENDP
+ 
++
++
+ else
++; ==================================================================
+ ; x86 (32-bit)
+ 
+-rD      equ  r1
+-rN      equ  r7
++rD      equ  r7
++rN      equ  r1
+ rT      equ  r5
+ 
++xA      equ  x6
++xA_R    equ  r6
++
++ifdef x64
++    num_VAR     equ  r8
++else
++
+ crc_OFFS  equ  (REG_SIZE * 5)
+ 
+ if (IS_CDECL gt 0) or (IS_LINUX gt 0)
+@@ -133,107 +251,273 @@ else
+     table_VAR   equ  [r4 + table_OFFS]
+     num_VAR     equ  table_VAR
+ endif
++endif ; x64
++
++SRCDAT4         equ     DWORD PTR [rN + rD * 1]
+ 
+-SRCDAT4 equ  dword ptr [rD + rN * 1]
++CRC_1 macro op:req, dest:req, src:req, t:req, word_index:req
++        op      dest, DWORD PTR [rT + @CatStr(src, _R) * 8 + 0800h * (t) + (word_index) * 4]
++endm
+ 
+ CRC macro op0:req, op1:req, dest0:req, dest1:req, src:req, t:req
+-    op0     dest0, DWORD PTR [rT + src * 8 + 0800h * t]
+-    op1     dest1, DWORD PTR [rT + src * 8 + 0800h * t + 4]
++        CRC_1   op0, dest0, src, t, 0
++        CRC_1   op1, dest1, src, t, 1
+ endm
+ 
+ CRC_XOR macro dest0:req, dest1:req, src:req, t:req
+-    CRC xor, xor, dest0, dest1, src, t
++        CRC xor, xor, dest0, dest1, src, t
+ endm
+ 
+ 
+ CRC1b macro
+-    movzx   x6, BYTE PTR [rD]
+-    inc     rD
+-    movzx   x3, x0_L
+-    xor     x6, x3
+-    shrd    r0, r2, 8
+-    shr     r2, 8
+-    CRC_XOR r0, r2, r6, 0
+-    dec     rN
+-endm
+-
+-MY_PROLOG macro crc_end:req
+-    MY_PUSH_4_REGS
+-
+-  if (IS_CDECL gt 0) or (IS_LINUX gt 0)
+-    proc_numParams = proc_numParams + 2 ; for ABI_LINUX
+-    mov     rN, [r4 + size_OFFS]
+-    mov     rD, [r4 + data_OFFS]
++        movzx   xA, BYTE PTR [rD]
++        inc     rD
++        MOVZXLO x3, x0
++        xor     xA, x3
++        shrd    x0, x2, 8
++        shr     x2, 8
++        CRC_XOR x0, x2, xA, 0
++        dec     rN
++endm
++
++
++MY_PROLOG_BASE macro
++        MY_PUSH_4_REGS
++ifdef x64
++        mov     r0, REG_ABI_PARAM_0     ; r0 <- r1 / r7
++        mov     rT, REG_ABI_PARAM_3     ; r5 <- r9 / r1
++        mov     rN, REG_ABI_PARAM_2     ; r1 <- r8 / r2
++        mov     rD, REG_ABI_PARAM_1     ; r7 <- r2 / r6
++        mov     r2, r0
++        shr     r2, 32
++        mov     x0, x0
++else
++    if (IS_CDECL gt 0) or (IS_LINUX gt 0)
++        proc_numParams = proc_numParams + 2 ; for ABI_LINUX
++        mov     rN, [r4 + size_OFFS]
++        mov     rD, [r4 + data_OFFS]
++    else
++        mov     rD, REG_ABI_PARAM_0     ; r7 <- r1 : (data)
++        mov     rN, REG_ABI_PARAM_1     ; r1 <- r2 : (size)
++    endif
++        mov     x0, [r4 + crc_OFFS]
++        mov     x2, [r4 + crc_OFFS + 4]
++        mov     rT, table_VAR
++endif
++endm
++
++
++MY_EPILOG_BASE macro crc_end:req, func_end:req
++crc_end:
++        test    rN, rN
++        jz      func_end
++@@:
++        CRC1b
++        jnz      @B
++func_end:
++ifdef x64
++        shl     r2, 32
++        xor     r0, r2
++endif
++        MY_POP_4_REGS
++endm
++
++
++; ALIGN_MASK is 3 or 7 (address mask for 4-byte or 8-byte alignment of rD):
++ALIGN_MASK  equ     (7 - (NUM_WORDS and 1) * 4)
++
++if (NUM_WORDS eq 1)
++
++NUM_BYTES_LIMIT_T4 equ (NUM_WORDS * 4 + 4)
++
++MY_PROC @CatStr(XzCrc64UpdateT, %(NUM_WORDS * 4)), 5
++        MY_PROLOG_BASE
++
++        cmp     rN, NUM_BYTES_LIMIT_T4 + ALIGN_MASK
++        jb      crc_end_4
++@@:
++        test    rD, ALIGN_MASK
++        jz      @F
++        CRC1b
++        jmp     @B
++@@:
++        xor     x0, [rD]
++        lea     rN, [rD + rN * 1 - (NUM_BYTES_LIMIT_T4 - 1)]
++        sub     rD, rN
++        add     rN, 4
++
++        MOVZXLO xA, x0
++align 16
++@@:
++        mov     x3, SRCDAT4
++        xor     x3, x2
++        shr     x0, 8
++        CRC xor, mov, x3, x2, xA, 3
++        MOVZXLO xA, x0
++        shr     x0, 8
++        ; MOVZXHI  xA, x0
++        ; shr     x0, 16
++        CRC_XOR x3, x2, xA, 2
++
++        MOVZXLO xA, x0
++        shr     x0, 8
++        CRC_XOR x3, x2, xA, 1
++        CRC_XOR x3, x2, x0, 0
++        MOVZXLO xA, x3
++        mov     x0, x3
++
++        add     rD, 4
++        jnc     @B
++
++        sub     rN, 4
++        add     rD, rN
++        xor     x0, [rD]
++        add     rN, NUM_BYTES_LIMIT_T4 - 1
++        sub     rN, rD
++        MY_EPILOG_BASE crc_end_4, func_end_4
++MY_ENDP
++
++else ; NUM_WORDS > 1
++
++SHR_X macro x, imm
++        shr x, imm
++endm
++
++
++ITER_1 macro v0, v1, a, off
++        MOVZXLO xA, a
++        SHR_X   a, 8
++        CRC_XOR v0, v1, xA, off
++endm
++
++
++ITER_4 macro v0, v1, a, off
++if 0 eq 0
++        ITER_1  v0, v1, a, off + 3
++        ITER_1  v0, v1, a, off + 2
++        ITER_1  v0, v1, a, off + 1
++        CRC_XOR v0, v1, a, off
++elseif 0 eq 0
++        MOVZXLO xA, a
++        CRC_XOR v0, v1, xA, off + 3
++        mov     xA, a
++        ror     a, 16   ; 32-bit ror
++        shr     xA, 24
++        CRC_XOR v0, v1, xA, off
++        MOVZXLO xA, a
++        SHR_X   a, 24
++        CRC_XOR v0, v1, xA, off + 1
++        CRC_XOR v0, v1, a, off + 2
++else
++        ; MOVZXHI provides smaller code, but MOVZX_HI_BYTE is not fast instruction
++        MOVZXLO xA, a
++        CRC_XOR v0, v1, xA, off + 3
++        MOVZXHI xA, a
++        SHR_X   a, 16
++        CRC_XOR v0, v1, xA, off + 2
++        MOVZXLO xA, a
++        SHR_X   a, 8
++        CRC_XOR v0, v1, xA, off + 1
++        CRC_XOR v0, v1, a, off
++endif
++endm
++
++
++
++ITER_1_PAIR macro v0, v1, a0, a1, off
++        ITER_1 v0, v1, a0, off + 4
++        ITER_1 v0, v1, a1, off
++endm
++
++src_rD_offset equ 8
++STEP_SIZE       equ     (NUM_WORDS * 4)
++
++ITER_12_NEXT macro op, index, v0, v1
++        op     v0, DWORD PTR [rD + (index + 1) * STEP_SIZE     - src_rD_offset]
++        op     v1, DWORD PTR [rD + (index + 1) * STEP_SIZE + 4 - src_rD_offset]
++endm
++
++ITER_12 macro index, a0, a1, v0, v1
++
++  if NUM_SKIP_BYTES  eq 0
++        ITER_12_NEXT mov, index, v0, v1
+   else
+-    mov     rN, r2
++    k = 0
++    while k lt NUM_SKIP_BYTES
++        movzx   xA, BYTE PTR [rD + (index) * STEP_SIZE + k + 8 - src_rD_offset]
++      if k eq 0
++        CRC mov, mov,   v0, v1, xA, NUM_SKIP_BYTES - 1 - k
++      else
++        CRC_XOR         v0, v1, xA, NUM_SKIP_BYTES - 1 - k
++      endif
++      k = k + 1
++    endm
++        ITER_12_NEXT xor, index, v0, v1
+   endif
+ 
+-    mov     x0, [r4 + crc_OFFS]
+-    mov     x2, [r4 + crc_OFFS + 4]
+-    mov     rT, table_VAR
+-    test    rN, rN
+-    jz      crc_end
+-  @@:
+-    test    rD, 3
+-    jz      @F
+-    CRC1b
+-    jnz     @B
+-  @@:
+-    cmp     rN, 8
+-    jb      crc_end
+-    add     rN, rD
+-
+-    mov     num_VAR, rN
+-
+-    sub     rN, 4
+-    and     rN, NOT 3
+-    sub     rD, rN
+-    xor     r0, SRCDAT4
+-    add     rN, 4
+-endm
+-
+-MY_EPILOG macro crc_end:req
+-    sub     rN, 4
+-    xor     r0, SRCDAT4
+-
+-    mov     rD, rN
+-    mov     rN, num_VAR
+-    sub     rN, rD
+-  crc_end:
+-    test    rN, rN
+-    jz      @F
+-    CRC1b
+-    jmp     crc_end
+-  @@:
+-    MY_POP_4_REGS
+-endm
+-
+-MY_PROC XzCrc64UpdateT4, 5
+-    MY_PROLOG crc_end_4
+-    movzx   x6, x0_L
+-    align 16
+-  main_loop_4:
+-    mov     r3, SRCDAT4
+-    xor     r3, r2
+-
+-    CRC xor, mov, r3, r2, r6, 3
+-    movzx   x6, x0_H
+-    shr     r0, 16
+-    CRC_XOR r3, r2, r6, 2
+-
+-    movzx   x6, x0_L
+-    movzx   x0, x0_H
+-    CRC_XOR r3, r2, r6, 1
+-    CRC_XOR r3, r2, r0, 0
+-    movzx   x6, x3_L
+-    mov     r0, r3
+-
+-    add     rD, 4
+-    jnz     main_loop_4
+-
+-    MY_EPILOG crc_end_4
++if 0 eq 0
++        ITER_4  v0, v1, a0, NUM_SKIP_BYTES + 4
++        ITER_4  v0, v1, a1, NUM_SKIP_BYTES
++else ; interleave version is faster/slower for different processors
++        ITER_1_PAIR v0, v1, a0, a1, NUM_SKIP_BYTES + 3
++        ITER_1_PAIR v0, v1, a0, a1, NUM_SKIP_BYTES + 2
++        ITER_1_PAIR v0, v1, a0, a1, NUM_SKIP_BYTES + 1
++        CRC_XOR     v0, v1, a0,     NUM_SKIP_BYTES + 4
++        CRC_XOR     v0, v1, a1,     NUM_SKIP_BYTES
++endif
++endm
++
++; we use (UNROLL_CNT > 1) to reduce read ports pressure (num_VAR reads)
++UNROLL_CNT      equ     (2 * 1)
++NUM_BYTES_LIMIT equ     (STEP_SIZE * UNROLL_CNT + 8)
++
++MY_PROC @CatStr(XzCrc64UpdateT, %(NUM_WORDS * 4)), 5
++        MY_PROLOG_BASE
++
++        cmp     rN, NUM_BYTES_LIMIT + ALIGN_MASK
++        jb      crc_end_12
++@@:
++        test    rD, ALIGN_MASK
++        jz      @F
++        CRC1b
++        jmp     @B
++@@:
++        xor     x0, [rD]
++        xor     x2, [rD + 4]
++        add     rD, src_rD_offset
++        lea     rN, [rD + rN * 1 - (NUM_BYTES_LIMIT - 1)]
++        mov     num_VAR, rN
++
++align 16
++@@:
++    i = 0
++    rept UNROLL_CNT
++      if (i and 1) eq 0
++        ITER_12     i, x0, x2,  x1, x3
++      else
++        ITER_12     i, x1, x3,  x0, x2
++      endif
++      i = i + 1
++    endm
++
++    if (UNROLL_CNT and 1)
++        mov     x0, x1
++        mov     x2, x3
++    endif
++        add     rD, STEP_SIZE * UNROLL_CNT
++        cmp     rD, num_VAR
++        jb      @B
++
++        mov     rN, num_VAR
++        add     rN, NUM_BYTES_LIMIT - 1
++        sub     rN, rD
++        sub     rD, src_rD_offset
++        xor     x0, [rD]
++        xor     x2, [rD + 4]
++
++        MY_EPILOG_BASE crc_end_12, func_end_12
+ MY_ENDP
+ 
++endif ; (NUM_WORDS > 1)
+ endif ; ! x64
+-
+ end
+diff --git a/third_party/lzma_sdk/BUILD.gn b/third_party/lzma_sdk/BUILD.gn
+index 57923a6b9e6cf6f73e7b2706fcbc0dcd367ebe89..39d8dc0a33ce95f7c293bab9d2de5f8639925f13 100644
+--- a/third_party/lzma_sdk/BUILD.gn
++++ b/third_party/lzma_sdk/BUILD.gn
+@@ -36,13 +36,14 @@ config("lzma_sdk_config") {
+ # (otherwise -Wall will appear after this, and turn it back on).
+ config("lzma_build_config") {
+   defines = [
+-    "_7ZIP_ST",
+-    "_7Z_NO_METHODS_FILTERS",
++    "Z7_LZMA_PROB32",
++    "Z7_NO_METHODS_FILTERS",
++    "Z7_ST",
+     "_LZMA_PROB32",
+   ]
+ 
+   if (enable_lzma_opt) {
+-    defines += [ "_LZMA_DEC_OPT" ]
++    defines += [ "Z7_LZMA_DEC_OPT" ]
+   }
+ 
+   cflags = []
+@@ -52,10 +53,6 @@ config("lzma_build_config") {
+   }
+ 
+   if (use_arm_neon_optimizations) {
+-    if (is_fuchsia) {
+-      defines += [ "ARMV8_OS_FUCHSIA" ]
+-    }
+-
+     if (target_cpu == "arm" && arm_version >= 8) {
+       if (is_clang) {
+         cflags += [
+@@ -92,6 +89,7 @@ static_library("lzma_sdk") {
+     "C/7zFile.h",
+     "C/7zStream.c",
+     "C/7zTypes.h",
++    "C/7zWindows.h",
+     "C/Alloc.c",
+     "C/Alloc.h",
+     "C/Bcj2.c",
+@@ -118,14 +116,13 @@ static_library("lzma_sdk") {
+     "C/LzmaLib.c",
+     "C/LzmaLib.h",
+     "C/Precomp.h",
++    "C/RotateDefs.h",
+   ]
+ 
+   if (enable_lzma_opt) {
+     sources += [
+-      "Asm/x86/7zCrcOpt.asm",
+       "Asm/x86/LzmaDecOpt.asm",
+     ]
+-    sources -= [ "C/7zCrcOpt.c" ]
+   }
+ 
+   configs -= [ "//build/config/compiler:chromium_code" ]
+@@ -141,9 +138,9 @@ static_library("lzma_sdk") {
+ static_library("lzma_sdk_xz") {
+   sources = [
+     "C/BraIA64.c",
+-    "C/RotateDefs.h",
+     "C/Sha256.c",
+     "C/Sha256.h",
++    "C/Sha256Opt.c",
+     "C/Xz.c",
+     "C/Xz.h",
+     "C/XzCrc64.c",
+@@ -153,11 +150,6 @@ static_library("lzma_sdk_xz") {
+     "C/XzIn.c",
+   ]
+ 
+-  # TODO(crbug.com/1338627): Enable ARM optimizations
+-  if (target_cpu == "x86" || target_cpu == "x64") {
+-    sources += [ "C/Sha256Opt.c" ]
+-  }
+-
+   if (enable_lzma_opt) {
+     sources += [
+       "Asm/x86/Sha256Opt.asm",
+diff --git a/third_party/lzma_sdk/C/7z.h b/third_party/lzma_sdk/C/7z.h
+index 304f75ffc548116cc1730399b1f79b876c840611..9e27c0152189b3bb0d09f123b167c2eedc122d96 100644
+--- a/third_party/lzma_sdk/C/7z.h
++++ b/third_party/lzma_sdk/C/7z.h
+@@ -1,8 +1,8 @@
+ /* 7z.h -- 7z interface
+-2018-07-02 : Igor Pavlov : Public domain */
++2023-04-02 : Igor Pavlov : Public domain */
+ 
+-#ifndef __7Z_H
+-#define __7Z_H
++#ifndef ZIP7_INC_7Z_H
++#define ZIP7_INC_7Z_H
+ 
+ #include "7zTypes.h"
+ 
+@@ -98,7 +98,7 @@ typedef struct
+ UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex);
+ 
+ SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex,
+-    ILookInStream *stream, UInt64 startPos,
++    ILookInStreamPtr stream, UInt64 startPos,
+     Byte *outBuffer, size_t outSize,
+     ISzAllocPtr allocMain);
+ 
+@@ -174,7 +174,7 @@ UInt16 *SzArEx_GetFullNameUtf16_Back(const CSzArEx *p, size_t fileIndex, UInt16
+ 
+ SRes SzArEx_Extract(
+     const CSzArEx *db,
+-    ILookInStream *inStream,
++    ILookInStreamPtr inStream,
+     UInt32 fileIndex,         /* index of file */
+     UInt32 *blockIndex,       /* index of solid block */
+     Byte **outBuffer,         /* pointer to pointer to output buffer (allocated with allocMain) */
+@@ -196,7 +196,7 @@ SZ_ERROR_INPUT_EOF
+ SZ_ERROR_FAIL
+ */
+ 
+-SRes SzArEx_Open(CSzArEx *p, ILookInStream *inStream,
++SRes SzArEx_Open(CSzArEx *p, ILookInStreamPtr inStream,
+     ISzAllocPtr allocMain, ISzAllocPtr allocTemp);
+ 
+ EXTERN_C_END
+diff --git a/third_party/lzma_sdk/C/7zAlloc.c b/third_party/lzma_sdk/C/7zAlloc.c
+index c924a529fa8724793ad2420183bacfbe26afcf28..2f0659af6f7bf268f3160edd666fc18f2cdc4388 100644
+--- a/third_party/lzma_sdk/C/7zAlloc.c
++++ b/third_party/lzma_sdk/C/7zAlloc.c
+@@ -1,5 +1,5 @@
+-/* 7zAlloc.c -- Allocation functions
+-2017-04-03 : Igor Pavlov : Public domain */
++/* 7zAlloc.c -- Allocation functions for 7z processing
++2023-03-04 : Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+ 
+@@ -7,74 +7,83 @@
+ 
+ #include "7zAlloc.h"
+ 
+-/* #define _SZ_ALLOC_DEBUG */
+-/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */
++/* #define SZ_ALLOC_DEBUG */
++/* use SZ_ALLOC_DEBUG to debug alloc/free operations */
+ 
+-#ifdef _SZ_ALLOC_DEBUG
++#ifdef SZ_ALLOC_DEBUG
+ 
++/*
+ #ifdef _WIN32
+-#include <windows.h>
++#include "7zWindows.h"
+ #endif
++*/
+ 
+ #include <stdio.h>
+-int g_allocCount = 0;
+-int g_allocCountTemp = 0;
++static int g_allocCount = 0;
++static int g_allocCountTemp = 0;
+ 
++static void Print_Alloc(const char *s, size_t size, int *counter)
++{
++  const unsigned size2 = (unsigned)size;
++  fprintf(stderr, "\n%s count = %10d : %10u bytes; ", s, *counter, size2);
++  (*counter)++;
++}
++static void Print_Free(const char *s, int *counter)
++{
++  (*counter)--;
++  fprintf(stderr, "\n%s count = %10d", s, *counter);
++}
+ #endif
+ 
+ void *SzAlloc(ISzAllocPtr p, size_t size)
+ {
+-  UNUSED_VAR(p);
++  UNUSED_VAR(p)
+   if (size == 0)
+     return 0;
+-  #ifdef _SZ_ALLOC_DEBUG
+-  fprintf(stderr, "\nAlloc %10u bytes; count = %10d", (unsigned)size, g_allocCount);
+-  g_allocCount++;
++  #ifdef SZ_ALLOC_DEBUG
++  Print_Alloc("Alloc", size, &g_allocCount);
+   #endif
+   return malloc(size);
+ }
+ 
+ void SzFree(ISzAllocPtr p, void *address)
+ {
+-  UNUSED_VAR(p);
+-  #ifdef _SZ_ALLOC_DEBUG
+-  if (address != 0)
+-  {
+-    g_allocCount--;
+-    fprintf(stderr, "\nFree; count = %10d", g_allocCount);
+-  }
++  UNUSED_VAR(p)
++  #ifdef SZ_ALLOC_DEBUG
++  if (address)
++    Print_Free("Free ", &g_allocCount);
+   #endif
+   free(address);
+ }
+ 
+ void *SzAllocTemp(ISzAllocPtr p, size_t size)
+ {
+-  UNUSED_VAR(p);
++  UNUSED_VAR(p)
+   if (size == 0)
+     return 0;
+-  #ifdef _SZ_ALLOC_DEBUG
+-  fprintf(stderr, "\nAlloc_temp %10u bytes;  count = %10d", (unsigned)size, g_allocCountTemp);
+-  g_allocCountTemp++;
++  #ifdef SZ_ALLOC_DEBUG
++  Print_Alloc("Alloc_temp", size, &g_allocCountTemp);
++  /*
+   #ifdef _WIN32
+   return HeapAlloc(GetProcessHeap(), 0, size);
+   #endif
++  */
+   #endif
+   return malloc(size);
+ }
+ 
+ void SzFreeTemp(ISzAllocPtr p, void *address)
+ {
+-  UNUSED_VAR(p);
+-  #ifdef _SZ_ALLOC_DEBUG
+-  if (address != 0)
+-  {
+-    g_allocCountTemp--;
+-    fprintf(stderr, "\nFree_temp; count = %10d", g_allocCountTemp);
+-  }
++  UNUSED_VAR(p)
++  #ifdef SZ_ALLOC_DEBUG
++  if (address)
++    Print_Free("Free_temp ", &g_allocCountTemp);
++  /*
+   #ifdef _WIN32
+   HeapFree(GetProcessHeap(), 0, address);
+   return;
+   #endif
++  */
+   #endif
+   free(address);
+ }
+diff --git a/third_party/lzma_sdk/C/7zAlloc.h b/third_party/lzma_sdk/C/7zAlloc.h
+index 44778f9b2e189877e2b46661ec33ea04d79e0741..b2b8b0cdd93f4c04a23539bcf1914f91c122d71f 100644
+--- a/third_party/lzma_sdk/C/7zAlloc.h
++++ b/third_party/lzma_sdk/C/7zAlloc.h
+@@ -1,8 +1,8 @@
+ /* 7zAlloc.h -- Allocation functions
+-2017-04-03 : Igor Pavlov : Public domain */
++2023-03-04 : Igor Pavlov : Public domain */
+ 
+-#ifndef __7Z_ALLOC_H
+-#define __7Z_ALLOC_H
++#ifndef ZIP7_INC_7Z_ALLOC_H
++#define ZIP7_INC_7Z_ALLOC_H
+ 
+ #include "7zTypes.h"
+ 
+diff --git a/third_party/lzma_sdk/C/7zArcIn.c b/third_party/lzma_sdk/C/7zArcIn.c
+index 0d9dec41e500ca291a9687fa8909c869d24229df..23f294992235ef126c1af4920acb5bf8cb17f257 100644
+--- a/third_party/lzma_sdk/C/7zArcIn.c
++++ b/third_party/lzma_sdk/C/7zArcIn.c
+@@ -1,5 +1,5 @@
+ /* 7zArcIn.c -- 7z Input functions
+-2021-02-09 : Igor Pavlov : Public domain */
++2023-09-07 : Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+ 
+@@ -10,10 +10,11 @@
+ #include "7zCrc.h"
+ #include "CpuArch.h"
+ 
+-#define MY_ALLOC(T, p, size, alloc) { \
+-  if ((p = (T *)ISzAlloc_Alloc(alloc, (size) * sizeof(T))) == NULL) return SZ_ERROR_MEM; }
++#define MY_ALLOC(T, p, size, alloc) \
++  { if ((p = (T *)ISzAlloc_Alloc(alloc, (size) * sizeof(T))) == NULL) return SZ_ERROR_MEM; }
+ 
+-#define MY_ALLOC_ZE(T, p, size, alloc) { if ((size) == 0) p = NULL; else MY_ALLOC(T, p, size, alloc) }
++#define MY_ALLOC_ZE(T, p, size, alloc) \
++  { if ((size) == 0) p = NULL; else MY_ALLOC(T, p, size, alloc) }
+ 
+ #define MY_ALLOC_AND_CPY(to, size, from, alloc) \
+   { MY_ALLOC(Byte, to, size, alloc); memcpy(to, from, size); }
+@@ -58,7 +59,7 @@ enum EIdEnum
+ 
+ const Byte k7zSignature[k7zSignatureSize] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C};
+ 
+-#define SzBitUi32s_Init(p) { (p)->Defs = NULL; (p)->Vals = NULL; }
++#define SzBitUi32s_INIT(p) { (p)->Defs = NULL; (p)->Vals = NULL; }
+ 
+ static SRes SzBitUi32s_Alloc(CSzBitUi32s *p, size_t num, ISzAllocPtr alloc)
+ {
+@@ -69,8 +70,8 @@ static SRes SzBitUi32s_Alloc(CSzBitUi32s *p, size_t num, ISzAllocPtr alloc)
+   }
+   else
+   {
+-    MY_ALLOC(Byte, p->Defs, (num + 7) >> 3, alloc);
+-    MY_ALLOC(UInt32, p->Vals, num, alloc);
++    MY_ALLOC(Byte, p->Defs, (num + 7) >> 3, alloc)
++    MY_ALLOC(UInt32, p->Vals, num, alloc)
+   }
+   return SZ_OK;
+ }
+@@ -81,7 +82,7 @@ static void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc)
+   ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL;
+ }
+ 
+-#define SzBitUi64s_Init(p) { (p)->Defs = NULL; (p)->Vals = NULL; }
++#define SzBitUi64s_INIT(p) { (p)->Defs = NULL; (p)->Vals = NULL; }
+ 
+ static void SzBitUi64s_Free(CSzBitUi64s *p, ISzAllocPtr alloc)
+ {
+@@ -96,7 +97,7 @@ static void SzAr_Init(CSzAr *p)
+   p->NumFolders = 0;
+   
+   p->PackPositions = NULL;
+-  SzBitUi32s_Init(&p->FolderCRCs);
++  SzBitUi32s_INIT(&p->FolderCRCs)
+ 
+   p->FoCodersOffsets = NULL;
+   p->FoStartPackStreamIndex = NULL;
+@@ -142,11 +143,11 @@ void SzArEx_Init(CSzArEx *p)
+   p->FileNameOffsets = NULL;
+   p->FileNames = NULL;
+   
+-  SzBitUi32s_Init(&p->CRCs);
+-  SzBitUi32s_Init(&p->Attribs);
+-  // SzBitUi32s_Init(&p->Parents);
+-  SzBitUi64s_Init(&p->MTime);
+-  SzBitUi64s_Init(&p->CTime);
++  SzBitUi32s_INIT(&p->CRCs)
++  SzBitUi32s_INIT(&p->Attribs)
++  // SzBitUi32s_INIT(&p->Parents)
++  SzBitUi64s_INIT(&p->MTime)
++  SzBitUi64s_INIT(&p->CTime)
+ }
+ 
+ void SzArEx_Free(CSzArEx *p, ISzAllocPtr alloc)
+@@ -180,11 +181,20 @@ static int TestSignatureCandidate(const Byte *testBytes)
+   return 1;
+ }
+ 
+-#define SzData_Clear(p) { (p)->Data = NULL; (p)->Size = 0; }
++#define SzData_CLEAR(p) { (p)->Data = NULL; (p)->Size = 0; }
++
++#define SZ_READ_BYTE_SD_NOCHECK(_sd_, dest) \
++    (_sd_)->Size--; dest = *(_sd_)->Data++;
++
++#define SZ_READ_BYTE_SD(_sd_, dest) \
++    if ((_sd_)->Size == 0) return SZ_ERROR_ARCHIVE; \
++    SZ_READ_BYTE_SD_NOCHECK(_sd_, dest)
+ 
+-#define SZ_READ_BYTE_SD(_sd_, dest) if ((_sd_)->Size == 0) return SZ_ERROR_ARCHIVE; (_sd_)->Size--; dest = *(_sd_)->Data++;
+ #define SZ_READ_BYTE(dest) SZ_READ_BYTE_SD(sd, dest)
+-#define SZ_READ_BYTE_2(dest) if (sd.Size == 0) return SZ_ERROR_ARCHIVE; sd.Size--; dest = *sd.Data++;
++
++#define SZ_READ_BYTE_2(dest) \
++    if (sd.Size == 0) return SZ_ERROR_ARCHIVE; \
++    sd.Size--; dest = *sd.Data++;
+ 
+ #define SKIP_DATA(sd, size) { sd->Size -= (size_t)(size); sd->Data += (size_t)(size); }
+ #define SKIP_DATA2(sd, size) { sd.Size -= (size_t)(size); sd.Data += (size_t)(size); }
+@@ -192,25 +202,25 @@ static int TestSignatureCandidate(const Byte *testBytes)
+ #define SZ_READ_32(dest) if (sd.Size < 4) return SZ_ERROR_ARCHIVE; \
+    dest = GetUi32(sd.Data); SKIP_DATA2(sd, 4);
+ 
+-static MY_NO_INLINE SRes ReadNumber(CSzData *sd, UInt64 *value)
++static Z7_NO_INLINE SRes ReadNumber(CSzData *sd, UInt64 *value)
+ {
+   Byte firstByte, mask;
+   unsigned i;
+   UInt32 v;
+ 
+-  SZ_READ_BYTE(firstByte);
++  SZ_READ_BYTE(firstByte)
+   if ((firstByte & 0x80) == 0)
+   {
+     *value = firstByte;
+     return SZ_OK;
+   }
+-  SZ_READ_BYTE(v);
++  SZ_READ_BYTE(v)
+   if ((firstByte & 0x40) == 0)
+   {
+     *value = (((UInt32)firstByte & 0x3F) << 8) | v;
+     return SZ_OK;
+   }
+-  SZ_READ_BYTE(mask);
++  SZ_READ_BYTE(mask)
+   *value = v | ((UInt32)mask << 8);
+   mask = 0x20;
+   for (i = 2; i < 8; i++)
+@@ -218,11 +228,11 @@ static MY_NO_INLINE SRes ReadNumber(CSzData *sd, UInt64 *value)
+     Byte b;
+     if ((firstByte & mask) == 0)
+     {
+-      UInt64 highPart = (unsigned)firstByte & (unsigned)(mask - 1);
++      const UInt64 highPart = (unsigned)firstByte & (unsigned)(mask - 1);
+       *value |= (highPart << (8 * i));
+       return SZ_OK;
+     }
+-    SZ_READ_BYTE(b);
++    SZ_READ_BYTE(b)
+     *value |= ((UInt64)b << (8 * i));
+     mask >>= 1;
+   }
+@@ -230,7 +240,7 @@ static MY_NO_INLINE SRes ReadNumber(CSzData *sd, UInt64 *value)
+ }
+ 
+ 
+-static MY_NO_INLINE SRes SzReadNumber32(CSzData *sd, UInt32 *value)
++static Z7_NO_INLINE SRes SzReadNumber32(CSzData *sd, UInt32 *value)
+ {
+   Byte firstByte;
+   UInt64 value64;
+@@ -244,7 +254,7 @@ static MY_NO_INLINE SRes SzReadNumber32(CSzData *sd, UInt32 *value)
+     sd->Size--;
+     return SZ_OK;
+   }
+-  RINOK(ReadNumber(sd, &value64));
++  RINOK(ReadNumber(sd, &value64))
+   if (value64 >= (UInt32)0x80000000 - 1)
+     return SZ_ERROR_UNSUPPORTED;
+   if (value64 >= ((UInt64)(1) << ((sizeof(size_t) - 1) * 8 + 4)))
+@@ -258,10 +268,10 @@ static MY_NO_INLINE SRes SzReadNumber32(CSzData *sd, UInt32 *value)
+ static SRes SkipData(CSzData *sd)
+ {
+   UInt64 size;
+-  RINOK(ReadNumber(sd, &size));
++  RINOK(ReadNumber(sd, &size))
+   if (size > sd->Size)
+     return SZ_ERROR_ARCHIVE;
+-  SKIP_DATA(sd, size);
++  SKIP_DATA(sd, size)
+   return SZ_OK;
+ }
+ 
+@@ -270,28 +280,28 @@ static SRes WaitId(CSzData *sd, UInt32 id)
+   for (;;)
+   {
+     UInt64 type;
+-    RINOK(ReadID(sd, &type));
++    RINOK(ReadID(sd, &type))
+     if (type == id)
+       return SZ_OK;
+     if (type == k7zIdEnd)
+       return SZ_ERROR_ARCHIVE;
+-    RINOK(SkipData(sd));
++    RINOK(SkipData(sd))
+   }
+ }
+ 
+ static SRes RememberBitVector(CSzData *sd, UInt32 numItems, const Byte **v)
+ {
+-  UInt32 numBytes = (numItems + 7) >> 3;
++  const UInt32 numBytes = (numItems + 7) >> 3;
+   if (numBytes > sd->Size)
+     return SZ_ERROR_ARCHIVE;
+   *v = sd->Data;
+-  SKIP_DATA(sd, numBytes);
++  SKIP_DATA(sd, numBytes)
+   return SZ_OK;
+ }
+ 
+ static UInt32 CountDefinedBits(const Byte *bits, UInt32 numItems)
+ {
+-  Byte b = 0;
++  unsigned b = 0;
+   unsigned m = 0;
+   UInt32 sum = 0;
+   for (; numItems != 0; numItems--)
+@@ -302,53 +312,53 @@ static UInt32 CountDefinedBits(const Byte *bits, UInt32 numItems)
+       m = 8;
+     }
+     m--;
+-    sum += ((b >> m) & 1);
++    sum += (UInt32)((b >> m) & 1);
+   }
+   return sum;
+ }
+ 
+-static MY_NO_INLINE SRes ReadBitVector(CSzData *sd, UInt32 numItems, Byte **v, ISzAllocPtr alloc)
++static Z7_NO_INLINE SRes ReadBitVector(CSzData *sd, UInt32 numItems, Byte **v, ISzAllocPtr alloc)
+ {
+   Byte allAreDefined;
+   Byte *v2;
+-  UInt32 numBytes = (numItems + 7) >> 3;
++  const UInt32 numBytes = (numItems + 7) >> 3;
+   *v = NULL;
+-  SZ_READ_BYTE(allAreDefined);
++  SZ_READ_BYTE(allAreDefined)
+   if (numBytes == 0)
+     return SZ_OK;
+   if (allAreDefined == 0)
+   {
+     if (numBytes > sd->Size)
+       return SZ_ERROR_ARCHIVE;
+-    MY_ALLOC_AND_CPY(*v, numBytes, sd->Data, alloc);
+-    SKIP_DATA(sd, numBytes);
++    MY_ALLOC_AND_CPY(*v, numBytes, sd->Data, alloc)
++    SKIP_DATA(sd, numBytes)
+     return SZ_OK;
+   }
+-  MY_ALLOC(Byte, *v, numBytes, alloc);
++  MY_ALLOC(Byte, *v, numBytes, alloc)
+   v2 = *v;
+   memset(v2, 0xFF, (size_t)numBytes);
+   {
+-    unsigned numBits = (unsigned)numItems & 7;
++    const unsigned numBits = (unsigned)numItems & 7;
+     if (numBits != 0)
+       v2[(size_t)numBytes - 1] = (Byte)((((UInt32)1 << numBits) - 1) << (8 - numBits));
+   }
+   return SZ_OK;
+ }
+ 
+-static MY_NO_INLINE SRes ReadUi32s(CSzData *sd2, UInt32 numItems, CSzBitUi32s *crcs, ISzAllocPtr alloc)
++static Z7_NO_INLINE SRes ReadUi32s(CSzData *sd2, UInt32 numItems, CSzBitUi32s *crcs, ISzAllocPtr alloc)
+ {
+   UInt32 i;
+   CSzData sd;
+   UInt32 *vals;
+   const Byte *defs;
+-  MY_ALLOC_ZE(UInt32, crcs->Vals, numItems, alloc);
++  MY_ALLOC_ZE(UInt32, crcs->Vals, numItems, alloc)
+   sd = *sd2;
+   defs = crcs->Defs;
+   vals = crcs->Vals;
+   for (i = 0; i < numItems; i++)
+     if (SzBitArray_Check(defs, i))
+     {
+-      SZ_READ_32(vals[i]);
++      SZ_READ_32(vals[i])
+     }
+     else
+       vals[i] = 0;
+@@ -359,7 +369,7 @@ static MY_NO_INLINE SRes ReadUi32s(CSzData *sd2, UInt32 numItems, CSzBitUi32s *c
+ static SRes ReadBitUi32s(CSzData *sd, UInt32 numItems, CSzBitUi32s *crcs, ISzAllocPtr alloc)
+ {
+   SzBitUi32s_Free(crcs, alloc);
+-  RINOK(ReadBitVector(sd, numItems, &crcs->Defs, alloc));
++  RINOK(ReadBitVector(sd, numItems, &crcs->Defs, alloc))
+   return ReadUi32s(sd, numItems, crcs, alloc);
+ }
+ 
+@@ -367,36 +377,36 @@ static SRes SkipBitUi32s(CSzData *sd, UInt32 numItems)
+ {
+   Byte allAreDefined;
+   UInt32 numDefined = numItems;
+-  SZ_READ_BYTE(allAreDefined);
++  SZ_READ_BYTE(allAreDefined)
+   if (!allAreDefined)
+   {
+-    size_t numBytes = (numItems + 7) >> 3;
++    const size_t numBytes = (numItems + 7) >> 3;
+     if (numBytes > sd->Size)
+       return SZ_ERROR_ARCHIVE;
+     numDefined = CountDefinedBits(sd->Data, numItems);
+-    SKIP_DATA(sd, numBytes);
++    SKIP_DATA(sd, numBytes)
+   }
+   if (numDefined > (sd->Size >> 2))
+     return SZ_ERROR_ARCHIVE;
+-  SKIP_DATA(sd, (size_t)numDefined * 4);
++  SKIP_DATA(sd, (size_t)numDefined * 4)
+   return SZ_OK;
+ }
+ 
+ static SRes ReadPackInfo(CSzAr *p, CSzData *sd, ISzAllocPtr alloc)
+ {
+-  RINOK(SzReadNumber32(sd, &p->NumPackStreams));
++  RINOK(SzReadNumber32(sd, &p->NumPackStreams))
+ 
+-  RINOK(WaitId(sd, k7zIdSize));
+-  MY_ALLOC(UInt64, p->PackPositions, (size_t)p->NumPackStreams + 1, alloc);
++  RINOK(WaitId(sd, k7zIdSize))
++  MY_ALLOC(UInt64, p->PackPositions, (size_t)p->NumPackStreams + 1, alloc)
+   {
+     UInt64 sum = 0;
+     UInt32 i;
+-    UInt32 numPackStreams = p->NumPackStreams;
++    const UInt32 numPackStreams = p->NumPackStreams;
+     for (i = 0; i < numPackStreams; i++)
+     {
+       UInt64 packSize;
+       p->PackPositions[i] = sum;
+-      RINOK(ReadNumber(sd, &packSize));
++      RINOK(ReadNumber(sd, &packSize))
+       sum += packSize;
+       if (sum < packSize)
+         return SZ_ERROR_ARCHIVE;
+@@ -407,16 +417,16 @@ static SRes ReadPackInfo(CSzAr *p, CSzData *sd, ISzAllocPtr alloc)
+   for (;;)
+   {
+     UInt64 type;
+-    RINOK(ReadID(sd, &type));
++    RINOK(ReadID(sd, &type))
+     if (type == k7zIdEnd)
+       return SZ_OK;
+     if (type == k7zIdCRC)
+     {
+       /* CRC of packed streams is unused now */
+-      RINOK(SkipBitUi32s(sd, p->NumPackStreams));
++      RINOK(SkipBitUi32s(sd, p->NumPackStreams))
+       continue;
+     }
+-    RINOK(SkipData(sd));
++    RINOK(SkipData(sd))
+   }
+ }
+ 
+@@ -442,7 +452,7 @@ SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd)
+   f->NumPackStreams = 0;
+   f->UnpackStream = 0;
+   
+-  RINOK(SzReadNumber32(sd, &numCoders));
++  RINOK(SzReadNumber32(sd, &numCoders))
+   if (numCoders == 0 || numCoders > SZ_NUM_CODERS_IN_FOLDER_MAX)
+     return SZ_ERROR_UNSUPPORTED;
+   
+@@ -453,7 +463,7 @@ SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd)
+     unsigned idSize, j;
+     UInt64 id;
+     
+-    SZ_READ_BYTE(mainByte);
++    SZ_READ_BYTE(mainByte)
+     if ((mainByte & 0xC0) != 0)
+       return SZ_ERROR_UNSUPPORTED;
+     
+@@ -481,12 +491,12 @@ SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd)
+     {
+       UInt32 numStreams;
+       
+-      RINOK(SzReadNumber32(sd, &numStreams));
++      RINOK(SzReadNumber32(sd, &numStreams))
+       if (numStreams > k_NumCodersStreams_in_Folder_MAX)
+         return SZ_ERROR_UNSUPPORTED;
+       coder->NumStreams = (Byte)numStreams;
+ 
+-      RINOK(SzReadNumber32(sd, &numStreams));
++      RINOK(SzReadNumber32(sd, &numStreams))
+       if (numStreams != 1)
+         return SZ_ERROR_UNSUPPORTED;
+     }
+@@ -499,7 +509,7 @@ SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd)
+     if ((mainByte & 0x20) != 0)
+     {
+       UInt32 propsSize = 0;
+-      RINOK(SzReadNumber32(sd, &propsSize));
++      RINOK(SzReadNumber32(sd, &propsSize))
+       if (propsSize > sd->Size)
+         return SZ_ERROR_ARCHIVE;
+       if (propsSize >= 0x80)
+@@ -549,12 +559,12 @@ SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd)
+       {
+         CSzBond *bp = f->Bonds + i;
+         
+-        RINOK(SzReadNumber32(sd, &bp->InIndex));
++        RINOK(SzReadNumber32(sd, &bp->InIndex))
+         if (bp->InIndex >= numInStreams || streamUsed[bp->InIndex])
+           return SZ_ERROR_ARCHIVE;
+         streamUsed[bp->InIndex] = True;
+         
+-        RINOK(SzReadNumber32(sd, &bp->OutIndex));
++        RINOK(SzReadNumber32(sd, &bp->OutIndex))
+         if (bp->OutIndex >= numCoders || coderUsed[bp->OutIndex])
+           return SZ_ERROR_ARCHIVE;
+         coderUsed[bp->OutIndex] = True;
+@@ -584,7 +594,7 @@ SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd)
+       for (i = 0; i < numPackStreams; i++)
+       {
+         UInt32 index;
+-        RINOK(SzReadNumber32(sd, &index));
++        RINOK(SzReadNumber32(sd, &index))
+         if (index >= numInStreams || streamUsed[index])
+           return SZ_ERROR_ARCHIVE;
+         streamUsed[index] = True;
+@@ -598,7 +608,7 @@ SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd)
+ }
+ 
+ 
+-static MY_NO_INLINE SRes SkipNumbers(CSzData *sd2, UInt32 num)
++static Z7_NO_INLINE SRes SkipNumbers(CSzData *sd2, UInt32 num)
+ {
+   CSzData sd;
+   sd = *sd2;
+@@ -606,7 +616,7 @@ static MY_NO_INLINE SRes SkipNumbers(CSzData *sd2, UInt32 num)
+   {
+     Byte firstByte, mask;
+     unsigned i;
+-    SZ_READ_BYTE_2(firstByte);
++    SZ_READ_BYTE_2(firstByte)
+     if ((firstByte & 0x80) == 0)
+       continue;
+     if ((firstByte & 0x40) == 0)
+@@ -622,7 +632,7 @@ static MY_NO_INLINE SRes SkipNumbers(CSzData *sd2, UInt32 num)
+       mask >>= 1;
+     if (i > sd.Size)
+       return SZ_ERROR_ARCHIVE;
+-    SKIP_DATA2(sd, i);
++    SKIP_DATA2(sd, i)
+   }
+   *sd2 = sd;
+   return SZ_OK;
+@@ -645,30 +655,30 @@ static SRes ReadUnpackInfo(CSzAr *p,
+   const Byte *startBufPtr;
+   Byte external;
+   
+-  RINOK(WaitId(sd2, k7zIdFolder));
++  RINOK(WaitId(sd2, k7zIdFolder))
+   
+-  RINOK(SzReadNumber32(sd2, &numFolders));
++  RINOK(SzReadNumber32(sd2, &numFolders))
+   if (numFolders > numFoldersMax)
+     return SZ_ERROR_UNSUPPORTED;
+   p->NumFolders = numFolders;
+ 
+-  SZ_READ_BYTE_SD(sd2, external);
++  SZ_READ_BYTE_SD(sd2, external)
+   if (external == 0)
+     sd = *sd2;
+   else
+   {
+     UInt32 index;
+-    RINOK(SzReadNumber32(sd2, &index));
++    RINOK(SzReadNumber32(sd2, &index))
+     if (index >= numTempBufs)
+       return SZ_ERROR_ARCHIVE;
+     sd.Data = tempBufs[index].data;
+     sd.Size = tempBufs[index].size;
+   }
+   
+-  MY_ALLOC(size_t, p->FoCodersOffsets, (size_t)numFolders + 1, alloc);
+-  MY_ALLOC(UInt32, p->FoStartPackStreamIndex, (size_t)numFolders + 1, alloc);
+-  MY_ALLOC(UInt32, p->FoToCoderUnpackSizes, (size_t)numFolders + 1, alloc);
+-  MY_ALLOC_ZE(Byte, p->FoToMainUnpackSizeIndex, (size_t)numFolders, alloc);
++  MY_ALLOC(size_t, p->FoCodersOffsets, (size_t)numFolders + 1, alloc)
++  MY_ALLOC(UInt32, p->FoStartPackStreamIndex, (size_t)numFolders + 1, alloc)
++  MY_ALLOC(UInt32, p->FoToCoderUnpackSizes, (size_t)numFolders + 1, alloc)
++  MY_ALLOC_ZE(Byte, p->FoToMainUnpackSizeIndex, (size_t)numFolders, alloc)
+   
+   startBufPtr = sd.Data;
+   
+@@ -681,7 +691,7 @@ static SRes ReadUnpackInfo(CSzAr *p,
+     
+     p->FoCodersOffsets[fo] = (size_t)(sd.Data - startBufPtr);
+     
+-    RINOK(SzReadNumber32(&sd, &numCoders));
++    RINOK(SzReadNumber32(&sd, &numCoders))
+     if (numCoders == 0 || numCoders > k_Scan_NumCoders_MAX)
+       return SZ_ERROR_UNSUPPORTED;
+     
+@@ -691,7 +701,7 @@ static SRes ReadUnpackInfo(CSzAr *p,
+       unsigned idSize;
+       UInt32 coderInStreams;
+       
+-      SZ_READ_BYTE_2(mainByte);
++      SZ_READ_BYTE_2(mainByte)
+       if ((mainByte & 0xC0) != 0)
+         return SZ_ERROR_UNSUPPORTED;
+       idSize = (mainByte & 0xF);
+@@ -699,15 +709,15 @@ static SRes ReadUnpackInfo(CSzAr *p,
+         return SZ_ERROR_UNSUPPORTED;
+       if (idSize > sd.Size)
+         return SZ_ERROR_ARCHIVE;
+-      SKIP_DATA2(sd, idSize);
++      SKIP_DATA2(sd, idSize)
+       
+       coderInStreams = 1;
+       
+       if ((mainByte & 0x10) != 0)
+       {
+         UInt32 coderOutStreams;
+-        RINOK(SzReadNumber32(&sd, &coderInStreams));
+-        RINOK(SzReadNumber32(&sd, &coderOutStreams));
++        RINOK(SzReadNumber32(&sd, &coderInStreams))
++        RINOK(SzReadNumber32(&sd, &coderOutStreams))
+         if (coderInStreams > k_Scan_NumCodersStreams_in_Folder_MAX || coderOutStreams != 1)
+           return SZ_ERROR_UNSUPPORTED;
+       }
+@@ -717,10 +727,10 @@ static SRes ReadUnpackInfo(CSzAr *p,
+       if ((mainByte & 0x20) != 0)
+       {
+         UInt32 propsSize;
+-        RINOK(SzReadNumber32(&sd, &propsSize));
++        RINOK(SzReadNumber32(&sd, &propsSize))
+         if (propsSize > sd.Size)
+           return SZ_ERROR_ARCHIVE;
+-        SKIP_DATA2(sd, propsSize);
++        SKIP_DATA2(sd, propsSize)
+       }
+     }
+     
+@@ -734,7 +744,7 @@ static SRes ReadUnpackInfo(CSzAr *p,
+         Byte coderUsed[k_Scan_NumCoders_MAX];
+     
+         UInt32 i;
+-        UInt32 numBonds = numCoders - 1;
++        const UInt32 numBonds = numCoders - 1;
+         if (numInStreams < numBonds)
+           return SZ_ERROR_ARCHIVE;
+         
+@@ -750,12 +760,12 @@ static SRes ReadUnpackInfo(CSzAr *p,
+         {
+           UInt32 index;
+           
+-          RINOK(SzReadNumber32(&sd, &index));
++          RINOK(SzReadNumber32(&sd, &index))
+           if (index >= numInStreams || streamUsed[index])
+             return SZ_ERROR_ARCHIVE;
+           streamUsed[index] = True;
+           
+-          RINOK(SzReadNumber32(&sd, &index));
++          RINOK(SzReadNumber32(&sd, &index))
+           if (index >= numCoders || coderUsed[index])
+             return SZ_ERROR_ARCHIVE;
+           coderUsed[index] = True;
+@@ -767,7 +777,7 @@ static SRes ReadUnpackInfo(CSzAr *p,
+           for (i = 0; i < numPackStreams; i++)
+           {
+             UInt32 index;
+-            RINOK(SzReadNumber32(&sd, &index));
++            RINOK(SzReadNumber32(&sd, &index))
+             if (index >= numInStreams || streamUsed[index])
+               return SZ_ERROR_ARCHIVE;
+             streamUsed[index] = True;
+@@ -802,7 +812,7 @@ static SRes ReadUnpackInfo(CSzAr *p,
+     const size_t dataSize = (size_t)(sd.Data - startBufPtr);
+     p->FoStartPackStreamIndex[fo] = packStreamIndex;
+     p->FoCodersOffsets[fo] = dataSize;
+-    MY_ALLOC_ZE_AND_CPY(p->CodersData, dataSize, startBufPtr, alloc);
++    MY_ALLOC_ZE_AND_CPY(p->CodersData, dataSize, startBufPtr, alloc)
+   }
+   
+   if (external != 0)
+@@ -812,21 +822,21 @@ static SRes ReadUnpackInfo(CSzAr *p,
+     sd = *sd2;
+   }
+   
+-  RINOK(WaitId(&sd, k7zIdCodersUnpackSize));
++  RINOK(WaitId(&sd, k7zIdCodersUnpackSize))
+   
+-  MY_ALLOC_ZE(UInt64, p->CoderUnpackSizes, (size_t)numCodersOutStreams, alloc);
++  MY_ALLOC_ZE(UInt64, p->CoderUnpackSizes, (size_t)numCodersOutStreams, alloc)
+   {
+     UInt32 i;
+     for (i = 0; i < numCodersOutStreams; i++)
+     {
+-      RINOK(ReadNumber(&sd, p->CoderUnpackSizes + i));
++      RINOK(ReadNumber(&sd, p->CoderUnpackSizes + i))
+     }
+   }
+ 
+   for (;;)
+   {
+     UInt64 type;
+-    RINOK(ReadID(&sd, &type));
++    RINOK(ReadID(&sd, &type))
+     if (type == k7zIdEnd)
+     {
+       *sd2 = sd;
+@@ -834,10 +844,10 @@ static SRes ReadUnpackInfo(CSzAr *p,
+     }
+     if (type == k7zIdCRC)
+     {
+-      RINOK(ReadBitUi32s(&sd, numFolders, &p->FolderCRCs, alloc));
++      RINOK(ReadBitUi32s(&sd, numFolders, &p->FolderCRCs, alloc))
+       continue;
+     }
+-    RINOK(SkipData(&sd));
++    RINOK(SkipData(&sd))
+   }
+ }
+ 
+@@ -862,13 +872,13 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi)
+ {
+   UInt64 type = 0;
+   UInt32 numSubDigests = 0;
+-  UInt32 numFolders = p->NumFolders;
++  const UInt32 numFolders = p->NumFolders;
+   UInt32 numUnpackStreams = numFolders;
+   UInt32 numUnpackSizesInData = 0;
+ 
+   for (;;)
+   {
+-    RINOK(ReadID(sd, &type));
++    RINOK(ReadID(sd, &type))
+     if (type == k7zIdNumUnpackStream)
+     {
+       UInt32 i;
+@@ -878,7 +888,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi)
+       for (i = 0; i < numFolders; i++)
+       {
+         UInt32 numStreams;
+-        RINOK(SzReadNumber32(sd, &numStreams));
++        RINOK(SzReadNumber32(sd, &numStreams))
+         if (numUnpackStreams > numUnpackStreams + numStreams)
+           return SZ_ERROR_UNSUPPORTED;
+         numUnpackStreams += numStreams;
+@@ -892,7 +902,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi)
+     }
+     if (type == k7zIdCRC || type == k7zIdSize || type == k7zIdEnd)
+       break;
+-    RINOK(SkipData(sd));
++    RINOK(SkipData(sd))
+   }
+ 
+   if (!ssi->sdNumSubStreams.Data)
+@@ -908,9 +918,9 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi)
+   if (type == k7zIdSize)
+   {
+     ssi->sdSizes.Data = sd->Data;
+-    RINOK(SkipNumbers(sd, numUnpackSizesInData));
++    RINOK(SkipNumbers(sd, numUnpackSizesInData))
+     ssi->sdSizes.Size = (size_t)(sd->Data - ssi->sdSizes.Data);
+-    RINOK(ReadID(sd, &type));
++    RINOK(ReadID(sd, &type))
+   }
+ 
+   for (;;)
+@@ -920,14 +930,14 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi)
+     if (type == k7zIdCRC)
+     {
+       ssi->sdCRCs.Data = sd->Data;
+-      RINOK(SkipBitUi32s(sd, numSubDigests));
++      RINOK(SkipBitUi32s(sd, numSubDigests))
+       ssi->sdCRCs.Size = (size_t)(sd->Data - ssi->sdCRCs.Data);
+     }
+     else
+     {
+-      RINOK(SkipData(sd));
++      RINOK(SkipData(sd))
+     }
+-    RINOK(ReadID(sd, &type));
++    RINOK(ReadID(sd, &type))
+   }
+ }
+ 
+@@ -940,31 +950,31 @@ static SRes SzReadStreamsInfo(CSzAr *p,
+ {
+   UInt64 type;
+ 
+-  SzData_Clear(&ssi->sdSizes);
+-  SzData_Clear(&ssi->sdCRCs);
+-  SzData_Clear(&ssi->sdNumSubStreams);
++  SzData_CLEAR(&ssi->sdSizes)
++  SzData_CLEAR(&ssi->sdCRCs)
++  SzData_CLEAR(&ssi->sdNumSubStreams)
+ 
+   *dataOffset = 0;
+-  RINOK(ReadID(sd, &type));
++  RINOK(ReadID(sd, &type))
+   if (type == k7zIdPackInfo)
+   {
+-    RINOK(ReadNumber(sd, dataOffset));
++    RINOK(ReadNumber(sd, dataOffset))
+     if (*dataOffset > p->RangeLimit)
+       return SZ_ERROR_ARCHIVE;
+-    RINOK(ReadPackInfo(p, sd, alloc));
++    RINOK(ReadPackInfo(p, sd, alloc))
+     if (p->PackPositions[p->NumPackStreams] > p->RangeLimit - *dataOffset)
+       return SZ_ERROR_ARCHIVE;
+-    RINOK(ReadID(sd, &type));
++    RINOK(ReadID(sd, &type))
+   }
+   if (type == k7zIdUnpackInfo)
+   {
+-    RINOK(ReadUnpackInfo(p, sd, numFoldersMax, tempBufs, numTempBufs, alloc));
+-    RINOK(ReadID(sd, &type));
++    RINOK(ReadUnpackInfo(p, sd, numFoldersMax, tempBufs, numTempBufs, alloc))
++    RINOK(ReadID(sd, &type))
+   }
+   if (type == k7zIdSubStreamsInfo)
+   {
+-    RINOK(ReadSubStreamsInfo(p, sd, ssi));
+-    RINOK(ReadID(sd, &type));
++    RINOK(ReadSubStreamsInfo(p, sd, ssi))
++    RINOK(ReadID(sd, &type))
+   }
+   else
+   {
+@@ -976,7 +986,7 @@ static SRes SzReadStreamsInfo(CSzAr *p,
+ }
+ 
+ static SRes SzReadAndDecodePackedStreams(
+-    ILookInStream *inStream,
++    ILookInStreamPtr inStream,
+     CSzData *sd,
+     CBuf *tempBufs,
+     UInt32 numFoldersMax,
+@@ -988,7 +998,7 @@ static SRes SzReadAndDecodePackedStreams(
+   UInt32 fo;
+   CSubStreamInfo ssi;
+ 
+-  RINOK(SzReadStreamsInfo(p, sd, numFoldersMax, NULL, 0, &dataStartPos, &ssi, allocTemp));
++  RINOK(SzReadStreamsInfo(p, sd, numFoldersMax, NULL, 0, &dataStartPos, &ssi, allocTemp))
+   
+   dataStartPos += baseOffset;
+   if (p->NumFolders == 0)
+@@ -1000,7 +1010,7 @@ static SRes SzReadAndDecodePackedStreams(
+   for (fo = 0; fo < p->NumFolders; fo++)
+   {
+     CBuf *tempBuf = tempBufs + fo;
+-    UInt64 unpackSize = SzAr_GetFolderUnpackSize(p, fo);
++    const UInt64 unpackSize = SzAr_GetFolderUnpackSize(p, fo);
+     if ((size_t)unpackSize != unpackSize)
+       return SZ_ERROR_MEM;
+     if (!Buf_Create(tempBuf, (size_t)unpackSize, allocTemp))
+@@ -1010,8 +1020,8 @@ static SRes SzReadAndDecodePackedStreams(
+   for (fo = 0; fo < p->NumFolders; fo++)
+   {
+     const CBuf *tempBuf = tempBufs + fo;
+-    RINOK(LookInStream_SeekTo(inStream, dataStartPos));
+-    RINOK(SzAr_DecodeFolder(p, fo, inStream, dataStartPos, tempBuf->data, tempBuf->size, allocTemp));
++    RINOK(LookInStream_SeekTo(inStream, dataStartPos))
++    RINOK(SzAr_DecodeFolder(p, fo, inStream, dataStartPos, tempBuf->data, tempBuf->size, allocTemp))
+   }
+   
+   return SZ_OK;
+@@ -1046,7 +1056,7 @@ static SRes SzReadFileNames(const Byte *data, size_t size, UInt32 numFiles, size
+   return (pos == size) ? SZ_OK : SZ_ERROR_ARCHIVE;
+ }
+ 
+-static MY_NO_INLINE SRes ReadTime(CSzBitUi64s *p, UInt32 num,
++static Z7_NO_INLINE SRes ReadTime(CSzBitUi64s *p, UInt32 num,
+     CSzData *sd2,
+     const CBuf *tempBufs, UInt32 numTempBufs,
+     ISzAllocPtr alloc)
+@@ -1057,22 +1067,22 @@ static MY_NO_INLINE SRes ReadTime(CSzBitUi64s *p, UInt32 num,
+   Byte *defs;
+   Byte external;
+   
+-  RINOK(ReadBitVector(sd2, num, &p->Defs, alloc));
++  RINOK(ReadBitVector(sd2, num, &p->Defs, alloc))
+   
+-  SZ_READ_BYTE_SD(sd2, external);
++  SZ_READ_BYTE_SD(sd2, external)
+   if (external == 0)
+     sd = *sd2;
+   else
+   {
+     UInt32 index;
+-    RINOK(SzReadNumber32(sd2, &index));
++    RINOK(SzReadNumber32(sd2, &index))
+     if (index >= numTempBufs)
+       return SZ_ERROR_ARCHIVE;
+     sd.Data = tempBufs[index].data;
+     sd.Size = tempBufs[index].size;
+   }
+   
+-  MY_ALLOC_ZE(CNtfsFileTime, p->Vals, num, alloc);
++  MY_ALLOC_ZE(CNtfsFileTime, p->Vals, num, alloc)
+   vals = p->Vals;
+   defs = p->Defs;
+   for (i = 0; i < num; i++)
+@@ -1082,7 +1092,7 @@ static MY_NO_INLINE SRes ReadTime(CSzBitUi64s *p, UInt32 num,
+         return SZ_ERROR_ARCHIVE;
+       vals[i].Low = GetUi32(sd.Data);
+       vals[i].High = GetUi32(sd.Data + 4);
+-      SKIP_DATA2(sd, 8);
++      SKIP_DATA2(sd, 8)
+     }
+     else
+       vals[i].High = vals[i].Low = 0;
+@@ -1100,7 +1110,7 @@ static MY_NO_INLINE SRes ReadTime(CSzBitUi64s *p, UInt32 num,
+ static SRes SzReadHeader2(
+     CSzArEx *p,   /* allocMain */
+     CSzData *sd,
+-    ILookInStream *inStream,
++    ILookInStreamPtr inStream,
+     CBuf *tempBufs, UInt32 *numTempBufs,
+     ISzAllocPtr allocMain,
+     ISzAllocPtr allocTemp
+@@ -1111,26 +1121,26 @@ static SRes SzReadHeader2(
+ {
+   UInt64 type;
+   
+-  SzData_Clear(&ssi.sdSizes);
+-  SzData_Clear(&ssi.sdCRCs);
+-  SzData_Clear(&ssi.sdNumSubStreams);
++  SzData_CLEAR(&ssi.sdSizes)
++  SzData_CLEAR(&ssi.sdCRCs)
++  SzData_CLEAR(&ssi.sdNumSubStreams)
+ 
+   ssi.NumSubDigests = 0;
+   ssi.NumTotalSubStreams = 0;
+ 
+-  RINOK(ReadID(sd, &type));
++  RINOK(ReadID(sd, &type))
+ 
+   if (type == k7zIdArchiveProperties)
+   {
+     for (;;)
+     {
+       UInt64 type2;
+-      RINOK(ReadID(sd, &type2));
++      RINOK(ReadID(sd, &type2))
+       if (type2 == k7zIdEnd)
+         break;
+-      RINOK(SkipData(sd));
++      RINOK(SkipData(sd))
+     }
+-    RINOK(ReadID(sd, &type));
++    RINOK(ReadID(sd, &type))
+   }
+ 
+   if (type == k7zIdAdditionalStreamsInfo)
+@@ -1148,15 +1158,15 @@ static SRes SzReadHeader2(
+     
+     if (res != SZ_OK)
+       return res;
+-    RINOK(ReadID(sd, &type));
++    RINOK(ReadID(sd, &type))
+   }
+ 
+   if (type == k7zIdMainStreamsInfo)
+   {
+     RINOK(SzReadStreamsInfo(&p->db, sd, (UInt32)1 << 30, tempBufs, *numTempBufs,
+-        &p->dataPos, &ssi, allocMain));
++        &p->dataPos, &ssi, allocMain))
+     p->dataPos += p->startPosAfterHeader;
+-    RINOK(ReadID(sd, &type));
++    RINOK(ReadID(sd, &type))
+   }
+ 
+   if (type == k7zIdEnd)
+@@ -1174,23 +1184,23 @@ static SRes SzReadHeader2(
+   const Byte *emptyStreams = NULL;
+   const Byte *emptyFiles = NULL;
+   
+-  RINOK(SzReadNumber32(sd, &numFiles));
++  RINOK(SzReadNumber32(sd, &numFiles))
+   p->NumFiles = numFiles;
+ 
+   for (;;)
+   {
+     UInt64 type;
+     UInt64 size;
+-    RINOK(ReadID(sd, &type));
++    RINOK(ReadID(sd, &type))
+     if (type == k7zIdEnd)
+       break;
+-    RINOK(ReadNumber(sd, &size));
++    RINOK(ReadNumber(sd, &size))
+     if (size > sd->Size)
+       return SZ_ERROR_ARCHIVE;
+     
+     if (type >= ((UInt32)1 << 8))
+     {
+-      SKIP_DATA(sd, size);
++      SKIP_DATA(sd, size)
+     }
+     else switch ((unsigned)type)
+     {
+@@ -1200,7 +1210,7 @@ static SRes SzReadHeader2(
+         const Byte *namesData;
+         Byte external;
+ 
+-        SZ_READ_BYTE(external);
++        SZ_READ_BYTE(external)
+         if (external == 0)
+         {
+           namesSize = (size_t)size - 1;
+@@ -1209,7 +1219,7 @@ static SRes SzReadHeader2(
+         else
+         {
+           UInt32 index;
+-          RINOK(SzReadNumber32(sd, &index));
++          RINOK(SzReadNumber32(sd, &index))
+           if (index >= *numTempBufs)
+             return SZ_ERROR_ARCHIVE;
+           namesData = (tempBufs)[index].data;
+@@ -1218,25 +1228,25 @@ static SRes SzReadHeader2(
+ 
+         if ((namesSize & 1) != 0)
+           return SZ_ERROR_ARCHIVE;
+-        MY_ALLOC(size_t, p->FileNameOffsets, numFiles + 1, allocMain);
+-        MY_ALLOC_ZE_AND_CPY(p->FileNames, namesSize, namesData, allocMain);
++        MY_ALLOC(size_t, p->FileNameOffsets, numFiles + 1, allocMain)
++        MY_ALLOC_ZE_AND_CPY(p->FileNames, namesSize, namesData, allocMain)
+         RINOK(SzReadFileNames(p->FileNames, namesSize, numFiles, p->FileNameOffsets))
+         if (external == 0)
+         {
+-          SKIP_DATA(sd, namesSize);
++          SKIP_DATA(sd, namesSize)
+         }
+         break;
+       }
+       case k7zIdEmptyStream:
+       {
+-        RINOK(RememberBitVector(sd, numFiles, &emptyStreams));
++        RINOK(RememberBitVector(sd, numFiles, &emptyStreams))
+         numEmptyStreams = CountDefinedBits(emptyStreams, numFiles);
+         emptyFiles = NULL;
+         break;
+       }
+       case k7zIdEmptyFile:
+       {
+-        RINOK(RememberBitVector(sd, numEmptyStreams, &emptyFiles));
++        RINOK(RememberBitVector(sd, numEmptyStreams, &emptyFiles))
+         break;
+       }
+       case k7zIdWinAttrib:
+@@ -1245,22 +1255,22 @@ static SRes SzReadHeader2(
+         CSzData sdSwitch;
+         CSzData *sdPtr;
+         SzBitUi32s_Free(&p->Attribs, allocMain);
+-        RINOK(ReadBitVector(sd, numFiles, &p->Attribs.Defs, allocMain));
++        RINOK(ReadBitVector(sd, numFiles, &p->Attribs.Defs, allocMain))
+ 
+-        SZ_READ_BYTE(external);
++        SZ_READ_BYTE(external)
+         if (external == 0)
+           sdPtr = sd;
+         else
+         {
+           UInt32 index;
+-          RINOK(SzReadNumber32(sd, &index));
++          RINOK(SzReadNumber32(sd, &index))
+           if (index >= *numTempBufs)
+             return SZ_ERROR_ARCHIVE;
+           sdSwitch.Data = (tempBufs)[index].data;
+           sdSwitch.Size = (tempBufs)[index].size;
+           sdPtr = &sdSwitch;
+         }
+-        RINOK(ReadUi32s(sdPtr, numFiles, &p->Attribs, allocMain));
++        RINOK(ReadUi32s(sdPtr, numFiles, &p->Attribs, allocMain))
+         break;
+       }
+       /*
+@@ -1273,11 +1283,11 @@ static SRes SzReadHeader2(
+         break;
+       }
+       */
+-      case k7zIdMTime: RINOK(ReadTime(&p->MTime, numFiles, sd, tempBufs, *numTempBufs, allocMain)); break;
+-      case k7zIdCTime: RINOK(ReadTime(&p->CTime, numFiles, sd, tempBufs, *numTempBufs, allocMain)); break;
++      case k7zIdMTime: RINOK(ReadTime(&p->MTime, numFiles, sd, tempBufs, *numTempBufs, allocMain)) break;
++      case k7zIdCTime: RINOK(ReadTime(&p->CTime, numFiles, sd, tempBufs, *numTempBufs, allocMain)) break;
+       default:
+       {
+-        SKIP_DATA(sd, size);
++        SKIP_DATA(sd, size)
+       }
+     }
+   }
+@@ -1288,10 +1298,10 @@ static SRes SzReadHeader2(
+   for (;;)
+   {
+     UInt64 type;
+-    RINOK(ReadID(sd, &type));
++    RINOK(ReadID(sd, &type))
+     if (type == k7zIdEnd)
+       break;
+-    RINOK(SkipData(sd));
++    RINOK(SkipData(sd))
+   }
+ 
+   {
+@@ -1303,40 +1313,37 @@ static SRes SzReadHeader2(
+     UInt64 unpackPos = 0;
+     const Byte *digestsDefs = NULL;
+     const Byte *digestsVals = NULL;
+-    UInt32 digestsValsIndex = 0;
+-    UInt32 digestIndex;
+-    Byte allDigestsDefined = 0;
++    UInt32 digestIndex = 0;
+     Byte isDirMask = 0;
+     Byte crcMask = 0;
+     Byte mask = 0x80;
+     
+-    MY_ALLOC(UInt32, p->FolderToFile, p->db.NumFolders + 1, allocMain);
+-    MY_ALLOC_ZE(UInt32, p->FileToFolder, p->NumFiles, allocMain);
+-    MY_ALLOC(UInt64, p->UnpackPositions, p->NumFiles + 1, allocMain);
+-    MY_ALLOC_ZE(Byte, p->IsDirs, (p->NumFiles + 7) >> 3, allocMain);
++    MY_ALLOC(UInt32, p->FolderToFile, p->db.NumFolders + 1, allocMain)
++    MY_ALLOC_ZE(UInt32, p->FileToFolder, p->NumFiles, allocMain)
++    MY_ALLOC(UInt64, p->UnpackPositions, p->NumFiles + 1, allocMain)
++    MY_ALLOC_ZE(Byte, p->IsDirs, (p->NumFiles + 7) >> 3, allocMain)
+ 
+-    RINOK(SzBitUi32s_Alloc(&p->CRCs, p->NumFiles, allocMain));
++    RINOK(SzBitUi32s_Alloc(&p->CRCs, p->NumFiles, allocMain))
+ 
+     if (ssi.sdCRCs.Size != 0)
+     {
+-      SZ_READ_BYTE_SD(&ssi.sdCRCs, allDigestsDefined);
++      Byte allDigestsDefined = 0;
++      SZ_READ_BYTE_SD_NOCHECK(&ssi.sdCRCs, allDigestsDefined)
+       if (allDigestsDefined)
+         digestsVals = ssi.sdCRCs.Data;
+       else
+       {
+-        size_t numBytes = (ssi.NumSubDigests + 7) >> 3;
++        const size_t numBytes = (ssi.NumSubDigests + 7) >> 3;
+         digestsDefs = ssi.sdCRCs.Data;
+         digestsVals = digestsDefs + numBytes;
+       }
+     }
+ 
+-    digestIndex = 0;
+-    
+     for (i = 0; i < numFiles; i++, mask >>= 1)
+     {
+       if (mask == 0)
+       {
+-        UInt32 byteIndex = (i - 1) >> 3;
++        const UInt32 byteIndex = (i - 1) >> 3;
+         p->IsDirs[byteIndex] = isDirMask;
+         p->CRCs.Defs[byteIndex] = crcMask;
+         isDirMask = 0;
+@@ -1374,18 +1381,17 @@ static SRes SzReadHeader2(
+           numSubStreams = 1;
+           if (ssi.sdNumSubStreams.Data)
+           {
+-            RINOK(SzReadNumber32(&ssi.sdNumSubStreams, &numSubStreams));
++            RINOK(SzReadNumber32(&ssi.sdNumSubStreams, &numSubStreams))
+           }
+           remSubStreams = numSubStreams;
+           if (numSubStreams != 0)
+             break;
+           {
+-            UInt64 folderUnpackSize = SzAr_GetFolderUnpackSize(&p->db, folderIndex);
++            const UInt64 folderUnpackSize = SzAr_GetFolderUnpackSize(&p->db, folderIndex);
+             unpackPos += folderUnpackSize;
+             if (unpackPos < folderUnpackSize)
+               return SZ_ERROR_ARCHIVE;
+           }
+-
+           folderIndex++;
+         }
+       }
+@@ -1397,47 +1403,44 @@ static SRes SzReadHeader2(
+       
+       if (--remSubStreams == 0)
+       {
+-        UInt64 folderUnpackSize = SzAr_GetFolderUnpackSize(&p->db, folderIndex);
+-        UInt64 startFolderUnpackPos = p->UnpackPositions[p->FolderToFile[folderIndex]];
++        const UInt64 folderUnpackSize = SzAr_GetFolderUnpackSize(&p->db, folderIndex);
++        const UInt64 startFolderUnpackPos = p->UnpackPositions[p->FolderToFile[folderIndex]];
+         if (folderUnpackSize < unpackPos - startFolderUnpackPos)
+           return SZ_ERROR_ARCHIVE;
+         unpackPos = startFolderUnpackPos + folderUnpackSize;
+         if (unpackPos < folderUnpackSize)
+           return SZ_ERROR_ARCHIVE;
+ 
+-        if (numSubStreams == 1 && SzBitWithVals_Check(&p->db.FolderCRCs, i))
++        if (numSubStreams == 1 && SzBitWithVals_Check(&p->db.FolderCRCs, folderIndex))
+         {
+           p->CRCs.Vals[i] = p->db.FolderCRCs.Vals[folderIndex];
+           crcMask |= mask;
+         }
+-        else if (allDigestsDefined || (digestsDefs && SzBitArray_Check(digestsDefs, digestIndex)))
+-        {
+-          p->CRCs.Vals[i] = GetUi32(digestsVals + (size_t)digestsValsIndex * 4);
+-          digestsValsIndex++;
+-          crcMask |= mask;
+-        }
+-        
+         folderIndex++;
+       }
+       else
+       {
+         UInt64 v;
+-        RINOK(ReadNumber(&ssi.sdSizes, &v));
++        RINOK(ReadNumber(&ssi.sdSizes, &v))
+         unpackPos += v;
+         if (unpackPos < v)
+           return SZ_ERROR_ARCHIVE;
+-        if (allDigestsDefined || (digestsDefs && SzBitArray_Check(digestsDefs, digestIndex)))
++      }
++      if ((crcMask & mask) == 0 && digestsVals)
++      {
++        if (!digestsDefs || SzBitArray_Check(digestsDefs, digestIndex))
+         {
+-          p->CRCs.Vals[i] = GetUi32(digestsVals + (size_t)digestsValsIndex * 4);
+-          digestsValsIndex++;
++          p->CRCs.Vals[i] = GetUi32(digestsVals);
++          digestsVals += 4;
+           crcMask |= mask;
+         }
++        digestIndex++;
+       }
+     }
+ 
+     if (mask != 0x80)
+     {
+-      UInt32 byteIndex = (i - 1) >> 3;
++      const UInt32 byteIndex = (i - 1) >> 3;
+       p->IsDirs[byteIndex] = isDirMask;
+       p->CRCs.Defs[byteIndex] = crcMask;
+     }
+@@ -1454,7 +1457,7 @@ static SRes SzReadHeader2(
+         break;
+       if (!ssi.sdNumSubStreams.Data)
+         return SZ_ERROR_ARCHIVE;
+-      RINOK(SzReadNumber32(&ssi.sdNumSubStreams, &numSubStreams));
++      RINOK(SzReadNumber32(&ssi.sdNumSubStreams, &numSubStreams))
+       if (numSubStreams != 0)
+         return SZ_ERROR_ARCHIVE;
+       /*
+@@ -1479,7 +1482,7 @@ static SRes SzReadHeader2(
+ static SRes SzReadHeader(
+     CSzArEx *p,
+     CSzData *sd,
+-    ILookInStream *inStream,
++    ILookInStreamPtr inStream,
+     ISzAllocPtr allocMain,
+     ISzAllocPtr allocTemp)
+ {
+@@ -1498,7 +1501,7 @@ static SRes SzReadHeader(
+   for (i = 0; i < NUM_ADDITIONAL_STREAMS_MAX; i++)
+     Buf_Free(tempBufs + i, allocTemp);
+ 
+-  RINOK(res);
++  RINOK(res)
+ 
+   if (sd->Size != 0)
+     return SZ_ERROR_FAIL;
+@@ -1508,7 +1511,7 @@ static SRes SzReadHeader(
+ 
+ static SRes SzArEx_Open2(
+     CSzArEx *p,
+-    ILookInStream *inStream,
++    ILookInStreamPtr inStream,
+     ISzAllocPtr allocMain,
+     ISzAllocPtr allocTemp)
+ {
+@@ -1521,9 +1524,9 @@ static SRes SzArEx_Open2(
+   SRes res;
+ 
+   startArcPos = 0;
+-  RINOK(ILookInStream_Seek(inStream, &startArcPos, SZ_SEEK_CUR));
++  RINOK(ILookInStream_Seek(inStream, &startArcPos, SZ_SEEK_CUR))
+ 
+-  RINOK(LookInStream_Read2(inStream, header, k7zStartHeaderSize, SZ_ERROR_NO_ARCHIVE));
++  RINOK(LookInStream_Read2(inStream, header, k7zStartHeaderSize, SZ_ERROR_NO_ARCHIVE))
+ 
+   if (!TestSignatureCandidate(header))
+     return SZ_ERROR_NO_ARCHIVE;
+@@ -1552,14 +1555,14 @@ static SRes SzArEx_Open2(
+ 
+   {
+     Int64 pos = 0;
+-    RINOK(ILookInStream_Seek(inStream, &pos, SZ_SEEK_END));
++    RINOK(ILookInStream_Seek(inStream, &pos, SZ_SEEK_END))
+     if ((UInt64)pos < (UInt64)startArcPos + nextHeaderOffset ||
+         (UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset ||
+         (UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize)
+       return SZ_ERROR_INPUT_EOF;
+   }
+ 
+-  RINOK(LookInStream_SeekTo(inStream, (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset));
++  RINOK(LookInStream_SeekTo(inStream, (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset))
+ 
+   if (!Buf_Create(&buf, nextHeaderSizeT, allocTemp))
+     return SZ_ERROR_MEM;
+@@ -1634,10 +1637,10 @@ static SRes SzArEx_Open2(
+ }
+ 
+ 
+-SRes SzArEx_Open(CSzArEx *p, ILookInStream *inStream,
++SRes SzArEx_Open(CSzArEx *p, ILookInStreamPtr inStream,
+     ISzAllocPtr allocMain, ISzAllocPtr allocTemp)
+ {
+-  SRes res = SzArEx_Open2(p, inStream, allocMain, allocTemp);
++  const SRes res = SzArEx_Open2(p, inStream, allocMain, allocTemp);
+   if (res != SZ_OK)
+     SzArEx_Free(p, allocMain);
+   return res;
+@@ -1646,7 +1649,7 @@ SRes SzArEx_Open(CSzArEx *p, ILookInStream *inStream,
+ 
+ SRes SzArEx_Extract(
+     const CSzArEx *p,
+-    ILookInStream *inStream,
++    ILookInStreamPtr inStream,
+     UInt32 fileIndex,
+     UInt32 *blockIndex,
+     Byte **tempBuf,
+@@ -1656,7 +1659,7 @@ SRes SzArEx_Extract(
+     ISzAllocPtr allocMain,
+     ISzAllocPtr allocTemp)
+ {
+-  UInt32 folderIndex = p->FileToFolder[fileIndex];
++  const UInt32 folderIndex = p->FileToFolder[fileIndex];
+   SRes res = SZ_OK;
+   
+   *offset = 0;
+@@ -1673,13 +1676,13 @@ SRes SzArEx_Extract(
+ 
+   if (*tempBuf == NULL || *blockIndex != folderIndex)
+   {
+-    UInt64 unpackSizeSpec = SzAr_GetFolderUnpackSize(&p->db, folderIndex);
++    const UInt64 unpackSizeSpec = SzAr_GetFolderUnpackSize(&p->db, folderIndex);
+     /*
+     UInt64 unpackSizeSpec =
+         p->UnpackPositions[p->FolderToFile[(size_t)folderIndex + 1]] -
+         p->UnpackPositions[p->FolderToFile[folderIndex]];
+     */
+-    size_t unpackSize = (size_t)unpackSizeSpec;
++    const size_t unpackSize = (size_t)unpackSizeSpec;
+ 
+     if (unpackSize != unpackSizeSpec)
+       return SZ_ERROR_MEM;
+@@ -1707,7 +1710,7 @@ SRes SzArEx_Extract(
+ 
+   if (res == SZ_OK)
+   {
+-    UInt64 unpackPos = p->UnpackPositions[fileIndex];
++    const UInt64 unpackPos = p->UnpackPositions[fileIndex];
+     *offset = (size_t)(unpackPos - p->UnpackPositions[p->FolderToFile[folderIndex]]);
+     *outSizeProcessed = (size_t)(p->UnpackPositions[(size_t)fileIndex + 1] - unpackPos);
+     if (*offset + *outSizeProcessed > *outBufferSize)
+@@ -1723,8 +1726,8 @@ SRes SzArEx_Extract(
+ 
+ size_t SzArEx_GetFileNameUtf16(const CSzArEx *p, size_t fileIndex, UInt16 *dest)
+ {
+-  size_t offs = p->FileNameOffsets[fileIndex];
+-  size_t len = p->FileNameOffsets[fileIndex + 1] - offs;
++  const size_t offs = p->FileNameOffsets[fileIndex];
++  const size_t len = p->FileNameOffsets[fileIndex + 1] - offs;
+   if (dest != 0)
+   {
+     size_t i;
+diff --git a/third_party/lzma_sdk/C/7zBuf.h b/third_party/lzma_sdk/C/7zBuf.h
+index 81d1b5b646cadd987f590d9089064fb259aa654a..c0ba8a7b607beefbdcde06a5798ed6f8f7052eef 100644
+--- a/third_party/lzma_sdk/C/7zBuf.h
++++ b/third_party/lzma_sdk/C/7zBuf.h
+@@ -1,8 +1,8 @@
+ /* 7zBuf.h -- Byte Buffer
+-2017-04-03 : Igor Pavlov : Public domain */
++2023-03-04 : Igor Pavlov : Public domain */
+ 
+-#ifndef __7Z_BUF_H
+-#define __7Z_BUF_H
++#ifndef ZIP7_INC_7Z_BUF_H
++#define ZIP7_INC_7Z_BUF_H
+ 
+ #include "7zTypes.h"
+ 
+diff --git a/third_party/lzma_sdk/C/7zCrc.c b/third_party/lzma_sdk/C/7zCrc.c
+index c0cc9bc7812e0b34c56a0d2dc4f412c2666ee808..6e2db9eab192b47b0b8452e45fa2e34a99d4334f 100644
+--- a/third_party/lzma_sdk/C/7zCrc.c
++++ b/third_party/lzma_sdk/C/7zCrc.c
+@@ -1,182 +1,218 @@
+-/* 7zCrc.c -- CRC32 init
+-2021-04-01 : Igor Pavlov : Public domain */
++/* 7zCrc.c -- CRC32 calculation and init
++2024-03-01 : Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+ 
+ #include "7zCrc.h"
+ #include "CpuArch.h"
+ 
+-#define kCrcPoly 0xEDB88320
++// for debug:
++// #define __ARM_FEATURE_CRC32 1
+ 
+-#ifdef MY_CPU_LE
+-  #define CRC_NUM_TABLES 8
+-#else
+-  #define CRC_NUM_TABLES 9
++#ifdef __ARM_FEATURE_CRC32
++// #pragma message("__ARM_FEATURE_CRC32")
++#define Z7_CRC_HW_FORCE
++#endif
+ 
+-  #define CRC_UINT32_SWAP(v) ((v >> 24) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | (v << 24))
++// #define Z7_CRC_DEBUG_BE
++#ifdef Z7_CRC_DEBUG_BE
++#undef MY_CPU_LE
++#define MY_CPU_BE
++#endif
+ 
+-  UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
+-  UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
++#ifdef Z7_CRC_HW_FORCE
++  #define Z7_CRC_NUM_TABLES_USE  1
++#else
++#ifdef Z7_CRC_NUM_TABLES
++  #define Z7_CRC_NUM_TABLES_USE  Z7_CRC_NUM_TABLES
++#else
++  #define Z7_CRC_NUM_TABLES_USE  12
++#endif
+ #endif
+ 
+-#ifndef MY_CPU_BE
+-  UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
+-  UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
+-#endif
+-
+-typedef UInt32 (MY_FAST_CALL *CRC_FUNC)(UInt32 v, const void *data, size_t size, const UInt32 *table);
+-
+-extern
+-CRC_FUNC g_CrcUpdateT4;
+-CRC_FUNC g_CrcUpdateT4;
+-extern
+-CRC_FUNC g_CrcUpdateT8;
+-CRC_FUNC g_CrcUpdateT8;
+-extern
+-CRC_FUNC g_CrcUpdateT0_32;
+-CRC_FUNC g_CrcUpdateT0_32;
+-extern
+-CRC_FUNC g_CrcUpdateT0_64;
+-CRC_FUNC g_CrcUpdateT0_64;
+-extern
+-CRC_FUNC g_CrcUpdate;
+-CRC_FUNC g_CrcUpdate;
+-
+-UInt32 g_CrcTable[256 * CRC_NUM_TABLES];
+-
+-UInt32 MY_FAST_CALL CrcUpdate(UInt32 v, const void *data, size_t size)
+-{
+-  return g_CrcUpdate(v, data, size, g_CrcTable);
+-}
++#if Z7_CRC_NUM_TABLES_USE < 1
++  #error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES
++#endif
+ 
+-UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size)
+-{
+-  return g_CrcUpdate(CRC_INIT_VAL, data, size, g_CrcTable) ^ CRC_INIT_VAL;
+-}
++#if defined(MY_CPU_LE) || (Z7_CRC_NUM_TABLES_USE == 1)
++  #define Z7_CRC_NUM_TABLES_TOTAL  Z7_CRC_NUM_TABLES_USE
++#else
++  #define Z7_CRC_NUM_TABLES_TOTAL  (Z7_CRC_NUM_TABLES_USE + 1)
++#endif
+ 
+-#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
++#ifndef Z7_CRC_HW_FORCE
+ 
+-UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table);
+-UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table)
++#if Z7_CRC_NUM_TABLES_USE == 1 \
++   || (!defined(MY_CPU_LE) && !defined(MY_CPU_BE))
++#define CRC_UPDATE_BYTE_2(crc, b)   (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
++#define Z7_CRC_UPDATE_T1_FUNC_NAME  CrcUpdateGT1
++static UInt32 Z7_FASTCALL Z7_CRC_UPDATE_T1_FUNC_NAME(UInt32 v, const void *data, size_t size)
+ {
++  const UInt32 *table = g_CrcTable;
+   const Byte *p = (const Byte *)data;
+-  const Byte *pEnd = p + size;
+-  for (; p != pEnd; p++)
++  const Byte *lim = p + size;
++  for (; p != lim; p++)
+     v = CRC_UPDATE_BYTE_2(v, *p);
+   return v;
+ }
++#endif
+ 
+ 
++#if Z7_CRC_NUM_TABLES_USE != 1
++#ifndef MY_CPU_BE
++  #define FUNC_NAME_LE_2(s)   CrcUpdateT ## s
++  #define FUNC_NAME_LE_1(s)   FUNC_NAME_LE_2(s)
++  #define FUNC_NAME_LE        FUNC_NAME_LE_1(Z7_CRC_NUM_TABLES_USE)
++  UInt32 Z7_FASTCALL FUNC_NAME_LE (UInt32 v, const void *data, size_t size, const UInt32 *table);
++#endif
++#ifndef MY_CPU_LE
++  #define FUNC_NAME_BE_2(s)   CrcUpdateT1_BeT ## s
++  #define FUNC_NAME_BE_1(s)   FUNC_NAME_BE_2(s)
++  #define FUNC_NAME_BE        FUNC_NAME_BE_1(Z7_CRC_NUM_TABLES_USE)
++  UInt32 Z7_FASTCALL FUNC_NAME_BE (UInt32 v, const void *data, size_t size, const UInt32 *table);
++#endif
++#endif
++
++#endif // Z7_CRC_HW_FORCE
++
+ /* ---------- hardware CRC ---------- */
+ 
+ #ifdef MY_CPU_LE
+ 
+ #if defined(MY_CPU_ARM_OR_ARM64)
+-
+ // #pragma message("ARM*")
+ 
+-  #if defined(_MSC_VER)
+-    #if defined(MY_CPU_ARM64)
+-    #if (_MSC_VER >= 1910)
+-        // #define USE_ARM64_CRC
+-    #endif
+-    #endif
+-  #elif (defined(__clang__) && (__clang_major__ >= 3)) \
+-     || (defined(__GNUC__) && (__GNUC__ > 4))
++  #if (defined(__clang__) && (__clang_major__ >= 3)) \
++     || defined(__GNUC__) && (__GNUC__ >= 6) && defined(MY_CPU_ARM64) \
++     || defined(__GNUC__) && (__GNUC__ >= 8)
+       #if !defined(__ARM_FEATURE_CRC32)
+-        // #define __ARM_FEATURE_CRC32 1
+-          #if (!defined(__clang__) || (__clang_major__ > 3)) // fix these numbers
+-            // #define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc")))
++//        #pragma message("!defined(__ARM_FEATURE_CRC32)")
++Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
++        #define __ARM_FEATURE_CRC32 1
++Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
++        #define Z7_ARM_FEATURE_CRC32_WAS_SET
++        #if defined(__clang__)
++          #if defined(MY_CPU_ARM64)
++            #define ATTRIB_CRC __attribute__((__target__("crc")))
++          #else
++            #define ATTRIB_CRC __attribute__((__target__("armv8-a,crc")))
+           #endif
++        #else
++          #if defined(MY_CPU_ARM64)
++#if !defined(Z7_GCC_VERSION) || (Z7_GCC_VERSION >= 60000)
++            #define ATTRIB_CRC __attribute__((__target__("+crc")))
++#endif
++          #else
++#if !defined(Z7_GCC_VERSION) || (__GNUC__  >= 8)
++#if defined(__ARM_FP) && __GNUC__ >= 8
++// for -mfloat-abi=hard: similar to <arm_acle.h>
++            #define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc+simd")))
++#else
++            #define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc")))
++#endif
++#endif
++          #endif
++        #endif
+       #endif
+       #if defined(__ARM_FEATURE_CRC32)
+-        // #define USE_ARM64_CRC
+-        // #include <arm_acle.h>
++      // #pragma message("<arm_acle.h>")
++/*
++arm_acle.h (GCC):
++    before Nov 17, 2017:
++#ifdef __ARM_FEATURE_CRC32
++
++    Nov 17, 2017: gcc10.0  (gcc 9.2.0) checked:
++#if __ARM_ARCH >= 8
++#pragma GCC target ("arch=armv8-a+crc")
++
++    Aug 22, 2019: GCC 8.4?, 9.2.1, 10.1:
++#ifdef __ARM_FEATURE_CRC32
++#ifdef __ARM_FP
++#pragma GCC target ("arch=armv8-a+crc+simd")
++#else
++#pragma GCC target ("arch=armv8-a+crc")
++#endif
++*/
++#if defined(__ARM_ARCH) && __ARM_ARCH < 8
++#if defined(Z7_GCC_VERSION) && (__GNUC__ ==   8) && (Z7_GCC_VERSION <  80400) \
++ || defined(Z7_GCC_VERSION) && (__GNUC__ ==   9) && (Z7_GCC_VERSION <  90201) \
++ || defined(Z7_GCC_VERSION) && (__GNUC__ ==  10) && (Z7_GCC_VERSION < 100100)
++Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
++// #pragma message("#define __ARM_ARCH 8")
++#undef  __ARM_ARCH
++#define __ARM_ARCH 8
++Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
++#endif
++#endif
++        #define Z7_CRC_HW_USE
++        #include <arm_acle.h>
+       #endif
++  #elif defined(_MSC_VER)
++    #if defined(MY_CPU_ARM64)
++    #if (_MSC_VER >= 1910)
++    #ifdef __clang__
++       // #define Z7_CRC_HW_USE
++       // #include <arm_acle.h>
++    #else
++       #define Z7_CRC_HW_USE
++       #include <intrin.h>
++    #endif
++    #endif
++    #endif
+   #endif
+ 
+-#else
+-
+-// no hardware CRC
+-
+-// #define USE_CRC_EMU
+-
+-#ifdef USE_CRC_EMU
+-
+-#pragma message("ARM64 CRC emulation")
++#else // non-ARM*
+ 
+-MY_FORCE_INLINE
+-UInt32 __crc32b(UInt32 v, UInt32 data)
+-{
+-  const UInt32 *table = g_CrcTable;
+-  v = CRC_UPDATE_BYTE_2(v, (Byte)data);
+-  return v;
+-}
+-
+-MY_FORCE_INLINE
+-UInt32 __crc32w(UInt32 v, UInt32 data)
+-{
+-  const UInt32 *table = g_CrcTable;
+-  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+-  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+-  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+-  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+-  return v;
+-}
++// #define Z7_CRC_HW_USE // for debug : we can test HW-branch of code
++#ifdef Z7_CRC_HW_USE
++#include "7zCrcEmu.h"
++#endif
+ 
+-MY_FORCE_INLINE
+-UInt32 __crc32d(UInt32 v, UInt64 data)
+-{
+-  const UInt32 *table = g_CrcTable;
+-  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+-  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+-  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+-  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+-  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+-  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+-  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+-  v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
+-  return v;
+-}
++#endif // non-ARM*
+ 
+-#endif // USE_CRC_EMU
+ 
+-#endif // defined(MY_CPU_ARM64) && defined(MY_CPU_LE)
+ 
++#if defined(Z7_CRC_HW_USE)
+ 
++// #pragma message("USE ARM HW CRC")
+ 
+-#if defined(USE_ARM64_CRC) || defined(USE_CRC_EMU)
++#ifdef MY_CPU_64BIT
++  #define CRC_HW_WORD_TYPE  UInt64
++  #define CRC_HW_WORD_FUNC  __crc32d
++#else
++  #define CRC_HW_WORD_TYPE  UInt32
++  #define CRC_HW_WORD_FUNC  __crc32w
++#endif
+ 
+-#define T0_32_UNROLL_BYTES (4 * 4)
+-#define T0_64_UNROLL_BYTES (4 * 8)
++#define CRC_HW_UNROLL_BYTES (sizeof(CRC_HW_WORD_TYPE) * 4)
+ 
+-#ifndef ATTRIB_CRC
+-#define ATTRIB_CRC
++#ifdef ATTRIB_CRC
++  ATTRIB_CRC
+ #endif
+-// #pragma message("USE ARM HW CRC")
+-
+-ATTRIB_CRC
+-UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table);
+-ATTRIB_CRC
+-UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table)
++Z7_NO_INLINE
++#ifdef Z7_CRC_HW_FORCE
++         UInt32 Z7_FASTCALL CrcUpdate
++#else
++  static UInt32 Z7_FASTCALL CrcUpdate_HW
++#endif
++    (UInt32 v, const void *data, size_t size)
+ {
+   const Byte *p = (const Byte *)data;
+-  UNUSED_VAR(table);
+-
+-  for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_32_UNROLL_BYTES - 1)) != 0; size--)
++  for (; size != 0 && ((unsigned)(ptrdiff_t)p & (CRC_HW_UNROLL_BYTES - 1)) != 0; size--)
+     v = __crc32b(v, *p++);
+-
+-  if (size >= T0_32_UNROLL_BYTES)
++  if (size >= CRC_HW_UNROLL_BYTES)
+   {
+     const Byte *lim = p + size;
+-    size &= (T0_32_UNROLL_BYTES - 1);
++    size &= CRC_HW_UNROLL_BYTES - 1;
+     lim -= size;
+     do
+     {
+-      v = __crc32w(v, *(const UInt32 *)(const void *)(p));
+-      v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4;
+-      v = __crc32w(v, *(const UInt32 *)(const void *)(p));
+-      v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4;
++      v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p));
++      v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p + sizeof(CRC_HW_WORD_TYPE)));
++      p += 2 * sizeof(CRC_HW_WORD_TYPE);
++      v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p));
++      v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p + sizeof(CRC_HW_WORD_TYPE)));
++      p += 2 * sizeof(CRC_HW_WORD_TYPE);
+     }
+     while (p != lim);
+   }
+@@ -187,136 +223,198 @@ UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, cons
+   return v;
+ }
+ 
+-ATTRIB_CRC
+-UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table);
+-ATTRIB_CRC
+-UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table)
++#ifdef Z7_ARM_FEATURE_CRC32_WAS_SET
++Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
++#undef __ARM_FEATURE_CRC32
++Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
++#undef Z7_ARM_FEATURE_CRC32_WAS_SET
++#endif
++
++#endif // defined(Z7_CRC_HW_USE)
++#endif // MY_CPU_LE
++
++
++
++#ifndef Z7_CRC_HW_FORCE
++
++#if defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME)
++/*
++typedef UInt32 (Z7_FASTCALL *Z7_CRC_UPDATE_WITH_TABLE_FUNC)
++    (UInt32 v, const void *data, size_t size, const UInt32 *table);
++Z7_CRC_UPDATE_WITH_TABLE_FUNC g_CrcUpdate;
++*/
++static unsigned g_Crc_Algo;
++#if (!defined(MY_CPU_LE) && !defined(MY_CPU_BE))
++static unsigned g_Crc_Be;
++#endif
++#endif // defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME)
++
++
++
++Z7_NO_INLINE
++#ifdef Z7_CRC_HW_USE
++  static UInt32 Z7_FASTCALL CrcUpdate_Base
++#else
++         UInt32 Z7_FASTCALL CrcUpdate
++#endif
++    (UInt32 crc, const void *data, size_t size)
+ {
+-  const Byte *p = (const Byte *)data;
+-  UNUSED_VAR(table);
++#if Z7_CRC_NUM_TABLES_USE == 1
++    return Z7_CRC_UPDATE_T1_FUNC_NAME(crc, data, size);
++#else // Z7_CRC_NUM_TABLES_USE != 1
++#ifdef Z7_CRC_UPDATE_T1_FUNC_NAME
++  if (g_Crc_Algo == 1)
++    return Z7_CRC_UPDATE_T1_FUNC_NAME(crc, data, size);
++#endif
+ 
+-  for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_64_UNROLL_BYTES - 1)) != 0; size--)
+-    v = __crc32b(v, *p++);
++#ifdef MY_CPU_LE
++    return FUNC_NAME_LE(crc, data, size, g_CrcTable);
++#elif defined(MY_CPU_BE)
++    return FUNC_NAME_BE(crc, data, size, g_CrcTable);
++#else
++  if (g_Crc_Be)
++    return FUNC_NAME_BE(crc, data, size, g_CrcTable);
++  else
++    return FUNC_NAME_LE(crc, data, size, g_CrcTable);
++#endif
++#endif // Z7_CRC_NUM_TABLES_USE != 1
++}
+ 
+-  if (size >= T0_64_UNROLL_BYTES)
+-  {
+-    const Byte *lim = p + size;
+-    size &= (T0_64_UNROLL_BYTES - 1);
+-    lim -= size;
+-    do
+-    {
+-      v = __crc32d(v, *(const UInt64 *)(const void *)(p));
+-      v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8;
+-      v = __crc32d(v, *(const UInt64 *)(const void *)(p));
+-      v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8;
+-    }
+-    while (p != lim);
+-  }
+-  
+-  for (; size != 0; size--)
+-    v = __crc32b(v, *p++);
+ 
+-  return v;
++#ifdef Z7_CRC_HW_USE
++Z7_NO_INLINE
++UInt32 Z7_FASTCALL CrcUpdate(UInt32 crc, const void *data, size_t size)
++{
++  if (g_Crc_Algo == 0)
++    return CrcUpdate_HW(crc, data, size);
++  return CrcUpdate_Base(crc, data, size);
+ }
++#endif
+ 
+-#endif // defined(USE_ARM64_CRC) || defined(USE_CRC_EMU)
++#endif // !defined(Z7_CRC_HW_FORCE)
+ 
+-#endif // MY_CPU_LE
+ 
+ 
++UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size)
++{
++  return CrcUpdate(CRC_INIT_VAL, data, size) ^ CRC_INIT_VAL;
++}
++
+ 
++MY_ALIGN(64)
++UInt32 g_CrcTable[256 * Z7_CRC_NUM_TABLES_TOTAL];
+ 
+-void MY_FAST_CALL CrcGenerateTable()
++
++void Z7_FASTCALL CrcGenerateTable(void)
+ {
+   UInt32 i;
+   for (i = 0; i < 256; i++)
+   {
++#if defined(Z7_CRC_HW_FORCE)
++    g_CrcTable[i] = __crc32b(i, 0);
++#else
++    #define kCrcPoly 0xEDB88320
+     UInt32 r = i;
+     unsigned j;
+     for (j = 0; j < 8; j++)
+       r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
+     g_CrcTable[i] = r;
++#endif
+   }
+-  for (i = 256; i < 256 * CRC_NUM_TABLES; i++)
++  for (i = 256; i < 256 * Z7_CRC_NUM_TABLES_USE; i++)
+   {
+-    UInt32 r = g_CrcTable[(size_t)i - 256];
++    const UInt32 r = g_CrcTable[(size_t)i - 256];
+     g_CrcTable[i] = g_CrcTable[r & 0xFF] ^ (r >> 8);
+   }
+ 
+-  #if CRC_NUM_TABLES < 4
+-  
+-  g_CrcUpdate = CrcUpdateT1;
+-  
+-  #else
+- 
+-  #ifdef MY_CPU_LE
++#if !defined(Z7_CRC_HW_FORCE) && \
++    (defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME) || defined(MY_CPU_BE))
+ 
+-    g_CrcUpdateT4 = CrcUpdateT4;
+-    g_CrcUpdate = CrcUpdateT4;
++#if Z7_CRC_NUM_TABLES_USE <= 1
++    g_Crc_Algo = 1;
++#else // Z7_CRC_NUM_TABLES_USE <= 1
+ 
+-    #if CRC_NUM_TABLES >= 8
+-      g_CrcUpdateT8 = CrcUpdateT8;
+-  
+-      #ifdef MY_CPU_X86_OR_AMD64
+-      if (!CPU_Is_InOrder())
+-      #endif
+-        g_CrcUpdate = CrcUpdateT8;
+-    #endif
+-
+-  #else
++#if defined(MY_CPU_LE)
++    g_Crc_Algo = Z7_CRC_NUM_TABLES_USE;
++#else // !defined(MY_CPU_LE)
+   {
+-    #ifndef MY_CPU_BE
++#ifndef MY_CPU_BE
+     UInt32 k = 0x01020304;
+     const Byte *p = (const Byte *)&k;
+     if (p[0] == 4 && p[1] == 3)
+-    {
+-      g_CrcUpdateT4 = CrcUpdateT4;
+-      g_CrcUpdate = CrcUpdateT4;
+-      #if CRC_NUM_TABLES >= 8
+-      g_CrcUpdateT8 = CrcUpdateT8;
+-      g_CrcUpdate = CrcUpdateT8;
+-      #endif
+-    }
++      g_Crc_Algo = Z7_CRC_NUM_TABLES_USE;
+     else if (p[0] != 1 || p[1] != 2)
+-      g_CrcUpdate = CrcUpdateT1;
++      g_Crc_Algo = 1;
+     else
+-    #endif
++#endif // MY_CPU_BE
+     {
+-      for (i = 256 * CRC_NUM_TABLES - 1; i >= 256; i--)
++      for (i = 256 * Z7_CRC_NUM_TABLES_TOTAL - 1; i >= 256; i--)
+       {
+-        UInt32 x = g_CrcTable[(size_t)i - 256];
+-        g_CrcTable[i] = CRC_UINT32_SWAP(x);
++        const UInt32 x = g_CrcTable[(size_t)i - 256];
++        g_CrcTable[i] = Z7_BSWAP32(x);
+       }
+-      g_CrcUpdateT4 = CrcUpdateT1_BeT4;
+-      g_CrcUpdate = CrcUpdateT1_BeT4;
+-      #if CRC_NUM_TABLES >= 8
+-      g_CrcUpdateT8 = CrcUpdateT1_BeT8;
+-      g_CrcUpdate = CrcUpdateT1_BeT8;
+-      #endif
++#if defined(Z7_CRC_UPDATE_T1_FUNC_NAME)
++      g_Crc_Algo = Z7_CRC_NUM_TABLES_USE;
++#endif
++#if (!defined(MY_CPU_LE) && !defined(MY_CPU_BE))
++      g_Crc_Be = 1;
++#endif
+     }
+   }
+-  #endif
+-  #endif
++#endif  // !defined(MY_CPU_LE)
+ 
+-  #ifdef MY_CPU_LE
+-    #ifdef USE_ARM64_CRC
+-      if (CPU_IsSupported_CRC32())
+-      {
+-        g_CrcUpdateT0_32 = CrcUpdateT0_32;
+-        g_CrcUpdateT0_64 = CrcUpdateT0_64;
+-        g_CrcUpdate =
+-          #if defined(MY_CPU_ARM)
+-            CrcUpdateT0_32;
+-          #else
+-            CrcUpdateT0_64;
+-          #endif
+-      }
+-    #endif
+-    
+-    #ifdef USE_CRC_EMU
+-      g_CrcUpdateT0_32 = CrcUpdateT0_32;
+-      g_CrcUpdateT0_64 = CrcUpdateT0_64;
+-      g_CrcUpdate = CrcUpdateT0_64;
+-    #endif
++#ifdef MY_CPU_LE
++#ifdef Z7_CRC_HW_USE
++  if (CPU_IsSupported_CRC32())
++    g_Crc_Algo = 0;
++#endif // Z7_CRC_HW_USE
++#endif // MY_CPU_LE
++
++#endif // Z7_CRC_NUM_TABLES_USE <= 1
++#endif // g_Crc_Algo was declared
++}
++
++Z7_CRC_UPDATE_FUNC z7_GetFunc_CrcUpdate(unsigned algo)
++{
++  if (algo == 0)
++    return &CrcUpdate;
++
++#if defined(Z7_CRC_HW_USE)
++  if (algo == sizeof(CRC_HW_WORD_TYPE) * 8)
++  {
++#ifdef Z7_CRC_HW_FORCE
++    return &CrcUpdate;
++#else
++    if (g_Crc_Algo == 0)
++      return &CrcUpdate_HW;
++#endif
++  }
++#endif
++
++#ifndef Z7_CRC_HW_FORCE
++  if (algo == Z7_CRC_NUM_TABLES_USE)
++    return
++  #ifdef Z7_CRC_HW_USE
++      &CrcUpdate_Base;
++  #else
++      &CrcUpdate;
+   #endif
++#endif
++
++  return NULL;
+ }
++
++#undef kCrcPoly
++#undef Z7_CRC_NUM_TABLES_USE
++#undef Z7_CRC_NUM_TABLES_TOTAL
++#undef CRC_UPDATE_BYTE_2
++#undef FUNC_NAME_LE_2
++#undef FUNC_NAME_LE_1
++#undef FUNC_NAME_LE
++#undef FUNC_NAME_BE_2
++#undef FUNC_NAME_BE_1
++#undef FUNC_NAME_BE
++
++#undef CRC_HW_UNROLL_BYTES
++#undef CRC_HW_WORD_FUNC
++#undef CRC_HW_WORD_TYPE
+diff --git a/third_party/lzma_sdk/C/7zCrc.h b/third_party/lzma_sdk/C/7zCrc.h
+index 8fd5795871540bcfafb399285a585ecd4cdc3e6f..3e6d408b9287f702eb3eae84fa49425209386eee 100644
+--- a/third_party/lzma_sdk/C/7zCrc.h
++++ b/third_party/lzma_sdk/C/7zCrc.h
+@@ -1,8 +1,8 @@
+ /* 7zCrc.h -- CRC32 calculation
+-2013-01-18 : Igor Pavlov : Public domain */
++2024-01-22 : Igor Pavlov : Public domain */
+ 
+-#ifndef __7Z_CRC_H
+-#define __7Z_CRC_H
++#ifndef ZIP7_INC_7Z_CRC_H
++#define ZIP7_INC_7Z_CRC_H
+ 
+ #include "7zTypes.h"
+ 
+@@ -11,14 +11,17 @@ EXTERN_C_BEGIN
+ extern UInt32 g_CrcTable[];
+ 
+ /* Call CrcGenerateTable one time before other CRC functions */
+-void MY_FAST_CALL CrcGenerateTable(void);
++void Z7_FASTCALL CrcGenerateTable(void);
+ 
+ #define CRC_INIT_VAL 0xFFFFFFFF
+ #define CRC_GET_DIGEST(crc) ((crc) ^ CRC_INIT_VAL)
+ #define CRC_UPDATE_BYTE(crc, b) (g_CrcTable[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
+ 
+-UInt32 MY_FAST_CALL CrcUpdate(UInt32 crc, const void *data, size_t size);
+-UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size);
++UInt32 Z7_FASTCALL CrcUpdate(UInt32 crc, const void *data, size_t size);
++UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size);
++
++typedef UInt32 (Z7_FASTCALL *Z7_CRC_UPDATE_FUNC)(UInt32 v, const void *data, size_t size);
++Z7_CRC_UPDATE_FUNC z7_GetFunc_CrcUpdate(unsigned algo);
+ 
+ EXTERN_C_END
+ 
+diff --git a/third_party/lzma_sdk/C/7zCrcOpt.c b/third_party/lzma_sdk/C/7zCrcOpt.c
+index 69fad9ca2b476ed8589f6bee8a08de37be47297e..9408017ed4fc1cda88c7e5e4ce6b7a61bdd633eb 100644
+--- a/third_party/lzma_sdk/C/7zCrcOpt.c
++++ b/third_party/lzma_sdk/C/7zCrcOpt.c
+@@ -1,117 +1,199 @@
+-/* 7zCrcOpt.c -- CRC32 calculation
+-2021-02-09 : Igor Pavlov : Public domain */
++/* 7zCrcOpt.c -- CRC32 calculation (optimized functions)
++2023-12-07 : Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+ 
+ #include "CpuArch.h"
+ 
++#if !defined(Z7_CRC_NUM_TABLES) || Z7_CRC_NUM_TABLES > 1
++
++// for debug only : define Z7_CRC_DEBUG_BE to test big-endian code in little-endian cpu
++// #define Z7_CRC_DEBUG_BE
++#ifdef Z7_CRC_DEBUG_BE
++#undef MY_CPU_LE
++#define MY_CPU_BE
++#endif
++
++// the value Z7_CRC_NUM_TABLES_USE must be defined to same value as in 7zCrc.c
++#ifdef Z7_CRC_NUM_TABLES
++#define Z7_CRC_NUM_TABLES_USE  Z7_CRC_NUM_TABLES
++#else
++#define Z7_CRC_NUM_TABLES_USE  12
++#endif
++
++#if Z7_CRC_NUM_TABLES_USE % 4     || \
++    Z7_CRC_NUM_TABLES_USE < 4 * 1 || \
++    Z7_CRC_NUM_TABLES_USE > 4 * 6
++  #error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES
++#endif
++
++
+ #ifndef MY_CPU_BE
+ 
+-#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
++#define CRC_UPDATE_BYTE_2(crc, b)  (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
+ 
+-UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
+-UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table)
+-{
+-  const Byte *p = (const Byte *)data;
+-  for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++)
+-    v = CRC_UPDATE_BYTE_2(v, *p);
+-  for (; size >= 4; size -= 4, p += 4)
+-  {
+-    v ^= *(const UInt32 *)(const void *)p;
+-    v =
+-          (table + 0x300)[((v      ) & 0xFF)]
+-        ^ (table + 0x200)[((v >>  8) & 0xFF)]
+-        ^ (table + 0x100)[((v >> 16) & 0xFF)]
+-        ^ (table + 0x000)[((v >> 24))];
+-  }
+-  for (; size > 0; size--, p++)
+-    v = CRC_UPDATE_BYTE_2(v, *p);
+-  return v;
+-}
++#define Q(n, d) \
++    ( (table + ((n) * 4 + 3) * 0x100)[(Byte)(d)] \
++    ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 1 * 8) & 0xFF] \
++    ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 2 * 8) & 0xFF] \
++    ^ (table + ((n) * 4 + 0) * 0x100)[((d) >> 3 * 8)] )
++
++#define R(a)  *((const UInt32 *)(const void *)p + (a))
++
++#define CRC_FUNC_PRE_LE2(step) \
++UInt32 Z7_FASTCALL CrcUpdateT ## step (UInt32 v, const void *data, size_t size, const UInt32 *table)
+ 
+-UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
+-UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table)
++#define CRC_FUNC_PRE_LE(step)   \
++        CRC_FUNC_PRE_LE2(step); \
++        CRC_FUNC_PRE_LE2(step)
++
++CRC_FUNC_PRE_LE(Z7_CRC_NUM_TABLES_USE)
+ {
+   const Byte *p = (const Byte *)data;
+-  for (; size > 0 && ((unsigned)(ptrdiff_t)p & 7) != 0; size--, p++)
++  const Byte *lim;
++  for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC_NUM_TABLES_USE & 4))) != 0; size--, p++)
+     v = CRC_UPDATE_BYTE_2(v, *p);
+-  for (; size >= 8; size -= 8, p += 8)
++  lim = p + size;
++  if (size >= Z7_CRC_NUM_TABLES_USE)
+   {
+-    UInt32 d;
+-    v ^= *(const UInt32 *)(const void *)p;
+-    v =
+-          (table + 0x700)[((v      ) & 0xFF)]
+-        ^ (table + 0x600)[((v >>  8) & 0xFF)]
+-        ^ (table + 0x500)[((v >> 16) & 0xFF)]
+-        ^ (table + 0x400)[((v >> 24))];
+-    d = *((const UInt32 *)(const void *)p + 1);
+-    v ^=
+-          (table + 0x300)[((d      ) & 0xFF)]
+-        ^ (table + 0x200)[((d >>  8) & 0xFF)]
+-        ^ (table + 0x100)[((d >> 16) & 0xFF)]
+-        ^ (table + 0x000)[((d >> 24))];
++    lim -= Z7_CRC_NUM_TABLES_USE;
++    do
++    {
++      v ^= R(0);
++      {
++#if Z7_CRC_NUM_TABLES_USE == 1 * 4
++        v = Q(0, v);
++#else
++#define U2(r, op) \
++        { d = R(r);  x op Q(Z7_CRC_NUM_TABLES_USE / 4 - 1 - (r), d); }
++        UInt32 d, x;
++        U2(1, =)
++#if Z7_CRC_NUM_TABLES_USE >= 3 * 4
++#define U(r)  U2(r, ^=)
++        U(2)
++#if Z7_CRC_NUM_TABLES_USE >= 4 * 4
++        U(3)
++#if Z7_CRC_NUM_TABLES_USE >= 5 * 4
++        U(4)
++#if Z7_CRC_NUM_TABLES_USE >= 6 * 4
++        U(5)
++#if Z7_CRC_NUM_TABLES_USE >= 7 * 4
++#error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES
++#endif
++#endif
++#endif
++#endif
++#endif
++#undef U
++#undef U2
++        v = x ^ Q(Z7_CRC_NUM_TABLES_USE / 4 - 1, v);
++#endif
++      }
++      p += Z7_CRC_NUM_TABLES_USE;
++    }
++    while (p <= lim);
++    lim += Z7_CRC_NUM_TABLES_USE;
+   }
+-  for (; size > 0; size--, p++)
++  for (; p < lim; p++)
+     v = CRC_UPDATE_BYTE_2(v, *p);
+   return v;
+ }
+ 
++#undef CRC_UPDATE_BYTE_2
++#undef R
++#undef Q
++#undef CRC_FUNC_PRE_LE
++#undef CRC_FUNC_PRE_LE2
++
+ #endif
+ 
+ 
++
++
+ #ifndef MY_CPU_LE
+ 
+-#define CRC_UINT32_SWAP(v) ((v >> 24) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | (v << 24))
++#define CRC_UPDATE_BYTE_2_BE(crc, b)  (table[((crc) >> 24) ^ (b)] ^ ((crc) << 8))
+ 
+-#define CRC_UPDATE_BYTE_2_BE(crc, b) (table[(((crc) >> 24) ^ (b))] ^ ((crc) << 8))
++#define Q(n, d) \
++    ( (table + ((n) * 4 + 0) * 0x100)[((d)) & 0xFF] \
++    ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \
++    ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \
++    ^ (table + ((n) * 4 + 3) * 0x100)[((d) >> 3 * 8)] )
+ 
+-UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table)
+-{
+-  const Byte *p = (const Byte *)data;
+-  table += 0x100;
+-  v = CRC_UINT32_SWAP(v);
+-  for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++)
+-    v = CRC_UPDATE_BYTE_2_BE(v, *p);
+-  for (; size >= 4; size -= 4, p += 4)
+-  {
+-    v ^= *(const UInt32 *)(const void *)p;
+-    v =
+-          (table + 0x000)[((v      ) & 0xFF)]
+-        ^ (table + 0x100)[((v >>  8) & 0xFF)]
+-        ^ (table + 0x200)[((v >> 16) & 0xFF)]
+-        ^ (table + 0x300)[((v >> 24))];
+-  }
+-  for (; size > 0; size--, p++)
+-    v = CRC_UPDATE_BYTE_2_BE(v, *p);
+-  return CRC_UINT32_SWAP(v);
+-}
++#ifdef Z7_CRC_DEBUG_BE
++  #define R(a)  GetBe32a((const UInt32 *)(const void *)p + (a))
++#else
++  #define R(a)         *((const UInt32 *)(const void *)p + (a))
++#endif
++
++
++#define CRC_FUNC_PRE_BE2(step) \
++UInt32 Z7_FASTCALL CrcUpdateT1_BeT ## step (UInt32 v, const void *data, size_t size, const UInt32 *table)
+ 
+-UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table)
++#define CRC_FUNC_PRE_BE(step)   \
++        CRC_FUNC_PRE_BE2(step); \
++        CRC_FUNC_PRE_BE2(step)
++
++CRC_FUNC_PRE_BE(Z7_CRC_NUM_TABLES_USE)
+ {
+   const Byte *p = (const Byte *)data;
++  const Byte *lim;
+   table += 0x100;
+-  v = CRC_UINT32_SWAP(v);
+-  for (; size > 0 && ((unsigned)(ptrdiff_t)p & 7) != 0; size--, p++)
++  v = Z7_BSWAP32(v);
++  for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC_NUM_TABLES_USE & 4))) != 0; size--, p++)
+     v = CRC_UPDATE_BYTE_2_BE(v, *p);
+-  for (; size >= 8; size -= 8, p += 8)
++  lim = p + size;
++  if (size >= Z7_CRC_NUM_TABLES_USE)
+   {
+-    UInt32 d;
+-    v ^= *(const UInt32 *)(const void *)p;
+-    v =
+-          (table + 0x400)[((v      ) & 0xFF)]
+-        ^ (table + 0x500)[((v >>  8) & 0xFF)]
+-        ^ (table + 0x600)[((v >> 16) & 0xFF)]
+-        ^ (table + 0x700)[((v >> 24))];
+-    d = *((const UInt32 *)(const void *)p + 1);
+-    v ^=
+-          (table + 0x000)[((d      ) & 0xFF)]
+-        ^ (table + 0x100)[((d >>  8) & 0xFF)]
+-        ^ (table + 0x200)[((d >> 16) & 0xFF)]
+-        ^ (table + 0x300)[((d >> 24))];
++    lim -= Z7_CRC_NUM_TABLES_USE;
++    do
++    {
++      v ^= R(0);
++      {
++#if Z7_CRC_NUM_TABLES_USE == 1 * 4
++        v = Q(0, v);
++#else
++#define U2(r, op) \
++        { d = R(r);  x op Q(Z7_CRC_NUM_TABLES_USE / 4 - 1 - (r), d); }
++        UInt32 d, x;
++        U2(1, =)
++#if Z7_CRC_NUM_TABLES_USE >= 3 * 4
++#define U(r)  U2(r, ^=)
++        U(2)
++#if Z7_CRC_NUM_TABLES_USE >= 4 * 4
++        U(3)
++#if Z7_CRC_NUM_TABLES_USE >= 5 * 4
++        U(4)
++#if Z7_CRC_NUM_TABLES_USE >= 6 * 4
++        U(5)
++#if Z7_CRC_NUM_TABLES_USE >= 7 * 4
++#error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES
++#endif
++#endif
++#endif
++#endif
++#endif
++#undef U
++#undef U2
++        v = x ^ Q(Z7_CRC_NUM_TABLES_USE / 4 - 1, v);
++#endif
++      }
++      p += Z7_CRC_NUM_TABLES_USE;
++    }
++    while (p <= lim);
++    lim += Z7_CRC_NUM_TABLES_USE;
+   }
+-  for (; size > 0; size--, p++)
++  for (; p < lim; p++)
+     v = CRC_UPDATE_BYTE_2_BE(v, *p);
+-  return CRC_UINT32_SWAP(v);
++  return Z7_BSWAP32(v);
+ }
+ 
++#undef CRC_UPDATE_BYTE_2_BE
++#undef R
++#undef Q
++#undef CRC_FUNC_PRE_BE
++#undef CRC_FUNC_PRE_BE2
++
++#endif
++#undef Z7_CRC_NUM_TABLES_USE
+ #endif
+diff --git a/third_party/lzma_sdk/C/7zDec.c b/third_party/lzma_sdk/C/7zDec.c
+index fbfd016e1e6a11bd976f39fe90de619166910e0e..520cbfd833be22e29a248eb05aca7e81d55d2ac4 100644
+--- a/third_party/lzma_sdk/C/7zDec.c
++++ b/third_party/lzma_sdk/C/7zDec.c
+@@ -1,11 +1,11 @@
+ /* 7zDec.c -- Decoding from 7z folder
+-2021-02-09 : Igor Pavlov : Public domain */
++: Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+ 
+ #include <string.h>
+ 
+-/* #define _7ZIP_PPMD_SUPPPORT */
++/* #define Z7_PPMD_SUPPORT */
+ 
+ #include "7z.h"
+ #include "7zCrc.h"
+@@ -16,27 +16,50 @@
+ #include "Delta.h"
+ #include "LzmaDec.h"
+ #include "Lzma2Dec.h"
+-#ifdef _7ZIP_PPMD_SUPPPORT
++#ifdef Z7_PPMD_SUPPORT
+ #include "Ppmd7.h"
+ #endif
+ 
+ #define k_Copy 0
+-#ifndef _7Z_NO_METHOD_LZMA2
++#ifndef Z7_NO_METHOD_LZMA2
+ #define k_LZMA2 0x21
+ #endif
+ #define k_LZMA  0x30101
+ #define k_BCJ2  0x303011B
+-#ifndef _7Z_NO_METHODS_FILTERS
++
++#if !defined(Z7_NO_METHODS_FILTERS)
++#define Z7_USE_BRANCH_FILTER
++#endif
++
++#if !defined(Z7_NO_METHODS_FILTERS) || \
++     defined(Z7_USE_NATIVE_BRANCH_FILTER) && defined(MY_CPU_ARM64)
++#define Z7_USE_FILTER_ARM64
++#ifndef Z7_USE_BRANCH_FILTER
++#define Z7_USE_BRANCH_FILTER
++#endif
++#define k_ARM64 0xa
++#endif
++
++#if !defined(Z7_NO_METHODS_FILTERS) || \
++     defined(Z7_USE_NATIVE_BRANCH_FILTER) && defined(MY_CPU_ARMT)
++#define Z7_USE_FILTER_ARMT
++#ifndef Z7_USE_BRANCH_FILTER
++#define Z7_USE_BRANCH_FILTER
++#endif
++#define k_ARMT  0x3030701
++#endif
++
++#ifndef Z7_NO_METHODS_FILTERS
+ #define k_Delta 3
++#define k_RISCV 0xb
+ #define k_BCJ   0x3030103
+ #define k_PPC   0x3030205
+ #define k_IA64  0x3030401
+ #define k_ARM   0x3030501
+-#define k_ARMT  0x3030701
+ #define k_SPARC 0x3030805
+ #endif
+ 
+-#ifdef _7ZIP_PPMD_SUPPPORT
++#ifdef Z7_PPMD_SUPPORT
+ 
+ #define k_PPMD 0x30401
+ 
+@@ -49,12 +72,12 @@ typedef struct
+   UInt64 processed;
+   BoolInt extra;
+   SRes res;
+-  const ILookInStream *inStream;
++  ILookInStreamPtr inStream;
+ } CByteInToLook;
+ 
+-static Byte ReadByte(const IByteIn *pp)
++static Byte ReadByte(IByteInPtr pp)
+ {
+-  CByteInToLook *p = CONTAINER_FROM_VTBL(pp, CByteInToLook, vt);
++  Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CByteInToLook)
+   if (p->cur != p->end)
+     return *p->cur++;
+   if (p->res == SZ_OK)
+@@ -67,13 +90,13 @@ static Byte ReadByte(const IByteIn *pp)
+     p->cur = p->begin;
+     p->end = p->begin + size;
+     if (size != 0)
+-      return *p->cur++;;
++      return *p->cur++;
+   }
+   p->extra = True;
+   return 0;
+ }
+ 
+-static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, const ILookInStream *inStream,
++static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStreamPtr inStream,
+     Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain)
+ {
+   CPpmd7 ppmd;
+@@ -138,14 +161,14 @@ static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, c
+ #endif
+ 
+ 
+-static SRes SzDecodeLzma(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStream *inStream,
++static SRes SzDecodeLzma(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStreamPtr inStream,
+     Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain)
+ {
+   CLzmaDec state;
+   SRes res = SZ_OK;
+ 
+-  LzmaDec_Construct(&state);
+-  RINOK(LzmaDec_AllocateProbs(&state, props, propsSize, allocMain));
++  LzmaDec_CONSTRUCT(&state)
++  RINOK(LzmaDec_AllocateProbs(&state, props, propsSize, allocMain))
+   state.dic = outBuffer;
+   state.dicBufSize = outSize;
+   LzmaDec_Init(&state);
+@@ -196,18 +219,18 @@ static SRes SzDecodeLzma(const Byte *props, unsigned propsSize, UInt64 inSize, I
+ }
+ 
+ 
+-#ifndef _7Z_NO_METHOD_LZMA2
++#ifndef Z7_NO_METHOD_LZMA2
+ 
+-static SRes SzDecodeLzma2(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStream *inStream,
++static SRes SzDecodeLzma2(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStreamPtr inStream,
+     Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain)
+ {
+   CLzma2Dec state;
+   SRes res = SZ_OK;
+ 
+-  Lzma2Dec_Construct(&state);
++  Lzma2Dec_CONSTRUCT(&state)
+   if (propsSize != 1)
+     return SZ_ERROR_DATA;
+-  RINOK(Lzma2Dec_AllocateProbs(&state, props[0], allocMain));
++  RINOK(Lzma2Dec_AllocateProbs(&state, props[0], allocMain))
+   state.decoder.dic = outBuffer;
+   state.decoder.dicBufSize = outSize;
+   Lzma2Dec_Init(&state);
+@@ -257,7 +280,7 @@ static SRes SzDecodeLzma2(const Byte *props, unsigned propsSize, UInt64 inSize,
+ #endif
+ 
+ 
+-static SRes SzDecodeCopy(UInt64 inSize, ILookInStream *inStream, Byte *outBuffer)
++static SRes SzDecodeCopy(UInt64 inSize, ILookInStreamPtr inStream, Byte *outBuffer)
+ {
+   while (inSize > 0)
+   {
+@@ -265,13 +288,13 @@ static SRes SzDecodeCopy(UInt64 inSize, ILookInStream *inStream, Byte *outBuffer
+     size_t curSize = (1 << 18);
+     if (curSize > inSize)
+       curSize = (size_t)inSize;
+-    RINOK(ILookInStream_Look(inStream, &inBuf, &curSize));
++    RINOK(ILookInStream_Look(inStream, &inBuf, &curSize))
+     if (curSize == 0)
+       return SZ_ERROR_INPUT_EOF;
+     memcpy(outBuffer, inBuf, curSize);
+     outBuffer += curSize;
+     inSize -= curSize;
+-    RINOK(ILookInStream_Skip(inStream, curSize));
++    RINOK(ILookInStream_Skip(inStream, curSize))
+   }
+   return SZ_OK;
+ }
+@@ -282,15 +305,16 @@ static BoolInt IS_MAIN_METHOD(UInt32 m)
+   {
+     case k_Copy:
+     case k_LZMA:
+-    #ifndef _7Z_NO_METHOD_LZMA2
++  #ifndef Z7_NO_METHOD_LZMA2
+     case k_LZMA2:
+-    #endif
+-    #ifdef _7ZIP_PPMD_SUPPPORT
++  #endif
++  #ifdef Z7_PPMD_SUPPORT
+     case k_PPMD:
+-    #endif
++  #endif
+       return True;
++    default:
++      return False;
+   }
+-  return False;
+ }
+ 
+ static BoolInt IS_SUPPORTED_CODER(const CSzCoderInfo *c)
+@@ -317,7 +341,7 @@ static SRes CheckSupportedFolder(const CSzFolder *f)
+   }
+   
+   
+-  #ifndef _7Z_NO_METHODS_FILTERS
++  #if defined(Z7_USE_BRANCH_FILTER)
+ 
+   if (f->NumCoders == 2)
+   {
+@@ -333,13 +357,21 @@ static SRes CheckSupportedFolder(const CSzFolder *f)
+       return SZ_ERROR_UNSUPPORTED;
+     switch ((UInt32)c->MethodID)
+     {
++    #if !defined(Z7_NO_METHODS_FILTERS)
+       case k_Delta:
+       case k_BCJ:
+       case k_PPC:
+       case k_IA64:
+       case k_SPARC:
+       case k_ARM:
++      case k_RISCV:
++    #endif
++    #ifdef Z7_USE_FILTER_ARM64
++      case k_ARM64:
++    #endif
++    #ifdef Z7_USE_FILTER_ARMT
+       case k_ARMT:
++    #endif
+         break;
+       default:
+         return SZ_ERROR_UNSUPPORTED;
+@@ -372,15 +404,16 @@ static SRes CheckSupportedFolder(const CSzFolder *f)
+   return SZ_ERROR_UNSUPPORTED;
+ }
+ 
+-#ifndef _7Z_NO_METHODS_FILTERS
+-#define CASE_BRA_CONV(isa) case k_ ## isa: isa ## _Convert(outBuffer, outSize, 0, 0); break;
+-#endif
++
++
++
++
+ 
+ static SRes SzFolder_Decode2(const CSzFolder *folder,
+     const Byte *propsData,
+     const UInt64 *unpackSizes,
+     const UInt64 *packPositions,
+-    ILookInStream *inStream, UInt64 startPos,
++    ILookInStreamPtr inStream, UInt64 startPos,
+     Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain,
+     Byte *tempBuf[])
+ {
+@@ -389,7 +422,7 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
+   SizeT tempSize3 = 0;
+   Byte *tempBuf3 = 0;
+ 
+-  RINOK(CheckSupportedFolder(folder));
++  RINOK(CheckSupportedFolder(folder))
+ 
+   for (ci = 0; ci < folder->NumCoders; ci++)
+   {
+@@ -404,8 +437,8 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
+       SizeT outSizeCur = outSize;
+       if (folder->NumCoders == 4)
+       {
+-        UInt32 indices[] = { 3, 2, 0 };
+-        UInt64 unpackSize = unpackSizes[ci];
++        const UInt32 indices[] = { 3, 2, 0 };
++        const UInt64 unpackSize = unpackSizes[ci];
+         si = indices[ci];
+         if (ci < 2)
+         {
+@@ -431,37 +464,37 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
+       }
+       offset = packPositions[si];
+       inSize = packPositions[(size_t)si + 1] - offset;
+-      RINOK(LookInStream_SeekTo(inStream, startPos + offset));
++      RINOK(LookInStream_SeekTo(inStream, startPos + offset))
+ 
+       if (coder->MethodID == k_Copy)
+       {
+         if (inSize != outSizeCur) /* check it */
+           return SZ_ERROR_DATA;
+-        RINOK(SzDecodeCopy(inSize, inStream, outBufCur));
++        RINOK(SzDecodeCopy(inSize, inStream, outBufCur))
+       }
+       else if (coder->MethodID == k_LZMA)
+       {
+-        RINOK(SzDecodeLzma(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain));
++        RINOK(SzDecodeLzma(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain))
+       }
+-      #ifndef _7Z_NO_METHOD_LZMA2
++    #ifndef Z7_NO_METHOD_LZMA2
+       else if (coder->MethodID == k_LZMA2)
+       {
+-        RINOK(SzDecodeLzma2(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain));
++        RINOK(SzDecodeLzma2(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain))
+       }
+-      #endif
+-      #ifdef _7ZIP_PPMD_SUPPPORT
++    #endif
++    #ifdef Z7_PPMD_SUPPORT
+       else if (coder->MethodID == k_PPMD)
+       {
+-        RINOK(SzDecodePpmd(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain));
++        RINOK(SzDecodePpmd(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain))
+       }
+-      #endif
++    #endif
+       else
+         return SZ_ERROR_UNSUPPORTED;
+     }
+     else if (coder->MethodID == k_BCJ2)
+     {
+-      UInt64 offset = packPositions[1];
+-      UInt64 s3Size = packPositions[2] - offset;
++      const UInt64 offset = packPositions[1];
++      const UInt64 s3Size = packPositions[2] - offset;
+       
+       if (ci != 3)
+         return SZ_ERROR_UNSUPPORTED;
+@@ -473,8 +506,8 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
+       if (!tempBuf[2] && tempSizes[2] != 0)
+         return SZ_ERROR_MEM;
+       
+-      RINOK(LookInStream_SeekTo(inStream, startPos + offset));
+-      RINOK(SzDecodeCopy(s3Size, inStream, tempBuf[2]));
++      RINOK(LookInStream_SeekTo(inStream, startPos + offset))
++      RINOK(SzDecodeCopy(s3Size, inStream, tempBuf[2]))
+ 
+       if ((tempSizes[0] & 3) != 0 ||
+           (tempSizes[1] & 3) != 0 ||
+@@ -493,26 +526,22 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
+         p.destLim = outBuffer + outSize;
+         
+         Bcj2Dec_Init(&p);
+-        RINOK(Bcj2Dec_Decode(&p));
++        RINOK(Bcj2Dec_Decode(&p))
+ 
+         {
+           unsigned i;
+           for (i = 0; i < 4; i++)
+             if (p.bufs[i] != p.lims[i])
+               return SZ_ERROR_DATA;
+-          
+-          if (!Bcj2Dec_IsFinished(&p))
+-            return SZ_ERROR_DATA;
+-
+-          if (p.dest != p.destLim
+-             || p.state != BCJ2_STREAM_MAIN)
++          if (p.dest != p.destLim || !Bcj2Dec_IsMaybeFinished(&p))
+             return SZ_ERROR_DATA;
+         }
+       }
+     }
+-    #ifndef _7Z_NO_METHODS_FILTERS
++#if defined(Z7_USE_BRANCH_FILTER)
+     else if (ci == 1)
+     {
++#if !defined(Z7_NO_METHODS_FILTERS)
+       if (coder->MethodID == k_Delta)
+       {
+         if (coder->PropsSize != 1)
+@@ -522,31 +551,75 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
+           Delta_Init(state);
+           Delta_Decode(state, (unsigned)(propsData[coder->PropsOffset]) + 1, outBuffer, outSize);
+         }
++        continue;
+       }
+-      else
++#endif
++
++#ifdef Z7_USE_FILTER_ARM64
++      if (coder->MethodID == k_ARM64)
++      {
++        UInt32 pc = 0;
++        if (coder->PropsSize == 4)
++        {
++          pc = GetUi32(propsData + coder->PropsOffset);
++          if (pc & 3)
++            return SZ_ERROR_UNSUPPORTED;
++        }
++        else if (coder->PropsSize != 0)
++          return SZ_ERROR_UNSUPPORTED;
++        z7_BranchConv_ARM64_Dec(outBuffer, outSize, pc);
++        continue;
++      }
++#endif
++
++#if !defined(Z7_NO_METHODS_FILTERS)
++      if (coder->MethodID == k_RISCV)
++      {
++        UInt32 pc = 0;
++        if (coder->PropsSize == 4)
++        {
++          pc = GetUi32(propsData + coder->PropsOffset);
++          if (pc & 1)
++            return SZ_ERROR_UNSUPPORTED;
++        }
++        else if (coder->PropsSize != 0)
++          return SZ_ERROR_UNSUPPORTED;
++        z7_BranchConv_RISCV_Dec(outBuffer, outSize, pc);
++        continue;
++      }
++#endif
++
++#if !defined(Z7_NO_METHODS_FILTERS) || defined(Z7_USE_FILTER_ARMT)
+       {
+         if (coder->PropsSize != 0)
+           return SZ_ERROR_UNSUPPORTED;
++       #define CASE_BRA_CONV(isa) case k_ ## isa: Z7_BRANCH_CONV_DEC(isa)(outBuffer, outSize, 0); break; // pc = 0;
+         switch (coder->MethodID)
+         {
++         #if !defined(Z7_NO_METHODS_FILTERS)
+           case k_BCJ:
+           {
+-            UInt32 state;
+-            x86_Convert_Init(state);
+-            x86_Convert(outBuffer, outSize, 0, &state, 0);
++            UInt32 state = Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL;
++            z7_BranchConvSt_X86_Dec(outBuffer, outSize, 0, &state); // pc = 0
+             break;
+           }
+-          CASE_BRA_CONV(PPC)
++          case k_PPC: Z7_BRANCH_CONV_DEC_2(BranchConv_PPC)(outBuffer, outSize, 0); break; // pc = 0;
++          // CASE_BRA_CONV(PPC)
+           CASE_BRA_CONV(IA64)
+           CASE_BRA_CONV(SPARC)
+           CASE_BRA_CONV(ARM)
++         #endif
++         #if !defined(Z7_NO_METHODS_FILTERS) || defined(Z7_USE_FILTER_ARMT)
+           CASE_BRA_CONV(ARMT)
++         #endif
+           default:
+             return SZ_ERROR_UNSUPPORTED;
+         }
++        continue;
+       }
+-    }
+-    #endif
++#endif
++    } // (c == 1)
++#endif // Z7_USE_BRANCH_FILTER
+     else
+       return SZ_ERROR_UNSUPPORTED;
+   }
+@@ -556,7 +629,7 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
+ 
+ 
+ SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex,
+-    ILookInStream *inStream, UInt64 startPos,
++    ILookInStreamPtr inStream, UInt64 startPos,
+     Byte *outBuffer, size_t outSize,
+     ISzAllocPtr allocMain)
+ {
+diff --git a/third_party/lzma_sdk/C/7zFile.c b/third_party/lzma_sdk/C/7zFile.c
+index 13d2efa47f470d0a3918a86b2f27d55c5e59b870..ba5daa133b9a2347720ceca3bcf5245e434937c7 100644
+--- a/third_party/lzma_sdk/C/7zFile.c
++++ b/third_party/lzma_sdk/C/7zFile.c
+@@ -1,5 +1,5 @@
+ /* 7zFile.c -- File IO
+-2021-04-29 : Igor Pavlov : Public domain */
++2023-04-02 : Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+ 
+@@ -268,7 +268,7 @@ WRes File_Write(CSzFile *p, const void *data, size_t *size)
+       return errno;
+     if (processed == 0)
+       break;
+-    data = (void *)((Byte *)data + (size_t)processed);
++    data = (const void *)((const Byte *)data + (size_t)processed);
+     originalSize -= (size_t)processed;
+     *size += (size_t)processed;
+   }
+@@ -287,7 +287,8 @@ WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin)
+   DWORD moveMethod;
+   UInt32 low = (UInt32)*pos;
+   LONG high = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */
+-  switch (origin)
++  // (int) to eliminate clang warning
++  switch ((int)origin)
+   {
+     case SZ_SEEK_SET: moveMethod = FILE_BEGIN; break;
+     case SZ_SEEK_CUR: moveMethod = FILE_CURRENT; break;
+@@ -308,7 +309,7 @@ WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin)
+   
+   int moveMethod; // = origin;
+ 
+-  switch (origin)
++  switch ((int)origin)
+   {
+     case SZ_SEEK_SET: moveMethod = SEEK_SET; break;
+     case SZ_SEEK_CUR: moveMethod = SEEK_CUR; break;
+@@ -387,10 +388,10 @@ WRes File_GetLength(CSzFile *p, UInt64 *length)
+ 
+ /* ---------- FileSeqInStream ---------- */
+ 
+-static SRes FileSeqInStream_Read(const ISeqInStream *pp, void *buf, size_t *size)
++static SRes FileSeqInStream_Read(ISeqInStreamPtr pp, void *buf, size_t *size)
+ {
+-  CFileSeqInStream *p = CONTAINER_FROM_VTBL(pp, CFileSeqInStream, vt);
+-  WRes wres = File_Read(&p->file, buf, size);
++  Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CFileSeqInStream)
++  const WRes wres = File_Read(&p->file, buf, size);
+   p->wres = wres;
+   return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
+ }
+@@ -403,18 +404,18 @@ void FileSeqInStream_CreateVTable(CFileSeqInStream *p)
+ 
+ /* ---------- FileInStream ---------- */
+ 
+-static SRes FileInStream_Read(const ISeekInStream *pp, void *buf, size_t *size)
++static SRes FileInStream_Read(ISeekInStreamPtr pp, void *buf, size_t *size)
+ {
+-  CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt);
+-  WRes wres = File_Read(&p->file, buf, size);
++  Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CFileInStream)
++  const WRes wres = File_Read(&p->file, buf, size);
+   p->wres = wres;
+   return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
+ }
+ 
+-static SRes FileInStream_Seek(const ISeekInStream *pp, Int64 *pos, ESzSeek origin)
++static SRes FileInStream_Seek(ISeekInStreamPtr pp, Int64 *pos, ESzSeek origin)
+ {
+-  CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt);
+-  WRes wres = File_Seek(&p->file, pos, origin);
++  Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CFileInStream)
++  const WRes wres = File_Seek(&p->file, pos, origin);
+   p->wres = wres;
+   return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
+ }
+@@ -428,10 +429,10 @@ void FileInStream_CreateVTable(CFileInStream *p)
+ 
+ /* ---------- FileOutStream ---------- */
+ 
+-static size_t FileOutStream_Write(const ISeqOutStream *pp, const void *data, size_t size)
++static size_t FileOutStream_Write(ISeqOutStreamPtr pp, const void *data, size_t size)
+ {
+-  CFileOutStream *p = CONTAINER_FROM_VTBL(pp, CFileOutStream, vt);
+-  WRes wres = File_Write(&p->file, data, &size);
++  Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CFileOutStream)
++  const WRes wres = File_Write(&p->file, data, &size);
+   p->wres = wres;
+   return size;
+ }
+diff --git a/third_party/lzma_sdk/C/7zFile.h b/third_party/lzma_sdk/C/7zFile.h
+index 788abb6b9db0590d50ca057c9929aba0a98abad2..f5069cd9ee2edd9c310e772eb4a4c8253acfb263 100644
+--- a/third_party/lzma_sdk/C/7zFile.h
++++ b/third_party/lzma_sdk/C/7zFile.h
+@@ -1,8 +1,8 @@
+ /* 7zFile.h -- File IO
+-2021-02-15 : Igor Pavlov : Public domain */
++2023-03-05 : Igor Pavlov : Public domain */
+ 
+-#ifndef __7Z_FILE_H
+-#define __7Z_FILE_H
++#ifndef ZIP7_INC_FILE_H
++#define ZIP7_INC_FILE_H
+ 
+ #ifdef _WIN32
+ #define USE_WINDOWS_FILE
+@@ -10,7 +10,8 @@
+ #endif
+ 
+ #ifdef USE_WINDOWS_FILE
+-#include <windows.h>
++#include "7zWindows.h"
++
+ #else
+ // note: USE_FOPEN mode is limited to 32-bit file size
+ // #define USE_FOPEN
+diff --git a/third_party/lzma_sdk/C/7zStream.c b/third_party/lzma_sdk/C/7zStream.c
+index 28a14604fb1bbcb40cdca86d6dc0e03d219c8fc3..74e75b65aa2540203b71a5a1363e428e03afab95 100644
+--- a/third_party/lzma_sdk/C/7zStream.c
++++ b/third_party/lzma_sdk/C/7zStream.c
+@@ -1,5 +1,5 @@
+ /* 7zStream.c -- 7z Stream functions
+-2021-02-09 : Igor Pavlov : Public domain */
++2023-04-02 : Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+ 
+@@ -7,12 +7,33 @@
+ 
+ #include "7zTypes.h"
+ 
+-SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType)
++
++SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize)
++{
++  size_t size = *processedSize;
++  *processedSize = 0;
++  while (size != 0)
++  {
++    size_t cur = size;
++    const SRes res = ISeqInStream_Read(stream, buf, &cur);
++    *processedSize += cur;
++    buf = (void *)((Byte *)buf + cur);
++    size -= cur;
++    if (res != SZ_OK)
++      return res;
++    if (cur == 0)
++      return SZ_OK;
++  }
++  return SZ_OK;
++}
++
++/*
++SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType)
+ {
+   while (size != 0)
+   {
+     size_t processed = size;
+-    RINOK(ISeqInStream_Read(stream, buf, &processed));
++    RINOK(ISeqInStream_Read(stream, buf, &processed))
+     if (processed == 0)
+       return errorType;
+     buf = (void *)((Byte *)buf + processed);
+@@ -21,42 +42,44 @@ SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes
+   return SZ_OK;
+ }
+ 
+-SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size)
++SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size)
+ {
+   return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF);
+ }
++*/
++
+ 
+-SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf)
++SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf)
+ {
+   size_t processed = 1;
+-  RINOK(ISeqInStream_Read(stream, buf, &processed));
++  RINOK(ISeqInStream_Read(stream, buf, &processed))
+   return (processed == 1) ? SZ_OK : SZ_ERROR_INPUT_EOF;
+ }
+ 
+ 
+ 
+-SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset)
++SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset)
+ {
+   Int64 t = (Int64)offset;
+   return ILookInStream_Seek(stream, &t, SZ_SEEK_SET);
+ }
+ 
+-SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size)
++SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size)
+ {
+   const void *lookBuf;
+   if (*size == 0)
+     return SZ_OK;
+-  RINOK(ILookInStream_Look(stream, &lookBuf, size));
++  RINOK(ILookInStream_Look(stream, &lookBuf, size))
+   memcpy(buf, lookBuf, *size);
+   return ILookInStream_Skip(stream, *size);
+ }
+ 
+-SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType)
++SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType)
+ {
+   while (size != 0)
+   {
+     size_t processed = size;
+-    RINOK(ILookInStream_Read(stream, buf, &processed));
++    RINOK(ILookInStream_Read(stream, buf, &processed))
+     if (processed == 0)
+       return errorType;
+     buf = (void *)((Byte *)buf + processed);
+@@ -65,16 +88,16 @@ SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRe
+   return SZ_OK;
+ }
+ 
+-SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size)
++SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size)
+ {
+   return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF);
+ }
+ 
+ 
+ 
+-#define GET_LookToRead2 CLookToRead2 *p = CONTAINER_FROM_VTBL(pp, CLookToRead2, vt);
++#define GET_LookToRead2  Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CLookToRead2)
+ 
+-static SRes LookToRead2_Look_Lookahead(const ILookInStream *pp, const void **buf, size_t *size)
++static SRes LookToRead2_Look_Lookahead(ILookInStreamPtr pp, const void **buf, size_t *size)
+ {
+   SRes res = SZ_OK;
+   GET_LookToRead2
+@@ -93,7 +116,7 @@ static SRes LookToRead2_Look_Lookahead(const ILookInStream *pp, const void **buf
+   return res;
+ }
+ 
+-static SRes LookToRead2_Look_Exact(const ILookInStream *pp, const void **buf, size_t *size)
++static SRes LookToRead2_Look_Exact(ILookInStreamPtr pp, const void **buf, size_t *size)
+ {
+   SRes res = SZ_OK;
+   GET_LookToRead2
+@@ -113,14 +136,14 @@ static SRes LookToRead2_Look_Exact(const ILookInStream *pp, const void **buf, si
+   return res;
+ }
+ 
+-static SRes LookToRead2_Skip(const ILookInStream *pp, size_t offset)
++static SRes LookToRead2_Skip(ILookInStreamPtr pp, size_t offset)
+ {
+   GET_LookToRead2
+   p->pos += offset;
+   return SZ_OK;
+ }
+ 
+-static SRes LookToRead2_Read(const ILookInStream *pp, void *buf, size_t *size)
++static SRes LookToRead2_Read(ILookInStreamPtr pp, void *buf, size_t *size)
+ {
+   GET_LookToRead2
+   size_t rem = p->size - p->pos;
+@@ -134,7 +157,7 @@ static SRes LookToRead2_Read(const ILookInStream *pp, void *buf, size_t *size)
+   return SZ_OK;
+ }
+ 
+-static SRes LookToRead2_Seek(const ILookInStream *pp, Int64 *pos, ESzSeek origin)
++static SRes LookToRead2_Seek(ILookInStreamPtr pp, Int64 *pos, ESzSeek origin)
+ {
+   GET_LookToRead2
+   p->pos = p->size = 0;
+@@ -153,9 +176,9 @@ void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead)
+ 
+ 
+ 
+-static SRes SecToLook_Read(const ISeqInStream *pp, void *buf, size_t *size)
++static SRes SecToLook_Read(ISeqInStreamPtr pp, void *buf, size_t *size)
+ {
+-  CSecToLook *p = CONTAINER_FROM_VTBL(pp, CSecToLook, vt);
++  Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CSecToLook)
+   return LookInStream_LookRead(p->realStream, buf, size);
+ }
+ 
+@@ -164,9 +187,9 @@ void SecToLook_CreateVTable(CSecToLook *p)
+   p->vt.Read = SecToLook_Read;
+ }
+ 
+-static SRes SecToRead_Read(const ISeqInStream *pp, void *buf, size_t *size)
++static SRes SecToRead_Read(ISeqInStreamPtr pp, void *buf, size_t *size)
+ {
+-  CSecToRead *p = CONTAINER_FROM_VTBL(pp, CSecToRead, vt);
++  Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CSecToRead)
+   return ILookInStream_Read(p->realStream, buf, size);
+ }
+ 
+diff --git a/third_party/lzma_sdk/C/7zTypes.h b/third_party/lzma_sdk/C/7zTypes.h
+index f7d7071830c77af46dbc5545168ed8647ce44579..5b77420a3b2fb73c43be9d56292c75e86c97e59f 100644
+--- a/third_party/lzma_sdk/C/7zTypes.h
++++ b/third_party/lzma_sdk/C/7zTypes.h
+@@ -1,8 +1,8 @@
+ /* 7zTypes.h -- Basic types
+-2022-04-01 : Igor Pavlov : Public domain */
++2024-01-24 : Igor Pavlov : Public domain */
+ 
+-#ifndef __7Z_TYPES_H
+-#define __7Z_TYPES_H
++#ifndef ZIP7_7Z_TYPES_H
++#define ZIP7_7Z_TYPES_H
+ 
+ #ifdef _WIN32
+ /* #include <windows.h> */
+@@ -52,6 +52,11 @@ typedef int SRes;
+     #define MY_ALIGN(n)
+   #endif
+ #else
++  /*
++  // C11/C++11:
++  #include <stdalign.h>
++  #define MY_ALIGN(n) alignas(n)
++  */
+   #define MY_ALIGN(n) __attribute__ ((aligned(n)))
+ #endif
+ 
+@@ -62,7 +67,7 @@ typedef int SRes;
+ typedef unsigned WRes;
+ #define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
+ 
+-// #define MY_HRES_ERROR__INTERNAL_ERROR  MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR)
++// #define MY_HRES_ERROR_INTERNAL_ERROR  MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR)
+ 
+ #else // _WIN32
+ 
+@@ -70,13 +75,13 @@ typedef unsigned WRes;
+ typedef int WRes;
+ 
+ // (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT
+-#define MY__FACILITY_ERRNO  0x800
+-#define MY__FACILITY_WIN32  7
+-#define MY__FACILITY__WRes  MY__FACILITY_ERRNO
++#define MY_FACILITY_ERRNO  0x800
++#define MY_FACILITY_WIN32  7
++#define MY_FACILITY_WRes  MY_FACILITY_ERRNO
+ 
+ #define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \
+           ( (HRESULT)(x) & 0x0000FFFF) \
+-          | (MY__FACILITY__WRes << 16)  \
++          | (MY_FACILITY_WRes << 16)  \
+           | (HRESULT)0x80000000 ))
+ 
+ #define MY_SRes_HRESULT_FROM_WRes(x) \
+@@ -120,17 +125,17 @@ typedef int WRes;
+ #define ERROR_INVALID_REPARSE_DATA  ((HRESULT)0x80071128L)
+ #define ERROR_REPARSE_TAG_INVALID   ((HRESULT)0x80071129L)
+ 
+-// if (MY__FACILITY__WRes != FACILITY_WIN32),
++// if (MY_FACILITY_WRes != FACILITY_WIN32),
+ // we use FACILITY_WIN32 for COM errors:
+ #define E_OUTOFMEMORY               ((HRESULT)0x8007000EL)
+ #define E_INVALIDARG                ((HRESULT)0x80070057L)
+-#define MY__E_ERROR_NEGATIVE_SEEK   ((HRESULT)0x80070083L)
++#define MY_E_ERROR_NEGATIVE_SEEK    ((HRESULT)0x80070083L)
+ 
+ /*
+ // we can use FACILITY_ERRNO for some COM errors, that have errno equivalents:
+ #define E_OUTOFMEMORY             MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM)
+ #define E_INVALIDARG              MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
+-#define MY__E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
++#define MY_E_ERROR_NEGATIVE_SEEK  MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
+ */
+ 
+ #define TEXT(quote) quote
+@@ -156,18 +161,18 @@ typedef int WRes;
+ 
+ 
+ #ifndef RINOK
+-#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
++#define RINOK(x) { const int _result_ = (x); if (_result_ != 0) return _result_; }
+ #endif
+ 
+ #ifndef RINOK_WRes
+-#define RINOK_WRes(x) { WRes __result__ = (x); if (__result__ != 0) return __result__; }
++#define RINOK_WRes(x) { const WRes _result_ = (x); if (_result_ != 0) return _result_; }
+ #endif
+ 
+ typedef unsigned char Byte;
+ typedef short Int16;
+ typedef unsigned short UInt16;
+ 
+-#ifdef _LZMA_UINT32_IS_ULONG
++#ifdef Z7_DECL_Int32_AS_long
+ typedef long Int32;
+ typedef unsigned long UInt32;
+ #else
+@@ -206,37 +211,51 @@ typedef size_t SIZE_T;
+ #endif //  _WIN32
+ 
+ 
+-#define MY_HRES_ERROR__INTERNAL_ERROR  ((HRESULT)0x8007054FL)
++#define MY_HRES_ERROR_INTERNAL_ERROR  ((HRESULT)0x8007054FL)
+ 
+ 
+-#ifdef _SZ_NO_INT_64
+-
+-/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
+-   NOTES: Some code will work incorrectly in that case! */
++#ifdef Z7_DECL_Int64_AS_long
+ 
+ typedef long Int64;
+ typedef unsigned long UInt64;
+ 
+ #else
+ 
+-#if defined(_MSC_VER) || defined(__BORLANDC__)
++#if (defined(_MSC_VER) || defined(__BORLANDC__)) && !defined(__clang__)
+ typedef __int64 Int64;
+ typedef unsigned __int64 UInt64;
+-#define UINT64_CONST(n) n
++#else
++#if defined(__clang__) || defined(__GNUC__)
++#include <stdint.h>
++typedef int64_t Int64;
++typedef uint64_t UInt64;
+ #else
+ typedef long long int Int64;
+ typedef unsigned long long int UInt64;
+-#define UINT64_CONST(n) n ## ULL
++// #define UINT64_CONST(n) n ## ULL
++#endif
+ #endif
+ 
+ #endif
+ 
+-#ifdef _LZMA_NO_SYSTEM_SIZE_T
+-typedef UInt32 SizeT;
++#define UINT64_CONST(n) n
++
++
++#ifdef Z7_DECL_SizeT_AS_unsigned_int
++typedef unsigned int SizeT;
+ #else
+ typedef size_t SizeT;
+ #endif
+ 
++/*
++#if (defined(_MSC_VER) && _MSC_VER <= 1200)
++typedef size_t MY_uintptr_t;
++#else
++#include <stdint.h>
++typedef uintptr_t MY_uintptr_t;
++#endif
++*/
++
+ typedef int BoolInt;
+ /* typedef BoolInt Bool; */
+ #define True 1
+@@ -244,23 +263,23 @@ typedef int BoolInt;
+ 
+ 
+ #ifdef _WIN32
+-#define MY_STD_CALL __stdcall
++#define Z7_STDCALL __stdcall
+ #else
+-#define MY_STD_CALL
++#define Z7_STDCALL
+ #endif
+ 
+ #ifdef _MSC_VER
+ 
+ #if _MSC_VER >= 1300
+-#define MY_NO_INLINE __declspec(noinline)
++#define Z7_NO_INLINE __declspec(noinline)
+ #else
+-#define MY_NO_INLINE
++#define Z7_NO_INLINE
+ #endif
+ 
+-#define MY_FORCE_INLINE __forceinline
++#define Z7_FORCE_INLINE __forceinline
+ 
+-#define MY_CDECL __cdecl
+-#define MY_FAST_CALL __fastcall
++#define Z7_CDECL      __cdecl
++#define Z7_FASTCALL  __fastcall
+ 
+ #else //  _MSC_VER
+ 
+@@ -268,27 +287,25 @@ typedef int BoolInt;
+     || (defined(__clang__) && (__clang_major__ >= 4)) \
+     || defined(__INTEL_COMPILER) \
+     || defined(__xlC__)
+-#define MY_NO_INLINE __attribute__((noinline))
+-// #define MY_FORCE_INLINE __attribute__((always_inline)) inline
++#define Z7_NO_INLINE      __attribute__((noinline))
++#define Z7_FORCE_INLINE   __attribute__((always_inline)) inline
+ #else
+-#define MY_NO_INLINE
++#define Z7_NO_INLINE
++#define Z7_FORCE_INLINE
+ #endif
+ 
+-#define MY_FORCE_INLINE
+-
+-
+-#define MY_CDECL
++#define Z7_CDECL
+ 
+ #if  defined(_M_IX86) \
+   || defined(__i386__)
+-// #define MY_FAST_CALL __attribute__((fastcall))
+-// #define MY_FAST_CALL __attribute__((cdecl))
+-#define MY_FAST_CALL
++// #define Z7_FASTCALL __attribute__((fastcall))
++// #define Z7_FASTCALL __attribute__((cdecl))
++#define Z7_FASTCALL
+ #elif defined(MY_CPU_AMD64)
+-// #define MY_FAST_CALL __attribute__((ms_abi))
+-#define MY_FAST_CALL
++// #define Z7_FASTCALL __attribute__((ms_abi))
++#define Z7_FASTCALL
+ #else
+-#define MY_FAST_CALL
++#define Z7_FASTCALL
+ #endif
+ 
+ #endif //  _MSC_VER
+@@ -296,41 +313,49 @@ typedef int BoolInt;
+ 
+ /* The following interfaces use first parameter as pointer to structure */
+ 
+-typedef struct IByteIn IByteIn;
+-struct IByteIn
++// #define Z7_C_IFACE_CONST_QUAL
++#define Z7_C_IFACE_CONST_QUAL const
++
++#define Z7_C_IFACE_DECL(a) \
++  struct a ## _; \
++  typedef Z7_C_IFACE_CONST_QUAL struct a ## _ * a ## Ptr; \
++  typedef struct a ## _ a; \
++  struct a ## _
++
++
++Z7_C_IFACE_DECL (IByteIn)
+ {
+-  Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */
++  Byte (*Read)(IByteInPtr p); /* reads one byte, returns 0 in case of EOF or error */
+ };
+ #define IByteIn_Read(p) (p)->Read(p)
+ 
+ 
+-typedef struct IByteOut IByteOut;
+-struct IByteOut
++Z7_C_IFACE_DECL (IByteOut)
+ {
+-  void (*Write)(const IByteOut *p, Byte b);
++  void (*Write)(IByteOutPtr p, Byte b);
+ };
+ #define IByteOut_Write(p, b) (p)->Write(p, b)
+ 
+ 
+-typedef struct ISeqInStream ISeqInStream;
+-struct ISeqInStream
++Z7_C_IFACE_DECL (ISeqInStream)
+ {
+-  SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size);
++  SRes (*Read)(ISeqInStreamPtr p, void *buf, size_t *size);
+     /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+        (output(*size) < input(*size)) is allowed */
+ };
+ #define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size)
+ 
+/* reads as much data as the stream provides, up to the limit passed in (*processedSize); on return (*processedSize) is the number of bytes read */
++SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize);
+ /* it can return SZ_ERROR_INPUT_EOF */
+-SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size);
+-SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType);
+-SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf);
++// SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size);
++// SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType);
++SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf);
+ 
+ 
+-typedef struct ISeqOutStream ISeqOutStream;
+-struct ISeqOutStream
++Z7_C_IFACE_DECL (ISeqOutStream)
+ {
+-  size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size);
++  size_t (*Write)(ISeqOutStreamPtr p, const void *buf, size_t size);
+     /* Returns: result - the number of actually written bytes.
+        (result < size) means error */
+ };
+@@ -344,29 +369,26 @@ typedef enum
+ } ESzSeek;
+ 
+ 
+-typedef struct ISeekInStream ISeekInStream;
+-struct ISeekInStream
++Z7_C_IFACE_DECL (ISeekInStream)
+ {
+-  SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size);  /* same as ISeqInStream::Read */
+-  SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin);
++  SRes (*Read)(ISeekInStreamPtr p, void *buf, size_t *size);  /* same as ISeqInStream::Read */
++  SRes (*Seek)(ISeekInStreamPtr p, Int64 *pos, ESzSeek origin);
+ };
+ #define ISeekInStream_Read(p, buf, size)   (p)->Read(p, buf, size)
+ #define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
+ 
+ 
+-typedef struct ILookInStream ILookInStream;
+-struct ILookInStream
++Z7_C_IFACE_DECL (ILookInStream)
+ {
+-  SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size);
++  SRes (*Look)(ILookInStreamPtr p, const void **buf, size_t *size);
+     /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+        (output(*size) > input(*size)) is not allowed
+        (output(*size) < input(*size)) is allowed */
+-  SRes (*Skip)(const ILookInStream *p, size_t offset);
++  SRes (*Skip)(ILookInStreamPtr p, size_t offset);
+     /* offset must be <= output(*size) of Look */
+-
+-  SRes (*Read)(const ILookInStream *p, void *buf, size_t *size);
++  SRes (*Read)(ILookInStreamPtr p, void *buf, size_t *size);
+     /* reads directly (without buffer). It's same as ISeqInStream::Read */
+-  SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin);
++  SRes (*Seek)(ILookInStreamPtr p, Int64 *pos, ESzSeek origin);
+ };
+ 
+ #define ILookInStream_Look(p, buf, size)   (p)->Look(p, buf, size)
+@@ -375,19 +397,18 @@ struct ILookInStream
+ #define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
+ 
+ 
+-SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size);
+-SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset);
++SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size);
++SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset);
+ 
+ /* reads via ILookInStream::Read */
+-SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType);
+-SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size);
+-
++SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType);
++SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size);
+ 
+ 
+ typedef struct
+ {
+   ILookInStream vt;
+-  const ISeekInStream *realStream;
++  ISeekInStreamPtr realStream;
+  
+   size_t pos;
+   size_t size; /* it's data size */
+@@ -399,13 +420,13 @@ typedef struct
+ 
+ void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead);
+ 
+-#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; }
++#define LookToRead2_INIT(p) { (p)->pos = (p)->size = 0; }
+ 
+ 
+ typedef struct
+ {
+   ISeqInStream vt;
+-  const ILookInStream *realStream;
++  ILookInStreamPtr realStream;
+ } CSecToLook;
+ 
+ void SecToLook_CreateVTable(CSecToLook *p);
+@@ -415,20 +436,19 @@ void SecToLook_CreateVTable(CSecToLook *p);
+ typedef struct
+ {
+   ISeqInStream vt;
+-  const ILookInStream *realStream;
++  ILookInStreamPtr realStream;
+ } CSecToRead;
+ 
+ void SecToRead_CreateVTable(CSecToRead *p);
+ 
+ 
+-typedef struct ICompressProgress ICompressProgress;
+-
+-struct ICompressProgress
++Z7_C_IFACE_DECL (ICompressProgress)
+ {
+-  SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize);
++  SRes (*Progress)(ICompressProgressPtr p, UInt64 inSize, UInt64 outSize);
+     /* Returns: result. (result != SZ_OK) means break.
+        Value (UInt64)(Int64)-1 for size means unknown value. */
+ };
++
+ #define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize)
+ 
+ 
+@@ -466,13 +486,13 @@ struct ISzAlloc
+ 
+ 
+ 
+-#ifndef MY_container_of
++#ifndef Z7_container_of
+ 
+ /*
+-#define MY_container_of(ptr, type, m) container_of(ptr, type, m)
+-#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
+-#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
+-#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
++#define Z7_container_of(ptr, type, m) container_of(ptr, type, m)
++#define Z7_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
++#define Z7_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
++#define Z7_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
+ */
+ 
+ /*
+@@ -481,24 +501,64 @@ struct ISzAlloc
+     GCC 4.8.1 : classes with non-public variable members"
+ */
+ 
+-#define MY_container_of(ptr, type, m) ((type *)(void *)((char *)(void *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))
++#define Z7_container_of(ptr, type, m) \
++  ((type *)(void *)((char *)(void *) \
++  (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
+ 
+-#endif
+-
+-#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr))
++#define Z7_container_of_CONST(ptr, type, m) \
++  ((const type *)(const void *)((const char *)(const void *) \
++  (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
+ 
+ /*
+-#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
++#define Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m) \
++  ((type *)(void *)(const void *)((const char *)(const void *) \
++  (1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
+ */
+-#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m)
+ 
+-#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
++#endif
++
++#define Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr))
++
++// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
++#define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of(ptr, type, m)
++// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m)
++
++#define Z7_CONTAINER_FROM_VTBL_CONST(ptr, type, m) Z7_container_of_CONST(ptr, type, m)
++
++#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
+ /*
+-#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m)
++#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL(ptr, type, m)
+ */
++#if defined (__clang__) || defined(__GNUC__)
++#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \
++  _Pragma("GCC diagnostic push") \
++  _Pragma("GCC diagnostic ignored \"-Wcast-qual\"")
++#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL \
++  _Pragma("GCC diagnostic pop")
++#else
++#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL
++#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL
++#endif
++
++#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(ptr, type, m, p) \
++  Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \
++  type *p = Z7_CONTAINER_FROM_VTBL(ptr, type, m); \
++  Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL
++
++#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(type) \
++  Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(pp, type, vt, p)
+ 
+ 
+-#define MY_memset_0_ARRAY(a) memset((a), 0, sizeof(a))
++// #define ZIP7_DECLARE_HANDLE(name)  typedef void *name;
++#define Z7_DECLARE_HANDLE(name)  struct name##_dummy{int unused;}; typedef struct name##_dummy *name;
++
++
++#define Z7_memset_0_ARRAY(a)  memset((a), 0, sizeof(a))
++
++#ifndef Z7_ARRAY_SIZE
++#define Z7_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
++#endif
++
+ 
+ #ifdef _WIN32
+ 
+@@ -527,3 +587,11 @@ struct ISzAlloc
+ EXTERN_C_END
+ 
+ #endif
++
++/*
++#ifndef Z7_ST
++#ifdef _7ZIP_ST
++#define Z7_ST
++#endif
++#endif
++*/
+diff --git a/third_party/lzma_sdk/C/7zVersion.h b/third_party/lzma_sdk/C/7zVersion.h
+deleted file mode 100644
+index 49ea81dd3beaf4d5af32e25dab0360856ee99c4a..0000000000000000000000000000000000000000
+--- a/third_party/lzma_sdk/C/7zVersion.h
++++ /dev/null
+@@ -1,27 +0,0 @@
+-#define MY_VER_MAJOR 22
+-#define MY_VER_MINOR 01
+-#define MY_VER_BUILD 0
+-#define MY_VERSION_NUMBERS "22.01"
+-#define MY_VERSION MY_VERSION_NUMBERS
+-
+-#ifdef MY_CPU_NAME
+-  #define MY_VERSION_CPU MY_VERSION " (" MY_CPU_NAME ")"
+-#else
+-  #define MY_VERSION_CPU MY_VERSION
+-#endif
+-
+-#define MY_DATE "2022-07-15"
+-#undef MY_COPYRIGHT
+-#undef MY_VERSION_COPYRIGHT_DATE
+-#define MY_AUTHOR_NAME "Igor Pavlov"
+-#define MY_COPYRIGHT_PD "Igor Pavlov : Public domain"
+-#define MY_COPYRIGHT_CR "Copyright (c) 1999-2022 Igor Pavlov"
+-
+-#ifdef USE_COPYRIGHT_CR
+-  #define MY_COPYRIGHT MY_COPYRIGHT_CR
+-#else
+-  #define MY_COPYRIGHT MY_COPYRIGHT_PD
+-#endif
+-
+-#define MY_COPYRIGHT_DATE MY_COPYRIGHT " : " MY_DATE
+-#define MY_VERSION_COPYRIGHT_DATE MY_VERSION_CPU " : " MY_COPYRIGHT " : " MY_DATE
+diff --git a/third_party/lzma_sdk/C/7zVersion.rc b/third_party/lzma_sdk/C/7zVersion.rc
+deleted file mode 100644
+index 6ed26de74452e5f8cd98cded9642ed1ddb7a74b7..0000000000000000000000000000000000000000
+--- a/third_party/lzma_sdk/C/7zVersion.rc
++++ /dev/null
+@@ -1,55 +0,0 @@
+-#define MY_VS_FFI_FILEFLAGSMASK  0x0000003FL
+-#define MY_VOS_NT_WINDOWS32  0x00040004L
+-#define MY_VOS_CE_WINDOWS32  0x00050004L
+-
+-#define MY_VFT_APP  0x00000001L
+-#define MY_VFT_DLL  0x00000002L
+-
+-// #include <WinVer.h>
+-
+-#ifndef MY_VERSION
+-#include "7zVersion.h"
+-#endif
+-
+-#define MY_VER MY_VER_MAJOR,MY_VER_MINOR,MY_VER_BUILD,0
+-
+-#ifdef DEBUG
+-#define DBG_FL VS_FF_DEBUG
+-#else
+-#define DBG_FL 0
+-#endif
+-
+-#define MY_VERSION_INFO(fileType, descr, intName, origName)  \
+-LANGUAGE 9, 1 \
+-1 VERSIONINFO \
+-  FILEVERSION MY_VER \
+-  PRODUCTVERSION MY_VER \
+-  FILEFLAGSMASK MY_VS_FFI_FILEFLAGSMASK \
+-  FILEFLAGS DBG_FL \
+-  FILEOS MY_VOS_NT_WINDOWS32 \
+-  FILETYPE fileType \
+-  FILESUBTYPE 0x0L \
+-BEGIN \
+-    BLOCK "StringFileInfo" \
+-    BEGIN  \
+-        BLOCK "040904b0" \
+-        BEGIN \
+-            VALUE "CompanyName", "Igor Pavlov" \
+-            VALUE "FileDescription", descr \
+-            VALUE "FileVersion", MY_VERSION  \
+-            VALUE "InternalName", intName \
+-            VALUE "LegalCopyright", MY_COPYRIGHT \
+-            VALUE "OriginalFilename", origName \
+-            VALUE "ProductName", "7-Zip" \
+-            VALUE "ProductVersion", MY_VERSION \
+-        END \
+-    END \
+-    BLOCK "VarFileInfo" \
+-    BEGIN \
+-        VALUE "Translation", 0x409, 1200 \
+-    END \
+-END
+-
+-#define MY_VERSION_INFO_APP(descr, intName) MY_VERSION_INFO(MY_VFT_APP, descr, intName, intName ".exe")
+-
+-#define MY_VERSION_INFO_DLL(descr, intName) MY_VERSION_INFO(MY_VFT_DLL, descr, intName, intName ".dll")
+diff --git a/third_party/lzma_sdk/C/7zWindows.h b/third_party/lzma_sdk/C/7zWindows.h
+new file mode 100644
+index 0000000000000000000000000000000000000000..42c6db8bfc2765f7232d46de65c8333ac3d9f664
+--- /dev/null
++++ b/third_party/lzma_sdk/C/7zWindows.h
+@@ -0,0 +1,101 @@
++/* 7zWindows.h -- StdAfx
++2023-04-02 : Igor Pavlov : Public domain */
++
++#ifndef ZIP7_INC_7Z_WINDOWS_H
++#define ZIP7_INC_7Z_WINDOWS_H
++
++#ifdef _WIN32
++
++#if defined(__clang__)
++# pragma clang diagnostic push
++#endif
++
++#if defined(_MSC_VER)
++
++#pragma warning(push)
++#pragma warning(disable : 4668) // '_WIN32_WINNT' is not defined as a preprocessor macro, replacing with '0' for '#if/#elif'
++
++#if _MSC_VER == 1900
++// for old kit10 versions
++// #pragma warning(disable : 4255) // winuser.h(13979): warning C4255: 'GetThreadDpiAwarenessContext':
++#endif
++// win10 Windows Kit:
++#endif // _MSC_VER
++
++#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64)
++// for msvc6 without sdk2003
++#define RPC_NO_WINDOWS_H
++#endif
++
++#if defined(__MINGW32__) || defined(__MINGW64__)
++// #if defined(__GNUC__) && !defined(__clang__)
++#include <windows.h>
++#else
++#include <Windows.h>
++#endif
++// #include <basetsd.h>
++// #include <wtypes.h>
++
++// but if precompiled with clang-cl then we need
++// #include <windows.h>
++#if defined(_MSC_VER)
++#pragma warning(pop)
++#endif
++
++#if defined(__clang__)
++# pragma clang diagnostic pop
++#endif
++
++#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64)
++#ifndef _W64
++
++typedef long LONG_PTR, *PLONG_PTR;
++typedef unsigned long ULONG_PTR, *PULONG_PTR;
++typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR;
++
++#define Z7_OLD_WIN_SDK
++#endif // _W64
++#endif // _MSC_VER == 1200
++
++#ifdef Z7_OLD_WIN_SDK
++
++#ifndef INVALID_FILE_ATTRIBUTES
++#define INVALID_FILE_ATTRIBUTES ((DWORD)-1)
++#endif
++#ifndef INVALID_SET_FILE_POINTER
++#define INVALID_SET_FILE_POINTER ((DWORD)-1)
++#endif
++#ifndef FILE_SPECIAL_ACCESS
++#define FILE_SPECIAL_ACCESS    (FILE_ANY_ACCESS)
++#endif
++
++// ShlObj.h:
++// #define BIF_NEWDIALOGSTYLE     0x0040
++
++#pragma warning(disable : 4201)
++// #pragma warning(disable : 4115)
++
++#undef  VARIANT_TRUE
++#define VARIANT_TRUE ((VARIANT_BOOL)-1)
++#endif
++
++#endif // Z7_OLD_WIN_SDK
++
++#ifdef UNDER_CE
++#undef  VARIANT_TRUE
++#define VARIANT_TRUE ((VARIANT_BOOL)-1)
++#endif
++
++
++#if defined(_MSC_VER)
++#if _MSC_VER >= 1400 && _MSC_VER <= 1600
++  // BaseTsd.h(148) : 'HandleToULong' : unreferenced inline function has been removed
++  // string.h
++  // #pragma warning(disable : 4514)
++#endif
++#endif
++
++
++/* #include "7zTypes.h" */
++
++#endif
+diff --git a/third_party/lzma_sdk/C/Alloc.c b/third_party/lzma_sdk/C/Alloc.c
+index d1af76c5c308802a17a6e5fce9c15d3eed6ffe75..63e1a121e7dd2571037a42698fbd772eac40d260 100644
+--- a/third_party/lzma_sdk/C/Alloc.c
++++ b/third_party/lzma_sdk/C/Alloc.c
+@@ -1,38 +1,53 @@
+ /* Alloc.c -- Memory allocation functions
+-2021-07-13 : Igor Pavlov : Public domain */
++2024-02-18 : Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+ 
+-#include <stdio.h>
+-
+ #ifdef _WIN32
+-#include <Windows.h>
++#include "7zWindows.h"
+ #endif
+ #include <stdlib.h>
+ 
+ #include "Alloc.h"
+ 
+-/* #define _SZ_ALLOC_DEBUG */
++#if defined(Z7_LARGE_PAGES) && defined(_WIN32) && \
++    (!defined(Z7_WIN32_WINNT_MIN) || Z7_WIN32_WINNT_MIN < 0x0502)  // < Win2003 (xp-64)
++  #define Z7_USE_DYN_GetLargePageMinimum
++#endif
++
++// for debug:
++#if 0
++#if defined(__CHERI__) && defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16)
++// #pragma message("=== Z7_ALLOC_NO_OFFSET_ALLOCATOR === ")
++#define Z7_ALLOC_NO_OFFSET_ALLOCATOR
++#endif
++#endif
+ 
+-/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */
+-#ifdef _SZ_ALLOC_DEBUG
++// #define SZ_ALLOC_DEBUG
++/* #define SZ_ALLOC_DEBUG */
+ 
++/* use SZ_ALLOC_DEBUG to debug alloc/free operations */
++#ifdef SZ_ALLOC_DEBUG
++
++#include <string.h>
+ #include <stdio.h>
+-int g_allocCount = 0;
+-int g_allocCountMid = 0;
+-int g_allocCountBig = 0;
++static int g_allocCount = 0;
++#ifdef _WIN32
++static int g_allocCountMid = 0;
++static int g_allocCountBig = 0;
++#endif
+ 
+ 
+ #define CONVERT_INT_TO_STR(charType, tempSize) \
+-  unsigned char temp[tempSize]; unsigned i = 0; \
+-  while (val >= 10) { temp[i++] = (unsigned char)('0' + (unsigned)(val % 10)); val /= 10; } \
++  char temp[tempSize]; unsigned i = 0; \
++  while (val >= 10) { temp[i++] = (char)('0' + (unsigned)(val % 10)); val /= 10; } \
+   *s++ = (charType)('0' + (unsigned)val); \
+   while (i != 0) { i--; *s++ = temp[i]; } \
+   *s = 0;
+ 
+ static void ConvertUInt64ToString(UInt64 val, char *s)
+ {
+-  CONVERT_INT_TO_STR(char, 24);
++  CONVERT_INT_TO_STR(char, 24)
+ }
+ 
+ #define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10)))))
+@@ -77,7 +92,7 @@ static void PrintAligned(const char *s, size_t align)
+   Print(s);
+ }
+ 
+-static void PrintLn()
++static void PrintLn(void)
+ {
+   Print("\n");
+ }
+@@ -89,10 +104,10 @@ static void PrintHex(UInt64 v, size_t align)
+   PrintAligned(s, align);
+ }
+ 
+-static void PrintDec(UInt64 v, size_t align)
++static void PrintDec(int v, size_t align)
+ {
+   char s[32];
+-  ConvertUInt64ToString(v, s);
++  ConvertUInt64ToString((unsigned)v, s);
+   PrintAligned(s, align);
+ }
+ 
+@@ -102,12 +117,19 @@ static void PrintAddr(void *p)
+ }
+ 
+ 
+-#define PRINT_ALLOC(name, cnt, size, ptr) \
++#define PRINT_REALLOC(name, cnt, size, ptr) { \
++    Print(name " "); \
++    if (!ptr) PrintDec(cnt++, 10); \
++    PrintHex(size, 10); \
++    PrintAddr(ptr); \
++    PrintLn(); }
++
++#define PRINT_ALLOC(name, cnt, size, ptr) { \
+     Print(name " "); \
+     PrintDec(cnt++, 10); \
+     PrintHex(size, 10); \
+     PrintAddr(ptr); \
+-    PrintLn();
++    PrintLn(); }
+  
+ #define PRINT_FREE(name, cnt, ptr) if (ptr) { \
+     Print(name " "); \
+@@ -117,26 +139,45 @@ static void PrintAddr(void *p)
+  
+ #else
+ 
++#ifdef _WIN32
+ #define PRINT_ALLOC(name, cnt, size, ptr)
++#endif
+ #define PRINT_FREE(name, cnt, ptr)
+ #define Print(s)
+ #define PrintLn()
++#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR
+ #define PrintHex(v, align)
++#endif
+ #define PrintAddr(p)
+ 
+ #endif
+ 
+ 
++/*
++by specification:
++  malloc(non_NULL, 0)   : returns NULL or a unique pointer value that can later be successfully passed to free()
++  realloc(NULL, size)   : the call is equivalent to malloc(size)
++  realloc(non_NULL, 0)  : the call is equivalent to free(ptr)
++
++in main compilers:
++  malloc(0)             : returns non_NULL
++  realloc(NULL,     0)  : returns non_NULL
++  realloc(non_NULL, 0)  : returns NULL
++*/
++
+ 
+ void *MyAlloc(size_t size)
+ {
+   if (size == 0)
+     return NULL;
+-  PRINT_ALLOC("Alloc    ", g_allocCount, size, NULL);
+-  #ifdef _SZ_ALLOC_DEBUG
++  // PRINT_ALLOC("Alloc    ", g_allocCount, size, NULL)
++  #ifdef SZ_ALLOC_DEBUG
+   {
+     void *p = malloc(size);
+-    // PRINT_ALLOC("Alloc    ", g_allocCount, size, p);
++    if (p)
++    {
++      PRINT_ALLOC("Alloc    ", g_allocCount, size, p)
++    }
+     return p;
+   }
+   #else
+@@ -146,71 +187,107 @@ void *MyAlloc(size_t size)
+ 
+ void MyFree(void *address)
+ {
+-  PRINT_FREE("Free    ", g_allocCount, address);
++  PRINT_FREE("Free    ", g_allocCount, address)
+   
+   free(address);
+ }
+ 
++void *MyRealloc(void *address, size_t size)
++{
++  if (size == 0)
++  {
++    MyFree(address);
++    return NULL;
++  }
++  // PRINT_REALLOC("Realloc  ", g_allocCount, size, address)
++  #ifdef SZ_ALLOC_DEBUG
++  {
++    void *p = realloc(address, size);
++    if (p)
++    {
++      PRINT_REALLOC("Realloc    ", g_allocCount, size, address)
++    }
++    return p;
++  }
++  #else
++  return realloc(address, size);
++  #endif
++}
++
++
+ #ifdef _WIN32
+ 
+ void *MidAlloc(size_t size)
+ {
+   if (size == 0)
+     return NULL;
+-  
+-  PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, NULL);
+-  
++  #ifdef SZ_ALLOC_DEBUG
++  {
++    void *p = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
++    if (p)
++    {
++      PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, p)
++    }
++    return p;
++  }
++  #else
+   return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
++  #endif
+ }
+ 
+ void MidFree(void *address)
+ {
+-  PRINT_FREE("Free-Mid", g_allocCountMid, address);
++  PRINT_FREE("Free-Mid", g_allocCountMid, address)
+ 
+   if (!address)
+     return;
+   VirtualFree(address, 0, MEM_RELEASE);
+ }
+ 
+-#ifdef _7ZIP_LARGE_PAGES
++#ifdef Z7_LARGE_PAGES
+ 
+ #ifdef MEM_LARGE_PAGES
+-  #define MY__MEM_LARGE_PAGES  MEM_LARGE_PAGES
++  #define MY_MEM_LARGE_PAGES  MEM_LARGE_PAGES
+ #else
+-  #define MY__MEM_LARGE_PAGES  0x20000000
++  #define MY_MEM_LARGE_PAGES  0x20000000
+ #endif
+ 
+ extern
+ SIZE_T g_LargePageSize;
+ SIZE_T g_LargePageSize = 0;
+-typedef SIZE_T (WINAPI *GetLargePageMinimumP)(VOID);
++typedef SIZE_T (WINAPI *Func_GetLargePageMinimum)(VOID);
+ 
+-#endif // _7ZIP_LARGE_PAGES
+-
+-void SetLargePageSize()
++void SetLargePageSize(void)
+ {
+-  #ifdef _7ZIP_LARGE_PAGES
+   SIZE_T size;
+-  GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP)
+-        GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum");
+-  if (!largePageMinimum)
++#ifdef Z7_USE_DYN_GetLargePageMinimum
++Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION
++
++  const
++   Func_GetLargePageMinimum fn =
++  (Func_GetLargePageMinimum) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")),
++       "GetLargePageMinimum");
++  if (!fn)
+     return;
+-  size = largePageMinimum();
++  size = fn();
++#else
++  size = GetLargePageMinimum();
++#endif
+   if (size == 0 || (size & (size - 1)) != 0)
+     return;
+   g_LargePageSize = size;
+-  #endif
+ }
+ 
++#endif // Z7_LARGE_PAGES
+ 
+ void *BigAlloc(size_t size)
+ {
+   if (size == 0)
+     return NULL;
+ 
+-  PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL);
+-  
+-  #ifdef _7ZIP_LARGE_PAGES
++  PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL)
++
++  #ifdef Z7_LARGE_PAGES
+   {
+     SIZE_T ps = g_LargePageSize;
+     if (ps != 0 && ps <= (1 << 30) && size > (ps / 2))
+@@ -220,56 +297,43 @@ void *BigAlloc(size_t size)
+       size2 = (size + ps) & ~ps;
+       if (size2 >= size)
+       {
+-        void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE);
+-        if (res)
+-          return res;
++        void *p = VirtualAlloc(NULL, size2, MEM_COMMIT | MY_MEM_LARGE_PAGES, PAGE_READWRITE);
++        if (p)
++        {
++          PRINT_ALLOC("Alloc-BM ", g_allocCountMid, size2, p)
++          return p;
++        }
+       }
+     }
+   }
+   #endif
+ 
+-  return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
++  return MidAlloc(size);
+ }
+ 
+ void BigFree(void *address)
+ {
+-  PRINT_FREE("Free-Big", g_allocCountBig, address);
+-  
+-  if (!address)
+-    return;
+-  VirtualFree(address, 0, MEM_RELEASE);
++  PRINT_FREE("Free-Big", g_allocCountBig, address)
++  MidFree(address);
+ }
+ 
+-#endif
++#endif // _WIN32
+ 
+ 
+-static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MyAlloc(size); }
+-static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MyFree(address); }
++static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p)  return MyAlloc(size); }
++static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p)  MyFree(address); }
+ const ISzAlloc g_Alloc = { SzAlloc, SzFree };
+ 
+ #ifdef _WIN32
+-static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MidAlloc(size); }
+-static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MidFree(address); }
+-static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); }
+-static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); }
++static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p)  return MidAlloc(size); }
++static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p)  MidFree(address); }
++static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p)  return BigAlloc(size); }
++static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p)  BigFree(address); }
+ const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };
+ const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
+ #endif
+ 
+-/*
+-  uintptr_t : <stdint.h> C99 (optional)
+-            : unsupported in VS6
+-*/
+-
+-#ifdef _WIN32
+-  typedef UINT_PTR UIntPtr;
+-#else
+-  /*
+-  typedef uintptr_t UIntPtr;
+-  */
+-  typedef ptrdiff_t UIntPtr;
+-#endif
+-
++#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR
+ 
+ #define ADJUST_ALLOC_SIZE 0
+ /*
+@@ -280,14 +344,36 @@ const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
+      MyAlloc() can return address that is NOT multiple of sizeof(void *).
+ */
+ 
+-
+ /*
+-#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((char *)(p) - ((size_t)(UIntPtr)(p) & ((align) - 1))))
++  uintptr_t : <stdint.h> C99 (optional)
++            : unsupported in VS6
+ */
+-#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1))))
++typedef
++  #ifdef _WIN32
++    UINT_PTR
++  #elif 1
++    uintptr_t
++  #else
++    ptrdiff_t
++  #endif
++    MY_uintptr_t;
++
++#if 0 \
++    || (defined(__CHERI__) \
++    || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ > 8))
++// for 128-bit pointers (cheri):
++#define MY_ALIGN_PTR_DOWN(p, align)  \
++    ((void *)((char *)(p) - ((size_t)(MY_uintptr_t)(p) & ((align) - 1))))
++#else
++#define MY_ALIGN_PTR_DOWN(p, align) \
++    ((void *)((((MY_uintptr_t)(p)) & ~((MY_uintptr_t)(align) - 1))))
++#endif
+ 
++#endif
+ 
+-#if !defined(_WIN32) && defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)
++#if !defined(_WIN32) \
++    && (defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR) \
++        || defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L))
+   #define USE_posix_memalign
+ #endif
+ 
+@@ -327,14 +413,13 @@ static int posix_memalign(void **ptr, size_t align, size_t size)
+ 
+ #define ALLOC_ALIGN_SIZE ((size_t)1 << 7)
+ 
+-static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
++void *z7_AlignedAlloc(size_t size)
+ {
+-  #ifndef USE_posix_memalign
++#ifndef USE_posix_memalign
+   
+   void *p;
+   void *pAligned;
+   size_t newSize;
+-  UNUSED_VAR(pp);
+ 
+   /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned
+      block to prevent cache line sharing with another allocated blocks */
+@@ -359,10 +444,9 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
+ 
+   return pAligned;
+ 
+-  #else
++#else
+ 
+   void *p;
+-  UNUSED_VAR(pp);
+   if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size))
+     return NULL;
+ 
+@@ -371,19 +455,37 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
+ 
+   return p;
+ 
+-  #endif
++#endif
++}
++
++
++void z7_AlignedFree(void *address)
++{
++#ifndef USE_posix_memalign
++  if (address)
++    MyFree(((void **)address)[-1]);
++#else
++  free(address);
++#endif
++}
++
++
++static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
++{
++  UNUSED_VAR(pp)
++  return z7_AlignedAlloc(size);
+ }
+ 
+ 
+ static void SzAlignedFree(ISzAllocPtr pp, void *address)
+ {
+-  UNUSED_VAR(pp);
+-  #ifndef USE_posix_memalign
++  UNUSED_VAR(pp)
++#ifndef USE_posix_memalign
+   if (address)
+     MyFree(((void **)address)[-1]);
+-  #else
++#else
+   free(address);
+-  #endif
++#endif
+ }
+ 
+ 
+@@ -391,17 +493,45 @@ const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree };
+ 
+ 
+ 
+-#define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *))
+-
+ /* we align ptr to support cases where CAlignOffsetAlloc::offset is not multiply of sizeof(void *) */
+-#define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1]
+-/*
+-#define REAL_BLOCK_PTR_VAR(p) ((void **)(p))[-1]
+-*/
++#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR
++#if 1
++  #define MY_ALIGN_PTR_DOWN_1(p)  MY_ALIGN_PTR_DOWN(p, sizeof(void *))
++  #define REAL_BLOCK_PTR_VAR(p)  ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1]
++#else
++  // we can use this simplified code,
++  // if (CAlignOffsetAlloc::offset == (k * sizeof(void *))
++  #define REAL_BLOCK_PTR_VAR(p)  (((void **)(p))[-1])
++#endif
++#endif
++
++
++#if 0
++#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR
++#include <stdio.h>
++static void PrintPtr(const char *s, const void *p)
++{
++  const Byte *p2 = (const Byte *)&p;
++  unsigned i;
++  printf("%s %p ", s, p);
++  for (i = sizeof(p); i != 0;)
++  {
++    i--;
++    printf("%02x", p2[i]);
++  }
++  printf("\n");
++}
++#endif
++#endif
++
+ 
+ static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)
+ {
+-  CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt);
++#if defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR)
++  UNUSED_VAR(pp)
++  return z7_AlignedAlloc(size);
++#else
++  const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt);
+   void *adr;
+   void *pAligned;
+   size_t newSize;
+@@ -429,6 +559,12 @@ static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)
+   pAligned = (char *)MY_ALIGN_PTR_DOWN((char *)adr +
+       alignSize - p->offset + extra + ADJUST_ALLOC_SIZE, alignSize) + p->offset;
+ 
++#if 0
++  printf("\nalignSize = %6x, offset=%6x, size=%8x \n", (unsigned)alignSize, (unsigned)p->offset, (unsigned)size);
++  PrintPtr("base", adr);
++  PrintPtr("alig", pAligned);
++#endif
++
+   PrintLn();
+   Print("- Aligned: ");
+   Print(" size="); PrintHex(size, 8);
+@@ -440,19 +576,25 @@ static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)
+   REAL_BLOCK_PTR_VAR(pAligned) = adr;
+ 
+   return pAligned;
++#endif
+ }
+ 
+ 
+ static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address)
+ {
++#if defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR)
++  UNUSED_VAR(pp)
++  z7_AlignedFree(address);
++#else
+   if (address)
+   {
+-    CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt);
++    const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt);
+     PrintLn();
+     Print("- Aligned Free: ");
+     PrintLn();
+     ISzAlloc_Free(p->baseAlloc, REAL_BLOCK_PTR_VAR(address));
+   }
++#endif
+ }
+ 
+ 
+diff --git a/third_party/lzma_sdk/C/Alloc.h b/third_party/lzma_sdk/C/Alloc.h
+index 3be2041eb75502c36faf8064e0edd6679d7abdbb..01bf6b7dd6b1c7fabbd8c2797738b6f56709377a 100644
+--- a/third_party/lzma_sdk/C/Alloc.h
++++ b/third_party/lzma_sdk/C/Alloc.h
+@@ -1,31 +1,49 @@
+ /* Alloc.h -- Memory allocation functions
+-2021-07-13 : Igor Pavlov : Public domain */
++2024-01-22 : Igor Pavlov : Public domain */
+ 
+-#ifndef __COMMON_ALLOC_H
+-#define __COMMON_ALLOC_H
++#ifndef ZIP7_INC_ALLOC_H
++#define ZIP7_INC_ALLOC_H
+ 
+ #include "7zTypes.h"
+ 
+ EXTERN_C_BEGIN
+ 
++/*
++  MyFree(NULL)        : is allowed, as free(NULL)
++  MyAlloc(0)          : returns NULL : but malloc(0)        is allowed to return NULL or non_NULL
++  MyRealloc(NULL, 0)  : returns NULL : but realloc(NULL, 0) is allowed to return NULL or non_NULL
++MyRealloc() is similar to realloc() for the following cases:
++  MyRealloc(non_NULL, 0)         : returns NULL and always calls MyFree(ptr)
++  MyRealloc(NULL, non_ZERO)      : returns NULL, if allocation failed
++  MyRealloc(non_NULL, non_ZERO)  : returns NULL, if reallocation failed
++*/
++
+ void *MyAlloc(size_t size);
+ void MyFree(void *address);
++void *MyRealloc(void *address, size_t size);
++
++void *z7_AlignedAlloc(size_t size);
++void  z7_AlignedFree(void *p);
+ 
+ #ifdef _WIN32
+ 
++#ifdef Z7_LARGE_PAGES
+ void SetLargePageSize(void);
++#endif
+ 
+ void *MidAlloc(size_t size);
+ void MidFree(void *address);
+ void *BigAlloc(size_t size);
+ void BigFree(void *address);
+ 
++/* #define Z7_BIG_ALLOC_IS_ZERO_FILLED */
++
+ #else
+ 
+-#define MidAlloc(size) MyAlloc(size)
+-#define MidFree(address) MyFree(address)
+-#define BigAlloc(size) MyAlloc(size)
+-#define BigFree(address) MyFree(address)
++#define MidAlloc(size)    z7_AlignedAlloc(size)
++#define MidFree(address)  z7_AlignedFree(address)
++#define BigAlloc(size)    z7_AlignedAlloc(size)
++#define BigFree(address)  z7_AlignedFree(address)
+ 
+ #endif
+ 
+diff --git a/third_party/lzma_sdk/C/Bcj2.c b/third_party/lzma_sdk/C/Bcj2.c
+index c7b956708f6e82641623bcaa9e9abcecf1635750..7cb57ad62dcf0aef9f5cb77078897d17400266be 100644
+--- a/third_party/lzma_sdk/C/Bcj2.c
++++ b/third_party/lzma_sdk/C/Bcj2.c
+@@ -1,29 +1,24 @@
+ /* Bcj2.c -- BCJ2 Decoder (Converter for x86 code)
+-2021-02-09 : Igor Pavlov : Public domain */
++2023-03-01 : Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+ 
+ #include "Bcj2.h"
+ #include "CpuArch.h"
+ 
+-#define CProb UInt16
+-
+ #define kTopValue ((UInt32)1 << 24)
+-#define kNumModelBits 11
+-#define kBitModelTotal (1 << kNumModelBits)
++#define kNumBitModelTotalBits 11
++#define kBitModelTotal (1 << kNumBitModelTotalBits)
+ #define kNumMoveBits 5
+ 
+-#define _IF_BIT_0 ttt = *prob; bound = (p->range >> kNumModelBits) * ttt; if (p->code < bound)
+-#define _UPDATE_0 p->range = bound; *prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
+-#define _UPDATE_1 p->range -= bound; p->code -= bound; *prob = (CProb)(ttt - (ttt >> kNumMoveBits));
++// UInt32 bcj2_stats[256 + 2][2];
+ 
+ void Bcj2Dec_Init(CBcj2Dec *p)
+ {
+   unsigned i;
+-
+-  p->state = BCJ2_DEC_STATE_OK;
++  p->state = BCJ2_STREAM_RC; // BCJ2_DEC_STATE_OK;
+   p->ip = 0;
+-  p->temp[3] = 0;
++  p->temp = 0;
+   p->range = 0;
+   p->code = 0;
+   for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++)
+@@ -32,217 +27,248 @@ void Bcj2Dec_Init(CBcj2Dec *p)
+ 
+ SRes Bcj2Dec_Decode(CBcj2Dec *p)
+ {
++  UInt32 v = p->temp;
++  // const Byte *src;
+   if (p->range <= 5)
+   {
+-    p->state = BCJ2_DEC_STATE_OK;
++    UInt32 code = p->code;
++    p->state = BCJ2_DEC_STATE_ERROR; /* for case if we return SZ_ERROR_DATA; */
+     for (; p->range != 5; p->range++)
+     {
+-      if (p->range == 1 && p->code != 0)
++      if (p->range == 1 && code != 0)
+         return SZ_ERROR_DATA;
+-      
+       if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
+       {
+         p->state = BCJ2_STREAM_RC;
+         return SZ_OK;
+       }
+-
+-      p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
++      code = (code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
++      p->code = code;
+     }
+-    
+-    if (p->code == 0xFFFFFFFF)
++    if (code == 0xffffffff)
+       return SZ_ERROR_DATA;
+-    
+-    p->range = 0xFFFFFFFF;
++    p->range = 0xffffffff;
+   }
+-  else if (p->state >= BCJ2_DEC_STATE_ORIG_0)
++  // else
+   {
+-    while (p->state <= BCJ2_DEC_STATE_ORIG_3)
++    unsigned state = p->state;
++    // we check BCJ2_IS_32BIT_STREAM() here instead of check in the main loop
++    if (BCJ2_IS_32BIT_STREAM(state))
+     {
+-      Byte *dest = p->dest;
+-      if (dest == p->destLim)
++      const Byte *cur = p->bufs[state];
++      if (cur == p->lims[state])
+         return SZ_OK;
+-      *dest = p->temp[(size_t)p->state - BCJ2_DEC_STATE_ORIG_0];
+-      p->state++;
+-      p->dest = dest + 1;
++      p->bufs[state] = cur + 4;
++      {
++        const UInt32 ip = p->ip + 4;
++        v = GetBe32a(cur) - ip;
++        p->ip = ip;
++      }
++      state = BCJ2_DEC_STATE_ORIG_0;
+     }
+-  }
+-
+-  /*
+-  if (BCJ2_IS_32BIT_STREAM(p->state))
+-  {
+-    const Byte *cur = p->bufs[p->state];
+-    if (cur == p->lims[p->state])
+-      return SZ_OK;
+-    p->bufs[p->state] = cur + 4;
+-    
++    if ((unsigned)(state - BCJ2_DEC_STATE_ORIG_0) < 4)
+     {
+-      UInt32 val;
+-      Byte *dest;
+-      SizeT rem;
+-      
+-      p->ip += 4;
+-      val = GetBe32(cur) - p->ip;
+-      dest = p->dest;
+-      rem = p->destLim - dest;
+-      if (rem < 4)
++      Byte *dest = p->dest;
++      for (;;)
+       {
+-        SizeT i;
+-        SetUi32(p->temp, val);
+-        for (i = 0; i < rem; i++)
+-          dest[i] = p->temp[i];
+-        p->dest = dest + rem;
+-        p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem;
+-        return SZ_OK;
++        if (dest == p->destLim)
++        {
++          p->state = state;
++          p->temp = v;
++          return SZ_OK;
++        }
++        *dest++ = (Byte)v;
++        p->dest = dest;
++        if (++state == BCJ2_DEC_STATE_ORIG_3 + 1)
++          break;
++        v >>= 8;
+       }
+-      SetUi32(dest, val);
+-      p->temp[3] = (Byte)(val >> 24);
+-      p->dest = dest + 4;
+-      p->state = BCJ2_DEC_STATE_OK;
+     }
+   }
+-  */
+ 
++  // src = p->bufs[BCJ2_STREAM_MAIN];
+   for (;;)
+   {
++    /*
+     if (BCJ2_IS_32BIT_STREAM(p->state))
+       p->state = BCJ2_DEC_STATE_OK;
+     else
++    */
+     {
+       if (p->range < kTopValue)
+       {
+         if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
+         {
+           p->state = BCJ2_STREAM_RC;
++          p->temp = v;
+           return SZ_OK;
+         }
+         p->range <<= 8;
+         p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
+       }
+-
+       {
+         const Byte *src = p->bufs[BCJ2_STREAM_MAIN];
+         const Byte *srcLim;
+-        Byte *dest;
+-        SizeT num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src);
+-        
+-        if (num == 0)
++        Byte *dest = p->dest;
+         {
+-          p->state = BCJ2_STREAM_MAIN;
+-          return SZ_OK;
++          const SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src);
++          SizeT num = (SizeT)(p->destLim - dest);
++          if (num >= rem)
++            num = rem;
++        #define NUM_ITERS 4
++        #if (NUM_ITERS & (NUM_ITERS - 1)) == 0
++          num &= ~((SizeT)NUM_ITERS - 1);   // if (NUM_ITERS == (1 << x))
++        #else
++          num -= num % NUM_ITERS; // if (NUM_ITERS != (1 << x))
++        #endif
++          srcLim = src + num;
+         }
+-        
+-        dest = p->dest;
+-        if (num > (SizeT)(p->destLim - dest))
++
++        #define NUM_SHIFT_BITS  24
++        #define ONE_ITER(indx) { \
++          const unsigned b = src[indx]; \
++          *dest++ = (Byte)b; \
++          v = (v << NUM_SHIFT_BITS) | b; \
++          if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \
++          if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \
++              ((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \
++            /* ++dest */; /* v = b; */ }
++
++        if (src != srcLim)
++        for (;;)
+         {
+-          num = (SizeT)(p->destLim - dest);
+-          if (num == 0)
+-          {
+-            p->state = BCJ2_DEC_STATE_ORIG;
+-            return SZ_OK;
+-          }
++            /* The dependency chain of 2-cycle for (v) calculation is not big problem here.
++               But we can remove dependency chain with v = b in the end of loop. */
++          ONE_ITER(0)
++          #if (NUM_ITERS > 1)
++            ONE_ITER(1)
++          #if (NUM_ITERS > 2)
++            ONE_ITER(2)
++          #if (NUM_ITERS > 3)
++            ONE_ITER(3)
++          #if (NUM_ITERS > 4)
++            ONE_ITER(4)
++          #if (NUM_ITERS > 5)
++            ONE_ITER(5)
++          #if (NUM_ITERS > 6)
++            ONE_ITER(6)
++          #if (NUM_ITERS > 7)
++            ONE_ITER(7)
++          #endif
++          #endif
++          #endif
++          #endif
++          #endif
++          #endif
++          #endif
++
++          src += NUM_ITERS;
++          if (src == srcLim)
++            break;
+         }
+-       
+-        srcLim = src + num;
+ 
+-        if (p->temp[3] == 0x0F && (src[0] & 0xF0) == 0x80)
+-          *dest = src[0];
+-        else for (;;)
++        if (src == srcLim)
++      #if (NUM_ITERS > 1)
++        for (;;)
++      #endif
+         {
+-          Byte b = *src;
+-          *dest = b;
+-          if (b != 0x0F)
++        #if (NUM_ITERS > 1)
++          if (src == p->lims[BCJ2_STREAM_MAIN] || dest == p->destLim)
++        #endif
+           {
+-            if ((b & 0xFE) == 0xE8)
+-              break;
+-            dest++;
+-            if (++src != srcLim)
+-              continue;
+-            break;
++            const SizeT num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]);
++            p->bufs[BCJ2_STREAM_MAIN] = src;
++            p->dest = dest;
++            p->ip += (UInt32)num;
++            /* state BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */
++            p->state =
++              src == p->lims[BCJ2_STREAM_MAIN] ?
++                (unsigned)BCJ2_STREAM_MAIN :
++                (unsigned)BCJ2_DEC_STATE_ORIG;
++            p->temp = v;
++            return SZ_OK;
+           }
+-          dest++;
+-          if (++src == srcLim)
+-            break;
+-          if ((*src & 0xF0) != 0x80)
+-            continue;
+-          *dest = *src;
+-          break;
++        #if (NUM_ITERS > 1)
++          ONE_ITER(0)
++          src++;
++        #endif
+         }
+-        
+-        num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]);
+-        
+-        if (src == srcLim)
++
+         {
+-          p->temp[3] = src[-1];
+-          p->bufs[BCJ2_STREAM_MAIN] = src;
++          const SizeT num = (SizeT)(dest - p->dest);
++          p->dest = dest; // p->dest += num;
++          p->bufs[BCJ2_STREAM_MAIN] += num; // = src;
+           p->ip += (UInt32)num;
+-          p->dest += num;
+-          p->state =
+-            p->bufs[BCJ2_STREAM_MAIN] ==
+-            p->lims[BCJ2_STREAM_MAIN] ?
+-              (unsigned)BCJ2_STREAM_MAIN :
+-              (unsigned)BCJ2_DEC_STATE_ORIG;
+-          return SZ_OK;
+         }
+-        
+         {
+           UInt32 bound, ttt;
+-          CProb *prob;
+-          Byte b = src[0];
+-          Byte prev = (Byte)(num == 0 ? p->temp[3] : src[-1]);
+-          
+-          p->temp[3] = b;
+-          p->bufs[BCJ2_STREAM_MAIN] = src + 1;
+-          num++;
+-          p->ip += (UInt32)num;
+-          p->dest += num;
+-          
+-          prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)prev : (b == 0xE9 ? 1 : 0));
+-          
+-          _IF_BIT_0
++          CBcj2Prob *prob; // unsigned index;
++          /*
++          prob = p->probs + (unsigned)((Byte)v == 0xe8 ?
++              2 + (Byte)(v >> 8) :
++              ((v >> 5) & 1));  // ((Byte)v < 0xe8 ? 0 : 1));
++          */
+           {
+-            _UPDATE_0
++            const unsigned c = ((v + 0x17) >> 6) & 1;
++            prob = p->probs + (unsigned)
++                (((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1));
++                // (Byte)
++                // 8x->0     : e9->1     : xxe8->xx+2
++                // 8x->0x100 : e9->0x101 : xxe8->xx
++                // (((0x100 - (e & ~v)) & (0x100 | (v >> 8))) + (e & v));
++                // (((0x101 + (~e | v)) & (0x100 | (v >> 8))) + (e & v));
++          }
++          ttt = *prob;
++          bound = (p->range >> kNumBitModelTotalBits) * ttt;
++          if (p->code < bound)
++          {
++            // bcj2_stats[prob - p->probs][0]++;
++            p->range = bound;
++            *prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
+             continue;
+           }
+-          _UPDATE_1
+-            
++          {
++            // bcj2_stats[prob - p->probs][1]++;
++            p->range -= bound;
++            p->code -= bound;
++            *prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits));
++          }
+         }
+       }
+     }
+-
+     {
+-      UInt32 val;
+-      unsigned cj = (p->temp[3] == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;
++      /* (v == 0xe8 ? 0 : 1) uses setcc instruction with additional zero register usage in x64 MSVC. */
++      // const unsigned cj = ((Byte)v == 0xe8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;
++      const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL;
+       const Byte *cur = p->bufs[cj];
+       Byte *dest;
+       SizeT rem;
+-      
+       if (cur == p->lims[cj])
+       {
+         p->state = cj;
+         break;
+       }
+-      
+-      val = GetBe32(cur);
++      v = GetBe32a(cur);
+       p->bufs[cj] = cur + 4;
+-
+-      p->ip += 4;
+-      val -= p->ip;
++      {
++        const UInt32 ip = p->ip + 4;
++        v -= ip;
++        p->ip = ip;
++      }
+       dest = p->dest;
+       rem = (SizeT)(p->destLim - dest);
+-      
+       if (rem < 4)
+       {
+-        p->temp[0] = (Byte)val; if (rem > 0) dest[0] = (Byte)val; val >>= 8;
+-        p->temp[1] = (Byte)val; if (rem > 1) dest[1] = (Byte)val; val >>= 8;
+-        p->temp[2] = (Byte)val; if (rem > 2) dest[2] = (Byte)val; val >>= 8;
+-        p->temp[3] = (Byte)val;
++        if ((unsigned)rem > 0) { dest[0] = (Byte)v;  v >>= 8;
++        if ((unsigned)rem > 1) { dest[1] = (Byte)v;  v >>= 8;
++        if ((unsigned)rem > 2) { dest[2] = (Byte)v;  v >>= 8; }}}
++        p->temp = v;
+         p->dest = dest + rem;
+         p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem;
+         break;
+       }
+-      
+-      SetUi32(dest, val);
+-      p->temp[3] = (Byte)(val >> 24);
++      SetUi32(dest, v)
++      v >>= 24;
+       p->dest = dest + 4;
+     }
+   }
+@@ -252,6 +278,13 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
+     p->range <<= 8;
+     p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
+   }
+-
+   return SZ_OK;
+ }
++
++#undef NUM_ITERS
++#undef ONE_ITER
++#undef NUM_SHIFT_BITS
++#undef kTopValue
++#undef kNumBitModelTotalBits
++#undef kBitModelTotal
++#undef kNumMoveBits
+diff --git a/third_party/lzma_sdk/C/Bcj2.h b/third_party/lzma_sdk/C/Bcj2.h
+index 8824080acfbcf767ee4aa127fd56354723f20bb6..4575545b62c06dd4a26646104e26a5a39685aa10 100644
+--- a/third_party/lzma_sdk/C/Bcj2.h
++++ b/third_party/lzma_sdk/C/Bcj2.h
+@@ -1,8 +1,8 @@
+-/* Bcj2.h -- BCJ2 Converter for x86 code
+-2014-11-10 : Igor Pavlov : Public domain */
++/* Bcj2.h -- BCJ2 converter for x86 code (Branch CALL/JUMP variant2)
++2023-03-02 : Igor Pavlov : Public domain */
+ 
+-#ifndef __BCJ2_H
+-#define __BCJ2_H
++#ifndef ZIP7_INC_BCJ2_H
++#define ZIP7_INC_BCJ2_H
+ 
+ #include "7zTypes.h"
+ 
+@@ -26,37 +26,68 @@ enum
+   BCJ2_DEC_STATE_ORIG_3,
+   
+   BCJ2_DEC_STATE_ORIG,
+-  BCJ2_DEC_STATE_OK
++  BCJ2_DEC_STATE_ERROR     /* after detected data error */
+ };
+ 
+ enum
+ {
+   BCJ2_ENC_STATE_ORIG = BCJ2_NUM_STREAMS,
+-  BCJ2_ENC_STATE_OK
++  BCJ2_ENC_STATE_FINISHED  /* it's state after fully encoded stream */
+ };
+ 
+ 
+-#define BCJ2_IS_32BIT_STREAM(s) ((s) == BCJ2_STREAM_CALL || (s) == BCJ2_STREAM_JUMP)
++/* #define BCJ2_IS_32BIT_STREAM(s) ((s) == BCJ2_STREAM_CALL || (s) == BCJ2_STREAM_JUMP) */
++#define BCJ2_IS_32BIT_STREAM(s) ((unsigned)((unsigned)(s) - (unsigned)BCJ2_STREAM_CALL) < 2)
+ 
+ /*
+ CBcj2Dec / CBcj2Enc
+ bufs sizes:
+   BUF_SIZE(n) = lims[n] - bufs[n]
+-bufs sizes for BCJ2_STREAM_CALL and BCJ2_STREAM_JUMP must be mutliply of 4:
++bufs sizes for BCJ2_STREAM_CALL and BCJ2_STREAM_JUMP must be a multiple of 4:
+     (BUF_SIZE(BCJ2_STREAM_CALL) & 3) == 0
+     (BUF_SIZE(BCJ2_STREAM_JUMP) & 3) == 0
+ */
+ 
++// typedef UInt32 CBcj2Prob;
++typedef UInt16 CBcj2Prob;
++
++/*
++BCJ2 encoder / decoder internal requirements:
++  - If last bytes of stream contain marker (e8/e8/0f8x), then
++    there is also encoded symbol (0 : no conversion) in RC stream.
++  - One case of overlapped instructions is supported,
++    if last byte of converted instruction is (0f) and next byte is (8x):
++      marker [xx xx xx 0f] 8x
++    then the pair (0f 8x) is treated as marker.
++*/
++
++/* ---------- BCJ2 Decoder ---------- */
++
+ /*
+ CBcj2Dec:
+-dest is allowed to overlap with bufs[BCJ2_STREAM_MAIN], with the following conditions:
++(dest) is allowed to overlap with bufs[BCJ2_STREAM_MAIN], with the following conditions:
+   bufs[BCJ2_STREAM_MAIN] >= dest &&
+-  bufs[BCJ2_STREAM_MAIN] - dest >= tempReserv +
++  bufs[BCJ2_STREAM_MAIN] - dest >=
+         BUF_SIZE(BCJ2_STREAM_CALL) +
+         BUF_SIZE(BCJ2_STREAM_JUMP)
+-     tempReserv = 0 : for first call of Bcj2Dec_Decode
+-     tempReserv = 4 : for any other calls of Bcj2Dec_Decode
+-  overlap with offset = 1 is not allowed
++  reserve = bufs[BCJ2_STREAM_MAIN] - dest -
++      ( BUF_SIZE(BCJ2_STREAM_CALL) +
++        BUF_SIZE(BCJ2_STREAM_JUMP) )
++  and additional conditions:
++  if (it's first call of Bcj2Dec_Decode() after Bcj2Dec_Init())
++  {
++    (reserve != 1) : if (ver <  v23.00)
++  }
++  else // if there is more than one call of Bcj2Dec_Decode() after Bcj2Dec_Init()
++  {
++    (reserve >= 6) : if (ver <  v23.00)
++    (reserve >= 4) : if (ver >= v23.00)
++    We need that (reserve) because after first call of Bcj2Dec_Decode(),
++    CBcj2Dec::temp can contain up to 4 bytes for writing to (dest).
++  }
++  (reserve == 0) is allowed, if we decode full stream via single call of Bcj2Dec_Decode().
++  (reserve == 0) also is allowed in case of multi-call, if we use fixed buffers,
++     and (reserve) is calculated from full (final) sizes of all streams before first call.
+ */
+ 
+ typedef struct
+@@ -68,21 +99,65 @@ typedef struct
+ 
+   unsigned state; /* BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */
+ 
+-  UInt32 ip;
+-  Byte temp[4];
++  UInt32 ip;      /* property of starting base for decoding */
++  UInt32 temp;    /* Byte temp[4]; */
+   UInt32 range;
+   UInt32 code;
+-  UInt16 probs[2 + 256];
++  CBcj2Prob probs[2 + 256];
+ } CBcj2Dec;
+ 
++
++/* Note:
++   Bcj2Dec_Init() sets (CBcj2Dec::ip = 0)
++   if (ip != 0) property is required, the caller must set CBcj2Dec::ip after Bcj2Dec_Init()
++*/
+ void Bcj2Dec_Init(CBcj2Dec *p);
+ 
+-/* Returns: SZ_OK or SZ_ERROR_DATA */
++
++/* Bcj2Dec_Decode():
++   returns:
++     SZ_OK
++     SZ_ERROR_DATA : if data in 5 starting bytes of BCJ2_STREAM_RC stream are not correct
++*/
+ SRes Bcj2Dec_Decode(CBcj2Dec *p);
+ 
+-#define Bcj2Dec_IsFinished(_p_) ((_p_)->code == 0)
++/* To check that decoding was finished you can compare
++   sizes of processed streams with sizes known from another sources.
++   You must do at least one mandatory check from the two following options:
++      - the check for size of processed output (ORIG) stream.
++      - the check for size of processed input  (MAIN) stream.
++   additional optional checks:
++      - the checks for processed sizes of all input streams (MAIN, CALL, JUMP, RC)
++      - the checks Bcj2Dec_IsMaybeFinished*()
++   also before actual decoding you can check that the
++   following condition is met for stream sizes:
++     ( size(ORIG) == size(MAIN) + size(CALL) + size(JUMP) )
++*/
+ 
++/* (state == BCJ2_STREAM_MAIN) means that decoder is ready for
++      additional input data in BCJ2_STREAM_MAIN stream.
++   Note that (state == BCJ2_STREAM_MAIN) is allowed for non-finished decoding.
++*/
++#define Bcj2Dec_IsMaybeFinished_state_MAIN(_p_) ((_p_)->state == BCJ2_STREAM_MAIN)
+ 
++/* if the stream decoding was finished correctly, then range decoder
++   part of CBcj2Dec also was finished, and then (CBcj2Dec::code == 0).
++   Note that (CBcj2Dec::code == 0) is allowed for non-finished decoding.
++*/
++#define Bcj2Dec_IsMaybeFinished_code(_p_) ((_p_)->code == 0)
++
++/* use Bcj2Dec_IsMaybeFinished() only as additional check
++    after at least one mandatory check from the two following options:
++      - the check for size of processed output (ORIG) stream.
++      - the check for size of processed input  (MAIN) stream.
++*/
++#define Bcj2Dec_IsMaybeFinished(_p_) ( \
++        Bcj2Dec_IsMaybeFinished_state_MAIN(_p_) && \
++        Bcj2Dec_IsMaybeFinished_code(_p_))
++
++
++
++/* ---------- BCJ2 Encoder ---------- */
+ 
+ typedef enum
+ {
+@@ -91,6 +166,91 @@ typedef enum
+   BCJ2_ENC_FINISH_MODE_END_STREAM
+ } EBcj2Enc_FinishMode;
+ 
++/*
++  BCJ2_ENC_FINISH_MODE_CONTINUE:
++     process non finished encoding.
++     It notifies the encoder that additional further calls
++     can provide more input data (src) than provided by current call.
++     In  that case the CBcj2Enc encoder still can move (src) pointer
++     up to (srcLim), but CBcj2Enc encoder can store some of the last
++     processed bytes (up to 4 bytes) from src to internal CBcj2Enc::temp[] buffer.
++   at return:
++       (CBcj2Enc::src will point to position that includes
++       processed data and data copied to (temp[]) buffer)
++       That data from (temp[]) buffer will be used in further calls.
++
++  BCJ2_ENC_FINISH_MODE_END_BLOCK:
++     finish encoding of current block (ended at srcLim) without RC flushing.
++   at return: if (CBcj2Enc::state == BCJ2_ENC_STATE_ORIG &&
++                  CBcj2Enc::src == CBcj2Enc::srcLim)
++        :  it shows that block encoding was finished. And the encoder is
++           ready for new (src) data or for stream finish operation.
++     finished block means
++     {
++       CBcj2Enc has completed block encoding up to (srcLim).
++       (1 + 4 bytes) or (2 + 4 bytes) CALL/JUMP sequences will
++       not cross block boundary at (srcLim).
++       temporary CBcj2Enc buffer for (ORIG) src data is empty.
++       3 output uncompressed streams (MAIN, CALL, JUMP) were flushed.
++       RC stream was not flushed. And RC stream will cross block boundary.
++     }
++     Note: some possible implementation of BCJ2 encoder could
++     write branch marker (e8/e8/0f8x) in one call of Bcj2Enc_Encode(),
++     and it could calculate symbol for RC in another call of Bcj2Enc_Encode().
++     BCJ2 encoder uses ip/fileIp/fileSize/relatLimit values to calculate RC symbol.
++     And these CBcj2Enc variables can have different values in different Bcj2Enc_Encode() calls.
++     So caller must finish each block with BCJ2_ENC_FINISH_MODE_END_BLOCK
++     to ensure that RC symbol is calculated and written in proper block.
++
++  BCJ2_ENC_FINISH_MODE_END_STREAM
++     finish encoding of stream (ended at srcLim) fully including RC flushing.
++   at return: if (CBcj2Enc::state == BCJ2_ENC_STATE_FINISHED)
++        : it shows that stream encoding was finished fully,
++          and all output streams were flushed fully.
++     also Bcj2Enc_IsFinished() can be called.
++*/
++
++
++/*
++  32-bit relative offset in JUMP/CALL commands is
++    - (mod 4 GiB)  for 32-bit x86 code
++    - signed Int32 for 64-bit x86-64 code
++  BCJ2 encoder also does internal relative to absolute address conversions.
++  And there are 2 possible ways to do it:
++    before v23: we used 32-bit variables and (mod 4 GiB) conversion
++    since  v23: we use  64-bit variables and (signed Int32 offset) conversion.
++  The absolute address condition for conversion in v23:
++    ((UInt64)((Int64)ip64 - (Int64)fileIp64 + 5 + (Int32)offset) < (UInt64)fileSize64)
++  note that if (fileSize64 > 2 GiB), there is a difference between
++  old (mod 4 GiB) way (v22) and new (signed Int32 offset) way (v23).
++  And new (v23) way is more suitable to encode 64-bit x86-64 code for (fileSize64 > 2 GiB) cases.
++*/
++
++/*
++// for old (v22) way for conversion:
++typedef UInt32 CBcj2Enc_ip_unsigned;
++typedef  Int32 CBcj2Enc_ip_signed;
++#define BCJ2_ENC_FileSize_MAX ((UInt32)1 << 31)
++*/
++typedef UInt64 CBcj2Enc_ip_unsigned;
++typedef  Int64 CBcj2Enc_ip_signed;
++
++/* maximum size of file that can be used for conversion condition */
++#define BCJ2_ENC_FileSize_MAX             ((CBcj2Enc_ip_unsigned)0 - 2)
++
++/* default value of fileSize64_minus1 variable that means
++   that absolute address limitation will not be used */
++#define BCJ2_ENC_FileSizeField_UNLIMITED  ((CBcj2Enc_ip_unsigned)0 - 1)
++
++/* calculate value that later can be set to CBcj2Enc::fileSize64_minus1 */
++#define BCJ2_ENC_GET_FileSizeField_VAL_FROM_FileSize(fileSize) \
++    ((CBcj2Enc_ip_unsigned)(fileSize) - 1)
++
++/* set CBcj2Enc::fileSize64_minus1 variable from size of file */
++#define Bcj2Enc_SET_FileSize(p, fileSize) \
++    (p)->fileSize64_minus1 = BCJ2_ENC_GET_FileSizeField_VAL_FROM_FileSize(fileSize);
++
++
+ typedef struct
+ {
+   Byte *bufs[BCJ2_NUM_STREAMS];
+@@ -101,45 +261,71 @@ typedef struct
+   unsigned state;
+   EBcj2Enc_FinishMode finishMode;
+ 
+-  Byte prevByte;
++  Byte context;
++  Byte flushRem;
++  Byte isFlushState;
+ 
+   Byte cache;
+   UInt32 range;
+   UInt64 low;
+   UInt64 cacheSize;
++
++  // UInt32 context;  // for marker version, it can include marker flag.
+ 
+-  UInt32 ip;
+-
+-  /* 32-bit ralative offset in JUMP/CALL commands is
+-       - (mod 4 GB)   in 32-bit mode
+-       - signed Int32 in 64-bit mode
+-     We use (mod 4 GB) check for fileSize.
+-     Use fileSize up to 2 GB, if you want to support 32-bit and 64-bit code conversion. */
+-  UInt32 fileIp;
+-  UInt32 fileSize;    /* (fileSize <= ((UInt32)1 << 31)), 0 means no_limit */
+-  UInt32 relatLimit;  /* (relatLimit <= ((UInt32)1 << 31)), 0 means desable_conversion */
++  /* (ip64) and (fileIp64) correspond to virtual source stream position
++     that doesn't include data in temp[] */
++  CBcj2Enc_ip_unsigned ip64;         /* current (ip) position */
++  CBcj2Enc_ip_unsigned fileIp64;     /* start (ip) position of current file */
++  CBcj2Enc_ip_unsigned fileSize64_minus1;   /* size of current file (for conversion limitation) */
++  UInt32 relatLimit;  /* (relatLimit <= ((UInt32)1 << 31)) : 0 means disable_conversion */
++  // UInt32 relatExcludeBits;
+ 
+   UInt32 tempTarget;
+-  unsigned tempPos;
+-  Byte temp[4 * 2];
+-
+-  unsigned flushPos;
+-  
+-  UInt16 probs[2 + 256];
++  unsigned tempPos; /* the number of bytes that were copied to temp[] buffer
++                       (tempPos <= 4) outside of Bcj2Enc_Encode() */
++  // Byte temp[4]; // for marker version
++  Byte temp[8];
++  CBcj2Prob probs[2 + 256];
+ } CBcj2Enc;
+ 
+ void Bcj2Enc_Init(CBcj2Enc *p);
+-void Bcj2Enc_Encode(CBcj2Enc *p);
+ 
+-#define Bcj2Enc_Get_InputData_Size(p) ((SizeT)((p)->srcLim - (p)->src) + (p)->tempPos)
+-#define Bcj2Enc_IsFinished(p) ((p)->flushPos == 5)
+ 
++/*
++Bcj2Enc_Encode(): at exit:
++  p->state <  BCJ2_NUM_STREAMS    : we need more buffer space for output stream
++                                    (bufs[p->state] == lims[p->state])
++  p->state == BCJ2_ENC_STATE_ORIG : we need more data in input src stream
++                                    (src == srcLim)
++  p->state == BCJ2_ENC_STATE_FINISHED : after fully encoded stream
++*/
++void Bcj2Enc_Encode(CBcj2Enc *p);
+ 
+-#define BCJ2_RELAT_LIMIT_NUM_BITS 26
+-#define BCJ2_RELAT_LIMIT ((UInt32)1 << BCJ2_RELAT_LIMIT_NUM_BITS)
++/* Bcj2Enc encoder can look ahead for up to 4 bytes of source stream.
++   CBcj2Enc::tempPos : is the number of bytes that were copied from input stream to temp[] buffer.
++   (CBcj2Enc::src) after Bcj2Enc_Encode() is starting position after
++   fully processed data and after data copied to temp buffer.
++   So if the caller needs to get real number of fully processed input
++   bytes (without look ahead data in temp buffer),
++   the caller must subtract (CBcj2Enc::tempPos) value from processed size
++   value that is calculated based on current (CBcj2Enc::src):
++     cur_processed_pos = Calc_Big_Processed_Pos(enc.src) -
++        Bcj2Enc_Get_AvailInputSize_in_Temp(&enc);
++*/
++/* get the size of input data that was stored in temp[] buffer: */
++#define Bcj2Enc_Get_AvailInputSize_in_Temp(p) ((p)->tempPos)
+ 
+-/* limit for CBcj2Enc::fileSize variable */
+-#define BCJ2_FileSize_MAX ((UInt32)1 << 31)
++#define Bcj2Enc_IsFinished(p) ((p)->flushRem == 0)
++
++/* Note : the decoder supports overlapping of marker (0f 80).
++   But we can eliminate such overlapping cases by setting
++   the limit for relative offset conversion as
++     CBcj2Enc::relatLimit <= (0x0f << 24) == (240 MiB)
++*/
++/* default value for CBcj2Enc::relatLimit */
++#define BCJ2_ENC_RELAT_LIMIT_DEFAULT  ((UInt32)0x0f << 24)
++#define BCJ2_ENC_RELAT_LIMIT_MAX      ((UInt32)1 << 31)
++// #define BCJ2_RELAT_EXCLUDE_NUM_BITS 5
+ 
+ EXTERN_C_END
+ 
+diff --git a/third_party/lzma_sdk/C/Bra.c b/third_party/lzma_sdk/C/Bra.c
+index 3b854d9cad7fba3cd7986cde50d17b23f5d7d24f..e61edf8f12e1a7710d1e108923bdc5ded259280f 100644
+--- a/third_party/lzma_sdk/C/Bra.c
++++ b/third_party/lzma_sdk/C/Bra.c
+@@ -1,230 +1,709 @@
+-/* Bra.c -- Converters for RISC code
+-2021-02-09 : Igor Pavlov : Public domain */
++/* Bra.c -- Branch converters for RISC code
++2024-01-20 : Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+ 
+-#include "CpuArch.h"
+ #include "Bra.h"
++#include "RotateDefs.h"
++#include "CpuArch.h"
++
++#if defined(MY_CPU_SIZEOF_POINTER) \
++    && ( MY_CPU_SIZEOF_POINTER == 4 \
++      || MY_CPU_SIZEOF_POINTER == 8)
++  #define BR_CONV_USE_OPT_PC_PTR
++#endif
++
++#ifdef BR_CONV_USE_OPT_PC_PTR
++#define BR_PC_INIT  pc -= (UInt32)(SizeT)p;
++#define BR_PC_GET   (pc + (UInt32)(SizeT)p)
++#else
++#define BR_PC_INIT  pc += (UInt32)size;
++#define BR_PC_GET   (pc - (UInt32)(SizeT)(lim - p))
++// #define BR_PC_INIT
++// #define BR_PC_GET   (pc + (UInt32)(SizeT)(p - data))
++#endif
++
++#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
++// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;
++
++#define Z7_BRANCH_CONV(name) z7_ ## name
++
++#define Z7_BRANCH_FUNC_MAIN(name) \
++static \
++Z7_FORCE_INLINE \
++Z7_ATTRIB_NO_VECTOR \
++Byte *Z7_BRANCH_CONV(name)(Byte *p, SizeT size, UInt32 pc, int encoding)
++
++#define Z7_BRANCH_FUNC_IMP(name, m, encoding) \
++Z7_NO_INLINE \
++Z7_ATTRIB_NO_VECTOR \
++Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \
++  { return Z7_BRANCH_CONV(name)(data, size, pc, encoding); } \
++
++#ifdef Z7_EXTRACT_ONLY
++#define Z7_BRANCH_FUNCS_IMP(name) \
++  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0)
++#else
++#define Z7_BRANCH_FUNCS_IMP(name) \
++  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0) \
++  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC_2, 1)
++#endif
+ 
+-SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
++#if defined(__clang__)
++#define BR_EXTERNAL_FOR
++#define BR_NEXT_ITERATION  continue;
++#else
++#define BR_EXTERNAL_FOR    for (;;)
++#define BR_NEXT_ITERATION  break;
++#endif
++
++#if defined(__clang__) && (__clang_major__ >= 8) \
++  || defined(__GNUC__) && (__GNUC__ >= 1000) \
++  // GCC is not good for __builtin_expect() here
++  /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
++  // #define Z7_unlikely [[unlikely]]
++  // #define Z7_LIKELY(x)   (__builtin_expect((x), 1))
++  #define Z7_UNLIKELY(x) (__builtin_expect((x), 0))
++  // #define Z7_likely [[likely]]
++#else
++  // #define Z7_LIKELY(x)   (x)
++  #define Z7_UNLIKELY(x) (x)
++  // #define Z7_likely
++#endif
++
++
++Z7_BRANCH_FUNC_MAIN(BranchConv_ARM64)
+ {
+-  Byte *p;
++  // Byte *p = data;
+   const Byte *lim;
+-  size &= ~(size_t)3;
+-  ip += 4;
+-  p = data;
+-  lim = data + size;
++  const UInt32 flag = (UInt32)1 << (24 - 4);
++  const UInt32 mask = ((UInt32)1 << 24) - (flag << 1);
++  size &= ~(SizeT)3;
++  // if (size == 0) return p;
++  lim = p + size;
++  BR_PC_INIT
++  pc -= 4;  // because (p) will point to next instruction
++
++  BR_EXTERNAL_FOR
++  {
++    // Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
++    for (;;)
++    {
++      UInt32 v;
++      if Z7_UNLIKELY(p == lim)
++        return p;
++      v = GetUi32a(p);
++      p += 4;
++      if Z7_UNLIKELY(((v - 0x94000000) & 0xfc000000) == 0)
++      {
++        UInt32 c = BR_PC_GET >> 2;
++        BR_CONVERT_VAL(v, c)
++        v &= 0x03ffffff;
++        v |= 0x94000000;
++        SetUi32a(p - 4, v)
++        BR_NEXT_ITERATION
++      }
++      // v = rotlFixed(v, 8);  v += (flag << 8) - 0x90;  if Z7_UNLIKELY((v & ((mask << 8) + 0x9f)) == 0)
++      v -= 0x90000000;  if Z7_UNLIKELY((v & 0x9f000000) == 0)
++      {
++        UInt32 z, c;
++        // v = rotrFixed(v, 8);
++        v += flag; if Z7_UNLIKELY(v & mask) continue;
++        z = (v & 0xffffffe0) | (v >> 26);
++        c = (BR_PC_GET >> (12 - 3)) & ~(UInt32)7;
++        BR_CONVERT_VAL(z, c)
++        v &= 0x1f;
++        v |= 0x90000000;
++        v |= z << 26;
++        v |= 0x00ffffe0 & ((z & (((flag << 1) - 1))) - flag);
++        SetUi32a(p - 4, v)
++      }
++    }
++  }
++}
++Z7_BRANCH_FUNCS_IMP(BranchConv_ARM64)
+ 
+-  if (encoding)
+ 
++Z7_BRANCH_FUNC_MAIN(BranchConv_ARM)
++{
++  // Byte *p = data;
++  const Byte *lim;
++  size &= ~(SizeT)3;
++  lim = p + size;
++  BR_PC_INIT
++  /* in ARM: branch offset is relative to the +2 instructions from current instruction.
++     (p) will point to next instruction */
++  pc += 8 - 4;
++
+   for (;;)
+   {
+     for (;;)
+     {
+-      if (p >= lim)
+-        return (SizeT)(p - data);
+-      p += 4;
+-      if (p[-1] == 0xEB)
+-        break;
++      if Z7_UNLIKELY(p >= lim) { return p; }  p += 4;  if Z7_UNLIKELY(p[-1] == 0xeb) break;
++      if Z7_UNLIKELY(p >= lim) { return p; }  p += 4;  if Z7_UNLIKELY(p[-1] == 0xeb) break;
+     }
+     {
+-      UInt32 v = GetUi32(p - 4);
+-      v <<= 2;
+-        v += ip + (UInt32)(p - data);
+-      v >>= 2;
+-      v &= 0x00FFFFFF;
+-      v |= 0xEB000000;
+-      SetUi32(p - 4, v);
++      UInt32 v = GetUi32a(p - 4);
++      UInt32 c = BR_PC_GET >> 2;
++      BR_CONVERT_VAL(v, c)
++      v &= 0x00ffffff;
++      v |= 0xeb000000;
++      SetUi32a(p - 4, v)
+     }
+   }
++}
++Z7_BRANCH_FUNCS_IMP(BranchConv_ARM)
+ 
++
++Z7_BRANCH_FUNC_MAIN(BranchConv_PPC)
++{
++  // Byte *p = data;
++  const Byte *lim;
++  size &= ~(SizeT)3;
++  lim = p + size;
++  BR_PC_INIT
++  pc -= 4;  // because (p) will point to next instruction
++
+   for (;;)
+   {
++    UInt32 v;
+     for (;;)
+     {
+-      if (p >= lim)
+-        return (SizeT)(p - data);
++      if Z7_UNLIKELY(p == lim)
++        return p;
++      // v = GetBe32a(p);
++      v = *(UInt32 *)(void *)p;
+       p += 4;
+-      if (p[-1] == 0xEB)
+-        break;
++      // if ((v & 0xfc000003) == 0x48000001) break;
++      // if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) break;
++      if Z7_UNLIKELY(
++          ((v - Z7_CONV_BE_TO_NATIVE_CONST32(0x48000001))
++              & Z7_CONV_BE_TO_NATIVE_CONST32(0xfc000003)) == 0) break;
+     }
+     {
+-      UInt32 v = GetUi32(p - 4);
+-      v <<= 2;
+-        v -= ip + (UInt32)(p - data);
+-      v >>= 2;
+-      v &= 0x00FFFFFF;
+-      v |= 0xEB000000;
+-      SetUi32(p - 4, v);
++      v = Z7_CONV_NATIVE_TO_BE_32(v);
++      {
++        UInt32 c = BR_PC_GET;
++        BR_CONVERT_VAL(v, c)
++      }
++      v &= 0x03ffffff;
++      v |= 0x48000000;
++      SetBe32a(p - 4, v)
+     }
+   }
+ }
++Z7_BRANCH_FUNCS_IMP(BranchConv_PPC)
++
+ 
++#ifdef Z7_CPU_FAST_ROTATE_SUPPORTED
++#define BR_SPARC_USE_ROTATE
++#endif
+ 
+-SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
++Z7_BRANCH_FUNC_MAIN(BranchConv_SPARC)
+ {
+-  Byte *p;
++  // Byte *p = data;
+   const Byte *lim;
+-  size &= ~(size_t)1;
+-  p = data;
+-  lim = data + size - 4;
+-
+-  if (encoding)
+-  
++  const UInt32 flag = (UInt32)1 << 22;
++  size &= ~(SizeT)3;
++  lim = p + size;
++  BR_PC_INIT
++  pc -= 4;  // because (p) will point to next instruction
+   for (;;)
+   {
+-    UInt32 b1;
++    UInt32 v;
+     for (;;)
+     {
+-      UInt32 b3;
+-      if (p > lim)
+-        return (SizeT)(p - data);
+-      b1 = p[1];
+-      b3 = p[3];
+-      p += 2;
+-      b1 ^= 8;
+-      if ((b3 & b1) >= 0xF8)
++      if Z7_UNLIKELY(p == lim)
++        return p;
++      /* // the code without GetBe32a():
++      { const UInt32 v = GetUi16a(p) & 0xc0ff; p += 4; if (v == 0x40 || v == 0xc07f) break; }
++      */
++      v = GetBe32a(p);
++      p += 4;
++    #ifdef BR_SPARC_USE_ROTATE
++      v = rotlFixed(v, 2);
++      v += (flag << 2) - 1;
++      if Z7_UNLIKELY((v & (3 - (flag << 3))) == 0)
++    #else
++      v += (UInt32)5 << 29;
++      v ^= (UInt32)7 << 29;
++      v += flag;
++      if Z7_UNLIKELY((v & (0 - (flag << 1))) == 0)
++    #endif
+         break;
+     }
+     {
+-      UInt32 v =
+-             ((UInt32)b1 << 19)
+-          + (((UInt32)p[1] & 0x7) << 8)
+-          + (((UInt32)p[-2] << 11))
+-          + (p[0]);
+-
+-      p += 2;
++      // UInt32 v = GetBe32a(p - 4);
++    #ifndef BR_SPARC_USE_ROTATE
++      v <<= 2;
++    #endif
+       {
+-        UInt32 cur = (ip + (UInt32)(p - data)) >> 1;
+-          v += cur;
++        UInt32 c = BR_PC_GET;
++        BR_CONVERT_VAL(v, c)
+       }
+-
+-      p[-4] = (Byte)(v >> 11);
+-      p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7));
+-      p[-2] = (Byte)v;
+-      p[-1] = (Byte)(0xF8 | (v >> 8));
++      v &= (flag << 3) - 1;
++    #ifdef BR_SPARC_USE_ROTATE
++      v -= (flag << 2) - 1;
++      v = rotrFixed(v, 2);
++    #else
++      v -= (flag << 2);
++      v >>= 2;
++      v |= (UInt32)1 << 30;
++    #endif
++      SetBe32a(p - 4, v)
+     }
+   }
++}
++Z7_BRANCH_FUNCS_IMP(BranchConv_SPARC)
++
++
++Z7_BRANCH_FUNC_MAIN(BranchConv_ARMT)
++{
++  // Byte *p = data;
++  Byte *lim;
++  size &= ~(SizeT)1;
++  // if (size == 0) return p;
++  if (size <= 2) return p;
++  size -= 2;
++  lim = p + size;
++  BR_PC_INIT
++  /* in ARM: branch offset is relative to the +2 instructions from current instruction.
++     (p) will point to the +2 instructions from current instruction */
++  // pc += 4 - 4;
++  // if (encoding) pc -= 0xf800 << 1; else pc += 0xf800 << 1;
++  // #define ARMT_TAIL_PROC { goto armt_tail; }
++  #define ARMT_TAIL_PROC { return p; }
+   
+-  for (;;)
++  do
+   {
+-    UInt32 b1;
++    /* in MSVC 32-bit x86 compilers:
++       UInt32 version : it loads value from memory with movzx
++       Byte   version : it loads value to 8-bit register (AL/CL)
++       movzx version is slightly faster in some cpus
++    */
++    unsigned b1;
++    // Byte / unsigned
++    b1 = p[1];
++    // optimized version to reduce one (p >= lim) check:
++    // unsigned a1 = p[1];  b1 = p[3];  p += 2;  if Z7_LIKELY((b1 & (a1 ^ 8)) < 0xf8)
+     for (;;)
+     {
+-      UInt32 b3;
+-      if (p > lim)
+-        return (SizeT)(p - data);
+-      b1 = p[1];
+-      b3 = p[3];
+-      p += 2;
+-      b1 ^= 8;
+-      if ((b3 & b1) >= 0xF8)
+-        break;
++      unsigned b3; // Byte / UInt32
++      /* (Byte)(b3) normalization can use low byte computations in MSVC.
++         It gives smaller code, and no loss of speed in some compilers/cpus.
++         But new MSVC 32-bit x86 compilers use a slower load
++         from memory to low byte register in that case.
++         So we try to use full 32-bit computations for faster code.
++      */
++      // if (p >= lim) { ARMT_TAIL_PROC }  b3 = b1 + 8;  b1 = p[3];  p += 2;  if ((b3 & b1) >= 0xf8) break;
++      if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC }  b3 = p[3];  p += 2;  if Z7_UNLIKELY((b3 & (b1 ^ 8)) >= 0xf8) break;
++      if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC }  b1 = p[3];  p += 2;  if Z7_UNLIKELY((b1 & (b3 ^ 8)) >= 0xf8) break;
+     }
+     {
++      /* we can adjust pc for (0xf800) to get rid of the (& 0x7FF) operation.
++         But gcc/clang for arm64 can use bfi instruction for full code here */
+       UInt32 v =
+-             ((UInt32)b1 << 19)
++          ((UInt32)GetUi16a(p - 2) << 11) |
++          ((UInt32)GetUi16a(p) & 0x7FF);
++      /*
++      UInt32 v =
++            ((UInt32)p[1 - 2] << 19)
+           + (((UInt32)p[1] & 0x7) << 8)
+           + (((UInt32)p[-2] << 11))
+           + (p[0]);
+-
++      */
+       p += 2;
+       {
+-        UInt32 cur = (ip + (UInt32)(p - data)) >> 1;
+-          v -= cur;
++        UInt32 c = BR_PC_GET >> 1;
++        BR_CONVERT_VAL(v, c)
+       }
+-
++      SetUi16a(p - 4, (UInt16)(((v >> 11) & 0x7ff) | 0xf000))
++      SetUi16a(p - 2, (UInt16)(v | 0xf800))
+       /*
+-      SetUi16(p - 4, (UInt16)(((v >> 11) & 0x7FF) | 0xF000));
+-      SetUi16(p - 2, (UInt16)(v | 0xF800));
+-      */
+-      
+       p[-4] = (Byte)(v >> 11);
+-      p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7));
++      p[-3] = (Byte)(0xf0 | ((v >> 19) & 0x7));
+       p[-2] = (Byte)v;
+-      p[-1] = (Byte)(0xF8 | (v >> 8));
++      p[-1] = (Byte)(0xf8 | (v >> 8));
++      */
+     }
+   }
++  while (p < lim);
++  return p;
++  // armt_tail:
++  // if ((Byte)((lim[1] & 0xf8)) != 0xf0) { lim += 2; }  return lim;
++  // return (Byte *)(lim + ((Byte)((lim[1] ^ 0xf0) & 0xf8) == 0 ? 0 : 2));
++  // return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2));
++  // return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2));
+ }
++Z7_BRANCH_FUNCS_IMP(BranchConv_ARMT)
+ 
+ 
+-SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
++// #define BR_IA64_NO_INLINE
++
++Z7_BRANCH_FUNC_MAIN(BranchConv_IA64)
+ {
+-  Byte *p;
++  // Byte *p = data;
+   const Byte *lim;
+-  size &= ~(size_t)3;
+-  ip -= 4;
+-  p = data;
+-  lim = data + size;
+-
++  size &= ~(SizeT)15;
++  lim = p + size;
++  pc -= 1 << 4;
++  pc >>= 4 - 1;
++  // pc -= 1 << 1;
++
+   for (;;)
+   {
++    unsigned m;
+     for (;;)
+     {
+-      if (p >= lim)
+-        return (SizeT)(p - data);
+-      p += 4;
+-      /* if ((v & 0xFC000003) == 0x48000001) */
+-      if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1)
++      if Z7_UNLIKELY(p == lim)
++        return p;
++      m = (unsigned)((UInt32)0x334b0000 >> (*p & 0x1e));
++      p += 16;
++      pc += 1 << 1;
++      if (m &= 3)
+         break;
+     }
+     {
+-      UInt32 v = GetBe32(p - 4);
+-      if (encoding)
+-        v += ip + (UInt32)(p - data);
+-      else
+-        v -= ip + (UInt32)(p - data);
+-      v &= 0x03FFFFFF;
+-      v |= 0x48000000;
+-      SetBe32(p - 4, v);
++      p += (ptrdiff_t)m * 5 - 20; // negative value is expected here.
++      do
++      {
++        const UInt32 t =
++          #if defined(MY_CPU_X86_OR_AMD64)
++            // we use 32-bit load here to reduce code size on x86:
++            GetUi32(p);
++          #else
++            GetUi16(p);
++          #endif
++        UInt32 z = GetUi32(p + 1) >> m;
++        p += 5;
++        if (((t >> m) & (0x70 << 1)) == 0
++            && ((z - (0x5000000 << 1)) & (0xf000000 << 1)) == 0)
++        {
++          UInt32 v = (UInt32)((0x8fffff << 1) | 1) & z;
++          z ^= v;
++        #ifdef BR_IA64_NO_INLINE
++          v |= (v & ((UInt32)1 << (23 + 1))) >> 3;
++          {
++            UInt32 c = pc;
++            BR_CONVERT_VAL(v, c)
++          }
++          v &= (0x1fffff << 1) | 1;
++        #else
++          {
++            if (encoding)
++            {
++              // pc &= ~(0xc00000 << 1); // we just need to clear at least 2 bits
++              pc &= (0x1fffff << 1) | 1;
++              v += pc;
++            }
++            else
++            {
++              // pc |= 0xc00000 << 1; // we need to set at least 2 bits
++              pc |= ~(UInt32)((0x1fffff << 1) | 1);
++              v -= pc;
++            }
++          }
++          v &= ~(UInt32)(0x600000 << 1);
++        #endif
++          v += (0x700000 << 1);
++          v &= (0x8fffff << 1) | 1;
++          z |= v;
++          z <<= m;
++          SetUi32(p + 1 - 5, z)
++        }
++        m++;
++      }
++      while (m &= 3); // while (m < 4);
+     }
+   }
+ }
++Z7_BRANCH_FUNCS_IMP(BranchConv_IA64)
++
++
++#define BR_CONVERT_VAL_ENC(v)  v += BR_PC_GET;
++#define BR_CONVERT_VAL_DEC(v)  v -= BR_PC_GET;
+ 
++#if 1 && defined(MY_CPU_LE_UNALIGN)
++  #define RISCV_USE_UNALIGNED_LOAD
++#endif
+ 
+-SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
++#ifdef RISCV_USE_UNALIGNED_LOAD
++  #define RISCV_GET_UI32(p)      GetUi32(p)
++  #define RISCV_SET_UI32(p, v)   { SetUi32(p, v) }
++#else
++  #define RISCV_GET_UI32(p) \
++    ((UInt32)GetUi16a(p) + \
++    ((UInt32)GetUi16a((p) + 2) << 16))
++  #define RISCV_SET_UI32(p, v) { \
++    SetUi16a(p, (UInt16)(v)) \
++    SetUi16a((p) + 2, (UInt16)(v >> 16)) }
++#endif
++
++#if 1 && defined(MY_CPU_LE)
++  #define RISCV_USE_16BIT_LOAD
++#endif
++
++#ifdef RISCV_USE_16BIT_LOAD
++  #define RISCV_LOAD_VAL(p)  GetUi16a(p)
++#else
++  #define RISCV_LOAD_VAL(p)  (*(p))
++#endif
++
++#define RISCV_INSTR_SIZE  2
++#define RISCV_STEP_1      (4 + RISCV_INSTR_SIZE)
++#define RISCV_STEP_2      4
++#define RISCV_REG_VAL     (2 << 7)
++#define RISCV_CMD_VAL     3
++#if 1
++  // for code size optimization:
++  #define RISCV_DELTA_7F  0x7f
++#else
++  #define RISCV_DELTA_7F  0
++#endif
++
++#define RISCV_CHECK_1(v, b) \
++    (((((b) - RISCV_CMD_VAL) ^ ((v) << 8)) & (0xf8000 + RISCV_CMD_VAL)) == 0)
++
++#if 1
++  #define RISCV_CHECK_2(v, r) \
++    ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL | 8)) \
++           << 18) \
++     < ((r) & 0x1d))
++#else
++  // this branch gives larger code, because
++  // compilers generate larger code for big constants.
++  #define RISCV_CHECK_2(v, r) \
++    ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
++           & ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
++     < ((r) & 0x1d))
++#endif
++
++
++#define RISCV_SCAN_LOOP \
++  Byte *lim; \
++  size &= ~(SizeT)(RISCV_INSTR_SIZE - 1); \
++  if (size <= 6) return p; \
++  size -= 6; \
++  lim = p + size; \
++  BR_PC_INIT \
++  for (;;) \
++  { \
++    UInt32 a, v; \
++    /* Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE */ \
++    for (;;) \
++    { \
++      if Z7_UNLIKELY(p >= lim) { return p; } \
++      a = (RISCV_LOAD_VAL(p) ^ 0x10u) + 1; \
++      if ((a & 0x77) == 0) break; \
++      a = (RISCV_LOAD_VAL(p + RISCV_INSTR_SIZE) ^ 0x10u) + 1; \
++      p += RISCV_INSTR_SIZE * 2; \
++      if ((a & 0x77) == 0) \
++      { \
++        p -= RISCV_INSTR_SIZE; \
++        if Z7_UNLIKELY(p >= lim) { return p; } \
++        break; \
++      } \
++    }
++// (xx6f ^ 10) + 1 = xx7f + 1 = xx80       : JAL
++// (xxef ^ 10) + 1 = xxff + 1 = xx00 + 100 : JAL
++// (xx17 ^ 10) + 1 = xx07 + 1 = xx08       : AUIPC
++// (xx97 ^ 10) + 1 = xx87 + 1 = xx88       : AUIPC
++
++Byte * Z7_BRANCH_CONV_ENC(RISCV)(Byte *p, SizeT size, UInt32 pc)
+ {
+-  Byte *p;
+-  const Byte *lim;
+-  size &= ~(size_t)3;
+-  ip -= 4;
+-  p = data;
+-  lim = data + size;
++  RISCV_SCAN_LOOP
++    v = a;
++    a = RISCV_GET_UI32(p);
++#ifndef RISCV_USE_16BIT_LOAD
++    v += (UInt32)p[1] << 8;
++#endif
+ 
+-  for (;;)
+-  {
+-    for (;;)
++    if ((v & 8) == 0) // JAL
+     {
+-      if (p >= lim)
+-        return (SizeT)(p - data);
+-      /*
+-      v = GetBe32(p);
+-      p += 4;
+-      m = v + ((UInt32)5 << 29);
+-      m ^= (UInt32)7 << 29;
+-      m += (UInt32)1 << 22;
+-      if ((m & ((UInt32)0x1FF << 23)) == 0)
+-        break;
+-      */
++      if ((v - (0x100 /* - RISCV_DELTA_7F */)) & 0xd80)
++      {
++        p += RISCV_INSTR_SIZE;
++        continue;
++      }
++      {
++        v = ((a &    1u << 31) >> 11)
++          | ((a & 0x3ff << 21) >> 20)
++          | ((a &     1 << 20) >> 9)
++          |  (a &  0xff << 12);
++        BR_CONVERT_VAL_ENC(v)
++        // ((v & 1) == 0)
++        // v: bits [1 : 20] contain offset bits
++#if 0 && defined(RISCV_USE_UNALIGNED_LOAD)
++        a &= 0xfff;
++        a |= ((UInt32)(v << 23))
++          |  ((UInt32)(v <<  7) & ((UInt32)0xff << 16))
++          |  ((UInt32)(v >>  5) & ((UInt32)0xf0 << 8));
++        RISCV_SET_UI32(p, a)
++#else // aligned
++#if 0
++        SetUi16a(p, (UInt16)(((v >> 5) & 0xf000) | (a & 0xfff)))
++#else
++        p[1] = (Byte)(((v >> 13) & 0xf0) | ((a >> 8) & 0xf));
++#endif
++
++#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
++        v <<= 15;
++        v = Z7_BSWAP32(v);
++        SetUi16a(p + 2, (UInt16)v)
++#else
++        p[2] = (Byte)(v >> 9);
++        p[3] = (Byte)(v >> 1);
++#endif
++#endif // aligned
++      }
+       p += 4;
+-      if ((p[-4] == 0x40 && (p[-3] & 0xC0) == 0) ||
+-          (p[-4] == 0x7F && (p[-3] >= 0xC0)))
+-        break;
++      continue;
++    } // JAL
++
++    {
++      // AUIPC
++      if (v & 0xe80)  // (not x0) and (not x2)
++      {
++        const UInt32 b = RISCV_GET_UI32(p + 4);
++        if (RISCV_CHECK_1(v, b))
++        {
++          {
++            const UInt32 temp = (b << 12) | (0x17 + RISCV_REG_VAL);
++            RISCV_SET_UI32(p, temp)
++          }
++          a &= 0xfffff000;
++          {
++#if 1
++          const int t = -1 >> 1;
++          if (t != -1)
++            a += (b >> 20) - ((b >> 19) & 0x1000); // arithmetic right shift emulation
++          else
++#endif
++            a += (UInt32)((Int32)b >> 20); // arithmetic right shift (sign-extension).
++          }
++          BR_CONVERT_VAL_ENC(a)
++#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
++          a = Z7_BSWAP32(a);
++          RISCV_SET_UI32(p + 4, a)
++#else
++          SetBe32(p + 4, a)
++#endif
++          p += 8;
++        }
++        else
++          p += RISCV_STEP_1;
++      }
++      else
++      {
++        UInt32 r = a >> 27;
++        if (RISCV_CHECK_2(v, r))
++        {
++          v = RISCV_GET_UI32(p + 4);
++          r = (r << 7) + 0x17 + (v & 0xfffff000);
++          a = (a >> 12) | (v << 20);
++          RISCV_SET_UI32(p, r)
++          RISCV_SET_UI32(p + 4, a)
++          p += 8;
++        }
++        else
++          p += RISCV_STEP_2;
++      }
+     }
++  } // for
++}
++
++
++Byte * Z7_BRANCH_CONV_DEC(RISCV)(Byte *p, SizeT size, UInt32 pc)
++{
++  RISCV_SCAN_LOOP
++#ifdef RISCV_USE_16BIT_LOAD
++    if ((a & 8) == 0)
+     {
+-      UInt32 v = GetBe32(p - 4);
+-      v <<= 2;
+-      if (encoding)
+-        v += ip + (UInt32)(p - data);
++#else
++    v = a;
++    a += (UInt32)p[1] << 8;
++    if ((v & 8) == 0)
++    {
++#endif
++      // JAL
++      a -= 0x100 - RISCV_DELTA_7F;
++      if (a & 0xd80)
++      {
++        p += RISCV_INSTR_SIZE;
++        continue;
++      }
++      {
++        const UInt32 a_old = (a + (0xef - RISCV_DELTA_7F)) & 0xfff;
++#if 0 // unaligned
++        a = GetUi32(p);
++        v = (UInt32)(a >> 23) & ((UInt32)0xff << 1)
++          | (UInt32)(a >>  7) & ((UInt32)0xff << 9)
++#elif 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
++        v = GetUi16a(p + 2);
++        v = Z7_BSWAP32(v) >> 15
++#else
++        v = (UInt32)p[3] << 1
++          | (UInt32)p[2] << 9
++#endif
++          | (UInt32)((a & 0xf000) << 5);
++        BR_CONVERT_VAL_DEC(v)
++        a = a_old
++          | (v << 11 &    1u << 31)
++          | (v << 20 & 0x3ff << 21)
++          | (v <<  9 &     1 << 20)
++          | (v       &  0xff << 12);
++        RISCV_SET_UI32(p, a)
++      }
++      p += 4;
++      continue;
++    } // JAL
++
++    {
++      // AUIPC
++      v = a;
++#if 1 && defined(RISCV_USE_UNALIGNED_LOAD)
++      a = GetUi32(p);
++#else
++      a |= (UInt32)GetUi16a(p + 2) << 16;
++#endif
++      if ((v & 0xe80) == 0)  // x0/x2
++      {
++        const UInt32 r = a >> 27;
++        if (RISCV_CHECK_2(v, r))
++        {
++          UInt32 b;
++#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
++          b = RISCV_GET_UI32(p + 4);
++          b = Z7_BSWAP32(b);
++#else
++          b = GetBe32(p + 4);
++#endif
++          v = a >> 12;
++          BR_CONVERT_VAL_DEC(b)
++          a = (r << 7) + 0x17;
++          a += (b + 0x800) & 0xfffff000;
++          v |= b << 20;
++          RISCV_SET_UI32(p, a)
++          RISCV_SET_UI32(p + 4, v)
++          p += 8;
++        }
++        else
++          p += RISCV_STEP_2;
++      }
+       else
+-        v -= ip + (UInt32)(p - data);
+-      
+-      v &= 0x01FFFFFF;
+-      v -= (UInt32)1 << 24;
+-      v ^= 0xFF000000;
+-      v >>= 2;
+-      v |= 0x40000000;
+-      SetBe32(p - 4, v);
++      {
++        const UInt32 b = RISCV_GET_UI32(p + 4);
++        if (!RISCV_CHECK_1(v, b))
++          p += RISCV_STEP_1;
++        else
++        {
++          v = (a & 0xfffff000) | (b >> 20);
++          a = (b << 12) | (0x17 + RISCV_REG_VAL);
++          RISCV_SET_UI32(p, a)
++          RISCV_SET_UI32(p + 4, v)
++          p += 8;
++        }
++      }
+     }
+-  }
++  } // for
+ }
+diff --git a/third_party/lzma_sdk/C/Bra.h b/third_party/lzma_sdk/C/Bra.h
+index 855e37a6b5018e07b5e049b014401069d2158747..b47112cedc3dbe14079af262dd17aa59772dc335 100644
+--- a/third_party/lzma_sdk/C/Bra.h
++++ b/third_party/lzma_sdk/C/Bra.h
+@@ -1,64 +1,105 @@
+ /* Bra.h -- Branch converters for executables
+-2013-01-18 : Igor Pavlov : Public domain */
++2024-01-20 : Igor Pavlov : Public domain */
+ 
+-#ifndef __BRA_H
+-#define __BRA_H
++#ifndef ZIP7_INC_BRA_H
++#define ZIP7_INC_BRA_H
+ 
+ #include "7zTypes.h"
+ 
+ EXTERN_C_BEGIN
+ 
++/* #define PPC BAD_PPC_11 // for debug */
++
++#define Z7_BRANCH_CONV_DEC_2(name)  z7_ ## name ## _Dec
++#define Z7_BRANCH_CONV_ENC_2(name)  z7_ ## name ## _Enc
++#define Z7_BRANCH_CONV_DEC(name)    Z7_BRANCH_CONV_DEC_2(BranchConv_ ## name)
++#define Z7_BRANCH_CONV_ENC(name)    Z7_BRANCH_CONV_ENC_2(BranchConv_ ## name)
++#define Z7_BRANCH_CONV_ST_DEC(name) z7_BranchConvSt_ ## name ## _Dec
++#define Z7_BRANCH_CONV_ST_ENC(name) z7_BranchConvSt_ ## name ## _Enc
++
++#define Z7_BRANCH_CONV_DECL(name)    Byte * name(Byte *data, SizeT size, UInt32 pc)
++#define Z7_BRANCH_CONV_ST_DECL(name) Byte * name(Byte *data, SizeT size, UInt32 pc, UInt32 *state)
++
++typedef Z7_BRANCH_CONV_DECL(   (*z7_Func_BranchConv));
++typedef Z7_BRANCH_CONV_ST_DECL((*z7_Func_BranchConvSt));
++
++#define Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL 0
++Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_DEC(X86));
++Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_ENC(X86));
++
++#define Z7_BRANCH_FUNCS_DECL(name) \
++Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_DEC_2(name)); \
++Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_ENC_2(name));
++
++Z7_BRANCH_FUNCS_DECL (BranchConv_ARM64)
++Z7_BRANCH_FUNCS_DECL (BranchConv_ARM)
++Z7_BRANCH_FUNCS_DECL (BranchConv_ARMT)
++Z7_BRANCH_FUNCS_DECL (BranchConv_PPC)
++Z7_BRANCH_FUNCS_DECL (BranchConv_SPARC)
++Z7_BRANCH_FUNCS_DECL (BranchConv_IA64)
++Z7_BRANCH_FUNCS_DECL (BranchConv_RISCV)
++
+ /*
+-These functions convert relative addresses to absolute addresses
+-in CALL instructions to increase the compression ratio.
+-  
+-  In:
+-    data     - data buffer
+-    size     - size of data
+-    ip       - current virtual Instruction Pinter (IP) value
+-    state    - state variable for x86 converter
+-    encoding - 0 (for decoding), 1 (for encoding)
+-  
+-  Out:
+-    state    - state variable for x86 converter
++These functions convert data that contain CPU instructions.
++Each such function converts relative addresses to absolute addresses in some
++branch instructions: CALL (in all converters) and JUMP (X86 converter only).
++Such conversion can increase the compression ratio when that data is compressed.
++
++There are 2 types of converters:
++  Byte * Conv_RISC (Byte *data, SizeT size, UInt32 pc);
++  Byte * ConvSt_X86(Byte *data, SizeT size, UInt32 pc, UInt32 *state);
++Each Converter supports 2 versions: one for encoding
++and one for decoding (_Enc/_Dec postfixes in function name).
+ 
+-  Returns:
+-    The number of processed bytes. If you call these functions with multiple calls,
+-    you must start next call with first byte after block of processed bytes.
++In params:
++  data  : data buffer
++  size  : size of data
++  pc    : current virtual Program Counter (Instruction Pointer) value
++In/Out param:
++  state : pointer to state variable (for X86 converter only)
++
++Return:
++  The pointer to position in (data) buffer after last byte that was processed.
++  If the caller calls converter again, it must call it starting with that position.
++  But the caller is allowed to move data in buffer. So pointer to
++  current processed position also will be changed for next call.
++  Also the caller must increase internal (pc) value for next call.
+   
++Each converter has some characteristics: Endian, Alignment, LookAhead.
+   Type   Endian  Alignment  LookAhead
+   
+-  x86    little      1          4
++  X86    little      1          4
+   ARMT   little      2          2
++  RISCV  little      2          6
+   ARM    little      4          0
++  ARM64  little      4          0
+   PPC     big        4          0
+   SPARC   big        4          0
+   IA64   little     16          0
+ 
+-  size must be >= Alignment + LookAhead, if it's not last block.
+-  If (size < Alignment + LookAhead), converter returns 0.
+-
+-  Example:
++  (data) must be aligned for (Alignment).
++  processed size can be calculated as:
++    SizeT processed = Conv(data, size, pc) - data;
++  if (processed == 0)
++    it means that converter needs more data for processing.
++  If (size < Alignment + LookAhead)
++    then (processed == 0) is allowed.
+ 
+-    UInt32 ip = 0;
+-    for ()
+-    {
+-      ; size must be >= Alignment + LookAhead, if it's not last block
+-      SizeT processed = Convert(data, size, ip, 1);
+-      data += processed;
+-      size -= processed;
+-      ip += processed;
+-    }
++Example code for conversion in loop:
++  UInt32 pc = 0;
++  size = 0;
++  for (;;)
++  {
++    size += Load_more_input_data(data + size);
++    SizeT processed = Conv(data, size, pc) - data;
++    if (processed == 0 && no_more_input_data_after_size)
++      break; // we stop convert loop
++    data += processed;
++    size -= processed;
++    pc += processed;
++  }
+ */
+ 
+-#define x86_Convert_Init(state) { state = 0; }
+-SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding);
+-SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+-SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+-SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+-SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+-SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+-
+ EXTERN_C_END
+ 
+ #endif
+diff --git a/third_party/lzma_sdk/C/Bra86.c b/third_party/lzma_sdk/C/Bra86.c
+index 10a0fbd161cb78f8778c4fee5df1461310df337e..d81f392ae040469faf1a3514d1f88c5677b03aae 100644
+--- a/third_party/lzma_sdk/C/Bra86.c
++++ b/third_party/lzma_sdk/C/Bra86.c
+@@ -1,82 +1,187 @@
+-/* Bra86.c -- Converter for x86 code (BCJ)
+-2021-02-09 : Igor Pavlov : Public domain */
++/* Bra86.c -- Branch converter for X86 code (BCJ)
++2023-04-02 : Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+ 
+ #include "Bra.h"
++#include "CpuArch.h"
+ 
+-#define Test86MSByte(b) ((((b) + 1) & 0xFE) == 0)
+ 
+-SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding)
++#if defined(MY_CPU_SIZEOF_POINTER) \
++    && ( MY_CPU_SIZEOF_POINTER == 4 \
++      || MY_CPU_SIZEOF_POINTER == 8)
++  #define BR_CONV_USE_OPT_PC_PTR
++#endif
++
++#ifdef BR_CONV_USE_OPT_PC_PTR
++#define BR_PC_INIT  pc -= (UInt32)(SizeT)p; // (MY_uintptr_t)
++#define BR_PC_GET   (pc + (UInt32)(SizeT)p)
++#else
++#define BR_PC_INIT  pc += (UInt32)size;
++#define BR_PC_GET   (pc - (UInt32)(SizeT)(lim - p))
++// #define BR_PC_INIT
++// #define BR_PC_GET   (pc + (UInt32)(SizeT)(p - data))
++#endif
++
++#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
++// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;
++
++#define Z7_BRANCH_CONV_ST(name) z7_BranchConvSt_ ## name
++
++#define BR86_NEED_CONV_FOR_MS_BYTE(b) ((((b) + 1) & 0xfe) == 0)
++
++#ifdef MY_CPU_LE_UNALIGN
++  #define BR86_PREPARE_BCJ_SCAN  const UInt32 v = GetUi32(p) ^ 0xe8e8e8e8;
++  #define BR86_IS_BCJ_BYTE(n)    ((v & ((UInt32)0xfe << (n) * 8)) == 0)
++#else
++  #define BR86_PREPARE_BCJ_SCAN
++  // bad for MSVC X86 (partial write to byte reg):
++  #define BR86_IS_BCJ_BYTE(n)    ((p[n - 4] & 0xfe) == 0xe8)
++  // bad for old MSVC (partial write to byte reg):
++  // #define BR86_IS_BCJ_BYTE(n)    (((*p ^ 0xe8) & 0xfe) == 0)
++#endif
++
++static
++Z7_FORCE_INLINE
++Z7_ATTRIB_NO_VECTOR
++Byte *Z7_BRANCH_CONV_ST(X86)(Byte *p, SizeT size, UInt32 pc, UInt32 *state, int encoding)
+ {
+-  SizeT pos = 0;
+-  UInt32 mask = *state & 7;
+   if (size < 5)
+-    return 0;
+-  size -= 4;
+-  ip += 5;
++    return p;
++ {
++  // Byte *p = data;
++  const Byte *lim = p + size - 4;
++  unsigned mask = (unsigned)*state;  // & 7;
++#ifdef BR_CONV_USE_OPT_PC_PTR
++  /* if BR_CONV_USE_OPT_PC_PTR is defined: we need to adjust (pc) for (+4),
++        because call/jump offset is relative to the next instruction.
++     if BR_CONV_USE_OPT_PC_PTR is not defined : we don't need to adjust (pc) for (+4),
++         because  BR_PC_GET uses (pc - (lim - p)), and lim was adjusted for (-4) before.
++  */
++  pc += 4;
++#endif
++  BR_PC_INIT
++  goto start;
+ 
+-  for (;;)
++  for (;; mask |= 4)
+   {
+-    Byte *p = data + pos;
+-    const Byte *limit = data + size;
+-    for (; p < limit; p++)
+-      if ((*p & 0xFE) == 0xE8)
+-        break;
+-
++    // cont: mask |= 4;
++  start:
++    if (p >= lim)
++      goto fin;
+     {
+-      SizeT d = (SizeT)(p - data) - pos;
+-      pos = (SizeT)(p - data);
+-      if (p >= limit)
+-      {
+-        *state = (d > 2 ? 0 : mask >> (unsigned)d);
+-        return pos;
+-      }
+-      if (d > 2)
+-        mask = 0;
+-      else
+-      {
+-        mask >>= (unsigned)d;
+-        if (mask != 0 && (mask > 4 || mask == 3 || Test86MSByte(p[(size_t)(mask >> 1) + 1])))
+-        {
+-          mask = (mask >> 1) | 4;
+-          pos++;
+-          continue;
+-        }
+-      }
++      BR86_PREPARE_BCJ_SCAN
++      p += 4;
++      if (BR86_IS_BCJ_BYTE(0))  { goto m0; }  mask >>= 1;
++      if (BR86_IS_BCJ_BYTE(1))  { goto m1; }  mask >>= 1;
++      if (BR86_IS_BCJ_BYTE(2))  { goto m2; }  mask = 0;
++      if (BR86_IS_BCJ_BYTE(3))  { goto a3; }
+     }
++    goto main_loop;
+ 
+-    if (Test86MSByte(p[4]))
++  m0: p--;
++  m1: p--;
++  m2: p--;
++    if (mask == 0)
++      goto a3;
++    if (p > lim)
++      goto fin_p;
++
++    // if (((0x17u >> mask) & 1) == 0)
++    if (mask > 4 || mask == 3)
++    {
++      mask >>= 1;
++      continue; // goto cont;
++    }
++    mask >>= 1;
++    if (BR86_NEED_CONV_FOR_MS_BYTE(p[mask]))
++      continue; // goto cont;
++    // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont;
+     {
+-      UInt32 v = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]);
+-      UInt32 cur = ip + (UInt32)pos;
+-      pos += 5;
+-      if (encoding)
+-        v += cur;
+-      else
+-        v -= cur;
+-      if (mask != 0)
++      UInt32 v = GetUi32(p);
++      UInt32 c;
++      v += (1 << 24);  if (v & 0xfe000000) continue; // goto cont;
++      c = BR_PC_GET;
++      BR_CONVERT_VAL(v, c)
+       {
+-        unsigned sh = (mask & 6) << 2;
+-        if (Test86MSByte((Byte)(v >> sh)))
++        mask <<= 3;
++        if (BR86_NEED_CONV_FOR_MS_BYTE(v >> mask))
+         {
+-          v ^= (((UInt32)0x100 << sh) - 1);
+-          if (encoding)
+-            v += cur;
+-          else
+-            v -= cur;
++          v ^= (((UInt32)0x100 << mask) - 1);
++          #ifdef MY_CPU_X86
++          // for X86 : we can recalculate (c) to reduce register pressure
++            c = BR_PC_GET;
++          #endif
++          BR_CONVERT_VAL(v, c)
+         }
+         mask = 0;
+       }
+-      p[1] = (Byte)v;
+-      p[2] = (Byte)(v >> 8);
+-      p[3] = (Byte)(v >> 16);
+-      p[4] = (Byte)(0 - ((v >> 24) & 1));
++      // v = (v & ((1 << 24) - 1)) - (v & (1 << 24));
++      v &= (1 << 25) - 1;  v -= (1 << 24);
++      SetUi32(p, v)
++      p += 4;
++      goto main_loop;
+     }
+-    else
++
++  main_loop:
++    if (p >= lim)
++      goto fin;
++    for (;;)
+     {
+-      mask = (mask >> 1) | 4;
+-      pos++;
++      BR86_PREPARE_BCJ_SCAN
++      p += 4;
++      if (BR86_IS_BCJ_BYTE(0))  { goto a0; }
++      if (BR86_IS_BCJ_BYTE(1))  { goto a1; }
++      if (BR86_IS_BCJ_BYTE(2))  { goto a2; }
++      if (BR86_IS_BCJ_BYTE(3))  { goto a3; }
++      if (p >= lim)
++        goto fin;
++    }
++
++  a0: p--;
++  a1: p--;
++  a2: p--;
++  a3:
++    if (p > lim)
++      goto fin_p;
++    // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont;
++    {
++      UInt32 v = GetUi32(p);
++      UInt32 c;
++      v += (1 << 24);  if (v & 0xfe000000) continue; // goto cont;
++      c = BR_PC_GET;
++      BR_CONVERT_VAL(v, c)
++      // v = (v & ((1 << 24) - 1)) - (v & (1 << 24));
++      v &= (1 << 25) - 1;  v -= (1 << 24);
++      SetUi32(p, v)
++      p += 4;
++      goto main_loop;
+     }
+   }
++
++fin_p:
++  p--;
++fin:
++  // the following processing for tail is optional and can be commented
++  /*
++  lim += 4;
++  for (; p < lim; p++, mask >>= 1)
++    if ((*p & 0xfe) == 0xe8)
++      break;
++  */
++  *state = (UInt32)mask;
++  return p;
++ }
+ }
++
++
++#define Z7_BRANCH_CONV_ST_FUNC_IMP(name, m, encoding) \
++Z7_NO_INLINE \
++Z7_ATTRIB_NO_VECTOR \
++Byte *m(name)(Byte *data, SizeT size, UInt32 pc, UInt32 *state) \
++  { return Z7_BRANCH_CONV_ST(name)(data, size, pc, state, encoding); }
++
++Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_DEC, 0)
++#ifndef Z7_EXTRACT_ONLY
++Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_ENC, 1)
++#endif
+diff --git a/third_party/lzma_sdk/C/BraIA64.c b/third_party/lzma_sdk/C/BraIA64.c
+index d1dbc62c55bebd5ff5d4d53b51f28aabecab6179..9dfe3e289589d6288227ee819835941b6be7eabf 100644
+--- a/third_party/lzma_sdk/C/BraIA64.c
++++ b/third_party/lzma_sdk/C/BraIA64.c
+@@ -1,53 +1,14 @@
+ /* BraIA64.c -- Converter for IA-64 code
+-2017-01-26 : Igor Pavlov : Public domain */
++2023-02-20 : Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+ 
+-#include "CpuArch.h"
+-#include "Bra.h"
++// the code was moved to Bra.c
+ 
+-SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
+-{
+-  SizeT i;
+-  if (size < 16)
+-    return 0;
+-  size -= 16;
+-  i = 0;
+-  do
+-  {
+-    unsigned m = ((UInt32)0x334B0000 >> (data[i] & 0x1E)) & 3;
+-    if (m)
+-    {
+-      m++;
+-      do
+-      {
+-        Byte *p = data + (i + (size_t)m * 5 - 8);
+-        if (((p[3] >> m) & 15) == 5
+-            && (((p[-1] | ((UInt32)p[0] << 8)) >> m) & 0x70) == 0)
+-        {
+-          unsigned raw = GetUi32(p);
+-          unsigned v = raw >> m;
+-          v = (v & 0xFFFFF) | ((v & (1 << 23)) >> 3);
+-          
+-          v <<= 4;
+-          if (encoding)
+-            v += ip + (UInt32)i;
+-          else
+-            v -= ip + (UInt32)i;
+-          v >>= 4;
+-          
+-          v &= 0x1FFFFF;
+-          v += 0x700000;
+-          v &= 0x8FFFFF;
+-          raw &= ~((UInt32)0x8FFFFF << m);
+-          raw |= (v << m);
+-          SetUi32(p, raw);
+-        }
+-      }
+-      while (++m <= 4);
+-    }
+-    i += 16;
+-  }
+-  while (i <= size);
+-  return i;
+-}
++#ifdef _MSC_VER
++#pragma warning(disable : 4206) // nonstandard extension used : translation unit is empty
++#endif
++
++#if defined(__clang__)
++#pragma GCC diagnostic ignored "-Wempty-translation-unit"
++#endif
+diff --git a/third_party/lzma_sdk/C/Compiler.h b/third_party/lzma_sdk/C/Compiler.h
+index a9816fa5ad014863211aa19723f8a87b312e4bfa..2a9c2b7a0896c847d65b9aae4b5c1f2cbf29297b 100644
+--- a/third_party/lzma_sdk/C/Compiler.h
++++ b/third_party/lzma_sdk/C/Compiler.h
+@@ -1,12 +1,105 @@
+-/* Compiler.h
+-2021-01-05 : Igor Pavlov : Public domain */
++/* Compiler.h : Compiler specific defines and pragmas
++2024-01-22 : Igor Pavlov : Public domain */
+ 
+-#ifndef __7Z_COMPILER_H
+-#define __7Z_COMPILER_H
++#ifndef ZIP7_INC_COMPILER_H
++#define ZIP7_INC_COMPILER_H
+ 
+-  #ifdef __clang__
+-    #pragma clang diagnostic ignored "-Wunused-private-field"
++#if defined(__clang__)
++# define Z7_CLANG_VERSION  (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__)
++#endif
++#if defined(__clang__) && defined(__apple_build_version__)
++# define Z7_APPLE_CLANG_VERSION   Z7_CLANG_VERSION
++#elif defined(__clang__)
++# define Z7_LLVM_CLANG_VERSION    Z7_CLANG_VERSION
++#elif defined(__GNUC__)
++# define Z7_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
++#endif
++
++#ifdef _MSC_VER
++#if !defined(__clang__) && !defined(__GNUC__)
++#define Z7_MSC_VER_ORIGINAL _MSC_VER
++#endif
++#endif
++
++#if defined(__MINGW32__) || defined(__MINGW64__)
++#define Z7_MINGW
++#endif
++
++#if defined(__LCC__) && (defined(__MCST__) || defined(__e2k__))
++#define Z7_MCST_LCC
++#define Z7_MCST_LCC_VERSION (__LCC__ * 100 + __LCC_MINOR__)
++#endif
++
++/*
++#if defined(__AVX2__) \
++    || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \
++    || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 40600) \
++    || defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100) \
++    || defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \
++    || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400)
++    #define Z7_COMPILER_AVX2_SUPPORTED
+   #endif
++#endif
++*/
++
++// #pragma GCC diagnostic ignored "-Wunknown-pragmas"
++
++#ifdef __clang__
++// padding size of '' with 4 bytes to alignment boundary
++#pragma GCC diagnostic ignored "-Wpadded"
++
++#if defined(Z7_LLVM_CLANG_VERSION) && (__clang_major__ == 13) \
++  && defined(__FreeBSD__)
++// freebsd:
++#pragma GCC diagnostic ignored "-Wexcess-padding"
++#endif
++
++#if __clang_major__ >= 16
++#pragma GCC diagnostic ignored "-Wunsafe-buffer-usage"
++#endif
++
++#if __clang_major__ == 13
++#if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16)
++// cheri
++#pragma GCC diagnostic ignored "-Wcapability-to-integer-cast"
++#endif
++#endif
++
++#if __clang_major__ == 13
++  // for <arm_neon.h>
++  #pragma GCC diagnostic ignored "-Wreserved-identifier"
++#endif
++
++#endif // __clang__
++
++#if defined(_WIN32) && defined(__clang__) && __clang_major__ >= 16
++// #pragma GCC diagnostic ignored "-Wcast-function-type-strict"
++#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION \
++  _Pragma("GCC diagnostic ignored \"-Wcast-function-type-strict\"")
++#else
++#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION
++#endif
++
++typedef void (*Z7_void_Function)(void);
++#if defined(__clang__) || defined(__GNUC__)
++#define Z7_CAST_FUNC_C  (Z7_void_Function)
++#elif defined(_MSC_VER) && _MSC_VER > 1920
++#define Z7_CAST_FUNC_C  (void *)
++// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()'
++#else
++#define Z7_CAST_FUNC_C
++#endif
++/*
++#if (defined(__GNUC__) && (__GNUC__ >= 8)) || defined(__clang__)
++  // #pragma GCC diagnostic ignored "-Wcast-function-type"
++#endif
++*/
++#ifdef __GNUC__
++#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40000) && (Z7_GCC_VERSION < 70000)
++#pragma GCC diagnostic ignored "-Wstrict-aliasing"
++#endif
++#endif
++
+ 
+ #ifdef _MSC_VER
+ 
+@@ -17,24 +110,124 @@
+     #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int
+   #endif
+ 
+-  #if _MSC_VER >= 1300
+-    #pragma warning(disable : 4996) // This function or variable may be unsafe
+-  #else
+-    #pragma warning(disable : 4511) // copy constructor could not be generated
+-    #pragma warning(disable : 4512) // assignment operator could not be generated
+-    #pragma warning(disable : 4514) // unreferenced inline function has been removed
+-    #pragma warning(disable : 4702) // unreachable code
+-    #pragma warning(disable : 4710) // not inlined
+-    #pragma warning(disable : 4714) // function marked as __forceinline not inlined
+-    #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
+-  #endif
++#if defined(_MSC_VER) && _MSC_VER >= 1800
++#pragma warning(disable : 4464) // relative include path contains '..'
++#endif
+ 
+-  #ifdef __clang__
+-    #pragma clang diagnostic ignored "-Wdeprecated-declarations"
+-    #pragma clang diagnostic ignored "-Wmicrosoft-exception-spec"
+-    // #pragma clang diagnostic ignored "-Wreserved-id-macro"
+-  #endif
++// == 1200 : -O1 : for __forceinline
++// >= 1900 : -O1 : for printf
++#pragma warning(disable : 4710) // function not inlined
++
++#if _MSC_VER < 1900
++// winnt.h: 'Int64ShllMod32'
++#pragma warning(disable : 4514) // unreferenced inline function has been removed
++#endif
++
++#if _MSC_VER < 1300
++// #pragma warning(disable : 4702) // unreachable code
++// Bra.c : -O1:
++#pragma warning(disable : 4714) // function marked as __forceinline not inlined
++#endif
++
++/*
++#if _MSC_VER > 1400 && _MSC_VER <= 1900
++// strcat: This function or variable may be unsafe
++// sysinfoapi.h: kit10: GetVersion was declared deprecated
++#pragma warning(disable : 4996)
++#endif
++*/
++
++#if _MSC_VER > 1200
++// -Wall warnings
++
++#pragma warning(disable : 4711) // function selected for automatic inline expansion
++#pragma warning(disable : 4820) // '2' bytes padding added after data member
++
++#if _MSC_VER >= 1400 && _MSC_VER < 1920
++// 1400: string.h: _DBG_MEMCPY_INLINE_
++// 1600 - 191x : smmintrin.h '__cplusplus'
++// is not defined as a preprocessor macro, replacing with '0' for '#if/#elif'
++#pragma warning(disable : 4668)
++
++// 1400 - 1600 : WinDef.h : 'FARPROC' :
++// 1900 - 191x : immintrin.h: _readfsbase_u32
++// no function prototype given : converting '()' to '(void)'
++#pragma warning(disable : 4255)
++#endif
++
++#if _MSC_VER >= 1914
++// Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified
++#pragma warning(disable : 5045)
++#endif
++
++#endif // _MSC_VER > 1200
++#endif // _MSC_VER
++
++
++#if defined(__clang__) && (__clang_major__ >= 4)
++  #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
++    _Pragma("clang loop unroll(disable)") \
++    _Pragma("clang loop vectorize(disable)")
++  #define Z7_ATTRIB_NO_VECTORIZE
++#elif defined(__GNUC__) && (__GNUC__ >= 5) \
++    && (!defined(Z7_MCST_LCC_VERSION) || (Z7_MCST_LCC_VERSION >= 12610))
++  #define Z7_ATTRIB_NO_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
++  // __attribute__((optimize("no-unroll-loops")));
++  #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
++#elif defined(_MSC_VER) && (_MSC_VER >= 1920)
++  #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
++    _Pragma("loop( no_vector )")
++  #define Z7_ATTRIB_NO_VECTORIZE
++#else
++  #define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
++  #define Z7_ATTRIB_NO_VECTORIZE
++#endif
++
++#if defined(MY_CPU_X86_OR_AMD64) && ( \
++       defined(__clang__) && (__clang_major__ >= 4) \
++    || defined(__GNUC__) && (__GNUC__ >= 5))
++  #define Z7_ATTRIB_NO_SSE  __attribute__((__target__("no-sse")))
++#else
++  #define Z7_ATTRIB_NO_SSE
++#endif
++
++#define Z7_ATTRIB_NO_VECTOR \
++  Z7_ATTRIB_NO_VECTORIZE \
++  Z7_ATTRIB_NO_SSE
++
++
++#if defined(__clang__) && (__clang_major__ >= 8) \
++  || defined(__GNUC__) && (__GNUC__ >= 1000) \
++  /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
++  // GCC is not good for __builtin_expect()
++  #define Z7_LIKELY(x)   (__builtin_expect((x), 1))
++  #define Z7_UNLIKELY(x) (__builtin_expect((x), 0))
++  // #define Z7_unlikely [[unlikely]]
++  // #define Z7_likely [[likely]]
++#else
++  #define Z7_LIKELY(x)   (x)
++  #define Z7_UNLIKELY(x) (x)
++  // #define Z7_likely
++#endif
++
++
++#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30600))
++
++#if (Z7_CLANG_VERSION < 130000)
++#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \
++  _Pragma("GCC diagnostic push") \
++  _Pragma("GCC diagnostic ignored \"-Wreserved-id-macro\"")
++#else
++#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \
++  _Pragma("GCC diagnostic push") \
++  _Pragma("GCC diagnostic ignored \"-Wreserved-macro-identifier\"")
++#endif
+ 
++#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \
++  _Pragma("GCC diagnostic pop")
++#else
++#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
++#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
+ #endif
+ 
+ #define UNUSED_VAR(x) (void)x;
+diff --git a/third_party/lzma_sdk/C/CpuArch.c b/third_party/lzma_sdk/C/CpuArch.c
+index 30451fba9b97b34669186865dcf72d8792334423..6e02551e2dca2d98db76fa4a31f23a1f9ce6fc95 100644
+--- a/third_party/lzma_sdk/C/CpuArch.c
++++ b/third_party/lzma_sdk/C/CpuArch.c
+@@ -1,187 +1,357 @@
+ /* CpuArch.c -- CPU specific code
+-2021-07-13 : Igor Pavlov : Public domain */
++Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+ 
++// #include <stdio.h>
++
+ #include "CpuArch.h"
+ 
+ #ifdef MY_CPU_X86_OR_AMD64
+ 
+-#if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__)
+-#define USE_ASM
++#undef NEED_CHECK_FOR_CPUID
++#if !defined(MY_CPU_AMD64)
++#define NEED_CHECK_FOR_CPUID
+ #endif
+ 
+-#if !defined(USE_ASM) && _MSC_VER >= 1500
+-#include <intrin.h>
+-#endif
++/*
++  cpuid instruction supports (subFunction) parameter in ECX,
++  that is used only with some specific (function) parameter values.
++  most functions use only (subFunction==0).
++*/
++/*
++  __cpuid(): MSVC and GCC/CLANG use same function/macro name
++             but parameters are different.
++   We use MSVC __cpuid() parameters style for our z7_x86_cpuid() function.
++*/
++
++#if defined(__GNUC__) /* && (__GNUC__ >= 10) */ \
++    || defined(__clang__) /* && (__clang_major__ >= 10) */
++
++/* there were some CLANG/GCC compilers that had issues with
++   rbx(ebx) handling in asm blocks in -fPIC mode (__PIC__ is defined).
++   compiler's <cpuid.h> contains the macro __cpuid() that is similar to our code.
++   The history of __cpuid() changes in CLANG/GCC:
++   GCC:
++     2007: it preserved ebx for (__PIC__ && __i386__)
++     2013: it preserved rbx and ebx for __PIC__
++     2014: it doesn't preserve rbx and ebx anymore
++     we suppose that (__GNUC__ >= 5) fixed that __PIC__ ebx/rbx problem.
++   CLANG:
++     2014+: it preserves rbx, but only for 64-bit code. No __PIC__ check.
++   Why CLANG cares about 64-bit mode only, and doesn't care about ebx (in 32-bit)?
++   Do we need __PIC__ test for CLANG or we must care about rbx even if
++   __PIC__ is not defined?
++*/
++
++#define ASM_LN "\n"
++
++#if defined(MY_CPU_AMD64) && defined(__PIC__) \
++    && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
++
++  /* "=&r" selects free register. It can select even rbx, if that register is free.
++     "=&D" for (RDI) also works, but the code can be larger with "=&D"
++     "2"(subFunc) : 2 is (zero-based) index in the output constraint list "=c" (ECX). */
++
++#define x86_cpuid_MACRO_2(p, func, subFunc) { \
++  __asm__ __volatile__ ( \
++    ASM_LN   "mov     %%rbx, %q1"  \
++    ASM_LN   "cpuid"               \
++    ASM_LN   "xchg    %%rbx, %q1"  \
++    : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); }
++
++#elif defined(MY_CPU_X86) && defined(__PIC__) \
++    && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
++
++#define x86_cpuid_MACRO_2(p, func, subFunc) { \
++  __asm__ __volatile__ ( \
++    ASM_LN   "mov     %%ebx, %k1"  \
++    ASM_LN   "cpuid"               \
++    ASM_LN   "xchg    %%ebx, %k1"  \
++    : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); }
+ 
+-#if defined(USE_ASM) && !defined(MY_CPU_AMD64)
+-static UInt32 CheckFlag(UInt32 flag)
+-{
+-  #ifdef _MSC_VER
+-  __asm pushfd;
+-  __asm pop EAX;
+-  __asm mov EDX, EAX;
+-  __asm xor EAX, flag;
+-  __asm push EAX;
+-  __asm popfd;
+-  __asm pushfd;
+-  __asm pop EAX;
+-  __asm xor EAX, EDX;
+-  __asm push EDX;
+-  __asm popfd;
+-  __asm and flag, EAX;
+-  #else
+-  __asm__ __volatile__ (
+-    "pushf\n\t"
+-    "pop  %%EAX\n\t"
+-    "movl %%EAX,%%EDX\n\t"
+-    "xorl %0,%%EAX\n\t"
+-    "push %%EAX\n\t"
+-    "popf\n\t"
+-    "pushf\n\t"
+-    "pop  %%EAX\n\t"
+-    "xorl %%EDX,%%EAX\n\t"
+-    "push %%EDX\n\t"
+-    "popf\n\t"
+-    "andl %%EAX, %0\n\t":
+-    "=c" (flag) : "c" (flag) :
+-    "%eax", "%edx");
+-  #endif
+-  return flag;
+-}
+-#define CHECK_CPUID_IS_SUPPORTED if (CheckFlag(1 << 18) == 0 || CheckFlag(1 << 21) == 0) return False;
+ #else
+-#define CHECK_CPUID_IS_SUPPORTED
++
++#define x86_cpuid_MACRO_2(p, func, subFunc) { \
++  __asm__ __volatile__ ( \
++    ASM_LN   "cpuid"               \
++    : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); }
++
+ #endif
+ 
+-#ifndef USE_ASM
+-  #ifdef _MSC_VER
+-    #if _MSC_VER >= 1600
+-      #define MY__cpuidex  __cpuidex
+-    #else
++#define x86_cpuid_MACRO(p, func)  x86_cpuid_MACRO_2(p, func, 0)
+ 
+-/*
+- __cpuid (function == 4) requires subfunction number in ECX.
+-  MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
+-   __cpuid() in new MSVC clears ECX.
+-   __cpuid() in old MSVC (14.00) doesn't clear ECX
+- We still can use __cpuid for low (function) values that don't require ECX,
+- but __cpuid() in old MSVC will be incorrect for some function values: (function == 4).
+- So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,
+- where ECX value is first parameter for FAST_CALL / NO_INLINE function,
+- So the caller of MY__cpuidex_HACK() sets ECX as subFunction, and
+- old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.
+- 
+- DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!!
+-*/
++void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
++{
++  x86_cpuid_MACRO(p, func)
++}
+ 
+ static
+-MY_NO_INLINE
+-void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function)
++void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
+ {
+-  UNUSED_VAR(subFunction);
+-  __cpuid(CPUInfo, function);
++  x86_cpuid_MACRO_2(p, func, subFunc)
+ }
+ 
+-      #define MY__cpuidex(info, func, func2)  MY__cpuidex_HACK(func2, info, func)
+-      #pragma message("======== MY__cpuidex_HACK WAS USED ========")
+-    #endif
+-  #else
+-     #define MY__cpuidex(info, func, func2)  __cpuid(info, func)
+-     #pragma message("======== (INCORRECT ?) cpuid WAS USED ========")
+-  #endif
+-#endif
+ 
++Z7_NO_INLINE
++UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
++{
++ #if defined(NEED_CHECK_FOR_CPUID)
++  #define EFALGS_CPUID_BIT 21
++  UInt32 a;
++  __asm__ __volatile__ (
++    ASM_LN   "pushf"
++    ASM_LN   "pushf"
++    ASM_LN   "pop     %0"
++    // ASM_LN   "movl    %0, %1"
++    // ASM_LN   "xorl    $0x200000, %0"
++    ASM_LN   "btc     %1, %0"
++    ASM_LN   "push    %0"
++    ASM_LN   "popf"
++    ASM_LN   "pushf"
++    ASM_LN   "pop     %0"
++    ASM_LN   "xorl    (%%esp), %0"
++
++    ASM_LN   "popf"
++    ASM_LN
++    : "=&r" (a) // "=a"
++    : "i" (EFALGS_CPUID_BIT)
++    );
++  if ((a & (1 << EFALGS_CPUID_BIT)) == 0)
++    return 0;
++ #endif
++  {
++    UInt32 p[4];
++    x86_cpuid_MACRO(p, 0)
++    return p[0];
++  }
++}
+ 
++#undef ASM_LN
+ 
++#elif !defined(_MSC_VER)
+ 
+-void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
++/*
++// for gcc/clang and other: we can try to use __cpuid macro:
++#include <cpuid.h>
++void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
+ {
+-  #ifdef USE_ASM
++  __cpuid(func, p[0], p[1], p[2], p[3]);
++}
++UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
++{
++  return (UInt32)__get_cpuid_max(0, NULL);
++}
++*/
++// for unsupported cpuid:
++void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
++{
++  UNUSED_VAR(func)
++  p[0] = p[1] = p[2] = p[3] = 0;
++}
++UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
++{
++  return 0;
++}
+ 
+-  #ifdef _MSC_VER
++#else // _MSC_VER
+ 
+-  UInt32 a2, b2, c2, d2;
+-  __asm xor EBX, EBX;
+-  __asm xor ECX, ECX;
+-  __asm xor EDX, EDX;
+-  __asm mov EAX, function;
+-  __asm cpuid;
+-  __asm mov a2, EAX;
+-  __asm mov b2, EBX;
+-  __asm mov c2, ECX;
+-  __asm mov d2, EDX;
++#if !defined(MY_CPU_AMD64)
+ 
+-  *a = a2;
+-  *b = b2;
+-  *c = c2;
+-  *d = d2;
++UInt32 __declspec(naked) Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
++{
++  #if defined(NEED_CHECK_FOR_CPUID)
++  #define EFALGS_CPUID_BIT 21
++  __asm   pushfd
++  __asm   pushfd
++  /*
++  __asm   pop     eax
++  // __asm   mov     edx, eax
++  __asm   btc     eax, EFALGS_CPUID_BIT
++  __asm   push    eax
++  */
++  __asm   btc     dword ptr [esp], EFALGS_CPUID_BIT
++  __asm   popfd
++  __asm   pushfd
++  __asm   pop     eax
++  // __asm   xor     eax, edx
++  __asm   xor     eax, [esp]
++  // __asm   push    edx
++  __asm   popfd
++  __asm   and     eax, (1 shl EFALGS_CPUID_BIT)
++  __asm   jz end_func
++  #endif
++  __asm   push    ebx
++  __asm   xor     eax, eax    // func
++  __asm   xor     ecx, ecx    // subFunction (optional) for (func == 0)
++  __asm   cpuid
++  __asm   pop     ebx
++  #if defined(NEED_CHECK_FOR_CPUID)
++  end_func:
++  #endif
++  __asm   ret 0
++}
+ 
+-  #else
++void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
++{
++  UNUSED_VAR(p)
++  UNUSED_VAR(func)
++  __asm   push    ebx
++  __asm   push    edi
++  __asm   mov     edi, ecx    // p
++  __asm   mov     eax, edx    // func
++  __asm   xor     ecx, ecx    // subfunction (optional) for (func == 0)
++  __asm   cpuid
++  __asm   mov     [edi     ], eax
++  __asm   mov     [edi +  4], ebx
++  __asm   mov     [edi +  8], ecx
++  __asm   mov     [edi + 12], edx
++  __asm   pop     edi
++  __asm   pop     ebx
++  __asm   ret     0
++}
+ 
+-  __asm__ __volatile__ (
+-  #if defined(MY_CPU_AMD64) && defined(__PIC__)
+-    "mov %%rbx, %%rdi;"
+-    "cpuid;"
+-    "xchg %%rbx, %%rdi;"
+-    : "=a" (*a) ,
+-      "=D" (*b) ,
+-  #elif defined(MY_CPU_X86) && defined(__PIC__)
+-    "mov %%ebx, %%edi;"
+-    "cpuid;"
+-    "xchgl %%ebx, %%edi;"
+-    : "=a" (*a) ,
+-      "=D" (*b) ,
+-  #else
+-    "cpuid"
+-    : "=a" (*a) ,
+-      "=b" (*b) ,
+-  #endif
+-      "=c" (*c) ,
+-      "=d" (*d)
+-    : "0" (function), "c"(0) ) ;
++static
++void __declspec(naked) Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
++{
++  UNUSED_VAR(p)
++  UNUSED_VAR(func)
++  UNUSED_VAR(subFunc)
++  __asm   push    ebx
++  __asm   push    edi
++  __asm   mov     edi, ecx    // p
++  __asm   mov     eax, edx    // func
++  __asm   mov     ecx, [esp + 12]  // subFunc
++  __asm   cpuid
++  __asm   mov     [edi     ], eax
++  __asm   mov     [edi +  4], ebx
++  __asm   mov     [edi +  8], ecx
++  __asm   mov     [edi + 12], edx
++  __asm   pop     edi
++  __asm   pop     ebx
++  __asm   ret     4
++}
+ 
+-  #endif
+-  
+-  #else
++#else // MY_CPU_AMD64
+ 
+-  int CPUInfo[4];
++    #if _MSC_VER >= 1600
++      #include <intrin.h>
++      #define MY_cpuidex  __cpuidex
+ 
+-  MY__cpuidex(CPUInfo, (int)function, 0);
++static
++void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
++{
++  __cpuidex((int *)p, func, subFunc);
++}
+ 
+-  *a = (UInt32)CPUInfo[0];
+-  *b = (UInt32)CPUInfo[1];
+-  *c = (UInt32)CPUInfo[2];
+-  *d = (UInt32)CPUInfo[3];
++    #else
++/*
++ __cpuid (func == (0 or 7)) requires subfunction number in ECX.
++  MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
++   __cpuid() in new MSVC clears ECX.
++   __cpuid() in old MSVC (14.00) x64 doesn't clear ECX
++ We still can use __cpuid for low (func) values that don't require ECX,
++ but __cpuid() in old MSVC will be incorrect for some func values: (func == 7).
++ So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,
++ where ECX value is first parameter for FASTCALL / NO_INLINE func.
++ So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and
++ old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.
++
++DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!!
++*/
++static
++Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(Int32 subFunction, Int32 func, Int32 *CPUInfo)
++{
++  UNUSED_VAR(subFunction)
++  __cpuid(CPUInfo, func);
++}
++      #define MY_cpuidex(info, func, func2)  MY_cpuidex_HACK(func2, func, info)
++      #pragma message("======== MY_cpuidex_HACK WAS USED ========")
++static
++void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
++{
++  MY_cpuidex_HACK(subFunc, func, (Int32 *)p);
++}
++    #endif // _MSC_VER >= 1600
+ 
+-  #endif
++#if !defined(MY_CPU_AMD64)
++/* inlining for __cpuid() in MSVC x86 (32-bit) produces large, inefficient code,
++   so we disable inlining here */
++Z7_NO_INLINE
++#endif
++void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
++{
++  MY_cpuidex((Int32 *)p, (Int32)func, 0);
+ }
+ 
+-BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p)
++Z7_NO_INLINE
++UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
++{
++  Int32 a[4];
++  MY_cpuidex(a, 0, 0);
++  return a[0];
++}
++
++#endif // MY_CPU_AMD64
++#endif // _MSC_VER
++
++#if defined(NEED_CHECK_FOR_CPUID)
++#define CHECK_CPUID_IS_SUPPORTED { if (z7_x86_cpuid_GetMaxFunc() == 0) return 0; }
++#else
++#define CHECK_CPUID_IS_SUPPORTED
++#endif
++#undef NEED_CHECK_FOR_CPUID
++
++
++static
++BoolInt x86cpuid_Func_1(UInt32 *p)
+ {
+   CHECK_CPUID_IS_SUPPORTED
+-  MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]);
+-  MyCPUID(1, &p->ver, &p->b, &p->c, &p->d);
++  z7_x86_cpuid(p, 1);
+   return True;
+ }
+ 
+-static const UInt32 kVendors[][3] =
++/*
++static const UInt32 kVendors[][1] =
++{
++  { 0x756E6547 }, // , 0x49656E69, 0x6C65746E },
++  { 0x68747541 }, // , 0x69746E65, 0x444D4163 },
++  { 0x746E6543 }  // , 0x48727561, 0x736C7561 }
++};
++*/
++
++/*
++typedef struct
++{
++  UInt32 maxFunc;
++  UInt32 vendor[3];
++  UInt32 ver;
++  UInt32 b;
++  UInt32 c;
++  UInt32 d;
++} Cx86cpuid;
++
++enum
+ {
+-  { 0x756E6547, 0x49656E69, 0x6C65746E},
+-  { 0x68747541, 0x69746E65, 0x444D4163},
+-  { 0x746E6543, 0x48727561, 0x736C7561}
++  CPU_FIRM_INTEL,
++  CPU_FIRM_AMD,
++  CPU_FIRM_VIA
+ };
++int x86cpuid_GetFirm(const Cx86cpuid *p);
++#define x86cpuid_ver_GetFamily(ver) (((ver >> 16) & 0xff0) | ((ver >> 8) & 0xf))
++#define x86cpuid_ver_GetModel(ver)  (((ver >> 12) &  0xf0) | ((ver >> 4) & 0xf))
++#define x86cpuid_ver_GetStepping(ver) (ver & 0xf)
+ 
+ int x86cpuid_GetFirm(const Cx86cpuid *p)
+ {
+   unsigned i;
+-  for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[i]); i++)
++  for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[0]); i++)
+   {
+     const UInt32 *v = kVendors[i];
+-    if (v[0] == p->vendor[0] &&
+-        v[1] == p->vendor[1] &&
+-        v[2] == p->vendor[2])
++    if (v[0] == p->vendor[0]
++        // && v[1] == p->vendor[1]
++        // && v[2] == p->vendor[2]
++        )
+       return (int)i;
+   }
+   return -1;
+@@ -190,41 +360,55 @@ int x86cpuid_GetFirm(const Cx86cpuid *p)
+ BoolInt CPU_Is_InOrder()
+ {
+   Cx86cpuid p;
+-  int firm;
+   UInt32 family, model;
+   if (!x86cpuid_CheckAndRead(&p))
+     return True;
+ 
+-  family = x86cpuid_GetFamily(p.ver);
+-  model = x86cpuid_GetModel(p.ver);
+-  
+-  firm = x86cpuid_GetFirm(&p);
++  family = x86cpuid_ver_GetFamily(p.ver);
++  model = x86cpuid_ver_GetModel(p.ver);
+ 
+-  switch (firm)
++  switch (x86cpuid_GetFirm(&p))
+   {
+     case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && (
+-        /* In-Order Atom CPU */
+-           model == 0x1C  /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */
+-        || model == 0x26  /* 45 nm, Z6xx */
+-        || model == 0x27  /* 32 nm, Z2460 */
+-        || model == 0x35  /* 32 nm, Z2760 */
+-        || model == 0x36  /* 32 nm, N2xxx, D2xxx */
++        // In-Order Atom CPU
++           model == 0x1C  // 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330
++        || model == 0x26  // 45 nm, Z6xx
++        || model == 0x27  // 32 nm, Z2460
++        || model == 0x35  // 32 nm, Z2760
++        || model == 0x36  // 32 nm, N2xxx, D2xxx
+         )));
+     case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));
+     case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF));
+   }
+-  return True;
++  return False; // v23 : unknown processors are not In-Order
+ }
++*/
++
++#ifdef _WIN32
++#include "7zWindows.h"
++#endif
+ 
+ #if !defined(MY_CPU_AMD64) && defined(_WIN32)
+-#include <Windows.h>
+-static BoolInt CPU_Sys_Is_SSE_Supported()
++
++/* for legacy SSE ia32: there is no user-space cpu instruction to check
++   that OS supports SSE register storing/restoring on context switches.
++   So we need some OS-specific function to check that it's safe to use SSE registers.
++*/
++
++Z7_FORCE_INLINE
++static BoolInt CPU_Sys_Is_SSE_Supported(void)
+ {
+-  OSVERSIONINFO vi;
+-  vi.dwOSVersionInfoSize = sizeof(vi);
+-  if (!GetVersionEx(&vi))
+-    return False;
+-  return (vi.dwMajorVersion >= 5);
++#ifdef _MSC_VER
++  #pragma warning(push)
++  #pragma warning(disable : 4996) // `GetVersion': was declared deprecated
++#endif
++  /* low byte is major version of Windows
++     We suppose that any Windows version since
++     Windows2000 (major == 5) supports SSE registers */
++  return (Byte)GetVersion() >= 5;
++#if defined(_MSC_VER)
++  #pragma warning(pop)
++#endif
+ }
+ #define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;
+ #else
+@@ -232,117 +416,364 @@ static BoolInt CPU_Sys_Is_SSE_Supported()
+ #endif
+ 
+ 
+-static UInt32 X86_CPUID_ECX_Get_Flags()
++#if !defined(MY_CPU_AMD64)
++
++BoolInt CPU_IsSupported_CMOV(void)
+ {
+-  Cx86cpuid p;
++  UInt32 a[4];
++  if (!x86cpuid_Func_1(&a[0]))
++    return 0;
++  return (BoolInt)(a[3] >> 15) & 1;
++}
++
++BoolInt CPU_IsSupported_SSE(void)
++{
++  UInt32 a[4];
+   CHECK_SYS_SSE_SUPPORT
+-  if (!x86cpuid_CheckAndRead(&p))
++  if (!x86cpuid_Func_1(&a[0]))
++    return 0;
++  return (BoolInt)(a[3] >> 25) & 1;
++}
++
++BoolInt CPU_IsSupported_SSE2(void)
++{
++  UInt32 a[4];
++  CHECK_SYS_SSE_SUPPORT
++  if (!x86cpuid_Func_1(&a[0]))
+     return 0;
+-  return p.c;
++  return (BoolInt)(a[3] >> 26) & 1;
+ }
+ 
+-BoolInt CPU_IsSupported_AES()
++#endif
++
++
++static UInt32 x86cpuid_Func_1_ECX(void)
+ {
+-  return (X86_CPUID_ECX_Get_Flags() >> 25) & 1;
++  UInt32 a[4];
++  CHECK_SYS_SSE_SUPPORT
++  if (!x86cpuid_Func_1(&a[0]))
++    return 0;
++  return a[2];
+ }
+ 
+-BoolInt CPU_IsSupported_SSSE3()
++BoolInt CPU_IsSupported_AES(void)
+ {
+-  return (X86_CPUID_ECX_Get_Flags() >> 9) & 1;
++  return (BoolInt)(x86cpuid_Func_1_ECX() >> 25) & 1;
+ }
+ 
+-BoolInt CPU_IsSupported_SSE41()
++BoolInt CPU_IsSupported_SSSE3(void)
+ {
+-  return (X86_CPUID_ECX_Get_Flags() >> 19) & 1;
++  return (BoolInt)(x86cpuid_Func_1_ECX() >> 9) & 1;
+ }
+ 
+-BoolInt CPU_IsSupported_SHA()
++BoolInt CPU_IsSupported_SSE41(void)
++{
++  return (BoolInt)(x86cpuid_Func_1_ECX() >> 19) & 1;
++}
++
++BoolInt CPU_IsSupported_SHA(void)
+ {
+-  Cx86cpuid p;
+   CHECK_SYS_SSE_SUPPORT
+-  if (!x86cpuid_CheckAndRead(&p))
++
++  if (z7_x86_cpuid_GetMaxFunc() < 7)
+     return False;
++  {
++    UInt32 d[4];
++    z7_x86_cpuid(d, 7);
++    return (BoolInt)(d[1] >> 29) & 1;
++  }
++}
++
+ 
+-  if (p.maxFunc < 7)
++BoolInt CPU_IsSupported_SHA512(void)
++{
++  if (!CPU_IsSupported_AVX2()) return False; // maybe CPU_IsSupported_AVX() is enough here
++
++  if (z7_x86_cpuid_GetMaxFunc() < 7)
+     return False;
+   {
+-    UInt32 d[4] = { 0 };
+-    MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
+-    return (d[1] >> 29) & 1;
++    UInt32 d[4];
++    z7_x86_cpuid_subFunc(d, 7, 0);
++    if (d[0] < 1) // d[0] - is max supported subleaf value
++      return False;
++    z7_x86_cpuid_subFunc(d, 7, 1);
++    return (BoolInt)(d[0]) & 1;
+   }
+ }
+ 
+-// #include <stdio.h>
++/*
++MSVC: _xgetbv() intrinsic is available since VS2010SP1.
++   MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in
++   <immintrin.h> that we can use or check.
++   For any 32-bit x86 we can use asm code in MSVC,
++   but MSVC asm code is huge after compilation.
++   So _xgetbv() is better
++
++ICC: _xgetbv() intrinsic is available (in what version of ICC?)
++   ICC defines (__GNUC__) and it supports gnu assembler
++   also ICC supports MASM style code with -use-msasm switch.
++   but ICC doesn't support __attribute__((__target__))
++
++GCC/CLANG 9:
++  _xgetbv() is a macro that works via __builtin_ia32_xgetbv()
++  and we need __attribute__((__target__("xsave"))).
++  But with __target__("xsave") the function will not be
++  inlined into a function that has no __target__("xsave") attribute.
++  If we want _xgetbv() call inlining, then we should use asm version
++  instead of calling _xgetbv().
++  Note: the intrinsic is broken before GCC 8.2:
++    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85684
++*/
+ 
+-#ifdef _WIN32
+-#include <Windows.h>
++#if    defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1100) \
++    || defined(_MSC_VER) && (_MSC_VER >= 1600) && (_MSC_FULL_VER >= 160040219)  \
++    || defined(__GNUC__) && (__GNUC__ >= 9) \
++    || defined(__clang__) && (__clang_major__ >= 9)
++// we define ATTRIB_XGETBV, if we want to use predefined _xgetbv() from compiler
++#if defined(__INTEL_COMPILER)
++#define ATTRIB_XGETBV
++#elif defined(__GNUC__) || defined(__clang__)
++// we don't define ATTRIB_XGETBV here, because asm version is better for inlining.
++// #define ATTRIB_XGETBV __attribute__((__target__("xsave")))
++#else
++#define ATTRIB_XGETBV
+ #endif
++#endif
++
++#if defined(ATTRIB_XGETBV)
++#include <immintrin.h>
++#endif
++
+ 
+-BoolInt CPU_IsSupported_AVX2()
++// XFEATURE_ENABLED_MASK/XCR0
++#define MY_XCR_XFEATURE_ENABLED_MASK 0
++
++#if defined(ATTRIB_XGETBV)
++ATTRIB_XGETBV
++#endif
++static UInt64 x86_xgetbv_0(UInt32 num)
+ {
+-  Cx86cpuid p;
+-  CHECK_SYS_SSE_SUPPORT
++#if defined(ATTRIB_XGETBV)
++  {
++    return
++      #if (defined(_MSC_VER))
++        _xgetbv(num);
++      #else
++        __builtin_ia32_xgetbv(
++          #if !defined(__clang__)
++            (int)
++          #endif
++            num);
++      #endif
++  }
++
++#elif defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_CC)
++
++  UInt32 a, d;
++ #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
++  __asm__
++  (
++    "xgetbv"
++    : "=a"(a), "=d"(d) : "c"(num) : "cc"
++  );
++ #else // is old gcc
++  __asm__
++  (
++    ".byte 0x0f, 0x01, 0xd0" "\n\t"
++    : "=a"(a), "=d"(d) : "c"(num) : "cc"
++  );
++ #endif
++  return ((UInt64)d << 32) | a;
++  // return a;
++
++#elif defined(_MSC_VER) && !defined(MY_CPU_AMD64)
++
++  UInt32 a, d;
++  __asm {
++    push eax
++    push edx
++    push ecx
++    mov ecx, num;
++    // xor ecx, ecx // = MY_XCR_XFEATURE_ENABLED_MASK
++    _emit 0x0f
++    _emit 0x01
++    _emit 0xd0
++    mov a, eax
++    mov d, edx
++    pop ecx
++    pop edx
++    pop eax
++  }
++  return ((UInt64)d << 32) | a;
++  // return a;
++
++#else // it's unknown compiler
++  // #error "Need xgetbv function"
++  UNUSED_VAR(num)
++  // for MSVC-X64 we could call external function from external file.
++  /* Actually we had checked OSXSAVE/AVX in cpuid before.
++     So it's expected that OS supports at least AVX and below. */
++  // if (num != MY_XCR_XFEATURE_ENABLED_MASK) return 0; // if not XCR0
++  return
++      // (1 << 0) |  // x87
++        (1 << 1)   // SSE
++      | (1 << 2);  // AVX
++
++#endif
++}
+ 
++#ifdef _WIN32
++/*
++  Windows versions do not know about ISA extensions that are introduced
++  after their release. We can still use such new extensions even if
++  Windows doesn't report support for them, but only if Windows knows
++  about the ISA extensions that change the number or size of
++  registers: SSE, AVX/XSAVE, AVX512.
++  So it's enough to check
++    MY_PF_AVX_INSTRUCTIONS_AVAILABLE
++      instead of
++    MY_PF_AVX2_INSTRUCTIONS_AVAILABLE
++*/
++#define MY_PF_XSAVE_ENABLED                            17
++// #define MY_PF_SSSE3_INSTRUCTIONS_AVAILABLE             36
++// #define MY_PF_SSE4_1_INSTRUCTIONS_AVAILABLE            37
++// #define MY_PF_SSE4_2_INSTRUCTIONS_AVAILABLE            38
++// #define MY_PF_AVX_INSTRUCTIONS_AVAILABLE               39
++// #define MY_PF_AVX2_INSTRUCTIONS_AVAILABLE              40
++// #define MY_PF_AVX512F_INSTRUCTIONS_AVAILABLE           41
++#endif
++
++BoolInt CPU_IsSupported_AVX(void)
++{
+   #ifdef _WIN32
+-  #define MY__PF_XSAVE_ENABLED  17
+-  if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
++  if (!IsProcessorFeaturePresent(MY_PF_XSAVE_ENABLED))
++    return False;
++  /* PF_AVX_INSTRUCTIONS_AVAILABLE probably is supported starting from
++     some latest Win10 revisions. But we need AVX in older Windows also.
++     So we don't use the following check: */
++  /*
++  if (!IsProcessorFeaturePresent(MY_PF_AVX_INSTRUCTIONS_AVAILABLE))
+     return False;
++  */
+   #endif
+ 
+-  if (!x86cpuid_CheckAndRead(&p))
++  /*
++    OS must use new special XSAVE/XRSTOR instructions to save
++    AVX registers when it is required for context switching.
++    At OS startup:
++      OS sets CR4.OSXSAVE flag to signal the processor that OS supports the XSAVE extensions.
++      Also OS sets bitmask in XCR0 register that defines what
++      registers will be processed by XSAVE instruction:
++        XCR0.x87[bit 0] - x87 registers and state
++        XCR0.SSE[bit 1] - SSE registers and state
++        XCR0.AVX[bit 2] - AVX registers and state
++    CR4.OSXSAVE is reflected to CPUID.1:ECX.OSXSAVE[bit 27].
++       So we can read that bit in user-space.
++    XCR0 is available for reading in user-space by new XGETBV instruction.
++  */
++  {
++    const UInt32 c = x86cpuid_Func_1_ECX();
++    if (0 == (1
++        & (c >> 28)   // AVX instructions are supported by hardware
++        & (c >> 27))) // OSXSAVE bit: XSAVE and related instructions are enabled by OS.
++      return False;
++  }
++
++  /* also we can check
++     CPUID.1:ECX.XSAVE [bit 26] : that shows that
++        XSAVE, XRSTOR, XSETBV, XGETBV instructions are supported by hardware.
++     But that check is redundant, because if OSXSAVE bit is set, then XSAVE is also set */
++
++  /* If OS has enabled XSAVE extension instructions (OSXSAVE == 1),
++     in most cases we expect that OS also will support storing/restoring
++     for AVX and SSE states at least.
++     But to be sure of that, we call the user-space instruction
++     XGETBV(0) to get XCR0 value that contains bitmask that defines
++     what exact states(registers) OS has enabled for storing/restoring.
++  */
++
++  {
++    const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
++    // printf("\n=== XGetBV=0x%x\n", bm);
++    return 1
++        & (BoolInt)(bm >> 1)  // SSE state is supported (set by OS) for storing/restoring
++        & (BoolInt)(bm >> 2); // AVX state is supported (set by OS) for storing/restoring
++  }
++  // since Win7SP1: we can use GetEnabledXStateFeatures();
++}
++
++
++BoolInt CPU_IsSupported_AVX2(void)
++{
++  if (!CPU_IsSupported_AVX())
+     return False;
+-  if (p.maxFunc < 7)
++  if (z7_x86_cpuid_GetMaxFunc() < 7)
+     return False;
+   {
+-    UInt32 d[4] = { 0 };
+-    MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
++    UInt32 d[4];
++    z7_x86_cpuid(d, 7);
+     // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
+     return 1
+-      & (d[1] >> 5); // avx2
++      & (BoolInt)(d[1] >> 5); // avx2
+   }
+ }
+ 
+-BoolInt CPU_IsSupported_VAES_AVX2()
++#if 0
++BoolInt CPU_IsSupported_AVX512F_AVX512VL(void)
+ {
+-  Cx86cpuid p;
+-  CHECK_SYS_SSE_SUPPORT
+-
+-  #ifdef _WIN32
+-  #define MY__PF_XSAVE_ENABLED  17
+-  if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
++  if (!CPU_IsSupported_AVX())
+     return False;
+-  #endif
++  if (z7_x86_cpuid_GetMaxFunc() < 7)
++    return False;
++  {
++    UInt32 d[4];
++    BoolInt v;
++    z7_x86_cpuid(d, 7);
++    // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
++    v = 1
++      & (BoolInt)(d[1] >> 16)  // avx512f
++      & (BoolInt)(d[1] >> 31); // avx512vl
++    if (!v)
++      return False;
++  }
++  {
++    const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
++    // printf("\n=== XGetBV=0x%x\n", bm);
++    return 1
++        & (BoolInt)(bm >> 5)  // OPMASK
++        & (BoolInt)(bm >> 6)  // ZMM upper 256-bit
++        & (BoolInt)(bm >> 7); // ZMM16 ... ZMM31
++  }
++}
++#endif
+ 
+-  if (!x86cpuid_CheckAndRead(&p))
++BoolInt CPU_IsSupported_VAES_AVX2(void)
++{
++  if (!CPU_IsSupported_AVX())
+     return False;
+-  if (p.maxFunc < 7)
++  if (z7_x86_cpuid_GetMaxFunc() < 7)
+     return False;
+   {
+-    UInt32 d[4] = { 0 };
+-    MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
++    UInt32 d[4];
++    z7_x86_cpuid(d, 7);
+     // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
+     return 1
+-      & (d[1] >> 5) // avx2
++      & (BoolInt)(d[1] >> 5) // avx2
+       // & (d[1] >> 31) // avx512vl
+-      & (d[2] >> 9); // vaes // VEX-256/EVEX
++      & (BoolInt)(d[2] >> 9); // vaes // VEX-256/EVEX
+   }
+ }
+ 
+-BoolInt CPU_IsSupported_PageGB()
++BoolInt CPU_IsSupported_PageGB(void)
+ {
+-  Cx86cpuid cpuid;
+-  if (!x86cpuid_CheckAndRead(&cpuid))
+-    return False;
++  CHECK_CPUID_IS_SUPPORTED
+   {
+-    UInt32 d[4] = { 0 };
+-    MyCPUID(0x80000000, &d[0], &d[1], &d[2], &d[3]);
++    UInt32 d[4];
++    z7_x86_cpuid(d, 0x80000000);
+     if (d[0] < 0x80000001)
+       return False;
+-  }
+-  {
+-    UInt32 d[4] = { 0 };
+-    MyCPUID(0x80000001, &d[0], &d[1], &d[2], &d[3]);
+-    return (d[3] >> 26) & 1;
++    z7_x86_cpuid(d, 0x80000001);
++    return (BoolInt)(d[3] >> 26) & 1;
+   }
+ }
+ 
+@@ -351,11 +782,11 @@ BoolInt CPU_IsSupported_PageGB()
+ 
+ #ifdef _WIN32
+ 
+-#include <Windows.h>
++#include "7zWindows.h"
+ 
+-BoolInt CPU_IsSupported_CRC32()  { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+-BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+-BoolInt CPU_IsSupported_NEON()   { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
++BoolInt CPU_IsSupported_CRC32(void)  { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
++BoolInt CPU_IsSupported_CRYPTO(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
++BoolInt CPU_IsSupported_NEON(void)   { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
+ 
+ #else
+ 
+@@ -378,29 +809,40 @@ static void Print_sysctlbyname(const char *name)
+   }
+ }
+ */
++/*
++  Print_sysctlbyname("hw.pagesize");
++  Print_sysctlbyname("machdep.cpu.brand_string");
++*/
+ 
+-static BoolInt My_sysctlbyname_Get_BoolInt(const char *name)
++static BoolInt z7_sysctlbyname_Get_BoolInt(const char *name)
+ {
+   UInt32 val = 0;
+-  if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
++  if (z7_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
+     return 1;
+   return 0;
+ }
+ 
+-  /*
+-  Print_sysctlbyname("hw.pagesize");
+-  Print_sysctlbyname("machdep.cpu.brand_string");
+-  */
+-
+ BoolInt CPU_IsSupported_CRC32(void)
+ {
+-  return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
++  return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
+ }
+ 
+ BoolInt CPU_IsSupported_NEON(void)
+ {
+-  return My_sysctlbyname_Get_BoolInt("hw.optional.neon");
++  return z7_sysctlbyname_Get_BoolInt("hw.optional.neon");
++}
++
++BoolInt CPU_IsSupported_SHA512(void)
++{
++  return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha512");
++}
++
++/*
++BoolInt CPU_IsSupported_SHA3(void)
++{
++  return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha3");
+ }
++*/
+ 
+ #ifdef MY_CPU_ARM64
+ #define APPLE_CRYPTO_SUPPORT_VAL 1
+@@ -415,35 +857,70 @@ BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
+ 
+ #else // __APPLE__
+ 
+-#include <sys/auxv.h>
++#if defined(__GLIBC__) && (__GLIBC__ * 100 + __GLIBC_MINOR__ >= 216)
++  #define Z7_GETAUXV_AVAILABLE
++#else
++// #pragma message("=== is not NEW GLIBC === ")
++  #if defined __has_include
++  #if __has_include (<sys/auxv.h>)
++// #pragma message("=== sys/auxv.h is avail=== ")
++    #define Z7_GETAUXV_AVAILABLE
++  #endif
++  #endif
++#endif
+ 
+-#if !defined(ARMV8_OS_FUCHSIA)
++#ifdef Z7_GETAUXV_AVAILABLE
++// #pragma message("=== Z7_GETAUXV_AVAILABLE === ")
++#include <sys/auxv.h>
+ #define USE_HWCAP
+-#endif // !defined(ARMV8_OS_FUCHSIA)
++#endif
+ 
+ #ifdef USE_HWCAP
+ 
++#if defined(__FreeBSD__)
++static unsigned long MY_getauxval(int aux)
++{
++  unsigned long val;
++  if (elf_aux_info(aux, &val, sizeof(val)))
++    return 0;
++  return val;
++}
++#else
++#define MY_getauxval  getauxval
++  #if defined __has_include
++  #if __has_include (<asm/hwcap.h>)
+ #include <asm/hwcap.h>
++  #endif
++  #endif
++#endif
+ 
+   #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \
+-  BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP)  & (HWCAP_  ## name2)) ? 1 : 0; }
++  BoolInt CPU_IsSupported_ ## name1(void) { return (MY_getauxval(AT_HWCAP)  & (HWCAP_  ## name2)); }
+ 
+ #ifdef MY_CPU_ARM64
+   #define MY_HWCAP_CHECK_FUNC(name) \
+   MY_HWCAP_CHECK_FUNC_2(name, name)
++#if 1 || defined(__ARM_NEON)
++  BoolInt CPU_IsSupported_NEON(void) { return True; }
++#else
+   MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)
++#endif
+ // MY_HWCAP_CHECK_FUNC (ASIMD)
+ #elif defined(MY_CPU_ARM)
+   #define MY_HWCAP_CHECK_FUNC(name) \
+-  BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; }
++  BoolInt CPU_IsSupported_ ## name(void) { return (MY_getauxval(AT_HWCAP2) & (HWCAP2_ ## name)); }
+   MY_HWCAP_CHECK_FUNC_2(NEON, NEON)
+ #endif
+ 
+ #else // USE_HWCAP
+ 
+   #define MY_HWCAP_CHECK_FUNC(name) \
+-  BoolInt CPU_IsSupported_ ## name() { return 0; }
++  BoolInt CPU_IsSupported_ ## name(void) { return 0; }
++#if defined(__ARM_NEON)
++  BoolInt CPU_IsSupported_NEON(void) { return True; }
++#else
+   MY_HWCAP_CHECK_FUNC(NEON)
++#endif
+ 
+ #endif // USE_HWCAP
+ 
+@@ -451,6 +928,19 @@ MY_HWCAP_CHECK_FUNC (CRC32)
+ MY_HWCAP_CHECK_FUNC (SHA1)
+ MY_HWCAP_CHECK_FUNC (SHA2)
+ MY_HWCAP_CHECK_FUNC (AES)
++#ifdef MY_CPU_ARM64
++// <hwcap.h> supports HWCAP_SHA512 and HWCAP_SHA3 since 2017.
++// we define them here, if they are not defined
++#ifndef HWCAP_SHA3
++// #define HWCAP_SHA3    (1 << 17)
++#endif
++#ifndef HWCAP_SHA512
++// #pragma message("=== HWCAP_SHA512 define === ")
++#define HWCAP_SHA512  (1 << 21)
++#endif
++MY_HWCAP_CHECK_FUNC (SHA512)
++// MY_HWCAP_CHECK_FUNC (SHA3)
++#endif
+ 
+ #endif // __APPLE__
+ #endif // _WIN32
+@@ -463,15 +953,15 @@ MY_HWCAP_CHECK_FUNC (AES)
+ 
+ #include <sys/sysctl.h>
+ 
+-int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
++int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
+ {
+   return sysctlbyname(name, buf, bufSize, NULL, 0);
+ }
+ 
+-int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
++int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
+ {
+   size_t bufSize = sizeof(*val);
+-  int res = My_sysctlbyname_Get(name, val, &bufSize);
++  const int res = z7_sysctlbyname_Get(name, val, &bufSize);
+   if (res == 0 && bufSize != sizeof(*val))
+     return EFAULT;
+   return res;
+diff --git a/third_party/lzma_sdk/C/CpuArch.h b/third_party/lzma_sdk/C/CpuArch.h
+index 8cd55bea48dc6ffa4d7b8be52e7baf3fd47e7ad9..2843b4562c2bd9b06aabfc5c13300d14639bedc9 100644
+--- a/third_party/lzma_sdk/C/CpuArch.h
++++ b/third_party/lzma_sdk/C/CpuArch.h
+@@ -1,8 +1,8 @@
+ /* CpuArch.h -- CPU specific code
+-2022-07-15 : Igor Pavlov : Public domain */
++Igor Pavlov : Public domain */
+ 
+-#ifndef __CPU_ARCH_H
+-#define __CPU_ARCH_H
++#ifndef ZIP7_INC_CPU_ARCH_H
++#define ZIP7_INC_CPU_ARCH_H
+ 
+ #include "7zTypes.h"
+ 
+@@ -20,6 +20,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
+   MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8)
+ */
+ 
++#if !defined(_M_ARM64EC)
+ #if  defined(_M_X64) \
+   || defined(_M_AMD64) \
+   || defined(__x86_64__) \
+@@ -35,6 +36,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
+   #endif
+   #define MY_CPU_64BIT
+ #endif
++#endif
+ 
+ 
+ #if  defined(_M_IX86) \
+@@ -47,11 +49,26 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
+ 
+ 
+ #if  defined(_M_ARM64) \
++  || defined(_M_ARM64EC) \
+   || defined(__AARCH64EL__) \
+   || defined(__AARCH64EB__) \
+   || defined(__aarch64__)
+   #define MY_CPU_ARM64
+-  #define MY_CPU_NAME "arm64"
++#if   defined(__ILP32__) \
++   || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
++    #define MY_CPU_NAME "arm64-32"
++    #define MY_CPU_SIZEOF_POINTER 4
++#elif defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16)
++    #define MY_CPU_NAME "arm64-128"
++    #define MY_CPU_SIZEOF_POINTER 16
++#else
++#if defined(_M_ARM64EC)
++    #define MY_CPU_NAME "arm64ec"
++#else
++    #define MY_CPU_NAME "arm64"
++#endif
++    #define MY_CPU_SIZEOF_POINTER 8
++#endif
+   #define MY_CPU_64BIT
+ #endif
+ 
+@@ -68,8 +85,10 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
+   #define MY_CPU_ARM
+ 
+   #if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT)
++    #define MY_CPU_ARMT
+     #define MY_CPU_NAME "armt"
+   #else
++    #define MY_CPU_ARM32
+     #define MY_CPU_NAME "arm"
+   #endif
+   /* #define MY_CPU_32BIT */
+@@ -103,6 +122,8 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
+   || defined(__PPC__) \
+   || defined(_POWER)
+ 
++#define MY_CPU_PPC_OR_PPC64
++
+ #if  defined(__ppc64__) \
+   || defined(__powerpc64__) \
+   || defined(_LP64) \
+@@ -123,8 +144,36 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
+ #endif
+ 
+ 
++#if   defined(__sparc__) \
++   || defined(__sparc)
++  #define MY_CPU_SPARC
++  #if  defined(__LP64__) \
++    || defined(_LP64) \
++    || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8)
++    #define MY_CPU_NAME "sparcv9"
++    #define MY_CPU_SIZEOF_POINTER 8
++    #define MY_CPU_64BIT
++  #elif defined(__sparc_v9__) \
++     || defined(__sparcv9)
++    #define MY_CPU_64BIT
++    #if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
++      #define MY_CPU_NAME "sparcv9-32"
++    #else
++      #define MY_CPU_NAME "sparcv9m"
++    #endif
++  #elif defined(__sparc_v8__) \
++     || defined(__sparcv8)
++    #define MY_CPU_NAME "sparcv8"
++    #define MY_CPU_SIZEOF_POINTER 4
++  #else
++    #define MY_CPU_NAME "sparc"
++  #endif
++#endif
++
++
+ #if  defined(__riscv) \
+   || defined(__riscv__)
++    #define MY_CPU_RISCV
+   #if __riscv_xlen == 32
+     #define MY_CPU_NAME "riscv32"
+   #elif __riscv_xlen == 64
+@@ -135,6 +184,39 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
+ #endif
+ 
+ 
++#if defined(__loongarch__)
++  #define MY_CPU_LOONGARCH
++  #if defined(__loongarch64) || defined(__loongarch_grlen) && (__loongarch_grlen == 64)
++  #define MY_CPU_64BIT
++  #endif
++  #if defined(__loongarch64)
++  #define MY_CPU_NAME "loongarch64"
++  #define MY_CPU_LOONGARCH64
++  #else
++  #define MY_CPU_NAME "loongarch"
++  #endif
++#endif
++
++
++// #undef MY_CPU_NAME
++// #undef MY_CPU_SIZEOF_POINTER
++// #define __e2k__
++// #define __SIZEOF_POINTER__ 4
++#if  defined(__e2k__)
++  #define MY_CPU_E2K
++  #if defined(__ILP32__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
++    #define MY_CPU_NAME "e2k-32"
++    #define MY_CPU_SIZEOF_POINTER 4
++  #else
++    #define MY_CPU_NAME "e2k"
++    #if defined(__LP64__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8)
++      #define MY_CPU_SIZEOF_POINTER 8
++    #endif
++  #endif
++  #define MY_CPU_64BIT
++#endif
++
++
+ #if defined(MY_CPU_X86) || defined(MY_CPU_AMD64)
+ #define MY_CPU_X86_OR_AMD64
+ #endif
+@@ -165,6 +247,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
+     || defined(MY_CPU_ARM_LE) \
+     || defined(MY_CPU_ARM64_LE) \
+     || defined(MY_CPU_IA64_LE) \
++    || defined(_LITTLE_ENDIAN) \
+     || defined(__LITTLE_ENDIAN__) \
+     || defined(__ARMEL__) \
+     || defined(__THUMBEL__) \
+@@ -197,6 +280,9 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
+   #error Stop_Compiling_Bad_Endian
+ #endif
+ 
++#if !defined(MY_CPU_LE) && !defined(MY_CPU_BE)
++  #error Stop_Compiling_CPU_ENDIAN_must_be_detected_at_compile_time
++#endif
+ 
+ #if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT)
+   #error Stop_Compiling_Bad_32_64_BIT
+@@ -238,6 +324,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
+ 
+ 
+ #ifndef MY_CPU_NAME
++  // #define MY_CPU_IS_UNKNOWN
+   #ifdef MY_CPU_LE
+     #define MY_CPU_NAME "LE"
+   #elif defined(MY_CPU_BE)
+@@ -253,24 +340,127 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
+ 
+ 
+ 
++#ifdef __has_builtin
++  #define Z7_has_builtin(x)  __has_builtin(x)
++#else
++  #define Z7_has_builtin(x)  0
++#endif
++
++
++#define Z7_BSWAP32_CONST(v) \
++       ( (((UInt32)(v) << 24)                   ) \
++       | (((UInt32)(v) <<  8) & (UInt32)0xff0000) \
++       | (((UInt32)(v) >>  8) & (UInt32)0xff00  ) \
++       | (((UInt32)(v) >> 24)                   ))
++
++
++#if defined(_MSC_VER) && (_MSC_VER >= 1300)
++
++#include <stdlib.h>
++
++/* Note: these macros will use bswap instruction (486), that is unsupported in 386 cpu */
++
++#pragma intrinsic(_byteswap_ushort)
++#pragma intrinsic(_byteswap_ulong)
++#pragma intrinsic(_byteswap_uint64)
++
++#define Z7_BSWAP16(v)  _byteswap_ushort(v)
++#define Z7_BSWAP32(v)  _byteswap_ulong (v)
++#define Z7_BSWAP64(v)  _byteswap_uint64(v)
++#define Z7_CPU_FAST_BSWAP_SUPPORTED
++
++/* GCC can generate slow code that calls function for __builtin_bswap32() for:
++     - GCC for RISCV, if Zbb/XTHeadBb extension is not used.
++     - GCC for SPARC.
++   The code from CLANG for SPARC also is not fastest.
++   So we don't define Z7_CPU_FAST_BSWAP_SUPPORTED in some cases.
++*/
++#elif (!defined(MY_CPU_RISCV) || defined (__riscv_zbb) || defined(__riscv_xtheadbb)) \
++    && !defined(MY_CPU_SPARC) \
++    && ( \
++       (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
++    || (defined(__clang__) && Z7_has_builtin(__builtin_bswap16)) \
++    )
++
++#define Z7_BSWAP16(v)  __builtin_bswap16(v)
++#define Z7_BSWAP32(v)  __builtin_bswap32(v)
++#define Z7_BSWAP64(v)  __builtin_bswap64(v)
++#define Z7_CPU_FAST_BSWAP_SUPPORTED
++
++#else
++
++#define Z7_BSWAP16(v) ((UInt16) \
++       ( ((UInt32)(v) << 8) \
++       | ((UInt32)(v) >> 8) \
++       ))
++
++#define Z7_BSWAP32(v) Z7_BSWAP32_CONST(v)
++
++#define Z7_BSWAP64(v) \
++       ( ( ( (UInt64)(v)                           ) << 8 * 7 ) \
++       | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 1) ) << 8 * 5 ) \
++       | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 2) ) << 8 * 3 ) \
++       | ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 3) ) << 8 * 1 ) \
++       | ( ( (UInt64)(v) >> 8 * 1 ) & ((UInt32)0xff << 8 * 3) ) \
++       | ( ( (UInt64)(v) >> 8 * 3 ) & ((UInt32)0xff << 8 * 2) ) \
++       | ( ( (UInt64)(v) >> 8 * 5 ) & ((UInt32)0xff << 8 * 1) ) \
++       | ( ( (UInt64)(v) >> 8 * 7 )                           ) \
++       )
++
++#endif
++
++
+ // Disable MY_CPU_LE_UNALIGN. Although the underlying ISA may be able to load
+ // unaligned words, doing so via pointer casts is undefined behavior in C and
+ // C++, under both strict aliasing and because it is invalid to construct
+ // unaligned pointers. Instead, load the bytes generically and leave optimizing
+ // this to the compiler.
+-#if 0
+ #ifdef MY_CPU_LE
+   #if defined(MY_CPU_X86_OR_AMD64) \
+-      || defined(MY_CPU_ARM64)
+-    #define MY_CPU_LE_UNALIGN
+-    #define MY_CPU_LE_UNALIGN_64
++      || defined(MY_CPU_ARM64) \
++      || defined(MY_CPU_RISCV) && defined(__riscv_misaligned_fast) \
++      || defined(MY_CPU_E2K) && defined(__iset__) && (__iset__ >= 6)
++    // #define MY_CPU_LE_UNALIGN
++    // #define MY_CPU_LE_UNALIGN_64
+   #elif defined(__ARM_FEATURE_UNALIGNED)
+-    /* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment.
+-       So we can't use unaligned 64-bit operations. */
+-    #define MY_CPU_LE_UNALIGN
++/* === ALIGNMENT on 32-bit arm and LDRD/STRD/LDM/STM instructions.
++  Description of problems:
++problem-1 : 32-bit ARM architecture:
++  multi-access (pair of 32-bit accesses) instructions (LDRD/STRD/LDM/STM)
++  require 32-bit (WORD) alignment (by 32-bit ARM architecture).
++  So there is "Alignment fault exception", if data is not aligned for 32-bit.
++
++problem-2 : 32-bit kernels and arm64 kernels:
++  32-bit linux kernels provide fixup for these "paired" instruction "Alignment fault exception".
++  So unaligned paired-access instructions work via exception handler in kernel in 32-bit linux.
++
++  But some arm64 kernels do not handle these faults in 32-bit programs.
++  So we have unhandled exception for such instructions.
++  Probably some new arm64 kernels have fixed it, and unaligned
++  paired-access instructions work in new kernels?
++
++problem-3 : compiler for 32-bit arm:
++  Compilers use LDRD/STRD/LDM/STM for UInt64 accesses
++  and for another cases where two 32-bit accesses are fused
++  to one multi-access instruction.
++  So UInt64 variables must be aligned for 32-bit, and each
++  32-bit access must be aligned for 32-bit, if we want to
++  avoid "Alignment fault" exception (handled or unhandled).
++
++problem-4 : performance:
++  Even if unaligned access is handled by kernel, it will be slow.
++  So if we allow unaligned access, we can get fast unaligned
++  single-access, and slow unaligned paired-access.
++
++  We don't allow unaligned access on 32-bit arm, because compiler
++  generates paired-access instructions that require 32-bit alignment,
++  and some arm64 kernels have no handler for these instructions.
++  Also unaligned paired-access instructions will be slow, if kernel handles them.
++*/
++    // it must be disabled:
++    // #define MY_CPU_LE_UNALIGN
+   #endif
+ #endif
+-#endif
+ 
+ 
+ #ifdef MY_CPU_LE_UNALIGN
+@@ -279,13 +469,11 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
+ #define GetUi32(p) (*(const UInt32 *)(const void *)(p))
+ #ifdef MY_CPU_LE_UNALIGN_64
+ #define GetUi64(p) (*(const UInt64 *)(const void *)(p))
++#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
+ #endif
+ 
+ #define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); }
+ #define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); }
+-#ifdef MY_CPU_LE_UNALIGN_64
+-#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
+-#endif
+ 
+ #else
+ 
+@@ -312,50 +500,33 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
+ #endif
+ 
+ 
+-#ifndef MY_CPU_LE_UNALIGN_64
+-
++#ifndef GetUi64
+ #define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
++#endif
+ 
++#ifndef SetUi64
+ #define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \
+-    SetUi32(_ppp2_    , (UInt32)_vvv2_); \
+-    SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); }
+-
++    SetUi32(_ppp2_    , (UInt32)_vvv2_) \
++    SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)) }
+ #endif
+ 
+ 
++#if defined(MY_CPU_LE_UNALIGN) && defined(Z7_CPU_FAST_BSWAP_SUPPORTED)
+ 
+-
+-#ifdef __has_builtin
+-  #define MY__has_builtin(x) __has_builtin(x)
++#if 0
++// Z7_BSWAP16 can be slow for x86-msvc
++#define GetBe16_to32(p)  (Z7_BSWAP16 (*(const UInt16 *)(const void *)(p)))
+ #else
+-  #define MY__has_builtin(x) 0
++#define GetBe16_to32(p)  (Z7_BSWAP32 (*(const UInt16 *)(const void *)(p)) >> 16)
+ #endif
+ 
+-#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ defined(_MSC_VER) && (_MSC_VER >= 1300)
+-
+-/* Note: we use bswap instruction, that is unsupported in 386 cpu */
+-
+-#include <stdlib.h>
+-
+-#pragma intrinsic(_byteswap_ushort)
+-#pragma intrinsic(_byteswap_ulong)
+-#pragma intrinsic(_byteswap_uint64)
+-
+-/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */
+-#define GetBe32(p) _byteswap_ulong (*(const UInt32 *)(const void *)(p))
+-#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const void *)(p))
+-
+-#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v)
+-
+-#elif defined(MY_CPU_LE_UNALIGN) && ( \
+-       (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
+-    || (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) )
+-
+-/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const void *)(p)) */
+-#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const void *)(p))
+-#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const void *)(p))
++#define GetBe32(p)  Z7_BSWAP32 (*(const UInt32 *)(const void *)(p))
++#define SetBe32(p, v) { (*(UInt32 *)(void *)(p)) = Z7_BSWAP32(v); }
+ 
+-#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v)
++#if defined(MY_CPU_LE_UNALIGN_64)
++#define GetBe64(p)  Z7_BSWAP64 (*(const UInt64 *)(const void *)(p))
++#define SetBe64(p, v) { (*(UInt64 *)(void *)(p)) = Z7_BSWAP64(v); }
++#endif
+ 
+ #else
+ 
+@@ -365,8 +536,6 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
+     ((UInt32)((const Byte *)(p))[2] <<  8) | \
+              ((const Byte *)(p))[3] )
+ 
+-#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
+-
+ #define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
+     _ppp_[0] = (Byte)(_vvv_ >> 24); \
+     _ppp_[1] = (Byte)(_vvv_ >> 16); \
+@@ -375,53 +544,113 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
+ 
+ #endif
+ 
++#ifndef GetBe64
++#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
++#endif
+ 
+-#ifndef GetBe16
++#ifndef SetBe64
++#define SetBe64(p, v) { Byte *_ppp_ = (Byte *)(p); UInt64 _vvv_ = (v); \
++    _ppp_[0] = (Byte)(_vvv_ >> 56); \
++    _ppp_[1] = (Byte)(_vvv_ >> 48); \
++    _ppp_[2] = (Byte)(_vvv_ >> 40); \
++    _ppp_[3] = (Byte)(_vvv_ >> 32); \
++    _ppp_[4] = (Byte)(_vvv_ >> 24); \
++    _ppp_[5] = (Byte)(_vvv_ >> 16); \
++    _ppp_[6] = (Byte)(_vvv_ >> 8); \
++    _ppp_[7] = (Byte)_vvv_; }
++#endif
+ 
++#ifndef GetBe16
++#ifdef GetBe16_to32
++#define GetBe16(p) ( (UInt16) GetBe16_to32(p))
++#else
+ #define GetBe16(p) ( (UInt16) ( \
+     ((UInt16)((const Byte *)(p))[0] << 8) | \
+              ((const Byte *)(p))[1] ))
++#endif
++#endif
+ 
++
++#if defined(MY_CPU_BE)
++#define Z7_CONV_BE_TO_NATIVE_CONST32(v)  (v)
++#define Z7_CONV_LE_TO_NATIVE_CONST32(v)  Z7_BSWAP32_CONST(v)
++#define Z7_CONV_NATIVE_TO_BE_32(v)       (v)
++#elif defined(MY_CPU_LE)
++#define Z7_CONV_BE_TO_NATIVE_CONST32(v)  Z7_BSWAP32_CONST(v)
++#define Z7_CONV_LE_TO_NATIVE_CONST32(v)  (v)
++#define Z7_CONV_NATIVE_TO_BE_32(v)       Z7_BSWAP32(v)
++#else
++#error Stop_Compiling_Unknown_Endian_CONV
+ #endif
+ 
+ 
++#if defined(MY_CPU_BE)
+ 
+-#ifdef MY_CPU_X86_OR_AMD64
++#define GetBe64a(p)      (*(const UInt64 *)(const void *)(p))
++#define GetBe32a(p)      (*(const UInt32 *)(const void *)(p))
++#define GetBe16a(p)      (*(const UInt16 *)(const void *)(p))
++#define SetBe32a(p, v)   { *(UInt32 *)(void *)(p) = (v); }
++#define SetBe16a(p, v)   { *(UInt16 *)(void *)(p) = (v); }
+ 
+-typedef struct
+-{
+-  UInt32 maxFunc;
+-  UInt32 vendor[3];
+-  UInt32 ver;
+-  UInt32 b;
+-  UInt32 c;
+-  UInt32 d;
+-} Cx86cpuid;
++#define GetUi64a(p)      GetUi64(p)
++#define GetUi32a(p)      GetUi32(p)
++#define GetUi16a(p)      GetUi16(p)
++#define SetUi32a(p, v)   SetUi32(p, v)
++#define SetUi16a(p, v)   SetUi16(p, v)
+ 
+-enum
+-{
+-  CPU_FIRM_INTEL,
+-  CPU_FIRM_AMD,
+-  CPU_FIRM_VIA
+-};
++#elif defined(MY_CPU_LE)
+ 
+-void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d);
++#define GetUi64a(p)      (*(const UInt64 *)(const void *)(p))
++#define GetUi32a(p)      (*(const UInt32 *)(const void *)(p))
++#define GetUi16a(p)      (*(const UInt16 *)(const void *)(p))
++#define SetUi32a(p, v)   { *(UInt32 *)(void *)(p) = (v); }
++#define SetUi16a(p, v)   { *(UInt16 *)(void *)(p) = (v); }
+ 
+-BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p);
+-int x86cpuid_GetFirm(const Cx86cpuid *p);
++#define GetBe64a(p)      GetBe64(p)
++#define GetBe32a(p)      GetBe32(p)
++#define GetBe16a(p)      GetBe16(p)
++#define SetBe32a(p, v)   SetBe32(p, v)
++#define SetBe16a(p, v)   SetBe16(p, v)
+ 
+-#define x86cpuid_GetFamily(ver) (((ver >> 16) & 0xFF0) | ((ver >> 8) & 0xF))
+-#define x86cpuid_GetModel(ver)  (((ver >> 12) &  0xF0) | ((ver >> 4) & 0xF))
+-#define x86cpuid_GetStepping(ver) (ver & 0xF)
++#else
++#error Stop_Compiling_Unknown_Endian_CPU_a
++#endif
++
++
++#ifndef GetBe16_to32
++#define GetBe16_to32(p) GetBe16(p)
++#endif
++
++
++#if defined(MY_CPU_X86_OR_AMD64) \
++  || defined(MY_CPU_ARM_OR_ARM64) \
++  || defined(MY_CPU_PPC_OR_PPC64)
++  #define Z7_CPU_FAST_ROTATE_SUPPORTED
++#endif
+ 
+-BoolInt CPU_Is_InOrder(void);
++
++#ifdef MY_CPU_X86_OR_AMD64
++
++void Z7_FASTCALL z7_x86_cpuid(UInt32 a[4], UInt32 function);
++UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void);
++#if defined(MY_CPU_AMD64)
++#define Z7_IF_X86_CPUID_SUPPORTED
++#else
++#define Z7_IF_X86_CPUID_SUPPORTED if (z7_x86_cpuid_GetMaxFunc())
++#endif
+ 
+ BoolInt CPU_IsSupported_AES(void);
++BoolInt CPU_IsSupported_AVX(void);
+ BoolInt CPU_IsSupported_AVX2(void);
++BoolInt CPU_IsSupported_AVX512F_AVX512VL(void);
+ BoolInt CPU_IsSupported_VAES_AVX2(void);
++BoolInt CPU_IsSupported_CMOV(void);
++BoolInt CPU_IsSupported_SSE(void);
++BoolInt CPU_IsSupported_SSE2(void);
+ BoolInt CPU_IsSupported_SSSE3(void);
+ BoolInt CPU_IsSupported_SSE41(void);
+ BoolInt CPU_IsSupported_SHA(void);
++BoolInt CPU_IsSupported_SHA512(void);
+ BoolInt CPU_IsSupported_PageGB(void);
+ 
+ #elif defined(MY_CPU_ARM_OR_ARM64)
+@@ -439,12 +668,13 @@ BoolInt CPU_IsSupported_SHA1(void);
+ BoolInt CPU_IsSupported_SHA2(void);
+ BoolInt CPU_IsSupported_AES(void);
+ #endif
++BoolInt CPU_IsSupported_SHA512(void);
+ 
+ #endif
+ 
+ #if defined(__APPLE__)
+-int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize);
+-int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val);
++int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize);
++int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val);
+ #endif
+ 
+ EXTERN_C_END
+diff --git a/third_party/lzma_sdk/C/Delta.h b/third_party/lzma_sdk/C/Delta.h
+index 2fa54ad67b38ef9d02c0ebe3611ba9e55d370097..706095417aab39514e942969c7a51c15979b902f 100644
+--- a/third_party/lzma_sdk/C/Delta.h
++++ b/third_party/lzma_sdk/C/Delta.h
+@@ -1,8 +1,8 @@
+ /* Delta.h -- Delta converter
+-2013-01-18 : Igor Pavlov : Public domain */
++2023-03-03 : Igor Pavlov : Public domain */
+ 
+-#ifndef __DELTA_H
+-#define __DELTA_H
++#ifndef ZIP7_INC_DELTA_H
++#define ZIP7_INC_DELTA_H
+ 
+ #include "7zTypes.h"
+ 
+diff --git a/third_party/lzma_sdk/C/DllSecur.c b/third_party/lzma_sdk/C/DllSecur.c
+index dce0c96c08cb53ff0db5d908d078b0e423f46aa6..bbbfc0a7638a299464c78209696621a7576106c2 100644
+--- a/third_party/lzma_sdk/C/DllSecur.c
++++ b/third_party/lzma_sdk/C/DllSecur.c
+@@ -1,114 +1,99 @@
+ /* DllSecur.c -- DLL loading security
+-2022-07-15 : Igor Pavlov : Public domain */
++2023-12-03 : Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+ 
+ #ifdef _WIN32
+ 
+-#include <Windows.h>
++#include "7zWindows.h"
+ 
+ #include "DllSecur.h"
+ 
+ #ifndef UNDER_CE
+ 
+-#if defined(__GNUC__) && (__GNUC__ >= 8)
+-  #pragma GCC diagnostic ignored "-Wcast-function-type"
+-#endif
++Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION
+ 
+ typedef BOOL (WINAPI *Func_SetDefaultDllDirectories)(DWORD DirectoryFlags);
+ 
+ #define MY_LOAD_LIBRARY_SEARCH_USER_DIRS 0x400
+ #define MY_LOAD_LIBRARY_SEARCH_SYSTEM32  0x800
+ 
++#define DELIM "\0"
++
+ static const char * const g_Dlls =
++         "userenv"
++  DELIM  "setupapi"
++  DELIM  "apphelp"
++  DELIM  "propsys"
++  DELIM  "dwmapi"
++  DELIM  "cryptbase"
++  DELIM  "oleacc"
++  DELIM  "clbcatq"
++  DELIM  "version"
+   #ifndef _CONSOLE
+-  "UXTHEME\0"
++  DELIM  "uxtheme"
+   #endif
+-  "USERENV\0"
+-  "SETUPAPI\0"
+-  "APPHELP\0"
+-  "PROPSYS\0"
+-  "DWMAPI\0"
+-  "CRYPTBASE\0"
+-  "OLEACC\0"
+-  "CLBCATQ\0"
+-  "VERSION\0"
+-  ;
++  DELIM;
++
++#endif
+ 
++#ifdef __clang__
++  #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
++#endif
++#if defined (_MSC_VER) && _MSC_VER >= 1900
++// sysinfoapi.h: kit10: GetVersion was declared deprecated
++#pragma warning(disable : 4996)
+ #endif
+ 
+-// #define MY_CAST_FUNC  (void(*)())
+-#define MY_CAST_FUNC
++#define IF_NON_VISTA_SET_DLL_DIRS_AND_RETURN \
++    if ((UInt16)GetVersion() != 6) { \
++      const \
++       Func_SetDefaultDllDirectories setDllDirs = \
++      (Func_SetDefaultDllDirectories) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), \
++           "SetDefaultDllDirectories"); \
++      if (setDllDirs) if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS)) return; }
+ 
+-void My_SetDefaultDllDirectories()
++void My_SetDefaultDllDirectories(void)
+ {
+   #ifndef UNDER_CE
+-  
+-    OSVERSIONINFO vi;
+-    vi.dwOSVersionInfoSize = sizeof(vi);
+-    if (!GetVersionEx(&vi) || vi.dwMajorVersion != 6 || vi.dwMinorVersion != 0)
+-    {
+-      Func_SetDefaultDllDirectories setDllDirs = (Func_SetDefaultDllDirectories)
+-          MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "SetDefaultDllDirectories");
+-      if (setDllDirs)
+-        if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS))
+-          return;
+-    }
+-
++  IF_NON_VISTA_SET_DLL_DIRS_AND_RETURN
+   #endif
+ }
+ 
+ 
+-void LoadSecurityDlls()
++void LoadSecurityDlls(void)
+ {
+   #ifndef UNDER_CE
+-  
+-  wchar_t buf[MAX_PATH + 100];
+-
+-  {
+-    // at Vista (ver 6.0) : CoCreateInstance(CLSID_ShellLink, ...) doesn't work after SetDefaultDllDirectories() : Check it ???
+-    OSVERSIONINFO vi;
+-    vi.dwOSVersionInfoSize = sizeof(vi);
+-    if (!GetVersionEx(&vi) || vi.dwMajorVersion != 6 || vi.dwMinorVersion != 0)
+-    {
+-      Func_SetDefaultDllDirectories setDllDirs = (Func_SetDefaultDllDirectories)
+-          MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "SetDefaultDllDirectories");
+-      if (setDllDirs)
+-        if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS))
+-          return;
+-    }
+-  }
+-
+-  {
+-    unsigned len = GetSystemDirectoryW(buf, MAX_PATH + 2);
+-    if (len == 0 || len > MAX_PATH)
+-      return;
+-  }
++  // at Vista (ver 6.0) : CoCreateInstance(CLSID_ShellLink, ...) doesn't work after SetDefaultDllDirectories() : Check it ???
++  IF_NON_VISTA_SET_DLL_DIRS_AND_RETURN
+   {
++    wchar_t buf[MAX_PATH + 100];
+     const char *dll;
+-    unsigned pos = (unsigned)lstrlenW(buf);
+-
++    unsigned pos = GetSystemDirectoryW(buf, MAX_PATH + 2);
++    if (pos == 0 || pos > MAX_PATH)
++      return;
+     if (buf[pos - 1] != '\\')
+       buf[pos++] = '\\';
+-    
+-    for (dll = g_Dlls; dll[0] != 0;)
++    for (dll = g_Dlls; *dll != 0;)
+     {
+-      unsigned k = 0;
++      wchar_t *dest = &buf[pos];
+       for (;;)
+       {
+-        char c = *dll++;
+-        buf[pos + k] = (Byte)c;
+-        k++;
++        const char c = *dll++;
+         if (c == 0)
+           break;
++        *dest++ = (Byte)c;
+       }
+-
+-      lstrcatW(buf, L".dll");
++      dest[0] = '.';
++      dest[1] = 'd';
++      dest[2] = 'l';
++      dest[3] = 'l';
++      dest[4] = 0;
++      // lstrcatW(buf, L".dll");
+       LoadLibraryExW(buf, NULL, LOAD_WITH_ALTERED_SEARCH_PATH);
+     }
+   }
+-  
+   #endif
+ }
+ 
+-#endif
++#endif // _WIN32
+diff --git a/third_party/lzma_sdk/C/DllSecur.h b/third_party/lzma_sdk/C/DllSecur.h
+index 64ff26cd93aa741a39a2c11300e85e2a02eac19a..9fa41538265ac26c09123747b7806dab8d698d2d 100644
+--- a/third_party/lzma_sdk/C/DllSecur.h
++++ b/third_party/lzma_sdk/C/DllSecur.h
+@@ -1,8 +1,8 @@
+ /* DllSecur.h -- DLL loading for security
+-2018-02-19 : Igor Pavlov : Public domain */
++2023-03-03 : Igor Pavlov : Public domain */
+ 
+-#ifndef __DLL_SECUR_H
+-#define __DLL_SECUR_H
++#ifndef ZIP7_INC_DLL_SECUR_H
++#define ZIP7_INC_DLL_SECUR_H
+ 
+ #include "7zTypes.h"
+ 
+diff --git a/third_party/lzma_sdk/C/LzFind.c b/third_party/lzma_sdk/C/LzFind.c
+index 36f7330911435779e4d213f340c33c93c9df20cb..5941582e7977685a981afa0a0b2589479d2a9f7b 100644
+--- a/third_party/lzma_sdk/C/LzFind.c
++++ b/third_party/lzma_sdk/C/LzFind.c
+@@ -1,5 +1,5 @@
+ /* LzFind.c -- Match finder for LZ algorithms
+-2021-11-29 : Igor Pavlov : Public domain */
++2024-03-01 : Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+ 
+@@ -17,7 +17,7 @@
+ #define kEmptyHashValue 0
+ 
+ #define kMaxValForNormalize ((UInt32)0)
+-// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xFFF) // for debug
++// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xfff) // for debug
+ 
+ // #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
+ 
+@@ -67,10 +67,10 @@
+ 
+ static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
+ {
+-  if (!p->directInput)
++  // if (!p->directInput)
+   {
+-    ISzAlloc_Free(alloc, p->bufferBase);
+-    p->bufferBase = NULL;
++    ISzAlloc_Free(alloc, p->bufBase);
++    p->bufBase = NULL;
+   }
+ }
+ 
+@@ -79,7 +79,7 @@ static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr all
+ {
+   if (blockSize == 0)
+     return 0;
+-  if (!p->bufferBase || p->blockSize != blockSize)
++  if (!p->bufBase || p->blockSize != blockSize)
+   {
+     // size_t blockSizeT;
+     LzInWindow_Free(p, alloc);
+@@ -101,19 +101,25 @@ static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr all
+     #endif
+     */
+     
+-    p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize);
+-    // printf("\nbufferBase = %p\n", p->bufferBase);
++    p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize);
++    // printf("\nbufferBase = %p\n", p->bufBase);
+     // return 0; // for debug
+   }
+-  return (p->bufferBase != NULL);
++  return (p->bufBase != NULL);
+ }
+ 
+-static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
++static const Byte *MatchFinder_GetPointerToCurrentPos(void *p)
++{
++  return ((CMatchFinder *)p)->buffer;
++}
+ 
+-static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); }
++static UInt32 MatchFinder_GetNumAvailableBytes(void *p)
++{
++  return GET_AVAIL_BYTES((CMatchFinder *)p);
++}
+ 
+ 
+-MY_NO_INLINE
++Z7_NO_INLINE
+ static void MatchFinder_ReadBlock(CMatchFinder *p)
+ {
+   if (p->streamEndWasReached || p->result != SZ_OK)
+@@ -127,8 +133,8 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
+     UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p);
+     if (curSize > p->directInputRem)
+       curSize = (UInt32)p->directInputRem;
+-    p->directInputRem -= curSize;
+     p->streamPos += curSize;
++    p->directInputRem -= curSize;
+     if (p->directInputRem == 0)
+       p->streamEndWasReached = 1;
+     return;
+@@ -136,8 +142,8 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
+   
+   for (;;)
+   {
+-    Byte *dest = p->buffer + GET_AVAIL_BYTES(p);
+-    size_t size = (size_t)(p->bufferBase + p->blockSize - dest);
++    const Byte *dest = p->buffer + GET_AVAIL_BYTES(p);
++    size_t size = (size_t)(p->bufBase + p->blockSize - dest);
+     if (size == 0)
+     {
+       /* we call ReadBlock() after NeedMove() and MoveBlock().
+@@ -153,7 +159,14 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
+     // #define kRead 3
+     // if (size > kRead) size = kRead; // for debug
+ 
+-    p->result = ISeqInStream_Read(p->stream, dest, &size);
++    /*
++    // we need cast (Byte *)dest.
++    #ifdef __clang__
++      #pragma GCC diagnostic ignored "-Wcast-qual"
++    #endif
++    */
++    p->result = ISeqInStream_Read(p->stream,
++        p->bufBase + (dest - p->bufBase), &size);
+     if (p->result != SZ_OK)
+       return;
+     if (size == 0)
+@@ -173,14 +186,14 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
+ 
+ 
+ 
+-MY_NO_INLINE
++Z7_NO_INLINE
+ void MatchFinder_MoveBlock(CMatchFinder *p)
+ {
+-  const size_t offset = (size_t)(p->buffer - p->bufferBase) - p->keepSizeBefore;
++  const size_t offset = (size_t)(p->buffer - p->bufBase) - p->keepSizeBefore;
+   const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore;
+-  p->buffer = p->bufferBase + keepBefore;
+-  memmove(p->bufferBase,
+-      p->bufferBase + (offset & ~((size_t)kBlockMoveAlign - 1)),
++  p->buffer = p->bufBase + keepBefore;
++  memmove(p->bufBase,
++      p->bufBase + (offset & ~((size_t)kBlockMoveAlign - 1)),
+       keepBefore + (size_t)GET_AVAIL_BYTES(p));
+ }
+ 
+@@ -198,7 +211,7 @@ int MatchFinder_NeedMove(CMatchFinder *p)
+     return 0;
+   if (p->streamEndWasReached || p->result != SZ_OK)
+     return 0;
+-  return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
++  return ((size_t)(p->bufBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
+ }
+ 
+ void MatchFinder_ReadIfRequired(CMatchFinder *p)
+@@ -214,6 +227,8 @@ static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
+   p->cutValue = 32;
+   p->btMode = 1;
+   p->numHashBytes = 4;
++  p->numHashBytes_Min = 2;
++  p->numHashOutBits = 0;
+   p->bigHash = 0;
+ }
+ 
+@@ -222,8 +237,10 @@ static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
+ void MatchFinder_Construct(CMatchFinder *p)
+ {
+   unsigned i;
+-  p->bufferBase = NULL;
++  p->buffer = NULL;
++  p->bufBase = NULL;
+   p->directInput = 0;
++  p->stream = NULL;
+   p->hash = NULL;
+   p->expectedDataSize = (UInt64)(Int64)-1;
+   MatchFinder_SetDefaultSettings(p);
+@@ -238,6 +255,8 @@ void MatchFinder_Construct(CMatchFinder *p)
+   }
+ }
+ 
++#undef kCrcPoly
++
+ static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc)
+ {
+   ISzAlloc_Free(alloc, p->hash);
+@@ -252,7 +271,7 @@ void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc)
+ 
+ static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
+ {
+-  size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
++  const size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
+   if (sizeInBytes / sizeof(CLzRef) != num)
+     return NULL;
+   return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes);
+@@ -298,6 +317,62 @@ static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize)
+ }
+ 
+ 
++// input (hs) is historySize. Returns a mask with all low bits set (at least 16 bits); unlike MatchFinder_GetHashMask(), (hs) is not halved here.
++static UInt32 MatchFinder_GetHashMask2(CMatchFinder *p, UInt32 hs)
++{
++  if (p->numHashBytes == 2)
++    return (1 << 16) - 1;
++  if (hs != 0)
++    hs--;
++  hs |= (hs >> 1);
++  hs |= (hs >> 2);
++  hs |= (hs >> 4);
++  hs |= (hs >> 8);
++  // we propagated 16 bits in (hs). Low 16 bits must be set later
++  if (hs >= (1 << 24))
++  {
++    if (p->numHashBytes == 3)
++      hs = (1 << 24) - 1;
++    /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
++  }
++  // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
++  hs |= (1 << 16) - 1; /* don't change it! */
++  // bt5: we adjust the size with recommended minimum size
++  if (p->numHashBytes >= 5)
++    hs |= (256 << kLzHash_CrcShift_2) - 1;
++  return hs;
++}
++
++// input (hs) is historySize. Returns a mask with all low bits set (at least 16 bits); the propagated value is halved once, and once more if it is still >= (1 << 24) and (numHashBytes != 3).
++static UInt32 MatchFinder_GetHashMask(CMatchFinder *p, UInt32 hs)
++{
++  if (p->numHashBytes == 2)
++    return (1 << 16) - 1;
++  if (hs != 0)
++    hs--;
++  hs |= (hs >> 1);
++  hs |= (hs >> 2);
++  hs |= (hs >> 4);
++  hs |= (hs >> 8);
++  // we propagated 16 bits in (hs). Low 16 bits must be set later
++  hs >>= 1;
++  if (hs >= (1 << 24))
++  {
++    if (p->numHashBytes == 3)
++      hs = (1 << 24) - 1;
++    else
++      hs >>= 1;
++    /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
++  }
++  // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
++  hs |= (1 << 16) - 1; /* don't change it! */
++  // bt5: we adjust the size with recommended minimum size
++  if (p->numHashBytes >= 5)
++    hs |= (256 << kLzHash_CrcShift_2) - 1;
++  return hs;
++}
++
++
+ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+     UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
+     ISzAllocPtr alloc)
+@@ -318,78 +393,91 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+     p->blockSize = 0;
+   if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc))
+   {
+-    const UInt32 newCyclicBufferSize = historySize + 1; // do not change it
+-    UInt32 hs;
+-    p->matchMaxLen = matchMaxLen;
++    size_t hashSizeSum;
+     {
+-      // UInt32 hs4;
+-      p->fixedHashSize = 0;
+-      hs = (1 << 16) - 1;
+-      if (p->numHashBytes != 2)
++      UInt32 hs;
++      UInt32 hsCur;
++
++      if (p->numHashOutBits != 0)
+       {
+-        hs = historySize;
+-        if (hs > p->expectedDataSize)
+-          hs = (UInt32)p->expectedDataSize;
+-        if (hs != 0)
+-          hs--;
+-        hs |= (hs >> 1);
+-        hs |= (hs >> 2);
+-        hs |= (hs >> 4);
+-        hs |= (hs >> 8);
+-        // we propagated 16 bits in (hs). Low 16 bits must be set later
+-        hs >>= 1;
+-        if (hs >= (1 << 24))
+-        {
+-          if (p->numHashBytes == 3)
+-            hs = (1 << 24) - 1;
+-          else
+-            hs >>= 1;
+-          /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
+-        }
+-        
+-        // hs = ((UInt32)1 << 25) - 1; // for test
+-        
++        unsigned numBits = p->numHashOutBits;
++        const unsigned nbMax =
++            (p->numHashBytes == 2 ? 16 :
++            (p->numHashBytes == 3 ? 24 : 32));
++        if (numBits > nbMax)
++          numBits = nbMax;
++        if (numBits >= 32)
++          hs = (UInt32)0 - 1;
++        else
++          hs = ((UInt32)1 << numBits) - 1;
+         // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
+         hs |= (1 << 16) - 1; /* don't change it! */
+-        
+-        // bt5: we adjust the size with recommended minimum size
+         if (p->numHashBytes >= 5)
+           hs |= (256 << kLzHash_CrcShift_2) - 1;
++        {
++          const UInt32 hs2 = MatchFinder_GetHashMask2(p, historySize);
++          if (hs > hs2)
++            hs = hs2;
++        }
++        hsCur = hs;
++        if (p->expectedDataSize < historySize)
++        {
++          const UInt32 hs2 = MatchFinder_GetHashMask2(p, (UInt32)p->expectedDataSize);
++          if (hsCur > hs2)
++            hsCur = hs2;
++        }
+       }
+-      p->hashMask = hs;
+-      hs++;
+-
+-      /*
+-      hs4 = (1 << 20);
+-      if (hs4 > hs)
+-        hs4 = hs;
+-      // hs4 = (1 << 16); // for test
+-      p->hash4Mask = hs4 - 1;
+-      */
++      else
++      {
++        hs = MatchFinder_GetHashMask(p, historySize);
++        hsCur = hs;
++        if (p->expectedDataSize < historySize)
++        {
++          hsCur = MatchFinder_GetHashMask(p, (UInt32)p->expectedDataSize);
++          if (hsCur > hs) // is it possible?
++            hsCur = hs;
++        }
++      }
++
++      p->hashMask = hsCur;
+ 
+-      if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
+-      if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
+-      // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size;
+-      hs += p->fixedHashSize;
++      hashSizeSum = hs;
++      hashSizeSum++;
++      if (hashSizeSum < hs)
++        return 0;
++      {
++        UInt32 fixedHashSize = 0;
++        if (p->numHashBytes > 2 && p->numHashBytes_Min <= 2) fixedHashSize += kHash2Size;
++        if (p->numHashBytes > 3 && p->numHashBytes_Min <= 3) fixedHashSize += kHash3Size;
++        // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size;
++        hashSizeSum += fixedHashSize;
++        p->fixedHashSize = fixedHashSize;
++      }
+     }
+ 
++    p->matchMaxLen = matchMaxLen;
++
+     {
+       size_t newSize;
+       size_t numSons;
++      const UInt32 newCyclicBufferSize = historySize + 1; // do not change it
+       p->historySize = historySize;
+-      p->hashSizeSum = hs;
+       p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1)
+       
+       numSons = newCyclicBufferSize;
+       if (p->btMode)
+         numSons <<= 1;
+-      newSize = hs + numSons;
++      newSize = hashSizeSum + numSons;
++
++      if (numSons < newCyclicBufferSize || newSize < numSons)
++        return 0;
+ 
+       // aligned size is not required here, but it can be better for some loops
+       #define NUM_REFS_ALIGN_MASK 0xF
+       newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK;
+ 
+-      if (p->hash && p->numRefs == newSize)
++      // 22.02: we don't reallocate buffer, if old size is enough
++      if (p->hash && p->numRefs >= newSize)
+         return 1;
+       
+       MatchFinder_FreeThisClassMemory(p, alloc);
+@@ -398,7 +486,7 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+       
+       if (p->hash)
+       {
+-        p->son = p->hash + p->hashSizeSum;
++        p->son = p->hash + hashSizeSum;
+         return 1;
+       }
+     }
+@@ -470,7 +558,8 @@ void MatchFinder_Init_HighHash(CMatchFinder *p)
+ 
+ void MatchFinder_Init_4(CMatchFinder *p)
+ {
+-  p->buffer = p->bufferBase;
++  if (!p->directInput)
++    p->buffer = p->bufBase;
+   {
+     /* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker.
+        the code in CMatchFinderMt expects (pos = 1) */
+@@ -488,8 +577,9 @@ void MatchFinder_Init_4(CMatchFinder *p)
+ #define CYC_TO_POS_OFFSET 0
+ // #define CYC_TO_POS_OFFSET 1 // for debug
+ 
+-void MatchFinder_Init(CMatchFinder *p)
++void MatchFinder_Init(void *_p)
+ {
++  CMatchFinder *p = (CMatchFinder *)_p;
+   MatchFinder_Init_HighHash(p);
+   MatchFinder_Init_LowHash(p);
+   MatchFinder_Init_4(p);
+@@ -507,165 +597,173 @@ void MatchFinder_Init(CMatchFinder *p)
+ 
+ #if 0
+ #ifdef MY_CPU_X86_OR_AMD64
+-  #if defined(__clang__) && (__clang_major__ >= 8) \
+-    || defined(__GNUC__) && (__GNUC__ >= 8) \
+-    || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900)
+-      #define USE_SATUR_SUB_128
+-      #define USE_AVX2
+-      #define ATTRIB_SSE41 __attribute__((__target__("sse4.1")))
+-      #define ATTRIB_AVX2 __attribute__((__target__("avx2")))
++  #if defined(__clang__) && (__clang_major__ >= 4) \
++    || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40701)
++    // || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900)
++
++      #define USE_LZFIND_SATUR_SUB_128
++      #define USE_LZFIND_SATUR_SUB_256
++      #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("sse4.1")))
++      #define LZFIND_ATTRIB_AVX2  __attribute__((__target__("avx2")))
+   #elif defined(_MSC_VER)
+     #if (_MSC_VER >= 1600)
+-      #define USE_SATUR_SUB_128
+-      #if (_MSC_VER >= 1900)
+-        #define USE_AVX2
+-        #include <immintrin.h> // avx
+-      #endif
++      #define USE_LZFIND_SATUR_SUB_128
++    #endif
++    #if (_MSC_VER >= 1900)
++      #define USE_LZFIND_SATUR_SUB_256
+     #endif
+   #endif
+ 
+-// #elif defined(MY_CPU_ARM_OR_ARM64)
+-#elif defined(MY_CPU_ARM64)
++#elif defined(MY_CPU_ARM64) \
++  /* || (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) */
+ 
+-  #if defined(__clang__) && (__clang_major__ >= 8) \
+-    || defined(__GNUC__) && (__GNUC__ >= 8)
+-      #define USE_SATUR_SUB_128
++  #if  defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
++    || defined(__GNUC__) && (__GNUC__ >= 6)
++      #define USE_LZFIND_SATUR_SUB_128
+     #ifdef MY_CPU_ARM64
+-      // #define ATTRIB_SSE41 __attribute__((__target__("")))
++      // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("")))
+     #else
+-      // #define ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
++      #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=neon")))
+     #endif
+ 
+   #elif defined(_MSC_VER)
+     #if (_MSC_VER >= 1910)
+-      #define USE_SATUR_SUB_128
++      #define USE_LZFIND_SATUR_SUB_128
+     #endif
+   #endif
+ 
+-  #if defined(_MSC_VER) && defined(MY_CPU_ARM64)
++  #if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64)
+     #include <arm64_neon.h>
+   #else
+     #include <arm_neon.h>
+   #endif
+ 
+ #endif
+-#endif
++#endif // #if 0
+ 
+-/*
+-#ifndef ATTRIB_SSE41
+-  #define ATTRIB_SSE41
+-#endif
+-#ifndef ATTRIB_AVX2
+-  #define ATTRIB_AVX2
+-#endif
+-*/
+-
+-#ifdef USE_SATUR_SUB_128
++#ifdef USE_LZFIND_SATUR_SUB_128
+ 
+-// #define _SHOW_HW_STATUS
++// #define Z7_SHOW_HW_STATUS
+ 
+-#ifdef _SHOW_HW_STATUS
++#ifdef Z7_SHOW_HW_STATUS
+ #include <stdio.h>
+-#define _PRF(x) x
+-_PRF(;)
++#define PRF(x) x
++PRF(;)
+ #else
+-#define _PRF(x)
++#define PRF(x)
+ #endif
+ 
++
+ #ifdef MY_CPU_ARM_OR_ARM64
+ 
+ #ifdef MY_CPU_ARM64
+-// #define FORCE_SATUR_SUB_128
++// #define FORCE_LZFIND_SATUR_SUB_128
+ #endif
++typedef uint32x4_t LzFind_v128;
++#define SASUB_128_V(v, s) \
++  vsubq_u32(vmaxq_u32(v, s), s)
+ 
+-typedef uint32x4_t v128;
+-#define SASUB_128(i) \
+-   *(v128 *)(void *)(items + (i) * 4) = \
+-  vsubq_u32(vmaxq_u32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2);
+-
+-#else
++#else // MY_CPU_ARM_OR_ARM64
+ 
+ #include <smmintrin.h> // sse4.1
+ 
+-typedef __m128i v128;
+-#define SASUB_128(i) \
+-  *(v128 *)(void *)(items + (i) * 4) = \
+-  _mm_sub_epi32(_mm_max_epu32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); // SSE 4.1
++typedef __m128i LzFind_v128;
++// SSE 4.1
++#define SASUB_128_V(v, s)   \
++  _mm_sub_epi32(_mm_max_epu32(v, s), s)
++
++#endif // MY_CPU_ARM_OR_ARM64
+ 
+-#endif
+ 
++#define SASUB_128(i) \
++  *(      LzFind_v128 *)(      void *)(items + (i) * 4) = SASUB_128_V( \
++  *(const LzFind_v128 *)(const void *)(items + (i) * 4), sub2);
+ 
+ 
+-MY_NO_INLINE
++Z7_NO_INLINE
+ static
+-#ifdef ATTRIB_SSE41
+-ATTRIB_SSE41
++#ifdef LZFIND_ATTRIB_SSE41
++LZFIND_ATTRIB_SSE41
+ #endif
+ void
+-MY_FAST_CALL
++Z7_FASTCALL
+ LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim)
+ {
+-  v128 sub2 =
++  const LzFind_v128 sub2 =
+     #ifdef MY_CPU_ARM_OR_ARM64
+       vdupq_n_u32(subValue);
+     #else
+       _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
+     #endif
++  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
+   do
+   {
+-    SASUB_128(0)
+-    SASUB_128(1)
+-    SASUB_128(2)
+-    SASUB_128(3)
+-    items += 4 * 4;
++    SASUB_128(0)  SASUB_128(1)  items += 2 * 4;
++    SASUB_128(0)  SASUB_128(1)  items += 2 * 4;
+   }
+   while (items != lim);
+ }
+ 
+ 
+ 
+-#ifdef USE_AVX2
++#ifdef USE_LZFIND_SATUR_SUB_256
+ 
+ #include <immintrin.h> // avx
++/*
++clang :immintrin.h uses
++#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
++    defined(__AVX2__)
++#include <avx2intrin.h>
++#endif
++so we need <avxintrin.h> for clang-cl */
+ 
+-#define SASUB_256(i) *(__m256i *)(void *)(items + (i) * 8) = _mm256_sub_epi32(_mm256_max_epu32(*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2); // AVX2
++#if defined(__clang__)
++#include <avxintrin.h>
++#include <avx2intrin.h>
++#endif
+ 
+-MY_NO_INLINE
++// AVX2:
++#define SASUB_256(i) \
++    *(      __m256i *)(      void *)(items + (i) * 8) = \
++   _mm256_sub_epi32(_mm256_max_epu32( \
++    *(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2);
++
++Z7_NO_INLINE
+ static
+-#ifdef ATTRIB_AVX2
+-ATTRIB_AVX2
++#ifdef LZFIND_ATTRIB_AVX2
++LZFIND_ATTRIB_AVX2
+ #endif
+ void
+-MY_FAST_CALL
++Z7_FASTCALL
+ LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim)
+ {
+-  __m256i sub2 = _mm256_set_epi32(
++  const __m256i sub2 = _mm256_set_epi32(
+       (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue,
+       (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
++  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
+   do
+   {
+-    SASUB_256(0)
+-    SASUB_256(1)
+-    items += 2 * 8;
++    SASUB_256(0)  SASUB_256(1)  items += 2 * 8;
++    SASUB_256(0)  SASUB_256(1)  items += 2 * 8;
+   }
+   while (items != lim);
+ }
+-#endif // USE_AVX2
++#endif // USE_LZFIND_SATUR_SUB_256
+ 
+-#ifndef FORCE_SATUR_SUB_128
+-typedef void (MY_FAST_CALL *LZFIND_SATUR_SUB_CODE_FUNC)(
++#ifndef FORCE_LZFIND_SATUR_SUB_128
++typedef void (Z7_FASTCALL *LZFIND_SATUR_SUB_CODE_FUNC)(
+     UInt32 subValue, CLzRef *items, const CLzRef *lim);
+ static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;
+-#endif // FORCE_SATUR_SUB_128
++#endif // FORCE_LZFIND_SATUR_SUB_128
+ 
+-#endif // USE_SATUR_SUB_128
++#endif // USE_LZFIND_SATUR_SUB_128
+ 
+ 
+ // kEmptyHashValue must be zero
+-// #define SASUB_32(i) v = items[i];  m = v - subValue;  if (v < subValue) m = kEmptyHashValue;  items[i] = m;
+-#define SASUB_32(i) v = items[i];  if (v < subValue) v = subValue; items[i] = v - subValue;
++// #define SASUB_32(i)  { UInt32 v = items[i];  UInt32 m = v - subValue;  if (v < subValue) m = kEmptyHashValue;  items[i] = m; }
++#define SASUB_32(i)  { UInt32 v = items[i];  if (v < subValue) v = subValue; items[i] = v - subValue; }
+ 
+-#ifdef FORCE_SATUR_SUB_128
++#ifdef FORCE_LZFIND_SATUR_SUB_128
+ 
+ #define DEFAULT_SaturSub LzFind_SaturSub_128
+ 
+@@ -673,24 +771,19 @@ static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;
+ 
+ #define DEFAULT_SaturSub LzFind_SaturSub_32
+ 
+-MY_NO_INLINE
++Z7_NO_INLINE
+ static
+ void
+-MY_FAST_CALL
++Z7_FASTCALL
+ LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)
+ {
++  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
+   do
+   {
+-    UInt32 v;
+-    SASUB_32(0)
+-    SASUB_32(1)
+-    SASUB_32(2)
+-    SASUB_32(3)
+-    SASUB_32(4)
+-    SASUB_32(5)
+-    SASUB_32(6)
+-    SASUB_32(7)
+-    items += 8;
++    SASUB_32(0)  SASUB_32(1)  items += 2;
++    SASUB_32(0)  SASUB_32(1)  items += 2;
++    SASUB_32(0)  SASUB_32(1)  items += 2;
++    SASUB_32(0)  SASUB_32(1)  items += 2;
+   }
+   while (items != lim);
+ }
+@@ -698,27 +791,23 @@ LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)
+ #endif
+ 
+ 
+-MY_NO_INLINE
++Z7_NO_INLINE
+ void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
+ {
+-  #define K_NORM_ALIGN_BLOCK_SIZE (1 << 6)
+-  
+-  CLzRef *lim;
+-
+-  for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (K_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--)
++  #define LZFIND_NORM_ALIGN_BLOCK_SIZE (1 << 7)
++  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
++  for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (LZFIND_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--)
+   {
+-    UInt32 v;
+-    SASUB_32(0);
++    SASUB_32(0)
+     items++;
+   }
+-
+   {
+-    #define K_NORM_ALIGN_MASK (K_NORM_ALIGN_BLOCK_SIZE / 4 - 1)
+-    lim = items + (numItems & ~(size_t)K_NORM_ALIGN_MASK);
+-    numItems &= K_NORM_ALIGN_MASK;
++    const size_t k_Align_Mask = (LZFIND_NORM_ALIGN_BLOCK_SIZE / 4 - 1);
++    CLzRef *lim = items + (numItems & ~(size_t)k_Align_Mask);
++    numItems &= k_Align_Mask;
+     if (items != lim)
+     {
+-      #if defined(USE_SATUR_SUB_128) && !defined(FORCE_SATUR_SUB_128)
++      #if defined(USE_LZFIND_SATUR_SUB_128) && !defined(FORCE_LZFIND_SATUR_SUB_128)
+         if (g_LzFind_SaturSub)
+           g_LzFind_SaturSub(subValue, items, lim);
+         else
+@@ -727,12 +816,10 @@ void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
+     }
+     items = lim;
+   }
+-
+-
++  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
+   for (; numItems != 0; numItems--)
+   {
+-    UInt32 v;
+-    SASUB_32(0);
++    SASUB_32(0)
+     items++;
+   }
+ }
+@@ -741,7 +828,7 @@ void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
+ 
+ // call MatchFinder_CheckLimits() only after (p->pos++) update
+ 
+-MY_NO_INLINE
++Z7_NO_INLINE
+ static void MatchFinder_CheckLimits(CMatchFinder *p)
+ {
+   if (// !p->streamEndWasReached && p->result == SZ_OK &&
+@@ -769,11 +856,14 @@ static void MatchFinder_CheckLimits(CMatchFinder *p)
+     const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */;
+     // const UInt32 subValue = (1 << 15); // for debug
+     // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue);
+-    size_t numSonRefs = p->cyclicBufferSize;
+-    if (p->btMode)
+-      numSonRefs <<= 1;
+-    Inline_MatchFinder_ReduceOffsets(p, subValue);
+-    MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashSizeSum + numSonRefs);
++    MatchFinder_REDUCE_OFFSETS(p, subValue)
++    MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashMask + 1 + p->fixedHashSize);
++    {
++      size_t numSonRefs = p->cyclicBufferSize;
++      if (p->btMode)
++        numSonRefs <<= 1;
++      MatchFinder_Normalize3(subValue, p->son, numSonRefs);
++    }
+   }
+ 
+   if (p->cyclicBufferPos == p->cyclicBufferSize)
+@@ -786,7 +876,7 @@ static void MatchFinder_CheckLimits(CMatchFinder *p)
+ /*
+   (lenLimit > maxLen)
+ */
+-MY_FORCE_INLINE
++Z7_FORCE_INLINE
+ static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+     size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+     UInt32 *d, unsigned maxLen)
+@@ -868,7 +958,7 @@ static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos,
+ }
+ 
+ 
+-MY_FORCE_INLINE
++Z7_FORCE_INLINE
+ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+     size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+     UInt32 *d, UInt32 maxLen)
+@@ -999,13 +1089,15 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const
+ 
+ 
+ #define MOVE_POS \
+-  ++p->cyclicBufferPos; \
++  p->cyclicBufferPos++; \
+   p->buffer++; \
+-  { const UInt32 pos1 = p->pos + 1; p->pos = pos1; if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); }
++  { const UInt32 pos1 = p->pos + 1; \
++    p->pos = pos1; \
++    if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); }
+ 
+ #define MOVE_POS_RET MOVE_POS return distances;
+ 
+-MY_NO_INLINE
++Z7_NO_INLINE
+ static void MatchFinder_MovePos(CMatchFinder *p)
+ {
+   /* we go here at the end of stream data, when (avail < num_hash_bytes)
+@@ -1016,24 +1108,30 @@ static void MatchFinder_MovePos(CMatchFinder *p)
+      if (p->btMode)
+         p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0;  // kEmptyHashValue
+   */
+-  MOVE_POS;
++  MOVE_POS
+ }
+ 
+ #define GET_MATCHES_HEADER2(minLen, ret_op) \
+-  unsigned lenLimit; UInt32 hv; Byte *cur; UInt32 curMatch; \
+-  lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
++  UInt32 hv; const Byte *cur; UInt32 curMatch; \
++  UInt32 lenLimit = p->lenLimit; \
++  if (lenLimit < minLen) { MatchFinder_MovePos(p);  ret_op; } \
+   cur = p->buffer;
+ 
+ #define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances)
+-#define SKIP_HEADER(minLen)   do { GET_MATCHES_HEADER2(minLen, continue)
++#define SKIP_HEADER(minLen)  \
++  do { GET_MATCHES_HEADER2(minLen, continue)
+ 
+-#define MF_PARAMS(p)  lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
++#define MF_PARAMS(p)  lenLimit, curMatch, p->pos, p->buffer, p->son, \
++    p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
+ 
+-#define SKIP_FOOTER  SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS; } while (--num);
++#define SKIP_FOOTER  \
++    SkipMatchesSpec(MF_PARAMS(p)); \
++    MOVE_POS \
++  } while (--num);
+ 
+ #define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \
+-  distances = func(MF_PARAMS(p), \
+-  distances, (UInt32)_maxLen_); MOVE_POS_RET;
++  distances = func(MF_PARAMS(p), distances, (UInt32)_maxLen_); \
++  MOVE_POS_RET
+ 
+ #define GET_MATCHES_FOOTER_BT(_maxLen_) \
+   GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1)
+@@ -1050,10 +1148,11 @@ static void MatchFinder_MovePos(CMatchFinder *p)
+     for (; c != lim; c++) if (*(c + diff) != *c) break; \
+     maxLen = (unsigned)(c - cur); }
+ 
+-static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
++static UInt32* Bt2_MatchFinder_GetMatches(void *_p, UInt32 *distances)
+ {
++  CMatchFinder *p = (CMatchFinder *)_p;
+   GET_MATCHES_HEADER(2)
+-  HASH2_CALC;
++  HASH2_CALC
+   curMatch = p->hash[hv];
+   p->hash[hv] = p->pos;
+   GET_MATCHES_FOOTER_BT(1)
+@@ -1062,7 +1161,7 @@ static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+ UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+ {
+   GET_MATCHES_HEADER(3)
+-  HASH_ZIP_CALC;
++  HASH_ZIP_CALC
+   curMatch = p->hash[hv];
+   p->hash[hv] = p->pos;
+   GET_MATCHES_FOOTER_BT(2)
+@@ -1075,15 +1174,16 @@ UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+     mmm = pos;
+ 
+ 
+-static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
++static UInt32* Bt3_MatchFinder_GetMatches(void *_p, UInt32 *distances)
+ {
++  CMatchFinder *p = (CMatchFinder *)_p;
+   UInt32 mmm;
+   UInt32 h2, d2, pos;
+   unsigned maxLen;
+   UInt32 *hash;
+   GET_MATCHES_HEADER(3)
+ 
+-  HASH3_CALC;
++  HASH3_CALC
+ 
+   hash = p->hash;
+   pos = p->pos;
+@@ -1108,7 +1208,7 @@ static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+     if (maxLen == lenLimit)
+     {
+       SkipMatchesSpec(MF_PARAMS(p));
+-      MOVE_POS_RET;
++      MOVE_POS_RET
+     }
+   }
+   
+@@ -1116,15 +1216,16 @@ static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+ }
+ 
+ 
+-static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
++static UInt32* Bt4_MatchFinder_GetMatches(void *_p, UInt32 *distances)
+ {
++  CMatchFinder *p = (CMatchFinder *)_p;
+   UInt32 mmm;
+   UInt32 h2, h3, d2, d3, pos;
+   unsigned maxLen;
+   UInt32 *hash;
+   GET_MATCHES_HEADER(4)
+ 
+-  HASH4_CALC;
++  HASH4_CALC
+ 
+   hash = p->hash;
+   pos = p->pos;
+@@ -1184,14 +1285,16 @@ static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+ }
+ 
+ 
+-static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
++static UInt32* Bt5_MatchFinder_GetMatches(void *_p, UInt32 *distances)
+ {
++  CMatchFinder *p = (CMatchFinder *)_p;
+   UInt32 mmm;
+-  UInt32 h2, h3, d2, d3, maxLen, pos;
++  UInt32 h2, h3, d2, d3, pos;
++  unsigned maxLen;
+   UInt32 *hash;
+   GET_MATCHES_HEADER(5)
+ 
+-  HASH5_CALC;
++  HASH5_CALC
+ 
+   hash = p->hash;
+   pos = p->pos;
+@@ -1247,7 +1350,7 @@ static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+     if (maxLen == lenLimit)
+     {
+       SkipMatchesSpec(MF_PARAMS(p));
+-      MOVE_POS_RET;
++      MOVE_POS_RET
+     }
+     break;
+   }
+@@ -1256,15 +1359,16 @@ static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+ }
+ 
+ 
+-static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
++static UInt32* Hc4_MatchFinder_GetMatches(void *_p, UInt32 *distances)
+ {
++  CMatchFinder *p = (CMatchFinder *)_p;
+   UInt32 mmm;
+   UInt32 h2, h3, d2, d3, pos;
+   unsigned maxLen;
+   UInt32 *hash;
+   GET_MATCHES_HEADER(4)
+ 
+-  HASH4_CALC;
++  HASH4_CALC
+ 
+   hash = p->hash;
+   pos = p->pos;
+@@ -1315,23 +1419,25 @@ static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+     if (maxLen == lenLimit)
+     {
+       p->son[p->cyclicBufferPos] = curMatch;
+-      MOVE_POS_RET;
++      MOVE_POS_RET
+     }
+     break;
+   }
+   
+-  GET_MATCHES_FOOTER_HC(maxLen);
++  GET_MATCHES_FOOTER_HC(maxLen)
+ }
+ 
+ 
+-static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
++static UInt32 * Hc5_MatchFinder_GetMatches(void *_p, UInt32 *distances)
+ {
++  CMatchFinder *p = (CMatchFinder *)_p;
+   UInt32 mmm;
+-  UInt32 h2, h3, d2, d3, maxLen, pos;
++  UInt32 h2, h3, d2, d3, pos;
++  unsigned maxLen;
+   UInt32 *hash;
+   GET_MATCHES_HEADER(5)
+ 
+-  HASH5_CALC;
++  HASH5_CALC
+ 
+   hash = p->hash;
+   pos = p->pos;
+@@ -1383,34 +1489,35 @@ static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+     if (*(cur - d2 + 3) != cur[3])
+       break;
+     UPDATE_maxLen
+-    distances[-2] = maxLen;
++    distances[-2] = (UInt32)maxLen;
+     if (maxLen == lenLimit)
+     {
+       p->son[p->cyclicBufferPos] = curMatch;
+-      MOVE_POS_RET;
++      MOVE_POS_RET
+     }
+     break;
+   }
+   
+-  GET_MATCHES_FOOTER_HC(maxLen);
++  GET_MATCHES_FOOTER_HC(maxLen)
+ }
+ 
+ 
+ UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+ {
+   GET_MATCHES_HEADER(3)
+-  HASH_ZIP_CALC;
++  HASH_ZIP_CALC
+   curMatch = p->hash[hv];
+   p->hash[hv] = p->pos;
+   GET_MATCHES_FOOTER_HC(2)
+ }
+ 
+ 
+-static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
++static void Bt2_MatchFinder_Skip(void *_p, UInt32 num)
+ {
++  CMatchFinder *p = (CMatchFinder *)_p;
+   SKIP_HEADER(2)
+   {
+-    HASH2_CALC;
++    HASH2_CALC
+     curMatch = p->hash[hv];
+     p->hash[hv] = p->pos;
+   }
+@@ -1421,20 +1528,21 @@ void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+ {
+   SKIP_HEADER(3)
+   {
+-    HASH_ZIP_CALC;
++    HASH_ZIP_CALC
+     curMatch = p->hash[hv];
+     p->hash[hv] = p->pos;
+   }
+   SKIP_FOOTER
+ }
+ 
+-static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
++static void Bt3_MatchFinder_Skip(void *_p, UInt32 num)
+ {
++  CMatchFinder *p = (CMatchFinder *)_p;
+   SKIP_HEADER(3)
+   {
+     UInt32 h2;
+     UInt32 *hash;
+-    HASH3_CALC;
++    HASH3_CALC
+     hash = p->hash;
+     curMatch = (hash + kFix3HashSize)[hv];
+     hash[h2] =
+@@ -1443,13 +1551,14 @@ static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+   SKIP_FOOTER
+ }
+ 
+-static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
++static void Bt4_MatchFinder_Skip(void *_p, UInt32 num)
+ {
++  CMatchFinder *p = (CMatchFinder *)_p;
+   SKIP_HEADER(4)
+   {
+     UInt32 h2, h3;
+     UInt32 *hash;
+-    HASH4_CALC;
++    HASH4_CALC
+     hash = p->hash;
+     curMatch = (hash + kFix4HashSize)[hv];
+     hash                  [h2] =
+@@ -1459,13 +1568,14 @@ static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+   SKIP_FOOTER
+ }
+ 
+-static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
++static void Bt5_MatchFinder_Skip(void *_p, UInt32 num)
+ {
++  CMatchFinder *p = (CMatchFinder *)_p;
+   SKIP_HEADER(5)
+   {
+     UInt32 h2, h3;
+     UInt32 *hash;
+-    HASH5_CALC;
++    HASH5_CALC
+     hash = p->hash;
+     curMatch = (hash + kFix5HashSize)[hv];
+     hash                  [h2] =
+@@ -1479,7 +1589,7 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+ 
+ #define HC_SKIP_HEADER(minLen) \
+     do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \
+-    Byte *cur; \
++    const Byte *cur; \
+     UInt32 *hash; \
+     UInt32 *son; \
+     UInt32 pos = p->pos; \
+@@ -1506,12 +1616,13 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+     }} while(num); \
+ 
+ 
+-static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
++static void Hc4_MatchFinder_Skip(void *_p, UInt32 num)
+ {
++  CMatchFinder *p = (CMatchFinder *)_p;
+   HC_SKIP_HEADER(4)
+ 
+     UInt32 h2, h3;
+-    HASH4_CALC;
++    HASH4_CALC
+     curMatch = (hash + kFix4HashSize)[hv];
+     hash                  [h2] =
+     (hash + kFix3HashSize)[h3] =
+@@ -1521,8 +1632,9 @@ static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+ }
+ 
+ 
+-static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
++static void Hc5_MatchFinder_Skip(void *_p, UInt32 num)
+ {
++  CMatchFinder *p = (CMatchFinder *)_p;
+   HC_SKIP_HEADER(5)
+   
+     UInt32 h2, h3;
+@@ -1541,7 +1653,7 @@ void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+ {
+   HC_SKIP_HEADER(3)
+ 
+-    HASH_ZIP_CALC;
++    HASH_ZIP_CALC
+     curMatch = hash[hv];
+     hash[hv] = pos;
+ 
+@@ -1551,57 +1663,57 @@ void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+ 
+ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable)
+ {
+-  vTable->Init = (Mf_Init_Func)MatchFinder_Init;
+-  vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
+-  vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
++  vTable->Init = MatchFinder_Init;
++  vTable->GetNumAvailableBytes = MatchFinder_GetNumAvailableBytes;
++  vTable->GetPointerToCurrentPos = MatchFinder_GetPointerToCurrentPos;
+   if (!p->btMode)
+   {
+     if (p->numHashBytes <= 4)
+     {
+-      vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
+-      vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
++      vTable->GetMatches = Hc4_MatchFinder_GetMatches;
++      vTable->Skip = Hc4_MatchFinder_Skip;
+     }
+     else
+     {
+-      vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches;
+-      vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip;
++      vTable->GetMatches = Hc5_MatchFinder_GetMatches;
++      vTable->Skip = Hc5_MatchFinder_Skip;
+     }
+   }
+   else if (p->numHashBytes == 2)
+   {
+-    vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches;
+-    vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip;
++    vTable->GetMatches = Bt2_MatchFinder_GetMatches;
++    vTable->Skip = Bt2_MatchFinder_Skip;
+   }
+   else if (p->numHashBytes == 3)
+   {
+-    vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
+-    vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
++    vTable->GetMatches = Bt3_MatchFinder_GetMatches;
++    vTable->Skip = Bt3_MatchFinder_Skip;
+   }
+   else if (p->numHashBytes == 4)
+   {
+-    vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
+-    vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
++    vTable->GetMatches = Bt4_MatchFinder_GetMatches;
++    vTable->Skip = Bt4_MatchFinder_Skip;
+   }
+   else
+   {
+-    vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches;
+-    vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip;
++    vTable->GetMatches = Bt5_MatchFinder_GetMatches;
++    vTable->Skip = Bt5_MatchFinder_Skip;
+   }
+ }
+ 
+ 
+ 
+-void LzFindPrepare()
++void LzFindPrepare(void)
+ {
+-  #ifndef FORCE_SATUR_SUB_128
+-  #ifdef USE_SATUR_SUB_128
++  #ifndef FORCE_LZFIND_SATUR_SUB_128
++  #ifdef USE_LZFIND_SATUR_SUB_128
+   LZFIND_SATUR_SUB_CODE_FUNC f = NULL;
+   #ifdef MY_CPU_ARM_OR_ARM64
+   {
+     if (CPU_IsSupported_NEON())
+     {
+       // #pragma message ("=== LzFind NEON")
+-      _PRF(printf("\n=== LzFind NEON\n"));
++      PRF(printf("\n=== LzFind NEON\n"));
+       f = LzFind_SaturSub_128;
+     }
+     // f = 0; // for debug
+@@ -1610,20 +1722,25 @@ void LzFindPrepare()
+   if (CPU_IsSupported_SSE41())
+   {
+     // #pragma message ("=== LzFind SSE41")
+-    _PRF(printf("\n=== LzFind SSE41\n"));
++    PRF(printf("\n=== LzFind SSE41\n"));
+     f = LzFind_SaturSub_128;
+ 
+-    #ifdef USE_AVX2
++    #ifdef USE_LZFIND_SATUR_SUB_256
+     if (CPU_IsSupported_AVX2())
+     {
+       // #pragma message ("=== LzFind AVX2")
+-      _PRF(printf("\n=== LzFind AVX2\n"));
++      PRF(printf("\n=== LzFind AVX2\n"));
+       f = LzFind_SaturSub_256;
+     }
+     #endif
+   }
+   #endif // MY_CPU_ARM_OR_ARM64
+   g_LzFind_SaturSub = f;
+-  #endif // USE_SATUR_SUB_128
+-  #endif // FORCE_SATUR_SUB_128
++  #endif // USE_LZFIND_SATUR_SUB_128
++  #endif // FORCE_LZFIND_SATUR_SUB_128
+ }
++
++
++#undef MOVE_POS
++#undef MOVE_POS_RET
++#undef PRF
+diff --git a/third_party/lzma_sdk/C/LzFind.h b/third_party/lzma_sdk/C/LzFind.h
+index eea873ff61ede1bee4c75f85e451c51333d76d56..67e8a6e0286ee0877043601039c5fbdae8588148 100644
+--- a/third_party/lzma_sdk/C/LzFind.h
++++ b/third_party/lzma_sdk/C/LzFind.h
+@@ -1,8 +1,8 @@
+ /* LzFind.h -- Match finder for LZ algorithms
+-2021-07-13 : Igor Pavlov : Public domain */
++2024-01-22 : Igor Pavlov : Public domain */
+ 
+-#ifndef __LZ_FIND_H
+-#define __LZ_FIND_H
++#ifndef ZIP7_INC_LZ_FIND_H
++#define ZIP7_INC_LZ_FIND_H
+ 
+ #include "7zTypes.h"
+ 
+@@ -10,9 +10,9 @@ EXTERN_C_BEGIN
+ 
+ typedef UInt32 CLzRef;
+ 
+-typedef struct _CMatchFinder
++typedef struct
+ {
+-  Byte *buffer;
++  const Byte *buffer;
+   UInt32 pos;
+   UInt32 posLimit;
+   UInt32 streamPos;  /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */
+@@ -32,8 +32,8 @@ typedef struct _CMatchFinder
+   UInt32 hashMask;
+   UInt32 cutValue;
+ 
+-  Byte *bufferBase;
+-  ISeqInStream *stream;
++  Byte *bufBase;
++  ISeqInStreamPtr stream;
+   
+   UInt32 blockSize;
+   UInt32 keepSizeBefore;
+@@ -43,7 +43,9 @@ typedef struct _CMatchFinder
+   size_t directInputRem;
+   UInt32 historySize;
+   UInt32 fixedHashSize;
+-  UInt32 hashSizeSum;
++  Byte numHashBytes_Min;
++  Byte numHashOutBits;
++  Byte _pad2_[2];
+   SRes result;
+   UInt32 crc[256];
+   size_t numRefs;
+@@ -69,24 +71,45 @@ void MatchFinder_ReadIfRequired(CMatchFinder *p);
+ 
+ void MatchFinder_Construct(CMatchFinder *p);
+ 
+-/* Conditions:
+-     historySize <= 3 GB
+-     keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB
++/* (directInput = 0) is default value.
++   It's required to provide correct (directInput) value
++   before calling MatchFinder_Create().
++   You can set (directInput) by any of the following calls:
++     - MatchFinder_SET_DIRECT_INPUT_BUF()
++     - MatchFinder_SET_STREAM()
++     - MatchFinder_SET_STREAM_MODE()
+ */
++
++#define MatchFinder_SET_DIRECT_INPUT_BUF(p, _src_, _srcLen_) { \
++  (p)->stream = NULL; \
++  (p)->directInput = 1; \
++  (p)->buffer = (_src_); \
++  (p)->directInputRem = (_srcLen_); }
++
++/*
++#define MatchFinder_SET_STREAM_MODE(p) { \
++  (p)->directInput = 0; }
++*/
++
++#define MatchFinder_SET_STREAM(p, _stream_) { \
++  (p)->stream = _stream_; \
++  (p)->directInput = 0; }
++
++
+ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+     UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
+     ISzAllocPtr alloc);
+ void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
+ void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
+-// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
+ 
+ /*
+-#define Inline_MatchFinder_InitPos(p, val) \
++#define MatchFinder_INIT_POS(p, val) \
+     (p)->pos = (val); \
+     (p)->streamPos = (val);
+ */
+ 
+-#define Inline_MatchFinder_ReduceOffsets(p, subValue) \
++// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
++#define MatchFinder_REDUCE_OFFSETS(p, subValue) \
+     (p)->pos -= (subValue); \
+     (p)->streamPos -= (subValue);
+ 
+@@ -107,7 +130,7 @@ typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
+ typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
+ typedef void (*Mf_Skip_Func)(void *object, UInt32);
+ 
+-typedef struct _IMatchFinder
++typedef struct
+ {
+   Mf_Init_Func Init;
+   Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
+@@ -121,7 +144,8 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable);
+ void MatchFinder_Init_LowHash(CMatchFinder *p);
+ void MatchFinder_Init_HighHash(CMatchFinder *p);
+ void MatchFinder_Init_4(CMatchFinder *p);
+-void MatchFinder_Init(CMatchFinder *p);
++// void MatchFinder_Init(CMatchFinder *p);
++void MatchFinder_Init(void *p);
+ 
+ UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+ UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+diff --git a/third_party/lzma_sdk/C/LzHash.h b/third_party/lzma_sdk/C/LzHash.h
+index 77b898cfab57c7df241b767b21390e26cc23ec57..2b6290b64c08f7eb44568a67e90abcf4a611f979 100644
+--- a/third_party/lzma_sdk/C/LzHash.h
++++ b/third_party/lzma_sdk/C/LzHash.h
+@@ -1,8 +1,8 @@
+-/* LzHash.h -- HASH functions for LZ algorithms
+-2019-10-30 : Igor Pavlov : Public domain */
++/* LzHash.h -- HASH constants for LZ algorithms
++2023-03-05 : Igor Pavlov : Public domain */
+ 
+-#ifndef __LZ_HASH_H
+-#define __LZ_HASH_H
++#ifndef ZIP7_INC_LZ_HASH_H
++#define ZIP7_INC_LZ_HASH_H
+ 
+ /*
+   (kHash2Size >= (1 <<  8)) : Required
+diff --git a/third_party/lzma_sdk/C/Lzma2Dec.c b/third_party/lzma_sdk/C/Lzma2Dec.c
+index ac970a843d9fabcd6337577648524b0b8e990791..8bf54e499edf73a88df3a89ac04377201e5127f3 100644
+--- a/third_party/lzma_sdk/C/Lzma2Dec.c
++++ b/third_party/lzma_sdk/C/Lzma2Dec.c
+@@ -1,5 +1,5 @@
+ /* Lzma2Dec.c -- LZMA2 Decoder
+-2021-02-09 : Igor Pavlov : Public domain */
++2024-03-01 : Igor Pavlov : Public domain */
+ 
+ /* #define SHOW_DEBUG_INFO */
+ 
+@@ -71,14 +71,14 @@ static SRes Lzma2Dec_GetOldProps(Byte prop, Byte *props)
+ SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc)
+ {
+   Byte props[LZMA_PROPS_SIZE];
+-  RINOK(Lzma2Dec_GetOldProps(prop, props));
++  RINOK(Lzma2Dec_GetOldProps(prop, props))
+   return LzmaDec_AllocateProbs(&p->decoder, props, LZMA_PROPS_SIZE, alloc);
+ }
+ 
+ SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc)
+ {
+   Byte props[LZMA_PROPS_SIZE];
+-  RINOK(Lzma2Dec_GetOldProps(prop, props));
++  RINOK(Lzma2Dec_GetOldProps(prop, props))
+   return LzmaDec_Allocate(&p->decoder, props, LZMA_PROPS_SIZE, alloc);
+ }
+ 
+@@ -157,8 +157,10 @@ static unsigned Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
+       p->decoder.prop.lp = (Byte)lp;
+       return LZMA2_STATE_DATA;
+     }
++
++    default:
++      return LZMA2_STATE_ERROR;
+   }
+-  return LZMA2_STATE_ERROR;
+ }
+ 
+ static void LzmaDec_UpdateWithUncompressed(CLzmaDec *p, const Byte *src, SizeT size)
+@@ -474,8 +476,8 @@ SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+   SizeT outSize = *destLen, inSize = *srcLen;
+   *destLen = *srcLen = 0;
+   *status = LZMA_STATUS_NOT_SPECIFIED;
+-  Lzma2Dec_Construct(&p);
+-  RINOK(Lzma2Dec_AllocateProbs(&p, prop, alloc));
++  Lzma2Dec_CONSTRUCT(&p)
++  RINOK(Lzma2Dec_AllocateProbs(&p, prop, alloc))
+   p.decoder.dic = dest;
+   p.decoder.dicBufSize = outSize;
+   Lzma2Dec_Init(&p);
+@@ -487,3 +489,5 @@ SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+   Lzma2Dec_FreeProbs(&p, alloc);
+   return res;
+ }
++
++#undef PRF
+diff --git a/third_party/lzma_sdk/C/Lzma2Dec.h b/third_party/lzma_sdk/C/Lzma2Dec.h
+index b8ddeac890b93c767bcf47c39abf3af9fcc113a4..1f5233a728a96d323cb527e09be2548285e09a43 100644
+--- a/third_party/lzma_sdk/C/Lzma2Dec.h
++++ b/third_party/lzma_sdk/C/Lzma2Dec.h
+@@ -1,8 +1,8 @@
+ /* Lzma2Dec.h -- LZMA2 Decoder
+-2018-02-19 : Igor Pavlov : Public domain */
++2023-03-03 : Igor Pavlov : Public domain */
+ 
+-#ifndef __LZMA2_DEC_H
+-#define __LZMA2_DEC_H
++#ifndef ZIP7_INC_LZMA2_DEC_H
++#define ZIP7_INC_LZMA2_DEC_H
+ 
+ #include "LzmaDec.h"
+ 
+@@ -22,9 +22,10 @@ typedef struct
+   CLzmaDec decoder;
+ } CLzma2Dec;
+ 
+-#define Lzma2Dec_Construct(p) LzmaDec_Construct(&(p)->decoder)
+-#define Lzma2Dec_FreeProbs(p, alloc) LzmaDec_FreeProbs(&(p)->decoder, alloc)
+-#define Lzma2Dec_Free(p, alloc) LzmaDec_Free(&(p)->decoder, alloc)
++#define Lzma2Dec_CONSTRUCT(p)  LzmaDec_CONSTRUCT(&(p)->decoder)
++#define Lzma2Dec_Construct(p)  Lzma2Dec_CONSTRUCT(p)
++#define Lzma2Dec_FreeProbs(p, alloc)  LzmaDec_FreeProbs(&(p)->decoder, alloc)
++#define Lzma2Dec_Free(p, alloc)  LzmaDec_Free(&(p)->decoder, alloc)
+ 
+ SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc);
+ SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc);
+@@ -90,7 +91,7 @@ Lzma2Dec_GetUnpackExtra() returns the value that shows
+     at current input positon.
+ */
+ 
+-#define Lzma2Dec_GetUnpackExtra(p) ((p)->isExtraMode ? (p)->unpackSize : 0);
++#define Lzma2Dec_GetUnpackExtra(p)  ((p)->isExtraMode ? (p)->unpackSize : 0)
+ 
+ 
+ /* ---------- One Call Interface ---------- */
+diff --git a/third_party/lzma_sdk/C/LzmaDec.c b/third_party/lzma_sdk/C/LzmaDec.c
+index d6742e5af8cd800a791245358ca331899ca08ca7..69bb8bba9d42ccebb3d953d95585183cdc71ac19 100644
+--- a/third_party/lzma_sdk/C/LzmaDec.c
++++ b/third_party/lzma_sdk/C/LzmaDec.c
+@@ -1,5 +1,5 @@
+ /* LzmaDec.c -- LZMA Decoder
+-2021-04-01 : Igor Pavlov : Public domain */
++2023-04-07 : Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+ 
+@@ -8,15 +8,15 @@
+ /* #include "CpuArch.h" */
+ #include "LzmaDec.h"
+ 
+-#define kNumTopBits 24
+-#define kTopValue ((UInt32)1 << kNumTopBits)
++// #define kNumTopBits 24
++#define kTopValue ((UInt32)1 << 24)
+ 
+ #define kNumBitModelTotalBits 11
+ #define kBitModelTotal (1 << kNumBitModelTotalBits)
+ 
+ #define RC_INIT_SIZE 5
+ 
+-#ifndef _LZMA_DEC_OPT
++#ifndef Z7_LZMA_DEC_OPT
+ 
+ #define kNumMoveBits 5
+ #define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
+@@ -25,14 +25,14 @@
+ #define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
+ #define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
+ #define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
+-  { UPDATE_0(p); i = (i + i); A0; } else \
+-  { UPDATE_1(p); i = (i + i) + 1; A1; }
++  { UPDATE_0(p)  i = (i + i); A0; } else \
++  { UPDATE_1(p)  i = (i + i) + 1; A1; }
+ 
+ #define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); }
+ 
+ #define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \
+-  { UPDATE_0(p + i); A0; } else \
+-  { UPDATE_1(p + i); A1; }
++  { UPDATE_0(p + i)  A0; } else \
++  { UPDATE_1(p + i)  A1; }
+ #define REV_BIT_VAR(  p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; )
+ #define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m;       , i += m * 2; )
+ #define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m        , ; )
+@@ -40,19 +40,19 @@
+ #define TREE_DECODE(probs, limit, i) \
+   { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }
+ 
+-/* #define _LZMA_SIZE_OPT */
++/* #define Z7_LZMA_SIZE_OPT */
+ 
+-#ifdef _LZMA_SIZE_OPT
++#ifdef Z7_LZMA_SIZE_OPT
+ #define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
+ #else
+ #define TREE_6_DECODE(probs, i) \
+   { i = 1; \
+-  TREE_GET_BIT(probs, i); \
+-  TREE_GET_BIT(probs, i); \
+-  TREE_GET_BIT(probs, i); \
+-  TREE_GET_BIT(probs, i); \
+-  TREE_GET_BIT(probs, i); \
+-  TREE_GET_BIT(probs, i); \
++  TREE_GET_BIT(probs, i) \
++  TREE_GET_BIT(probs, i) \
++  TREE_GET_BIT(probs, i) \
++  TREE_GET_BIT(probs, i) \
++  TREE_GET_BIT(probs, i) \
++  TREE_GET_BIT(probs, i) \
+   i -= 0x40; }
+ #endif
+ 
+@@ -64,25 +64,25 @@
+   probLit = prob + (offs + bit + symbol); \
+   GET_BIT2(probLit, symbol, offs ^= bit; , ;)
+ 
+-#endif // _LZMA_DEC_OPT
++#endif // Z7_LZMA_DEC_OPT
+ 
+ 
+ #define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); }
+ 
+-#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
++#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
+ #define UPDATE_0_CHECK range = bound;
+ #define UPDATE_1_CHECK range -= bound; code -= bound;
+ #define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
+-  { UPDATE_0_CHECK; i = (i + i); A0; } else \
+-  { UPDATE_1_CHECK; i = (i + i) + 1; A1; }
++  { UPDATE_0_CHECK  i = (i + i); A0; } else \
++  { UPDATE_1_CHECK  i = (i + i) + 1; A1; }
+ #define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)
+ #define TREE_DECODE_CHECK(probs, limit, i) \
+   { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; }
+ 
+ 
+ #define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \
+-  { UPDATE_0_CHECK; i += m; m += m; } else \
+-  { UPDATE_1_CHECK; m += m; i += m; }
++  { UPDATE_0_CHECK  i += m; m += m; } else \
++  { UPDATE_1_CHECK  m += m; i += m; }
+ 
+ 
+ #define kNumPosBitsMax 4
+@@ -224,14 +224,14 @@ Out:
+ */
+ 
+ 
+-#ifdef _LZMA_DEC_OPT
++#ifdef Z7_LZMA_DEC_OPT
+ 
+-int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit);
++int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit);
+ 
+ #else
+ 
+ static
+-int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
++int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
+ {
+   CLzmaProb *probs = GET_PROBS;
+   unsigned state = (unsigned)p->state;
+@@ -263,7 +263,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
+     IF_BIT_0(prob)
+     {
+       unsigned symbol;
+-      UPDATE_0(prob);
++      UPDATE_0(prob)
+       prob = probs + Literal;
+       if (processedPos != 0 || checkDicSize != 0)
+         prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
+@@ -273,7 +273,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
+       {
+         state -= (state < 4) ? state : 3;
+         symbol = 1;
+-        #ifdef _LZMA_SIZE_OPT
++        #ifdef Z7_LZMA_SIZE_OPT
+         do { NORMAL_LITER_DEC } while (symbol < 0x100);
+         #else
+         NORMAL_LITER_DEC
+@@ -292,7 +292,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
+         unsigned offs = 0x100;
+         state -= (state < 10) ? 3 : 6;
+         symbol = 1;
+-        #ifdef _LZMA_SIZE_OPT
++        #ifdef Z7_LZMA_SIZE_OPT
+         do
+         {
+           unsigned bit;
+@@ -321,25 +321,25 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
+     }
+     
+     {
+-      UPDATE_1(prob);
++      UPDATE_1(prob)
+       prob = probs + IsRep + state;
+       IF_BIT_0(prob)
+       {
+-        UPDATE_0(prob);
++        UPDATE_0(prob)
+         state += kNumStates;
+         prob = probs + LenCoder;
+       }
+       else
+       {
+-        UPDATE_1(prob);
++        UPDATE_1(prob)
+         prob = probs + IsRepG0 + state;
+         IF_BIT_0(prob)
+         {
+-          UPDATE_0(prob);
++          UPDATE_0(prob)
+           prob = probs + IsRep0Long + COMBINED_PS_STATE;
+           IF_BIT_0(prob)
+           {
+-            UPDATE_0(prob);
++            UPDATE_0(prob)
+   
+             // that case was checked before with kBadRepCode
+             // if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; }
+@@ -353,30 +353,30 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
+             state = state < kNumLitStates ? 9 : 11;
+             continue;
+           }
+-          UPDATE_1(prob);
++          UPDATE_1(prob)
+         }
+         else
+         {
+           UInt32 distance;
+-          UPDATE_1(prob);
++          UPDATE_1(prob)
+           prob = probs + IsRepG1 + state;
+           IF_BIT_0(prob)
+           {
+-            UPDATE_0(prob);
++            UPDATE_0(prob)
+             distance = rep1;
+           }
+           else
+           {
+-            UPDATE_1(prob);
++            UPDATE_1(prob)
+             prob = probs + IsRepG2 + state;
+             IF_BIT_0(prob)
+             {
+-              UPDATE_0(prob);
++              UPDATE_0(prob)
+               distance = rep2;
+             }
+             else
+             {
+-              UPDATE_1(prob);
++              UPDATE_1(prob)
+               distance = rep3;
+               rep3 = rep2;
+             }
+@@ -389,37 +389,37 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
+         prob = probs + RepLenCoder;
+       }
+       
+-      #ifdef _LZMA_SIZE_OPT
++      #ifdef Z7_LZMA_SIZE_OPT
+       {
+         unsigned lim, offset;
+         CLzmaProb *probLen = prob + LenChoice;
+         IF_BIT_0(probLen)
+         {
+-          UPDATE_0(probLen);
++          UPDATE_0(probLen)
+           probLen = prob + LenLow + GET_LEN_STATE;
+           offset = 0;
+           lim = (1 << kLenNumLowBits);
+         }
+         else
+         {
+-          UPDATE_1(probLen);
++          UPDATE_1(probLen)
+           probLen = prob + LenChoice2;
+           IF_BIT_0(probLen)
+           {
+-            UPDATE_0(probLen);
++            UPDATE_0(probLen)
+             probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
+             offset = kLenNumLowSymbols;
+             lim = (1 << kLenNumLowBits);
+           }
+           else
+           {
+-            UPDATE_1(probLen);
++            UPDATE_1(probLen)
+             probLen = prob + LenHigh;
+             offset = kLenNumLowSymbols * 2;
+             lim = (1 << kLenNumHighBits);
+           }
+         }
+-        TREE_DECODE(probLen, lim, len);
++        TREE_DECODE(probLen, lim, len)
+         len += offset;
+       }
+       #else
+@@ -427,32 +427,32 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
+         CLzmaProb *probLen = prob + LenChoice;
+         IF_BIT_0(probLen)
+         {
+-          UPDATE_0(probLen);
++          UPDATE_0(probLen)
+           probLen = prob + LenLow + GET_LEN_STATE;
+           len = 1;
+-          TREE_GET_BIT(probLen, len);
+-          TREE_GET_BIT(probLen, len);
+-          TREE_GET_BIT(probLen, len);
++          TREE_GET_BIT(probLen, len)
++          TREE_GET_BIT(probLen, len)
++          TREE_GET_BIT(probLen, len)
+           len -= 8;
+         }
+         else
+         {
+-          UPDATE_1(probLen);
++          UPDATE_1(probLen)
+           probLen = prob + LenChoice2;
+           IF_BIT_0(probLen)
+           {
+-            UPDATE_0(probLen);
++            UPDATE_0(probLen)
+             probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
+             len = 1;
+-            TREE_GET_BIT(probLen, len);
+-            TREE_GET_BIT(probLen, len);
+-            TREE_GET_BIT(probLen, len);
++            TREE_GET_BIT(probLen, len)
++            TREE_GET_BIT(probLen, len)
++            TREE_GET_BIT(probLen, len)
+           }
+           else
+           {
+-            UPDATE_1(probLen);
++            UPDATE_1(probLen)
+             probLen = prob + LenHigh;
+-            TREE_DECODE(probLen, (1 << kLenNumHighBits), len);
++            TREE_DECODE(probLen, (1 << kLenNumHighBits), len)
+             len += kLenNumLowSymbols * 2;
+           }
+         }
+@@ -464,7 +464,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
+         UInt32 distance;
+         prob = probs + PosSlot +
+             ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
+-        TREE_6_DECODE(prob, distance);
++        TREE_6_DECODE(prob, distance)
+         if (distance >= kStartPosModelIndex)
+         {
+           unsigned posSlot = (unsigned)distance;
+@@ -479,7 +479,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
+               distance++;
+               do
+               {
+-                REV_BIT_VAR(prob, distance, m);
++                REV_BIT_VAR(prob, distance, m)
+               }
+               while (--numDirectBits);
+               distance -= m;
+@@ -514,10 +514,10 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
+             distance <<= kNumAlignBits;
+             {
+               unsigned i = 1;
+-              REV_BIT_CONST(prob, i, 1);
+-              REV_BIT_CONST(prob, i, 2);
+-              REV_BIT_CONST(prob, i, 4);
+-              REV_BIT_LAST (prob, i, 8);
++              REV_BIT_CONST(prob, i, 1)
++              REV_BIT_CONST(prob, i, 2)
++              REV_BIT_CONST(prob, i, 4)
++              REV_BIT_LAST (prob, i, 8)
+               distance |= i;
+             }
+             if (distance == (UInt32)0xFFFFFFFF)
+@@ -592,7 +592,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
+   }
+   while (dicPos < limit && buf < bufLimit);
+ 
+-  NORMALIZE;
++  NORMALIZE
+   
+   p->buf = buf;
+   p->range = range;
+@@ -613,7 +613,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
+ 
+ 
+ 
+-static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
++static void Z7_FASTCALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
+ {
+   unsigned len = (unsigned)p->remainLen;
+   if (len == 0 /* || len >= kMatchSpecLenStart */)
+@@ -683,7 +683,7 @@ and we support the following state of (p->checkDicSize):
+     (p->checkDicSize == p->prop.dicSize)
+ */
+ 
+-static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
++static int Z7_FASTCALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
+ {
+   if (p->checkDicSize == 0)
+   {
+@@ -767,54 +767,54 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
+     else
+     {
+       unsigned len;
+-      UPDATE_1_CHECK;
++      UPDATE_1_CHECK
+ 
+       prob = probs + IsRep + state;
+       IF_BIT_0_CHECK(prob)
+       {
+-        UPDATE_0_CHECK;
++        UPDATE_0_CHECK
+         state = 0;
+         prob = probs + LenCoder;
+         res = DUMMY_MATCH;
+       }
+       else
+       {
+-        UPDATE_1_CHECK;
++        UPDATE_1_CHECK
+         res = DUMMY_REP;
+         prob = probs + IsRepG0 + state;
+         IF_BIT_0_CHECK(prob)
+         {
+-          UPDATE_0_CHECK;
++          UPDATE_0_CHECK
+           prob = probs + IsRep0Long + COMBINED_PS_STATE;
+           IF_BIT_0_CHECK(prob)
+           {
+-            UPDATE_0_CHECK;
++            UPDATE_0_CHECK
+             break;
+           }
+           else
+           {
+-            UPDATE_1_CHECK;
++            UPDATE_1_CHECK
+           }
+         }
+         else
+         {
+-          UPDATE_1_CHECK;
++          UPDATE_1_CHECK
+           prob = probs + IsRepG1 + state;
+           IF_BIT_0_CHECK(prob)
+           {
+-            UPDATE_0_CHECK;
++            UPDATE_0_CHECK
+           }
+           else
+           {
+-            UPDATE_1_CHECK;
++            UPDATE_1_CHECK
+             prob = probs + IsRepG2 + state;
+             IF_BIT_0_CHECK(prob)
+             {
+-              UPDATE_0_CHECK;
++              UPDATE_0_CHECK
+             }
+             else
+             {
+-              UPDATE_1_CHECK;
++              UPDATE_1_CHECK
+             }
+           }
+         }
+@@ -826,31 +826,31 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
+         const CLzmaProb *probLen = prob + LenChoice;
+         IF_BIT_0_CHECK(probLen)
+         {
+-          UPDATE_0_CHECK;
++          UPDATE_0_CHECK
+           probLen = prob + LenLow + GET_LEN_STATE;
+           offset = 0;
+           limit = 1 << kLenNumLowBits;
+         }
+         else
+         {
+-          UPDATE_1_CHECK;
++          UPDATE_1_CHECK
+           probLen = prob + LenChoice2;
+           IF_BIT_0_CHECK(probLen)
+           {
+-            UPDATE_0_CHECK;
++            UPDATE_0_CHECK
+             probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
+             offset = kLenNumLowSymbols;
+             limit = 1 << kLenNumLowBits;
+           }
+           else
+           {
+-            UPDATE_1_CHECK;
++            UPDATE_1_CHECK
+             probLen = prob + LenHigh;
+             offset = kLenNumLowSymbols * 2;
+             limit = 1 << kLenNumHighBits;
+           }
+         }
+-        TREE_DECODE_CHECK(probLen, limit, len);
++        TREE_DECODE_CHECK(probLen, limit, len)
+         len += offset;
+       }
+ 
+@@ -860,7 +860,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
+         prob = probs + PosSlot +
+             ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) <<
+             kNumPosSlotBits);
+-        TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
++        TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot)
+         if (posSlot >= kStartPosModelIndex)
+         {
+           unsigned numDirectBits = ((posSlot >> 1) - 1);
+@@ -888,7 +888,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
+             unsigned m = 1;
+             do
+             {
+-              REV_BIT_CHECK(prob, i, m);
++              REV_BIT_CHECK(prob, i, m)
+             }
+             while (--numDirectBits);
+           }
+@@ -897,7 +897,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
+     }
+     break;
+   }
+-  NORMALIZE_CHECK;
++  NORMALIZE_CHECK
+ 
+   *bufOut = buf;
+   return res;
+@@ -943,7 +943,7 @@ When the decoder lookahead, and the lookahead symbol is not end_marker, we have
+ */
+ 
+ 
+-#define RETURN__NOT_FINISHED__FOR_FINISH \
++#define RETURN_NOT_FINISHED_FOR_FINISH \
+   *status = LZMA_STATUS_NOT_FINISHED; \
+   return SZ_ERROR_DATA; // for strict mode
+   // return SZ_OK; // for relaxed mode
+@@ -1029,7 +1029,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
+         }
+         if (p->remainLen != 0)
+         {
+-          RETURN__NOT_FINISHED__FOR_FINISH;
++          RETURN_NOT_FINISHED_FOR_FINISH
+         }
+         checkEndMarkNow = 1;
+       }
+@@ -1072,7 +1072,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
+             for (i = 0; i < (unsigned)dummyProcessed; i++)
+               p->tempBuf[i] = src[i];
+             // p->remainLen = kMatchSpecLen_Error_Data;
+-            RETURN__NOT_FINISHED__FOR_FINISH;
++            RETURN_NOT_FINISHED_FOR_FINISH
+           }
+           
+           bufLimit = src;
+@@ -1150,7 +1150,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
+             (*srcLen) += (unsigned)dummyProcessed - p->tempBufSize;
+             p->tempBufSize = (unsigned)dummyProcessed;
+             // p->remainLen = kMatchSpecLen_Error_Data;
+-            RETURN__NOT_FINISHED__FOR_FINISH;
++            RETURN_NOT_FINISHED_FOR_FINISH
+           }
+         }
+ 
+@@ -1299,8 +1299,8 @@ static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAl
+ SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
+ {
+   CLzmaProps propNew;
+-  RINOK(LzmaProps_Decode(&propNew, props, propsSize));
+-  RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
++  RINOK(LzmaProps_Decode(&propNew, props, propsSize))
++  RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc))
+   p->prop = propNew;
+   return SZ_OK;
+ }
+@@ -1309,14 +1309,14 @@ SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAll
+ {
+   CLzmaProps propNew;
+   SizeT dicBufSize;
+-  RINOK(LzmaProps_Decode(&propNew, props, propsSize));
+-  RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
++  RINOK(LzmaProps_Decode(&propNew, props, propsSize))
++  RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc))
+ 
+   {
+     UInt32 dictSize = propNew.dicSize;
+     SizeT mask = ((UInt32)1 << 12) - 1;
+          if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1;
+-    else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;;
++    else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;
+     dicBufSize = ((SizeT)dictSize + mask) & ~mask;
+     if (dicBufSize < dictSize)
+       dicBufSize = dictSize;
+@@ -1348,8 +1348,8 @@ SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+   *status = LZMA_STATUS_NOT_SPECIFIED;
+   if (inSize < RC_INIT_SIZE)
+     return SZ_ERROR_INPUT_EOF;
+-  LzmaDec_Construct(&p);
+-  RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc));
++  LzmaDec_CONSTRUCT(&p)
++  RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc))
+   p.dic = dest;
+   p.dicBufSize = outSize;
+   LzmaDec_Init(&p);
+diff --git a/third_party/lzma_sdk/C/LzmaDec.h b/third_party/lzma_sdk/C/LzmaDec.h
+index 6f1296250cfa69ed798c0ba8975764a1fcefc6de..b0ce28fa02a1adc422b7269c4e6d2c2d26fc60b9 100644
+--- a/third_party/lzma_sdk/C/LzmaDec.h
++++ b/third_party/lzma_sdk/C/LzmaDec.h
+@@ -1,19 +1,19 @@
+ /* LzmaDec.h -- LZMA Decoder
+-2020-03-19 : Igor Pavlov : Public domain */
++2023-04-02 : Igor Pavlov : Public domain */
+ 
+-#ifndef __LZMA_DEC_H
+-#define __LZMA_DEC_H
++#ifndef ZIP7_INC_LZMA_DEC_H
++#define ZIP7_INC_LZMA_DEC_H
+ 
+ #include "7zTypes.h"
+ 
+ EXTERN_C_BEGIN
+ 
+-/* #define _LZMA_PROB32 */
+-/* _LZMA_PROB32 can increase the speed on some CPUs,
++/* #define Z7_LZMA_PROB32 */
++/* Z7_LZMA_PROB32 can increase the speed on some CPUs,
+    but memory usage for CLzmaDec::probs will be doubled in that case */
+ 
+ typedef
+-#ifdef _LZMA_PROB32
++#ifdef Z7_LZMA_PROB32
+   UInt32
+ #else
+   UInt16
+@@ -25,7 +25,7 @@ typedef
+ 
+ #define LZMA_PROPS_SIZE 5
+ 
+-typedef struct _CLzmaProps
++typedef struct
+ {
+   Byte lc;
+   Byte lp;
+@@ -73,7 +73,8 @@ typedef struct
+   Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
+ } CLzmaDec;
+ 
+-#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; }
++#define LzmaDec_CONSTRUCT(p) { (p)->dic = NULL; (p)->probs = NULL; }
++#define LzmaDec_Construct(p) LzmaDec_CONSTRUCT(p)
+ 
+ void LzmaDec_Init(CLzmaDec *p);
+ 
+diff --git a/third_party/lzma_sdk/C/LzmaEnc.c b/third_party/lzma_sdk/C/LzmaEnc.c
+index c8b31a19ecb69c25884ee928e893726024258708..088b78f8b562d2c5a428bd992f61e855b94992cf 100644
+--- a/third_party/lzma_sdk/C/LzmaEnc.c
++++ b/third_party/lzma_sdk/C/LzmaEnc.c
+@@ -1,5 +1,5 @@
+ /* LzmaEnc.c -- LZMA Encoder
+-2022-07-15: Igor Pavlov : Public domain */
++Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+ 
+@@ -16,22 +16,22 @@
+ #include "LzmaEnc.h"
+ 
+ #include "LzFind.h"
+-#ifndef _7ZIP_ST
++#ifndef Z7_ST
+ #include "LzFindMt.h"
+ #endif
+ 
+ /* the following LzmaEnc_* declarations is internal LZMA interface for LZMA2 encoder */
+ 
+-SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize,
++SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p, ISeqInStreamPtr inStream, UInt32 keepWindowSize,
+     ISzAllocPtr alloc, ISzAllocPtr allocBig);
+-SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
++SRes LzmaEnc_MemPrepare(CLzmaEncHandle p, const Byte *src, SizeT srcLen,
+     UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+-SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
++SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit,
+     Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize);
+-const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp);
+-void LzmaEnc_Finish(CLzmaEncHandle pp);
+-void LzmaEnc_SaveState(CLzmaEncHandle pp);
+-void LzmaEnc_RestoreState(CLzmaEncHandle pp);
++const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p);
++void LzmaEnc_Finish(CLzmaEncHandle p);
++void LzmaEnc_SaveState(CLzmaEncHandle p);
++void LzmaEnc_RestoreState(CLzmaEncHandle p);
+ 
+ #ifdef SHOW_STAT
+ static unsigned g_STAT_OFFSET = 0;
+@@ -40,8 +40,8 @@ static unsigned g_STAT_OFFSET = 0;
+ /* for good normalization speed we still reserve 256 MB before 4 GB range */
+ #define kLzmaMaxHistorySize ((UInt32)15 << 28)
+ 
+-#define kNumTopBits 24
+-#define kTopValue ((UInt32)1 << kNumTopBits)
++// #define kNumTopBits 24
++#define kTopValue ((UInt32)1 << 24)
+ 
+ #define kNumBitModelTotalBits 11
+ #define kBitModelTotal (1 << kNumBitModelTotalBits)
+@@ -60,6 +60,7 @@ void LzmaEncProps_Init(CLzmaEncProps *p)
+   p->dictSize = p->mc = 0;
+   p->reduceSize = (UInt64)(Int64)-1;
+   p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
++  p->numHashOutBits = 0;
+   p->writeEndMark = 0;
+   p->affinity = 0;
+ }
+@@ -71,11 +72,11 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p)
+   p->level = level;
+   
+   if (p->dictSize == 0)
+-    p->dictSize =
+-      ( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) :
+-      ( level <= 6 ? ((UInt32)1 << (level + 19)) :
+-      ( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26)
+-      )));
++    p->dictSize = (unsigned)level <= 4 ?
++        (UInt32)1 << (level * 2 + 16) :
++        (unsigned)level <= sizeof(size_t) / 2 + 4 ?
++          (UInt32)1 << (level + 20) :
++          (UInt32)1 << (sizeof(size_t) / 2 + 24);
+ 
+   if (p->dictSize > p->reduceSize)
+   {
+@@ -91,15 +92,15 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p)
+   if (p->lp < 0) p->lp = 0;
+   if (p->pb < 0) p->pb = 2;
+ 
+-  if (p->algo < 0) p->algo = (level < 5 ? 0 : 1);
+-  if (p->fb < 0) p->fb = (level < 7 ? 32 : 64);
++  if (p->algo < 0) p->algo = (unsigned)level < 5 ? 0 : 1;
++  if (p->fb < 0) p->fb = (unsigned)level < 7 ? 32 : 64;
+   if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1);
+   if (p->numHashBytes < 0) p->numHashBytes = (p->btMode ? 4 : 5);
+   if (p->mc == 0) p->mc = (16 + ((unsigned)p->fb >> 1)) >> (p->btMode ? 0 : 1);
+   
+   if (p->numThreads < 0)
+     p->numThreads =
+-      #ifndef _7ZIP_ST
++      #ifndef Z7_ST
+       ((p->btMode && p->algo) ? 2 : 1);
+       #else
+       1;
+@@ -194,11 +195,11 @@ unsigned GetPosSlot1(UInt32 pos);
+ unsigned GetPosSlot1(UInt32 pos)
+ {
+   unsigned res;
+-  BSR2_RET(pos, res);
++  BSR2_RET(pos, res)
+   return res;
+ }
+-#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
+-#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); }
++#define GetPosSlot2(pos, res) { BSR2_RET(pos, res) }
++#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res) }
+ 
+ 
+ #else // ! LZMA_LOG_BSR
+@@ -293,7 +294,7 @@ typedef struct
+ #define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
+ 
+ typedef
+-#ifdef _LZMA_PROB32
++#ifdef Z7_LZMA_PROB32
+   UInt32
+ #else
+   UInt16
+@@ -350,7 +351,7 @@ typedef struct
+   Byte *buf;
+   Byte *bufLim;
+   Byte *bufBase;
+-  ISeqOutStream *outStream;
++  ISeqOutStreamPtr outStream;
+   UInt64 processed;
+   SRes res;
+ } CRangeEnc;
+@@ -383,7 +384,7 @@ typedef struct
+ typedef UInt32 CProbPrice;
+ 
+ 
+-typedef struct
++struct CLzmaEnc
+ {
+   void *matchFinderObj;
+   IMatchFinder2 matchFinder;
+@@ -426,7 +427,7 @@ typedef struct
+   UInt32 dictSize;
+   SRes result;
+ 
+-  #ifndef _7ZIP_ST
++  #ifndef Z7_ST
+   BoolInt mtMode;
+   // begin of CMatchFinderMt is used in LZ thread
+   CMatchFinderMt matchFinderMt;
+@@ -439,7 +440,7 @@ typedef struct
+   
+   // we suppose that we have 8-bytes alignment after CMatchFinder
+  
+-  #ifndef _7ZIP_ST
++  #ifndef Z7_ST
+   Byte pad[128];
+   #endif
+   
+@@ -479,77 +480,59 @@ typedef struct
+   CSaveState saveState;
+ 
+   // BoolInt mf_Failure;
+-  #ifndef _7ZIP_ST
++  #ifndef Z7_ST
+   Byte pad2[128];
+   #endif
+-} CLzmaEnc;
++};
+ 
+ 
+ #define MFB (p->matchFinderBase)
+ /*
+-#ifndef _7ZIP_ST
++#ifndef Z7_ST
+ #define MFB (p->matchFinderMt.MatchFinder)
+ #endif
+ */
+ 
+-#define COPY_ARR(dest, src, arr) memcpy(dest->arr, src->arr, sizeof(src->arr));
+-
+-void LzmaEnc_SaveState(CLzmaEncHandle pp)
+-{
+-  CLzmaEnc *p = (CLzmaEnc *)pp;
+-  CSaveState *dest = &p->saveState;
+-  
+-  dest->state = p->state;
+-  
+-  dest->lenProbs = p->lenProbs;
+-  dest->repLenProbs = p->repLenProbs;
+-
+-  COPY_ARR(dest, p, reps);
+-
+-  COPY_ARR(dest, p, posAlignEncoder);
+-  COPY_ARR(dest, p, isRep);
+-  COPY_ARR(dest, p, isRepG0);
+-  COPY_ARR(dest, p, isRepG1);
+-  COPY_ARR(dest, p, isRepG2);
+-  COPY_ARR(dest, p, isMatch);
+-  COPY_ARR(dest, p, isRep0Long);
+-  COPY_ARR(dest, p, posSlotEncoder);
+-  COPY_ARR(dest, p, posEncoders);
+-
+-  memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << p->lclp) * sizeof(CLzmaProb));
++// #define GET_CLzmaEnc_p  CLzmaEnc *p = (CLzmaEnc*)(void *)p;
++// #define GET_const_CLzmaEnc_p  const CLzmaEnc *p = (const CLzmaEnc*)(const void *)p;
++
++#define COPY_ARR(dest, src, arr)  memcpy((dest)->arr, (src)->arr, sizeof((src)->arr));
++
++#define COPY_LZMA_ENC_STATE(d, s, p)  \
++  (d)->state = (s)->state;  \
++  COPY_ARR(d, s, reps)  \
++  COPY_ARR(d, s, posAlignEncoder)  \
++  COPY_ARR(d, s, isRep)  \
++  COPY_ARR(d, s, isRepG0)  \
++  COPY_ARR(d, s, isRepG1)  \
++  COPY_ARR(d, s, isRepG2)  \
++  COPY_ARR(d, s, isMatch)  \
++  COPY_ARR(d, s, isRep0Long)  \
++  COPY_ARR(d, s, posSlotEncoder)  \
++  COPY_ARR(d, s, posEncoders)  \
++  (d)->lenProbs = (s)->lenProbs;  \
++  (d)->repLenProbs = (s)->repLenProbs;  \
++  memcpy((d)->litProbs, (s)->litProbs, ((size_t)0x300 * sizeof(CLzmaProb)) << (p)->lclp);
++
++void LzmaEnc_SaveState(CLzmaEncHandle p)
++{
++  // GET_CLzmaEnc_p
++  CSaveState *v = &p->saveState;
++  COPY_LZMA_ENC_STATE(v, p, p)
+ }
+ 
+-
+-void LzmaEnc_RestoreState(CLzmaEncHandle pp)
++void LzmaEnc_RestoreState(CLzmaEncHandle p)
+ {
+-  CLzmaEnc *dest = (CLzmaEnc *)pp;
+-  const CSaveState *p = &dest->saveState;
+-
+-  dest->state = p->state;
+-
+-  dest->lenProbs = p->lenProbs;
+-  dest->repLenProbs = p->repLenProbs;
+-  
+-  COPY_ARR(dest, p, reps);
+-  
+-  COPY_ARR(dest, p, posAlignEncoder);
+-  COPY_ARR(dest, p, isRep);
+-  COPY_ARR(dest, p, isRepG0);
+-  COPY_ARR(dest, p, isRepG1);
+-  COPY_ARR(dest, p, isRepG2);
+-  COPY_ARR(dest, p, isMatch);
+-  COPY_ARR(dest, p, isRep0Long);
+-  COPY_ARR(dest, p, posSlotEncoder);
+-  COPY_ARR(dest, p, posEncoders);
+-
+-  memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << dest->lclp) * sizeof(CLzmaProb));
++  // GET_CLzmaEnc_p
++  const CSaveState *v = &p->saveState;
++  COPY_LZMA_ENC_STATE(p, v, p)
+ }
+ 
+ 
+-
+-SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
++Z7_NO_INLINE
++SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props2)
+ {
+-  CLzmaEnc *p = (CLzmaEnc *)pp;
++  // GET_CLzmaEnc_p
+   CLzmaEncProps props = *props2;
+   LzmaEncProps_Normalize(&props);
+ 
+@@ -585,6 +568,7 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
+   p->fastMode = (props.algo == 0);
+   // p->_maxMode = True;
+   MFB.btMode = (Byte)(props.btMode ? 1 : 0);
++  // MFB.btMode = (Byte)(props.btMode);
+   {
+     unsigned numHashBytes = 4;
+     if (props.btMode)
+@@ -595,13 +579,15 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
+     if (props.numHashBytes >= 5) numHashBytes = 5;
+ 
+     MFB.numHashBytes = numHashBytes;
++    // MFB.numHashBytes_Min = 2;
++    MFB.numHashOutBits = (Byte)props.numHashOutBits;
+   }
+ 
+   MFB.cutValue = props.mc;
+ 
+   p->writeEndMark = (BoolInt)props.writeEndMark;
+ 
+-  #ifndef _7ZIP_ST
++  #ifndef Z7_ST
+   /*
+   if (newMultiThread != _multiThread)
+   {
+@@ -618,9 +604,9 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
+ }
+ 
+ 
+-void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize)
++void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize)
+ {
+-  CLzmaEnc *p = (CLzmaEnc *)pp;
++  // GET_CLzmaEnc_p
+   MFB.expectedDataSize = expectedDataSiize;
+ }
+ 
+@@ -684,7 +670,7 @@ static void RangeEnc_Init(CRangeEnc *p)
+   p->res = SZ_OK;
+ }
+ 
+-MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)
++Z7_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)
+ {
+   const size_t num = (size_t)(p->buf - p->bufBase);
+   if (p->res == SZ_OK)
+@@ -696,7 +682,7 @@ MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)
+   p->buf = p->bufBase;
+ }
+ 
+-MY_NO_INLINE static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p)
++Z7_NO_INLINE static void Z7_FASTCALL RangeEnc_ShiftLow(CRangeEnc *p)
+ {
+   UInt32 low = (UInt32)p->low;
+   unsigned high = (unsigned)(p->low >> 32);
+@@ -741,9 +727,9 @@ static void RangeEnc_FlushData(CRangeEnc *p)
+   ttt = *(prob); \
+   newBound = (range >> kNumBitModelTotalBits) * ttt;
+ 
+-// #define _LZMA_ENC_USE_BRANCH
++// #define Z7_LZMA_ENC_USE_BRANCH
+ 
+-#ifdef _LZMA_ENC_USE_BRANCH
++#ifdef Z7_LZMA_ENC_USE_BRANCH
+ 
+ #define RC_BIT(p, prob, bit) { \
+   RC_BIT_PRE(p, prob) \
+@@ -811,7 +797,7 @@ static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 sym)
+     CLzmaProb *prob = probs + (sym >> 8);
+     UInt32 bit = (sym >> 7) & 1;
+     sym <<= 1;
+-    RC_BIT(p, prob, bit);
++    RC_BIT(p, prob, bit)
+   }
+   while (sym < 0x10000);
+   p->range = range;
+@@ -833,7 +819,7 @@ static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 sym, UIn
+     bit = (sym >> 7) & 1;
+     sym <<= 1;
+     offs &= ~(matchByte ^ sym);
+-    RC_BIT(p, prob, bit);
++    RC_BIT(p, prob, bit)
+   }
+   while (sym < 0x10000);
+   p->range = range;
+@@ -867,10 +853,10 @@ static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices)
+ 
+ 
+ #define GET_PRICE(prob, bit) \
+-  p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
++  p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]
+ 
+ #define GET_PRICEa(prob, bit) \
+-     ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
++     ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]
+ 
+ #define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits]
+ #define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
+@@ -921,7 +907,7 @@ static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, unsigned numBi
+     unsigned bit = sym & 1;
+     // RangeEnc_EncodeBit(rc, probs + m, bit);
+     sym >>= 1;
+-    RC_BIT(rc, probs + m, bit);
++    RC_BIT(rc, probs + m, bit)
+     m = (m << 1) | bit;
+   }
+   while (--numBits);
+@@ -944,15 +930,15 @@ static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posS
+   UInt32 range, ttt, newBound;
+   CLzmaProb *probs = p->low;
+   range = rc->range;
+-  RC_BIT_PRE(rc, probs);
++  RC_BIT_PRE(rc, probs)
+   if (sym >= kLenNumLowSymbols)
+   {
+-    RC_BIT_1(rc, probs);
++    RC_BIT_1(rc, probs)
+     probs += kLenNumLowSymbols;
+-    RC_BIT_PRE(rc, probs);
++    RC_BIT_PRE(rc, probs)
+     if (sym >= kLenNumLowSymbols * 2)
+     {
+-      RC_BIT_1(rc, probs);
++      RC_BIT_1(rc, probs)
+       rc->range = range;
+       // RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2);
+       LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2);
+@@ -965,11 +951,11 @@ static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posS
+   {
+     unsigned m;
+     unsigned bit;
+-    RC_BIT_0(rc, probs);
++    RC_BIT_0(rc, probs)
+     probs += (posState << (1 + kLenNumLowBits));
+-    bit = (sym >> 2)    ; RC_BIT(rc, probs + 1, bit); m = (1 << 1) + bit;
+-    bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit); m = (m << 1) + bit;
+-    bit =  sym       & 1; RC_BIT(rc, probs + m, bit);
++    bit = (sym >> 2)    ; RC_BIT(rc, probs + 1, bit)  m = (1 << 1) + bit;
++    bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit)  m = (m << 1) + bit;
++    bit =  sym       & 1; RC_BIT(rc, probs + m, bit)
+     rc->range = range;
+   }
+ }
+@@ -990,7 +976,7 @@ static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *price
+ }
+ 
+ 
+-MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables(
++Z7_NO_INLINE static void Z7_FASTCALL LenPriceEnc_UpdateTables(
+     CLenPriceEnc *p,
+     unsigned numPosStates,
+     const CLenEnc *enc,
+@@ -1054,14 +1040,14 @@ MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables(
+         UInt32 price = b;
+         do
+         {
+-          unsigned bit = sym & 1;
++          const unsigned bit = sym & 1;
+           sym >>= 1;
+           price += GET_PRICEa(probs[sym], bit);
+         }
+         while (sym >= 2);
+ 
+         {
+-          unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))];
++          const unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))];
+           prices[(size_t)i * 2    ] = price + GET_PRICEa_0(prob);
+           prices[(size_t)i * 2 + 1] = price + GET_PRICEa_1(prob);
+         }
+@@ -1070,7 +1056,7 @@ MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables(
+ 
+       {
+         unsigned posState;
+-        size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]);
++        const size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]);
+         for (posState = 1; posState < numPosStates; posState++)
+           memcpy(p->prices[posState] + kLenNumLowSymbols * 2, p->prices[0] + kLenNumLowSymbols * 2, num);
+       }
+@@ -1152,7 +1138,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
+   + GET_PRICE_1(p->isRep[state]) \
+   + GET_PRICE_0(p->isRepG0[state])
+   
+-MY_FORCE_INLINE
++Z7_FORCE_INLINE
+ static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState)
+ {
+   UInt32 price;
+@@ -1331,7 +1317,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
+           LitEnc_GetPrice(probs, curByte, p->ProbPrices));
+     }
+ 
+-    MakeAs_Lit(&p->opt[1]);
++    MakeAs_Lit(&p->opt[1])
+     
+     matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]);
+     repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]);
+@@ -1343,7 +1329,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
+       if (shortRepPrice < p->opt[1].price)
+       {
+         p->opt[1].price = shortRepPrice;
+-        MakeAs_ShortRep(&p->opt[1]);
++        MakeAs_ShortRep(&p->opt[1])
+       }
+       if (last < 2)
+       {
+@@ -1410,7 +1396,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
+           else
+           {
+             unsigned slot;
+-            GetPosSlot2(dist, slot);
++            GetPosSlot2(dist, slot)
+             price += p->alignPrices[dist & kAlignMask];
+             price += p->posSlotPrices[lenToPosState][slot];
+           }
+@@ -1486,7 +1472,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
+         unsigned delta = best - cur;
+         if (delta != 0)
+         {
+-          MOVE_POS(p, delta);
++          MOVE_POS(p, delta)
+         }
+       }
+       cur = best;
+@@ -1633,7 +1619,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
+       {
+         nextOpt->price = litPrice;
+         nextOpt->len = 1;
+-        MakeAs_Lit(nextOpt);
++        MakeAs_Lit(nextOpt)
+         nextIsLit = True;
+       }
+     }
+@@ -1667,7 +1653,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
+       {
+         nextOpt->price = shortRepPrice;
+         nextOpt->len = 1;
+-        MakeAs_ShortRep(nextOpt);
++        MakeAs_ShortRep(nextOpt)
+         nextIsLit = False;
+       }
+     }
+@@ -1871,7 +1857,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
+       dist = MATCHES[(size_t)offs + 1];
+       
+       // if (dist >= kNumFullDistances)
+-      GetPosSlot2(dist, posSlot);
++      GetPosSlot2(dist, posSlot)
+       
+       for (len = /*2*/ startLen; ; len++)
+       {
+@@ -1962,7 +1948,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
+             break;
+           dist = MATCHES[(size_t)offs + 1];
+           // if (dist >= kNumFullDistances)
+-            GetPosSlot2(dist, posSlot);
++            GetPosSlot2(dist, posSlot)
+         }
+       }
+     }
+@@ -2138,7 +2124,7 @@ static void WriteEndMarker(CLzmaEnc *p, unsigned posState)
+     {
+       UInt32 ttt, newBound;
+       RC_BIT_PRE(p, probs + m)
+-      RC_BIT_1(&p->rc, probs + m);
++      RC_BIT_1(&p->rc, probs + m)
+       m = (m << 1) + 1;
+     }
+     while (m < (1 << kNumPosSlotBits));
+@@ -2163,7 +2149,7 @@ static void WriteEndMarker(CLzmaEnc *p, unsigned posState)
+     {
+       UInt32 ttt, newBound;
+       RC_BIT_PRE(p, probs + m)
+-      RC_BIT_1(&p->rc, probs + m);
++      RC_BIT_1(&p->rc, probs + m)
+       m = (m << 1) + 1;
+     }
+     while (m < kAlignTableSize);
+@@ -2179,7 +2165,7 @@ static SRes CheckErrors(CLzmaEnc *p)
+   if (p->rc.res != SZ_OK)
+     p->result = SZ_ERROR_WRITE;
+ 
+-  #ifndef _7ZIP_ST
++  #ifndef Z7_ST
+   if (
+       // p->mf_Failure ||
+         (p->mtMode &&
+@@ -2187,7 +2173,7 @@ static SRes CheckErrors(CLzmaEnc *p)
+             p->matchFinderMt.failure_LZ_BT))
+      )
+   {
+-    p->result = MY_HRES_ERROR__INTERNAL_ERROR;
++    p->result = MY_HRES_ERROR_INTERNAL_ERROR;
+     // printf("\nCheckErrors p->matchFinderMt.failureLZ\n");
+   }
+   #endif
+@@ -2201,7 +2187,7 @@ static SRes CheckErrors(CLzmaEnc *p)
+ }
+ 
+ 
+-MY_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos)
++Z7_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos)
+ {
+   /* ReleaseMFStream(); */
+   p->finished = True;
+@@ -2213,7 +2199,7 @@ MY_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos)
+ }
+ 
+ 
+-MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p)
++Z7_NO_INLINE static void FillAlignPrices(CLzmaEnc *p)
+ {
+   unsigned i;
+   const CProbPrice *ProbPrices = p->ProbPrices;
+@@ -2237,7 +2223,7 @@ MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p)
+ }
+ 
+ 
+-MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)
++Z7_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)
+ {
+   // int y; for (y = 0; y < 100; y++) {
+ 
+@@ -2337,7 +2323,7 @@ static void LzmaEnc_Construct(CLzmaEnc *p)
+   RangeEnc_Construct(&p->rc);
+   MatchFinder_Construct(&MFB);
+   
+-  #ifndef _7ZIP_ST
++  #ifndef Z7_ST
+   p->matchFinderMt.MatchFinder = &MFB;
+   MatchFinderMt_Construct(&p->matchFinderMt);
+   #endif
+@@ -2345,7 +2331,7 @@ static void LzmaEnc_Construct(CLzmaEnc *p)
+   {
+     CLzmaEncProps props;
+     LzmaEncProps_Init(&props);
+-    LzmaEnc_SetProps(p, &props);
++    LzmaEnc_SetProps((CLzmaEncHandle)(void *)p, &props);
+   }
+ 
+   #ifndef LZMA_LOG_BSR
+@@ -2376,7 +2362,7 @@ static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
+ 
+ static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+ {
+-  #ifndef _7ZIP_ST
++  #ifndef Z7_ST
+   MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
+   #endif
+   
+@@ -2387,21 +2373,22 @@ static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBi
+ 
+ void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+ {
+-  LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig);
++  // GET_CLzmaEnc_p
++  LzmaEnc_Destruct(p, alloc, allocBig);
+   ISzAlloc_Free(alloc, p);
+ }
+ 
+ 
+-MY_NO_INLINE
++Z7_NO_INLINE
+ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize)
+ {
+   UInt32 nowPos32, startPos32;
+   if (p->needInit)
+   {
+-    #ifndef _7ZIP_ST
++    #ifndef Z7_ST
+     if (p->mtMode)
+     {
+-      RINOK(MatchFinderMt_InitMt(&p->matchFinderMt));
++      RINOK(MatchFinderMt_InitMt(&p->matchFinderMt))
+     }
+     #endif
+     p->matchFinder.Init(p->matchFinderObj);
+@@ -2410,7 +2397,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
+ 
+   if (p->finished)
+     return p->result;
+-  RINOK(CheckErrors(p));
++  RINOK(CheckErrors(p))
+ 
+   nowPos32 = (UInt32)p->nowPos64;
+   startPos32 = nowPos32;
+@@ -2473,7 +2460,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
+       const Byte *data;
+       unsigned state;
+ 
+-      RC_BIT_0(&p->rc, probs);
++      RC_BIT_0(&p->rc, probs)
+       p->rc.range = range;
+       data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
+       probs = LIT_PROBS(nowPos32, *(data - 1));
+@@ -2487,53 +2474,53 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
+     }
+     else
+     {
+-      RC_BIT_1(&p->rc, probs);
++      RC_BIT_1(&p->rc, probs)
+       probs = &p->isRep[p->state];
+       RC_BIT_PRE(&p->rc, probs)
+       
+       if (dist < LZMA_NUM_REPS)
+       {
+-        RC_BIT_1(&p->rc, probs);
++        RC_BIT_1(&p->rc, probs)
+         probs = &p->isRepG0[p->state];
+         RC_BIT_PRE(&p->rc, probs)
+         if (dist == 0)
+         {
+-          RC_BIT_0(&p->rc, probs);
++          RC_BIT_0(&p->rc, probs)
+           probs = &p->isRep0Long[p->state][posState];
+           RC_BIT_PRE(&p->rc, probs)
+           if (len != 1)
+           {
+-            RC_BIT_1_BASE(&p->rc, probs);
++            RC_BIT_1_BASE(&p->rc, probs)
+           }
+           else
+           {
+-            RC_BIT_0_BASE(&p->rc, probs);
++            RC_BIT_0_BASE(&p->rc, probs)
+             p->state = kShortRepNextStates[p->state];
+           }
+         }
+         else
+         {
+-          RC_BIT_1(&p->rc, probs);
++          RC_BIT_1(&p->rc, probs)
+           probs = &p->isRepG1[p->state];
+           RC_BIT_PRE(&p->rc, probs)
+           if (dist == 1)
+           {
+-            RC_BIT_0_BASE(&p->rc, probs);
++            RC_BIT_0_BASE(&p->rc, probs)
+             dist = p->reps[1];
+           }
+           else
+           {
+-            RC_BIT_1(&p->rc, probs);
++            RC_BIT_1(&p->rc, probs)
+             probs = &p->isRepG2[p->state];
+             RC_BIT_PRE(&p->rc, probs)
+             if (dist == 2)
+             {
+-              RC_BIT_0_BASE(&p->rc, probs);
++              RC_BIT_0_BASE(&p->rc, probs)
+               dist = p->reps[2];
+             }
+             else
+             {
+-              RC_BIT_1_BASE(&p->rc, probs);
++              RC_BIT_1_BASE(&p->rc, probs)
+               dist = p->reps[3];
+               p->reps[3] = p->reps[2];
+             }
+@@ -2557,7 +2544,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
+       else
+       {
+         unsigned posSlot;
+-        RC_BIT_0(&p->rc, probs);
++        RC_BIT_0(&p->rc, probs)
+         p->rc.range = range;
+         p->state = kMatchNextStates[p->state];
+ 
+@@ -2571,7 +2558,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
+         p->reps[0] = dist + 1;
+         
+         p->matchPriceCount++;
+-        GetPosSlot(dist, posSlot);
++        GetPosSlot(dist, posSlot)
+         // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], posSlot);
+         {
+           UInt32 sym = (UInt32)posSlot + (1 << kNumPosSlotBits);
+@@ -2582,7 +2569,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
+             CLzmaProb *prob = probs + (sym >> kNumPosSlotBits);
+             UInt32 bit = (sym >> (kNumPosSlotBits - 1)) & 1;
+             sym <<= 1;
+-            RC_BIT(&p->rc, prob, bit);
++            RC_BIT(&p->rc, prob, bit)
+           }
+           while (sym < (1 << kNumPosSlotBits * 2));
+           p->rc.range = range;
+@@ -2626,10 +2613,10 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
+             {
+               unsigned m = 1;
+               unsigned bit;
+-              bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit;
+-              bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit;
+-              bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit;
+-              bit = dist & 1;             RC_BIT(&p->rc, p->posAlignEncoder + m, bit);
++              bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit)  m = (m << 1) + bit;
++              bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit)  m = (m << 1) + bit;
++              bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit)  m = (m << 1) + bit;
++              bit = dist & 1;             RC_BIT(&p->rc, p->posAlignEncoder + m, bit)
+               p->rc.range = range;
+               // p->alignPriceCount++;
+             }
+@@ -2704,17 +2691,17 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc,
+   if (!RangeEnc_Alloc(&p->rc, alloc))
+     return SZ_ERROR_MEM;
+ 
+-  #ifndef _7ZIP_ST
++  #ifndef Z7_ST
+   p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0));
+   #endif
+ 
+   {
+-    unsigned lclp = p->lc + p->lp;
++    const unsigned lclp = p->lc + p->lp;
+     if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp)
+     {
+       LzmaEnc_FreeLits(p, alloc);
+-      p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb));
+-      p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb));
++      p->litProbs =           (CLzmaProb *)ISzAlloc_Alloc(alloc, ((size_t)0x300 * sizeof(CLzmaProb)) << lclp);
++      p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((size_t)0x300 * sizeof(CLzmaProb)) << lclp);
+       if (!p->litProbs || !p->saveState.litProbs)
+       {
+         LzmaEnc_FreeLits(p, alloc);
+@@ -2748,15 +2735,14 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc,
+         (numFastBytes + LZMA_MATCH_LEN_MAX + 1)
+   */
+ 
+-  #ifndef _7ZIP_ST
++  #ifndef Z7_ST
+   if (p->mtMode)
+   {
+     RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize,
+         p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */
+-        , allocBig));
++        , allocBig))
+     p->matchFinderObj = &p->matchFinderMt;
+-    MFB.bigHash = (Byte)(
+-        (p->dictSize > kBigHashDicLimit && MFB.hashMask >= 0xFFFFFF) ? 1 : 0);
++    MFB.bigHash = (Byte)(MFB.hashMask >= 0xFFFFFF ? 1 : 0);
+     MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);
+   }
+   else
+@@ -2816,8 +2802,8 @@ static void LzmaEnc_Init(CLzmaEnc *p)
+   }
+ 
+   {
+-    UInt32 num = (UInt32)0x300 << (p->lp + p->lc);
+-    UInt32 k;
++    const size_t num = (size_t)0x300 << (p->lp + p->lc);
++    size_t k;
+     CLzmaProb *probs = p->litProbs;
+     for (k = 0; k < num; k++)
+       probs[k] = kProbInitValue;
+@@ -2872,59 +2858,53 @@ static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr
+ 
+   p->finished = False;
+   p->result = SZ_OK;
+-  RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig));
++  p->nowPos64 = 0;
++  p->needInit = 1;
++  RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig))
+   LzmaEnc_Init(p);
+   LzmaEnc_InitPrices(p);
+-  p->nowPos64 = 0;
+   return SZ_OK;
+ }
+ 
+-static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream,
++static SRes LzmaEnc_Prepare(CLzmaEncHandle p,
++    ISeqOutStreamPtr outStream,
++    ISeqInStreamPtr inStream,
+     ISzAllocPtr alloc, ISzAllocPtr allocBig)
+ {
+-  CLzmaEnc *p = (CLzmaEnc *)pp;
+-  MFB.stream = inStream;
+-  p->needInit = 1;
++  // GET_CLzmaEnc_p
++  MatchFinder_SET_STREAM(&MFB, inStream)
+   p->rc.outStream = outStream;
+   return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig);
+ }
+ 
+-SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp,
+-    ISeqInStream *inStream, UInt32 keepWindowSize,
++SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p,
++    ISeqInStreamPtr inStream, UInt32 keepWindowSize,
+     ISzAllocPtr alloc, ISzAllocPtr allocBig)
+ {
+-  CLzmaEnc *p = (CLzmaEnc *)pp;
+-  MFB.stream = inStream;
+-  p->needInit = 1;
++  // GET_CLzmaEnc_p
++  MatchFinder_SET_STREAM(&MFB, inStream)
+   return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
+ }
+ 
+-static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen)
+-{
+-  MFB.directInput = 1;
+-  MFB.bufferBase = (Byte *)src;
+-  MFB.directInputRem = srcLen;
+-}
+-
+-SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
+-    UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
++SRes LzmaEnc_MemPrepare(CLzmaEncHandle p,
++    const Byte *src, SizeT srcLen,
++    UInt32 keepWindowSize,
++    ISzAllocPtr alloc, ISzAllocPtr allocBig)
+ {
+-  CLzmaEnc *p = (CLzmaEnc *)pp;
+-  LzmaEnc_SetInputBuf(p, src, srcLen);
+-  p->needInit = 1;
+-
+-  LzmaEnc_SetDataSize(pp, srcLen);
++  // GET_CLzmaEnc_p
++  MatchFinder_SET_DIRECT_INPUT_BUF(&MFB, src, srcLen)
++  LzmaEnc_SetDataSize(p, srcLen);
+   return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
+ }
+ 
+-void LzmaEnc_Finish(CLzmaEncHandle pp)
++void LzmaEnc_Finish(CLzmaEncHandle p)
+ {
+-  #ifndef _7ZIP_ST
+-  CLzmaEnc *p = (CLzmaEnc *)pp;
++  #ifndef Z7_ST
++  // GET_CLzmaEnc_p
+   if (p->mtMode)
+     MatchFinderMt_ReleaseStream(&p->matchFinderMt);
+   #else
+-  UNUSED_VAR(pp);
++  UNUSED_VAR(p)
+   #endif
+ }
+ 
+@@ -2933,13 +2913,13 @@ typedef struct
+ {
+   ISeqOutStream vt;
+   Byte *data;
+-  SizeT rem;
++  size_t rem;
+   BoolInt overflow;
+ } CLzmaEnc_SeqOutStreamBuf;
+ 
+-static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, size_t size)
++static size_t SeqOutStreamBuf_Write(ISeqOutStreamPtr pp, const void *data, size_t size)
+ {
+-  CLzmaEnc_SeqOutStreamBuf *p = CONTAINER_FROM_VTBL(pp, CLzmaEnc_SeqOutStreamBuf, vt);
++  Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CLzmaEnc_SeqOutStreamBuf)
+   if (p->rem < size)
+   {
+     size = p->rem;
+@@ -2956,25 +2936,25 @@ static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, s
+ 
+ 
+ /*
+-UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp)
++UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle p)
+ {
+-  const CLzmaEnc *p = (CLzmaEnc *)pp;
++  GET_const_CLzmaEnc_p
+   return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
+ }
+ */
+ 
+-const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp)
++const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p)
+ {
+-  const CLzmaEnc *p = (CLzmaEnc *)pp;
++  // GET_const_CLzmaEnc_p
+   return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
+ }
+ 
+ 
+ // (desiredPackSize == 0) is not allowed
+-SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
++SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit,
+     Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize)
+ {
+-  CLzmaEnc *p = (CLzmaEnc *)pp;
++  // GET_CLzmaEnc_p
+   UInt64 nowPos64;
+   SRes res;
+   CLzmaEnc_SeqOutStreamBuf outStream;
+@@ -3006,12 +2986,12 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
+ }
+ 
+ 
+-MY_NO_INLINE
+-static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
++Z7_NO_INLINE
++static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgressPtr progress)
+ {
+   SRes res = SZ_OK;
+ 
+-  #ifndef _7ZIP_ST
++  #ifndef Z7_ST
+   Byte allocaDummy[0x300];
+   allocaDummy[0] = 0;
+   allocaDummy[1] = allocaDummy[0];
+@@ -3033,7 +3013,7 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
+     }
+   }
+   
+-  LzmaEnc_Finish(p);
++  LzmaEnc_Finish((CLzmaEncHandle)(void *)p);
+ 
+   /*
+   if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB))
+@@ -3045,21 +3025,22 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
+ }
+ 
+ 
+-SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress,
++SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, ICompressProgressPtr progress,
+     ISzAllocPtr alloc, ISzAllocPtr allocBig)
+ {
+-  RINOK(LzmaEnc_Prepare(pp, outStream, inStream, alloc, allocBig));
+-  return LzmaEnc_Encode2((CLzmaEnc *)pp, progress);
++  // GET_CLzmaEnc_p
++  RINOK(LzmaEnc_Prepare(p, outStream, inStream, alloc, allocBig))
++  return LzmaEnc_Encode2(p, progress);
+ }
+ 
+ 
+-SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
++SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *props, SizeT *size)
+ {
+   if (*size < LZMA_PROPS_SIZE)
+     return SZ_ERROR_PARAM;
+   *size = LZMA_PROPS_SIZE;
+   {
+-    const CLzmaEnc *p = (const CLzmaEnc *)pp;
++    // GET_CLzmaEnc_p
+     const UInt32 dictSize = p->dictSize;
+     UInt32 v;
+     props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
+@@ -3083,23 +3064,24 @@ SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
+       while (v < dictSize);
+     }
+ 
+-    SetUi32(props + 1, v);
++    SetUi32(props + 1, v)
+     return SZ_OK;
+   }
+ }
+ 
+ 
+-unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp)
++unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p)
+ {
+-  return (unsigned)((CLzmaEnc *)pp)->writeEndMark;
++  // GET_CLzmaEnc_p
++  return (unsigned)p->writeEndMark;
+ }
+ 
+ 
+-SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+-    int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
++SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
++    int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+ {
+   SRes res;
+-  CLzmaEnc *p = (CLzmaEnc *)pp;
++  // GET_CLzmaEnc_p
+ 
+   CLzmaEnc_SeqOutStreamBuf outStream;
+ 
+@@ -3111,7 +3093,7 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte
+   p->writeEndMark = writeEndMark;
+   p->rc.outStream = &outStream.vt;
+ 
+-  res = LzmaEnc_MemPrepare(pp, src, srcLen, 0, alloc, allocBig);
++  res = LzmaEnc_MemPrepare(p, src, srcLen, 0, alloc, allocBig);
+   
+   if (res == SZ_OK)
+   {
+@@ -3120,7 +3102,7 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte
+       res = SZ_ERROR_FAIL;
+   }
+ 
+-  *destLen -= outStream.rem;
++  *destLen -= (SizeT)outStream.rem;
+   if (outStream.overflow)
+     return SZ_ERROR_OUTPUT_EOF;
+   return res;
+@@ -3129,9 +3111,9 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte
+ 
+ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+     const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
+-    ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
++    ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+ {
+-  CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc);
++  CLzmaEncHandle p = LzmaEnc_Create(alloc);
+   SRes res;
+   if (!p)
+     return SZ_ERROR_MEM;
+@@ -3151,10 +3133,10 @@ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+ 
+ 
+ /*
+-#ifndef _7ZIP_ST
+-void LzmaEnc_GetLzThreads(CLzmaEncHandle pp, HANDLE lz_threads[2])
++#ifndef Z7_ST
++void LzmaEnc_GetLzThreads(CLzmaEncHandle p, HANDLE lz_threads[2])
+ {
+-  const CLzmaEnc *p = (CLzmaEnc *)pp;
++  GET_const_CLzmaEnc_p
+   lz_threads[0] = p->matchFinderMt.hashSync.thread;
+   lz_threads[1] = p->matchFinderMt.btSync.thread;
+ }
+diff --git a/third_party/lzma_sdk/C/LzmaEnc.h b/third_party/lzma_sdk/C/LzmaEnc.h
+index bc2ed5042b93e182f53f08b374aec03b157c9bc9..9f8039a1032993c7311538f36fdd3e6265ab9d78 100644
+--- a/third_party/lzma_sdk/C/LzmaEnc.h
++++ b/third_party/lzma_sdk/C/LzmaEnc.h
+@@ -1,8 +1,8 @@
+ /*  LzmaEnc.h -- LZMA Encoder
+-2019-10-30 : Igor Pavlov : Public domain */
++2023-04-13 : Igor Pavlov : Public domain */
+ 
+-#ifndef __LZMA_ENC_H
+-#define __LZMA_ENC_H
++#ifndef ZIP7_INC_LZMA_ENC_H
++#define ZIP7_INC_LZMA_ENC_H
+ 
+ #include "7zTypes.h"
+ 
+@@ -10,7 +10,7 @@ EXTERN_C_BEGIN
+ 
+ #define LZMA_PROPS_SIZE 5
+ 
+-typedef struct _CLzmaEncProps
++typedef struct
+ {
+   int level;       /* 0 <= level <= 9 */
+   UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
+@@ -23,10 +23,13 @@ typedef struct _CLzmaEncProps
+   int fb;          /* 5 <= fb <= 273, default = 32 */
+   int btMode;      /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
+   int numHashBytes; /* 2, 3 or 4, default = 4 */
++  unsigned numHashOutBits;  /* default = ? */
+   UInt32 mc;       /* 1 <= mc <= (1 << 30), default = 32 */
+   unsigned writeEndMark;  /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
+   int numThreads;  /* 1 or 2, default = 2 */
+ 
++  // int _pad;
++
+   UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
+                         Encoder uses this value to reduce dictionary size */
+ 
+@@ -51,7 +54,9 @@ SRes:
+   SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
+ */
+ 
+-typedef void * CLzmaEncHandle;
++typedef struct CLzmaEnc CLzmaEnc;
++typedef CLzmaEnc * CLzmaEncHandle;
++// Z7_DECLARE_HANDLE(CLzmaEncHandle)
+ 
+ CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc);
+ void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+@@ -61,17 +66,17 @@ void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize);
+ SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);
+ unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p);
+ 
+-SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream,
+-    ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
++SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream,
++    ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+ SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+-    int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
++    int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+ 
+ 
+ /* ---------- One Call Interface ---------- */
+ 
+ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+     const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
+-    ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
++    ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+ 
+ EXTERN_C_END
+ 
+diff --git a/third_party/lzma_sdk/C/LzmaLib.c b/third_party/lzma_sdk/C/LzmaLib.c
+index 706e9e58cd61c7f569255595568a74ee25f1d2c1..785e884874485e12830c5ccf224c7d935a9ea426 100644
+--- a/third_party/lzma_sdk/C/LzmaLib.c
++++ b/third_party/lzma_sdk/C/LzmaLib.c
+@@ -1,12 +1,14 @@
+ /* LzmaLib.c -- LZMA library wrapper
+-2015-06-13 : Igor Pavlov : Public domain */
++2023-04-02 : Igor Pavlov : Public domain */
++
++#include "Precomp.h"
+ 
+ #include "Alloc.h"
+ #include "LzmaDec.h"
+ #include "LzmaEnc.h"
+ #include "LzmaLib.h"
+ 
+-MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
++Z7_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
+   unsigned char *outProps, size_t *outPropsSize,
+   int level, /* 0 <= level <= 9, default = 5 */
+   unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */
+@@ -32,7 +34,7 @@ MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char
+ }
+ 
+ 
+-MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen,
++Z7_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen,
+   const unsigned char *props, size_t propsSize)
+ {
+   ELzmaStatus status;
+diff --git a/third_party/lzma_sdk/C/LzmaLib.h b/third_party/lzma_sdk/C/LzmaLib.h
+index c343a8596dd0a324b82bfdb6c13dc7eff000c98a..d7c0724de5e325ca7f1e3e4954fb78a15c68fa11 100644
+--- a/third_party/lzma_sdk/C/LzmaLib.h
++++ b/third_party/lzma_sdk/C/LzmaLib.h
+@@ -1,14 +1,14 @@
+ /* LzmaLib.h -- LZMA library interface
+-2021-04-03 : Igor Pavlov : Public domain */
++2023-04-02 : Igor Pavlov : Public domain */
+ 
+-#ifndef __LZMA_LIB_H
+-#define __LZMA_LIB_H
++#ifndef ZIP7_INC_LZMA_LIB_H
++#define ZIP7_INC_LZMA_LIB_H
+ 
+ #include "7zTypes.h"
+ 
+ EXTERN_C_BEGIN
+ 
+-#define MY_STDAPI int MY_STD_CALL
++#define Z7_STDAPI int Z7_STDCALL
+ 
+ #define LZMA_PROPS_SIZE 5
+ 
+@@ -100,7 +100,7 @@ Returns:
+   SZ_ERROR_THREAD     - errors in multithreading functions (only for Mt version)
+ */
+ 
+-MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
++Z7_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
+   unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */
+   int level,      /* 0 <= level <= 9, default = 5 */
+   unsigned dictSize,  /* default = (1 << 24) */
+@@ -130,7 +130,7 @@ Returns:
+   SZ_ERROR_INPUT_EOF   - it needs more bytes in input buffer (src)
+ */
+ 
+-MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen,
++Z7_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen,
+   const unsigned char *props, size_t propsSize);
+ 
+ EXTERN_C_END
+diff --git a/third_party/lzma_sdk/C/Precomp.h b/third_party/lzma_sdk/C/Precomp.h
+index e8ff8b40e81ae1e2aa9b2bde34e8ada9ea37e012..7747fdd74c64b4761be0b9eebd2347215ee2cd40 100644
+--- a/third_party/lzma_sdk/C/Precomp.h
++++ b/third_party/lzma_sdk/C/Precomp.h
+@@ -1,10 +1,127 @@
+-/* Precomp.h -- StdAfx
+-2013-11-12 : Igor Pavlov : Public domain */
++/* Precomp.h -- precompilation file
++2024-01-25 : Igor Pavlov : Public domain */
+ 
+-#ifndef __7Z_PRECOMP_H
+-#define __7Z_PRECOMP_H
++#ifndef ZIP7_INC_PRECOMP_H
++#define ZIP7_INC_PRECOMP_H
++
++/*
++  this file must be included before another *.h files and before <windows.h>.
++  this file is included from the following files:
++    C\*.c
++    C\Util\*\Precomp.h   <-  C\Util\*\*.c
++    CPP\Common\Common.h  <-  *\StdAfx.h    <-  *\*.cpp
++
++  this file can set the following macros:
++    Z7_LARGE_PAGES 1
++    Z7_LONG_PATH 1
++    Z7_WIN32_WINNT_MIN  0x0500 (or higher) : we require at least win2000+ for 7-Zip
++    _WIN32_WINNT        0x0500 (or higher)
++    WINVER  _WIN32_WINNT
++    UNICODE 1
++    _UNICODE 1
++*/
+ 
+ #include "Compiler.h"
+-/* #include "7zTypes.h" */
++
++#ifdef _MSC_VER
++// #pragma warning(disable : 4206) // nonstandard extension used : translation unit is empty
++#if _MSC_VER >= 1912
++// #pragma warning(disable : 5039) // pointer or reference to potentially throwing function passed to 'extern "C"' function under - EHc.Undefined behavior may occur if this function throws an exception.
++#endif
++#endif
++
++/*
++// for debug:
++#define UNICODE 1
++#define _UNICODE 1
++#define  _WIN32_WINNT  0x0500  // win2000
++#ifndef WINVER
++  #define WINVER  _WIN32_WINNT
++#endif
++*/
++
++#ifdef _WIN32
++/*
++  this "Precomp.h" file must be included before <windows.h>,
++  if we want to define _WIN32_WINNT before <windows.h>.
++*/
++
++#ifndef Z7_LARGE_PAGES
++#ifndef Z7_NO_LARGE_PAGES
++#define Z7_LARGE_PAGES 1
++#endif
++#endif
++
++#ifndef Z7_LONG_PATH
++#ifndef Z7_NO_LONG_PATH
++#define Z7_LONG_PATH 1
++#endif
++#endif
++
++#ifndef Z7_DEVICE_FILE
++#ifndef Z7_NO_DEVICE_FILE
++// #define Z7_DEVICE_FILE 1
++#endif
++#endif
++
++// we don't change macros if included after <windows.h>
++#ifndef _WINDOWS_
++
++#ifndef Z7_WIN32_WINNT_MIN
++  #if defined(_M_ARM64) || defined(__aarch64__)
++    // #define Z7_WIN32_WINNT_MIN  0x0a00  // win10
++    #define Z7_WIN32_WINNT_MIN  0x0600  // vista
++  #elif defined(_M_ARM) && defined(_M_ARMT) && defined(_M_ARM_NT)
++    // #define Z7_WIN32_WINNT_MIN  0x0602  // win8
++    #define Z7_WIN32_WINNT_MIN  0x0600  // vista
++  #elif defined(_M_X64) || defined(_M_AMD64) || defined(__x86_64__) || defined(_M_IA64)
++    #define Z7_WIN32_WINNT_MIN  0x0503  // win2003
++  // #elif defined(_M_IX86) || defined(__i386__)
++  //   #define Z7_WIN32_WINNT_MIN  0x0500  // win2000
++  #else // x86 and another(old) systems
++    #define Z7_WIN32_WINNT_MIN  0x0500  // win2000
++    // #define Z7_WIN32_WINNT_MIN  0x0502  // win2003 // for debug
++  #endif
++#endif // Z7_WIN32_WINNT_MIN
++
++
++#ifndef Z7_DO_NOT_DEFINE_WIN32_WINNT
++#ifdef _WIN32_WINNT
++  // #error Stop_Compiling_Bad_WIN32_WINNT
++#else
++  #ifndef Z7_NO_DEFINE_WIN32_WINNT
++Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
++    #define _WIN32_WINNT  Z7_WIN32_WINNT_MIN
++Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
++  #endif
++#endif // _WIN32_WINNT
++
++#ifndef WINVER
++  #define WINVER  _WIN32_WINNT
++#endif
++#endif // Z7_DO_NOT_DEFINE_WIN32_WINNT
++
++
++#ifndef _MBCS
++#ifndef Z7_NO_UNICODE
++// UNICODE and _UNICODE are used by <windows.h> and by 7-zip code.
++
++#ifndef UNICODE
++#define UNICODE 1
++#endif
++
++#ifndef _UNICODE
++Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
++#define _UNICODE 1
++Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
++#endif
++
++#endif // Z7_NO_UNICODE
++#endif // _MBCS
++#endif // _WINDOWS_
++
++// #include "7zWindows.h"
++
++#endif // _WIN32
+ 
+ #endif
+diff --git a/third_party/lzma_sdk/C/RotateDefs.h b/third_party/lzma_sdk/C/RotateDefs.h
+index 8f01d1a6c595fba3df694493ecb10f74b7725436..c16b4f8e6a127f4bf12aa20d3d3fbd1f09ec04a9 100644
+--- a/third_party/lzma_sdk/C/RotateDefs.h
++++ b/third_party/lzma_sdk/C/RotateDefs.h
+@@ -1,14 +1,14 @@
+ /* RotateDefs.h -- Rotate functions
+-2015-03-25 : Igor Pavlov : Public domain */
++2023-06-18 : Igor Pavlov : Public domain */
+ 
+-#ifndef __ROTATE_DEFS_H
+-#define __ROTATE_DEFS_H
++#ifndef ZIP7_INC_ROTATE_DEFS_H
++#define ZIP7_INC_ROTATE_DEFS_H
+ 
+ #ifdef _MSC_VER
+ 
+ #include <stdlib.h>
+ 
+-/* don't use _rotl with MINGW. It can insert slow call to function. */
++/* don't use _rotl with old MINGW. It can insert slow call to function. */
+  
+ /* #if (_MSC_VER >= 1200) */
+ #pragma intrinsic(_rotl)
+@@ -18,12 +18,32 @@
+ #define rotlFixed(x, n) _rotl((x), (n))
+ #define rotrFixed(x, n) _rotr((x), (n))
+ 
++#if (_MSC_VER >= 1300)
++#define Z7_ROTL64(x, n) _rotl64((x), (n))
++#define Z7_ROTR64(x, n) _rotr64((x), (n))
++#else
++#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
++#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
++#endif
++
+ #else
+ 
+ /* new compilers can translate these macros to fast commands. */
+ 
++#if defined(__clang__) && (__clang_major__ >= 4) \
++  || defined(__GNUC__) && (__GNUC__ >= 5)
++/* GCC 4.9.0 and clang 3.5 can recognize more correct version: */
++#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (-(n) & 31)))
++#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (-(n) & 31)))
++#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (-(n) & 63)))
++#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (-(n) & 63)))
++#else
++/* for old GCC / clang: */
+ #define rotlFixed(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
+ #define rotrFixed(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
++#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
++#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
++#endif
+ 
+ #endif
+ 
+diff --git a/third_party/lzma_sdk/C/Sha256.c b/third_party/lzma_sdk/C/Sha256.c
+index 21996848c91569501a6685f130566c79a096cec0..ea7ed8e751dc369fed569616880a68e0439f452e 100644
+--- a/third_party/lzma_sdk/C/Sha256.c
++++ b/third_party/lzma_sdk/C/Sha256.c
+@@ -1,65 +1,60 @@
+ /* Sha256.c -- SHA-256 Hash
+-2021-04-01 : Igor Pavlov : Public domain
++: Igor Pavlov : Public domain
+ This code is based on public domain code from Wei Dai's Crypto++ library. */
+ 
+ #include "Precomp.h"
+ 
+ #include <string.h>
+ 
+-#include "CpuArch.h"
+-#include "RotateDefs.h"
+ #include "Sha256.h"
+-
+-#if defined(_MSC_VER) && (_MSC_VER < 1900)
+-// #define USE_MY_MM
+-#endif
++#include "RotateDefs.h"
++#include "CpuArch.h"
+ 
+ #ifdef MY_CPU_X86_OR_AMD64
+-  #ifdef _MSC_VER
+-    #if _MSC_VER >= 1200
+-      #define _SHA_SUPPORTED
+-    #endif
+-  #elif defined(__clang__)
+-    #if (__clang_major__ >= 8) // fix that check
+-      #define _SHA_SUPPORTED
+-    #endif
+-  #elif defined(__GNUC__)
+-    #if (__GNUC__ >= 8) // fix that check
+-      #define _SHA_SUPPORTED
+-    #endif
+-  #elif defined(__INTEL_COMPILER)
+-    #if (__INTEL_COMPILER >= 1800) // fix that check
+-      #define _SHA_SUPPORTED
+-    #endif
++  #if   defined(Z7_LLVM_CLANG_VERSION)  && (Z7_LLVM_CLANG_VERSION  >= 30800) \
++     || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \
++     || defined(Z7_GCC_VERSION)         && (Z7_GCC_VERSION         >= 40900) \
++     || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) \
++     || defined(_MSC_VER) && (_MSC_VER >= 1200)
++      #define Z7_COMPILER_SHA256_SUPPORTED
+   #endif
+-// TODO(crbug.com/1338627): Enable ARM optimizations
+-#elif 0 // defined(MY_CPU_ARM_OR_ARM64)
+-  #ifdef _MSC_VER
+-    #if _MSC_VER >= 1910
+-      #define _SHA_SUPPORTED
++#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
++
++  #if   defined(__ARM_FEATURE_SHA2) \
++     || defined(__ARM_FEATURE_CRYPTO)
++    #define Z7_COMPILER_SHA256_SUPPORTED
++  #else
++    #if  defined(MY_CPU_ARM64) \
++      || defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \
++      || defined(Z7_MSC_VER_ORIGINAL)
++    #if  defined(__ARM_FP) && \
++          (   defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
++           || defined(__GNUC__) && (__GNUC__ >= 6) \
++          ) \
++      || defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910)
++    #if  defined(MY_CPU_ARM64) \
++      || !defined(Z7_CLANG_VERSION) \
++      || defined(__ARM_NEON) && \
++          (Z7_CLANG_VERSION < 170000 || \
++           Z7_CLANG_VERSION > 170001)
++      #define Z7_COMPILER_SHA256_SUPPORTED
+     #endif
+-  #elif defined(__clang__)
+-    #if (__clang_major__ >= 8) // fix that check
+-      #define _SHA_SUPPORTED
+     #endif
+-  #elif defined(__GNUC__)
+-    #if (__GNUC__ >= 6) // fix that check
+-      #define _SHA_SUPPORTED
+     #endif
+   #endif
+ #endif
+ 
+-void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
++void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
+ 
+-#ifdef _SHA_SUPPORTED
+-  void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
++#ifdef Z7_COMPILER_SHA256_SUPPORTED
++  void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
+ 
+-  static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS = Sha256_UpdateBlocks;
+-  static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS_HW;
++  static SHA256_FUNC_UPDATE_BLOCKS g_SHA256_FUNC_UPDATE_BLOCKS = Sha256_UpdateBlocks;
++  static SHA256_FUNC_UPDATE_BLOCKS g_SHA256_FUNC_UPDATE_BLOCKS_HW;
+ 
+-  #define UPDATE_BLOCKS(p) p->func_UpdateBlocks
++  #define SHA256_UPDATE_BLOCKS(p) p->v.vars.func_UpdateBlocks
+ #else
+-  #define UPDATE_BLOCKS(p) Sha256_UpdateBlocks
++  #define SHA256_UPDATE_BLOCKS(p) Sha256_UpdateBlocks
+ #endif
+ 
+ 
+@@ -67,16 +62,16 @@ BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo)
+ {
+   SHA256_FUNC_UPDATE_BLOCKS func = Sha256_UpdateBlocks;
+   
+-  #ifdef _SHA_SUPPORTED
++  #ifdef Z7_COMPILER_SHA256_SUPPORTED
+     if (algo != SHA256_ALGO_SW)
+     {
+       if (algo == SHA256_ALGO_DEFAULT)
+-        func = g_FUNC_UPDATE_BLOCKS;
++        func = g_SHA256_FUNC_UPDATE_BLOCKS;
+       else
+       {
+         if (algo != SHA256_ALGO_HW)
+           return False;
+-        func = g_FUNC_UPDATE_BLOCKS_HW;
++        func = g_SHA256_FUNC_UPDATE_BLOCKS_HW;
+         if (!func)
+           return False;
+       }
+@@ -86,24 +81,25 @@ BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo)
+       return False;
+   #endif
+ 
+-  p->func_UpdateBlocks = func;
++  p->v.vars.func_UpdateBlocks = func;
+   return True;
+ }
+ 
+ 
+ /* define it for speed optimization */
+ 
+-#ifdef _SFX
++#ifdef Z7_SFX
+   #define STEP_PRE 1
+   #define STEP_MAIN 1
+ #else
+   #define STEP_PRE 2
+   #define STEP_MAIN 4
+-  // #define _SHA256_UNROLL
++  // #define Z7_SHA256_UNROLL
+ #endif
+ 
++#undef Z7_SHA256_BIG_W
+ #if STEP_MAIN != 16
+-  #define _SHA256_BIG_W
++  #define Z7_SHA256_BIG_W
+ #endif
+ 
+ 
+@@ -111,7 +107,7 @@ BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo)
+ 
+ void Sha256_InitState(CSha256 *p)
+ {
+-  p->count = 0;
++  p->v.vars.count = 0;
+   p->state[0] = 0x6a09e667;
+   p->state[1] = 0xbb67ae85;
+   p->state[2] = 0x3c6ef372;
+@@ -122,21 +118,28 @@ void Sha256_InitState(CSha256 *p)
+   p->state[7] = 0x5be0cd19;
+ }
+ 
++
++
++
++
++
++
++
+ void Sha256_Init(CSha256 *p)
+ {
+-  p->func_UpdateBlocks =
+-  #ifdef _SHA_SUPPORTED
+-      g_FUNC_UPDATE_BLOCKS;
++  p->v.vars.func_UpdateBlocks =
++  #ifdef Z7_COMPILER_SHA256_SUPPORTED
++      g_SHA256_FUNC_UPDATE_BLOCKS;
+   #else
+       NULL;
+   #endif
+   Sha256_InitState(p);
+ }
+ 
+-#define S0(x) (rotrFixed(x, 2) ^ rotrFixed(x,13) ^ rotrFixed(x, 22))
+-#define S1(x) (rotrFixed(x, 6) ^ rotrFixed(x,11) ^ rotrFixed(x, 25))
++#define S0(x) (rotrFixed(x, 2) ^ rotrFixed(x,13) ^ rotrFixed(x,22))
++#define S1(x) (rotrFixed(x, 6) ^ rotrFixed(x,11) ^ rotrFixed(x,25))
+ #define s0(x) (rotrFixed(x, 7) ^ rotrFixed(x,18) ^ (x >> 3))
+-#define s1(x) (rotrFixed(x,17) ^ rotrFixed(x,19) ^ (x >> 10))
++#define s1(x) (rotrFixed(x,17) ^ rotrFixed(x,19) ^ (x >>10))
+ 
+ #define Ch(x,y,z) (z^(x&(y^z)))
+ #define Maj(x,y,z) ((x&y)|(z&(x|y)))
+@@ -146,7 +149,7 @@ void Sha256_Init(CSha256 *p)
+ 
+ #define blk2_main(j, i)  s1(w(j, (i)-2)) + w(j, (i)-7) + s0(w(j, (i)-15))
+ 
+-#ifdef _SHA256_BIG_W
++#ifdef Z7_SHA256_BIG_W
+     // we use +i instead of +(i) to change the order to solve CLANG compiler warning for signed/unsigned.
+     #define w(j, i)     W[(size_t)(j) + i]
+     #define blk2(j, i)  (w(j, i) = w(j, (i)-16) + blk2_main(j, i))
+@@ -177,7 +180,7 @@ void Sha256_Init(CSha256 *p)
+ #define R1_PRE(i)  T1( W_PRE, i)
+ #define R1_MAIN(i) T1( W_MAIN, i)
+ 
+-#if (!defined(_SHA256_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4)
++#if (!defined(Z7_SHA256_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4)
+ #define R2_MAIN(i) \
+     R1_MAIN(i) \
+     R1_MAIN(i + 1) \
+@@ -186,7 +189,7 @@ void Sha256_Init(CSha256 *p)
+ 
+ 
+ 
+-#if defined(_SHA256_UNROLL) && STEP_MAIN >= 8
++#if defined(Z7_SHA256_UNROLL) && STEP_MAIN >= 8
+ 
+ #define T4( a,b,c,d,e,f,g,h, wx, i) \
+     h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
+@@ -224,14 +227,10 @@ void Sha256_Init(CSha256 *p)
+ 
+ #endif
+ 
+-void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
+ 
+-// static
+-extern MY_ALIGN(64)
+-const UInt32 SHA256_K_ARRAY[64];
+-
+-MY_ALIGN(64)
+-const UInt32 SHA256_K_ARRAY[64] = {
++extern
++MY_ALIGN(64) const UInt32 SHA256_K_ARRAY[64];
++MY_ALIGN(64) const UInt32 SHA256_K_ARRAY[64] = {
+   0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+   0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+   0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+@@ -250,27 +249,29 @@ const UInt32 SHA256_K_ARRAY[64] = {
+   0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+ };
+ 
+-#define K SHA256_K_ARRAY
+ 
+ 
+-MY_NO_INLINE
+-void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks)
++
++
++#define K SHA256_K_ARRAY
++
++Z7_NO_INLINE
++void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks)
+ {
+   UInt32 W
+-  #ifdef _SHA256_BIG_W
++#ifdef Z7_SHA256_BIG_W
+       [64];
+-  #else
++#else
+       [16];
+-  #endif
+-
++#endif
+   unsigned j;
+-
+   UInt32 a,b,c,d,e,f,g,h;
+-
+-  #if !defined(_SHA256_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4)
++#if !defined(Z7_SHA256_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4)
+   UInt32 tmp;
+-  #endif
++#endif
+   
++  if (numBlocks == 0) return;
++
+   a = state[0];
+   b = state[1];
+   c = state[2];
+@@ -280,7 +281,7 @@ void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t
+   g = state[6];
+   h = state[7];
+ 
+-  while (numBlocks)
++  do
+   {
+ 
+   for (j = 0; j < 16; j += STEP_PRE)
+@@ -298,12 +299,12 @@ void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t
+ 
+     #else
+ 
+-      R1_PRE(0);
++      R1_PRE(0)
+       #if STEP_PRE >= 2
+-      R1_PRE(1);
++      R1_PRE(1)
+       #if STEP_PRE >= 4
+-      R1_PRE(2);
+-      R1_PRE(3);
++      R1_PRE(2)
++      R1_PRE(3)
+       #endif
+       #endif
+     
+@@ -312,32 +313,32 @@ void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t
+ 
+   for (j = 16; j < 64; j += STEP_MAIN)
+   {
+-    #if defined(_SHA256_UNROLL) && STEP_MAIN >= 8
++    #if defined(Z7_SHA256_UNROLL) && STEP_MAIN >= 8
+ 
+       #if STEP_MAIN < 8
+-      R4_MAIN(0);
++      R4_MAIN(0)
+       #else
+-      R8_MAIN(0);
++      R8_MAIN(0)
+       #if STEP_MAIN == 16
+-      R8_MAIN(8);
++      R8_MAIN(8)
+       #endif
+       #endif
+ 
+     #else
+       
+-      R1_MAIN(0);
++      R1_MAIN(0)
+       #if STEP_MAIN >= 2
+-      R1_MAIN(1);
++      R1_MAIN(1)
+       #if STEP_MAIN >= 4
+-      R2_MAIN(2);
++      R2_MAIN(2)
+       #if STEP_MAIN >= 8
+-      R2_MAIN(4);
+-      R2_MAIN(6);
++      R2_MAIN(4)
++      R2_MAIN(6)
+       #if STEP_MAIN >= 16
+-      R2_MAIN(8);
+-      R2_MAIN(10);
+-      R2_MAIN(12);
+-      R2_MAIN(14);
++      R2_MAIN(8)
++      R2_MAIN(10)
++      R2_MAIN(12)
++      R2_MAIN(14)
+       #endif
+       #endif
+       #endif
+@@ -354,40 +355,27 @@ void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t
+   g += state[6]; state[6] = g;
+   h += state[7]; state[7] = h;
+ 
+-  data += 64;
+-  numBlocks--;
++  data += SHA256_BLOCK_SIZE;
+   }
+-
+-  /* Wipe variables */
+-  /* memset(W, 0, sizeof(W)); */
++  while (--numBlocks);
+ }
+ 
+-#undef S0
+-#undef S1
+-#undef s0
+-#undef s1
+-#undef K
+ 
+-#define Sha256_UpdateBlock(p) UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
++#define Sha256_UpdateBlock(p) SHA256_UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
+ 
+ void Sha256_Update(CSha256 *p, const Byte *data, size_t size)
+ {
+   if (size == 0)
+     return;
+-
+   {
+-    unsigned pos = (unsigned)p->count & 0x3F;
+-    unsigned num;
+-    
+-    p->count += size;
+-    
+-    num = 64 - pos;
++    const unsigned pos = (unsigned)p->v.vars.count & (SHA256_BLOCK_SIZE - 1);
++    const unsigned num = SHA256_BLOCK_SIZE - pos;
++    p->v.vars.count += size;
+     if (num > size)
+     {
+       memcpy(p->buffer + pos, data, size);
+       return;
+     }
+-    
+     if (pos != 0)
+     {
+       size -= num;
+@@ -397,9 +385,10 @@ void Sha256_Update(CSha256 *p, const Byte *data, size_t size)
+     }
+   }
+   {
+-    size_t numBlocks = size >> 6;
+-    UPDATE_BLOCKS(p)(p->state, data, numBlocks);
+-    size &= 0x3F;
++    const size_t numBlocks = size >> 6;
++    // if (numBlocks)
++    SHA256_UPDATE_BLOCKS(p)(p->state, data, numBlocks);
++    size &= SHA256_BLOCK_SIZE - 1;
+     if (size == 0)
+       return;
+     data += (numBlocks << 6);
+@@ -410,78 +399,94 @@ void Sha256_Update(CSha256 *p, const Byte *data, size_t size)
+ 
+ void Sha256_Final(CSha256 *p, Byte *digest)
+ {
+-  unsigned pos = (unsigned)p->count & 0x3F;
+-  unsigned i;
+-  
++  unsigned pos = (unsigned)p->v.vars.count & (SHA256_BLOCK_SIZE - 1);
+   p->buffer[pos++] = 0x80;
+-  
+-  if (pos > (64 - 8))
++  if (pos > (SHA256_BLOCK_SIZE - 4 * 2))
+   {
+-    while (pos != 64) { p->buffer[pos++] = 0; }
+-    // memset(&p->buf.buffer[pos], 0, 64 - pos);
++    while (pos != SHA256_BLOCK_SIZE) { p->buffer[pos++] = 0; }
++    // memset(&p->buf.buffer[pos], 0, SHA256_BLOCK_SIZE - pos);
+     Sha256_UpdateBlock(p);
+     pos = 0;
+   }
+-
+-  /*
+-  if (pos & 3)
++  memset(&p->buffer[pos], 0, (SHA256_BLOCK_SIZE - 4 * 2) - pos);
+   {
+-    p->buffer[pos] = 0;
+-    p->buffer[pos + 1] = 0;
+-    p->buffer[pos + 2] = 0;
+-    pos += 3;
+-    pos &= ~3;
++    const UInt64 numBits = p->v.vars.count << 3;
++    SetBe32(p->buffer + SHA256_BLOCK_SIZE - 4 * 2, (UInt32)(numBits >> 32))
++    SetBe32(p->buffer + SHA256_BLOCK_SIZE - 4 * 1, (UInt32)(numBits))
+   }
++  Sha256_UpdateBlock(p);
++#if 1 && defined(MY_CPU_BE)
++  memcpy(digest, p->state, SHA256_DIGEST_SIZE);
++#else
+   {
+-    for (; pos < 64 - 8; pos += 4)
+-      *(UInt32 *)(&p->buffer[pos]) = 0;
++    unsigned i;
++    for (i = 0; i < 8; i += 2)
++    {
++      const UInt32 v0 = p->state[i];
++      const UInt32 v1 = p->state[(size_t)i + 1];
++      SetBe32(digest    , v0)
++      SetBe32(digest + 4, v1)
++      digest += 4 * 2;
++    }
+   }
+-  */
+ 
+-  memset(&p->buffer[pos], 0, (64 - 8) - pos);
+ 
+-  {
+-    UInt64 numBits = (p->count << 3);
+-    SetBe32(p->buffer + 64 - 8, (UInt32)(numBits >> 32));
+-    SetBe32(p->buffer + 64 - 4, (UInt32)(numBits));
+-  }
+-  
+-  Sha256_UpdateBlock(p);
+ 
+-  for (i = 0; i < 8; i += 2)
+-  {
+-    UInt32 v0 = p->state[i];
+-    UInt32 v1 = p->state[(size_t)i + 1];
+-    SetBe32(digest    , v0);
+-    SetBe32(digest + 4, v1);
+-    digest += 8;
+-  }
+-  
++
++#endif
+   Sha256_InitState(p);
+ }
+ 
+ 
+-void Sha256Prepare()
++void Sha256Prepare(void)
+ {
+-  #ifdef _SHA_SUPPORTED
++#ifdef Z7_COMPILER_SHA256_SUPPORTED
+   SHA256_FUNC_UPDATE_BLOCKS f, f_hw;
+   f = Sha256_UpdateBlocks;
+   f_hw = NULL;
+-  #ifdef MY_CPU_X86_OR_AMD64
+-  #ifndef USE_MY_MM
++#ifdef MY_CPU_X86_OR_AMD64
+   if (CPU_IsSupported_SHA()
+       && CPU_IsSupported_SSSE3()
+-      // && CPU_IsSupported_SSE41()
+       )
+-  #endif
+-  #else
++#else
+   if (CPU_IsSupported_SHA2())
+-  #endif
++#endif
+   {
+     // printf("\n========== HW SHA256 ======== \n");
+     f = f_hw = Sha256_UpdateBlocks_HW;
+   }
+-  g_FUNC_UPDATE_BLOCKS    = f;
+-  g_FUNC_UPDATE_BLOCKS_HW = f_hw;
+-  #endif
++  g_SHA256_FUNC_UPDATE_BLOCKS    = f;
++  g_SHA256_FUNC_UPDATE_BLOCKS_HW = f_hw;
++#endif
+ }
++
++#undef U64C
++#undef K
++#undef S0
++#undef S1
++#undef s0
++#undef s1
++#undef Ch
++#undef Maj
++#undef W_MAIN
++#undef W_PRE
++#undef w
++#undef blk2_main
++#undef blk2
++#undef T1
++#undef T4
++#undef T8
++#undef R1_PRE
++#undef R1_MAIN
++#undef R2_MAIN
++#undef R4
++#undef R4_PRE
++#undef R4_MAIN
++#undef R8
++#undef R8_PRE
++#undef R8_MAIN
++#undef STEP_PRE
++#undef STEP_MAIN
++#undef Z7_SHA256_BIG_W
++#undef Z7_SHA256_UNROLL
++#undef Z7_COMPILER_SHA256_SUPPORTED
+diff --git a/third_party/lzma_sdk/C/Sha256.h b/third_party/lzma_sdk/C/Sha256.h
+index aa38501e57948196761aec76c9475fcbe67fc677..75329cdf02267d32eb0a20358608fc08ea8c903c 100644
+--- a/third_party/lzma_sdk/C/Sha256.h
++++ b/third_party/lzma_sdk/C/Sha256.h
+@@ -1,8 +1,8 @@
+ /* Sha256.h -- SHA-256 Hash
+-2021-01-01 : Igor Pavlov : Public domain */
++: Igor Pavlov : Public domain */
+ 
+-#ifndef __7Z_SHA256_H
+-#define __7Z_SHA256_H
++#ifndef ZIP7_INC_SHA256_H
++#define ZIP7_INC_SHA256_H
+ 
+ #include "7zTypes.h"
+ 
+@@ -14,7 +14,10 @@ EXTERN_C_BEGIN
+ #define SHA256_BLOCK_SIZE   (SHA256_NUM_BLOCK_WORDS * 4)
+ #define SHA256_DIGEST_SIZE  (SHA256_NUM_DIGEST_WORDS * 4)
+ 
+-typedef void (MY_FAST_CALL *SHA256_FUNC_UPDATE_BLOCKS)(UInt32 state[8], const Byte *data, size_t numBlocks);
++
++
++
++typedef void (Z7_FASTCALL *SHA256_FUNC_UPDATE_BLOCKS)(UInt32 state[8], const Byte *data, size_t numBlocks);
+ 
+ /*
+   if (the system supports different SHA256 code implementations)
+@@ -32,9 +35,16 @@ typedef void (MY_FAST_CALL *SHA256_FUNC_UPDATE_BLOCKS)(UInt32 state[8], const By
+ 
+ typedef struct
+ {
+-  SHA256_FUNC_UPDATE_BLOCKS func_UpdateBlocks;
+-  UInt64 count;
+-  UInt64 __pad_2[2];
++  union
++  {
++    struct
++    {
++      SHA256_FUNC_UPDATE_BLOCKS func_UpdateBlocks;
++      UInt64 count;
++    } vars;
++    UInt64 _pad_64bit[4];
++    void *_pad_align_ptr[2];
++  } v;
+   UInt32 state[SHA256_NUM_DIGEST_WORDS];
+ 
+   Byte buffer[SHA256_BLOCK_SIZE];
+@@ -62,7 +72,7 @@ void Sha256_Final(CSha256 *p, Byte *digest);
+ 
+ 
+ 
+-// void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
++// void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
+ 
+ /*
+ call Sha256Prepare() once at program start.
+diff --git a/third_party/lzma_sdk/C/Sha256Opt.c b/third_party/lzma_sdk/C/Sha256Opt.c
+index decc1382cd6ee9c7cb6e6ac27e3ee6e2cdb46b88..1c6b50f8d336e63be89462f15d48c7ed3ca625e9 100644
+--- a/third_party/lzma_sdk/C/Sha256Opt.c
++++ b/third_party/lzma_sdk/C/Sha256Opt.c
+@@ -1,71 +1,53 @@
+ /* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions
+-2021-04-01 : Igor Pavlov : Public domain */
++: Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+-
+-#if defined(_MSC_VER)
+-#if (_MSC_VER < 1900) && (_MSC_VER >= 1200)
+-// #define USE_MY_MM
+-#endif
+-#endif
+-
++#include "Compiler.h"
+ #include "CpuArch.h"
+ 
++// #define Z7_USE_HW_SHA_STUB // for debug
+ #ifdef MY_CPU_X86_OR_AMD64
+-  #if defined(__clang__)
+-    #if (__clang_major__ >= 8) // fix that check
++  #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check
+       #define USE_HW_SHA
+-      #ifndef __SHA__
+-        #define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
+-        #if defined(_MSC_VER)
+-          // SSSE3: for clang-cl:
+-          #include <tmmintrin.h>
+-          #define __SHA__
+-        #endif
+-      #endif
+-
+-    #endif
+-  #elif defined(__GNUC__)
+-    #if (__GNUC__ >= 8) // fix that check
++  #elif defined(Z7_LLVM_CLANG_VERSION)  && (Z7_LLVM_CLANG_VERSION  >= 30800) \
++     || defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \
++     || defined(Z7_GCC_VERSION)         && (Z7_GCC_VERSION         >= 40900)
+       #define USE_HW_SHA
+-      #ifndef __SHA__
++      #if !defined(__INTEL_COMPILER)
++      // icc defines __GNUC__, but icc doesn't support __attribute__(__target__)
++      #if !defined(__SHA__) || !defined(__SSSE3__)
+         #define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
+-        // #pragma GCC target("sha,ssse3")
+       #endif
+-    #endif
+-  #elif defined(__INTEL_COMPILER)
+-    #if (__INTEL_COMPILER >= 1800) // fix that check
+-      #define USE_HW_SHA
+-    #endif
++      #endif
+   #elif defined(_MSC_VER)
+-    #ifdef USE_MY_MM
+-      #define USE_VER_MIN 1300
+-    #else
+-      #define USE_VER_MIN 1910
+-    #endif
+-    #if _MSC_VER >= USE_VER_MIN
++    #if (_MSC_VER >= 1900)
+       #define USE_HW_SHA
++    #else
++      #define Z7_USE_HW_SHA_STUB
+     #endif
+   #endif
+ // #endif // MY_CPU_X86_OR_AMD64
++#ifndef USE_HW_SHA
++  // #define Z7_USE_HW_SHA_STUB // for debug
++#endif
+ 
+ #ifdef USE_HW_SHA
+ 
+ // #pragma message("Sha256 HW")
+-// #include <wmmintrin.h>
+ 
+-#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
+-#include <immintrin.h>
+-#else
+-#include <emmintrin.h>
+ 
+-#if defined(_MSC_VER) && (_MSC_VER >= 1600)
+-// #include <intrin.h>
+-#endif
+ 
+-#ifdef USE_MY_MM
+-#include "My_mm.h"
+-#endif
++
++// sse/sse2/ssse3:
++#include <tmmintrin.h>
++// sha*:
++#include <immintrin.h>
++
++#if defined (__clang__) && defined(_MSC_VER)
++  #if !defined(__SHA__)
++    #include <shaintrin.h>
++  #endif
++#else
+ 
+ #endif
+ 
+@@ -94,60 +76,44 @@ SHA:
+ extern
+ MY_ALIGN(64)
+ const UInt32 SHA256_K_ARRAY[64];
+-
+ #define K SHA256_K_ARRAY
+ 
+ 
+-#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src);
+-#define SHA256_MSG1(dest, src) dest = _mm_sha256msg1_epu32(dest, src);
+-#define SHA25G_MSG2(dest, src) dest = _mm_sha256msg2_epu32(dest, src);
+-
++#define ADD_EPI32(dest, src)      dest = _mm_add_epi32(dest, src);
++#define SHA256_MSG1(dest, src)    dest = _mm_sha256msg1_epu32(dest, src);
++#define SHA256_MSG2(dest, src)    dest = _mm_sha256msg2_epu32(dest, src);
+ 
+ #define LOAD_SHUFFLE(m, k) \
+     m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \
+     m = _mm_shuffle_epi8(m, mask); \
+ 
+-#define SM1(g0, g1, g2, g3) \
+-    SHA256_MSG1(g3, g0); \
+-
+-#define SM2(g0, g1, g2, g3) \
+-    tmp = _mm_alignr_epi8(g1, g0, 4); \
+-    ADD_EPI32(g2, tmp); \
+-    SHA25G_MSG2(g2, g1); \
+-
+-// #define LS0(k, g0, g1, g2, g3) LOAD_SHUFFLE(g0, k)
+-// #define LS1(k, g0, g1, g2, g3) LOAD_SHUFFLE(g1, k+1)
++#define NNN(m0, m1, m2, m3)
+ 
++#define SM1(m1, m2, m3, m0) \
++    SHA256_MSG1(m0, m1); \
+ 
+-#define NNN(g0, g1, g2, g3)
+-
++#define SM2(m2, m3, m0, m1) \
++    ADD_EPI32(m0, _mm_alignr_epi8(m3, m2, 4)) \
++    SHA256_MSG2(m0, m3); \
+ 
+ #define RND2(t0, t1) \
+     t0 = _mm_sha256rnds2_epu32(t0, t1, msg);
+ 
+-#define RND2_0(m, k) \
+-    msg = _mm_add_epi32(m, *(const __m128i *) (const void *) &K[(k) * 4]); \
+-    RND2(state0, state1); \
+-    msg = _mm_shuffle_epi32(msg, 0x0E); \
+ 
+ 
+-#define RND2_1 \
++#define R4(k, m0, m1, m2, m3, OP0, OP1) \
++    msg = _mm_add_epi32(m0, *(const __m128i *) (const void *) &K[(k) * 4]); \
++    RND2(state0, state1); \
++    msg = _mm_shuffle_epi32(msg, 0x0E); \
++    OP0(m0, m1, m2, m3) \
+     RND2(state1, state0); \
+-
+-
+-// We use scheme with 3 rounds ahead for SHA256_MSG1 / 2 rounds ahead for SHA256_MSG2
+-
+-#define R4(k, g0, g1, g2, g3, OP0, OP1) \
+-    RND2_0(g0, k); \
+-    OP0(g0, g1, g2, g3); \
+-    RND2_1; \
+-    OP1(g0, g1, g2, g3); \
++    OP1(m0, m1, m2, m3) \
+ 
+ #define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
+-    R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \
+-    R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \
+-    R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \
+-    R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \
++    R4 ( (k)*4+0, m0,m1,m2,m3, OP0, OP1 ) \
++    R4 ( (k)*4+1, m1,m2,m3,m0, OP2, OP3 ) \
++    R4 ( (k)*4+2, m2,m3,m0,m1, OP4, OP5 ) \
++    R4 ( (k)*4+3, m3,m0,m1,m2, OP6, OP7 ) \
+ 
+ #define PREPARE_STATE \
+     tmp    = _mm_shuffle_epi32(state0, 0x1B); /* abcd */ \
+@@ -157,15 +123,16 @@ const UInt32 SHA256_K_ARRAY[64];
+     state1 = _mm_unpackhi_epi64(state1, tmp); /* abef */ \
+ 
+ 
+-void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
++void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
+ #ifdef ATTRIB_SHA
+ ATTRIB_SHA
+ #endif
+-void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
++void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
+ {
+   const __m128i mask = _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203);
+-  __m128i tmp;
+-  __m128i state0, state1;
++
++
++  __m128i tmp, state0, state1;
+ 
+   if (numBlocks == 0)
+     return;
+@@ -192,13 +159,13 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size
+ 
+ 
+ 
+-    R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 );
+-    R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
+-    R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
+-    R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN );
++    R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 )
++    R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
++    R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
++    R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN )
+     
+-    ADD_EPI32(state0, state0_save);
+-    ADD_EPI32(state1, state1_save);
++    ADD_EPI32(state0, state0_save)
++    ADD_EPI32(state1, state1_save)
+     
+     data += 64;
+   }
+@@ -212,19 +179,28 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size
+ 
+ #endif // USE_HW_SHA
+ 
+-#elif defined(MY_CPU_ARM_OR_ARM64)
+-
+-  #if defined(__clang__)
+-    #if (__clang_major__ >= 8) // fix that check
++#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
++
++  #if   defined(__ARM_FEATURE_SHA2) \
++     || defined(__ARM_FEATURE_CRYPTO)
++    #define USE_HW_SHA
++  #else
++    #if  defined(MY_CPU_ARM64) \
++      || defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \
++      || defined(Z7_MSC_VER_ORIGINAL)
++    #if  defined(__ARM_FP) && \
++          (   defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
++           || defined(__GNUC__) && (__GNUC__ >= 6) \
++          ) \
++      || defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910)
++    #if  defined(MY_CPU_ARM64) \
++      || !defined(Z7_CLANG_VERSION) \
++      || defined(__ARM_NEON) && \
++          (Z7_CLANG_VERSION < 170000 || \
++           Z7_CLANG_VERSION > 170001)
+       #define USE_HW_SHA
+     #endif
+-  #elif defined(__GNUC__)
+-    #if (__GNUC__ >= 6) // fix that check
+-      #define USE_HW_SHA
+     #endif
+-  #elif defined(_MSC_VER)
+-    #if _MSC_VER >= 1910
+-      #define USE_HW_SHA
+     #endif
+   #endif
+ 
+@@ -232,63 +208,144 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size
+ 
+ // #pragma message("=== Sha256 HW === ")
+ 
++
+ #if defined(__clang__) || defined(__GNUC__)
++#if !defined(__ARM_FEATURE_SHA2) && \
++    !defined(__ARM_FEATURE_CRYPTO)
+   #ifdef MY_CPU_ARM64
++#if defined(__clang__)
++    #define ATTRIB_SHA __attribute__((__target__("crypto")))
++#else
+     #define ATTRIB_SHA __attribute__((__target__("+crypto")))
++#endif
+   #else
++#if defined(__clang__) && (__clang_major__ >= 1)
++    #define ATTRIB_SHA __attribute__((__target__("armv8-a,sha2")))
++#else
+     #define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
++#endif
+   #endif
++#endif
+ #else
+   // _MSC_VER
+   // for arm32
+   #define _ARM_USE_NEW_NEON_INTRINSICS
+ #endif
+ 
+-#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
++#if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64)
+ #include <arm64_neon.h>
+ #else
++
++#if defined(__clang__) && __clang_major__ < 16
++#if !defined(__ARM_FEATURE_SHA2) && \
++    !defined(__ARM_FEATURE_CRYPTO)
++//     #pragma message("=== we set __ARM_FEATURE_CRYPTO 1 === ")
++    Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
++    #define Z7_ARM_FEATURE_CRYPTO_WAS_SET 1
++// #if defined(__clang__) && __clang_major__ < 13
++    #define __ARM_FEATURE_CRYPTO 1
++// #else
++    #define __ARM_FEATURE_SHA2 1
++// #endif
++    Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
++#endif
++#endif // clang
++
++#if defined(__clang__)
++
++#if defined(__ARM_ARCH) && __ARM_ARCH < 8
++    Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
++//    #pragma message("#define __ARM_ARCH 8")
++    #undef  __ARM_ARCH
++    #define __ARM_ARCH 8
++    Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
++#endif
++
++#endif // clang
++
+ #include <arm_neon.h>
++
++#if defined(Z7_ARM_FEATURE_CRYPTO_WAS_SET) && \
++    defined(__ARM_FEATURE_CRYPTO) && \
++    defined(__ARM_FEATURE_SHA2)
++Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
++    #undef __ARM_FEATURE_CRYPTO
++    #undef __ARM_FEATURE_SHA2
++    #undef Z7_ARM_FEATURE_CRYPTO_WAS_SET
++Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
++//    #pragma message("=== we undefine __ARM_FEATURE_CRYPTO === ")
+ #endif
+ 
++#endif // Z7_MSC_VER_ORIGINAL
++
+ typedef uint32x4_t v128;
+ // typedef __n128 v128; // MSVC
+ 
+ #ifdef MY_CPU_BE
+-  #define MY_rev32_for_LE(x)
++  #define MY_rev32_for_LE(x) x
+ #else
+-  #define MY_rev32_for_LE(x) x = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x)))
++  #define MY_rev32_for_LE(x) vrev32q_u8(x)
+ #endif
+ 
+-#define LOAD_128(_p)      (*(const v128 *)(const void *)(_p))
+-#define STORE_128(_p, _v) *(v128 *)(void *)(_p) = (_v)
++#if 1 // 0 for debug
++// for arm32: for some reason it works slower than direct code
++/*
++for arm32 it generates:
++MSVC-2022, GCC-9:
++    vld1.32 {d18,d19}, [r10]
++    vst1.32 {d4,d5}, [r3]
++    vld1.8  {d20-d21}, [r4]
++There is no alignment hint (like [r10:128]), so the instruction allows unaligned access.
++*/
++#define LOAD_128_32(_p)       vld1q_u32(_p)
++#define LOAD_128_8(_p)        vld1q_u8 (_p)
++#define STORE_128_32(_p, _v)  vst1q_u32(_p, _v)
++#else
++/*
++for arm32:
++MSVC-2022:
++    vldm r10,{d18,d19}
++    vstm r3,{d4,d5}
++    does it require strict alignment?
++GCC-9:
++    vld1.64 {d30-d31}, [r0:64]
++    vldr  d28, [r0, #16]
++    vldr  d29, [r0, #24]
++    vst1.64 {d30-d31}, [r0:64]
++    vstr  d28, [r0, #16]
++    vstr  d29, [r0, #24]
++there is a hint [r0:64], so does it require 64-bit alignment?
++*/
++#define LOAD_128_32(_p)       (*(const v128 *)(const void *)(_p))
++#define LOAD_128_8(_p)        vreinterpretq_u8_u32(*(const v128 *)(const void *)(_p))
++#define STORE_128_32(_p, _v)  *(v128 *)(void *)(_p) = (_v)
++#endif
+ 
+ #define LOAD_SHUFFLE(m, k) \
+-    m = LOAD_128((data + (k) * 16)); \
+-    MY_rev32_for_LE(m); \
++    m = vreinterpretq_u32_u8( \
++        MY_rev32_for_LE( \
++        LOAD_128_8(data + (k) * 16))); \
+ 
+ // K array must be aligned for 16-bytes at least.
+ extern
+ MY_ALIGN(64)
+ const UInt32 SHA256_K_ARRAY[64];
+-
+ #define K SHA256_K_ARRAY
+ 
+-
+ #define SHA256_SU0(dest, src)        dest = vsha256su0q_u32(dest, src);
+-#define SHA25G_SU1(dest, src2, src3) dest = vsha256su1q_u32(dest, src2, src3);
++#define SHA256_SU1(dest, src2, src3) dest = vsha256su1q_u32(dest, src2, src3);
+ 
+-#define SM1(g0, g1, g2, g3)  SHA256_SU0(g3, g0)
+-#define SM2(g0, g1, g2, g3)  SHA25G_SU1(g2, g0, g1)
+-#define NNN(g0, g1, g2, g3)
++#define SM1(m0, m1, m2, m3)  SHA256_SU0(m3, m0)
++#define SM2(m0, m1, m2, m3)  SHA256_SU1(m2, m0, m1)
++#define NNN(m0, m1, m2, m3)
+ 
+-
+-#define R4(k, g0, g1, g2, g3, OP0, OP1) \
+-    msg = vaddq_u32(g0, *(const v128 *) (const void *) &K[(k) * 4]); \
++#define R4(k, m0, m1, m2, m3, OP0, OP1) \
++    msg = vaddq_u32(m0, *(const v128 *) (const void *) &K[(k) * 4]); \
+     tmp = state0; \
+     state0 = vsha256hq_u32( state0, state1, msg ); \
+     state1 = vsha256h2q_u32( state1, tmp, msg ); \
+-    OP0(g0, g1, g2, g3); \
+-    OP1(g0, g1, g2, g3); \
++    OP0(m0, m1, m2, m3); \
++    OP1(m0, m1, m2, m3); \
+ 
+ 
+ #define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
+@@ -298,19 +355,19 @@ const UInt32 SHA256_K_ARRAY[64];
+     R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \
+ 
+ 
+-void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
++void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
+ #ifdef ATTRIB_SHA
+ ATTRIB_SHA
+ #endif
+-void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
++void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
+ {
+   v128 state0, state1;
+ 
+   if (numBlocks == 0)
+     return;
+ 
+-  state0 = LOAD_128(&state[0]);
+-  state1 = LOAD_128(&state[4]);
++  state0 = LOAD_128_32(&state[0]);
++  state1 = LOAD_128_32(&state[4]);
+   
+   do
+   {
+@@ -326,10 +383,10 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size
+     LOAD_SHUFFLE (m2, 2)
+     LOAD_SHUFFLE (m3, 3)
+ 
+-    R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 );
+-    R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
+-    R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
+-    R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN );
++    R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 )
++    R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
++    R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
++    R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN )
+     
+     state0 = vaddq_u32(state0, state0_save);
+     state1 = vaddq_u32(state1, state1_save);
+@@ -338,8 +395,8 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size
+   }
+   while (--numBlocks);
+ 
+-  STORE_128(&state[0], state0);
+-  STORE_128(&state[4], state1);
++  STORE_128_32(&state[0], state0);
++  STORE_128_32(&state[4], state1);
+ }
+ 
+ #endif // USE_HW_SHA
+@@ -347,18 +404,19 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size
+ #endif // MY_CPU_ARM_OR_ARM64
+ 
+ 
+-#ifndef USE_HW_SHA
+-
++#if !defined(USE_HW_SHA) && defined(Z7_USE_HW_SHA_STUB)
+ // #error Stop_Compiling_UNSUPPORTED_SHA
+ // #include <stdlib.h>
+-
++// We can compile this file with another C compiler,
++// or we can compile asm version.
++// So we can generate real code instead of this stub function.
+ // #include "Sha256.h"
+-void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
+-
++// #if defined(_MSC_VER)
+ #pragma message("Sha256 HW-SW stub was used")
+-
+-void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
+-void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
++// #endif
++void Z7_FASTCALL Sha256_UpdateBlocks   (UInt32 state[8], const Byte *data, size_t numBlocks);
++void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
++void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
+ {
+   Sha256_UpdateBlocks(state, data, numBlocks);
+   /*
+@@ -369,5 +427,25 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size
+   return;
+   */
+ }
+-
+ #endif
++
++
++#undef K
++#undef RND2
++#undef MY_rev32_for_LE
++
++#undef NNN
++#undef LOAD_128
++#undef STORE_128
++#undef LOAD_SHUFFLE
++#undef SM1
++#undef SM2
++
++
++#undef R4
++#undef R16
++#undef PREPARE_STATE
++#undef USE_HW_SHA
++#undef ATTRIB_SHA
++#undef USE_VER_MIN
++#undef Z7_USE_HW_SHA_STUB
+diff --git a/third_party/lzma_sdk/C/Util/SfxSetup/BUILD.gn b/third_party/lzma_sdk/C/Util/SfxSetup/BUILD.gn
+index 1331312650400e1771dbdfc19bb347f27977cb1c..24d91bcc6074d0237eb09921af856c9f97396e73 100644
+--- a/third_party/lzma_sdk/C/Util/SfxSetup/BUILD.gn
++++ b/third_party/lzma_sdk/C/Util/SfxSetup/BUILD.gn
+@@ -3,12 +3,8 @@
+ # found in the LICENSE file.
+ 
+ source_set("7zS2_source") {
+-  sources = [
+-    "Precomp.h",
+-    "SfxSetup.c",
+-  ]
++  sources = [ "SfxSetup.c" ]
+   defines = [
+-    "_LZMA_PROB32",
+     "_WIN32",
+   ]
+   include_dirs = [
+diff --git a/third_party/lzma_sdk/C/Util/SfxSetup/Precomp.h b/third_party/lzma_sdk/C/Util/SfxSetup/Precomp.h
+deleted file mode 100644
+index 177a205f6979c34150d72b2a71ffcfd704a3f529..0000000000000000000000000000000000000000
+--- a/third_party/lzma_sdk/C/Util/SfxSetup/Precomp.h
++++ /dev/null
+@@ -1,10 +0,0 @@
+-/* Precomp.h -- StdAfx
+-2013-06-16 : Igor Pavlov : Public domain */
+-
+-#ifndef __7Z_PRECOMP_H
+-#define __7Z_PRECOMP_H
+-
+-#include "Compiler.h"
+-#include "7zTypes.h"
+-
+-#endif
+diff --git a/third_party/lzma_sdk/C/Util/SfxSetup/SfxSetup.c b/third_party/lzma_sdk/C/Util/SfxSetup/SfxSetup.c
+index 45efac060bd6ca7049658b1c637c7f86afab9d4b..c52e838f7f546dd12187a26a61837bf8123393a4 100644
+--- a/third_party/lzma_sdk/C/Util/SfxSetup/SfxSetup.c
++++ b/third_party/lzma_sdk/C/Util/SfxSetup/SfxSetup.c
+@@ -1,5 +1,5 @@
+ /* SfxSetup.c - 7z SFX Setup
+-2019-02-02 : Igor Pavlov : Public domain */
++2024-01-24 : Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+ 
+@@ -11,6 +11,7 @@
+ #define _UNICODE
+ #endif
+ 
++// SHELLEXECUTEINFO
+ #include <windows.h>
+ #include <shellapi.h>
+ 
+@@ -18,17 +19,23 @@
+ #include <stdio.h>
+ #endif
+ 
+-#include "7z.h"
+-#include "7zAlloc.h"
+-#include "7zCrc.h"
+-#include "7zFile.h"
+-#include "CpuArch.h"
+-#include "DllSecur.h"
++#include "../../7z.h"
++#include "../../7zAlloc.h"
++#include "../../7zCrc.h"
++#include "../../7zFile.h"
++#include "../../CpuArch.h"
++#include "../../DllSecur.h"
+ 
+ #define k_EXE_ExtIndex 2
+ 
+ #define kInputBufSize ((size_t)1 << 18)
+ 
++
++#define wcscat lstrcatW
++#define wcslen (size_t)lstrlenW
++#define wcscpy lstrcpyW
++// wcsncpy() and lstrcpynW() work differently. We don't use them.
++
+ static const char * const kExts[] =
+ {
+     "bat"
+@@ -67,7 +74,7 @@ static unsigned FindExt(const wchar_t *s, unsigned *extLen)
+   return len;
+ }
+ 
+-#define MAKE_CHAR_UPPER(c) ((((c) >= 'a' && (c) <= 'z') ? (c) -= 0x20 : (c)))
++#define MAKE_CHAR_UPPER(c) ((((c) >= 'a' && (c) <= 'z') ? (c) - 0x20 : (c)))
+ 
+ static unsigned FindItem(const char * const *items, unsigned num, const wchar_t *s, unsigned len)
+ {
+@@ -75,13 +82,13 @@ static unsigned FindItem(const char * const *items, unsigned num, const wchar_t
+   for (i = 0; i < num; i++)
+   {
+     const char *item = items[i];
+-    unsigned itemLen = (unsigned)strlen(item);
++    const unsigned itemLen = (unsigned)strlen(item);
+     unsigned j;
+     if (len != itemLen)
+       continue;
+     for (j = 0; j < len; j++)
+     {
+-      unsigned c = (Byte)item[j];
++      const unsigned c = (Byte)item[j];
+       if (c != s[j] && MAKE_CHAR_UPPER(c) != s[j])
+         break;
+     }
+@@ -99,10 +106,20 @@ static BOOL WINAPI HandlerRoutine(DWORD ctrlType)
+ }
+ #endif
+ 
++
++#ifdef _CONSOLE
++static void PrintStr(const char *s)
++{
++  fputs(s, stdout);
++}
++#endif
++
+ static void PrintErrorMessage(const char *message)
+ {
+   #ifdef _CONSOLE
+-  printf("\n7-Zip Error: %s\n", message);
++  PrintStr("\n7-Zip Error: ");
++  PrintStr(message);
++  PrintStr("\n");
+   #else
+   #ifdef UNDER_CE
+   WCHAR messageW[256 + 4];
+@@ -182,7 +199,7 @@ static WRes RemoveDirWithSubItems(WCHAR *path)
+   WIN32_FIND_DATAW fd;
+   HANDLE handle;
+   WRes res = 0;
+-  size_t len = wcslen(path);
++  const size_t len = wcslen(path);
+   wcscpy(path + len, L"*");
+   handle = FindFirstFileW(path, &fd);
+   path[len] = L'\0';
+@@ -231,7 +248,7 @@ static WRes RemoveDirWithSubItems(WCHAR *path)
+ }
+ 
+ #ifdef _CONSOLE
+-int MY_CDECL main()
++int Z7_CDECL main(void)
+ #else
+ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
+   #ifdef UNDER_CE
+@@ -265,10 +282,10 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
+   #ifdef _CONSOLE
+   SetConsoleCtrlHandler(HandlerRoutine, TRUE);
+   #else
+-  UNUSED_VAR(hInstance);
+-  UNUSED_VAR(hPrevInstance);
+-  UNUSED_VAR(lpCmdLine);
+-  UNUSED_VAR(nCmdShow);
++  UNUSED_VAR(hInstance)
++  UNUSED_VAR(hPrevInstance)
++  UNUSED_VAR(lpCmdLine)
++  UNUSED_VAR(nCmdShow)
+   #endif
+ 
+   CrcGenerateTable();
+@@ -293,7 +310,7 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
+       BoolInt quoteMode = False;
+       for (;; cmdLineParams++)
+       {
+-        wchar_t c = *cmdLineParams;
++        const wchar_t c = *cmdLineParams;
+         if (c == L'\"')
+           quoteMode = !quoteMode;
+         else if (c == 0 || (c == L' ' && !quoteMode))
+@@ -327,7 +344,7 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
+         unsigned k;
+         for (k = 0; k < 8; k++)
+         {
+-          unsigned t = value & 0xF;
++          const unsigned t = value & 0xF;
+           value >>= 4;
+           s[7 - k] = (wchar_t)((t < 10) ? ('0' + t) : ('A' + (t - 10)));
+         }
+@@ -389,7 +406,7 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
+     {
+       lookStream.bufSize = kInputBufSize;
+       lookStream.realStream = &archiveStream.vt;
+-      LookToRead2_Init(&lookStream);
++      LookToRead2_INIT(&lookStream)
+     }
+   }
+ 
+@@ -458,11 +475,11 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
+           unsigned extLen;
+           const WCHAR *name = temp + nameStartPos;
+           unsigned len = (unsigned)wcslen(name);
+-          unsigned nameLen = FindExt(temp + nameStartPos, &extLen);
+-          unsigned extPrice = FindItem(kExts, sizeof(kExts) / sizeof(kExts[0]), name + len - extLen, extLen);
+-          unsigned namePrice = FindItem(kNames, sizeof(kNames) / sizeof(kNames[0]), name, nameLen);
++          const unsigned nameLen = FindExt(temp + nameStartPos, &extLen);
++          const unsigned extPrice = FindItem(kExts, sizeof(kExts) / sizeof(kExts[0]), name + len - extLen, extLen);
++          const unsigned namePrice = FindItem(kNames, sizeof(kNames) / sizeof(kNames[0]), name, nameLen);
+ 
+-          unsigned price = namePrice + extPrice * 64 + (nameStartPos == 0 ? 0 : (1 << 12));
++          const unsigned price = namePrice + extPrice * 64 + (nameStartPos == 0 ? 0 : (1 << 12));
+           if (minPrice > price)
+           {
+             minPrice = price;
+@@ -503,12 +520,13 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
+         #endif
+         
+         {
+-          SRes res2 = File_Close(&outFile);
++          const WRes res2 = File_Close(&outFile);
+           if (res != SZ_OK)
+             break;
+-          if (res2 != SZ_OK)
++          if (res2 != 0)
+           {
+-            res = res2;
++            errorMessage = "Can't close output file";
++            res = SZ_ERROR_FAIL;
+             break;
+           }
+         }
+@@ -553,7 +571,7 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
+     WCHAR oldCurDir[MAX_PATH + 2];
+     oldCurDir[0] = 0;
+     {
+-      DWORD needLen = GetCurrentDirectory(MAX_PATH + 1, oldCurDir);
++      const DWORD needLen = GetCurrentDirectory(MAX_PATH + 1, oldCurDir);
+       if (needLen == 0 || needLen > MAX_PATH)
+         oldCurDir[0] = 0;
+       SetCurrentDirectory(workCurDir);
+diff --git a/third_party/lzma_sdk/C/Util/SfxSetup/chromium.patch b/third_party/lzma_sdk/C/Util/SfxSetup/chromium.patch
+deleted file mode 100644
+index 2c178d2c48643bd6d8e622b15280ad47105aa8ea..0000000000000000000000000000000000000000
+--- a/third_party/lzma_sdk/C/Util/SfxSetup/chromium.patch
++++ /dev/null
+@@ -1,53 +0,0 @@
+-diff --git a/third_party/lzma_sdk/Util/SfxSetup/Precomp.h b/third_party/lzma_sdk/Util/SfxSetup/Precomp.h
+-index 588a66f7eb8c..177a205f6979 100644
+---- a/third_party/lzma_sdk/Util/SfxSetup/Precomp.h
+-+++ b/third_party/lzma_sdk/Util/SfxSetup/Precomp.h
+-@@ -4,7 +4,7 @@
+- #ifndef __7Z_PRECOMP_H
+- #define __7Z_PRECOMP_H
+- 
+--#include "../../Compiler.h"
+--#include "../../7zTypes.h"
+-+#include "Compiler.h"
+-+#include "7zTypes.h"
+- 
+- #endif
+-diff --git a/third_party/lzma_sdk/Util/SfxSetup/SfxSetup.c b/third_party/lzma_sdk/Util/SfxSetup/SfxSetup.c
+-index ef19aeac5450..45efac060bd6 100644
+---- a/third_party/lzma_sdk/Util/SfxSetup/SfxSetup.c
+-+++ b/third_party/lzma_sdk/Util/SfxSetup/SfxSetup.c
+-@@ -11,16 +11,19 @@
+- #define _UNICODE
+- #endif
+- 
+-+#include <windows.h>
+-+#include <shellapi.h>
+-+
+- #ifdef _CONSOLE
+- #include <stdio.h>
+- #endif
+- 
+--#include "../../7z.h"
+--#include "../../7zAlloc.h"
+--#include "../../7zCrc.h"
+--#include "../../7zFile.h"
+--#include "../../CpuArch.h"
+--#include "../../DllSecur.h"
+-+#include "7z.h"
+-+#include "7zAlloc.h"
+-+#include "7zCrc.h"
+-+#include "7zFile.h"
+-+#include "CpuArch.h"
+-+#include "DllSecur.h"
+- 
+- #define k_EXE_ExtIndex 2
+- 
+-@@ -125,7 +128,7 @@ static WRes MyCreateDir(const WCHAR *name)
+- #define kBufferSize (1 << 15)
+- #endif
+- 
+--#define kSignatureSearchLimit (1 << 22)
+-+#define kSignatureSearchLimit (1 << 26)
+- 
+- static BoolInt FindSignature(CSzFile *stream, UInt64 *resPos)
+- {
+diff --git a/third_party/lzma_sdk/C/Xz.c b/third_party/lzma_sdk/C/Xz.c
+index 7c53b60073730504df00d2574af0959dfc00a6fd..d07550d097c92163ea5b96bb214c23418081b85f 100644
+--- a/third_party/lzma_sdk/C/Xz.c
++++ b/third_party/lzma_sdk/C/Xz.c
+@@ -1,5 +1,5 @@
+ /* Xz.c - Xz
+-2021-02-09 : Igor Pavlov : Public domain */
++2024-03-01 : Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+ 
+@@ -52,6 +52,7 @@ void XzCheck_Init(CXzCheck *p, unsigned mode)
+     case XZ_CHECK_CRC32: p->crc = CRC_INIT_VAL; break;
+     case XZ_CHECK_CRC64: p->crc64 = CRC64_INIT_VAL; break;
+     case XZ_CHECK_SHA256: Sha256_Init(&p->sha); break;
++    default: break;
+   }
+ }
+ 
+@@ -62,6 +63,7 @@ void XzCheck_Update(CXzCheck *p, const void *data, size_t size)
+     case XZ_CHECK_CRC32: p->crc = CrcUpdate(p->crc, data, size); break;
+     case XZ_CHECK_CRC64: p->crc64 = Crc64Update(p->crc64, data, size); break;
+     case XZ_CHECK_SHA256: Sha256_Update(&p->sha, (const Byte *)data, size); break;
++    default: break;
+   }
+ }
+ 
+@@ -70,7 +72,7 @@ int XzCheck_Final(CXzCheck *p, Byte *digest)
+   switch (p->mode)
+   {
+     case XZ_CHECK_CRC32:
+-      SetUi32(digest, CRC_GET_DIGEST(p->crc));
++      SetUi32(digest, CRC_GET_DIGEST(p->crc))
+       break;
+     case XZ_CHECK_CRC64:
+     {
+diff --git a/third_party/lzma_sdk/C/Xz.h b/third_party/lzma_sdk/C/Xz.h
+index 849b944bf5ed64e7b652afb2e7b5b69a04ffb67f..42bc685341a51918c4462a82d6c7a529b081c424 100644
+--- a/third_party/lzma_sdk/C/Xz.h
++++ b/third_party/lzma_sdk/C/Xz.h
+@@ -1,21 +1,24 @@
+ /* Xz.h - Xz interface
+-2021-04-01 : Igor Pavlov : Public domain */
++2024-01-26 : Igor Pavlov : Public domain */
+ 
+-#ifndef __XZ_H
+-#define __XZ_H
++#ifndef ZIP7_INC_XZ_H
++#define ZIP7_INC_XZ_H
+ 
+ #include "Sha256.h"
++#include "Delta.h"
+ 
+ EXTERN_C_BEGIN
+ 
+ #define XZ_ID_Subblock 1
+ #define XZ_ID_Delta 3
+-#define XZ_ID_X86 4
+-#define XZ_ID_PPC 5
+-#define XZ_ID_IA64 6
+-#define XZ_ID_ARM 7
+-#define XZ_ID_ARMT 8
++#define XZ_ID_X86   4
++#define XZ_ID_PPC   5
++#define XZ_ID_IA64  6
++#define XZ_ID_ARM   7
++#define XZ_ID_ARMT  8
+ #define XZ_ID_SPARC 9
++#define XZ_ID_ARM64 0xa
++#define XZ_ID_RISCV 0xb
+ #define XZ_ID_LZMA2 0x21
+ 
+ unsigned Xz_ReadVarInt(const Byte *p, size_t maxSize, UInt64 *value);
+@@ -53,7 +56,7 @@ typedef struct
+ #define XzBlock_HasUnsupportedFlags(p) (((p)->flags & ~(XZ_BF_NUM_FILTERS_MASK | XZ_BF_PACK_SIZE | XZ_BF_UNPACK_SIZE)) != 0)
+ 
+ SRes XzBlock_Parse(CXzBlock *p, const Byte *header);
+-SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStream *inStream, BoolInt *isIndex, UInt32 *headerSizeRes);
++SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex, UInt32 *headerSizeRes);
+ 
+ /* ---------- xz stream ---------- */
+ 
+@@ -101,7 +104,7 @@ typedef UInt16 CXzStreamFlags;
+ unsigned XzFlags_GetCheckSize(CXzStreamFlags f);
+ 
+ SRes Xz_ParseHeader(CXzStreamFlags *p, const Byte *buf);
+-SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStream *inStream);
++SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStreamPtr inStream);
+ 
+ typedef struct
+ {
+@@ -112,6 +115,7 @@ typedef struct
+ typedef struct
+ {
+   CXzStreamFlags flags;
++  // Byte _pad[6];
+   size_t numBlocks;
+   CXzBlockSizes *blocks;
+   UInt64 startOffset;
+@@ -134,7 +138,7 @@ typedef struct
+ 
+ void Xzs_Construct(CXzs *p);
+ void Xzs_Free(CXzs *p, ISzAllocPtr alloc);
+-SRes Xzs_ReadBackward(CXzs *p, ILookInStream *inStream, Int64 *startOffset, ICompressProgress *progress, ISzAllocPtr alloc);
++SRes Xzs_ReadBackward(CXzs *p, ILookInStreamPtr inStream, Int64 *startOffset, ICompressProgressPtr progress, ISzAllocPtr alloc);
+ 
+ UInt64 Xzs_GetNumBlocks(const CXzs *p);
+ UInt64 Xzs_GetUnpackSize(const CXzs *p);
+@@ -160,9 +164,9 @@ typedef enum
+ } ECoderFinishMode;
+ 
+ 
+-typedef struct _IStateCoder
++typedef struct
+ {
+-  void *p;
++  void *p; // state object;
+   void (*Free)(void *p, ISzAllocPtr alloc);
+   SRes (*SetProps)(void *p, const Byte *props, size_t propSize, ISzAllocPtr alloc);
+   void (*Init)(void *p);
+@@ -174,6 +178,20 @@ typedef struct _IStateCoder
+ } IStateCoder;
+ 
+ 
++typedef struct
++{
++  UInt32 methodId;
++  UInt32 delta;
++  UInt32 ip;
++  UInt32 X86_State;
++  Byte delta_State[DELTA_STATE_SIZE];
++} CXzBcFilterStateBase;
++
++typedef SizeT (*Xz_Func_BcFilterStateBase_Filter)(CXzBcFilterStateBase *p, Byte *data, SizeT size);
++
++SRes Xz_StateCoder_Bc_SetFromMethod_Func(IStateCoder *p, UInt64 id,
++    Xz_Func_BcFilterStateBase_Filter func, ISzAllocPtr alloc);
++
+ 
+ #define MIXCODER_NUM_FILTERS_MAX 4
+ 
+@@ -216,13 +234,13 @@ typedef enum
+ typedef struct
+ {
+   EXzState state;
+-  UInt32 pos;
++  unsigned pos;
+   unsigned alignPos;
+   unsigned indexPreSize;
+ 
+   CXzStreamFlags streamFlags;
+   
+-  UInt32 blockHeaderSize;
++  unsigned blockHeaderSize;
+   UInt64 packSize;
+   UInt64 unpackSize;
+ 
+@@ -422,7 +440,7 @@ typedef struct
+   size_t outStep_ST;      // size of output buffer for Single-Thread decoding
+   BoolInt ignoreErrors;   // if set to 1, the decoder can ignore some errors and it skips broken parts of data.
+   
+-  #ifndef _7ZIP_ST
++  #ifndef Z7_ST
+   unsigned numThreads;    // the number of threads for Multi-Thread decoding. if (umThreads == 1) it will use Single-thread decoding
+   size_t inBufSize_MT;    // size of small input data buffers for Multi-Thread decoding. Big number of such small buffers can be created
+   size_t memUseMax;       // the limit of total memory usage for Multi-Thread decoding.
+@@ -432,8 +450,9 @@ typedef struct
+ 
+ void XzDecMtProps_Init(CXzDecMtProps *p);
+ 
+-
+-typedef void * CXzDecMtHandle;
++typedef struct CXzDecMt CXzDecMt;
++typedef CXzDecMt * CXzDecMtHandle;
++// Z7_DECLARE_HANDLE(CXzDecMtHandle)
+ 
+ /*
+   alloc    : XzDecMt uses CAlignOffsetAlloc internally for addresses allocated by (alloc).
+@@ -503,14 +522,14 @@ SRes XzDecMt_Decode(CXzDecMtHandle p,
+     const CXzDecMtProps *props,
+     const UInt64 *outDataSize, // NULL means undefined
+     int finishMode,            // 0 - partial unpacking is allowed, 1 - xz stream(s) must be finished
+-    ISeqOutStream *outStream,
++    ISeqOutStreamPtr outStream,
+     // Byte *outBuf, size_t *outBufSize,
+-    ISeqInStream *inStream,
++    ISeqInStreamPtr inStream,
+     // const Byte *inData, size_t inDataSize,
+     CXzStatInfo *stat,         // out: decoding results and statistics
+     int *isMT,                 // out: 0 means that ST (Single-Thread) version was used
+                                //      1 means that MT (Multi-Thread) version was used
+-    ICompressProgress *progress);
++    ICompressProgressPtr progress);
+ 
+ EXTERN_C_END
+ 
+diff --git a/third_party/lzma_sdk/C/XzCrc64.c b/third_party/lzma_sdk/C/XzCrc64.c
+index b6d02cbebd5151e840ec94975a4d76d5444dafcf..94fc1afb37242228bc97a9627e5565833c6d855d 100644
+--- a/third_party/lzma_sdk/C/XzCrc64.c
++++ b/third_party/lzma_sdk/C/XzCrc64.c
+@@ -1,5 +1,5 @@
+ /* XzCrc64.c -- CRC64 calculation
+-2017-05-23 : Igor Pavlov : Public domain */
++2023-12-08 : Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+ 
+@@ -8,45 +8,76 @@
+ 
+ #define kCrc64Poly UINT64_CONST(0xC96C5795D7870F42)
+ 
+-#ifdef MY_CPU_LE
+-  #define CRC64_NUM_TABLES 4
++// for debug only : define Z7_CRC64_DEBUG_BE to test big-endian code in little-endian cpu
++// #define Z7_CRC64_DEBUG_BE
++#ifdef Z7_CRC64_DEBUG_BE
++#undef MY_CPU_LE
++#define MY_CPU_BE
++#endif
++
++#ifdef Z7_CRC64_NUM_TABLES
++  #define Z7_CRC64_NUM_TABLES_USE  Z7_CRC64_NUM_TABLES
+ #else
+-  #define CRC64_NUM_TABLES 5
+-  #define CRC_UINT64_SWAP(v) \
+-      ((v >> 56) \
+-    | ((v >> 40) & ((UInt64)0xFF <<  8)) \
+-    | ((v >> 24) & ((UInt64)0xFF << 16)) \
+-    | ((v >>  8) & ((UInt64)0xFF << 24)) \
+-    | ((v <<  8) & ((UInt64)0xFF << 32)) \
+-    | ((v << 24) & ((UInt64)0xFF << 40)) \
+-    | ((v << 40) & ((UInt64)0xFF << 48)) \
+-    | ((v << 56)))
+-
+-  UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table);
++  #define Z7_CRC64_NUM_TABLES_USE  12
++#endif
++
++#if Z7_CRC64_NUM_TABLES_USE < 1
++  #error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES
+ #endif
+ 
++
++#if Z7_CRC64_NUM_TABLES_USE != 1
++
+ #ifndef MY_CPU_BE
+-  UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table);
++  #define FUNC_NAME_LE_2(s)   XzCrc64UpdateT ## s
++  #define FUNC_NAME_LE_1(s)   FUNC_NAME_LE_2(s)
++  #define FUNC_NAME_LE        FUNC_NAME_LE_1(Z7_CRC64_NUM_TABLES_USE)
++  UInt64 Z7_FASTCALL FUNC_NAME_LE (UInt64 v, const void *data, size_t size, const UInt64 *table);
++#endif
++#ifndef MY_CPU_LE
++  #define FUNC_NAME_BE_2(s)   XzCrc64UpdateBeT ## s
++  #define FUNC_NAME_BE_1(s)   FUNC_NAME_BE_2(s)
++  #define FUNC_NAME_BE        FUNC_NAME_BE_1(Z7_CRC64_NUM_TABLES_USE)
++  UInt64 Z7_FASTCALL FUNC_NAME_BE (UInt64 v, const void *data, size_t size, const UInt64 *table);
+ #endif
+ 
+-typedef UInt64 (MY_FAST_CALL *CRC64_FUNC)(UInt64 v, const void *data, size_t size, const UInt64 *table);
++#if defined(MY_CPU_LE)
++  #define FUNC_REF  FUNC_NAME_LE
++#elif defined(MY_CPU_BE)
++  #define FUNC_REF  FUNC_NAME_BE
++#else
++  #define FUNC_REF  g_Crc64Update
++  static UInt64 (Z7_FASTCALL *FUNC_REF)(UInt64 v, const void *data, size_t size, const UInt64 *table);
++#endif
+ 
+-static CRC64_FUNC g_Crc64Update;
+-UInt64 g_Crc64Table[256 * CRC64_NUM_TABLES];
++#endif
++
++
++MY_ALIGN(64)
++static UInt64 g_Crc64Table[256 * Z7_CRC64_NUM_TABLES_USE];
+ 
+-UInt64 MY_FAST_CALL Crc64Update(UInt64 v, const void *data, size_t size)
+-{
+-  return g_Crc64Update(v, data, size, g_Crc64Table);
+-}
+ 
+-UInt64 MY_FAST_CALL Crc64Calc(const void *data, size_t size)
++UInt64 Z7_FASTCALL Crc64Update(UInt64 v, const void *data, size_t size)
+ {
+-  return g_Crc64Update(CRC64_INIT_VAL, data, size, g_Crc64Table) ^ CRC64_INIT_VAL;
++#if Z7_CRC64_NUM_TABLES_USE == 1
++  #define CRC64_UPDATE_BYTE_2(crc, b)  (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
++  const UInt64 *table = g_Crc64Table;
++  const Byte *p = (const Byte *)data;
++  const Byte *lim = p + size;
++  for (; p != lim; p++)
++    v = CRC64_UPDATE_BYTE_2(v, *p);
++  return v;
++  #undef CRC64_UPDATE_BYTE_2
++#else
++  return FUNC_REF (v, data, size, g_Crc64Table);
++#endif
+ }
+ 
+-void MY_FAST_CALL Crc64GenerateTable()
++
++Z7_NO_INLINE
++void Z7_FASTCALL Crc64GenerateTable(void)
+ {
+-  UInt32 i;
++  unsigned i;
+   for (i = 0; i < 256; i++)
+   {
+     UInt64 r = i;
+@@ -55,32 +86,55 @@ void MY_FAST_CALL Crc64GenerateTable()
+       r = (r >> 1) ^ (kCrc64Poly & ((UInt64)0 - (r & 1)));
+     g_Crc64Table[i] = r;
+   }
+-  for (i = 256; i < 256 * CRC64_NUM_TABLES; i++)
++
++#if Z7_CRC64_NUM_TABLES_USE != 1
++#if 1 || 1 && defined(MY_CPU_X86) // low register count
++  for (i = 0; i < 256 * (Z7_CRC64_NUM_TABLES_USE - 1); i++)
+   {
+-    UInt64 r = g_Crc64Table[(size_t)i - 256];
+-    g_Crc64Table[i] = g_Crc64Table[r & 0xFF] ^ (r >> 8);
++    const UInt64 r0 = g_Crc64Table[(size_t)i];
++    g_Crc64Table[(size_t)i + 256] = g_Crc64Table[(Byte)r0] ^ (r0 >> 8);
+   }
+-  
+-  #ifdef MY_CPU_LE
+-
+-  g_Crc64Update = XzCrc64UpdateT4;
++#else
++  for (i = 0; i < 256 * (Z7_CRC64_NUM_TABLES_USE - 1); i += 2)
++  {
++    UInt64 r0 = g_Crc64Table[(size_t)(i)    ];
++    UInt64 r1 = g_Crc64Table[(size_t)(i) + 1];
++    r0 = g_Crc64Table[(Byte)r0] ^ (r0 >> 8);
++    r1 = g_Crc64Table[(Byte)r1] ^ (r1 >> 8);
++    g_Crc64Table[(size_t)i + 256    ] = r0;
++    g_Crc64Table[(size_t)i + 256 + 1] = r1;
++  }
++#endif
+ 
+-  #else
++#ifndef MY_CPU_LE
+   {
+-    #ifndef MY_CPU_BE
++#ifndef MY_CPU_BE
+     UInt32 k = 1;
+     if (*(const Byte *)&k == 1)
+-      g_Crc64Update = XzCrc64UpdateT4;
++      FUNC_REF = FUNC_NAME_LE;
+     else
+-    #endif
++#endif
+     {
+-      for (i = 256 * CRC64_NUM_TABLES - 1; i >= 256; i--)
++#ifndef MY_CPU_BE
++      FUNC_REF = FUNC_NAME_BE;
++#endif
++      for (i = 0; i < 256 * Z7_CRC64_NUM_TABLES_USE; i++)
+       {
+-        UInt64 x = g_Crc64Table[(size_t)i - 256];
+-        g_Crc64Table[i] = CRC_UINT64_SWAP(x);
++        const UInt64 x = g_Crc64Table[i];
++        g_Crc64Table[i] = Z7_BSWAP64(x);
+       }
+-      g_Crc64Update = XzCrc64UpdateT1_BeT4;
+     }
+   }
+-  #endif
++#endif // ndef MY_CPU_LE
++#endif // Z7_CRC64_NUM_TABLES_USE != 1
+ }
++
++#undef kCrc64Poly
++#undef Z7_CRC64_NUM_TABLES_USE
++#undef FUNC_REF
++#undef FUNC_NAME_LE_2
++#undef FUNC_NAME_LE_1
++#undef FUNC_NAME_LE
++#undef FUNC_NAME_BE_2
++#undef FUNC_NAME_BE_1
++#undef FUNC_NAME_BE
+diff --git a/third_party/lzma_sdk/C/XzCrc64.h b/third_party/lzma_sdk/C/XzCrc64.h
+index 08dbc330c27abdbc8260061b0f00de68e0caa245..04f8153df4ac35a29213bb9dc60e577921890dbb 100644
+--- a/third_party/lzma_sdk/C/XzCrc64.h
++++ b/third_party/lzma_sdk/C/XzCrc64.h
+@@ -1,8 +1,8 @@
+ /* XzCrc64.h -- CRC64 calculation
+-2013-01-18 : Igor Pavlov : Public domain */
++2023-12-08 : Igor Pavlov : Public domain */
+ 
+-#ifndef __XZ_CRC64_H
+-#define __XZ_CRC64_H
++#ifndef ZIP7_INC_XZ_CRC64_H
++#define ZIP7_INC_XZ_CRC64_H
+ 
+ #include <stddef.h>
+ 
+@@ -10,16 +10,16 @@
+ 
+ EXTERN_C_BEGIN
+ 
+-extern UInt64 g_Crc64Table[];
++// extern UInt64 g_Crc64Table[];
+ 
+-void MY_FAST_CALL Crc64GenerateTable(void);
++void Z7_FASTCALL Crc64GenerateTable(void);
+ 
+ #define CRC64_INIT_VAL UINT64_CONST(0xFFFFFFFFFFFFFFFF)
+ #define CRC64_GET_DIGEST(crc) ((crc) ^ CRC64_INIT_VAL)
+-#define CRC64_UPDATE_BYTE(crc, b) (g_Crc64Table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
++// #define CRC64_UPDATE_BYTE(crc, b) (g_Crc64Table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
+ 
+-UInt64 MY_FAST_CALL Crc64Update(UInt64 crc, const void *data, size_t size);
+-UInt64 MY_FAST_CALL Crc64Calc(const void *data, size_t size);
++UInt64 Z7_FASTCALL Crc64Update(UInt64 crc, const void *data, size_t size);
++// UInt64 Z7_FASTCALL Crc64Calc(const void *data, size_t size);
+ 
+ EXTERN_C_END
+ 
+diff --git a/third_party/lzma_sdk/C/XzCrc64Opt.c b/third_party/lzma_sdk/C/XzCrc64Opt.c
+index 93a9ffff5f8113b885ef2963725e620d6eede939..0c1fc2ffecb89ebe834823eeda226a9ae3366171 100644
+--- a/third_party/lzma_sdk/C/XzCrc64Opt.c
++++ b/third_party/lzma_sdk/C/XzCrc64Opt.c
+@@ -1,71 +1,261 @@
+-/* XzCrc64Opt.c -- CRC64 calculation
+-2021-02-09 : Igor Pavlov : Public domain */
++/* XzCrc64Opt.c -- CRC64 calculation (optimized functions)
++2023-12-08 : Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+ 
+ #include "CpuArch.h"
+ 
++#if !defined(Z7_CRC64_NUM_TABLES) || Z7_CRC64_NUM_TABLES > 1
++
++// for debug only : define Z7_CRC64_DEBUG_BE to test big-endian code in little-endian cpu
++// #define Z7_CRC64_DEBUG_BE
++#ifdef Z7_CRC64_DEBUG_BE
++#undef MY_CPU_LE
++#define MY_CPU_BE
++#endif
++
++#if defined(MY_CPU_64BIT)
++#define Z7_CRC64_USE_64BIT
++#endif
++
++// the value Z7_CRC64_NUM_TABLES_USE must be defined to same value as in XzCrc64.c
++#ifdef Z7_CRC64_NUM_TABLES
++#define Z7_CRC64_NUM_TABLES_USE  Z7_CRC64_NUM_TABLES
++#else
++#define Z7_CRC64_NUM_TABLES_USE  12
++#endif
++
++#if Z7_CRC64_NUM_TABLES_USE % 4 || \
++    Z7_CRC64_NUM_TABLES_USE < 4 || \
++    Z7_CRC64_NUM_TABLES_USE > 4 * 4
++  #error Stop_Compiling_Bad_CRC64_NUM_TABLES
++#endif
++
++
+ #ifndef MY_CPU_BE
+ 
+-#define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
++#define CRC64_UPDATE_BYTE_2(crc, b)  (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
++
++#if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0)
++
++#define Q64LE(n, d) \
++    ( (table + ((n) * 8 + 7) * 0x100)[((d)         ) & 0xFF] \
++    ^ (table + ((n) * 8 + 6) * 0x100)[((d) >> 1 * 8) & 0xFF] \
++    ^ (table + ((n) * 8 + 5) * 0x100)[((d) >> 2 * 8) & 0xFF] \
++    ^ (table + ((n) * 8 + 4) * 0x100)[((d) >> 3 * 8) & 0xFF] \
++    ^ (table + ((n) * 8 + 3) * 0x100)[((d) >> 4 * 8) & 0xFF] \
++    ^ (table + ((n) * 8 + 2) * 0x100)[((d) >> 5 * 8) & 0xFF] \
++    ^ (table + ((n) * 8 + 1) * 0x100)[((d) >> 6 * 8) & 0xFF] \
++    ^ (table + ((n) * 8 + 0) * 0x100)[((d) >> 7 * 8)] )
++
++#define R64(a)  *((const UInt64 *)(const void *)p + (a))
++
++#else
++
++#define Q32LE(n, d) \
++    ( (table + ((n) * 4 + 3) * 0x100)[((d)         ) & 0xFF] \
++    ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 1 * 8) & 0xFF] \
++    ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 2 * 8) & 0xFF] \
++    ^ (table + ((n) * 4 + 0) * 0x100)[((d) >> 3 * 8)] )
++
++#define R32(a)  *((const UInt32 *)(const void *)p + (a))
++
++#endif
++
++
++#define CRC64_FUNC_PRE_LE2(step) \
++UInt64 Z7_FASTCALL XzCrc64UpdateT ## step (UInt64 v, const void *data, size_t size, const UInt64 *table)
++
++#define CRC64_FUNC_PRE_LE(step)   \
++        CRC64_FUNC_PRE_LE2(step); \
++        CRC64_FUNC_PRE_LE2(step)
+ 
+-UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table);
+-UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table)
++CRC64_FUNC_PRE_LE(Z7_CRC64_NUM_TABLES_USE)
+ {
+   const Byte *p = (const Byte *)data;
+-  for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++)
++  const Byte *lim;
++  for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0; size--, p++)
+     v = CRC64_UPDATE_BYTE_2(v, *p);
+-  for (; size >= 4; size -= 4, p += 4)
++  lim = p + size;
++  if (size >= Z7_CRC64_NUM_TABLES_USE)
+   {
+-    UInt32 d = (UInt32)v ^ *(const UInt32 *)(const void *)p;
+-    v = (v >> 32)
+-        ^ (table + 0x300)[((d      ) & 0xFF)]
+-        ^ (table + 0x200)[((d >>  8) & 0xFF)]
+-        ^ (table + 0x100)[((d >> 16) & 0xFF)]
+-        ^ (table + 0x000)[((d >> 24))];
++    lim -= Z7_CRC64_NUM_TABLES_USE;
++    do
++    {
++#if Z7_CRC64_NUM_TABLES_USE == 4
++      const UInt32 d = (UInt32)v ^ R32(0);
++      v = (v >> 32) ^ Q32LE(0, d);
++#elif Z7_CRC64_NUM_TABLES_USE == 8
++#ifdef Z7_CRC64_USE_64BIT
++      v ^= R64(0);
++      v = Q64LE(0, v);
++#else
++      UInt32 v0, v1;
++      v0 = (UInt32)v         ^ R32(0);
++      v1 = (UInt32)(v >> 32) ^ R32(1);
++      v = Q32LE(1, v0) ^ Q32LE(0, v1);
++#endif
++#elif Z7_CRC64_NUM_TABLES_USE == 12
++      UInt32 w;
++      UInt32 v0, v1;
++      v0 = (UInt32)v         ^ R32(0);
++      v1 = (UInt32)(v >> 32) ^ R32(1);
++      w = R32(2);
++      v = Q32LE(0, w);
++      v ^= Q32LE(2, v0) ^ Q32LE(1, v1);
++#elif Z7_CRC64_NUM_TABLES_USE == 16
++#ifdef Z7_CRC64_USE_64BIT
++      UInt64 w;
++      UInt64 x;
++      w  = R64(1);      x = Q64LE(0, w);
++      v ^= R64(0);  v = x ^ Q64LE(1, v);
++#else
++      UInt32 v0, v1;
++      UInt32 r0, r1;
++      v0 = (UInt32)v         ^ R32(0);
++      v1 = (UInt32)(v >> 32) ^ R32(1);
++      r0 =                     R32(2);
++      r1 =                     R32(3);
++      v  = Q32LE(1, r0) ^ Q32LE(0, r1);
++      v ^= Q32LE(3, v0) ^ Q32LE(2, v1);
++#endif
++#else
++#error Stop_Compiling_Bad_CRC64_NUM_TABLES
++#endif
++      p += Z7_CRC64_NUM_TABLES_USE;
++    }
++    while (p <= lim);
++    lim += Z7_CRC64_NUM_TABLES_USE;
+   }
+-  for (; size > 0; size--, p++)
++  for (; p < lim; p++)
+     v = CRC64_UPDATE_BYTE_2(v, *p);
+   return v;
+ }
+ 
++#undef CRC64_UPDATE_BYTE_2
++#undef R32
++#undef R64
++#undef Q32LE
++#undef Q64LE
++#undef CRC64_FUNC_PRE_LE
++#undef CRC64_FUNC_PRE_LE2
++
+ #endif
+ 
+ 
++
++
+ #ifndef MY_CPU_LE
+ 
+-#define CRC_UINT64_SWAP(v) \
+-      ((v >> 56) \
+-    | ((v >> 40) & ((UInt64)0xFF <<  8)) \
+-    | ((v >> 24) & ((UInt64)0xFF << 16)) \
+-    | ((v >>  8) & ((UInt64)0xFF << 24)) \
+-    | ((v <<  8) & ((UInt64)0xFF << 32)) \
+-    | ((v << 24) & ((UInt64)0xFF << 40)) \
+-    | ((v << 40) & ((UInt64)0xFF << 48)) \
+-    | ((v << 56)))
++#define CRC64_UPDATE_BYTE_2_BE(crc, b)  (table[((crc) >> 56) ^ (b)] ^ ((crc) << 8))
++
++#if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0)
++
++#define Q64BE(n, d) \
++    ( (table + ((n) * 8 + 0) * 0x100)[(Byte)(d)] \
++    ^ (table + ((n) * 8 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \
++    ^ (table + ((n) * 8 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \
++    ^ (table + ((n) * 8 + 3) * 0x100)[((d) >> 3 * 8) & 0xFF] \
++    ^ (table + ((n) * 8 + 4) * 0x100)[((d) >> 4 * 8) & 0xFF] \
++    ^ (table + ((n) * 8 + 5) * 0x100)[((d) >> 5 * 8) & 0xFF] \
++    ^ (table + ((n) * 8 + 6) * 0x100)[((d) >> 6 * 8) & 0xFF] \
++    ^ (table + ((n) * 8 + 7) * 0x100)[((d) >> 7 * 8)] )
++
++#ifdef Z7_CRC64_DEBUG_BE
++  #define R64BE(a)  GetBe64a((const UInt64 *)(const void *)p + (a))
++#else
++  #define R64BE(a)         *((const UInt64 *)(const void *)p + (a))
++#endif
++
++#else
+ 
+-#define CRC64_UPDATE_BYTE_2_BE(crc, b) (table[(Byte)((crc) >> 56) ^ (b)] ^ ((crc) << 8))
++#define Q32BE(n, d) \
++    ( (table + ((n) * 4 + 0) * 0x100)[(Byte)(d)] \
++    ^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \
++    ^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \
++    ^ (table + ((n) * 4 + 3) * 0x100)[((d) >> 3 * 8)] )
+ 
+-UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table);
+-UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table)
++#ifdef Z7_CRC64_DEBUG_BE
++  #define R32BE(a)  GetBe32a((const UInt32 *)(const void *)p + (a))
++#else
++  #define R32BE(a)         *((const UInt32 *)(const void *)p + (a))
++#endif
++
++#endif
++
++#define CRC64_FUNC_PRE_BE2(step) \
++UInt64 Z7_FASTCALL XzCrc64UpdateBeT ## step (UInt64 v, const void *data, size_t size, const UInt64 *table)
++
++#define CRC64_FUNC_PRE_BE(step)   \
++        CRC64_FUNC_PRE_BE2(step); \
++        CRC64_FUNC_PRE_BE2(step)
++
++CRC64_FUNC_PRE_BE(Z7_CRC64_NUM_TABLES_USE)
+ {
+   const Byte *p = (const Byte *)data;
+-  table += 0x100;
+-  v = CRC_UINT64_SWAP(v);
+-  for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++)
++  const Byte *lim;
++  v = Z7_BSWAP64(v);
++  for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0; size--, p++)
+     v = CRC64_UPDATE_BYTE_2_BE(v, *p);
+-  for (; size >= 4; size -= 4, p += 4)
++  lim = p + size;
++  if (size >= Z7_CRC64_NUM_TABLES_USE)
+   {
+-    UInt32 d = (UInt32)(v >> 32) ^ *(const UInt32 *)(const void *)p;
+-    v = (v << 32)
+-        ^ (table + 0x000)[((d      ) & 0xFF)]
+-        ^ (table + 0x100)[((d >>  8) & 0xFF)]
+-        ^ (table + 0x200)[((d >> 16) & 0xFF)]
+-        ^ (table + 0x300)[((d >> 24))];
++    lim -= Z7_CRC64_NUM_TABLES_USE;
++    do
++    {
++#if   Z7_CRC64_NUM_TABLES_USE == 4
++      const UInt32 d = (UInt32)(v >> 32) ^ R32BE(0);
++      v = (v << 32) ^ Q32BE(0, d);
++#elif Z7_CRC64_NUM_TABLES_USE == 12
++      const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
++      const UInt32 d0 = (UInt32)(v      ) ^ R32BE(1);
++      const UInt32 w =                      R32BE(2);
++      v  = Q32BE(0, w);
++      v ^= Q32BE(2, d1) ^ Q32BE(1, d0);
++
++#elif Z7_CRC64_NUM_TABLES_USE == 8
++  #ifdef Z7_CRC64_USE_64BIT
++      v ^= R64BE(0);
++      v  = Q64BE(0, v);
++  #else
++      const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
++      const UInt32 d0 = (UInt32)(v      ) ^ R32BE(1);
++      v = Q32BE(1, d1) ^ Q32BE(0, d0);
++  #endif
++#elif Z7_CRC64_NUM_TABLES_USE == 16
++  #ifdef Z7_CRC64_USE_64BIT
++      const UInt64 w = R64BE(1);
++      v ^= R64BE(0);
++      v  = Q64BE(0, w) ^ Q64BE(1, v);
++  #else
++      const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
++      const UInt32 d0 = (UInt32)(v      ) ^ R32BE(1);
++      const UInt32 w1 =                     R32BE(2);
++      const UInt32 w0 =                     R32BE(3);
++      v  = Q32BE(1, w1) ^ Q32BE(0, w0);
++      v ^= Q32BE(3, d1) ^ Q32BE(2, d0);
++  #endif
++#elif
++#error Stop_Compiling_Bad_CRC64_NUM_TABLES
++#endif
++      p += Z7_CRC64_NUM_TABLES_USE;
++    }
++    while (p <= lim);
++    lim += Z7_CRC64_NUM_TABLES_USE;
+   }
+-  for (; size > 0; size--, p++)
++  for (; p < lim; p++)
+     v = CRC64_UPDATE_BYTE_2_BE(v, *p);
+-  return CRC_UINT64_SWAP(v);
++  return Z7_BSWAP64(v);
+ }
+ 
++#undef CRC64_UPDATE_BYTE_2_BE
++#undef R32BE
++#undef R64BE
++#undef Q32BE
++#undef Q64BE
++#undef CRC64_FUNC_PRE_BE
++#undef CRC64_FUNC_PRE_BE2
++
++#endif
++#undef Z7_CRC64_NUM_TABLES_USE
+ #endif
+diff --git a/third_party/lzma_sdk/C/XzDec.c b/third_party/lzma_sdk/C/XzDec.c
+index 3f96a37f94fc751ed98c31e5d2e731b6526d1957..3d1c98e63194f088ce7907d61f376b5416be341b 100644
+--- a/third_party/lzma_sdk/C/XzDec.c
++++ b/third_party/lzma_sdk/C/XzDec.c
+@@ -1,5 +1,5 @@
+ /* XzDec.c -- Xz Decode
+-2021-09-04 : Igor Pavlov : Public domain */
++2024-03-01 : Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+ 
+@@ -67,7 +67,8 @@ unsigned Xz_ReadVarInt(const Byte *p, size_t maxSize, UInt64 *value)
+   return 0;
+ }
+ 
+-/* ---------- BraState ---------- */
++
++/* ---------- XzBcFilterState ---------- */
+ 
+ #define BRA_BUF_SIZE (1 << 14)
+ 
+@@ -76,55 +77,60 @@ typedef struct
+   size_t bufPos;
+   size_t bufConv;
+   size_t bufTotal;
++  Byte *buf;  // must be aligned for 4 bytes
++  Xz_Func_BcFilterStateBase_Filter filter_func;
++  // int encodeMode;
++  CXzBcFilterStateBase base;
++  // Byte buf[BRA_BUF_SIZE];
++} CXzBcFilterState;
+ 
+-  int encodeMode;
+-
+-  UInt32 methodId;
+-  UInt32 delta;
+-  UInt32 ip;
+-  UInt32 x86State;
+-  Byte deltaState[DELTA_STATE_SIZE];
+-
+-  Byte buf[BRA_BUF_SIZE];
+-} CBraState;
+ 
+-static void BraState_Free(void *pp, ISzAllocPtr alloc)
++static void XzBcFilterState_Free(void *pp, ISzAllocPtr alloc)
+ {
+-  ISzAlloc_Free(alloc, pp);
++  if (pp)
++  {
++    CXzBcFilterState *p = ((CXzBcFilterState *)pp);
++    ISzAlloc_Free(alloc, p->buf);
++    ISzAlloc_Free(alloc, pp);
++  }
+ }
+ 
+-static SRes BraState_SetProps(void *pp, const Byte *props, size_t propSize, ISzAllocPtr alloc)
++
++static SRes XzBcFilterState_SetProps(void *pp, const Byte *props, size_t propSize, ISzAllocPtr alloc)
+ {
+-  CBraState *p = ((CBraState *)pp);
+-  UNUSED_VAR(alloc);
++  CXzBcFilterStateBase *p = &((CXzBcFilterState *)pp)->base;
++  UNUSED_VAR(alloc)
+   p->ip = 0;
+   if (p->methodId == XZ_ID_Delta)
+   {
+     if (propSize != 1)
+       return SZ_ERROR_UNSUPPORTED;
+-    p->delta = (unsigned)props[0] + 1;
++    p->delta = (UInt32)props[0] + 1;
+   }
+   else
+   {
+     if (propSize == 4)
+     {
+-      UInt32 v = GetUi32(props);
++      const UInt32 v = GetUi32(props);
+       switch (p->methodId)
+       {
+         case XZ_ID_PPC:
+         case XZ_ID_ARM:
+         case XZ_ID_SPARC:
+-          if ((v & 3) != 0)
++        case XZ_ID_ARM64:
++          if (v & 3)
+             return SZ_ERROR_UNSUPPORTED;
+           break;
+         case XZ_ID_ARMT:
+-          if ((v & 1) != 0)
++        case XZ_ID_RISCV:
++          if (v & 1)
+             return SZ_ERROR_UNSUPPORTED;
+           break;
+         case XZ_ID_IA64:
+-          if ((v & 0xF) != 0)
++          if (v & 0xf)
+             return SZ_ERROR_UNSUPPORTED;
+           break;
++        default: break;
+       }
+       p->ip = v;
+     }
+@@ -134,73 +140,91 @@ static SRes BraState_SetProps(void *pp, const Byte *props, size_t propSize, ISzA
+   return SZ_OK;
+ }
+ 
+-static void BraState_Init(void *pp)
++
++static void XzBcFilterState_Init(void *pp)
+ {
+-  CBraState *p = ((CBraState *)pp);
++  CXzBcFilterState *p = ((CXzBcFilterState *)pp);
+   p->bufPos = p->bufConv = p->bufTotal = 0;
+-  x86_Convert_Init(p->x86State);
+-  if (p->methodId == XZ_ID_Delta)
+-    Delta_Init(p->deltaState);
++  p->base.X86_State = Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL;
++  if (p->base.methodId == XZ_ID_Delta)
++    Delta_Init(p->base.delta_State);
+ }
+ 
+ 
+-#define CASE_BRA_CONV(isa) case XZ_ID_ ## isa: size = isa ## _Convert(data, size, p->ip, p->encodeMode); break;
+-
+-static SizeT BraState_Filter(void *pp, Byte *data, SizeT size)
++static const z7_Func_BranchConv g_Funcs_BranchConv_RISC_Dec[] =
++{
++  Z7_BRANCH_CONV_DEC_2 (BranchConv_PPC),
++  Z7_BRANCH_CONV_DEC_2 (BranchConv_IA64),
++  Z7_BRANCH_CONV_DEC_2 (BranchConv_ARM),
++  Z7_BRANCH_CONV_DEC_2 (BranchConv_ARMT),
++  Z7_BRANCH_CONV_DEC_2 (BranchConv_SPARC),
++  Z7_BRANCH_CONV_DEC_2 (BranchConv_ARM64),
++  Z7_BRANCH_CONV_DEC_2 (BranchConv_RISCV)
++};
++
++static SizeT XzBcFilterStateBase_Filter_Dec(CXzBcFilterStateBase *p, Byte *data, SizeT size)
+ {
+-  CBraState *p = ((CBraState *)pp);
+   switch (p->methodId)
+   {
+     case XZ_ID_Delta:
+-      if (p->encodeMode)
+-        Delta_Encode(p->deltaState, p->delta, data, size);
+-      else
+-        Delta_Decode(p->deltaState, p->delta, data, size);
++      Delta_Decode(p->delta_State, p->delta, data, size);
+       break;
+     case XZ_ID_X86:
+-      size = x86_Convert(data, size, p->ip, &p->x86State, p->encodeMode);
++      size = (SizeT)(z7_BranchConvSt_X86_Dec(data, size, p->ip, &p->X86_State) - data);
++      break;
++    default:
++      if (p->methodId >= XZ_ID_PPC)
++      {
++        const UInt32 i = p->methodId - XZ_ID_PPC;
++        if (i < Z7_ARRAY_SIZE(g_Funcs_BranchConv_RISC_Dec))
++          size = (SizeT)(g_Funcs_BranchConv_RISC_Dec[i](data, size, p->ip) - data);
++      }
+       break;
+-    CASE_BRA_CONV(PPC)
+-    CASE_BRA_CONV(IA64)
+-    CASE_BRA_CONV(ARM)
+-    CASE_BRA_CONV(ARMT)
+-    CASE_BRA_CONV(SPARC)
+   }
+   p->ip += (UInt32)size;
+   return size;
+ }
+ 
+ 
+-static SRes BraState_Code2(void *pp,
++static SizeT XzBcFilterState_Filter(void *pp, Byte *data, SizeT size)
++{
++  CXzBcFilterState *p = ((CXzBcFilterState *)pp);
++  return p->filter_func(&p->base, data, size);
++}
++
++
++static SRes XzBcFilterState_Code2(void *pp,
+     Byte *dest, SizeT *destLen,
+     const Byte *src, SizeT *srcLen, int srcWasFinished,
+     ECoderFinishMode finishMode,
+     // int *wasFinished
+     ECoderStatus *status)
+ {
+-  CBraState *p = ((CBraState *)pp);
++  CXzBcFilterState *p = ((CXzBcFilterState *)pp);
+   SizeT destRem = *destLen;
+   SizeT srcRem = *srcLen;
+-  UNUSED_VAR(finishMode);
++  UNUSED_VAR(finishMode)
+ 
+   *destLen = 0;
+   *srcLen = 0;
+   // *wasFinished = False;
+   *status = CODER_STATUS_NOT_FINISHED;
+   
+-  while (destRem > 0)
++  while (destRem != 0)
+   {
+-    if (p->bufPos != p->bufConv)
+     {
+       size_t size = p->bufConv - p->bufPos;
+-      if (size > destRem)
+-        size = destRem;
+-      memcpy(dest, p->buf + p->bufPos, size);
+-      p->bufPos += size;
+-      *destLen += size;
+-      dest += size;
+-      destRem -= size;
+-      continue;
++      if (size)
++      {
++        if (size > destRem)
++          size = destRem;
++        memcpy(dest, p->buf + p->bufPos, size);
++        p->bufPos += size;
++        *destLen += size;
++        dest += size;
++        destRem -= size;
++        continue;
++      }
+     }
+     
+     p->bufTotal -= p->bufPos;
+@@ -220,7 +244,7 @@ static SRes BraState_Code2(void *pp,
+     if (p->bufTotal == 0)
+       break;
+     
+-    p->bufConv = BraState_Filter(pp, p->buf, p->bufTotal);
++    p->bufConv = p->filter_func(&p->base, p->buf, p->bufTotal);
+ 
+     if (p->bufConv == 0)
+     {
+@@ -240,27 +264,37 @@ static SRes BraState_Code2(void *pp,
+ }
+ 
+ 
+-SRes BraState_SetFromMethod(IStateCoder *p, UInt64 id, int encodeMode, ISzAllocPtr alloc);
+-SRes BraState_SetFromMethod(IStateCoder *p, UInt64 id, int encodeMode, ISzAllocPtr alloc)
++#define XZ_IS_SUPPORTED_FILTER_ID(id) \
++    ((id) >= XZ_ID_Delta && (id) <= XZ_ID_RISCV)
++
++SRes Xz_StateCoder_Bc_SetFromMethod_Func(IStateCoder *p, UInt64 id,
++    Xz_Func_BcFilterStateBase_Filter func, ISzAllocPtr alloc)
+ {
+-  CBraState *decoder;
+-  if (id < XZ_ID_Delta || id > XZ_ID_SPARC)
++  CXzBcFilterState *decoder;
++  if (!XZ_IS_SUPPORTED_FILTER_ID(id))
+     return SZ_ERROR_UNSUPPORTED;
+-  decoder = (CBraState *)p->p;
++  decoder = (CXzBcFilterState *)p->p;
+   if (!decoder)
+   {
+-    decoder = (CBraState *)ISzAlloc_Alloc(alloc, sizeof(CBraState));
++    decoder = (CXzBcFilterState *)ISzAlloc_Alloc(alloc, sizeof(CXzBcFilterState));
+     if (!decoder)
+       return SZ_ERROR_MEM;
++    decoder->buf = ISzAlloc_Alloc(alloc, BRA_BUF_SIZE);
++    if (!decoder->buf)
++    {
++      ISzAlloc_Free(alloc, decoder);
++      return SZ_ERROR_MEM;
++    }
+     p->p = decoder;
+-    p->Free = BraState_Free;
+-    p->SetProps = BraState_SetProps;
+-    p->Init = BraState_Init;
+-    p->Code2 = BraState_Code2;
+-    p->Filter = BraState_Filter;
++    p->Free     = XzBcFilterState_Free;
++    p->SetProps = XzBcFilterState_SetProps;
++    p->Init     = XzBcFilterState_Init;
++    p->Code2    = XzBcFilterState_Code2;
++    p->Filter   = XzBcFilterState_Filter;
++    decoder->filter_func = func;
+   }
+-  decoder->methodId = (UInt32)id;
+-  decoder->encodeMode = encodeMode;
++  decoder->base.methodId = (UInt32)id;
++  // decoder->encodeMode = encodeMode;
+   return SZ_OK;
+ }
+ 
+@@ -279,9 +313,9 @@ static void SbState_Free(void *pp, ISzAllocPtr alloc)
+ 
+ static SRes SbState_SetProps(void *pp, const Byte *props, size_t propSize, ISzAllocPtr alloc)
+ {
+-  UNUSED_VAR(pp);
+-  UNUSED_VAR(props);
+-  UNUSED_VAR(alloc);
++  UNUSED_VAR(pp)
++  UNUSED_VAR(props)
++  UNUSED_VAR(alloc)
+   return (propSize == 0) ? SZ_OK : SZ_ERROR_UNSUPPORTED;
+ }
+ 
+@@ -297,7 +331,7 @@ static SRes SbState_Code2(void *pp, Byte *dest, SizeT *destLen, const Byte *src,
+ {
+   CSbDec *p = (CSbDec *)pp;
+   SRes res;
+-  UNUSED_VAR(srcWasFinished);
++  UNUSED_VAR(srcWasFinished)
+   p->dest = dest;
+   p->destLen = *destLen;
+   p->src = src;
+@@ -389,7 +423,7 @@ static SRes Lzma2State_Code2(void *pp, Byte *dest, SizeT *destLen, const Byte *s
+   ELzmaStatus status2;
+   /* ELzmaFinishMode fm = (finishMode == LZMA_FINISH_ANY) ? LZMA_FINISH_ANY : LZMA_FINISH_END; */
+   SRes res;
+-  UNUSED_VAR(srcWasFinished);
++  UNUSED_VAR(srcWasFinished)
+   if (spec->outBufMode)
+   {
+     SizeT dicPos = spec->decoder.decoder.dicPos;
+@@ -420,7 +454,7 @@ static SRes Lzma2State_SetFromMethod(IStateCoder *p, Byte *outBuf, size_t outBuf
+     p->Init = Lzma2State_Init;
+     p->Code2 = Lzma2State_Code2;
+     p->Filter = NULL;
+-    Lzma2Dec_Construct(&spec->decoder);
++    Lzma2Dec_CONSTRUCT(&spec->decoder)
+   }
+   spec->outBufMode = False;
+   if (outBuf)
+@@ -510,26 +544,24 @@ static SRes MixCoder_SetFromMethod(CMixCoder *p, unsigned coderIndex, UInt64 met
+ {
+   IStateCoder *sc = &p->coders[coderIndex];
+   p->ids[coderIndex] = methodId;
+-  switch (methodId)
+-  {
+-    case XZ_ID_LZMA2: return Lzma2State_SetFromMethod(sc, outBuf, outBufSize, p->alloc);
+-    #ifdef USE_SUBBLOCK
+-    case XZ_ID_Subblock: return SbState_SetFromMethod(sc, p->alloc);
+-    #endif
+-  }
++  if (methodId == XZ_ID_LZMA2)
++    return Lzma2State_SetFromMethod(sc, outBuf, outBufSize, p->alloc);
++#ifdef USE_SUBBLOCK
++  if (methodId == XZ_ID_Subblock)
++    return SbState_SetFromMethod(sc, p->alloc);
++#endif
+   if (coderIndex == 0)
+     return SZ_ERROR_UNSUPPORTED;
+-  return BraState_SetFromMethod(sc, methodId, 0, p->alloc);
++  return Xz_StateCoder_Bc_SetFromMethod_Func(sc, methodId,
++      XzBcFilterStateBase_Filter_Dec, p->alloc);
+ }
+ 
+ 
+ static SRes MixCoder_ResetFromMethod(CMixCoder *p, unsigned coderIndex, UInt64 methodId, Byte *outBuf, size_t outBufSize)
+ {
+   IStateCoder *sc = &p->coders[coderIndex];
+-  switch (methodId)
+-  {
+-    case XZ_ID_LZMA2: return Lzma2State_ResetOutBuf(sc, outBuf, outBufSize);
+-  }
++  if (methodId == XZ_ID_LZMA2)
++    return Lzma2State_ResetOutBuf(sc, outBuf, outBufSize);
+   return SZ_ERROR_UNSUPPORTED;
+ }
+ 
+@@ -568,7 +600,7 @@ static SRes MixCoder_Code(CMixCoder *p,
+     SizeT destLen2, srcLen2;
+     int wasFinished;
+     
+-    PRF_STR("------- MixCoder Single ----------");
++    PRF_STR("------- MixCoder Single ----------")
+       
+     srcLen2 = srcLenOrig;
+     destLen2 = destLenOrig;
+@@ -615,14 +647,14 @@ static SRes MixCoder_Code(CMixCoder *p,
+         processed = coder->Filter(coder->p, p->outBuf, processed);
+         if (wasFinished || (destFinish && p->outWritten == destLenOrig))
+           processed = p->outWritten;
+-        PRF_STR_INT("filter", i);
++        PRF_STR_INT("filter", i)
+       }
+       *destLen = processed;
+     }
+     return res;
+   }
+ 
+-  PRF_STR("standard mix");
++  PRF_STR("standard mix")
+ 
+   if (p->numCoders != 1)
+   {
+@@ -772,14 +804,14 @@ static BoolInt Xz_CheckFooter(CXzStreamFlags flags, UInt64 indexSize, const Byte
+ }
+ 
+ #define READ_VARINT_AND_CHECK(buf, pos, size, res) \
+-  { unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \
++  { const unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \
+   if (s == 0) return SZ_ERROR_ARCHIVE; \
+   pos += s; }
+ 
+ 
+ static BoolInt XzBlock_AreSupportedFilters(const CXzBlock *p)
+ {
+-  unsigned numFilters = XzBlock_GetNumFilters(p) - 1;
++  const unsigned numFilters = XzBlock_GetNumFilters(p) - 1;
+   unsigned i;
+   {
+     const CXzFilter *f = &p->filters[numFilters];
+@@ -795,8 +827,7 @@ static BoolInt XzBlock_AreSupportedFilters(const CXzBlock *p)
+       if (f->propsSize != 1)
+         return False;
+     }
+-    else if (f->id < XZ_ID_Delta
+-        || f->id > XZ_ID_SPARC
++    else if (!XZ_IS_SUPPORTED_FILTER_ID(f->id)
+         || (f->propsSize != 0 && f->propsSize != 4))
+       return False;
+   }
+@@ -821,22 +852,24 @@ SRes XzBlock_Parse(CXzBlock *p, const Byte *header)
+   p->packSize = (UInt64)(Int64)-1;
+   if (XzBlock_HasPackSize(p))
+   {
+-    READ_VARINT_AND_CHECK(header, pos, headerSize, &p->packSize);
++    READ_VARINT_AND_CHECK(header, pos, headerSize, &p->packSize)
+     if (p->packSize == 0 || p->packSize + headerSize >= (UInt64)1 << 63)
+       return SZ_ERROR_ARCHIVE;
+   }
+ 
+   p->unpackSize = (UInt64)(Int64)-1;
+   if (XzBlock_HasUnpackSize(p))
+-    READ_VARINT_AND_CHECK(header, pos, headerSize, &p->unpackSize);
++  {
++    READ_VARINT_AND_CHECK(header, pos, headerSize, &p->unpackSize)
++  }
+ 
+   numFilters = XzBlock_GetNumFilters(p);
+   for (i = 0; i < numFilters; i++)
+   {
+     CXzFilter *filter = p->filters + i;
+     UInt64 size;
+-    READ_VARINT_AND_CHECK(header, pos, headerSize, &filter->id);
+-    READ_VARINT_AND_CHECK(header, pos, headerSize, &size);
++    READ_VARINT_AND_CHECK(header, pos, headerSize, &filter->id)
++    READ_VARINT_AND_CHECK(header, pos, headerSize, &size)
+     if (size > headerSize - pos || size > XZ_FILTER_PROPS_SIZE_MAX)
+       return SZ_ERROR_ARCHIVE;
+     filter->propsSize = (UInt32)size;
+@@ -894,20 +927,20 @@ static SRes XzDecMix_Init(CMixCoder *p, const CXzBlock *block, Byte *outBuf, siz
+     MixCoder_Free(p);
+     for (i = 0; i < numFilters; i++)
+     {
+-      RINOK(MixCoder_SetFromMethod(p, i, block->filters[numFilters - 1 - i].id, outBuf, outBufSize));
++      RINOK(MixCoder_SetFromMethod(p, i, block->filters[numFilters - 1 - i].id, outBuf, outBufSize))
+     }
+     p->numCoders = numFilters;
+   }
+   else
+   {
+-    RINOK(MixCoder_ResetFromMethod(p, 0, block->filters[numFilters - 1].id, outBuf, outBufSize));
++    RINOK(MixCoder_ResetFromMethod(p, 0, block->filters[numFilters - 1].id, outBuf, outBufSize))
+   }
+ 
+   for (i = 0; i < numFilters; i++)
+   {
+     const CXzFilter *f = &block->filters[numFilters - 1 - i];
+     IStateCoder *sc = &p->coders[i];
+-    RINOK(sc->SetProps(sc->p, f->props, f->propsSize, p->alloc));
++    RINOK(sc->SetProps(sc->p, f->props, f->propsSize, p->alloc))
+   }
+   
+   MixCoder_Init(p);
+@@ -1001,7 +1034,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
+       SRes res;
+ 
+       ECoderFinishMode finishMode2 = finishMode;
+-      BoolInt srcFinished2 = srcFinished;
++      BoolInt srcFinished2 = (BoolInt)srcFinished;
+       BoolInt destFinish = False;
+ 
+       if (p->block.packSize != (UInt64)(Int64)-1)
+@@ -1054,14 +1087,14 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
+       (*destLen) += destLen2;
+       p->unpackSize += destLen2;
+ 
+-      RINOK(res);
++      RINOK(res)
+ 
+       if (*status != CODER_STATUS_FINISHED_WITH_MARK)
+       {
+         if (p->block.packSize == p->packSize
+             && *status == CODER_STATUS_NEEDS_MORE_INPUT)
+         {
+-          PRF_STR("CODER_STATUS_NEEDS_MORE_INPUT");
++          PRF_STR("CODER_STATUS_NEEDS_MORE_INPUT")
+           *status = CODER_STATUS_NOT_SPECIFIED;
+           return SZ_ERROR_DATA;
+         }
+@@ -1078,7 +1111,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
+         if ((p->block.packSize != (UInt64)(Int64)-1 && p->block.packSize != p->packSize)
+            || (p->block.unpackSize != (UInt64)(Int64)-1 && p->block.unpackSize != p->unpackSize))
+         {
+-          PRF_STR("ERROR: block.size mismatch");
++          PRF_STR("ERROR: block.size mismatch")
+           return SZ_ERROR_DATA;
+         }
+       }
+@@ -1094,7 +1127,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
+       return SZ_OK;
+     }
+ 
+-    switch (p->state)
++    switch ((int)p->state)
+     {
+       case XZ_STATE_STREAM_HEADER:
+       {
+@@ -1109,7 +1142,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
+         }
+         else
+         {
+-          RINOK(Xz_ParseHeader(&p->streamFlags, p->buf));
++          RINOK(Xz_ParseHeader(&p->streamFlags, p->buf))
+           p->numStartedStreams++;
+           p->indexSize = 0;
+           p->numBlocks = 0;
+@@ -1139,15 +1172,15 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
+             p->state = XZ_STATE_STREAM_INDEX;
+             break;
+           }
+-          p->blockHeaderSize = ((UInt32)p->buf[0] << 2) + 4;
++          p->blockHeaderSize = ((unsigned)p->buf[0] << 2) + 4;
+           break;
+         }
+         
+         if (p->pos != p->blockHeaderSize)
+         {
+-          UInt32 cur = p->blockHeaderSize - p->pos;
++          unsigned cur = p->blockHeaderSize - p->pos;
+           if (cur > srcRem)
+-            cur = (UInt32)srcRem;
++            cur = (unsigned)srcRem;
+           memcpy(p->buf + p->pos, src, cur);
+           p->pos += cur;
+           (*srcLen) += cur;
+@@ -1155,7 +1188,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
+         }
+         else
+         {
+-          RINOK(XzBlock_Parse(&p->block, p->buf));
++          RINOK(XzBlock_Parse(&p->block, p->buf))
+           if (!XzBlock_AreSupportedFilters(&p->block))
+             return SZ_ERROR_UNSUPPORTED;
+           p->numTotalBlocks++;
+@@ -1168,7 +1201,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
+             p->headerParsedOk = True;
+             return SZ_OK;
+           }
+-          RINOK(XzDecMix_Init(&p->decoder, &p->block, p->outBuf, p->outBufSize));
++          RINOK(XzDecMix_Init(&p->decoder, &p->block, p->outBuf, p->outBufSize))
+         }
+         break;
+       }
+@@ -1189,8 +1222,8 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
+         }
+         else
+         {
+-          UInt32 checkSize = XzFlags_GetCheckSize(p->streamFlags);
+-          UInt32 cur = checkSize - p->pos;
++          const unsigned checkSize = XzFlags_GetCheckSize(p->streamFlags);
++          unsigned cur = checkSize - p->pos;
+           if (cur != 0)
+           {
+             if (srcRem == 0)
+@@ -1199,7 +1232,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
+               return SZ_OK;
+             }
+             if (cur > srcRem)
+-              cur = (UInt32)srcRem;
++              cur = (unsigned)srcRem;
+             memcpy(p->buf + p->pos, src, cur);
+             p->pos += cur;
+             (*srcLen) += cur;
+@@ -1288,9 +1321,9 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
+ 
+       case XZ_STATE_STREAM_FOOTER:
+       {
+-        UInt32 cur = XZ_STREAM_FOOTER_SIZE - p->pos;
++        unsigned cur = XZ_STREAM_FOOTER_SIZE - p->pos;
+         if (cur > srcRem)
+-          cur = (UInt32)srcRem;
++          cur = (unsigned)srcRem;
+         memcpy(p->buf + p->pos, src, cur);
+         p->pos += cur;
+         (*srcLen) += cur;
+@@ -1325,6 +1358,8 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
+       }
+       
+       case XZ_STATE_BLOCK: break; /* to disable GCC warning */
++
++      default: return SZ_ERROR_FAIL;
+     }
+   }
+   /*
+@@ -1389,7 +1424,7 @@ UInt64 XzUnpacker_GetExtraSize(const CXzUnpacker *p)
+ 
+ 
+ 
+-#ifndef _7ZIP_ST
++#ifndef Z7_ST
+ #include "MtDec.h"
+ #endif
+ 
+@@ -1400,7 +1435,7 @@ void XzDecMtProps_Init(CXzDecMtProps *p)
+   p->outStep_ST = 1 << 20;
+   p->ignoreErrors = False;
+ 
+-  #ifndef _7ZIP_ST
++  #ifndef Z7_ST
+   p->numThreads = 1;
+   p->inBufSize_MT = 1 << 18;
+   p->memUseMax = sizeof(size_t) << 28;
+@@ -1409,7 +1444,7 @@ void XzDecMtProps_Init(CXzDecMtProps *p)
+ 
+ 
+ 
+-#ifndef _7ZIP_ST
++#ifndef Z7_ST
+ 
+ /* ---------- CXzDecMtThread ---------- */
+ 
+@@ -1448,7 +1483,7 @@ typedef struct
+ 
+ /* ---------- CXzDecMt ---------- */
+ 
+-typedef struct
++struct CXzDecMt
+ {
+   CAlignOffsetAlloc alignOffsetAlloc;
+   ISzAllocPtr allocMid;
+@@ -1456,9 +1491,9 @@ typedef struct
+   CXzDecMtProps props;
+   size_t unpackBlockMaxSize;
+   
+-  ISeqInStream *inStream;
+-  ISeqOutStream *outStream;
+-  ICompressProgress *progress;
++  ISeqInStreamPtr inStream;
++  ISeqOutStreamPtr outStream;
++  ICompressProgressPtr progress;
+ 
+   BoolInt finishMode;
+   BoolInt outSize_Defined;
+@@ -1481,7 +1516,7 @@ typedef struct
+   ECoderStatus status;
+   SRes codeRes;
+ 
+-  #ifndef _7ZIP_ST
++  #ifndef Z7_ST
+   BoolInt mainDecoderWasCalled;
+   // int statErrorDefined;
+   int finishedDecoderIndex;
+@@ -1504,10 +1539,9 @@ typedef struct
+ 
+   BoolInt mtc_WasConstructed;
+   CMtDec mtc;
+-  CXzDecMtThread coders[MTDEC__THREADS_MAX];
++  CXzDecMtThread coders[MTDEC_THREADS_MAX];
+   #endif
+-
+-} CXzDecMt;
++};
+ 
+ 
+ 
+@@ -1535,11 +1569,11 @@ CXzDecMtHandle XzDecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid)
+ 
+   XzDecMtProps_Init(&p->props);
+ 
+-  #ifndef _7ZIP_ST
++  #ifndef Z7_ST
+   p->mtc_WasConstructed = False;
+   {
+     unsigned i;
+-    for (i = 0; i < MTDEC__THREADS_MAX; i++)
++    for (i = 0; i < MTDEC_THREADS_MAX; i++)
+     {
+       CXzDecMtThread *coder = &p->coders[i];
+       coder->dec_created = False;
+@@ -1549,16 +1583,16 @@ CXzDecMtHandle XzDecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid)
+   }
+   #endif
+ 
+-  return p;
++  return (CXzDecMtHandle)p;
+ }
+ 
+ 
+-#ifndef _7ZIP_ST
++#ifndef Z7_ST
+ 
+ static void XzDecMt_FreeOutBufs(CXzDecMt *p)
+ {
+   unsigned i;
+-  for (i = 0; i < MTDEC__THREADS_MAX; i++)
++  for (i = 0; i < MTDEC_THREADS_MAX; i++)
+   {
+     CXzDecMtThread *coder = &p->coders[i];
+     if (coder->outBuf)
+@@ -1595,13 +1629,15 @@ static void XzDecMt_FreeSt(CXzDecMt *p)
+ }
+ 
+ 
+-void XzDecMt_Destroy(CXzDecMtHandle pp)
++// #define GET_CXzDecMt_p  CXzDecMt *p = pp;
++
++void XzDecMt_Destroy(CXzDecMtHandle p)
+ {
+-  CXzDecMt *p = (CXzDecMt *)pp;
++  // GET_CXzDecMt_p
+ 
+   XzDecMt_FreeSt(p);
+ 
+-  #ifndef _7ZIP_ST
++  #ifndef Z7_ST
+ 
+   if (p->mtc_WasConstructed)
+   {
+@@ -1610,7 +1646,7 @@ void XzDecMt_Destroy(CXzDecMtHandle pp)
+   }
+   {
+     unsigned i;
+-    for (i = 0; i < MTDEC__THREADS_MAX; i++)
++    for (i = 0; i < MTDEC_THREADS_MAX; i++)
+     {
+       CXzDecMtThread *t = &p->coders[i];
+       if (t->dec_created)
+@@ -1625,12 +1661,12 @@ void XzDecMt_Destroy(CXzDecMtHandle pp)
+ 
+   #endif
+ 
+-  ISzAlloc_Free(p->alignOffsetAlloc.baseAlloc, pp);
++  ISzAlloc_Free(p->alignOffsetAlloc.baseAlloc, p);
+ }
+ 
+ 
+ 
+-#ifndef _7ZIP_ST
++#ifndef Z7_ST
+ 
+ static void XzDecMt_Callback_Parse(void *obj, unsigned coderIndex, CMtDecCallbackInfo *cc)
+ {
+@@ -1696,7 +1732,7 @@ static void XzDecMt_Callback_Parse(void *obj, unsigned coderIndex, CMtDecCallbac
+     coder->dec.parseMode = True;
+     coder->dec.headerParsedOk = False;
+     
+-    PRF_STR_INT("Parse", srcSize2);
++    PRF_STR_INT("Parse", srcSize2)
+     
+     res = XzUnpacker_Code(&coder->dec,
+         NULL, &destSize,
+@@ -1739,10 +1775,10 @@ static void XzDecMt_Callback_Parse(void *obj, unsigned coderIndex, CMtDecCallbac
+           }
+         }
+         {
+-        UInt64 packSize = block->packSize;
+-        UInt64 packSizeAligned = packSize + ((0 - (unsigned)packSize) & 3);
+-        UInt32 checkSize = XzFlags_GetCheckSize(coder->dec.streamFlags);
+-        UInt64 blockPackSum = coder->inPreSize + packSizeAligned + checkSize;
++        const UInt64 packSize = block->packSize;
++        const UInt64 packSizeAligned = packSize + ((0 - (unsigned)packSize) & 3);
++        const unsigned checkSize = XzFlags_GetCheckSize(coder->dec.streamFlags);
++        const UInt64 blockPackSum = coder->inPreSize + packSizeAligned + checkSize;
+         // if (blockPackSum <= me->props.inBlockMax)
+         // unpackBlockMaxSize
+         {
+@@ -2071,7 +2107,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
+         }
+         data += cur;
+         size -= cur;
+-        // PRF_STR_INT("Written size =", size);
++        // PRF_STR_INT("Written size =", size)
+         if (size == 0)
+           break;
+         res = MtProgress_ProgressAdd(&me->mtc.mtProgress, 0, 0);
+@@ -2087,7 +2123,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
+         return res;
+       }
+ 
+-    RINOK(res);
++    RINOK(res)
+ 
+     if (coder->inPreSize != coder->inCodeSize
+         || coder->blockPackTotal != coder->inCodeSize)
+@@ -2106,13 +2142,13 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
+   // (coder->state == MTDEC_PARSE_END) means that there are no other working threads
+   // so we can use mtc variables without lock
+ 
+-  PRF_STR_INT("Write MTDEC_PARSE_END", me->mtc.inProcessed);
++  PRF_STR_INT("Write MTDEC_PARSE_END", me->mtc.inProcessed)
+ 
+   me->mtc.mtProgress.totalInSize = me->mtc.inProcessed;
+   {
+     CXzUnpacker *dec = &me->dec;
+     
+-    PRF_STR_INT("PostSingle", srcSize);
++    PRF_STR_INT("PostSingle", srcSize)
+     
+     {
+       size_t srcProcessed = srcSize;
+@@ -2186,7 +2222,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
+           me->mtc.crossEnd = srcSize;
+         }
+ 
+-        PRF_STR_INT("XZ_STATE_STREAM_HEADER crossEnd = ", (unsigned)me->mtc.crossEnd);
++        PRF_STR_INT("XZ_STATE_STREAM_HEADER crossEnd = ", (unsigned)me->mtc.crossEnd)
+ 
+         return SZ_OK;
+       }
+@@ -2277,7 +2313,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
+           UInt64 inDelta = me->mtc.inProcessed - inProgressPrev;
+           if (inDelta >= (1 << 22))
+           {
+-            RINOK(MtProgress_Progress_ST(&me->mtc.mtProgress));
++            RINOK(MtProgress_Progress_ST(&me->mtc.mtProgress))
+             inProgressPrev = me->mtc.inProcessed;
+           }
+         }
+@@ -2331,7 +2367,7 @@ void XzStatInfo_Clear(CXzStatInfo *p)
+ */
+ 
+ static SRes XzDecMt_Decode_ST(CXzDecMt *p
+-    #ifndef _7ZIP_ST
++    #ifndef Z7_ST
+     , BoolInt tMode
+     #endif
+     , CXzStatInfo *stat)
+@@ -2343,11 +2379,11 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p
+ 
+   CXzUnpacker *dec;
+ 
+-  #ifndef _7ZIP_ST
++  #ifndef Z7_ST
+   if (tMode)
+   {
+     XzDecMt_FreeOutBufs(p);
+-    tMode = MtDec_PrepareRead(&p->mtc);
++    tMode = (BoolInt)MtDec_PrepareRead(&p->mtc);
+   }
+   #endif
+ 
+@@ -2400,7 +2436,7 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p
+ 
+     if (inPos == inLim)
+     {
+-      #ifndef _7ZIP_ST
++      #ifndef Z7_ST
+       if (tMode)
+       {
+         inData = MtDec_Read(&p->mtc, &inLim);
+@@ -2577,19 +2613,19 @@ static void XzStatInfo_SetStat(const CXzUnpacker *dec,
+ 
+ 
+ 
+-SRes XzDecMt_Decode(CXzDecMtHandle pp,
++SRes XzDecMt_Decode(CXzDecMtHandle p,
+     const CXzDecMtProps *props,
+     const UInt64 *outDataSize, int finishMode,
+-    ISeqOutStream *outStream,
++    ISeqOutStreamPtr outStream,
+     // Byte *outBuf, size_t *outBufSize,
+-    ISeqInStream *inStream,
++    ISeqInStreamPtr inStream,
+     // const Byte *inData, size_t inDataSize,
+     CXzStatInfo *stat,
+     int *isMT,
+-    ICompressProgress *progress)
++    ICompressProgressPtr progress)
+ {
+-  CXzDecMt *p = (CXzDecMt *)pp;
+-  #ifndef _7ZIP_ST
++  // GET_CXzDecMt_p
++  #ifndef Z7_ST
+   BoolInt tMode;
+   #endif
+ 
+@@ -2610,7 +2646,7 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
+     p->outSize = *outDataSize;
+   }
+ 
+-  p->finishMode = finishMode;
++  p->finishMode = (BoolInt)finishMode;
+ 
+   // p->outSize = 457; p->outSize_Defined = True; p->finishMode = False; // for test
+ 
+@@ -2640,7 +2676,7 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
+     */
+ 
+   
+-  #ifndef _7ZIP_ST
++  #ifndef Z7_ST
+ 
+   p->isBlockHeaderState_Parse = False;
+   p->isBlockHeaderState_Write = False;
+@@ -2782,7 +2818,7 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
+       return res;
+     }
+ 
+-    PRF_STR("----- decoding ST -----");
++    PRF_STR("----- decoding ST -----")
+   }
+ 
+   #endif
+@@ -2792,13 +2828,13 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
+ 
+   {
+     SRes res = XzDecMt_Decode_ST(p
+-        #ifndef _7ZIP_ST
++        #ifndef Z7_ST
+         , tMode
+         #endif
+         , stat
+         );
+ 
+-    #ifndef _7ZIP_ST
++    #ifndef Z7_ST
+     // we must set error code from MT decoding at first
+     if (p->mainErrorCode != SZ_OK)
+       stat->DecodeRes = p->mainErrorCode;
+@@ -2835,3 +2871,7 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
+     return res;
+   }
+ }
++
++#undef PRF
++#undef PRF_STR
++#undef PRF_STR_INT_2
+diff --git a/third_party/lzma_sdk/C/XzIn.c b/third_party/lzma_sdk/C/XzIn.c
+index 84f868ec65770120eceb75171f911d39ae0ba3e1..b68af965c1724033d4145adb6ec78be54aecbae1 100644
+--- a/third_party/lzma_sdk/C/XzIn.c
++++ b/third_party/lzma_sdk/C/XzIn.c
+@@ -1,5 +1,5 @@
+ /* XzIn.c - Xz input
+-2021-09-04 : Igor Pavlov : Public domain */
++2023-09-07 : Igor Pavlov : Public domain */
+ 
+ #include "Precomp.h"
+ 
+@@ -15,27 +15,29 @@
+ #define XZ_FOOTER_SIG_CHECK(p) ((p)[0] == XZ_FOOTER_SIG_0 && (p)[1] == XZ_FOOTER_SIG_1)
+ 
+ 
+-SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStream *inStream)
++SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStreamPtr inStream)
+ {
+   Byte sig[XZ_STREAM_HEADER_SIZE];
+-  RINOK(SeqInStream_Read2(inStream, sig, XZ_STREAM_HEADER_SIZE, SZ_ERROR_NO_ARCHIVE));
+-  if (memcmp(sig, XZ_SIG, XZ_SIG_SIZE) != 0)
++  size_t processedSize = XZ_STREAM_HEADER_SIZE;
++  RINOK(SeqInStream_ReadMax(inStream, sig, &processedSize))
++  if (processedSize != XZ_STREAM_HEADER_SIZE
++      || memcmp(sig, XZ_SIG, XZ_SIG_SIZE) != 0)
+     return SZ_ERROR_NO_ARCHIVE;
+   return Xz_ParseHeader(p, sig);
+ }
+ 
+ #define READ_VARINT_AND_CHECK(buf, pos, size, res) \
+-  { unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \
++  { const unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \
+   if (s == 0) return SZ_ERROR_ARCHIVE; \
+   pos += s; }
+ 
+-SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStream *inStream, BoolInt *isIndex, UInt32 *headerSizeRes)
++SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex, UInt32 *headerSizeRes)
+ {
+   Byte header[XZ_BLOCK_HEADER_SIZE_MAX];
+   unsigned headerSize;
+   *headerSizeRes = 0;
+-  RINOK(SeqInStream_ReadByte(inStream, &header[0]));
+-  headerSize = (unsigned)header[0];
++  RINOK(SeqInStream_ReadByte(inStream, &header[0]))
++  headerSize = header[0];
+   if (headerSize == 0)
+   {
+     *headerSizeRes = 1;
+@@ -45,20 +47,27 @@ SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStream *inStream, BoolInt *isIndex, U
+ 
+   *isIndex = False;
+   headerSize = (headerSize << 2) + 4;
+-  *headerSizeRes = headerSize;
+-  RINOK(SeqInStream_Read(inStream, header + 1, headerSize - 1));
++  *headerSizeRes = (UInt32)headerSize;
++  {
++    size_t processedSize = headerSize - 1;
++    RINOK(SeqInStream_ReadMax(inStream, header + 1, &processedSize))
++    if (processedSize != headerSize - 1)
++      return SZ_ERROR_INPUT_EOF;
++  }
+   return XzBlock_Parse(p, header);
+ }
+ 
+ #define ADD_SIZE_CHECK(size, val) \
+-  { UInt64 newSize = size + (val); if (newSize < size) return XZ_SIZE_OVERFLOW; size = newSize; }
++  { const UInt64 newSize = size + (val); if (newSize < size) return XZ_SIZE_OVERFLOW; size = newSize; }
+ 
+ UInt64 Xz_GetUnpackSize(const CXzStream *p)
+ {
+   UInt64 size = 0;
+   size_t i;
+   for (i = 0; i < p->numBlocks; i++)
+-    ADD_SIZE_CHECK(size, p->blocks[i].unpackSize);
++  {
++    ADD_SIZE_CHECK(size, p->blocks[i].unpackSize)
++  }
+   return size;
+ }
+ 
+@@ -67,12 +76,14 @@ UInt64 Xz_GetPackSize(const CXzStream *p)
+   UInt64 size = 0;
+   size_t i;
+   for (i = 0; i < p->numBlocks; i++)
+-    ADD_SIZE_CHECK(size, (p->blocks[i].totalSize + 3) & ~(UInt64)3);
++  {
++    ADD_SIZE_CHECK(size, (p->blocks[i].totalSize + 3) & ~(UInt64)3)
++  }
+   return size;
+ }
+ 
+ /*
+-SRes XzBlock_ReadFooter(CXzBlock *p, CXzStreamFlags f, ISeqInStream *inStream)
++SRes XzBlock_ReadFooter(CXzBlock *p, CXzStreamFlags f, ISeqInStreamPtr inStream)
+ {
+   return SeqInStream_Read(inStream, p->check, XzFlags_GetCheckSize(f));
+ }
+@@ -93,7 +104,7 @@ static SRes Xz_ReadIndex2(CXzStream *p, const Byte *buf, size_t size, ISzAllocPt
+ 
+   {
+     UInt64 numBlocks64;
+-    READ_VARINT_AND_CHECK(buf, pos, size, &numBlocks64);
++    READ_VARINT_AND_CHECK(buf, pos, size, &numBlocks64)
+     numBlocks = (size_t)numBlocks64;
+     if (numBlocks != numBlocks64 || numBlocks * 2 > size)
+       return SZ_ERROR_ARCHIVE;
+@@ -110,8 +121,8 @@ static SRes Xz_ReadIndex2(CXzStream *p, const Byte *buf, size_t size, ISzAllocPt
+     for (i = 0; i < numBlocks; i++)
+     {
+       CXzBlockSizes *block = &p->blocks[i];
+-      READ_VARINT_AND_CHECK(buf, pos, size, &block->totalSize);
+-      READ_VARINT_AND_CHECK(buf, pos, size, &block->unpackSize);
++      READ_VARINT_AND_CHECK(buf, pos, size, &block->totalSize)
++      READ_VARINT_AND_CHECK(buf, pos, size, &block->unpackSize)
+       if (block->totalSize == 0)
+         return SZ_ERROR_ARCHIVE;
+     }
+@@ -122,7 +133,7 @@ static SRes Xz_ReadIndex2(CXzStream *p, const Byte *buf, size_t size, ISzAllocPt
+   return (pos == size) ? SZ_OK : SZ_ERROR_ARCHIVE;
+ }
+ 
+-static SRes Xz_ReadIndex(CXzStream *p, ILookInStream *stream, UInt64 indexSize, ISzAllocPtr alloc)
++static SRes Xz_ReadIndex(CXzStream *p, ILookInStreamPtr stream, UInt64 indexSize, ISzAllocPtr alloc)
+ {
+   SRes res;
+   size_t size;
+@@ -142,14 +153,14 @@ static SRes Xz_ReadIndex(CXzStream *p, ILookInStream *stream, UInt64 indexSize,
+   return res;
+ }
+ 
+-static SRes LookInStream_SeekRead_ForArc(ILookInStream *stream, UInt64 offset, void *buf, size_t size)
++static SRes LookInStream_SeekRead_ForArc(ILookInStreamPtr stream, UInt64 offset, void *buf, size_t size)
+ {
+-  RINOK(LookInStream_SeekTo(stream, offset));
++  RINOK(LookInStream_SeekTo(stream, offset))
+   return LookInStream_Read(stream, buf, size);
+   /* return LookInStream_Read2(stream, buf, size, SZ_ERROR_NO_ARCHIVE); */
+ }
+ 
+-static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOffset, ISzAllocPtr alloc)
++static SRes Xz_ReadBackward(CXzStream *p, ILookInStreamPtr stream, Int64 *startOffset, ISzAllocPtr alloc)
+ {
+   UInt64 indexSize;
+   Byte buf[XZ_STREAM_FOOTER_SIZE];
+@@ -159,7 +170,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
+     return SZ_ERROR_NO_ARCHIVE;
+ 
+   pos -= XZ_STREAM_FOOTER_SIZE;
+-  RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf, XZ_STREAM_FOOTER_SIZE));
++  RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf, XZ_STREAM_FOOTER_SIZE))
+   
+   if (!XZ_FOOTER_SIG_CHECK(buf + 10))
+   {
+@@ -174,7 +185,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
+       
+       i = (pos > TEMP_BUF_SIZE) ? TEMP_BUF_SIZE : (size_t)pos;
+       pos -= i;
+-      RINOK(LookInStream_SeekRead_ForArc(stream, pos, temp, i));
++      RINOK(LookInStream_SeekRead_ForArc(stream, pos, temp, i))
+       total += (UInt32)i;
+       for (; i != 0; i--)
+         if (temp[i - 1] != 0)
+@@ -193,7 +204,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
+     if (pos < XZ_STREAM_FOOTER_SIZE)
+       return SZ_ERROR_NO_ARCHIVE;
+     pos -= XZ_STREAM_FOOTER_SIZE;
+-    RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf, XZ_STREAM_FOOTER_SIZE));
++    RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf, XZ_STREAM_FOOTER_SIZE))
+     if (!XZ_FOOTER_SIG_CHECK(buf + 10))
+       return SZ_ERROR_NO_ARCHIVE;
+   }
+@@ -217,8 +228,8 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
+     return SZ_ERROR_ARCHIVE;
+ 
+   pos -= indexSize;
+-  RINOK(LookInStream_SeekTo(stream, pos));
+-  RINOK(Xz_ReadIndex(p, stream, indexSize, alloc));
++  RINOK(LookInStream_SeekTo(stream, pos))
++  RINOK(Xz_ReadIndex(p, stream, indexSize, alloc))
+ 
+   {
+     UInt64 totalSize = Xz_GetPackSize(p);
+@@ -227,7 +238,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
+         || pos < totalSize + XZ_STREAM_HEADER_SIZE)
+       return SZ_ERROR_ARCHIVE;
+     pos -= (totalSize + XZ_STREAM_HEADER_SIZE);
+-    RINOK(LookInStream_SeekTo(stream, pos));
++    RINOK(LookInStream_SeekTo(stream, pos))
+     *startOffset = (Int64)pos;
+   }
+   {
+@@ -236,7 +247,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
+     SecToRead_CreateVTable(&secToRead);
+     secToRead.realStream = stream;
+ 
+-    RINOK(Xz_ReadHeader(&headerFlags, &secToRead.vt));
++    RINOK(Xz_ReadHeader(&headerFlags, &secToRead.vt))
+     return (p->flags == headerFlags) ? SZ_OK : SZ_ERROR_ARCHIVE;
+   }
+ }
+@@ -274,7 +285,9 @@ UInt64 Xzs_GetUnpackSize(const CXzs *p)
+   UInt64 size = 0;
+   size_t i;
+   for (i = 0; i < p->num; i++)
+-    ADD_SIZE_CHECK(size, Xz_GetUnpackSize(&p->streams[i]));
++  {
++    ADD_SIZE_CHECK(size, Xz_GetUnpackSize(&p->streams[i]))
++  }
+   return size;
+ }
+ 
+@@ -284,15 +297,17 @@ UInt64 Xzs_GetPackSize(const CXzs *p)
+   UInt64 size = 0;
+   size_t i;
+   for (i = 0; i < p->num; i++)
+-    ADD_SIZE_CHECK(size, Xz_GetTotalSize(&p->streams[i]));
++  {
++    ADD_SIZE_CHECK(size, Xz_GetTotalSize(&p->streams[i]))
++  }
+   return size;
+ }
+ */
+ 
+-SRes Xzs_ReadBackward(CXzs *p, ILookInStream *stream, Int64 *startOffset, ICompressProgress *progress, ISzAllocPtr alloc)
++SRes Xzs_ReadBackward(CXzs *p, ILookInStreamPtr stream, Int64 *startOffset, ICompressProgressPtr progress, ISzAllocPtr alloc)
+ {
+   Int64 endOffset = 0;
+-  RINOK(ILookInStream_Seek(stream, &endOffset, SZ_SEEK_END));
++  RINOK(ILookInStream_Seek(stream, &endOffset, SZ_SEEK_END))
+   *startOffset = endOffset;
+   for (;;)
+   {
+@@ -301,7 +316,7 @@ SRes Xzs_ReadBackward(CXzs *p, ILookInStream *stream, Int64 *startOffset, ICompr
+     Xz_Construct(&st);
+     res = Xz_ReadBackward(&st, stream, startOffset, alloc);
+     st.startOffset = (UInt64)*startOffset;
+-    RINOK(res);
++    RINOK(res)
+     if (p->num == p->numAllocated)
+     {
+       const size_t newNum = p->num + p->num / 4 + 1;
+@@ -317,7 +332,7 @@ SRes Xzs_ReadBackward(CXzs *p, ILookInStream *stream, Int64 *startOffset, ICompr
+     p->streams[p->num++] = st;
+     if (*startOffset == 0)
+       break;
+-    RINOK(LookInStream_SeekTo(stream, (UInt64)*startOffset));
++    RINOK(LookInStream_SeekTo(stream, (UInt64)*startOffset))
+     if (progress && ICompressProgress_Progress(progress, (UInt64)(endOffset - *startOffset), (UInt64)(Int64)-1) != SZ_OK)
+       return SZ_ERROR_PROGRESS;
+   }
+diff --git a/third_party/lzma_sdk/DEPS b/third_party/lzma_sdk/DEPS
+index 5de7365e95603fbccc61053caaa46190a3168b90..8a72064275aadb35739e3744eef466b7528add53 100644
+--- a/third_party/lzma_sdk/DEPS
++++ b/third_party/lzma_sdk/DEPS
+@@ -4,5 +4,6 @@
+ 
+ include_rules = [
+   "+base",
++  "+build",
+   "+testing",
+ ]
+diff --git a/third_party/lzma_sdk/README.chromium b/third_party/lzma_sdk/README.chromium
+index 7458450739601b3a8236e421241bb9ed73f3a014..a07824208330eec265a88310e42c380386399a24 100644
+--- a/third_party/lzma_sdk/README.chromium
++++ b/third_party/lzma_sdk/README.chromium
+@@ -1,25 +1,19 @@
+ Name: LZMA SDK
+ Short Name: lzma
+ URL: http://www.7-zip.org/sdk.html
+-Version: 22.01
+-Date: 2022-07-15
+-License: Public Domain
++Version: 24.09
++Date: 2024-12-13
++License: LZMA-SDK-9.22
+ License File: LICENSE
+ Security Critical: yes
+ Shipped: yes
+-CPEPrefix: cpe:2.3:a:7-zip:7-zip:22.01:*:*:*:*:*:*:*
++CPEPrefix: cpe:2.3:a:7-zip:7-zip:24.09:*:*:*:*:*:*:*
+ 
+ Description:
+-This contains a part of LZMA SDK 22.01.
++This contains a part of LZMA SDK 24.09.
+ 
+ Local Modifications:
+-The original code can be found at https://7-zip.org/a/lzma2201.7z.  Only parts
+-of this archive are copied here.  More specifically:
+-
+-1/ C code required to open 7z archive files and uncompress LZMA
+-2/ 7za, 7zr, and 7zz executables, if Windows is specified as a target OS.
+-3/ source code for SfxSetup, a utility for creating self extracting archives
+-4/ C code required for xz decompression (split into its own static library)
++The original code can be found at https://7-zip.org/a/lzma2409.7z.
+ 
+ The 7z binaries are installed via a 3pp CIPD package into bin/ if Windows is
+ specified as a target OS in the .gclient file. There are two subdirectories in
+@@ -39,23 +33,12 @@ binaries. Supported platforms are:
+ extract files. 7zr is "lightweight" and only handles 7zip extensions. 7za can
+ handle a few more.
+ 
+-The patch in chromium.patch was applied to 7zCrc.c, CpuArch.c, LZFind.c and
+-Sha256.c to disable some ARM code that was failing to build in Android and
+-Fuschia as well as some of the AVX2 and SSE4 code for Windows. In Fuschia,
+-`#include <asm/hwcap.h>` is not available. In Android builds, `armv8-a+crc` is
+-not a known target architecture, even when the -march cflag is passed,
+-specifying the CPU type to use. In Windows, Chromium still supports SSE3,
+-so it is not be ready to transition to utilizing AVX2 and SSE4, yet. Added
+-.note.gnu.property section to arm64/7zAsm.s to suppress compiler error requiring
+-bti. Shortened segment names in Asm/x86/*.asm files to make the builds
+-deterministic. Resolved include paths as well.
+-
+-The patch in chromium_ubsan.patch fixes some undefined behavior, flagged by
+-UBSan, caused by invalid pointer casts in LZMA SDK.
+-
+-The patch in Util/SfxSetup/chromium.patch was applied so that:
+-
+-1/ Fix for includes file names, since the original code causes an include
+-   file violation during git cl presubmit.
+-2/ Extended search range for 7z archive signature to accomodate
+-   increased sfx binary size in debug builds.
++The purposes of the patch in chromium.patch are:
+1. Extend the search range for 7z archive signature to accommodate increased sfx
++   binary size in debug builds.
++2. Fix undefined behavior in CpuArch.h flagged by UBSan caused by invalid
++   pointer casts.
++3. Shorten segment names in Asm/x86/LzmaDecOpt.asm to make the build
++   deterministic.
++4. Remove files irrelevant to Chromium use.
++5. Remove AVX and SSE41 code in LzFind.
+diff --git a/third_party/lzma_sdk/chromium.patch b/third_party/lzma_sdk/chromium.patch
+index 85b8089a6a0d31113f1570483fe07fb905f5b825..168ca263574d83a27d4afebc8857545254ebe79a 100644
+--- a/third_party/lzma_sdk/chromium.patch
++++ b/third_party/lzma_sdk/chromium.patch
+@@ -1,138 +1,46 @@
+-diff --git "a/lzma2107\\C/7zCrc.c" "b/third_party\\lzma_sdk/7zCrc.c"
+-index f186324ddc609..c0cc9bc7812e0 100644
+---- "a/lzma2107\\C/7zCrc.c"
+-+++ "b/third_party\\lzma_sdk/7zCrc.c"
+-@@ -78,20 +78,20 @@ UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const U
+-   #if defined(_MSC_VER)
+-     #if defined(MY_CPU_ARM64)
+-     #if (_MSC_VER >= 1910)
+--        #define USE_ARM64_CRC
+-+        // #define USE_ARM64_CRC
+-     #endif
+-     #endif
+-   #elif (defined(__clang__) && (__clang_major__ >= 3)) \
+-      || (defined(__GNUC__) && (__GNUC__ > 4))
+-       #if !defined(__ARM_FEATURE_CRC32)
+--        #define __ARM_FEATURE_CRC32 1
+-+        // #define __ARM_FEATURE_CRC32 1
+-           #if (!defined(__clang__) || (__clang_major__ > 3)) // fix these numbers
+--            #define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc")))
+-+            // #define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc")))
+-           #endif
+-       #endif
+-       #if defined(__ARM_FEATURE_CRC32)
+--        #define USE_ARM64_CRC
+--        #include <arm_acle.h>
+-+        // #define USE_ARM64_CRC
+-+        // #include <arm_acle.h>
+-       #endif
+-   #endif
+- 
+-diff --git "a/lzma2107\\C/CpuArch.c" "b/third_party\\lzma_sdk/CpuArch.c"
+-index fa9afe3970b3f..30451fba9b97b 100644
+---- "a/lzma2107\\C/CpuArch.c"
+-+++ "b/third_party\\lzma_sdk/CpuArch.c"
+-@@ -417,7 +417,9 @@ BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
+- 
+- #include <sys/auxv.h>
+- 
+-+#if !defined(ARMV8_OS_FUCHSIA)
+- #define USE_HWCAP
+-+#endif // !defined(ARMV8_OS_FUCHSIA)
+- 
+- #ifdef USE_HWCAP
+- 
+-diff --git "a/lzma2107\\C/LzFind.c" "b/third_party\\lzma_sdk/LzFind.c"
+-index 1b73c28484ccf..36f7330911435 100644
+---- "a/lzma2107\\C/LzFind.c"
+-+++ "b/third_party\\lzma_sdk/LzFind.c"
+-@@ -505,7 +505,7 @@ void MatchFinder_Init(CMatchFinder *p)
+- }
+- 
+- 
+--
+-+#if 0
+- #ifdef MY_CPU_X86_OR_AMD64
+-   #if defined(__clang__) && (__clang_major__ >= 8) \
+-     || defined(__GNUC__) && (__GNUC__ >= 8) \
+-@@ -549,6 +549,7 @@ void MatchFinder_Init(CMatchFinder *p)
+-   #endif
+- 
+- #endif
+-+#endif
+- 
+- /*
+- #ifndef ATTRIB_SSE41
+-diff --git "a/lzma2107\\C/Sha256.c" "b/third_party\\lzma_sdk/Sha256.c"
+-index 8b3983ea7323d..21996848c9156 100644
+---- "a/lzma2107\\C/Sha256.c"
+-+++ "b/third_party\\lzma_sdk/Sha256.c"
+-@@ -32,7 +32,8 @@ This code is based on public domain code from Wei Dai's Crypto++ library. */
+-       #define _SHA_SUPPORTED
+-     #endif
+-   #endif
+--#elif defined(MY_CPU_ARM_OR_ARM64)
+-+// TODO(crbug.com/1338627): Enable ARM optimizations
+-+#elif 0 // defined(MY_CPU_ARM_OR_ARM64)
+-   #ifdef _MSC_VER
+-     #if _MSC_VER >= 1910
+-       #define _SHA_SUPPORTED
++diff -r /Users/waffles/Downloads/lzma2409/Asm/x86/LzmaDecOpt.asm ./Asm/x86/LzmaDecOpt.asm
++42c42,44
++< _TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
++---
++> ; Make this deterministic
++> ; _TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
++> LZMADEC SEGMENT ALIGN(64) 'CODE'
++1336c1338
++< _TEXT$LZMADECOPT ENDS
++---
++> LZMADEC ENDS
++diff -r /Users/waffles/Downloads/lzma2409/C/CpuArch.h ./C/CpuArch.h
++413c413,417
++<
++---
++> // Disable MY_CPU_LE_UNALIGN. Although the underlying ISA may be able to load
++> // unaligned words, doing so via pointer casts is undefined behavior in C and
++> // C++, under both strict aliasing and because it is invalid to construct
++> // unaligned pointers. Instead, load the bytes generically and leave optimizing
++> // this to the compiler.
++419,420c423,424
++<     #define MY_CPU_LE_UNALIGN
++<     #define MY_CPU_LE_UNALIGN_64
++---
++>     // #define MY_CPU_LE_UNALIGN
++>     // #define MY_CPU_LE_UNALIGN_64
++diff -r /Users/waffles/Downloads/lzma2409/C/LzFind.c ./C/LzFind.c
++598c598
++<
++---
++> #if 0
++642c642
++<
++---
++> #endif // #if 0
++diff -r /Users/waffles/Downloads/lzma2409/C/Util/SfxSetup/SfxSetup.c ./C/Util/SfxSetup/SfxSetup.c
++13a14,17
++> // SHELLEXECUTEINFO
++> #include <windows.h>
++> #include <shellapi.h>
++>
++144c148
++< #define kSignatureSearchLimit (1 << 22)
++---
++> #define kSignatureSearchLimit (1 << 26)
+ 
+-diff --git a/lzma2107/Asm/arm64/7zAsm.S b/third_party/lzma_sdk/Asm/arm64/7zAsm.S
+-index 67d650d9a7c22..aa30a9ef8bf34 100644
+---- a/lzma2107/Asm/arm64/7zAsm.S
+-+++ b/third_party/lzma_sdk/Asm/arm64/7zAsm.S
+-@@ -37,6 +37,19 @@
+- #define  REG_ABI_PARAM_1 r1
+- #define  REG_ABI_PARAM_2 r2
+- 
+-+// The .note.gnu.property section is required because Chromium Android builds
+-+// utilize the linker flag force-bti.
+-+.pushsection .note.gnu.property, "a"
+-+.balign 8
+-+.long 4
+-+.long 0x10
+-+.long 0x5
+-+.asciz "GNU"
+-+.long 0xc0000000
+-+.long 4
+-+.long ((1 << 0 ) | (1 << 1))
+-+.long 0
+-+.popsection
+- 
+- .macro p2_add reg:req, param:req
+-         add     \reg, \reg, \param
+-diff --git a/lzma2107/Asm/x86/7zAsm.asm b/third_party/lzma_sdk/Asm/x86/7zAsm.asm
+-index 9ca25bccd28cf..df2d4cf5820da 100644
+---- a/lzma2107/Asm/x86/7zAsm.asm
+-+++ b/third_party/lzma_sdk/Asm/x86/7zAsm.asm
+-@@ -47,7 +47,7 @@ MY_ASM_START macro
+-   else
+-     .386
+-     .model flat
+--    _TEXT$00 SEGMENT PARA PUBLIC 'CODE'
+-+    SEVENZ SEGMENT PARA PUBLIC 'CODE'
+-   endif
+- endm
+- 
+-diff --git a/lzma2107/Asm/x86/LzmaDecOpt.asm b/third_party/lzma_sdk/Asm/x86/LzmaDecOpt.asm
+-index 7e08acc639743..ddbd88ffc2e95 100644
+---- a/lzma2107/Asm/x86/LzmaDecOpt.asm
+-+++ b/third_party/lzma_sdk/Asm/x86/LzmaDecOpt.asm
+-@@ -17,7 +17,7 @@ include 7zAsm.asm
+- 
+- MY_ASM_START
+- 
+--_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
+-+LZMADEC SEGMENT ALIGN(64) 'CODE'
+- 
+- MY_ALIGN macro num:req
+-         align  num
+-@@ -1298,6 +1298,6 @@ fin:
+- MY_POP_PRESERVED_ABI_REGS
+- MY_ENDP
+- 
+--_TEXT$LZMADECOPT ENDS
+-+LZMADEC ENDS
+- 
+- end
+diff --git a/third_party/lzma_sdk/chromium_ubsan.patch b/third_party/lzma_sdk/chromium_ubsan.patch
+deleted file mode 100644
+index 42bb013f1447bb5b44bf0fdd907ad124e010ad4a..0000000000000000000000000000000000000000
+--- a/third_party/lzma_sdk/chromium_ubsan.patch
++++ /dev/null
+@@ -1,25 +0,0 @@
+-diff --git a/third_party/lzma_sdk/C/CpuArch.h b/third_party/lzma_sdk/C/CpuArch.h
+-index 4856fbb12a256..8cd55bea48dc6 100644
+---- a/third_party/lzma_sdk/C/CpuArch.h
+-+++ b/third_party/lzma_sdk/C/CpuArch.h
+-@@ -253,6 +253,12 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
+- 
+- 
+- 
+-+// Disable MY_CPU_LE_UNALIGN. Although the underlying ISA may be able to load
+-+// unaligned words, doing so via pointer casts is undefined behavior in C and
+-+// C++, under both strict aliasing and because it is invalid to construct
+-+// unaligned pointers. Instead, load the bytes generically and leave optimizing
+-+// this to the compiler.
+-+#if 0
+- #ifdef MY_CPU_LE
+-   #if defined(MY_CPU_X86_OR_AMD64) \
+-       || defined(MY_CPU_ARM64)
+-@@ -264,6 +270,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
+-     #define MY_CPU_LE_UNALIGN
+-   #endif
+- #endif
+-+#endif
+- 
+- 
+- #ifdef MY_CPU_LE_UNALIGN
+diff --git a/third_party/lzma_sdk/google/seven_zip_reader.cc b/third_party/lzma_sdk/google/seven_zip_reader.cc
+index c246ae4422c696ca26d2fffcf5e5d6532c8fbe98..8b217dff20856d3c952198e2954cef9656bab781 100644
+--- a/third_party/lzma_sdk/google/seven_zip_reader.cc
++++ b/third_party/lzma_sdk/google/seven_zip_reader.cc
+@@ -209,7 +209,7 @@ Result SevenZipReaderImpl::Initialize(base::File archive_file) {
+   look_stream_buffer_.reset(look_stream_.buf);
+ 
+   look_stream_.bufSize = kStreamBufferSize;
+-  LookToRead2_Init(&look_stream_);
++  LookToRead2_INIT(&look_stream_);
+ 
+   // The destructor assumes that `stream_` is valid whenever `db_` is
+   // initialized.
+diff --git a/third_party/lzma_sdk/google/seven_zip_reader_unittest.cc b/third_party/lzma_sdk/google/seven_zip_reader_unittest.cc
+index 05c66788abe3ade9448776410ed01d0fec8e3766..7d29dd75bb877946cc6f435c0409d07525797ea7 100644
+--- a/third_party/lzma_sdk/google/seven_zip_reader_unittest.cc
++++ b/third_party/lzma_sdk/google/seven_zip_reader_unittest.cc
+@@ -47,6 +47,7 @@
+ #include "base/files/file_util.h"
+ #include "base/logging.h"
+ #include "base/path_service.h"
++#include "build/build_config.h"
+ #include "testing/gmock/include/gmock/gmock.h"
+ #include "testing/gtest/include/gtest/gtest.h"
+ 
+@@ -356,8 +357,15 @@ class SevenZipReaderFakeCrcTableTest : public testing::Test {
+   std::array<uint32_t, 2048> crc_table_;
+ };
+ 
++// TODO(crbug.com/388538957): Make this work on ARM64, which uses a different
++// number of tables and the crc32b instruction.
++#if defined(ARCH_CPU_ARM64)
++#define MAYBE_EmptyCrcWithFakeTable DISABLED_EmptyCrcWithFakeTable
++#else
++#define MAYBE_EmptyCrcWithFakeTable EmptyCrcWithFakeTable
++#endif
+ // This is useful functionality for the fuzzer, so we test it here.
+-TEST_F(SevenZipReaderFakeCrcTableTest, EmptyCrcWithFakeTable) {
++TEST_F(SevenZipReaderFakeCrcTableTest, MAYBE_EmptyCrcWithFakeTable) {
+   base::File file = OpenTestFile(FILE_PATH_LITERAL("fake_crc_table.7z"));
+   ASSERT_TRUE(file.IsValid());
+ 

+ 2 - 0
patches/v8/.patches

@@ -4,6 +4,8 @@ fix_disable_scope_reuse_associated_dchecks.patch
 fix_compiler_failure_on_older_clang.patch
 cherry-pick-3c2d220ad025.patch
 merged_reland_lower_the_maximum_js_parameter_count.patch
+cherry-pick-1c7ff4d5477f.patch
+cherry-pick-8834c16acfcc.patch
 cherry-pick-9209292e7898.patch
 cherry-pick-97e828af5cbc.patch
 cherry-pick-ca504d096c39.patch

+ 57 - 0
patches/v8/cherry-pick-1c7ff4d5477f.patch

@@ -0,0 +1,57 @@
+From 1c7ff4d5477f0e2bc7e20ce3b0f4f8eef71e6d13 Mon Sep 17 00:00:00 2001
+From: Olivier Flückiger <[email protected]>
+Date: Mon, 27 Jan 2025 14:50:34 +0100
+Subject: [PATCH] Merged: [turbofan] LoadField's type with recorded FieldType depends on stability
+
+Bug: 390465670
+(cherry picked from commit f920b6b2c2b1132cd1fbe1198500ceeaedcaa146)
+
+Change-Id: I0b430909275e583e5c0ecf2840e143fef461cbad
+Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/6218882
+Reviewed-by: Darius Mercadier <[email protected]>
+Commit-Queue: Darius Mercadier <[email protected]>
+Auto-Submit: Olivier Flückiger <[email protected]>
+Commit-Queue: Olivier Flückiger <[email protected]>
+Cr-Commit-Position: refs/branch-heads/13.2@{#72}
+Cr-Branched-From: 24068c59cedad9ee976ddc05431f5f497b1ebd71-refs/heads/13.2.152@{#1}
+Cr-Branched-From: 6054ba94db0969220be4f94dc1677fc4696bdc4f-refs/heads/main@{#97085}
+---
+
+diff --git a/src/compiler/access-info.cc b/src/compiler/access-info.cc
+index 7247c49..53e55a6 100644
+--- a/src/compiler/access-info.cc
++++ b/src/compiler/access-info.cc
+@@ -484,8 +484,9 @@
+       OptionalMapRef maybe_field_map =
+           TryMakeRef(broker(), FieldType::AsClass(*descriptors_field_type));
+       if (!maybe_field_map.has_value()) return Invalid();
+-      field_type = Type::For(maybe_field_map.value(), broker());
+       field_map = maybe_field_map;
++      // field_type can only be inferred from field_map if it is stable and we
++      // add a stability dependency. This happens on use in the access builder.
+     }
+   } else {
+     CHECK(details_representation.IsTagged());
+@@ -1186,8 +1187,9 @@
+       OptionalMapRef maybe_field_map =
+           TryMakeRef(broker(), FieldType::AsClass(*descriptors_field_type));
+       if (!maybe_field_map.has_value()) return Invalid();
+-      field_type = Type::For(maybe_field_map.value(), broker());
+       field_map = maybe_field_map;
++      // field_type can only be inferred from field_map if it is stable and we
++      // add a stability dependency. This happens on use in the access builder.
+     }
+   }
+ 
+diff --git a/src/compiler/property-access-builder.cc b/src/compiler/property-access-builder.cc
+index 32c0ccf..bcaf221 100644
+--- a/src/compiler/property-access-builder.cc
++++ b/src/compiler/property-access-builder.cc
+@@ -337,6 +337,7 @@
+       if (field_map->is_stable()) {
+         dependencies()->DependOnStableMap(field_map.value());
+         field_access.map = field_map;
++        field_access.type = Type::For(*field_map, broker());
+       }
+     }
+   }

+ 152 - 0
patches/v8/cherry-pick-8834c16acfcc.patch

@@ -0,0 +1,152 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Olivier=20Fl=C3=BCckiger?= <[email protected]>
+Date: Mon, 3 Feb 2025 10:18:36 +0100
+Subject: Merged: [runtime] Fix write barrier check in FastCloneJSObject
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Add missing check for page being marked.
+
+Fixed: 392521083
+(cherry picked from commit ce071a295e54b32bf7f03373da943678231cb1ee)
+
+Change-Id: Iccfc1617862a6010ab34389aa4931f45e7389c05
+Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/6221320
+Auto-Submit: Olivier Flückiger <[email protected]>
+Commit-Queue: Igor Sheludko <[email protected]>
+Commit-Queue: Olivier Flückiger <[email protected]>
+Reviewed-by: Igor Sheludko <[email protected]>
+Cr-Commit-Position: refs/branch-heads/13.2@{#74}
+Cr-Branched-From: 24068c59cedad9ee976ddc05431f5f497b1ebd71-refs/heads/13.2.152@{#1}
+Cr-Branched-From: 6054ba94db0969220be4f94dc1677fc4696bdc4f-refs/heads/main@{#97085}
+
+diff --git a/src/codegen/code-stub-assembler-inl.h b/src/codegen/code-stub-assembler-inl.h
+index e50133901daf151f50673355220f19c87c7b63ef..eab3ffbb73c1816e9cf62ae401eafe0ffc2133c6 100644
+--- a/src/codegen/code-stub-assembler-inl.h
++++ b/src/codegen/code-stub-assembler-inl.h
+@@ -215,9 +215,8 @@ TNode<Object> CodeStubAssembler::FastCloneJSObject(
+     Label if_no_write_barrier(this),
+         if_needs_write_barrier(this, Label::kDeferred);
+ 
+-    TNode<BoolT> needs_write_barrier = IsPageFlagReset(
+-        BitcastTaggedToWord(target), MemoryChunk::kIsInYoungGenerationMask);
+-    Branch(needs_write_barrier, &if_needs_write_barrier, &if_no_write_barrier);
++    TrySkipWriteBarrier(target, &if_needs_write_barrier);
++    Goto(&if_no_write_barrier);
+ 
+     BIND(&if_needs_write_barrier);
+     EmitCopyLoop(true);
+diff --git a/src/codegen/code-stub-assembler.cc b/src/codegen/code-stub-assembler.cc
+index bd2c37d3f10b019851a413d7577360735ba98311..c867c2d5c85cb235e970c642141e6cf27582adb0 100644
+--- a/src/codegen/code-stub-assembler.cc
++++ b/src/codegen/code-stub-assembler.cc
+@@ -5604,21 +5604,18 @@ void CodeStubAssembler::FillFixedDoubleArrayWithZero(
+                 std::make_pair(MachineType::UintPtr(), byte_length));
+ }
+ 
+-void CodeStubAssembler::JumpIfPointersFromHereAreInteresting(
+-    TNode<Object> object, Label* interesting) {
+-  Label finished(this);
+-  TNode<IntPtrT> object_word = BitcastTaggedToWord(object);
+-  TNode<IntPtrT> object_page_header = MemoryChunkFromAddress(object_word);
+-  TNode<IntPtrT> page_flags = UncheckedCast<IntPtrT>(
+-      Load(MachineType::IntPtr(), object_page_header,
+-           IntPtrConstant(MemoryChunkLayout::kFlagsOffset)));
+-  Branch(
+-      WordEqual(WordAnd(page_flags,
+-                        IntPtrConstant(
+-                            MemoryChunk::kPointersFromHereAreInterestingMask)),
+-                IntPtrConstant(0)),
+-      &finished, interesting);
+-  BIND(&finished);
++void CodeStubAssembler::TrySkipWriteBarrier(TNode<Object> object,
++                                            Label* if_needs_write_barrier) {
++  TNode<BoolT> may_need_write_barrier =
++      IsPageFlagSet(BitcastTaggedToWord(object),
++                    MemoryChunk::kPointersFromHereAreInterestingMask);
++  // TODO(olivf): Also skip the WB with V8_ENABLE_STICKY_MARK_BITS if the mark
++  // bit is set.
++  GotoIf(may_need_write_barrier, if_needs_write_barrier);
++
++  CSA_DCHECK(this, TaggedEqual(CallRuntime(Runtime::kIsNoWriteBarrierNeeded,
++                                           NoContextConstant(), object),
++                               TrueConstant()));
+ }
+ 
+ void CodeStubAssembler::MoveElements(ElementsKind kind,
+@@ -5646,7 +5643,7 @@ void CodeStubAssembler::MoveElements(ElementsKind kind,
+   // The write barrier can be ignored if {dst_elements} is in new space, or if
+   // the elements pointer is FixedDoubleArray.
+   if (needs_barrier_check) {
+-    JumpIfPointersFromHereAreInteresting(elements, &needs_barrier);
++    TrySkipWriteBarrier(elements, &needs_barrier);
+   }
+ 
+   const TNode<IntPtrT> source_byte_length =
+@@ -5740,7 +5737,7 @@ void CodeStubAssembler::CopyElements(ElementsKind kind,
+   // The write barrier can be ignored if {dst_elements} is in new space, or if
+   // the elements pointer is FixedDoubleArray.
+   if (needs_barrier_check) {
+-    JumpIfPointersFromHereAreInteresting(dst_elements, &needs_barrier);
++    TrySkipWriteBarrier(dst_elements, &needs_barrier);
+   }
+ 
+   TNode<IntPtrT> source_byte_length =
+diff --git a/src/codegen/code-stub-assembler.h b/src/codegen/code-stub-assembler.h
+index 7902e08b163436ac4bca4ee129b0edd14c81698e..1bc6cce71567336e24313dd20f8f96609fd2aef7 100644
+--- a/src/codegen/code-stub-assembler.h
++++ b/src/codegen/code-stub-assembler.h
+@@ -2295,8 +2295,7 @@ class V8_EXPORT_PRIVATE CodeStubAssembler
+       HoleConversionMode convert_holes = HoleConversionMode::kDontConvert,
+       TVariable<BoolT>* var_holes_converted = nullptr);
+ 
+-  void JumpIfPointersFromHereAreInteresting(TNode<Object> object,
+-                                            Label* interesting);
++  void TrySkipWriteBarrier(TNode<Object> object, Label* if_needs_write_barrier);
+ 
+   // Efficiently copy elements within a single array. The regions
+   // [src_index, src_index + length) and [dst_index, dst_index + length)
+diff --git a/src/runtime/runtime-test.cc b/src/runtime/runtime-test.cc
+index b76afdb9fe2acd2e9071e9998972319fff25a460..19b6a93b39f28d8a750770f7916c542940dc1541 100644
+--- a/src/runtime/runtime-test.cc
++++ b/src/runtime/runtime-test.cc
+@@ -2206,5 +2206,26 @@ RUNTIME_FUNCTION(Runtime_GetFeedback) {
+ #endif  // OBJECT_PRINT
+ }
+ 
++RUNTIME_FUNCTION(Runtime_IsNoWriteBarrierNeeded) {
++  HandleScope scope(isolate);
++  DisallowGarbageCollection no_gc;
++  if (args.length() != 1) {
++    return CrashUnlessFuzzing(isolate);
++  }
++  DirectHandle<Object> object = args.at(0);
++  if (!(*object).IsHeapObject()) {
++    return CrashUnlessFuzzing(isolate);
++  }
++  auto heap_object = Cast<HeapObject>(object);
++  if (InReadOnlySpace(*heap_object)) {
++    return ReadOnlyRoots(isolate).true_value();
++  }
++  if (WriteBarrier::GetWriteBarrierModeForObject(*heap_object, no_gc) !=
++      WriteBarrierMode::SKIP_WRITE_BARRIER) {
++    return ReadOnlyRoots(isolate).false_value();
++  }
++  return ReadOnlyRoots(isolate).true_value();
++}
++
+ }  // namespace internal
+ }  // namespace v8
+diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h
+index 6088fced3a07b63a008b1d8b20597c8f12e06aa8..bb3d12dcaab31947b04d0923dc81cebab6c0a321 100644
+--- a/src/runtime/runtime.h
++++ b/src/runtime/runtime.h
+@@ -578,6 +578,7 @@ namespace internal {
+   F(IsEfficiencyModeEnabled, 0, 1)            \
+   F(IsInPlaceInternalizableString, 1, 1)      \
+   F(IsInternalizedString, 1, 1)               \
++  F(IsNoWriteBarrierNeeded, 1, 1)             \
+   F(IsMaglevEnabled, 0, 1)                    \
+   F(IsSameHeapObject, 2, 1)                   \
+   F(IsSharedString, 1, 1)                     \