Pchen0/electron

Browse Source

chore: cherry-pick 3 changes from 2-M131 (#45040)

Pedro Pontes 4 months ago

parent

8b060dc73e

commit

a2a644e948

5 changed files with 2069 additions and 0 deletions

Split View Show Diff Stats

						
							+ 2
							
							- 0
						
patches/dawn/.patches
							 
								View File
							
				@@ -1,2 +1,4 @@
			
				 tint_validate_that_align_is_large_enough.patch
			
				 ir_fix_robustness_transform_on_textureload_of_sampled_and_depth.patch
			
				+tint_validate_layout_constraints_for_all_address_spaces.patch
			
				+msl_use_packed_vec3_for_workgroup_storage.patch

						
							+ 1860
							
							- 0
						
patches/dawn/msl_use_packed_vec3_for_workgroup_storage.patch
							 
								View File
							
				@@ -0,0 +1,1860 @@
			
				+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
			
				+From: James Price <[email protected]>
			
				+Date: Wed, 20 Nov 2024 22:41:04 +0000
			
				+Subject: [msl] Use packed_vec3 for workgroup storage
			
				+
			
				+This makes sure that the threadgroup allocation sizes that Tint
			
				+reflects to Dawn match the sizes of the types used in the generated
			
				+MSL shader.
			
				+
			
				+Bug: 378725734
			
				+Change-Id: Ib67f6d3299e376ca263419245912e8f453b6cb88
			
				+Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/215075
			
				+Reviewed-by: dan sinclair <[email protected]>
			
				+Commit-Queue: James Price <[email protected]>
			
				+(cherry picked from commit c368b05c475b3473276ad41f09c5f1b149df00e8)
			
				+Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/215937
			
				+Auto-Submit: James Price <[email protected]>
			
				+Reviewed-by: Antonio Maiorano <[email protected]>
			
				+
			
				+diff --git a/src/tint/lang/msl/writer/ast_printer/ast_printer_test.cc b/src/tint/lang/msl/writer/ast_printer/ast_printer_test.cc
			
				+index 4f6035869ebc504753d4b61d4ff015f4e463b8c9..45655ece01852bfba290e4a50908861e60af2661 100644
			
				+--- a/src/tint/lang/msl/writer/ast_printer/ast_printer_test.cc
			
				++++ b/src/tint/lang/msl/writer/ast_printer/ast_printer_test.cc
			
				+@@ -378,93 +378,137 @@ TEST_F(MslASTPrinterTest, WorkgroupMatrix_Multiples) {
			
				+     EXPECT_EQ(gen.Result(), R"(#include <metal_stdlib>
			
				+ 
			
				+ using namespace metal;
			
				++
			
				++template<typename T, size_t N>
			
				++struct tint_array {
			
				++    const constant T& operator[](size_t i) const constant { return elements[i]; }
			
				++    device T& operator[](size_t i) device { return elements[i]; }
			
				++    const device T& operator[](size_t i) const device { return elements[i]; }
			
				++    thread T& operator[](size_t i) thread { return elements[i]; }
			
				++    const thread T& operator[](size_t i) const thread { return elements[i]; }
			
				++    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
			
				++    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
			
				++    T elements[N];
			
				++};
			
				++
			
				+ struct tint_symbol_16 {
			
				+   float2x2 m1;
			
				+-  float2x3 m2;
			
				+   float2x4 m3;
			
				+ };
			
				+ 
			
				+ struct tint_symbol_24 {
			
				+   float3x2 m4;
			
				+-  float3x3 m5;
			
				+   float3x4 m6;
			
				+ };
			
				+ 
			
				+ struct tint_symbol_32 {
			
				+   float4x2 m7;
			
				+-  float4x3 m8;
			
				+   float4x4 m9;
			
				+ };
			
				+ 
			
				+-void tint_zero_workgroup_memory(uint local_idx, threadgroup float2x2* const tint_symbol, threadgroup float2x3* const tint_symbol_1, threadgroup float2x4* const tint_symbol_2) {
			
				++struct tint_packed_vec3_f32_array_element {
			
				++  packed_float3 elements;
			
				++};
			
				++
			
				++float2x3 tint_unpack_vec3_in_composite(tint_array<tint_packed_vec3_f32_array_element, 2> in) {
			
				++  float2x3 result = float2x3(float3(in[0].elements), float3(in[1].elements));
			
				++  return result;
			
				++}
			
				++
			
				++float3x3 tint_unpack_vec3_in_composite_1(tint_array<tint_packed_vec3_f32_array_element, 3> in) {
			
				++  float3x3 result = float3x3(float3(in[0].elements), float3(in[1].elements), float3(in[2].elements));
			
				++  return result;
			
				++}
			
				++
			
				++float4x3 tint_unpack_vec3_in_composite_2(tint_array<tint_packed_vec3_f32_array_element, 4> in) {
			
				++  float4x3 result = float4x3(float3(in[0].elements), float3(in[1].elements), float3(in[2].elements), float3(in[3].elements));
			
				++  return result;
			
				++}
			
				++
			
				++tint_array<tint_packed_vec3_f32_array_element, 2> tint_pack_vec3_in_composite(float2x3 in) {
			
				++  tint_array<tint_packed_vec3_f32_array_element, 2> result = tint_array<tint_packed_vec3_f32_array_element, 2>{{.elements=packed_float3(in[0])}, {.elements=packed_float3(in[1])}};
			
				++  return result;
			
				++}
			
				++
			
				++tint_array<tint_packed_vec3_f32_array_element, 3> tint_pack_vec3_in_composite_1(float3x3 in) {
			
				++  tint_array<tint_packed_vec3_f32_array_element, 3> result = tint_array<tint_packed_vec3_f32_array_element, 3>{{.elements=packed_float3(in[0])}, {.elements=packed_float3(in[1])}, {.elements=packed_float3(in[2])}};
			
				++  return result;
			
				++}
			
				++
			
				++tint_array<tint_packed_vec3_f32_array_element, 4> tint_pack_vec3_in_composite_2(float4x3 in) {
			
				++  tint_array<tint_packed_vec3_f32_array_element, 4> result = tint_array<tint_packed_vec3_f32_array_element, 4>{{.elements=packed_float3(in[0])}, {.elements=packed_float3(in[1])}, {.elements=packed_float3(in[2])}, {.elements=packed_float3(in[3])}};
			
				++  return result;
			
				++}
			
				++
			
				++void tint_zero_workgroup_memory(uint local_idx, threadgroup float2x2* const tint_symbol, threadgroup tint_array<tint_packed_vec3_f32_array_element, 2>* const tint_symbol_1, threadgroup float2x4* const tint_symbol_2) {
			
				+   if ((local_idx < 1u)) {
			
				+     *(tint_symbol) = float2x2(float2(0.0f), float2(0.0f));
			
				+-    *(tint_symbol_1) = float2x3(float3(0.0f), float3(0.0f));
			
				++    *(tint_symbol_1) = tint_pack_vec3_in_composite(float2x3(float3(0.0f), float3(0.0f)));
			
				+     *(tint_symbol_2) = float2x4(float4(0.0f), float4(0.0f));
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+ }
			
				+ 
			
				+-void tint_zero_workgroup_memory_1(uint local_idx_1, threadgroup float3x2* const tint_symbol_3, threadgroup float3x3* const tint_symbol_4, threadgroup float3x4* const tint_symbol_5) {
			
				++void tint_zero_workgroup_memory_1(uint local_idx_1, threadgroup float3x2* const tint_symbol_3, threadgroup tint_array<tint_packed_vec3_f32_array_element, 3>* const tint_symbol_4, threadgroup float3x4* const tint_symbol_5) {
			
				+   if ((local_idx_1 < 1u)) {
			
				+     *(tint_symbol_3) = float3x2(float2(0.0f), float2(0.0f), float2(0.0f));
			
				+-    *(tint_symbol_4) = float3x3(float3(0.0f), float3(0.0f), float3(0.0f));
			
				++    *(tint_symbol_4) = tint_pack_vec3_in_composite_1(float3x3(float3(0.0f), float3(0.0f), float3(0.0f)));
			
				+     *(tint_symbol_5) = float3x4(float4(0.0f), float4(0.0f), float4(0.0f));
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+ }
			
				+ 
			
				+-void tint_zero_workgroup_memory_2(uint local_idx_2, threadgroup float4x2* const tint_symbol_6, threadgroup float4x3* const tint_symbol_7, threadgroup float4x4* const tint_symbol_8) {
			
				++void tint_zero_workgroup_memory_2(uint local_idx_2, threadgroup float4x2* const tint_symbol_6, threadgroup tint_array<tint_packed_vec3_f32_array_element, 4>* const tint_symbol_7, threadgroup float4x4* const tint_symbol_8) {
			
				+   if ((local_idx_2 < 1u)) {
			
				+     *(tint_symbol_6) = float4x2(float2(0.0f), float2(0.0f), float2(0.0f), float2(0.0f));
			
				+-    *(tint_symbol_7) = float4x3(float3(0.0f), float3(0.0f), float3(0.0f), float3(0.0f));
			
				++    *(tint_symbol_7) = tint_pack_vec3_in_composite_2(float4x3(float3(0.0f), float3(0.0f), float3(0.0f), float3(0.0f)));
			
				+     *(tint_symbol_8) = float4x4(float4(0.0f), float4(0.0f), float4(0.0f), float4(0.0f));
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+ }
			
				+ 
			
				+-void main1_inner(uint local_invocation_index, threadgroup float2x2* const tint_symbol_9, threadgroup float2x3* const tint_symbol_10, threadgroup float2x4* const tint_symbol_11) {
			
				++void main1_inner(uint local_invocation_index, threadgroup float2x2* const tint_symbol_9, threadgroup tint_array<tint_packed_vec3_f32_array_element, 2>* const tint_symbol_10, threadgroup float2x4* const tint_symbol_11) {
			
				+   tint_zero_workgroup_memory(local_invocation_index, tint_symbol_9, tint_symbol_10, tint_symbol_11);
			
				+   float2x2 const a1 = *(tint_symbol_9);
			
				+-  float2x3 const a2 = *(tint_symbol_10);
			
				++  float2x3 const a2 = tint_unpack_vec3_in_composite(*(tint_symbol_10));
			
				+   float2x4 const a3 = *(tint_symbol_11);
			
				+ }
			
				+ 
			
				+ kernel void main1(threadgroup tint_symbol_16* tint_symbol_13 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				+   threadgroup float2x2* const tint_symbol_12 = &((*(tint_symbol_13)).m1);
			
				+-  threadgroup float2x3* const tint_symbol_14 = &((*(tint_symbol_13)).m2);
			
				++  threadgroup tint_array<tint_packed_vec3_f32_array_element, 2> tint_symbol_14;
			
				+   threadgroup float2x4* const tint_symbol_15 = &((*(tint_symbol_13)).m3);
			
				+-  main1_inner(local_invocation_index, tint_symbol_12, tint_symbol_14, tint_symbol_15);
			
				++  main1_inner(local_invocation_index, tint_symbol_12, &(tint_symbol_14), tint_symbol_15);
			
				+   return;
			
				+ }
			
				+ 
			
				+-void main2_inner(uint local_invocation_index_1, threadgroup float3x2* const tint_symbol_17, threadgroup float3x3* const tint_symbol_18, threadgroup float3x4* const tint_symbol_19) {
			
				++void main2_inner(uint local_invocation_index_1, threadgroup float3x2* const tint_symbol_17, threadgroup tint_array<tint_packed_vec3_f32_array_element, 3>* const tint_symbol_18, threadgroup float3x4* const tint_symbol_19) {
			
				+   tint_zero_workgroup_memory_1(local_invocation_index_1, tint_symbol_17, tint_symbol_18, tint_symbol_19);
			
				+   float3x2 const a1 = *(tint_symbol_17);
			
				+-  float3x3 const a2 = *(tint_symbol_18);
			
				++  float3x3 const a2 = tint_unpack_vec3_in_composite_1(*(tint_symbol_18));
			
				+   float3x4 const a3 = *(tint_symbol_19);
			
				+ }
			
				+ 
			
				+ kernel void main2(threadgroup tint_symbol_24* tint_symbol_21 [[threadgroup(0)]], uint local_invocation_index_1 [[thread_index_in_threadgroup]]) {
			
				+   threadgroup float3x2* const tint_symbol_20 = &((*(tint_symbol_21)).m4);
			
				+-  threadgroup float3x3* const tint_symbol_22 = &((*(tint_symbol_21)).m5);
			
				++  threadgroup tint_array<tint_packed_vec3_f32_array_element, 3> tint_symbol_22;
			
				+   threadgroup float3x4* const tint_symbol_23 = &((*(tint_symbol_21)).m6);
			
				+-  main2_inner(local_invocation_index_1, tint_symbol_20, tint_symbol_22, tint_symbol_23);
			
				++  main2_inner(local_invocation_index_1, tint_symbol_20, &(tint_symbol_22), tint_symbol_23);
			
				+   return;
			
				+ }
			
				+ 
			
				+-void main3_inner(uint local_invocation_index_2, threadgroup float4x2* const tint_symbol_25, threadgroup float4x3* const tint_symbol_26, threadgroup float4x4* const tint_symbol_27) {
			
				++void main3_inner(uint local_invocation_index_2, threadgroup float4x2* const tint_symbol_25, threadgroup tint_array<tint_packed_vec3_f32_array_element, 4>* const tint_symbol_26, threadgroup float4x4* const tint_symbol_27) {
			
				+   tint_zero_workgroup_memory_2(local_invocation_index_2, tint_symbol_25, tint_symbol_26, tint_symbol_27);
			
				+   float4x2 const a1 = *(tint_symbol_25);
			
				+-  float4x3 const a2 = *(tint_symbol_26);
			
				++  float4x3 const a2 = tint_unpack_vec3_in_composite_2(*(tint_symbol_26));
			
				+   float4x4 const a3 = *(tint_symbol_27);
			
				+ }
			
				+ 
			
				+ kernel void main3(threadgroup tint_symbol_32* tint_symbol_29 [[threadgroup(0)]], uint local_invocation_index_2 [[thread_index_in_threadgroup]]) {
			
				+   threadgroup float4x2* const tint_symbol_28 = &((*(tint_symbol_29)).m7);
			
				+-  threadgroup float4x3* const tint_symbol_30 = &((*(tint_symbol_29)).m8);
			
				++  threadgroup tint_array<tint_packed_vec3_f32_array_element, 4> tint_symbol_30;
			
				+   threadgroup float4x4* const tint_symbol_31 = &((*(tint_symbol_29)).m9);
			
				+-  main3_inner(local_invocation_index_2, tint_symbol_28, tint_symbol_30, tint_symbol_31);
			
				++  main3_inner(local_invocation_index_2, tint_symbol_28, &(tint_symbol_30), tint_symbol_31);
			
				+   return;
			
				+ }
			
				+ 
			
				+@@ -479,11 +523,11 @@ kernel void main4_no_usages() {
			
				+     ASSERT_TRUE(allocations.count("main2"));
			
				+     ASSERT_TRUE(allocations.count("main3"));
			
				+     ASSERT_EQ(allocations.at("main1").size(), 1u);
			
				+-    EXPECT_EQ(allocations.at("main1")[0], 20u * sizeof(float));
			
				++    EXPECT_EQ(allocations.at("main1")[0], 12u * sizeof(float));
			
				+     ASSERT_EQ(allocations.at("main2").size(), 1u);
			
				+-    EXPECT_EQ(allocations.at("main2")[0], 32u * sizeof(float));
			
				++    EXPECT_EQ(allocations.at("main2")[0], 20u * sizeof(float));
			
				+     ASSERT_EQ(allocations.at("main3").size(), 1u);
			
				+-    EXPECT_EQ(allocations.at("main3")[0], 40u * sizeof(float));
			
				++    EXPECT_EQ(allocations.at("main3")[0], 24u * sizeof(float));
			
				+     EXPECT_EQ(allocations.at("main4_no_usages").size(), 0u);
			
				+ }
			
				+ 
			
				+diff --git a/src/tint/lang/msl/writer/ast_raise/packed_vec3.cc b/src/tint/lang/msl/writer/ast_raise/packed_vec3.cc
			
				+index 816e0143ea38fb01f6644044de84fa10319c410b..76d664408238682bee8a9a350187fef1a553ac74 100644
			
				+--- a/src/tint/lang/msl/writer/ast_raise/packed_vec3.cc
			
				++++ b/src/tint/lang/msl/writer/ast_raise/packed_vec3.cc
			
				+@@ -83,6 +83,14 @@ struct PackedVec3::State {
			
				+     /// A map from type to the name of a helper function used to unpack that type.
			
				+     Hashmap<const core::type::Type*, Symbol, 4> unpack_helpers;
			
				+ 
			
				++    /// @returns true if @p addrspace requires vec3 types to be packed
			
				++    bool AddressSpaceNeedsPacking(core::AddressSpace addrspace) {
			
				++        // Host-shareable address spaces need to be packed to match the memory layout on the host.
			
				++        // The workgroup address space needs to be packed so that the size of generated threadgroup
			
				++        // variables matches the size of the original WGSL declarations.
			
				++        return core::IsHostShareable(addrspace) || addrspace == core::AddressSpace::kWorkgroup;
			
				++    }
			
				++
			
				+     /// @param ty the type to test
			
				+     /// @returns true if `ty` is a vec3, false otherwise
			
				+     bool IsVec3(const core::type::Type* ty) {
			
				+@@ -373,7 +381,7 @@ struct PackedVec3::State {
			
				+         // if the transform is necessary.
			
				+         for (auto* decl : src.AST().GlobalVariables()) {
			
				+             auto* var = sem.Get<sem::GlobalVariable>(decl);
			
				+-            if (var && core::IsHostShareable(var->AddressSpace()) &&
			
				++            if (var && AddressSpaceNeedsPacking(var->AddressSpace()) &&
			
				+                 ContainsVec3(var->Type()->UnwrapRef())) {
			
				+                 return true;
			
				+             }
			
				+@@ -410,7 +418,7 @@ struct PackedVec3::State {
			
				+                 [&](const sem::TypeExpression* type) {
			
				+                     // Rewrite pointers to types that contain vec3s.
			
				+                     auto* ptr = type->Type()->As<core::type::Pointer>();
			
				+-                    if (ptr && core::IsHostShareable(ptr->AddressSpace())) {
			
				++                    if (ptr && AddressSpaceNeedsPacking(ptr->AddressSpace())) {
			
				+                         auto new_store_type = RewriteType(ptr->StoreType());
			
				+                         if (new_store_type) {
			
				+                             auto access = ptr->AddressSpace() == core::AddressSpace::kStorage
			
				+@@ -423,7 +431,7 @@ struct PackedVec3::State {
			
				+                     }
			
				+                 },
			
				+                 [&](const sem::Variable* var) {
			
				+-                    if (!core::IsHostShareable(var->AddressSpace())) {
			
				++                    if (!AddressSpaceNeedsPacking(var->AddressSpace())) {
			
				+                         return;
			
				+                     }
			
				+ 
			
				+@@ -439,7 +447,7 @@ struct PackedVec3::State {
			
				+                         auto* lhs = sem.GetVal(assign->lhs);
			
				+                         auto* rhs = sem.GetVal(assign->rhs);
			
				+                         if (!ContainsVec3(rhs->Type()) ||
			
				+-                            !core::IsHostShareable(
			
				++                            !AddressSpaceNeedsPacking(
			
				+                                 lhs->Type()->As<core::type::Reference>()->AddressSpace())) {
			
				+                             // Skip assignments to address spaces that are not host-shareable, or
			
				+                             // that do not contain vec3 types.
			
				+@@ -467,7 +475,7 @@ struct PackedVec3::State {
			
				+                 [&](const sem::Load* load) {
			
				+                     // Unpack loads of types that contain vec3s in host-shareable address spaces.
			
				+                     if (ContainsVec3(load->Type()) &&
			
				+-                        core::IsHostShareable(load->ReferenceType()->AddressSpace())) {
			
				++                        AddressSpaceNeedsPacking(load->ReferenceType()->AddressSpace())) {
			
				+                         to_unpack.Add(load);
			
				+                     }
			
				+                 },
			
				+@@ -477,7 +485,7 @@ struct PackedVec3::State {
			
				+                     // struct.
			
				+                     if (auto* ref = accessor->Type()->As<core::type::Reference>()) {
			
				+                         if (IsVec3(ref->StoreType()) &&
			
				+-                            core::IsHostShareable(ref->AddressSpace())) {
			
				++                            AddressSpaceNeedsPacking(ref->AddressSpace())) {
			
				+                             ctx.Replace(node, b.MemberAccessor(ctx.Clone(accessor->Declaration()),
			
				+                                                                kStructMemberName));
			
				+                         }
			
				+diff --git a/src/tint/lang/msl/writer/ast_raise/packed_vec3_test.cc b/src/tint/lang/msl/writer/ast_raise/packed_vec3_test.cc
			
				+index cc002bacbb0cf98cb4f87291bee680809080b444..9fab6dfede8788398cbd61278aa8f048ad8de198 100644
			
				+--- a/src/tint/lang/msl/writer/ast_raise/packed_vec3_test.cc
			
				++++ b/src/tint/lang/msl/writer/ast_raise/packed_vec3_test.cc
			
				+@@ -67,11 +67,6 @@ var<private> p_v : vec3<f32>;
			
				+ var<private> p_m : mat3x3<f32>;
			
				+ var<private> p_a : array<vec3<f32>, 4>;
			
				+ 
			
				+-var<workgroup> w_s : S;
			
				+-var<workgroup> w_v : vec3<f32>;
			
				+-var<workgroup> w_m : mat3x3<f32>;
			
				+-var<workgroup> w_a : array<vec3<f32>, 4>;
			
				+-
			
				+ fn f() {
			
				+   var f_s : S;
			
				+   var f_v : vec3<f32>;
			
				+@@ -6944,20 +6939,20 @@ struct S {
			
				+ 
			
				+ @group(0) @binding(0) var<storage> P : S_tint_packed_vec3;
			
				+ 
			
				+-var<workgroup> w1 : S;
			
				++var<workgroup> w1 : S_tint_packed_vec3;
			
				+ 
			
				+-var<workgroup> w2 : vec3<f32>;
			
				++var<workgroup> w2 : __packed_vec3<f32>;
			
				+ 
			
				+-var<workgroup> w3 : array<vec3<f32>, 4>;
			
				++var<workgroup> w3 : array<tint_packed_vec3_f32_array_element, 4u>;
			
				+ 
			
				+-var<workgroup> w4 : mat3x3<f32>;
			
				++var<workgroup> w4 : array<tint_packed_vec3_f32_array_element, 3u>;
			
				+ 
			
				+ fn f() {
			
				+-  let pv_1 : ptr<workgroup, vec3<f32>> = &(w1.v);
			
				+-  let pv_2 : ptr<workgroup, vec3<f32>> = &(w2);
			
				+-  let pv_3 : ptr<workgroup, vec3<f32>> = &(w3[0]);
			
				+-  let pv_4 : ptr<workgroup, mat3x3<f32>> = &(w1.m);
			
				+-  let pv_5 : ptr<workgroup, mat3x3<f32>> = &(w4);
			
				++  let pv_1 : ptr<workgroup, __packed_vec3<f32>> = &(w1.v);
			
				++  let pv_2 : ptr<workgroup, __packed_vec3<f32>> = &(w2);
			
				++  let pv_3 : ptr<workgroup, __packed_vec3<f32>> = &(w3[0].elements);
			
				++  let pv_4 : ptr<workgroup, array<tint_packed_vec3_f32_array_element, 3u>> = &(w1.m);
			
				++  let pv_5 : ptr<workgroup, array<tint_packed_vec3_f32_array_element, 3u>> = &(w4);
			
				+ }
			
				+ )";
			
				+ 
			
				+diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl.expected.msl
			
				+index c83e23b4ceee66348815cd540515dd361a6d5759..03fd04f4dcf8f56a91282acea7569f6c7484f227 100644
			
				+--- a/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl.expected.msl
			
				++++ b/test/tint/buffer/uniform/std140/array/mat2x3_f16/to_workgroup.wgsl.expected.msl
			
				+@@ -18,45 +18,36 @@ struct tint_array {
			
				+   volatile bool VOLATILE_NAME = true; \
			
				+   if (VOLATILE_NAME)
			
				+ 
			
				+-struct tint_symbol_8 {
			
				+-  tint_array<half2x3, 4> w;
			
				+-};
			
				+-
			
				+ struct tint_packed_vec3_f16_array_element {
			
				+   /* 0x0000 */ packed_half3 elements;
			
				+   /* 0x0006 */ tint_array<int8_t, 2> tint_pad;
			
				+ };
			
				+ 
			
				+-half2x3 tint_unpack_vec3_in_composite(tint_array<tint_packed_vec3_f16_array_element, 2> in) {
			
				+-  half2x3 result = half2x3(half3(in[0].elements), half3(in[1].elements));
			
				+-  return result;
			
				+-}
			
				+-
			
				+-tint_array<half2x3, 4> tint_unpack_vec3_in_composite_1(tint_array<tint_array<tint_packed_vec3_f16_array_element, 2>, 4> in) {
			
				+-  tint_array<half2x3, 4> result = tint_array<half2x3, 4>{tint_unpack_vec3_in_composite(in[0]), tint_unpack_vec3_in_composite(in[1]), tint_unpack_vec3_in_composite(in[2]), tint_unpack_vec3_in_composite(in[3])};
			
				++tint_array<tint_packed_vec3_f16_array_element, 2> tint_pack_vec3_in_composite(half2x3 in) {
			
				++  tint_array<tint_packed_vec3_f16_array_element, 2> result = tint_array<tint_packed_vec3_f16_array_element, 2>{tint_packed_vec3_f16_array_element{.elements=packed_half3(in[0])}, tint_packed_vec3_f16_array_element{.elements=packed_half3(in[1])}};
			
				+   return result;
			
				+ }
			
				+ 
			
				+-void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<half2x3, 4>* const tint_symbol) {
			
				++void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<tint_array<tint_packed_vec3_f16_array_element, 2>, 4>* const tint_symbol) {
			
				+   TINT_ISOLATE_UB(tint_volatile_true) for(uint idx = local_idx; (idx < 4u); idx = (idx + 1u)) {
			
				+     uint const i = idx;
			
				+-    (*(tint_symbol))[i] = half2x3(half3(0.0h), half3(0.0h));
			
				++    (*(tint_symbol))[i] = tint_pack_vec3_in_composite(half2x3(half3(0.0h), half3(0.0h)));
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+ }
			
				+ 
			
				+-void f_inner(uint local_invocation_index, threadgroup tint_array<half2x3, 4>* const tint_symbol_1, const constant tint_array<tint_array<tint_packed_vec3_f16_array_element, 2>, 4>* const tint_symbol_2, device half* const tint_symbol_3) {
			
				++void f_inner(uint local_invocation_index, threadgroup tint_array<tint_array<tint_packed_vec3_f16_array_element, 2>, 4>* const tint_symbol_1, const constant tint_array<tint_array<tint_packed_vec3_f16_array_element, 2>, 4>* const tint_symbol_2, device half* const tint_symbol_3) {
			
				+   tint_zero_workgroup_memory(local_invocation_index, tint_symbol_1);
			
				+-  *(tint_symbol_1) = tint_unpack_vec3_in_composite_1(*(tint_symbol_2));
			
				+-  (*(tint_symbol_1))[1] = tint_unpack_vec3_in_composite((*(tint_symbol_2))[2]);
			
				+-  (*(tint_symbol_1))[1][0] = half3((*(tint_symbol_2))[0][1].elements).zxy;
			
				+-  (*(tint_symbol_1))[1][0][0] = (*(tint_symbol_2))[0][1].elements[0];
			
				+-  *(tint_symbol_3) = (*(tint_symbol_1))[1][0][0];
			
				++  *(tint_symbol_1) = *(tint_symbol_2);
			
				++  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
			
				++  (*(tint_symbol_1))[1][0].elements = packed_half3(half3((*(tint_symbol_2))[0][1].elements).zxy);
			
				++  (*(tint_symbol_1))[1][0].elements[0] = (*(tint_symbol_2))[0][1].elements[0];
			
				++  *(tint_symbol_3) = (*(tint_symbol_1))[1][0].elements[0];
			
				+ }
			
				+ 
			
				+-kernel void f(const constant tint_array<tint_array<tint_packed_vec3_f16_array_element, 2>, 4>* tint_symbol_6 [[buffer(0)]], device half* tint_symbol_7 [[buffer(1)]], threadgroup tint_symbol_8* tint_symbol_5 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				+-  threadgroup tint_array<half2x3, 4>* const tint_symbol_4 = &((*(tint_symbol_5)).w);
			
				+-  f_inner(local_invocation_index, tint_symbol_4, tint_symbol_6, tint_symbol_7);
			
				++kernel void f(const constant tint_array<tint_array<tint_packed_vec3_f16_array_element, 2>, 4>* tint_symbol_5 [[buffer(0)]], device half* tint_symbol_6 [[buffer(1)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				++  threadgroup tint_array<tint_array<tint_packed_vec3_f16_array_element, 2>, 4> tint_symbol_4;
			
				++  f_inner(local_invocation_index, &(tint_symbol_4), tint_symbol_5, tint_symbol_6);
			
				+   return;
			
				+ }
			
				+ 
			
				+diff --git a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl.expected.msl
			
				+index a4a32840dbaf0f93cfa66822752e2f9346c0452a..2c8c58128914a656cc755e00d85f2e6a8c727f34 100644
			
				+--- a/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl.expected.msl
			
				++++ b/test/tint/buffer/uniform/std140/array/mat2x3_f32/to_workgroup.wgsl.expected.msl
			
				+@@ -18,44 +18,35 @@ struct tint_array {
			
				+   volatile bool VOLATILE_NAME = true; \
			
				+   if (VOLATILE_NAME)
			
				+ 
			
				+-struct tint_symbol_6 {
			
				+-  tint_array<float2x3, 4> w;
			
				+-};
			
				+-
			
				+ struct tint_packed_vec3_f32_array_element {
			
				+   /* 0x0000 */ packed_float3 elements;
			
				+   /* 0x000c */ tint_array<int8_t, 4> tint_pad;
			
				+ };
			
				+ 
			
				+-float2x3 tint_unpack_vec3_in_composite(tint_array<tint_packed_vec3_f32_array_element, 2> in) {
			
				+-  float2x3 result = float2x3(float3(in[0].elements), float3(in[1].elements));
			
				+-  return result;
			
				+-}
			
				+-
			
				+-tint_array<float2x3, 4> tint_unpack_vec3_in_composite_1(tint_array<tint_array<tint_packed_vec3_f32_array_element, 2>, 4> in) {
			
				+-  tint_array<float2x3, 4> result = tint_array<float2x3, 4>{tint_unpack_vec3_in_composite(in[0]), tint_unpack_vec3_in_composite(in[1]), tint_unpack_vec3_in_composite(in[2]), tint_unpack_vec3_in_composite(in[3])};
			
				++tint_array<tint_packed_vec3_f32_array_element, 2> tint_pack_vec3_in_composite(float2x3 in) {
			
				++  tint_array<tint_packed_vec3_f32_array_element, 2> result = tint_array<tint_packed_vec3_f32_array_element, 2>{tint_packed_vec3_f32_array_element{.elements=packed_float3(in[0])}, tint_packed_vec3_f32_array_element{.elements=packed_float3(in[1])}};
			
				+   return result;
			
				+ }
			
				+ 
			
				+-void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<float2x3, 4>* const tint_symbol) {
			
				++void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<tint_array<tint_packed_vec3_f32_array_element, 2>, 4>* const tint_symbol) {
			
				+   TINT_ISOLATE_UB(tint_volatile_true) for(uint idx = local_idx; (idx < 4u); idx = (idx + 1u)) {
			
				+     uint const i = idx;
			
				+-    (*(tint_symbol))[i] = float2x3(float3(0.0f), float3(0.0f));
			
				++    (*(tint_symbol))[i] = tint_pack_vec3_in_composite(float2x3(float3(0.0f), float3(0.0f)));
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+ }
			
				+ 
			
				+-void f_inner(uint local_invocation_index, threadgroup tint_array<float2x3, 4>* const tint_symbol_1, const constant tint_array<tint_array<tint_packed_vec3_f32_array_element, 2>, 4>* const tint_symbol_2) {
			
				++void f_inner(uint local_invocation_index, threadgroup tint_array<tint_array<tint_packed_vec3_f32_array_element, 2>, 4>* const tint_symbol_1, const constant tint_array<tint_array<tint_packed_vec3_f32_array_element, 2>, 4>* const tint_symbol_2) {
			
				+   tint_zero_workgroup_memory(local_invocation_index, tint_symbol_1);
			
				+-  *(tint_symbol_1) = tint_unpack_vec3_in_composite_1(*(tint_symbol_2));
			
				+-  (*(tint_symbol_1))[1] = tint_unpack_vec3_in_composite((*(tint_symbol_2))[2]);
			
				+-  (*(tint_symbol_1))[1][0] = float3((*(tint_symbol_2))[0][1].elements).zxy;
			
				+-  (*(tint_symbol_1))[1][0][0] = (*(tint_symbol_2))[0][1].elements[0];
			
				++  *(tint_symbol_1) = *(tint_symbol_2);
			
				++  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
			
				++  (*(tint_symbol_1))[1][0].elements = packed_float3(float3((*(tint_symbol_2))[0][1].elements).zxy);
			
				++  (*(tint_symbol_1))[1][0].elements[0] = (*(tint_symbol_2))[0][1].elements[0];
			
				+ }
			
				+ 
			
				+-kernel void f(const constant tint_array<tint_array<tint_packed_vec3_f32_array_element, 2>, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				+-  threadgroup tint_array<float2x3, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
			
				+-  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
			
				++kernel void f(const constant tint_array<tint_array<tint_packed_vec3_f32_array_element, 2>, 4>* tint_symbol_4 [[buffer(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				++  threadgroup tint_array<tint_array<tint_packed_vec3_f32_array_element, 2>, 4> tint_symbol_3;
			
				++  f_inner(local_invocation_index, &(tint_symbol_3), tint_symbol_4);
			
				+   return;
			
				+ }
			
				+ 
			
				+diff --git a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl.expected.msl
			
				+index ad832018aa484ff500803e1f32b5d93bac1b6913..5a3aaad8c9629c7488427d86ff65672575b50bc5 100644
			
				+--- a/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl.expected.msl
			
				++++ b/test/tint/buffer/uniform/std140/array/mat3x3_f32/to_workgroup.wgsl.expected.msl
			
				+@@ -18,44 +18,35 @@ struct tint_array {
			
				+   volatile bool VOLATILE_NAME = true; \
			
				+   if (VOLATILE_NAME)
			
				+ 
			
				+-struct tint_symbol_6 {
			
				+-  tint_array<float3x3, 4> w;
			
				+-};
			
				+-
			
				+ struct tint_packed_vec3_f32_array_element {
			
				+   /* 0x0000 */ packed_float3 elements;
			
				+   /* 0x000c */ tint_array<int8_t, 4> tint_pad;
			
				+ };
			
				+ 
			
				+-float3x3 tint_unpack_vec3_in_composite(tint_array<tint_packed_vec3_f32_array_element, 3> in) {
			
				+-  float3x3 result = float3x3(float3(in[0].elements), float3(in[1].elements), float3(in[2].elements));
			
				+-  return result;
			
				+-}
			
				+-
			
				+-tint_array<float3x3, 4> tint_unpack_vec3_in_composite_1(tint_array<tint_array<tint_packed_vec3_f32_array_element, 3>, 4> in) {
			
				+-  tint_array<float3x3, 4> result = tint_array<float3x3, 4>{tint_unpack_vec3_in_composite(in[0]), tint_unpack_vec3_in_composite(in[1]), tint_unpack_vec3_in_composite(in[2]), tint_unpack_vec3_in_composite(in[3])};
			
				++tint_array<tint_packed_vec3_f32_array_element, 3> tint_pack_vec3_in_composite(float3x3 in) {
			
				++  tint_array<tint_packed_vec3_f32_array_element, 3> result = tint_array<tint_packed_vec3_f32_array_element, 3>{tint_packed_vec3_f32_array_element{.elements=packed_float3(in[0])}, tint_packed_vec3_f32_array_element{.elements=packed_float3(in[1])}, tint_packed_vec3_f32_array_element{.elements=packed_float3(in[2])}};
			
				+   return result;
			
				+ }
			
				+ 
			
				+-void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<float3x3, 4>* const tint_symbol) {
			
				++void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<tint_array<tint_packed_vec3_f32_array_element, 3>, 4>* const tint_symbol) {
			
				+   TINT_ISOLATE_UB(tint_volatile_true) for(uint idx = local_idx; (idx < 4u); idx = (idx + 1u)) {
			
				+     uint const i = idx;
			
				+-    (*(tint_symbol))[i] = float3x3(float3(0.0f), float3(0.0f), float3(0.0f));
			
				++    (*(tint_symbol))[i] = tint_pack_vec3_in_composite(float3x3(float3(0.0f), float3(0.0f), float3(0.0f)));
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+ }
			
				+ 
			
				+-void f_inner(uint local_invocation_index, threadgroup tint_array<float3x3, 4>* const tint_symbol_1, const constant tint_array<tint_array<tint_packed_vec3_f32_array_element, 3>, 4>* const tint_symbol_2) {
			
				++void f_inner(uint local_invocation_index, threadgroup tint_array<tint_array<tint_packed_vec3_f32_array_element, 3>, 4>* const tint_symbol_1, const constant tint_array<tint_array<tint_packed_vec3_f32_array_element, 3>, 4>* const tint_symbol_2) {
			
				+   tint_zero_workgroup_memory(local_invocation_index, tint_symbol_1);
			
				+-  *(tint_symbol_1) = tint_unpack_vec3_in_composite_1(*(tint_symbol_2));
			
				+-  (*(tint_symbol_1))[1] = tint_unpack_vec3_in_composite((*(tint_symbol_2))[2]);
			
				+-  (*(tint_symbol_1))[1][0] = float3((*(tint_symbol_2))[0][1].elements).zxy;
			
				+-  (*(tint_symbol_1))[1][0][0] = (*(tint_symbol_2))[0][1].elements[0];
			
				++  *(tint_symbol_1) = *(tint_symbol_2);
			
				++  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
			
				++  (*(tint_symbol_1))[1][0].elements = packed_float3(float3((*(tint_symbol_2))[0][1].elements).zxy);
			
				++  (*(tint_symbol_1))[1][0].elements[0] = (*(tint_symbol_2))[0][1].elements[0];
			
				+ }
			
				+ 
			
				+-kernel void f(const constant tint_array<tint_array<tint_packed_vec3_f32_array_element, 3>, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				+-  threadgroup tint_array<float3x3, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
			
				+-  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
			
				++kernel void f(const constant tint_array<tint_array<tint_packed_vec3_f32_array_element, 3>, 4>* tint_symbol_4 [[buffer(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				++  threadgroup tint_array<tint_array<tint_packed_vec3_f32_array_element, 3>, 4> tint_symbol_3;
			
				++  f_inner(local_invocation_index, &(tint_symbol_3), tint_symbol_4);
			
				+   return;
			
				+ }
			
				+ 
			
				+diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.msl
			
				+index 159bd7652f900bbf3d5dc35e6d193e5da25c0914..cf675f1dcb398ecf7441be1b64db84371365b71e 100644
			
				+--- a/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.msl
			
				++++ b/test/tint/buffer/uniform/std140/array/mat4x3_f16/to_workgroup.wgsl.expected.msl
			
				+@@ -18,44 +18,35 @@ struct tint_array {
			
				+   volatile bool VOLATILE_NAME = true; \
			
				+   if (VOLATILE_NAME)
			
				+ 
			
				+-struct tint_symbol_6 {
			
				+-  tint_array<half4x3, 4> w;
			
				+-};
			
				+-
			
				+ struct tint_packed_vec3_f16_array_element {
			
				+   /* 0x0000 */ packed_half3 elements;
			
				+   /* 0x0006 */ tint_array<int8_t, 2> tint_pad;
			
				+ };
			
				+ 
			
				+-half4x3 tint_unpack_vec3_in_composite(tint_array<tint_packed_vec3_f16_array_element, 4> in) {
			
				+-  half4x3 result = half4x3(half3(in[0].elements), half3(in[1].elements), half3(in[2].elements), half3(in[3].elements));
			
				+-  return result;
			
				+-}
			
				+-
			
				+-tint_array<half4x3, 4> tint_unpack_vec3_in_composite_1(tint_array<tint_array<tint_packed_vec3_f16_array_element, 4>, 4> in) {
			
				+-  tint_array<half4x3, 4> result = tint_array<half4x3, 4>{tint_unpack_vec3_in_composite(in[0]), tint_unpack_vec3_in_composite(in[1]), tint_unpack_vec3_in_composite(in[2]), tint_unpack_vec3_in_composite(in[3])};
			
				++tint_array<tint_packed_vec3_f16_array_element, 4> tint_pack_vec3_in_composite(half4x3 in) {
			
				++  tint_array<tint_packed_vec3_f16_array_element, 4> result = tint_array<tint_packed_vec3_f16_array_element, 4>{tint_packed_vec3_f16_array_element{.elements=packed_half3(in[0])}, tint_packed_vec3_f16_array_element{.elements=packed_half3(in[1])}, tint_packed_vec3_f16_array_element{.elements=packed_half3(in[2])}, tint_packed_vec3_f16_array_element{.elements=packed_half3(in[3])}};
			
				+   return result;
			
				+ }
			
				+ 
			
				+-void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<half4x3, 4>* const tint_symbol) {
			
				++void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<tint_array<tint_packed_vec3_f16_array_element, 4>, 4>* const tint_symbol) {
			
				+   TINT_ISOLATE_UB(tint_volatile_true) for(uint idx = local_idx; (idx < 4u); idx = (idx + 1u)) {
			
				+     uint const i = idx;
			
				+-    (*(tint_symbol))[i] = half4x3(half3(0.0h), half3(0.0h), half3(0.0h), half3(0.0h));
			
				++    (*(tint_symbol))[i] = tint_pack_vec3_in_composite(half4x3(half3(0.0h), half3(0.0h), half3(0.0h), half3(0.0h)));
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+ }
			
				+ 
			
				+-void f_inner(uint local_invocation_index, threadgroup tint_array<half4x3, 4>* const tint_symbol_1, const constant tint_array<tint_array<tint_packed_vec3_f16_array_element, 4>, 4>* const tint_symbol_2) {
			
				++void f_inner(uint local_invocation_index, threadgroup tint_array<tint_array<tint_packed_vec3_f16_array_element, 4>, 4>* const tint_symbol_1, const constant tint_array<tint_array<tint_packed_vec3_f16_array_element, 4>, 4>* const tint_symbol_2) {
			
				+   tint_zero_workgroup_memory(local_invocation_index, tint_symbol_1);
			
				+-  *(tint_symbol_1) = tint_unpack_vec3_in_composite_1(*(tint_symbol_2));
			
				+-  (*(tint_symbol_1))[1] = tint_unpack_vec3_in_composite((*(tint_symbol_2))[2]);
			
				+-  (*(tint_symbol_1))[1][0] = half3((*(tint_symbol_2))[0][1].elements).zxy;
			
				+-  (*(tint_symbol_1))[1][0][0] = (*(tint_symbol_2))[0][1].elements[0];
			
				++  *(tint_symbol_1) = *(tint_symbol_2);
			
				++  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
			
				++  (*(tint_symbol_1))[1][0].elements = packed_half3(half3((*(tint_symbol_2))[0][1].elements).zxy);
			
				++  (*(tint_symbol_1))[1][0].elements[0] = (*(tint_symbol_2))[0][1].elements[0];
			
				+ }
			
				+ 
			
				+-kernel void f(const constant tint_array<tint_array<tint_packed_vec3_f16_array_element, 4>, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				+-  threadgroup tint_array<half4x3, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
			
				+-  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
			
				++kernel void f(const constant tint_array<tint_array<tint_packed_vec3_f16_array_element, 4>, 4>* tint_symbol_4 [[buffer(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				++  threadgroup tint_array<tint_array<tint_packed_vec3_f16_array_element, 4>, 4> tint_symbol_3;
			
				++  f_inner(local_invocation_index, &(tint_symbol_3), tint_symbol_4);
			
				+   return;
			
				+ }
			
				+ 
			
				+diff --git a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.msl
			
				+index 2f825771d5d3e324ca225bb182798a01c6086852..5bff8264fdb91194573f31f97308212a4e25f864 100644
			
				+--- a/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.msl
			
				++++ b/test/tint/buffer/uniform/std140/array/mat4x3_f32/to_workgroup.wgsl.expected.msl
			
				+@@ -18,44 +18,35 @@ struct tint_array {
			
				+   volatile bool VOLATILE_NAME = true; \
			
				+   if (VOLATILE_NAME)
			
				+ 
			
				+-struct tint_symbol_6 {
			
				+-  tint_array<float4x3, 4> w;
			
				+-};
			
				+-
			
				+ struct tint_packed_vec3_f32_array_element {
			
				+   /* 0x0000 */ packed_float3 elements;
			
				+   /* 0x000c */ tint_array<int8_t, 4> tint_pad;
			
				+ };
			
				+ 
			
				+-float4x3 tint_unpack_vec3_in_composite(tint_array<tint_packed_vec3_f32_array_element, 4> in) {
			
				+-  float4x3 result = float4x3(float3(in[0].elements), float3(in[1].elements), float3(in[2].elements), float3(in[3].elements));
			
				+-  return result;
			
				+-}
			
				+-
			
				+-tint_array<float4x3, 4> tint_unpack_vec3_in_composite_1(tint_array<tint_array<tint_packed_vec3_f32_array_element, 4>, 4> in) {
			
				+-  tint_array<float4x3, 4> result = tint_array<float4x3, 4>{tint_unpack_vec3_in_composite(in[0]), tint_unpack_vec3_in_composite(in[1]), tint_unpack_vec3_in_composite(in[2]), tint_unpack_vec3_in_composite(in[3])};
			
				++tint_array<tint_packed_vec3_f32_array_element, 4> tint_pack_vec3_in_composite(float4x3 in) {
			
				++  tint_array<tint_packed_vec3_f32_array_element, 4> result = tint_array<tint_packed_vec3_f32_array_element, 4>{tint_packed_vec3_f32_array_element{.elements=packed_float3(in[0])}, tint_packed_vec3_f32_array_element{.elements=packed_float3(in[1])}, tint_packed_vec3_f32_array_element{.elements=packed_float3(in[2])}, tint_packed_vec3_f32_array_element{.elements=packed_float3(in[3])}};
			
				+   return result;
			
				+ }
			
				+ 
			
				+-void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<float4x3, 4>* const tint_symbol) {
			
				++void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<tint_array<tint_packed_vec3_f32_array_element, 4>, 4>* const tint_symbol) {
			
				+   TINT_ISOLATE_UB(tint_volatile_true) for(uint idx = local_idx; (idx < 4u); idx = (idx + 1u)) {
			
				+     uint const i = idx;
			
				+-    (*(tint_symbol))[i] = float4x3(float3(0.0f), float3(0.0f), float3(0.0f), float3(0.0f));
			
				++    (*(tint_symbol))[i] = tint_pack_vec3_in_composite(float4x3(float3(0.0f), float3(0.0f), float3(0.0f), float3(0.0f)));
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+ }
			
				+ 
			
				+-void f_inner(uint local_invocation_index, threadgroup tint_array<float4x3, 4>* const tint_symbol_1, const constant tint_array<tint_array<tint_packed_vec3_f32_array_element, 4>, 4>* const tint_symbol_2) {
			
				++void f_inner(uint local_invocation_index, threadgroup tint_array<tint_array<tint_packed_vec3_f32_array_element, 4>, 4>* const tint_symbol_1, const constant tint_array<tint_array<tint_packed_vec3_f32_array_element, 4>, 4>* const tint_symbol_2) {
			
				+   tint_zero_workgroup_memory(local_invocation_index, tint_symbol_1);
			
				+-  *(tint_symbol_1) = tint_unpack_vec3_in_composite_1(*(tint_symbol_2));
			
				+-  (*(tint_symbol_1))[1] = tint_unpack_vec3_in_composite((*(tint_symbol_2))[2]);
			
				+-  (*(tint_symbol_1))[1][0] = float3((*(tint_symbol_2))[0][1].elements).zxy;
			
				+-  (*(tint_symbol_1))[1][0][0] = (*(tint_symbol_2))[0][1].elements[0];
			
				++  *(tint_symbol_1) = *(tint_symbol_2);
			
				++  (*(tint_symbol_1))[1] = (*(tint_symbol_2))[2];
			
				++  (*(tint_symbol_1))[1][0].elements = packed_float3(float3((*(tint_symbol_2))[0][1].elements).zxy);
			
				++  (*(tint_symbol_1))[1][0].elements[0] = (*(tint_symbol_2))[0][1].elements[0];
			
				+ }
			
				+ 
			
				+-kernel void f(const constant tint_array<tint_array<tint_packed_vec3_f32_array_element, 4>, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				+-  threadgroup tint_array<float4x3, 4>* const tint_symbol_3 = &((*(tint_symbol_4)).w);
			
				+-  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
			
				++kernel void f(const constant tint_array<tint_array<tint_packed_vec3_f32_array_element, 4>, 4>* tint_symbol_4 [[buffer(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				++  threadgroup tint_array<tint_array<tint_packed_vec3_f32_array_element, 4>, 4> tint_symbol_3;
			
				++  f_inner(local_invocation_index, &(tint_symbol_3), tint_symbol_4);
			
				+   return;
			
				+ }
			
				+ 
			
				+diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl.expected.msl
			
				+index 32a680e67ac45122c352521db871a1b3da5e571c..fa95a268f16d950dd255d80bdfd023c86199d74c 100644
			
				+--- a/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl.expected.msl
			
				++++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f16/to_workgroup.wgsl.expected.msl
			
				+@@ -18,16 +18,6 @@ struct tint_array {
			
				+   volatile bool VOLATILE_NAME = true; \
			
				+   if (VOLATILE_NAME)
			
				+ 
			
				+-struct S {
			
				+-  int before;
			
				+-  half2x3 m;
			
				+-  int after;
			
				+-};
			
				+-
			
				+-struct tint_symbol_7 {
			
				+-  tint_array<S, 4> w;
			
				+-};
			
				+-
			
				+ struct tint_packed_vec3_f16_array_element {
			
				+   /* 0x0000 */ packed_half3 elements;
			
				+   /* 0x0006 */ tint_array<int8_t, 2> tint_pad;
			
				+@@ -42,44 +32,45 @@ struct S_tint_packed_vec3 {
			
				+   /* 0x0044 */ tint_array<int8_t, 60> tint_pad_3;
			
				+ };
			
				+ 
			
				+-half2x3 tint_unpack_vec3_in_composite(tint_array<tint_packed_vec3_f16_array_element, 2> in) {
			
				+-  half2x3 result = half2x3(half3(in[0].elements), half3(in[1].elements));
			
				++tint_array<tint_packed_vec3_f16_array_element, 2> tint_pack_vec3_in_composite(half2x3 in) {
			
				++  tint_array<tint_packed_vec3_f16_array_element, 2> result = tint_array<tint_packed_vec3_f16_array_element, 2>{tint_packed_vec3_f16_array_element{.elements=packed_half3(in[0])}, tint_packed_vec3_f16_array_element{.elements=packed_half3(in[1])}};
			
				+   return result;
			
				+ }
			
				+ 
			
				+-S tint_unpack_vec3_in_composite_1(S_tint_packed_vec3 in) {
			
				+-  S result = {};
			
				++struct S {
			
				++  int before;
			
				++  half2x3 m;
			
				++  int after;
			
				++};
			
				++
			
				++S_tint_packed_vec3 tint_pack_vec3_in_composite_1(S in) {
			
				++  S_tint_packed_vec3 result = {};
			
				+   result.before = in.before;
			
				+-  result.m = tint_unpack_vec3_in_composite(in.m);
			
				++  result.m = tint_pack_vec3_in_composite(in.m);
			
				+   result.after = in.after;
			
				+   return result;
			
				+ }
			
				+ 
			
				+-tint_array<S, 4> tint_unpack_vec3_in_composite_2(tint_array<S_tint_packed_vec3, 4> in) {
			
				+-  tint_array<S, 4> result = tint_array<S, 4>{tint_unpack_vec3_in_composite_1(in[0]), tint_unpack_vec3_in_composite_1(in[1]), tint_unpack_vec3_in_composite_1(in[2]), tint_unpack_vec3_in_composite_1(in[3])};
			
				+-  return result;
			
				+-}
			
				+-
			
				+-void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<S, 4>* const tint_symbol_1) {
			
				++void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<S_tint_packed_vec3, 4>* const tint_symbol_1) {
			
				+   TINT_ISOLATE_UB(tint_volatile_true) for(uint idx = local_idx; (idx < 4u); idx = (idx + 1u)) {
			
				+     uint const i = idx;
			
				+     S const tint_symbol = S{};
			
				+-    (*(tint_symbol_1))[i] = tint_symbol;
			
				++    (*(tint_symbol_1))[i] = tint_pack_vec3_in_composite_1(tint_symbol);
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+ }
			
				+ 
			
				+-void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_2, const constant tint_array<S_tint_packed_vec3, 4>* const tint_symbol_3) {
			
				++void f_inner(uint local_invocation_index, threadgroup tint_array<S_tint_packed_vec3, 4>* const tint_symbol_2, const constant tint_array<S_tint_packed_vec3, 4>* const tint_symbol_3) {
			
				+   tint_zero_workgroup_memory(local_invocation_index, tint_symbol_2);
			
				+-  *(tint_symbol_2) = tint_unpack_vec3_in_composite_2(*(tint_symbol_3));
			
				+-  (*(tint_symbol_2))[1] = tint_unpack_vec3_in_composite_1((*(tint_symbol_3))[2]);
			
				+-  (*(tint_symbol_2))[3].m = tint_unpack_vec3_in_composite((*(tint_symbol_3))[2].m);
			
				+-  (*(tint_symbol_2))[1].m[0] = half3((*(tint_symbol_3))[0].m[1].elements).zxy;
			
				++  *(tint_symbol_2) = *(tint_symbol_3);
			
				++  (*(tint_symbol_2))[1] = (*(tint_symbol_3))[2];
			
				++  (*(tint_symbol_2))[3].m = (*(tint_symbol_3))[2].m;
			
				++  (*(tint_symbol_2))[1].m[0].elements = packed_half3(half3((*(tint_symbol_3))[0].m[1].elements).zxy);
			
				+ }
			
				+ 
			
				+-kernel void f(const constant tint_array<S_tint_packed_vec3, 4>* tint_symbol_6 [[buffer(0)]], threadgroup tint_symbol_7* tint_symbol_5 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				+-  threadgroup tint_array<S, 4>* const tint_symbol_4 = &((*(tint_symbol_5)).w);
			
				+-  f_inner(local_invocation_index, tint_symbol_4, tint_symbol_6);
			
				++kernel void f(const constant tint_array<S_tint_packed_vec3, 4>* tint_symbol_5 [[buffer(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				++  threadgroup tint_array<S_tint_packed_vec3, 4> tint_symbol_4;
			
				++  f_inner(local_invocation_index, &(tint_symbol_4), tint_symbol_5);
			
				+   return;
			
				+ }
			
				+ 
			
				+diff --git a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl.expected.msl
			
				+index 0c87556302beaef579aeccf33ad2e60c90b7c013..030741c5a3d32dcc799fde5c4f1b9a222b5634c3 100644
			
				+--- a/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl.expected.msl
			
				++++ b/test/tint/buffer/uniform/std140/struct/mat2x3_f32/to_workgroup.wgsl.expected.msl
			
				+@@ -18,16 +18,6 @@ struct tint_array {
			
				+   volatile bool VOLATILE_NAME = true; \
			
				+   if (VOLATILE_NAME)
			
				+ 
			
				+-struct S {
			
				+-  int before;
			
				+-  float2x3 m;
			
				+-  int after;
			
				+-};
			
				+-
			
				+-struct tint_symbol_7 {
			
				+-  tint_array<S, 4> w;
			
				+-};
			
				+-
			
				+ struct tint_packed_vec3_f32_array_element {
			
				+   /* 0x0000 */ packed_float3 elements;
			
				+   /* 0x000c */ tint_array<int8_t, 4> tint_pad;
			
				+@@ -42,44 +32,45 @@ struct S_tint_packed_vec3 {
			
				+   /* 0x0044 */ tint_array<int8_t, 60> tint_pad_3;
			
				+ };
			
				+ 
			
				+-float2x3 tint_unpack_vec3_in_composite(tint_array<tint_packed_vec3_f32_array_element, 2> in) {
			
				+-  float2x3 result = float2x3(float3(in[0].elements), float3(in[1].elements));
			
				++tint_array<tint_packed_vec3_f32_array_element, 2> tint_pack_vec3_in_composite(float2x3 in) {
			
				++  tint_array<tint_packed_vec3_f32_array_element, 2> result = tint_array<tint_packed_vec3_f32_array_element, 2>{tint_packed_vec3_f32_array_element{.elements=packed_float3(in[0])}, tint_packed_vec3_f32_array_element{.elements=packed_float3(in[1])}};
			
				+   return result;
			
				+ }
			
				+ 
			
				+-S tint_unpack_vec3_in_composite_1(S_tint_packed_vec3 in) {
			
				+-  S result = {};
			
				++struct S {
			
				++  int before;
			
				++  float2x3 m;
			
				++  int after;
			
				++};
			
				++
			
				++S_tint_packed_vec3 tint_pack_vec3_in_composite_1(S in) {
			
				++  S_tint_packed_vec3 result = {};
			
				+   result.before = in.before;
			
				+-  result.m = tint_unpack_vec3_in_composite(in.m);
			
				++  result.m = tint_pack_vec3_in_composite(in.m);
			
				+   result.after = in.after;
			
				+   return result;
			
				+ }
			
				+ 
			
				+-tint_array<S, 4> tint_unpack_vec3_in_composite_2(tint_array<S_tint_packed_vec3, 4> in) {
			
				+-  tint_array<S, 4> result = tint_array<S, 4>{tint_unpack_vec3_in_composite_1(in[0]), tint_unpack_vec3_in_composite_1(in[1]), tint_unpack_vec3_in_composite_1(in[2]), tint_unpack_vec3_in_composite_1(in[3])};
			
				+-  return result;
			
				+-}
			
				+-
			
				+-void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<S, 4>* const tint_symbol_1) {
			
				++void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<S_tint_packed_vec3, 4>* const tint_symbol_1) {
			
				+   TINT_ISOLATE_UB(tint_volatile_true) for(uint idx = local_idx; (idx < 4u); idx = (idx + 1u)) {
			
				+     uint const i = idx;
			
				+     S const tint_symbol = S{};
			
				+-    (*(tint_symbol_1))[i] = tint_symbol;
			
				++    (*(tint_symbol_1))[i] = tint_pack_vec3_in_composite_1(tint_symbol);
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+ }
			
				+ 
			
				+-void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_2, const constant tint_array<S_tint_packed_vec3, 4>* const tint_symbol_3) {
			
				++void f_inner(uint local_invocation_index, threadgroup tint_array<S_tint_packed_vec3, 4>* const tint_symbol_2, const constant tint_array<S_tint_packed_vec3, 4>* const tint_symbol_3) {
			
				+   tint_zero_workgroup_memory(local_invocation_index, tint_symbol_2);
			
				+-  *(tint_symbol_2) = tint_unpack_vec3_in_composite_2(*(tint_symbol_3));
			
				+-  (*(tint_symbol_2))[1] = tint_unpack_vec3_in_composite_1((*(tint_symbol_3))[2]);
			
				+-  (*(tint_symbol_2))[3].m = tint_unpack_vec3_in_composite((*(tint_symbol_3))[2].m);
			
				+-  (*(tint_symbol_2))[1].m[0] = float3((*(tint_symbol_3))[0].m[1].elements).zxy;
			
				++  *(tint_symbol_2) = *(tint_symbol_3);
			
				++  (*(tint_symbol_2))[1] = (*(tint_symbol_3))[2];
			
				++  (*(tint_symbol_2))[3].m = (*(tint_symbol_3))[2].m;
			
				++  (*(tint_symbol_2))[1].m[0].elements = packed_float3(float3((*(tint_symbol_3))[0].m[1].elements).zxy);
			
				+ }
			
				+ 
			
				+-kernel void f(const constant tint_array<S_tint_packed_vec3, 4>* tint_symbol_6 [[buffer(0)]], threadgroup tint_symbol_7* tint_symbol_5 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				+-  threadgroup tint_array<S, 4>* const tint_symbol_4 = &((*(tint_symbol_5)).w);
			
				+-  f_inner(local_invocation_index, tint_symbol_4, tint_symbol_6);
			
				++kernel void f(const constant tint_array<S_tint_packed_vec3, 4>* tint_symbol_5 [[buffer(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				++  threadgroup tint_array<S_tint_packed_vec3, 4> tint_symbol_4;
			
				++  f_inner(local_invocation_index, &(tint_symbol_4), tint_symbol_5);
			
				+   return;
			
				+ }
			
				+ 
			
				+diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl.expected.msl
			
				+index f982bedd6264465e5a49cc6814e9af0e90830a12..d22ab142850ba9482d17fbca3256710c357d029b 100644
			
				+--- a/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl.expected.msl
			
				++++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f16/to_workgroup.wgsl.expected.msl
			
				+@@ -18,16 +18,6 @@ struct tint_array {
			
				+   volatile bool VOLATILE_NAME = true; \
			
				+   if (VOLATILE_NAME)
			
				+ 
			
				+-struct S {
			
				+-  int before;
			
				+-  half3x3 m;
			
				+-  int after;
			
				+-};
			
				+-
			
				+-struct tint_symbol_7 {
			
				+-  tint_array<S, 4> w;
			
				+-};
			
				+-
			
				+ struct tint_packed_vec3_f16_array_element {
			
				+   /* 0x0000 */ packed_half3 elements;
			
				+   /* 0x0006 */ tint_array<int8_t, 2> tint_pad;
			
				+@@ -42,44 +32,45 @@ struct S_tint_packed_vec3 {
			
				+   /* 0x0044 */ tint_array<int8_t, 60> tint_pad_3;
			
				+ };
			
				+ 
			
				+-half3x3 tint_unpack_vec3_in_composite(tint_array<tint_packed_vec3_f16_array_element, 3> in) {
			
				+-  half3x3 result = half3x3(half3(in[0].elements), half3(in[1].elements), half3(in[2].elements));
			
				++tint_array<tint_packed_vec3_f16_array_element, 3> tint_pack_vec3_in_composite(half3x3 in) {
			
				++  tint_array<tint_packed_vec3_f16_array_element, 3> result = tint_array<tint_packed_vec3_f16_array_element, 3>{tint_packed_vec3_f16_array_element{.elements=packed_half3(in[0])}, tint_packed_vec3_f16_array_element{.elements=packed_half3(in[1])}, tint_packed_vec3_f16_array_element{.elements=packed_half3(in[2])}};
			
				+   return result;
			
				+ }
			
				+ 
			
				+-S tint_unpack_vec3_in_composite_1(S_tint_packed_vec3 in) {
			
				+-  S result = {};
			
				++struct S {
			
				++  int before;
			
				++  half3x3 m;
			
				++  int after;
			
				++};
			
				++
			
				++S_tint_packed_vec3 tint_pack_vec3_in_composite_1(S in) {
			
				++  S_tint_packed_vec3 result = {};
			
				+   result.before = in.before;
			
				+-  result.m = tint_unpack_vec3_in_composite(in.m);
			
				++  result.m = tint_pack_vec3_in_composite(in.m);
			
				+   result.after = in.after;
			
				+   return result;
			
				+ }
			
				+ 
			
				+-tint_array<S, 4> tint_unpack_vec3_in_composite_2(tint_array<S_tint_packed_vec3, 4> in) {
			
				+-  tint_array<S, 4> result = tint_array<S, 4>{tint_unpack_vec3_in_composite_1(in[0]), tint_unpack_vec3_in_composite_1(in[1]), tint_unpack_vec3_in_composite_1(in[2]), tint_unpack_vec3_in_composite_1(in[3])};
			
				+-  return result;
			
				+-}
			
				+-
			
				+-void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<S, 4>* const tint_symbol_1) {
			
				++void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<S_tint_packed_vec3, 4>* const tint_symbol_1) {
			
				+   TINT_ISOLATE_UB(tint_volatile_true) for(uint idx = local_idx; (idx < 4u); idx = (idx + 1u)) {
			
				+     uint const i = idx;
			
				+     S const tint_symbol = S{};
			
				+-    (*(tint_symbol_1))[i] = tint_symbol;
			
				++    (*(tint_symbol_1))[i] = tint_pack_vec3_in_composite_1(tint_symbol);
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+ }
			
				+ 
			
				+-void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_2, const constant tint_array<S_tint_packed_vec3, 4>* const tint_symbol_3) {
			
				++void f_inner(uint local_invocation_index, threadgroup tint_array<S_tint_packed_vec3, 4>* const tint_symbol_2, const constant tint_array<S_tint_packed_vec3, 4>* const tint_symbol_3) {
			
				+   tint_zero_workgroup_memory(local_invocation_index, tint_symbol_2);
			
				+-  *(tint_symbol_2) = tint_unpack_vec3_in_composite_2(*(tint_symbol_3));
			
				+-  (*(tint_symbol_2))[1] = tint_unpack_vec3_in_composite_1((*(tint_symbol_3))[2]);
			
				+-  (*(tint_symbol_2))[3].m = tint_unpack_vec3_in_composite((*(tint_symbol_3))[2].m);
			
				+-  (*(tint_symbol_2))[1].m[0] = half3((*(tint_symbol_3))[0].m[1].elements).zxy;
			
				++  *(tint_symbol_2) = *(tint_symbol_3);
			
				++  (*(tint_symbol_2))[1] = (*(tint_symbol_3))[2];
			
				++  (*(tint_symbol_2))[3].m = (*(tint_symbol_3))[2].m;
			
				++  (*(tint_symbol_2))[1].m[0].elements = packed_half3(half3((*(tint_symbol_3))[0].m[1].elements).zxy);
			
				+ }
			
				+ 
			
				+-kernel void f(const constant tint_array<S_tint_packed_vec3, 4>* tint_symbol_6 [[buffer(0)]], threadgroup tint_symbol_7* tint_symbol_5 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				+-  threadgroup tint_array<S, 4>* const tint_symbol_4 = &((*(tint_symbol_5)).w);
			
				+-  f_inner(local_invocation_index, tint_symbol_4, tint_symbol_6);
			
				++kernel void f(const constant tint_array<S_tint_packed_vec3, 4>* tint_symbol_5 [[buffer(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				++  threadgroup tint_array<S_tint_packed_vec3, 4> tint_symbol_4;
			
				++  f_inner(local_invocation_index, &(tint_symbol_4), tint_symbol_5);
			
				+   return;
			
				+ }
			
				+ 
			
				+diff --git a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl.expected.msl
			
				+index f86d50ea1ca6c425a9f1d4f33f1fcaf7d32db85e..ea81af9e2730cb0e67554bcdeb3b4c4347a5bc72 100644
			
				+--- a/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl.expected.msl
			
				++++ b/test/tint/buffer/uniform/std140/struct/mat3x3_f32/to_workgroup.wgsl.expected.msl
			
				+@@ -18,16 +18,6 @@ struct tint_array {
			
				+   volatile bool VOLATILE_NAME = true; \
			
				+   if (VOLATILE_NAME)
			
				+ 
			
				+-struct S {
			
				+-  int before;
			
				+-  float3x3 m;
			
				+-  int after;
			
				+-};
			
				+-
			
				+-struct tint_symbol_7 {
			
				+-  tint_array<S, 4> w;
			
				+-};
			
				+-
			
				+ struct tint_packed_vec3_f32_array_element {
			
				+   /* 0x0000 */ packed_float3 elements;
			
				+   /* 0x000c */ tint_array<int8_t, 4> tint_pad;
			
				+@@ -41,44 +31,45 @@ struct S_tint_packed_vec3 {
			
				+   /* 0x0044 */ tint_array<int8_t, 60> tint_pad_2;
			
				+ };
			
				+ 
			
				+-float3x3 tint_unpack_vec3_in_composite(tint_array<tint_packed_vec3_f32_array_element, 3> in) {
			
				+-  float3x3 result = float3x3(float3(in[0].elements), float3(in[1].elements), float3(in[2].elements));
			
				++tint_array<tint_packed_vec3_f32_array_element, 3> tint_pack_vec3_in_composite(float3x3 in) {
			
				++  tint_array<tint_packed_vec3_f32_array_element, 3> result = tint_array<tint_packed_vec3_f32_array_element, 3>{tint_packed_vec3_f32_array_element{.elements=packed_float3(in[0])}, tint_packed_vec3_f32_array_element{.elements=packed_float3(in[1])}, tint_packed_vec3_f32_array_element{.elements=packed_float3(in[2])}};
			
				+   return result;
			
				+ }
			
				+ 
			
				+-S tint_unpack_vec3_in_composite_1(S_tint_packed_vec3 in) {
			
				+-  S result = {};
			
				++struct S {
			
				++  int before;
			
				++  float3x3 m;
			
				++  int after;
			
				++};
			
				++
			
				++S_tint_packed_vec3 tint_pack_vec3_in_composite_1(S in) {
			
				++  S_tint_packed_vec3 result = {};
			
				+   result.before = in.before;
			
				+-  result.m = tint_unpack_vec3_in_composite(in.m);
			
				++  result.m = tint_pack_vec3_in_composite(in.m);
			
				+   result.after = in.after;
			
				+   return result;
			
				+ }
			
				+ 
			
				+-tint_array<S, 4> tint_unpack_vec3_in_composite_2(tint_array<S_tint_packed_vec3, 4> in) {
			
				+-  tint_array<S, 4> result = tint_array<S, 4>{tint_unpack_vec3_in_composite_1(in[0]), tint_unpack_vec3_in_composite_1(in[1]), tint_unpack_vec3_in_composite_1(in[2]), tint_unpack_vec3_in_composite_1(in[3])};
			
				+-  return result;
			
				+-}
			
				+-
			
				+-void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<S, 4>* const tint_symbol_1) {
			
				++void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<S_tint_packed_vec3, 4>* const tint_symbol_1) {
			
				+   TINT_ISOLATE_UB(tint_volatile_true) for(uint idx = local_idx; (idx < 4u); idx = (idx + 1u)) {
			
				+     uint const i = idx;
			
				+     S const tint_symbol = S{};
			
				+-    (*(tint_symbol_1))[i] = tint_symbol;
			
				++    (*(tint_symbol_1))[i] = tint_pack_vec3_in_composite_1(tint_symbol);
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+ }
			
				+ 
			
				+-void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_2, const constant tint_array<S_tint_packed_vec3, 4>* const tint_symbol_3) {
			
				++void f_inner(uint local_invocation_index, threadgroup tint_array<S_tint_packed_vec3, 4>* const tint_symbol_2, const constant tint_array<S_tint_packed_vec3, 4>* const tint_symbol_3) {
			
				+   tint_zero_workgroup_memory(local_invocation_index, tint_symbol_2);
			
				+-  *(tint_symbol_2) = tint_unpack_vec3_in_composite_2(*(tint_symbol_3));
			
				+-  (*(tint_symbol_2))[1] = tint_unpack_vec3_in_composite_1((*(tint_symbol_3))[2]);
			
				+-  (*(tint_symbol_2))[3].m = tint_unpack_vec3_in_composite((*(tint_symbol_3))[2].m);
			
				+-  (*(tint_symbol_2))[1].m[0] = float3((*(tint_symbol_3))[0].m[1].elements).zxy;
			
				++  *(tint_symbol_2) = *(tint_symbol_3);
			
				++  (*(tint_symbol_2))[1] = (*(tint_symbol_3))[2];
			
				++  (*(tint_symbol_2))[3].m = (*(tint_symbol_3))[2].m;
			
				++  (*(tint_symbol_2))[1].m[0].elements = packed_float3(float3((*(tint_symbol_3))[0].m[1].elements).zxy);
			
				+ }
			
				+ 
			
				+-kernel void f(const constant tint_array<S_tint_packed_vec3, 4>* tint_symbol_6 [[buffer(0)]], threadgroup tint_symbol_7* tint_symbol_5 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				+-  threadgroup tint_array<S, 4>* const tint_symbol_4 = &((*(tint_symbol_5)).w);
			
				+-  f_inner(local_invocation_index, tint_symbol_4, tint_symbol_6);
			
				++kernel void f(const constant tint_array<S_tint_packed_vec3, 4>* tint_symbol_5 [[buffer(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				++  threadgroup tint_array<S_tint_packed_vec3, 4> tint_symbol_4;
			
				++  f_inner(local_invocation_index, &(tint_symbol_4), tint_symbol_5);
			
				+   return;
			
				+ }
			
				+ 
			
				+diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl.expected.msl
			
				+index f4055dddf1aae1b3e126dfa853d576c792df3457..c28a1dde6ebdab4eb8464349ba5682d444fe8f42 100644
			
				+--- a/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl.expected.msl
			
				++++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f16/to_workgroup.wgsl.expected.msl
			
				+@@ -18,16 +18,6 @@ struct tint_array {
			
				+   volatile bool VOLATILE_NAME = true; \
			
				+   if (VOLATILE_NAME)
			
				+ 
			
				+-struct S {
			
				+-  int before;
			
				+-  half4x3 m;
			
				+-  int after;
			
				+-};
			
				+-
			
				+-struct tint_symbol_7 {
			
				+-  tint_array<S, 4> w;
			
				+-};
			
				+-
			
				+ struct tint_packed_vec3_f16_array_element {
			
				+   /* 0x0000 */ packed_half3 elements;
			
				+   /* 0x0006 */ tint_array<int8_t, 2> tint_pad;
			
				+@@ -42,44 +32,45 @@ struct S_tint_packed_vec3 {
			
				+   /* 0x0044 */ tint_array<int8_t, 60> tint_pad_3;
			
				+ };
			
				+ 
			
				+-half4x3 tint_unpack_vec3_in_composite(tint_array<tint_packed_vec3_f16_array_element, 4> in) {
			
				+-  half4x3 result = half4x3(half3(in[0].elements), half3(in[1].elements), half3(in[2].elements), half3(in[3].elements));
			
				++tint_array<tint_packed_vec3_f16_array_element, 4> tint_pack_vec3_in_composite(half4x3 in) {
			
				++  tint_array<tint_packed_vec3_f16_array_element, 4> result = tint_array<tint_packed_vec3_f16_array_element, 4>{tint_packed_vec3_f16_array_element{.elements=packed_half3(in[0])}, tint_packed_vec3_f16_array_element{.elements=packed_half3(in[1])}, tint_packed_vec3_f16_array_element{.elements=packed_half3(in[2])}, tint_packed_vec3_f16_array_element{.elements=packed_half3(in[3])}};
			
				+   return result;
			
				+ }
			
				+ 
			
				+-S tint_unpack_vec3_in_composite_1(S_tint_packed_vec3 in) {
			
				+-  S result = {};
			
				++struct S {
			
				++  int before;
			
				++  half4x3 m;
			
				++  int after;
			
				++};
			
				++
			
				++S_tint_packed_vec3 tint_pack_vec3_in_composite_1(S in) {
			
				++  S_tint_packed_vec3 result = {};
			
				+   result.before = in.before;
			
				+-  result.m = tint_unpack_vec3_in_composite(in.m);
			
				++  result.m = tint_pack_vec3_in_composite(in.m);
			
				+   result.after = in.after;
			
				+   return result;
			
				+ }
			
				+ 
			
				+-tint_array<S, 4> tint_unpack_vec3_in_composite_2(tint_array<S_tint_packed_vec3, 4> in) {
			
				+-  tint_array<S, 4> result = tint_array<S, 4>{tint_unpack_vec3_in_composite_1(in[0]), tint_unpack_vec3_in_composite_1(in[1]), tint_unpack_vec3_in_composite_1(in[2]), tint_unpack_vec3_in_composite_1(in[3])};
			
				+-  return result;
			
				+-}
			
				+-
			
				+-void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<S, 4>* const tint_symbol_1) {
			
				++void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<S_tint_packed_vec3, 4>* const tint_symbol_1) {
			
				+   TINT_ISOLATE_UB(tint_volatile_true) for(uint idx = local_idx; (idx < 4u); idx = (idx + 1u)) {
			
				+     uint const i = idx;
			
				+     S const tint_symbol = S{};
			
				+-    (*(tint_symbol_1))[i] = tint_symbol;
			
				++    (*(tint_symbol_1))[i] = tint_pack_vec3_in_composite_1(tint_symbol);
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+ }
			
				+ 
			
				+-void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_2, const constant tint_array<S_tint_packed_vec3, 4>* const tint_symbol_3) {
			
				++void f_inner(uint local_invocation_index, threadgroup tint_array<S_tint_packed_vec3, 4>* const tint_symbol_2, const constant tint_array<S_tint_packed_vec3, 4>* const tint_symbol_3) {
			
				+   tint_zero_workgroup_memory(local_invocation_index, tint_symbol_2);
			
				+-  *(tint_symbol_2) = tint_unpack_vec3_in_composite_2(*(tint_symbol_3));
			
				+-  (*(tint_symbol_2))[1] = tint_unpack_vec3_in_composite_1((*(tint_symbol_3))[2]);
			
				+-  (*(tint_symbol_2))[3].m = tint_unpack_vec3_in_composite((*(tint_symbol_3))[2].m);
			
				+-  (*(tint_symbol_2))[1].m[0] = half3((*(tint_symbol_3))[0].m[1].elements).zxy;
			
				++  *(tint_symbol_2) = *(tint_symbol_3);
			
				++  (*(tint_symbol_2))[1] = (*(tint_symbol_3))[2];
			
				++  (*(tint_symbol_2))[3].m = (*(tint_symbol_3))[2].m;
			
				++  (*(tint_symbol_2))[1].m[0].elements = packed_half3(half3((*(tint_symbol_3))[0].m[1].elements).zxy);
			
				+ }
			
				+ 
			
				+-kernel void f(const constant tint_array<S_tint_packed_vec3, 4>* tint_symbol_6 [[buffer(0)]], threadgroup tint_symbol_7* tint_symbol_5 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				+-  threadgroup tint_array<S, 4>* const tint_symbol_4 = &((*(tint_symbol_5)).w);
			
				+-  f_inner(local_invocation_index, tint_symbol_4, tint_symbol_6);
			
				++kernel void f(const constant tint_array<S_tint_packed_vec3, 4>* tint_symbol_5 [[buffer(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				++  threadgroup tint_array<S_tint_packed_vec3, 4> tint_symbol_4;
			
				++  f_inner(local_invocation_index, &(tint_symbol_4), tint_symbol_5);
			
				+   return;
			
				+ }
			
				+ 
			
				+diff --git a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl.expected.msl
			
				+index daf7a4e046fe0a94aa9d654c2bd95b7cfea75ffb..5960e1dc815793b32453796654a2e37d5c3b0448 100644
			
				+--- a/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl.expected.msl
			
				++++ b/test/tint/buffer/uniform/std140/struct/mat4x3_f32/to_workgroup.wgsl.expected.msl
			
				+@@ -18,16 +18,6 @@ struct tint_array {
			
				+   volatile bool VOLATILE_NAME = true; \
			
				+   if (VOLATILE_NAME)
			
				+ 
			
				+-struct S {
			
				+-  int before;
			
				+-  float4x3 m;
			
				+-  int after;
			
				+-};
			
				+-
			
				+-struct tint_symbol_7 {
			
				+-  tint_array<S, 4> w;
			
				+-};
			
				+-
			
				+ struct tint_packed_vec3_f32_array_element {
			
				+   /* 0x0000 */ packed_float3 elements;
			
				+   /* 0x000c */ tint_array<int8_t, 4> tint_pad;
			
				+@@ -42,44 +32,45 @@ struct S_tint_packed_vec3 {
			
				+   /* 0x0084 */ tint_array<int8_t, 60> tint_pad_3;
			
				+ };
			
				+ 
			
				+-float4x3 tint_unpack_vec3_in_composite(tint_array<tint_packed_vec3_f32_array_element, 4> in) {
			
				+-  float4x3 result = float4x3(float3(in[0].elements), float3(in[1].elements), float3(in[2].elements), float3(in[3].elements));
			
				++tint_array<tint_packed_vec3_f32_array_element, 4> tint_pack_vec3_in_composite(float4x3 in) {
			
				++  tint_array<tint_packed_vec3_f32_array_element, 4> result = tint_array<tint_packed_vec3_f32_array_element, 4>{tint_packed_vec3_f32_array_element{.elements=packed_float3(in[0])}, tint_packed_vec3_f32_array_element{.elements=packed_float3(in[1])}, tint_packed_vec3_f32_array_element{.elements=packed_float3(in[2])}, tint_packed_vec3_f32_array_element{.elements=packed_float3(in[3])}};
			
				+   return result;
			
				+ }
			
				+ 
			
				+-S tint_unpack_vec3_in_composite_1(S_tint_packed_vec3 in) {
			
				+-  S result = {};
			
				++struct S {
			
				++  int before;
			
				++  float4x3 m;
			
				++  int after;
			
				++};
			
				++
			
				++S_tint_packed_vec3 tint_pack_vec3_in_composite_1(S in) {
			
				++  S_tint_packed_vec3 result = {};
			
				+   result.before = in.before;
			
				+-  result.m = tint_unpack_vec3_in_composite(in.m);
			
				++  result.m = tint_pack_vec3_in_composite(in.m);
			
				+   result.after = in.after;
			
				+   return result;
			
				+ }
			
				+ 
			
				+-tint_array<S, 4> tint_unpack_vec3_in_composite_2(tint_array<S_tint_packed_vec3, 4> in) {
			
				+-  tint_array<S, 4> result = tint_array<S, 4>{tint_unpack_vec3_in_composite_1(in[0]), tint_unpack_vec3_in_composite_1(in[1]), tint_unpack_vec3_in_composite_1(in[2]), tint_unpack_vec3_in_composite_1(in[3])};
			
				+-  return result;
			
				+-}
			
				+-
			
				+-void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<S, 4>* const tint_symbol_1) {
			
				++void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<S_tint_packed_vec3, 4>* const tint_symbol_1) {
			
				+   TINT_ISOLATE_UB(tint_volatile_true) for(uint idx = local_idx; (idx < 4u); idx = (idx + 1u)) {
			
				+     uint const i = idx;
			
				+     S const tint_symbol = S{};
			
				+-    (*(tint_symbol_1))[i] = tint_symbol;
			
				++    (*(tint_symbol_1))[i] = tint_pack_vec3_in_composite_1(tint_symbol);
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+ }
			
				+ 
			
				+-void f_inner(uint local_invocation_index, threadgroup tint_array<S, 4>* const tint_symbol_2, const constant tint_array<S_tint_packed_vec3, 4>* const tint_symbol_3) {
			
				++void f_inner(uint local_invocation_index, threadgroup tint_array<S_tint_packed_vec3, 4>* const tint_symbol_2, const constant tint_array<S_tint_packed_vec3, 4>* const tint_symbol_3) {
			
				+   tint_zero_workgroup_memory(local_invocation_index, tint_symbol_2);
			
				+-  *(tint_symbol_2) = tint_unpack_vec3_in_composite_2(*(tint_symbol_3));
			
				+-  (*(tint_symbol_2))[1] = tint_unpack_vec3_in_composite_1((*(tint_symbol_3))[2]);
			
				+-  (*(tint_symbol_2))[3].m = tint_unpack_vec3_in_composite((*(tint_symbol_3))[2].m);
			
				+-  (*(tint_symbol_2))[1].m[0] = float3((*(tint_symbol_3))[0].m[1].elements).zxy;
			
				++  *(tint_symbol_2) = *(tint_symbol_3);
			
				++  (*(tint_symbol_2))[1] = (*(tint_symbol_3))[2];
			
				++  (*(tint_symbol_2))[3].m = (*(tint_symbol_3))[2].m;
			
				++  (*(tint_symbol_2))[1].m[0].elements = packed_float3(float3((*(tint_symbol_3))[0].m[1].elements).zxy);
			
				+ }
			
				+ 
			
				+-kernel void f(const constant tint_array<S_tint_packed_vec3, 4>* tint_symbol_6 [[buffer(0)]], threadgroup tint_symbol_7* tint_symbol_5 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				+-  threadgroup tint_array<S, 4>* const tint_symbol_4 = &((*(tint_symbol_5)).w);
			
				+-  f_inner(local_invocation_index, tint_symbol_4, tint_symbol_6);
			
				++kernel void f(const constant tint_array<S_tint_packed_vec3, 4>* tint_symbol_5 [[buffer(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				++  threadgroup tint_array<S_tint_packed_vec3, 4> tint_symbol_4;
			
				++  f_inner(local_invocation_index, &(tint_symbol_4), tint_symbol_5);
			
				+   return;
			
				+ }
			
				+ 
			
				+diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl.expected.msl
			
				+index 70549cbb7a4764016fcf6c9cb019b7127cf6a802..bf973d8799e91a944e90c6528b333fbcc1ce2163 100644
			
				+--- a/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl.expected.msl
			
				++++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f16/to_workgroup.wgsl.expected.msl
			
				+@@ -14,38 +14,34 @@ struct tint_array {
			
				+     T elements[N];
			
				+ };
			
				+ 
			
				+-struct tint_symbol_6 {
			
				+-  half2x3 w;
			
				+-};
			
				+-
			
				+ struct tint_packed_vec3_f16_array_element {
			
				+   /* 0x0000 */ packed_half3 elements;
			
				+   /* 0x0006 */ tint_array<int8_t, 2> tint_pad;
			
				+ };
			
				+ 
			
				+-half2x3 tint_unpack_vec3_in_composite(tint_array<tint_packed_vec3_f16_array_element, 2> in) {
			
				+-  half2x3 result = half2x3(half3(in[0].elements), half3(in[1].elements));
			
				++tint_array<tint_packed_vec3_f16_array_element, 2> tint_pack_vec3_in_composite(half2x3 in) {
			
				++  tint_array<tint_packed_vec3_f16_array_element, 2> result = tint_array<tint_packed_vec3_f16_array_element, 2>{tint_packed_vec3_f16_array_element{.elements=packed_half3(in[0])}, tint_packed_vec3_f16_array_element{.elements=packed_half3(in[1])}};
			
				+   return result;
			
				+ }
			
				+ 
			
				+-void tint_zero_workgroup_memory(uint local_idx, threadgroup half2x3* const tint_symbol) {
			
				++void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<tint_packed_vec3_f16_array_element, 2>* const tint_symbol) {
			
				+   if ((local_idx < 1u)) {
			
				+-    *(tint_symbol) = half2x3(half3(0.0h), half3(0.0h));
			
				++    *(tint_symbol) = tint_pack_vec3_in_composite(half2x3(half3(0.0h), half3(0.0h)));
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+ }
			
				+ 
			
				+-void f_inner(uint local_invocation_index, threadgroup half2x3* const tint_symbol_1, const constant tint_array<tint_packed_vec3_f16_array_element, 2>* const tint_symbol_2) {
			
				++void f_inner(uint local_invocation_index, threadgroup tint_array<tint_packed_vec3_f16_array_element, 2>* const tint_symbol_1, const constant tint_array<tint_packed_vec3_f16_array_element, 2>* const tint_symbol_2) {
			
				+   tint_zero_workgroup_memory(local_invocation_index, tint_symbol_1);
			
				+-  *(tint_symbol_1) = tint_unpack_vec3_in_composite(*(tint_symbol_2));
			
				+-  (*(tint_symbol_1))[1] = half3((*(tint_symbol_2))[0].elements);
			
				+-  (*(tint_symbol_1))[1] = half3((*(tint_symbol_2))[0].elements).zxy;
			
				+-  (*(tint_symbol_1))[0][1] = (*(tint_symbol_2))[1].elements[0];
			
				++  *(tint_symbol_1) = *(tint_symbol_2);
			
				++  (*(tint_symbol_1))[1].elements = (*(tint_symbol_2))[0].elements;
			
				++  (*(tint_symbol_1))[1].elements = packed_half3(half3((*(tint_symbol_2))[0].elements).zxy);
			
				++  (*(tint_symbol_1))[0].elements[1] = (*(tint_symbol_2))[1].elements[0];
			
				+ }
			
				+ 
			
				+-kernel void f(const constant tint_array<tint_packed_vec3_f16_array_element, 2>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				+-  threadgroup half2x3* const tint_symbol_3 = &((*(tint_symbol_4)).w);
			
				+-  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
			
				++kernel void f(const constant tint_array<tint_packed_vec3_f16_array_element, 2>* tint_symbol_4 [[buffer(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				++  threadgroup tint_array<tint_packed_vec3_f16_array_element, 2> tint_symbol_3;
			
				++  f_inner(local_invocation_index, &(tint_symbol_3), tint_symbol_4);
			
				+   return;
			
				+ }
			
				+ 
			
				+diff --git a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl.expected.msl
			
				+index 59ee2c84f205c434f42dfaf57440f240a06f4df2..08871dcc3ff65dcdc567b98fe8c8b86d470f8aa1 100644
			
				+--- a/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl.expected.msl
			
				++++ b/test/tint/buffer/uniform/std140/unnested/mat2x3_f32/to_workgroup.wgsl.expected.msl
			
				+@@ -14,38 +14,34 @@ struct tint_array {
			
				+     T elements[N];
			
				+ };
			
				+ 
			
				+-struct tint_symbol_6 {
			
				+-  float2x3 w;
			
				+-};
			
				+-
			
				+ struct tint_packed_vec3_f32_array_element {
			
				+   /* 0x0000 */ packed_float3 elements;
			
				+   /* 0x000c */ tint_array<int8_t, 4> tint_pad;
			
				+ };
			
				+ 
			
				+-float2x3 tint_unpack_vec3_in_composite(tint_array<tint_packed_vec3_f32_array_element, 2> in) {
			
				+-  float2x3 result = float2x3(float3(in[0].elements), float3(in[1].elements));
			
				++tint_array<tint_packed_vec3_f32_array_element, 2> tint_pack_vec3_in_composite(float2x3 in) {
			
				++  tint_array<tint_packed_vec3_f32_array_element, 2> result = tint_array<tint_packed_vec3_f32_array_element, 2>{tint_packed_vec3_f32_array_element{.elements=packed_float3(in[0])}, tint_packed_vec3_f32_array_element{.elements=packed_float3(in[1])}};
			
				+   return result;
			
				+ }
			
				+ 
			
				+-void tint_zero_workgroup_memory(uint local_idx, threadgroup float2x3* const tint_symbol) {
			
				++void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<tint_packed_vec3_f32_array_element, 2>* const tint_symbol) {
			
				+   if ((local_idx < 1u)) {
			
				+-    *(tint_symbol) = float2x3(float3(0.0f), float3(0.0f));
			
				++    *(tint_symbol) = tint_pack_vec3_in_composite(float2x3(float3(0.0f), float3(0.0f)));
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+ }
			
				+ 
			
				+-void f_inner(uint local_invocation_index, threadgroup float2x3* const tint_symbol_1, const constant tint_array<tint_packed_vec3_f32_array_element, 2>* const tint_symbol_2) {
			
				++void f_inner(uint local_invocation_index, threadgroup tint_array<tint_packed_vec3_f32_array_element, 2>* const tint_symbol_1, const constant tint_array<tint_packed_vec3_f32_array_element, 2>* const tint_symbol_2) {
			
				+   tint_zero_workgroup_memory(local_invocation_index, tint_symbol_1);
			
				+-  *(tint_symbol_1) = tint_unpack_vec3_in_composite(*(tint_symbol_2));
			
				+-  (*(tint_symbol_1))[1] = float3((*(tint_symbol_2))[0].elements);
			
				+-  (*(tint_symbol_1))[1] = float3((*(tint_symbol_2))[0].elements).zxy;
			
				+-  (*(tint_symbol_1))[0][1] = (*(tint_symbol_2))[1].elements[0];
			
				++  *(tint_symbol_1) = *(tint_symbol_2);
			
				++  (*(tint_symbol_1))[1].elements = (*(tint_symbol_2))[0].elements;
			
				++  (*(tint_symbol_1))[1].elements = packed_float3(float3((*(tint_symbol_2))[0].elements).zxy);
			
				++  (*(tint_symbol_1))[0].elements[1] = (*(tint_symbol_2))[1].elements[0];
			
				+ }
			
				+ 
			
				+-kernel void f(const constant tint_array<tint_packed_vec3_f32_array_element, 2>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				+-  threadgroup float2x3* const tint_symbol_3 = &((*(tint_symbol_4)).w);
			
				+-  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
			
				++kernel void f(const constant tint_array<tint_packed_vec3_f32_array_element, 2>* tint_symbol_4 [[buffer(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				++  threadgroup tint_array<tint_packed_vec3_f32_array_element, 2> tint_symbol_3;
			
				++  f_inner(local_invocation_index, &(tint_symbol_3), tint_symbol_4);
			
				+   return;
			
				+ }
			
				+ 
			
				+diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl.expected.msl
			
				+index 91b15694a582327aeb19db22d1d8f8a39da5eae3..e55a4a7f42eec9fdee22847533c9b3710d327387 100644
			
				+--- a/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl.expected.msl
			
				++++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f16/to_workgroup.wgsl.expected.msl
			
				+@@ -14,38 +14,34 @@ struct tint_array {
			
				+     T elements[N];
			
				+ };
			
				+ 
			
				+-struct tint_symbol_6 {
			
				+-  half3x3 w;
			
				+-};
			
				+-
			
				+ struct tint_packed_vec3_f16_array_element {
			
				+   /* 0x0000 */ packed_half3 elements;
			
				+   /* 0x0006 */ tint_array<int8_t, 2> tint_pad;
			
				+ };
			
				+ 
			
				+-half3x3 tint_unpack_vec3_in_composite(tint_array<tint_packed_vec3_f16_array_element, 3> in) {
			
				+-  half3x3 result = half3x3(half3(in[0].elements), half3(in[1].elements), half3(in[2].elements));
			
				++tint_array<tint_packed_vec3_f16_array_element, 3> tint_pack_vec3_in_composite(half3x3 in) {
			
				++  tint_array<tint_packed_vec3_f16_array_element, 3> result = tint_array<tint_packed_vec3_f16_array_element, 3>{tint_packed_vec3_f16_array_element{.elements=packed_half3(in[0])}, tint_packed_vec3_f16_array_element{.elements=packed_half3(in[1])}, tint_packed_vec3_f16_array_element{.elements=packed_half3(in[2])}};
			
				+   return result;
			
				+ }
			
				+ 
			
				+-void tint_zero_workgroup_memory(uint local_idx, threadgroup half3x3* const tint_symbol) {
			
				++void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<tint_packed_vec3_f16_array_element, 3>* const tint_symbol) {
			
				+   if ((local_idx < 1u)) {
			
				+-    *(tint_symbol) = half3x3(half3(0.0h), half3(0.0h), half3(0.0h));
			
				++    *(tint_symbol) = tint_pack_vec3_in_composite(half3x3(half3(0.0h), half3(0.0h), half3(0.0h)));
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+ }
			
				+ 
			
				+-void f_inner(uint local_invocation_index, threadgroup half3x3* const tint_symbol_1, const constant tint_array<tint_packed_vec3_f16_array_element, 3>* const tint_symbol_2) {
			
				++void f_inner(uint local_invocation_index, threadgroup tint_array<tint_packed_vec3_f16_array_element, 3>* const tint_symbol_1, const constant tint_array<tint_packed_vec3_f16_array_element, 3>* const tint_symbol_2) {
			
				+   tint_zero_workgroup_memory(local_invocation_index, tint_symbol_1);
			
				+-  *(tint_symbol_1) = tint_unpack_vec3_in_composite(*(tint_symbol_2));
			
				+-  (*(tint_symbol_1))[1] = half3((*(tint_symbol_2))[0].elements);
			
				+-  (*(tint_symbol_1))[1] = half3((*(tint_symbol_2))[0].elements).zxy;
			
				+-  (*(tint_symbol_1))[0][1] = (*(tint_symbol_2))[1].elements[0];
			
				++  *(tint_symbol_1) = *(tint_symbol_2);
			
				++  (*(tint_symbol_1))[1].elements = (*(tint_symbol_2))[0].elements;
			
				++  (*(tint_symbol_1))[1].elements = packed_half3(half3((*(tint_symbol_2))[0].elements).zxy);
			
				++  (*(tint_symbol_1))[0].elements[1] = (*(tint_symbol_2))[1].elements[0];
			
				+ }
			
				+ 
			
				+-kernel void f(const constant tint_array<tint_packed_vec3_f16_array_element, 3>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				+-  threadgroup half3x3* const tint_symbol_3 = &((*(tint_symbol_4)).w);
			
				+-  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
			
				++kernel void f(const constant tint_array<tint_packed_vec3_f16_array_element, 3>* tint_symbol_4 [[buffer(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				++  threadgroup tint_array<tint_packed_vec3_f16_array_element, 3> tint_symbol_3;
			
				++  f_inner(local_invocation_index, &(tint_symbol_3), tint_symbol_4);
			
				+   return;
			
				+ }
			
				+ 
			
				+diff --git a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl.expected.msl
			
				+index 799208b15f1f49901f9b76010b9572c8723e86a1..aeb8948a5385ca398049e1b4d4f9a89b4c517866 100644
			
				+--- a/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl.expected.msl
			
				++++ b/test/tint/buffer/uniform/std140/unnested/mat3x3_f32/to_workgroup.wgsl.expected.msl
			
				+@@ -14,38 +14,34 @@ struct tint_array {
			
				+     T elements[N];
			
				+ };
			
				+ 
			
				+-struct tint_symbol_6 {
			
				+-  float3x3 w;
			
				+-};
			
				+-
			
				+ struct tint_packed_vec3_f32_array_element {
			
				+   /* 0x0000 */ packed_float3 elements;
			
				+   /* 0x000c */ tint_array<int8_t, 4> tint_pad;
			
				+ };
			
				+ 
			
				+-float3x3 tint_unpack_vec3_in_composite(tint_array<tint_packed_vec3_f32_array_element, 3> in) {
			
				+-  float3x3 result = float3x3(float3(in[0].elements), float3(in[1].elements), float3(in[2].elements));
			
				++tint_array<tint_packed_vec3_f32_array_element, 3> tint_pack_vec3_in_composite(float3x3 in) {
			
				++  tint_array<tint_packed_vec3_f32_array_element, 3> result = tint_array<tint_packed_vec3_f32_array_element, 3>{tint_packed_vec3_f32_array_element{.elements=packed_float3(in[0])}, tint_packed_vec3_f32_array_element{.elements=packed_float3(in[1])}, tint_packed_vec3_f32_array_element{.elements=packed_float3(in[2])}};
			
				+   return result;
			
				+ }
			
				+ 
			
				+-void tint_zero_workgroup_memory(uint local_idx, threadgroup float3x3* const tint_symbol) {
			
				++void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<tint_packed_vec3_f32_array_element, 3>* const tint_symbol) {
			
				+   if ((local_idx < 1u)) {
			
				+-    *(tint_symbol) = float3x3(float3(0.0f), float3(0.0f), float3(0.0f));
			
				++    *(tint_symbol) = tint_pack_vec3_in_composite(float3x3(float3(0.0f), float3(0.0f), float3(0.0f)));
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+ }
			
				+ 
			
				+-void f_inner(uint local_invocation_index, threadgroup float3x3* const tint_symbol_1, const constant tint_array<tint_packed_vec3_f32_array_element, 3>* const tint_symbol_2) {
			
				++void f_inner(uint local_invocation_index, threadgroup tint_array<tint_packed_vec3_f32_array_element, 3>* const tint_symbol_1, const constant tint_array<tint_packed_vec3_f32_array_element, 3>* const tint_symbol_2) {
			
				+   tint_zero_workgroup_memory(local_invocation_index, tint_symbol_1);
			
				+-  *(tint_symbol_1) = tint_unpack_vec3_in_composite(*(tint_symbol_2));
			
				+-  (*(tint_symbol_1))[1] = float3((*(tint_symbol_2))[0].elements);
			
				+-  (*(tint_symbol_1))[1] = float3((*(tint_symbol_2))[0].elements).zxy;
			
				+-  (*(tint_symbol_1))[0][1] = (*(tint_symbol_2))[1].elements[0];
			
				++  *(tint_symbol_1) = *(tint_symbol_2);
			
				++  (*(tint_symbol_1))[1].elements = (*(tint_symbol_2))[0].elements;
			
				++  (*(tint_symbol_1))[1].elements = packed_float3(float3((*(tint_symbol_2))[0].elements).zxy);
			
				++  (*(tint_symbol_1))[0].elements[1] = (*(tint_symbol_2))[1].elements[0];
			
				+ }
			
				+ 
			
				+-kernel void f(const constant tint_array<tint_packed_vec3_f32_array_element, 3>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				+-  threadgroup float3x3* const tint_symbol_3 = &((*(tint_symbol_4)).w);
			
				+-  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
			
				++kernel void f(const constant tint_array<tint_packed_vec3_f32_array_element, 3>* tint_symbol_4 [[buffer(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				++  threadgroup tint_array<tint_packed_vec3_f32_array_element, 3> tint_symbol_3;
			
				++  f_inner(local_invocation_index, &(tint_symbol_3), tint_symbol_4);
			
				+   return;
			
				+ }
			
				+ 
			
				+diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl.expected.msl
			
				+index 858ab01df8b0ffa1d91cb194ae39e8c4808f87eb..884415169406fd5e1e575fa12e88cd13d9b1317a 100644
			
				+--- a/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl.expected.msl
			
				++++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f16/to_workgroup.wgsl.expected.msl
			
				+@@ -14,38 +14,34 @@ struct tint_array {
			
				+     T elements[N];
			
				+ };
			
				+ 
			
				+-struct tint_symbol_6 {
			
				+-  half4x3 w;
			
				+-};
			
				+-
			
				+ struct tint_packed_vec3_f16_array_element {
			
				+   /* 0x0000 */ packed_half3 elements;
			
				+   /* 0x0006 */ tint_array<int8_t, 2> tint_pad;
			
				+ };
			
				+ 
			
				+-half4x3 tint_unpack_vec3_in_composite(tint_array<tint_packed_vec3_f16_array_element, 4> in) {
			
				+-  half4x3 result = half4x3(half3(in[0].elements), half3(in[1].elements), half3(in[2].elements), half3(in[3].elements));
			
				++tint_array<tint_packed_vec3_f16_array_element, 4> tint_pack_vec3_in_composite(half4x3 in) {
			
				++  tint_array<tint_packed_vec3_f16_array_element, 4> result = tint_array<tint_packed_vec3_f16_array_element, 4>{tint_packed_vec3_f16_array_element{.elements=packed_half3(in[0])}, tint_packed_vec3_f16_array_element{.elements=packed_half3(in[1])}, tint_packed_vec3_f16_array_element{.elements=packed_half3(in[2])}, tint_packed_vec3_f16_array_element{.elements=packed_half3(in[3])}};
			
				+   return result;
			
				+ }
			
				+ 
			
				+-void tint_zero_workgroup_memory(uint local_idx, threadgroup half4x3* const tint_symbol) {
			
				++void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<tint_packed_vec3_f16_array_element, 4>* const tint_symbol) {
			
				+   if ((local_idx < 1u)) {
			
				+-    *(tint_symbol) = half4x3(half3(0.0h), half3(0.0h), half3(0.0h), half3(0.0h));
			
				++    *(tint_symbol) = tint_pack_vec3_in_composite(half4x3(half3(0.0h), half3(0.0h), half3(0.0h), half3(0.0h)));
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+ }
			
				+ 
			
				+-void f_inner(uint local_invocation_index, threadgroup half4x3* const tint_symbol_1, const constant tint_array<tint_packed_vec3_f16_array_element, 4>* const tint_symbol_2) {
			
				++void f_inner(uint local_invocation_index, threadgroup tint_array<tint_packed_vec3_f16_array_element, 4>* const tint_symbol_1, const constant tint_array<tint_packed_vec3_f16_array_element, 4>* const tint_symbol_2) {
			
				+   tint_zero_workgroup_memory(local_invocation_index, tint_symbol_1);
			
				+-  *(tint_symbol_1) = tint_unpack_vec3_in_composite(*(tint_symbol_2));
			
				+-  (*(tint_symbol_1))[1] = half3((*(tint_symbol_2))[0].elements);
			
				+-  (*(tint_symbol_1))[1] = half3((*(tint_symbol_2))[0].elements).zxy;
			
				+-  (*(tint_symbol_1))[0][1] = (*(tint_symbol_2))[1].elements[0];
			
				++  *(tint_symbol_1) = *(tint_symbol_2);
			
				++  (*(tint_symbol_1))[1].elements = (*(tint_symbol_2))[0].elements;
			
				++  (*(tint_symbol_1))[1].elements = packed_half3(half3((*(tint_symbol_2))[0].elements).zxy);
			
				++  (*(tint_symbol_1))[0].elements[1] = (*(tint_symbol_2))[1].elements[0];
			
				+ }
			
				+ 
			
				+-kernel void f(const constant tint_array<tint_packed_vec3_f16_array_element, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				+-  threadgroup half4x3* const tint_symbol_3 = &((*(tint_symbol_4)).w);
			
				+-  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
			
				++kernel void f(const constant tint_array<tint_packed_vec3_f16_array_element, 4>* tint_symbol_4 [[buffer(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				++  threadgroup tint_array<tint_packed_vec3_f16_array_element, 4> tint_symbol_3;
			
				++  f_inner(local_invocation_index, &(tint_symbol_3), tint_symbol_4);
			
				+   return;
			
				+ }
			
				+ 
			
				+diff --git a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl.expected.msl b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl.expected.msl
			
				+index 37e946559438cd496195312dd46d80ef19eef17e..c74ab56ab2e7bffe6726cb569b47e4109e045d0d 100644
			
				+--- a/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl.expected.msl
			
				++++ b/test/tint/buffer/uniform/std140/unnested/mat4x3_f32/to_workgroup.wgsl.expected.msl
			
				+@@ -14,38 +14,34 @@ struct tint_array {
			
				+     T elements[N];
			
				+ };
			
				+ 
			
				+-struct tint_symbol_6 {
			
				+-  float4x3 w;
			
				+-};
			
				+-
			
				+ struct tint_packed_vec3_f32_array_element {
			
				+   /* 0x0000 */ packed_float3 elements;
			
				+   /* 0x000c */ tint_array<int8_t, 4> tint_pad;
			
				+ };
			
				+ 
			
				+-float4x3 tint_unpack_vec3_in_composite(tint_array<tint_packed_vec3_f32_array_element, 4> in) {
			
				+-  float4x3 result = float4x3(float3(in[0].elements), float3(in[1].elements), float3(in[2].elements), float3(in[3].elements));
			
				++tint_array<tint_packed_vec3_f32_array_element, 4> tint_pack_vec3_in_composite(float4x3 in) {
			
				++  tint_array<tint_packed_vec3_f32_array_element, 4> result = tint_array<tint_packed_vec3_f32_array_element, 4>{tint_packed_vec3_f32_array_element{.elements=packed_float3(in[0])}, tint_packed_vec3_f32_array_element{.elements=packed_float3(in[1])}, tint_packed_vec3_f32_array_element{.elements=packed_float3(in[2])}, tint_packed_vec3_f32_array_element{.elements=packed_float3(in[3])}};
			
				+   return result;
			
				+ }
			
				+ 
			
				+-void tint_zero_workgroup_memory(uint local_idx, threadgroup float4x3* const tint_symbol) {
			
				++void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<tint_packed_vec3_f32_array_element, 4>* const tint_symbol) {
			
				+   if ((local_idx < 1u)) {
			
				+-    *(tint_symbol) = float4x3(float3(0.0f), float3(0.0f), float3(0.0f), float3(0.0f));
			
				++    *(tint_symbol) = tint_pack_vec3_in_composite(float4x3(float3(0.0f), float3(0.0f), float3(0.0f), float3(0.0f)));
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+ }
			
				+ 
			
				+-void f_inner(uint local_invocation_index, threadgroup float4x3* const tint_symbol_1, const constant tint_array<tint_packed_vec3_f32_array_element, 4>* const tint_symbol_2) {
			
				++void f_inner(uint local_invocation_index, threadgroup tint_array<tint_packed_vec3_f32_array_element, 4>* const tint_symbol_1, const constant tint_array<tint_packed_vec3_f32_array_element, 4>* const tint_symbol_2) {
			
				+   tint_zero_workgroup_memory(local_invocation_index, tint_symbol_1);
			
				+-  *(tint_symbol_1) = tint_unpack_vec3_in_composite(*(tint_symbol_2));
			
				+-  (*(tint_symbol_1))[1] = float3((*(tint_symbol_2))[0].elements);
			
				+-  (*(tint_symbol_1))[1] = float3((*(tint_symbol_2))[0].elements).zxy;
			
				+-  (*(tint_symbol_1))[0][1] = (*(tint_symbol_2))[1].elements[0];
			
				++  *(tint_symbol_1) = *(tint_symbol_2);
			
				++  (*(tint_symbol_1))[1].elements = (*(tint_symbol_2))[0].elements;
			
				++  (*(tint_symbol_1))[1].elements = packed_float3(float3((*(tint_symbol_2))[0].elements).zxy);
			
				++  (*(tint_symbol_1))[0].elements[1] = (*(tint_symbol_2))[1].elements[0];
			
				+ }
			
				+ 
			
				+-kernel void f(const constant tint_array<tint_packed_vec3_f32_array_element, 4>* tint_symbol_5 [[buffer(0)]], threadgroup tint_symbol_6* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				+-  threadgroup float4x3* const tint_symbol_3 = &((*(tint_symbol_4)).w);
			
				+-  f_inner(local_invocation_index, tint_symbol_3, tint_symbol_5);
			
				++kernel void f(const constant tint_array<tint_packed_vec3_f32_array_element, 4>* tint_symbol_4 [[buffer(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				++  threadgroup tint_array<tint_packed_vec3_f32_array_element, 4> tint_symbol_3;
			
				++  f_inner(local_invocation_index, &(tint_symbol_3), tint_symbol_4);
			
				+   return;
			
				+ }
			
				+ 
			
				+diff --git a/test/tint/bug/tint/366037039.wgsl.expected.msl b/test/tint/bug/tint/366037039.wgsl.expected.msl
			
				+new file mode 100644
			
				+index 0000000000000000000000000000000000000000..cf8107dd0fa4413d781c6cdabeb18c33a8ebf77a
			
				+--- /dev/null
			
				++++ b/test/tint/bug/tint/366037039.wgsl.expected.msl
			
				+@@ -0,0 +1,85 @@
			
				++#include <metal_stdlib>
			
				++
			
				++using namespace metal;
			
				++
			
				++template<typename T, size_t N>
			
				++struct tint_array {
			
				++    const constant T& operator[](size_t i) const constant { return elements[i]; }
			
				++    device T& operator[](size_t i) device { return elements[i]; }
			
				++    const device T& operator[](size_t i) const device { return elements[i]; }
			
				++    thread T& operator[](size_t i) thread { return elements[i]; }
			
				++    const thread T& operator[](size_t i) const thread { return elements[i]; }
			
				++    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
			
				++    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
			
				++    T elements[N];
			
				++};
			
				++
			
				++#define TINT_ISOLATE_UB(VOLATILE_NAME) \
			
				++  {volatile bool VOLATILE_NAME = false; if (VOLATILE_NAME) break;}
			
				++
			
				++struct tint_packed_vec3_u32_array_element {
			
				++  /* 0x0000 */ packed_uint3 elements;
			
				++  /* 0x000c */ tint_array<int8_t, 4> tint_pad;
			
				++};
			
				++
			
				++struct S_tint_packed_vec3 {
			
				++  /* 0x0000 */ packed_uint3 a;
			
				++  /* 0x000c */ uint b;
			
				++  /* 0x0010 */ tint_array<tint_packed_vec3_u32_array_element, 4> c;
			
				++};
			
				++
			
				++tint_array<uint3, 4> tint_unpack_vec3_in_composite(tint_array<tint_packed_vec3_u32_array_element, 4> in) {
			
				++  tint_array<uint3, 4> result = tint_array<uint3, 4>{uint3(in[0].elements), uint3(in[1].elements), uint3(in[2].elements), uint3(in[3].elements)};
			
				++  return result;
			
				++}
			
				++
			
				++struct S {
			
				++  uint3 a;
			
				++  uint b;
			
				++  tint_array<uint3, 4> c;
			
				++};
			
				++
			
				++S tint_unpack_vec3_in_composite_1(S_tint_packed_vec3 in) {
			
				++  S result = {};
			
				++  result.a = uint3(in.a);
			
				++  result.b = in.b;
			
				++  result.c = tint_unpack_vec3_in_composite(in.c);
			
				++  return result;
			
				++}
			
				++
			
				++tint_array<tint_packed_vec3_u32_array_element, 4> tint_pack_vec3_in_composite(tint_array<uint3, 4> in) {
			
				++  tint_array<tint_packed_vec3_u32_array_element, 4> result = tint_array<tint_packed_vec3_u32_array_element, 4>{tint_packed_vec3_u32_array_element{.elements=packed_uint3(in[0])}, tint_packed_vec3_u32_array_element{.elements=packed_uint3(in[1])}, tint_packed_vec3_u32_array_element{.elements=packed_uint3(in[2])}, tint_packed_vec3_u32_array_element{.elements=packed_uint3(in[3])}};
			
				++  return result;
			
				++}
			
				++
			
				++S_tint_packed_vec3 tint_pack_vec3_in_composite_1(S in) {
			
				++  S_tint_packed_vec3 result = {};
			
				++  result.a = packed_uint3(in.a);
			
				++  result.b = in.b;
			
				++  result.c = tint_pack_vec3_in_composite(in.c);
			
				++  return result;
			
				++}
			
				++
			
				++void assign_and_preserve_padding_1(device tint_array<tint_packed_vec3_u32_array_element, 4>* const dest, tint_array<uint3, 4> value) {
			
				++  for(uint i = 0u; (i < 4u); i = (i + 1u)) {
			
				++    TINT_ISOLATE_UB(tint_volatile_false);
			
				++    (*(dest))[i].elements = packed_uint3(value[i]);
			
				++  }
			
				++}
			
				++
			
				++void assign_and_preserve_padding(device S_tint_packed_vec3* const dest, S value) {
			
				++  (*(dest)).a = packed_uint3(value.a);
			
				++  (*(dest)).b = value.b;
			
				++  assign_and_preserve_padding_1(&((*(dest)).c), value.c);
			
				++}
			
				++
			
				++void foo(const constant S_tint_packed_vec3* const tint_symbol_2, device S_tint_packed_vec3* const tint_symbol_3, threadgroup S_tint_packed_vec3* const tint_symbol_4) {
			
				++  S const u = tint_unpack_vec3_in_composite_1(*(tint_symbol_2));
			
				++  S const s = tint_unpack_vec3_in_composite_1(*(tint_symbol_3));
			
				++  S const w = tint_unpack_vec3_in_composite_1(*(tint_symbol_3));
			
				++  S const tint_symbol = S{};
			
				++  assign_and_preserve_padding(tint_symbol_3, tint_symbol);
			
				++  S const tint_symbol_1 = S{};
			
				++  *(tint_symbol_4) = tint_pack_vec3_in_composite_1(tint_symbol_1);
			
				++}
			
				++
			
				+diff --git a/test/tint/bug/tint/366314931.wgsl.expected.msl b/test/tint/bug/tint/366314931.wgsl.expected.msl
			
				+new file mode 100644
			
				+index 0000000000000000000000000000000000000000..3fccaebd22e6177e4e7c2863acaa99de1985ccea
			
				+--- /dev/null
			
				++++ b/test/tint/bug/tint/366314931.wgsl.expected.msl
			
				+@@ -0,0 +1,33 @@
			
				++#include <metal_stdlib>
			
				++
			
				++using namespace metal;
			
				++struct S_tint_packed_vec3 {
			
				++  /* 0x0000 */ packed_uint3 v;
			
				++  /* 0x000c */ atomic_uint u;
			
				++};
			
				++
			
				++void tint_zero_workgroup_memory(uint local_idx, threadgroup S_tint_packed_vec3* const tint_symbol_1) {
			
				++  if ((local_idx < 1u)) {
			
				++    (*(tint_symbol_1)).v = packed_uint3(0u);
			
				++    atomic_store_explicit(&((*(tint_symbol_1)).u), 0u, memory_order_relaxed);
			
				++  }
			
				++  threadgroup_barrier(mem_flags::mem_threadgroup);
			
				++}
			
				++
			
				++struct S {
			
				++  uint3 v;
			
				++  atomic_uint u;
			
				++};
			
				++
			
				++void tint_symbol_inner(uint local_invocation_index, threadgroup S_tint_packed_vec3* const tint_symbol_2, device S_tint_packed_vec3* const tint_symbol_3) {
			
				++  tint_zero_workgroup_memory(local_invocation_index, tint_symbol_2);
			
				++  uint const x = atomic_load_explicit(&((*(tint_symbol_2)).u), memory_order_relaxed);
			
				++  atomic_store_explicit(&((*(tint_symbol_3)).u), x, memory_order_relaxed);
			
				++}
			
				++
			
				++kernel void tint_symbol(device S_tint_packed_vec3* tint_symbol_5 [[buffer(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				++  threadgroup S_tint_packed_vec3 tint_symbol_4;
			
				++  tint_symbol_inner(local_invocation_index, &(tint_symbol_4), tint_symbol_5);
			
				++  return;
			
				++}
			
				++
			
				+diff --git a/test/tint/bug/tint/942.wgsl.expected.msl b/test/tint/bug/tint/942.wgsl.expected.msl
			
				+index fa62439319554fb9f147803302fe611fc1e386be..95d1240d958afcecd428e3608a7f160029bad570 100644
			
				+--- a/test/tint/bug/tint/942.wgsl.expected.msl
			
				++++ b/test/tint/bug/tint/942.wgsl.expected.msl
			
				+@@ -18,11 +18,15 @@ struct tint_array {
			
				+   volatile bool VOLATILE_NAME = true; \
			
				+   if (VOLATILE_NAME)
			
				+ 
			
				+-void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<tint_array<float3, 256>, 4>* const tint_symbol_1) {
			
				++struct tint_packed_vec3_f32_array_element {
			
				++  packed_float3 elements;
			
				++};
			
				++
			
				++void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<tint_array<tint_packed_vec3_f32_array_element, 256>, 4>* const tint_symbol_1) {
			
				+   TINT_ISOLATE_UB(tint_volatile_true) for(uint idx = local_idx; (idx < 1024u); idx = (idx + 64u)) {
			
				+     uint const i_1 = (idx / 256u);
			
				+     uint const i_2 = (idx % 256u);
			
				+-    (*(tint_symbol_1))[i_1][i_2] = float3(0.0f);
			
				++    (*(tint_symbol_1))[i_1][i_2].elements = packed_float3(0.0f);
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+ }
			
				+@@ -40,7 +44,7 @@ uint tint_div(uint lhs, uint rhs) {
			
				+   return (lhs / select(rhs, 1u, (rhs == 0u)));
			
				+ }
			
				+ 
			
				+-void tint_symbol_inner(uint3 WorkGroupID, uint3 LocalInvocationID, uint local_invocation_index, threadgroup tint_array<tint_array<float3, 256>, 4>* const tint_symbol_2, const constant Params* const tint_symbol_3, texture2d<float, access::sample> tint_symbol_4, const constant Flip* const tint_symbol_5, sampler tint_symbol_6, texture2d<float, access::write> tint_symbol_7) {
			
				++void tint_symbol_inner(uint3 WorkGroupID, uint3 LocalInvocationID, uint local_invocation_index, threadgroup tint_array<tint_array<tint_packed_vec3_f32_array_element, 256>, 4>* const tint_symbol_2, const constant Params* const tint_symbol_3, texture2d<float, access::sample> tint_symbol_4, const constant Flip* const tint_symbol_5, sampler tint_symbol_6, texture2d<float, access::write> tint_symbol_7) {
			
				+   tint_zero_workgroup_memory(local_invocation_index, tint_symbol_2);
			
				+   uint const filterOffset = tint_div(((*(tint_symbol_3)).filterDim - 1u), 2u);
			
				+   uint2 const dims = uint2(tint_symbol_4.get_width(0), tint_symbol_4.get_height(0));
			
				+@@ -51,7 +55,7 @@ void tint_symbol_inner(uint3 WorkGroupID, uint3 LocalInvocationID, uint local_in
			
				+       if (((*(tint_symbol_5)).value != 0u)) {
			
				+         loadIndex = loadIndex.yx;
			
				+       }
			
				+-      (*(tint_symbol_2))[r][((4u * LocalInvocationID[0]) + c)] = tint_symbol_4.sample(tint_symbol_6, ((float2(loadIndex) + float2(0.25f)) / float2(dims)), level(0.0f)).rgb;
			
				++      (*(tint_symbol_2))[r][((4u * LocalInvocationID[0]) + c)].elements = packed_float3(tint_symbol_4.sample(tint_symbol_6, ((float2(loadIndex) + float2(0.25f)) / float2(dims)), level(0.0f)).rgb);
			
				+     }
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+@@ -66,7 +70,7 @@ void tint_symbol_inner(uint3 WorkGroupID, uint3 LocalInvocationID, uint local_in
			
				+         float3 acc = float3(0.0f);
			
				+         TINT_ISOLATE_UB(tint_volatile_true_5) for(uint f = 0u; (f < (*(tint_symbol_3)).filterDim); f = (f + 1u)) {
			
				+           uint i = ((center + f) - filterOffset);
			
				+-          acc = (acc + ((1.0f / float((*(tint_symbol_3)).filterDim)) * (*(tint_symbol_2))[r][i]));
			
				++          acc = (acc + ((1.0f / float((*(tint_symbol_3)).filterDim)) * float3((*(tint_symbol_2))[r][i].elements)));
			
				+         }
			
				+         tint_symbol_7.write(float4(acc, 1.0f), uint2(writeIndex));
			
				+       }
			
				+@@ -75,7 +79,7 @@ void tint_symbol_inner(uint3 WorkGroupID, uint3 LocalInvocationID, uint local_in
			
				+ }
			
				+ 
			
				+ kernel void tint_symbol(const constant Params* tint_symbol_9 [[buffer(0)]], texture2d<float, access::sample> tint_symbol_10 [[texture(0)]], const constant Flip* tint_symbol_11 [[buffer(1)]], sampler tint_symbol_12 [[sampler(0)]], texture2d<float, access::write> tint_symbol_13 [[texture(1)]], uint3 WorkGroupID [[threadgroup_position_in_grid]], uint3 LocalInvocationID [[thread_position_in_threadgroup]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				+-  threadgroup tint_array<tint_array<float3, 256>, 4> tint_symbol_8;
			
				++  threadgroup tint_array<tint_array<tint_packed_vec3_f32_array_element, 256>, 4> tint_symbol_8;
			
				+   tint_symbol_inner(WorkGroupID, LocalInvocationID, local_invocation_index, &(tint_symbol_8), tint_symbol_9, tint_symbol_10, tint_symbol_11, tint_symbol_12, tint_symbol_13);
			
				+   return;
			
				+ }
			
				+diff --git a/test/tint/builtins/workgroupUniformLoad/matrix.wgsl.expected.msl b/test/tint/builtins/workgroupUniformLoad/matrix.wgsl.expected.msl
			
				+index c37c301c12ff57db3497d7ff524a24384c5742d9..3ccf71f9e455f6c1e86cbe2aa0977cb0d5ae1b15 100644
			
				+--- a/test/tint/builtins/workgroupUniformLoad/matrix.wgsl.expected.msl
			
				++++ b/test/tint/builtins/workgroupUniformLoad/matrix.wgsl.expected.msl
			
				+@@ -1,14 +1,36 @@
			
				+ #include <metal_stdlib>
			
				+ 
			
				+ using namespace metal;
			
				+-float3x3 tint_workgroupUniformLoad(threadgroup float3x3* const p) {
			
				++
			
				++template<typename T, size_t N>
			
				++struct tint_array {
			
				++    const constant T& operator[](size_t i) const constant { return elements[i]; }
			
				++    device T& operator[](size_t i) device { return elements[i]; }
			
				++    const device T& operator[](size_t i) const device { return elements[i]; }
			
				++    thread T& operator[](size_t i) thread { return elements[i]; }
			
				++    const thread T& operator[](size_t i) const thread { return elements[i]; }
			
				++    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
			
				++    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
			
				++    T elements[N];
			
				++};
			
				++
			
				++struct tint_packed_vec3_f32_array_element {
			
				++  packed_float3 elements;
			
				++};
			
				++
			
				++float3x3 tint_unpack_vec3_in_composite(tint_array<tint_packed_vec3_f32_array_element, 3> in) {
			
				++  float3x3 result = float3x3(float3(in[0].elements), float3(in[1].elements), float3(in[2].elements));
			
				++  return result;
			
				++}
			
				++
			
				++float3x3 tint_workgroupUniformLoad(threadgroup tint_array<tint_packed_vec3_f32_array_element, 3>* const p) {
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+-  float3x3 const result = *(p);
			
				++  float3x3 const result = tint_unpack_vec3_in_composite(*(p));
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+   return result;
			
				+ }
			
				+ 
			
				+-float3x3 foo(threadgroup float3x3* const tint_symbol) {
			
				++float3x3 foo(threadgroup tint_array<tint_packed_vec3_f32_array_element, 3>* const tint_symbol) {
			
				+   return tint_workgroupUniformLoad(tint_symbol);
			
				+ }
			
				+ 
			
				+diff --git a/test/tint/builtins/workgroupUniformLoad/structures.wgsl.expected.msl b/test/tint/builtins/workgroupUniformLoad/structures.wgsl.expected.msl
			
				+index 09b35efc390997c94ec016b63420b57db7146855..8ec1ede7504f02af2259480773923787dd3ec01e 100644
			
				+--- a/test/tint/builtins/workgroupUniformLoad/structures.wgsl.expected.msl
			
				++++ b/test/tint/builtins/workgroupUniformLoad/structures.wgsl.expected.msl
			
				+@@ -14,24 +14,62 @@ struct tint_array {
			
				+     T elements[N];
			
				+ };
			
				+ 
			
				++struct tint_packed_vec3_f32_array_element {
			
				++  packed_float3 elements;
			
				++};
			
				++
			
				++struct Inner_tint_packed_vec3 {
			
				++  bool b;
			
				++  int4 v;
			
				++  tint_array<tint_packed_vec3_f32_array_element, 3> m;
			
				++};
			
				++
			
				++struct Outer_tint_packed_vec3 {
			
				++  tint_array<Inner_tint_packed_vec3, 4> a;
			
				++};
			
				++
			
				++float3x3 tint_unpack_vec3_in_composite(tint_array<tint_packed_vec3_f32_array_element, 3> in) {
			
				++  float3x3 result = float3x3(float3(in[0].elements), float3(in[1].elements), float3(in[2].elements));
			
				++  return result;
			
				++}
			
				++
			
				+ struct Inner {
			
				+   bool b;
			
				+   int4 v;
			
				+   float3x3 m;
			
				+ };
			
				+ 
			
				++Inner tint_unpack_vec3_in_composite_1(Inner_tint_packed_vec3 in) {
			
				++  Inner result = {};
			
				++  result.b = in.b;
			
				++  result.v = in.v;
			
				++  result.m = tint_unpack_vec3_in_composite(in.m);
			
				++  return result;
			
				++}
			
				++
			
				++tint_array<Inner, 4> tint_unpack_vec3_in_composite_2(tint_array<Inner_tint_packed_vec3, 4> in) {
			
				++  tint_array<Inner, 4> result = tint_array<Inner, 4>{tint_unpack_vec3_in_composite_1(in[0]), tint_unpack_vec3_in_composite_1(in[1]), tint_unpack_vec3_in_composite_1(in[2]), tint_unpack_vec3_in_composite_1(in[3])};
			
				++  return result;
			
				++}
			
				++
			
				+ struct Outer {
			
				+   tint_array<Inner, 4> a;
			
				+ };
			
				+ 
			
				+-Outer tint_workgroupUniformLoad(threadgroup Outer* const p) {
			
				++Outer tint_unpack_vec3_in_composite_3(Outer_tint_packed_vec3 in) {
			
				++  Outer result = {};
			
				++  result.a = tint_unpack_vec3_in_composite_2(in.a);
			
				++  return result;
			
				++}
			
				++
			
				++Outer tint_workgroupUniformLoad(threadgroup Outer_tint_packed_vec3* const p) {
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+-  Outer const result = *(p);
			
				++  Outer const result = tint_unpack_vec3_in_composite_3(*(p));
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+   return result;
			
				+ }
			
				+ 
			
				+-Outer foo(threadgroup Outer* const tint_symbol) {
			
				++Outer foo(threadgroup Outer_tint_packed_vec3* const tint_symbol) {
			
				+   return tint_workgroupUniformLoad(tint_symbol);
			
				+ }
			
				+ 
			
				+diff --git a/test/tint/var/initialization/workgroup/matrix.wgsl.expected.msl b/test/tint/var/initialization/workgroup/matrix.wgsl.expected.msl
			
				+index 41b834d8cbd93153ca9ca850888d1b27e4c44dec..7e32dc311550f3ec348c990f83e89462cbe0c622 100644
			
				+--- a/test/tint/var/initialization/workgroup/matrix.wgsl.expected.msl
			
				++++ b/test/tint/var/initialization/workgroup/matrix.wgsl.expected.msl
			
				+@@ -1,24 +1,42 @@
			
				+ #include <metal_stdlib>
			
				+ 
			
				+ using namespace metal;
			
				+-struct tint_symbol_5 {
			
				+-  float2x3 v;
			
				++
			
				++template<typename T, size_t N>
			
				++struct tint_array {
			
				++    const constant T& operator[](size_t i) const constant { return elements[i]; }
			
				++    device T& operator[](size_t i) device { return elements[i]; }
			
				++    const device T& operator[](size_t i) const device { return elements[i]; }
			
				++    thread T& operator[](size_t i) thread { return elements[i]; }
			
				++    const thread T& operator[](size_t i) const thread { return elements[i]; }
			
				++    threadgroup T& operator[](size_t i) threadgroup { return elements[i]; }
			
				++    const threadgroup T& operator[](size_t i) const threadgroup { return elements[i]; }
			
				++    T elements[N];
			
				++};
			
				++
			
				++struct tint_packed_vec3_f32_array_element {
			
				++  packed_float3 elements;
			
				+ };
			
				+ 
			
				+-void tint_zero_workgroup_memory(uint local_idx, threadgroup float2x3* const tint_symbol_1) {
			
				++tint_array<tint_packed_vec3_f32_array_element, 2> tint_pack_vec3_in_composite(float2x3 in) {
			
				++  tint_array<tint_packed_vec3_f32_array_element, 2> result = tint_array<tint_packed_vec3_f32_array_element, 2>{tint_packed_vec3_f32_array_element{.elements=packed_float3(in[0])}, tint_packed_vec3_f32_array_element{.elements=packed_float3(in[1])}};
			
				++  return result;
			
				++}
			
				++
			
				++void tint_zero_workgroup_memory(uint local_idx, threadgroup tint_array<tint_packed_vec3_f32_array_element, 2>* const tint_symbol_1) {
			
				+   if ((local_idx < 1u)) {
			
				+-    *(tint_symbol_1) = float2x3(float3(0.0f), float3(0.0f));
			
				++    *(tint_symbol_1) = tint_pack_vec3_in_composite(float2x3(float3(0.0f), float3(0.0f)));
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+ }
			
				+ 
			
				+-void tint_symbol_inner(uint local_invocation_index, threadgroup float2x3* const tint_symbol_2) {
			
				++void tint_symbol_inner(uint local_invocation_index, threadgroup tint_array<tint_packed_vec3_f32_array_element, 2>* const tint_symbol_2) {
			
				+   tint_zero_workgroup_memory(local_invocation_index, tint_symbol_2);
			
				+ }
			
				+ 
			
				+-kernel void tint_symbol(threadgroup tint_symbol_5* tint_symbol_4 [[threadgroup(0)]], uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				+-  threadgroup float2x3* const tint_symbol_3 = &((*(tint_symbol_4)).v);
			
				+-  tint_symbol_inner(local_invocation_index, tint_symbol_3);
			
				++kernel void tint_symbol(uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				++  threadgroup tint_array<tint_packed_vec3_f32_array_element, 2> tint_symbol_3;
			
				++  tint_symbol_inner(local_invocation_index, &(tint_symbol_3));
			
				+   return;
			
				+ }
			
				+ 
			
				+diff --git a/test/tint/var/initialization/workgroup/vector.wgsl.expected.msl b/test/tint/var/initialization/workgroup/vector.wgsl.expected.msl
			
				+index 7942bab7773d36d5775be8fd4fdf63aa84b6215f..01e905f9846b4a08216a903901c4fdfe281b429c 100644
			
				+--- a/test/tint/var/initialization/workgroup/vector.wgsl.expected.msl
			
				++++ b/test/tint/var/initialization/workgroup/vector.wgsl.expected.msl
			
				+@@ -1,19 +1,19 @@
			
				+ #include <metal_stdlib>
			
				+ 
			
				+ using namespace metal;
			
				+-void tint_zero_workgroup_memory(uint local_idx, threadgroup int3* const tint_symbol_1) {
			
				++void tint_zero_workgroup_memory(uint local_idx, threadgroup packed_int3* const tint_symbol_1) {
			
				+   if ((local_idx < 1u)) {
			
				+-    *(tint_symbol_1) = int3(0);
			
				++    *(tint_symbol_1) = packed_int3(0);
			
				+   }
			
				+   threadgroup_barrier(mem_flags::mem_threadgroup);
			
				+ }
			
				+ 
			
				+-void tint_symbol_inner(uint local_invocation_index, threadgroup int3* const tint_symbol_2) {
			
				++void tint_symbol_inner(uint local_invocation_index, threadgroup packed_int3* const tint_symbol_2) {
			
				+   tint_zero_workgroup_memory(local_invocation_index, tint_symbol_2);
			
				+ }
			
				+ 
			
				+ kernel void tint_symbol(uint local_invocation_index [[thread_index_in_threadgroup]]) {
			
				+-  threadgroup int3 tint_symbol_3;
			
				++  threadgroup packed_int3 tint_symbol_3;
			
				+   tint_symbol_inner(local_invocation_index, &(tint_symbol_3));
			
				+   return;
			
				+ }

						
							+ 126
							
							- 0
						
patches/dawn/tint_validate_layout_constraints_for_all_address_spaces.patch
							 
								View File
							
				@@ -0,0 +1,126 @@
			
				+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
			
				+From: James Price <[email protected]>
			
				+Date: Wed, 20 Nov 2024 19:06:01 +0000
			
				+Subject: [tint] Validate layout constraints for all address spaces
			
				+
			
				+The WGSL spec has a non-normative note that the layout constraints
			
				+should be validated for all non-host-shareable address spaces, using
			
				+the same constraints as for storage.
			
				+
			
				+See linked bug for details of why this is important.
			
				+
			
				+Bug: 378725734
			
				+Change-Id: I3fb02506d8ded000dc3510bdc1b4a24a95089281
			
				+Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/214754
			
				+Reviewed-by: David Neto <[email protected]>
			
				+Commit-Queue: James Price <[email protected]>
			
				+Reviewed-by: dan sinclair <[email protected]>
			
				+(cherry picked from commit dfa46d12ce63f131c437041d6d97a6d97c54c1b7)
			
				+Reviewed-on: https://dawn-review.googlesource.com/c/dawn/+/215936
			
				+Auto-Submit: James Price <[email protected]>
			
				+Commit-Queue: dan sinclair <[email protected]>
			
				+Commit-Queue: Antonio Maiorano <[email protected]>
			
				+Reviewed-by: Antonio Maiorano <[email protected]>
			
				+
			
				+diff --git a/src/tint/lang/wgsl/resolver/address_space_layout_validation_test.cc b/src/tint/lang/wgsl/resolver/address_space_layout_validation_test.cc
			
				+index f1e14a36a3cdea07f5fc7735c3d9483515d754f1..6ef150c4476e9176e934a94c32d903c2e23083b8 100644
			
				+--- a/src/tint/lang/wgsl/resolver/address_space_layout_validation_test.cc
			
				++++ b/src/tint/lang/wgsl/resolver/address_space_layout_validation_test.cc
			
				+@@ -730,7 +730,7 @@ TEST_F(ResolverAddressSpaceLayoutValidationTest, RelaxedUniformLayout_ArrayStrid
			
				+     EXPECT_TRUE(r()->Resolve()) << r()->error();
			
				+ }
			
				+ 
			
				+-TEST_F(ResolverAddressSpaceLayoutValidationTest, AlignAttributeTooSmall) {
			
				++TEST_F(ResolverAddressSpaceLayoutValidationTest, AlignAttributeTooSmall_Storage) {
			
				+     // struct S {
			
				+     //   @align(4) vector : vec4u;
			
				+     //   scalar : u32;
			
				+@@ -754,5 +754,73 @@ TEST_F(ResolverAddressSpaceLayoutValidationTest, AlignAttributeTooSmall) {
			
				+ 56:78 note: 'S' used in address space 'storage' here)");
			
				+ }
			
				+ 
			
				++TEST_F(ResolverAddressSpaceLayoutValidationTest, AlignAttributeTooSmall_Workgroup) {
			
				++    // struct S {
			
				++    //   @align(4) vector : vec4u;
			
				++    //   scalar : u32;
			
				++    // };
			
				++    //
			
				++    // var<workgroup> a : array<S, 4>;
			
				++    Structure(
			
				++        "S", Vector{
			
				++                 Member("vector", ty.vec4<u32>(), Vector{MemberAlign(Expr(Source{{12, 34}}, 4_a))}),
			
				++                 Member("scalar", ty.u32()),
			
				++             });
			
				++
			
				++    GlobalVar(Source{{56, 78}}, "a", ty("S"), core::AddressSpace::kWorkgroup, Group(0_a));
			
				++
			
				++    ASSERT_FALSE(r()->Resolve());
			
				++    EXPECT_EQ(
			
				++        r()->error(),
			
				++        R"(12:34 error: alignment must be a multiple of '16' bytes for the 'workgroup' address space
			
				++56:78 note: 'S' used in address space 'workgroup' here)");
			
				++}
			
				++
			
				++TEST_F(ResolverAddressSpaceLayoutValidationTest, AlignAttributeTooSmall_Private) {
			
				++    // struct S {
			
				++    //   @align(4) vector : vec4u;
			
				++    //   scalar : u32;
			
				++    // };
			
				++    //
			
				++    // var<private> a : array<S, 4>;
			
				++    Structure(
			
				++        "S", Vector{
			
				++                 Member("vector", ty.vec4<u32>(), Vector{MemberAlign(Expr(Source{{12, 34}}, 4_a))}),
			
				++                 Member("scalar", ty.u32()),
			
				++             });
			
				++
			
				++    GlobalVar(Source{{56, 78}}, "a", ty("S"), core::AddressSpace::kPrivate, Group(0_a));
			
				++
			
				++    ASSERT_FALSE(r()->Resolve());
			
				++    EXPECT_EQ(
			
				++        r()->error(),
			
				++        R"(12:34 error: alignment must be a multiple of '16' bytes for the 'private' address space
			
				++56:78 note: 'S' used in address space 'private' here)");
			
				++}
			
				++
			
				++TEST_F(ResolverAddressSpaceLayoutValidationTest, AlignAttributeTooSmall_Function) {
			
				++    // struct S {
			
				++    //   @align(4) vector : vec4u;
			
				++    //   scalar : u32;
			
				++    // };
			
				++    //
			
				++    // fn foo() {
			
				++    //   var a : array<S, 4>;
			
				++    // }
			
				++    Structure(
			
				++        "S", Vector{
			
				++                 Member("vector", ty.vec4<u32>(), Vector{MemberAlign(Expr(Source{{12, 34}}, 4_a))}),
			
				++                 Member("scalar", ty.u32()),
			
				++             });
			
				++
			
				++    GlobalVar(Source{{56, 78}}, "a", ty("S"), core::AddressSpace::kFunction, Group(0_a));
			
				++
			
				++    ASSERT_FALSE(r()->Resolve());
			
				++    EXPECT_EQ(
			
				++        r()->error(),
			
				++        R"(12:34 error: alignment must be a multiple of '16' bytes for the 'function' address space
			
				++56:78 note: 'S' used in address space 'function' here)");
			
				++}
			
				++
			
				+ }  // namespace
			
				+ }  // namespace tint::resolver
			
				+diff --git a/src/tint/lang/wgsl/resolver/validator.cc b/src/tint/lang/wgsl/resolver/validator.cc
			
				+index 0d62d474780542a79a67698375319b6ab7c4560d..105e937be9e69df6a5a44d75d21cabc4eab2a2f7 100644
			
				+--- a/src/tint/lang/wgsl/resolver/validator.cc
			
				++++ b/src/tint/lang/wgsl/resolver/validator.cc
			
				+@@ -499,10 +499,6 @@ bool Validator::AddressSpaceLayout(const core::type::Type* store_ty,
			
				+         return true;
			
				+     }
			
				+ 
			
				+-    if (!core::IsHostShareable(address_space)) {
			
				+-        return true;
			
				+-    }
			
				+-
			
				+     auto note_usage = [&] {
			
				+         AddNote(source) << style::Type(store_ty->FriendlyName()) << " used in address space "
			
				+                         << style::Enum(address_space) << " here";

						
							+ 1
							
							- 0
						
patches/v8/.patches
							 
								View File
							
				@@ -14,3 +14,4 @@ merged_heap_sandbox_update_ept_s_evacuation_entries_in_scavenger.patch
			
				 merged_don_t_assume_all_turbofan_frames_are_javascript.patch
			
				 merged_wasm_do_not_inline_wrappers_with_ref_extern_parameter.patch
			
				 merged_wasm_fix_default_externref_exnref_reference.patch
			
				+m126-lts_liftoff_fix_clobbered_scratch_register.patch

						
							+ 80
							
							- 0
						
patches/v8/m126-lts_liftoff_fix_clobbered_scratch_register.patch
							 
								View File
							
				@@ -0,0 +1,80 @@
			
				+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
			
				+From: Clemens Backes <[email protected]>
			
				+Date: Fri, 15 Nov 2024 16:00:15 +0100
			
				+Subject: [M126-LTS][liftoff] Fix clobbered scratch register
			
				+
			
				+`GetMemOp` returns an `Operand` which can contain `kScratchRegister`. We
			
				+should hence not clobber that register until after the last use of the
			
				+`Operand`.
			
				+
			
				+This CL changes the scratch register to `kScratchRegister2` which has
			
				+much fewer uses, and in particular none which collides with `GetMemOp`.
			
				+
			
				[email protected]
			
				+
			
				+(cherry picked from commit 57a017e611a5abfb0e4b59f6de028bc4070a3615)
			
				+
			
				+Fixed: 378779897, 378701682
			
				+Change-Id: Id1ed25edfe76200d069ac2ab54e5000eed313c8f
			
				+Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/6022072
			
				+Reviewed-by: Matthias Liedtke <[email protected]>
			
				+Commit-Queue: Clemens Backes <[email protected]>
			
				+Cr-Original-Commit-Position: refs/heads/main@{#97224}
			
				+Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/6056706
			
				+Reviewed-by: Clemens Backes <[email protected]>
			
				+Commit-Queue: Gyuyoung Kim (xWF) <[email protected]>
			
				+Reviewed-by: Daniel Lehmann <[email protected]>
			
				+Cr-Commit-Position: refs/branch-heads/12.6@{#82}
			
				+Cr-Branched-From: 3c9fa12db3183a6f4ea53d2675adb66ea1194529-refs/heads/12.6.228@{#2}
			
				+Cr-Branched-From: 981bb15ba4dbf9e2381dfc94ec2c4af0b9c6a0b6-refs/heads/main@{#93835}
			
				+
			
				+diff --git a/src/wasm/baseline/x64/liftoff-assembler-x64-inl.h b/src/wasm/baseline/x64/liftoff-assembler-x64-inl.h
			
				+index b20867d7ec2a5724653ebe9baca8c8949d70cd74..be01772c27382e2c10314777e4058cf326327ba3 100644
			
				+--- a/src/wasm/baseline/x64/liftoff-assembler-x64-inl.h
			
				++++ b/src/wasm/baseline/x64/liftoff-assembler-x64-inl.h
			
				+@@ -50,6 +50,8 @@ constexpr Operand kInstanceDataOperand =
			
				+ 
			
				+ constexpr Operand kOSRTargetSlot = GetStackSlot(kOSRTargetOffset);
			
				+ 
			
				++// Note: The returned Operand might contain {kScratchRegister2}; make sure not
			
				++// to clobber that until after the last use of the Operand.
			
				+ inline Operand GetMemOp(LiftoffAssembler* assm, Register addr,
			
				+                         Register offset_reg, uintptr_t offset_imm,
			
				+                         ScaleFactor scale_factor = times_1) {
			
				+@@ -60,7 +62,7 @@ inline Operand GetMemOp(LiftoffAssembler* assm, Register addr,
			
				+                : Operand(addr, offset_reg, scale_factor, offset_imm32);
			
				+   }
			
				+   // Offset immediate does not fit in 31 bits.
			
				+-  Register scratch = kScratchRegister;
			
				++  Register scratch = kScratchRegister2;
			
				+   assm->MacroAssembler::Move(scratch, offset_imm);
			
				+   if (offset_reg != no_reg) assm->addq(scratch, offset_reg);
			
				+   return Operand(addr, scratch, scale_factor, 0);
			
				+diff --git a/test/mjsunit/regress/wasm/regress-378779897.js b/test/mjsunit/regress/wasm/regress-378779897.js
			
				+new file mode 100644
			
				+index 0000000000000000000000000000000000000000..fed1bc807165e1b9e83195a2df30aac33a544470
			
				+--- /dev/null
			
				++++ b/test/mjsunit/regress/wasm/regress-378779897.js
			
				+@@ -0,0 +1,22 @@
			
				++// Copyright 2024 the V8 project authors. All rights reserved.
			
				++// Use of this source code is governed by a BSD-style license that can be
			
				++// found in the LICENSE file.
			
				++
			
				++d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");
			
				++
			
				++const builder = new WasmModuleBuilder();
			
				++builder.addMemory(49149);
			
				++
			
				++builder.addFunction('main', kSig_i_v).addBody([
			
				++  ...wasmI32Const(-1118406780),
			
				++  ...wasmI32Const(-1),
			
				++  kAtomicPrefix, kExprI32AtomicOr8U, 0, 0
			
				++]).exportFunc();
			
				++
			
				++let instance;
			
				++try {
			
				++  instance = builder.instantiate();
			
				++} catch (e) {
			
				++  assertException(e, RangeError, /Out of memory/);
			
				++}
			
				++if (instance) instance.exports.main();

chore: cherry-pick 3 changes from 2-M131 (#45040)

+ 2 - 0 patches/dawn/.patches View File

+ 1860 - 0 patches/dawn/msl_use_packed_vec3_for_workgroup_storage.patch View File

+ 126 - 0 patches/dawn/tint_validate_layout_constraints_for_all_address_spaces.patch View File

+ 1 - 0 patches/v8/.patches View File

+ 80 - 0 patches/v8/m126-lts_liftoff_fix_clobbered_scratch_register.patch View File

+ 2 - 0
patches/dawn/.patches
View File

+ 1860 - 0
patches/dawn/msl_use_packed_vec3_for_workgroup_storage.patch
View File

+ 126 - 0
patches/dawn/tint_validate_layout_constraints_for_all_address_spaces.patch
View File

+ 1 - 0
patches/v8/.patches
View File

+ 80 - 0
patches/v8/m126-lts_liftoff_fix_clobbered_scratch_register.patch
View File