Index: src/x86/itx16_avx2.asm
--- src/x86/itx16_avx2.asm.orig
+++ src/x86/itx16_avx2.asm
@@ -360,6 +360,7 @@ cglobal idct_4x4_internal_10bpc, 0, 7, 6, dst, stride,
     pshufb               m0, m2
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vextracti128        xm1, m0, 1
     WRAP_XMM IDCT4_1D_PACKED_WORD 0, 1, 2, 3, 4, 5
     packssdw            xm5, xm5 ; pw_2048
@@ -445,6 +446,7 @@ cglobal iadst_4x4_internal_10bpc, 0, 7, 6, dst, stride
 %endif
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     lea                  r6, [deint_shuf+128]
     vextracti128        xm1, m0, 1
     call m(iadst_4x4_internal_8bpc).main
@@ -497,6 +499,7 @@ cglobal iflipadst_4x4_internal_10bpc, 0, 7, 6, dst, st
     vinserti128          m1, m6, xm4, 1
     jmp m(iadst_4x4_internal_10bpc).pass1_end
 .pass2:
+    _CET_ENDBR
     lea                  r6, [deint_shuf+128]
     vextracti128        xm1, m0, 1
     call m(iadst_4x4_internal_8bpc).main
@@ -545,6 +548,7 @@ cglobal iidentity_4x4_internal_10bpc, 0, 7, 6, dst, st
     pshufb               m0, m3
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vpbroadcastd         m1, [pw_1697x8]
     movq                xm2, [dstq+strideq*0]
     movhps              xm2, [dstq+strideq*1]
@@ -585,6 +589,7 @@ cglobal idct_4x4_internal_12bpc, 0, 7, 8, dst, stride,
     vpermd               m1, m3, m0
     jmp m(iadst_4x4_internal_12bpc).pass1_end2
 .pass2:
+    _CET_ENDBR
     vpbroadcastd         m5, [pd_2048]
     vpermq               m0, m0, q3120
     vpermq               m1, m1, q3120
@@ -624,6 +629,7 @@ cglobal iadst_4x4_internal_12bpc, 0, 7, 8, dst, stride
     pminsd               m1, m4
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call .main_pass2
     vinserti128          m0, m4, xm6, 1
     vinserti128          m1, m2, xm3, 1
@@ -680,6 +686,7 @@ cglobal iflipadst_4x4_internal_12bpc, 0, 7, 8, dst, st
     vinserti128          m2, m6, xm4, 1
     jmp m(iadst_4x4_internal_12bpc).pass1_end
 .pass2:
+    _CET_ENDBR
     call m(iadst_4x4_internal_12bpc).main_pass2
     vinserti128          m0, m3, xm2, 1
     vinserti128          m1, m6, xm4, 1
@@ -706,6 +713,7 @@ cglobal iidentity_4x4_internal_12bpc, 0, 7, 8, dst, st
     paddd                m2, m3
     jmp m(iadst_4x4_internal_12bpc).pass1_end2
 .pass2:
+    _CET_ENDBR
     ; m0 = in0 in1
     ; m1 = in2 in3
     vpbroadcastd         m3, [pd_5793]
@@ -771,6 +779,7 @@ cglobal idct_4x8_internal_10bpc, 0, 7, 8, dst, stride,
     IDCT4_1D              0, 1, 2, 3, 4, 5, 6, 7
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     packssdw             m0, m2
     packssdw             m1, m3
     lea                  r6, [deint_shuf+128]
@@ -828,6 +837,7 @@ cglobal iadst_4x8_internal_10bpc, 0, 7, 8, dst, stride
     REPX      {psrad x, 12}, m0, m1, m2, m3
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call .pass2_main
     mova                xm4, [pw_2048_m2048]
     REPX  {pmulhrsw x, xm4}, xm0, xm1, xm2, xm3
@@ -949,6 +959,7 @@ cglobal iflipadst_4x8_internal_10bpc, 0, 7, 8, dst, st
     paddd                m3, m5, m4
     jmp m(iadst_4x8_internal_10bpc).pass1_end
 .pass2:
+    _CET_ENDBR
     call m(iadst_4x8_internal_10bpc).pass2_main
     mova                xm4, [pw_2048_m2048]
     REPX  {pmulhrsw x, xm4}, xm3, xm2, xm1, xm0
@@ -1002,6 +1013,7 @@ cglobal iidentity_4x8_internal_10bpc, 0, 7, 8, dst, st
     REPX     {psrad  x, 12}, m0, m1, m2, m3
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vpbroadcastd         m6, [pixel_10bpc_max]
     call .pass2_end
     RET
@@ -1058,6 +1070,7 @@ INV_TXFM_4X8_FN dct, flipadst, 12
 cglobal idct_4x8_internal_12bpc, 0, 7, 10, dst, stride, c, eob, tx2
     jmp m(idct_4x8_internal_10bpc).pass1
 .pass2:
+    _CET_ENDBR
     vpbroadcastd         m8, [clip_18b_min]
     vpbroadcastd         m9, [clip_18b_max]
     REPX     {pmaxsd x, m8}, m0, m1, m2, m3
@@ -1104,6 +1117,7 @@ cglobal iadst_4x8_internal_12bpc, 0, 7, 10, dst, strid
     REPX      {psrad x, 11}, m0, m1, m2, m3
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vpbroadcastd         m8, [clip_18b_min]
     vpbroadcastd         m9, [clip_18b_max]
     REPX     {pmaxsd x, m8}, m0, m1, m2, m3
@@ -1185,6 +1199,7 @@ cglobal iflipadst_4x8_internal_12bpc, 0, 7, 10, dst, s
     psrad                m3, m4, 1
     jmp m(iadst_4x8_internal_12bpc).pass1_end
 .pass2:
+    _CET_ENDBR
     vpbroadcastd         m8, [clip_18b_min]
     vpbroadcastd         m9, [clip_18b_max]
     REPX     {pmaxsd x, m8}, m0, m1, m2, m3
@@ -1206,6 +1221,7 @@ INV_TXFM_4X8_FN identity, identity, 12
 cglobal iidentity_4x8_internal_12bpc, 0, 7, 10, dst, stride, c, eob, tx2
     jmp m(iidentity_4x8_internal_10bpc).pass1
 .pass2:
+    _CET_ENDBR
     ; m0 = in0 in1
     ; m1 = in2 in3
     ; m2 = in4 in5
@@ -1248,6 +1264,7 @@ cglobal idct_4x16_internal_10bpc, 0, 7, 11, dst, strid
     REPX       {psrad x, 1}, m0, m1, m2, m3, m4, m5, m6, m7
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     packssdw             m0, m4
     packssdw             m1, m5
     packssdw             m2, m6
@@ -1355,6 +1372,7 @@ cglobal iadst_4x16_internal_10bpc, 0, 7, 11, dst, stri
     psrad                m7, 13
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call .pass2_main
     vpbroadcastd         m5, [pw_2048]
     vpbroadcastd         m8, [pixel_10bpc_max]
@@ -1535,6 +1553,7 @@ cglobal iflipadst_4x16_internal_10bpc, 0, 7, 11, dst, 
     psrad                m7, m8, 13
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call m(iadst_4x16_internal_10bpc).pass2_main
     vpbroadcastd         m5, [pw_2048]
     vpbroadcastd         m8, [pixel_10bpc_max]
@@ -1596,6 +1615,7 @@ cglobal iidentity_4x16_internal_10bpc, 0, 7, 11, dst, 
     REPX      {psrad x, 13}, m0, m4, m1, m5, m2, m6, m3, m7
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     packssdw             m0, m4
     packssdw             m1, m5
     packssdw             m2, m6
@@ -1666,6 +1686,7 @@ INV_TXFM_4X16_FN dct, flipadst, 12
 cglobal idct_4x16_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2
     jmp m(idct_4x16_internal_10bpc).pass1
 .pass2:
+    _CET_ENDBR
     punpckldq            m8, m0, m1
     punpckhdq            m0, m1
     punpckldq            m9, m2, m3
@@ -1725,6 +1746,7 @@ cglobal iadst_4x16_internal_12bpc, 0, 7, 14, dst, stri
     psrad                m7, 12
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vpbroadcastd        m12, [clip_18b_min]
     vpbroadcastd        m13, [clip_18b_max]
     REPX    {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
@@ -1819,6 +1841,7 @@ cglobal iflipadst_4x16_internal_12bpc, 0, 7, 14, dst, 
     psrad                m7, m8, 12
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vpbroadcastd        m12, [clip_18b_min]
     vpbroadcastd        m13, [clip_18b_max]
     REPX    {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
@@ -1876,6 +1899,7 @@ cglobal iidentity_4x16_internal_12bpc, 0, 7, 14, dst, 
     REPX     {psrad  x, 1 }, m2, m6, m3, m7
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vpbroadcastd        m12, [clip_18b_min]
     vpbroadcastd        m13, [clip_18b_max]
     REPX    {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
@@ -1950,6 +1974,7 @@ cglobal idct_8x4_internal_10bpc, 0, 7, 10, dst, stride
     pshufd               m3, m3, q1032
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vbroadcasti128       m4, [deint_shuf]
     packssdw             m0, m1
     packssdw             m2, m3
@@ -1997,6 +2022,7 @@ cglobal iadst_8x4_internal_10bpc, 0, 7, 10, dst, strid
     psignd               m1, m6           ; out2 out3
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call .pass2_main
     vpermq               m0, m0, q3120 ; out0 out1
     vpermq               m2, m1, q3120 ; out2 out3
@@ -2064,6 +2090,7 @@ cglobal iflipadst_8x4_internal_10bpc, 0, 5, 10, dst, s
     psignd               m2, m5, m6
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call m(iadst_8x4_internal_10bpc).pass2_main
     vpermq               m2, m0, q2031
     vpermq               m0, m1, q2031
@@ -2088,6 +2115,7 @@ cglobal iidentity_8x4_internal_10bpc, 0, 7, 10, dst, s
     REPX     {paddd  x, x }, m0, m1, m2, m3
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vpbroadcastd         m5, [pixel_10bpc_max]
     vpbroadcastd         m4, [pw_1697x8]
     packssdw             m0, m1
@@ -2135,6 +2163,7 @@ cglobal idct_8x4_internal_12bpc, 0, 7, 10, dst, stride
     vpbroadcastd         m9, [clip_20b_max]
     jmp m(idct_8x4_internal_10bpc).pass1
 .pass2:
+    _CET_ENDBR
     vpbroadcastd         m8, [clip_18b_min]
     vpbroadcastd         m9, [clip_18b_max]
     REPX     {pmaxsd x, m8}, m0, m1, m2, m3
@@ -2159,6 +2188,7 @@ cglobal iadst_8x4_internal_12bpc, 0, 7, 10, dst, strid
     psignd               m1, m6           ; out2 out3
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vpbroadcastd         m8, [clip_18b_min]
     vpbroadcastd         m9, [clip_18b_max]
     REPX     {pmaxsd x, m8}, m0, m1, m2, m3
@@ -2221,6 +2251,7 @@ cglobal iflipadst_8x4_internal_12bpc, 0, 5, 10, dst, s
     psignd               m2, m5, m6
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vpbroadcastd         m8, [clip_18b_min]
     vpbroadcastd         m9, [clip_18b_max]
     REPX     {pmaxsd x, m8}, m0, m1, m2, m3
@@ -2241,6 +2272,7 @@ INV_TXFM_8X4_FN identity, identity, 12
 cglobal iidentity_8x4_internal_12bpc, 0, 7, 10, dst, stride, c, eob, tx2
     jmp m(iidentity_8x4_internal_10bpc).pass1
 .pass2:
+    _CET_ENDBR
     ; m0 = in0 in1 (interleaved)
     ; m1 = in2 in3 (interleaved)
     ; m2 = in4 in5 (interleaved)
@@ -2352,6 +2384,7 @@ cglobal idct_8x8_internal_10bpc, 0, 7, 14, dst, stride
     call .round_shift1
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call .transpose_8x8_packed
     call m(idct_8x8_internal_8bpc).main
     vpbroadcastd        m12, [pw_2048]
@@ -2471,6 +2504,7 @@ cglobal iadst_8x8_internal_10bpc, 0, 7, 14, dst, strid
     call .main_end
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call m(idct_8x8_internal_10bpc).transpose_8x8_packed
     pshufd               m4, m0, q1032
     pshufd               m5, m1, q1032
@@ -2532,6 +2566,7 @@ cglobal iflipadst_8x8_internal_10bpc, 0, 7, 14, dst, s
     call .main_end
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call m(idct_8x8_internal_10bpc).transpose_8x8_packed
     pshufd               m4, m0, q1032
     pshufd               m5, m1, q1032
@@ -2588,6 +2623,7 @@ cglobal iidentity_8x8_internal_10bpc, 0, 7, 14, dst, s
     mova                 m7, [cq+32*7]
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     packssdw             m3, m7
     vpbroadcastd         m7, [pixel_10bpc_max]
 .pass2_main:
@@ -2678,6 +2714,7 @@ cglobal idct_8x8_internal_12bpc, 0, 7, 14, dst, stride
     vpbroadcastd        m13, [clip_20b_max]
     jmp m(idct_8x8_internal_10bpc).pass1
 .pass2:
+    _CET_ENDBR
     vpbroadcastd        m12, [clip_18b_min]
     vpbroadcastd        m13, [clip_18b_max]
     REPX    {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
@@ -2722,6 +2759,7 @@ cglobal iadst_8x8_internal_12bpc, 0, 7, 14, dst, strid
     vpbroadcastd        m13, [clip_20b_max]
     jmp m(iadst_8x8_internal_10bpc).pass1
 .pass2:
+    _CET_ENDBR
     call .pass2_main
 .pass2_end:
     packssdw             m0, m1
@@ -2769,6 +2807,7 @@ cglobal iflipadst_8x8_internal_12bpc, 0, 7, 14, dst, s
     vpbroadcastd        m13, [clip_20b_max]
     jmp m(iflipadst_8x8_internal_10bpc).pass1
 .pass2:
+    _CET_ENDBR
     call m(iadst_8x8_internal_12bpc).pass2_main
     packssdw             m7, m7, m6
     packssdw             m6, m1, m0
@@ -2791,6 +2830,7 @@ INV_TXFM_8X8_FN identity, identity, 12
 cglobal iidentity_8x8_internal_12bpc, 0, 7, 14, dst, stride, c, eob, tx2
     jmp m(iidentity_8x8_internal_10bpc).pass1
 .pass2:
+    _CET_ENDBR
     packssdw             m3, m7
     vpbroadcastd         m7, [pixel_12bpc_max]
     jmp m(iidentity_8x8_internal_10bpc).pass2_main
@@ -2849,6 +2889,7 @@ cglobal idct_8x16_internal_10bpc, 0, 7, 16, dst, strid
     REPX       {mova x, m8}, m9, m10, m11, m12, m13, m14, m15
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call .transpose
     call m(idct_8x16_internal_8bpc).main
     vpbroadcastd        m12, [pw_2048]
@@ -3046,6 +3087,7 @@ cglobal iadst_8x16_internal_10bpc, 0, 7, 16, dst, stri
     REPX       {mova x, m8}, m9, m10, m11, m12, m13, m14, m15
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call m(idct_8x16_internal_10bpc).transpose
     call m(iadst_8x16_internal_8bpc).main
     call m(iadst_8x16_internal_8bpc).main_pass2_end
@@ -3112,6 +3154,7 @@ cglobal iflipadst_8x16_internal_10bpc, 0, 7, 16, dst, 
     REPX       {mova x, m8}, m9, m10, m11, m12, m13, m14, m15
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call m(idct_8x16_internal_10bpc).transpose
     call m(iadst_8x16_internal_8bpc).main
     call m(iadst_8x16_internal_8bpc).main_pass2_end
@@ -3179,6 +3222,7 @@ cglobal iidentity_8x16_internal_10bpc, 0, 7, 16, dst, 
                              m8,  m9,  m10, m11, m12, m13, m14, m15
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     packssdw             m0, m8
     packssdw             m1, m9
     packssdw             m2, m10
@@ -3244,6 +3288,7 @@ cglobal idct_8x16_internal_12bpc, 0, 7, 16, 32*8, dst,
     vpbroadcastd        m13, [clip_20b_max]
     jmp m(idct_8x16_internal_10bpc).pass1
 .pass2:
+    _CET_ENDBR
     lea                  r6, [rsp+32*4]
     call .transpose
     vpbroadcastd        m12, [clip_18b_min]
@@ -3338,6 +3383,7 @@ cglobal iadst_8x16_internal_12bpc, 0, 7, 16, 32*8, dst
     vpbroadcastd        m13, [clip_20b_max]
     jmp m(iadst_8x16_internal_10bpc).pass1
 .pass2:
+    _CET_ENDBR
     lea                  r6, [rsp+32*4]
     call .pass2_main
     call m(iadst_16x8_internal_10bpc).pass1_rotations
@@ -3392,6 +3438,7 @@ cglobal iflipadst_8x16_internal_12bpc, 0, 7, 16, 32*8,
     vpbroadcastd        m13, [clip_20b_max]
     jmp m(iflipadst_8x16_internal_10bpc).pass1
 .pass2:
+    _CET_ENDBR
     lea                  r6, [rsp+32*4]
     call m(iadst_8x16_internal_12bpc).pass2_main
     call m(iflipadst_16x8_internal_10bpc).pass1_rotations
@@ -3405,6 +3452,7 @@ INV_TXFM_8X16_FN identity, identity, 0, 12
 cglobal iidentity_8x16_internal_12bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
     jmp m(iidentity_8x16_internal_10bpc).pass1
 .pass2:
+    _CET_ENDBR
     call .pass2_main
     packssdw             m0, m8
     packssdw             m1, m9
@@ -3521,6 +3569,7 @@ cglobal idct_16x4_internal_10bpc, 0, 7, 14, dst, strid
     REPX      {psrad x, 1 }, m0, m1, m2, m3, m4, m5, m6, m7
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call .transpose_4x16_packed
     lea                  r6, [deint_shuf+128]
     call m(idct_16x4_internal_8bpc).main
@@ -3653,6 +3702,7 @@ cglobal iadst_16x4_internal_10bpc, 0, 7, 14, dst, stri
     REPX       {psrad x, 1}, m0, m1, m2, m3, m4, m5, m6, m7
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call m(idct_16x4_internal_10bpc).transpose_4x16_packed
     lea                  r6, [deint_shuf+128]
     call m(iadst_16x4_internal_8bpc).main
@@ -3738,6 +3788,7 @@ cglobal iflipadst_16x4_internal_10bpc, 0, 7, 14, dst, 
     paddd                m0, m8, m11
     jmp m(iadst_16x4_internal_10bpc).pass1_end
 .pass2:
+    _CET_ENDBR
     call m(idct_16x4_internal_10bpc).transpose_4x16_packed
     lea                  r6, [deint_shuf+128]
     call m(iadst_16x4_internal_8bpc).main
@@ -3772,6 +3823,7 @@ cglobal iidentity_16x4_internal_10bpc, 0, 7, 14, dst, 
     REPX     {psrad  x, 12}, m0, m1, m2, m3, m4, m5, m6, m7
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call m(idct_16x4_internal_10bpc).transpose_4x16_packed
     vpbroadcastd         m7, [pw_1697x8]
     pmulhrsw             m4, m7, m0
@@ -3794,6 +3846,7 @@ cglobal idct_16x4_internal_12bpc, 0, 7, 14, dst, strid
     vpbroadcastd         m9, [clip_20b_max]
     jmp m(idct_16x4_internal_10bpc).pass1
 .pass2:
+    _CET_ENDBR
     vpbroadcastd        m12, [clip_18b_min]
     vpbroadcastd        m13, [clip_18b_max]
     REPX    {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
@@ -3843,6 +3896,7 @@ cglobal iadst_16x4_internal_12bpc, 0, 7, 14, dst, stri
     vpbroadcastd        m13, [clip_20b_max]
     jmp m(iadst_16x4_internal_10bpc).pass1
 .pass2:
+    _CET_ENDBR
     call .pass2_main
     REPX {vpermq x, x, q3120}, m0, m1, m2, m3
     REPX   {pmulhrsw x, m4}, m0, m1, m2, m3
@@ -3898,6 +3952,7 @@ cglobal iflipadst_16x4_internal_12bpc, 0, 7, 14, dst, 
     vpbroadcastd        m13, [clip_20b_max]
     jmp m(iflipadst_16x4_internal_10bpc).pass1
 .pass2:
+    _CET_ENDBR
     call m(iadst_16x4_internal_12bpc).pass2_main
     vpermq               m7, m0, q3120
     vpermq               m6, m1, q3120
@@ -3947,6 +4002,7 @@ cglobal iidentity_16x4_internal_12bpc, 0, 7, 14, dst, 
     paddd                m7, m13
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vpbroadcastd        m12, [clip_18b_min]
     vpbroadcastd        m13, [clip_18b_max]
     REPX    {pmaxsd x, m12}, m0, m1, m2, m3, m4, m5, m6, m7
@@ -4014,6 +4070,7 @@ cglobal idct_16x8_internal_10bpc, 0, 7, 16, 32*8, dst,
                              m8,  m9,  m10, m11, m12, m13, m14, m15
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call .transpose
     call m(idct_16x8_internal_8bpc).main
     vpbroadcastd        m10, [pw_2048]
@@ -4144,6 +4201,7 @@ cglobal iadst_16x8_internal_10bpc, 0, 7, 16, 32*8, dst
     REPX      {psrad x, 12}, m4,  m5,  m6,  m7,  m8,  m9,  m10, m11
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call m(idct_16x8_internal_10bpc).transpose
     call m(iadst_16x8_internal_8bpc).main
     call m(iadst_16x8_internal_8bpc).main_pass2_end
@@ -4356,6 +4414,7 @@ cglobal iflipadst_16x8_internal_10bpc, 0, 7, 16, 32*8,
     call .pass1_rotations
     jmp m(iadst_16x8_internal_10bpc).pass1_end
 .pass2:
+    _CET_ENDBR
     call m(idct_16x8_internal_10bpc).transpose
     call m(iadst_16x8_internal_8bpc).main
     call m(iadst_16x8_internal_8bpc).main_pass2_end
@@ -4443,6 +4502,7 @@ cglobal iidentity_16x8_internal_10bpc, 0, 7, 16, 32*8,
                              m8,  m9,  m10, m11, m12, m13, m14, m15
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call m(idct_16x8_internal_10bpc).transpose
     vpbroadcastd        m10, [pw_4096]
     jmp m(idct_16x8_internal_10bpc).end
@@ -4457,6 +4517,7 @@ cglobal idct_16x8_internal_12bpc, 0, 7, 16, 32*8, dst,
     vpbroadcastd        m13, [clip_20b_max]
     jmp m(idct_16x8_internal_10bpc).pass1
 .pass2:
+    _CET_ENDBR
     call .pass2_main
     RET
 ALIGN function_align
@@ -4522,6 +4583,7 @@ cglobal iadst_16x8_internal_12bpc, 0, 7, 16, 32*8, dst
     vpbroadcastd        m14, [clip_20b_max]
     jmp m(iadst_16x8_internal_10bpc).pass1
 .pass2:
+    _CET_ENDBR
     call .pass2_main
     call m(idct_16x8_internal_12bpc).end
     RET
@@ -4564,6 +4626,7 @@ cglobal iflipadst_16x8_internal_12bpc, 0, 7, 16, 32*8,
     vpbroadcastd        m14, [clip_20b_max]
     jmp m(iflipadst_16x8_internal_10bpc).pass1
 .pass2:
+    _CET_ENDBR
     call m(iadst_16x8_internal_12bpc).pass2_main
     packssdw            m13, m0, [cq+32* 8]
     packssdw            m12, m1, [cq+32* 9]
@@ -4591,6 +4654,7 @@ INV_TXFM_16X8_FN identity, identity, 12
 cglobal iidentity_16x8_internal_12bpc, 0, 7, 16, 32*8, dst, stride, c, eob, tx2
     jmp m(iidentity_16x8_internal_10bpc).pass1
 .pass2:
+    _CET_ENDBR
     call m(idct_16x8_internal_10bpc).transpose2
     vpbroadcastd        m10, [pw_4096]
     pmulhrsw             m0, m10
@@ -4708,6 +4772,7 @@ cglobal idct_16x16_internal_10bpc, 0, 7, 16, 32*24, ds
                              m8,  m9,  m10, m11, m12, m13, m14, m15
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call .transpose
     lea                  r6, [pw_5+128]
     mova              [rsp], m15
@@ -4944,6 +5009,7 @@ cglobal iadst_16x16_internal_10bpc, 0, 7, 16, 32*24, d
     sub                  r6, 32*8
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call m(idct_16x16_internal_10bpc).transpose
     lea                  r6, [pw_5+128]
     mova              [rsp], m15
@@ -5077,6 +5143,7 @@ cglobal iflipadst_16x16_internal_10bpc, 0, 7, 16, 32*2
     paddd                m1,     [r6-32*3]
     jmp m(iadst_16x16_internal_10bpc).pass1_end
 .pass2:
+    _CET_ENDBR
     call m(idct_16x16_internal_10bpc).transpose
     lea                  r6, [pw_5+128]
     mova              [rsp], m15
@@ -5163,6 +5230,7 @@ cglobal iidentity_16x16_internal_10bpc, 0, 7, 16, 32*2
                              m8,  m9,  m10, m11, m12, m13, m14, m15
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call m(idct_16x16_internal_10bpc).transpose
 
     mova          [cq+32*0], m15
@@ -5191,6 +5259,7 @@ cglobal idct_16x16_internal_12bpc, 0, 7, 16, 32*24, ds
     vpbroadcastd        m13, [clip_20b_max]
     jmp m(idct_16x16_internal_10bpc).pass1
 .pass2:
+    _CET_ENDBR
     mova         [cq+32* 8], m8
     mova         [cq+32* 9], m9
     mova         [cq+32*10], m10
@@ -5318,6 +5387,7 @@ cglobal iadst_16x16_internal_12bpc, 0, 7, 16, 32*24, d
     vpbroadcastd        m14, [clip_20b_max]
     jmp m(iadst_16x16_internal_10bpc).pass1
 .pass2:
+    _CET_ENDBR
     call .pass2_part1
     call m(iadst_16x8_internal_10bpc).pass1_rotations
     call .pass2_part2
@@ -5487,6 +5557,7 @@ cglobal iflipadst_16x16_internal_12bpc, 0, 7, 16, 32*2
     vpbroadcastd        m14, [clip_20b_max]
     jmp m(iflipadst_16x16_internal_10bpc).pass1
 .pass2:
+    _CET_ENDBR
     call m(iadst_16x16_internal_12bpc).pass2_part1
     call m(iflipadst_16x8_internal_10bpc).pass1_rotations
     call m(iadst_16x16_internal_12bpc).pass2_part2
@@ -5564,6 +5635,7 @@ cglobal iidentity_16x16_internal_12bpc, 0, 7, 16, 32*2
     mova                 m7, [cq+64*1]
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call m(iidentity_8x16_internal_12bpc).pass2_main
     call m(idct_16x16_internal_10bpc).transpose_fast
     test               eobd, eobd
@@ -6200,6 +6272,7 @@ cglobal inv_txfm_add_dct_dct_8x32_12bpc, 4, 7, 0, dst,
 .eob171:
     call .pass1_main
 .pass2:
+    _CET_ENDBR
     mov                  cq, r4
     vpbroadcastd        m12, [clip_18b_min]
     vpbroadcastd        m13, [clip_18b_max]
@@ -6428,6 +6501,7 @@ cglobal inv_txfm_add_dct_dct_32x8_10bpc, 4, 7, 0, dst,
     RET
 ALIGN function_align
 .pass2:
+    _CET_ENDBR
     call m(idct_16x8_internal_8bpc).main
     REPX  {pmulhrsw x, m11}, m0, m1, m2, m3
     call m(idct_16x8_internal_10bpc).write_16x4_start
@@ -7240,6 +7314,7 @@ cglobal inv_txfm_add_dct_dct_32x32_10bpc, 4, 7, 0, dst
     cmp                  r6, r4
     jl .fast_loop
 .pass2:
+    _CET_ENDBR
     lea                  r3, [rsp+32*3]
     mov                  r4, r6
     lea                  r5, [r6+32*8]
@@ -7525,6 +7600,7 @@ cglobal inv_txfm_add_dct_dct_16x64_10bpc, 4, 7, 0, dst
     cmp                  r6, r4
     jl .fast_loop
 .pass2:
+    _CET_ENDBR
     lea                  r6, [pw_5+128]
     mova                 m0, [rsp+32* 2] ; in0
     mova                 m1, [rsp+32* 6] ; in4
@@ -7868,6 +7944,7 @@ cglobal inv_txfm_add_dct_dct_32x64_10bpc, 4, 7, 0, dst
     cmp                  r6, r4
     jl .fast_loop
 .pass2:
+    _CET_ENDBR
     lea                  r6, [pw_5 + 128]
     mov                 r10, rsp
     lea                  r8, [strideq*4]
@@ -8084,6 +8161,7 @@ cglobal inv_txfm_add_dct_dct_64x16_10bpc, 4, 7, 0, dst
     dec                 r3d
     jg .fast_loop
 .pass2:
+    _CET_ENDBR
     lea                  r7, [r6-32*64]
     lea                  r4, [r6-32*32]
     lea                  r6, [pw_5+128]
@@ -8317,6 +8395,7 @@ cglobal inv_txfm_add_dct_dct_64x32_10bpc, 4, 7, 0, dst
     cmp                  r6, r4
     jl .fast_loop
 .pass2:
+    _CET_ENDBR
     lea                  r7, [r6-32*32]
     lea                  r5, [r6+32*8]
     lea                  r6, [pw_5+128]
@@ -8461,6 +8540,7 @@ cglobal inv_txfm_add_dct_dct_64x64_10bpc, 4, 7, 0, dst
     cmp                  r6, r4
     jl .fast_loop
 .pass2:
+    _CET_ENDBR
     lea                 r10, [r6-32*32]
     lea                  r6, [pw_5+128]
     lea                  r8, [strideq*4]
