Index: src/x86/itx_avx512.asm
--- src/x86/itx_avx512.asm.orig
+++ src/x86/itx_avx512.asm
@@ -475,6 +475,7 @@ cglobal idct_4x4_internal_8bpc, 0, 6, 0, dst, stride, 
     pshufb               m1, m3, m2
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     IDCT4_1D_PACKED
     pxor              ymm16, ymm16
     mova               [cq], ymm16
@@ -495,6 +496,7 @@ cglobal iadst_4x4_internal_8bpc, 0, 6, 0, dst, stride,
     punpcklwd            m0, m3
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call .main
 .end:
     pxor              ymm16, ymm16
@@ -521,6 +523,7 @@ cglobal iflipadst_4x4_internal_8bpc, 0, 6, 0, dst, str
     punpckhwd            m1, m2
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call m(iadst_4x4_internal_8bpc).main
 .end:
     pxor              ymm16, ymm16
@@ -547,6 +550,7 @@ cglobal iidentity_4x4_internal_8bpc, 0, 6, 0, dst, str
     punpcklwd            m0, m2
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vpbroadcastd         m3, [o(pw_1697x8)]
     pmulhrsw             m2, m3, m0
     pmulhrsw             m3, m1
@@ -693,6 +697,7 @@ cglobal idct_4x8_internal_8bpc, 0, 6, 0, dst, stride, 
     pshufb               m1, m3, m2
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vextracti32x4       xm2, m0, 1
     vextracti32x4       xm3, m1, 1
     call .main
@@ -724,6 +729,7 @@ cglobal iadst_4x8_internal_8bpc, 0, 6, 0, dst, stride,
     punpcklwd            m0, m3
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vextracti32x4       xm2, m0, 1
     vextracti32x4       xm3, m1, 1
     pshufd              xm4, xm0, q1032
@@ -787,6 +793,7 @@ cglobal iflipadst_4x8_internal_8bpc, 0, 6, 0, dst, str
     punpckhwd            m1, m3
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vextracti32x4       xm2, m0, 1
     vextracti32x4       xm3, m1, 1
     pshufd              xm4, xm0, q1032
@@ -818,6 +825,7 @@ cglobal iidentity_4x8_internal_8bpc, 0, 6, 0, dst, str
     vextracti32x8       ym1, m0, 1
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vpbroadcastd        ym4, [o(pw_4096)]
     jmp m(iadst_4x8_internal_8bpc).end2
 
@@ -935,6 +943,7 @@ cglobal idct_4x16_internal_8bpc, 0, 6, 0, dst, stride,
     pmulhrsw             m1, m4
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vextracti32x4       xm2, ym0, 1
     vextracti32x4       xm3, ym1, 1
     vextracti32x4       xm4, m0, 2
@@ -975,6 +984,7 @@ cglobal iadst_4x16_internal_8bpc, 0, 6, 0, dst, stride
     punpcklwd            m0, m2
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call .main
     vpbroadcastd         m5, [o(pw_2048)]
     psrlq               m10, 4
@@ -1082,6 +1092,7 @@ cglobal iflipadst_4x16_internal_8bpc, 0, 6, 0, dst, st
     punpckhwd            m1, m2
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call m(iadst_4x16_internal_8bpc).main
     vpbroadcastd         m6, [o(pw_2048)]
     psrlq               m10, 12
@@ -1109,6 +1120,7 @@ cglobal iidentity_4x16_internal_8bpc, 0, 6, 0, dst, st
     punpckhdq            m1, m2
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vpbroadcastd         m3, [o(pw_1697x16)]
     vpbroadcastd         m5, [o(pw_2048)]
     pmulhrsw             m2, m3, m0
@@ -1181,6 +1193,7 @@ cglobal idct_8x4_internal_8bpc, 0, 6, 0, dst, stride, 
     pshufb               m1, m4
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     IDCT4_1D_PACKED
     vpermq               m0, m0, q3120
     vpermq               m1, m1, q2031
@@ -1210,6 +1223,7 @@ cglobal iadst_8x4_internal_8bpc, 0, 6, 0, dst, stride,
     punpcklwd            m0, m3
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call .main
 .end:
     vpermq               m0, m0, q3120
@@ -1253,6 +1267,7 @@ cglobal iflipadst_8x4_internal_8bpc, 0, 6, 0, dst, str
     punpcklwd            m0, m3
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call m(iadst_8x4_internal_8bpc).main
     mova                 m2, m1
     vpermq               m1, m0, q2031
@@ -1280,6 +1295,7 @@ cglobal iidentity_8x4_internal_8bpc, 0, 6, 0, dst, str
     paddsw               m1, m1
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vpbroadcastd         m3, [o(pw_1697x8)]
     pmulhrsw             m2, m3, m0
     pmulhrsw             m3, m1
@@ -1349,6 +1365,7 @@ cglobal idct_8x8_internal_8bpc, 0, 6, 0, dst, stride, 
     vshufi32x4           m3, m5, m3, 0x03
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call .main
     vpbroadcastd         m4, [o(pw_2048)]
     vpermq               m0, m0, q3120
@@ -1388,6 +1405,7 @@ cglobal iadst_8x8_internal_8bpc, 0, 6, 0, dst, stride,
     vinserti32x4         m1, m4, xm1, 1
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     pshufd               m4, m0, q1032
     pshufd               m5, m1, q1032
     call .main_pass2
@@ -1455,6 +1473,7 @@ cglobal iflipadst_8x8_internal_8bpc, 0, 6, 0, dst, str
     vshufi32x4           m2, m4, m2, 0x03
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     pshufd               m4, m0, q1032
     pshufd               m5, m1, q1032
     call m(iadst_8x8_internal_8bpc).main_pass2
@@ -1493,6 +1512,7 @@ cglobal iidentity_8x8_internal_8bpc, 0, 6, 0, dst, str
     punpckhdq            m3, m4
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vpbroadcastd         m4, [o(pw_4096)]
     jmp m(iadst_8x8_internal_8bpc).end
 
@@ -1553,6 +1573,7 @@ cglobal idct_8x16_internal_8bpc, 0, 6, 0, dst, stride,
     punpckhdq            m3, m4     ;  3  7 11 15
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vprord               m5, [o(int16_perm)], 16
     vshufi32x4           m2, m2, q1320     ;  2 10 14  6
     vshufi32x4           m4, m1, m3, q2310 ;  1  5 15 11
@@ -1686,6 +1707,7 @@ cglobal iadst_8x16_internal_8bpc, 0, 6, 0, dst, stride
     punpckhqdq           m3, m5
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call .main_pass2
     vpbroadcastd         m6, [o(pw_2048)]
     psrlq               m10, 4
@@ -1794,6 +1816,7 @@ cglobal iflipadst_8x16_internal_8bpc, 0, 6, 0, dst, st
     pshufb               m2, m1, m6 ; e0 f0 e1 f1 e2 f2 e3 f3
     jmp m(iadst_8x16_internal_8bpc).pass1_end
 .pass2:
+    _CET_ENDBR
     call m(iadst_8x16_internal_8bpc).main_pass2
     vpbroadcastd         m7, [o(pw_2048)]
     psrlq               m10, 36
@@ -1823,6 +1846,7 @@ cglobal iidentity_8x16_internal_8bpc, 0, 6, 0, dst, st
     punpckhqdq           m3, m4            ; a3 b3 c3 d3 e3 f3 g3 h3
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vpbroadcastd         m7, [o(pw_1697x16)]
     mova                ym8, [o(gather8b)]
     lea                  r3, [dstq+strideq*2]
@@ -1897,6 +1921,7 @@ cglobal idct_16x4_internal_8bpc, 0, 6, 0, dst, stride,
     punpcklwd            m0, m2
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     IDCT4_1D_PACKED
     mova                 m2, [o(permA)]
     jmp m(iadst_16x4_internal_8bpc).end
@@ -1936,6 +1961,7 @@ cglobal iadst_16x4_internal_8bpc, 0, 6, 0, dst, stride
     pmulhrsw             m1, m6
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call .main
     movu                 m2, [o(permA+1)]
 .end:
@@ -1986,6 +2012,7 @@ cglobal iflipadst_16x4_internal_8bpc, 0, 6, 0, dst, st
     psrlq               m10, 16
     jmp m(iadst_16x4_internal_8bpc).pass1_end
 .pass2:
+    _CET_ENDBR
     call m(iadst_16x4_internal_8bpc).main
     movu                m2, [o(permA+2)]
     jmp m(iadst_16x4_internal_8bpc).end
@@ -2013,6 +2040,7 @@ cglobal iidentity_16x4_internal_8bpc, 0, 6, 0, dst, st
     vpermb               m1, m5, m1
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vpbroadcastd         m3, [o(pw_1697x8)]
     pmulhrsw             m2, m3, m0
     pmulhrsw             m3, m1
@@ -2112,6 +2140,7 @@ cglobal idct_16x8_internal_8bpc, 0, 6, 0, dst, stride,
     punpckhdq            m5, m6, m7 ; i2 j2 k2 l2 i3 j3 k3 l3
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vshufi32x4           m0, m2, m4, q2020 ; 0 1
     vshufi32x4           m2, m4, q3131     ; 4 5
     vshufi32x4           m1, m3, m5, q2020 ; 2 3
@@ -2211,6 +2240,7 @@ cglobal iadst_16x8_internal_8bpc, 0, 6, 0, dst, stride
     REPX   {pmulhrsw x, m7}, m2, m3, m4, m5
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vshufi32x4           m0, m2, m4, q2020
     vshufi32x4           m2, m4, q3131     ; 4 5
     vshufi32x4           m1, m3, m5, q2020
@@ -2265,6 +2295,7 @@ cglobal iflipadst_16x8_internal_8bpc, 0, 6, 0, dst, st
     psrlq               m10, 20
     jmp m(iadst_16x8_internal_8bpc).pass1_end
 .pass2:
+    _CET_ENDBR
     vshufi32x4           m0, m2, m4, q2020
     vshufi32x4           m2, m4, q3131     ; 4 5
     vshufi32x4           m1, m3, m5, q2020
@@ -2314,6 +2345,7 @@ cglobal iidentity_16x8_internal_8bpc, 0, 6, 0, dst, st
     REPX  {vpermb x, m9, x}, m2, m3, m4, m5
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     mova                 m7, [o(permB)]
     vpbroadcastd         m6, [o(pw_4096)]
     vpermq               m0, m7, m2
@@ -2373,6 +2405,7 @@ cglobal idct_16x16_internal_8bpc, 0, 6, 0, dst, stride
     punpckldq            m6, m11
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     vshufi32x4           m8, m4, m6, q3232 ; i8 ic m8 mc
     vinserti32x8         m4, ym6, 1        ; i0 i4 m0 m4
     vshufi32x4           m6, m0, m2, q3232 ; a8 ac e8 ec
@@ -2538,6 +2571,7 @@ cglobal iadst_16x16_internal_8bpc, 0, 6, 0, dst, strid
     REPX  {pmulhrsw x, m10}, m0, m1, m2, m3, m4, m5, m6, m7
     jmp                tx2q
 .pass2:
+    _CET_ENDBR
     call .main_pass2
     mova                m10, [o(permD)]
     psrlq                m8, m10, 8
@@ -2720,6 +2754,7 @@ cglobal iflipadst_16x16_internal_8bpc, 0, 6, 0, dst, s
     punpckhwd            m5, m8, m9 ; i2 j2 k2 l2 i3 j3 k3 l3
     jmp m(iadst_16x16_internal_8bpc).pass1_end
 .pass2:
+    _CET_ENDBR
     call m(iadst_16x16_internal_8bpc).main_pass2
     mova                m10, [o(permD)]
     psrlq                m8, m10, 8
@@ -2789,6 +2824,7 @@ cglobal iidentity_16x16_internal_8bpc, 0, 6, 0, dst, s
     jmp                tx2q
 ALIGN function_align
 .pass2:
+    _CET_ENDBR
     vpbroadcastd        m11, [o(pw_1697x16)]
     pmulhrsw            m12, m11, m0
     pmulhrsw            m13, m11, m1
@@ -3131,6 +3167,7 @@ cglobal inv_txfm_add_dct_dct_32x8_8bpc, 4, 4, 0, dst, 
     call m(idct_8x16_internal_8bpc).main
     call m(inv_txfm_add_dct_dct_8x32_8bpc).main_fast
 .pass2:
+    _CET_ENDBR
     vpbroadcastd        m10, [o(pw_8192)]
     vpermt2q             m0, m15, m4       ; t0   t1   t9   t8
     vpermt2q            m20, m15, m18      ; t31  t30a t23a t22
@@ -3586,6 +3623,7 @@ cglobal inv_txfm_add_dct_dct_16x32_8bpc, 4, 4, 22, dst
     punpckhwd           m17, m17
     call .main_oddhalf_fast
 .pass2:
+    _CET_ENDBR
     vpbroadcastd        m10, [o(pw_2048)]
     mova                m11, [o(end_16x32p)]
     lea                  r3, [strideq*3]
@@ -3798,6 +3836,7 @@ cglobal inv_txfm_add_dct_dct_32x16_8bpc, 4, 6, 22, dst
     punpckhwd           m17, m17      ; 15
     call m(inv_txfm_add_dct_dct_16x32_8bpc).main_oddhalf_fast
 .pass2:
+    _CET_ENDBR
     vpbroadcastd         m9, [o(pw_16384)]
     call .transpose_round
     vshufi32x4          m16, m14, m2, q3131 ;  5
@@ -5683,6 +5722,7 @@ ALIGN function_align
     vinserti32x8        m17, ym21, 1         ; c30 c31 d30 d31
     ret
 .pass2:
+    _CET_ENDBR
     vshufi32x4           m7, m5, m19, q3131  ; 14
     vshufi32x4           m5, m19, q2020      ; 10
     vshufi32x4          m21, m6, m20, q3131  ; 15
