Index: src/x86/ipred_sse.asm
--- src/x86/ipred_sse.asm.orig
+++ src/x86/ipred_sse.asm
@@ -207,14 +207,19 @@ cglobal ipred_h_8bpc, 3, 6, 2, dst, stride, tl, w, h, 
     lea                    stride3q, [strideq*3]
     jmp                          wq
 .w4:
+    _CET_ENDBR
     IPRED_H                       4
 .w8:
+    _CET_ENDBR
     IPRED_H                       8
 .w16:
+    _CET_ENDBR
     IPRED_H                      16
 .w32:
+    _CET_ENDBR
     IPRED_H                      32
 .w64:
+    _CET_ENDBR
     IPRED_H                      64
 
 ;---------------------------------------------------------------------------------------
@@ -257,10 +262,12 @@ cglobal ipred_dc_8bpc, 3, 7, 6, dst, stride, tl, w, h,
     lea                    stride3q, [strideq*3]
     jmp r6
 .h4:
+    _CET_ENDBR
     movd                         m0, [tlq-4]
     pmaddubsw                    m0, m3
     jmp                          wq
 .w4:
+    _CET_ENDBR
     movd                         m1, [tlq+1]
     pmaddubsw                    m1, m3
     psubw                        m0, m4
@@ -286,6 +293,7 @@ cglobal ipred_dc_8bpc, 3, 7, 6, dst, stride, tl, w, h,
     pxor                         m1, m1
     pshufb                       m0, m1
 .s4:
+    _CET_ENDBR
     movd           [dstq+strideq*0], m0
     movd           [dstq+strideq*1], m0
     movd           [dstq+strideq*2], m0
@@ -296,10 +304,12 @@ cglobal ipred_dc_8bpc, 3, 7, 6, dst, stride, tl, w, h,
     RET
 ALIGN function_align
 .h8:
+    _CET_ENDBR
     movq                         m0, [tlq-8]
     pmaddubsw                    m0, m3
     jmp                          wq
 .w8:
+    _CET_ENDBR
     movq                         m1, [tlq+1]
     pmaddubsw                    m1, m3
     psubw                        m4, m0
@@ -322,6 +332,7 @@ ALIGN function_align
     pxor                         m1, m1
     pshufb                       m0, m1
 .s8:
+    _CET_ENDBR
     movq           [dstq+strideq*0], m0
     movq           [dstq+strideq*1], m0
     movq           [dstq+strideq*2], m0
@@ -332,10 +343,12 @@ ALIGN function_align
     RET
 ALIGN function_align
 .h16:
+    _CET_ENDBR
     mova                         m0, [tlq-16]
     pmaddubsw                    m0, m3
     jmp                          wq
 .w16:
+    _CET_ENDBR
     movu                         m1, [tlq+1]
     pmaddubsw                    m1, m3
     paddw                        m0, m1
@@ -358,6 +371,7 @@ ALIGN function_align
     pxor                         m1, m1
     pshufb                       m0, m1
 .s16:
+    _CET_ENDBR
     mova           [dstq+strideq*0], m0
     mova           [dstq+strideq*1], m0
     mova           [dstq+strideq*2], m0
@@ -368,6 +382,7 @@ ALIGN function_align
     RET
 ALIGN function_align
 .h32:
+    _CET_ENDBR
     mova                         m0, [tlq-32]
     pmaddubsw                    m0, m3
     mova                         m2, [tlq-16]
@@ -375,6 +390,7 @@ ALIGN function_align
     paddw                        m0, m2
     jmp wq
 .w32:
+    _CET_ENDBR
     movu                         m1, [tlq+1]
     pmaddubsw                    m1, m3
     movu                         m2, [tlq+17]
@@ -402,6 +418,7 @@ ALIGN function_align
     pshufb                       m0, m1
     mova                         m1, m0
 .s32:
+    _CET_ENDBR
     mova                     [dstq], m0
     mova                  [dstq+16], m1
     mova             [dstq+strideq], m0
@@ -416,6 +433,7 @@ ALIGN function_align
     RET
 ALIGN function_align
 .h64:
+    _CET_ENDBR
     mova                         m0, [tlq-64]
     mova                         m1, [tlq-48]
     pmaddubsw                    m0, m3
@@ -429,6 +447,7 @@ ALIGN function_align
     paddw                        m0, m1
     jmp wq
 .w64:
+    _CET_ENDBR
     movu                         m1, [tlq+ 1]
     movu                         m2, [tlq+17]
     pmaddubsw                    m1, m3
@@ -463,6 +482,7 @@ ALIGN function_align
     mova                         m2, m0
     mova                         m3, m0
 .s64:
+    _CET_ENDBR
     mova                     [dstq], m0
     mova                  [dstq+16], m1
     mova                  [dstq+32], m2
@@ -499,6 +519,7 @@ cglobal ipred_dc_left_8bpc, 3, 7, 6, dst, stride, tl, 
     add                  wq, r5
     jmp                  r6
 .h64:
+    _CET_ENDBR
     movu                 m1, [tlq+48]                           ; unaligned when jumping here from dc_top
     pmaddubsw            m1, m2
     paddw                m0, m1
@@ -506,16 +527,20 @@ cglobal ipred_dc_left_8bpc, 3, 7, 6, dst, stride, tl, 
     pmaddubsw            m1, m2
     paddw                m0, m1
 .h32:
+    _CET_ENDBR
     movu                 m1, [tlq+16]                           ; unaligned when jumping here from dc_top
     pmaddubsw            m1, m2
     paddw                m0, m1
 .h16:
+    _CET_ENDBR
     pshufd               m1, m0, q3232                          ; psrlq               m1, m0, 16
     paddw                m0, m1
 .h8:
+    _CET_ENDBR
     pshuflw              m1, m0, q1032                          ; psrlq               m1, m0, 32
     paddw                m0, m1
 .h4:
+    _CET_ENDBR
     pmaddwd              m0, m2
     pmulhrsw             m0, m3
     lea            stride3q, [strideq*3]
@@ -598,6 +623,7 @@ cglobal ipred_smooth_v_8bpc, 3, 7, 7, dst, stride, tl,
     add                  wq, r6
     jmp                  wq
 .w4:
+    _CET_ENDBR
     movd                 m2, [tlq+1]
     punpckldq            m2, m2
     punpcklbw            m2, m5                          ; top, bottom
@@ -627,6 +653,7 @@ cglobal ipred_smooth_v_8bpc, 3, 7, 7, dst, stride, tl,
     RET
 ALIGN function_align
 .w8:
+    _CET_ENDBR
     movq                 m2, [tlq+1]
     punpcklbw            m2, m5
     mova                 m5, [base+ipred_v_shuf]
@@ -649,6 +676,7 @@ ALIGN function_align
     RET
 ALIGN function_align
 .w16:
+    _CET_ENDBR
     movu                 m3, [tlq+1]
     punpcklbw            m2, m3, m5
     punpckhbw            m3, m5
@@ -670,6 +698,7 @@ ALIGN function_align
     RET
 ALIGN function_align
 .w32:
+    _CET_ENDBR
     WIN64_PUSH_XMM        8, 7
     mova                 m7, m5
 .w32_loop_init:
@@ -702,6 +731,7 @@ ALIGN function_align
     RET
 ALIGN function_align
 .w64:
+    _CET_ENDBR
     WIN64_PUSH_XMM        8, 7
     mova                 m7, m5
 .w64_loop_init:
@@ -752,6 +782,7 @@ cglobal ipred_smooth_h_8bpc, 3, 7, 8, dst, stride, tl,
     add                  wq, r6
     jmp                  wq
 .w4:
+    _CET_ENDBR
     movddup              m6, [base+smooth_weights+4*2]
     mova                 m7, [base+ipred_h_shuf]
     sub                 tlq, 4
@@ -788,6 +819,7 @@ cglobal ipred_smooth_h_8bpc, 3, 7, 8, dst, stride, tl,
     RET
 ALIGN function_align
 .w8:
+    _CET_ENDBR
     mova                 m6, [base+smooth_weights+8*2]
     mova                 m7, [base+ipred_h_shuf]
     sub                 tlq, 4
@@ -819,6 +851,7 @@ ALIGN function_align
     RET
 ALIGN function_align
 .w16:
+    _CET_ENDBR
     mova                 m6, [base+smooth_weights+16*2]
     mova                 m7, [base+smooth_weights+16*3]
     sub                 tlq, 1
@@ -849,6 +882,7 @@ ALIGN function_align
     RET
 ALIGN function_align
 .w32:
+    _CET_ENDBR
     sub                 tlq, 1
     sub                 tlq, hq
     pxor                 m6, m6
@@ -887,6 +921,7 @@ ALIGN function_align
     RET
 ALIGN function_align
 .w64:
+    _CET_ENDBR
     sub                 tlq, 1
     sub                 tlq, hq
     pxor                 m6, m6
@@ -1014,6 +1049,7 @@ cglobal ipred_smooth_8bpc, 3, 7, 8, -13*16, dst, strid
     lea          v_weightsq, [base+smooth_weights+hq*2]   ; weights_ver = &dav1d_sm_weights[height]
     jmp                  wq
 .w4:
+    _CET_ENDBR
     mova                 m7, [base+ipred_v_shuf]
     movd                 m1, [tlq+1]                      ; left
     pshufd               m1, m1, q0000
@@ -1071,6 +1107,7 @@ cglobal ipred_smooth_8bpc, 3, 7, 8, -13*16, dst, strid
     RET
 ALIGN function_align
 .w8:
+    _CET_ENDBR
     mova                 m7, [base+ipred_v_shuf]
     movq                 m1, [tlq+1]                  ; left
     punpcklqdq           m1, m1
@@ -1122,6 +1159,7 @@ ALIGN function_align
     RET
 ALIGN function_align
 .w16:
+    _CET_ENDBR
     mova                 m7, [base+ipred_v_shuf]
     movu                 m1, [tlq+1]                     ; left
     sub                 tlq, 4
@@ -1173,6 +1211,7 @@ ALIGN function_align
     RET
 ALIGN function_align
 .w32:
+    _CET_ENDBR
     movu                 m1, [tlq+1]                     ; top     topleft[1 + x]
     movu                 m2, [tlq+17]                    ; top
     mova         [rsp+16*0], m1
@@ -1196,6 +1235,7 @@ ALIGN function_align
     RET
 ALIGN function_align
 .w64:
+    _CET_ENDBR
     movu                 m1, [tlq+1]                     ; top     topleft[1 + x]
     movu                 m2, [tlq+17]                    ; top
     mova         [rsp+16*0], m1
@@ -1257,6 +1297,7 @@ cglobal ipred_z1_8bpc, 3, 7, 8, -16*13, dst, _, tl, w,
     xor              angled, 0x4ff ; d = 90 - angle
     jmp                  wq
 .w4:
+    _CET_ENDBR
     lea                 r3d, [angleq+88]
     test                r3d, 0x480
     jnz .w4_no_upsample ; !enable_intra_edge_filter || angle >= 40
@@ -1410,6 +1451,7 @@ cglobal ipred_z1_8bpc, 3, 7, 8, -16*13, dst, _, tl, w,
 .w4_end:
     RET
 .w8:
+    _CET_ENDBR
     lea                 r3d, [angleq+88]
     and                 r3d, ~0x7f
     or                  r3d, hd
@@ -1561,6 +1603,7 @@ cglobal ipred_z1_8bpc, 3, 7, 8, -16*13, dst, _, tl, w,
 .w8_end:
     RET
 .w16:
+    _CET_ENDBR
     lea                 r3d, [hq+15]
     movd                 m0, r3d
     and                 r3d, 15
@@ -1663,6 +1706,7 @@ cglobal ipred_z1_8bpc, 3, 7, 8, -16*13, dst, _, tl, w,
 .w16_end:
     RET
 .w32:
+    _CET_ENDBR
     lea                 r3d, [hq+31]
     and                 r3d, 31
     or                  r3d, 32    ; imin(h+31, 63)
@@ -1774,6 +1818,7 @@ cglobal ipred_z1_8bpc, 3, 7, 8, -16*13, dst, _, tl, w,
 .w32_end:
     RET
 .w64:
+    _CET_ENDBR
     lea                 r3d, [hq+63]
     test             angled, 0x400 ; !enable_intra_edge_filter
     jnz .w64_main
@@ -2078,6 +2123,7 @@ cglobal ipred_z2_8bpc, 4, 7, 8, -16*20, dst, _, tl, w,
     mov                r11d, (128-4)<<6
     jmp                  wq
 .w4:
+    _CET_ENDBR
     test             angled, 0x400
     jnz .w4_main
     movd                 m5, [tlq+4]
@@ -2336,6 +2382,7 @@ cglobal ipred_z2_8bpc, 4, 7, 8, -16*20, dst, _, tl, w,
 .w4_ret:
     RET
 .w8:
+    _CET_ENDBR
     test             angled, 0x400
     jnz .w4_main
     movd                 m5, [tlq+8]
@@ -2448,6 +2495,7 @@ cglobal ipred_z2_8bpc, 4, 7, 8, -16*20, dst, _, tl, w,
 .w8_filter_top_end:
     ret
 .w16:
+    _CET_ENDBR
     test             angled, 0x400
     jnz .w4_main
     lea                 r3d, [hq+15]
@@ -2519,6 +2567,7 @@ cglobal ipred_z2_8bpc, 4, 7, 8, -16*20, dst, _, tl, w,
     adc                  r5, -4 ; filter_strength-3
     jmp .filter_left
 .w32:
+    _CET_ENDBR
     test             angled, 0x400
     jnz .w4_main
     pshufb               m6, [base+z_filter_t_w16] ; tlq[32]
@@ -2540,6 +2589,7 @@ cglobal ipred_z2_8bpc, 4, 7, 8, -16*20, dst, _, tl, w,
     movu      [rsp+r2+16*9], m1
     jmp .filter_left
 .w64:
+    _CET_ENDBR
     movu                 m0, [tlq+16*2+1]
     movu                 m1, [tlq+16*3+1]
     mova        [rsp+16*10], m0
@@ -2647,6 +2697,7 @@ cglobal ipred_z3_8bpc, 4, 7, 8, -16*10, dst, stride, t
     movzx               dyd, word [base+dr_intra_derivative+45*2-1+dyq]
     jmp                  hq
 .h4:
+    _CET_ENDBR
     lea                 r4d, [angleq+88]
     test                r4d, 0x480
     jnz .h4_no_upsample ; !enable_intra_edge_filter || angle >= 40
@@ -2819,6 +2870,7 @@ cglobal ipred_z3_8bpc, 4, 7, 8, -16*10, dst, stride, t
     jg .h4_transpose_loop
     RET
 .h8:
+    _CET_ENDBR
     lea                 r4d, [angleq+88]
     and                 r4d, ~0x7f
     or                  r4d, wd
@@ -3010,6 +3062,7 @@ cglobal ipred_z3_8bpc, 4, 7, 8, -16*10, dst, stride, t
     movd   [dstq+strideq*0], m1
     ret
 .h16:
+    _CET_ENDBR
     lea                 r4d, [wq+15]
     movd                 m0, r4d
     and                 r4d, 15
@@ -3141,6 +3194,7 @@ cglobal ipred_z3_8bpc, 4, 7, 8, -16*10, dst, stride, t
     punpcklwd            m1, m2, m3
     jmp .write_4x8_end
 .h32:
+    _CET_ENDBR
     lea                 r4d, [wq+31]
     and                 r4d, 31
     or                  r4d, 32 ; imin(w+31, 63)
@@ -3252,6 +3306,7 @@ cglobal ipred_z3_8bpc, 4, 7, 8, -16*10, dst, stride, t
     or                  r3d, 32
     jmp .end_transpose_main
 .h64:
+    _CET_ENDBR
     lea                 r4d, [wq+63]
     test             angled, 0x400 ; !enable_intra_edge_filter
     jnz .h64_main
@@ -3487,6 +3542,7 @@ cglobal pal_pred_8bpc, 4, 6, 5, dst, stride, pal, idx,
     lea                  r2, [strideq*3]
     jmp                  wq
 .w4:
+    _CET_ENDBR
     movq                 m1, [idxq]
     add                idxq, 8
     psrlw                m0, m1, 4
@@ -3504,6 +3560,7 @@ cglobal pal_pred_8bpc, 4, 6, 5, dst, stride, pal, idx,
     jg .w4
     RET
 .w8:
+    _CET_ENDBR
     movu                 m0, [idxq]
     add                idxq, 16
     pshufb               m1, m4, m0
@@ -3520,6 +3577,7 @@ cglobal pal_pred_8bpc, 4, 6, 5, dst, stride, pal, idx,
     jg .w8
     RET
 .w16:
+    _CET_ENDBR
     movu                 m0, [idxq]
     add                idxq, 16
     pshufb               m1, m4, m0
@@ -3534,6 +3592,7 @@ cglobal pal_pred_8bpc, 4, 6, 5, dst, stride, pal, idx,
     jg .w16
     RET
 .w32:
+    _CET_ENDBR
     movu                 m0, [idxq]
     add                idxq, 16
     pshufb               m1, m4, m0
@@ -3548,6 +3607,7 @@ cglobal pal_pred_8bpc, 4, 6, 5, dst, stride, pal, idx,
     jg .w32
     RET
 .w64:
+    _CET_ENDBR
     movu                 m0, [idxq+16*0]
     movu                 m2, [idxq+16*1]
     add                idxq, 32
@@ -3607,10 +3667,12 @@ cglobal ipred_cfl_8bpc, 3, 7, 6, dst, stride, tl, w, h
     movifnidn           acq, acmp
     jmp                  r6
 .h4:
+    _CET_ENDBR
     movd                 m0, [tlq-4]
     pmaddubsw            m0, m3
     jmp                  wq
 .w4:
+    _CET_ENDBR
     movd                 m1, [tlq+1]
     pmaddubsw            m1, m3
     psubw                m0, m4
@@ -3636,6 +3698,7 @@ cglobal ipred_cfl_8bpc, 3, 7, 6, dst, stride, tl, w, h
     pshuflw              m0, m0, q0000
     punpcklqdq           m0, m0
 .s4:
+    _CET_ENDBR
     movd                 m1, alpham
     pshuflw              m1, m1, q0000
     punpcklqdq           m1, m1
@@ -3662,10 +3725,12 @@ cglobal ipred_cfl_8bpc, 3, 7, 6, dst, stride, tl, w, h
     RET
 ALIGN function_align
 .h8:
+    _CET_ENDBR
     movq                 m0, [tlq-8]
     pmaddubsw            m0, m3
     jmp                  wq
 .w8:
+    _CET_ENDBR
     movq                 m1, [tlq+1]
     pmaddubsw            m1, m3
     psubw                m4, m0
@@ -3688,6 +3753,7 @@ ALIGN function_align
     pshuflw              m0, m0, q0000
     punpcklqdq           m0, m0
 .s8:
+    _CET_ENDBR
     movd                 m1, alpham
     pshuflw              m1, m1, q0000
     punpcklqdq           m1, m1
@@ -3716,10 +3782,12 @@ ALIGN function_align
     RET
 ALIGN function_align
 .h16:
+    _CET_ENDBR
     mova                 m0, [tlq-16]
     pmaddubsw            m0, m3
     jmp                  wq
 .w16:
+    _CET_ENDBR
     movu                 m1, [tlq+1]
     pmaddubsw            m1, m3
     paddw                m0, m1
@@ -3742,6 +3810,7 @@ ALIGN function_align
     pshuflw              m0, m0, q0000
     punpcklqdq           m0, m0
 .s16:
+    _CET_ENDBR
     movd                 m1, alpham
     pshuflw              m1, m1, q0000
     punpcklqdq           m1, m1
@@ -3767,6 +3836,7 @@ ALIGN function_align
     RET
 ALIGN function_align
 .h32:
+    _CET_ENDBR
     mova                 m0, [tlq-32]
     pmaddubsw            m0, m3
     mova                 m2, [tlq-16]
@@ -3774,6 +3844,7 @@ ALIGN function_align
     paddw                m0, m2
     jmp                  wq
 .w32:
+    _CET_ENDBR
     movu                 m1, [tlq+1]
     pmaddubsw            m1, m3
     movu                 m2, [tlq+17]
@@ -3800,6 +3871,7 @@ ALIGN function_align
     pshuflw              m0, m0, q0000
     punpcklqdq           m0, m0
 .s32:
+    _CET_ENDBR
     movd                 m1, alpham
     pshuflw              m1, m1, q0000
     punpcklqdq           m1, m1
@@ -3849,16 +3921,20 @@ cglobal ipred_cfl_left_8bpc, 3, 7, 6, dst, stride, tl,
     movifnidn           acq, acmp
     jmp                  r6
 .h32:
+    _CET_ENDBR
     movu                 m1, [tlq+16]                           ; unaligned when jumping here from dc_top
     pmaddubsw            m1, m2
     paddw                m0, m1
 .h16:
+    _CET_ENDBR
     pshufd               m1, m0, q3232                          ; psrlq               m1, m0, 16
     paddw                m0, m1
 .h8:
+    _CET_ENDBR
     pshuflw              m1, m0, q1032                          ; psrlq               m1, m0, 32
     paddw                m0, m1
 .h4:
+    _CET_ENDBR
     pmaddwd              m0, m2
     pmulhrsw             m0, m3
     pshuflw              m0, m0, q0000
@@ -3941,6 +4017,7 @@ DECLARE_REG_TMP 4
     DEFINE_ARGS ac, y, stride, wpad, hpad, stride3, h
 %endif
 .w4:
+    _CET_ENDBR
     lea            stride3q, [strideq*3]
 .w4_loop:
     movq                 m0, [yq]
@@ -3967,6 +4044,7 @@ DECLARE_REG_TMP 4
     jg .w4_hpad_loop
     jmp .calc_avg_4_8
 .w8:
+    _CET_ENDBR
     lea            stride3q, [strideq*3]
     test              wpadd, wpadd
     jnz .w8_wpad
@@ -4015,6 +4093,7 @@ DECLARE_REG_TMP 4
     jg .w8_hpad
     jmp .calc_avg_4_8
 .w16:
+    _CET_ENDBR
     test              wpadd, wpadd
     jnz .w16_wpad
 .w16_loop:
@@ -4044,6 +4123,7 @@ DECLARE_REG_TMP 4
     jl .w16_pad1
     je .w16_pad2
 .w16_pad3:
+    _CET_ENDBR
     movddup              m0, [yq]
     movddup              m1, [yq+strideq]
     pmaddubsw            m0, m2
@@ -4062,6 +4142,7 @@ DECLARE_REG_TMP 4
     jg .w16_pad3
     jmp .w16_wpad_done
 .w16_pad2:
+    _CET_ENDBR
     mova                 m0, [yq]
     mova                 m1, [yq+strideq]
     pmaddubsw            m0, m2
@@ -4079,6 +4160,7 @@ DECLARE_REG_TMP 4
     jg .w16_pad2
     jmp .w16_wpad_done
 .w16_pad1:
+    _CET_ENDBR
     mova                 m0, [yq]
     mova                 m1, [yq+strideq]
     pmaddubsw            m0, m2
@@ -4180,6 +4262,7 @@ cglobal ipred_cfl_ac_422_8bpc, 4, 7, 7, ac, y, stride,
     DEFINE_ARGS ac, y, stride, wpad, hpad, stride3, h
 %endif
 .w4:
+    _CET_ENDBR
     lea            stride3q, [strideq*3]
 .w4_loop:
     movq                 m1, [yq]
@@ -4207,6 +4290,7 @@ cglobal ipred_cfl_ac_422_8bpc, 4, 7, 7, ac, y, stride,
     jg .w4_hpad_loop
     jmp .calc_avg_4
 .w8:
+    _CET_ENDBR
     lea            stride3q, [strideq*3]
     test              wpadd, wpadd
     jnz .w8_wpad
@@ -4261,6 +4345,7 @@ cglobal ipred_cfl_ac_422_8bpc, 4, 7, 7, ac, y, stride,
     jg .w8_hpad
     jmp .calc_avg_8_16
 .w16:
+    _CET_ENDBR
     test              wpadd, wpadd
     jnz .w16_wpad
 .w16_loop:
@@ -4292,6 +4377,7 @@ cglobal ipred_cfl_ac_422_8bpc, 4, 7, 7, ac, y, stride,
     jl .w16_pad1
     je .w16_pad2
 .w16_pad3:
+    _CET_ENDBR
     movddup              m1, [yq]
     pmaddubsw            m1, m2
     pshufhw              m1, m1, q3333
@@ -4314,6 +4400,7 @@ cglobal ipred_cfl_ac_422_8bpc, 4, 7, 7, ac, y, stride,
     jg .w16_pad3
     jmp .w16_wpad_done
 .w16_pad2:
+    _CET_ENDBR
     mova                 m1, [yq]
     pmaddubsw            m1, m2
     mova              [acq], m1
@@ -4337,6 +4424,7 @@ cglobal ipred_cfl_ac_422_8bpc, 4, 7, 7, ac, y, stride,
     jg .w16_pad2
     jmp .w16_wpad_done
 .w16_pad1:
+    _CET_ENDBR
     mova                 m1, [yq]
     pmaddubsw            m1, m2
     mova              [acq], m1
@@ -4456,6 +4544,7 @@ cglobal ipred_cfl_ac_444_8bpc, 4, 7, 7, -5*16, ac, y, 
     DEFINE_ARGS ac, y, stride, wpad, hpad, stride3, h
 %endif
 .w4:
+    _CET_ENDBR
     lea            stride3q, [strideq*3]
 .w4_loop:
     movd                 m1, [yq]
@@ -4491,6 +4580,7 @@ cglobal ipred_cfl_ac_444_8bpc, 4, 7, 7, -5*16, ac, y, 
     jmp .calc_avg
 
 .w8:
+    _CET_ENDBR
     lea            stride3q, [strideq*3]
     test              wpadd, wpadd
     jnz .w8_wpad
@@ -4554,6 +4644,7 @@ cglobal ipred_cfl_ac_444_8bpc, 4, 7, 7, -5*16, ac, y, 
     jmp .calc_avg_8_16
 
 .w16:
+    _CET_ENDBR
     test              wpadd, wpadd
     jnz .w16_wpad
 .w16_loop:
@@ -4589,6 +4680,7 @@ cglobal ipred_cfl_ac_444_8bpc, 4, 7, 7, -5*16, ac, y, 
     jl .w16_pad1
     je .w16_pad2
 .w16_pad3:
+    _CET_ENDBR
     movd                 m1, [yq]
     punpcklbw            m1, m1
     punpcklqdq           m1, m1
@@ -4615,6 +4707,7 @@ cglobal ipred_cfl_ac_444_8bpc, 4, 7, 7, -5*16, ac, y, 
     jg .w16_pad3
     jmp .w16_wpad_done
 .w16_pad2:
+    _CET_ENDBR
     movq                 m1, [yq]
     punpcklbw            m1, m1
     pmaddubsw            m1, m2
@@ -4640,6 +4733,7 @@ cglobal ipred_cfl_ac_444_8bpc, 4, 7, 7, -5*16, ac, y, 
     jg .w16_pad2
     jmp .w16_wpad_done
 .w16_pad1:
+    _CET_ENDBR
     mova                 m0, [yq]
     mova                 m1, m0
     punpcklbw            m1, m1
@@ -4698,6 +4792,7 @@ cglobal ipred_cfl_ac_444_8bpc, 4, 7, 7, -5*16, ac, y, 
     jmp .calc_avg
 
 .w32:
+    _CET_ENDBR
     pxor                 m0, m0
     mova           [rsp   ], m0
     mova           [rsp+16], m0
@@ -4748,6 +4843,7 @@ cglobal ipred_cfl_ac_444_8bpc, 4, 7, 7, -5*16, ac, y, 
     jl .w32_pad5
     je .w32_pad6
 .w32_pad7:
+    _CET_ENDBR
     movd                 m1, [yq]
     punpcklbw            m1, m1
     punpcklqdq           m1, m1
@@ -4775,6 +4871,7 @@ cglobal ipred_cfl_ac_444_8bpc, 4, 7, 7, -5*16, ac, y, 
     jg .w32_pad7
     jmp .w32_wpad_done
 .w32_pad6:
+    _CET_ENDBR
     mova                 m0, [yq]
     mova                 m1, m0
     punpcklbw            m1, m1
@@ -4801,6 +4898,7 @@ cglobal ipred_cfl_ac_444_8bpc, 4, 7, 7, -5*16, ac, y, 
     jg .w32_pad6
     jmp .w32_wpad_done
 .w32_pad5:
+    _CET_ENDBR
     mova                 m0, [yq]
     mova                 m1, m0
     punpcklbw            m1, m1
@@ -4831,6 +4929,7 @@ cglobal ipred_cfl_ac_444_8bpc, 4, 7, 7, -5*16, ac, y, 
     jg .w32_pad5
     jmp .w32_wpad_done
 .w32_pad4:
+    _CET_ENDBR
     mova                 m0, [yq]
     mova                 m1, m0
     punpcklbw            m1, m1
@@ -4859,6 +4958,7 @@ cglobal ipred_cfl_ac_444_8bpc, 4, 7, 7, -5*16, ac, y, 
     jg .w32_pad4
     jmp .w32_wpad_done
 .w32_pad3:
+    _CET_ENDBR
     mova                 m0, [yq]
     mova                 m1, m0
     punpcklbw            m1, m1
@@ -4890,6 +4990,7 @@ cglobal ipred_cfl_ac_444_8bpc, 4, 7, 7, -5*16, ac, y, 
     jg .w32_pad3
     jmp .w32_wpad_done
 .w32_pad2:
+    _CET_ENDBR
     mova                 m0, [yq]
     mova                 m1, m0
     punpcklbw            m1, m1
@@ -4919,6 +5020,7 @@ cglobal ipred_cfl_ac_444_8bpc, 4, 7, 7, -5*16, ac, y, 
     jg .w32_pad2
     jmp .w32_wpad_done
 .w32_pad1:
+    _CET_ENDBR
     mova                 m0, [yq]
     mova                 m1, m0
     punpcklbw            m1, m1
@@ -5075,6 +5177,7 @@ cglobal ipred_paeth_8bpc, 3, 6, 8, -7*16, dst, stride,
     add                  wq, r5
     jmp                  wq
 .w4:
+    _CET_ENDBR
     movd                 m6, [tlq+1]            ; top
     pshufd               m6, m6, q0000
     lea                  r3, [strideq*3]
@@ -5100,6 +5203,7 @@ cglobal ipred_paeth_8bpc, 3, 6, 8, -7*16, dst, stride,
     RET
 ALIGN function_align
 .w8:
+    _CET_ENDBR
     movddup              m6, [tlq+1]
     psubusb              m7, m5, m6
     psubusb              m0, m6, m5
@@ -5117,6 +5221,7 @@ ALIGN function_align
     RET
 ALIGN function_align
 .w16:
+    _CET_ENDBR
     movu                 m6, [tlq+1]
     psubusb              m7, m5, m6
     psubusb              m0, m6, m5
@@ -5134,6 +5239,7 @@ ALIGN function_align
     RET
 ALIGN function_align
 .w32:
+    _CET_ENDBR
     movu                 m6, [tlq+1]
     psubusb              m7, m5, m6
     psubusb              m0, m6, m5
@@ -5162,6 +5268,7 @@ ALIGN function_align
     RET
 ALIGN function_align
 .w64:
+    _CET_ENDBR
     movu                 m6, [tlq+1]
     psubusb              m7, m5, m6
     psubusb              m0, m6, m5
@@ -5251,6 +5358,7 @@ cglobal ipred_filter_8bpc, 3, 7, 8, dst, stride, tl, w
     mov                   hd, hm
     jmp                   wq
 .w4:
+    _CET_ENDBR
     mova                  m1, [base+filter_shuf1]
     sub                  tlq, 3
     sub                  tlq, hq
@@ -5270,6 +5378,7 @@ cglobal ipred_filter_8bpc, 3, 7, 8, dst, stride, tl, w
 
 ALIGN function_align
 .w8:
+    _CET_ENDBR
     movq                  m6, [tlq+1]                   ;_ _ _ 0 1 2 3 4
     sub                  tlq, 5
     sub                  tlq, hq
@@ -5294,6 +5403,7 @@ ALIGN function_align
 
 ALIGN function_align
 .w16:
+    _CET_ENDBR
     movu                  m6, [tlq+1]                   ;top row
     sub                  tlq, 5
     sub                  tlq, hq
@@ -5333,6 +5443,7 @@ ALIGN function_align
 
 ALIGN function_align
 .w32:
+    _CET_ENDBR
     movu                  m6, [tlq+1]                   ;top row
     lea              filterq, [tlq+17]
     sub                  tlq, 5
