Index: src/x86/filmgrain_sse.asm
--- src/x86/filmgrain_sse.asm.orig
+++ src/x86/filmgrain_sse.asm
@@ -156,6 +156,7 @@ cglobal generate_grain_y_8bpc, 2, 7 + 2 * ARCH_X86_64,
     jmp              r2
 
 .ar1:
+    _CET_ENDBR
 %if ARCH_X86_32
     DEFINE_ARGS buf, fg_data, cf3, unused, val3, min, max
 %elif WIN64
@@ -228,9 +229,11 @@ cglobal generate_grain_y_8bpc, 2, 7 + 2 * ARCH_X86_64,
     dec              hd
     jg .y_loop_ar1
 .ar0:
+    _CET_ENDBR
     RET
 
 .ar2:
+    _CET_ENDBR
 %if ARCH_X86_32
     ALLOC_STACK -16*8
 %endif
@@ -330,6 +333,7 @@ cglobal generate_grain_y_8bpc, 2, 7 + 2 * ARCH_X86_64,
     RET
 
 .ar3:
+    _CET_ENDBR
     DEFINE_ARGS buf, fg_data, shift
 %if ARCH_X86_32
     ALLOC_STACK  -16*14
@@ -596,6 +600,7 @@ cglobal generate_grain_uv_%1_8bpc, 1, 7 + 3 * ARCH_X86
     jmp              r5
 
 .ar0:
+    _CET_ENDBR
     DEFINE_ARGS buf, bufy, fg_data, uv, unused, shift
     movifnidn     bufyq, bufymp
 %if ARCH_X86_32
@@ -734,6 +739,7 @@ cglobal generate_grain_uv_%1_8bpc, 1, 7 + 3 * ARCH_X86
     RET
 
 .ar1:
+    _CET_ENDBR
 %if ARCH_X86_32
     RESET_STACK_STATE
 %endif
@@ -875,6 +881,7 @@ cglobal generate_grain_uv_%1_8bpc, 1, 7 + 3 * ARCH_X86
     RET
 
 .ar2:
+    _CET_ENDBR
 %if ARCH_X86_32
     ALLOC_STACK   -8*16
 %endif
@@ -1005,6 +1012,7 @@ cglobal generate_grain_uv_%1_8bpc, 1, 7 + 3 * ARCH_X86
     RET
 
 .ar3:
+    _CET_ENDBR
 %if ARCH_X86_32
     RESET_STACK_STATE
 %endif
