Index: src/x86/filmgrain_avx2.asm
--- src/x86/filmgrain_avx2.asm.orig
+++ src/x86/filmgrain_avx2.asm
@@ -151,6 +151,7 @@ cglobal generate_grain_y_8bpc, 2, 9, 8, buf, fg_data
     jmp              r5
 
 .ar1:
+    _CET_ENDBR
     DEFINE_ARGS buf, fg_data, cf3, shift, val3, min, max, x, val0
     mov          shiftd, [fg_dataq+FGData.ar_coeff_shift]
     movsx          cf3d, byte [fg_dataq+FGData.ar_coeffs_y+3]
@@ -200,9 +201,11 @@ cglobal generate_grain_y_8bpc, 2, 9, 8, buf, fg_data
     dec              hd
     jg .y_loop_ar1
 .ar0:
+    _CET_ENDBR
     RET
 
 .ar2:
+    _CET_ENDBR
 %if WIN64
     %assign stack_size_padded 168
     SUB             rsp, stack_size_padded
@@ -277,6 +280,7 @@ cglobal generate_grain_y_8bpc, 2, 9, 8, buf, fg_data
 
 INIT_YMM avx2
 .ar3:
+    _CET_ENDBR
 %if WIN64
     ALLOC_STACK   16*14
     %assign stack_size stack_size - 16*4
@@ -446,6 +450,7 @@ cglobal generate_grain_uv_%1_8bpc, 4, 10, 16, buf, buf
 
 INIT_YMM avx2
 .ar0:
+    _CET_ENDBR
     DEFINE_ARGS buf, bufy, fg_data, uv, unused, shift
     imul            uvd, 28
     mov          shiftd, [fg_dataq+FGData.ar_coeff_shift]
@@ -574,6 +579,7 @@ INIT_YMM avx2
 
 INIT_XMM avx2
 .ar1:
+    _CET_ENDBR
     DEFINE_ARGS buf, bufy, fg_data, uv, val3, cf3, min, max, x, shift
     imul            uvd, 28
     mov          shiftd, [fg_dataq+FGData.ar_coeff_shift]
@@ -656,6 +662,7 @@ INIT_XMM avx2
     RET
 
 .ar2:
+    _CET_ENDBR
     DEFINE_ARGS buf, bufy, fg_data, uv, unused, shift
     mov          shiftd, [fg_dataq+FGData.ar_coeff_shift]
     imul            uvd, 28
@@ -750,6 +757,7 @@ INIT_XMM avx2
 
 INIT_YMM avx2
 .ar3:
+    _CET_ENDBR
     DEFINE_ARGS buf, bufy, fg_data, uv, unused, shift
     mov          shiftd, [fg_dataq+FGData.ar_coeff_shift]
     imul            uvd, 28
