Skip to content

Commit

Permalink
x86-64 SIMD: Support CET if C compiler enables it
Browse files Browse the repository at this point in the history
- Detect at configure time, via the __CET__ C preprocessor macro,
  whether the C compiler will include either indirect branch tracking
  (IBT) or shadow stack support, and define a NASM macro (__CET__) if
  so.

- Modify the x86-64 SIMD code so that it includes appropriate endbr64
  instructions (to support IBT) and an appropriate .note.gnu.property
  section (to support both IBT and shadow stack) when __CET__ is
  defined.

Closes #350
  • Loading branch information
dcommander committed Feb 29, 2024
1 parent 1335547 commit 3202feb
Show file tree
Hide file tree
Showing 31 changed files with 81 additions and 1 deletion.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Expand Up @@ -149,7 +149,7 @@ jobs:
mkdir build
pushd build
cmake -G"Unix Makefiles" -DWITH_JPEG8=1 \
-DCMAKE_C_FLAGS='--std=gnu90 -Wall -Werror -Wextra -Wpedantic -pedantic-errors -Wdouble-promotion -Wformat-overflow=2 -Wformat-security -Wformat-signedness -Wformat-truncation=2 -Wformat-y2k -Wmissing-include-dirs -Wshift-overflow=2 -Wswitch-bool -Wno-unused-parameter -Wuninitialized -Wstrict-overflow=2 -Wstringop-overflow=4 -Wstringop-truncation -Wduplicated-branches -Wduplicated-cond -Wdeclaration-after-statement -Wshadow -Wunsafe-loop-optimizations -Wundef -Wcast-align -Wno-clobbered -Wjump-misses-init -Wno-sign-compare -Wlogical-op -Waggregate-return -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-declarations -Wpacked -Wredundant-decls -Wnested-externs -Winline -Wno-long-long -Wdisabled-optimization -Wno-overlength-strings' \
-DCMAKE_C_FLAGS='--std=gnu90 -Wall -Werror -Wextra -Wpedantic -pedantic-errors -Wdouble-promotion -Wformat-overflow=2 -Wformat-security -Wformat-signedness -Wformat-truncation=2 -Wformat-y2k -Wmissing-include-dirs -Wshift-overflow=2 -Wswitch-bool -Wno-unused-parameter -Wuninitialized -Wstrict-overflow=2 -Wstringop-overflow=4 -Wstringop-truncation -Wduplicated-branches -Wduplicated-cond -Wdeclaration-after-statement -Wshadow -Wunsafe-loop-optimizations -Wundef -Wcast-align -Wno-clobbered -Wjump-misses-init -Wno-sign-compare -Wlogical-op -Waggregate-return -Wstrict-prototypes -Wold-style-definition -Wmissing-prototypes -Wmissing-declarations -Wpacked -Wredundant-decls -Wnested-externs -Winline -Wno-long-long -Wdisabled-optimization -Wno-overlength-strings -fcf-protection' \
..
export NUMCPUS=`grep -c '^processor' /proc/cpuinfo`
make -j$NUMCPUS --load-average=$NUMCPUS
Expand Down
2 changes: 2 additions & 0 deletions BUILDING.md
Expand Up @@ -14,6 +14,8 @@ Build Requirements
(if building x86 or x86-64 SIMD extensions)
* If using NASM, 2.13 or later is required.
* If using Yasm, 1.2.0 or later is required.
* NASM 2.15 or later is required if building libjpeg-turbo with Intel
Control-flow Enforcement Technology (CET) support.
* If building on macOS, NASM or Yasm can be obtained from
[MacPorts](http://www.macports.org/) or [Homebrew](http://brew.sh/).
- NOTE: Currently, if it is desirable to hide the SIMD function symbols in
Expand Down
4 changes: 4 additions & 0 deletions ChangeLog.md
Expand Up @@ -8,6 +8,10 @@ libjpeg-turbo components to depend on the Visual C++ run-time DLL when built
with Visual C++ and CMake 3.15 or later, regardless of value of the
`WITH_CRT_DLL` CMake variable.

2. The x86-64 SIMD extensions now include support for Intel Control-flow
Enforcement Technology (CET), which is enabled automatically if CET is enabled
in the C compiler.


3.0.2
=====
Expand Down
12 changes: 12 additions & 0 deletions simd/CMakeLists.txt
Expand Up @@ -96,6 +96,18 @@ if(NOT WIN32 AND (CMAKE_POSITION_INDEPENDENT_CODE OR ENABLE_SHARED))
set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -DPIC")
endif()

# Intel CET support only applies to x86-64 ELF objects.  Ask the C compiler
# whether either of the two low bits of its __CET__ macro is set (i.e. whether
# indirect branch tracking or shadow stack support is enabled), and, if so,
# define __CET__ for NASM as well so that the SIMD sources are assembled with
# matching CET support.
if(CPU_TYPE STREQUAL "x86_64" AND CMAKE_ASM_NASM_OBJECT_FORMAT MATCHES "^elf")
  check_c_source_compiles(
    "
#if (__CET__ & 3) == 0
#error \"CET not enabled\"
#endif
int main(void) { return 0; }"
    HAVE_CET)

  # The probe only compiles when the preprocessor check above passes.
  if(HAVE_CET)
    set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -D__CET__")
  endif()
endif()

string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC)
set(EFFECTIVE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} ${CMAKE_ASM_NASM_FLAGS_${CMAKE_BUILD_TYPE_UC}}")
message(STATUS "CMAKE_ASM_NASM_FLAGS = ${EFFECTIVE_ASM_NASM_FLAGS}")
Expand Down
21 changes: 21 additions & 0 deletions simd/nasm/jsimdext.inc
Expand Up @@ -76,6 +76,14 @@
; mark stack as non-executable
section .note.GNU-stack noalloc noexec nowrite progbits

; When __CET__ is defined (e.g. via -D__CET__ on the NASM command line) and
; the target is x86-64, emit a .note.gnu.property ELF note so that the object
; file advertises support for both indirect branch tracking and shadow stack.
%ifdef __CET__
%ifdef __x86_64__
section .note.gnu.property note alloc noexec align=8
; Note header: name size = 4, descriptor size = 16, note type = 5, followed by
; the 4-byte name "GNU\0" (0x00554e47 when stored little-endian).
dd 0x00000004, 0x00000010, 0x00000005, 0x00554e47
; Descriptor (16 bytes): property type 0xc0000002, data size = 4, data = 3,
; then 4 bytes of zero padding up to the 8-byte alignment.
; NOTE(review): per the Linux x86-64 psABI extensions, 0xc0000002 should be
; GNU_PROPERTY_X86_FEATURE_1_AND and the value 3 should be the IBT and
; shadow stack feature bits combined -- confirm against the specification.
dd 0xc0000002, 0x00000004, 0x00000003, 0x00000000
%endif
%endif

; -- segment definition --
;
%ifdef __x86_64__
Expand Down Expand Up @@ -513,6 +521,19 @@ const_base:

%endif

; ENDBR64 -- placed at the entry point of each global SIMD function.
;
; Without __CET__, the macro expands to nothing, so the generated code is
; unchanged.  With __CET__, it emits the four bytes F3 0F 1E FA (the endbr64
; instruction, written out as raw data) to mark the location as a valid
; indirect branch target.
%ifndef __CET__

%imacro ENDBR64 0
%endmacro

%else

%imacro ENDBR64 0
    db 0xf3, 0x0f, 0x1e, 0xfa
%endmacro

%endif

; --------------------------------------------------------------------------
; Defines picked up from the C headers
;
Expand Down
1 change: 1 addition & 0 deletions simd/x86_64/jccolext-avx2.asm
Expand Up @@ -41,6 +41,7 @@
GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_avx2)

EXTN(jsimd_rgb_ycc_convert_avx2):
ENDBR64
push rbp
mov rbp, rsp
push r15
Expand Down
1 change: 1 addition & 0 deletions simd/x86_64/jccolext-sse2.asm
Expand Up @@ -40,6 +40,7 @@
GLOBAL_FUNCTION(jsimd_rgb_ycc_convert_sse2)

EXTN(jsimd_rgb_ycc_convert_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
Expand Down
1 change: 1 addition & 0 deletions simd/x86_64/jcgryext-avx2.asm
Expand Up @@ -41,6 +41,7 @@
GLOBAL_FUNCTION(jsimd_rgb_gray_convert_avx2)

EXTN(jsimd_rgb_gray_convert_avx2):
ENDBR64
push rbp
mov rbp, rsp
push r15
Expand Down
1 change: 1 addition & 0 deletions simd/x86_64/jcgryext-sse2.asm
Expand Up @@ -40,6 +40,7 @@
GLOBAL_FUNCTION(jsimd_rgb_gray_convert_sse2)

EXTN(jsimd_rgb_gray_convert_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
Expand Down
1 change: 1 addition & 0 deletions simd/x86_64/jchuff-sse2.asm
Expand Up @@ -261,6 +261,7 @@ times 1 << 15 db 16
GLOBAL_FUNCTION(jsimd_huff_encode_one_block_sse2)

EXTN(jsimd_huff_encode_one_block_sse2):
ENDBR64
push rbp
mov rbp, rsp

Expand Down
2 changes: 2 additions & 0 deletions simd/x86_64/jcphuff-sse2.asm
Expand Up @@ -283,6 +283,7 @@
GLOBAL_FUNCTION(jsimd_encode_mcu_AC_first_prepare_sse2)

EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
ENDBR64
push rbp
mov rbp, rsp
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
Expand Down Expand Up @@ -446,6 +447,7 @@ EXTN(jsimd_encode_mcu_AC_first_prepare_sse2):
GLOBAL_FUNCTION(jsimd_encode_mcu_AC_refine_prepare_sse2)

EXTN(jsimd_encode_mcu_AC_refine_prepare_sse2):
ENDBR64
push rbp
mov rbp, rsp
and rsp, byte (-SIZEOF_XMMWORD) ; align to 128 bits
Expand Down
2 changes: 2 additions & 0 deletions simd/x86_64/jcsample-avx2.asm
Expand Up @@ -44,6 +44,7 @@
GLOBAL_FUNCTION(jsimd_h2v1_downsample_avx2)

EXTN(jsimd_h2v1_downsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 6
Expand Down Expand Up @@ -205,6 +206,7 @@ EXTN(jsimd_h2v1_downsample_avx2):
GLOBAL_FUNCTION(jsimd_h2v2_downsample_avx2)

EXTN(jsimd_h2v2_downsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 6
Expand Down
2 changes: 2 additions & 0 deletions simd/x86_64/jcsample-sse2.asm
Expand Up @@ -43,6 +43,7 @@
GLOBAL_FUNCTION(jsimd_h2v1_downsample_sse2)

EXTN(jsimd_h2v1_downsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 6
Expand Down Expand Up @@ -187,6 +188,7 @@ EXTN(jsimd_h2v1_downsample_sse2):
GLOBAL_FUNCTION(jsimd_h2v2_downsample_sse2)

EXTN(jsimd_h2v2_downsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 6
Expand Down
1 change: 1 addition & 0 deletions simd/x86_64/jdcolext-avx2.asm
Expand Up @@ -42,6 +42,7 @@
GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_avx2)

EXTN(jsimd_ycc_rgb_convert_avx2):
ENDBR64
push rbp
mov rbp, rsp
push r15
Expand Down
1 change: 1 addition & 0 deletions simd/x86_64/jdcolext-sse2.asm
Expand Up @@ -41,6 +41,7 @@
GLOBAL_FUNCTION(jsimd_ycc_rgb_convert_sse2)

EXTN(jsimd_ycc_rgb_convert_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
Expand Down
2 changes: 2 additions & 0 deletions simd/x86_64/jdmrgext-avx2.asm
Expand Up @@ -42,6 +42,7 @@
GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_avx2)

EXTN(jsimd_h2v1_merged_upsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
push r15
Expand Down Expand Up @@ -506,6 +507,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_avx2)

EXTN(jsimd_h2v2_merged_upsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 4
Expand Down
2 changes: 2 additions & 0 deletions simd/x86_64/jdmrgext-sse2.asm
Expand Up @@ -41,6 +41,7 @@
GLOBAL_FUNCTION(jsimd_h2v1_merged_upsample_sse2)

EXTN(jsimd_h2v1_merged_upsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
Expand Down Expand Up @@ -448,6 +449,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
GLOBAL_FUNCTION(jsimd_h2v2_merged_upsample_sse2)

EXTN(jsimd_h2v2_merged_upsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 4
Expand Down
4 changes: 4 additions & 0 deletions simd/x86_64/jdsample-avx2.asm
Expand Up @@ -62,6 +62,7 @@ PW_EIGHT times 16 dw 8
GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_avx2)

EXTN(jsimd_h2v1_fancy_upsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
PUSH_XMM 3
Expand Down Expand Up @@ -215,6 +216,7 @@ EXTN(jsimd_h2v1_fancy_upsample_avx2):
GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_avx2)

EXTN(jsimd_h2v2_fancy_upsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
push r15
Expand Down Expand Up @@ -524,6 +526,7 @@ EXTN(jsimd_h2v2_fancy_upsample_avx2):
GLOBAL_FUNCTION(jsimd_h2v1_upsample_avx2)

EXTN(jsimd_h2v1_upsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 4
Expand Down Expand Up @@ -612,6 +615,7 @@ EXTN(jsimd_h2v1_upsample_avx2):
GLOBAL_FUNCTION(jsimd_h2v2_upsample_avx2)

EXTN(jsimd_h2v2_upsample_avx2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 4
Expand Down
4 changes: 4 additions & 0 deletions simd/x86_64/jdsample-sse2.asm
Expand Up @@ -61,6 +61,7 @@ PW_EIGHT times 8 dw 8
GLOBAL_FUNCTION(jsimd_h2v1_fancy_upsample_sse2)

EXTN(jsimd_h2v1_fancy_upsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 4
Expand Down Expand Up @@ -202,6 +203,7 @@ EXTN(jsimd_h2v1_fancy_upsample_sse2):
GLOBAL_FUNCTION(jsimd_h2v2_fancy_upsample_sse2)

EXTN(jsimd_h2v2_fancy_upsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
Expand Down Expand Up @@ -497,6 +499,7 @@ EXTN(jsimd_h2v2_fancy_upsample_sse2):
GLOBAL_FUNCTION(jsimd_h2v1_upsample_sse2)

EXTN(jsimd_h2v1_upsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 4
Expand Down Expand Up @@ -583,6 +586,7 @@ EXTN(jsimd_h2v1_upsample_sse2):
GLOBAL_FUNCTION(jsimd_h2v2_upsample_sse2)

EXTN(jsimd_h2v2_upsample_sse2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 4
Expand Down
1 change: 1 addition & 0 deletions simd/x86_64/jfdctflt-sse.asm
Expand Up @@ -66,6 +66,7 @@ PD_1_306 times 4 dd 1.306562964876376527856643
GLOBAL_FUNCTION(jsimd_fdct_float_sse)

EXTN(jsimd_fdct_float_sse):
ENDBR64
push rbp
mov rbp, rsp
push r15
Expand Down
1 change: 1 addition & 0 deletions simd/x86_64/jfdctfst-sse2.asm
Expand Up @@ -81,6 +81,7 @@ PW_F1306 times 8 dw F_1_306 << CONST_SHIFT
GLOBAL_FUNCTION(jsimd_fdct_ifast_sse2)

EXTN(jsimd_fdct_ifast_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
Expand Down
1 change: 1 addition & 0 deletions simd/x86_64/jfdctint-avx2.asm
Expand Up @@ -260,6 +260,7 @@ PW_1_NEG1 times 8 dw 1
GLOBAL_FUNCTION(jsimd_fdct_islow_avx2)

EXTN(jsimd_fdct_islow_avx2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 1
Expand Down
1 change: 1 addition & 0 deletions simd/x86_64/jfdctint-sse2.asm
Expand Up @@ -102,6 +102,7 @@ PW_DESCALE_P2X times 8 dw 1 << (PASS1_BITS - 1)
GLOBAL_FUNCTION(jsimd_fdct_islow_sse2)

EXTN(jsimd_fdct_islow_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
Expand Down
1 change: 1 addition & 0 deletions simd/x86_64/jidctflt-sse2.asm
Expand Up @@ -76,6 +76,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
GLOBAL_FUNCTION(jsimd_idct_float_sse2)

EXTN(jsimd_idct_float_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
Expand Down
1 change: 1 addition & 0 deletions simd/x86_64/jidctfst-sse2.asm
Expand Up @@ -95,6 +95,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
GLOBAL_FUNCTION(jsimd_idct_ifast_sse2)

EXTN(jsimd_idct_ifast_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
Expand Down
1 change: 1 addition & 0 deletions simd/x86_64/jidctint-avx2.asm
Expand Up @@ -282,6 +282,7 @@ PW_1_NEG1 times 8 dw 1
GLOBAL_FUNCTION(jsimd_idct_islow_avx2)

EXTN(jsimd_idct_islow_avx2):
ENDBR64
push rbp
mov rbp, rsp ; rbp = aligned rbp
PUSH_XMM 4
Expand Down
1 change: 1 addition & 0 deletions simd/x86_64/jidctint-sse2.asm
Expand Up @@ -108,6 +108,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
GLOBAL_FUNCTION(jsimd_idct_islow_sse2)

EXTN(jsimd_idct_islow_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
Expand Down
2 changes: 2 additions & 0 deletions simd/x86_64/jidctred-sse2.asm
Expand Up @@ -116,6 +116,7 @@ PB_CENTERJSAMP times 16 db CENTERJSAMPLE
GLOBAL_FUNCTION(jsimd_idct_4x4_sse2)

EXTN(jsimd_idct_4x4_sse2):
ENDBR64
push rbp
mov rbp, rsp
push r15
Expand Down Expand Up @@ -413,6 +414,7 @@ EXTN(jsimd_idct_4x4_sse2):
GLOBAL_FUNCTION(jsimd_idct_2x2_sse2)

EXTN(jsimd_idct_2x2_sse2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 4
Expand Down
2 changes: 2 additions & 0 deletions simd/x86_64/jquantf-sse2.asm
Expand Up @@ -37,6 +37,7 @@
GLOBAL_FUNCTION(jsimd_convsamp_float_sse2)

EXTN(jsimd_convsamp_float_sse2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 3
Expand Down Expand Up @@ -109,6 +110,7 @@ EXTN(jsimd_convsamp_float_sse2):
GLOBAL_FUNCTION(jsimd_quantize_float_sse2)

EXTN(jsimd_quantize_float_sse2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 3
Expand Down
2 changes: 2 additions & 0 deletions simd/x86_64/jquanti-avx2.asm
Expand Up @@ -38,6 +38,7 @@
GLOBAL_FUNCTION(jsimd_convsamp_avx2)

EXTN(jsimd_convsamp_avx2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 3
Expand Down Expand Up @@ -115,6 +116,7 @@ EXTN(jsimd_convsamp_avx2):
GLOBAL_FUNCTION(jsimd_quantize_avx2)

EXTN(jsimd_quantize_avx2):
ENDBR64
push rbp
mov rbp, rsp
COLLECT_ARGS 3
Expand Down

0 comments on commit 3202feb

Please sign in to comment.