Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use BMI2 instructions to optimize compact protocol int64 code and deco… #2780

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
52 changes: 52 additions & 0 deletions lib/cpp/CMakeLists.txt
Expand Up @@ -124,6 +124,58 @@ if(UNIX)
endif()
endif()

# Probe the C++ compiler for optional x86 instruction-set extensions and, for
# each one it accepts, append the matching -m<isa> option(s) to CMAKE_CXX_FLAGS.
#
# Result (cache) variables: HAVE_BMI2, HAVE_BMI, HAVE_LZCNT, HAVE_SSE3,
# HAVE_AVX512BW_AVX512VL.
#
# Each extension is probed with ONLY its own flags. Probing all of them with
# one combined flag set would make every check fail as soon as the compiler
# rejects a single flag (for example a compiler without AVX-512 support),
# which would silently disable the unrelated BMI/LZCNT/SSE3 paths as well.
#
# NOTE(review): check_cxx_source_compiles() only proves that the *compiler*
# accepts the flag and the intrinsic; it does not test the CPU the binary will
# eventually run on. Confirm that enabling these flags unconditionally is
# acceptable for redistributed builds.
if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
  include(CheckCXXSourceCompiles)

  # thrift_enable_isa_if_supported(<result_var> <isa_flags> <probe_source>)
  #
  #   result_var   - cache variable that receives the check result
  #   isa_flags    - space-separated compiler options required by the probe
  #   probe_source - complete C++ program using an intrinsic of that extension
  #
  # Compiles probe_source with isa_flags added to CMAKE_REQUIRED_FLAGS and, on
  # success, appends isa_flags to the caller's CMAKE_CXX_FLAGS.
  function(thrift_enable_isa_if_supported result_var isa_flags probe_source)
    # A function has its own variable scope, so this change to
    # CMAKE_REQUIRED_FLAGS is dropped on return and needs no manual restore.
    set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${isa_flags}")
    check_cxx_source_compiles("${probe_source}" ${result_var})
    if (${result_var})
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${isa_flags}" PARENT_SCOPE)
    endif()
  endfunction()

  thrift_enable_isa_if_supported(HAVE_BMI2 "-mbmi2"
"
#include <immintrin.h>
int main(){unsigned int a,b;_pdep_u32(a,b); return 0;}
")

  thrift_enable_isa_if_supported(HAVE_BMI "-mbmi"
"
#include <immintrin.h>
int main(){unsigned int a; _tzcnt_u32(a); return 0;}
")

  thrift_enable_isa_if_supported(HAVE_LZCNT "-mlzcnt"
"
#include <immintrin.h>
int main(){unsigned int c;_lzcnt_u32(c); return 0;}
")

  thrift_enable_isa_if_supported(HAVE_SSE3 "-msse3"
"
#include <immintrin.h>
int main(){const __m128i* p;_mm_lddqu_si128(p); return 0;}
")

  # The masked byte compare needs AVX512BW and AVX512VL together, so both
  # options are probed (and enabled) as one unit.
  thrift_enable_isa_if_supported(HAVE_AVX512BW_AVX512VL "-mavx512bw -mavx512vl"
"
#include <immintrin.h>
int main(){__m128i a,b;_mm_mask_cmp_epi8_mask(0x3ff,a,b,_MM_CMPINT_NLT); return 0;}
")
endif ()

set(thriftcpp_threads_SOURCES
src/thrift/concurrency/ThreadFactory.cpp
src/thrift/concurrency/Thread.cpp
Expand Down
20 changes: 20 additions & 0 deletions lib/cpp/src/thrift/protocol/TCompactProtocol.h
Expand Up @@ -167,7 +167,14 @@ class TCompactProtocolT : public TVirtualProtocol<TCompactProtocolT<Transport_>
// End-of-list / end-of-set / end-of-field hooks. Each one does no work and
// returns 0.
// NOTE(review): the return value is presumably the number of bytes written,
// i.e. this protocol emits no end marker for these constructs - confirm.
uint32_t writeListEnd() { return 0; }
uint32_t writeSetEnd() { return 0; }
uint32_t writeFieldEnd() { return 0; }
private:
// 64-bit varint writer that is declared regardless of the enabled
// instruction-set macros.
// NOTE(review): the meaning of the needConsume template flag is not visible
// in this hunk - document it next to the definition.
// NOTE(review): __attribute__((always_inline)) is GCC/Clang syntax; confirm
// this header still builds with the other compilers the project supports.
template<bool needConsume=true>
inline __attribute__((always_inline)) uint32_t writeVarint64NoneBMI2(uint64_t n);

// Variant declared only when the compiler defines both __BMI2__ and
// __LZCNT__ (i.e. the translation unit is built with those extensions on).
#if defined(__BMI2__) && defined(__LZCNT__)
template<bool needConsume=true>
inline __attribute__((always_inline)) uint32_t writeVarint64BMI2(uint64_t n);
#endif
protected:
int32_t writeFieldBeginInternal(const char* name,
const TType fieldType,
Expand Down Expand Up @@ -223,6 +230,19 @@ class TCompactProtocolT : public TVirtualProtocol<TCompactProtocolT<Transport_>
// End-of-list / end-of-set read hooks. Each one does no work and returns 0.
// NOTE(review): the return value is presumably the number of bytes consumed -
// confirm against the base protocol interface.
uint32_t readListEnd() { return 0; }
uint32_t readSetEnd() { return 0; }

private:
// 64-bit varint readers that are declared regardless of the enabled
// instruction-set macros. Both take a byte buffer, its size, and an int64_t
// output reference; the fast path takes a const buffer, the slow path a
// mutable one.
// NOTE(review): the meaning of needConsume, and when the fast vs. slow path
// is selected, is not visible in this hunk - confirm at the definitions.
// NOTE(review): __attribute__((always_inline)) is GCC/Clang syntax; confirm
// this header still builds with the other compilers the project supports.
template<bool needConsume=true>
inline __attribute__((always_inline)) uint32_t readVarint64FastPathNoneAVX(const uint8_t* buf,const std::size_t bufsz,int64_t& i64);
template<bool needConsume=true>
inline __attribute__((always_inline)) uint32_t readVarint64SlowPathNoneAVX(uint8_t* buf,const std::size_t bufsz,int64_t& i64);
// Variants with the same parameter lists, declared only when the compiler
// defines all of __SSE3__, __AVX512BW__, __AVX512VL__, __BMI2__ and __BMI__.
#if defined(__SSE3__) && defined(__AVX512BW__) && defined(__AVX512VL__) && \
defined(__BMI2__) && defined(__BMI__)
template<bool needConsume=true>
inline __attribute__((always_inline)) uint32_t readVarint64FastPathAVX(const uint8_t* buf,const std::size_t bufsz,int64_t& i64);
template<bool needConsume=true>
inline __attribute__((always_inline)) uint32_t readVarint64SlowPathAVX(uint8_t* buf,const std::size_t bufsz,int64_t& i64);
#endif


protected:
// Varint readers for 32-bit and 64-bit values; the decoded value is delivered
// through the reference parameter.
// NOTE(review): the uint32_t result is presumably the number of bytes
// consumed from the transport - confirm at the definitions.
uint32_t readVarint32(int32_t& i32);
uint32_t readVarint64(int64_t& i64);
Expand Down