// Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef OPENSSL_HEADER_CRYPTO_INTERNAL_H #define OPENSSL_HEADER_CRYPTO_INTERNAL_H #include // Must be first. #include "ring-core/check.h" #if defined(__clang__) // Don't require prototypes for functions defined in C that are only // used from Rust. #pragma GCC diagnostic ignored "-Wmissing-prototypes" #endif #if defined(__GNUC__) && \ (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40800 // |alignas| and |alignof| were added in C11. GCC added support in version 4.8. // Testing for __STDC_VERSION__/__cplusplus doesn't work because 4.7 already // reports support for C11. 
#define alignas(x) __attribute__ ((aligned (x))) #elif defined(_MSC_VER) && !defined(__clang__) #define alignas(x) __declspec(align(x)) #else #include #endif #if defined(__clang__) || defined(__GNUC__) #define RING_NOINLINE __attribute__((noinline)) #elif defined(_MSC_VER) #define RING_NOINLINE __declspec(noinline) #else #define RING_NOINLINE #endif // Some C compilers require a useless cast when dealing with arrays for the // reason explained in // https://gustedt.wordpress.com/2011/02/12/const-and-arrays/ #if defined(__clang__) || defined(_MSC_VER) #define RING_CORE_POINTLESS_ARRAY_CONST_CAST(cast) #else #define RING_CORE_POINTLESS_ARRAY_CONST_CAST(cast) cast #endif // `uint8_t` isn't guaranteed to be 'unsigned char' and only 'char' and // 'unsigned char' are allowed to alias according to ISO C. typedef unsigned char aliasing_uint8_t; #if (!defined(_MSC_VER) || defined(__clang__)) && defined(OPENSSL_64_BIT) #define BORINGSSL_HAS_UINT128 typedef __int128_t int128_t; typedef __uint128_t uint128_t; #endif // GCC-like compilers indicate SSE2 with |__SSE2__|. MSVC leaves the caller to // know that x86_64 has SSE2, and uses _M_IX86_FP to indicate SSE2 on x86. // https://learn.microsoft.com/en-us/cpp/preprocessor/predefined-macros?view=msvc-170 #if defined(OPENSSL_X86) || defined(OPENSSL_X86_64) # if defined(_MSC_VER) && !defined(__clang__) # if defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2) # define OPENSSL_SSE2 # else # error "SSE2 is required." # endif # elif !defined(__SSE2__) # error "SSE2 is required." # endif #endif // For convenience in testing the fallback code, we allow disabling SSE2 // intrinsics via |OPENSSL_NO_SSE2_FOR_TESTING|. We require SSE2 on x86 and // x86_64, so we would otherwise need to test such code on a non-x86 platform. // // This does not remove the above requirement for SSE2 support with assembly // optimizations. 
// It only disables some intrinsics-based optimizations so that
// we can test the fallback code on CI.
#if defined(OPENSSL_SSE2) && defined(OPENSSL_NO_SSE2_FOR_TESTING)
#undef OPENSSL_SSE2
#endif


// Pointer utility functions.

// buffers_alias reports whether the |a_bytes|-byte range starting at |a| and
// the |b_bytes|-byte range starting at |b| share at least one byte. It returns
// one when they do and zero when they do not.
static inline int buffers_alias(const void *a, size_t a_bytes,
                                const void *b, size_t b_bytes) {
  // The comparison is done on integers rather than pointers: relational
  // comparison of pointers into unrelated objects is undefined in C, while
  // converting a pointer to an integer is only implementation-defined. The
  // implementation is assumed to define that conversion sensibly.
  uintptr_t a_u = (uintptr_t)a;
  uintptr_t b_u = (uintptr_t)b;

  // |a| ends at or before the start of |b|: no overlap.
  if (a_u + a_bytes <= b_u) {
    return 0;
  }
  // Otherwise they overlap exactly when |b| extends past the start of |a|.
  return b_u + b_bytes > a_u;
}


// Constant-time utility functions.
//
// The following methods return a bitmask of all ones (0xff...f) for true and 0
// for false. This is useful for choosing a value based on the result of a
// conditional in constant time. For example,
//
// if (a < b) {
//   c = a;
// } else {
//   c = b;
// }
//
// can be written as
//
// crypto_word_t lt = constant_time_lt_w(a, b);
// c = constant_time_select_w(lt, a, b);

// The helpers below narrow and mix integer widths on purpose, so conversion
// warnings are silenced for that region of the header.
#if defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
#pragma GCC diagnostic ignored "-Wsign-conversion"
#endif
#if defined(_MSC_VER) && !defined(__clang__)
#pragma warning(push)
// '=': conversion from 'crypto_word_t' to 'uint8_t', possible loss of data
#pragma warning(disable: 4242)
// 'initializing': conversion from 'crypto_word_t' to 'uint8_t', ...
#pragma warning(disable: 4244)
#endif

// crypto_word_t is the type that most constant-time functions use. Ideally we
// would like it to be |size_t|, but NaCl builds in 64-bit mode with 32-bit
// pointers, which means that |size_t| can be 32 bits when |BN_ULONG| is 64
// bits.
Since we want to be able to do constant-time operations on a // |BN_ULONG|, |crypto_word_t| is defined as an unsigned value with the native // word length. #if defined(OPENSSL_64_BIT) typedef uint64_t crypto_word_t; #define CRYPTO_WORD_BITS (64u) #elif defined(OPENSSL_32_BIT) typedef uint32_t crypto_word_t; #define CRYPTO_WORD_BITS (32u) #else #error "Must define either OPENSSL_32_BIT or OPENSSL_64_BIT" #endif #define CONSTTIME_TRUE_W ~((crypto_word_t)0) #define CONSTTIME_FALSE_W ((crypto_word_t)0) // value_barrier_w returns |a|, but prevents GCC and Clang from reasoning about // the returned value. This is used to mitigate compilers undoing constant-time // code, until we can express our requirements directly in the language. // // Note the compiler is aware that |value_barrier_w| has no side effects and // always has the same output for a given input. This allows it to eliminate // dead code, move computations across loops, and vectorize. static inline crypto_word_t value_barrier_w(crypto_word_t a) { #if defined(__GNUC__) || defined(__clang__) __asm__("" : "+r"(a) : /* no inputs */); #endif return a; } // value_barrier_u32 behaves like |value_barrier_w| but takes a |uint32_t|. static inline uint32_t value_barrier_u32(uint32_t a) { #if defined(__GNUC__) || defined(__clang__) __asm__("" : "+r"(a) : /* no inputs */); #endif return a; } // |value_barrier_u8| could be defined as above, but compilers other than // clang seem to still materialize 0x00..00MM instead of reusing 0x??..??MM. // constant_time_msb_w returns the given value with the MSB copied to all the // other bits. static inline crypto_word_t constant_time_msb_w(crypto_word_t a) { return 0u - (a >> (sizeof(a) * 8 - 1)); } // constant_time_is_zero returns 0xff..f if a == 0 and 0 otherwise. 
static inline crypto_word_t constant_time_is_zero_w(crypto_word_t a) { // Here is an SMT-LIB verification of this formula: // // (define-fun is_zero ((a (_ BitVec 32))) (_ BitVec 32) // (bvand (bvnot a) (bvsub a #x00000001)) // ) // // (declare-fun a () (_ BitVec 32)) // // (assert (not (= (= #x00000001 (bvlshr (is_zero a) #x0000001f)) (= a #x00000000)))) // (check-sat) // (get-model) return constant_time_msb_w(~a & (a - 1)); } static inline crypto_word_t constant_time_is_nonzero_w(crypto_word_t a) { return ~constant_time_is_zero_w(a); } // constant_time_eq_w returns 0xff..f if a == b and 0 otherwise. static inline crypto_word_t constant_time_eq_w(crypto_word_t a, crypto_word_t b) { return constant_time_is_zero_w(a ^ b); } // constant_time_select_w returns (mask & a) | (~mask & b). When |mask| is all // 1s or all 0s (as returned by the methods above), the select methods return // either |a| (if |mask| is nonzero) or |b| (if |mask| is zero). static inline crypto_word_t constant_time_select_w(crypto_word_t mask, crypto_word_t a, crypto_word_t b) { // Clang recognizes this pattern as a select. While it usually transforms it // to a cmov, it sometimes further transforms it into a branch, which we do // not want. // // Hiding the value of the mask from the compiler evades this transformation. mask = value_barrier_w(mask); return (mask & a) | (~mask & b); } // constant_time_select_8 acts like |constant_time_select| but operates on // 8-bit values. static inline uint8_t constant_time_select_8(crypto_word_t mask, uint8_t a, uint8_t b) { // |mask| is a word instead of |uint8_t| to avoid materializing 0x000..0MM // Making both |mask| and its value barrier |uint8_t| would allow the compiler // to materialize 0x????..?MM instead, but only clang is that clever. // However, vectorization of bitwise operations seems to work better on // |uint8_t| than a mix of |uint64_t| and |uint8_t|, so |m| is cast to // |uint8_t| after the value barrier but before the bitwise operations. 
uint8_t m = value_barrier_w(mask); return (m & a) | (~m & b); } // constant_time_conditional_memcpy copies |n| bytes from |src| to |dst| if // |mask| is 0xff..ff and does nothing if |mask| is 0. The |n|-byte memory // ranges at |dst| and |src| must not overlap, as when calling |memcpy|. static inline void constant_time_conditional_memcpy(void *dst, const void *src, const size_t n, const crypto_word_t mask) { debug_assert_nonsecret(!buffers_alias(dst, n, src, n)); uint8_t *out = (uint8_t *)dst; const uint8_t *in = (const uint8_t *)src; for (size_t i = 0; i < n; i++) { out[i] = constant_time_select_8(mask, in[i], out[i]); } } // constant_time_conditional_memxor xors |n| bytes from |src| to |dst| if // |mask| is 0xff..ff and does nothing if |mask| is 0. The |n|-byte memory // ranges at |dst| and |src| must not overlap, as when calling |memcpy|. static inline void constant_time_conditional_memxor(void *dst, const void *src, size_t n, const crypto_word_t mask) { debug_assert_nonsecret(!buffers_alias(dst, n, src, n)); aliasing_uint8_t *out = dst; const aliasing_uint8_t *in = src; #if defined(__GNUC__) && !defined(__clang__) // gcc 13.2.0 doesn't automatically vectorize this loop regardless of barrier typedef aliasing_uint8_t v32u8 __attribute__((vector_size(32), aligned(1), may_alias)); size_t n_vec = n&~(size_t)31; v32u8 masks = ((aliasing_uint8_t)mask-(v32u8){}); // broadcast for (size_t i = 0; i < n_vec; i += 32) { *(v32u8*)&out[i] ^= masks & *(v32u8 const*)&in[i]; } out += n_vec; n -= n_vec; #endif for (size_t i = 0; i < n; i++) { out[i] ^= value_barrier_w(mask) & in[i]; } } #if defined(BORINGSSL_CONSTANT_TIME_VALIDATION) // CONSTTIME_SECRET takes a pointer and a number of bytes and marks that region // of memory as secret. Secret data is tracked as it flows to registers and // other parts of a memory. If secret data is used as a condition for a branch, // or as a memory index, it will trigger warnings in valgrind. 
#define CONSTTIME_SECRET(ptr, len) VALGRIND_MAKE_MEM_UNDEFINED(ptr, len) // CONSTTIME_DECLASSIFY takes a pointer and a number of bytes and marks that // region of memory as public. Public data is not subject to constant-time // rules. #define CONSTTIME_DECLASSIFY(ptr, len) VALGRIND_MAKE_MEM_DEFINED(ptr, len) #else #define CONSTTIME_SECRET(ptr, len) #define CONSTTIME_DECLASSIFY(ptr, len) #endif // BORINGSSL_CONSTANT_TIME_VALIDATION static inline crypto_word_t constant_time_declassify_w(crypto_word_t v) { // Return |v| through a value barrier to be safe. Valgrind-based constant-time // validation is partly to check the compiler has not undone any constant-time // work. Any place |BORINGSSL_CONSTANT_TIME_VALIDATION| influences // optimizations, this validation is inaccurate. // // However, by sending pointers through valgrind, we likely inhibit escape // analysis. On local variables, particularly booleans, we likely // significantly impact optimizations. // // Thus, to be safe, stick a value barrier, in hopes of comparably inhibiting // compiler analysis. CONSTTIME_DECLASSIFY(&v, sizeof(v)); return value_barrier_w(v); } static inline int constant_time_declassify_int(int v) { OPENSSL_STATIC_ASSERT(sizeof(uint32_t) == sizeof(int), "int is not the same size as uint32_t"); // See comment above. CONSTTIME_DECLASSIFY(&v, sizeof(v)); return value_barrier_u32((uint32_t)v); } #if defined(_MSC_VER) && !defined(__clang__) // '=': conversion from 'int64_t' to 'int32_t', possible loss of data #pragma warning(pop) #endif #if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop #endif // declassify_assert behaves like |assert| but declassifies the result of // evaluating |expr|. This allows the assertion to branch on the (presumably // public) result, but still ensures that values leading up to the computation // were secret. #define declassify_assert(expr) dev_assert_secret(constant_time_declassify_int(expr)) // Endianness conversions. 
// CRYPTO_bswap4 and CRYPTO_bswap8 reverse the byte order of a 32-bit and a
// 64-bit value respectively, using a compiler intrinsic where one is known.
#if defined(__GNUC__) && __GNUC__ >= 2
static inline uint32_t CRYPTO_bswap4(uint32_t x) {
  return __builtin_bswap32(x);
}

static inline uint64_t CRYPTO_bswap8(uint64_t x) {
  return __builtin_bswap64(x);
}
#elif defined(_MSC_VER)
// <stdlib.h> declares the |_byteswap_*| intrinsics; it is included at a
// lowered warning level.
#pragma warning(push, 3)
#include <stdlib.h>
#pragma warning(pop)
#pragma intrinsic(_byteswap_ulong, _byteswap_uint64)
static inline uint32_t CRYPTO_bswap4(uint32_t x) {
  return _byteswap_ulong(x);
}

static inline uint64_t CRYPTO_bswap8(uint64_t x) {
  return _byteswap_uint64(x);
}
#else
// Portable fallback for compilers without a known byte-swap intrinsic.
static inline uint32_t CRYPTO_bswap4(uint32_t x) {
  // Swap the two 16-bit halves, then the bytes within each half.
  x = (x >> 16) | (x << 16);
  x = ((x & 0xff00ff00u) >> 8) | ((x & 0x00ff00ffu) << 8);
  return x;
}

static inline uint64_t CRYPTO_bswap8(uint64_t x) {
  // Byte-swap each 32-bit half and exchange their positions.
  return (uint64_t)CRYPTO_bswap4((uint32_t)(x >> 32)) |
         ((uint64_t)CRYPTO_bswap4((uint32_t)x) << 32);
}
#endif

#if !defined(RING_CORE_NOSTDLIBINC)
#include <string.h>
#endif

// OPENSSL_memcpy copies |n| bytes from |src| to |dst| and returns |dst|. When
// |n| is zero it returns immediately without touching either pointer, so the
// library |memcpy| is never called with a zero length.
static inline void *OPENSSL_memcpy(void *dst, const void *src, size_t n) {
#if !defined(RING_CORE_NOSTDLIBINC)
  if (n == 0) {
    return dst;
  }
  return memcpy(dst, src, n);
#else
  // No C library headers are available: copy byte by byte.
  aliasing_uint8_t *d = dst;
  const aliasing_uint8_t *s = src;
  for (size_t i = 0; i < n; ++i) {
    d[i] = s[i];
  }
  return dst;
#endif
}

// OPENSSL_memset sets |n| bytes at |dst| to the byte value of |c| and returns
// |dst|. When |n| is zero it returns immediately without touching |dst|.
static inline void *OPENSSL_memset(void *dst, int c, size_t n) {
#if !defined(RING_CORE_NOSTDLIBINC)
  if (n == 0) {
    return dst;
  }
  return memset(dst, c, n);
#else
  // No C library headers are available: fill byte by byte.
  aliasing_uint8_t *d = dst;
  for (size_t i = 0; i < n; ++i) {
    d[i] = (aliasing_uint8_t)c;
  }
  return dst;
#endif
}


// Loads and stores.
//
// The following functions load and store sized integers with the specified
// endianness. They use |memcpy|, and so avoid alignment or strict aliasing
// requirements on the input and output pointers.
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define RING_BIG_ENDIAN #endif #endif static inline uint32_t CRYPTO_load_u32_le(const void *in) { uint32_t v; OPENSSL_memcpy(&v, in, sizeof(v)); #if defined(RING_BIG_ENDIAN) return CRYPTO_bswap4(v); #else return v; #endif } static inline void CRYPTO_store_u32_le(void *out, uint32_t v) { #if defined(RING_BIG_ENDIAN) v = CRYPTO_bswap4(v); #endif OPENSSL_memcpy(out, &v, sizeof(v)); } static inline uint32_t CRYPTO_load_u32_be(const void *in) { uint32_t v; OPENSSL_memcpy(&v, in, sizeof(v)); #if !defined(RING_BIG_ENDIAN) return CRYPTO_bswap4(v); #else return v; #endif } static inline void CRYPTO_store_u32_be(void *out, uint32_t v) { #if !defined(RING_BIG_ENDIAN) v = CRYPTO_bswap4(v); #endif OPENSSL_memcpy(out, &v, sizeof(v)); } // Runtime CPU feature support #if defined(OPENSSL_X86) || defined(OPENSSL_X86_64) // OPENSSL_ia32cap_P contains the Intel CPUID bits when running on an x86 or // x86-64 system. // // Index 0: // EDX for CPUID where EAX = 1 // Bit 30 is used to indicate an Intel CPU // Index 1: // ECX for CPUID where EAX = 1 // Index 2: // EBX for CPUID where EAX = 7, ECX = 0 // Bit 14 (for removed feature MPX) is used to indicate a preference for ymm // registers over zmm even when zmm registers are supported // Index 3: // ECX for CPUID where EAX = 7, ECX = 0 // // Note: the CPUID bits are pre-adjusted for the OSXSAVE bit and the XMM, YMM, // and AVX512 bits in XCR0, so it is not necessary to check those. (WARNING: See // caveats in cpu_intel.c.) #if defined(OPENSSL_X86_64) extern uint32_t avx2_available; extern uint32_t adx_bmi2_available; #endif #endif #if defined(OPENSSL_ARM) extern alignas(4) uint32_t neon_available; #endif // OPENSSL_ARM #endif // OPENSSL_HEADER_CRYPTO_INTERNAL_H