From 76b5c7ce6b0c7ddb6aa4ac1c2b8c05a6702a1975 Mon Sep 17 00:00:00 2001 From: Paul Amonson Date: Tue, 18 Jun 2024 09:00:53 -0700 Subject: [PATCH 2/2] [Fix] Copyright and Licensing issues. Signed-off-by: Paul Amonson --- src/port/pg_crc32c_avx512.c | 113 +++++++++++++++++------------ src/port/pg_crc32c_avx512_choose.c | 15 ++-- 2 files changed, 75 insertions(+), 53 deletions(-) diff --git a/src/port/pg_crc32c_avx512.c b/src/port/pg_crc32c_avx512.c index 085c8d99a8..da1a01b974 100644 --- a/src/port/pg_crc32c_avx512.c +++ b/src/port/pg_crc32c_avx512.c @@ -5,7 +5,6 @@ * * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2024, Intel(r) Corporation * * IDENTIFICATION * src/port/pg_crc32c_avx512.c @@ -71,16 +70,36 @@ crc32c_fallback(pg_crc32c crc, const uint8 *p, size_t length) * * "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ * Instruction" - * V. Gopal, E. Ozturk, et al., 2009, - * https://www.researchgate.net/publication/263424619_Fast_CRC_computation#full-text + * V. Gopal, E. Ozturk, et al., 2009 * - * This Function: - * Copyright 2017 The Chromium Authors - * Copyright (c) 2024, Intel(r) Corporation + * For This Function: + * Copyright 2015 The Chromium Authors * - * Use of this source code is governed by a BSD-style license that can be - * found in the Chromium source repository LICENSE file. - * https://chromium.googlesource.com/chromium/src/+/refs/heads/main/LICENSE + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google LLC nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ pg_attribute_no_sanitize_alignment() inline @@ -112,48 +131,48 @@ pg_comp_crc32c_avx512(pg_crc32c crc, const void *data, size_t length) * to 32 bytes. * >>> BEGIN */ - /* - * There's at least one block of 256. - */ - x1 = _mm512_loadu_si512((__m512i *)(input + 0x00)); - x2 = _mm512_loadu_si512((__m512i *)(input + 0x40)); - x3 = _mm512_loadu_si512((__m512i *)(input + 0x80)); - x4 = _mm512_loadu_si512((__m512i *)(input + 0xC0)); +/* + * There's at least one block of 256. + */ +x1 = _mm512_loadu_si512((__m512i *)(input + 0x00)); +x2 = _mm512_loadu_si512((__m512i *)(input + 0x40)); +x3 = _mm512_loadu_si512((__m512i *)(input + 0x80)); +x4 = _mm512_loadu_si512((__m512i *)(input + 0xC0)); - x1 = _mm512_xor_si512(x1, _mm512_castsi128_si512(_mm_cvtsi32_si128(crc))); +x1 = _mm512_xor_si512(x1, _mm512_castsi128_si512(_mm_cvtsi32_si128(crc))); - x0 = _mm512_load_si512((__m512i *)k1k2); +x0 = _mm512_load_si512((__m512i *)k1k2); - input += 256; - length -= 256; +input += 256; +length -= 256; - /* - * Parallel fold blocks of 256, if any. - */ - while (length >= 256) - { - x5 = _mm512_clmulepi64_epi128(x1, x0, 0x00); - x6 = _mm512_clmulepi64_epi128(x2, x0, 0x00); - x7 = _mm512_clmulepi64_epi128(x3, x0, 0x00); - x8 = _mm512_clmulepi64_epi128(x4, x0, 0x00); - - x1 = _mm512_clmulepi64_epi128(x1, x0, 0x11); - x2 = _mm512_clmulepi64_epi128(x2, x0, 0x11); - x3 = _mm512_clmulepi64_epi128(x3, x0, 0x11); - x4 = _mm512_clmulepi64_epi128(x4, x0, 0x11); - - y5 = _mm512_loadu_si512((__m512i *)(input + 0x00)); - y6 = _mm512_loadu_si512((__m512i *)(input + 0x40)); - y7 = _mm512_loadu_si512((__m512i *)(input + 0x80)); - y8 = _mm512_loadu_si512((__m512i *)(input + 0xC0)); - - x1 = _mm512_ternarylogic_epi64(x1, x5, y5, 0x96); - x2 = _mm512_ternarylogic_epi64(x2, x6, y6, 0x96); - x3 = _mm512_ternarylogic_epi64(x3, x7, y7, 0x96); - x4 = _mm512_ternarylogic_epi64(x4, x8, y8, 0x96); - - input += 256; - length -= 256; +/* + * Parallel fold blocks of 256, if any. + */ +while (length >= 256) +{ + x5 = _mm512_clmulepi64_epi128(x1, x0, 0x00); + x6 = _mm512_clmulepi64_epi128(x2, x0, 0x00); + x7 = _mm512_clmulepi64_epi128(x3, x0, 0x00); + x8 = _mm512_clmulepi64_epi128(x4, x0, 0x00); + + x1 = _mm512_clmulepi64_epi128(x1, x0, 0x11); + x2 = _mm512_clmulepi64_epi128(x2, x0, 0x11); + x3 = _mm512_clmulepi64_epi128(x3, x0, 0x11); + x4 = _mm512_clmulepi64_epi128(x4, x0, 0x11); + + y5 = _mm512_loadu_si512((__m512i *)(input + 0x00)); + y6 = _mm512_loadu_si512((__m512i *)(input + 0x40)); + y7 = _mm512_loadu_si512((__m512i *)(input + 0x80)); + y8 = _mm512_loadu_si512((__m512i *)(input + 0xC0)); + + x1 = _mm512_ternarylogic_epi64(x1, x5, y5, 0x96); + x2 = _mm512_ternarylogic_epi64(x2, x6, y6, 0x96); + x3 = _mm512_ternarylogic_epi64(x3, x7, y7, 0x96); + x4 = _mm512_ternarylogic_epi64(x4, x8, y8, 0x96); + + input += 256; + length -= 256; } /* diff --git a/src/port/pg_crc32c_avx512_choose.c b/src/port/pg_crc32c_avx512_choose.c index d5ccb69d10..f774522715 100644 --- a/src/port/pg_crc32c_avx512_choose.c +++ b/src/port/pg_crc32c_avx512_choose.c @@ -10,7 +10,6 @@ * * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2024, Intel(r) Corp. * * * IDENTIFICATION @@ -36,6 +35,10 @@ #include "port/pg_crc32c.h" typedef unsigned int exx_t; +#define EAX 0 +#define EBX 1 +#define ECX 2 +#define EDX 3 /* * Helper function. @@ -94,7 +97,7 @@ sse42_available(void) exx_t exx[4] = {0, 0, 0, 0}; pg_getcpuid(1, exx); - return is_bit_set(exx[2], 20); /* sse4.2 */ + return is_bit_set(exx[ECX], 20); /* sse4.2 */ } /* @@ -108,7 +111,7 @@ osxsave_available(void) exx_t exx[4] = {0, 0, 0, 0}; pg_getcpuid(1, exx); - return is_bit_set(exx[2], 27); /* osxsave */ + return is_bit_set(exx[ECX], 27); /* osxsave */ } /* @@ -122,7 +125,7 @@ avx512f_available(void) exx_t exx[4] = {0, 0, 0, 0}; pg_getcpuidex(7, 0, exx); - return is_bit_set(exx[1], 16); /* avx512-f */ + return is_bit_set(exx[EBX], 16); /* avx512-f */ } /* @@ -136,7 +139,7 @@ vpclmulqdq_available(void) exx_t exx[4] = {0, 0, 0, 0}; pg_getcpuidex(7, 0, exx); - return is_bit_set(exx[1], 10); /* vpclmulqdq */ + return is_bit_set(exx[ECX], 10); /* vpclmulqdq */ } /* @@ -150,7 +153,7 @@ avx512vl_available(void) exx_t exx[4] = {0, 0, 0, 0}; pg_getcpuidex(7, 0, exx); - return is_bit_set(exx[1], 31); /* avx512-vl */ + return is_bit_set(exx[EBX], 31); /* avx512-vl */ } /* -- 2.34.1