Skip to content

Commit

Permalink
Merge pull request #3150 from xianyi/develop
Browse files Browse the repository at this point in the history
Update branch from develop for 0.3.14 release
  • Loading branch information
martin-frbg authored Mar 17, 2021
2 parents d2b11c4 + 86de5f7 commit 2f6d35c
Show file tree
Hide file tree
Showing 102 changed files with 4,895 additions and 433 deletions.
24 changes: 24 additions & 0 deletions .drone.yml
Original file line number Diff line number Diff line change
Expand Up @@ -190,3 +190,27 @@ steps:
- make -C ctest $COMMON_FLAGS
- make -C utest $COMMON_FLAGS
- make -C cpp_thread_test dgemm_tester
---
kind: pipeline
name: arm64_gcc10

platform:
os: linux
arch: arm64

steps:
- name: Build and Test
image: ubuntu:20.04
environment:
CC: gcc-10
FC: gfortran-10
COMMON_FLAGS: 'TARGET=ARMV8 DYNAMIC_ARCH=1'
commands:
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
- apt-get update -y
- apt-get install -y make $CC gfortran-10 perl python g++
- $CC --version
- make QUIET_MAKE=1 $COMMON_FLAGS
- make -C utest $COMMON_FLAGS
- make -C test $COMMON_FLAGS

5 changes: 5 additions & 0 deletions .github/workflows/nightly-Homebrew-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ jobs:
if: github.event_name != 'pull_request'
run: brew update || true

- name: unlink installed gcc to allow updating
run: |
brew unlink gcc@8
brew unlink gcc@9
- name: Install prerequisites
run: brew install --fetch-HEAD --HEAD --only-dependencies --keep-tmp openblas

Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -89,5 +89,7 @@ build.*
*.swp
benchmark/*.goto
benchmark/smallscaling
.vscode
CMakeCache.txt
CMakeFiles/*
.vscode
7 changes: 5 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,17 @@ cmake_minimum_required(VERSION 2.8.5)
project(OpenBLAS C ASM)
set(OpenBLAS_MAJOR_VERSION 0)
set(OpenBLAS_MINOR_VERSION 3)
set(OpenBLAS_PATCH_VERSION 13)
set(OpenBLAS_PATCH_VERSION 14)
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")

# Adhere to GNU filesystem layout conventions
include(GNUInstallDirs)

include(CMakePackageConfigHelpers)

if(MSVC AND NOT DEFINED NOFORTRAN)
set(NOFORTRAN ON)
endif()

#######
if(MSVC)
Expand Down Expand Up @@ -229,7 +232,7 @@ if (NOT NO_CBLAS)
add_subdirectory(utest)
endif()

if (NOT MSVC AND NOT NOFORTRAN)
if (NOT NOFORTRAN)
# Build test and ctest
add_subdirectory(test)
if(NOT NO_CBLAS)
Expand Down
48 changes: 48 additions & 0 deletions Changelog.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,52 @@
OpenBLAS ChangeLog
====================================================================
Version 0.3.14
17-Mar-2021

common:
* Fixed a race condition on thread shutdown in non-OpenMP builds
* Fixed custom BUFFERSIZE option getting ignored in gmake builds
* Fixed CMAKE compilation of the TRMM kernels for GENERIC platforms
* Added CBLAS interfaces for CROTG, ZROTG, CSROT and ZDROT
* Improved performance of OMATCOPY_RT across all platforms
* Changed perl scripts to use env instead of a hardcoded /usr/bin/perl
* Fixed potential misreading of the GCC compiler version in the build scripts
* Fixed convergence problems in LAPACK complex GGEV/GGES (Reference-LAPACK #477)
* Reduced the stacksize requirements for running the LAPACK testsuite (Reference-LAPACK #335)

RISCV:
* Fixed compilation on RISCV (missing entry in getarch)

POWER:
* Fixed compilation for DYNAMIC_ARCH with clang and with old gcc versions
* Added support for compilation on FreeBSD/ppc64le
* Added optimized POWER10 kernels for SSCAL, DSCAL, CSCAL, ZSCAL
* Added optimized POWER10 kernels for SROT, DROT, CDOT, SASUM, DASUM
* Improved SSWAP, DSWAP, CSWAP, ZSWAP performance on POWER10
* Improved SCOPY and CCOPY performance on POWER10
* Improved SGEMM and DGEMM performance on POWER10
* Added support for compilation with the NVIDIA HPC compiler

x86_64:
* Added an optimized bfloat16 GEMM kernel for Cooperlake
* Added CPUID autodetection for Intel Rocket Lake and Tiger Lake cpus
* Improved the performance of SASUM,DASUM,SROT,DROT on AMD Ryzen cpus
* Added support for compilation with the NAG Fortran compiler
* Fixed recognition of the AMD AOCC compiler
* Fixed compilation for DYNAMIC_ARCH with clang on Windows
* Added support for running the BLAS/CBLAS tests on Windows
* Fixed signatures of the tls callback functions for Windows x64
* Fixed various issues with fma intrinsics support handling

ARM:
* Added support for embedded Cortex M targets via a new option EMBEDDED

ARMV8:
* Fixed the THUNDERX2T99 and NEOVERSEN1 DNRM2/ZNRM2 kernels for inputs with Inf
* Added support for the DYNAMIC_LIST option
* Added support for compilation with the NVIDIA HPC compiler
* Added support for compiling with the NAG Fortran compiler

====================================================================
Version 0.3.13
12-Dec-2020
Expand Down
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ endif
@$(CC) --version > /dev/null 2>&1;\
if [ $$? -eq 0 ]; then \
cverinfo=`$(CC) --version | sed -n '1p'`; \
if [ -z "$${cverinfo}" ]; then \
cverinfo=`$(CC) --version | sed -n '2p'`; \
fi; \
echo " C compiler ... $(C_COMPILER) (cmd & version : $${cverinfo})";\
else \
echo " C compiler ... $(C_COMPILER) (command line : $(CC))";\
Expand All @@ -67,6 +70,9 @@ ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
@$(FC) --version > /dev/null 2>&1;\
if [ $$? -eq 0 ]; then \
fverinfo=`$(FC) --version | sed -n '1p'`; \
if [ -z "$${fverinfo}" ]; then \
fverinfo=`$(FC) --version | sed -n '2p'`; \
fi; \
echo " Fortran compiler ... $(F_COMPILER) (cmd & version : $${fverinfo})";\
else \
echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))";\
Expand Down
34 changes: 32 additions & 2 deletions Makefile.arm64
Original file line number Diff line number Diff line change
@@ -1,80 +1,110 @@

ifneq ($(C_COMPILER), PGI)
ifeq ($(CORE), ARMV8)
CCOMMON_OPT += -march=armv8-a
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a
endif
endif

ifeq ($(CORE), CORTEXA53)
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
endif
endif

ifeq ($(CORE), CORTEXA57)
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
endif
endif

ifeq ($(CORE), CORTEXA72)
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
endif
endif

ifeq ($(CORE), CORTEXA73)
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
endif
endif

# Use a72 tunings because Neoverse-N1 is only available
# in GCC>=9
ifeq ($(CORE), NEOVERSEN1)
ifeq ($(GCCVERSIONGTEQ7), 1)
ifeq ($(GCCVERSIONGTEQ9), 1)
CCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
endif
else
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
endif
endif
else
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
endif
endif
endif

ifeq ($(CORE), THUNDERX)
CCOMMON_OPT += -march=armv8-a -mtune=thunderx
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=thunderx
endif
endif

ifeq ($(CORE), FALKOR)
CCOMMON_OPT += -march=armv8-a -mtune=falkor
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=falkor
endif
endif

ifeq ($(CORE), THUNDERX2T99)
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
endif
endif

ifeq ($(CORE), THUNDERX3T110)
ifeq ($(GCCVERSIONGTEQ10), 1)
CCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
endif
else
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
endif
endif
endif

ifeq ($(CORE), VORTEX)
CCOMMON_OPT += -march=armv8.3-a
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.3-a
endif
endif

ifeq ($(GCCVERSIONGTEQ9), 1)
ifeq ($(CORE), TSV110)
CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
endif
endif

endif
endif
2 changes: 2 additions & 0 deletions Makefile.power
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@ USE_OPENMP = 1
endif

ifeq ($(CORE), POWER10)
ifneq ($(C_COMPILER), PGI)
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
endif
endif

ifeq ($(CORE), POWER9)
ifneq ($(C_COMPILER), PGI)
Expand Down
2 changes: 1 addition & 1 deletion Makefile.rule
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#

# This library's version
VERSION = 0.3.13
VERSION = 0.3.14

# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
Expand Down
Loading

0 comments on commit 2f6d35c

Please sign in to comment.