Skip to content

Commit

Permalink
arm_compute v19.08
Browse files Browse the repository at this point in the history
  • Loading branch information
Jenkins committed Sep 2, 2019
1 parent 4ba87db commit 975dfe1
Show file tree
Hide file tree
Showing 10,361 changed files with 334,440 additions and 156,417 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,26 @@ Please report issues here: https://github.com/ARM-software/ComputeLibrary/issues

News:

- [Gian Marco's talk on Performance Analysis for Optimizing Embedded Deep Learning Inference Software](https://www.embedded-vision.com/platinum-members/arm/embedded-vision-training/videos/pages/may-2019-embedded-vision-summit)
- [Gian Marco's talk on optimizing CNNs with Winograd algorithms at the EVS](https://www.embedded-vision.com/platinum-members/arm/embedded-vision-training/videos/pages/may-2018-embedded-vision-summit-iodice)
- [Gian Marco's talk on using SGEMM and FFTs to Accelerate Deep Learning](https://www.embedded-vision.com/platinum-members/arm/embedded-vision-training/videos/pages/may-2016-embedded-vision-summit-iodice)

Related projects:

- [Arm NN SDK](https://github.com/arm-software/armnn)

Tutorials:

- [Tutorial: Cartoonifying Images on Raspberry Pi with the Compute Library](https://community.arm.com/graphics/b/blog/posts/cartoonifying-images-on-raspberry-pi-with-the-compute-library)
- [Tutorial: Running AlexNet on Raspberry Pi with Compute Library](https://community.arm.com/processors/b/blog/posts/running-alexnet-on-raspberry-pi-with-compute-library)

Blogs:

- [Happy Birthday ACL!](https://community.arm.com/developer/tools-software/graphics/b/blog/posts/arm-compute-library-19-05-is-coming)

Documentation available here:

- [v19.08](https://arm-software.github.io/ComputeLibrary/v19.08/)
- [v19.05](https://arm-software.github.io/ComputeLibrary/v19.05/)
- [v19.02](https://arm-software.github.io/ComputeLibrary/v19.02/)
- [v18.11](https://arm-software.github.io/ComputeLibrary/v18.11/index.xhtml)
Expand All @@ -41,6 +50,8 @@ Documentation available here:

Binaries available here:

- [v19.08-linux](https://github.com/ARM-software/ComputeLibrary/releases/download/v19.08/arm_compute-v19.08-bin-linux.tar.gz)
- [v19.08-android](https://github.com/ARM-software/ComputeLibrary/releases/download/v19.08/arm_compute-v19.08-bin-android.tar.gz)
- [v19.05-linux](https://github.com/ARM-software/ComputeLibrary/releases/download/v19.05/arm_compute-v19.05-bin-linux.tar.gz)
- [v19.05-android](https://github.com/ARM-software/ComputeLibrary/releases/download/v19.05/arm_compute-v19.05-bin-android.tar.gz)
- [v19.02-linux](https://github.com/ARM-software/ComputeLibrary/releases/download/v19.02/arm_compute-v19.02-bin-linux.tar.gz)
Expand Down
13 changes: 9 additions & 4 deletions SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ import os.path
import re
import subprocess

VERSION = "v19.05"
SONAME_VERSION="15.0.0"
VERSION = "v19.08"
SONAME_VERSION="16.0.0"

Import('env')
Import('vars')
Expand Down Expand Up @@ -164,6 +164,7 @@ core_files += Glob('src/core/CPP/kernels/*.cpp')
core_files += Glob('src/core/utils/helpers/*.cpp')
core_files += Glob('src/core/utils/io/*.cpp')
core_files += Glob('src/core/utils/quantization/*.cpp')
core_files += Glob('src/core/utils/misc/*.cpp')
if env["logging"]:
core_files += Glob('src/core/utils/logging/*.cpp')

Expand All @@ -187,6 +188,7 @@ if env['opencl']:
core_files += Glob('src/core/CL/*.cpp')
core_files += Glob('src/core/CL/kernels/*.cpp')
core_files += Glob('src/core/CL/gemm/*.cpp')
core_files += Glob('src/core/CL/gemm/native/*.cpp')
core_files += Glob('src/core/CL/gemm/reshaped/*.cpp')
core_files += Glob('src/core/CL/gemm/reshaped_only_rhs/*.cpp')

Expand All @@ -204,10 +206,13 @@ if env['neon']:

core_files += Glob('src/core/NEON/kernels/arm_gemm/*.cpp')

# build winograd sources for either v7a / v8a
# build winograd/depthwise sources for either v7a / v8a
core_files += Glob('src/core/NEON/kernels/convolution/*/*.cpp')
core_files += Glob('src/core/NEON/kernels/convolution/winograd/*/*.cpp')
arm_compute_env.Append(CPPPATH = ["arm_compute/core/NEON/kernels/convolution/winograd/","arm_compute/core/NEON/kernels/convolution/common/" , "arm_compute/core/NEON/kernels/assembly/"])
arm_compute_env.Append(CPPPATH = ["arm_compute/core/NEON/kernels/convolution/common/",
"arm_compute/core/NEON/kernels/convolution/winograd/",
"arm_compute/core/NEON/kernels/convolution/depthwise/",
"arm_compute/core/NEON/kernels/assembly/"])

graph_files += Glob('src/graph/backends/NEON/*.cpp')

Expand Down
19 changes: 8 additions & 11 deletions SConstruct
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ if env['os'] == 'android' and ( 'clang++' not in cpp_compiler or 'clang' not in
if 'clang++' in cpp_compiler:
env.Append(CXXFLAGS = ['-Wno-format-nonliteral','-Wno-deprecated-increment-bool','-Wno-vla-extension','-Wno-mismatched-tags'])
else:
env.Append(CXXFLAGS = ['-Wlogical-op','-Wnoexcept','-Wstrict-null-sentinel','-Wno-implicit-fallthrough'])
env.Append(CXXFLAGS = ['-Wlogical-op','-Wnoexcept','-Wstrict-null-sentinel', '-Wno-redundant-move'])

if env['cppthreads']:
env.Append(CPPDEFINES = [('ARM_COMPUTE_CPP_SCHEDULER', 1)])
Expand Down Expand Up @@ -185,18 +185,15 @@ elif env['arch'] == 'arm64-v8a':
env.Append(CXXFLAGS = ['-no-integrated-as'])
elif 'arm64-v8.2-a' in env['arch']:
if env['arch'] == 'arm64-v8.2-a-sve':
if env['os'] != 'bare_metal':
print("Only bare metal SVE is supported at the moment")
Exit(1)
env.Append(CXXFLAGS = ['-march=armv8.2-a+sve+fp16+dotprod'])
else:
env.Append(CXXFLAGS = ['-march=armv8.2-a+fp16']) # explicitly enable fp16 extension otherwise __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is undefined
if env['os'] == 'linux':
prefix = "aarch64-linux-gnu-"
elif env['os'] == 'bare_metal':
prefix = "aarch64-elf-"
elif env['os'] == 'android':
prefix = "aarch64-linux-android-"
if env['os'] == 'linux':
prefix = "aarch64-linux-gnu-"
elif env['os'] == 'bare_metal':
prefix = "aarch64-elf-"
elif env['os'] == 'android':
prefix = "aarch64-linux-android-"
env.Append(CPPDEFINES = ['ARM_COMPUTE_AARCH64_V8_2','NO_DOT_IN_TOOLCHAIN'])
if 'clang++' in cpp_compiler:
env.Append(CXXFLAGS = ['-no-integrated-as'])
Expand Down Expand Up @@ -282,7 +279,7 @@ if env['debug']:
env.Append(CXXFLAGS = ['-O0','-g','-gdwarf-2'])
env.Append(CPPDEFINES = ['ARM_COMPUTE_DEBUG_ENABLED'])
else:
env.Append(CXXFLAGS = ['-O3','-ftree-vectorize'])
env.Append(CXXFLAGS = ['-O3'])

if env['asserts']:
env.Append(CPPDEFINES = ['ARM_COMPUTE_ASSERTS_ENABLED'])
Expand Down
5 changes: 5 additions & 0 deletions arm_compute/core/CL/CLKernelLibrary.h
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,11 @@ class CLKernelLibrary
* @return The content of CL_DEVICE_VERSION
*/
std::string get_device_version();
/** Return the maximum number of compute units in the device
*
* @return The content of CL_DEVICE_MAX_COMPUTE_UNITS
*/
cl_uint get_num_compute_units();
/** Creates a kernel from the kernel library.
*
* @param[in] kernel_name Kernel name.
Expand Down
9 changes: 7 additions & 2 deletions arm_compute/core/CL/CLKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h"
#include "arm_compute/core/CL/kernels/CLAccumulateKernel.h"
#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h"
Expand All @@ -51,6 +52,7 @@
#include "arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.h"
Expand All @@ -73,25 +75,26 @@
#include "arm_compute/core/CL/kernels/CLFlattenLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLFloorKernel.h"
#include "arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h"
#include "arm_compute/core/CL/kernels/CLGatherKernel.h"
#include "arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h"
#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h"
Expand All @@ -109,6 +112,7 @@
#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h"
#include "arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h"
#include "arm_compute/core/CL/kernels/CLMedian3x3Kernel.h"
#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h"
Expand Down Expand Up @@ -138,6 +142,7 @@
#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h"
#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLStackLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h"
#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h"
Expand Down
21 changes: 20 additions & 1 deletion arm_compute/core/CL/CLTypes.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2018 ARM Limited.
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
Expand All @@ -24,6 +24,7 @@
#ifndef __ARM_COMPUTE_CL_TYPES_H__
#define __ARM_COMPUTE_CL_TYPES_H__

#include "arm_compute/core/CL/ICLArray.h"
#include "arm_compute/core/GPUTarget.h"

#include <string>
Expand Down Expand Up @@ -53,5 +54,23 @@ struct CLDeviceOptions
size_t num_cores; /**< Number of cores */
size_t cache_size; /**< Cache size */
};

/** OpenCL quantization data: wraps the per-channel scale/offset arrays kept on the device */
struct CLQuantization
{
    /** Default Constructor — no quantization arrays attached */
    CLQuantization()
        : scale(nullptr), offset(nullptr)
    {
    }
    /** Constructor
     *
     * @param[in] scale  OpenCL scale array
     * @param[in] offset OpenCL offset array
     */
    CLQuantization(const ICLFloatArray *scale, const ICLInt32Array *offset)
        : scale(scale), offset(offset)
    {
    }

    const ICLFloatArray *scale;  /**< Quantization scale array */
    const ICLInt32Array *offset; /**< Quantization offset array */
};
} // namespace arm_compute
#endif /* __ARM_COMPUTE_CL_TYPES_H__ */
28 changes: 28 additions & 0 deletions arm_compute/core/CL/ICLKernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,20 @@ class ICLKernel : public IKernel
{
add_tensor_argument<1>(idx, tensor, window);
}
/** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx if the condition is true.
 *
 * No-op when @p cond is false; otherwise behaves exactly like add_1D_tensor_argument().
 *
 * @param[in]     cond   Condition to check
 * @param[in,out] idx    Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
 * @param[in]     tensor Tensor to set as an argument of the object's kernel.
 * @param[in]     window Window the kernel will be executed on.
 */
void add_1D_tensor_argument_if(bool cond, unsigned int &idx, const ICLTensor *tensor, const Window &window)
{
    if(!cond)
    {
        return;
    }
    add_1D_tensor_argument(idx, tensor, window);
}
/** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
*
* @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
Expand All @@ -121,6 +135,20 @@ class ICLKernel : public IKernel
{
add_tensor_argument<2>(idx, tensor, window);
}
/** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx if the condition is true.
 *
 * No-op when @p cond is false; otherwise behaves exactly like add_2D_tensor_argument().
 *
 * @param[in]     cond   Condition to check
 * @param[in,out] idx    Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
 * @param[in]     tensor Tensor to set as an argument of the object's kernel.
 * @param[in]     window Window the kernel will be executed on.
 */
void add_2D_tensor_argument_if(bool cond, unsigned int &idx, const ICLTensor *tensor, const Window &window)
{
    if(!cond)
    {
        return;
    }
    add_2D_tensor_argument(idx, tensor, window);
}
/** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
*
* @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
Expand Down
9 changes: 8 additions & 1 deletion arm_compute/core/CL/ICLTensor.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2018 ARM Limited.
* Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
Expand All @@ -26,6 +26,8 @@

#include "arm_compute/core/ITensor.h"

#include "arm_compute/core/CL/CLTypes.h"

#include <cstdint>

namespace cl
Expand Down Expand Up @@ -53,6 +55,11 @@ class ICLTensor : public ITensor
/** Default virtual destructor. */
virtual ~ICLTensor() = default;

/** Interface to be implemented by the child class to return the wrapped quantization info data
*
* @return A wrapped quantization info object.
*/
virtual CLQuantization quantization() const = 0;
/** Interface to be implemented by the child class to return a reference to the OpenCL buffer containing the image's data.
*
* @return A reference to an OpenCL buffer containing the image's data.
Expand Down
55 changes: 55 additions & 0 deletions arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef __ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H__
#define __ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H__

#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h"
#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.h"

#include <memory>

namespace arm_compute
{
namespace cl_gemm
{
/** CLGEMMNative factory class */
class CLGEMMNativeKernelConfigurationFactory final
{
public:
/** Static method to construct CLGEMMNative kernel object accordingly with the GPU architecture
*
* @param[in] arch GPU target
*
* @return CLGEMMNative kernel configuration class
*/
static std::unique_ptr<ICLGEMMKernelConfiguration> create(GPUTarget arch)
{
// Note: At the moment we only support Bifrost architecture. However, we should have a dedicated path for each GPU architecture
// using get_arch_from_target(arch)
return support::cpp14::make_unique<CLGEMMNativeKernelConfigurationBifrost>(arch);
}
};
} // namespace cl_gemm
} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLGEMMNATIVEKERNELCONFIGURATION_H__ */
Loading

0 comments on commit 975dfe1

Please sign in to comment.