Merge pull request #120854 from tobim/pkgs/arrow-cpp-4.0.0

arrow-cpp: 3.0.0 -> 4.0.0
This commit is contained in:
Dmitry Kalinkin 2021-04-30 20:06:00 -04:00 committed by GitHub
commit 275624a226
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 82 additions and 12 deletions

View File

@ -1,6 +1,7 @@
{ stdenv, lib, fetchurl, fetchFromGitHub, fetchpatch, fixDarwinDylibNames
{ stdenv, lib, fetchurl, fetchFromGitHub, fixDarwinDylibNames
, autoconf, boost, brotli, cmake, flatbuffers, gflags, glog, gtest, lz4
, perl, python3, rapidjson, re2, snappy, thrift, utf8proc, which, zlib, zstd
, perl, python3, rapidjson, re2, snappy, thrift, utf8proc, which, xsimd
, zlib, zstd
, enableShared ? !stdenv.hostPlatform.isStatic
}:
@ -15,18 +16,18 @@ let
parquet-testing = fetchFromGitHub {
owner = "apache";
repo = "parquet-testing";
rev = "e31fe1a02c9e9f271e4bfb8002d403c52f1ef8eb";
sha256 = "02f51dvx8w5mw0bx3hn70hkn55mn1m65kzdps1ifvga9hghpy0sh";
rev = "ddd898958803cb89b7156c6350584d1cda0fe8de";
sha256 = "0n16xqlpxn2ryp43w8pppxrbwmllx6sk4hv3ycgikfj57nd3ibc0";
};
in stdenv.mkDerivation rec {
pname = "arrow-cpp";
version = "3.0.0";
version = "4.0.0";
src = fetchurl {
url =
"mirror://apache/arrow/arrow-${version}/apache-arrow-${version}.tar.gz";
sha256 = "0yp2b02wrc3s50zd56fmpz4nhhbihp0zw329v4zizaipwlxwrhkk";
sha256 = "1bj9jr0pgq9f2nyzqiyj3cl0hcx3c83z2ym6rpdkp59ff2zx0caa";
};
sourceRoot = "apache-arrow-${version}/cpp";
@ -90,6 +91,10 @@ in stdenv.mkDerivation rec {
"-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
"-DARROW_DEPENDENCY_SOURCE=SYSTEM"
"-DARROW_DEPENDENCY_USE_SHARED=${if enableShared then "ON" else "OFF"}"
"-DARROW_COMPUTE=ON"
"-DARROW_CSV=ON"
"-DARROW_DATASET=ON"
"-DARROW_JSON=ON"
"-DARROW_PLASMA=ON"
# Disable Python for static mode because openblas is currently broken there.
"-DARROW_PYTHON=${if enableShared then "ON" else "OFF"}"
@ -111,6 +116,8 @@ in stdenv.mkDerivation rec {
"-DCMAKE_INSTALL_RPATH=@loader_path/../lib" # needed for tools executables
] ++ lib.optional (!stdenv.isx86_64) "-DARROW_USE_SIMD=OFF";
ARROW_XSIMD_URL = xsimd.src;
doInstallCheck = true;
ARROW_TEST_DATA =
if doInstallCheck then "${arrow-testing}/data" else null;

View File

@ -0,0 +1,56 @@
# Package expression for xsimd: C++ wrappers for SIMD intrinsics.
{ lib, stdenv, fetchFromGitHub, cmake, gtest }:

let
  version = "7.5.0";

  # GitHub coordinates shared by both source variants below.
  upstream = {
    owner = "xtensor-stack";
    repo = "xsimd";
    rev = version;
  };

  # Unmodified upstream sources; selected on every platform except Darwin.
  pristineSrc = fetchFromGitHub (upstream // {
    sha256 = "0c9pq5vz43j99z83w3b9qylfi66mn749k1afpv5cwfxggbxvy63f";
  });

  # Darwin variant with xsimd_scalar.hpp rewritten at fetch time so that
  # apple_sdk is not required. The rewrite happens here rather than in the
  # patchPhase because arrow-cpp consumes this source directly.
  # pyconfig.h defines _GNU_SOURCE to 1, so that check is stamped out as well.
  # Upstream PR with a better fix: https://github.com/xtensor-stack/xsimd/pull/463
  patchedDarwinSrc = fetchFromGitHub (upstream // {
    sha256 = "eGAdRSYhf7rbFdm8g1Tz1ZtSVu44yjH/loewblhv9Vs=";
    postFetch = ''
      mkdir $out
      tar -xf $downloadedFile --directory=$out --strip-components=1
      substituteInPlace $out/include/xsimd/types/xsimd_scalar.hpp \
      --replace 'defined(__APPLE__)' 0 \
      --replace 'defined(_GNU_SOURCE)' 0
    '';
  });

  # Tests excluded from the check run; the list is empty unless the host
  # platform is Darwin.
  # Upstream Issue: https://github.com/xtensor-stack/xsimd/issues/456
  skippedTests = lib.optionals stdenv.hostPlatform.isDarwin [
    "error_gamma_test/sse_double.gamma"
    "error_gamma_test/avx_double.gamma"
  ];
in
stdenv.mkDerivation {
  inherit version;
  pname = "xsimd";

  src =
    if stdenv.hostPlatform.isDarwin
    then patchedDarwinSrc
    else pristineSrc;

  nativeBuildInputs = [ cmake ];

  cmakeFlags = [ "-DBUILD_TESTS=ON" ];

  # Build and run the upstream test suite through the "xtest" target.
  doCheck = true;
  checkInputs = [ gtest ];
  checkTarget = "xtest";

  # A leading "-" introduces googletest's negative patterns, ":"-separated.
  GTEST_FILTER = "-" + builtins.concatStringsSep ":" skippedTests;

  meta = {
    description = "C++ wrappers for SIMD intrinsics";
    homepage = "https://github.com/xtensor-stack/xsimd";
    license = lib.licenses.bsd3;
    maintainers = [ lib.maintainers.tobim ];
    platforms = lib.platforms.all;
  };
}

View File

@ -34,12 +34,17 @@ buildPythonPackage rec {
export PYARROW_PARALLEL=$NIX_BUILD_CORES
'';
# Deselect a single test because pyarrow prints a 2-line error message where
# only a single line is expected. The additional line of output comes from
# the glog library which is an optional dependency of arrow-cpp that is
# enabled in nixpkgs.
# Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11393
pytestFlagsArray = [ "--deselect=pyarrow/tests/test_memory.py::test_env_var" ];
pytestFlagsArray = [
# Deselect a single test because pyarrow prints a 2-line error message where
# only a single line is expected. The additional line of output comes from
# the glog library which is an optional dependency of arrow-cpp that is
# enabled in nixpkgs.
# Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11393
"--deselect=pyarrow/tests/test_memory.py::test_env_var"
# Deselect the parquet dataset write test because it erroneously fails to find the
# pyarrow._dataset module.
"--deselect=pyarrow/tests/parquet/test_dataset.py::test_write_to_dataset_filesystem"
];
dontUseSetuptoolsCheck = true;
preCheck = ''

View File

@ -18104,6 +18104,8 @@ in
xlslib = callPackage ../development/libraries/xlslib { };
xsimd = callPackage ../development/libraries/xsimd { };
xvidcore = callPackage ../development/libraries/xvidcore { };
xxHash = callPackage ../development/libraries/xxHash {};