Merge pull request #278818 from codedownio/julia-pkgs-test-top-n

julia.withPackages: add tests
This commit is contained in:
Nick Cao 2024-01-07 10:39:56 -05:00 committed by GitHub
commit c035506a47
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 304 additions and 0 deletions

View File

@ -0,0 +1,3 @@
test_runs/
.stack-work/
*~

View File

@ -0,0 +1,25 @@
# Testing `julia.withPackages`
This folder contains a test suite for ensuring that the top N most popular Julia packages (as measured by download count) work properly. The key parts are:
* `top-julia-packages.nix`: an impure derivation for fetching Julia download data and processing it into a file called `top-julia-packages.yaml`. This YAML file contains an array of objects with fields "name", "uuid", and "count", and is sorted in decreasing order of count.
* `julia-top-n`: a small Haskell program which reads `top-julia-packages.yaml` and builds a `julia.withPackages` environment for each package, with a nice interactive display and configurable parallelism. It also tests whether evaluating `using <package-name>` works in the resulting environment.
> **Warning:**
> These tests should only be run on maintainer machines, not Hydra! `julia.withPackages` uses IFD, which is not allowed in Hydra.
## Quick start
``` shell
# Test the top 100 Julia packages
./run_tests.sh -n 100
```
## Options
You can run `./run_tests.sh --help` to see additional options for the test harness. The main ones are
* `-n`/`--top-n`: how many of the top packages to build (default: 100).
* `-p`/`--parallelism`: how many builds to run at once (default: 10).
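* `-c`/`--count-file`: path to `top-julia-packages.yaml`. `run_tests.sh` builds this file and passes it for you, so you normally do not need to set it yourself.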

View File

@ -0,0 +1,89 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
module Main (main) where
import Control.Exception
import Control.Monad
import Data.Aeson as A hiding (Options, defaultOptions)
import qualified Data.Aeson.Key as A
import qualified Data.Aeson.KeyMap as HM
import qualified Data.ByteString.Lazy.Char8 as BL8
import qualified Data.List as L
import Data.Text as T
import qualified Data.Vector as V
import qualified Data.Yaml as Yaml
import GHC.Generics
import Options.Applicative
import System.Exit
import System.FilePath
import Test.Sandwich hiding (info)
import UnliftIO.MVar
import UnliftIO.Process
-- | Options specific to this test harness, as produced by 'argsParser'.
data Args = Args
  { countFilePath :: FilePath
    -- ^ YAML file containing package names and counts (@--count-file@ / @-c@).
  , topN :: Int
    -- ^ How many of the top packages to build (@--top-n@ / @-n@).
  , parallelism :: Int
    -- ^ How many builds to run at once (@--parallelism@ / @-p@).
  }
-- | Command-line parser for the harness-specific options collected in 'Args'.
--
-- @--count-file@ has no default and must be supplied; @--top-n@ and
-- @--parallelism@ default to 100 and 10 respectively.
argsParser :: Parser Args
argsParser = Args <$> countFileOpt <*> topNOpt <*> parallelismOpt
  where
    countFileOpt :: Parser FilePath
    countFileOpt = strOption $ mconcat
      [ long "count-file"
      , short 'c'
      , help "YAML file containing package names and counts"
      ]

    topNOpt :: Parser Int
    topNOpt = option auto $ mconcat
      [ long "top-n"
      , short 'n'
      , help "How many of the top packages to build"
      , showDefault
      , value 100
      , metavar "INT"
      ]

    parallelismOpt :: Parser Int
    parallelismOpt = option auto $ mconcat
      [ long "parallelism"
      , short 'p'
      , help "How many builds to run at once"
      , showDefault
      , value 10
      , metavar "INT"
      ]
-- | One entry of the count file: a package's name, its download count and its
-- UUID. The 'FromJSON' instance is derived generically from the field names.
data NameAndCount = NameAndCount
  { name :: Text
  , count :: Int
  , uuid :: Text
  }
  deriving (Show, Eq, Generic, FromJSON)
-- | Path to a @julia@ executable inside a built environment.
newtype JuliaPath
  = JuliaPath FilePath
  deriving (Show)
-- | Context label for the per-package 'MVar' shared between a package's
-- tests. It starts out as 'Nothing' and is set to the built 'JuliaPath' once
-- the "Builds" test succeeds, so the "Uses" test can pick it up.
julia :: Label "julia" (MVar (Maybe JuliaPath))
julia = Label
-- | Entry point.
--
-- Decodes the ranked package list from the count file, then runs a sandwich
-- test tree containing one node per package (limited to the first @topN@
-- entries, at most @parallelism@ at a time). Each node has two tests:
--
-- * \"Builds\": runs @nix build@ on @julia.withPackages [\"\<name\>\"]@ and
--   records the resulting @bin/julia@ path in the node's 'MVar'.
-- * \"Uses\": runs @julia -e "using \<name\>"@ with that executable and
--   expects a successful exit code.
main :: IO ()
main = do
  -- The harness options are needed up front to construct the test tree, so
  -- the command line is parsed here before handing control to sandwich.
  clo <- parseCommandLineArgs argsParser (return ())
  let Args {..} = optUserOptions clo

  decoded <- Yaml.decodeFileEither countFilePath
  (namesAndCounts :: [NameAndCount]) <- case decoded of
    Right entries -> pure entries
    Left err -> throwIO $ userError ("Couldn't decode names and counts YAML file: " <> show err)

  let packages = L.take topN namesAndCounts
      suiteTitle = "Building environments for top " <> show topN <> " Julia packages"
      nodeOptions = defaultNodeOptions { nodeOptionsVisibilityThreshold = 0 }

  runSandwichWithCommandLineArgs' defaultOptions argsParser $
    describe suiteTitle $
      parallelN parallelism $
        forM_ packages $ \(NameAndCount {..}) -> do
          let pkgName = T.unpack name
          introduce' nodeOptions pkgName julia (newMVar Nothing) (\_ -> return ()) $ do
            it "Builds" $ do
              let nixExpr = "with import ../../../../. {}; julia.withPackages [\"" <> pkgName <> "\"]"
                  nixBuild = proc "nix" ["build", "--impure", "--no-link", "--json", "--expr", nixExpr]
              output <- readCreateProcessWithLogging nixBuild ""
              juliaPath <- case A.eitherDecode (BL8.pack output) of
                Right (A.Array (firstOutPath -> Just storePath)) ->
                  pure (JuliaPath (T.unpack storePath </> "bin" </> "julia"))
                other -> expectationFailure ("Couldn't parse output: " <> show other)
              -- Publish the executable path for the "Uses" test below.
              slot <- getContext julia
              modifyMVar_ slot (\_ -> return (Just juliaPath))

            it "Uses" $ do
              built <- getContext julia >>= readMVar
              case built of
                -- The MVar is only filled in when "Builds" succeeded.
                Nothing -> expectationFailure "Build step failed."
                Just (JuliaPath juliaExe) -> do
                  exitCode <- createProcessWithLogging (proc juliaExe ["-e", "using " <> pkgName]) >>= waitForProcess
                  exitCode `shouldBe` ExitSuccess
  where
    -- Extract @outputs.out@ from the first element of the JSON array printed
    -- by @nix build --json@; 'Nothing' if the value has any other shape.
    firstOutPath :: V.Vector A.Value -> Maybe Text
    firstOutPath results = do
      A.Object firstResult <- results V.!? 0
      A.Object outputs <- aesonLookup "outputs" firstResult
      A.String outPath <- aesonLookup "out" outputs
      pure outPath
-- | Look up a 'Text' key in an aeson 'HM.KeyMap', converting the key with
-- 'A.fromText' first.
aesonLookup :: Text -> HM.KeyMap v -> Maybe v
aesonLookup key keyMap = HM.lookup (A.fromText key) keyMap

View File

@ -0,0 +1,16 @@
# Nix build recipe for the `julia-top-n-exe` test harness.
#
# The dependency list must stay in sync with `build-depends` in
# julia-top-n.cabal.
{ mkDerivation, aeson, base, bytestring, filepath, lib
, optparse-applicative, sandwich, text, unliftio, vector, yaml
}:
mkDerivation {
  pname = "julia-top-n";
  version = "0.1.0.0";
  src = ./.;
  isLibrary = false;
  isExecutable = true;
  # `bytestring` and `vector` are imported directly by Main.hs and listed in
  # the cabal file, so declare them explicitly instead of relying on them
  # being provided implicitly.
  executableHaskellDepends = [
    aeson base bytestring filepath optparse-applicative sandwich text
    unliftio vector yaml
  ];
  license = lib.licenses.bsd3;
  mainProgram = "julia-top-n-exe";
}

View File

@ -0,0 +1,34 @@
cabal-version: 2.2
-- This file has been generated from package.yaml by hpack version 0.36.0.
--
-- see: https://github.com/sol/hpack
name: julia-top-n
version: 0.1.0.0
author: Tom McLaughlin
maintainer: tom@codedown.io
license: BSD-3-Clause
build-type: Simple
executable julia-top-n-exe
main-is: Main.hs
other-modules:
Paths_julia_top_n
autogen-modules:
Paths_julia_top_n
hs-source-dirs:
app
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N
build-depends:
aeson
, base >=4.7 && <5
, bytestring
, filepath
, optparse-applicative
, sandwich
, text
, unliftio
, vector
, yaml
default-language: Haskell2010

View File

@ -0,0 +1,37 @@
name: julia-top-n
version: 0.1.0.0
license: BSD-3-Clause
author: "Tom McLaughlin"
maintainer: "tom@codedown.io"
dependencies:
- aeson
- base >= 4.7 && < 5
- bytestring
- filepath
- optparse-applicative
- sandwich
- text
- unliftio
- vector
- yaml
ghc-options:
- -Wall
- -Wcompat
- -Widentities
- -Wincomplete-record-updates
- -Wincomplete-uni-patterns
- -Wmissing-export-lists
- -Wmissing-home-modules
- -Wpartial-fields
- -Wredundant-constraints
executables:
julia-top-n-exe:
main: Main.hs
source-dirs: app
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N

View File

@ -0,0 +1,11 @@
resolver:
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/22/4.yaml
packages:
- .
nix:
pure: false
packages:
- zlib

View File

@ -0,0 +1,13 @@
# This file was autogenerated by Stack.
# You should not edit this file by hand.
# For more information, please see the documentation at:
# https://docs.haskellstack.org/en/stable/lock_files
packages: []
snapshots:
- completed:
sha256: 8b211c5a6aad3787e023dfddaf7de7868968e4f240ecedf14ad1c5b2199046ca
size: 714097
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/22/4.yaml
original:
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/22/4.yaml

View File

@ -0,0 +1,33 @@
#! /usr/bin/env nix-shell
#! nix-shell -i python3 -p "python3.withPackages(ps: with ps; [ pyyaml toml ])"
import csv
from pathlib import Path
import sys
import toml
import yaml
def load_request_counts(requests_csv_path):
    """Read the package-requests CSV and return ``(uuid, request_count)`` tuples.

    Only rows whose third column is ``"user"`` are kept. The UUID is taken
    from the first column and the request count from the fifth. The result is
    sorted by count, largest first.
    """
    rows = []
    # newline="" is what the csv module asks for when it is given a file object.
    with open(requests_csv_path, newline="") as f:
        for row in csv.reader(f):
            # Blank or truncated lines lack the columns read below; skip them
            # instead of failing with an IndexError.
            if len(row) < 5:
                continue
            if row[2] == "user":
                rows.append((row[0], int(row[4])))
    rows.sort(key=lambda entry: entry[1], reverse=True)
    return rows


def load_uuid_to_name(registry_path):
    """Build a UUID -> package name map from ``Registry.toml`` in ``registry_path``."""
    registry = toml.load(Path(registry_path) / "Registry.toml")
    return {uuid: info["name"] for uuid, info in registry["packages"].items()}


def build_results(rows, uuid_to_name):
    """Attach names to ``(uuid, count)`` rows, dropping UUIDs with no known name.

    Returns a list of ``{"uuid", "name", "count"}`` dicts in the order of ``rows``.
    """
    results = []
    for uuid, count in rows:
        name = uuid_to_name.get(uuid)
        if not name:
            continue
        results.append({"uuid": uuid, "name": name, "count": count})
    return results


def main(argv):
    """Write the ranked package list as YAML to stdout.

    ``argv[1]`` is the package-requests CSV and ``argv[2]`` is the registry
    directory containing ``Registry.toml``.
    """
    if len(argv) < 3:
        sys.exit(f"usage: {argv[0] if argv else 'process_top_n.py'} REQUESTS_CSV REGISTRY_DIR")

    requests_csv_path = Path(argv[1])
    registry_path = Path(argv[2])

    rows = load_request_counts(requests_csv_path)
    uuid_to_name = load_uuid_to_name(registry_path)
    results = build_results(rows, uuid_to_name)

    yaml.dump(results, sys.stdout, default_flow_style=False)


if __name__ == "__main__":
    main(sys.argv)

View File

@ -0,0 +1,15 @@
#! /usr/bin/env nix-shell
#! nix-shell -i bash -p jq
set -eo pipefail

# Run from the directory containing this script so the relative paths used
# below (top-julia-packages.nix, ./julia-top-n, ../../../../.) resolve.
SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Quoted so a checkout path containing spaces or glob characters still works.
cd "$SCRIPTDIR"

# Build the ranked package list and capture its store path.
TOP_N_FILE=$(nix build --impure -f top-julia-packages.nix --no-link --json | jq -r '.[0].outputs.out')
echo "Got top Julia packages: $TOP_N_FILE"

# Build the Haskell test harness and locate its executable.
TESTER_PROGRAM=$(nix build --impure --expr 'with import ../../../../. {}; haskellPackages.callPackage ./julia-top-n {}' --no-link --json | jq -r '.[0].outputs.out')/bin/julia-top-n-exe
echo "Built tester program: $TESTER_PROGRAM"

# "$@" forwards each caller argument unchanged; an unquoted $* would re-split
# arguments on whitespace and expand globs.
"$TESTER_PROGRAM" --tui -c "$TOP_N_FILE" "$@"

View File

@ -0,0 +1,28 @@
# Produces `top-julia-packages.yaml` by downloading the package request
# statistics and combining them with the Julia registry via process_top_n.py.
# Both derivations are marked `__impure`.
with import ../../../../. {};

let
  registry = callPackage ../registry.nix {};

  # Downloads and decompresses package_requests.csv.gz; the output is the
  # plain CSV file.
  packageRequestsCsv = stdenv.mkDerivation {
    name = "julia-package-requests.csv";

    __impure = true;

    buildInputs = [cacert gzip wget];

    buildCommand = ''
      wget https://julialang-logs.s3.amazonaws.com/public_outputs/current/package_requests.csv.gz
      gunzip package_requests.csv.gz
      ls -lh
      cp package_requests.csv $out
    '';
  };
in

runCommand "top-julia-packages.yaml" {
  __impure = true;
  nativeBuildInputs = [(python3.withPackages (ps: with ps; [pyyaml toml]))];
} ''
  python ${./process_top_n.py} ${packageRequestsCsv} ${registry} > $out
''