From 01317be8d084b958528d2a5d0944c7a70b674a05 Mon Sep 17 00:00:00 2001 From: Maximilian Bosch Date: Fri, 1 Mar 2024 10:49:16 +0100 Subject: [PATCH 1/4] maintainers: add osnyx & ma27 to team flyingcircus --- maintainers/maintainer-list.nix | 6 ++++++ maintainers/team-list.nix | 2 ++ 2 files changed, 8 insertions(+) diff --git a/maintainers/maintainer-list.nix b/maintainers/maintainer-list.nix index b27cf6166f6a..087dbc62a8ad 100644 --- a/maintainers/maintainer-list.nix +++ b/maintainers/maintainer-list.nix @@ -14479,6 +14479,12 @@ githubId = 111265; name = "Ozan Sener"; }; + osnyx = { + email = "os@flyingcircus.io"; + github = "osnyx"; + githubId = 104593071; + name = "Oliver Schmidt"; + }; ostrolucky = { email = "gabriel.ostrolucky@gmail.com"; github = "ostrolucky"; diff --git a/maintainers/team-list.nix b/maintainers/team-list.nix index d43d6e975a81..18c1a25a54a8 100644 --- a/maintainers/team-list.nix +++ b/maintainers/team-list.nix @@ -302,6 +302,8 @@ with lib.maintainers; { dpausp frlan leona + osnyx + ma27 ]; scope = "Team for Flying Circus employees who collectively maintain packages."; shortName = "Flying Circus employees"; From 8d0e5a3402aabe9a749913d9028b54bad4da32ab Mon Sep 17 00:00:00 2001 From: Maximilian Bosch Date: Fri, 1 Mar 2024 10:58:36 +0100 Subject: [PATCH 2/4] postgresqlPackages.anonymizer: init at 1.3.1 --- nixos/tests/all-tests.nix | 1 + nixos/tests/pg_anonymizer.nix | 54 +++++++++++++++++++ .../servers/sql/postgresql/ext/anonymizer.nix | 34 ++++++++++++ pkgs/servers/sql/postgresql/packages.nix | 2 + 4 files changed, 91 insertions(+) create mode 100644 nixos/tests/pg_anonymizer.nix create mode 100644 pkgs/servers/sql/postgresql/ext/anonymizer.nix diff --git a/nixos/tests/all-tests.nix b/nixos/tests/all-tests.nix index a5991abbfc88..9c0f67a9b10e 100644 --- a/nixos/tests/all-tests.nix +++ b/nixos/tests/all-tests.nix @@ -679,6 +679,7 @@ in { peering-manager = handleTest ./web-apps/peering-manager.nix {}; peertube = handleTestOn ["x86_64-linux"] ./web-apps/peertube.nix {}; peroxide = handleTest ./peroxide.nix {}; + pg_anonymizer = handleTest ./pg_anonymizer.nix {}; pgadmin4 = handleTest ./pgadmin4.nix {}; pgbouncer = handleTest ./pgbouncer.nix {}; pgjwt = handleTest ./pgjwt.nix {}; diff --git a/nixos/tests/pg_anonymizer.nix b/nixos/tests/pg_anonymizer.nix new file mode 100644 index 000000000000..601526272d71 --- /dev/null +++ b/nixos/tests/pg_anonymizer.nix @@ -0,0 +1,54 @@ +import ./make-test-python.nix ({ pkgs, lib, ... }: { + name = "pg_anonymizer"; + meta.maintainers = lib.teams.flyingcircus.members; + + nodes.machine = { + services.postgresql = { + enable = true; + extraPlugins = ps: [ ps.anonymizer ]; + settings.shared_preload_libraries = "anon"; + }; + }; + + testScript = '' + start_all() + machine.wait_for_unit("multi-user.target") + machine.wait_for_unit("postgresql.service") + + with subtest("Setup"): + machine.succeed("sudo -u postgres psql --command 'create database demo'") + machine.succeed( + "sudo -u postgres psql -d demo -f ${pkgs.writeText "init.sql" '' + create extension anon cascade; + select anon.init(); + create table player(id serial, name text, points int); + insert into player(id,name,points) values (1,'Foo', 23); + insert into player(id,name,points) values (2,'Bar',42); + security label for anon on column player.name is 'MASKED WITH FUNCTION anon.fake_last_name();'; + security label for anon on column player.points is 'MASKED WITH VALUE NULL'; + ''}" + ) + + def get_player_table_contents(): + return [ + x.split(',') for x in machine.succeed("sudo -u postgres psql -d demo --csv --command 'select * from player'").splitlines()[1:] + ] + + def check_anonymized_row(row, id, original_name): + assert row[0] == id, f"Expected first row to have ID {id}, but got {row[0]}" + assert row[1] != original_name, f"Expected first row to have a name other than {original_name}" + assert not bool(row[2]), "Expected points to be NULL in first row" + + with subtest("Check initial state"): + output = get_player_table_contents() + assert output[0] == ['1','Foo','23'], f"Expected first row from player table to be 1,Foo,23; got {output[0]}" + assert output[1] == ['2','Bar','42'], f"Expected first row from player table to be 2,Bar,42; got {output[1]}" + + with subtest("Anonymize"): + machine.succeed("sudo -u postgres psql -d demo --command 'select anon.anonymize_database();'") + output = get_player_table_contents() + + check_anonymized_row(output[0], '1', 'Foo') + check_anonymized_row(output[1], '2', 'Bar') + ''; +}) diff --git a/pkgs/servers/sql/postgresql/ext/anonymizer.nix b/pkgs/servers/sql/postgresql/ext/anonymizer.nix new file mode 100644 index 000000000000..0a90780df71e --- /dev/null +++ b/pkgs/servers/sql/postgresql/ext/anonymizer.nix @@ -0,0 +1,34 @@ +{ lib, stdenv, fetchFromGitLab, postgresql, nixosTests, ... }: + +stdenv.mkDerivation (finalAttrs: { + pname = "postgresql_anonymizer"; + version = "1.3.1"; + + src = fetchFromGitLab { + owner = "dalibo"; + repo = "postgresql_anonymizer"; + rev = finalAttrs.version; + hash = "sha256-Z5Oz/cIYDxFUZwQijRk4xAOUdOK0LWR+px8WOcs+Rs0="; + }; + + buildInputs = [ postgresql ]; + nativeBuildInputs = [ postgresql ] ++ lib.optional postgresql.jitSupport postgresql.llvm; + + strictDeps = true; + + makeFlags = [ + "BINDIR=${placeholder "out"}/bin" + "datadir=${placeholder "out"}/share/postgresql" + "pkglibdir=${placeholder "out"}/lib" + "DESTDIR=" + ]; + + passthru.tests = { inherit (nixosTests) pg_anonymizer; }; + + meta = with lib; { + description = "postgresql_anonymizer is an extension to mask or replace personally identifiable information (PII) or commercially sensitive data from a PostgreSQL database."; + homepage = "https://postgresql-anonymizer.readthedocs.io/en/stable/"; + maintainers = teams.flyingcircus.members; + license = licenses.postgresql; + }; +}) diff --git a/pkgs/servers/sql/postgresql/packages.nix b/pkgs/servers/sql/postgresql/packages.nix index 3b1c855e0de9..3252a59954c0 100644 --- a/pkgs/servers/sql/postgresql/packages.nix +++ b/pkgs/servers/sql/postgresql/packages.nix @@ -2,6 +2,8 @@ self: super: { age = super.callPackage ./ext/age.nix { }; + anonymizer = super.callPackage ./ext/anonymizer.nix { }; + apache_datasketches = super.callPackage ./ext/apache_datasketches.nix { }; citus = super.callPackage ./ext/citus.nix { }; From 0cdaede1444c143b411ebcb203b1a7d2f97a3949 Mon Sep 17 00:00:00 2001 From: Maximilian Bosch Date: Wed, 13 Mar 2024 14:36:20 +0100 Subject: [PATCH 3/4] pg-dump-anon: init at 1.3.1 This is a Go program inside the sources of `postgresql_anonymizer` that allows to perform database dumps, but with anonymized data. I figured that it's a little awkward to have a client program to be part of the extension package. So I decided to create a second package called `pg-dump-anon`. Since it's one repository, both share `version` & `src`. Also extended the VM test to make sure we're getting properly anonymized data when dumping with `pg_dump_anon`. --- nixos/tests/pg_anonymizer.nix | 54 ++++++++++++++++--- pkgs/by-name/pg/pg-dump-anon/package.nix | 32 +++++++++++ .../servers/sql/postgresql/ext/anonymizer.nix | 24 ++++----- 3 files changed, 90 insertions(+), 20 deletions(-) create mode 100644 pkgs/by-name/pg/pg-dump-anon/package.nix diff --git a/nixos/tests/pg_anonymizer.nix b/nixos/tests/pg_anonymizer.nix index 601526272d71..2960108e37c3 100644 --- a/nixos/tests/pg_anonymizer.nix +++ b/nixos/tests/pg_anonymizer.nix @@ -2,7 +2,8 @@ import ./make-test-python.nix ({ pkgs, lib, ... }: { name = "pg_anonymizer"; meta.maintainers = lib.teams.flyingcircus.members; - nodes.machine = { + nodes.machine = { pkgs, ... }: { + environment.systemPackages = [ pkgs.pg-dump-anon ]; services.postgresql = { enable = true; extraPlugins = ps: [ ps.anonymizer ]; @@ -39,16 +40,55 @@ import ./make-test-python.nix ({ pkgs, lib, ... }: { assert row[1] != original_name, f"Expected first row to have a name other than {original_name}" assert not bool(row[2]), "Expected points to be NULL in first row" - with subtest("Check initial state"): - output = get_player_table_contents() + def find_xsv_in_dump(dump, sep=','): + """ + Expecting to find a CSV (for pg_dump_anon) or TSV (for pg_dump) structure, looking like + + COPY public.player ... + 1,Shields, + 2,Salazar, + \. + + in the given dump (the commas are tabs in case of pg_dump). + Extract the CSV lines and split by `sep`. + """ + + try: + from itertools import dropwhile, takewhile + return [x.split(sep) for x in list(takewhile( + lambda x: x != "\\.", + dropwhile( + lambda x: not x.startswith("COPY public.player"), + dump.splitlines() + ) + ))[1:]] + except: + print(f"Dump to process: {dump}") + raise + + def check_original_data(output): assert output[0] == ['1','Foo','23'], f"Expected first row from player table to be 1,Foo,23; got {output[0]}" assert output[1] == ['2','Bar','42'], f"Expected first row from player table to be 2,Bar,42; got {output[1]}" - with subtest("Anonymize"): - machine.succeed("sudo -u postgres psql -d demo --command 'select anon.anonymize_database();'") - output = get_player_table_contents() - + def check_anonymized_rows(output): check_anonymized_row(output[0], '1', 'Foo') check_anonymized_row(output[1], '2', 'Bar') + + with subtest("Check initial state"): + check_original_data(get_player_table_contents()) + + with subtest("Anonymous dumps"): + check_original_data(find_xsv_in_dump( + machine.succeed("sudo -u postgres pg_dump demo"), + sep='\t' + )) + check_anonymized_rows(find_xsv_in_dump( + machine.succeed("sudo -u postgres pg_dump_anon -U postgres -h /run/postgresql -d demo"), + sep=',' + )) + + with subtest("Anonymize"): + machine.succeed("sudo -u postgres psql -d demo --command 'select anon.anonymize_database();'") + check_anonymized_rows(get_player_table_contents()) ''; }) diff --git a/pkgs/by-name/pg/pg-dump-anon/package.nix b/pkgs/by-name/pg/pg-dump-anon/package.nix new file mode 100644 index 000000000000..fedcf9f40b6a --- /dev/null +++ b/pkgs/by-name/pg/pg-dump-anon/package.nix @@ -0,0 +1,32 @@ +{ lib, fetchFromGitLab, buildGoModule, nixosTests, postgresql, makeWrapper }: + +buildGoModule rec { + pname = "pg-dump-anon"; + version = "1.3.1"; + src = fetchFromGitLab { + owner = "dalibo"; + repo = "postgresql_anonymizer"; + rev = version; + hash = "sha256-Z5Oz/cIYDxFUZwQijRk4xAOUdOK0LWR+px8WOcs+Rs0="; + }; + + sourceRoot = "${src.name}/pg_dump_anon"; + + vendorHash = "sha256-CwU1zoIayxvfnGL9kPdummPJiV+ECfSz4+q6gZGb8pw="; + + passthru.tests = { inherit (nixosTests) pg_anonymizer; }; + + nativeBuildInputs = [ makeWrapper ]; + postInstall = '' + wrapProgram $out/bin/pg_dump_anon \ + --prefix PATH : ${lib.makeBinPath [ postgresql ]} + ''; + + meta = with lib; { + description = "Export databases with data being anonymized with the anonymizer extension"; + homepage = "https://postgresql-anonymizer.readthedocs.io/en/stable/"; + maintainers = teams.flyingcircus.members; + license = licenses.postgresql; + mainProgram = "pg_dump_anon"; + }; +} diff --git a/pkgs/servers/sql/postgresql/ext/anonymizer.nix b/pkgs/servers/sql/postgresql/ext/anonymizer.nix index 0a90780df71e..f7e65b06a9fa 100644 --- a/pkgs/servers/sql/postgresql/ext/anonymizer.nix +++ b/pkgs/servers/sql/postgresql/ext/anonymizer.nix @@ -1,15 +1,9 @@ -{ lib, stdenv, fetchFromGitLab, postgresql, nixosTests, ... }: +{ lib, stdenv, pg-dump-anon, postgresql, runtimeShell }: stdenv.mkDerivation (finalAttrs: { pname = "postgresql_anonymizer"; - version = "1.3.1"; - src = fetchFromGitLab { - owner = "dalibo"; - repo = "postgresql_anonymizer"; - rev = finalAttrs.version; - hash = "sha256-Z5Oz/cIYDxFUZwQijRk4xAOUdOK0LWR+px8WOcs+Rs0="; - }; + inherit (pg-dump-anon) version src passthru; buildInputs = [ postgresql ]; nativeBuildInputs = [ postgresql ] ++ lib.optional postgresql.jitSupport postgresql.llvm; @@ -23,12 +17,16 @@ stdenv.mkDerivation (finalAttrs: { "DESTDIR=" ]; - passthru.tests = { inherit (nixosTests) pg_anonymizer; }; + postInstall = '' + cat >$out/bin/pg_dump_anon.sh <<'EOF' + #!${runtimeShell} + echo "This script is deprecated by upstream. To use the new script," + echo "please install pkgs.pg-dump-anon." + exit 1 + EOF + ''; - meta = with lib; { + meta = pg-dump-anon.meta // { description = "postgresql_anonymizer is an extension to mask or replace personally identifiable information (PII) or commercially sensitive data from a PostgreSQL database."; - homepage = "https://postgresql-anonymizer.readthedocs.io/en/stable/"; - maintainers = teams.flyingcircus.members; - license = licenses.postgresql; }; }) From b4f8ebd2955e2c3ca9f37110b05646f9bd2670ac Mon Sep 17 00:00:00 2001 From: Maximilian Bosch Date: Thu, 14 Mar 2024 09:50:46 +0100 Subject: [PATCH 4/4] postgresqlPackages.anonymizer: update description Co-authored-by: Mario Rodas --- pkgs/servers/sql/postgresql/ext/anonymizer.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkgs/servers/sql/postgresql/ext/anonymizer.nix b/pkgs/servers/sql/postgresql/ext/anonymizer.nix index f7e65b06a9fa..430911d40108 100644 --- a/pkgs/servers/sql/postgresql/ext/anonymizer.nix +++ b/pkgs/servers/sql/postgresql/ext/anonymizer.nix @@ -27,6 +27,6 @@ stdenv.mkDerivation (finalAttrs: { ''; meta = pg-dump-anon.meta // { - description = "postgresql_anonymizer is an extension to mask or replace personally identifiable information (PII) or commercially sensitive data from a PostgreSQL database."; + description = "Extension to mask or replace personally identifiable information (PII) or commercially sensitive data from a PostgreSQL database"; }; })