From f124c7368678ce28618d92f3d071a379c936c72a Mon Sep 17 00:00:00 2001 From: Dee Anzorge Date: Tue, 2 Jan 2024 20:29:15 +0100 Subject: [PATCH] nginx: change etags for statically compressed files served from store Per RFC 9110, [section 8.8.1][1], different representations of the same resource should have different Etags: > A strong validator is unique across all versions of all > representations associated with a particular resource over time. > However, there is no implication of uniqueness across representations > of different resources (i.e., the same strong validator might be in > use for representations of multiple resources at the same time and > does not imply that those representations are equivalent). When serving statically compressed files (i.e., when there is an existing corresponding .gz/.br/etc. file on disk), Nginx sends the Etag marked as strong. These tags should be different for each compressed format (as shown in an explicit example in section [8.8.3.3][2] of the RFC). Upstream Etags are composed of the file modification timestamp and content length, and the latter generally changes between these representations. The previous implementation of Nix-specific Etags for things served from store used the store hash. This is fine to share between different files, but it becomes a problem for statically compressed versions of the same file, as it means Nginx was serving different representations of the same resource with the same Etag, marked as strong. This patch addresses this by imitating the upstream Nginx behavior, and appending the value of content length to the store hash. 
[1]: https://www.rfc-editor.org/rfc/rfc9110.html#name-validator-fields [2]: https://www.rfc-editor.org/rfc/rfc9110.html#name-example-entity-tags-varying --- doc/packages/nginx.section.md | 2 +- nixos/tests/all-tests.nix | 1 + nixos/tests/nginx-etag-compression.nix | 45 +++++++++++++++++++ pkgs/servers/http/nginx/generic.nix | 2 +- pkgs/servers/http/nginx/nix-etag-1.15.4.patch | 43 +++++++++++++----- 5 files changed, 80 insertions(+), 13 deletions(-) create mode 100644 nixos/tests/nginx-etag-compression.nix diff --git a/doc/packages/nginx.section.md b/doc/packages/nginx.section.md index 0704b534e5f7..41241304ceff 100644 --- a/doc/packages/nginx.section.md +++ b/doc/packages/nginx.section.md @@ -8,4 +8,4 @@ HTTP has a couple of different mechanisms for caching to prevent clients from ha Fortunately, HTTP supports an alternative (and more effective) caching mechanism: the [`ETag`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag) response header. The value of the `ETag` header specifies some identifier for the particular content that the server is sending (e.g., a hash). When a client makes a second request for the same resource, it sends that value back in an `If-None-Match` header. If the ETag value is unchanged, then the server does not need to resend the content. -As of NixOS 19.09, the nginx package in Nixpkgs is patched such that when nginx serves a file out of `/nix/store`, the hash in the store path is used as the `ETag` header in the HTTP response, thus providing proper caching functionality. This happens automatically; you do not need to do modify any configuration to get this behavior. +As of NixOS 19.09, the nginx package in Nixpkgs is patched such that when nginx serves a file out of `/nix/store`, the hash in the store path is used as the `ETag` header in the HTTP response, thus providing proper caching functionality. 
With NixOS 24.05 and later, the `ETag` additionally includes the response content length, to ensure files served with static compression do not share `ETag`s with their uncompressed version. This `ETag` functionality is enabled automatically; you do not need to modify any configuration to get this behavior. diff --git a/nixos/tests/all-tests.nix b/nixos/tests/all-tests.nix index 02e3e91e2e3d..664d649d36d9 100644 --- a/nixos/tests/all-tests.nix +++ b/nixos/tests/all-tests.nix @@ -583,6 +583,7 @@ in { nginx = handleTest ./nginx.nix {}; nginx-auth = handleTest ./nginx-auth.nix {}; nginx-etag = handleTest ./nginx-etag.nix {}; + nginx-etag-compression = handleTest ./nginx-etag-compression.nix {}; nginx-globalredirect = handleTest ./nginx-globalredirect.nix {}; nginx-http3 = handleTest ./nginx-http3.nix {}; nginx-modsecurity = handleTest ./nginx-modsecurity.nix {}; diff --git a/nixos/tests/nginx-etag-compression.nix b/nixos/tests/nginx-etag-compression.nix new file mode 100644 index 000000000000..67493ae29984 --- /dev/null +++ b/nixos/tests/nginx-etag-compression.nix @@ -0,0 +1,45 @@ +import ./make-test-python.nix { + name = "nginx-etag-compression"; + + nodes.machine = { pkgs, lib, ... }: { + services.nginx = { + enable = true; + recommendedGzipSettings = true; + virtualHosts.default = { + root = pkgs.runCommandLocal "testdir" {} '' + mkdir "$out" + cat > "$out/index.html" <hash = 1; +@@ -1692,16 +1694,82 @@ ngx_http_set_etag(ngx_http_request_t *r) + etag->next = NULL; ngx_str_set(&etag->key, "ETag"); - etag->value.data = ngx_pnalloc(r->pool, NGX_OFF_T_LEN + NGX_TIME_T_LEN + 3); - if (etag->value.data == NULL) { - etag->hash = 0; - return NGX_ERROR; ++ // Upstream nginx uses file mod timestamp and content-length for Etag, but ++ // files in the Nix store have their timestamps reset, so that doesn't work. ++ // Instead, when serving from the Nix store, we use the hash from the store ++ // path and content-length. 
++ // ++ // Every file under the given store path will share the same store path ++ // hash. It is fine to serve different resources with the same Etag, but ++ // different representations of the same resource (e.g. the same file, but ++ // gzip-compressed) should have different Etags. Thus, we also append ++ // content-length, which should be different when the response is compressed. ++ + err = ngx_errno; + real = ngx_realpath(clcf->root.data, NULL); + ngx_set_errno(err); @@ -35,8 +46,10 @@ index cb49ef74..7b456993 100644 + && real[NIX_STORE_LEN] == '/' + && real[NIX_STORE_LEN + 1] != '\0') + { -+ ptr1 = real + NIX_STORE_LEN; -+ *ptr1 = '"'; ++ // extract the hash from a path formatted like ++ // /nix/store/hashhere1234-pname-1.0.0 ++ // +1 to skip the leading / ++ ptr1 = real + NIX_STORE_LEN + 1; + + ptr2 = (u_char *) ngx_strchr(ptr1, '-'); + @@ -46,11 +59,11 @@ index cb49ef74..7b456993 100644 + return NGX_ERROR; + } + -+ *ptr2++ = '"'; + *ptr2 = '\0'; + -+ etag->value.len = ngx_strlen(ptr1); -+ etag->value.data = ngx_pnalloc(r->pool, etag->value.len); ++ // hash + content-length + quotes and hyphen. Note that the ++ // content-length part of the string can vary in length. 
++ etag->value.data = ngx_pnalloc(r->pool, ngx_strlen(ptr1) + NGX_OFF_T_LEN + 3); + + if (etag->value.data == NULL) { + ngx_free(real); @@ -58,9 +71,18 @@ index cb49ef74..7b456993 100644 + return NGX_ERROR; + } + -+ ngx_memcpy(etag->value.data, ptr1, etag->value.len); ++ ++ // set value.data content to "{hash}-{content-length}" (including quote ++ // marks), and set value.len to the length of the resulting string ++ etag->value.len = ngx_sprintf(etag->value.data, "\"\%s-%xO\"", ++ ptr1, ++ r->headers_out.content_length_n) ++ - etag->value.data; ++ + ngx_http_clear_last_modified(r); + } else { ++ // outside of Nix store, use the upstream Nginx logic for etags ++ + etag->value.data = ngx_pnalloc(r->pool, NGX_OFF_T_LEN + NGX_TIME_T_LEN + 3); + + if (etag->value.data == NULL) { @@ -82,4 +104,3 @@ index cb49ef74..7b456993 100644 + ngx_free(real); r->headers_out.etag = etag; -