Rework the grammar to be friendly to injections

The goal is to later support multiple nix-shell directives within a single injection.
This commit is contained in:
2023-07-05 00:01:38 +00:00
parent d5adafbc03
commit c9d79b0734
5 changed files with 114 additions and 149 deletions

View File

@@ -2,26 +2,29 @@ module.exports = grammar({
name: 'nix_shell',
rules: {
// TODO: allow trailing whitespace on any line
source_file: $ => seq($.first_line, repeat($.next_line)),
first_line: $ => seq($._shebang_open, $._opt_ws, optional(seq($._env, $._ws)), $._nix_shell),
next_line: $ => seq($._newline, choice(
$.annotation_line,
repeat(/./),
)),
annotation_line: $ => seq($._shebang_open, $._opt_ws, $._nix_shell, optional($.nix_shell_args)),
source_file: $ => seq(
optional($._first_line),
repeat($._next_line),
),
_first_line: $ => choice(
$.nix_shell_directive,
repeat1(/./),
),
_next_line: $ => seq('\n', $._first_line),
nix_shell_directive: $ => seq(
/#![ \t]*nix-shell/,
repeat(seq($._ws, optional($._nix_shell_arg))),
),
// nix behaves as if it simply `exec`s whichever line contains `#! nix-shell`, in the style of `sh`'s `exec`,
// so arguments can be quoted, invoke subshells, interpolate environment variables, etc.
// that full scope cannot be covered, but the minimal scope here handles 99% of use cases.
nix_shell_args: $ => repeat1(seq($._ws, $._nix_shell_arg)),
_nix_shell_arg: $ => choice(
seq('-i', $._opt_ws, $.interpreter),
$.sh_arg,
$._sh_arg,
),
interpreter: $ => $.sh_arg,
sh_arg: $ => choice(
interpreter: $ => $._sh_arg,
_sh_arg: $ => choice(
$._sh_lit,
$._sh_quote1,
$._sh_quote2,
@@ -47,20 +50,12 @@ module.exports = grammar({
'"',
),
_shebang_open: $ => '#!',
// TODO: env accepts flags like `-v` or `--unset=NAME` or `VAR=VALUE` before `nix-shell`; these are not parsed yet.
_env: $ => choice(
'/usr/bin/env',
'/bin/env',
'env',
),
_nix_shell: $ => 'nix-shell',
// N.B.: this accepts more than it needs to:
// - shebang parser allows tab characters
// - #!nix-shell directives do not support tabs
// - wherever #!nix-shell directives allow whitespace, it's a *single* space -- never multiple
// - some places where whitespace is accepted (e.g. `#! nix-shell`), only a *single* space is accepted.
// it's not clear how much of this is intentional vs. an oversight in matching broader shell parsing,
// so err on the side of over-accepting in case a future nix-shell supports these.
_ws: $ => /[ \t]+/,
_opt_ws: $ => /[ \t]*/,
_newline: $ => '\n',
}
});