安全的 Shell String Interpolation

2025-03-22 我们来看一个简单的例子:获取用户的名字并打印一个彩虹色的欢迎横幅。一个没有经验的开发者可能会这样解决这个问题:

import { exec } from "node:child_process"
import { promisify } from "node:util"
// Promise-returning wrapper around exec, which runs its command string through a shell.
const execPromise = promisify(exec)
const username = prompt("Hello, what's your name?")
// The name is pasted into the command text itself, so the shell parses whatever the user typed.
const banner = (await execPromise(`figlet "Welcome, ${username}" | lolcat -f`)).stdout // security vulnerability!
console.log(banner)

对于普通的输入,这会像你期望的那样工作。 但是,当你使用不受信任的用户输入运行这段代码时,你可能会注意到这个 Bug! 例如,如果用户输入的名字是 `"; ps; echo "`,构造出的命令就是:

figlet "Welcome, "; ps; echo "" | lolcat -f

这种 Bug 令人惊讶地经常出现在生产环境中,尽管通常是在使用受信任的输入时。即使使用受信任的输入,它也可能成为一个问题,因为当你的参数包含空格或其他特殊字符时,你会遇到奇怪的 Bug。

一个显而易见的解决方案是使用 execFile 而不是 exec,并将参数直接传递给命令,而无需 shell 解析用户输入。 就像这样:

import { execFile } from "node:child_process"
import { promisify } from "node:util"
const execFilePromise = promisify(execFile)
const username = prompt("Hello, what's your name?")
// Now we have to spawn the two processes by hand and pipe figlet's output into lolcat:
const lolcat = execFilePromise("lolcat", ["-f"])
// The name travels as a single argument-array entry instead of being part of a shell command string.
const figlet = execFile("figlet", [`Welcome, ${username}`]).stdout.pipe(lolcat.child.stdin)
console.log((await lolcat).stdout)

我认为这是一个相当丑陋的解决方案。 另一种方法是继续使用 shell,但将输入作为环境变量传递:

import { exec } from "node:child_process"
import { promisify } from "node:util"
const execPromise = promisify(exec)
const username = prompt("Hello, what's your name?")
// The command text is a constant; the name is supplied only through the `username` environment variable,
// which the command references as $username inside double quotes.
const banner = (await execPromise('figlet "Welcome, $username" | lolcat -f', { env: { ...process.env, username } })).stdout
console.log(banner)

我们得到了相同且正确的结果。 在 JavaScript 中,我们可以更进一步! Tagged templates 允许你编写函数,接收模板字面量的参数,并返回你想要的任何东西。 这是一个安全插值的示例实现:

import { exec as exec } from "child_process"
import { promisify } from "util"
const execPromise = promisify(exec)
/**
 * Tagged-template function that runs a shell command without splicing the
 * interpolated values into the command text.
 *
 * Every interpolated value is stringified and stored in an environment
 * variable named `_val1`, `_val2`, ... (numbered in order of appearance); the
 * command text receives a `${_valN}` reference in its place.
 *
 * @param fragments literal pieces of the template
 * @param values interpolated values, one between each pair of fragments
 * @returns whatever `execPromise` resolves to for the assembled command
 */
async function shell(fragments: TemplateStringsArray, ...values: unknown[]) {
 // Start from the current environment so the command still sees PATH and friends.
 const env = { ...process.env }
 let command = ""
 for (let index = 0; index < fragments.length; index++) {
  command += fragments[index]
  // There is one fragment more than there are values, so the last fragment has no value after it.
  if (index < values.length) {
   const name = `_val${index + 1}`
   env[name] = `${values[index]}`
   command += "${" + name + "}"
  }
 }
 const outcome = await execPromise(command, { env })
 return outcome
}
const username = prompt("Hello, what's your name?")
// Used as a template tag, shell receives username as a separate value rather than as part of the command text.
const banner = (await shell`figlet "Welcome, ${username}" | lolcat -f`).stdout
console.log(banner)

这让你可以安全地把不受信任的用户输入插值到 shell 命令中! 不过,仍然存在很多潜在的问题,具体取决于你调用的命令。 例如,如果第一个参数以 -S 开头,env 命令就会把它当作选项,并对其内容进行拆分解析。 一般来说,当你将不受信任的用户输入传递给命令时要格外小心,请确保该命令在收到恶意构造的输入时不会执行意外的操作。 那么,其他语言呢? 在大多数情况下,你能做的最好的事情就是使用环境变量的方法。 这是一个 Python 示例:

import subprocess
import os

def shell(command, **values):
  """Run ``command`` through the shell with ``values`` available as environment variables.

  The command text is passed to the shell unchanged; each keyword argument is
  added to a copy of the current environment, so the command can refer to it
  as ``$name`` (ideally inside double quotes) without the value ever becoming
  part of the command text.

  Args:
    command: Shell command line to execute.
    **values: Environment variables to set for the command. Values that are
      not ``str`` or ``bytes`` are converted with ``str()``.

  Returns:
    The ``subprocess.CompletedProcess`` with captured text ``stdout`` and
    ``stderr``.

  Raises:
    subprocess.CalledProcessError: The command exited with a non-zero status.
  """
  # subprocess only accepts string-like environment values; stringify anything
  # else (e.g. ints) so it behaves like ordinary string interpolation would.
  extra = {
    name: value if isinstance(value, (str, bytes)) else str(value)
    for name, value in values.items()
  }
  return subprocess.run(
    command,
    shell=True,
    check=True,
    text=True,
    capture_output=True,
    env={**os.environ, **extra},
  )

username = input("Hello, what's your name? ")
# The command string is a constant; the name is passed separately as the `username` keyword argument.
banner = shell('figlet "Welcome, $username" | lolcat -f', username=username)
print(banner.stdout)

有一个提案 PEP 750,它将实现类似的功能:

shell(t'figlet "Welcome, {username}" | lolcat -f') 

预计将在 Python 3.14 中发布。

然而,除了 JavaScript 之外,Swift 是我发现的唯一一门目前就具备等效功能的语言。 它允许你编写遵循 ExpressibleByStringInterpolation 协议的结构体,用法类似于 JavaScript 的 tagged templates。

自从我十一岁以来,我就没有写过一行 Swift 代码,而且我不想为了一个快速的概念验证而去学习 Swift,所以这段代码主要由 LLM 辅助生成。

import Foundation
/// A shell command written as a string literal and executed by `/bin/sh`.
///
/// Interpolated values are never spliced into the command text. Each one is
/// stored in an environment variable (`_val1`, `_val2`, …) and the command
/// text refers to it as `${_valN}`.
///
/// The command runs synchronously while the value is being initialized; its
/// output is then available in `result`.
struct Shell: ExpressibleByStringInterpolation {
  private let command: String
  private let env: [String: String]

  /// Combined stdout and stderr of the command decoded as UTF-8, or `""` when
  /// the process could not be launched or its output is not valid UTF-8.
  /// The exit status of the command is not inspected.
  var result: String

  /// Collects the command text and the environment variables that stand in
  /// for the interpolated values.
  struct Interpolation: StringInterpolationProtocol {
    var command = ""
    var env: [String: String] = [:]
    private var argCount = 0

    // The capacity hints are not used.
    init(literalCapacity: Int, interpolationCount: Int) {}

    /// Literal segments are copied into the command text unchanged.
    mutating func appendLiteral(_ literal: String) {
      command += literal
    }

    /// Stores `value` (converted with string interpolation) in the next
    /// `_valN` variable and appends a `${_valN}` reference to the command.
    mutating func appendInterpolation(_ value: Any) {
      argCount += 1
      let varName = "_val\(argCount)"
      env[varName] = "\(value)"
      command += "${\(varName)}"
    }
  }

  init(stringLiteral value: String) {
    self.init(command: value, env: [:])
  }

  init(stringInterpolation: Interpolation) {
    self.init(command: stringInterpolation.command, env: stringInterpolation.env)
  }

  /// Shared initializer: stores the command and its variables, then runs it.
  private init(command: String, env: [String: String]) {
    self.command = command
    self.env = env
    // run() is an instance method, so every stored property needs a value
    // before it can be called; start with an empty result.
    result = ""
    // `try?` turns a thrown error into nil, and `??` replaces nil with "".
    result = (try? run()) ?? ""
  }

  /// Runs `command` with `/bin/sh -c` and returns everything it wrote.
  ///
  /// Unlike the Python and JS implementations, stdout and stderr are combined
  /// into a single stream.
  ///
  /// - Throws: Whatever `Process.run()` throws when the process cannot be launched.
  private func run() throws -> String {
    let process = Process()
    process.executableURL = URL(fileURLWithPath: "/bin/sh")
    process.arguments = ["-c", command]
    // The interpolation variables are layered on top of the inherited environment.
    var environment = ProcessInfo.processInfo.environment
    for (key, value) in env {
      environment[key] = value
    }
    process.environment = environment
    let pipe = Pipe()
    process.standardOutput = pipe
    process.standardError = pipe
    try process.run()
    // Drain the pipe before waiting for the process. Waiting first deadlocks
    // once the output exceeds the pipe buffer: the child blocks on write while
    // this side blocks on exit.
    let data = pipe.fileHandleForReading.readDataToEndOfFile()
    process.waitUntilExit()
    return String(data: data, encoding: .utf8) ?? ""
  }
}
print("What is your name?", terminator: " ")
// readLine() returns nil when standard input is at end-of-file; fall back to an
// empty name instead of force-unwrapping, which would crash the program.
let username = readLine() ?? ""
// Swift doesn't have single-quoted strings, so we need to escape the double quotes
// in the figlet command.
let banner: Shell = "figlet \"Welcome, \(username)\" | lolcat -f"
print(banner.result)

附录:有问题的 Python 版本

我想看看是否有可能使 shell(f'figlet "Welcome, {username}" | lolcat -f') 不受 shell 注入的影响。 任何人都不要使用这个,但我设法把它做成了我想要的样子。 它使用一个装饰器函数,该函数获取函数的源代码,使用正则表达式对其进行操作(我想你可以为此使用 AST,但是使用正则表达式编辑代码会使乐趣更大 – 希望没有人将其投入生产),并执行新更新的代码。

import inspect
import os
import re
import subprocess
import uuid

def with_shell(f):
  """Decorator that rewrites ``shell(f"...")`` calls inside ``f``.

  On the first call, the wrapper fetches the source code of ``f`` and uses a
  regular expression to turn every ``shell(f"...")`` call into
  ``shell.internal(command, placeholder, *expressions)``: each ``{expression}``
  of the f-string is replaced by a random placeholder in the command text and
  passed as a separate argument instead. The rewritten source is compiled once
  and the resulting function is reused for every later call.

  This is a proof of concept, not production code. It works on source text, so
  it needs the source of ``f`` to be available, breaks if another decorator is
  stacked above this one, and does not understand f-string features such as
  ``{{`` escapes.

  Args:
    f: The function whose ``shell(f"...")`` calls should be rewritten.

  Returns:
    A wrapper that forwards its arguments to the rewritten function and
    returns that function's result.
  """
  import textwrap  # function-scope import: only this decorator needs it

  rewritten = None  # the rewritten function, built lazily on the first call

  def build_rewritten():
    # Dedent first so a definition nested in a class or function still
    # compiles as a top-level definition.
    source = textwrap.dedent(inspect.getsource(f))
    # Horrifying hack within a horrifying hack:
    # Delete the first line of the function source code,
    # which is the decorator itself.
    # This is necessary to avoid infinite recursion.
    # It will break if there's a decorator above this one.
    function_source = "\n".join(source.splitlines()[1:])
    pattern = r'shell\(f(["\']{1,3})(.*?)\1\)'
    # We could replace all of the interpolated values with "?" or something,
    # but what if the command itself contains a "?"?
    # Instead, we generate an unpredictable placeholder.
    placeholder = str(uuid.uuid4())

    def replace_shell_invocation(match):
      cmd = match.group(2)
      placeholders = []

      def replace_braces(match):
        # Remember the expression text and leave the placeholder in its place.
        placeholders.append(match.group(1))
        return placeholder

      cmd_no_braces = re.sub(r"\{(.+?)\}", replace_braces, cmd)
      placeholders_str = ", ".join(placeholders)
      if placeholders_str:
        return f'shell.internal({repr(cmd_no_braces)}, "{placeholder}", {placeholders_str})'
      return f'shell.internal({repr(cmd_no_braces)}, "{placeholder}")'

    new_code = re.sub(
      pattern, replace_shell_invocation, function_source, flags=re.DOTALL
    )
    # Execute the definition with the original globals but a private local
    # namespace: the new function sees the same module-level names as f, while
    # the name it is bound to does not overwrite anything in the module.
    namespace = {}
    exec(compile(new_code, f.__code__.co_filename, "exec"), f.__globals__, namespace)
    return namespace[f.__name__]

  def wrapper(*args, **kwargs):
    nonlocal rewritten
    if rewritten is None:
      rewritten = build_rewritten()
    # Propagate the result; dropping it would make every decorated function
    # return None.
    return rewritten(*args, **kwargs)

  return wrapper

def shell(command: str) -> "subprocess.CompletedProcess[str]":
  """
  Placeholder for the injection-safe shell call; it never runs anything itself.

  Inside a function decorated with ``with_shell``, calls of the form
  ``shell(f"...")`` are rewritten to ``shell.internal(...)`` before they
  execute, so this body is only reached when a call was not rewritten.

  Args:
    command: The shell command to run
  Returns:
    Nothing from this stub, which always raises. The rewritten call returns
    the ``subprocess.CompletedProcess`` produced by ``shell_internal``.
  Raises:
    NotImplementedError: Always.
  """
  raise NotImplementedError("Make sure you're using the with_shell decorator")

def shell_internal(command, placeholder, *args):
  """Run ``command`` with each ``placeholder`` bound to the matching value in ``args``.

  The n-th occurrence of ``placeholder`` in ``command`` is replaced by a
  reference to the environment variable ``_val{n}`` (counting from 0), and
  ``args[n]`` is stored in that variable, so the values reach the shell as
  data and never as command text.

  Args:
    command: Shell command containing zero or more copies of ``placeholder``.
    placeholder: Marker string that stands for an interpolated value.
    *args: One value per placeholder occurrence, in order. Values are
      converted with ``str()``.

  Returns:
    The ``subprocess.CompletedProcess`` with captured text ``stdout`` and
    ``stderr``.

  Raises:
    IndexError: ``command`` contains more placeholders than ``args`` has values.
    subprocess.CalledProcessError: The command exited with a non-zero status.
  """
  # Split on the placeholder and rebuild the command piece by piece. Calling
  # str.replace() without a count would swap every occurrence for the first
  # variable at once, so all interpolations would receive the first value.
  fragments = command.split(placeholder)
  env = {}
  pieces = [fragments[0]]
  for i, fragment in enumerate(fragments[1:]):
    var_name = f"_val{i}"
    # f-strings accept any object, but environment values must be strings.
    env[var_name] = str(args[i])
    pieces.append(f"${{{var_name}}}{fragment}")
  return subprocess.run(
    "".join(pieces),
    shell=True,
    check=True,
    text=True,
    capture_output=True,
    env=os.environ | env,
  )

shell.internal = shell_internal

@with_shell
def main():
  # Untrusted input, read straight from the terminal.
  username = input("Hello, what's your name? ")
  # The decorator rewrites the call on the next line before it runs, so the
  # name is passed as a separate argument instead of being formatted into the
  # command text.
  banner = shell(f'figlet "Welcome, {username}" | lolcat -f')
  print(banner.stdout)

main()

Mastodon