安全的 Shell 字符串插值方案
安全的 Shell String Interpolation
2025-03-22 我们来看一个简单的例子:获取用户的名字并打印一个彩虹色的欢迎横幅。一个没有经验的开发者可能会这样解决这个问题:
import { exec } from "node:child_process"
import { promisify } from "node:util"
// Promise-returning wrapper around child_process.exec.
const execPromise = promisify(exec)
const username = prompt("Hello, what's your name?")
// SECURITY VULNERABILITY (intentional, for demonstration): the untrusted name is
// spliced straight into the command line that the shell then parses.
const { stdout: banner } = await execPromise(`figlet "Welcome, ${username}" | lolcat -f`)
console.log(banner)
这会像你期望的那样工作:
但是,当你使用不受信任的用户输入运行这段代码时,你可能会注意到这个 Bug!
构造的命令是:
figlet "Welcome, "; ps; echo "" | lolcat -f
这种 Bug 令人惊讶地经常出现在生产环境中,尽管通常是在使用受信任的输入时。即使使用受信任的输入,它也可能成为一个问题,因为当你的参数包含空格或其他特殊字符时,你会遇到奇怪的 Bug。
一个显而易见的解决方案是使用 execFile
而不是 exec
,并将参数直接传递给命令,而无需 shell 解析用户输入。 就像这样:
import { execFile } from "node:child_process"
import { promisify } from "node:util"
// Promise-returning wrapper around child_process.execFile; the returned promise
// also exposes the spawned process as `.child`.
const execFilePromise = promisify(execFile)
const username = prompt("Hello, what's your name?")
// No shell is involved, so both processes are spawned by hand and figlet's
// stdout is piped into lolcat's stdin.
const lolcat = execFilePromise("lolcat", ["-f"])
const figletProcess = execFile("figlet", [`Welcome, ${username}`])
figletProcess.stdout.pipe(lolcat.child.stdin)
const { stdout } = await lolcat
console.log(stdout)
我认为这是一个相当丑陋的解决方案。 另一种方法是继续使用 shell,但将输入作为环境变量传递:
import { exec } from "node:child_process"
import { promisify } from "node:util"
// Promise-returning wrapper around child_process.exec.
const execPromise = promisify(exec)
const username = prompt("Hello, what's your name?")
// The command text is a constant; the untrusted name only reaches the shell
// as the value of the $username environment variable.
const command = 'figlet "Welcome, $username" | lolcat -f'
const childEnv = { ...process.env, username }
const { stdout: banner } = await execPromise(command, { env: childEnv })
console.log(banner)
我们得到了相同且正确的结果。
在 JavaScript 中,我们可以更进一步! Tagged templates 允许你编写函数,接收模板字面量的参数,并返回你想要的任何东西。 这是一个安全插值的示例实现:
import { exec as exec } from "child_process"
import { promisify } from "util"
// Promise-returning wrapper around child_process.exec.
const execPromise = promisify(exec)

/**
 * Tagged-template function that runs a shell command without splicing the
 * interpolated values into the command text.
 *
 * Each interpolated value is stringified and stored in an environment variable
 * named `_val1`, `_val2`, ...; the command text receives a `${_valN}` reference
 * in its place.
 *
 * @param fragments Literal parts of the template.
 * @param values Interpolated values, in order of appearance.
 * @returns Whatever the promisified `exec` resolves to (`stdout` / `stderr`).
 */
async function shell(fragments: TemplateStringsArray, ...values: unknown[]) {
  const env = { ...process.env }
  let command = ""
  fragments.forEach((fragment, index) => {
    command += fragment
    // The last fragment has no interpolated value after it.
    if (index >= values.length) {
      return
    }
    const name = `_val${index + 1}`
    env[name] = `${values[index]}`
    command += "${" + name + "}"
  })
  return await execPromise(command, { env })
}
const username = prompt("Hello, what's your name?")
// The interpolation is handled by the `shell` tag, so the name is passed to the
// shell as an environment variable instead of command text.
const { stdout: banner } = await shell`figlet "Welcome, ${username}" | lolcat -f`
console.log(banner)
这样你就可以在 shell 命令中安全地对不受信任的用户输入进行字符串插值了! 不过仍然存在很多潜在的问题,具体取决于你使用的命令。例如,如果第一个参数以 -S
开头,则 env
命令将开始解析参数。 一般来说,要小心,当你将不受信任的用户输入传递给命令时,请确保该命令不会在给定恶意构造的输入时执行意外的操作。
现在,其他语言呢? 在大多数情况下,你能做的最好的事情是使用环境变量方法。 这是一个 Python 示例:
import subprocess
import os
def shell(command, **values):
    """Run ``command`` through the shell with ``values`` exposed as environment variables.

    The command text is passed to the shell unchanged; each keyword argument
    becomes an environment variable of the same name, so the command can refer
    to it as ``"$name"`` without the value ever being parsed as shell syntax.

    Args:
        command: Shell command line to execute.
        **values: Extra environment variables. Values are converted with
            ``str()`` because the process environment only accepts strings.

    Returns:
        The ``subprocess.CompletedProcess`` with text ``stdout`` / ``stderr``.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero status.
    """
    env = os.environ | {name: str(value) for name, value in values.items()}
    return subprocess.run(
        command,
        shell=True,
        check=True,
        text=True,
        capture_output=True,
        env=env,
    )
# Ask for the name, then hand it to the shell only as the $username
# environment variable; the command text itself stays constant.
username = input("Hello, what's your name? ")
command = 'figlet "Welcome, $username" | lolcat -f'
banner = shell(command, username=username)
print(banner.stdout)
有一个提案 PEP 750,它将实现类似的功能:
shell(t'figlet "Welcome, {username}" | lolcat -f')
预计将在 Python 3.14 中发布。
然而,除了 JavaScript 之外,Swift 是我发现的唯一一种目前就具备等效功能的语言。 它允许你以类似于 JavaScript tagged templates 的方式,编写遵循 ExpressibleByStringInterpolation
的结构。
自从我十一岁以来,我就没有写过一行 Swift 代码,而且我不想为了一个快速的概念验证而去学习 Swift,所以这段代码主要由 LLM 辅助生成。
import Foundation
/// A shell command that is executed as soon as it is created from a string literal.
///
/// Interpolated values are never spliced into the command text. Each one is
/// stored in an environment variable (`_val1`, `_val2`, ...) and the command
/// receives a `${_valN}` reference instead, so the shell expands it as data.
struct Shell: ExpressibleByStringInterpolation {
    private let command: String
    private let env: [String: String]

    /// Combined standard output and standard error of the command, or `""`
    /// if the process could not be launched.
    var result: String

    /// Accumulates the command text and the environment variables that carry
    /// the interpolated values.
    struct Interpolation: StringInterpolationProtocol {
        var command = ""
        var env: [String: String] = [:]
        private var argCount = 0

        init(literalCapacity: Int, interpolationCount: Int) {}

        /// Literal segments are appended to the command text verbatim.
        mutating func appendLiteral(_ literal: String) {
            command += literal
        }

        /// Stores the value's string form in the next `_valN` variable and
        /// appends a `${_valN}` reference to the command text.
        mutating func appendInterpolation(_ value: Any) {
            argCount += 1
            let varName = "_val\(argCount)"
            env[varName] = "\(value)"
            command += "${\(varName)}"
        }
    }

    init(stringLiteral value: String) {
        self.init(command: value, env: [:])
    }

    init(stringInterpolation: Interpolation) {
        self.init(command: stringInterpolation.command, env: stringInterpolation.env)
    }

    /// Shared initializer: stores the inputs and runs the command once.
    private init(command: String, env: [String: String]) {
        self.command = command
        self.env = env
        // The literal initializers cannot throw, so `try?` turns a launch
        // failure into nil and `??` substitutes an empty result.
        result = (try? Shell.run(command, extraEnvironment: env)) ?? ""
    }

    /// Runs `command` with `/bin/sh -c`, adding `extraEnvironment` on top of
    /// the current process environment, and returns stdout and stderr combined.
    ///
    /// - Throws: Whatever `Process.run()` throws when the shell cannot be launched.
    private static func run(_ command: String, extraEnvironment: [String: String]) throws -> String {
        let process = Process()
        process.executableURL = URL(fileURLWithPath: "/bin/sh")
        process.arguments = ["-c", command]
        process.environment = ProcessInfo.processInfo.environment
            .merging(extraEnvironment) { _, new in new }

        // Unlike the Python and JS versions, stdout and stderr share one pipe.
        let pipe = Pipe()
        process.standardOutput = pipe
        process.standardError = pipe

        try process.run()
        // Drain the pipe BEFORE waiting: a child that writes more than the pipe
        // buffer blocks until someone reads, so waiting first would deadlock.
        let data = pipe.fileHandleForReading.readDataToEndOfFile()
        process.waitUntilExit()
        // Lossy decoding keeps the output even if it contains invalid UTF-8.
        return String(decoding: data, as: UTF8.self)
    }
}
print("What is your name?", terminator: " ")
// readLine() returns nil at end of input (for example when stdin is closed);
// fall back to an empty name instead of crashing on a force unwrap.
let username = readLine() ?? ""
// Swift has no single-quoted string literals, so the double quotes inside the
// figlet command are escaped.
let banner: Shell = "figlet \"Welcome, \(username)\" | lolcat -f"
print(banner.result)
附录:有问题的 Python 版本
我想看看是否有可能使 shell(f'figlet "Welcome, {username}" | lolcat -f')
不受 shell 注入的影响。 任何人都不要使用这个,但我设法把它做成了我想要的样子。 它使用一个装饰器函数,该函数获取函数的源代码,使用正则表达式对其进行操作(我想你可以为此使用 AST,但是使用正则表达式编辑代码会使乐趣更大 – 希望没有人将其投入生产),并执行新更新的代码。
import inspect
import os
import re
import subprocess
import uuid
def with_shell(f):
    """Decorator that makes ``shell(f"...")`` calls inside ``f`` injection-safe.

    Demonstration only; do not use in production. On every call the wrapper
    fetches the source of ``f``, rewrites each ``shell(f"...")`` invocation into
    a ``shell.internal(command, placeholder, *values)`` call using regular
    expressions, executes the rewritten definition and calls the result.

    Limitations: ``@with_shell`` must be the only decorator and occupy exactly
    one source line, and the brace handling does not understand ``{{`` escapes
    or format specifications.

    Args:
        f: Function whose source is available to ``inspect.getsource``.

    Returns:
        A wrapper that returns whatever the rewritten function returns.
    """
    import textwrap

    def wrapper(*args, **kwargs):
        # Horrifying hack within a horrifying hack: drop the first source line,
        # which is the decorator itself, to avoid infinite recursion. This
        # breaks if there is another decorator above this one.
        source_lines = inspect.getsource(f).splitlines()[1:]
        # Dedent so that functions defined in an indented scope still compile.
        function_source = textwrap.dedent("\n".join(source_lines))
        pattern = r'shell\(f(["\']{1,3})(.*?)\1\)'
        # A fixed marker such as "?" could legitimately occur in the command,
        # so an unpredictable placeholder is generated instead.
        placeholder = str(uuid.uuid4())

        def replace_shell_invocation(match):
            cmd = match.group(2)
            placeholders = []

            def replace_braces(match):
                # Remember the interpolated expression and leave a marker behind.
                placeholders.append(match.group(1))
                return placeholder

            cmd_no_braces = re.sub(r"\{(.+?)\}", replace_braces, cmd)
            placeholders_str = ", ".join(placeholders)
            if placeholders_str:
                return f'shell.internal({repr(cmd_no_braces)}, "{placeholder}", {placeholders_str})'
            return f'shell.internal({repr(cmd_no_braces)}, "{placeholder}")'

        new_code = re.sub(
            pattern, replace_shell_invocation, function_source, flags=re.DOTALL
        )
        # Execute the rewritten definition against the function's own globals,
        # but collect the new function in a private namespace so the decorated
        # global is not overwritten and the lookup does not depend on the
        # module this decorator lives in.
        namespace = {}
        exec(compile(new_code, f.__code__.co_filename, "exec"), f.__globals__, namespace)
        # Calling f(*args, **kwargs) would run the original, unrewritten code.
        return namespace[f.__name__](*args, **kwargs)

    return wrapper
def shell(command: str) -> "subprocess.CompletedProcess[str]":
    """
    Placeholder for running a shell command without shell injection.

    This function is never meant to execute: inside a function decorated with
    ``with_shell``, calls of the form ``shell(f"...")`` are rewritten into
    ``shell.internal(...)`` calls before the function runs.

    Args:
        command: The shell command to run
    Returns:
        Nothing when called directly. Rewritten calls return the
        ``subprocess.CompletedProcess`` of the command; read ``.stdout`` for
        its output.
    Raises:
        NotImplementedError: Always, because reaching this body means the
            caller was not decorated with ``with_shell``.
    """
    raise NotImplementedError("Make sure you're using the with_shell decorator")
def shell_internal(command, placeholder, *args):
    """Run ``command`` with each ``placeholder`` bound to one value from ``args``.

    The n-th occurrence of ``placeholder`` (counting from zero) is replaced by a
    ``${_valN}`` reference and the n-th value is exported as the environment
    variable ``_valN``, so values reach the shell as data, not as command text.

    Args:
        command: Shell command containing zero or more ``placeholder`` markers.
        placeholder: Marker string standing in for an interpolated value.
        *args: Values for the markers, in order; converted with ``str()``.

    Returns:
        The ``subprocess.CompletedProcess`` with text ``stdout`` / ``stderr``.

    Raises:
        IndexError: If there are more markers than values.
        subprocess.CalledProcessError: If the command exits with a non-zero status.
    """
    i = 0
    env = {}
    while placeholder in command:
        var_name = f"_val{i}"
        # The process environment only accepts strings.
        env[var_name] = str(args[i])
        # Replace only the first remaining marker; replacing all of them at
        # once would bind every interpolation to the first value.
        command = command.replace(placeholder, f"${{{var_name}}}", 1)
        i += 1
    return subprocess.run(
        command,
        shell=True,
        check=True,
        text=True,
        capture_output=True,
        env=os.environ | env,
    )
# Expose the real implementation as an attribute of the stub: the source
# rewritten by with_shell calls `shell.internal`.
shell.internal = shell_internal


# NOTE: with_shell drops the first line of this function's source, so the
# decorator has to stay on a single line directly above `def`.
@with_shell
def main():
    username = input("Hello, what's your name? ")
    # with_shell rewrites the call below before it runs, so the name reaches
    # the shell through an environment variable rather than the command text.
    banner = shell(f'figlet "Welcome, {username}" | lolcat -f')
    print(banner.stdout)


main()