LLM 能否将 C 代码转换为 ASM,再到规格说明,最终生成可用的 Z/80 Speccy 磁带?当然可以
文章探讨了使用 LLM 将 C 代码转换为汇编代码,进而生成 Z/80 Speccy 磁带的可能性。作者通过一个简单的 C 语言销售税计算器示例,展示了代码生成、编译、反汇编的过程,最终得到汇编代码。
LLM 能否将 C 代码转换为 ASM,再到规格说明,最终生成可用的 Z/80 Speccy 磁带?当然可以
Geoffrey Huntley 2025年3月2日
Damien Guard 勾起了我的好奇心,另外一些人也希望看到更多证据,证明如今可以廉价、轻松地重写软件,或克隆现有的“源代码可用”(source available)业务(参见文章底部)。
那么,让我们通过向 LLM 发送以下提示来创建一个玩具应用程序:
创建一个 C 语言的销售税计算器。食品的税率为 10%,个人卫生用品的税率为 0%,其他所有商品的税率为 25%。
这会生成以下代码:
#include <stdio.h>
#include <string.h>
#include <ctype.h>
// Tax rates, expressed as a fraction of the item price (0.10 means 10%).
#define FOOD_TAX 0.10
#define HYGIENE_TAX 0.00
#define DEFAULT_TAX 0.25
// Item category codes: returned by getCategory() and consumed by calculateTax().
#define FOOD 1
#define HYGIENE 2
#define OTHER 3
/*
 * Prompts on stdout for an item category and reads a single
 * non-whitespace character from stdin.
 *
 * Returns FOOD for 'F', HYGIENE for 'H' and OTHER for 'O'; the input is
 * upper-cased first, so lower-case letters are accepted as well. Any other
 * character prints a warning and also returns OTHER.
 */
int getCategory() {
char choice;
printf("Enter item category (F for Food, H for Hygiene, O for Other): ");
// The leading space in " %c" skips pending whitespace (e.g. a newline still
// sitting in the input buffer) before the character is read.
// NOTE(review): the return value of scanf is ignored; if nothing is read
// (EOF or read error), choice is used below without having been assigned.
scanf(" %c", &choice);
// NOTE(review): choice is a plain char; <ctype.h> functions expect a value
// representable as unsigned char (or EOF) -- consider casting before the call.
choice = toupper(choice);
switch(choice) {
case 'F': return FOOD;
case 'H': return HYGIENE;
case 'O': return OTHER;
default:
// Unrecognized input is reported and then treated exactly like 'O'.
printf("Invalid category. Defaulting to Other.\n");
return OTHER;
}
}
/*
 * Computes the tax amount (not the tax-inclusive total) for one item.
 *
 * price:    item price before tax.
 * category: one of the category codes FOOD, HYGIENE or OTHER.
 *
 * Returns price multiplied by the rate for the category: FOOD_TAX for FOOD,
 * HYGIENE_TAX for HYGIENE, and DEFAULT_TAX for OTHER or any other value.
 */
double calculateTax(double price, int category) {
switch(category) {
case FOOD: return price * FOOD_TAX;
case HYGIENE: return price * HYGIENE_TAX;
case OTHER: return price * DEFAULT_TAX;
// Unknown category codes fall back to the same rate as OTHER.
default: return price * DEFAULT_TAX;
}
}
/*
 * Interactive entry point: repeatedly reads an item price and category,
 * prints the price, the tax and the total, then asks whether to continue.
 * The loop repeats while the answer is 'Y' or 'y'. Always returns 0.
 */
int main() {
double price;
int category;
// Starts as 'Y' so the loop body runs at least once.
char another = 'Y';
// NOTE(review): another is a plain char passed to toupper; <ctype.h>
// functions expect a value representable as unsigned char (or EOF).
while (toupper(another) == 'Y') {
printf("Enter item price: $");
// NOTE(review): the return value of scanf is ignored; on non-numeric input
// price is not assigned and the calculation below reads it anyway.
scanf("%lf", &price);
category = getCategory();
double tax = calculateTax(price, category);
double total = price + tax;
printf("Price: $%.2f\n", price);
printf("Tax: $%.2f\n", tax);
printf("Total: $%.2f\n\n", total);
printf("Calculate another item? (Y/N): ");
// The leading space skips the newline left behind by the previous input.
// NOTE(review): return value unchecked here as well; at EOF another keeps
// its previous value.
scanf(" %c", &another);
}
return 0;
}
现在,让我们通过 `gcc calc.c -o calc` 编译它,然后使用 `objdump -d calc > calc.asm` 将生成的可执行文件反汇编为汇编代码,结果如下:
calc: file format elf64-x86-64
Disassembly of section .init:
0000000000001000 <_init>:
1000: f3 0f 1e fa endbr64
1004: 48 83 ec 08 sub $0x8,%rsp
1008: 48 8b 05 d9 2f 00 00 mov 0x2fd9(%rip),%rax # 3fe8 <__gmon_start__@Base>
100f: 48 85 c0 test %rax,%rax
1012: 74 02 je 1016 <_init+0x16>
1014: ff d0 call *%rax
1016: 48 83 c4 08 add $0x8,%rsp
101a: c3 ret
Disassembly of section .plt:
0000000000001020 <.plt>:
1020: ff 35 7a 2f 00 00 push 0x2f7a(%rip) # 3fa0 <_GLOBAL_OFFSET_TABLE_+0x8>
1026: f2 ff 25 7b 2f 00 00 bnd jmp *0x2f7b(%rip) # 3fa8 <_GLOBAL_OFFSET_TABLE_+0x10>
102d: 0f 1f 00 nopl (%rax)
1030: f3 0f 1e fa endbr64
1034: 68 00 00 00 00 push $0x0
1039: f2 e9 e1 ff ff ff bnd jmp 1020 <_init+0x20>
103f: 90 nop
1040: f3 0f 1e fa endbr64
1044: 68 01 00 00 00 push $0x1
1049: f2 e9 d1 ff ff ff bnd jmp 1020 <_init+0x20>
104f: 90 nop
1050: f3 0f 1e fa endbr64
1054: 68 02 00 00 00 push $0x2
1059: f2 e9 c1 ff ff ff bnd jmp 1020 <_init+0x20>
105f: 90 nop
1060: f3 0f 1e fa endbr64
1064: 68 03 00 00 00 push $0x3
1069: f2 e9 b1 ff ff ff bnd jmp 1020 <_init+0x20>
106f: 90 nop
1070: f3 0f 1e fa endbr64
1074: 68 04 00 00 00 push $0x4
1079: f2 e9 a1 ff ff ff bnd jmp 1020 <_init+0x20>
107f: 90 nop
Disassembly of section .plt.got:
0000000000001080 <__cxa_finalize@plt>:
1080: f3 0f 1e fa endbr64
1084: f2 ff 25 6d 2f 00 00 bnd jmp *0x2f6d(%rip) # 3ff8 <__cxa_finalize@GLIBC_2.2.5>
108b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
Disassembly of section .plt.sec:
0000000000001090 <toupper@plt>:
1090: f3 0f 1e fa endbr64
1094: f2 ff 25 15 2f 00 00 bnd jmp *0x2f15(%rip) # 3fb0 <toupper@GLIBC_2.2.5>
109b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
00000000000010a0 <puts@plt>:
10a0: f3 0f 1e fa endbr64
10a4: f2 ff 25 0d 2f 00 00 bnd jmp *0x2f0d(%rip) # 3fb8 <puts@GLIBC_2.2.5>
10ab: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
00000000000010b0 <__stack_chk_fail@plt>:
10b0: f3 0f 1e fa endbr64
10b4: f2 ff 25 05 2f 00 00 bnd jmp *0x2f05(%rip) # 3fc0 <__stack_chk_fail@GLIBC_2.4>
10bb: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
00000000000010c0 <printf@plt>:
10c0: f3 0f 1e fa endbr64
10c4: f2 ff 25 fd 2e 00 00 bnd jmp *0x2efd(%rip) # 3fc8 <printf@GLIBC_2.2.5>
10cb: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
00000000000010d0 <__isoc99_scanf@plt>:
10d0: f3 0f 1e fa endbr64
10d4: f2 ff 25 f5 2e 00 00 bnd jmp *0x2ef5(%rip) # 3fd0 <__isoc99_scanf@GLIBC_2.7>
10db: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
Disassembly of section .text:
00000000000010e0 <_start>:
10e0: f3 0f 1e fa endbr64
10e4: 31 ed xor %ebp,%ebp
10e6: 49 89 d1 mov %rdx,%r9
10e9: 5e pop %rsi
10ea: 48 89 e2 mov %rsp,%rdx
10ed: 48 83 e4 f0 and $0xfffffffffffffff0,%rsp
10f1: 50 push %rax
10f2: 54 push %rsp
10f3: 45 31 c0 xor %r8d,%r8d
10f6: 31 c9 xor %ecx,%ecx
10f8: 48 8d 3d fd 01 00 00 lea 0x1fd(%rip),%rdi # 12fc <main>
10ff: ff 15 d3 2e 00 00 call *0x2ed3(%rip) # 3fd8 <__libc_start_main@GLIBC_2.34>
1105: f4 hlt
1106: 66 2e 0f 1f 84 00 00 cs nopw 0x0(%rax,%rax,1)
110d: 00 00 00
0000000000001110 <deregister_tm_clones>:
1110: 48 8d 3d f9 2e 00 00 lea 0x2ef9(%rip),%rdi # 4010 <__TMC_END__>
1117: 48 8d 05 f2 2e 00 00 lea 0x2ef2(%rip),%rax # 4010 <__TMC_END__>
111e: 48 39 f8 cmp %rdi,%rax
1121: 74 15 je 1138 <deregister_tm_clones+0x28>
1123: 48 8b 05 b6 2e 00 00 mov 0x2eb6(%rip),%rax # 3fe0 <_ITM_deregisterTMCloneTable@Base>
112a: 48 85 c0 test %rax,%rax
112d: 74 09 je 1138 <deregister_tm_clones+0x28>
112f: ff e0 jmp *%rax
1131: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
1138: c3 ret
1139: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
0000000000001140 <register_tm_clones>:
1140: 48 8d 3d c9 2e 00 00 lea 0x2ec9(%rip),%rdi # 4010 <__TMC_END__>
1147: 48 8d 35 c2 2e 00 00 lea 0x2ec2(%rip),%rsi # 4010 <__TMC_END__>
114e: 48 29 fe sub %rdi,%rsi
1151: 48 89 f0 mov %rsi,%rax
1154: 48 c1 ee 3f shr $0x3f,%rsi
1158: 48 c1 f8 03 sar $0x3,%rax
115c: 48 01 c6 add %rax,%rsi
115f: 48 d1 fe sar %rsi
1162: 74 14 je 1178 <register_tm_clones+0x38>
1164: 48 8b 05 85 2e 00 00 mov 0x2e85(%rip),%rax # 3ff0 <_ITM_registerTMCloneTable@Base>
116b: 48 85 c0 test %rax,%rax
116e: 74 08 je 1178 <register_tm_clones+0x38>
1170: ff e0 jmp *%rax
1172: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
1178: c3 ret
1179: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
0000000000001180 <__do_global_dtors_aux>:
1180: f3 0f 1e fa endbr64
1184: 80 3d 85 2e 00 00 00 cmpb $0x0,0x2e85(%rip) # 4010 <__TMC_END__>
118b: 75 2b jne 11b8 <__do_global_dtors_aux+0x38>
118d: 55 push %rbp
118e: 48 83 3d 62 2e 00 00 cmpq $0x0,0x2e62(%rip) # 3ff8 <__cxa_finalize@GLIBC_2.2.5>
1195: 00
1196: 48 89 e5 mov %rsp,%rbp
1199: 74 0c je 11a7 <__do_global_dtors_aux+0x27>
119b: 48 8b 3d 66 2e 00 00 mov 0x2e66(%rip),%rdi # 4008 <__dso_handle>
11a2: e8 d9 fe ff ff call 1080 <__cxa_finalize@plt>
11a7: e8 64 ff ff ff call 1110 <deregister_tm_clones>
11ac: c6 05 5d 2e 00 00 01 movb $0x1,0x2e5d(%rip) # 4010 <__TMC_END__>
11b3: 5d pop %rbp
11b4: c3 ret
11b5: 0f 1f 00 nopl (%rax)
11b8: c3 ret
11b9: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
00000000000011c0 <frame_dummy>:
11c0: f3 0f 1e fa endbr64
11c4: e9 77 ff ff ff jmp 1140 <register_tm_clones>
00000000000011c9 <getCategory>:
11c9: f3 0f 1e fa endbr64
11cd: 55 push %rbp
11ce: 48 89 e5 mov %rsp,%rbp
11d1: 48 83 ec 10 sub $0x10,%rsp
11d5: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax
11dc: 00 00
11de: 48 89 45 f8 mov %rax,-0x8(%rbp)
11e2: 31 c0 xor %eax,%eax
11e4: 48 8d 05 1d 0e 00 00 lea 0xe1d(%rip),%rax # 2008 <_IO_stdin_used+0x8>
11eb: 48 89 c7 mov %rax,%rdi
11ee: b8 00 00 00 00 mov $0x0,%eax
11f3: e8 c8 fe ff ff call 10c0 <printf@plt>
11f8: 48 8d 45 f7 lea -0x9(%rbp),%rax
11fc: 48 89 c6 mov %rax,%rsi
11ff: 48 8d 05 41 0e 00 00 lea 0xe41(%rip),%rax # 2047 <_IO_stdin_used+0x47>
1206: 48 89 c7 mov %rax,%rdi
1209: b8 00 00 00 00 mov $0x0,%eax
120e: e8 bd fe ff ff call 10d0 <__isoc99_scanf@plt>
1213: 0f b6 45 f7 movzbl -0x9(%rbp),%eax
1217: 0f be c0 movsbl %al,%eax
121a: 89 c7 mov %eax,%edi
121c: e8 6f fe ff ff call 1090 <toupper@plt>
1221: 88 45 f7 mov %al,-0x9(%rbp)
1224: 0f b6 45 f7 movzbl -0x9(%rbp),%eax
1228: 0f be c0 movsbl %al,%eax
122b: 83 f8 4f cmp $0x4f,%eax
122e: 74 1f je 124f <getCategory+0x86>
1230: 83 f8 4f cmp $0x4f,%eax
1233: 7f 21 jg 1256 <getCategory+0x8d>
1235: 83 f8 46 cmp $0x46,%eax
1238: 74 07 je 1241 <getCategory+0x78>
123a: 83 f8 48 cmp $0x48,%eax
123d: 74 09 je 1248 <getCategory+0x7f>
123f: eb 15 jmp 1256 <getCategory+0x8d>
1241: b8 01 00 00 00 mov $0x1,%eax
1246: eb 22 jmp 126a <getCategory+0xa1>
1248: b8 02 00 00 00 mov $0x2,%eax
124d: eb 1b jmp 126a <getCategory+0xa1>
124f: b8 03 00 00 00 mov $0x3,%eax
1254: eb 14 jmp 126a <getCategory+0xa1>
1256: 48 8d 05 f3 0d 00 00 lea 0xdf3(%rip),%rax # 2050 <_IO_stdin_used+0x50>
125d: 48 89 c7 mov %rax,%rdi
1260: e8 3b fe ff ff call 10a0 <puts@plt>
1265: b8 03 00 00 00 mov $0x3,%eax
126a: 48 8b 55 f8 mov -0x8(%rbp),%rdx
126e: 64 48 2b 14 25 28 00 sub %fs:0x28,%rdx
1275: 00 00
1277: 74 05 je 127e <getCategory+0xb5>
1279: e8 32 fe ff ff call 10b0 <__stack_chk_fail@plt>
127e: c9 leave
127f: c3 ret
0000000000001280 <calculateTax>:
1280: f3 0f 1e fa endbr64
1284: 55 push %rbp
1285: 48 89 e5 mov %rsp,%rbp
1288: f2 0f 11 45 f8 movsd %xmm0,-0x8(%rbp)
128d: 89 7d f4 mov %edi,-0xc(%rbp)
1290: 83 7d f4 03 cmpl $0x3,-0xc(%rbp)
1294: 74 36 je 12cc <calculateTax+0x4c>
1296: 83 7d f4 03 cmpl $0x3,-0xc(%rbp)
129a: 7f 43 jg 12df <calculateTax+0x5f>
129c: 83 7d f4 01 cmpl $0x1,-0xc(%rbp)
12a0: 74 08 je 12aa <calculateTax+0x2a>
12a2: 83 7d f4 02 cmpl $0x2,-0xc(%rbp)
12a6: 74 15 je 12bd <calculateTax+0x3d>
12a8: eb 35 jmp 12df <calculateTax+0x5f>
12aa: f2 0f 10 4d f8 movsd -0x8(%rbp),%xmm1
12af: f2 0f 10 05 21 0e 00 movsd 0xe21(%rip),%xmm0 # 20d8 <_IO_stdin_used+0xd8>
12b6: 00
12b7: f2 0f 59 c1 mulsd %xmm1,%xmm0
12bb: eb 33 jmp 12f0 <calculateTax+0x70>
12bd: f2 0f 10 4d f8 movsd -0x8(%rbp),%xmm1
12c2: 66 0f ef c0 pxor %xmm0,%xmm0
12c6: f2 0f 59 c1 mulsd %xmm1,%xmm0
12ca: eb 24 jmp 12f0 <calculateTax+0x70>
12cc: f2 0f 10 4d f8 movsd -0x8(%rbp),%xmm1
12d1: f2 0f 10 05 07 0e 00 movsd 0xe07(%rip),%xmm0 # 20e0 <_IO_stdin_used+0xe0>
12d8: 00
12d9: f2 0f 59 c1 mulsd %xmm1,%xmm0
12dd: eb 11 jmp 12f0 <calculateTax+0x70>
12df: f2 0f 10 4d f8 movsd -0x8(%rbp),%xmm1
12e4: f2 0f 10 05 f4 0d 00 movsd 0xdf4(%rip),%xmm0 # 20e0 <_IO_stdin_used+0xe0>
12eb: 00
12ec: f2 0f 59 c1 mulsd %xmm1,%xmm0
12f0: 66 48 0f 7e c0 movq %xmm0,%rax
12f5: 66 48 0f 6e c0 movq %rax,%xmm0
12fa: 5d pop %rbp
12fb: c3 ret
00000000000012fc <main>:
12fc: f3 0f 1e fa endbr64
1300: 55 push %rbp
1301: 48 89 e5 mov %rsp,%rbp
1304: 48 83 ec 30 sub $0x30,%rsp
1308: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax
130f: 00 00
1311: 48 89 45 f8 mov %rax,-0x8(%rbp)
1315: 31 c0 xor %eax,%eax
1317: c6 45 db 59 movb $0x59,-0x25(%rbp)
131b: e9 f1 00 00 00 jmp 1411 <main+0x115>
1320: 48 8d 05 50 0d 00 00 lea 0xd50(%rip),%rax # 2077 <_IO_stdin_used+0x77>
1327: 48 89 c7 mov %rax,%rdi
132a: b8 00 00 00 00 mov $0x0,%eax
132f: e8 8c fd ff ff call 10c0 <printf@plt>
1334: 48 8d 45 e0 lea -0x20(%rbp),%rax
1338: 48 89 c6 mov %rax,%rsi
133b: 48 8d 05 49 0d 00 00 lea 0xd49(%rip),%rax # 208b <_IO_stdin_used+0x8b>
1342: 48 89 c7 mov %rax,%rdi
1345: b8 00 00 00 00 mov $0x0,%eax
134a: e8 81 fd ff ff call 10d0 <__isoc99_scanf@plt>
134f: b8 00 00 00 00 mov $0x0,%eax
1354: e8 70 fe ff ff call 11c9 <getCategory>
1359: 89 45 dc mov %eax,-0x24(%rbp)
135c: 48 8b 45 e0 mov -0x20(%rbp),%rax
1360: 8b 55 dc mov -0x24(%rbp),%edx
1363: 89 d7 mov %edx,%edi
1365: 66 48 0f 6e c0 movq %rax,%xmm0
136a: e8 11 ff ff ff call 1280 <calculateTax>
136f: 66 48 0f 7e c0 movq %xmm0,%rax
1374: 48 89 45 e8 mov %rax