LLM 能否将 C 代码转换为 ASM,再到规格说明,最终生成可用的 Z/80 Speccy 磁带?当然可以。

LLM 能否将 C 代码转换为 ASM,再到规格说明,最终生成可用的 Z/80 Speccy 磁带?当然可以

Geoffrey Huntley 2025年3月2日

Damien Guard 激起了我和其他一些人的兴趣,他们想要更多证据表明,现在可以廉价、轻松地重写软件或克隆现有的“源代码可用”业务(参见文章底部)。

那么,让我们通过向 LLM 发送以下提示来创建一个玩具应用程序:

创建一个 C 语言的销售税计算器。食品的税率为 10%,个人卫生用品的税率为 0%,其他所有商品的税率为 25%。

这会生成以下代码:

#include <stdio.h>
#include <string.h>
#include <ctype.h>

// 税率
#define FOOD_TAX 0.10
#define HYGIENE_TAX 0.00
#define DEFAULT_TAX 0.25

// 商品类别
#define FOOD 1
#define HYGIENE 2
#define OTHER 3

int getCategory() {
  char choice;
  printf("Enter item category (F for Food, H for Hygiene, O for Other): ");
  scanf(" %c", &choice);
  
  choice = toupper(choice);
  
  switch(choice) {
    case 'F': return FOOD;
    case 'H': return HYGIENE;
    case 'O': return OTHER;
    default:
      printf("Invalid category. Defaulting to Other.\n");
      return OTHER;
  }
}

double calculateTax(double price, int category) {
  switch(category) {
    case FOOD: return price * FOOD_TAX;
    case HYGIENE: return price * HYGIENE_TAX;
    case OTHER: return price * DEFAULT_TAX;
    default: return price * DEFAULT_TAX;
  }
}

int main() {
  double price;
  int category;
  char another = 'Y';
  
  while (toupper(another) == 'Y') {
    printf("Enter item price: $");
    scanf("%lf", &price);
    
    category = getCategory();
    
    double tax = calculateTax(price, category);
    double total = price + tax;
    
    printf("Price: $%.2f\n", price);
    printf("Tax: $%.2f\n", tax);
    printf("Total: $%.2f\n\n", total);
    
    printf("Calculate another item? (Y/N): ");
    scanf(" %c", &another);
  }
  
  return 0;
}

现在,让我们通过 gcc calc.c -o calc 编译它,然后使用 objdump -d calc > calc.asm 将其转换为汇编代码,结果如下:

calc:   file format elf64-x86-64

Disassembly of section .init:
0000000000001000 <_init>:
  1000:    f3 0f 1e fa       endbr64 
  1004:    48 83 ec 08       sub  $0x8,%rsp
  1008:    48 8b 05 d9 2f 00 00  mov  0x2fd9(%rip),%rax    # 3fe8 <__gmon_start__@Base>
  100f:    48 85 c0        test  %rax,%rax
  1012:    74 02          je   1016 <_init+0x16>
  1014:    ff d0          call  *%rax
  1016:    48 83 c4 08       add  $0x8,%rsp
  101a:    c3           ret  
Disassembly of section .plt:
0000000000001020 <.plt>:
  1020:    ff 35 7a 2f 00 00    push  0x2f7a(%rip)    # 3fa0 <_GLOBAL_OFFSET_TABLE_+0x8>
  1026:    f2 ff 25 7b 2f 00 00  bnd jmp *0x2f7b(%rip)    # 3fa8 <_GLOBAL_OFFSET_TABLE_+0x10>
  102d:    0f 1f 00        nopl  (%rax)
  1030:    f3 0f 1e fa       endbr64 
  1034:    68 00 00 00 00     push  $0x0
  1039:    f2 e9 e1 ff ff ff    bnd jmp 1020 <_init+0x20>
  103f:    90           nop
  1040:    f3 0f 1e fa       endbr64 
  1044:    68 01 00 00 00     push  $0x1
  1049:    f2 e9 d1 ff ff ff    bnd jmp 1020 <_init+0x20>
  104f:    90           nop
  1050:    f3 0f 1e fa       endbr64 
  1054:    68 02 00 00 00     push  $0x2
  1059:    f2 e9 c1 ff ff ff    bnd jmp 1020 <_init+0x20>
  105f:    90           nop
  1060:    f3 0f 1e fa       endbr64 
  1064:    68 03 00 00 00     push  $0x3
  1069:    f2 e9 b1 ff ff ff    bnd jmp 1020 <_init+0x20>
  106f:    90           nop
  1070:    f3 0f 1e fa       endbr64 
  1074:    68 04 00 00 00     push  $0x4
  1079:    f2 e9 a1 ff ff ff    bnd jmp 1020 <_init+0x20>
  107f:    90           nop
Disassembly of section .plt.got:
0000000000001080 <__cxa_finalize@plt>:
  1080:    f3 0f 1e fa       endbr64 
  1084:    f2 ff 25 6d 2f 00 00  bnd jmp *0x2f6d(%rip)    # 3ff8 <__cxa_finalize@GLIBC_2.2.5>
  108b:    0f 1f 44 00 00     nopl  0x0(%rax,%rax,1)
Disassembly of section .plt.sec:
0000000000001090 <toupper@plt>:
  1090:    f3 0f 1e fa       endbr64 
  1094:    f2 ff 25 15 2f 00 00  bnd jmp *0x2f15(%rip)    # 3fb0 <toupper@GLIBC_2.2.5>
  109b:    0f 1f 44 00 00     nopl  0x0(%rax,%rax,1)
00000000000010a0 <puts@plt>:
  10a0:    f3 0f 1e fa       endbr64 
  10a4:    f2 ff 25 0d 2f 00 00  bnd jmp *0x2f0d(%rip)    # 3fb8 <puts@GLIBC_2.2.5>
  10ab:    0f 1f 44 00 00     nopl  0x0(%rax,%rax,1)
00000000000010b0 <__stack_chk_fail@plt>:
  10b0:    f3 0f 1e fa       endbr64 
  10b4:    f2 ff 25 05 2f 00 00  bnd jmp *0x2f05(%rip)    # 3fc0 <__stack_chk_fail@GLIBC_2.4>
  10bb:    0f 1f 44 00 00     nopl  0x0(%rax,%rax,1)
00000000000010c0 <printf@plt>:
  10c0:    f3 0f 1e fa       endbr64 
  10c4:    f2 ff 25 fd 2e 00 00  bnd jmp *0x2efd(%rip)    # 3fc8 <printf@GLIBC_2.2.5>
  10cb:    0f 1f 44 00 00     nopl  0x0(%rax,%rax,1)
00000000000010d0 <__isoc99_scanf@plt>:
  10d0:    f3 0f 1e fa       endbr64 
  10d4:    f2 ff 25 f5 2e 00 00  bnd jmp *0x2ef5(%rip)    # 3fd0 <__isoc99_scanf@GLIBC_2.7>
  10db:    0f 1f 44 00 00     nopl  0x0(%rax,%rax,1)
Disassembly of section .text:
00000000000010e0 <_start>:
  10e0:    f3 0f 1e fa       endbr64 
  10e4:    31 ed          xor  %ebp,%ebp
  10e6:    49 89 d1        mov  %rdx,%r9
  10e9:    5e           pop  %rsi
  10ea:    48 89 e2        mov  %rsp,%rdx
  10ed:    48 83 e4 f0       and  $0xfffffffffffffff0,%rsp
  10f1:    50           push  %rax
  10f2:    54           push  %rsp
  10f3:    45 31 c0        xor  %r8d,%r8d
  10f6:    31 c9          xor  %ecx,%ecx
  10f8:    48 8d 3d fd 01 00 00  lea  0x1fd(%rip),%rdi    # 12fc <main>
  10ff:    ff 15 d3 2e 00 00    call  *0x2ed3(%rip)    # 3fd8 <__libc_start_main@GLIBC_2.34>
  1105:    f4           hlt  
  1106:    66 2e 0f 1f 84 00 00  cs nopw 0x0(%rax,%rax,1)
  110d:    00 00 00 
0000000000001110 <deregister_tm_clones>:
  1110:    48 8d 3d f9 2e 00 00  lea  0x2ef9(%rip),%rdi    # 4010 <__TMC_END__>
  1117:    48 8d 05 f2 2e 00 00  lea  0x2ef2(%rip),%rax    # 4010 <__TMC_END__>
  111e:    48 39 f8        cmp  %rdi,%rax
  1121:    74 15          je   1138 <deregister_tm_clones+0x28>
  1123:    48 8b 05 b6 2e 00 00  mov  0x2eb6(%rip),%rax    # 3fe0 <_ITM_deregisterTMCloneTable@Base>
  112a:    48 85 c0        test  %rax,%rax
  112d:    74 09          je   1138 <deregister_tm_clones+0x28>
  112f:    ff e0          jmp  *%rax
  1131:    0f 1f 80 00 00 00 00  nopl  0x0(%rax)
  1138:    c3           ret  
  1139:    0f 1f 80 00 00 00 00  nopl  0x0(%rax)
0000000000001140 <register_tm_clones>:
  1140:    48 8d 3d c9 2e 00 00  lea  0x2ec9(%rip),%rdi    # 4010 <__TMC_END__>
  1147:    48 8d 35 c2 2e 00 00  lea  0x2ec2(%rip),%rsi    # 4010 <__TMC_END__>
  114e:    48 29 fe        sub  %rdi,%rsi
  1151:    48 89 f0        mov  %rsi,%rax
  1154:    48 c1 ee 3f       shr  $0x3f,%rsi
  1158:    48 c1 f8 03       sar  $0x3,%rax
  115c:    48 01 c6        add  %rax,%rsi
  115f:    48 d1 fe        sar  %rsi
  1162:    74 14          je   1178 <register_tm_clones+0x38>
  1164:    48 8b 05 85 2e 00 00  mov  0x2e85(%rip),%rax    # 3ff0 <_ITM_registerTMCloneTable@Base>
  116b:    48 85 c0        test  %rax,%rax
  116e:    74 08          je   1178 <register_tm_clones+0x38>
  1170:    ff e0          jmp  *%rax
  1172:    66 0f 1f 44 00 00    nopw  0x0(%rax,%rax,1)
  1178:    c3           ret  
  1179:    0f 1f 80 00 00 00 00  nopl  0x0(%rax)
0000000000001180 <__do_global_dtors_aux>:
  1180:    f3 0f 1e fa       endbr64 
  1184:    80 3d 85 2e 00 00 00  cmpb  $0x0,0x2e85(%rip)    # 4010 <__TMC_END__>
  118b:    75 2b          jne  11b8 <__do_global_dtors_aux+0x38>
  118d:    55           push  %rbp
  118e:    48 83 3d 62 2e 00 00  cmpq  $0x0,0x2e62(%rip)    # 3ff8 <__cxa_finalize@GLIBC_2.2.5>
  1195:    00 
  1196:    48 89 e5        mov  %rsp,%rbp
  1199:    74 0c          je   11a7 <__do_global_dtors_aux+0x27>
  119b:    48 8b 3d 66 2e 00 00  mov  0x2e66(%rip),%rdi    # 4008 <__dso_handle>
  11a2:    e8 d9 fe ff ff     call  1080 <__cxa_finalize@plt>
  11a7:    e8 64 ff ff ff     call  1110 <deregister_tm_clones>
  11ac:    c6 05 5d 2e 00 00 01  movb  $0x1,0x2e5d(%rip)    # 4010 <__TMC_END__>
  11b3:    5d           pop  %rbp
  11b4:    c3           ret  
  11b5:    0f 1f 00        nopl  (%rax)
  11b8:    c3           ret  
  11b9:    0f 1f 80 00 00 00 00  nopl  0x0(%rax)
00000000000011c0 <frame_dummy>:
  11c0:    f3 0f 1e fa       endbr64 
  11c4:    e9 77 ff ff ff     jmp  1140 <register_tm_clones>
00000000000011c9 <getCategory>:
  11c9:    f3 0f 1e fa       endbr64 
  11cd:    55           push  %rbp
  11ce:    48 89 e5        mov  %rsp,%rbp
  11d1:    48 83 ec 10       sub  $0x10,%rsp
  11d5:    64 48 8b 04 25 28 00  mov  %fs:0x28,%rax
  11dc:    00 00 
  11de:    48 89 45 f8       mov  %rax,-0x8(%rbp)
  11e2:    31 c0          xor  %eax,%eax
  11e4:    48 8d 05 1d 0e 00 00  lea  0xe1d(%rip),%rax    # 2008 <_IO_stdin_used+0x8>
  11eb:    48 89 c7        mov  %rax,%rdi
  11ee:    b8 00 00 00 00     mov  $0x0,%eax
  11f3:    e8 c8 fe ff ff     call  10c0 <printf@plt>
  11f8:    48 8d 45 f7       lea  -0x9(%rbp),%rax
  11fc:    48 89 c6        mov  %rax,%rsi
  11ff:    48 8d 05 41 0e 00 00  lea  0xe41(%rip),%rax    # 2047 <_IO_stdin_used+0x47>
  1206:    48 89 c7        mov  %rax,%rdi
  1209:    b8 00 00 00 00     mov  $0x0,%eax
  120e:    e8 bd fe ff ff     call  10d0 <__isoc99_scanf@plt>
  1213:    0f b6 45 f7       movzbl -0x9(%rbp),%eax
  1217:    0f be c0        movsbl %al,%eax
  121a:    89 c7          mov  %eax,%edi
  121c:    e8 6f fe ff ff     call  1090 <toupper@plt>
  1221:    88 45 f7        mov  %al,-0x9(%rbp)
  1224:    0f b6 45 f7       movzbl -0x9(%rbp),%eax
  1228:    0f be c0        movsbl %al,%eax
  122b:    83 f8 4f        cmp  $0x4f,%eax
  122e:    74 1f          je   124f <getCategory+0x86>
  1230:    83 f8 4f        cmp  $0x4f,%eax
  1233:    7f 21          jg   1256 <getCategory+0x8d>
  1235:    83 f8 46        cmp  $0x46,%eax
  1238:    74 07          je   1241 <getCategory+0x78>
  123a:    83 f8 48        cmp  $0x48,%eax
  123d:    74 09          je   1248 <getCategory+0x7f>
  123f:    eb 15          jmp  1256 <getCategory+0x8d>
  1241:    b8 01 00 00 00     mov  $0x1,%eax
  1246:    eb 22          jmp  126a <getCategory+0xa1>
  1248:    b8 02 00 00 00     mov  $0x2,%eax
  124d:    eb 1b          jmp  126a <getCategory+0xa1>
  124f:    b8 03 00 00 00     mov  $0x3,%eax
  1254:    eb 14          jmp  126a <getCategory+0xa1>
  1256:    48 8d 05 f3 0d 00 00  lea  0xdf3(%rip),%rax    # 2050 <_IO_stdin_used+0x50>
  125d:    48 89 c7        mov  %rax,%rdi
  1260:    e8 3b fe ff ff     call  10a0 <puts@plt>
  1265:    b8 03 00 00 00     mov  $0x3,%eax
  126a:    48 8b 55 f8       mov  -0x8(%rbp),%rdx
  126e:    64 48 2b 14 25 28 00  sub  %fs:0x28,%rdx
  1275:    00 00 
  1277:    74 05          je   127e <getCategory+0xb5>
  1279:    e8 32 fe ff ff     call  10b0 <__stack_chk_fail@plt>
  127e:    c9           leave 
  127f:    c3           ret  
0000000000001280 <calculateTax>:
  1280:    f3 0f 1e fa       endbr64 
  1284:    55           push  %rbp
  1285:    48 89 e5        mov  %rsp,%rbp
  1288:    f2 0f 11 45 f8     movsd %xmm0,-0x8(%rbp)
  128d:    89 7d f4        mov  %edi,-0xc(%rbp)
  1290:    83 7d f4 03       cmpl  $0x3,-0xc(%rbp)
  1294:    74 36          je   12cc <calculateTax+0x4c>
  1296:    83 7d f4 03       cmpl  $0x3,-0xc(%rbp)
  129a:    7f 43          jg   12df <calculateTax+0x5f>
  129c:    83 7d f4 01       cmpl  $0x1,-0xc(%rbp)
  12a0:    74 08          je   12aa <calculateTax+0x2a>
  12a2:    83 7d f4 02       cmpl  $0x2,-0xc(%rbp)
  12a6:    74 15          je   12bd <calculateTax+0x3d>
  12a8:    eb 35          jmp  12df <calculateTax+0x5f>
  12aa:    f2 0f 10 4d f8     movsd -0x8(%rbp),%xmm1
  12af:    f2 0f 10 05 21 0e 00  movsd 0xe21(%rip),%xmm0    # 20d8 <_IO_stdin_used+0xd8>
  12b6:    00 
  12b7:    f2 0f 59 c1       mulsd %xmm1,%xmm0
  12bb:    eb 33          jmp  12f0 <calculateTax+0x70>
  12bd:    f2 0f 10 4d f8     movsd -0x8(%rbp),%xmm1
  12c2:    66 0f ef c0       pxor  %xmm0,%xmm0
  12c6:    f2 0f 59 c1       mulsd %xmm1,%xmm0
  12ca:    eb 24          jmp  12f0 <calculateTax+0x70>
  12cc:    f2 0f 10 4d f8     movsd -0x8(%rbp),%xmm1
  12d1:    f2 0f 10 05 07 0e 00  movsd 0xe07(%rip),%xmm0    # 20e0 <_IO_stdin_used+0xe0>
  12d8:    00 
  12d9:    f2 0f 59 c1       mulsd %xmm1,%xmm0
  12dd:    eb 11          jmp  12f0 <calculateTax+0x70>
  12df:    f2 0f 10 4d f8     movsd -0x8(%rbp),%xmm1
  12e4:    f2 0f 10 05 f4 0d 00  movsd 0xdf4(%rip),%xmm0    # 20e0 <_IO_stdin_used+0xe0>
  12eb:    00 
  12ec:    f2 0f 59 c1       mulsd %xmm1,%xmm0
  12f0:    66 48 0f 7e c0     movq  %xmm0,%rax
  12f5:    66 48 0f 6e c0     movq  %rax,%xmm0
  12fa:    5d           pop  %rbp
  12fb:    c3           ret  
00000000000012fc <main>:
  12fc:    f3 0f 1e fa       endbr64 
  1300:    55           push  %rbp
  1301:    48 89 e5        mov  %rsp,%rbp
  1304:    48 83 ec 30       sub  $0x30,%rsp
  1308:    64 48 8b 04 25 28 00  mov  %fs:0x28,%rax
  130f:    00 00 
  1311:    48 89 45 f8       mov  %rax,-0x8(%rbp)
  1315:    31 c0          xor  %eax,%eax
  1317:    c6 45 db 59       movb  $0x59,-0x25(%rbp)
  131b:    e9 f1 00 00 00     jmp  1411 <main+0x115>
  1320:    48 8d 05 50 0d 00 00  lea  0xd50(%rip),%rax    # 2077 <_IO_stdin_used+0x77>
  1327:    48 89 c7        mov  %rax,%rdi
  132a:    b8 00 00 00 00     mov  $0x0,%eax
  132f:    e8 8c fd ff ff     call  10c0 <printf@plt>
  1334:    48 8d 45 e0       lea  -0x20(%rbp),%rax
  1338:    48 89 c6        mov  %rax,%rsi
  133b:    48 8d 05 49 0d 00 00  lea  0xd49(%rip),%rax    # 208b <_IO_stdin_used+0x8b>
  1342:    48 89 c7        mov  %rax,%rdi
  1345:    b8 00 00 00 00     mov  $0x0,%eax
  134a:    e8 81 fd ff ff     call  10d0 <__isoc99_scanf@plt>
  134f:    b8 00 00 00 00     mov  $0x0,%eax
  1354:    e8 70 fe ff ff     call  11c9 <getCategory>
  1359:    89 45 dc        mov  %eax,-0x24(%rbp)
  135c:    48 8b 45 e0       mov  -0x20(%rbp),%rax
  1360:    8b 55 dc        mov  -0x24(%rbp),%edx
  1363:    89 d7          mov  %edx,%edi
  1365:    66 48 0f 6e c0     movq  %rax,%xmm0
  136a:    e8 11 ff ff ff     call  1280 <calculateTax>
  136f:    66 48 0f 7e c0     movq  %xmm0,%rax
  1374:    48 89 45 e8       mov  %rax