#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
逆向自 Calendar.dll 中的自定义 VM,恢复 FLAG 的脚本。
思路简述:
1. 把 .rdata 里的字节码抄出来(0x14001DA90 那一段)。
2. 按照 sub_140001270 的逻辑写一个 Python 版 VM 解释器。
3. 根据逆向分析得到的等式(全是 + / - / ^ / == 这种),在 Python 里
把各个字符算出来,组装出 flag,并用 VM 真实跑一遍校验。
"""
# ----------------------------
# 1. Recovered VM bytecode
# ----------------------------
# Layout, decoded with the opcode table that vm_run() implements:
#   0x000         JMP 0x3C -> skip the two message stubs, go to the entry point
#   0x002-0x01B   STOREs spelling "Wrong!\n", then OUT and HALT (HALT at offset 27)
#   0x01C-0x03B   STOREs spelling "Correct!\n", then OUT and HALT (HALT at offset 59)
#   0x03C-0x114   IN, then the chain of checks; each failed check does JNZ 0x02
#   0x115         JMP 0x1C -> all checks passed, go to the success stub
#   0x117         trailing 0x00; it follows the unconditional JMP above
# The trailing comment on each row is the offset of that row's first byte.
CODE = bytes([
0x0D,0x3C,0x01,0x06,0x0A,0x01,0x02,0x6F,0x01,0x00,0x57,0x01,0x05,0x21,0x01,0x03,  # 0x000
0x6E,0x01,0x01,0x72,0x01,0x04,0x67,0x01,0x07,0x00,0x11,0x00,0x01,0x04,0x65,0x01,  # 0x010
0x05,0x63,0x01,0x09,0x00,0x01,0x01,0x6F,0x01,0x03,0x72,0x01,0x07,0x21,0x01,0x02,  # 0x020
0x72,0x01,0x08,0x0A,0x01,0x00,0x43,0x01,0x06,0x74,0x11,0x00,0x10,0x02,0x00,0x00,  # 0x030
0x0C,0x00,0x66,0x0E,0x02,0x02,0x00,0x01,0x03,0x01,0x00,0x06,0x01,0x01,0x0C,0x01,  # 0x040
0x6D,0x0E,0x02,0x02,0x01,0x02,0x02,0x02,0x03,0x02,0x03,0x04,0x03,0x00,0x01,0x09,  # 0x050
0x00,0x02,0x0C,0x00,0x06,0x0E,0x02,0x03,0x00,0x02,0x09,0x00,0x03,0x0C,0x00,0x1C,  # 0x060
0x0E,0x02,0x03,0x00,0x01,0x09,0x00,0x03,0x0C,0x00,0x1A,0x0E,0x02,0x02,0x01,0x05,  # 0x070
0x02,0x02,0x06,0x02,0x03,0x07,0x0C,0x02,0x69,0x0E,0x02,0x03,0x00,0x01,0x09,0x00,  # 0x080
0x02,0x0C,0x00,0x21,0x0E,0x02,0x03,0x00,0x03,0x09,0x00,0x02,0x0C,0x00,0x3D,0x0E,  # 0x090
0x02,0x02,0x04,0x08,0x02,0x05,0x09,0x02,0x06,0x0A,0x0B,0x05,0x03,0x0E,0x02,0x03,  # 0x0A0
0x00,0x04,0x09,0x00,0x05,0x0C,0x00,0x17,0x0E,0x02,0x03,0x00,0x06,0x09,0x00,0x05,  # 0x0B0
0x0C,0x00,0x12,0x0E,0x02,0x02,0x01,0x0B,0x02,0x02,0x0C,0x02,0x03,0x0D,0x02,0x04,  # 0x0C0
0x0E,0x02,0x05,0x0F,0x02,0x06,0x10,0x02,0x07,0x11,0x0B,0x02,0x04,0x0E,0x02,0x03,  # 0x0D0
0x00,0x01,0x05,0x00,0x00,0x0C,0x00,0xBE,0x0E,0x02,0x09,0x00,0x00,0x0B,0x00,0x07,  # 0x0E0
0x0E,0x02,0x05,0x00,0x02,0x0C,0x00,0x32,0x0E,0x02,0x08,0x00,0x02,0x0C,0x00,0x30,  # 0x0F0
0x0E,0x02,0x06,0x00,0x05,0x0B,0x00,0x05,0x0E,0x02,0x02,0x00,0x04,0x07,0x06,0x00,  # 0x100
0x0C,0x06,0x02,0x0E,0x02,0x0D,0x1C,0x00  # 0x110
])
# Opcode -> mnemonic table. It is only consulted by the debug trace in
# vm_run(); the interpreter itself dispatches on the raw opcode numbers.
# The position of each name in the tuple is its opcode value. Opcode 15 has
# no mnemonic, so its slot holds None and is filtered out of the mapping.
OPMAP = {
    opcode: mnemonic
    for opcode, mnemonic in enumerate((
        "HALT",    # 0
        "STORE",   # 1
        "LDMEM",   # 2
        "MOV",     # 3
        "LDI",     # 4
        "ADD",     # 5
        "ADD_I",   # 6
        "SUB",     # 7
        "SUB_I",   # 8
        "XOR",     # 9
        "XOR_I",   # 10
        "CMP",     # 11
        "CMP_I",   # 12
        "JMP",     # 13
        "JNZ",     # 14
        None,      # 15 (unassigned)
        "IN",      # 16
        "OUT",     # 17
    ))
    if mnemonic is not None
}
# -------------------------------------------------
# 2. Python VM interpreter, modelled on the core logic of sub_140001270
# -------------------------------------------------
def vm_run(
    flag_bytes: bytes,
    debug: bool = False,
    *,
    code: "bytes | None" = None,
    fail_ip: int = 27,
) -> bool:
    """Execute the VM bytecode on an input and report which HALT was reached.

    Instruction encoding implemented here:
      * 1 byte : HALT (0), IN (16), OUT (17)
      * 2 bytes: JMP imm8 (13), JNZ imm8 (14) - JNZ jumps when ZF == 0
      * 3 bytes: opcodes 1..12, laid out as [op, a, b]

    IN, OUT and STORE only touch the VM's I/O buffers in the real program,
    so they are skipped here; the input is supplied through ``flag_bytes``.

    Args:
        flag_bytes: Input string handed to the VM, without a trailing
            newline. Reads past its end yield 0.
        debug: When true, print one trace line per executed instruction
            (uses the module-level ``OPMAP``).
        code: Bytecode to execute. ``None`` (the default) runs the
            module-level ``CODE``.
        fail_ip: Offset of the HALT that terminates the failure branch.
            The default, 27, is the HALT after the "Wrong!" message in
            ``CODE``. (In ``CODE`` the success branch halts at offset 59;
            the 0x00 at offset 279 sits after the final JMP.)

    Returns:
        True if execution stops at a HALT other than ``fail_ip``. False if
        it stops at ``fail_ip`` or the instruction pointer leaves the
        bytecode without reaching a HALT.

    Raises:
        RuntimeError: On an unknown opcode, an instruction whose operands
            run past the end of the bytecode, a register number outside
            r0..r7, or more than 20000 executed instructions.
    """
    program = CODE if code is None else code
    size = len(program)
    mask = 0xFFFFFFFF
    regs = [0] * 8  # general-purpose registers r0..r7, 32 bits each
    zf = 0          # only the Zero Flag is modelled
    ip = 0          # instruction pointer
    steps = 0

    def operand(pos: int) -> int:
        """Return the bytecode byte at ``pos``, rejecting reads past the end."""
        if pos >= size:
            raise RuntimeError("字节码非法,读取越界")
        return program[pos]

    def reg(index: int) -> int:
        """Validate a register number; the original VM also rejects >= 8."""
        if index >= len(regs):
            raise RuntimeError(f"寄存器编号非法 r{index} @ ip={ip}")
        return index

    while ip < size:
        steps += 1
        if steps > 20000:
            # The program should never loop forever; this is a safety net.
            raise RuntimeError("VM 可能陷入死循环,终止调试")

        op = program[ip]
        if debug:
            print(f"IP={ip:03d} OP={op:02X} {OPMAP.get(op, '?'):<5} regs={regs} ZF={zf}")

        # ---- instructions without operands ----
        if op == 0:  # HALT: the HALT's position tells the two branches apart
            return ip != fail_ip
        if op in (16, 17):  # IN / OUT: I/O only, nothing to emulate
            ip += 1
            continue

        # ---- jumps: one operand byte holding the absolute target ----
        if op == 13:  # JMP imm8
            ip = operand(ip + 1)
            continue
        if op == 14:  # JNZ imm8
            target = operand(ip + 1)
            ip = target if zf == 0 else ip + 2
            continue

        # Validate the opcode before fetching operands, so an unknown
        # opcode near the end is not misreported as a truncated instruction.
        if not 1 <= op <= 12:
            raise RuntimeError(f"未知指令 op={op} @ ip={ip}")

        # ---- three-byte instructions: [op, a, b] ----
        a = operand(ip + 1)
        b = operand(ip + 2)

        if op == 1:
            # STORE idx, char: writes the output buffer only, so neither
            # operand is a register and nothing needs to be emulated.
            pass
        elif op == 2:  # LDMEM r, idx: load an input byte (0 past the end)
            regs[reg(a)] = flag_bytes[b] if b < len(flag_bytes) else 0
        elif op == 3:  # MOV rd, rs
            regs[reg(a)] = regs[reg(b)]
        elif op == 4:  # LDI r, imm8
            regs[reg(a)] = b
        else:
            # ALU and compare family. Odd opcodes (5, 7, 9, 11) take a
            # register as second operand, even ones (6, 8, 10, 12) an imm8.
            dst = reg(a)
            rhs = regs[reg(b)] if op % 2 else b
            if op in (5, 6):            # ADD / ADD_I
                result = regs[dst] + rhs
            elif op in (9, 10):         # XOR / XOR_I
                result = regs[dst] ^ rhs
            else:                       # SUB / SUB_I / CMP / CMP_I
                result = regs[dst] - rhs
            result &= mask
            if op < 11:
                # CMP / CMP_I only update ZF; the others also write back.
                regs[dst] = result
            zf = 1 if result == 0 else 0

        ip += 3

    # The instruction pointer left the bytecode without reaching a HALT.
    return False
# -------------------------------------------------
# 3. Recover the flag from the equations found in the bytecode
# -------------------------------------------------
def solve_flag(free_char: str = "0") -> str:
    """Reconstruct the flag from the equality checks encoded in the bytecode.

    Every constrained byte is derived from the constants that appear in the
    bytecode's CMP / CMP_I instructions, so the arithmetic documents where
    each character comes from instead of restating the expected answer.

    Two values are not fixed by the bytecode:
      * ``flag[4]``: the three XOR checks on flag[2..4] only fix the bytes
        relative to each other, so ``'{'`` is chosen to follow the usual
        ``flag{...}`` convention.
      * ``flag[13]``: loaded into a register but never compared, hence
        ``free_char``.

    Args:
        free_char: Character to place at index 13. Defaults to ``"0"``,
            which makes the tail read ``_2025``.

    Returns:
        The 17-character flag. The bytecode also requires the byte at
        index 17 to be 0, i.e. the input must end after these characters.

    Raises:
        ValueError: If ``free_char`` is not exactly one ASCII character
            (anything else would not encode to a single input byte).
    """
    if len(free_char) != 1 or ord(free_char) > 0x7F:
        raise ValueError("free_char must be a single ASCII character")

    flag = [0] * 17  # indices 0..16; index 17 must read back as 0

    # ---- 0, 1: direct comparisons ----
    flag[0] = 0x66            # CMP_I flag[0], 0x66
    flag[1] = 0x6D - 1        # CMP_I flag[1] + 1, 0x6D

    # ---- 2, 3, 4: XOR system ----
    #   flag[2] ^ flag[3] == 0x06
    #   flag[3] ^ flag[4] == 0x1C
    #   flag[2] ^ flag[4] == 0x1A   (redundant: 0x06 ^ 0x1C == 0x1A)
    flag[4] = ord("{")        # anchor chosen by convention, see docstring
    flag[3] = flag[4] ^ 0x1C
    flag[2] = flag[3] ^ 0x06

    # ---- 5, 6, 7 ----
    flag[6] = 0x69            # CMP_I flag[6], 0x69
    flag[5] = flag[6] ^ 0x21  # flag[5] ^ flag[6] == 0x21
    flag[7] = flag[6] ^ 0x3D  # flag[7] ^ flag[6] == 0x3D

    # ---- 8, 9, 10 ----
    flag[9] = flag[7]         # CMP flag[9], flag[7]
    flag[8] = flag[9] ^ 0x17  # flag[8] ^ flag[9] == 0x17
    flag[10] = flag[9] ^ 0x12 # flag[10] ^ flag[9] == 0x12

    # ---- 11..16: tail ----
    flag[11] = 0xBE // 2      # flag[11] + flag[11] == 0xBE
    flag[12] = 0x32           # CMP_I flag[12], 0x32
    flag[14] = flag[12]       # CMP flag[12], flag[14]
    # The VM reuses the register holding flag[12]: it subtracts 2 (and checks
    # for 0x30), adds 5, then compares the result with flag[15].
    flag[15] = flag[12] - 2 + 5
    flag[16] = flag[4] + 2    # flag[16] - flag[4] == 2
    flag[13] = ord(free_char) # unconstrained byte

    return "".join(map(chr, flag))
def main():
    """Recover the flag, print it, and confirm it by running it through the VM.

    Raises:
        SystemExit: If the recovered flag does not pass the VM check.
    """
    candidate = solve_flag()
    print("Recovered flag:", candidate)

    # Run the candidate through the emulated VM as an end-to-end check.
    passed = vm_run(candidate.encode())
    verdict = "PASS" if passed else "FAIL"
    print("VM check:", verdict)

    if passed:
        return
    raise SystemExit("求出来的 flag 没通过 VM 校验,请检查脚本逻辑。")


# Run the solver only when executed as a script, not when imported.
if __name__ == "__main__":
    main()