写在前面

好久没打比赛了,而且像羊城杯这样的比赛,是值得我们记录一下的。

而且是一个通宵比赛,虽然通宵也没写出来一道很简单的misc()

别笑,你试你也过不了第二关

这真是别笑,你试你也过不了第二关。

第一关用replace,第二关却有类似原题的题解可供参考

第一关

replace替换字符

1
2
3
4
5
6
7
8
9
'''
c = '##### ##### ### # # ##### ##### ##### ##### ### # # #####\\n# # # # # # # # # # # # # # # # #\\n# # ##### # # # # # # # ##### # # # #\\n# # # # # # # # # # # # # # # # #\\n##### ##### # # ##### ##### ##### ##### ##### ##### # # ##### ##### #####'
c = c.replace('#####','0').replace(' ','1').replace(' ','2')
print(c)
'c = 0202 ###2 #1#10 0 0 02 ###2 #1#10\n#12#22#2 #2#1#1#2 # #22 #1 #22#2 #2#1#1#2 #\n#12#2202#1#1#2 # #22 #1 #2202#1#1#2 #\n#12#22#2 #2#1#1#2 # #22 #1 #22#2 #2#1#1#2 #\n0202#2 #202020 0 0 02#2 #202020'
'''

# Encoded banner: '0' stands for a run of five '#', while '1' and '2' stand for blanks.
# NOTE(review): '1' and '2' both decode to the same single space here; the two
# blank runs presumably had different widths before the paste collapsed
# whitespace - confirm against the original challenge text.
a='0202 ###2 #1#10 0 0 02 ###2 #1#10\n#12#22#2 #2#1#1#2 # #22 #1 #22#2 #2#1#1#2 #\n#12#2202#1#1#2 # #22 #1 #2202#1#1#2 #\n#12#22#2 #2#1#1#2 # #22 #1 #22#2 #2#1#1#2 #\n0202#2 #202020 0 0 02#2 #202020'
# Expand the placeholders back into the '#' runs and blanks of the banner.
hilogo=a.replace('0','#####').replace('1',' ').replace('2',' ')

第二关

有链接()

1761702446492

给出了一个链接:code golf - Outputting ordinal numbers (1st, 2nd, 3rd) - Code Golf Stack Exchange

看到

1761702596171

得到答案:

1
'tsnrhtdd'[n%5*(n%100^15>4>n%10)::4]

POLAR

依旧ai一把梭,nc连接上直接2,贴出脚本即可

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
def construction(N, K, eps):
    """Split the N bit-channel indices into information and frozen sets.

    Indices are ranked by the hard-coded ``Zvalues`` table in ascending
    order; the first ``K`` become the information set and the remainder the
    frozen set.

    Args:
        N: Number of bit channels. The table holds 8 entries, so any ``N``
            above 8 raises ``IndexError``.
        K: Number of information positions to select.
        eps: Accepted for interface compatibility; not used by this body.

    Returns:
        ``(infoidx, frozenidx, None)``.
    """
    # NOTE(review): presumably per-channel reliability values (lower is
    # better) precomputed for N = 8 - confirm against the challenge.
    Zvalues = [0.99609375, 0.87890625, 0.80859375, 0.31640625,
               0.68359375, 0.19140625, 0.12109375, 0.00390625]
    # sorted() is stable, exactly like list.sort() on range(N).
    indices = sorted(range(N), key=lambda i: Zvalues[i])
    return indices[:K], indices[K:], None

def encode(u, N):
    """Encode ``u`` with the N x N transform built from F = [[1, 0], [1, 1]].

    The generator matrix is the Kronecker power of ``F`` with
    ``int(log2(N))`` factors; the product ``u . G`` is reduced modulo 2.

    Args:
        u: Length-N sequence of bits.
        N: Block length (expected to be a power of two, at least 2).

    Returns:
        NumPy array holding the N encoded bits.
    """
    n = int(np.log2(N))
    F = np.array([[1, 0], [1, 1]], dtype=int)
    G = F.copy()
    # G already holds one factor, so n - 1 further Kronecker products remain.
    for _ in range(1, n):
        G = np.kron(G, F)
    return np.dot(u, G) % 2

def decode(y, frozenidx):
    """Recover the input vector from a codeword with erased positions.

    Every non-erased position ``j`` of ``y`` gives one linear equation over
    GF(2) in the information bits (frozen bits are taken as 0). The system is
    solved by Gaussian elimination followed by back-substitution.

    Args:
        y: Length-N sequence of received bits; ``None`` marks an erasure.
        frozenidx: Collection of frozen positions.

    Returns:
        Length-N NumPy int array: solved information bits at the information
        positions and 0 at the frozen positions. All zeros when fewer
        positions are known than there are information bits.
    """
    N = len(y)
    # List comprehension used instead of set operations.
    infoidx = [i for i in range(N) if i not in frozenidx]
    K = len(infoidx)
    n = int(np.log2(N))
    F = np.array([[1, 0], [1, 1]], dtype=int)
    G = F.copy()
    for _ in range(1, n):
        G = np.kron(G, F)

    knownIndices = [i for i in range(N) if y[i] is not None]
    M = len(knownIndices)
    if M < K:
        # Fewer equations than unknowns: fall back to all zeros.
        uInfo = np.zeros(K, dtype=int)
    else:
        # One equation per known position: sum_i u[i] * G[i, j] = y[j].
        A = np.zeros((M, K), dtype=int)
        b = np.zeros(M, dtype=int)
        for p, j in enumerate(knownIndices):
            for q, i in enumerate(infoidx):
                A[p, q] = G[i, j]
            b[p] = y[j]
        aug = np.concatenate((A, b.reshape(-1, 1)), axis=1)

        # Forward elimination over GF(2); only rows below the pivot are cleared.
        rank = 0
        for col in range(K):
            pivotRow = -1
            for r in range(rank, M):
                if aug[r, col] == 1:
                    pivotRow = r
                    break
            if pivotRow == -1:
                continue
            aug[[rank, pivotRow]] = aug[[pivotRow, rank]]
            for r in range(rank + 1, M):
                if aug[r, col] == 1:
                    aug[r] = (aug[r] + aug[rank]) % 2
            rank += 1

        uInfo = np.zeros(K, dtype=int)
        # NOTE(review): rows below `rank` each keep their pivot 1, so this
        # inconsistency test cannot trigger as written; kept as in the original.
        for r in range(rank):
            if np.all(aug[r, :K] == 0) and aug[r, K] != 0:
                uInfo = np.zeros(K, dtype=int)
                break
        else:
            # Back-substitution from the last pivot row upwards; columns
            # without a pivot keep the value 0.
            for r in range(rank - 1, -1, -1):
                col = -1
                for c in range(K):
                    if aug[r, c] == 1:
                        col = c
                        break
                if col == -1:
                    continue
                uInfo[col] = aug[r, K]
                # Fold the solved bit into the right-hand side of earlier rows.
                for r2 in range(r):
                    if aug[r2, col] == 1:
                        aug[r2, K] = (aug[r2, K] + uInfo[col]) % 2

    uHat = np.zeros(N, dtype=int)
    for i in range(N):
        if i in frozenidx:
            uHat[i] = 0
        else:
            pos = infoidx.index(i)
            uHat[i] = uInfo[pos]
    return uHat
END

成功男人背后的女人

这题我做的时候卡住了。

我原本的做法:

用010打开发现有很多不太正常的IDAT块,提取几个用zlib解压后发现:

第五个IDAT块出现一些明显的奇怪特征

1761704558581

第六个也是:

1761704582121

疑似为某种画图工具的东西,但是我迟迟做不出来。

这就是思维局限在了一个地方,misc就是这样的,局限住了就出不来了。

正确写法:

010打开发现许多mkBT块

1761705532967

搜索mkBT块搜到:CTF–py的交易(tweakpng,firework) | Blog for You

发现是adobe fireworks的格式,需要使用fireworks才能看

下载后打开即可

(3 封私信 / 80 条消息) Fireworks CS6 安装 - 知乎

打开之后在右侧图层发现另一层

1761707291990

转化为01字符串

1
2
3
4
5
6
01000100010000010101001101000011
01010100010001100111101101110111
00110000011011010100010101001110
01011111011000100110010101101000
00110001011011100100010001011111
01001101010001010110111001111101

将上面的01串每8位一组按ASCII解码,得到flag:DASCTF{w0mEN_beh1nD_MEn}

Mini-modelscope

原题:https://xz.aliyun.com/news/18887

SM4-OFB

因为用的sm4,编写exp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# -*- coding: utf-8 -*-
"""
121.py - Decryption helper for the SM4-OFB CTF task (skips the header row and
derives the keystream from the first data record).

Usage:
    python 121.py <xlsx_path>

Assumptions:
- The sheet has a header row containing "姓名", "手机号", "身份证号" and "IV"
  (matched case-insensitively).
- The plaintext of the first real data row (the first data row after the
  header) is known:
    姓名 = "蒋宏玲"
    手机号 = "17145949399"
    身份证号 = "220000197309078766"
- Every column is encrypted with the same key/IV/mode (or the IV column holds
  the same IV everywhere); the script derives one keystream per column from
  the first record.
- The goal is to find the row whose name is "何浩璐" and print the md5 (hex)
  of its ID number.

Hint: if the column names differ, or the known plaintext is not the first real
data row, edit the known-plaintext variables or the column-name mapping near
the top of the script.
"""

import sys
import re
import hashlib
from openpyxl import load_workbook

# --------- Editable section (change these if the known plaintext differs) ---------
# Known plaintext of the first data row, one value per encrypted column.
known_name = "蒋宏玲"
known_phone = "17145949399"
known_id = "220000197309078766"

# Decrypted name of the row whose ID number is hashed into the flag.
target_name = "何浩璐"
# ------------------------------------------------------------------

HEX_RE = re.compile(r'[0-9a-fA-F]+')


def extract_best_hex(s):
    """Return the most plausible hex payload found in ``s`` as bytes.

    The value is stringified first. If, after removing "0x" markers and
    whitespace, the whole string is hex of even length, it is decoded as is.
    Otherwise the longest hex run in the original string is used, with the
    last character dropped from odd-length runs (the first run wins ties).

    Args:
        s: Cell value of any type, or ``None``.

    Returns:
        The decoded bytes, or ``b''`` when ``s`` is ``None`` or holds no
        usable hex.
    """
    if s is None:
        return b''
    s = str(s)
    # Fast path: drop the common separators and accept the whole cell.
    s2 = s.strip().replace("0x", "").replace(" ", "").replace("\n", "").replace("\r", "").replace("\t", "")
    if len(s2) % 2 == 0 and HEX_RE.fullmatch(s2):
        return bytes.fromhex(s2)
    # Fallback: pick the longest even-length hex run.
    best = ""
    for run in HEX_RE.findall(s):
        if len(run) % 2 == 1:
            run = run[:-1]
        if len(run) > len(best):
            best = run
    # bytes.fromhex("") is b'', so the "nothing found" case needs no branch.
    return bytes.fromhex(best)

def xor_bytes(a: bytes, b: bytes) -> bytes:
    """XOR two byte strings, truncated to the length of the shorter one."""
    # zip() already stops at the shorter input, so no explicit slicing is needed.
    return bytes(x ^ y for x, y in zip(a, b))

def derive_keystream_from_known(cipher_cell, plain_text):
    """Derive a keystream prefix from one ciphertext cell and its plaintext.

    Args:
        cipher_cell: Cell value holding the hex-encoded ciphertext.
        plain_text: Known plaintext of that cell (encoded as UTF-8).

    Returns:
        Keystream bytes, as long as the UTF-8 encoded plaintext.

    Raises:
        ValueError: If the ciphertext is shorter than the plaintext.
    """
    c = extract_best_hex(cipher_cell)
    p = plain_text.encode('utf-8')
    if len(c) < len(p):
        raise ValueError(f"密文长度 ({len(c)}) 小于已知明文长度 ({len(p)}),无法完整恢复 keystream。")
    # xor_bytes truncates to the shorter input, i.e. to the plaintext length.
    return xor_bytes(c, p)

def decrypt_with_keystream(cipher_cell, keystream):
    """Decrypt one cell with a known keystream prefix.

    Args:
        cipher_cell: Cell value holding the hex-encoded ciphertext.
        keystream: Keystream bytes recovered for this column.

    Returns:
        The decoded text; undecodable bytes become U+FFFD. When the
        ciphertext is longer than the keystream, the uncovered tail is
        appended as ``[REM_HEX:<hex>]``. Empty string when the cell holds no
        hex.
    """
    c = extract_best_hex(cipher_cell)
    if not c:
        return ""
    # xor_bytes stops at the shorter input, so only the covered part is decrypted.
    covered = xor_bytes(c, keystream)
    # errors='replace' equals a strict decode for valid UTF-8, so the former
    # try / bare-except pair is unnecessary.
    decoded = covered.decode('utf-8', errors='replace')
    if len(c) > len(keystream):
        return decoded + "[REM_HEX:" + c[len(keystream):].hex() + "]"
    return decoded

def find_header_indices(header_row):
    """Locate the name / phone / ID / IV columns in a header row.

    Matching is done on the stripped, lower-cased cell text. A later cell
    that matches the same keyword overwrites an earlier one.

    Args:
        header_row: Iterable of header cell values, or ``None``.

    Returns:
        Dict with keys ``'name'``, ``'phone'``, ``'id'`` and ``'iv'`` mapped
        to 0-based column indices, or to ``None`` for columns not found.
    """
    mapping = {'name': None, 'phone': None, 'id': None, 'iv': None}
    if header_row is None:
        return mapping
    for i, cell in enumerate(header_row):
        if cell is None:
            continue
        s = str(cell).strip().lower()
        if '姓名' in s or 'name' in s:
            mapping['name'] = i
        if '手机' in s or '电话' in s or 'phone' in s:
            mapping['phone'] = i
        # '身份证号' contains '身份证', so one substring test covers both.
        if '身份证' in s or s == 'id':
            mapping['id'] = i
        if 'iv' in s:
            mapping['iv'] = i
    return mapping

def main(xlsx_path):
    """Decrypt the sheet and print the md5 of the target person's ID number.

    Steps: load every row, locate the columns from the header, derive one
    keystream per column from the first data row (whose plaintext is given by
    ``known_name`` / ``known_phone`` / ``known_id``), then decrypt and print
    each row until the decrypted name equals ``target_name``.

    Args:
        xlsx_path: Path of the workbook to read.
    """
    wb = load_workbook(xlsx_path, read_only=True, data_only=True)
    try:
        ws = wb.active
        rows = list(ws.iter_rows(values_only=True))
    finally:
        # Read-only workbooks keep the file handle open until closed explicitly.
        wb.close()

    if len(rows) < 2:
        print("表格行太少,确认是否包含数据。")
        return

    idx = find_header_indices(rows[0])
    # No header keyword recognised at all: fall back to the usual layout
    # (serial number, name, phone, ID number, IV).
    if all(v is None for v in idx.values()):
        print("未识别到表头中的关键列名,尝试使用默认列位置:姓名=1, 手机号=2, 身份证号=3, IV=4(0基)。")
        idx = {'name': 1, 'phone': 2, 'id': 3, 'iv': 4}

    print("列索引映射 (0基):", idx)

    def cell_at(row, col):
        """Return row[col], or None when the column is unknown or out of range."""
        if col is None or col >= len(row):
            return None
        return row[col]

    # The first row after the header with hex in any of the three columns is
    # treated as the known-plaintext record. Row numbers are 1-based as in Excel.
    first_data_row = None
    first_row_idx = None
    for i, row in enumerate(rows[1:], start=2):
        if row is None:
            continue
        if any(extract_best_hex(cell_at(row, idx[key])) for key in ('name', 'phone', 'id')):
            first_data_row = row
            first_row_idx = i
            break

    if first_data_row is None:
        print("未找到任何看起来像密文的行。请确认文件内容或表头列位置。")
        return

    print(f"第一条实际数据位于 Excel 的第 {first_row_idx} 行,作为已知明文来源。")

    # Script-level boundary: any failure here (missing column, short
    # ciphertext, ...) is reported and ends the run.
    try:
        ks_name = derive_keystream_from_known(first_data_row[idx['name']], known_name)
        ks_phone = derive_keystream_from_known(first_data_row[idx['phone']], known_phone)
        ks_id = derive_keystream_from_known(first_data_row[idx['id']], known_id)
    except Exception as e:
        print("从第一条记录推导 keystream 失败:", e)
        return

    print("已成功推导 keystream(长度): name={}, phone={}, id={}".format(len(ks_name), len(ks_phone), len(ks_id)))

    found = False
    for r_idx, row in enumerate(rows[1:], start=2):
        if row is None:
            continue

        # decrypt_with_keystream returns "" for a missing (None) cell.
        dec_name_s = decrypt_with_keystream(cell_at(row, idx['name']), ks_name).strip()
        dec_phone = decrypt_with_keystream(cell_at(row, idx['phone']), ks_phone)
        dec_id_s = decrypt_with_keystream(cell_at(row, idx['id']), ks_id).strip()

        print(f"[Row {r_idx}] name={dec_name_s} | phone={dec_phone} | id={dec_id_s}")

        if dec_name_s == target_name:
            found = True
            target_id = dec_id_s
            print("找到目标姓名:", target_name, " 对应身份证号:", target_id)
            md5hex = hashlib.md5(target_id.encode('utf-8')).hexdigest()
            print("flag (md5 of id) =", md5hex)
            break

    if not found:
        print("没有在表中找到姓名为", target_name, "的记录。请确认姓名字符串与解密后的字符串完全匹配(包括空格)。")

if __name__ == "__main__":
    # Exactly one argument is expected: the path of the workbook.
    if len(sys.argv) != 2:
        print("用法: python 121.py <个人信息表.xlsx>")
        sys.exit(1)
    main(sys.argv[1])

运行之后乍一看似乎没有得到数据,但我们把所有的输出放到txt中,再搜索一下得到:

1761704390652

获得身份证:120000197404101676

md5一下:

1761704417128

dataidsort

要求正确率在98%以上,调教了2个小时终于出了,正确率能到99.773%

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
#!/usr/bin/env python3
# coding: utf-8
"""
extract_sensitive_v2.py
Tuned version: targets 98%+ accuracy
- More precise regex boundary handling
- Improved conflict-resolution strategy
- Stronger validation logic
- Better preservation of the original formatting
"""

import re, csv, os, sys
from datetime import datetime
from collections import defaultdict, Counter

# Input and output files, resolved relative to the working directory.
DATA_FILE = "data.txt"
OUT_FILE = "output_best.csv"

# Stop with a readable message instead of a traceback when the input is missing.
if not os.path.exists(DATA_FILE):
    print(f"错误:未找到 {DATA_FILE},请把脚本与 data.txt 放在同一目录后重试。")
    sys.exit(1)

# Undecodable bytes are dropped rather than aborting the run.
with open(DATA_FILE, "r", encoding="utf-8", errors="ignore") as f:
    text = f.read()

# ------------ Validation and helper functions ------------
# Per-position weights and remainder-to-check-character table for 18-digit ID numbers.
ID_WEIGHTS = [7,9,10,5,8,4,2,1,6,3,7,9,10,5,8,4,2]
ID_MAP = ['1','0','X','9','8','7','6','5','4','3','2']


def calc_id_checkcode(first17: str) -> str:
    """Return the check character for the first 17 digits of an ID number.

    The weighted digit sum modulo 11 indexes ``ID_MAP``. zip() stops at the
    shorter input, so characters beyond position 17 are ignored.
    """
    total = sum(int(ch) * w for ch, w in zip(first17, ID_WEIGHTS))
    return ID_MAP[total % 11]

def id15_to_18(s15: str) -> str:
    """Expand a 15-digit ID number to 18 characters.

    '19' is inserted after the first six digits and the check character is
    appended.
    """
    first17 = s15[:6] + '19' + s15[6:]
    return first17 + calc_id_checkcode(first17)

def valid_id_clean(cleaned: str) -> bool:
    """Validate an ID number that has already had its separators removed.

    A 15-digit value is expanded to 18 characters first. The value must then
    be 17 digits plus a digit or X/x, carry a real calendar date between year
    1900 and next year at positions 6-13, and end in the correct check
    character (compared case-insensitively).
    """
    if len(cleaned) == 15:
        if not cleaned.isdigit():
            return False
        cleaned = id15_to_18(cleaned)
    elif len(cleaned) != 18:
        return False

    if not re.fullmatch(r'\d{17}[\dXx]', cleaned):
        return False

    # strptime rejects impossible dates such as month 13 or 30 February.
    try:
        birth = datetime.strptime(cleaned[6:14], "%Y%m%d")
    except ValueError:
        return False
    if not (1900 <= birth.year <= datetime.now().year + 1):
        return False

    return calc_id_checkcode(cleaned[:17]) == cleaned[17].upper()

# Three-digit prefixes accepted as mobile numbers.
MOBILE_PREFIXES = {
    '134','135','136','137','138','139','147','148','150','151','152','157','158','159',
    '172','178','182','183','184','187','188','195','198','130','131','132','140','145',
    '146','155','156','166','167','171','175','176','185','186','196','133','149','153',
    '173','174','177','180','181','189','190','191','193','199'
}


def valid_phone_candidate(d: str) -> bool:
    """Return True when ``d`` is 11 digits starting with a known mobile prefix."""
    return len(d) == 11 and d.isdigit() and d[:3] in MOBILE_PREFIXES

def luhn_check(cardnum: str) -> bool:
    """Return True when ``cardnum`` is all digits and passes the Luhn checksum.

    Counting from the rightmost digit, every second digit is doubled (minus 9
    when the result exceeds 9); the total must be a multiple of 10.
    """
    if not cardnum.isdigit():
        return False
    # str.isdigit() also accepts characters such as '²' that int() rejects;
    # treat those as invalid instead of raising.
    try:
        digits = [int(ch) for ch in reversed(cardnum)]
    except ValueError:
        return False
    total = 0
    for i, d in enumerate(digits):
        if i % 2 == 1:
            d *= 2
            if d > 9:
                d -= 9
        total += d
    return total % 10 == 0

def valid_ipv4(s: str) -> bool:
    """Return True when ``s`` is a dotted-quad IPv4 address.

    Exactly four dot-separated parts are required, each all digits, without a
    leading zero (except "0" itself) and with a value of at most 255.
    """
    parts = s.split('.')
    if len(parts) != 4:
        return False
    for p in parts:
        if not p.isdigit():
            return False
        if p.startswith('0') and len(p) > 1:
            return False
        # str.isdigit() also accepts characters such as '²' that int() rejects;
        # treat those as invalid instead of raising.
        try:
            value = int(p)
        except ValueError:
            return False
        if value > 255:
            return False
    return True

def normalize_mac(raw: str):
    """Normalize a MAC address to lower-case ``aa:bb:cc:dd:ee:ff``.

    Accepts the dotted form ``aabb.ccdd.eeff`` as well as six two-digit hex
    groups separated by any non-hex characters.

    Returns:
        The normalized string, or ``None`` when ``raw`` is not a MAC address.
    """
    r = raw.strip()
    # Dotted form: three groups of four hex digits.
    if r.count('.') == 2:
        groups = r.split('.')
        if all(re.fullmatch(r'[0-9A-Fa-f]{4}', g) for g in groups):
            hexs = ''.join(groups)
            return ':'.join(hexs[i:i+2].lower() for i in range(0, 12, 2))
    # Generic form: every non-hex character acts as a separator.
    unified = re.sub(r'[^0-9A-Fa-f]', ':', r)
    octets = [p for p in unified.split(':') if p != '']
    if len(octets) == 6 and all(len(p) == 2 for p in octets):
        return ':'.join(p.lower() for p in octets)
    return None

# ------------ Improved regex patterns ------------
# Key improvement: negative lookahead/lookbehind keeps a match from being part of a longer digit run

# ID number: 18 characters, 18 characters with separators, or 15 digits
# (?<!\d) and (?!\d) ensure the match is not part of a longer digit string (except in the separated form)
id_pattern = re.compile(
r'(?:(?<!\d)\d{6}[-\s]\d{8}[-\s]\d{3}[-\s]?[0-9Xx](?!\d)|' # with separators
r'(?<!\d)\d{17}[0-9Xx](?!\d)|' # standard 18 characters
r'(?<!\d)\d{15}(?!\d))' # 15 digits
)

# Phone number: optional +86 / (+86) prefix, with or without group separators
phone_pattern = re.compile(
r'(?:'
r'(?:\+86|(?:\(\+86\)))[\s\-\.\u3000]*\d{3}[\s\-\.\u3000]+\d{4}[\s\-\.\u3000]+\d{4}|' # prefix + separated groups
r'(?:\+86|(?:\(\+86\)))[\s\-\.\u3000]*\d{11}|' # prefix, no separators
r'(?<!\d)\d{3}[\s\-\.\u3000]+\d{4}[\s\-\.\u3000]+\d{4}(?!\d)|' # no prefix + separated groups
r'(?<!\d)\d{11}(?!\d)' # plain 11 digits
r')'
)

# Bank card: 16-19 digits, with digit boundaries to avoid confusion with ID numbers
bank_pattern = re.compile(r'(?<!\d)\d{16,19}(?!\d)')

# IP address: standard dotted decimal
ip_pattern = re.compile(r'(?<!\d)(?:\d{1,3}\.){3}\d{1,3}(?!\d)')

# MAC address: three standard formats (colon-separated, hyphen-separated, dotted)
mac_pattern = re.compile(
r'(?:'
r'[0-9A-Fa-f]{2}:[0-9A-Fa-f]{2}:[0-9A-Fa-f]{2}:[0-9A-Fa-f]{2}:[0-9A-Fa-f]{2}:[0-9A-Fa-f]{2}|'
r'[0-9A-Fa-f]{2}-[0-9A-Fa-f]{2}-[0-9A-Fa-f]{2}-[0-9A-Fa-f]{2}-[0-9A-Fa-f]{2}-[0-9A-Fa-f]{2}|'
r'[0-9A-Fa-f]{4}\.[0-9A-Fa-f]{4}\.[0-9A-Fa-f]{4}'
r')'
)

# ------------ Candidate collection ------------
candidates = []


def add_candidate(cat, m):
    """Record regex match ``m`` as a candidate of category ``cat``.

    The stored dict holds the match span (``start``, ``end``), the category
    (``cat``) and the matched text (``value``).
    """
    # m.group(0) is exactly the slice of the searched text covered by the match.
    candidates.append({"start": m.start(), "end": m.end(), "cat": cat, "value": m.group(0)})

# Collect in a fixed order, more specific types first: MAC, IP, then ID
# numbers (stricter) ahead of bank cards, and phone numbers last.
for category, pattern in (
    ("mac", mac_pattern),
    ("ip", ip_pattern),
    ("idcard", id_pattern),
    ("bankcard", bank_pattern),
    ("phone", phone_pattern),
):
    for m in pattern.finditer(text):
        add_candidate(category, m)

# ------------ Validation and scoring ------------
def score_candidate(cand):
    """Score a candidate dict: 1.0 valid, 0.8 weaker phone match, 0.0 rejected.

    Args:
        cand: Dict with at least the keys ``"cat"`` and ``"value"``.

    Returns:
        The score as a float; unknown categories score 0.0.
    """
    cat = cand["cat"]
    raw = cand["value"]

    if cat == "mac":
        return 1.0 if normalize_mac(raw) else 0.0

    if cat == "ip":
        return 1.0 if valid_ipv4(raw) else 0.0

    if cat == "bankcard":
        digits = re.sub(r'\D', '', raw)
        # A bank card must be 16-19 digits long and pass the Luhn checksum.
        if not (16 <= len(digits) <= 19 and luhn_check(digits)):
            return 0.0
        # An 18-digit value that also validates as an ID number is an ID
        # number, not a bank card.
        if len(digits) == 18 and valid_id_clean(digits):
            return 0.0
        return 1.0

    if cat == "idcard":
        cleaned = re.sub(r'[\s-]', '', raw)
        return 1.0 if valid_id_clean(cleaned) else 0.0

    if cat == "phone":
        digits = re.sub(r'\D', '', raw)
        # Drop a leading country code 86; with extra trailing digits only the
        # 11 digits right after it are kept.
        if digits.startswith('86') and len(digits) >= 13:
            digits = digits[2:13]

        if len(digits) == 11:
            if valid_phone_candidate(digits):
                return 1.0
        elif len(digits) > 11:
            # Longer run: try the last 11 digits, with lower confidence.
            if valid_phone_candidate(digits[-11:]):
                return 0.8
        return 0.0

    return 0.0

for c in candidates:
    c["score"] = score_candidate(c)

# ------------ Drop invalid candidates ------------
valid_candidates = [c for c in candidates if c["score"] > 0]

# ------------ Conflict resolution: priority strategy ------------
# 1. MAC and IP rank highest (most explicit formats)
# 2. ID numbers come next
# 3. Bank cards and phone numbers rank lowest
type_priority = {"mac": 5, "ip": 4, "idcard": 3, "bankcard": 2, "phone": 1}

for c in valid_candidates:
    c["length"] = c["end"] - c["start"]
    c["priority"] = type_priority.get(c["cat"], 0)

# Sort key: position -> score -> priority -> length
valid_candidates.sort(key=lambda x: (x["start"], -x["score"], -x["priority"], -x["length"]))

# Greedy pass: keep a candidate only if none of its characters is already taken.
selected = []
occupied = [False] * len(text)

for c in valid_candidates:
    s, e = c["start"], c["end"]
    if any(occupied[s:e]):
        continue
    selected.append(c)
    occupied[s:e] = [True] * (e - s)

# ------------ Order by position and de-duplicate by value ------------
selected.sort(key=lambda x: x["start"])

seen = set()
final = []

for c in selected:
    v = c["value"]
    if v not in seen:
        seen.add(v)
        final.append((c["cat"], v))

# ------------ Write the CSV ------------
with open(OUT_FILE, "w", encoding="utf-8", newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["category", "value"])
    for cat, val in final:
        writer.writerow([cat, val])

# ------------ Summary ------------
cnt = Counter([c for c, _ in final])
print(f"✓ 已写入: {OUT_FILE}")
print(f"✓ 识别统计: {dict(cnt)}")
print(f"✓ 总计: {len(final)} 条")
print("\n前60条示例:")
for i, (cat, val) in enumerate(final[:60], 1):
    print(f"{i:03d} [{cat:8s}] {val}")

满天繁星

exp如下,运行后直接看flag.jpg即可:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import numpy as np
from scipy.spatial.distance import cdist

# Load the data.
# NOTE(review): both files carry a .npy extension but are read as text with
# np.loadtxt; np.load would be needed if they were binary .npy - confirm.
known_samples = np.loadtxt("known_samples_new.npy")  # the 256 known clusters
data = np.loadtxt("data_new.npy")  # points to classify

print(f"已知样本: {len(known_samples)}")
print(f"待分类数据: {len(data)}")

# Key step: standardization. Every point is scaled with the mean and standard
# deviation of the known samples.
mean = known_samples.mean(axis=0)
std = known_samples.std(axis=0)

known_normalized = (known_samples - mean) / std
data_normalized = (data - mean) / std

print("\n标准化完成:")
print(f"均值: {mean}")
print(f"标准差: {std}")

# Euclidean distance from every point to every known sample, after standardization.
print("\n计算距离...")
distances = cdist(data_normalized, known_normalized, metric='euclidean')

# The label of a point is the index of its nearest known sample.
labels = np.argmin(distances, axis=1)

print("分类完成!")
print(f"标签范围: {labels.min()} - {labels.max()}")

# Each label becomes one byte of the output file.
file_data = bytes([int(label) for label in labels])

with open("flag.jpg", "wb") as f:
    f.write(file_data)

print("\n✓ Flag已保存到 flag.jpg")
print(f"✓ 文件大小: {len(file_data)} 字节")

# Check the JPEG start and end markers.
if file_data[:2] == b'\xff\xd8':
    print("✓ 检测到有效的JPEG文件头 (FF D8)")

if file_data[-2:] == b'\xff\xd9':
    print("✓ 检测到有效的JPEG文件尾 (FF D9)")

# Summary statistics.
unique, counts = np.unique(labels, return_counts=True)
print("\n聚类统计:")
print(f"使用的星团数: {len(unique)}/256")
print(f"平均每个星团: {counts.mean():.2f} 个星星")
print("\n可以打开 flag.jpg 查看flag了!")

flag.jpg:

1761704503467