Coverage for src / competitive_verifier / oj / verify / languages / cplusplus_bundle.py: 70%

197 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-03-05 16:00 +0000

1# Python Version: 3.x 

2import contextlib 

3import functools 

4import json 

5import os 

6import pathlib 

7import re 

8import shutil 

9from logging import getLogger 

10from typing import Any 

11 

12from competitive_verifier.exec import command_stdout 

13 

14logger = getLogger(__name__) 

15 

# GCC's catch-all header; `Bundler.update` checks for it before re-emitting
# any of the individual standard headers listed below.
BITS_STDCXX_H = "bits/stdc++.h"

# C++ standard library header names recognised in `#include <...>` lines.
CXX_STANDARD_LIBS = set(
    """
    algorithm array bitset chrono codecvt complex condition_variable deque
    exception forward_list fstream functional future iomanip ios iosfwd
    iostream istream iterator limits list locale map memory mutex new numeric
    ostream queue random regex set sstream stack stdexcept streambuf string
    thread tuple typeinfo unordered_map unordered_set utility valarray vector
    """.split()
)

65 

# C standard library header names (`<xxx.h>` form).
C_STANDARD_LIBS = set(
    """
    assert.h complex.h ctype.h errno.h fenv.h float.h inttypes.h iso646.h
    limits.h locale.h math.h setjmp.h signal.h stdalign.h stdarg.h
    stdatomic.h stdbool.h stddef.h stdint.h stdio.h stdlib.h stdnoreturn.h
    string.h tgmath.h threads.h time.h uchar.h wchar.h wctype.h
    """.split()
)

# C++ spellings derived mechanically from the C names: `math.h` -> `cmath`.
CXX_C_ORIGIN_LIBS = {f"c{header.removesuffix('.h')}" for header in C_STANDARD_LIBS}

99 

# GCC's catch-all header for the extension headers listed below.
BITS_EXTCXX_H = "bits/extc++.h"

# GNU extension header names (`ext/...`, including the pb_ds policy headers).
EXT_LIBS = set(
    """
    ext/algorithm ext/array_allocator.h ext/atomicity.h
    ext/bitmap_allocator.h ext/cast.h ext/concurrence.h
    ext/debug_allocator.h ext/extptr_allocator.h ext/functional
    ext/iterator ext/malloc_allocator.h ext/memory ext/mt_allocator.h
    ext/new_allocator.h ext/numeric ext/pod_char_traits.h ext/pointer.h
    ext/pool_allocator.h ext/rb_tree ext/rope ext/slist
    ext/stdio_filebuf.h ext/stdio_sync_filebuf.h ext/throw_allocator.h
    ext/typelist.h ext/type_traits.h ext/vstring.h
    ext/pb_ds/assoc_container.hpp ext/pb_ds/priority_queue.hpp
    ext/pb_ds/exception.hpp ext/pb_ds/hash_policy.hpp
    ext/pb_ds/list_update_policy.hpp ext/pb_ds/tree_policy.hpp
    ext/pb_ds/trie_policy.hpp
    """.split()
)

138 

# GCC's catch-all header for the TR1 headers listed below.
BITS_STDTR1CXX_H = "bits/stdtr1c++.h"

# TR1 header names (`tr1/...`).
TR1_LIBS = set(
    """
    tr1/array tr1/cctype tr1/cfenv tr1/cfloat tr1/cinttypes tr1/climits
    tr1/cmath tr1/complex tr1/cstdarg tr1/cstdbool tr1/cstdint tr1/cstdio
    tr1/cstdlib tr1/ctgmath tr1/ctime tr1/cwchar tr1/cwctype tr1/functional
    tr1/random tr1/tuple tr1/unordered_map tr1/unordered_set tr1/utility
    """.split()
)

166 

167 

@functools.cache
def _check_compiler(compiler: str) -> str:
    """Classify ``compiler`` from the text printed by ``<compiler> --version``.

    The executable name alone is not trusted: on macOS a program called
    ``g++`` may really be clang.  The result is memoised per ``compiler``.

    Returns:
        ``"clang"``, ``"gcc"`` or ``"unknown"``.
    """
    banner = command_stdout([compiler, "--version"]).lower()
    clang_markers = ("clang", "Apple LLVM".lower())
    if any(marker in banner for marker in clang_markers):
        return "clang"
    # Anything that is neither clang nor g++ falls back to "unknown".
    return "gcc" if "g++" in banner else "unknown"

177 

178 

@functools.cache
def _get_uncommented_code(
    path: pathlib.Path, *, iquotes_options: tuple[str, ...], compiler: str
) -> bytes:
    """Run ``compiler -x c++ ... -fpreprocessed -dD -E path`` and return its stdout.

    Results are memoised by ``functools.cache``, which is why
    ``iquotes_options`` has to be a (hashable) tuple rather than a list.

    Args:
        path: Source file handed to the compiler.
        iquotes_options: Extra command-line options inserted before the
            preprocessing flags.
        compiler: Executable name or path of the C++ compiler.

    Raises:
        BundleError: If ``compiler`` is not found on ``PATH`` or is not
            identified as gcc by ``_check_compiler``.
    """
    if shutil.which(compiler) is None:
        raise BundleError(f"command not found: {compiler}")

    if _check_compiler(compiler) != "gcc":
        if compiler == "g++":
            reason = f"A fake g++ is detected. Please install the GNU C++ compiler.: {compiler}"
        else:
            reason = f"It's not g++. Please specify g++ with $CXX envvar.: {compiler}"
        raise BundleError(reason)

    argv = [compiler, "-x", "c++"]
    argv.extend(iquotes_options)
    argv += ["-fpreprocessed", "-dD", "-E", str(path)]
    return command_stdout(argv, text=False)

206 

207 

def get_uncommented_code(
    path: pathlib.Path, *, iquotes: list[pathlib.Path], compiler: str
) -> bytes:
    """Return the compiler's ``-fpreprocessed -E`` output for ``path``, line-aligned.

    Lines of the form ``# <number> "<file>"`` in the compiler output are
    not copied.  Instead, blank lines are inserted until the next copied
    line lands on line ``<number>`` of the result.

    Args:
        path: Source file to process (resolved before use).
        iquotes: Directories passed to the compiler as ``-I <dir>``.
        compiler: Executable name or path of the C++ compiler.
    """
    include_flags = tuple(
        flag
        for directory in iquotes
        for flag in ("-I", str(directory.resolve()))
    )
    preprocessed = _get_uncommented_code(
        path.resolve(), iquotes_options=include_flags, compiler=compiler
    )

    linemarker = re.compile(rb'# (\d+) ".*"')
    kept: list[bytes] = []
    for raw in preprocessed.splitlines(keepends=True):
        marker = linemarker.match(raw.rstrip())
        if marker is None:
            kept.append(raw)
            continue
        # Pad so that the next kept line becomes line `target` (1-based).
        target = int(marker.group(1))
        missing = target - 1 - len(kept)
        if missing > 0:
            kept.extend([b"\n"] * missing)
    return b"".join(kept)

227 

228 

class BundleError(Exception):
    """Base class for errors raised while bundling C++ sources."""

231 

232 

class BundleErrorAt(BundleError):  # noqa: N818
    """A `BundleError` whose message is prefixed with ``<path>: line <n>: ``."""

    def __init__(
        self, path: pathlib.Path, line: int, message: str, *args: Any, **kwargs: Any
    ):
        """Build the located message and forward it to the base exception.

        Args:
            path: File the error refers to.  It is shown relative to the
                current working directory when it lies inside it, and as
                given otherwise.
            line: Line number to report.
            message: Description of the problem.
            *args: Forwarded unchanged to the base class.
            **kwargs: Forwarded unchanged to the base class.
        """
        try:
            shown = path.resolve().relative_to(pathlib.Path.cwd())
        except ValueError:
            # Not under the working directory: keep the path as passed in.
            shown = path
        super().__init__(f"{shown!s}: line {line}: {message}", *args, **kwargs)

241 

242 

243class Bundler: 

244 iquotes: list[pathlib.Path] 

245 pragma_once: set[pathlib.Path] 

246 pragma_once_system: set[str] 

247 result_lines: list[bytes] 

248 path_stack: set[pathlib.Path] 

249 compiler: str 

250 

251 def __init__( 

252 self, 

253 *, 

254 iquotes: list[pathlib.Path] | None = None, 

255 compiler: str = os.environ.get("CXX", "g++"), 

256 ) -> None: 

257 if iquotes is None: 257 ↛ 258line 257 didn't jump to line 258 because the condition on line 257 was never true

258 iquotes = [] 

259 self.iquotes = iquotes 

260 self.pragma_once = set() 

261 self.pragma_once_system = set() 

262 self.result_lines = [] 

263 self.path_stack = set() 

264 self.compiler = compiler 

265 

266 # これをしないと __FILE__ や __LINE__ が壊れる 

267 def _line(self, line: int, path: pathlib.Path) -> None: 

268 while self.result_lines and self.result_lines[-1].startswith(b"#line "): 

269 self.result_lines.pop() 

270 with contextlib.suppress(ValueError): 

271 path = path.relative_to(pathlib.Path.cwd()) 

272 # パス中の特殊文字を JSON style にエスケープしてから生成コードに記述 

273 # quick solution to this: https://github.com/online-judge-tools/verification-helper/issues/280 

274 self.result_lines.append(f"#line {line} {json.dumps(str(path))}\n".encode()) 

275 

276 # path を解決する 

277 # see: https://gcc.gnu.org/onlinedocs/gcc/Directory-Options.html#Directory-Options 

278 def _resolve( 

279 self, path: pathlib.Path, *, included_from: pathlib.Path 

280 ) -> pathlib.Path: 

281 if (included_from.parent / path).exists(): 281 ↛ 283line 281 didn't jump to line 283 because the condition on line 281 was always true

282 return (included_from.parent / path).resolve() 

283 for dir_ in self.iquotes: 

284 if (dir_ / path).exists(): 

285 return (dir_ / path).resolve() 

286 raise BundleErrorAt(path, -1, "no such header") 

287 

288 def update(self, path: pathlib.Path) -> None: 

289 if path.resolve() in self.pragma_once: 289 ↛ 290line 289 didn't jump to line 290 because the condition on line 289 was never true

290 logger.debug( 

291 "%s: skipped since this file is included once with include guard", 

292 path, 

293 ) 

294 return 

295 

296 # 再帰的に自分自身を #include してたら諦める 

297 if path in self.path_stack: 297 ↛ 298line 297 didn't jump to line 298 because the condition on line 297 was never true

298 raise BundleErrorAt(path, -1, "cycle found in inclusion relations") 

299 self.path_stack.add(path) 

300 try: 

301 code = path.read_bytes() 

302 if not code.endswith(b"\n"): 302 ↛ 304line 302 didn't jump to line 304 because the condition on line 302 was never true

303 # ファイルの末尾に改行がなかったら足す 

304 code += b"\n" 

305 

306 # include guard のまわりの変数 

307 # NOTE: include guard に使われたマクロがそれ以外の用途にも使われたり #undef されたりすると壊れるけど、無視します 

308 non_guard_line_found = False 

309 pragma_once_found = False 

310 include_guard_macro = None 

311 include_guard_define_found = False 

312 include_guard_endif_found = False 

313 preprocess_if_nest = 0 

314 

315 lines = code.splitlines(keepends=True) 

316 uncommented_lines = get_uncommented_code( 

317 path, iquotes=self.iquotes, compiler=self.compiler 

318 ).splitlines(keepends=True) 

319 uncommented_lines.extend( 

320 [b""] * (len(lines) - len(uncommented_lines)) 

321 ) # trailing comment lines are removed 

322 assert len(lines) == len(uncommented_lines) 

323 self._line(1, path) 

324 for i, (line, uncommented_line) in enumerate( 

325 zip(lines, uncommented_lines, strict=False) 

326 ): 

327 # nest の処理 

328 if re.match(rb"\s*#\s*(if|ifdef|ifndef)\s.*", uncommented_line): 

329 preprocess_if_nest += 1 

330 if ( 330 ↛ 334line 330 didn't jump to line 334 because the condition on line 330 was never true

331 re.match(rb"\s*#\s*(else\s*|elif\s.*)", uncommented_line) 

332 and preprocess_if_nest == 0 

333 ): 

334 raise BundleErrorAt(path, i + 1, "unmatched #else / #elif") 

335 if re.match(rb"\s*#\s*endif\s*", uncommented_line): 

336 preprocess_if_nest -= 1 

337 if preprocess_if_nest < 0: 337 ↛ 338line 337 didn't jump to line 338 because the condition on line 337 was never true

338 raise BundleErrorAt(path, i + 1, "unmatched #endif") 

339 is_toplevel = preprocess_if_nest == 0 or ( 

340 preprocess_if_nest == 1 and include_guard_macro is not None 

341 ) 

342 

343 # #pragma once 

344 if re.match( 

345 rb"\s*#\s*pragma\s+once\s*", line 

346 ): # #pragma once は comment 扱いで消されてしまう 

347 logger.debug("%s: line %s: #pragma once", path, i + 1) 

348 if non_guard_line_found: 348 ↛ 350line 348 didn't jump to line 350 because the condition on line 348 was never true

349 # 先頭以外で #pragma once されてた場合は諦める 

350 raise BundleErrorAt( 

351 path, i + 1, "#pragma once found in a non-first line" 

352 ) 

353 if include_guard_macro is not None: 353 ↛ 354line 353 didn't jump to line 354 because the condition on line 353 was never true

354 raise BundleErrorAt( 

355 path, 

356 i + 1, 

357 "#pragma once found in an include guard with #ifndef", 

358 ) 

359 if path.resolve() in self.pragma_once: 359 ↛ 360line 359 didn't jump to line 360 because the condition on line 359 was never true

360 return 

361 pragma_once_found = True 

362 self.pragma_once.add(path.resolve()) 

363 self._line(i + 2, path) 

364 continue 

365 

366 matched: re.Match[bytes] | None 

367 # #ifndef HOGE_H as guard 

368 if ( 

369 not pragma_once_found 

370 and not non_guard_line_found 

371 and include_guard_macro is None 

372 ): 

373 matched = re.match(rb"\s*#\s*ifndef\s+(\w+)\s*", uncommented_line) 

374 if matched: 

375 include_guard_macro = matched.group(1).decode() 

376 logger.debug( 

377 "%s: line %s: #ifndef %s", 

378 path, 

379 i + 1, 

380 include_guard_macro, 

381 ) 

382 self.result_lines.append(b"\n") 

383 continue 

384 

385 # #define HOGE_H as guard 

386 if include_guard_macro is not None and not include_guard_define_found: 

387 matched = re.match(rb"\s*#\s*define\s+(\w+)\s*", uncommented_line) 

388 if matched and matched.group(1).decode() == include_guard_macro: 388 ↛ 401line 388 didn't jump to line 401 because the condition on line 388 was always true

389 self.pragma_once.add(path.resolve()) 

390 logger.debug( 

391 "%s: line %s: #define %s", 

392 path, 

393 i + 1, 

394 include_guard_macro, 

395 ) 

396 include_guard_define_found = True 

397 self.result_lines.append(b"\n") 

398 continue 

399 

400 # #endif as guard 

401 if ( 

402 include_guard_define_found 

403 and preprocess_if_nest == 0 

404 and not include_guard_endif_found 

405 ) and re.match(rb"\s*#\s*endif\s*", uncommented_line): 

406 include_guard_endif_found = True 

407 self.result_lines.append(b"\n") 

408 continue 

409 

410 if uncommented_line and not re.match(rb"^\s*$", uncommented_line): 

411 non_guard_line_found = True 

412 if ( 412 ↛ 417line 412 didn't jump to line 417 because the condition on line 412 was never true

413 include_guard_macro is not None 

414 and not include_guard_define_found 

415 ): 

416 # 先頭に #ifndef が見付かっても #define が続かないならそれは include guard ではない 

417 include_guard_macro = None 

418 if include_guard_endif_found: 418 ↛ 420line 418 didn't jump to line 420 because the condition on line 418 was never true

419 # include guard の外側にコードが書かれているとまずいので検出する 

420 raise BundleErrorAt( 

421 path, i + 1, "found codes out of include guard" 

422 ) 

423 

424 # #include <...> 

425 matched = re.match(rb"\s*#\s*include\s*<(.*)>\s*", uncommented_line) 

426 if matched: 

427 included = matched.group(1).decode() 

428 logger.debug( 

429 "%s: line %s: #include <%s>", path, i + 1, str(included) 

430 ) 

431 if included in self.pragma_once_system: 431 ↛ 432line 431 didn't jump to line 432 because the condition on line 431 was never true

432 self._line(i + 2, path) 

433 elif not is_toplevel: 433 ↛ 435line 433 didn't jump to line 435 because the condition on line 433 was never true

434 # #pragma once 系の判断ができない場合はそっとしておく 

435 self.result_lines.append(line) 

436 elif ( 436 ↛ 446line 436 didn't jump to line 446 because the condition on line 436 was always true

437 included in C_STANDARD_LIBS 

438 or included in CXX_STANDARD_LIBS 

439 or included in CXX_C_ORIGIN_LIBS 

440 ): 

441 if BITS_STDCXX_H in self.pragma_once_system: 441 ↛ 442line 441 didn't jump to line 442 because the condition on line 441 was never true

442 self._line(i + 2, path) 

443 else: 

444 self.pragma_once_system.add(included) 

445 self.result_lines.append(line) 

446 elif included in EXT_LIBS: 

447 if BITS_EXTCXX_H in self.pragma_once_system: 

448 self._line(i + 2, path) 

449 else: 

450 self.pragma_once_system.add(included) 

451 self.result_lines.append(line) 

452 elif included in TR1_LIBS: 

453 if BITS_STDTR1CXX_H in self.pragma_once_system: 

454 self._line(i + 2, path) 

455 else: 

456 self.pragma_once_system.add(included) 

457 self.result_lines.append(line) 

458 else: 

459 # possibly: bits/*, tr2/* boost/*, c-posix library, etc. 

460 self.pragma_once_system.add(included) 

461 self.result_lines.append(line) 

462 if included in [BITS_EXTCXX_H, BITS_STDTR1CXX_H]: 

463 self.pragma_once_system.add(BITS_STDCXX_H) 

464 continue 

465 

466 # #include "..." 

467 matched = re.match(rb'\s*#\s*include\s*"(.*)"\s*', uncommented_line) 

468 if matched: 

469 included = matched.group(1).decode() 

470 logger.debug('%s: line %s: #include "%s"', path, i + 1, included) 

471 if not is_toplevel: 471 ↛ 473line 471 didn't jump to line 473 because the condition on line 471 was never true

472 # #if の中から #include されると #pragma once 系の判断が不可能になるので諦める 

473 raise BundleErrorAt( 

474 path, 

475 i + 1, 

476 "unable to process #include in #if / #ifdef / #ifndef other than include guards", 

477 ) 

478 self.update( 

479 self._resolve(pathlib.Path(included), included_from=path) 

480 ) 

481 self._line(i + 2, path) 

482 # #include "iostream" みたいに書いたときの挙動をはっきりさせる 

483 # #include <iostream> /* とかをやられた場合を落とす 

484 continue 

485 

486 # otherwise 

487 self.result_lines.append(line) 

488 

489 # #if #endif の対応が壊れてたら諦める 

490 last_index = i + 1 # pyright: ignore[reportPossiblyUnboundVariable] 

491 

492 if preprocess_if_nest != 0: 492 ↛ 493line 492 didn't jump to line 493 because the condition on line 492 was never true

493 raise BundleErrorAt( 

494 path, last_index, "unmatched #if / #ifdef / #ifndef" 

495 ) 

496 if include_guard_macro is not None and not include_guard_endif_found: 496 ↛ 497line 496 didn't jump to line 497 because the condition on line 496 was never true

497 raise BundleErrorAt(path, last_index, "unmatched #ifndef") 

498 

499 finally: 

500 # 中で return することがあるので finally 節に入れておく 

501 self.path_stack.remove(path) 

502 

503 def get(self) -> bytes: 

504 return b"".join(self.result_lines)