Coverage for coverage / bytecode.py: 24.460%
93 statements
« prev ^ index » next coverage.py v7.12.1a0.dev1, created at 2025-11-30 17:57 +0000
« prev ^ index » next coverage.py v7.12.1a0.dev1, created at 2025-11-30 17:57 +0000
1# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
2# For details: https://github.com/coveragepy/coveragepy/blob/main/NOTICE.txt
4"""Bytecode analysis for coverage.py"""
6from __future__ import annotations 1abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234
8import collections 1abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234
9import dis 1abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234
10from types import CodeType 1abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234
11from typing import Iterable, Mapping, Optional 1abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234
13from coverage.types import TArc, TLineNo, TOffset 1abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234
def code_objects(code: CodeType) -> Iterable[CodeType]:
    """Yield `code` and every code object nested inside it.

    Nested code objects are found in the `co_consts` of each code object
    visited.  A code object is always yielded before the code objects
    nested inside it.
    """
    pending = [code]
    while pending:
        current = pending.pop()
        # Queue the nested code objects before handing back the current one,
        # so they are visited on later iterations.
        pending.extend(
            const for const in current.co_consts if isinstance(const, CodeType)
        )
        yield current
def op_set(*op_names: str) -> set[int]:
    """Make a set of opcodes from instruction names.

    The names are looked up in `dis.opmap`.  The names might not exist in
    this version of Python, skip those if not.

    Returns the set of opcode numbers for the names that were found.  An
    assertion fails if none of the names is a known opcode.
    """
    # Compare against None instead of testing truthiness: an opcode numbered
    # 0 is a valid lookup result but is falsy, and would be silently dropped.
    ops = {op for name in op_names if (op := dis.opmap.get(name)) is not None}
    assert ops, f"At least one opcode must exist: {op_names}"
    return ops
# Unconditional jumps: these opcodes always transfer control elsewhere.
ALWAYS_JUMPS = op_set("JUMP_BACKWARD", "JUMP_BACKWARD_NO_INTERRUPT", "JUMP_FORWARD")

# Opcodes that exit from a function.
RETURNS = op_set("RETURN_VALUE", "RETURN_GENERATOR")

# Opcodes that do no work.
NOPS = op_set("NOP", "NOT_TAKEN")
class InstructionWalker:
    """Step through the instructions of a code object.

    There are two ways to traverse the instructions.  The first visits every
    instruction in strict offset order: `walk(follow_jumps=False)`.  The
    second follows unconditional jumps, to trace how execution will flow:
    `walk(follow_jumps=True)`.
    """

    def __init__(self, code: CodeType) -> None:
        """Index the instructions of `code` by their offset."""
        self.code = code

        instructions = list(dis.get_instructions(code))
        # There must be at least one instruction to walk.
        assert instructions

        self.insts: dict[TOffset, dis.Instruction] = {
            instruction.offset: instruction for instruction in instructions
        }
        # The offset of the last instruction bounds every walk.
        self.max_offset = instructions[-1].offset

    def walk(
        self, *, start_at: TOffset = 0, follow_jumps: bool = True
    ) -> Iterable[dis.Instruction]:
        """
        Yield instructions beginning at offset `start_at`.

        When `follow_jumps` is true, an unconditional jump continues the walk
        at its jump target instead of at the next offset.  The walk ends when
        it passes the last instruction or returns to an offset it has already
        visited.
        """
        visited = set()
        position = start_at
        while position <= self.max_offset and position not in visited:
            visited.add(position)
            inst = self.insts.get(position)
            if inst is None:
                # No instruction is recorded at this offset, try the next one.
                position += 2
                continue
            yield inst
            if follow_jumps and inst.opcode in ALWAYS_JUMPS:
                position = inst.jump_target
            else:
                position += 2
# The trails leaving one instruction: each arc (from_line, to_line) maps to
# the set of instruction offsets on the trail that produces that arc.  The
# key None records a trail that did not lead to an arc.
TBranchTrailsOneSource = dict[Optional[TArc], set[TOffset]]

# The trails for a code object, keyed by instruction offset.
TBranchTrails = dict[TOffset, TBranchTrailsOneSource]
def branch_trails(
    code: CodeType,
    multiline_map: Mapping[TLineNo, TLineNo],
) -> TBranchTrails:
    """
    Calculate branch trails for `code`.

    `multiline_map` maps line numbers to the first line number of a
    multi-line statement.

    An instruction with a jump_target might continue somewhere other than
    the next instruction.  Unconditional jumps (ALWAYS_JUMPS) also have a
    jump_target, but they never start a branch possibility, so they are
    skipped as sources.

    Every other instruction with a jump_target is a branch possibility, with
    two ways forward: the next instruction, and the jump target.  From each
    of those we follow a trail of instruction offsets until we reach a new
    source line.  That gives the arc from the line of the branching
    instruction to the new line.

    """
    walker = InstructionWalker(code)
    # The destination recorded for a trail that reaches a return.
    exit_line = -code.co_firstlineno

    def follow_trail(
        source_trails: TBranchTrailsOneSource,
        from_line: TLineNo,
        start_at: TOffset,
    ) -> None:
        """Follow one trail from `start_at` and record it in `source_trails`."""
        visited_offsets: set[TOffset] = set()
        dest_line = None
        for step in walker.walk(start_at=start_at, follow_jumps=True):
            visited_offsets.add(step.offset)
            step_line = step.line_number
            if step_line is not None:
                step_line = multiline_map.get(step_line, step_line)
            if step_line and step_line != from_line:
                # Reached a different source line: that is the arc's end.
                dest_line = step_line
                break
            if step.jump_target and step.opcode not in ALWAYS_JUMPS:
                # Another branch possibility: this trail produces no arc.
                break
            if step.opcode in RETURNS:
                dest_line = exit_line
                break
        if dest_line is None:
            source_trails[None] = set()
        else:
            source_trails[(from_line, dest_line)].update(visited_offsets)

    all_trails: TBranchTrails = collections.defaultdict(
        lambda: collections.defaultdict(set)
    )
    for inst in walker.walk(follow_jumps=False):
        # Only instructions that might or might not jump are of interest:
        # they need a jump target, and must not be an unconditional jump.
        if not inst.jump_target or inst.opcode in ALWAYS_JUMPS:
            continue
        if inst.line_number is None:
            continue
        from_line = multiline_map.get(inst.line_number, inst.line_number)

        # Two trails: first from the next instruction, then from the
        # jump_target instruction.
        source_trails: TBranchTrailsOneSource = collections.defaultdict(set)
        for start_at in (inst.offset + 2, inst.jump_target):
            follow_trail(source_trails, from_line, start_at)
        all_trails[inst.offset] = source_trails

        # Sometimes BRANCH_RIGHT or BRANCH_LEFT events come from instructions
        # other than the original jump possibility instruction.  Register
        # each trail under every offset on it, so a trail can be picked up
        # in the middle if need be.
        for arc, offsets in source_trails.items():
            for offset in offsets:
                all_trails[offset][arc].update(offsets)

    return all_trails
def always_jumps(code: CodeType) -> dict[TOffset, TOffset]:
    """Map the offsets of do-nothing bytecodes to the offset they lead to.

    Only bytecodes that do no work and go to another bytecode are included:
    unconditional jumps map to their jump target, and no-op instructions map
    to the offset two past their own.
    """
    redirects: dict[TOffset, TOffset] = {}
    for inst in InstructionWalker(code).walk(follow_jumps=False):
        opcode = inst.opcode
        if opcode in ALWAYS_JUMPS:
            destination = inst.jump_target
        elif opcode in NOPS:
            destination = inst.offset + 2
        else:
            continue
        redirects[inst.offset] = destination
    return redirects