Files
u-boot/tools/codman/dwarf.py
Simon Glass 683d1578ae codman: Provide a DWARF analyser
Add a way to do static preprocessor analysis using debug information
from compiled code. This reads the DWARF tables to determine which lines
produced code.

Co-developed-by: Claude <noreply@anthropic.com>
Signed-off-by: Simon Glass <simon.glass@canonical.com>
2025-11-24 06:47:07 -07:00

201 lines
7.2 KiB
Python

# SPDX-License-Identifier: GPL-2.0
#
# Copyright 2025 Canonical Ltd
#
"""DWARF debug info-based line-level analysis for source code.
This module provides functionality to analyse which lines in source files
were compiled by extracting line information from DWARF debug data in
object files.
"""
import multiprocessing
import os
import subprocess
from collections import defaultdict
from u_boot_pylib import tout
from analyser import Analyser, FileResult
def _resolve_header_path(header_path, srcdir, obj_dir):
    """Resolve a file path from a readelf section header to a real file.

    Absolute paths are used as they are. Relative paths are tried first
    relative to the object file's directory within the source tree, then
    relative to the source root.

    Args:
        header_path (str): Path as printed by readelf
        srcdir (str): Path to source root directory
        obj_dir (str): Directory of the object file, relative to the build
            directory

    Returns:
        str: Canonical path of the existing file, or None if the path does
            not name an existing file
    """
    if os.path.isabs(header_path):
        candidates = [header_path]
    else:
        candidates = [os.path.join(srcdir, obj_dir, header_path),
                      os.path.join(srcdir, header_path)]
    for candidate in candidates:
        abs_path = os.path.realpath(candidate)
        if os.path.exists(abs_path):
            return abs_path
    return None


def _parse_decoded_lines(output, srcdir, obj_dir):
    """Parse 'readelf --debug-dump=decodedline' output into a line map.

    Args:
        output (str): Text printed by readelf
        srcdir (str): Path to source root directory
        obj_dir (str): Directory of the object file, relative to the build
            directory

    Returns:
        defaultdict: Mapping of source file paths to sets of line numbers
    """
    source_lines = defaultdict(set)
    current_file = None
    for line in output.splitlines():
        # Skip blank lines, the section banner, column headings and any
        # indented lines
        if not line.strip() or line.startswith(
                (' ', 'Contents of', 'File name')):
            continue

        # Section headers look like '/path/to/file.c:'. Some readelf
        # versions appear to add a '[++]' suffix or a 'CU: ' prefix, so
        # strip those before resolving the path
        header = line[:-len('[++]')] if line.endswith('[++]') else line
        if header.endswith(':'):
            header_path = header.rstrip(':')
            if header_path.startswith('CU: '):
                header_path = header_path[len('CU: '):]
            # An unresolvable header resets current_file to None, so that
            # the data lines which follow are dropped rather than being
            # attributed to the previous file
            current_file = _resolve_header_path(header_path, srcdir, obj_dir)
            continue

        # Data lines: '<file name> <line number> <address> ...'
        if current_file is None:
            continue
        parts = line.split()
        if len(parts) < 2:
            continue
        try:
            line_num = int(parts[1])
        except ValueError:
            # Special line numbers (like '-') are not real source lines
            continue
        if line_num > 0:
            source_lines[current_file].add(line_num)
    return source_lines


def worker(args):
    """Extract line numbers from DWARF debug info in an object file.

    Uses readelf --debug-dump=decodedline to get the line table, then parses
    section headers and line entries to determine which source lines were
    compiled into the object.

    This is a module-level function so that it can be used with
    multiprocessing.Pool.

    Args:
        args (tuple): Tuple of (obj_path, build_dir, srcdir)

    Returns:
        tuple: (source_lines_dict, error_msg) where source_lines_dict is a
            mapping of source file paths to sets of line numbers, and
            error_msg is None on success or an error string on failure
    """
    obj_path, build_dir, srcdir = args

    # Get the directory of the .o file relative to build_dir
    obj_dir = os.path.dirname(os.path.relpath(obj_path, build_dir))

    # Use readelf to extract decoded line information
    try:
        result = subprocess.run(
            ['readelf', '--debug-dump=decodedline', obj_path],
            capture_output=True, text=True, check=False,
            encoding='utf-8', errors='ignore')
    except (OSError, subprocess.SubprocessError) as e:
        error_msg = f'Failed to execute readelf on {obj_path}: {e}'
        return (defaultdict(set), error_msg)

    if result.returncode != 0:
        error_msg = (f'readelf failed on {obj_path} with return code '
                     f'{result.returncode}\nstderr: {result.stderr}')
        return (defaultdict(set), error_msg)

    return (_parse_decoded_lines(result.stdout, srcdir, obj_dir), None)
# pylint: disable=too-few-public-methods
class DwarfAnalyser(Analyser):
    """Analyser that uses DWARF debug info to determine active lines.

    This analyser extracts line number information from DWARF debug data in
    compiled object files to determine which source lines generated code.
    """
    def __init__(self, build_dir, srcdir, used_sources, keep_temps=False):
        """Initialise the DWARF analyser.

        Args:
            build_dir (str): Build directory containing .o files
            srcdir (str): Path to source root directory
            used_sources (set): Set of source files that are compiled
            keep_temps (bool): If True, keep temporary files for debugging
        """
        super().__init__(srcdir, keep_temps)
        self.build_dir = build_dir
        self.used_sources = used_sources

    def extract_lines(self, jobs=None):
        """Extract used line numbers from DWARF debug info in object files.

        Runs worker() on every object file found in the build directory,
        using a process pool, and merges the per-object results. If any
        object file fails, each error is reported with tout.error() and
        then tout.fatal() is called.

        Args:
            jobs (int): Number of parallel jobs (None = use all CPUs)

        Returns:
            dict: Mapping of source file paths to sets of line numbers that
                generated code
        """
        # Find all .o files
        obj_files = self.find_object_files(self.build_dir)
        if not obj_files:
            return defaultdict(set)

        # Prepare arguments for parallel processing
        args_list = [(obj_path, self.build_dir, self.srcdir)
                     for obj_path in obj_files]

        # Process in parallel
        num_jobs = jobs or multiprocessing.cpu_count()
        with multiprocessing.Pool(num_jobs) as pool:
            results = pool.map(worker, args_list)

        # Merge results from all workers and check for errors
        source_lines = defaultdict(set)
        errors = []
        for result_dict, error_msg in results:
            if error_msg:
                errors.append(error_msg)
                continue
            for source_file, lines in result_dict.items():
                source_lines[source_file].update(lines)

        # Report any errors
        if errors:
            for error in errors:
                tout.error(error)
            tout.fatal(f'readelf failed on {len(errors)} object file(s)')
        return source_lines

    def process(self, jobs=None):
        """Perform line-level analysis using DWARF debug info.

        Each file in used_sources gets a FileResult. A file with no entry
        in the DWARF line map is reported with every line inactive.

        Args:
            jobs (int): Number of parallel jobs (None = use all CPUs)

        Returns:
            dict: Mapping of source file paths to FileResult named tuples
        """
        tout.progress('Extracting DWARF line information...')
        dwarf_line_map = self.extract_lines(jobs)

        file_results = {}
        for source_file in self.used_sources:
            abs_path = os.path.realpath(source_file)
            used_lines = dwarf_line_map.get(abs_path, set())

            # Count total lines in the file
            total_lines = self.count_lines(abs_path)

            # Create line status dict
            line_status = {
                i: 'active' if i in used_lines else 'inactive'
                for i in range(1, total_lines + 1)
            }

            # Count active lines from line_status rather than from
            # used_lines: the DWARF data may hold line numbers past the
            # counted length of the file, which would otherwise make
            # inactive_lines negative and the counts disagree with
            # line_status
            active_lines = sum(
                1 for status in line_status.values() if status == 'active')
            inactive_lines = total_lines - active_lines

            file_results[abs_path] = FileResult(
                total_lines=total_lines,
                active_lines=active_lines,
                inactive_lines=inactive_lines,
                line_status=line_status
            )
        tout.info(f'Analysed {len(file_results)} files using DWARF debug info')
        return file_results