codman: Provide a DWARF analyser
Add a way to do static preprocessor analysis using debug information from compiled code. This reads the DWARF tables to determine which lines produced code. Co-developed-by: Claude <noreply@anthropic.com> Signed-off-by: Simon Glass <simon.glass@canonical.com>
This commit is contained in:
200
tools/codman/dwarf.py
Normal file
200
tools/codman/dwarf.py
Normal file
@@ -0,0 +1,200 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# Copyright 2025 Canonical Ltd
|
||||
#
|
||||
"""DWARF debug info-based line-level analysis for source code.
|
||||
|
||||
This module provides functionality to analyse which lines in source files
|
||||
were compiled by extracting line information from DWARF debug data in
|
||||
object files.
|
||||
"""
|
||||
|
||||
import multiprocessing
|
||||
import os
|
||||
import subprocess
|
||||
from collections import defaultdict
|
||||
|
||||
from u_boot_pylib import tout
|
||||
from analyser import Analyser, FileResult
|
||||
|
||||
|
||||
def worker(args):
    """Extract compiled line numbers from one object file's DWARF data.

    Runs ``readelf --debug-dump=decodedline`` on the object file and walks
    the decoded line table: section headers name the source file and the
    data rows below them give the line numbers that produced code.

    Args:
        args (tuple): Tuple of (obj_path, build_dir, srcdir)

    Returns:
        tuple: (source_lines_dict, error_msg) where source_lines_dict maps
            source-file paths to sets of line numbers, and error_msg is
            None on success or a description of the failure
    """
    obj_path, build_dir, srcdir = args
    found = defaultdict(set)

    # Directory of the object file relative to the build tree; relative
    # paths in DWARF section headers are resolved against this first
    obj_dir = os.path.dirname(os.path.relpath(obj_path, build_dir))

    def resolve(header_path):
        """Map a section-header path to a candidate absolute source path"""
        if os.path.isabs(header_path):
            return os.path.realpath(header_path)
        # Relative path: prefer <srcdir>/<obj_dir>, fall back to <srcdir>
        cand = os.path.realpath(os.path.join(srcdir, obj_dir, header_path))
        if os.path.exists(cand):
            return cand
        return os.path.realpath(os.path.join(srcdir, header_path))

    try:
        proc = subprocess.run(
            ['readelf', '--debug-dump=decodedline', obj_path],
            capture_output=True, text=True, check=False,
            encoding='utf-8', errors='ignore')
        if proc.returncode:
            return (found,
                    f'readelf failed on {obj_path} with return code '
                    f'{proc.returncode}\nstderr: {proc.stderr}')

        current = None
        for text in proc.stdout.splitlines():
            # Ignore blank or whitespace-only lines, readelf banner
            # lines and indented continuation output
            if not text.strip() or text.startswith(
                    ('Contents of', 'File name', ' ')):
                continue

            # A trailing colon marks a section header naming a file
            if text.endswith(':'):
                path = resolve(text.rstrip(':'))
                if os.path.exists(path):
                    current = path
                continue

            # Data rows only mean something once a file header was seen
            if not current:
                continue
            fields = text.split()
            if len(fields) < 2:
                continue
            try:
                lnum = int(fields[1])
            except ValueError:
                # Special markers (like '-') are not line numbers
                continue
            if lnum > 0:
                found[current].add(lnum)
    except (OSError, subprocess.SubprocessError) as exc:
        return (found, f'Failed to execute readelf on {obj_path}: {exc}')

    return (found, None)
|
||||
|
||||
|
||||
# pylint: disable=too-few-public-methods
class DwarfAnalyser(Analyser):
    """Analyser that uses DWARF debug info to determine active lines.

    Line tables are read out of the DWARF data embedded in the build's
    object files; a source line counts as active when it generated code.
    """
    def __init__(self, build_dir, srcdir, used_sources, keep_temps=False):
        """Set up the DWARF analyser.

        Args:
            build_dir (str): Build directory containing .o files
            srcdir (str): Path to source root directory
            used_sources (set): Set of source files that are compiled
            keep_temps (bool): If True, keep temporary files for debugging
        """
        super().__init__(srcdir, keep_temps)
        self.build_dir = build_dir
        self.used_sources = used_sources

    def extract_lines(self, jobs=None):
        """Collect the used line numbers from every object file.

        Object files are processed in parallel, one readelf invocation
        each, and the per-file results are merged into a single mapping.

        Args:
            jobs (int): Number of parallel jobs (None = use all CPUs)

        Returns:
            dict: Mapping of source file paths to sets of line numbers that
                generated code
        """
        merged = defaultdict(set)

        obj_files = self.find_object_files(self.build_dir)
        if not obj_files:
            return merged

        # One task per object file; workers need the build and source
        # roots to resolve relative DWARF paths
        tasks = [(obj, self.build_dir, self.srcdir) for obj in obj_files]

        pool_size = jobs or multiprocessing.cpu_count()
        with multiprocessing.Pool(pool_size) as pool:
            outcomes = pool.map(worker, tasks)

        # Fold successful results together; collect failures to report
        failures = []
        for lines_by_file, error in outcomes:
            if error:
                failures.append(error)
                continue
            for fname, lines in lines_by_file.items():
                merged[fname].update(lines)

        if failures:
            for msg in failures:
                tout.error(msg)
            tout.fatal(f'readelf failed on {len(failures)} object file(s)')

        return merged

    def process(self, jobs=None):
        """Perform line-level analysis using DWARF debug info.

        Args:
            jobs (int): Number of parallel jobs (None = use all CPUs)

        Returns:
            dict: Mapping of source file paths to FileResult named tuples
        """
        tout.progress('Extracting DWARF line information...')
        line_map = self.extract_lines(jobs)

        results = {}
        for src in self.used_sources:
            path = os.path.realpath(src)
            used = line_map.get(path, set())
            total = self.count_lines(path)

            # Classify each line: 'active' if it produced code
            status = {num: 'active' if num in used else 'inactive'
                      for num in range(1, total + 1)}

            active = len(used)
            results[path] = FileResult(
                total_lines=total,
                active_lines=active,
                inactive_lines=total - active,
                line_status=status)

        tout.info(f'Analysed {len(results)} files using DWARF debug info')
        return results
|
||||
Reference in New Issue
Block a user