Add a way to do static preprocessor analysis using debug information from compiled code. This reads the DWARF tables to determine which lines produced code. Co-developed-by: Claude <noreply@anthropic.com> Signed-off-by: Simon Glass <simon.glass@canonical.com>
201 lines
7.2 KiB
Python
201 lines
7.2 KiB
Python
# SPDX-License-Identifier: GPL-2.0
|
|
#
|
|
# Copyright 2025 Canonical Ltd
|
|
#
|
|
"""DWARF debug info-based line-level analysis for source code.
|
|
|
|
This module provides functionality to analyse which lines in source files
|
|
were compiled by extracting line information from DWARF debug data in
|
|
object files.
|
|
"""
|
|
|
|
import multiprocessing
|
|
import os
|
|
import subprocess
|
|
from collections import defaultdict
|
|
|
|
from u_boot_pylib import tout
|
|
from analyser import Analyser, FileResult
|
|
|
|
|
|
def _resolve_header(header_path, srcdir, obj_dir):
    """Map a path taken from a readelf file header onto an existing file.

    Args:
        header_path (str): Path as printed by readelf (without the trailing
            colon or any 'CU: ' prefix)
        srcdir (str): Path to source root directory
        obj_dir (str): Directory of the object file, relative to the build
            directory

    Returns:
        str: Real path of the source file, or None if no candidate exists
    """
    if os.path.isabs(header_path):
        candidates = [header_path]
    else:
        # Relative path - try relative to the object's directory within the
        # source tree first, then relative to the source root
        candidates = [os.path.join(srcdir, obj_dir, header_path),
                      os.path.join(srcdir, header_path)]

    for candidate in candidates:
        abs_path = os.path.realpath(candidate)
        if os.path.exists(abs_path):
            return abs_path
    return None


def _parse_decodedline(output, srcdir, obj_dir):
    """Parse the text produced by 'readelf --debug-dump=decodedline'.

    The output consists of file headers (a path followed by a colon) each
    followed by data lines whose second column is a source-line number.

    Args:
        output (str): Standard output captured from readelf
        srcdir (str): Path to source root directory
        obj_dir (str): Directory of the object file, relative to the build
            directory

    Returns:
        defaultdict: Mapping of source file paths to sets of line numbers
    """
    cu_prefix = 'CU: '
    source_lines = defaultdict(set)
    current_file = None

    for line in output.splitlines():
        # Skip blank lines, indented lines and the table headings
        if not line.strip() or line.startswith(
                (' ', 'Contents of', 'File name')):
            continue

        # File header, e.g. '/path/to/file.c:' or 'CU: ./file.c:'
        if line.endswith(':'):
            header_path = line.rstrip(':')
            # readelf may introduce the first file of a compilation unit
            # with a 'CU: ' prefix, which is not part of the path
            if header_path.startswith(cu_prefix):
                header_path = header_path[len(cu_prefix):]

            # An unresolved header resets current_file to None, so that the
            # data lines below it are dropped rather than being credited to
            # the previously seen file
            current_file = _resolve_header(header_path, srcdir, obj_dir)
            continue

        if current_file is None:
            continue

        parts = line.split()
        if len(parts) < 2:
            continue
        try:
            line_num = int(parts[1])
        except ValueError:
            # Special line numbers (like '-') are not integers
            continue
        if line_num > 0:
            source_lines[current_file].add(line_num)

    return source_lines


def worker(args):
    """Extract line numbers from DWARF debug info in an object file.

    Uses readelf --debug-dump=decodedline to get the line table, then parses
    file headers and line entries to determine which source lines were
    compiled into the object. Lines listed under a header that cannot be
    matched to an existing file are ignored.

    Args:
        args (tuple): Tuple of (obj_path, build_dir, srcdir)

    Returns:
        tuple: (source_lines_dict, error_msg) where source_lines_dict is a
            mapping of source file paths to sets of line numbers, and
            error_msg is None on success or an error string on failure
    """
    obj_path, build_dir, srcdir = args

    # Get the directory of the .o file relative to build_dir
    obj_dir = os.path.dirname(os.path.relpath(obj_path, build_dir))

    # Use readelf to extract decoded line information
    try:
        result = subprocess.run(
            ['readelf', '--debug-dump=decodedline', obj_path],
            capture_output=True, text=True, check=False,
            encoding='utf-8', errors='ignore')
    except (OSError, subprocess.SubprocessError) as exc:
        error_msg = f'Failed to execute readelf on {obj_path}: {exc}'
        return (defaultdict(set), error_msg)

    if result.returncode != 0:
        error_msg = (f'readelf failed on {obj_path} with return code '
                     f'{result.returncode}\nstderr: {result.stderr}')
        return (defaultdict(set), error_msg)

    return (_parse_decodedline(result.stdout, srcdir, obj_dir), None)
|
|
|
|
|
|
# pylint: disable=too-few-public-methods
|
|
class DwarfAnalyser(Analyser):
    """Analyser that uses DWARF debug info to determine active lines.

    This analyser extracts line number information from DWARF debug data in
    compiled object files to determine which source lines generated code.
    """
    def __init__(self, build_dir, srcdir, used_sources, keep_temps=False):
        """Initialise the DWARF analyser.

        Args:
            build_dir (str): Build directory containing .o files
            srcdir (str): Path to source root directory
            used_sources (set): Set of source files that are compiled
            keep_temps (bool): If True, keep temporary files for debugging
        """
        super().__init__(srcdir, keep_temps)
        self.build_dir = build_dir
        self.used_sources = used_sources

    def extract_lines(self, jobs=None):
        """Extract used line numbers from DWARF debug info in object files.

        Runs worker() on every object file using a multiprocessing pool and
        merges the per-object results. If any object file fails, each error
        is reported with tout.error() and then tout.fatal() is called.

        Args:
            jobs (int): Number of parallel jobs (None = use all CPUs)

        Returns:
            dict: Mapping of source file paths to sets of line numbers that
                generated code
        """
        # Find all .o files; find_object_files() is not defined in this
        # class (presumably inherited from Analyser)
        obj_files = self.find_object_files(self.build_dir)
        if not obj_files:
            return defaultdict(set)

        # Prepare arguments for parallel processing
        args_list = [(obj_path, self.build_dir, self.srcdir)
                     for obj_path in obj_files]

        # Process in parallel
        num_jobs = jobs if jobs else multiprocessing.cpu_count()
        with multiprocessing.Pool(num_jobs) as pool:
            results = pool.map(worker, args_list)

        # Merge results from all workers and collect any errors
        source_lines = defaultdict(set)
        errors = []
        for result_dict, error_msg in results:
            if error_msg:
                errors.append(error_msg)
                continue
            for source_file, lines in result_dict.items():
                source_lines[source_file].update(lines)

        # Report any errors
        if errors:
            for error in errors:
                tout.error(error)
            tout.fatal(f'readelf failed on {len(errors)} object file(s)')

        return source_lines

    def process(self, jobs=None):
        """Perform line-level analysis using DWARF debug info.

        Only line numbers within the file (1..total_lines) are counted as
        active, so that active_lines + inactive_lines == total_lines and the
        counts agree with line_status, even if the debug info refers to
        lines beyond the end of the file as it now exists.

        Args:
            jobs (int): Number of parallel jobs (None = use all CPUs)

        Returns:
            dict: Mapping of source file paths to FileResult named tuples
        """
        tout.progress('Extracting DWARF line information...')
        dwarf_line_map = self.extract_lines(jobs)

        file_results = {}
        for source_file in self.used_sources:
            # Keys in dwarf_line_map are real paths, so look up the same way
            abs_path = os.path.realpath(source_file)
            used_lines = dwarf_line_map.get(abs_path, set())

            # Count total lines in the file; count_lines() is not defined in
            # this class (presumably inherited from Analyser)
            total_lines = self.count_lines(abs_path)

            # Ignore line numbers which fall outside the file
            in_range = {num for num in used_lines if 1 <= num <= total_lines}
            active_lines = len(in_range)
            inactive_lines = total_lines - active_lines

            # Create line status dict
            line_status = {
                num: 'active' if num in in_range else 'inactive'
                for num in range(1, total_lines + 1)
            }

            file_results[abs_path] = FileResult(
                total_lines=total_lines,
                active_lines=active_lines,
                inactive_lines=inactive_lines,
                line_status=line_status
            )

        tout.info(f'Analysed {len(file_results)} files using DWARF debug info')
        return file_results
|