check_linker_lists: Enhance detection of alignment problems

When linker-inserted padding breaks list integrity, pointer arithmetic
like (end - start) / sizeof(struct) produces garbage. GCC optimizes
division by constants using multiplicative inverses, which only works
when the dividend is an exact multiple. With padding, outputs like
"Running -858993444 bloblist tests" appear instead of the correct count.

Enhance the linker list checking script to detect these problems by
adding symbol size tracking using nm -S. This enables:

1. Padding detection: Compare symbol sizes to gaps. If gap > size,
   padding was inserted, breaking contiguous array assumptions.

2. Pointer arithmetic bugs: Check if (end - start) marker span is a
   multiple of struct size.

Co-developed-by: Claude <noreply@anthropic.com>
Signed-off-by: Simon Glass <simon.glass@canonical.com>
This commit is contained in:
Simon Glass
2025-12-13 07:35:59 -07:00
parent 2ddc96ae88
commit 2812864362

View File

@@ -1,127 +1,194 @@
#!/usr/bin/env python3
#
# check_list_alignment.py: Auto-discover and verify the uniform
# spacing of all U-Boot linker list symbols
#
# Analyze the symbol table of a U-Boot ELF file to ensure that
# all entries in all linker-generated lists are separated by a consistent
# number of bytes. Detect problems caused by linker-inserted
# alignment padding.
#
# By default, produce no output if no problems are found
# Use the -v flag to force output even on success
#
# Exit Codes:
# 0: Success. No alignment problems were found
# 1: Usage Error. The script was not called with the correct arguments
# 2: Execution Error. Failed to run `nm` or the ELF file was not found
# 3: Problem Found. An inconsistent gap was detected in at least one list
#
# SPDX-License-Identifier: GPL-2.0+
"""Check alignment of U-Boot linker lists.
Auto-discover and verify the uniform spacing of all U-Boot linker list symbols.
Analyze the symbol table of a U-Boot ELF file to ensure that all entries in all
linker-generated lists are separated by a consistent number of bytes. Detect
problems caused by linker-inserted alignment padding.

By default, produce no output if no problems are found.
Use the -v flag to force output even on success.

Exit Codes:
    0: Success - no alignment problems were found
    1: Usage Error - the script was not called with the correct arguments
    2: Execution Error - failed to run `nm` or the ELF file was not found
    3: Problem Found - an inconsistent gap was detected in at least one list
"""
import sys
import subprocess
import re
import argparse
from statistics import mode, StatisticsError
from statistics import mode
from collections import defaultdict, namedtuple
# Information about a symbol: address, size (from nm -S), and name
Symbol = namedtuple('Symbol', ['address', 'size', 'name'])

# Information about the gap between two consecutive symbols; prev_size is
# the nm-reported size of the earlier symbol (0 when nm gave no size)
Gap = namedtuple('Gap', ['gap', 'prev_sym', 'next_sym', 'prev_size'])

# Start and end marker addresses for a list
Markers = namedtuple('Markers', ['start', 'end'])

# Holds all the analysis results from checking the lists
Results = namedtuple('Results', [
    'total_problems', 'total_symbols', 'all_lines', 'max_name_len',
    'list_count'])
def eprint(*args, **kwargs):
    """Print to stderr

    Accepts the same positional and keyword arguments as print(), except
    that `file` is always sys.stderr.
    """
    print(*args, file=sys.stderr, **kwargs)
def check_single_list(name, symbols, max_name_len, marker_info=None):
    """Check alignment for a single list and return its findings

    The expected struct size is taken to be the most common gap between
    consecutive symbols. Three kinds of problem are counted:

    - a gap that differs from the expected gap
    - a gap equal to the expected gap but larger than the size nm reported
      for the preceding symbol (padding was inserted after every element)
    - a start/end marker span that is not a multiple of the expected gap

    Args:
        name (str): The cleaned-up name of the list for display
        symbols (list): A list of Symbol tuples, sorted by address
        max_name_len (int): The max length of list names for column formatting
        marker_info (Markers): Optional namedtuple with start and end addresses

    Returns:
        tuple: (problem_count, list_of_output_lines). Returns (0, []) when
            there are fewer than two symbols, since no gap can be measured
    """
    if len(symbols) < 2:
        return 0, []

    # Pair each symbol with its successor; each pair yields one gap
    pairs = list(zip(symbols, symbols[1:]))
    expected_gap = mode([nxt.address - prev.address for prev, nxt in pairs])

    hex_gap = f'0x{expected_gap:x}'
    lines = [f'{name:<{max_name_len + 2}} {len(symbols):>12} {hex_gap:>17}']

    problem_count = 0
    for prev, nxt in pairs:
        gap = nxt.address - prev.address
        if gap != expected_gap:
            problem_count += 1
            lines.append(
                f' - Bad gap (0x{gap:x}) before symbol: {nxt.name}')
        elif prev.size and gap > prev.size:
            # Gap is larger than symbol size - padding was inserted.
            # A size of 0 means nm gave no size, so nothing can be compared
            problem_count += 1
            lines.append(
                f' - Padding: gap 0x{gap:x} > size 0x{prev.size:x}'
                f' before: {nxt.name}')

    # Check if start/end marker span is a multiple of the struct size
    # If not, pointer subtraction (end - start) will produce wrong results
    # due to compiler optimization using magic number multiplication.
    # Skip the check when the modal gap is 0 (symbols sharing an address):
    # there is no struct size to divide by
    if marker_info is not None and expected_gap:
        total_span = marker_info.end - marker_info.start
        remainder = total_span % expected_gap
        if remainder:
            problem_count += 1
            lines.append(
                f' - Pointer arithmetic bug: span 0x{total_span:x} is not a '
                f'multiple of struct size 0x{expected_gap:x} '
                f'(remainder: {remainder})')

    return problem_count, lines
def run_nm_and_get_lists(elf_path):
    """Run `nm -S` and parse the output to discover all linker lists

    List entries are the `_2_` symbols of type `D`; they are collected as
    Symbol tuples in the order nm prints them (`-n` sorts by address).
    The `_1` (start) and `_3` (end) marker symbols may be of any type and
    are recorded separately. Lines that mention a linker list but cannot be
    parsed produce a warning on stderr and are skipped.

    Args:
        elf_path (str): The path to the ELF file to process

    Returns:
        tuple or None: (lists_dict, markers_dict) or None on error
            lists_dict: entries keyed by base_name
            markers_dict: start/end marker addresses keyed by base_name,
                present only for lists where both markers were found
    """
    cmd = ['nm', '-S', '-n', elf_path]
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except FileNotFoundError:
        eprint(
            "Error: The 'nm' command was not found. "
            'Please ensure binutils is installed')
        return None
    except subprocess.CalledProcessError as e:
        eprint(
            f"Error: Failed to execute 'nm' on '{elf_path}'.\n"
            f' Return Code: {e.returncode}\n Stderr:\n{e.stderr}')
        return None

    # Pattern to match _2_ entries (the actual list elements)
    # NOTE(review): `\w+` is greedy, so the optional `_info` group may never
    # be the part that strips `_info` - confirm the intended base names
    entry_pattern = re.compile(
        r'^(?P<base_name>_u_boot_list_\d+_\w+)(?:_info)?_2_')

    # Pattern to match _1 (start) and _3 (end) markers
    marker_pattern = re.compile(
        r'^(?P<base_name>_u_boot_list_\d+_\w+)_(?P<marker>[13])$')

    lists = defaultdict(list)
    markers = defaultdict(dict)  # {base_name: {'start': addr, 'end': addr}}

    for line in proc.stdout.splitlines():
        if '_u_boot_list_' not in line:
            continue

        # Only the field parsing can fail, so keep the try narrow
        try:
            parts = line.strip().split()
            name = parts[-1]
            address = int(parts[0], 16)
            # Size is present if we have 4 parts and parts[2] is a single char
            if len(parts) == 4 and len(parts[2]) == 1:
                size = int(parts[1], 16)
            else:
                size = 0  # Size not available
        except (ValueError, IndexError):
            eprint(f'Warning: Could not parse line: {line}')
            continue

        # Check for entry (_2_) symbols - must be uppercase D
        if ' D _u_boot_list_' in line:
            match = entry_pattern.match(name)
            if match:
                lists[match.group('base_name')].append(
                    Symbol(address, size, name))
                continue

        # Check for marker (_1 or _3) symbols - can be any type
        match = marker_pattern.match(name)
        if match:
            key = 'start' if match.group('marker') == '1' else 'end'
            markers[match.group('base_name')][key] = address

    # Convert marker dicts to Markers namedtuples (only if both start/end exist)
    marker_tuples = {
        base_name: Markers(found['start'], found['end'])
        for base_name, found in markers.items()
        if 'start' in found and 'end' in found
    }

    return lists, marker_tuples
def collect_data(lists, markers):
"""Collect alignment check data for all lists
Args:
lists (dict): A dictionary of lists and their symbols
markers (dict): A dictionary of start/end marker addresses per list
Returns:
Results: A namedtuple containing the analysis results
'''
"""
if markers is None:
markers = {}
names = {}
prefix_to_strip = '_u_boot_list_2_'
for list_name in lists.keys():
@@ -138,7 +205,9 @@ def collect_data(lists):
symbols = lists[list_name]
total_symbols += len(symbols)
name = names[list_name]
problem_count, lines = check_single_list(name, symbols, max_name_len)
marker_info = markers.get(list_name)
problem_count, lines = check_single_list(name, symbols, max_name_len,
marker_info)
total_problems += problem_count
all_lines.extend(lines)
@@ -150,19 +219,20 @@ def collect_data(lists):
list_count=len(lists))
def show_output(results, verbose):
'''Print the collected results to stderr based on verbosity
"""Print the collected results to stderr based on verbosity
Args:
results (Results): The analysis results from collect_data()
verbose (bool): True to print output even on success
'''
"""
if results.total_problems == 0 and not verbose:
return
header = (f"{'List Name':<{results.max_name_len + 2}} {'# Symbols':>12} "
f"{'Struct Size (hex)':>17}")
sep = f"{'-' * (results.max_name_len + 2)} {'-' * 12} {'-' * 17}"
eprint(header)
eprint(f"{'-' * (results.max_name_len + 2)} {'-' * 12} {'-' * 17}")
eprint(sep)
for line in results.all_lines:
eprint(line)
@@ -177,19 +247,28 @@ def show_output(results, verbose):
eprint('\nSUCCESS: All discovered lists have consistent alignment')
def main():
'''Main entry point of the script, returns an exit code'''
"""Main entry point of the script, returns an exit code"""
epilog_text = '''
Auto-discover all linker-generated lists in a U-Boot ELF file
(e.g., for drivers, commands, etc.) and verify their integrity. Check
that all elements in a given list are separated by a consistent number of
bytes.
(e.g., for drivers, commands, etc.) and verify their integrity.
Problems typically indicate that the linker has inserted alignment padding
between two elements in a list, which can break U-Boot's assumption that the
list is a simple, contiguous array of same-sized structs.
Problems detected (cause build failure):
1. Inconsistent gaps: Elements in a list should all be separated by the same
number of bytes (the struct size). If the linker inserts padding between
some elements but not others, this is detected and reported.
2. Padding detection: Using symbol sizes from nm -S, the script compares each
symbol's size to the gap after it. If gap > size, the linker inserted
padding, which breaks U-Boot's assumption that the list is a contiguous
array of same-sized structs.
3. Pointer arithmetic bugs: Each list has start (_1) and end (_3) markers.
If the span (end - start) is not a multiple of struct size, pointer
subtraction produces garbage due to GCC's magic-number division.
'''
parser = argparse.ArgumentParser(
description='Check alignment of all U-Boot linker lists in an ELF file.',
description='Check alignment of U-Boot linker lists in an ELF file.',
epilog=epilog_text,
formatter_class=argparse.RawDescriptionHelpFormatter
)
@@ -200,16 +279,17 @@ list is a simple, contiguous array of same-sized structs.
args = parser.parse_args()
lists = run_nm_and_get_lists(args.elf_path)
if lists is None:
result = run_nm_and_get_lists(args.elf_path)
if result is None:
return 2 # Error running nm
lists, markers = result
if not lists:
if args.verbose:
eprint('Success: No U-Boot linker lists found to check')
return 0
results = collect_data(lists)
results = collect_data(lists, markers)
show_output(results, args.verbose)
return 3 if results.total_problems > 0 else 0