From 101103c66639bba53309fab3caec4992128b89e4 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Tue, 2 Dec 2025 10:02:09 -0600 Subject: [PATCH] [core] Add RAM strings and symbols analysis to analyze-memory command (#12161) --- esphome/__main__.py | 22 +- esphome/analyze_memory/__init__.py | 173 +-------- esphome/analyze_memory/demangle.py | 182 ++++++++++ esphome/analyze_memory/ram_strings.py | 493 ++++++++++++++++++++++++++ esphome/analyze_memory/toolchain.py | 57 +++ tests/unit_tests/test_main.py | 25 +- 6 files changed, 779 insertions(+), 173 deletions(-) create mode 100644 esphome/analyze_memory/demangle.py create mode 100644 esphome/analyze_memory/ram_strings.py create mode 100644 esphome/analyze_memory/toolchain.py diff --git a/esphome/__main__.py b/esphome/__main__.py index f8fb678cb..55fbbc6c8 100644 --- a/esphome/__main__.py +++ b/esphome/__main__.py @@ -944,6 +944,7 @@ def command_analyze_memory(args: ArgsProtocol, config: ConfigType) -> int: """ from esphome import platformio_api from esphome.analyze_memory.cli import MemoryAnalyzerCLI + from esphome.analyze_memory.ram_strings import RamStringsAnalyzer # Always compile to ensure fresh data (fast if no changes - just relinks) exit_code = write_cpp(config) @@ -966,7 +967,7 @@ def command_analyze_memory(args: ArgsProtocol, config: ConfigType) -> int: external_components = detect_external_components(config) _LOGGER.debug("Detected external components: %s", external_components) - # Perform memory analysis + # Perform component memory analysis _LOGGER.info("Analyzing memory usage...") analyzer = MemoryAnalyzerCLI( str(firmware_elf), @@ -976,11 +977,28 @@ def command_analyze_memory(args: ArgsProtocol, config: ConfigType) -> int: ) analyzer.analyze() - # Generate and display report + # Generate and display component report report = analyzer.generate_report() print() print(report) + # Perform RAM strings analysis + _LOGGER.info("Analyzing RAM strings...") + try: + ram_analyzer = RamStringsAnalyzer( + str(firmware_elf), + objdump_path=idedata.objdump_path, + platform=CORE.target_platform, + ) + ram_analyzer.analyze() + + # Generate and display RAM strings report + ram_report = ram_analyzer.generate_report() + print() + print(ram_report) + except Exception as e: # pylint: disable=broad-except + _LOGGER.warning("RAM strings analysis failed: %s", e) + return 0 diff --git a/esphome/analyze_memory/__init__.py b/esphome/analyze_memory/__init__.py index 71e86e378..9632a6891 100644 --- a/esphome/analyze_memory/__init__.py +++ b/esphome/analyze_memory/__init__.py @@ -15,6 +15,7 @@ from .const import ( SECTION_TO_ATTR, SYMBOL_PATTERNS, ) +from .demangle import batch_demangle from .helpers import ( get_component_class_patterns, get_esphome_components, @@ -27,15 +28,6 @@ if TYPE_CHECKING: _LOGGER = logging.getLogger(__name__) -# GCC global constructor/destructor prefix annotations -_GCC_PREFIX_ANNOTATIONS = { - "_GLOBAL__sub_I_": "global constructor for", - "_GLOBAL__sub_D_": "global destructor for", -} - -# GCC optimization suffix pattern (e.g., $isra$0, $part$1, $constprop$2) -_GCC_OPTIMIZATION_SUFFIX_PATTERN = re.compile(r"(\$(?:isra|part|constprop)\$\d+)") - # C++ runtime patterns for categorization _CPP_RUNTIME_PATTERNS = frozenset(["vtable", "typeinfo", "thunk"]) @@ -312,168 +304,9 @@ class MemoryAnalyzer: if not symbols: return - # Try to find the appropriate c++filt for the platform - cppfilt_cmd = "c++filt" - _LOGGER.info("Demangling %d symbols", len(symbols)) - _LOGGER.debug("objdump_path = %s", self.objdump_path) - - # Check if we have 
a toolchain-specific c++filt - if self.objdump_path and self.objdump_path != "objdump": - # Replace objdump with c++filt in the path - potential_cppfilt = self.objdump_path.replace("objdump", "c++filt") - _LOGGER.info("Checking for toolchain c++filt at: %s", potential_cppfilt) - if Path(potential_cppfilt).exists(): - cppfilt_cmd = potential_cppfilt - _LOGGER.info("✓ Using toolchain c++filt: %s", cppfilt_cmd) - else: - _LOGGER.info( - "✗ Toolchain c++filt not found at %s, using system c++filt", - potential_cppfilt, - ) - else: - _LOGGER.info("✗ Using system c++filt (objdump_path=%s)", self.objdump_path) - - # Strip GCC optimization suffixes and prefixes before demangling - # Suffixes like $isra$0, $part$0, $constprop$0 confuse c++filt - # Prefixes like _GLOBAL__sub_I_ need to be removed and tracked - symbols_stripped: list[str] = [] - symbols_prefixes: list[str] = [] # Track removed prefixes - for symbol in symbols: - # Remove GCC optimization markers - stripped = _GCC_OPTIMIZATION_SUFFIX_PATTERN.sub("", symbol) - - # Handle GCC global constructor/initializer prefixes - # _GLOBAL__sub_I_ -> extract for demangling - prefix = "" - for gcc_prefix in _GCC_PREFIX_ANNOTATIONS: - if stripped.startswith(gcc_prefix): - prefix = gcc_prefix - stripped = stripped[len(prefix) :] - break - - symbols_stripped.append(stripped) - symbols_prefixes.append(prefix) - - try: - # Send all symbols to c++filt at once - result = subprocess.run( - [cppfilt_cmd], - input="\n".join(symbols_stripped), - capture_output=True, - text=True, - check=False, - ) - except (subprocess.SubprocessError, OSError, UnicodeDecodeError) as e: - # On error, cache originals - _LOGGER.warning("Failed to batch demangle symbols: %s", e) - for symbol in symbols: - self._demangle_cache[symbol] = symbol - return - - if result.returncode != 0: - _LOGGER.warning( - "c++filt exited with code %d: %s", - result.returncode, - result.stderr[:200] if result.stderr else "(no error output)", - ) - # Cache originals on failure - for symbol in symbols: - self._demangle_cache[symbol] = symbol - return - - # Process demangled output - self._process_demangled_output( - symbols, symbols_stripped, symbols_prefixes, result.stdout, cppfilt_cmd - ) - - def _process_demangled_output( - self, - symbols: list[str], - symbols_stripped: list[str], - symbols_prefixes: list[str], - demangled_output: str, - cppfilt_cmd: str, - ) -> None: - """Process demangled symbol output and populate cache. 
- - Args: - symbols: Original symbol names - symbols_stripped: Stripped symbol names sent to c++filt - symbols_prefixes: Removed prefixes to restore - demangled_output: Output from c++filt - cppfilt_cmd: Path to c++filt command (for logging) - """ - demangled_lines = demangled_output.strip().split("\n") - failed_count = 0 - - for original, stripped, prefix, demangled in zip( - symbols, symbols_stripped, symbols_prefixes, demangled_lines - ): - # Add back any prefix that was removed - demangled = self._restore_symbol_prefix(prefix, stripped, demangled) - - # If we stripped a suffix, add it back to the demangled name for clarity - if original != stripped and not prefix: - demangled = self._restore_symbol_suffix(original, demangled) - - self._demangle_cache[original] = demangled - - # Log symbols that failed to demangle (stayed the same as stripped version) - if stripped == demangled and stripped.startswith("_Z"): - failed_count += 1 - if failed_count <= 5: # Only log first 5 failures - _LOGGER.warning("Failed to demangle: %s", original) - - if failed_count == 0: - _LOGGER.info("Successfully demangled all %d symbols", len(symbols)) - return - - _LOGGER.warning( - "Failed to demangle %d/%d symbols using %s", - failed_count, - len(symbols), - cppfilt_cmd, - ) - - @staticmethod - def _restore_symbol_prefix(prefix: str, stripped: str, demangled: str) -> str: - """Restore prefix that was removed before demangling. - - Args: - prefix: Prefix that was removed (e.g., "_GLOBAL__sub_I_") - stripped: Stripped symbol name - demangled: Demangled symbol name - - Returns: - Demangled name with prefix restored/annotated - """ - if not prefix: - return demangled - - # Successfully demangled - add descriptive prefix - if demangled != stripped and ( - annotation := _GCC_PREFIX_ANNOTATIONS.get(prefix) - ): - return f"[{annotation}: {demangled}]" - - # Failed to demangle - restore original prefix - return prefix + demangled - - @staticmethod - def _restore_symbol_suffix(original: str, demangled: str) -> str: - """Restore GCC optimization suffix that was removed before demangling. - - Args: - original: Original symbol name with suffix - demangled: Demangled symbol name without suffix - - Returns: - Demangled name with suffix annotation - """ - if suffix_match := _GCC_OPTIMIZATION_SUFFIX_PATTERN.search(original): - return f"{demangled} [{suffix_match.group(1)}]" - return demangled + self._demangle_cache = batch_demangle(symbols, objdump_path=self.objdump_path) + _LOGGER.info("Successfully demangled %d symbols", len(self._demangle_cache)) def _demangle_symbol(self, symbol: str) -> str: """Get demangled C++ symbol name from cache.""" diff --git a/esphome/analyze_memory/demangle.py b/esphome/analyze_memory/demangle.py new file mode 100644 index 000000000..8999108b5 --- /dev/null +++ b/esphome/analyze_memory/demangle.py @@ -0,0 +1,182 @@ +"""Symbol demangling utilities for memory analysis. + +This module provides functions for demangling C++ symbol names using c++filt. 
+""" + +from __future__ import annotations + +import logging +import re +import subprocess + +from .toolchain import find_tool + +_LOGGER = logging.getLogger(__name__) + +# GCC global constructor/destructor prefix annotations +GCC_PREFIX_ANNOTATIONS = { + "_GLOBAL__sub_I_": "global constructor for", + "_GLOBAL__sub_D_": "global destructor for", +} + +# GCC optimization suffix pattern (e.g., $isra$0, $part$1, $constprop$2) +GCC_OPTIMIZATION_SUFFIX_PATTERN = re.compile(r"(\$(?:isra|part|constprop)\$\d+)") + + +def _strip_gcc_annotations(symbol: str) -> tuple[str, str]: + """Strip GCC optimization suffixes and prefixes from a symbol. + + Args: + symbol: The mangled symbol name + + Returns: + Tuple of (stripped_symbol, removed_prefix) + """ + # Remove GCC optimization markers + stripped = GCC_OPTIMIZATION_SUFFIX_PATTERN.sub("", symbol) + + # Handle GCC global constructor/initializer prefixes + prefix = "" + for gcc_prefix in GCC_PREFIX_ANNOTATIONS: + if stripped.startswith(gcc_prefix): + prefix = gcc_prefix + stripped = stripped[len(prefix) :] + break + + return stripped, prefix + + +def _restore_symbol_prefix(prefix: str, stripped: str, demangled: str) -> str: + """Restore prefix that was removed before demangling. + + Args: + prefix: Prefix that was removed (e.g., "_GLOBAL__sub_I_") + stripped: Stripped symbol name + demangled: Demangled symbol name + + Returns: + Demangled name with prefix restored/annotated + """ + if not prefix: + return demangled + + # Successfully demangled - add descriptive prefix + if demangled != stripped and (annotation := GCC_PREFIX_ANNOTATIONS.get(prefix)): + return f"[{annotation}: {demangled}]" + + # Failed to demangle - restore original prefix + return prefix + demangled + + +def _restore_symbol_suffix(original: str, demangled: str) -> str: + """Restore GCC optimization suffix that was removed before demangling. + + Args: + original: Original symbol name with suffix + demangled: Demangled symbol name without suffix + + Returns: + Demangled name with suffix annotation + """ + if suffix_match := GCC_OPTIMIZATION_SUFFIX_PATTERN.search(original): + return f"{demangled} [{suffix_match.group(1)}]" + return demangled + + +def batch_demangle( + symbols: list[str], + cppfilt_path: str | None = None, + objdump_path: str | None = None, +) -> dict[str, str]: + """Batch demangle C++ symbol names. 
+ + Args: + symbols: List of symbol names to demangle + cppfilt_path: Path to c++filt binary (auto-detected if not provided) + objdump_path: Path to objdump binary to derive c++filt path from + + Returns: + Dictionary mapping original symbol names to demangled names + """ + cache: dict[str, str] = {} + + if not symbols: + return cache + + # Find c++filt tool + cppfilt_cmd = cppfilt_path or find_tool("c++filt", objdump_path) + if not cppfilt_cmd: + _LOGGER.warning("Could not find c++filt, symbols will not be demangled") + return {s: s for s in symbols} + + _LOGGER.debug("Demangling %d symbols using %s", len(symbols), cppfilt_cmd) + + # Strip GCC optimization suffixes and prefixes before demangling + symbols_stripped: list[str] = [] + symbols_prefixes: list[str] = [] + for symbol in symbols: + stripped, prefix = _strip_gcc_annotations(symbol) + symbols_stripped.append(stripped) + symbols_prefixes.append(prefix) + + try: + result = subprocess.run( + [cppfilt_cmd], + input="\n".join(symbols_stripped), + capture_output=True, + text=True, + check=False, + ) + except (subprocess.SubprocessError, OSError, UnicodeDecodeError) as e: + _LOGGER.warning("Failed to batch demangle symbols: %s", e) + return {s: s for s in symbols} + + if result.returncode != 0: + _LOGGER.warning( + "c++filt exited with code %d: %s", + result.returncode, + result.stderr[:200] if result.stderr else "(no error output)", + ) + return {s: s for s in symbols} + + # Process demangled output + demangled_lines = result.stdout.strip().split("\n") + + # Check for output length mismatch + if len(demangled_lines) != len(symbols): + _LOGGER.warning( + "c++filt output mismatch: expected %d lines, got %d", + len(symbols), + len(demangled_lines), + ) + return {s: s for s in symbols} + + failed_count = 0 + + for original, stripped, prefix, demangled in zip( + symbols, symbols_stripped, symbols_prefixes, demangled_lines + ): + # Add back any prefix that was removed + demangled = _restore_symbol_prefix(prefix, stripped, demangled) + + # If we stripped a suffix, add it back to the demangled name for clarity + if original != stripped and not prefix: + demangled = _restore_symbol_suffix(original, demangled) + + cache[original] = demangled + + # Count symbols that failed to demangle + if stripped == demangled and stripped.startswith("_Z"): + failed_count += 1 + if failed_count <= 5: + _LOGGER.debug("Failed to demangle: %s", original) + + if failed_count > 0: + _LOGGER.debug( + "Failed to demangle %d/%d symbols using %s", + failed_count, + len(symbols), + cppfilt_cmd, + ) + + return cache diff --git a/esphome/analyze_memory/ram_strings.py b/esphome/analyze_memory/ram_strings.py new file mode 100644 index 000000000..fbcbeeca6 --- /dev/null +++ b/esphome/analyze_memory/ram_strings.py @@ -0,0 +1,493 @@ +"""Analyzer for RAM-stored strings in ESP8266/ESP32 firmware ELF files. + +This module identifies strings that are stored in RAM sections (.data, .bss, .rodata) +rather than in flash sections (.irom0.text, .irom.text), which is important for +memory-constrained platforms like ESP8266. 
+""" + +from __future__ import annotations + +from collections import defaultdict +from dataclasses import dataclass +import logging +from pathlib import Path +import re +import subprocess + +from .demangle import batch_demangle +from .toolchain import find_tool + +_LOGGER = logging.getLogger(__name__) + +# ESP8266: .rodata is in RAM (DRAM), not flash +# ESP32: .rodata is in flash, mapped to data bus +ESP8266_RAM_SECTIONS = frozenset([".data", ".rodata", ".bss"]) +ESP8266_FLASH_SECTIONS = frozenset([".irom0.text", ".irom.text", ".text"]) + +# ESP32: .rodata is memory-mapped from flash +ESP32_RAM_SECTIONS = frozenset([".data", ".bss", ".dram0.data", ".dram0.bss"]) +ESP32_FLASH_SECTIONS = frozenset([".text", ".rodata", ".flash.text", ".flash.rodata"]) + +# nm symbol types for data symbols (D=global data, d=local data, R=rodata, B=bss) +DATA_SYMBOL_TYPES = frozenset(["D", "d", "R", "r", "B", "b"]) + + +@dataclass +class SectionInfo: + """Information about an ELF section.""" + + name: str + address: int + size: int + + +@dataclass +class RamString: + """A string found in RAM.""" + + section: str + address: int + content: str + + @property + def size(self) -> int: + """Size in bytes including null terminator.""" + return len(self.content) + 1 + + +@dataclass +class RamSymbol: + """A symbol found in RAM.""" + + name: str + sym_type: str + address: int + size: int + section: str + demangled: str = "" # Demangled name, set after batch demangling + + +class RamStringsAnalyzer: + """Analyzes ELF files to find strings stored in RAM.""" + + def __init__( + self, + elf_path: str, + objdump_path: str | None = None, + min_length: int = 8, + platform: str = "esp32", + ) -> None: + """Initialize the RAM strings analyzer. + + Args: + elf_path: Path to the ELF file to analyze + objdump_path: Path to objdump binary (used to find other tools) + min_length: Minimum string length to report (default: 8) + platform: Platform name ("esp8266", "esp32", etc.) 
for section mapping + """ + self.elf_path = Path(elf_path) + if not self.elf_path.exists(): + raise FileNotFoundError(f"ELF file not found: {elf_path}") + + self.objdump_path = objdump_path + self.min_length = min_length + self.platform = platform + + # Set RAM/flash sections based on platform + if self.platform == "esp8266": + self.ram_sections = ESP8266_RAM_SECTIONS + self.flash_sections = ESP8266_FLASH_SECTIONS + else: + # ESP32 and other platforms + self.ram_sections = ESP32_RAM_SECTIONS + self.flash_sections = ESP32_FLASH_SECTIONS + + self.sections: dict[str, SectionInfo] = {} + self.ram_strings: list[RamString] = [] + self.ram_symbols: list[RamSymbol] = [] + + def _run_command(self, cmd: list[str]) -> str: + """Run a command and return its output.""" + try: + result = subprocess.run(cmd, capture_output=True, text=True, check=True) + return result.stdout + except subprocess.CalledProcessError as e: + _LOGGER.debug("Command failed: %s - %s", " ".join(cmd), e.stderr) + raise + except FileNotFoundError: + _LOGGER.warning("Command not found: %s", cmd[0]) + raise + + def analyze(self) -> None: + """Perform the full RAM analysis.""" + self._parse_sections() + self._extract_strings() + self._analyze_symbols() + self._demangle_symbols() + + def _parse_sections(self) -> None: + """Parse section headers from ELF file.""" + objdump = find_tool("objdump", self.objdump_path) + if not objdump: + _LOGGER.error("Could not find objdump command") + return + + try: + output = self._run_command([objdump, "-h", str(self.elf_path)]) + except (subprocess.CalledProcessError, FileNotFoundError): + return + + # Parse section headers + # Format: Idx Name Size VMA LMA File off Algn + section_pattern = re.compile( + r"^\s*\d+\s+(\S+)\s+([0-9a-fA-F]+)\s+([0-9a-fA-F]+)" + ) + + for line in output.split("\n"): + if match := section_pattern.match(line): + name = match.group(1) + size = int(match.group(2), 16) + vma = int(match.group(3), 16) + self.sections[name] = SectionInfo(name, vma, size) + + def _extract_strings(self) -> None: + """Extract strings from RAM sections.""" + objdump = find_tool("objdump", self.objdump_path) + if not objdump: + return + + for section_name in self.ram_sections: + if section_name not in self.sections: + continue + + try: + output = self._run_command( + [objdump, "-s", "-j", section_name, str(self.elf_path)] + ) + except subprocess.CalledProcessError: + # Section may exist but have no content (e.g., .bss) + continue + except FileNotFoundError: + continue + + strings = self._parse_hex_dump(output, section_name) + self.ram_strings.extend(strings) + + def _parse_hex_dump(self, output: str, section_name: str) -> list[RamString]: + """Parse hex dump output to extract strings. + + Args: + output: Output from objdump -s + section_name: Name of the section being parsed + + Returns: + List of RamString objects + """ + strings: list[RamString] = [] + current_string = bytearray() + string_start_addr = 0 + + for line in output.split("\n"): + # Lines look like: " 3ffef8a0 00000000 00000000 00000000 00000000 ................" 
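+            # Each hex group is a run of bytes in memory order, e.g. "48656c6c"
+            # is the bytes 0x48 0x65 0x6c 0x6c ("Hell"); the last column is
+            # objdump's ASCII rendering of the same bytes.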
+ match = re.match(r"^\s+([0-9a-fA-F]+)\s+((?:[0-9a-fA-F]{2,8}\s*)+)", line) + if not match: + continue + + addr = int(match.group(1), 16) + hex_data = match.group(2).strip() + + # Convert hex to bytes + hex_bytes = hex_data.split() + byte_offset = 0 + for hex_chunk in hex_bytes: + # Handle both byte-by-byte and word formats + for i in range(0, len(hex_chunk), 2): + byte_val = int(hex_chunk[i : i + 2], 16) + if 0x20 <= byte_val <= 0x7E: # Printable ASCII + if not current_string: + string_start_addr = addr + byte_offset + current_string.append(byte_val) + else: + if byte_val == 0 and len(current_string) >= self.min_length: + # Found null terminator + strings.append( + RamString( + section=section_name, + address=string_start_addr, + content=current_string.decode( + "ascii", errors="ignore" + ), + ) + ) + current_string = bytearray() + byte_offset += 1 + + return strings + + def _analyze_symbols(self) -> None: + """Analyze symbols in RAM sections.""" + nm = find_tool("nm", self.objdump_path) + if not nm: + return + + try: + output = self._run_command([nm, "-S", "--size-sort", str(self.elf_path)]) + except (subprocess.CalledProcessError, FileNotFoundError): + return + + for line in output.split("\n"): + parts = line.split() + if len(parts) < 4: + continue + + try: + addr = int(parts[0], 16) + size = int(parts[1], 16) if parts[1] != "?" else 0 + except ValueError: + continue + + sym_type = parts[2] + name = " ".join(parts[3:]) + + # Filter for data symbols + if sym_type not in DATA_SYMBOL_TYPES: + continue + + # Check if symbol is in a RAM section + for section_name in self.ram_sections: + if section_name not in self.sections: + continue + + section = self.sections[section_name] + if section.address <= addr < section.address + section.size: + self.ram_symbols.append( + RamSymbol( + name=name, + sym_type=sym_type, + address=addr, + size=size, + section=section_name, + ) + ) + break + + def _demangle_symbols(self) -> None: + """Batch demangle all RAM symbol names.""" + if not self.ram_symbols: + return + + # Collect all symbol names and demangle them + symbol_names = [s.name for s in self.ram_symbols] + demangle_cache = batch_demangle(symbol_names, objdump_path=self.objdump_path) + + # Assign demangled names to symbols + for symbol in self.ram_symbols: + symbol.demangled = demangle_cache.get(symbol.name, symbol.name) + + def _get_sections_size(self, section_names: frozenset[str]) -> int: + """Get total size of specified sections.""" + return sum( + section.size + for name, section in self.sections.items() + if name in section_names + ) + + def get_total_ram_usage(self) -> int: + """Get total RAM usage from RAM sections.""" + return self._get_sections_size(self.ram_sections) + + def get_total_flash_usage(self) -> int: + """Get total flash usage from flash sections.""" + return self._get_sections_size(self.flash_sections) + + def get_total_string_bytes(self) -> int: + """Get total bytes used by strings in RAM.""" + return sum(s.size for s in self.ram_strings) + + def get_repeated_strings(self) -> list[tuple[str, int]]: + """Find strings that appear multiple times. 
+ + Returns: + List of (string, count) tuples sorted by potential savings + """ + string_counts: dict[str, int] = defaultdict(int) + for ram_string in self.ram_strings: + string_counts[ram_string.content] += 1 + + return sorted( + [(s, c) for s, c in string_counts.items() if c > 1], + key=lambda x: x[1] * (len(x[0]) + 1), + reverse=True, + ) + + def get_long_strings(self, min_len: int = 20) -> list[RamString]: + """Get strings longer than the specified length. + + Args: + min_len: Minimum string length + + Returns: + List of RamString objects sorted by length + """ + return sorted( + [s for s in self.ram_strings if len(s.content) >= min_len], + key=lambda x: len(x.content), + reverse=True, + ) + + def get_largest_symbols(self, min_size: int = 100) -> list[RamSymbol]: + """Get RAM symbols larger than the specified size. + + Args: + min_size: Minimum symbol size in bytes + + Returns: + List of RamSymbol objects sorted by size + """ + return sorted( + [s for s in self.ram_symbols if s.size >= min_size], + key=lambda x: x.size, + reverse=True, + ) + + def generate_report(self, show_all_sections: bool = False) -> str: + """Generate a formatted RAM strings analysis report. + + Args: + show_all_sections: If True, show all sections, not just RAM + + Returns: + Formatted report string + """ + lines: list[str] = [] + table_width = 80 + + lines.append("=" * table_width) + lines.append( + f"RAM Strings Analysis ({self.platform.upper()})".center(table_width) + ) + lines.append("=" * table_width) + lines.append("") + + # Section Analysis + lines.append("SECTION ANALYSIS") + lines.append("-" * table_width) + lines.append(f"{'Section':<20} {'Address':<12} {'Size':<12} {'Location'}") + lines.append("-" * table_width) + + total_ram_usage = 0 + total_flash_usage = 0 + + for name, section in sorted(self.sections.items(), key=lambda x: x[1].address): + if name in self.ram_sections: + location = "RAM" + total_ram_usage += section.size + elif name in self.flash_sections: + location = "FLASH" + total_flash_usage += section.size + else: + location = "OTHER" + + if show_all_sections or name in self.ram_sections: + lines.append( + f"{name:<20} 0x{section.address:08x} {section.size:>8} B {location}" + ) + + lines.append("-" * table_width) + lines.append(f"Total RAM sections size: {total_ram_usage:,} bytes") + lines.append(f"Total Flash sections size: {total_flash_usage:,} bytes") + + # Strings in RAM + lines.append("") + lines.append("=" * table_width) + lines.append("STRINGS IN RAM SECTIONS") + lines.append("=" * table_width) + lines.append( + "Note: .bss sections contain uninitialized data (no strings to extract)" + ) + + # Group strings by section + strings_by_section: dict[str, list[RamString]] = defaultdict(list) + for ram_string in self.ram_strings: + strings_by_section[ram_string.section].append(ram_string) + + for section_name in sorted(strings_by_section.keys()): + section_strings = strings_by_section[section_name] + lines.append(f"\nSection: {section_name}") + lines.append("-" * 40) + for ram_string in sorted(section_strings, key=lambda x: x.address): + clean_string = ram_string.content[:100] + ( + "..." 
if len(ram_string.content) > 100 else "" + ) + lines.append( + f' 0x{ram_string.address:08x}: "{clean_string}" (len={len(ram_string.content)})' + ) + + # Large RAM symbols + lines.append("") + lines.append("=" * table_width) + lines.append("LARGE DATA SYMBOLS IN RAM (>= 50 bytes)") + lines.append("=" * table_width) + + largest_symbols = self.get_largest_symbols(50) + lines.append(f"\n{'Symbol':<50} {'Type':<6} {'Size':<10} {'Section'}") + lines.append("-" * table_width) + + for symbol in largest_symbols: + # Use demangled name if available, otherwise raw name + display_name = symbol.demangled or symbol.name + name_display = display_name[:49] if len(display_name) > 49 else display_name + lines.append( + f"{name_display:<50} {symbol.sym_type:<6} {symbol.size:>8} B {symbol.section}" + ) + + # Summary + lines.append("") + lines.append("=" * table_width) + lines.append("SUMMARY") + lines.append("=" * table_width) + lines.append(f"Total strings found in RAM: {len(self.ram_strings)}") + total_string_bytes = self.get_total_string_bytes() + lines.append(f"Total bytes used by strings: {total_string_bytes:,}") + + # Optimization targets + lines.append("") + lines.append("=" * table_width) + lines.append("POTENTIAL OPTIMIZATION TARGETS") + lines.append("=" * table_width) + + # Repeated strings + repeated = self.get_repeated_strings()[:10] + if repeated: + lines.append("\nRepeated strings (could be deduplicated):") + for string, count in repeated: + savings = (count - 1) * (len(string) + 1) + clean_string = string[:50] + ("..." if len(string) > 50 else "") + lines.append( + f' "{clean_string}" - appears {count} times (potential savings: {savings} bytes)' + ) + + # Long strings - platform-specific advice + long_strings = self.get_long_strings(20)[:10] + if long_strings: + if self.platform == "esp8266": + lines.append( + "\nLong strings that could be moved to PROGMEM (>= 20 chars):" + ) + else: + # ESP32: strings in DRAM are typically there for a reason + # (interrupt handlers, pre-flash-init code, etc.) + lines.append("\nLong strings in DRAM (>= 20 chars):") + lines.append( + "Note: ESP32 DRAM strings may be required for interrupt/early-boot contexts" + ) + for ram_string in long_strings: + clean_string = ram_string.content[:60] + ( + "..." if len(ram_string.content) > 60 else "" + ) + lines.append( + f' {ram_string.section} @ 0x{ram_string.address:08x}: "{clean_string}" ({len(ram_string.content)} bytes)' + ) + + lines.append("") + return "\n".join(lines) diff --git a/esphome/analyze_memory/toolchain.py b/esphome/analyze_memory/toolchain.py new file mode 100644 index 000000000..e76625241 --- /dev/null +++ b/esphome/analyze_memory/toolchain.py @@ -0,0 +1,57 @@ +"""Toolchain utilities for memory analysis.""" + +from __future__ import annotations + +import logging +from pathlib import Path +import subprocess + +_LOGGER = logging.getLogger(__name__) + +# Platform-specific toolchain prefixes +TOOLCHAIN_PREFIXES = [ + "xtensa-lx106-elf-", # ESP8266 + "xtensa-esp32-elf-", # ESP32 + "xtensa-esp-elf-", # ESP32 (newer IDF) + "", # System default (no prefix) +] + + +def find_tool( + tool_name: str, + objdump_path: str | None = None, +) -> str | None: + """Find a toolchain tool by name. + + First tries to derive the tool path from objdump_path (if provided), + then falls back to searching for platform-specific tools. 
+ + Args: + tool_name: Name of the tool (e.g., "objdump", "nm", "c++filt") + objdump_path: Path to objdump binary to derive other tool paths from + + Returns: + Path to the tool or None if not found + """ + # Try to derive from objdump path first (most reliable) + if objdump_path and objdump_path != "objdump": + objdump_file = Path(objdump_path) + # Replace just the filename portion, preserving any prefix (e.g., xtensa-esp32-elf-) + new_name = objdump_file.name.replace("objdump", tool_name) + potential_path = str(objdump_file.with_name(new_name)) + if Path(potential_path).exists(): + _LOGGER.debug("Found %s at: %s", tool_name, potential_path) + return potential_path + + # Try platform-specific tools + for prefix in TOOLCHAIN_PREFIXES: + cmd = f"{prefix}{tool_name}" + try: + subprocess.run([cmd, "--version"], capture_output=True, check=True) + _LOGGER.debug("Found %s: %s", tool_name, cmd) + return cmd + except (subprocess.CalledProcessError, FileNotFoundError): + continue + + _LOGGER.warning("Could not find %s tool", tool_name) + return None diff --git a/tests/unit_tests/test_main.py b/tests/unit_tests/test_main.py index ccbc5a130..670d6c16f 100644 --- a/tests/unit_tests/test_main.py +++ b/tests/unit_tests/test_main.py @@ -269,6 +269,16 @@ def mock_memory_analyzer_cli() -> Generator[Mock]: yield mock_class +@pytest.fixture +def mock_ram_strings_analyzer() -> Generator[Mock]: + """Mock RamStringsAnalyzer for testing.""" + with patch("esphome.analyze_memory.ram_strings.RamStringsAnalyzer") as mock_class: + mock_analyzer = MagicMock() + mock_analyzer.generate_report.return_value = "Mock RAM Strings Report" + mock_class.return_value = mock_analyzer + yield mock_class + + def test_choose_upload_log_host_with_string_default() -> None: """Test with a single string default device.""" setup_core() @@ -2424,6 +2434,7 @@ def test_command_analyze_memory_success( mock_get_idedata: Mock, mock_get_esphome_components: Mock, mock_memory_analyzer_cli: Mock, + mock_ram_strings_analyzer: Mock, ) -> None: """Test command_analyze_memory with successful compilation and analysis.""" setup_core(platform=PLATFORM_ESP32, tmp_path=tmp_path, name="test_device") @@ -2471,9 +2482,20 @@ def test_command_analyze_memory_success( mock_analyzer.analyze.assert_called_once() mock_analyzer.generate_report.assert_called_once() - # Verify report was printed + # Verify RAM strings analyzer was created and run + mock_ram_strings_analyzer.assert_called_once_with( + str(firmware_elf), + objdump_path="/path/to/objdump", + platform="esp32", + ) + mock_ram_analyzer = mock_ram_strings_analyzer.return_value + mock_ram_analyzer.analyze.assert_called_once() + mock_ram_analyzer.generate_report.assert_called_once() + + # Verify reports were printed captured = capfd.readouterr() assert "Mock Memory Report" in captured.out + assert "Mock RAM Strings Report" in captured.out def test_command_analyze_memory_with_external_components( @@ -2483,6 +2505,7 @@ def test_command_analyze_memory_with_external_components( mock_get_idedata: Mock, mock_get_esphome_components: Mock, mock_memory_analyzer_cli: Mock, + mock_ram_strings_analyzer: Mock, ) -> None: """Test command_analyze_memory detects external components.""" setup_core(platform=PLATFORM_ESP32, tmp_path=tmp_path, name="test_device")
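For reference, the new RamStringsAnalyzer can also be driven programmatically, outside the analyze-memory command flow. A minimal sketch, assuming hypothetical firmware ELF and toolchain objdump paths (adjust to the local build output):

    # Illustrative only: the paths below are placeholders, not part of this change.
    from esphome.analyze_memory.ram_strings import RamStringsAnalyzer

    analyzer = RamStringsAnalyzer(
        "firmware.elf",                           # hypothetical ELF path
        objdump_path="xtensa-esp32-elf-objdump",  # hypothetical toolchain objdump
        platform="esp32",
    )
    analyzer.analyze()
    print(analyzer.generate_report())

    # Individual queries are also available:
    for content, count in analyzer.get_repeated_strings()[:5]:
        print(f"{count}x {content!r} in RAM")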