[core] Add RAM strings and symbols analysis to analyze-memory command (#12161)

J. Nick Koston
2025-12-02 10:02:09 -06:00
committed by GitHub
parent 5142ff372b
commit 101103c666
6 changed files with 779 additions and 173 deletions


@@ -944,6 +944,7 @@ def command_analyze_memory(args: ArgsProtocol, config: ConfigType) -> int:
"""
from esphome import platformio_api
from esphome.analyze_memory.cli import MemoryAnalyzerCLI
from esphome.analyze_memory.ram_strings import RamStringsAnalyzer
# Always compile to ensure fresh data (fast if no changes - just relinks)
exit_code = write_cpp(config)
@@ -966,7 +967,7 @@ def command_analyze_memory(args: ArgsProtocol, config: ConfigType) -> int:
external_components = detect_external_components(config)
_LOGGER.debug("Detected external components: %s", external_components)
# Perform memory analysis
# Perform component memory analysis
_LOGGER.info("Analyzing memory usage...")
analyzer = MemoryAnalyzerCLI(
str(firmware_elf),
@@ -976,11 +977,28 @@ def command_analyze_memory(args: ArgsProtocol, config: ConfigType) -> int:
)
analyzer.analyze()
# Generate and display report
# Generate and display component report
report = analyzer.generate_report()
print()
print(report)
# Perform RAM strings analysis
_LOGGER.info("Analyzing RAM strings...")
try:
ram_analyzer = RamStringsAnalyzer(
str(firmware_elf),
objdump_path=idedata.objdump_path,
platform=CORE.target_platform,
)
ram_analyzer.analyze()
# Generate and display RAM strings report
ram_report = ram_analyzer.generate_report()
print()
print(ram_report)
except Exception as e: # pylint: disable=broad-except
_LOGGER.warning("RAM strings analysis failed: %s", e)
return 0
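The RAM pass is deliberately non-fatal: any failure is logged as a warning and the command still returns 0. As a standalone sketch of that guard (the helper name and logging setup below are illustrative, not part of the codebase):

    import logging

    def print_ram_report(firmware_elf: str, objdump_path: str | None, platform: str) -> None:
        # Mirrors the guard in command_analyze_memory: the RAM report is advisory,
        # so a failure here must never break the analyze-memory command itself.
        from esphome.analyze_memory.ram_strings import RamStringsAnalyzer

        try:
            ram_analyzer = RamStringsAnalyzer(
                firmware_elf,
                objdump_path=objdump_path,
                platform=platform,
            )
            ram_analyzer.analyze()
            print(ram_analyzer.generate_report())
        except Exception as err:  # pylint: disable=broad-except
            logging.getLogger(__name__).warning("RAM strings analysis failed: %s", err)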


@@ -15,6 +15,7 @@ from .const import (
SECTION_TO_ATTR,
SYMBOL_PATTERNS,
)
from .demangle import batch_demangle
from .helpers import (
get_component_class_patterns,
get_esphome_components,
@@ -27,15 +28,6 @@ if TYPE_CHECKING:
_LOGGER = logging.getLogger(__name__)
# GCC global constructor/destructor prefix annotations
_GCC_PREFIX_ANNOTATIONS = {
"_GLOBAL__sub_I_": "global constructor for",
"_GLOBAL__sub_D_": "global destructor for",
}
# GCC optimization suffix pattern (e.g., $isra$0, $part$1, $constprop$2)
_GCC_OPTIMIZATION_SUFFIX_PATTERN = re.compile(r"(\$(?:isra|part|constprop)\$\d+)")
# C++ runtime patterns for categorization
_CPP_RUNTIME_PATTERNS = frozenset(["vtable", "typeinfo", "thunk"])
@@ -312,168 +304,9 @@ class MemoryAnalyzer:
if not symbols:
return
# Try to find the appropriate c++filt for the platform
cppfilt_cmd = "c++filt"
_LOGGER.info("Demangling %d symbols", len(symbols))
_LOGGER.debug("objdump_path = %s", self.objdump_path)
# Check if we have a toolchain-specific c++filt
if self.objdump_path and self.objdump_path != "objdump":
# Replace objdump with c++filt in the path
potential_cppfilt = self.objdump_path.replace("objdump", "c++filt")
_LOGGER.info("Checking for toolchain c++filt at: %s", potential_cppfilt)
if Path(potential_cppfilt).exists():
cppfilt_cmd = potential_cppfilt
_LOGGER.info("✓ Using toolchain c++filt: %s", cppfilt_cmd)
else:
_LOGGER.info(
"✗ Toolchain c++filt not found at %s, using system c++filt",
potential_cppfilt,
)
else:
_LOGGER.info("✗ Using system c++filt (objdump_path=%s)", self.objdump_path)
# Strip GCC optimization suffixes and prefixes before demangling
# Suffixes like $isra$0, $part$0, $constprop$0 confuse c++filt
# Prefixes like _GLOBAL__sub_I_ need to be removed and tracked
symbols_stripped: list[str] = []
symbols_prefixes: list[str] = [] # Track removed prefixes
for symbol in symbols:
# Remove GCC optimization markers
stripped = _GCC_OPTIMIZATION_SUFFIX_PATTERN.sub("", symbol)
# Handle GCC global constructor/initializer prefixes
# _GLOBAL__sub_I_<mangled> -> extract <mangled> for demangling
prefix = ""
for gcc_prefix in _GCC_PREFIX_ANNOTATIONS:
if stripped.startswith(gcc_prefix):
prefix = gcc_prefix
stripped = stripped[len(prefix) :]
break
symbols_stripped.append(stripped)
symbols_prefixes.append(prefix)
try:
# Send all symbols to c++filt at once
result = subprocess.run(
[cppfilt_cmd],
input="\n".join(symbols_stripped),
capture_output=True,
text=True,
check=False,
)
except (subprocess.SubprocessError, OSError, UnicodeDecodeError) as e:
# On error, cache originals
_LOGGER.warning("Failed to batch demangle symbols: %s", e)
for symbol in symbols:
self._demangle_cache[symbol] = symbol
return
if result.returncode != 0:
_LOGGER.warning(
"c++filt exited with code %d: %s",
result.returncode,
result.stderr[:200] if result.stderr else "(no error output)",
)
# Cache originals on failure
for symbol in symbols:
self._demangle_cache[symbol] = symbol
return
# Process demangled output
self._process_demangled_output(
symbols, symbols_stripped, symbols_prefixes, result.stdout, cppfilt_cmd
)
def _process_demangled_output(
self,
symbols: list[str],
symbols_stripped: list[str],
symbols_prefixes: list[str],
demangled_output: str,
cppfilt_cmd: str,
) -> None:
"""Process demangled symbol output and populate cache.
Args:
symbols: Original symbol names
symbols_stripped: Stripped symbol names sent to c++filt
symbols_prefixes: Removed prefixes to restore
demangled_output: Output from c++filt
cppfilt_cmd: Path to c++filt command (for logging)
"""
demangled_lines = demangled_output.strip().split("\n")
failed_count = 0
for original, stripped, prefix, demangled in zip(
symbols, symbols_stripped, symbols_prefixes, demangled_lines
):
# Add back any prefix that was removed
demangled = self._restore_symbol_prefix(prefix, stripped, demangled)
# If we stripped a suffix, add it back to the demangled name for clarity
if original != stripped and not prefix:
demangled = self._restore_symbol_suffix(original, demangled)
self._demangle_cache[original] = demangled
# Log symbols that failed to demangle (stayed the same as stripped version)
if stripped == demangled and stripped.startswith("_Z"):
failed_count += 1
if failed_count <= 5: # Only log first 5 failures
_LOGGER.warning("Failed to demangle: %s", original)
if failed_count == 0:
_LOGGER.info("Successfully demangled all %d symbols", len(symbols))
return
_LOGGER.warning(
"Failed to demangle %d/%d symbols using %s",
failed_count,
len(symbols),
cppfilt_cmd,
)
@staticmethod
def _restore_symbol_prefix(prefix: str, stripped: str, demangled: str) -> str:
"""Restore prefix that was removed before demangling.
Args:
prefix: Prefix that was removed (e.g., "_GLOBAL__sub_I_")
stripped: Stripped symbol name
demangled: Demangled symbol name
Returns:
Demangled name with prefix restored/annotated
"""
if not prefix:
return demangled
# Successfully demangled - add descriptive prefix
if demangled != stripped and (
annotation := _GCC_PREFIX_ANNOTATIONS.get(prefix)
):
return f"[{annotation}: {demangled}]"
# Failed to demangle - restore original prefix
return prefix + demangled
@staticmethod
def _restore_symbol_suffix(original: str, demangled: str) -> str:
"""Restore GCC optimization suffix that was removed before demangling.
Args:
original: Original symbol name with suffix
demangled: Demangled symbol name without suffix
Returns:
Demangled name with suffix annotation
"""
if suffix_match := _GCC_OPTIMIZATION_SUFFIX_PATTERN.search(original):
return f"{demangled} [{suffix_match.group(1)}]"
return demangled
self._demangle_cache = batch_demangle(symbols, objdump_path=self.objdump_path)
_LOGGER.info("Successfully demangled %d symbols", len(self._demangle_cache))
def _demangle_symbol(self, symbol: str) -> str:
"""Get demangled C++ symbol name from cache."""


@@ -0,0 +1,182 @@
"""Symbol demangling utilities for memory analysis.
This module provides functions for demangling C++ symbol names using c++filt.
"""
from __future__ import annotations
import logging
import re
import subprocess
from .toolchain import find_tool
_LOGGER = logging.getLogger(__name__)
# GCC global constructor/destructor prefix annotations
GCC_PREFIX_ANNOTATIONS = {
"_GLOBAL__sub_I_": "global constructor for",
"_GLOBAL__sub_D_": "global destructor for",
}
# GCC optimization suffix pattern (e.g., $isra$0, $part$1, $constprop$2)
GCC_OPTIMIZATION_SUFFIX_PATTERN = re.compile(r"(\$(?:isra|part|constprop)\$\d+)")
def _strip_gcc_annotations(symbol: str) -> tuple[str, str]:
"""Strip GCC optimization suffixes and prefixes from a symbol.
Args:
symbol: The mangled symbol name
Returns:
Tuple of (stripped_symbol, removed_prefix)
"""
# Remove GCC optimization markers
stripped = GCC_OPTIMIZATION_SUFFIX_PATTERN.sub("", symbol)
# Handle GCC global constructor/initializer prefixes
prefix = ""
for gcc_prefix in GCC_PREFIX_ANNOTATIONS:
if stripped.startswith(gcc_prefix):
prefix = gcc_prefix
stripped = stripped[len(prefix) :]
break
return stripped, prefix
def _restore_symbol_prefix(prefix: str, stripped: str, demangled: str) -> str:
"""Restore prefix that was removed before demangling.
Args:
prefix: Prefix that was removed (e.g., "_GLOBAL__sub_I_")
stripped: Stripped symbol name
demangled: Demangled symbol name
Returns:
Demangled name with prefix restored/annotated
"""
if not prefix:
return demangled
# Successfully demangled - add descriptive prefix
if demangled != stripped and (annotation := GCC_PREFIX_ANNOTATIONS.get(prefix)):
return f"[{annotation}: {demangled}]"
# Failed to demangle - restore original prefix
return prefix + demangled
def _restore_symbol_suffix(original: str, demangled: str) -> str:
"""Restore GCC optimization suffix that was removed before demangling.
Args:
original: Original symbol name with suffix
demangled: Demangled symbol name without suffix
Returns:
Demangled name with suffix annotation
"""
if suffix_match := GCC_OPTIMIZATION_SUFFIX_PATTERN.search(original):
return f"{demangled} [{suffix_match.group(1)}]"
return demangled
def batch_demangle(
symbols: list[str],
cppfilt_path: str | None = None,
objdump_path: str | None = None,
) -> dict[str, str]:
"""Batch demangle C++ symbol names.
Args:
symbols: List of symbol names to demangle
cppfilt_path: Path to c++filt binary (auto-detected if not provided)
objdump_path: Path to objdump binary to derive c++filt path from
Returns:
Dictionary mapping original symbol names to demangled names
"""
cache: dict[str, str] = {}
if not symbols:
return cache
# Find c++filt tool
cppfilt_cmd = cppfilt_path or find_tool("c++filt", objdump_path)
if not cppfilt_cmd:
_LOGGER.warning("Could not find c++filt, symbols will not be demangled")
return {s: s for s in symbols}
_LOGGER.debug("Demangling %d symbols using %s", len(symbols), cppfilt_cmd)
# Strip GCC optimization suffixes and prefixes before demangling
symbols_stripped: list[str] = []
symbols_prefixes: list[str] = []
for symbol in symbols:
stripped, prefix = _strip_gcc_annotations(symbol)
symbols_stripped.append(stripped)
symbols_prefixes.append(prefix)
try:
result = subprocess.run(
[cppfilt_cmd],
input="\n".join(symbols_stripped),
capture_output=True,
text=True,
check=False,
)
except (subprocess.SubprocessError, OSError, UnicodeDecodeError) as e:
_LOGGER.warning("Failed to batch demangle symbols: %s", e)
return {s: s for s in symbols}
if result.returncode != 0:
_LOGGER.warning(
"c++filt exited with code %d: %s",
result.returncode,
result.stderr[:200] if result.stderr else "(no error output)",
)
return {s: s for s in symbols}
# Process demangled output
demangled_lines = result.stdout.strip().split("\n")
# Check for output length mismatch
if len(demangled_lines) != len(symbols):
_LOGGER.warning(
"c++filt output mismatch: expected %d lines, got %d",
len(symbols),
len(demangled_lines),
)
return {s: s for s in symbols}
failed_count = 0
for original, stripped, prefix, demangled in zip(
symbols, symbols_stripped, symbols_prefixes, demangled_lines
):
# Add back any prefix that was removed
demangled = _restore_symbol_prefix(prefix, stripped, demangled)
# If we stripped a suffix, add it back to the demangled name for clarity
if original != stripped and not prefix:
demangled = _restore_symbol_suffix(original, demangled)
cache[original] = demangled
# Count symbols that failed to demangle
if stripped == demangled and stripped.startswith("_Z"):
failed_count += 1
if failed_count <= 5:
_LOGGER.debug("Failed to demangle: %s", original)
if failed_count > 0:
_LOGGER.debug(
"Failed to demangle %d/%d symbols using %s",
failed_count,
len(symbols),
cppfilt_cmd,
)
return cache
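Usage sketch for the extracted helper; the mangled names below are illustrative, and c++filt is located via find_tool (toolchain binary first, system binary as a fallback):

    from esphome.analyze_memory.demangle import batch_demangle

    # A plain mangled name, a GCC clone with an optimization suffix, and a
    # global-constructor stub; the helper strips and restores the decorations.
    symbols = [
        "_ZN7esphome11Application5setupEv",
        "_ZN7esphome11Application4loopEv$isra$0",
        "_GLOBAL__sub_I__ZN7esphome3AppE",
    ]
    demangled = batch_demangle(symbols)  # pass objdump_path= to pin a toolchain c++filt
    for mangled, pretty in demangled.items():
        print(f"{mangled}  ->  {pretty}")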


@@ -0,0 +1,493 @@
"""Analyzer for RAM-stored strings in ESP8266/ESP32 firmware ELF files.
This module identifies strings stored in RAM sections (.data, .bss, and, on ESP8266,
.rodata) rather than in flash sections (e.g. .irom0.text, .flash.rodata), which matters
most on memory-constrained platforms like the ESP8266.
"""
from __future__ import annotations
from collections import defaultdict
from dataclasses import dataclass
import logging
from pathlib import Path
import re
import subprocess
from .demangle import batch_demangle
from .toolchain import find_tool
_LOGGER = logging.getLogger(__name__)
# ESP8266: .rodata is in RAM (DRAM), not flash
# ESP32: .rodata is in flash, mapped to data bus
ESP8266_RAM_SECTIONS = frozenset([".data", ".rodata", ".bss"])
ESP8266_FLASH_SECTIONS = frozenset([".irom0.text", ".irom.text", ".text"])
# ESP32: .rodata is memory-mapped from flash
ESP32_RAM_SECTIONS = frozenset([".data", ".bss", ".dram0.data", ".dram0.bss"])
ESP32_FLASH_SECTIONS = frozenset([".text", ".rodata", ".flash.text", ".flash.rodata"])
# nm symbol types for data symbols (D=global data, d=local data, R=rodata, B=bss)
DATA_SYMBOL_TYPES = frozenset(["D", "d", "R", "r", "B", "b"])
@dataclass
class SectionInfo:
"""Information about an ELF section."""
name: str
address: int
size: int
@dataclass
class RamString:
"""A string found in RAM."""
section: str
address: int
content: str
@property
def size(self) -> int:
"""Size in bytes including null terminator."""
return len(self.content) + 1
@dataclass
class RamSymbol:
"""A symbol found in RAM."""
name: str
sym_type: str
address: int
size: int
section: str
demangled: str = "" # Demangled name, set after batch demangling
class RamStringsAnalyzer:
"""Analyzes ELF files to find strings stored in RAM."""
def __init__(
self,
elf_path: str,
objdump_path: str | None = None,
min_length: int = 8,
platform: str = "esp32",
) -> None:
"""Initialize the RAM strings analyzer.
Args:
elf_path: Path to the ELF file to analyze
objdump_path: Path to objdump binary (used to find other tools)
min_length: Minimum string length to report (default: 8)
platform: Platform name ("esp8266", "esp32", etc.) for section mapping
"""
self.elf_path = Path(elf_path)
if not self.elf_path.exists():
raise FileNotFoundError(f"ELF file not found: {elf_path}")
self.objdump_path = objdump_path
self.min_length = min_length
self.platform = platform
# Set RAM/flash sections based on platform
if self.platform == "esp8266":
self.ram_sections = ESP8266_RAM_SECTIONS
self.flash_sections = ESP8266_FLASH_SECTIONS
else:
# ESP32 and other platforms
self.ram_sections = ESP32_RAM_SECTIONS
self.flash_sections = ESP32_FLASH_SECTIONS
self.sections: dict[str, SectionInfo] = {}
self.ram_strings: list[RamString] = []
self.ram_symbols: list[RamSymbol] = []
def _run_command(self, cmd: list[str]) -> str:
"""Run a command and return its output."""
try:
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
return result.stdout
except subprocess.CalledProcessError as e:
_LOGGER.debug("Command failed: %s - %s", " ".join(cmd), e.stderr)
raise
except FileNotFoundError:
_LOGGER.warning("Command not found: %s", cmd[0])
raise
def analyze(self) -> None:
"""Perform the full RAM analysis."""
self._parse_sections()
self._extract_strings()
self._analyze_symbols()
self._demangle_symbols()
def _parse_sections(self) -> None:
"""Parse section headers from ELF file."""
objdump = find_tool("objdump", self.objdump_path)
if not objdump:
_LOGGER.error("Could not find objdump command")
return
try:
output = self._run_command([objdump, "-h", str(self.elf_path)])
except (subprocess.CalledProcessError, FileNotFoundError):
return
# Parse section headers
# Format: Idx Name Size VMA LMA File off Algn
section_pattern = re.compile(
r"^\s*\d+\s+(\S+)\s+([0-9a-fA-F]+)\s+([0-9a-fA-F]+)"
)
for line in output.split("\n"):
if match := section_pattern.match(line):
name = match.group(1)
size = int(match.group(2), 16)
vma = int(match.group(3), 16)
self.sections[name] = SectionInfo(name, vma, size)
def _extract_strings(self) -> None:
"""Extract strings from RAM sections."""
objdump = find_tool("objdump", self.objdump_path)
if not objdump:
return
for section_name in self.ram_sections:
if section_name not in self.sections:
continue
try:
output = self._run_command(
[objdump, "-s", "-j", section_name, str(self.elf_path)]
)
except subprocess.CalledProcessError:
# Section may exist but have no content (e.g., .bss)
continue
except FileNotFoundError:
continue
strings = self._parse_hex_dump(output, section_name)
self.ram_strings.extend(strings)
def _parse_hex_dump(self, output: str, section_name: str) -> list[RamString]:
"""Parse hex dump output to extract strings.
Args:
output: Output from objdump -s
section_name: Name of the section being parsed
Returns:
List of RamString objects
"""
strings: list[RamString] = []
current_string = bytearray()
string_start_addr = 0
for line in output.split("\n"):
# Lines look like: " 3ffef8a0 00000000 00000000 00000000 00000000 ................"
match = re.match(r"^\s+([0-9a-fA-F]+)\s+((?:[0-9a-fA-F]{2,8}\s*)+)", line)
if not match:
continue
addr = int(match.group(1), 16)
hex_data = match.group(2).strip()
# Convert hex to bytes
hex_bytes = hex_data.split()
byte_offset = 0
for hex_chunk in hex_bytes:
# Handle both byte-by-byte and word formats
for i in range(0, len(hex_chunk), 2):
byte_val = int(hex_chunk[i : i + 2], 16)
if 0x20 <= byte_val <= 0x7E: # Printable ASCII
if not current_string:
string_start_addr = addr + byte_offset
current_string.append(byte_val)
else:
if byte_val == 0 and len(current_string) >= self.min_length:
# Found null terminator
strings.append(
RamString(
section=section_name,
address=string_start_addr,
content=current_string.decode(
"ascii", errors="ignore"
),
)
)
current_string = bytearray()
byte_offset += 1
return strings
def _analyze_symbols(self) -> None:
"""Analyze symbols in RAM sections."""
nm = find_tool("nm", self.objdump_path)
if not nm:
return
try:
output = self._run_command([nm, "-S", "--size-sort", str(self.elf_path)])
except (subprocess.CalledProcessError, FileNotFoundError):
return
for line in output.split("\n"):
parts = line.split()
if len(parts) < 4:
continue
try:
addr = int(parts[0], 16)
size = int(parts[1], 16) if parts[1] != "?" else 0
except ValueError:
continue
sym_type = parts[2]
name = " ".join(parts[3:])
# Filter for data symbols
if sym_type not in DATA_SYMBOL_TYPES:
continue
# Check if symbol is in a RAM section
for section_name in self.ram_sections:
if section_name not in self.sections:
continue
section = self.sections[section_name]
if section.address <= addr < section.address + section.size:
self.ram_symbols.append(
RamSymbol(
name=name,
sym_type=sym_type,
address=addr,
size=size,
section=section_name,
)
)
break
def _demangle_symbols(self) -> None:
"""Batch demangle all RAM symbol names."""
if not self.ram_symbols:
return
# Collect all symbol names and demangle them
symbol_names = [s.name for s in self.ram_symbols]
demangle_cache = batch_demangle(symbol_names, objdump_path=self.objdump_path)
# Assign demangled names to symbols
for symbol in self.ram_symbols:
symbol.demangled = demangle_cache.get(symbol.name, symbol.name)
def _get_sections_size(self, section_names: frozenset[str]) -> int:
"""Get total size of specified sections."""
return sum(
section.size
for name, section in self.sections.items()
if name in section_names
)
def get_total_ram_usage(self) -> int:
"""Get total RAM usage from RAM sections."""
return self._get_sections_size(self.ram_sections)
def get_total_flash_usage(self) -> int:
"""Get total flash usage from flash sections."""
return self._get_sections_size(self.flash_sections)
def get_total_string_bytes(self) -> int:
"""Get total bytes used by strings in RAM."""
return sum(s.size for s in self.ram_strings)
def get_repeated_strings(self) -> list[tuple[str, int]]:
"""Find strings that appear multiple times.
Returns:
List of (string, count) tuples sorted by potential savings
"""
string_counts: dict[str, int] = defaultdict(int)
for ram_string in self.ram_strings:
string_counts[ram_string.content] += 1
return sorted(
[(s, c) for s, c in string_counts.items() if c > 1],
key=lambda x: x[1] * (len(x[0]) + 1),
reverse=True,
)
def get_long_strings(self, min_len: int = 20) -> list[RamString]:
"""Get strings longer than the specified length.
Args:
min_len: Minimum string length
Returns:
List of RamString objects sorted by length
"""
return sorted(
[s for s in self.ram_strings if len(s.content) >= min_len],
key=lambda x: len(x.content),
reverse=True,
)
def get_largest_symbols(self, min_size: int = 100) -> list[RamSymbol]:
"""Get RAM symbols larger than the specified size.
Args:
min_size: Minimum symbol size in bytes
Returns:
List of RamSymbol objects sorted by size
"""
return sorted(
[s for s in self.ram_symbols if s.size >= min_size],
key=lambda x: x.size,
reverse=True,
)
def generate_report(self, show_all_sections: bool = False) -> str:
"""Generate a formatted RAM strings analysis report.
Args:
show_all_sections: If True, show all sections, not just RAM
Returns:
Formatted report string
"""
lines: list[str] = []
table_width = 80
lines.append("=" * table_width)
lines.append(
f"RAM Strings Analysis ({self.platform.upper()})".center(table_width)
)
lines.append("=" * table_width)
lines.append("")
# Section Analysis
lines.append("SECTION ANALYSIS")
lines.append("-" * table_width)
lines.append(f"{'Section':<20} {'Address':<12} {'Size':<12} {'Location'}")
lines.append("-" * table_width)
total_ram_usage = 0
total_flash_usage = 0
for name, section in sorted(self.sections.items(), key=lambda x: x[1].address):
if name in self.ram_sections:
location = "RAM"
total_ram_usage += section.size
elif name in self.flash_sections:
location = "FLASH"
total_flash_usage += section.size
else:
location = "OTHER"
if show_all_sections or name in self.ram_sections:
lines.append(
f"{name:<20} 0x{section.address:08x} {section.size:>8} B {location}"
)
lines.append("-" * table_width)
lines.append(f"Total RAM sections size: {total_ram_usage:,} bytes")
lines.append(f"Total Flash sections size: {total_flash_usage:,} bytes")
# Strings in RAM
lines.append("")
lines.append("=" * table_width)
lines.append("STRINGS IN RAM SECTIONS")
lines.append("=" * table_width)
lines.append(
"Note: .bss sections contain uninitialized data (no strings to extract)"
)
# Group strings by section
strings_by_section: dict[str, list[RamString]] = defaultdict(list)
for ram_string in self.ram_strings:
strings_by_section[ram_string.section].append(ram_string)
for section_name in sorted(strings_by_section.keys()):
section_strings = strings_by_section[section_name]
lines.append(f"\nSection: {section_name}")
lines.append("-" * 40)
for ram_string in sorted(section_strings, key=lambda x: x.address):
clean_string = ram_string.content[:100] + (
"..." if len(ram_string.content) > 100 else ""
)
lines.append(
f' 0x{ram_string.address:08x}: "{clean_string}" (len={len(ram_string.content)})'
)
# Large RAM symbols
lines.append("")
lines.append("=" * table_width)
lines.append("LARGE DATA SYMBOLS IN RAM (>= 50 bytes)")
lines.append("=" * table_width)
largest_symbols = self.get_largest_symbols(50)
lines.append(f"\n{'Symbol':<50} {'Type':<6} {'Size':<10} {'Section'}")
lines.append("-" * table_width)
for symbol in largest_symbols:
# Use demangled name if available, otherwise raw name
display_name = symbol.demangled or symbol.name
name_display = display_name[:49] if len(display_name) > 49 else display_name
lines.append(
f"{name_display:<50} {symbol.sym_type:<6} {symbol.size:>8} B {symbol.section}"
)
# Summary
lines.append("")
lines.append("=" * table_width)
lines.append("SUMMARY")
lines.append("=" * table_width)
lines.append(f"Total strings found in RAM: {len(self.ram_strings)}")
total_string_bytes = self.get_total_string_bytes()
lines.append(f"Total bytes used by strings: {total_string_bytes:,}")
# Optimization targets
lines.append("")
lines.append("=" * table_width)
lines.append("POTENTIAL OPTIMIZATION TARGETS")
lines.append("=" * table_width)
# Repeated strings
repeated = self.get_repeated_strings()[:10]
if repeated:
lines.append("\nRepeated strings (could be deduplicated):")
for string, count in repeated:
savings = (count - 1) * (len(string) + 1)
clean_string = string[:50] + ("..." if len(string) > 50 else "")
lines.append(
f' "{clean_string}" - appears {count} times (potential savings: {savings} bytes)'
)
# Long strings - platform-specific advice
long_strings = self.get_long_strings(20)[:10]
if long_strings:
if self.platform == "esp8266":
lines.append(
"\nLong strings that could be moved to PROGMEM (>= 20 chars):"
)
else:
# ESP32: strings in DRAM are typically there for a reason
# (interrupt handlers, pre-flash-init code, etc.)
lines.append("\nLong strings in DRAM (>= 20 chars):")
lines.append(
"Note: ESP32 DRAM strings may be required for interrupt/early-boot contexts"
)
for ram_string in long_strings:
clean_string = ram_string.content[:60] + (
"..." if len(ram_string.content) > 60 else ""
)
lines.append(
f' {ram_string.section} @ 0x{ram_string.address:08x}: "{clean_string}" ({len(ram_string.content)} bytes)'
)
lines.append("")
return "\n".join(lines)
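Beyond the formatted report, the analyzer exposes query helpers that scripts can call directly. A minimal sketch, assuming a firmware.elf and an objdump binary at the paths shown (both are placeholders; in the CLI they come from platformio idedata):

    from esphome.analyze_memory.ram_strings import RamStringsAnalyzer

    analyzer = RamStringsAnalyzer(
        "firmware.elf",                            # placeholder ELF path
        objdump_path="xtensa-lx106-elf-objdump",   # placeholder; must be on PATH or absolute
        platform="esp8266",                        # .rodata counts as RAM on this platform
    )
    analyzer.analyze()

    print(f"RAM sections total:  {analyzer.get_total_ram_usage():,} bytes")
    print(f"String bytes in RAM: {analyzer.get_total_string_bytes():,} bytes")

    # Duplicated strings are the cheapest wins: roughly (count - 1) * (len + 1) bytes each.
    for text, count in analyzer.get_repeated_strings()[:5]:
        print(f"  {count}x {text!r}")

    # Large data symbols (>= 256 bytes), with demangled names where available.
    for sym in analyzer.get_largest_symbols(256):
        print(f"  {sym.size:>6} B  {sym.demangled or sym.name}  ({sym.section})")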


@@ -0,0 +1,57 @@
"""Toolchain utilities for memory analysis."""
from __future__ import annotations
import logging
from pathlib import Path
import subprocess
_LOGGER = logging.getLogger(__name__)
# Platform-specific toolchain prefixes
TOOLCHAIN_PREFIXES = [
"xtensa-lx106-elf-", # ESP8266
"xtensa-esp32-elf-", # ESP32
"xtensa-esp-elf-", # ESP32 (newer IDF)
"", # System default (no prefix)
]
def find_tool(
tool_name: str,
objdump_path: str | None = None,
) -> str | None:
"""Find a toolchain tool by name.
First tries to derive the tool path from objdump_path (if provided),
then falls back to searching for platform-specific tools.
Args:
tool_name: Name of the tool (e.g., "objdump", "nm", "c++filt")
objdump_path: Path to objdump binary to derive other tool paths from
Returns:
Path to the tool or None if not found
"""
# Try to derive from objdump path first (most reliable)
if objdump_path and objdump_path != "objdump":
objdump_file = Path(objdump_path)
# Replace just the filename portion, preserving any prefix (e.g., xtensa-esp32-elf-)
new_name = objdump_file.name.replace("objdump", tool_name)
potential_path = str(objdump_file.with_name(new_name))
if Path(potential_path).exists():
_LOGGER.debug("Found %s at: %s", tool_name, potential_path)
return potential_path
# Try platform-specific tools
for prefix in TOOLCHAIN_PREFIXES:
cmd = f"{prefix}{tool_name}"
try:
subprocess.run([cmd, "--version"], capture_output=True, check=True)
_LOGGER.debug("Found %s: %s", tool_name, cmd)
return cmd
except (subprocess.CalledProcessError, FileNotFoundError):
continue
_LOGGER.warning("Could not find %s tool", tool_name)
return None
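A short sketch of the intended lookup order; the objdump path is a placeholder (in the CLI it comes from platformio idedata), and sibling tools keep the same xtensa-* prefix:

    from esphome.analyze_memory.toolchain import find_tool

    # Preferred: derive sibling tools from a known objdump location.
    objdump = "/path/to/xtensa-lx106-elf-objdump"  # placeholder path
    nm = find_tool("nm", objdump_path=objdump)
    cppfilt = find_tool("c++filt", objdump_path=objdump)

    # Without a hint, find_tool probes the known prefixes, then the bare system tool.
    system_nm = find_tool("nm")

    if nm is None:
        print("nm not found; symbol analysis will be skipped")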


@@ -269,6 +269,16 @@ def mock_memory_analyzer_cli() -> Generator[Mock]:
yield mock_class
@pytest.fixture
def mock_ram_strings_analyzer() -> Generator[Mock]:
"""Mock RamStringsAnalyzer for testing."""
with patch("esphome.analyze_memory.ram_strings.RamStringsAnalyzer") as mock_class:
mock_analyzer = MagicMock()
mock_analyzer.generate_report.return_value = "Mock RAM Strings Report"
mock_class.return_value = mock_analyzer
yield mock_class
def test_choose_upload_log_host_with_string_default() -> None:
"""Test with a single string default device."""
setup_core()
@@ -2424,6 +2434,7 @@ def test_command_analyze_memory_success(
mock_get_idedata: Mock,
mock_get_esphome_components: Mock,
mock_memory_analyzer_cli: Mock,
mock_ram_strings_analyzer: Mock,
) -> None:
"""Test command_analyze_memory with successful compilation and analysis."""
setup_core(platform=PLATFORM_ESP32, tmp_path=tmp_path, name="test_device")
@@ -2471,9 +2482,20 @@ def test_command_analyze_memory_success(
mock_analyzer.analyze.assert_called_once()
mock_analyzer.generate_report.assert_called_once()
# Verify report was printed
# Verify RAM strings analyzer was created and run
mock_ram_strings_analyzer.assert_called_once_with(
str(firmware_elf),
objdump_path="/path/to/objdump",
platform="esp32",
)
mock_ram_analyzer = mock_ram_strings_analyzer.return_value
mock_ram_analyzer.analyze.assert_called_once()
mock_ram_analyzer.generate_report.assert_called_once()
# Verify reports were printed
captured = capfd.readouterr()
assert "Mock Memory Report" in captured.out
assert "Mock RAM Strings Report" in captured.out
def test_command_analyze_memory_with_external_components(
@@ -2483,6 +2505,7 @@ def test_command_analyze_memory_with_external_components(
mock_get_idedata: Mock,
mock_get_esphome_components: Mock,
mock_memory_analyzer_cli: Mock,
mock_ram_strings_analyzer: Mock,
) -> None:
"""Test command_analyze_memory detects external components."""
setup_core(platform=PLATFORM_ESP32, tmp_path=tmp_path, name="test_device")