[core] Add RAM strings and symbols analysis to analyze-memory command (#12161)
This commit is contained in:
@@ -944,6 +944,7 @@ def command_analyze_memory(args: ArgsProtocol, config: ConfigType) -> int:
|
||||
"""
|
||||
from esphome import platformio_api
|
||||
from esphome.analyze_memory.cli import MemoryAnalyzerCLI
|
||||
from esphome.analyze_memory.ram_strings import RamStringsAnalyzer
|
||||
|
||||
# Always compile to ensure fresh data (fast if no changes - just relinks)
|
||||
exit_code = write_cpp(config)
|
||||
@@ -966,7 +967,7 @@ def command_analyze_memory(args: ArgsProtocol, config: ConfigType) -> int:
|
||||
external_components = detect_external_components(config)
|
||||
_LOGGER.debug("Detected external components: %s", external_components)
|
||||
|
||||
# Perform memory analysis
|
||||
# Perform component memory analysis
|
||||
_LOGGER.info("Analyzing memory usage...")
|
||||
analyzer = MemoryAnalyzerCLI(
|
||||
str(firmware_elf),
|
||||
@@ -976,11 +977,28 @@ def command_analyze_memory(args: ArgsProtocol, config: ConfigType) -> int:
|
||||
)
|
||||
analyzer.analyze()
|
||||
|
||||
# Generate and display report
|
||||
# Generate and display component report
|
||||
report = analyzer.generate_report()
|
||||
print()
|
||||
print(report)
|
||||
|
||||
# Perform RAM strings analysis
|
||||
_LOGGER.info("Analyzing RAM strings...")
|
||||
try:
|
||||
ram_analyzer = RamStringsAnalyzer(
|
||||
str(firmware_elf),
|
||||
objdump_path=idedata.objdump_path,
|
||||
platform=CORE.target_platform,
|
||||
)
|
||||
ram_analyzer.analyze()
|
||||
|
||||
# Generate and display RAM strings report
|
||||
ram_report = ram_analyzer.generate_report()
|
||||
print()
|
||||
print(ram_report)
|
||||
except Exception as e: # pylint: disable=broad-except
|
||||
_LOGGER.warning("RAM strings analysis failed: %s", e)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ from .const import (
|
||||
SECTION_TO_ATTR,
|
||||
SYMBOL_PATTERNS,
|
||||
)
|
||||
from .demangle import batch_demangle
|
||||
from .helpers import (
|
||||
get_component_class_patterns,
|
||||
get_esphome_components,
|
||||
@@ -27,15 +28,6 @@ if TYPE_CHECKING:
|
||||
|
||||
_LOGGER = logging.getLogger(__name__)
|
||||
|
||||
# GCC global constructor/destructor prefix annotations
|
||||
_GCC_PREFIX_ANNOTATIONS = {
|
||||
"_GLOBAL__sub_I_": "global constructor for",
|
||||
"_GLOBAL__sub_D_": "global destructor for",
|
||||
}
|
||||
|
||||
# GCC optimization suffix pattern (e.g., $isra$0, $part$1, $constprop$2)
|
||||
_GCC_OPTIMIZATION_SUFFIX_PATTERN = re.compile(r"(\$(?:isra|part|constprop)\$\d+)")
|
||||
|
||||
# C++ runtime patterns for categorization
|
||||
_CPP_RUNTIME_PATTERNS = frozenset(["vtable", "typeinfo", "thunk"])
|
||||
|
||||
@@ -312,168 +304,9 @@ class MemoryAnalyzer:
|
||||
if not symbols:
|
||||
return
|
||||
|
||||
# Try to find the appropriate c++filt for the platform
|
||||
cppfilt_cmd = "c++filt"
|
||||
|
||||
_LOGGER.info("Demangling %d symbols", len(symbols))
|
||||
_LOGGER.debug("objdump_path = %s", self.objdump_path)
|
||||
|
||||
# Check if we have a toolchain-specific c++filt
|
||||
if self.objdump_path and self.objdump_path != "objdump":
|
||||
# Replace objdump with c++filt in the path
|
||||
potential_cppfilt = self.objdump_path.replace("objdump", "c++filt")
|
||||
_LOGGER.info("Checking for toolchain c++filt at: %s", potential_cppfilt)
|
||||
if Path(potential_cppfilt).exists():
|
||||
cppfilt_cmd = potential_cppfilt
|
||||
_LOGGER.info("✓ Using toolchain c++filt: %s", cppfilt_cmd)
|
||||
else:
|
||||
_LOGGER.info(
|
||||
"✗ Toolchain c++filt not found at %s, using system c++filt",
|
||||
potential_cppfilt,
|
||||
)
|
||||
else:
|
||||
_LOGGER.info("✗ Using system c++filt (objdump_path=%s)", self.objdump_path)
|
||||
|
||||
# Strip GCC optimization suffixes and prefixes before demangling
|
||||
# Suffixes like $isra$0, $part$0, $constprop$0 confuse c++filt
|
||||
# Prefixes like _GLOBAL__sub_I_ need to be removed and tracked
|
||||
symbols_stripped: list[str] = []
|
||||
symbols_prefixes: list[str] = [] # Track removed prefixes
|
||||
for symbol in symbols:
|
||||
# Remove GCC optimization markers
|
||||
stripped = _GCC_OPTIMIZATION_SUFFIX_PATTERN.sub("", symbol)
|
||||
|
||||
# Handle GCC global constructor/initializer prefixes
|
||||
# _GLOBAL__sub_I_<mangled> -> extract <mangled> for demangling
|
||||
prefix = ""
|
||||
for gcc_prefix in _GCC_PREFIX_ANNOTATIONS:
|
||||
if stripped.startswith(gcc_prefix):
|
||||
prefix = gcc_prefix
|
||||
stripped = stripped[len(prefix) :]
|
||||
break
|
||||
|
||||
symbols_stripped.append(stripped)
|
||||
symbols_prefixes.append(prefix)
|
||||
|
||||
try:
|
||||
# Send all symbols to c++filt at once
|
||||
result = subprocess.run(
|
||||
[cppfilt_cmd],
|
||||
input="\n".join(symbols_stripped),
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
)
|
||||
except (subprocess.SubprocessError, OSError, UnicodeDecodeError) as e:
|
||||
# On error, cache originals
|
||||
_LOGGER.warning("Failed to batch demangle symbols: %s", e)
|
||||
for symbol in symbols:
|
||||
self._demangle_cache[symbol] = symbol
|
||||
return
|
||||
|
||||
if result.returncode != 0:
|
||||
_LOGGER.warning(
|
||||
"c++filt exited with code %d: %s",
|
||||
result.returncode,
|
||||
result.stderr[:200] if result.stderr else "(no error output)",
|
||||
)
|
||||
# Cache originals on failure
|
||||
for symbol in symbols:
|
||||
self._demangle_cache[symbol] = symbol
|
||||
return
|
||||
|
||||
# Process demangled output
|
||||
self._process_demangled_output(
|
||||
symbols, symbols_stripped, symbols_prefixes, result.stdout, cppfilt_cmd
|
||||
)
|
||||
|
||||
def _process_demangled_output(
|
||||
self,
|
||||
symbols: list[str],
|
||||
symbols_stripped: list[str],
|
||||
symbols_prefixes: list[str],
|
||||
demangled_output: str,
|
||||
cppfilt_cmd: str,
|
||||
) -> None:
|
||||
"""Process demangled symbol output and populate cache.
|
||||
|
||||
Args:
|
||||
symbols: Original symbol names
|
||||
symbols_stripped: Stripped symbol names sent to c++filt
|
||||
symbols_prefixes: Removed prefixes to restore
|
||||
demangled_output: Output from c++filt
|
||||
cppfilt_cmd: Path to c++filt command (for logging)
|
||||
"""
|
||||
demangled_lines = demangled_output.strip().split("\n")
|
||||
failed_count = 0
|
||||
|
||||
for original, stripped, prefix, demangled in zip(
|
||||
symbols, symbols_stripped, symbols_prefixes, demangled_lines
|
||||
):
|
||||
# Add back any prefix that was removed
|
||||
demangled = self._restore_symbol_prefix(prefix, stripped, demangled)
|
||||
|
||||
# If we stripped a suffix, add it back to the demangled name for clarity
|
||||
if original != stripped and not prefix:
|
||||
demangled = self._restore_symbol_suffix(original, demangled)
|
||||
|
||||
self._demangle_cache[original] = demangled
|
||||
|
||||
# Log symbols that failed to demangle (stayed the same as stripped version)
|
||||
if stripped == demangled and stripped.startswith("_Z"):
|
||||
failed_count += 1
|
||||
if failed_count <= 5: # Only log first 5 failures
|
||||
_LOGGER.warning("Failed to demangle: %s", original)
|
||||
|
||||
if failed_count == 0:
|
||||
_LOGGER.info("Successfully demangled all %d symbols", len(symbols))
|
||||
return
|
||||
|
||||
_LOGGER.warning(
|
||||
"Failed to demangle %d/%d symbols using %s",
|
||||
failed_count,
|
||||
len(symbols),
|
||||
cppfilt_cmd,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _restore_symbol_prefix(prefix: str, stripped: str, demangled: str) -> str:
|
||||
"""Restore prefix that was removed before demangling.
|
||||
|
||||
Args:
|
||||
prefix: Prefix that was removed (e.g., "_GLOBAL__sub_I_")
|
||||
stripped: Stripped symbol name
|
||||
demangled: Demangled symbol name
|
||||
|
||||
Returns:
|
||||
Demangled name with prefix restored/annotated
|
||||
"""
|
||||
if not prefix:
|
||||
return demangled
|
||||
|
||||
# Successfully demangled - add descriptive prefix
|
||||
if demangled != stripped and (
|
||||
annotation := _GCC_PREFIX_ANNOTATIONS.get(prefix)
|
||||
):
|
||||
return f"[{annotation}: {demangled}]"
|
||||
|
||||
# Failed to demangle - restore original prefix
|
||||
return prefix + demangled
|
||||
|
||||
@staticmethod
|
||||
def _restore_symbol_suffix(original: str, demangled: str) -> str:
|
||||
"""Restore GCC optimization suffix that was removed before demangling.
|
||||
|
||||
Args:
|
||||
original: Original symbol name with suffix
|
||||
demangled: Demangled symbol name without suffix
|
||||
|
||||
Returns:
|
||||
Demangled name with suffix annotation
|
||||
"""
|
||||
if suffix_match := _GCC_OPTIMIZATION_SUFFIX_PATTERN.search(original):
|
||||
return f"{demangled} [{suffix_match.group(1)}]"
|
||||
return demangled
|
||||
self._demangle_cache = batch_demangle(symbols, objdump_path=self.objdump_path)
|
||||
_LOGGER.info("Successfully demangled %d symbols", len(self._demangle_cache))
|
||||
|
||||
def _demangle_symbol(self, symbol: str) -> str:
|
||||
"""Get demangled C++ symbol name from cache."""
|
||||
|
||||
182
esphome/analyze_memory/demangle.py
Normal file
182
esphome/analyze_memory/demangle.py
Normal file
@@ -0,0 +1,182 @@
|
||||
"""Symbol demangling utilities for memory analysis.
|
||||
|
||||
This module provides functions for demangling C++ symbol names using c++filt.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
from .toolchain import find_tool
|
||||
|
||||
_LOGGER = logging.getLogger(__name__)
|
||||
|
||||
# Prefixes GCC attaches to global constructor/destructor symbols, mapped to the
# label used when annotating the demangled name.
GCC_PREFIX_ANNOTATIONS = {
    "_GLOBAL__sub_I_": "global constructor for",
    "_GLOBAL__sub_D_": "global destructor for",
}

# Matches one GCC optimization marker, e.g. $isra$0, $part$1 or $constprop$2.
# The whole marker is captured as group 1.
GCC_OPTIMIZATION_SUFFIX_PATTERN = re.compile(r"(\$(?:isra|part|constprop)\$\d+)")
|
||||
|
||||
|
||||
def _strip_gcc_annotations(symbol: str) -> tuple[str, str]:
    """Remove GCC-added decorations from a symbol before demangling.

    Every optimization marker matched by GCC_OPTIMIZATION_SUFFIX_PATTERN is
    deleted, then at most one leading prefix listed in
    GCC_PREFIX_ANNOTATIONS is cut off.

    Args:
        symbol: The mangled symbol name

    Returns:
        Tuple of (stripped_symbol, removed_prefix); removed_prefix is an
        empty string when none of the known prefixes was present
    """
    bare = GCC_OPTIMIZATION_SUFFIX_PATTERN.sub("", symbol)

    # First known prefix (in table order) that the cleaned symbol starts with
    matched = next(
        (known for known in GCC_PREFIX_ANNOTATIONS if bare.startswith(known)),
        "",
    )
    return bare[len(matched) :], matched
|
||||
|
||||
|
||||
def _restore_symbol_prefix(prefix: str, stripped: str, demangled: str) -> str:
|
||||
"""Restore prefix that was removed before demangling.
|
||||
|
||||
Args:
|
||||
prefix: Prefix that was removed (e.g., "_GLOBAL__sub_I_")
|
||||
stripped: Stripped symbol name
|
||||
demangled: Demangled symbol name
|
||||
|
||||
Returns:
|
||||
Demangled name with prefix restored/annotated
|
||||
"""
|
||||
if not prefix:
|
||||
return demangled
|
||||
|
||||
# Successfully demangled - add descriptive prefix
|
||||
if demangled != stripped and (annotation := GCC_PREFIX_ANNOTATIONS.get(prefix)):
|
||||
return f"[{annotation}: {demangled}]"
|
||||
|
||||
# Failed to demangle - restore original prefix
|
||||
return prefix + demangled
|
||||
|
||||
|
||||
def _restore_symbol_suffix(original: str, demangled: str) -> str:
    """Append the GCC optimization marker found in the original symbol.

    Args:
        original: Original symbol name, possibly carrying a marker such as
            "$isra$0"
        demangled: Demangled symbol name without the marker

    Returns:
        "<demangled> [<marker>]" using the first marker found in
        ``original``, or ``demangled`` unchanged when there is none
    """
    found = GCC_OPTIMIZATION_SUFFIX_PATTERN.search(original)
    if found is None:
        return demangled
    return f"{demangled} [{found.group(1)}]"
|
||||
|
||||
|
||||
def batch_demangle(
    symbols: list[str],
    cppfilt_path: str | None = None,
    objdump_path: str | None = None,
) -> dict[str, str]:
    """Batch demangle C++ symbol names.

    All symbols are sent to a single c++filt process, one per line. GCC
    optimization markers and global constructor/destructor prefixes are
    stripped first and re-attached to the result as annotations.

    Any failure (tool not found, process error, non-zero exit status, or an
    output line count that does not match the input) yields an identity
    mapping, so every input symbol is always present in the result.

    Args:
        symbols: List of symbol names to demangle
        cppfilt_path: Path to c++filt binary (auto-detected if not provided)
        objdump_path: Path to objdump binary to derive c++filt path from

    Returns:
        Dictionary mapping original symbol names to demangled names
    """
    cache: dict[str, str] = {}

    if not symbols:
        return cache

    # Find c++filt tool
    cppfilt_cmd = cppfilt_path or find_tool("c++filt", objdump_path)
    if not cppfilt_cmd:
        _LOGGER.warning("Could not find c++filt, symbols will not be demangled")
        return {s: s for s in symbols}

    _LOGGER.debug("Demangling %d symbols using %s", len(symbols), cppfilt_cmd)

    # Strip GCC optimization suffixes and prefixes before demangling
    symbols_stripped: list[str] = []
    symbols_prefixes: list[str] = []
    for symbol in symbols:
        stripped, prefix = _strip_gcc_annotations(symbol)
        symbols_stripped.append(stripped)
        symbols_prefixes.append(prefix)

    try:
        result = subprocess.run(
            [cppfilt_cmd],
            input="\n".join(symbols_stripped),
            capture_output=True,
            text=True,
            check=False,
        )
    except (subprocess.SubprocessError, OSError, UnicodeDecodeError) as e:
        _LOGGER.warning("Failed to batch demangle symbols: %s", e)
        return {s: s for s in symbols}

    if result.returncode != 0:
        _LOGGER.warning(
            "c++filt exited with code %d: %s",
            result.returncode,
            result.stderr[:200] if result.stderr else "(no error output)",
        )
        return {s: s for s in symbols}

    # Process demangled output
    demangled_lines = result.stdout.strip().split("\n")

    # Lines are paired with symbols by position, so a count mismatch would
    # silently attach names to the wrong symbols - give up instead.
    if len(demangled_lines) != len(symbols):
        _LOGGER.warning(
            "c++filt output mismatch: expected %d lines, got %d",
            len(symbols),
            len(demangled_lines),
        )
        return {s: s for s in symbols}

    failed_count = 0

    for original, stripped, prefix, raw in zip(
        symbols, symbols_stripped, symbols_prefixes, demangled_lines
    ):
        # Decide failure on the RAW c++filt line. Once a prefix or suffix has
        # been re-attached the text can never equal `stripped` again, so
        # checking afterwards would miss every annotated symbol.
        failed = raw == stripped and stripped.startswith("_Z")

        # Add back any prefix that was removed
        demangled = _restore_symbol_prefix(prefix, stripped, raw)

        # If we stripped a suffix, add it back to the demangled name for clarity
        if original != stripped and not prefix:
            demangled = _restore_symbol_suffix(original, demangled)

        cache[original] = demangled

        if failed:
            failed_count += 1
            if failed_count <= 5:  # Only log the first few to limit noise
                _LOGGER.debug("Failed to demangle: %s", original)

    if failed_count > 0:
        _LOGGER.debug(
            "Failed to demangle %d/%d symbols using %s",
            failed_count,
            len(symbols),
            cppfilt_cmd,
        )

    return cache
|
||||
493
esphome/analyze_memory/ram_strings.py
Normal file
493
esphome/analyze_memory/ram_strings.py
Normal file
@@ -0,0 +1,493 @@
|
||||
"""Analyzer for RAM-stored strings in ESP8266/ESP32 firmware ELF files.
|
||||
|
||||
This module identifies strings that are stored in RAM sections (.data, .bss, .rodata)
|
||||
rather than in flash sections (.irom0.text, .irom.text), which is important for
|
||||
memory-constrained platforms like ESP8266.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
from .demangle import batch_demangle
|
||||
from .toolchain import find_tool
|
||||
|
||||
_LOGGER = logging.getLogger(__name__)
|
||||
|
||||
# Which ELF sections count as RAM and which as flash, per platform.
#   ESP8266: .rodata is in RAM (DRAM), not flash
#   ESP32:   .rodata is in flash, mapped to data bus
ESP8266_RAM_SECTIONS = frozenset((".data", ".rodata", ".bss"))
ESP8266_FLASH_SECTIONS = frozenset((".irom0.text", ".irom.text", ".text"))

# ESP32: .rodata is memory-mapped from flash
ESP32_RAM_SECTIONS = frozenset((".data", ".bss", ".dram0.data", ".dram0.bss"))
ESP32_FLASH_SECTIONS = frozenset(
    (".text", ".rodata", ".flash.text", ".flash.rodata")
)

# nm type letters accepted as data symbols: D/d = data, R/r = rodata, B/b = bss
# (D = global data, d = local data)
DATA_SYMBOL_TYPES = frozenset("DdRrBb")
|
||||
|
||||
|
||||
@dataclass
class SectionInfo:
    """Record describing one section of an ELF file."""

    # Section name, e.g. ".data"
    name: str
    # Address of the section
    address: int
    # Section size in bytes
    size: int
|
||||
|
||||
|
||||
@dataclass
class RamString:
    """One string located in a RAM section."""

    # Name of the section the string was found in
    section: str
    # Address of the string's first character
    address: int
    # The string text, without its terminator
    content: str

    @property
    def size(self) -> int:
        """Size in bytes including null terminator."""
        terminator = 1
        return len(self.content) + terminator
|
||||
|
||||
|
||||
@dataclass
class RamSymbol:
    """One data symbol located in a RAM section."""

    # Symbol name
    name: str
    # Single-letter symbol type (see DATA_SYMBOL_TYPES)
    sym_type: str
    # Symbol address
    address: int
    # Symbol size in bytes
    size: int
    # Name of the RAM section containing the address
    section: str
    # Demangled name; stays empty until batch demangling fills it in
    demangled: str = ""
|
||||
|
||||
|
||||
class RamStringsAnalyzer:
    """Analyzes ELF files to find strings stored in RAM.

    Call ``analyze()`` to populate ``sections``, ``ram_strings`` and
    ``ram_symbols``, then ``generate_report()`` (or the ``get_*`` helpers)
    to read the results.
    """

    # One data row of an `objdump -s` dump: an address followed by hex groups
    # that are separated by exactly ONE space. The ASCII preview column comes
    # after two or more spaces, so it is never captured as data even when it
    # starts with hex-looking text such as "decode" or "41".
    _HEX_DUMP_ROW = re.compile(
        r"^\s+([0-9a-fA-F]+)\s+([0-9a-fA-F]{2,8}(?: [0-9a-fA-F]{2,8})*)"
    )

    def __init__(
        self,
        elf_path: str,
        objdump_path: str | None = None,
        min_length: int = 8,
        platform: str = "esp32",
    ) -> None:
        """Initialize the RAM strings analyzer.

        Args:
            elf_path: Path to the ELF file to analyze
            objdump_path: Path to objdump binary (used to find other tools)
            min_length: Minimum string length to report (default: 8)
            platform: Platform name ("esp8266", "esp32", etc.) for section mapping

        Raises:
            FileNotFoundError: If ``elf_path`` does not exist
        """
        self.elf_path = Path(elf_path)
        if not self.elf_path.exists():
            raise FileNotFoundError(f"ELF file not found: {elf_path}")

        self.objdump_path = objdump_path
        self.min_length = min_length
        self.platform = platform

        # Set RAM/flash sections based on platform
        if self.platform == "esp8266":
            self.ram_sections = ESP8266_RAM_SECTIONS
            self.flash_sections = ESP8266_FLASH_SECTIONS
        else:
            # ESP32 and other platforms
            self.ram_sections = ESP32_RAM_SECTIONS
            self.flash_sections = ESP32_FLASH_SECTIONS

        self.sections: dict[str, SectionInfo] = {}
        self.ram_strings: list[RamString] = []
        self.ram_symbols: list[RamSymbol] = []

    def _run_command(self, cmd: list[str]) -> str:
        """Run a command and return its standard output.

        Args:
            cmd: Program and arguments to execute

        Returns:
            Captured stdout as text

        Raises:
            subprocess.CalledProcessError: If the command exits non-zero
            FileNotFoundError: If the program cannot be found
        """
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
            return result.stdout
        except subprocess.CalledProcessError as e:
            _LOGGER.debug("Command failed: %s - %s", " ".join(cmd), e.stderr)
            raise
        except FileNotFoundError:
            _LOGGER.warning("Command not found: %s", cmd[0])
            raise

    def analyze(self) -> None:
        """Perform the full RAM analysis.

        Order matters: string extraction and symbol classification both
        rely on the section table parsed first, and demangling needs the
        collected symbols.
        """
        self._parse_sections()
        self._extract_strings()
        self._analyze_symbols()
        self._demangle_symbols()

    def _parse_sections(self) -> None:
        """Parse section headers from ELF file into ``self.sections``.

        Leaves ``self.sections`` untouched when objdump is unavailable or
        fails.
        """
        objdump = find_tool("objdump", self.objdump_path)
        if not objdump:
            _LOGGER.error("Could not find objdump command")
            return

        try:
            output = self._run_command([objdump, "-h", str(self.elf_path)])
        except (subprocess.CalledProcessError, FileNotFoundError):
            return

        # Parse section headers
        # Format: Idx Name Size VMA LMA File off Algn
        section_pattern = re.compile(
            r"^\s*\d+\s+(\S+)\s+([0-9a-fA-F]+)\s+([0-9a-fA-F]+)"
        )

        for line in output.split("\n"):
            if match := section_pattern.match(line):
                name = match.group(1)
                size = int(match.group(2), 16)
                vma = int(match.group(3), 16)
                self.sections[name] = SectionInfo(name, vma, size)

    def _extract_strings(self) -> None:
        """Extract strings from every RAM section present in the ELF file."""
        objdump = find_tool("objdump", self.objdump_path)
        if not objdump:
            return

        for section_name in self.ram_sections:
            if section_name not in self.sections:
                continue

            try:
                output = self._run_command(
                    [objdump, "-s", "-j", section_name, str(self.elf_path)]
                )
            except (subprocess.CalledProcessError, FileNotFoundError):
                # Section may exist but have no content (e.g., .bss), or the
                # tool vanished; either way there is nothing to scan here
                continue

            self.ram_strings.extend(self._parse_hex_dump(output, section_name))

    def _parse_hex_dump(self, output: str, section_name: str) -> list[RamString]:
        """Parse hex dump output to extract strings.

        A string is a run of printable ASCII bytes (0x20-0x7E) that is at
        least ``self.min_length`` long and is immediately followed by a NUL
        byte. Runs may continue across dump rows. Runs ended by any other
        byte, or still open at the end of the dump, are discarded.

        Args:
            output: Output from objdump -s
            section_name: Name of the section being parsed

        Returns:
            List of RamString objects
        """
        strings: list[RamString] = []
        current_string = bytearray()
        string_start_addr = 0

        for line in output.split("\n"):
            # Lines look like: " 3ffef8a0 00000000 00000000 00000000 00000000  ................"
            match = self._HEX_DUMP_ROW.match(line)
            if not match:
                continue

            addr = int(match.group(1), 16)
            byte_offset = 0

            for hex_chunk in match.group(2).split():
                # Handle both byte-by-byte and word formats
                for i in range(0, len(hex_chunk), 2):
                    byte_val = int(hex_chunk[i : i + 2], 16)
                    if 0x20 <= byte_val <= 0x7E:  # Printable ASCII
                        if not current_string:
                            string_start_addr = addr + byte_offset
                        current_string.append(byte_val)
                    else:
                        if byte_val == 0 and len(current_string) >= self.min_length:
                            # Found null terminator
                            strings.append(
                                RamString(
                                    section=section_name,
                                    address=string_start_addr,
                                    content=current_string.decode(
                                        "ascii", errors="ignore"
                                    ),
                                )
                            )
                        # Any non-printable byte ends the current run
                        current_string = bytearray()
                    byte_offset += 1

        return strings

    def _analyze_symbols(self) -> None:
        """Collect data symbols whose address lies inside a RAM section."""
        nm = find_tool("nm", self.objdump_path)
        if not nm:
            return

        try:
            output = self._run_command([nm, "-S", "--size-sort", str(self.elf_path)])
        except (subprocess.CalledProcessError, FileNotFoundError):
            return

        for line in output.split("\n"):
            # Expected columns: address, size, type letter, name
            parts = line.split()
            if len(parts) < 4:
                continue

            try:
                addr = int(parts[0], 16)
                size = int(parts[1], 16) if parts[1] != "?" else 0
            except ValueError:
                continue

            sym_type = parts[2]
            name = " ".join(parts[3:])

            # Filter for data symbols
            if sym_type not in DATA_SYMBOL_TYPES:
                continue

            # Check if symbol is in a RAM section
            for section_name in self.ram_sections:
                if section_name not in self.sections:
                    continue

                section = self.sections[section_name]
                if section.address <= addr < section.address + section.size:
                    self.ram_symbols.append(
                        RamSymbol(
                            name=name,
                            sym_type=sym_type,
                            address=addr,
                            size=size,
                            section=section_name,
                        )
                    )
                    break

    def _demangle_symbols(self) -> None:
        """Batch demangle all RAM symbol names."""
        if not self.ram_symbols:
            return

        # Collect all symbol names and demangle them
        symbol_names = [s.name for s in self.ram_symbols]
        demangle_cache = batch_demangle(symbol_names, objdump_path=self.objdump_path)

        # Assign demangled names to symbols
        for symbol in self.ram_symbols:
            symbol.demangled = demangle_cache.get(symbol.name, symbol.name)

    def _get_sections_size(self, section_names: frozenset[str]) -> int:
        """Get total size of the parsed sections named in ``section_names``."""
        return sum(
            section.size
            for name, section in self.sections.items()
            if name in section_names
        )

    def get_total_ram_usage(self) -> int:
        """Get total RAM usage from RAM sections."""
        return self._get_sections_size(self.ram_sections)

    def get_total_flash_usage(self) -> int:
        """Get total flash usage from flash sections."""
        return self._get_sections_size(self.flash_sections)

    def get_total_string_bytes(self) -> int:
        """Get total bytes used by strings in RAM (terminators included)."""
        return sum(s.size for s in self.ram_strings)

    def get_repeated_strings(self) -> list[tuple[str, int]]:
        """Find strings that appear multiple times.

        Returns:
            List of (string, count) tuples sorted by potential savings
        """
        string_counts: dict[str, int] = defaultdict(int)
        for ram_string in self.ram_strings:
            string_counts[ram_string.content] += 1

        return sorted(
            [(s, c) for s, c in string_counts.items() if c > 1],
            # count * (length + terminator) = bytes held by all copies
            key=lambda x: x[1] * (len(x[0]) + 1),
            reverse=True,
        )

    def get_long_strings(self, min_len: int = 20) -> list[RamString]:
        """Get strings at least as long as the specified length.

        Args:
            min_len: Minimum string length

        Returns:
            List of RamString objects sorted by length, longest first
        """
        return sorted(
            [s for s in self.ram_strings if len(s.content) >= min_len],
            key=lambda x: len(x.content),
            reverse=True,
        )

    def get_largest_symbols(self, min_size: int = 100) -> list[RamSymbol]:
        """Get RAM symbols at least as large as the specified size.

        Args:
            min_size: Minimum symbol size in bytes

        Returns:
            List of RamSymbol objects sorted by size, largest first
        """
        return sorted(
            [s for s in self.ram_symbols if s.size >= min_size],
            key=lambda x: x.size,
            reverse=True,
        )

    def generate_report(self, show_all_sections: bool = False) -> str:
        """Generate a formatted RAM strings analysis report.

        Args:
            show_all_sections: If True, show all sections, not just RAM

        Returns:
            Formatted report string
        """
        lines: list[str] = []
        table_width = 80

        lines.append("=" * table_width)
        lines.append(
            f"RAM Strings Analysis ({self.platform.upper()})".center(table_width)
        )
        lines.append("=" * table_width)
        lines.append("")

        # Section Analysis
        lines.append("SECTION ANALYSIS")
        lines.append("-" * table_width)
        lines.append(f"{'Section':<20} {'Address':<12} {'Size':<12} {'Location'}")
        lines.append("-" * table_width)

        total_ram_usage = 0
        total_flash_usage = 0

        for name, section in sorted(self.sections.items(), key=lambda x: x[1].address):
            if name in self.ram_sections:
                location = "RAM"
                total_ram_usage += section.size
            elif name in self.flash_sections:
                location = "FLASH"
                total_flash_usage += section.size
            else:
                location = "OTHER"

            # Totals cover every section; only the listing is filtered
            if show_all_sections or name in self.ram_sections:
                lines.append(
                    f"{name:<20} 0x{section.address:08x} {section.size:>8} B {location}"
                )

        lines.append("-" * table_width)
        lines.append(f"Total RAM sections size: {total_ram_usage:,} bytes")
        lines.append(f"Total Flash sections size: {total_flash_usage:,} bytes")

        # Strings in RAM
        lines.append("")
        lines.append("=" * table_width)
        lines.append("STRINGS IN RAM SECTIONS")
        lines.append("=" * table_width)
        lines.append(
            "Note: .bss sections contain uninitialized data (no strings to extract)"
        )

        # Group strings by section
        strings_by_section: dict[str, list[RamString]] = defaultdict(list)
        for ram_string in self.ram_strings:
            strings_by_section[ram_string.section].append(ram_string)

        for section_name in sorted(strings_by_section.keys()):
            section_strings = strings_by_section[section_name]
            lines.append(f"\nSection: {section_name}")
            lines.append("-" * 40)
            for ram_string in sorted(section_strings, key=lambda x: x.address):
                clean_string = ram_string.content[:100] + (
                    "..." if len(ram_string.content) > 100 else ""
                )
                lines.append(
                    f' 0x{ram_string.address:08x}: "{clean_string}" (len={len(ram_string.content)})'
                )

        # Large RAM symbols
        lines.append("")
        lines.append("=" * table_width)
        lines.append("LARGE DATA SYMBOLS IN RAM (>= 50 bytes)")
        lines.append("=" * table_width)

        largest_symbols = self.get_largest_symbols(50)
        lines.append(f"\n{'Symbol':<50} {'Type':<6} {'Size':<10} {'Section'}")
        lines.append("-" * table_width)

        for symbol in largest_symbols:
            # Use demangled name if available, otherwise raw name
            display_name = symbol.demangled or symbol.name
            # Truncate so the name never overflows its 50-column field
            name_display = display_name[:49] if len(display_name) > 49 else display_name
            lines.append(
                f"{name_display:<50} {symbol.sym_type:<6} {symbol.size:>8} B {symbol.section}"
            )

        # Summary
        lines.append("")
        lines.append("=" * table_width)
        lines.append("SUMMARY")
        lines.append("=" * table_width)
        lines.append(f"Total strings found in RAM: {len(self.ram_strings)}")
        total_string_bytes = self.get_total_string_bytes()
        lines.append(f"Total bytes used by strings: {total_string_bytes:,}")

        # Optimization targets
        lines.append("")
        lines.append("=" * table_width)
        lines.append("POTENTIAL OPTIMIZATION TARGETS")
        lines.append("=" * table_width)

        # Repeated strings
        repeated = self.get_repeated_strings()[:10]
        if repeated:
            lines.append("\nRepeated strings (could be deduplicated):")
            for string, count in repeated:
                # Keeping one copy saves every additional copy incl. terminator
                savings = (count - 1) * (len(string) + 1)
                clean_string = string[:50] + ("..." if len(string) > 50 else "")
                lines.append(
                    f' "{clean_string}" - appears {count} times (potential savings: {savings} bytes)'
                )

        # Long strings - platform-specific advice
        long_strings = self.get_long_strings(20)[:10]
        if long_strings:
            if self.platform == "esp8266":
                lines.append(
                    "\nLong strings that could be moved to PROGMEM (>= 20 chars):"
                )
            else:
                # ESP32: strings in DRAM are typically there for a reason
                # (interrupt handlers, pre-flash-init code, etc.)
                lines.append("\nLong strings in DRAM (>= 20 chars):")
                lines.append(
                    "Note: ESP32 DRAM strings may be required for interrupt/early-boot contexts"
                )
            for ram_string in long_strings:
                clean_string = ram_string.content[:60] + (
                    "..." if len(ram_string.content) > 60 else ""
                )
                lines.append(
                    f' {ram_string.section} @ 0x{ram_string.address:08x}: "{clean_string}" ({len(ram_string.content)} bytes)'
                )

        lines.append("")
        return "\n".join(lines)
|
||||
57
esphome/analyze_memory/toolchain.py
Normal file
57
esphome/analyze_memory/toolchain.py
Normal file
@@ -0,0 +1,57 @@
|
||||
"""Toolchain utilities for memory analysis."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import subprocess
|
||||
|
||||
_LOGGER = logging.getLogger(__name__)

# Platform-specific toolchain prefixes, probed in this order when the tool
# cannot be derived from a known objdump path.
TOOLCHAIN_PREFIXES = [
    "xtensa-lx106-elf-",  # ESP8266
    "xtensa-esp32-elf-",  # ESP32
    "xtensa-esp-elf-",  # ESP32 (newer IDF)
    "",  # System default (no prefix)
]


def find_tool(
    tool_name: str,
    objdump_path: str | None = None,
) -> str | None:
    """Find a toolchain tool by name.

    First tries to derive the tool path from objdump_path (if provided),
    then falls back to probing each entry of TOOLCHAIN_PREFIXES by running
    ``<prefix><tool_name> --version``.

    Args:
        tool_name: Name of the tool (e.g., "objdump", "nm", "c++filt")
        objdump_path: Path to objdump binary to derive other tool paths from

    Returns:
        Path (or bare command name) of the tool, or None if not found
    """
    # Try to derive from objdump path first (most reliable). A bare "objdump"
    # carries no directory or prefix information, so it is skipped here.
    if objdump_path and objdump_path != "objdump":
        objdump_file = Path(objdump_path)
        # Replace just the filename portion, preserving any prefix (e.g., xtensa-esp32-elf-)
        new_name = objdump_file.name.replace("objdump", tool_name)
        candidate = objdump_file.with_name(new_name)
        if candidate.exists():
            potential_path = str(candidate)
            _LOGGER.debug("Found %s at: %s", tool_name, potential_path)
            return potential_path

    # Try platform-specific tools
    for prefix in TOOLCHAIN_PREFIXES:
        cmd = f"{prefix}{tool_name}"
        try:
            subprocess.run([cmd, "--version"], capture_output=True, check=True)
        except (subprocess.CalledProcessError, OSError):
            # OSError covers a missing binary (FileNotFoundError) as well as a
            # candidate that exists but cannot be executed (PermissionError,
            # exec format error). Either way, move on to the next prefix
            # instead of aborting the whole search.
            continue
        _LOGGER.debug("Found %s: %s", tool_name, cmd)
        return cmd

    _LOGGER.warning("Could not find %s tool", tool_name)
    return None
|
||||
@@ -269,6 +269,16 @@ def mock_memory_analyzer_cli() -> Generator[Mock]:
|
||||
yield mock_class
|
||||
|
||||
|
||||
@pytest.fixture
def mock_ram_strings_analyzer() -> Generator[Mock]:
    """Patch RamStringsAnalyzer so tests get a canned RAM strings report.

    Yields the patched class mock; calling it returns a MagicMock whose
    generate_report() returns "Mock RAM Strings Report".
    """
    patch_target = "esphome.analyze_memory.ram_strings.RamStringsAnalyzer"
    with patch(patch_target) as patched_class:
        # Fresh MagicMock instance, configured in one step via keyword config.
        patched_class.return_value = MagicMock(
            **{"generate_report.return_value": "Mock RAM Strings Report"}
        )
        yield patched_class
|
||||
|
||||
|
||||
def test_choose_upload_log_host_with_string_default() -> None:
|
||||
"""Test with a single string default device."""
|
||||
setup_core()
|
||||
@@ -2424,6 +2434,7 @@ def test_command_analyze_memory_success(
|
||||
mock_get_idedata: Mock,
|
||||
mock_get_esphome_components: Mock,
|
||||
mock_memory_analyzer_cli: Mock,
|
||||
mock_ram_strings_analyzer: Mock,
|
||||
) -> None:
|
||||
"""Test command_analyze_memory with successful compilation and analysis."""
|
||||
setup_core(platform=PLATFORM_ESP32, tmp_path=tmp_path, name="test_device")
|
||||
@@ -2471,9 +2482,20 @@ def test_command_analyze_memory_success(
|
||||
mock_analyzer.analyze.assert_called_once()
|
||||
mock_analyzer.generate_report.assert_called_once()
|
||||
|
||||
# Verify report was printed
|
||||
# Verify RAM strings analyzer was created and run
|
||||
mock_ram_strings_analyzer.assert_called_once_with(
|
||||
str(firmware_elf),
|
||||
objdump_path="/path/to/objdump",
|
||||
platform="esp32",
|
||||
)
|
||||
mock_ram_analyzer = mock_ram_strings_analyzer.return_value
|
||||
mock_ram_analyzer.analyze.assert_called_once()
|
||||
mock_ram_analyzer.generate_report.assert_called_once()
|
||||
|
||||
# Verify reports were printed
|
||||
captured = capfd.readouterr()
|
||||
assert "Mock Memory Report" in captured.out
|
||||
assert "Mock RAM Strings Report" in captured.out
|
||||
|
||||
|
||||
def test_command_analyze_memory_with_external_components(
|
||||
@@ -2483,6 +2505,7 @@ def test_command_analyze_memory_with_external_components(
|
||||
mock_get_idedata: Mock,
|
||||
mock_get_esphome_components: Mock,
|
||||
mock_memory_analyzer_cli: Mock,
|
||||
mock_ram_strings_analyzer: Mock,
|
||||
) -> None:
|
||||
"""Test command_analyze_memory detects external components."""
|
||||
setup_core(platform=PLATFORM_ESP32, tmp_path=tmp_path, name="test_device")
|
||||
|
||||
Reference in New Issue
Block a user