[core] Add RAM strings and symbols analysis to analyze-memory command (#12161)

2025-12-02 10:02:09 -06:00
parent 5142ff372b
commit 101103c666
6 changed files with 779 additions and 173 deletions
--- a/esphome/__main__.py
+++ b/esphome/__main__.py
@@ -944,6 +944,7 @@ def command_analyze_memory(args: ArgsProtocol, config: ConfigType) -> int:
    """
    from esphome import platformio_api
    from esphome.analyze_memory.cli import MemoryAnalyzerCLI
+    from esphome.analyze_memory.ram_strings import RamStringsAnalyzer

    # Always compile to ensure fresh data (fast if no changes - just relinks)
    exit_code = write_cpp(config)
@@ -966,7 +967,7 @@ def command_analyze_memory(args: ArgsProtocol, config: ConfigType) -> int:
    external_components = detect_external_components(config)
    _LOGGER.debug("Detected external components: %s", external_components)

-    # Perform memory analysis
+    # Perform component memory analysis
    _LOGGER.info("Analyzing memory usage...")
    analyzer = MemoryAnalyzerCLI(
        str(firmware_elf),
@@ -976,11 +977,28 @@ def command_analyze_memory(args: ArgsProtocol, config: ConfigType) -> int:
    )
    analyzer.analyze()

-    # Generate and display report
+    # Generate and display component report
    report = analyzer.generate_report()
    print()
    print(report)

+    # Perform RAM strings analysis
+    _LOGGER.info("Analyzing RAM strings...")
+    try:
+        ram_analyzer = RamStringsAnalyzer(
+            str(firmware_elf),
+            objdump_path=idedata.objdump_path,
+            platform=CORE.target_platform,
+        )
+        ram_analyzer.analyze()
+
+        # Generate and display RAM strings report
+        ram_report = ram_analyzer.generate_report()
+        print()
+        print(ram_report)
+    except Exception as e:  # pylint: disable=broad-except
+        _LOGGER.warning("RAM strings analysis failed: %s", e)
+
    return 0


--- a/esphome/analyze_memory/__init__.py
+++ b/esphome/analyze_memory/__init__.py
@@ -15,6 +15,7 @@ from .const import (
    SECTION_TO_ATTR,
    SYMBOL_PATTERNS,
 )
+from .demangle import batch_demangle
 from .helpers import (
    get_component_class_patterns,
    get_esphome_components,
@@ -27,15 +28,6 @@ if TYPE_CHECKING:

 _LOGGER = logging.getLogger(__name__)

-# GCC global constructor/destructor prefix annotations
-_GCC_PREFIX_ANNOTATIONS = {
-    "_GLOBAL__sub_I_": "global constructor for",
-    "_GLOBAL__sub_D_": "global destructor for",
-}
-
-# GCC optimization suffix pattern (e.g., $isra$0, $part$1, $constprop$2)
-_GCC_OPTIMIZATION_SUFFIX_PATTERN = re.compile(r"(\$(?:isra|part|constprop)\$\d+)")
-
 # C++ runtime patterns for categorization
 _CPP_RUNTIME_PATTERNS = frozenset(["vtable", "typeinfo", "thunk"])

@@ -312,168 +304,9 @@ class MemoryAnalyzer:
        if not symbols:
            return

-        # Try to find the appropriate c++filt for the platform
-        cppfilt_cmd = "c++filt"
-
        _LOGGER.info("Demangling %d symbols", len(symbols))
-        _LOGGER.debug("objdump_path = %s", self.objdump_path)
-
-        # Check if we have a toolchain-specific c++filt
-        if self.objdump_path and self.objdump_path != "objdump":
-            # Replace objdump with c++filt in the path
-            potential_cppfilt = self.objdump_path.replace("objdump", "c++filt")
-            _LOGGER.info("Checking for toolchain c++filt at: %s", potential_cppfilt)
-            if Path(potential_cppfilt).exists():
-                cppfilt_cmd = potential_cppfilt
-                _LOGGER.info("✓ Using toolchain c++filt: %s", cppfilt_cmd)
-            else:
-                _LOGGER.info(
-                    "✗ Toolchain c++filt not found at %s, using system c++filt",
-                    potential_cppfilt,
-                )
-        else:
-            _LOGGER.info("✗ Using system c++filt (objdump_path=%s)", self.objdump_path)
-
-        # Strip GCC optimization suffixes and prefixes before demangling
-        # Suffixes like $isra$0, $part$0, $constprop$0 confuse c++filt
-        # Prefixes like _GLOBAL__sub_I_ need to be removed and tracked
-        symbols_stripped: list[str] = []
-        symbols_prefixes: list[str] = []  # Track removed prefixes
-        for symbol in symbols:
-            # Remove GCC optimization markers
-            stripped = _GCC_OPTIMIZATION_SUFFIX_PATTERN.sub("", symbol)
-
-            # Handle GCC global constructor/initializer prefixes
-            # _GLOBAL__sub_I_<mangled> -> extract <mangled> for demangling
-            prefix = ""
-            for gcc_prefix in _GCC_PREFIX_ANNOTATIONS:
-                if stripped.startswith(gcc_prefix):
-                    prefix = gcc_prefix
-                    stripped = stripped[len(prefix) :]
-                    break
-
-            symbols_stripped.append(stripped)
-            symbols_prefixes.append(prefix)
-
-        try:
-            # Send all symbols to c++filt at once
-            result = subprocess.run(
-                [cppfilt_cmd],
-                input="\n".join(symbols_stripped),
-                capture_output=True,
-                text=True,
-                check=False,
-            )
-        except (subprocess.SubprocessError, OSError, UnicodeDecodeError) as e:
-            # On error, cache originals
-            _LOGGER.warning("Failed to batch demangle symbols: %s", e)
-            for symbol in symbols:
-                self._demangle_cache[symbol] = symbol
-            return
-
-        if result.returncode != 0:
-            _LOGGER.warning(
-                "c++filt exited with code %d: %s",
-                result.returncode,
-                result.stderr[:200] if result.stderr else "(no error output)",
-            )
-            # Cache originals on failure
-            for symbol in symbols:
-                self._demangle_cache[symbol] = symbol
-            return
-
-        # Process demangled output
-        self._process_demangled_output(
-            symbols, symbols_stripped, symbols_prefixes, result.stdout, cppfilt_cmd
-        )
-
-    def _process_demangled_output(
-        self,
-        symbols: list[str],
-        symbols_stripped: list[str],
-        symbols_prefixes: list[str],
-        demangled_output: str,
-        cppfilt_cmd: str,
-    ) -> None:
-        """Process demangled symbol output and populate cache.
-
-        Args:
-            symbols: Original symbol names
-            symbols_stripped: Stripped symbol names sent to c++filt
-            symbols_prefixes: Removed prefixes to restore
-            demangled_output: Output from c++filt
-            cppfilt_cmd: Path to c++filt command (for logging)
-        """
-        demangled_lines = demangled_output.strip().split("\n")
-        failed_count = 0
-
-        for original, stripped, prefix, demangled in zip(
-            symbols, symbols_stripped, symbols_prefixes, demangled_lines
-        ):
-            # Add back any prefix that was removed
-            demangled = self._restore_symbol_prefix(prefix, stripped, demangled)
-
-            # If we stripped a suffix, add it back to the demangled name for clarity
-            if original != stripped and not prefix:
-                demangled = self._restore_symbol_suffix(original, demangled)
-
-            self._demangle_cache[original] = demangled
-
-            # Log symbols that failed to demangle (stayed the same as stripped version)
-            if stripped == demangled and stripped.startswith("_Z"):
-                failed_count += 1
-                if failed_count <= 5:  # Only log first 5 failures
-                    _LOGGER.warning("Failed to demangle: %s", original)
-
-        if failed_count == 0:
-            _LOGGER.info("Successfully demangled all %d symbols", len(symbols))
-            return
-
-        _LOGGER.warning(
-            "Failed to demangle %d/%d symbols using %s",
-            failed_count,
-            len(symbols),
-            cppfilt_cmd,
-        )
-
-    @staticmethod
-    def _restore_symbol_prefix(prefix: str, stripped: str, demangled: str) -> str:
-        """Restore prefix that was removed before demangling.
-
-        Args:
-            prefix: Prefix that was removed (e.g., "_GLOBAL__sub_I_")
-            stripped: Stripped symbol name
-            demangled: Demangled symbol name
-
-        Returns:
-            Demangled name with prefix restored/annotated
-        """
-        if not prefix:
-            return demangled
-
-        # Successfully demangled - add descriptive prefix
-        if demangled != stripped and (
-            annotation := _GCC_PREFIX_ANNOTATIONS.get(prefix)
-        ):
-            return f"[{annotation}: {demangled}]"
-
-        # Failed to demangle - restore original prefix
-        return prefix + demangled
-
-    @staticmethod
-    def _restore_symbol_suffix(original: str, demangled: str) -> str:
-        """Restore GCC optimization suffix that was removed before demangling.
-
-        Args:
-            original: Original symbol name with suffix
-            demangled: Demangled symbol name without suffix
-
-        Returns:
-            Demangled name with suffix annotation
-        """
-        if suffix_match := _GCC_OPTIMIZATION_SUFFIX_PATTERN.search(original):
-            return f"{demangled} [{suffix_match.group(1)}]"
-        return demangled
+        self._demangle_cache = batch_demangle(symbols, objdump_path=self.objdump_path)
+        _LOGGER.info("Successfully demangled %d symbols", len(self._demangle_cache))

    def _demangle_symbol(self, symbol: str) -> str:
        """Get demangled C++ symbol name from cache."""
--- a/esphome/analyze_memory/demangle.py
+++ b/esphome/analyze_memory/demangle.py
@@ -0,0 +1,182 @@
+"""Symbol demangling utilities for memory analysis.
+
+This module provides functions for demangling C++ symbol names using c++filt.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+import subprocess
+
+from .toolchain import find_tool
+
+_LOGGER = logging.getLogger(__name__)
+
+# GCC global constructor/destructor prefix annotations
+GCC_PREFIX_ANNOTATIONS = {
+    "_GLOBAL__sub_I_": "global constructor for",
+    "_GLOBAL__sub_D_": "global destructor for",
+}
+
+# GCC optimization suffix pattern (e.g., $isra$0, $part$1, $constprop$2)
+GCC_OPTIMIZATION_SUFFIX_PATTERN = re.compile(r"(\$(?:isra|part|constprop)\$\d+)")
+
+
+def _strip_gcc_annotations(symbol: str) -> tuple[str, str]:
+    """Strip GCC optimization suffixes and prefixes from a symbol.
+
+    Args:
+        symbol: The mangled symbol name
+
+    Returns:
+        Tuple of (stripped_symbol, removed_prefix)
+    """
+    # Remove GCC optimization markers
+    stripped = GCC_OPTIMIZATION_SUFFIX_PATTERN.sub("", symbol)
+
+    # Handle GCC global constructor/initializer prefixes
+    prefix = ""
+    for gcc_prefix in GCC_PREFIX_ANNOTATIONS:
+        if stripped.startswith(gcc_prefix):
+            prefix = gcc_prefix
+            stripped = stripped[len(prefix) :]
+            break
+
+    return stripped, prefix
+
+
+def _restore_symbol_prefix(prefix: str, stripped: str, demangled: str) -> str:
+    """Restore prefix that was removed before demangling.
+
+    Args:
+        prefix: Prefix that was removed (e.g., "_GLOBAL__sub_I_")
+        stripped: Stripped symbol name
+        demangled: Demangled symbol name
+
+    Returns:
+        Demangled name with prefix restored/annotated
+    """
+    if not prefix:
+        return demangled
+
+    # Successfully demangled - add descriptive prefix
+    if demangled != stripped and (annotation := GCC_PREFIX_ANNOTATIONS.get(prefix)):
+        return f"[{annotation}: {demangled}]"
+
+    # Failed to demangle - restore original prefix
+    return prefix + demangled
+
+
+def _restore_symbol_suffix(original: str, demangled: str) -> str:
+    """Restore GCC optimization suffix that was removed before demangling.
+
+    Args:
+        original: Original symbol name with suffix
+        demangled: Demangled symbol name without suffix
+
+    Returns:
+        Demangled name with suffix annotation
+    """
+    if suffix_match := GCC_OPTIMIZATION_SUFFIX_PATTERN.search(original):
+        return f"{demangled} [{suffix_match.group(1)}]"
+    return demangled
+
+
+def batch_demangle(
+    symbols: list[str],
+    cppfilt_path: str | None = None,
+    objdump_path: str | None = None,
+) -> dict[str, str]:
+    """Batch demangle C++ symbol names.
+
+    Args:
+        symbols: List of symbol names to demangle
+        cppfilt_path: Path to c++filt binary (auto-detected if not provided)
+        objdump_path: Path to objdump binary to derive c++filt path from
+
+    Returns:
+        Dictionary mapping original symbol names to demangled names
+    """
+    cache: dict[str, str] = {}
+
+    if not symbols:
+        return cache
+
+    # Find c++filt tool
+    cppfilt_cmd = cppfilt_path or find_tool("c++filt", objdump_path)
+    if not cppfilt_cmd:
+        _LOGGER.warning("Could not find c++filt, symbols will not be demangled")
+        return {s: s for s in symbols}
+
+    _LOGGER.debug("Demangling %d symbols using %s", len(symbols), cppfilt_cmd)
+
+    # Strip GCC optimization suffixes and prefixes before demangling
+    symbols_stripped: list[str] = []
+    symbols_prefixes: list[str] = []
+    for symbol in symbols:
+        stripped, prefix = _strip_gcc_annotations(symbol)
+        symbols_stripped.append(stripped)
+        symbols_prefixes.append(prefix)
+
+    try:
+        result = subprocess.run(
+            [cppfilt_cmd],
+            input="\n".join(symbols_stripped),
+            capture_output=True,
+            text=True,
+            check=False,
+        )
+    except (subprocess.SubprocessError, OSError, UnicodeDecodeError) as e:
+        _LOGGER.warning("Failed to batch demangle symbols: %s", e)
+        return {s: s for s in symbols}
+
+    if result.returncode != 0:
+        _LOGGER.warning(
+            "c++filt exited with code %d: %s",
+            result.returncode,
+            result.stderr[:200] if result.stderr else "(no error output)",
+        )
+        return {s: s for s in symbols}
+
+    # Process demangled output
+    demangled_lines = result.stdout.strip().split("\n")
+
+    # Check for output length mismatch
+    if len(demangled_lines) != len(symbols):
+        _LOGGER.warning(
+            "c++filt output mismatch: expected %d lines, got %d",
+            len(symbols),
+            len(demangled_lines),
+        )
+        return {s: s for s in symbols}
+
+    failed_count = 0
+
+    for original, stripped, prefix, demangled in zip(
+        symbols, symbols_stripped, symbols_prefixes, demangled_lines
+    ):
+        # Add back any prefix that was removed
+        demangled = _restore_symbol_prefix(prefix, stripped, demangled)
+
+        # If we stripped a suffix, add it back to the demangled name for clarity
+        if original != stripped and not prefix:
+            demangled = _restore_symbol_suffix(original, demangled)
+
+        cache[original] = demangled
+
+        # Count symbols that failed to demangle
+        if stripped == demangled and stripped.startswith("_Z"):
+            failed_count += 1
+            if failed_count <= 5:
+                _LOGGER.debug("Failed to demangle: %s", original)
+
+    if failed_count > 0:
+        _LOGGER.debug(
+            "Failed to demangle %d/%d symbols using %s",
+            failed_count,
+            len(symbols),
+            cppfilt_cmd,
+        )
+
+    return cache
--- a/esphome/analyze_memory/ram_strings.py
+++ b/esphome/analyze_memory/ram_strings.py
@@ -0,0 +1,493 @@
+"""Analyzer for RAM-stored strings in ESP8266/ESP32 firmware ELF files.
+
+This module identifies strings that are stored in RAM sections (.data, .bss, .rodata)
+rather than in flash sections (.irom0.text, .irom.text), which is important for
+memory-constrained platforms like ESP8266.
+"""
+
+from __future__ import annotations
+
+from collections import defaultdict
+from dataclasses import dataclass
+import logging
+from pathlib import Path
+import re
+import subprocess
+
+from .demangle import batch_demangle
+from .toolchain import find_tool
+
+_LOGGER = logging.getLogger(__name__)
+
+# ESP8266: .rodata is in RAM (DRAM), not flash
+# ESP32: .rodata is in flash, mapped to data bus
+ESP8266_RAM_SECTIONS = frozenset([".data", ".rodata", ".bss"])
+ESP8266_FLASH_SECTIONS = frozenset([".irom0.text", ".irom.text", ".text"])
+
+# ESP32: .rodata is memory-mapped from flash
+ESP32_RAM_SECTIONS = frozenset([".data", ".bss", ".dram0.data", ".dram0.bss"])
+ESP32_FLASH_SECTIONS = frozenset([".text", ".rodata", ".flash.text", ".flash.rodata"])
+
+# nm symbol types for data symbols (D=global data, d=local data, R=rodata, B=bss)
+DATA_SYMBOL_TYPES = frozenset(["D", "d", "R", "r", "B", "b"])
+
+
+@dataclass
+class SectionInfo:
+    """Information about an ELF section."""
+
+    name: str
+    address: int
+    size: int
+
+
+@dataclass
+class RamString:
+    """A string found in RAM."""
+
+    section: str
+    address: int
+    content: str
+
+    @property
+    def size(self) -> int:
+        """Size in bytes including null terminator."""
+        return len(self.content) + 1
+
+
+@dataclass
+class RamSymbol:
+    """A symbol found in RAM."""
+
+    name: str
+    sym_type: str
+    address: int
+    size: int
+    section: str
+    demangled: str = ""  # Demangled name, set after batch demangling
+
+
+class RamStringsAnalyzer:
+    """Analyzes ELF files to find strings stored in RAM."""
+
+    def __init__(
+        self,
+        elf_path: str,
+        objdump_path: str | None = None,
+        min_length: int = 8,
+        platform: str = "esp32",
+    ) -> None:
+        """Initialize the RAM strings analyzer.
+
+        Args:
+            elf_path: Path to the ELF file to analyze
+            objdump_path: Path to objdump binary (used to find other tools)
+            min_length: Minimum string length to report (default: 8)
+            platform: Platform name ("esp8266", "esp32", etc.) for section mapping
+        """
+        self.elf_path = Path(elf_path)
+        if not self.elf_path.exists():
+            raise FileNotFoundError(f"ELF file not found: {elf_path}")
+
+        self.objdump_path = objdump_path
+        self.min_length = min_length
+        self.platform = platform
+
+        # Set RAM/flash sections based on platform
+        if self.platform == "esp8266":
+            self.ram_sections = ESP8266_RAM_SECTIONS
+            self.flash_sections = ESP8266_FLASH_SECTIONS
+        else:
+            # ESP32 and other platforms
+            self.ram_sections = ESP32_RAM_SECTIONS
+            self.flash_sections = ESP32_FLASH_SECTIONS
+
+        self.sections: dict[str, SectionInfo] = {}
+        self.ram_strings: list[RamString] = []
+        self.ram_symbols: list[RamSymbol] = []
+
+    def _run_command(self, cmd: list[str]) -> str:
+        """Run a command and return its output."""
+        try:
+            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+            return result.stdout
+        except subprocess.CalledProcessError as e:
+            _LOGGER.debug("Command failed: %s - %s", " ".join(cmd), e.stderr)
+            raise
+        except FileNotFoundError:
+            _LOGGER.warning("Command not found: %s", cmd[0])
+            raise
+
+    def analyze(self) -> None:
+        """Perform the full RAM analysis."""
+        self._parse_sections()
+        self._extract_strings()
+        self._analyze_symbols()
+        self._demangle_symbols()
+
+    def _parse_sections(self) -> None:
+        """Parse section headers from ELF file."""
+        objdump = find_tool("objdump", self.objdump_path)
+        if not objdump:
+            _LOGGER.error("Could not find objdump command")
+            return
+
+        try:
+            output = self._run_command([objdump, "-h", str(self.elf_path)])
+        except (subprocess.CalledProcessError, FileNotFoundError):
+            return
+
+        # Parse section headers
+        # Format: Idx Name          Size      VMA       LMA       File off  Algn
+        section_pattern = re.compile(
+            r"^\s*\d+\s+(\S+)\s+([0-9a-fA-F]+)\s+([0-9a-fA-F]+)"
+        )
+
+        for line in output.split("\n"):
+            if match := section_pattern.match(line):
+                name = match.group(1)
+                size = int(match.group(2), 16)
+                vma = int(match.group(3), 16)
+                self.sections[name] = SectionInfo(name, vma, size)
+
+    def _extract_strings(self) -> None:
+        """Extract strings from RAM sections."""
+        objdump = find_tool("objdump", self.objdump_path)
+        if not objdump:
+            return
+
+        for section_name in self.ram_sections:
+            if section_name not in self.sections:
+                continue
+
+            try:
+                output = self._run_command(
+                    [objdump, "-s", "-j", section_name, str(self.elf_path)]
+                )
+            except subprocess.CalledProcessError:
+                # Section may exist but have no content (e.g., .bss)
+                continue
+            except FileNotFoundError:
+                continue
+
+            strings = self._parse_hex_dump(output, section_name)
+            self.ram_strings.extend(strings)
+
+    def _parse_hex_dump(self, output: str, section_name: str) -> list[RamString]:
+        """Parse hex dump output to extract strings.
+
+        Args:
+            output: Output from objdump -s
+            section_name: Name of the section being parsed
+
+        Returns:
+            List of RamString objects
+        """
+        strings: list[RamString] = []
+        current_string = bytearray()
+        string_start_addr = 0
+
+        for line in output.split("\n"):
+            # Lines look like: " 3ffef8a0 00000000 00000000 00000000 00000000  ................"
+            match = re.match(r"^\s+([0-9a-fA-F]+)\s+((?:[0-9a-fA-F]{2,8}\s*)+)", line)
+            if not match:
+                continue
+
+            addr = int(match.group(1), 16)
+            hex_data = match.group(2).strip()
+
+            # Convert hex to bytes
+            hex_bytes = hex_data.split()
+            byte_offset = 0
+            for hex_chunk in hex_bytes:
+                # Handle both byte-by-byte and word formats
+                for i in range(0, len(hex_chunk), 2):
+                    byte_val = int(hex_chunk[i : i + 2], 16)
+                    if 0x20 <= byte_val <= 0x7E:  # Printable ASCII
+                        if not current_string:
+                            string_start_addr = addr + byte_offset
+                        current_string.append(byte_val)
+                    else:
+                        if byte_val == 0 and len(current_string) >= self.min_length:
+                            # Found null terminator
+                            strings.append(
+                                RamString(
+                                    section=section_name,
+                                    address=string_start_addr,
+                                    content=current_string.decode(
+                                        "ascii", errors="ignore"
+                                    ),
+                                )
+                            )
+                        current_string = bytearray()
+                    byte_offset += 1
+
+        return strings
+
+    def _analyze_symbols(self) -> None:
+        """Analyze symbols in RAM sections."""
+        nm = find_tool("nm", self.objdump_path)
+        if not nm:
+            return
+
+        try:
+            output = self._run_command([nm, "-S", "--size-sort", str(self.elf_path)])
+        except (subprocess.CalledProcessError, FileNotFoundError):
+            return
+
+        for line in output.split("\n"):
+            parts = line.split()
+            if len(parts) < 4:
+                continue
+
+            try:
+                addr = int(parts[0], 16)
+                size = int(parts[1], 16) if parts[1] != "?" else 0
+            except ValueError:
+                continue
+
+            sym_type = parts[2]
+            name = " ".join(parts[3:])
+
+            # Filter for data symbols
+            if sym_type not in DATA_SYMBOL_TYPES:
+                continue
+
+            # Check if symbol is in a RAM section
+            for section_name in self.ram_sections:
+                if section_name not in self.sections:
+                    continue
+
+                section = self.sections[section_name]
+                if section.address <= addr < section.address + section.size:
+                    self.ram_symbols.append(
+                        RamSymbol(
+                            name=name,
+                            sym_type=sym_type,
+                            address=addr,
+                            size=size,
+                            section=section_name,
+                        )
+                    )
+                    break
+
+    def _demangle_symbols(self) -> None:
+        """Batch demangle all RAM symbol names."""
+        if not self.ram_symbols:
+            return
+
+        # Collect all symbol names and demangle them
+        symbol_names = [s.name for s in self.ram_symbols]
+        demangle_cache = batch_demangle(symbol_names, objdump_path=self.objdump_path)
+
+        # Assign demangled names to symbols
+        for symbol in self.ram_symbols:
+            symbol.demangled = demangle_cache.get(symbol.name, symbol.name)
+
+    def _get_sections_size(self, section_names: frozenset[str]) -> int:
+        """Get total size of specified sections."""
+        return sum(
+            section.size
+            for name, section in self.sections.items()
+            if name in section_names
+        )
+
+    def get_total_ram_usage(self) -> int:
+        """Get total RAM usage from RAM sections."""
+        return self._get_sections_size(self.ram_sections)
+
+    def get_total_flash_usage(self) -> int:
+        """Get total flash usage from flash sections."""
+        return self._get_sections_size(self.flash_sections)
+
+    def get_total_string_bytes(self) -> int:
+        """Get total bytes used by strings in RAM."""
+        return sum(s.size for s in self.ram_strings)
+
+    def get_repeated_strings(self) -> list[tuple[str, int]]:
+        """Find strings that appear multiple times.
+
+        Returns:
+            List of (string, count) tuples sorted by potential savings
+        """
+        string_counts: dict[str, int] = defaultdict(int)
+        for ram_string in self.ram_strings:
+            string_counts[ram_string.content] += 1
+
+        return sorted(
+            [(s, c) for s, c in string_counts.items() if c > 1],
+            key=lambda x: x[1] * (len(x[0]) + 1),
+            reverse=True,
+        )
+
+    def get_long_strings(self, min_len: int = 20) -> list[RamString]:
+        """Get strings longer than the specified length.
+
+        Args:
+            min_len: Minimum string length
+
+        Returns:
+            List of RamString objects sorted by length
+        """
+        return sorted(
+            [s for s in self.ram_strings if len(s.content) >= min_len],
+            key=lambda x: len(x.content),
+            reverse=True,
+        )
+
+    def get_largest_symbols(self, min_size: int = 100) -> list[RamSymbol]:
+        """Get RAM symbols larger than the specified size.
+
+        Args:
+            min_size: Minimum symbol size in bytes
+
+        Returns:
+            List of RamSymbol objects sorted by size
+        """
+        return sorted(
+            [s for s in self.ram_symbols if s.size >= min_size],
+            key=lambda x: x.size,
+            reverse=True,
+        )
+
+    def generate_report(self, show_all_sections: bool = False) -> str:
+        """Generate a formatted RAM strings analysis report.
+
+        Args:
+            show_all_sections: If True, show all sections, not just RAM
+
+        Returns:
+            Formatted report string
+        """
+        lines: list[str] = []
+        table_width = 80
+
+        lines.append("=" * table_width)
+        lines.append(
+            f"RAM Strings Analysis ({self.platform.upper()})".center(table_width)
+        )
+        lines.append("=" * table_width)
+        lines.append("")
+
+        # Section Analysis
+        lines.append("SECTION ANALYSIS")
+        lines.append("-" * table_width)
+        lines.append(f"{'Section':<20} {'Address':<12} {'Size':<12} {'Location'}")
+        lines.append("-" * table_width)
+
+        total_ram_usage = 0
+        total_flash_usage = 0
+
+        for name, section in sorted(self.sections.items(), key=lambda x: x[1].address):
+            if name in self.ram_sections:
+                location = "RAM"
+                total_ram_usage += section.size
+            elif name in self.flash_sections:
+                location = "FLASH"
+                total_flash_usage += section.size
+            else:
+                location = "OTHER"
+
+            if show_all_sections or name in self.ram_sections:
+                lines.append(
+                    f"{name:<20} 0x{section.address:08x}   {section.size:>8} B   {location}"
+                )
+
+        lines.append("-" * table_width)
+        lines.append(f"Total RAM sections size: {total_ram_usage:,} bytes")
+        lines.append(f"Total Flash sections size: {total_flash_usage:,} bytes")
+
+        # Strings in RAM
+        lines.append("")
+        lines.append("=" * table_width)
+        lines.append("STRINGS IN RAM SECTIONS")
+        lines.append("=" * table_width)
+        lines.append(
+            "Note: .bss sections contain uninitialized data (no strings to extract)"
+        )
+
+        # Group strings by section
+        strings_by_section: dict[str, list[RamString]] = defaultdict(list)
+        for ram_string in self.ram_strings:
+            strings_by_section[ram_string.section].append(ram_string)
+
+        for section_name in sorted(strings_by_section.keys()):
+            section_strings = strings_by_section[section_name]
+            lines.append(f"\nSection: {section_name}")
+            lines.append("-" * 40)
+            for ram_string in sorted(section_strings, key=lambda x: x.address):
+                clean_string = ram_string.content[:100] + (
+                    "..." if len(ram_string.content) > 100 else ""
+                )
+                lines.append(
+                    f'  0x{ram_string.address:08x}: "{clean_string}" (len={len(ram_string.content)})'
+                )
+
+        # Large RAM symbols
+        lines.append("")
+        lines.append("=" * table_width)
+        lines.append("LARGE DATA SYMBOLS IN RAM (>= 50 bytes)")
+        lines.append("=" * table_width)
+
+        largest_symbols = self.get_largest_symbols(50)
+        lines.append(f"\n{'Symbol':<50} {'Type':<6} {'Size':<10} {'Section'}")
+        lines.append("-" * table_width)
+
+        for symbol in largest_symbols:
+            # Use demangled name if available, otherwise raw name
+            display_name = symbol.demangled or symbol.name
+            name_display = display_name[:49] if len(display_name) > 49 else display_name
+            lines.append(
+                f"{name_display:<50} {symbol.sym_type:<6} {symbol.size:>8} B  {symbol.section}"
+            )
+
+        # Summary
+        lines.append("")
+        lines.append("=" * table_width)
+        lines.append("SUMMARY")
+        lines.append("=" * table_width)
+        lines.append(f"Total strings found in RAM: {len(self.ram_strings)}")
+        total_string_bytes = self.get_total_string_bytes()
+        lines.append(f"Total bytes used by strings: {total_string_bytes:,}")
+
+        # Optimization targets
+        lines.append("")
+        lines.append("=" * table_width)
+        lines.append("POTENTIAL OPTIMIZATION TARGETS")
+        lines.append("=" * table_width)
+
+        # Repeated strings
+        repeated = self.get_repeated_strings()[:10]
+        if repeated:
+            lines.append("\nRepeated strings (could be deduplicated):")
+            for string, count in repeated:
+                savings = (count - 1) * (len(string) + 1)
+                clean_string = string[:50] + ("..." if len(string) > 50 else "")
+                lines.append(
+                    f'  "{clean_string}" - appears {count} times (potential savings: {savings} bytes)'
+                )
+
+        # Long strings - platform-specific advice
+        long_strings = self.get_long_strings(20)[:10]
+        if long_strings:
+            if self.platform == "esp8266":
+                lines.append(
+                    "\nLong strings that could be moved to PROGMEM (>= 20 chars):"
+                )
+            else:
+                # ESP32: strings in DRAM are typically there for a reason
+                # (interrupt handlers, pre-flash-init code, etc.)
+                lines.append("\nLong strings in DRAM (>= 20 chars):")
+                lines.append(
+                    "Note: ESP32 DRAM strings may be required for interrupt/early-boot contexts"
+                )
+            for ram_string in long_strings:
+                clean_string = ram_string.content[:60] + (
+                    "..." if len(ram_string.content) > 60 else ""
+                )
+                lines.append(
+                    f'  {ram_string.section} @ 0x{ram_string.address:08x}: "{clean_string}" ({len(ram_string.content)} bytes)'
+                )
+
+        lines.append("")
+        return "\n".join(lines)
--- a/esphome/analyze_memory/toolchain.py
+++ b/esphome/analyze_memory/toolchain.py
@@ -0,0 +1,57 @@
+"""Toolchain utilities for memory analysis."""
+
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+import subprocess
+
+_LOGGER = logging.getLogger(__name__)
+
+# Command-name prefixes that find_tool() tries in order; "" (no prefix) is last
+TOOLCHAIN_PREFIXES = [
+    "xtensa-lx106-elf-",  # ESP8266
+    "xtensa-esp32-elf-",  # ESP32
+    "xtensa-esp-elf-",  # ESP32 (newer IDF)
+    "",  # System default (no prefix)
+]
+
+
+def find_tool(
+    tool_name: str,
+    objdump_path: str | None = None,
+) -> str | None:
+    """Find a toolchain tool by name.
+
+    First tries to derive the tool path from objdump_path (if provided),
+    then probes each prefix in TOOLCHAIN_PREFIXES by running ``--version``.
+
+    Args:
+        tool_name: Name of the tool (e.g., "objdump", "nm", "c++filt")
+        objdump_path: Path to objdump binary to derive other tool paths from
+
+    Returns:
+        Path (or bare command name) of the tool, or None if not found
+    """
+    # Try to derive from objdump path first (most reliable)
+    if objdump_path and objdump_path != "objdump":
+        objdump_file = Path(objdump_path)
+        # Replace just the filename portion, preserving any prefix (e.g., xtensa-esp32-elf-)
+        new_name = objdump_file.name.replace("objdump", tool_name)
+        potential_path = str(objdump_file.with_name(new_name))
+        if Path(potential_path).exists():
+            _LOGGER.debug("Found %s at: %s", tool_name, potential_path)
+            return potential_path
+
+    # Try platform-specific tools
+    for prefix in TOOLCHAIN_PREFIXES:
+        cmd = f"{prefix}{tool_name}"
+        try:
+            subprocess.run([cmd, "--version"], capture_output=True, check=True)
+            _LOGGER.debug("Found %s: %s", tool_name, cmd)
+            return cmd
+        except (subprocess.CalledProcessError, OSError):
+            # OSError covers FileNotFoundError, PermissionError, etc.
+            continue
+    _LOGGER.warning("Could not find %s tool", tool_name)
+    return None
--- a/tests/unit_tests/test_main.py
+++ b/tests/unit_tests/test_main.py
@@ -269,6 +269,16 @@ def mock_memory_analyzer_cli() -> Generator[Mock]:
        yield mock_class


+@pytest.fixture
+def mock_ram_strings_analyzer() -> Generator[Mock]:
+    """Patch RamStringsAnalyzer so its instances return a canned report."""
+    target = "esphome.analyze_memory.ram_strings.RamStringsAnalyzer"
+    with patch(target) as patched_cls:
+        fake = patched_cls.return_value = MagicMock()
+        fake.generate_report.return_value = "Mock RAM Strings Report"
+        yield patched_cls
+
+
 def test_choose_upload_log_host_with_string_default() -> None:
    """Test with a single string default device."""
    setup_core()
@@ -2424,6 +2434,7 @@ def test_command_analyze_memory_success(
    mock_get_idedata: Mock,
    mock_get_esphome_components: Mock,
    mock_memory_analyzer_cli: Mock,
+    mock_ram_strings_analyzer: Mock,
 ) -> None:
    """Test command_analyze_memory with successful compilation and analysis."""
    setup_core(platform=PLATFORM_ESP32, tmp_path=tmp_path, name="test_device")
@@ -2471,9 +2482,20 @@ def test_command_analyze_memory_success(
    mock_analyzer.analyze.assert_called_once()
    mock_analyzer.generate_report.assert_called_once()

-    # Verify report was printed
+    # Verify RAM strings analyzer was created and run
+    mock_ram_strings_analyzer.assert_called_once_with(
+        str(firmware_elf),
+        objdump_path="/path/to/objdump",
+        platform="esp32",
+    )
+    mock_ram_analyzer = mock_ram_strings_analyzer.return_value
+    mock_ram_analyzer.analyze.assert_called_once()
+    mock_ram_analyzer.generate_report.assert_called_once()
+
+    # Verify reports were printed
    captured = capfd.readouterr()
    assert "Mock Memory Report" in captured.out
+    assert "Mock RAM Strings Report" in captured.out


 def test_command_analyze_memory_with_external_components(
@@ -2483,6 +2505,7 @@ def test_command_analyze_memory_with_external_components(
    mock_get_idedata: Mock,
    mock_get_esphome_components: Mock,
    mock_memory_analyzer_cli: Mock,
+    mock_ram_strings_analyzer: Mock,
 ) -> None:
    """Test command_analyze_memory detects external components."""
    setup_core(platform=PLATFORM_ESP32, tmp_path=tmp_path, name="test_device")