Files
esphome/esphome/analyze_memory/cli.py

679 lines
26 KiB
Python

"""CLI interface for memory analysis with report generation."""
from __future__ import annotations
from collections import defaultdict
from collections.abc import Callable
import heapq
from operator import itemgetter
import sys
from typing import TYPE_CHECKING
from . import (
_COMPONENT_API,
_COMPONENT_CORE,
_COMPONENT_PREFIX_ESPHOME,
_COMPONENT_PREFIX_EXTERNAL,
_COMPONENT_PREFIX_LIB,
RAM_SECTIONS,
MemoryAnalyzer,
)
if TYPE_CHECKING:
from . import ComponentMemory
class MemoryAnalyzerCLI(MemoryAnalyzer):
"""Memory analyzer with CLI-specific report generation."""
# Symbol size threshold for detailed analysis
SYMBOL_SIZE_THRESHOLD: int = (
100 # Show symbols larger than this in detailed analysis
)
# Lower threshold for RAM symbols (RAM is more constrained)
RAM_SYMBOL_SIZE_THRESHOLD: int = 24
# Number of top symbols to show in the largest symbols report
TOP_SYMBOLS_LIMIT: int = 30
# Width for symbol name display in top symbols report
COL_TOP_SYMBOL_NAME: int = 55
# Column width constants
COL_COMPONENT: int = 29
COL_FLASH_TEXT: int = 14
COL_FLASH_DATA: int = 14
COL_RAM_DATA: int = 12
COL_RAM_BSS: int = 12
COL_TOTAL_FLASH: int = 15
COL_TOTAL_RAM: int = 12
COL_SEPARATOR: int = 3 # " | "
# Core analysis column widths
COL_CORE_SUBCATEGORY: int = 30
COL_CORE_SIZE: int = 12
COL_CORE_COUNT: int = 6
COL_CORE_PERCENT: int = 10
# Calculate table width once at class level
TABLE_WIDTH: int = (
COL_COMPONENT
+ COL_SEPARATOR
+ COL_FLASH_TEXT
+ COL_SEPARATOR
+ COL_FLASH_DATA
+ COL_SEPARATOR
+ COL_RAM_DATA
+ COL_SEPARATOR
+ COL_RAM_BSS
+ COL_SEPARATOR
+ COL_TOTAL_FLASH
+ COL_SEPARATOR
+ COL_TOTAL_RAM
)
@staticmethod
def _make_separator_line(*widths: int) -> str:
"""Create a separator line with given column widths.
Args:
widths: Column widths to create separators for
Returns:
Separator line like "----+---------+-----"
"""
return "-+-".join("-" * width for width in widths)
# Pre-computed separator lines
MAIN_TABLE_SEPARATOR: str = _make_separator_line(
COL_COMPONENT,
COL_FLASH_TEXT,
COL_FLASH_DATA,
COL_RAM_DATA,
COL_RAM_BSS,
COL_TOTAL_FLASH,
COL_TOTAL_RAM,
)
CORE_TABLE_SEPARATOR: str = _make_separator_line(
COL_CORE_SUBCATEGORY,
COL_CORE_SIZE,
COL_CORE_COUNT,
COL_CORE_PERCENT,
)
def _add_section_header(self, lines: list[str], title: str) -> None:
"""Add a section header with title centered between separator lines."""
lines.append("")
lines.append("=" * self.TABLE_WIDTH)
lines.append(title.center(self.TABLE_WIDTH))
lines.append("=" * self.TABLE_WIDTH)
lines.append("")
def _add_top_consumers(
self,
lines: list[str],
title: str,
components: list[tuple[str, ComponentMemory]],
get_size: Callable[[ComponentMemory], int],
total: int,
memory_type: str,
limit: int = 25,
) -> None:
"""Add a formatted list of top memory consumers to the report.
Args:
lines: List of report lines to append the output to.
title: Section title to print before the list.
components: Sequence of (name, ComponentMemory) tuples to analyze.
get_size: Callable that takes a ComponentMemory and returns the
size in bytes to use for ranking and display.
total: Total size in bytes for computing percentage usage.
memory_type: Label for the memory region (e.g., "flash" or "RAM").
limit: Maximum number of components to include in the list.
"""
lines.append("")
lines.append(f"{title}:")
for i, (name, mem) in enumerate(components[:limit]):
size = get_size(mem)
if size > 0:
percentage = (size / total * 100) if total > 0 else 0
lines.append(
f"{i + 1}. {name} ({size:,} B) - {percentage:.1f}% of analyzed {memory_type}"
)
def _format_symbol_with_section(
self, demangled: str, size: int, section: str | None = None
) -> str:
"""Format a symbol entry, optionally adding a RAM section label.
If section is one of the RAM sections (.data or .bss), a label like
" [data]" or " [bss]" is appended. For non-RAM sections or when
section is None, no section label is added.
"""
section_label = ""
if section in RAM_SECTIONS:
section_label = f" [{section[1:]}]" # .data -> [data], .bss -> [bss]
return f"{demangled} ({size:,} B){section_label}"
def _add_top_symbols(self, lines: list[str]) -> None:
"""Add a section showing the top largest symbols in the binary."""
# Collect all symbols from all components: (symbol, demangled, size, section, component)
all_symbols = [
(symbol, demangled, size, section, component)
for component, symbols in self._component_symbols.items()
for symbol, demangled, size, section in symbols
]
# Get top N symbols by size using heapq for efficiency
top_symbols = heapq.nlargest(
self.TOP_SYMBOLS_LIMIT, all_symbols, key=itemgetter(2)
)
lines.append("")
lines.append(f"Top {self.TOP_SYMBOLS_LIMIT} Largest Symbols:")
# Calculate truncation limit from column width (leaving room for "...")
truncate_limit = self.COL_TOP_SYMBOL_NAME - 3
for i, (_, demangled, size, section, component) in enumerate(top_symbols):
# Format section label
section_label = f"[{section[1:]}]" if section else ""
# Truncate demangled name if too long
demangled_display = (
f"{demangled[:truncate_limit]}..."
if len(demangled) > self.COL_TOP_SYMBOL_NAME
else demangled
)
lines.append(
f"{i + 1:>2}. {size:>7,} B {section_label:<8} {demangled_display:<{self.COL_TOP_SYMBOL_NAME}} {component}"
)
def _add_cswtch_analysis(self, lines: list[str]) -> None:
"""Add CSWTCH (GCC switch table lookup) analysis section."""
self._add_section_header(lines, "CSWTCH Analysis (GCC Switch Table Lookups)")
total_size = sum(size for _, size, _, _ in self._cswtch_symbols)
lines.append(
f"Total: {len(self._cswtch_symbols)} switch table(s), {total_size:,} B"
)
lines.append("")
# Group by component
by_component: dict[str, list[tuple[str, int, str]]] = defaultdict(list)
for sym_name, size, source_file, component in self._cswtch_symbols:
by_component[component].append((sym_name, size, source_file))
# Sort components by total size descending
sorted_components = sorted(
by_component.items(),
key=lambda x: sum(s[1] for s in x[1]),
reverse=True,
)
for component, symbols in sorted_components:
comp_total = sum(s[1] for s in symbols)
lines.append(f"{component} ({comp_total:,} B, {len(symbols)} tables):")
# Group by source file within component
by_file: dict[str, list[tuple[str, int]]] = defaultdict(list)
for sym_name, size, source_file in symbols:
by_file[source_file].append((sym_name, size))
for source_file, file_symbols in sorted(
by_file.items(),
key=lambda x: sum(s[1] for s in x[1]),
reverse=True,
):
file_total = sum(s[1] for s in file_symbols)
lines.append(
f" {source_file} ({file_total:,} B, {len(file_symbols)} tables)"
)
for sym_name, size in sorted(
file_symbols, key=lambda x: x[1], reverse=True
):
lines.append(f" {size:>6,} B {sym_name}")
lines.append("")
def generate_report(self, detailed: bool = False) -> str:
"""Generate a formatted memory report."""
components = sorted(
self.components.items(), key=lambda x: x[1].flash_total, reverse=True
)
# Calculate totals
total_flash = sum(c.flash_total for _, c in components)
total_ram = sum(c.ram_total for _, c in components)
# Build report
lines: list[str] = []
lines.append("=" * self.TABLE_WIDTH)
lines.append("Component Memory Analysis".center(self.TABLE_WIDTH))
lines.append("=" * self.TABLE_WIDTH)
lines.append("")
# Main table - fixed column widths
lines.append(
f"{'Component':<{self.COL_COMPONENT}} | {'Flash (text)':>{self.COL_FLASH_TEXT}} | {'Flash (data)':>{self.COL_FLASH_DATA}} | {'RAM (data)':>{self.COL_RAM_DATA}} | {'RAM (bss)':>{self.COL_RAM_BSS}} | {'Total Flash':>{self.COL_TOTAL_FLASH}} | {'Total RAM':>{self.COL_TOTAL_RAM}}"
)
lines.append(self.MAIN_TABLE_SEPARATOR)
for name, mem in components:
if mem.flash_total > 0 or mem.ram_total > 0:
flash_rodata = mem.rodata_size + mem.data_size
lines.append(
f"{name:<{self.COL_COMPONENT}} | {mem.text_size:>{self.COL_FLASH_TEXT - 2},} B | {flash_rodata:>{self.COL_FLASH_DATA - 2},} B | "
f"{mem.data_size:>{self.COL_RAM_DATA - 2},} B | {mem.bss_size:>{self.COL_RAM_BSS - 2},} B | "
f"{mem.flash_total:>{self.COL_TOTAL_FLASH - 2},} B | {mem.ram_total:>{self.COL_TOTAL_RAM - 2},} B"
)
lines.append(self.MAIN_TABLE_SEPARATOR)
lines.append(
f"{'TOTAL':<{self.COL_COMPONENT}} | {' ':>{self.COL_FLASH_TEXT}} | {' ':>{self.COL_FLASH_DATA}} | "
f"{' ':>{self.COL_RAM_DATA}} | {' ':>{self.COL_RAM_BSS}} | "
f"{total_flash:>{self.COL_TOTAL_FLASH - 2},} B | {total_ram:>{self.COL_TOTAL_RAM - 2},} B"
)
# Show unattributed RAM (SDK/framework overhead)
unattributed_bss, unattributed_data, unattributed_total = (
self.get_unattributed_ram()
)
if unattributed_total > 0:
lines.append("")
lines.append(
f"Unattributed RAM: {unattributed_total:,} B (SDK/framework overhead)"
)
if unattributed_bss > 0 and unattributed_data > 0:
lines.append(
f" .bss: {unattributed_bss:,} B | .data: {unattributed_data:,} B"
)
# Show SDK symbol breakdown if available
sdk_by_lib = self.get_sdk_ram_by_library()
if sdk_by_lib:
lines.append("")
lines.append("SDK library breakdown (static symbols not in ELF):")
# Sort libraries by total size
lib_totals = [
(lib, sum(s.size for s in syms), syms)
for lib, syms in sdk_by_lib.items()
]
lib_totals.sort(key=lambda x: x[1], reverse=True)
for lib_name, lib_total, syms in lib_totals:
if lib_total == 0:
continue
lines.append(f" {lib_name}: {lib_total:,} B")
# Show top symbols from this library
for sym in sorted(syms, key=lambda s: s.size, reverse=True)[:3]:
section_label = sym.section.lstrip(".")
# Use demangled name (falls back to original if not demangled)
display_name = sym.demangled or sym.name
if len(display_name) > 50:
display_name = f"{display_name[:47]}..."
lines.append(
f" {sym.size:>6,} B [{section_label}] {display_name}"
)
# Top consumers
self._add_top_consumers(
lines,
"Top Flash Consumers",
components,
lambda m: m.flash_total,
total_flash,
"flash",
)
ram_components = sorted(components, key=lambda x: x[1].ram_total, reverse=True)
self._add_top_consumers(
lines,
"Top RAM Consumers",
ram_components,
lambda m: m.ram_total,
total_ram,
"RAM",
)
# Top largest symbols in the binary
self._add_top_symbols(lines)
# Add ESPHome core detailed analysis if there are core symbols
if self._esphome_core_symbols:
self._add_section_header(lines, f"{_COMPONENT_CORE} Detailed Analysis")
# Group core symbols by subcategory
core_subcategories: dict[str, list[tuple[str, str, int]]] = defaultdict(
list
)
for symbol, demangled, size in self._esphome_core_symbols:
# Categorize based on demangled name patterns
subcategory = self._categorize_esphome_core_symbol(demangled)
core_subcategories[subcategory].append((symbol, demangled, size))
# Sort subcategories by total size
sorted_subcategories = sorted(
[
(name, symbols, sum(s[2] for s in symbols))
for name, symbols in core_subcategories.items()
],
key=lambda x: x[2],
reverse=True,
)
lines.append(
f"{'Subcategory':<{self.COL_CORE_SUBCATEGORY}} | {'Size':>{self.COL_CORE_SIZE}} | "
f"{'Count':>{self.COL_CORE_COUNT}} | {'% of Core':>{self.COL_CORE_PERCENT}}"
)
lines.append(self.CORE_TABLE_SEPARATOR)
core_total = sum(size for _, _, size in self._esphome_core_symbols)
for subcategory, symbols, total_size in sorted_subcategories:
percentage = (total_size / core_total * 100) if core_total > 0 else 0
lines.append(
f"{subcategory:<{self.COL_CORE_SUBCATEGORY}} | {total_size:>{self.COL_CORE_SIZE - 2},} B | "
f"{len(symbols):>{self.COL_CORE_COUNT}} | {percentage:>{self.COL_CORE_PERCENT - 1}.1f}%"
)
# All core symbols above threshold
lines.append("")
sorted_core_symbols = sorted(
self._esphome_core_symbols, key=lambda x: x[2], reverse=True
)
large_core_symbols = [
(symbol, demangled, size)
for symbol, demangled, size in sorted_core_symbols
if size > self.SYMBOL_SIZE_THRESHOLD
]
lines.append(
f"{_COMPONENT_CORE} Symbols > {self.SYMBOL_SIZE_THRESHOLD} B ({len(large_core_symbols)} symbols):"
)
for i, (symbol, demangled, size) in enumerate(large_core_symbols):
# Core symbols only track (symbol, demangled, size) without section info,
# so we don't show section labels here
lines.append(
f"{i + 1}. {self._format_symbol_with_section(demangled, size)}"
)
lines.append("=" * self.TABLE_WIDTH)
# Add detailed analysis for top ESPHome and external components
esphome_components = [
(name, mem)
for name, mem in components
if name.startswith(_COMPONENT_PREFIX_ESPHOME) and name != _COMPONENT_CORE
]
external_components = [
(name, mem)
for name, mem in components
if name.startswith(_COMPONENT_PREFIX_EXTERNAL)
]
library_components = [
(name, mem)
for name, mem in components
if name.startswith(_COMPONENT_PREFIX_LIB)
]
top_esphome_components = sorted(
esphome_components, key=lambda x: x[1].flash_total, reverse=True
)[:30]
# Include all external components (they're usually important)
top_external_components = sorted(
external_components, key=lambda x: x[1].flash_total, reverse=True
)
# Include all library components
top_library_components = sorted(
library_components, key=lambda x: x[1].flash_total, reverse=True
)
# Check if API component exists and ensure it's included
api_component = None
for name, mem in components:
if name == _COMPONENT_API:
api_component = (name, mem)
break
# Also include wifi_stack and other important system components if they exist
system_components_to_include = [
# Empty list - we've finished debugging symbol categorization
# Add component names here if you need to debug their symbols
]
system_components = [
(name, mem)
for name, mem in components
if name in system_components_to_include
]
# Combine all components to analyze: top ESPHome + all external + libraries + API if not already included + system components
components_to_analyze = (
list(top_esphome_components)
+ list(top_external_components)
+ list(top_library_components)
+ system_components
)
if api_component and api_component not in components_to_analyze:
components_to_analyze.append(api_component)
if components_to_analyze:
for comp_name, comp_mem in components_to_analyze:
if not (comp_symbols := self._component_symbols.get(comp_name, [])):
continue
self._add_section_header(lines, f"{comp_name} Detailed Analysis")
# Sort symbols by size
sorted_symbols = sorted(comp_symbols, key=lambda x: x[2], reverse=True)
lines.append(f"Total symbols: {len(sorted_symbols)}")
lines.append(f"Total size: {comp_mem.flash_total:,} B")
lines.append("")
# Show all symbols above threshold for better visibility
large_symbols = [
(sym, dem, size, sec)
for sym, dem, size, sec in sorted_symbols
if size > self.SYMBOL_SIZE_THRESHOLD
]
lines.append(
f"{comp_name} Symbols > {self.SYMBOL_SIZE_THRESHOLD} B ({len(large_symbols)} symbols):"
)
for i, (symbol, demangled, size, section) in enumerate(large_symbols):
lines.append(
f"{i + 1}. {self._format_symbol_with_section(demangled, size, section)}"
)
lines.append("=" * self.TABLE_WIDTH)
# Detailed RAM analysis by component (at end, before RAM strings analysis)
self._add_section_header(lines, "RAM Symbol Analysis by Component")
# Show top 15 RAM consumers with their large symbols
for name, mem in ram_components[:15]:
if mem.ram_total == 0:
continue
ram_syms = self._ram_symbols.get(name, [])
if not ram_syms:
continue
# Sort by size descending
sorted_ram_syms = sorted(ram_syms, key=lambda x: x[2], reverse=True)
large_ram_syms = [
s for s in sorted_ram_syms if s[2] > self.RAM_SYMBOL_SIZE_THRESHOLD
]
lines.append(f"{name} ({mem.ram_total:,} B total RAM):")
# Show breakdown by section type
data_size = sum(s[2] for s in ram_syms if s[3] == ".data")
bss_size = sum(s[2] for s in ram_syms if s[3] == ".bss")
lines.append(f" .data (initialized): {data_size:,} B")
lines.append(f" .bss (uninitialized): {bss_size:,} B")
if large_ram_syms:
lines.append(
f" Symbols > {self.RAM_SYMBOL_SIZE_THRESHOLD} B ({len(large_ram_syms)}):"
)
for symbol, demangled, size, section in large_ram_syms[:10]:
# Format section label consistently by stripping leading dot
section_label = section.lstrip(".") if section else ""
# Add ellipsis if name is truncated
demangled_display = (
f"{demangled[:70]}..." if len(demangled) > 70 else demangled
)
lines.append(
f" {size:>6,} B [{section_label}] {demangled_display}"
)
if len(large_ram_syms) > 10:
lines.append(f" ... and {len(large_ram_syms) - 10} more")
lines.append("")
# CSWTCH (GCC switch table) analysis
if self._cswtch_symbols:
self._add_cswtch_analysis(lines)
lines.append(
"Note: This analysis covers symbols in the ELF file. Some runtime allocations may not be included."
)
lines.append("=" * self.TABLE_WIDTH)
return "\n".join(lines)
def dump_uncategorized_symbols(self, output_file: str | None = None) -> None:
"""Dump uncategorized symbols for analysis."""
# Sort by size descending
sorted_symbols = sorted(
self._uncategorized_symbols, key=lambda x: x[2], reverse=True
)
lines = ["Uncategorized Symbols Analysis", "=" * 80]
lines.append(f"Total uncategorized symbols: {len(sorted_symbols)}")
lines.append(
f"Total uncategorized size: {sum(s[2] for s in sorted_symbols):,} bytes"
)
lines.append("")
lines.append(f"{'Size':>10} | {'Symbol':<60} | Demangled")
lines.append("-" * 10 + "-+-" + "-" * 60 + "-+-" + "-" * 40)
for symbol, demangled, size in sorted_symbols[:100]: # Top 100
demangled_display = (
demangled[:100] if symbol != demangled else "[not demangled]"
)
lines.append(f"{size:>10,} | {symbol[:60]:<60} | {demangled_display}")
if len(sorted_symbols) > 100:
lines.append(f"\n... and {len(sorted_symbols) - 100} more symbols")
content = "\n".join(lines)
if output_file:
with open(output_file, "w", encoding="utf-8") as f:
f.write(content)
else:
print(content)
def analyze_elf(
elf_path: str,
objdump_path: str | None = None,
readelf_path: str | None = None,
detailed: bool = False,
external_components: set[str] | None = None,
) -> str:
"""Analyze an ELF file and return a memory report."""
analyzer = MemoryAnalyzerCLI(
elf_path, objdump_path, readelf_path, external_components
)
analyzer.analyze()
return analyzer.generate_report(detailed)
def main():
"""CLI entrypoint for memory analysis."""
if len(sys.argv) < 2:
print("Usage: python -m esphome.analyze_memory <build_directory>")
print("\nAnalyze memory usage from an ESPHome build directory.")
print("The build directory should contain firmware.elf and idedata will be")
print("loaded from ~/.esphome/.internal/idedata/<device>.json")
print("\nExamples:")
print(" python -m esphome.analyze_memory ~/.esphome/build/my-device")
print(" python -m esphome.analyze_memory .esphome/build/my-device")
print(" python -m esphome.analyze_memory my-device # Short form")
sys.exit(1)
build_dir = sys.argv[1]
# Load build directory
import json
from pathlib import Path
from esphome.platformio_api import IDEData
build_path = Path(build_dir)
# If no path separator in name, assume it's a device name
if "/" not in build_dir and not build_path.is_dir():
# Try current directory first
cwd_path = Path.cwd() / ".esphome" / "build" / build_dir
if cwd_path.is_dir():
build_path = cwd_path
print(f"Using build directory: {build_path}", file=sys.stderr)
else:
# Fall back to home directory
build_path = Path.home() / ".esphome" / "build" / build_dir
print(f"Using build directory: {build_path}", file=sys.stderr)
if not build_path.is_dir():
print(f"Error: {build_path} is not a directory", file=sys.stderr)
sys.exit(1)
# Find firmware.elf
elf_file = None
for elf_candidate in [
build_path / "firmware.elf",
build_path / ".pioenvs" / build_path.name / "firmware.elf",
]:
if elf_candidate.exists():
elf_file = str(elf_candidate)
break
if not elf_file:
print(f"Error: firmware.elf not found in {build_dir}", file=sys.stderr)
sys.exit(1)
# Find idedata.json - check current directory first, then home
device_name = build_path.name
idedata_candidates = [
Path.cwd() / ".esphome" / "idedata" / f"{device_name}.json",
Path.home() / ".esphome" / "idedata" / f"{device_name}.json",
]
idedata = None
for idedata_path in idedata_candidates:
if not idedata_path.exists():
continue
try:
with open(idedata_path, encoding="utf-8") as f:
raw_data = json.load(f)
idedata = IDEData(raw_data)
print(f"Loaded idedata from: {idedata_path}", file=sys.stderr)
break
except (json.JSONDecodeError, OSError) as e:
print(f"Warning: Failed to load idedata: {e}", file=sys.stderr)
if not idedata:
print(
f"Warning: idedata not found (searched {idedata_candidates[0]} and {idedata_candidates[1]})",
file=sys.stderr,
)
analyzer = MemoryAnalyzerCLI(elf_file, idedata=idedata)
analyzer.analyze()
report = analyzer.generate_report()
print(report)
if __name__ == "__main__":
main()