MAESTRO: Add ArchitectureAnalysis dataclass for priority analysis (Phase 06)

bryce13950 · claude · bryce13950 · commit 024b2e090c77 · 2025-12-30T17:06:44.000+01:00
Added new dataclasses to schemas.py for architecture prioritization:
- TopModel: Represents a top model with model_id and downloads count
- ArchitectureAnalysis: Comprehensive analysis dataclass containing:
  - architecture_id, total_models, total_downloads, avg_model_downloads
  - top_models (list of TopModel), priority_score, has_official_implementation

Both classes include to_dict() and from_dict() methods for JSON serialization.
Added comprehensive unit tests in test_schemas.py.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
diff --git a/Auto Run Docs/Phase-06-Architecture-Priority-Analysis.md b/Auto Run Docs/Phase-06-Architecture-Priority-Analysis.md
@@ -0,0 +1,20 @@
+# Phase 06: Architecture Priority Analysis
+
+This phase enhances the architecture gap analysis tool with intelligent prioritization features. Instead of just showing model counts, it provides actionable insights about which architectures would be most valuable to support next based on multiple factors.
+
+## Tasks
+
+- [x] Enhance `schemas.py` with ArchitectureAnalysis dataclass containing: architecture_id, total_models, total_downloads, avg_model_downloads, top_models (list of top 5 by downloads), priority_score, has_official_implementation
+  - **Completed**: Added `TopModel` dataclass (model_id, downloads) and `ArchitectureAnalysis` dataclass with all required fields. Both include `to_dict()`, `from_dict()` methods. Added comprehensive tests to `test_schemas.py` (TestTopModel and TestArchitectureAnalysis classes).
+- [ ] Update `get_all_architectures()` to also collect aggregate download statistics per architecture
+- [ ] Create `transformer_lens/tools/model_registry/priority.py` with function `calculate_priority_score(architecture)` using weighted formula: (total_downloads * 0.4) + (model_count * 0.3) + (recent_activity * 0.3)
+- [ ] Add function `get_top_models_per_architecture(architecture_id, n=5)` returning the `n` most downloaded models for the given architecture
+- [ ] Add function `detect_architecture_family(architecture_id)` to group related architectures (e.g., Llama variants, GPT variants)
+- [ ] Update `generate_architecture_gaps.py` to use ArchitectureAnalysis and include priority scores in output
+- [ ] Create `docs/ARCHITECTURE_ROADMAP.md` generator showing prioritized list with reasoning for each priority level
+- [ ] Add `--top-n` CLI flag to limit architecture gap analysis to top N by priority score (default: 50)
+- [ ] Add function `find_similar_supported_architectures(unsupported_id)` that suggests which existing adapter might be closest match for implementation reference
+- [ ] Create `docs/IMPLEMENTATION_GUIDE.md` generator that for each top priority architecture shows: similar supported architecture, key differences to handle, estimated complexity
+- [ ] Add `--analyze ARCHITECTURE_ID` CLI command that provides deep analysis of a specific unsupported architecture
+- [ ] Add monthly trend tracking by storing snapshots of architecture statistics in `data/trends/YYYY-MM.json`
+- [ ] Create `docs/TRENDS.md` generator showing which unsupported architectures are growing fastest
diff --git a/tests/tools/model_registry/test_schemas.py b/tests/tools/model_registry/test_schemas.py
@@ -14,11 +14,13 @@
 import pytest
 
 from transformer_lens.tools.model_registry.schemas import (
+    ArchitectureAnalysis,
     ArchitectureGap,
     ArchitectureGapsReport,
     ModelEntry,
     ModelMetadata,
     SupportedModelsReport,
+    TopModel,
 )
 
 
@@ -295,6 +297,250 @@ def test_json_serialization(self):
         assert parsed["architecture_id"] == "LlamaForCausalLM"
 
 
+class TestTopModel:
+    """Tests for the TopModel dataclass: construction and dict/JSON serialization."""
+
+    def test_initialization(self):
+        """Test that constructor arguments are stored on the matching attributes."""
+        model = TopModel(
+            model_id="meta-llama/Llama-2-7b-hf",
+            downloads=1_000_000,
+        )
+        assert model.model_id == "meta-llama/Llama-2-7b-hf"
+        assert model.downloads == 1_000_000
+
+    def test_to_dict(self):
+        """Test that to_dict returns exactly the model_id and downloads keys."""
+        model = TopModel(
+            model_id="google/gemma-2b",
+            downloads=500_000,
+        )
+        result = model.to_dict()
+        assert result == {
+            "model_id": "google/gemma-2b",
+            "downloads": 500_000,
+        }
+
+    def test_from_dict(self):
+        """Test that from_dict builds a TopModel from a dict holding both keys."""
+        data = {
+            "model_id": "mistralai/Mistral-7B-v0.1",
+            "downloads": 750_000,
+        }
+        model = TopModel.from_dict(data)
+        assert model.model_id == "mistralai/Mistral-7B-v0.1"
+        assert model.downloads == 750_000
+
+    def test_roundtrip_serialization(self):
+        """Test that to_dict -> from_dict preserves both fields."""
+        original = TopModel(
+            model_id="test/model",
+            downloads=12345,
+        )
+        serialized = original.to_dict()
+        deserialized = TopModel.from_dict(serialized)
+        assert deserialized.model_id == original.model_id
+        assert deserialized.downloads == original.downloads
+
+    def test_json_serialization(self):
+        """Test that to_dict output survives a json.dumps -> json.loads cycle."""
+        model = TopModel(model_id="org/model", downloads=100)
+        json_str = json.dumps(model.to_dict())
+        parsed = json.loads(json_str)
+        assert parsed["model_id"] == "org/model"
+        assert parsed["downloads"] == 100
+
+
+class TestArchitectureAnalysis:
+    """Tests for the ArchitectureAnalysis dataclass: defaults and dict/JSON serialization."""
+
+    def test_required_fields_only(self):
+        """Test that optional fields take their defaults when only required fields are given."""
+        analysis = ArchitectureAnalysis(
+            architecture_id="LlamaForCausalLM",
+            total_models=1000,
+            total_downloads=50_000_000,
+            avg_model_downloads=50_000.0,
+        )
+        assert analysis.architecture_id == "LlamaForCausalLM"
+        assert analysis.total_models == 1000
+        assert analysis.total_downloads == 50_000_000
+        assert analysis.avg_model_downloads == 50_000.0
+        assert analysis.top_models == []
+        assert analysis.priority_score == 0.0
+        assert analysis.has_official_implementation is False
+
+    def test_all_fields(self):
+        """Test that every field, including the optional ones, is stored as passed."""
+        top_models = [
+            TopModel(model_id="meta-llama/Llama-2-7b-hf", downloads=5_000_000),
+            TopModel(model_id="meta-llama/Llama-2-13b-hf", downloads=3_000_000),
+        ]
+        analysis = ArchitectureAnalysis(
+            architecture_id="LlamaForCausalLM",
+            total_models=1000,
+            total_downloads=50_000_000,
+            avg_model_downloads=50_000.0,
+            top_models=top_models,
+            priority_score=0.85,
+            has_official_implementation=True,
+        )
+        assert analysis.architecture_id == "LlamaForCausalLM"
+        assert analysis.total_models == 1000
+        assert analysis.total_downloads == 50_000_000
+        assert analysis.avg_model_downloads == 50_000.0
+        assert len(analysis.top_models) == 2
+        assert analysis.top_models[0].model_id == "meta-llama/Llama-2-7b-hf"
+        assert analysis.priority_score == 0.85
+        assert analysis.has_official_implementation is True
+
+    def test_to_dict_without_top_models(self):
+        """Test the full to_dict output when the optional fields are left at their defaults."""
+        analysis = ArchitectureAnalysis(
+            architecture_id="GPT2LMHeadModel",
+            total_models=500,
+            total_downloads=10_000_000,
+            avg_model_downloads=20_000.0,
+        )
+        result = analysis.to_dict()
+        assert result == {
+            "architecture_id": "GPT2LMHeadModel",
+            "total_models": 500,
+            "total_downloads": 10_000_000,
+            "avg_model_downloads": 20_000.0,
+            "top_models": [],
+            "priority_score": 0.0,
+            "has_official_implementation": False,
+        }
+
+    def test_to_dict_with_top_models(self):
+        """Test that to_dict converts each TopModel into a nested dict."""
+        top_models = [
+            TopModel(model_id="openai-community/gpt2", downloads=2_000_000),
+            TopModel(model_id="openai-community/gpt2-medium", downloads=500_000),
+        ]
+        analysis = ArchitectureAnalysis(
+            architecture_id="GPT2LMHeadModel",
+            total_models=500,
+            total_downloads=10_000_000,
+            avg_model_downloads=20_000.0,
+            top_models=top_models,
+            priority_score=0.75,
+            has_official_implementation=True,
+        )
+        result = analysis.to_dict()
+        assert result["architecture_id"] == "GPT2LMHeadModel"
+        assert result["total_models"] == 500
+        assert result["total_downloads"] == 10_000_000
+        assert result["avg_model_downloads"] == 20_000.0
+        assert len(result["top_models"]) == 2
+        assert result["top_models"][0]["model_id"] == "openai-community/gpt2"
+        assert result["priority_score"] == 0.75
+        assert result["has_official_implementation"] is True
+
+    def test_from_dict_required_fields_only(self):
+        """Test that from_dict fills in defaults for omitted optional keys."""
+        data = {
+            "architecture_id": "MistralForCausalLM",
+            "total_models": 200,
+            "total_downloads": 5_000_000,
+            "avg_model_downloads": 25_000.0,
+        }
+        analysis = ArchitectureAnalysis.from_dict(data)
+        assert analysis.architecture_id == "MistralForCausalLM"
+        assert analysis.total_models == 200
+        assert analysis.total_downloads == 5_000_000
+        assert analysis.avg_model_downloads == 25_000.0
+        assert analysis.top_models == []
+        assert analysis.priority_score == 0.0
+        assert analysis.has_official_implementation is False
+
+    def test_from_dict_all_fields(self):
+        """Test that from_dict rebuilds TopModel instances from the nested dicts."""
+        data = {
+            "architecture_id": "LlamaForCausalLM",
+            "total_models": 1000,
+            "total_downloads": 50_000_000,
+            "avg_model_downloads": 50_000.0,
+            "top_models": [
+                {"model_id": "meta-llama/Llama-2-7b-hf", "downloads": 5_000_000},
+                {"model_id": "meta-llama/Llama-2-13b-hf", "downloads": 3_000_000},
+            ],
+            "priority_score": 0.85,
+            "has_official_implementation": True,
+        }
+        analysis = ArchitectureAnalysis.from_dict(data)
+        assert analysis.architecture_id == "LlamaForCausalLM"
+        assert analysis.total_models == 1000
+        assert len(analysis.top_models) == 2
+        assert analysis.top_models[0].model_id == "meta-llama/Llama-2-7b-hf"
+        assert analysis.top_models[1].downloads == 3_000_000
+        assert analysis.priority_score == 0.85
+        assert analysis.has_official_implementation is True
+
+    def test_roundtrip_serialization(self):
+        """Test that to_dict -> from_dict preserves the compared fields."""
+        top_models = [
+            TopModel(model_id="org/model1", downloads=100_000),
+            TopModel(model_id="org/model2", downloads=50_000),
+            TopModel(model_id="org/model3", downloads=25_000),
+        ]
+        original = ArchitectureAnalysis(
+            architecture_id="TestArch",
+            total_models=300,
+            total_downloads=1_000_000,
+            avg_model_downloads=3333.33,
+            top_models=top_models,
+            priority_score=0.42,
+            has_official_implementation=True,
+        )
+        serialized = original.to_dict()
+        deserialized = ArchitectureAnalysis.from_dict(serialized)
+        assert deserialized.architecture_id == original.architecture_id
+        assert deserialized.total_models == original.total_models
+        assert deserialized.total_downloads == original.total_downloads
+        assert deserialized.avg_model_downloads == original.avg_model_downloads
+        assert len(deserialized.top_models) == len(original.top_models)
+        assert deserialized.top_models[0].model_id == original.top_models[0].model_id
+        assert deserialized.priority_score == original.priority_score
+        assert deserialized.has_official_implementation == original.has_official_implementation
+
+    def test_json_serialization(self):
+        """Test that to_dict output survives a json.dumps -> json.loads cycle."""
+        analysis = ArchitectureAnalysis(
+            architecture_id="JsonArch",
+            total_models=50,
+            total_downloads=100_000,
+            avg_model_downloads=2000.0,
+            top_models=[TopModel(model_id="test/model", downloads=10_000)],
+            priority_score=0.5,
+            has_official_implementation=False,
+        )
+        json_str = json.dumps(analysis.to_dict())
+        parsed = json.loads(json_str)
+        assert parsed["architecture_id"] == "JsonArch"
+        assert parsed["total_models"] == 50
+        assert len(parsed["top_models"]) == 1
+
+    def test_top_models_empty_list_default(self):
+        """Test that each instance gets its own default top_models list (no shared state)."""
+        analysis1 = ArchitectureAnalysis(
+            architecture_id="Arch1",
+            total_models=100,
+            total_downloads=1000,
+            avg_model_downloads=10.0,
+        )
+        analysis2 = ArchitectureAnalysis(
+            architecture_id="Arch2",
+            total_models=200,
+            total_downloads=2000,
+            avg_model_downloads=10.0,
+        )
+        analysis1.top_models.append(TopModel(model_id="test/model", downloads=100))
+        # Verify that modifying analysis1's top_models doesn't affect analysis2
+        assert analysis2.top_models == []
+
+
 class TestArchitectureGap:
     """Tests for ArchitectureGap dataclass."""
 
diff --git a/transformer_lens/tools/model_registry/schemas.py b/transformer_lens/tools/model_registry/schemas.py
@@ -99,6 +99,86 @@ def from_dict(cls, data: dict) -> "ModelEntry":
         )
 
 
+@dataclass
+class TopModel:
+    """A single model and its download count, as listed in ArchitectureAnalysis.top_models.
+
+    Attributes:
+        model_id: The HuggingFace model identifier (e.g., "meta-llama/Llama-2-7b-hf")
+        downloads: Total download count for the model
+    """
+
+    model_id: str
+    downloads: int
+
+    def to_dict(self) -> dict:
+        """Return a plain dict of both fields, suitable for JSON serialization."""
+        return {
+            "model_id": self.model_id,
+            "downloads": self.downloads,
+        }
+
+    @classmethod
+    def from_dict(cls, data: dict) -> "TopModel":
+        """Create a TopModel from a dict; both keys are required (KeyError if absent)."""
+        return cls(
+            model_id=data["model_id"],
+            downloads=data["downloads"],
+        )
+
+
+@dataclass
+class ArchitectureAnalysis:
+    """Comprehensive analysis of an architecture with prioritization data.
+
+    This dataclass provides detailed information about an architecture to help
+    prioritize which unsupported architectures should be implemented next.
+
+    Attributes:
+        architecture_id: The HuggingFace architecture class name (e.g., "LlamaForCausalLM")
+        total_models: The total number of models using this architecture on HuggingFace
+        total_downloads: The aggregate download count across all models of this architecture
+        avg_model_downloads: The average downloads per model (stored as given, not computed here)
+        top_models: Top models by downloads (intended: top 5; the length is not enforced here)
+        priority_score: Calculated priority score for implementation (higher = more important)
+        has_official_implementation: Whether an official (non-community) implementation exists
+    """
+
+    architecture_id: str
+    total_models: int
+    total_downloads: int
+    avg_model_downloads: float
+    top_models: list[TopModel] = field(default_factory=list)
+    priority_score: float = 0.0
+    has_official_implementation: bool = False
+
+    def to_dict(self) -> dict:
+        """Return a plain dict of all fields for JSON serialization; top_models become dicts."""
+        return {
+            "architecture_id": self.architecture_id,
+            "total_models": self.total_models,
+            "total_downloads": self.total_downloads,
+            "avg_model_downloads": self.avg_model_downloads,
+            "top_models": [m.to_dict() for m in self.top_models],
+            "priority_score": self.priority_score,
+            "has_official_implementation": self.has_official_implementation,
+        }
+
+    @classmethod
+    def from_dict(cls, data: dict) -> "ArchitectureAnalysis":
+        """Create an ArchitectureAnalysis from a dict; the last three keys are optional."""
+        top_models = [TopModel.from_dict(m) for m in data.get("top_models", [])]
+        return cls(
+            architecture_id=data["architecture_id"],
+            total_models=data["total_models"],
+            total_downloads=data["total_downloads"],
+            avg_model_downloads=data["avg_model_downloads"],
+            top_models=top_models,
+            priority_score=data.get("priority_score", 0.0),
+            has_official_implementation=data.get("has_official_implementation", False),
+        )
+
+
 @dataclass
 class ArchitectureGap:
     """Represents an unsupported architecture and its model count on HuggingFace.