""" HF Space to Daggr Node Generator ================================ Automatically generate daggr nodes from Hugging Face Gradio Spaces. Extensible architecture supporting future InferenceNode and FnNode generators. Usage: python daggr_generator.py "username/space-name" [--api-name /predict] [--output node.py] """ import argparse import json import re import sys from abc import ABC, abstractmethod from dataclasses import dataclass, field from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Tuple, Union from urllib.parse import urlparse try: from gradio_client import Client, handle_file import gradio as gr except ImportError: print("Error: Required packages not installed.") print("Run: pip install gradio gradio-client daggr") sys.exit(1) @dataclass class PortSchema: """Represents an input or output port schema.""" name: str python_type: str component_type: Optional[str] = None label: Optional[str] = None default: Any = None description: Optional[str] = None choices: Optional[List] = None def to_gradio_component(self) -> str: """Convert to gradio component code string.""" type_mapping = { "str": "gr.Textbox", "int": "gr.Number", "float": "gr.Number", "bool": "gr.Checkbox", "filepath": "gr.File", "file": "gr.File", "image": "gr.Image", "audio": "gr.Audio", "video": "gr.Video", "dict": "gr.JSON", "list": "gr.JSON", "dataframe": "gr.Dataframe", "model3d": "gr.Model3D", "downloadbutton": "gr.File", # Maps to File for download "annotatedimage": "gr.AnnotatedImage", } comp_base = type_mapping.get(self.python_type, "gr.Textbox") params = [] if self.label: params.append(f'label="{self.label}"') if self.default is not None and self.default != "": if isinstance(self.default, str): params.append(f'value="{self.default}"') else: params.append(f'value={self.default}') if self.choices: params.append(f'choices={self.choices}') if comp_base == "gr.Textbox" and self.python_type == "str": if len(str(self.default or "")) > 50: params.append("lines=3") return f"{comp_base}({', '.join(params)})" if params else comp_base @dataclass class APIEndpoint: """Represents a Gradio Space API endpoint.""" name: str route: str inputs: List[PortSchema] = field(default_factory=list) outputs: List[PortSchema] = field(default_factory=list) description: Optional[str] = None @dataclass class NodeTemplate: """Generated node configuration.""" space_id: str endpoint: APIEndpoint all_endpoints: List[str] imports: List[str] node_code: str wiring_placeholders: List[str] metadata: Dict = field(default_factory=dict) class NodeGenerator(ABC): """Abstract base class for daggr node generators.""" @abstractmethod def can_handle(self, space_info: Dict) -> bool: """Check if this generator can handle the given space.""" pass @abstractmethod def generate(self, *args, **kwargs) -> NodeTemplate: """Generate node template.""" pass class GradioNodeGenerator(NodeGenerator): """ Generator for daggr.GradioNode from HF Space URLs. Introspects API and maps components correctly. """ # Mapping of gradio component types to Python types COMPONENT_TYPE_MAP = { "textbox": "str", "number": "float", "slider": "float", "checkbox": "bool", "checkboxgroup": "list", "radio": "str", "dropdown": "str", "image": "filepath", "file": "filepath", "audio": "filepath", "video": "filepath", "dataframe": "dataframe", "json": "dict", "gallery": "list", "chatbot": "list", "code": "str", "colorpicker": "str", "timer": "float", "model3d": "model3d", "downloadbutton": "filepath", "annotatedimage": "annotatedimage", } def can_handle(self, space_info: Dict) -> bool: """Check if space has Gradio API.""" return space_info.get("sdk") == "gradio" def _extract_space_id(self, url_or_id: str) -> str: """Extract space ID from URL or return as-is if already ID.""" if url_or_id.startswith("http"): parsed = urlparse(url_or_id) # Handle https://huggingface.co/spaces/username/space-name if "huggingface.co" in parsed.netloc: path_parts = parsed.path.strip("/").split("/") if len(path_parts) >= 3 and path_parts[0] == "spaces": return "/".join(path_parts[1:3]) # Handle direct space URL return parsed.path.strip("/").split("/")[0] return url_or_id def _normalize_type(self, type_val) -> str: """Normalize Python type from API (handles both strings and dicts).""" if type_val is None: return "str" if isinstance(type_val, str): return type_val.lower() if isinstance(type_val, dict): # Handle complex types like {"type": "union", ...} # For now, default to str if complex if "type" in type_val: if type_val["type"] == "filepath": return "filepath" elif type_val["type"] == "integer": return "int" elif type_val["type"] == "float": return "float" elif type_val["type"] == "boolean": return "bool" return "str" return "str" def _parse_type_info(self, param: Dict) -> Tuple[str, str]: """Extract python_type and handle Union/Optional types.""" raw_type = param.get("python_type") python_type = self._normalize_type(raw_type) # Check if it's optional (Union with None) if isinstance(raw_type, dict) and raw_type.get("type") == "union": # Try to find non-null type choices = raw_type.get("choices", []) non_none = [c for c in choices if self._normalize_type(c) != "none"] if non_none: python_type = self._normalize_type(non_none[0]) return python_type def _inspect_endpoints(self, client: Client) -> List[APIEndpoint]: """Extract all API endpoints from Gradio Client.""" endpoints = [] # Get API info from client api_info = client.view_api(return_format="dict") if not api_info or "named_endpoints" not in api_info: return endpoints for route, info in api_info["named_endpoints"].items(): endpoint = APIEndpoint( name=info.get("fn", route), route=route, description=info.get("description", "") ) # Parse inputs for param in info.get("parameters", []): comp_type = self._detect_component_type(param) python_type = self._parse_type_info(param) port = PortSchema( name=param.get("parameter_name", "input"), python_type=self.COMPONENT_TYPE_MAP.get(comp_type, python_type), component_type=comp_type, label=param.get("label"), default=param.get("default"), description=param.get("description"), choices=param.get("choices") ) endpoint.inputs.append(port) # Parse outputs returns = info.get("returns", []) for i, ret in enumerate(returns): comp_type = self._detect_component_type(ret) python_type = self._parse_type_info(ret) # Try to get a meaningful name ret_name = ret.get("label", "") if not ret_name: if len(returns) == 1: ret_name = "result" else: ret_name = f"output_{i}" # Clean name for Python attribute ret_name = re.sub(r'[^a-zA-Z0-9_]', '_', ret_name).lower() if ret_name[0].isdigit(): ret_name = "out_" + ret_name port = PortSchema( name=ret_name, python_type=self.COMPONENT_TYPE_MAP.get(comp_type, python_type), component_type=comp_type, label=ret.get("label", f"Output {i+1}"), description=ret.get("description") ) endpoint.outputs.append(port) endpoints.append(endpoint) return endpoints def _detect_component_type(self, param: Dict) -> str: """Detect Gradio component type from parameter info.""" label = (param.get("label", "") or "").lower() # Check explicit component field first component = param.get("component", "") if component and isinstance(component, str): return component.lower() # Check for file paths based on label if "path" in label or "file" in label: if "image" in label: return "image" elif "audio" in label: return "audio" elif "video" in label: return "video" elif "3d" in label or "model" in label or "mesh" in label: return "model3d" return "file" # Check python_type for hints python_type = self._parse_type_info(param) if "image" in python_type or "pil" in python_type: return "image" elif "dataframe" in python_type: return "dataframe" elif "filepath" in python_type: if "image" in label: return "image" return "file" return "textbox" def generate( self, space_url: str, api_name: Optional[str] = None, node_name: Optional[str] = None ) -> NodeTemplate: """ Generate GradioNode template from space URL. Args: space_url: HF Space URL or ID (e.g., 'black-forest-labs/FLUX.1-schnell') api_name: Specific API endpoint to use (auto-selected if None) node_name: Custom variable name for the node (auto-generated if None) """ space_id = self._extract_space_id(space_url) var_name = node_name or self._to_snake_case(space_id.split("/")[-1]) # Connect and inspect print(f"🔍 Inspecting space: {space_id}") client = Client(space_id) endpoints = self._inspect_endpoints(client) if not endpoints: raise ValueError(f"No API endpoints found for space: {space_id}") # Select endpoint if api_name: selected = next((e for e in endpoints if e.route == api_name), None) if not selected: available = ", ".join([e.route for e in endpoints]) raise ValueError(f"API endpoint '{api_name}' not found. Available: {available}") else: # Find best endpoint (one with inputs and outputs, not lambda) candidates = [e for e in endpoints if e.inputs or e.outputs] candidates = [e for e in candidates if not e.route.startswith("/lambda")] selected = candidates[0] if candidates else endpoints[0] print(f"✓ Found {len(endpoints)} endpoint(s), using: {selected.route}") if selected.inputs: print(f" Inputs: {len(selected.inputs)} ({', '.join([i.name for i in selected.inputs[:3]])}{'...' if len(selected.inputs) > 3 else ''})") if selected.outputs: print(f" Outputs: {len(selected.outputs)} ({', '.join([o.name for o in selected.outputs[:3]])}{'...' if len(selected.outputs) > 3 else ''})") # Build wiring placeholders wiring = self._generate_wiring_docs(selected, var_name) # Generate code code = self._render_node_code(space_id, var_name, selected) return NodeTemplate( space_id=space_id, endpoint=selected, all_endpoints=[e.route for e in endpoints], imports=["from daggr import GradioNode", "import gradio as gr"], node_code=code, wiring_placeholders=wiring, metadata={"generator": "GradioNodeGenerator", "client_kwargs": {}} ) def _to_snake_case(self, name: str) -> str: """Convert space name to valid Python variable name.""" # Remove special chars, convert to snake_case clean = re.sub(r'[^a-zA-Z0-9]', '_', name) clean = re.sub(r'([A-Z])', r'_\1', clean).lower() clean = re.sub(r'_+', '_', clean).strip('_') return clean or "node" def _generate_wiring_docs(self, endpoint: APIEndpoint, var_name: str) -> List[str]: """Generate documentation for wiring inputs/outputs.""" docs = [] docs.append(f"# === WIRING GUIDE for {var_name} ===") if endpoint.inputs: docs.append("# Inputs (what this node expects):") for inp in endpoint.inputs: example = f"upstream_node.{inp.name}" if inp.python_type != "str" else f'"{inp.default or "value"}"' default_info = f" [default: {inp.default}]" if inp.default is not None else "" docs.append(f"# - {inp.name}: {inp.python_type}{default_info}") docs.append(f"# Wire: {var_name}.inputs['{inp.name}'] = {example}") else: docs.append("# Inputs: None (no parameters required)") if endpoint.outputs: docs.append("#") docs.append("# Outputs (what this node produces):") for out in endpoint.outputs: docs.append(f"# - {out.name}: {out.python_type}") docs.append(f"# Access: {var_name}.{out.name}") docs.append(f"# Usage: next_node.inputs['{out.name}'] = {var_name}.{out.name}") else: docs.append("# Outputs: None") docs.append("# ===========================================") return docs def _render_node_code(self, space_id: str, var_name: str, endpoint: APIEndpoint) -> str: """Render the actual Python code for the GradioNode.""" lines = [] # Node definition with docstring lines.append(f'{var_name} = GradioNode(') lines.append(f' space_or_url="{space_id}", # Space ID') lines.append(f' api_name="{endpoint.route}", # API endpoint') if endpoint.description: lines.append(f' # Description: {endpoint.description}') lines.append(f'') # Inputs section if endpoint.inputs: lines.append(f' inputs={{') for inp in endpoint.inputs: # Determine default value representation if inp.default is not None: if isinstance(inp.default, (int, float, bool)): default_val = f"{inp.default} # Fixed value" elif isinstance(inp.default, str): default_val = f'"{inp.default}" # Fixed value' else: default_val = f"{inp.default} # Fixed value" else: # Suggest gradio component for UI input comp = inp.to_gradio_component() default_val = f"{comp} # UI input - connect to upstream node or provide value" # Clean up multiline descriptions comment = "" if inp.description: desc = inp.description.replace(chr(10), " ")[:50] comment = f" # {desc}" lines.append(f' "{inp.name}": {default_val},{comment}') lines.append(f' }},') else: lines.append(f' inputs={{}}, # No inputs required') lines.append(f'') # Outputs section if endpoint.outputs: lines.append(f' outputs={{') for out in endpoint.outputs: comp = out.to_gradio_component() lines.append(f' "{out.name}": {comp}, # Display in node card') lines.append(f' # Use None to hide outputs: "hidden_output": None') lines.append(f' }},') else: lines.append(f' outputs={{}}, # No outputs') # Optional flags lines.append(f'') lines.append(f' # Optional: Transform outputs before downstream flow') lines.append(f' # postprocess=lambda outputs, final: final,') lines.append(f')') return "\n".join(lines) class InferenceNodeGenerator(NodeGenerator): """ Future generator for daggr.InferenceNode (HF Inference Providers). Placeholder for extension. """ def can_handle(self, space_info: Dict) -> bool: return False def generate(self, model_id: str, **kwargs) -> NodeTemplate: raise NotImplementedError("InferenceNode generator coming in next revision") class FnNodeGenerator(NodeGenerator): """ Future generator for daggr.FnNode (custom Python functions). Placeholder for extension. """ def can_handle(self, space_info: Dict) -> bool: return False def generate(self, func: Callable, **kwargs) -> NodeTemplate: raise NotImplementedError("FnNode generator coming in next revision") class DaggrGenerator: """ Main orchestrator for generating daggr workflows. Supports multiple node types and provides extensible registry. """ def __init__(self): self.generators: Dict[str, NodeGenerator] = { "gradio": GradioNodeGenerator(), "inference": InferenceNodeGenerator(), "function": FnNodeGenerator(), } def generate_from_space( self, space_url: str, output_file: Optional[str] = None, api_name: Optional[str] = None, node_name: Optional[str] = None, include_boilerplate: bool = True ) -> str: """ Generate daggr node from HF Space. Args: space_url: HF Space URL or ID output_file: Optional file to write (prints to stdout if None) api_name: Specific API endpoint to use node_name: Custom variable name for node include_boilerplate: Include imports and example usage Returns: Generated Python code as string """ generator = self.generators["gradio"] try: template = generator.generate(space_url, api_name, node_name) code = self._assemble_code(template, include_boilerplate) if output_file: Path(output_file).write_text(code) print(f"\nGenerated node written to: {output_file}") return code except Exception as e: print(f"\nError generating node: {e}") raise def _assemble_code(self, template: NodeTemplate, include_boilerplate: bool) -> str: """Assemble final Python script.""" lines = [] if include_boilerplate: lines.append("'''") lines.append(f'Auto-generated Daggr Node') lines.append(f'Space: {template.space_id}') lines.append(f'API: {template.endpoint.route}') lines.append(f'Endpoints available: {", ".join(template.all_endpoints[:5])}{"..." if len(template.all_endpoints) > 5 else ""}') lines.append("'''") lines.append('') lines.extend(template.imports) lines.append('from daggr import Graph') lines.append('') # Add wiring documentation lines.extend(template.wiring_placeholders) lines.append('') # Add the node code lines.append(template.node_code) lines.append('') if include_boilerplate: # Extract variable name from first line of node code var_line = template.node_code.split('\n')[0] var_name = var_line.split('=')[0].strip() # Add example graph setup space_short = template.space_id.split("/")[-1] lines.append(f'# Example usage') lines.append(f'if __name__ == "__main__":') lines.append(f' graph = Graph(') lines.append(f' name="{space_short} Workflow",') lines.append(f' nodes=[{var_name}]') lines.append(f' )') lines.append(f' graph.launch()') lines.append('') lines.append(f' # Or run with: daggr this_file.py') return "\n".join(lines) def register_generator(self, name: str, generator: NodeGenerator): """Register a new generator for extensibility.""" self.generators[name] = generator print(f"Registered new generator: {name}") def main(): parser = argparse.ArgumentParser( description="Generate daggr nodes from Hugging Face Gradio Spaces" ) parser.add_argument("space", help="HF Space URL or ID (e.g., 'user/space-name')") parser.add_argument("--api-name", "-a", help="Specific API endpoint (default: first substantial endpoint)") parser.add_argument("--output", "-o", help="Output Python file (default: stdout)") parser.add_argument("--node-name", "-n", help="Variable name for node (default: auto)") parser.add_argument("--no-boilerplate", action="store_true", help="Generate only node definition") args = parser.parse_args() generator = DaggrGenerator() code = generator.generate_from_space( args.space, output_file=args.output, api_name=args.api_name, node_name=args.node_name, include_boilerplate=not args.no_boilerplate ) if not args.output: print("\n" + "="*60) print("GENERATED DAGGR NODE") print("="*60) print(code) if __name__ == "__main__": main()