Skip to content

API

ChannelStatsService

Bases: HealthCheckable

Handles channel statistics updates

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/stats.py
class ChannelStatsService(HealthCheckable):
    """Maintains per-channel statistics in a MongoDB channels collection.

    Responsibilities:
      * ensuring the configured channel indexes exist,
      * incrementing channel video counts as new videos are processed,
      * reporting collection health for monitoring.
    """

    def __init__(self, client: Any, config: MongoDBConfig, retry_config: RetryConfig):
        """Initialize the service.

        Args:
            client: Async MongoDB client (indexed by database name).
            config: Database, collection, and index configuration.
            retry_config: Backoff parameters for transient write failures.
        """
        self.client = client
        self.config = config
        self.retry = RetryableOperation(retry_config)
        self.db = self.client[config.database_name]
        self.channels_collection = self.db[config.channels_collection]
        logger.info("Initialized ChannelStatsService")

    async def ensure_indices(self) -> OperationResult:
        """Ensure the required database indices exist.

        Returns:
            OperationResult: success listing the ensured indexes, or failure
            naming the indexes that could not be created.
        """
        created_indexes = []
        failed_indexes = []

        for index_name, index_config in self.config.channel_indexes.items():
            # BUG FIX: channel_indexes entries carry the key specification
            # under "key" (see MongoDBConfig.channel_indexes). Forwarding it
            # as a keyword argument corrupts the index document, because
            # pymongo merges extra kwargs into the index spec. Pass the key
            # spec positionally and the remaining entries as options.
            options = dict(index_config)
            keys = options.pop("key", index_name)
            try:
                await self.channels_collection.create_index(keys, **options)
                created_indexes.append(index_name)
                logger.debug(f"Created index: {index_name}")
            except OperationFailure as e:
                if "already exists" in str(e):
                    logger.debug(f"Index '{index_name}' already exists")
                    created_indexes.append(index_name)
                else:
                    failed_indexes.append(index_name)
                    logger.error(f"Failed to create index {index_name}: {str(e)}")

        if failed_indexes:
            return OperationResult.failure(
                f"Failed to create indexes: {failed_indexes}",
                metadata={"created": created_indexes, "failed": failed_indexes},
            )
        return OperationResult.success(
            f"Ensured indexes: {created_indexes}",
            metadata={"indexes": created_indexes},
        )

    async def update_channel_stats(self, video_data: Dict[str, Any]) -> OperationResult:
        """Update channel statistics based on video data with retry logic.

        Increments the channel's video_count and refreshes last_activity;
        a channel document is upserted (with first_seen) on first sight.

        Args:
            video_data: Video metadata; must contain a "channel_id" key.

        Returns:
            OperationResult: success noting whether the channel document was
            updated or created, or failure if the write ultimately failed.
        """
        channel_id = video_data.get("channel_id")
        if not channel_id:
            return OperationResult.failure("Video data missing channel_id")

        # Capture one timestamp so last_activity and first_seen agree exactly
        # on the upsert that creates the document (previously two separate
        # datetime.now() readings could differ).
        now = datetime.now(timezone.utc)

        async def _update_operation():
            return await self.channels_collection.update_one(
                {"channel_id": channel_id},
                {
                    "$inc": {"video_count": 1},
                    "$set": {"last_activity": now},
                    "$setOnInsert": {"first_seen": now},
                },
                upsert=True,
            )

        try:
            result = await self.retry.execute(
                _update_operation, f"update_channel_stats_{channel_id}"
            )
            logger.debug(f"Updated channel stats: {channel_id}")

            # matched_count > 0 means an existing document was modified;
            # otherwise the upsert created a new channel document.
            action = "updated" if result.matched_count > 0 else "created"
            return OperationResult.success(
                f"Channel stats {action}: {channel_id}",
                metadata={"channel_id": channel_id, "action": action},
            )
        except Exception as e:
            logger.error(f"Failed to update channel stats: {str(e)}")
            logger.debug(traceback.format_exc())
            return OperationResult.failure(
                f"Failed to update channel stats: {str(e)}", e
            )

    async def health_check(self) -> HealthStatus:
        """Check MongoDB connection health for the channels collection.

        Runs a count capped at one document as a cheap liveness probe and
        reports the round-trip time in milliseconds.
        """
        start_time = time.time()
        try:
            # Cheap probe: count at most one document.
            await self.channels_collection.count_documents({}, limit=1)
            response_time = (time.time() - start_time) * 1000

            return HealthStatus(
                service_name="mongodb_channels",
                is_healthy=True,
                response_time_ms=response_time,
                message="Channels collection accessible",
            )
        except Exception as e:
            response_time = (time.time() - start_time) * 1000
            return HealthStatus(
                service_name="mongodb_channels",
                is_healthy=False,
                response_time_ms=response_time,
                message=f"Health check failed: {str(e)}",
            )

ensure_indices() async

Ensure the required database indices exist

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/stats.py
async def ensure_indices(self) -> OperationResult:
    """Ensure the required database indices exist.

    Returns:
        OperationResult: success listing the ensured indexes, or failure
        naming the indexes that could not be created.
    """
    created_indexes = []
    failed_indexes = []

    for index_name, index_config in self.config.channel_indexes.items():
        # BUG FIX: the configuration carries the key specification under
        # "key"; forwarding it as a keyword argument corrupts the index
        # document (pymongo merges extra kwargs into the index spec).
        # Pass the key spec positionally and the rest as options.
        options = dict(index_config)
        keys = options.pop("key", index_name)
        try:
            await self.channels_collection.create_index(keys, **options)
            created_indexes.append(index_name)
            logger.debug(f"Created index: {index_name}")
        except OperationFailure as e:
            if "already exists" in str(e):
                logger.debug(f"Index '{index_name}' already exists")
                created_indexes.append(index_name)
            else:
                failed_indexes.append(index_name)
                logger.error(f"Failed to create index {index_name}: {str(e)}")

    if failed_indexes:
        return OperationResult.failure(
            f"Failed to create indexes: {failed_indexes}",
            metadata={"created": created_indexes, "failed": failed_indexes},
        )
    return OperationResult.success(
        f"Ensured indexes: {created_indexes}",
        metadata={"indexes": created_indexes},
    )

health_check() async

Check MongoDB connection health for channels collection

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/stats.py
async def health_check(self) -> HealthStatus:
    """Verify that the channels collection is reachable.

    Issues a count limited to a single document as a cheap probe and
    reports the outcome together with the round-trip time in milliseconds.
    """
    started = time.time()
    healthy = True
    message = "Channels collection accessible"
    try:
        # Probe the collection with a count capped at one document.
        await self.channels_collection.count_documents({}, limit=1)
    except Exception as exc:
        healthy = False
        message = f"Health check failed: {str(exc)}"
    elapsed_ms = (time.time() - started) * 1000
    return HealthStatus(
        service_name="mongodb_channels",
        is_healthy=healthy,
        response_time_ms=elapsed_ms,
        message=message,
    )

update_channel_stats(video_data) async

Update channel statistics based on video data with retry logic

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/stats.py
async def update_channel_stats(self, video_data: Dict[str, Any]) -> OperationResult:
    """Update channel statistics based on video data with retry logic.

    Increments the channel's video_count and refreshes last_activity;
    a channel document is upserted (with first_seen) on first sight.

    Args:
        video_data: Video metadata; must contain a "channel_id" key.

    Returns:
        OperationResult: success noting whether the channel document was
        updated or created, or failure if the write ultimately failed.
    """
    channel_id = video_data.get("channel_id")
    if not channel_id:
        return OperationResult.failure("Video data missing channel_id")

    # Capture one timestamp so last_activity and first_seen agree exactly
    # on the upsert that creates the document (previously two separate
    # datetime.now() readings could differ).
    now = datetime.now(timezone.utc)

    async def _update_operation():
        return await self.channels_collection.update_one(
            {"channel_id": channel_id},
            {
                "$inc": {"video_count": 1},
                "$set": {"last_activity": now},
                "$setOnInsert": {"first_seen": now},
            },
            upsert=True,
        )

    try:
        result = await self.retry.execute(
            _update_operation, f"update_channel_stats_{channel_id}"
        )
        logger.debug(f"Updated channel stats: {channel_id}")

        # matched_count > 0 means an existing document was modified;
        # otherwise the upsert created a new channel document.
        action = "updated" if result.matched_count > 0 else "created"
        return OperationResult.success(
            f"Channel stats {action}: {channel_id}",
            metadata={"channel_id": channel_id, "action": action},
        )
    except Exception as e:
        logger.error(f"Failed to update channel stats: {str(e)}")
        logger.debug(traceback.format_exc())
        return OperationResult.failure(
            f"Failed to update channel stats: {str(e)}", e
        )

ElasticsearchConfig dataclass

Configuration for Elasticsearch indexing and search operations.

This class manages Elasticsearch-specific settings including index configuration, field mappings, and analysis settings. It provides a computed mapping property that generates the complete index configuration based on the configured parameters.

The mapping includes optimized field types for video metadata, proper analyzers for text search, and index settings for performance tuning.

Attributes:

Name Type Description
index_name str

Name of the Elasticsearch index for storing video documents.

shards int

Number of primary shards for the index. More shards allow better distribution across nodes but increase overhead.

replicas int

Number of replica shards for each primary shard. Replicas provide redundancy and can improve search throughput.

Example

config = ElasticsearchConfig( ... index_name="videos_production", ... shards=3, ... replicas=2 ... )

Create index with the computed mapping

es_client.indices.create( ... index=config.index_name, ... body=config.mapping ... )

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/config.py
@dataclass
class ElasticsearchConfig:
    """Configuration for Elasticsearch indexing and search operations.

    Holds the index name plus shard and replica counts, and computes the
    full index mapping (field types, analyzers, and index settings) from
    those values on demand.

    Attributes:
        index_name: Name of the Elasticsearch index for video documents.
        shards: Number of primary shards for the index.
        replicas: Number of replica shards per primary shard.

    Example:
        >>> config = ElasticsearchConfig(index_name="videos_production",
        ...                              shards=3, replicas=2)
        >>> es_client.indices.create(index=config.index_name,
        ...                          body=config.mapping)
    """

    index_name: str = "videos"
    shards: int = 1
    replicas: int = 0

    @property
    def mapping(self) -> Dict[str, Any]:
        """Compute the complete index mapping and settings.

        Field layout: keyword fields for exact matching (IDs, tags,
        categories); analyzed text for title, description, and author, with
        keyword sub-fields on title and author for aggregations; date fields
        for timestamps; integer/long types for metrics. The settings section
        reflects the shard and replica counts at call time.

        Returns:
            Dict with "mappings" (field properties) and "settings"
            (number_of_shards / number_of_replicas).
        """
        def _text_field(analyzed: bool = False) -> Dict[str, Any]:
            # Text field with an exact-match keyword sub-field; values longer
            # than 256 characters are not indexed in the keyword sub-field.
            spec: Dict[str, Any] = {"type": "text"}
            if analyzed:
                spec["analyzer"] = "standard"
            spec["fields"] = {"keyword": {"type": "keyword", "ignore_above": 256}}
            return spec

        properties: Dict[str, Any] = {
            "video_id": {"type": "keyword"},
            "channel_id": {"type": "keyword"},
            "title": _text_field(analyzed=True),
            "description": {"type": "text", "analyzer": "standard"},
            "published": {"type": "date"},
            "updated": {"type": "date"},
            "author": _text_field(),
            "tags": {"type": "keyword"},
            "categories": {"type": "keyword"},
            "duration": {"type": "integer"},
        }
        for metric in ("view_count", "like_count", "comment_count"):
            properties[metric] = {"type": "long"}
        properties["processed_at"] = {"type": "date"}

        return {
            "mappings": {"properties": properties},
            "settings": {
                "number_of_shards": self.shards,
                "number_of_replicas": self.replicas,
            },
        }

mapping property

Generate the complete Elasticsearch index mapping and settings.

Creates a comprehensive mapping configuration that defines how video documents are indexed and stored. The mapping includes:

  • Keyword fields for exact matching (video_id, channel_id, tags)
  • Text fields with standard analyzer for full-text search
  • Multi-field configurations for both search and aggregations
  • Appropriate data types for metrics and timestamps
  • Index settings based on configured shard and replica counts

Returns:

Type Description
Dict[str, Any]

Dictionary containing the complete Elasticsearch mapping configuration with both field mappings and index settings.

Note

The mapping is generated dynamically based on current attribute values, so changes to shards or replicas will be reflected in subsequent calls.

Example

config = ElasticsearchConfig(shards=2, replicas=1) mapping = config.mapping print(mapping["settings"]["number_of_shards"]) # 2

Text fields support both search and keyword aggregations

title_mapping = mapping["mappings"]["properties"]["title"] print(title_mapping["type"]) # "text" print(title_mapping["fields"]["keyword"]["type"]) # "keyword"

MongoDBConfig dataclass

Configuration for MongoDB collections and database indexes.

This class manages MongoDB-specific configuration including database and collection names, as well as index definitions for optimal query performance. It provides computed properties that generate index configurations for different collection types.

The index configurations are optimized for common query patterns including lookups by ID, filtering by channel, date-based queries, and subscription management operations.

Attributes:

Name Type Description
database_name str

Name of the MongoDB database containing the collections.

videos_collection str

Name of the collection storing video metadata documents.

channels_collection str

Name of the collection storing channel information.

Example

config = MongoDBConfig( ... database_name="video_platform", ... videos_collection="video_metadata", ... channels_collection="channel_data" ... )

Create indexes for optimal query performance

db = mongo_client[config.database_name] videos = db[config.videos_collection]

for index_config in config.video_indexes.values(): ... videos.create_index(index_config["key"], **index_config)

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/config.py
@dataclass
class MongoDBConfig:
    """Configuration for MongoDB collections and database indexes.

    Holds the database and collection names used by the indexer and exposes
    computed properties describing the indexes each collection should carry
    for the common query patterns: ID lookups, channel filtering, date-based
    queries, and subscription management.

    Attributes:
        database_name: Name of the MongoDB database containing the collections.
        videos_collection: Collection holding video metadata documents.
        channels_collection: Collection holding channel information.

    Example:
        >>> config = MongoDBConfig(
        ...     database_name="video_platform",
        ...     videos_collection="video_metadata",
        ...     channels_collection="channel_data"
        ... )
        >>> db = mongo_client[config.database_name]
        >>> videos = db[config.videos_collection]
    """

    database_name: str = "mongo"
    videos_collection: str = "videos"
    channels_collection: str = "channels"

    @property
    def video_indexes(self) -> Dict[str, Dict[str, Any]]:
        """Index definitions for the videos collection.

        Returns:
            Mapping of index name to its MongoDB index configuration. The
            video_id index is unique to prevent duplicate video documents;
            the channel and published-date indexes are non-unique and serve
            filtering and sorting.
        """
        def _spec(field: str, unique: bool, name: str) -> Dict[str, Any]:
            # Single-field ascending index specification.
            return {"key": [(field, 1)], "unique": unique, "name": name}

        return {
            "video_id": _spec("video_id", True, "video_id_idx"),
            "channel_id_non": _spec("channel_id", False, "channel_id_non_idx"),
            "published_non": _spec("published", False, "published_non_idx"),
        }

    @property
    def channel_indexes(self) -> Dict[str, Dict[str, Any]]:
        """Index definitions for the channels collection.

        Returns:
            Mapping with a single unique ascending index on channel_id,
            used for channel identification and lookups.
        """
        channel_id_spec: Dict[str, Any] = {
            "key": [("channel_id", 1)],
            "unique": True,
            "name": "channel_id_idx",
        }
        return {"channel_id": channel_id_spec}

    @property
    def subscription_indexes(self) -> Dict[str, Dict[str, Any]]:
        """Index definitions for the subscriptions collection.

        Covers unique channel lookups, expiration queries, active-flag
        filtering, and a compound entry for finding active subscriptions
        that are nearing expiry (a common renewal query).

        Returns:
            Mapping of index name to its configuration.

        Note:
            NOTE(review): the "expires_at_active" entry is a list of key
            tuples rather than an options dict, which does not match the
            declared return type or the shape used by the other index
            properties — confirm with consumers before normalizing it.
        """
        indexes: Dict[str, Any] = {}
        indexes["channel_id"] = {"unique": True}
        indexes["expires_at"] = {}
        indexes["is_active"] = {}
        # Compound key for "active subscriptions nearing expiry" queries.
        indexes["expires_at_active"] = [("expires_at", 1), ("is_active", 1)]
        return indexes

channel_indexes property

Generate index configurations for the channels collection.

Creates index definitions for channel-related queries, primarily focused on unique channel identification and lookups.

Returns:

Type Description
Dict[str, Dict[str, Any]]

Dictionary mapping index names to their MongoDB index configurations for the channels collection.

Example

config = MongoDBConfig() channel_indexes = config.channel_indexes

Unique index for channel identification

channel_idx = channel_indexes["channel_id"] print(channel_idx["unique"]) # True

subscription_indexes property

Generate index configurations for the subscriptions collection.

Creates index definitions optimized for subscription management operations including unique subscription lookups, expiration queries, and active subscription filtering. Includes a compound index for efficient queries on expiring active subscriptions.

Returns:

Type Description
Dict[str, Dict[str, Any]]

Dictionary mapping index names to their MongoDB index configurations for subscription management.

Note

The compound index on expires_at and is_active enables efficient queries for finding subscriptions that need renewal, which is a common operation in subscription management workflows.

Example

config = MongoDBConfig() sub_indexes = config.subscription_indexes

Unique constraint on channel subscriptions

channel_idx = sub_indexes["channel_id"] print(channel_idx["unique"]) # True

Compound index for expiration queries

compound_idx = sub_indexes["expires_at_active"] print(compound_idx) # [("expires_at", 1), ("is_active", 1)]

video_indexes property

Generate index configurations for the videos collection.

Creates index definitions optimized for common video query patterns including unique video lookups, channel-based filtering, and date-based sorting and filtering operations.

Returns:

Type Description
Dict[str, Dict[str, Any]]

Dictionary mapping index names to their MongoDB index configurations. Each configuration includes the key specification, uniqueness constraint, and index name.

Note

The video_id index enforces uniqueness to prevent duplicate video documents, while other indexes are non-unique to support filtering and sorting operations.

Example

config = MongoDBConfig() indexes = config.video_indexes

Unique index for video lookups

video_id_idx = indexes["video_id"] print(video_id_idx["unique"]) # True

Non-unique index for channel filtering

channel_idx = indexes["channel_id_non"] print(channel_idx["unique"]) # False

RetryConfig dataclass

Configuration for retry logic with exponential backoff.

This class defines parameters for implementing robust retry mechanisms with exponential backoff to handle transient failures gracefully. The configuration controls retry attempts, timing, and backoff behavior.

The exponential backoff algorithm increases delays between retry attempts to reduce load on failing systems and improve the likelihood of eventual success. The max_delay parameter prevents delays from becoming excessive.

Attributes:

Name Type Description
max_attempts int

Maximum number of retry attempts before giving up. Includes the initial attempt, so max_attempts=3 means 1 initial attempt plus 2 retries.

base_delay float

Initial delay in seconds before the first retry attempt. Subsequent delays are calculated using exponential backoff.

max_delay float

Maximum delay in seconds between retry attempts. Prevents exponential backoff from creating excessively long delays.

exponential_base float

Base for exponential backoff calculation. Common values are 2.0 (doubling) or 1.5 (50% increase per attempt).

Example

Conservative retry configuration

config = RetryConfig( ... max_attempts=3, ... base_delay=1.0, ... max_delay=30.0, ... exponential_base=2.0 ... )

Aggressive retry for critical operations

critical_config = RetryConfig( ... max_attempts=10, ... base_delay=0.5, ... max_delay=120.0, ... exponential_base=1.5 ... )

Delay calculation example:

Attempt 1: base_delay * (exponential_base ^ 0) = 1.0 * 1 = 1.0s

Attempt 2: base_delay * (exponential_base ^ 1) = 1.0 * 2 = 2.0s

Attempt 3: base_delay * (exponential_base ^ 2) = 1.0 * 4 = 4.0s

Note

Actual delays include random jitter to prevent thundering herd problems when multiple clients retry simultaneously. The jitter is typically 10-30% of the calculated delay.

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/config.py
@dataclass
class RetryConfig:
    """Parameters for retry logic with exponential backoff.

    The delay before retry attempt ``k`` is
    ``min(base_delay * exponential_base ** (k - 1), max_delay)``: delays
    grow geometrically between attempts but are capped so they never become
    excessive.

    Attributes:
        max_attempts: Total attempts allowed, counting the initial try
            (max_attempts=3 means one initial attempt plus two retries).
        base_delay: Delay in seconds before the first retry.
        max_delay: Upper bound in seconds on any single delay.
        exponential_base: Growth factor between successive delays
            (2.0 doubles each time; 1.5 grows by half).

    Example:
        >>> # Conservative defaults
        >>> RetryConfig(max_attempts=3, base_delay=1.0,
        ...             max_delay=30.0, exponential_base=2.0)
        >>> # More persistent retrying for critical operations
        >>> RetryConfig(max_attempts=10, base_delay=0.5,
        ...             max_delay=120.0, exponential_base=1.5)

    Note:
        Implementations typically add random jitter (roughly 10-30% of the
        computed delay) so many clients do not retry in lockstep.
    """

    max_attempts: int = 3
    base_delay: float = 1.0
    max_delay: float = 60.0
    exponential_base: float = 2.0

SearchIndexingService

Bases: HealthCheckable

Handles video search indexing in Elasticsearch

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/indexing.py
class SearchIndexingService(HealthCheckable):
    """Indexes video documents into Elasticsearch and reports cluster health."""

    def __init__(
        self, client: Any, config: ElasticsearchConfig, retry_config: RetryConfig
    ):
        """Store the client and configuration and set up retry handling."""
        self.client = client
        self.config = config
        self.retry = RetryableOperation(retry_config)
        logger.info("Initialized SearchIndexingService")

    async def ensure_index(self) -> OperationResult:
        """Create the Elasticsearch index unless it is already present."""
        index = self.config.index_name
        try:
            already_present = await self.client.indices.exists(index=index)
            if already_present:
                return OperationResult.success(
                    f"Index already exists: {index}"
                )
            await self.client.indices.create(index=index, body=self.config.mapping)
            logger.info(f"Created Elasticsearch index: {index}")
            return OperationResult.success(
                f"Created index: {index}"
            )
        except Exception as e:
            logger.error(f"Failed to ensure index: {str(e)}")
            return OperationResult.failure(f"Failed to ensure index: {str(e)}", e)

    async def index_video(self, video_data: Dict[str, Any]) -> OperationResult:
        """Index a single video document, retrying on transient failures."""
        video_id = video_data.get("video_id")
        if not video_id:
            return OperationResult.failure("Video data missing video_id")

        async def _write_document():
            await self.client.index(
                index=self.config.index_name, id=video_id, body=video_data, refresh=True
            )
            return video_id

        try:
            result_id = await self.retry.execute(
                _write_document, f"index_video_{video_id}"
            )
        except Exception as e:
            logger.error(f"Failed to index video in Elasticsearch: {str(e)}")
            logger.debug(traceback.format_exc())
            return OperationResult.failure(f"Failed to index video: {str(e)}", e)
        logger.debug(f"Indexed video in Elasticsearch: {result_id}")
        return OperationResult.success(
            f"Indexed video: {result_id}", metadata={"video_id": result_id}
        )

    async def health_check(self) -> HealthStatus:
        """Report Elasticsearch cluster health and the probe round-trip time."""
        started = time.time()
        try:
            cluster_state = await self.client.cluster.health()
        except Exception as e:
            return HealthStatus(
                service_name="elasticsearch",
                is_healthy=False,
                response_time_ms=(time.time() - started) * 1000,
                message=f"Health check failed: {str(e)}",
            )
        elapsed_ms = (time.time() - started) * 1000
        # Both green and yellow count as healthy (yellow = replicas unassigned).
        healthy = cluster_state.get("status") in ["green", "yellow"]
        return HealthStatus(
            service_name="elasticsearch",
            is_healthy=healthy,
            response_time_ms=elapsed_ms,
            message=f"Cluster status: {cluster_state.get('status', 'unknown')}",
            metadata=cluster_state,
        )

    async def close(self):
        """Close the Elasticsearch client."""
        await self.client.close()
close() async

Close the Elasticsearch client

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/indexing.py
async def close(self):
    """Close the Elasticsearch client"""
    await self.client.close()

ensure_index() async

Create Elasticsearch index if it doesn't exist

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/indexing.py
async def ensure_index(self) -> OperationResult:
    """Create the Elasticsearch index when missing; no-op when it already exists."""
    index_name = self.config.index_name
    try:
        exists = await self.client.indices.exists(index=index_name)
        if exists:
            return OperationResult.success(
                f"Index already exists: {index_name}"
            )
        await self.client.indices.create(
            index=index_name, body=self.config.mapping
        )
        logger.info(f"Created Elasticsearch index: {index_name}")
        return OperationResult.success(f"Created index: {index_name}")
    except Exception as e:
        logger.error(f"Failed to ensure index: {str(e)}")
        return OperationResult.failure(f"Failed to ensure index: {str(e)}", e)

health_check() async

Check Elasticsearch cluster health

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/indexing.py
async def health_check(self) -> HealthStatus:
    """Report Elasticsearch cluster health as a HealthStatus.

    Healthy when the cluster status is green or yellow; any exception from
    the health call is reported as unhealthy with the error message.
    """
    started = time.time()
    try:
        health = await self.client.cluster.health()
    except Exception as e:
        return HealthStatus(
            service_name="elasticsearch",
            is_healthy=False,
            response_time_ms=(time.time() - started) * 1000,
            message=f"Health check failed: {str(e)}",
        )

    elapsed_ms = (time.time() - started) * 1000
    status = health.get("status")
    return HealthStatus(
        service_name="elasticsearch",
        is_healthy=status in ("green", "yellow"),
        response_time_ms=elapsed_ms,
        message=f"Cluster status: {health.get('status', 'unknown')}",
        metadata=health,
    )

index_video(video_data) async

Index video metadata in Elasticsearch with retry logic

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/indexing.py
async def index_video(self, video_data: Dict[str, Any]) -> OperationResult:
    """Push one video document into the Elasticsearch index with retry logic.

    Returns a success OperationResult carrying the video id, or a failure
    result when the id is missing or every retry attempt fails.
    """
    video_id = video_data.get("video_id")
    if not video_id:
        return OperationResult.failure("Video data missing video_id")

    async def _do_index():
        await self.client.index(
            index=self.config.index_name,
            id=video_id,
            body=video_data,
            refresh=True,
        )
        return video_id

    try:
        indexed_id = await self.retry.execute(_do_index, f"index_video_{video_id}")
    except Exception as e:
        logger.error(f"Failed to index video in Elasticsearch: {str(e)}")
        logger.debug(traceback.format_exc())
        return OperationResult.failure(f"Failed to index video: {str(e)}", e)

    logger.debug(f"Indexed video in Elasticsearch: {indexed_id}")
    return OperationResult.success(
        f"Indexed video: {indexed_id}", metadata={"video_id": indexed_id}
    )

VideoIndexingProcessor

Bases: HealthCheckable

Orchestrates the video indexing process with enhanced error handling and health checks

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/processor.py
class VideoIndexingProcessor(HealthCheckable):
    """Orchestrates the video indexing process with enhanced error handling and health checks.

    Pulls video metadata dicts off ``input_queue`` and runs each through
    transcript enrichment, MongoDB storage, Elasticsearch indexing, and
    channel-stats updates. At most ``max_concurrent_tasks`` videos are
    processed concurrently; ``poll_interval`` throttles polling when the
    queue is empty or the task pool is full.
    """

    def __init__(
        self,
        input_queue: Queue,
        video_storage: VideoStorageService,
        search_indexing: SearchIndexingService,
        channel_stats: ChannelStatsService,
        transcript_service: VideoTranscriptService,
        max_concurrent_tasks: int = 10,
        poll_interval: float = 1.0,
    ):
        self.input_queue = input_queue
        self.video_storage = video_storage
        self.search_indexing = search_indexing
        self.channel_stats = channel_stats
        self.transcript_service = transcript_service
        self.max_concurrent_tasks = max_concurrent_tasks
        self.poll_interval = poll_interval
        self._running = False
        self._shutdown_event = asyncio.Event()
        # In-flight processing tasks; size is bounded by max_concurrent_tasks.
        self._active_tasks: set = set()
        logger.info("Initialized VideoIndexingProcessor")

    async def ensure_indices(self) -> OperationResult:
        """Ensure all required database indices and mappings exist.

        Runs storage, search-indexing, and stats setup sequentially and
        aggregates the outcomes into a single OperationResult.
        """
        results = []

        storage_result = await self.video_storage.ensure_indices()
        results.append(("storage", storage_result))

        indexing_result = await self.search_indexing.ensure_index()
        results.append(("indexing", indexing_result))

        stats_result = await self.channel_stats.ensure_indices()
        results.append(("stats", stats_result))

        logger.info(results)

        failed_services = [name for name, result in results if result.is_failure]

        if failed_services:
            return OperationResult.failure(
                f"Failed to ensure indices for: {failed_services}",
                metadata={
                    "results": {name: result.message for name, result in results}
                },
            )
        else:
            return OperationResult.success(
                "All indices ensured successfully",
                metadata={
                    "results": {name: result.message for name, result in results}
                },
            )

    async def _enrich_video_with_transcript(
        self, video_data: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Enrich video data with transcript information.

        Sets video_data["transcript"] to the fetched text or None and returns
        the dict. Transcript failures are non-fatal and only logged.
        """
        video_id = video_data.get("video_id")
        if not video_id:
            logger.warning(
                "Video data missing video_id, skipping transcript enrichment"
            )
            return video_data

        try:
            # Transcript download is blocking; run it in the default executor
            # so it does not stall the event loop.
            loop = asyncio.get_event_loop()
            transcript = await loop.run_in_executor(
                None, self.transcript_service.get_transcript, video_id
            )

            if transcript:
                video_data["transcript"] = transcript
                logger.debug(f"Added transcript to video {video_id}")
            else:
                logger.debug(f"No transcript available for video {video_id}")
                video_data["transcript"] = None

        except Exception as e:
            logger.warning(f"Failed to get transcript for video {video_id}: {e}")
            video_data["transcript"] = None

        return video_data

    async def process_video(self, video_data: Dict[str, Any]) -> ProcessingResult:
        """Process a single video through all indexing services.

        Storage is the critical step; indexing and stats failures downgrade
        the result to partial success rather than failing the video outright.
        """
        video_id = video_data.get("video_id", "unknown")

        # Enrich with transcript data (non-critical operation)
        enriched_video_data = await self._enrich_video_with_transcript(video_data)

        # Pass the enriched dict to every downstream service explicitly; the
        # previous code passed video_data and only worked because enrichment
        # mutates it in place.
        # Store in MongoDB (critical operation)
        storage_result = await self.video_storage.store_video(enriched_video_data)

        # Index in Elasticsearch (non-critical)
        indexing_result = await self.search_indexing.index_video(
            enriched_video_data
        )

        # Update channel statistics (non-critical)
        stats_result = await self.channel_stats.update_channel_stats(
            enriched_video_data
        )

        result = ProcessingResult(
            video_id=video_id,
            storage_result=storage_result,
            indexing_result=indexing_result,
            stats_result=stats_result,
        )

        if result.is_success:
            has_transcript = enriched_video_data.get("transcript") is not None
            logger.debug(
                f"Successfully processed video: {video_id} (transcript: {has_transcript})"
            )
        elif result.overall_status == OperationStatus.PARTIAL_SUCCESS:
            logger.warning(
                f"Partially processed video {video_id}: storage succeeded but other operations failed"
            )
        else:
            logger.error(f"Failed to process video {video_id}: storage failed")

        return result

    async def _process_video_with_cleanup(
        self, video_data: Dict[str, Any]
    ) -> ProcessingResult:
        """Process a video, removing this task from the active set when done."""
        task = asyncio.current_task()
        try:
            return await self.process_video(video_data)
        finally:
            # Clean up task reference
            if task:
                self._active_tasks.discard(task)

    @staticmethod
    def _reap_done_tasks(done) -> None:
        """Consume results of finished tasks so exceptions are not lost.

        Without this, a task that raised would emit "Task exception was never
        retrieved" at garbage collection instead of being logged promptly.
        """
        for task in done:
            if task.cancelled():
                continue
            exc = task.exception()
            if exc is not None:
                logger.error(f"Video processing task raised: {exc}")

    async def run(self) -> None:
        """Main processing loop - consumes videos from queue and processes them."""
        logger.info("Starting VideoIndexingProcessor main loop")
        self._running = True

        try:
            while self._running and not self._shutdown_event.is_set():
                try:
                    # Check if we have capacity for more tasks
                    if len(self._active_tasks) >= self.max_concurrent_tasks:
                        if self._active_tasks:
                            # Wait for some tasks to complete, then reap their
                            # results so failures are surfaced in the log.
                            done, self._active_tasks = await asyncio.wait(
                                self._active_tasks,
                                return_when=asyncio.FIRST_COMPLETED,
                                timeout=self.poll_interval,
                            )
                            self._reap_done_tasks(done)
                        else:
                            # max_concurrent_tasks <= 0 leaves nothing to wait on
                            await asyncio.sleep(self.poll_interval)
                        continue

                    # Try to get video data from queue
                    video_data = await self._get_next_video()

                    if video_data is None:
                        # No video available, wait before trying again
                        await asyncio.sleep(self.poll_interval)
                        continue

                    # Create and track processing task
                    task = asyncio.create_task(
                        self._process_video_with_cleanup(video_data)
                    )
                    self._active_tasks.add(task)

                    logger.debug(
                        f"Started processing task for video: {video_data.get('video_id', 'unknown')}"
                    )

                except asyncio.CancelledError:
                    logger.info("Processing loop cancelled")
                    break
                except Exception as e:
                    logger.error(f"Error in main processing loop: {e}")
                    await asyncio.sleep(self.poll_interval)

        finally:
            logger.info("Shutting down VideoIndexingProcessor")
            await self._cleanup_active_tasks()

    async def _get_next_video(self) -> Optional[Dict[str, Any]]:
        """Return the next video dict from the queue, or None when unavailable."""
        try:
            # NOTE(review): dequeue() is called synchronously here — confirm
            # the Queue implementation is non-blocking, or the event loop
            # will stall while waiting on it.
            result = self.input_queue.dequeue()
            return result
        except Exception as e:
            logger.error(f"Error getting video from queue: {e}")
            return None

    async def _cleanup_active_tasks(self) -> None:
        """Wait for in-flight tasks to finish, cancelling stragglers after 30s."""
        if not self._active_tasks:
            return

        logger.info(f"Waiting for {len(self._active_tasks)} active tasks to complete")

        # Give tasks a chance to complete gracefully
        try:
            await asyncio.wait_for(
                asyncio.gather(*self._active_tasks, return_exceptions=True),
                timeout=30.0,  # 30 second timeout for graceful shutdown
            )
        except asyncio.TimeoutError:
            logger.warning(
                "Timeout waiting for tasks to complete, cancelling remaining tasks"
            )
            for task in self._active_tasks:
                if not task.done():
                    task.cancel()

            # Wait for cancellations to complete
            await asyncio.gather(*self._active_tasks, return_exceptions=True)

        self._active_tasks.clear()

    async def stop(self) -> None:
        """Gracefully stop the processor; run() drains tasks before exiting."""
        logger.info("Stopping VideoIndexingProcessor")
        self._running = False
        self._shutdown_event.set()

    def is_running(self) -> bool:
        """Check if the processor is currently running."""
        return self._running

    @property
    def active_task_count(self) -> int:
        """Get the number of currently active processing tasks."""
        return len(self._active_tasks)

    async def health_check(self) -> HealthStatus:
        """Check health of all dependent services concurrently.

        Healthy only when every service is healthy AND the processing loop
        is running.
        """
        start_time = time.time()

        try:
            # Check all services concurrently
            health_checks = await asyncio.gather(
                self.video_storage.health_check(),
                self.search_indexing.health_check(),
                self.channel_stats.health_check(),
                return_exceptions=True,
            )

            response_time = (time.time() - start_time) * 1000

            # Determine overall health
            unhealthy_services = []
            service_statuses = {}

            # zip replaces the previous enumerate(), whose index was unused.
            for service_name, check in zip(
                ("storage", "indexing", "stats"), health_checks
            ):
                if isinstance(check, Exception):
                    unhealthy_services.append(service_name)
                    service_statuses[service_name] = f"Error: {str(check)}"
                elif not check.is_healthy:
                    unhealthy_services.append(service_name)
                    service_statuses[service_name] = check.message
                else:
                    service_statuses[service_name] = "healthy"

            is_healthy = len(unhealthy_services) == 0 and self._running
            processor_status = (
                f"running ({self.active_task_count} active tasks)"
                if self._running
                else "stopped"
            )
            message = f"Processor {processor_status}. " + (
                "All services healthy"
                if len(unhealthy_services) == 0
                else f"Unhealthy services: {unhealthy_services}"
            )

            return HealthStatus(
                service_name="video_processor",
                is_healthy=is_healthy,
                response_time_ms=response_time,
                message=message,
                metadata={
                    "services": service_statuses,
                    "active_tasks": self.active_task_count,
                    "is_running": self._running,
                },
            )

        except Exception as e:
            response_time = (time.time() - start_time) * 1000
            return HealthStatus(
                service_name="video_processor",
                is_healthy=False,
                response_time_ms=response_time,
                message=f"Health check failed: {str(e)}",
            )

active_task_count property

Get the number of currently active processing tasks

ensure_indices() async

Ensure all required database indices and mappings exist

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/processor.py
async def ensure_indices(self) -> OperationResult:
    """Ensure all required database indices and mappings exist.

    Runs the storage, search-indexing, and stats setup sequentially and
    folds the three outcomes into one aggregate OperationResult.
    """
    results = [
        ("storage", await self.video_storage.ensure_indices()),
        ("indexing", await self.search_indexing.ensure_index()),
        ("stats", await self.channel_stats.ensure_indices()),
    ]

    logger.info(results)

    messages = {name: outcome.message for name, outcome in results}
    failed_services = [name for name, outcome in results if outcome.is_failure]

    if failed_services:
        return OperationResult.failure(
            f"Failed to ensure indices for: {failed_services}",
            metadata={"results": messages},
        )
    return OperationResult.success(
        "All indices ensured successfully",
        metadata={"results": messages},
    )

health_check() async

Check health of all dependent services

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/processor.py
async def health_check(self) -> HealthStatus:
    """Check health of all dependent services concurrently.

    Healthy only when every service is healthy AND the processing loop is
    running.
    """
    start_time = time.time()

    try:
        # Check all services concurrently
        health_checks = await asyncio.gather(
            self.video_storage.health_check(),
            self.search_indexing.health_check(),
            self.channel_stats.health_check(),
            return_exceptions=True,
        )

        response_time = (time.time() - start_time) * 1000

        # Determine overall health
        unhealthy_services = []
        service_statuses = {}

        # zip replaces the previous enumerate(), whose index was unused.
        for service_name, check in zip(
            ("storage", "indexing", "stats"), health_checks
        ):
            if isinstance(check, Exception):
                unhealthy_services.append(service_name)
                service_statuses[service_name] = f"Error: {str(check)}"
            elif not check.is_healthy:
                unhealthy_services.append(service_name)
                service_statuses[service_name] = check.message
            else:
                service_statuses[service_name] = "healthy"

        is_healthy = len(unhealthy_services) == 0 and self._running
        processor_status = (
            f"running ({self.active_task_count} active tasks)"
            if self._running
            else "stopped"
        )
        message = f"Processor {processor_status}. " + (
            "All services healthy"
            if len(unhealthy_services) == 0
            else f"Unhealthy services: {unhealthy_services}"
        )

        return HealthStatus(
            service_name="video_processor",
            is_healthy=is_healthy,
            response_time_ms=response_time,
            message=message,
            metadata={
                "services": service_statuses,
                "active_tasks": self.active_task_count,
                "is_running": self._running,
            },
        )

    except Exception as e:
        response_time = (time.time() - start_time) * 1000
        return HealthStatus(
            service_name="video_processor",
            is_healthy=False,
            response_time_ms=response_time,
            message=f"Health check failed: {str(e)}",
        )

is_running()

Check if the processor is currently running

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/processor.py
def is_running(self) -> bool:
    """Return True while the processing loop is active."""
    running = self._running
    return running

process_video(video_data) async

Process a single video through all indexing services

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/processor.py
async def process_video(self, video_data: Dict[str, Any]) -> ProcessingResult:
    """Process a single video through all indexing services.

    Storage is the critical step; indexing and stats failures downgrade the
    result to partial success rather than failing the video outright.
    """
    video_id = video_data.get("video_id", "unknown")

    # Enrich with transcript data (non-critical operation)
    enriched_video_data = await self._enrich_video_with_transcript(video_data)

    # Pass the enriched dict to every downstream service explicitly; the old
    # code passed video_data and only worked because enrichment mutates it
    # in place.
    # Store in MongoDB (critical operation)
    storage_result = await self.video_storage.store_video(enriched_video_data)

    # Index in Elasticsearch (non-critical)
    indexing_result = await self.search_indexing.index_video(enriched_video_data)

    # Update channel statistics (non-critical)
    stats_result = await self.channel_stats.update_channel_stats(enriched_video_data)

    result = ProcessingResult(
        video_id=video_id,
        storage_result=storage_result,
        indexing_result=indexing_result,
        stats_result=stats_result,
    )

    if result.is_success:
        has_transcript = enriched_video_data.get("transcript") is not None
        logger.debug(
            f"Successfully processed video: {video_id} (transcript: {has_transcript})"
        )
    elif result.overall_status == OperationStatus.PARTIAL_SUCCESS:
        logger.warning(
            f"Partially processed video {video_id}: storage succeeded but other operations failed"
        )
    else:
        logger.error(f"Failed to process video {video_id}: storage failed")

    return result

run() async

Main processing loop - consumes videos from queue and processes them

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/processor.py
async def run(self) -> None:
    """Main processing loop - consumes videos from queue and processes them.

    Spawns up to max_concurrent_tasks concurrent processing tasks, then
    blocks until capacity frees up. On shutdown, drains or cancels any
    in-flight tasks via _cleanup_active_tasks().
    """
    logger.info("Starting VideoIndexingProcessor main loop")
    self._running = True

    try:
        while self._running and not self._shutdown_event.is_set():
            try:
                # Check if we have capacity for more tasks
                if len(self._active_tasks) >= self.max_concurrent_tasks:
                    if self._active_tasks:
                        # Wait for some tasks to complete
                        done, self._active_tasks = await asyncio.wait(
                            self._active_tasks,
                            return_when=asyncio.FIRST_COMPLETED,
                            timeout=self.poll_interval,
                        )
                        # Consume finished tasks' exceptions so they are
                        # logged now instead of surfacing later as
                        # "Task exception was never retrieved".
                        for finished in done:
                            if finished.cancelled():
                                continue
                            exc = finished.exception()
                            if exc is not None:
                                logger.error(
                                    f"Video processing task raised: {exc}"
                                )
                    else:
                        # max_concurrent_tasks <= 0 leaves nothing to wait on
                        await asyncio.sleep(self.poll_interval)
                    continue

                # Try to get video data from queue
                video_data = await self._get_next_video()

                if video_data is None:
                    # No video available, wait before trying again
                    await asyncio.sleep(self.poll_interval)
                    continue

                # Create and track processing task
                task = asyncio.create_task(
                    self._process_video_with_cleanup(video_data)
                )
                self._active_tasks.add(task)

                logger.debug(
                    f"Started processing task for video: {video_data.get('video_id', 'unknown')}"
                )

            except asyncio.CancelledError:
                logger.info("Processing loop cancelled")
                break
            except Exception as e:
                logger.error(f"Error in main processing loop: {e}")
                await asyncio.sleep(self.poll_interval)

    finally:
        logger.info("Shutting down VideoIndexingProcessor")
        await self._cleanup_active_tasks()

stop() async

Gracefully stop the processor

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/processor.py
async def stop(self) -> None:
    """Signal the processing loop to exit; run() performs the actual drain."""
    logger.info("Stopping VideoIndexingProcessor")
    self._shutdown_event.set()
    self._running = False

VideoStorageService

Bases: HealthCheckable

Handles video metadata storage in MongoDB

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/storage.py
class VideoStorageService(HealthCheckable):
    """Handles video metadata storage in MongoDB"""

    def __init__(self, client: Any, config: MongoDBConfig, retry_config: RetryConfig):
        self.client = client
        self.config = config
        # DuplicateKeyError is excluded from retries and fails immediately.
        # NOTE(review): concurrent upserts on the same video_id can race and
        # raise a transient DuplicateKeyError that a retry would resolve —
        # confirm this exclusion is intentional.
        self.retry = RetryableOperation(
            retry_config, non_retry_exceptions=[DuplicateKeyError]
        )
        self.db = self.client[config.database_name]
        self.videos_collection = self.db[config.videos_collection]
        logger.info("Initialized VideoStorageService")

    async def ensure_indices(self) -> OperationResult:
        """Ensure the required database indices exist.

        Iterates config.video_indexes, creating each index and treating an
        "already exists" OperationFailure as success. Returns a failure
        result listing the indexes that could not be created, otherwise a
        success result naming the ensured indexes.
        """
        created_indexes = []
        failed_indexes = []

        for index_name, index_config in self.config.video_indexes.items():
            try:
                await self.videos_collection.create_index(index_name, **index_config)
                created_indexes.append(index_name)
                logger.debug(f"Created index: {index_name}")
            except OperationFailure as e:
                # Substring match on the server message: an index with the
                # same name already existing counts as ensured, not failed.
                if "already exists" in str(e):
                    logger.debug(f"Index '{index_name}' already exists")
                    created_indexes.append(index_name)
                else:
                    failed_indexes.append(index_name)
                    logger.error(f"Failed to create index {index_name}: {str(e)}")

        if failed_indexes:
            return OperationResult.failure(
                f"Failed to create indexes: {failed_indexes}",
                metadata={"created": created_indexes, "failed": failed_indexes},
            )
        else:
            return OperationResult.success(
                f"Ensured indexes: {created_indexes}",
                metadata={"indexes": created_indexes},
            )

    async def store_video(self, video_data: Dict[str, Any]) -> OperationResult:
        """Store video metadata in MongoDB with retry logic.

        Upserts on video_id, stamping updated_at with the current UTC time.
        Returns success metadata indicating whether the document was
        "updated" or "inserted", or a failure result once retries are
        exhausted.
        """
        video_id = video_data.get("video_id")
        if not video_id:
            return OperationResult.failure("Video data missing video_id")

        async def _store_operation():
            result = await self.videos_collection.update_one(
                {"video_id": video_id},
                {"$set": {**video_data, "updated_at": datetime.now(timezone.utc)}},
                upsert=True,
            )
            return result

        try:
            result = await self.retry.execute(
                _store_operation, f"store_video_{video_id}"
            )
            logger.debug(f"Stored video in MongoDB: {video_id}")

            # matched_count > 0 means an existing document was updated;
            # otherwise the upsert inserted a new one.
            action = "updated" if result.matched_count > 0 else "inserted"

            return OperationResult.success(
                f"Video {action}: {video_id}",
                metadata={"video_id": video_id, "action": action},
            )
        except Exception as e:
            logger.error(f"Failed to store video in MongoDB: {str(e)}")
            logger.debug(traceback.format_exc())
            return OperationResult.failure(f"Failed to store video: {str(e)}", e)

    async def health_check(self) -> HealthStatus:
        """Check MongoDB connection health.

        Issues an admin "ping" and reports round-trip time in milliseconds;
        any exception is reported as unhealthy with the error message.
        """
        start_time = time.time()
        try:
            # Simple ping to check connection
            await self.client.admin.command("ping")
            response_time = (time.time() - start_time) * 1000

            return HealthStatus(
                service_name="mongodb",
                is_healthy=True,
                response_time_ms=response_time,
                message="Connection healthy",
            )
        except Exception as e:
            response_time = (time.time() - start_time) * 1000
            return HealthStatus(
                service_name="mongodb",
                is_healthy=False,
                response_time_ms=response_time,
                message=f"Health check failed: {str(e)}",
            )

ensure_indices() async

Ensure the required database indices exist

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/storage.py
async def ensure_indices(self) -> OperationResult:
    """Create every configured video-collection index, tolerating ones that
    already exist, and report the aggregate outcome."""
    created_indexes = []
    failed_indexes = []

    for index_name, index_config in self.config.video_indexes.items():
        try:
            await self.videos_collection.create_index(index_name, **index_config)
        except OperationFailure as e:
            if "already exists" not in str(e):
                failed_indexes.append(index_name)
                logger.error(f"Failed to create index {index_name}: {str(e)}")
                continue
            logger.debug(f"Index '{index_name}' already exists")
            created_indexes.append(index_name)
        else:
            created_indexes.append(index_name)
            logger.debug(f"Created index: {index_name}")

    if failed_indexes:
        return OperationResult.failure(
            f"Failed to create indexes: {failed_indexes}",
            metadata={"created": created_indexes, "failed": failed_indexes},
        )
    return OperationResult.success(
        f"Ensured indexes: {created_indexes}",
        metadata={"indexes": created_indexes},
    )

health_check() async

Check MongoDB connection health

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/storage.py
async def health_check(self) -> HealthStatus:
    """Report MongoDB connectivity as a HealthStatus via an admin ping."""
    started = time.time()
    try:
        # A ping round-trip is the cheapest way to verify the connection.
        await self.client.admin.command("ping")
    except Exception as e:
        return HealthStatus(
            service_name="mongodb",
            is_healthy=False,
            response_time_ms=(time.time() - started) * 1000,
            message=f"Health check failed: {str(e)}",
        )

    return HealthStatus(
        service_name="mongodb",
        is_healthy=True,
        response_time_ms=(time.time() - started) * 1000,
        message="Connection healthy",
    )

store_video(video_data) async

Store video metadata in MongoDB with retry logic

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/storage.py
async def store_video(self, video_data: Dict[str, Any]) -> OperationResult:
    """Upsert one video metadata document into MongoDB with retry logic.

    Keys on video_id and stamps updated_at with the current UTC time.
    Returns success metadata indicating whether the document was "updated"
    or "inserted", or a failure result once retries are exhausted.
    """
    video_id = video_data.get("video_id")
    if not video_id:
        return OperationResult.failure("Video data missing video_id")

    async def _upsert():
        return await self.videos_collection.update_one(
            {"video_id": video_id},
            {"$set": {**video_data, "updated_at": datetime.now(timezone.utc)}},
            upsert=True,
        )

    try:
        outcome = await self.retry.execute(_upsert, f"store_video_{video_id}")
    except Exception as e:
        logger.error(f"Failed to store video in MongoDB: {str(e)}")
        logger.debug(traceback.format_exc())
        return OperationResult.failure(f"Failed to store video: {str(e)}", e)

    logger.debug(f"Stored video in MongoDB: {video_id}")
    action = "inserted" if outcome.matched_count == 0 else "updated"
    return OperationResult.success(
        f"Video {action}: {video_id}",
        metadata={"video_id": video_id, "action": action},
    )

VideoTranscriptService

Downloads YouTube video transcripts with multiple language support

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/transcript.py
class VideoTranscriptService:
    """Downloads YouTube video transcripts with multiple language support.

    Both public entry points share one three-stage lookup:
      1. each preferred language, in order;
      2. an auto-generated English transcript;
      3. any transcript the video offers at all.

    ``get_transcript`` formats the result as plain text while
    ``get_transcript_with_timestamps`` returns the raw timed segments.
    """

    def __init__(self, languages: Optional[List[str]] = None):
        """
        Initialize the transcript service.

        Args:
            languages: Preferred languages in order of preference.
                      Defaults to ['en', 'en-US'] if not provided.
        """
        self.languages = ["en", "en-US"] if languages is None else languages
        self.formatter = TextFormatter()

    def _fetch_segments(self, video_id: str, label: str) -> Optional[List[Dict]]:
        """Run the three-stage transcript lookup and return raw segments.

        Args:
            video_id: YouTube video ID (assumed non-empty; validated by callers).
            label: Interpolated into log messages ("" or "timestamped ") so
                each public entry point keeps its original log wording.

        Returns:
            Raw transcript segments, or None if nothing could be fetched.
        """
        try:
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)

            # Stage 1: preferred languages, in caller-specified order.
            for language in self.languages:
                try:
                    transcript = transcript_list.find_transcript([language])
                    transcript_data = transcript.fetch()
                    logger.info(
                        f"Successfully retrieved {label}transcript for {video_id} in {language}"
                    )
                    return transcript_data
                except NoTranscriptFound:
                    logger.debug(
                        f"No transcript found for {video_id} in language {language}"
                    )
                except Exception as e:
                    logger.debug(
                        f"Error getting {label}transcript in {language} for {video_id}: {e}"
                    )

            # Stage 2: YouTube's auto-generated English transcript.
            try:
                transcript = transcript_list.find_generated_transcript(["en"])
                transcript_data = transcript.fetch()
                logger.info(
                    f"Successfully retrieved generated English {label}transcript for {video_id}"
                )
                return transcript_data
            except NoTranscriptFound:
                logger.debug(f"No generated English transcript found for {video_id}")
            except Exception as e:
                logger.debug(
                    f"Error getting generated {label}transcript for {video_id}: {e}"
                )

            # Stage 3: last resort — the first transcript of any language.
            try:
                available_transcripts = list(transcript_list)
                if not available_transcripts:
                    logger.warning(f"No transcripts available for video {video_id}")
                    return None
                first_transcript = available_transcripts[0]
                transcript_data = first_transcript.fetch()
                logger.info(
                    f"Retrieved fallback {label}transcript for {video_id} in {first_transcript.language}"
                )
                return transcript_data
            except Exception as e:
                logger.error(
                    f"Error fetching fallback {label}transcript for {video_id}: {e}"
                )
            # Explicit: previously an implicit fall-through to None.
            return None

        except TranscriptsDisabled:
            logger.warning(f"Transcripts are disabled for video {video_id}")
            return None
        except VideoUnavailable:
            logger.error(f"Video {video_id} is unavailable")
            return None
        except NoTranscriptFound:
            logger.warning(f"No transcripts available for video {video_id}")
            return None
        except CouldNotRetrieveTranscript as e:
            logger.error(f"Could not retrieve {label}transcript for {video_id}: {e}")
            return None
        except Exception as e:
            logger.error(
                f"Unexpected error getting {label}transcript for video {video_id}: {e}"
            )
            return None

    def get_transcript(self, video_id: str) -> Optional[str]:
        """
        Download and format transcript for a YouTube video.

        Args:
            video_id: YouTube video ID

        Returns:
            Formatted transcript text or None if unavailable
        """
        if not video_id or not video_id.strip():
            logger.error("Empty or invalid video ID provided")
            return None

        transcript_data = self._fetch_segments(video_id, "")
        if transcript_data is None:
            return None
        try:
            return self.formatter.format_transcript(transcript_data)
        except Exception as e:
            # Formatting failures were previously swallowed by the broad
            # per-stage handlers; keep the None contract rather than raising.
            logger.error(f"Error formatting transcript for {video_id}: {e}")
            return None

    def get_transcript_with_timestamps(self, video_id: str) -> Optional[List[Dict]]:
        """
        Get transcript with timing information for each segment.

        Args:
            video_id: YouTube video ID

        Returns:
            List of transcript segments with 'text', 'start', 'duration' keys,
            or None if transcript unavailable
        """
        if not video_id or not video_id.strip():
            logger.error("Empty or invalid video ID provided")
            return None
        return self._fetch_segments(video_id, "timestamped ")

__init__(languages=None)

Initialize the transcript service.

Parameters:

| Name        | Type                  | Description                                                                                | Default |
| ----------- | --------------------- | ------------------------------------------------------------------------------------------ | ------- |
| `languages` | `Optional[List[str]]` | Preferred languages in order of preference. Defaults to `['en', 'en-US']` if not provided. | `None`  |
Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/transcript.py
def __init__(self, languages: Optional[List[str]] = None):
    """Set up the transcript service.

    Args:
        languages: Languages to try, most preferred first. Falls back to
            ['en', 'en-US'] when not supplied.
    """
    self.languages = ["en", "en-US"] if languages is None else languages
    self.formatter = TextFormatter()

get_transcript(video_id)

Download and format transcript for a YouTube video.

Parameters:

| Name       | Type  | Description      | Default  |
| ---------- | ----- | ---------------- | -------- |
| `video_id` | `str` | YouTube video ID | required |

Returns:

| Type            | Description                                      |
| --------------- | ------------------------------------------------ |
| `Optional[str]` | Formatted transcript text or None if unavailable |

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/transcript.py
def get_transcript(self, video_id: str) -> Optional[str]:
    """Download a YouTube transcript and return it as formatted text.

    Args:
        video_id: YouTube video ID

    Returns:
        Formatted transcript text, or None when no transcript can be fetched.
    """
    if not video_id or not video_id.strip():
        logger.error("Empty or invalid video ID provided")
        return None

    try:
        listing = YouTubeTranscriptApi.list_transcripts(video_id)

        # First pass: the caller's preferred languages, in order.
        for lang in self.languages:
            try:
                fetched = listing.find_transcript([lang]).fetch()
                text = self.formatter.format_transcript(fetched)
                logger.info(
                    f"Successfully retrieved transcript for {video_id} in {lang}"
                )
                return text
            except NoTranscriptFound:
                logger.debug(
                    f"No transcript found for {video_id} in language {lang}"
                )
            except Exception as e:
                logger.debug(
                    f"Error getting transcript in {lang} for {video_id}: {e}"
                )

        # Second pass: YouTube's auto-generated English transcript.
        try:
            fetched = listing.find_generated_transcript(["en"]).fetch()
            text = self.formatter.format_transcript(fetched)
            logger.info(
                f"Successfully retrieved generated English transcript for {video_id}"
            )
            return text
        except NoTranscriptFound:
            logger.debug(f"No generated English transcript found for {video_id}")
        except Exception as e:
            logger.debug(f"Error getting generated transcript for {video_id}: {e}")

        # Final pass: take whatever transcript exists, if any.
        try:
            candidates = list(listing)
            if not candidates:
                logger.warning(f"No transcripts available for video {video_id}")
                return None
            chosen = candidates[0]
            text = self.formatter.format_transcript(chosen.fetch())
            logger.info(
                f"Retrieved fallback transcript for {video_id} in {chosen.language}"
            )
            return text
        except Exception as e:
            logger.error(f"Error fetching fallback transcript for {video_id}: {e}")

    except TranscriptsDisabled:
        logger.warning(f"Transcripts are disabled for video {video_id}")
        return None
    except VideoUnavailable:
        logger.error(f"Video {video_id} is unavailable")
        return None
    except NoTranscriptFound:
        logger.warning(f"No transcripts available for video {video_id}")
        return None
    except CouldNotRetrieveTranscript as e:
        logger.error(f"Could not retrieve transcript for {video_id}: {e}")
        return None
    except Exception as e:
        logger.error(
            f"Unexpected error getting transcript for video {video_id}: {e}"
        )
        return None

get_transcript_with_timestamps(video_id)

Get transcript with timing information for each segment.

Parameters:

| Name       | Type  | Description      | Default  |
| ---------- | ----- | ---------------- | -------- |
| `video_id` | `str` | YouTube video ID | required |

Returns:

| Type                   | Description                                                                                       |
| ---------------------- | ------------------------------------------------------------------------------------------------- |
| `Optional[List[Dict]]` | List of transcript segments with 'text', 'start', 'duration' keys, or None if transcript unavailable |

Source code in .venv/lib/python3.12/site-packages/ytindexer/indexer/transcript.py
def get_transcript_with_timestamps(self, video_id: str) -> Optional[List[Dict]]:
    """Fetch a transcript as raw timed segments.

    Args:
        video_id: YouTube video ID

    Returns:
        A list of segment dicts ('text', 'start', 'duration'), or None when
        no transcript can be fetched.
    """
    if not video_id or not video_id.strip():
        logger.error("Empty or invalid video ID provided")
        return None

    try:
        listing = YouTubeTranscriptApi.list_transcripts(video_id)

        # First pass: the caller's preferred languages, in order.
        for lang in self.languages:
            try:
                segments = listing.find_transcript([lang]).fetch()
                logger.info(
                    f"Successfully retrieved timestamped transcript for {video_id} in {lang}"
                )
                return segments
            except NoTranscriptFound:
                logger.debug(
                    f"No transcript found for {video_id} in language {lang}"
                )
            except Exception as e:
                logger.debug(
                    f"Error getting timestamped transcript in {lang} for {video_id}: {e}"
                )

        # Second pass: YouTube's auto-generated English transcript.
        try:
            segments = listing.find_generated_transcript(["en"]).fetch()
            logger.info(
                f"Successfully retrieved generated English timestamped transcript for {video_id}"
            )
            return segments
        except NoTranscriptFound:
            logger.debug(f"No generated English transcript found for {video_id}")
        except Exception as e:
            logger.debug(
                f"Error getting generated timestamped transcript for {video_id}: {e}"
            )

        # Final pass: take whatever transcript exists, if any.
        try:
            candidates = list(listing)
            if not candidates:
                logger.warning(f"No transcripts available for video {video_id}")
                return None
            chosen = candidates[0]
            segments = chosen.fetch()
            logger.info(
                f"Retrieved fallback timestamped transcript for {video_id} in {chosen.language}"
            )
            return segments
        except Exception as e:
            logger.error(
                f"Error fetching fallback timestamped transcript for {video_id}: {e}"
            )

    except TranscriptsDisabled:
        logger.warning(f"Transcripts are disabled for video {video_id}")
        return None
    except VideoUnavailable:
        logger.error(f"Video {video_id} is unavailable")
        return None
    except NoTranscriptFound:
        logger.warning(f"No transcripts available for video {video_id}")
        return None
    except CouldNotRetrieveTranscript as e:
        logger.error(
            f"Could not retrieve timestamped transcript for {video_id}: {e}"
        )
        return None
    except Exception as e:
        logger.error(
            f"Unexpected error getting timestamped transcript for video {video_id}: {e}"
        )
        return None