<?php

declare(strict_types=1);

namespace Agent\Modules\Indexing;

use Agent\Modules\OpenRouter\OpenRouterService;
use Agent\Support\Logger;
use WP_Error;

/**
 * Step-wise indexing pipeline for a single post: prepare → chunk → embed → finalize.
 *
 * Per-post pipeline progress and the run history are kept in WordPress options.
 * Chunks and the per-post index status are written to the `{prefix}ai_chunks` and
 * `{prefix}ai_index_status` tables when {@see isDbReady()} reports a usable `$wpdb`,
 * and to fallback options otherwise.
 */
final class ChunkPipelineService
{
    private const OPTION_PIPELINE_STATE = 'agent_chunk_pipeline_state';
    private const OPTION_PIPELINE_RUNS = 'agent_chunk_pipeline_runs';
    private const OPTION_FALLBACK_CHUNKS = 'agent_chunk_pipeline_chunks';
    private const OPTION_FALLBACK_INDEX_STATUS = 'agent_chunk_pipeline_index_status';

    private OpenRouterService $openRouter;
    private Logger $logger;

    public function __construct(OpenRouterService $openRouter, Logger $logger)
    {
        $this->openRouter = $openRouter;
        $this->logger = $logger;
    }

    /**
     * Step 1: validate the post, hash its title and content, and (re)initialise its pipeline state.
     *
     * When the stored index status is `indexed` with an identical content hash, the state is
     * marked `up_to_date`, which makes the chunk and embed steps skip their work.
     *
     * @return array{post_id:int,content_hash:string,already_indexed:bool,status:string}|WP_Error
     *         WP_Error `agent_post_not_found` (404) or `agent_empty_post_content` (400).
     */
    public function prepare(int $postId): array|WP_Error
    {
        $startedAt = gmdate('c');
        $post = get_post($postId);
        if (! $post instanceof \WP_Post) {
            return new WP_Error('agent_post_not_found', 'Post not found.', ['status' => 404, 'failed_step' => 'prepare']);
        }

        $content = trim((string) ($post->post_content ?? ''));
        if ($content === '') {
            return new WP_Error('agent_empty_post_content', 'Post content is empty.', ['status' => 400, 'failed_step' => 'prepare']);
        }

        $contentHash = hash('sha256', (string) ($post->post_title ?? '') . "\n" . $content);
        $existing = $this->readIndexStatus($postId);
        $alreadyIndexed = is_array($existing)
            && (string) ($existing['content_hash'] ?? '') === $contentHash
            && (string) ($existing['status'] ?? '') === 'indexed';
        $status = $alreadyIndexed ? 'up_to_date' : 'prepared';

        // Preparing always starts a fresh entry; step records from earlier runs are discarded.
        $state = $this->readPipelineState();
        $state[(string) $postId] = [
            'post_id' => $postId,
            'content_hash' => $contentHash,
            'status' => $status,
            'updated_at' => gmdate('c'),
            'steps' => [
                'prepare' => $this->stepRecord(
                    $alreadyIndexed ? 'skipped' : 'completed',
                    $startedAt,
                    $alreadyIndexed ? 'Content hash unchanged. Reindex skipped.' : 'Preparation complete.'
                ),
            ],
        ];
        update_option(self::OPTION_PIPELINE_STATE, $state, false);

        $result = [
            'post_id' => $postId,
            'content_hash' => $contentHash,
            'already_indexed' => $alreadyIndexed,
            'status' => $status,
        ];
        $this->logger->log('index_pipeline_prepare', 'info', $result);
        return $result;
    }

    /**
     * Step 2: split the post content into overlapping text chunks and persist them.
     *
     * @param int $chunkSize    Target chunk size in bytes; clamped to 300..8000.
     * @param int $chunkOverlap Overlap between consecutive chunks in bytes; clamped to 0..chunkSize/2.
     *
     * @return array<string,mixed>|WP_Error WP_Error `agent_pipeline_not_prepared` (409),
     *         `agent_post_not_found` (404) or `agent_chunking_failed` (500).
     */
    public function chunk(int $postId, int $chunkSize = 1200, int $chunkOverlap = 120): array|WP_Error
    {
        $startedAt = gmdate('c');
        $state = $this->readPipelineState();
        $entry = $this->pipelineEntry($state, $postId);
        if ($entry === null) {
            return new WP_Error('agent_pipeline_not_prepared', 'Prepare step must run first.', ['status' => 409, 'failed_step' => 'chunk']);
        }
        if ((string) ($entry['status'] ?? '') === 'up_to_date') {
            $entry['steps']['chunk'] = $this->stepRecord('skipped', $startedAt, 'Chunking skipped because content is unchanged.');
            $state[(string) $postId] = $entry;
            update_option(self::OPTION_PIPELINE_STATE, $state, false);
            return ['post_id' => $postId, 'status' => 'up_to_date', 'chunks_count' => 0];
        }

        $post = get_post($postId);
        if (! $post instanceof \WP_Post) {
            return new WP_Error('agent_post_not_found', 'Post not found.', ['status' => 404, 'failed_step' => 'chunk']);
        }

        $chunkSize = max(300, min(8000, $chunkSize));
        $chunkOverlap = max(0, min((int) floor($chunkSize / 2), $chunkOverlap));
        $chunks = $this->splitIntoChunks((string) ($post->post_content ?? ''), $chunkSize, $chunkOverlap);
        if ($chunks === []) {
            return new WP_Error('agent_chunking_failed', 'No chunks were generated from the post content.', ['status' => 500, 'failed_step' => 'chunk']);
        }

        $this->persistChunks($postId, $chunks);
        $entry['status'] = 'chunked';
        $entry['updated_at'] = gmdate('c');
        $entry['steps']['chunk'] = $this->stepRecord('completed', $startedAt, 'Generated ' . (string) count($chunks) . ' chunks.');
        $state[(string) $postId] = $entry;
        update_option(self::OPTION_PIPELINE_STATE, $state, false);

        $result = [
            'post_id' => $postId,
            'status' => 'chunked',
            'chunks_count' => count($chunks),
            'chunk_size' => $chunkSize,
            'chunk_overlap' => $chunkOverlap,
        ];
        $this->logger->log('index_pipeline_chunk', 'info', $result);
        return $result;
    }

    /**
     * Step 3: attach an embedding vector to every stored chunk.
     *
     * OpenRouter is used when it is enabled and has an API key; otherwise the deterministic
     * local embedding is used. If a remote call fails (WP_Error or a response without a usable
     * vector) the chunk falls back to the local embedding, unless `$allowLocalFallback` is
     * false, in which case the step aborts with `agent_embedding_failed`.
     *
     * The reported `provider` is `openrouter` as soon as at least one chunk was embedded
     * remotely, even if other chunks fell back to the local embedding.
     *
     * @param string|null $model Embedding model override; blank/null uses the configured default.
     *
     * @return array<string,mixed>|WP_Error WP_Error `agent_pipeline_not_prepared` (409),
     *         `agent_no_chunks_found` (409) or `agent_embedding_failed`.
     */
    public function embed(int $postId, ?string $model = null, bool $allowLocalFallback = true): array|WP_Error
    {
        $startedAt = gmdate('c');
        $state = $this->readPipelineState();
        $entry = $this->pipelineEntry($state, $postId);
        if ($entry === null) {
            return new WP_Error('agent_pipeline_not_prepared', 'Prepare step must run first.', ['status' => 409, 'failed_step' => 'embed']);
        }
        if ((string) ($entry['status'] ?? '') === 'up_to_date') {
            $entry['steps']['embed'] = $this->stepRecord('skipped', $startedAt, 'Embedding skipped because content is unchanged.');
            $state[(string) $postId] = $entry;
            update_option(self::OPTION_PIPELINE_STATE, $state, false);
            return ['post_id' => $postId, 'status' => 'up_to_date', 'embedded_count' => 0, 'provider' => 'none'];
        }

        $chunks = $this->loadChunks($postId);
        if ($chunks === []) {
            return new WP_Error('agent_no_chunks_found', 'No chunks found. Run chunk step first.', ['status' => 409, 'failed_step' => 'embed']);
        }

        // NOTE(review): the default embedding model is taken from the `planner_model` config
        // key — confirm that this is intended rather than a dedicated embedding-model setting.
        $cfg = $this->openRouter->config();
        $embeddingModel = $model !== null && trim($model) !== ''
            ? sanitize_text_field($model)
            : sanitize_text_field((string) ($cfg['planner_model'] ?? 'openrouter/free'));

        // Availability does not change between chunks, so it is checked once.
        $remoteAvailable = $this->openRouter->isEnabled() && $this->openRouter->hasApiKey();
        $provider = 'local';

        foreach ($chunks as $index => $chunk) {
            $text = (string) ($chunk['text'] ?? '');
            $embedding = null;

            if ($remoteAvailable) {
                $response = $this->openRouter->embeddings([
                    'model' => $embeddingModel !== '' ? $embeddingModel : 'openrouter/free',
                    'input' => $text,
                ]);

                if ($response instanceof WP_Error) {
                    if (! $allowLocalFallback) {
                        return new WP_Error(
                            'agent_embedding_failed',
                            'Embedding failed for chunk #' . (string) $index . ': ' . $response->get_error_message(),
                            ['status' => $this->errorStatus($response, 502), 'failed_step' => 'embed']
                        );
                    }
                } else {
                    $embedding = is_array($response) ? $this->extractEmbeddingVector($response) : null;
                    if ($embedding !== null) {
                        $provider = 'openrouter';
                    } elseif (! $allowLocalFallback) {
                        // A malformed response is a remote failure too; honour the no-fallback request.
                        return new WP_Error(
                            'agent_embedding_failed',
                            'Embedding failed for chunk #' . (string) $index . ': response contained no usable embedding vector.',
                            ['status' => 502, 'failed_step' => 'embed']
                        );
                    }
                }
            }

            if (! is_array($embedding)) {
                $embedding = $this->localEmbedding($text);
            }

            $chunks[$index]['embedding'] = $embedding;
        }

        $this->persistChunks($postId, $chunks);
        $entry['status'] = 'embedded';
        $entry['updated_at'] = gmdate('c');
        $entry['embedding_model'] = $embeddingModel;
        $entry['steps']['embed'] = $this->stepRecord(
            'completed',
            $startedAt,
            'Embedded ' . (string) count($chunks) . ' chunks via ' . $provider . '.'
        );
        $state[(string) $postId] = $entry;
        update_option(self::OPTION_PIPELINE_STATE, $state, false);

        $result = [
            'post_id' => $postId,
            'status' => 'embedded',
            'embedded_count' => count($chunks),
            'provider' => $provider,
            'embedding_model' => $embeddingModel,
        ];
        $this->logger->log('index_pipeline_embed', 'info', $result);
        return $result;
    }

    /**
     * Step 4: record the post as `indexed` with the content hash captured by prepare().
     *
     * Only allowed when the pipeline state is `embedded` or `up_to_date`.
     *
     * @return array{post_id:int,status:string,content_hash:string}|WP_Error
     *         WP_Error `agent_pipeline_not_prepared` (409), `agent_pipeline_not_embedded` (409)
     *         or `agent_pipeline_missing_hash` (500).
     */
    public function finalize(int $postId): array|WP_Error
    {
        $startedAt = gmdate('c');
        $state = $this->readPipelineState();
        $entry = $this->pipelineEntry($state, $postId);
        if ($entry === null) {
            return new WP_Error('agent_pipeline_not_prepared', 'Prepare step must run first.', ['status' => 409, 'failed_step' => 'finalize']);
        }

        $currentStatus = (string) ($entry['status'] ?? '');
        if ($currentStatus !== 'embedded' && $currentStatus !== 'up_to_date') {
            return new WP_Error('agent_pipeline_not_embedded', 'Embed step must run before finalize.', ['status' => 409, 'failed_step' => 'finalize']);
        }

        $contentHash = (string) ($entry['content_hash'] ?? '');
        if ($contentHash === '') {
            return new WP_Error('agent_pipeline_missing_hash', 'Missing content hash in pipeline state.', ['status' => 500, 'failed_step' => 'finalize']);
        }

        $this->writeIndexStatus($postId, $contentHash, 'indexed');
        $entry['status'] = 'indexed';
        $entry['updated_at'] = gmdate('c');
        $entry['steps']['finalize'] = $this->stepRecord('completed', $startedAt, 'Index status updated.');
        $state[(string) $postId] = $entry;
        update_option(self::OPTION_PIPELINE_STATE, $state, false);

        $result = [
            'post_id' => $postId,
            'status' => 'indexed',
            'content_hash' => $contentHash,
        ];
        $this->logger->log('index_pipeline_finalize', 'info', $result);
        return $result;
    }

    /**
     * Run all four steps in order and append the outcome to the run history.
     *
     * Recognised `$options` keys: `chunk_size` (int), `chunk_overlap` (int),
     * `embedding_model` (string) and `allow_local_fallback` (bool, default true).
     *
     * @param array<string,mixed> $options
     *
     * @return array<string,mixed>|WP_Error The run record on success, or the WP_Error of the
     *         first failing step (the failed run is recorded before it is returned).
     */
    public function run(int $postId, array $options = []): array|WP_Error
    {
        $runId = 'run_' . gmdate('YmdHis') . '_' . substr((string) wp_rand(), 0, 6);
        $startedAt = gmdate('c');
        $stepResults = [];

        $prepare = $this->prepare($postId);
        if ($prepare instanceof WP_Error) {
            return $this->recordRunFailure($runId, $postId, $startedAt, 'prepare', $prepare, $stepResults);
        }
        $stepResults['prepare'] = $prepare;

        $chunkSize = (int) ($options['chunk_size'] ?? 1200);
        $chunkOverlap = (int) ($options['chunk_overlap'] ?? 120);
        $chunk = $this->chunk($postId, $chunkSize, $chunkOverlap);
        if ($chunk instanceof WP_Error) {
            return $this->recordRunFailure($runId, $postId, $startedAt, 'chunk', $chunk, $stepResults);
        }
        $stepResults['chunk'] = $chunk;

        $embeddingModel = isset($options['embedding_model']) ? sanitize_text_field((string) $options['embedding_model']) : null;
        $allowLocalFallback = array_key_exists('allow_local_fallback', $options) ? (bool) $options['allow_local_fallback'] : true;
        $embed = $this->embed($postId, $embeddingModel, $allowLocalFallback);
        if ($embed instanceof WP_Error) {
            return $this->recordRunFailure($runId, $postId, $startedAt, 'embed', $embed, $stepResults);
        }
        $stepResults['embed'] = $embed;

        $finalize = $this->finalize($postId);
        if ($finalize instanceof WP_Error) {
            return $this->recordRunFailure($runId, $postId, $startedAt, 'finalize', $finalize, $stepResults);
        }
        $stepResults['finalize'] = $finalize;

        $run = [
            'run_id' => $runId,
            'post_id' => $postId,
            'status' => 'completed',
            'failed_step' => '',
            'error' => '',
            'started_at' => $startedAt,
            'finished_at' => gmdate('c'),
            'steps' => $stepResults,
        ];
        $this->appendRun($run);
        // A successful run is informational, like the per-step success logs.
        $this->logger->log('index_pipeline_run_completed', 'info', ['post_id' => $postId, 'run_id' => $runId]);

        return $run;
    }

    /**
     * Snapshot of the pipeline state, stored index status and chunk counts for a post.
     *
     * @return array{post_id:int,pipeline:array,index_status:?array,chunks_count:int,with_embeddings_count:int}
     */
    public function status(int $postId): array
    {
        $state = $this->readPipelineState();
        $chunks = $this->loadChunks($postId);

        return [
            'post_id' => $postId,
            'pipeline' => $this->pipelineEntry($state, $postId) ?? [],
            'index_status' => $this->readIndexStatus($postId),
            'chunks_count' => count($chunks),
            'with_embeddings_count' => $this->countEmbeddedChunks($chunks),
        ];
    }

    /**
     * Most recent run records, newest first.
     *
     * @param int $limit Maximum number of records; clamped to 1..200.
     *
     * @return list<array<string,mixed>>
     */
    public function latestRuns(int $limit = 20): array
    {
        $limit = max(1, min(200, $limit));
        $runs = get_option(self::OPTION_PIPELINE_RUNS, []);
        if (! is_array($runs)) {
            return [];
        }

        return array_slice(array_values($runs), 0, $limit);
    }

    /**
     * Strip markup, collapse whitespace and cut the text into overlapping chunks.
     *
     * Sizes are measured in bytes. A chunk that is not the last one is cut back to its last
     * space when that space lies beyond byte 200, so words are not split. Cut points and
     * overlap starts are aligned to UTF-8 character boundaries so that no chunk begins or
     * ends inside a multi-byte character.
     *
     * @return list<array{chunk_no:int,heading_path:string,text:string,embedding:null}>
     */
    private function splitIntoChunks(string $content, int $chunkSize, int $chunkOverlap): array
    {
        $text = trim((string) preg_replace('/\s+/', ' ', wp_strip_all_tags($content)));
        if ($text === '') {
            return [];
        }

        $chunks = [];
        $length = strlen($text);
        $start = 0;
        while ($start < $length) {
            $end = min($length, $start + $chunkSize);
            // Back off until the byte following the chunk starts a new character.
            while ($end < $length && $end > $start + 1 && $this->isUtf8ContinuationByte($text[$end])) {
                $end--;
            }
            if ($end <= $start) {
                break;
            }

            $window = substr($text, $start, $end - $start);
            if ($end < $length) {
                $lastSpace = strrpos($window, ' ');
                if ($lastSpace !== false && $lastSpace > 200) {
                    $window = substr($window, 0, $lastSpace);
                    $end = $start + $lastSpace;
                }
            }

            $chunks[] = [
                'chunk_no' => count($chunks),
                'heading_path' => '',
                'text' => trim($window),
                'embedding' => null,
            ];

            if ($end >= $length) {
                break;
            }

            // Step back by the overlap but always advance by at least one byte, then move
            // forward to the next character boundary so the next chunk starts on a full character.
            $next = max($start + 1, $end - $chunkOverlap);
            while ($next < $end && $this->isUtf8ContinuationByte($text[$next])) {
                $next++;
            }
            $start = $next;
        }

        return $chunks;
    }

    /**
     * Whether a single byte is a UTF-8 continuation byte (bit pattern 10xxxxxx).
     */
    private function isUtf8ContinuationByte(string $byte): bool
    {
        return (ord($byte) & 0xC0) === 0x80;
    }

    /**
     * Pull the first embedding out of a response shaped like `['data' => [['embedding' => [...]]]]`.
     *
     * Non-numeric components are dropped; null is returned when nothing usable remains.
     *
     * @param array<mixed> $response
     *
     * @return list<float>|null
     */
    private function extractEmbeddingVector(array $response): ?array
    {
        $rows = $response['data'] ?? null;
        if (! is_array($rows) || $rows === []) {
            return null;
        }

        $first = $rows[0] ?? null;
        if (! is_array($first) || ! is_array($first['embedding'] ?? null)) {
            return null;
        }

        $vector = [];
        foreach ($first['embedding'] as $value) {
            if (is_numeric($value)) {
                $vector[] = (float) $value;
            }
        }

        return $vector !== [] ? $vector : null;
    }

    /**
     * Deterministic 16-dimensional placeholder embedding derived from the SHA-256 of the text.
     *
     * Each component comes from four hex digits of the hash, mapped into [-1.0, 1.0).
     *
     * @return list<float>
     */
    private function localEmbedding(string $text): array
    {
        $hash = hash('sha256', $text);
        $vector = [];
        for ($i = 0; $i < 16; $i++) {
            $value = hexdec(substr($hash, $i * 4, 4));
            $vector[] = (float) ((($value % 2000) / 1000.0) - 1.0);
        }
        return $vector;
    }

    /**
     * Replace all stored chunks of a post with the given list.
     *
     * With a usable `$wpdb` the rows of `{prefix}ai_chunks` for the post are deleted and
     * re-inserted; otherwise the chunks are stored in the fallback option. Database failures
     * do not abort the call but are logged as a warning.
     *
     * @param array<int|string,array<string,mixed>> $chunks
     */
    private function persistChunks(int $postId, array $chunks): void
    {
        if ($this->isDbReady()) {
            global $wpdb;
            $table = $wpdb->prefix . 'ai_chunks';
            $now = gmdate('Y-m-d H:i:s');

            $deleteFailed = $wpdb->delete($table, ['post_id' => $postId], ['%d']) === false;
            $failedInserts = 0;
            foreach ($chunks as $chunk) {
                $inserted = $wpdb->insert(
                    $table,
                    [
                        'post_id' => $postId,
                        'chunk_no' => (int) ($chunk['chunk_no'] ?? 0),
                        'heading_path' => (string) ($chunk['heading_path'] ?? ''),
                        'text' => (string) ($chunk['text'] ?? ''),
                        'embedding' => wp_json_encode($chunk['embedding'] ?? null),
                        'created_at' => $now,
                        'updated_at' => $now,
                    ],
                    ['%d', '%d', '%s', '%s', '%s', '%s', '%s']
                );
                if ($inserted === false) {
                    $failedInserts++;
                }
            }

            if ($deleteFailed || $failedInserts > 0) {
                $this->logger->log('index_pipeline_persist_failed', 'warning', [
                    'post_id' => $postId,
                    'delete_failed' => $deleteFailed,
                    'failed_inserts' => $failedInserts,
                    'chunks_count' => count($chunks),
                    'db_error' => (string) ($wpdb->last_error ?? ''),
                ]);
            }
            return;
        }

        $all = get_option(self::OPTION_FALLBACK_CHUNKS, []);
        if (! is_array($all)) {
            $all = [];
        }
        $all[(string) $postId] = array_values($chunks);
        update_option(self::OPTION_FALLBACK_CHUNKS, $all, false);
    }

    /**
     * Load the stored chunks of a post ordered by chunk number (database) or as stored (fallback option).
     *
     * @return list<array<string,mixed>> Empty when nothing is stored or the query fails.
     */
    private function loadChunks(int $postId): array
    {
        if ($this->isDbReady()) {
            global $wpdb;
            $rows = $wpdb->get_results(
                $wpdb->prepare(
                    'SELECT chunk_no, heading_path, text, embedding FROM ' . $wpdb->prefix . 'ai_chunks WHERE post_id = %d ORDER BY chunk_no ASC',
                    $postId
                ),
                ARRAY_A
            );
            if (! is_array($rows)) {
                return [];
            }

            $chunks = [];
            foreach ($rows as $row) {
                if (! is_array($row)) {
                    continue;
                }
                // The embedding column holds JSON; anything that is not a JSON array counts as "no embedding".
                $decoded = json_decode((string) ($row['embedding'] ?? ''), true);
                $chunks[] = [
                    'chunk_no' => (int) ($row['chunk_no'] ?? 0),
                    'heading_path' => (string) ($row['heading_path'] ?? ''),
                    'text' => (string) ($row['text'] ?? ''),
                    'embedding' => is_array($decoded) ? $decoded : null,
                ];
            }
            return $chunks;
        }

        $all = get_option(self::OPTION_FALLBACK_CHUNKS, []);
        if (! is_array($all)) {
            return [];
        }
        $chunks = $all[(string) $postId] ?? [];
        return is_array($chunks) ? array_values($chunks) : [];
    }

    /**
     * Read the per-post pipeline state map from its option.
     *
     * @return array<int|string,mixed> Empty when the option is missing or not an array.
     */
    private function readPipelineState(): array
    {
        $state = get_option(self::OPTION_PIPELINE_STATE, []);
        return is_array($state) ? $state : [];
    }

    /**
     * Return the pipeline entry of a post from a state map, or null when there is none.
     *
     * @param array<int|string,mixed> $state
     *
     * @return array<string,mixed>|null
     */
    private function pipelineEntry(array $state, int $postId): ?array
    {
        $entry = $state[(string) $postId] ?? null;
        return is_array($entry) ? $entry : null;
    }

    /**
     * Build the record stored under `steps[<name>]`; `finished_at` is stamped at call time.
     *
     * @return array{status:string,started_at:string,finished_at:string,details:string}
     */
    private function stepRecord(string $status, string $startedAt, string $details): array
    {
        return [
            'status' => $status,
            'started_at' => $startedAt,
            'finished_at' => gmdate('c'),
            'details' => $details,
        ];
    }

    /**
     * Read the `status` entry from a WP_Error's data for its primary code.
     *
     * @param int $default Returned when the error data carries no numeric status.
     */
    private function errorStatus(WP_Error $error, int $default): int
    {
        $data = $error->get_error_data();
        if (is_array($data) && is_numeric($data['status'] ?? null)) {
            return (int) $data['status'];
        }
        return $default;
    }

    /**
     * Insert or update the index status of a post (database table, or fallback option).
     */
    private function writeIndexStatus(int $postId, string $contentHash, string $status): void
    {
        if ($this->isDbReady()) {
            global $wpdb;
            $table = $wpdb->prefix . 'ai_index_status';
            $existing = $wpdb->get_var(
                $wpdb->prepare('SELECT id FROM ' . $table . ' WHERE post_id = %d LIMIT 1', $postId)
            );
            if (is_numeric($existing) && (int) $existing > 0) {
                $wpdb->update(
                    $table,
                    [
                        'last_indexed_at' => gmdate('Y-m-d H:i:s'),
                        'content_hash' => $contentHash,
                        'status' => $status,
                    ],
                    ['id' => (int) $existing],
                    ['%s', '%s', '%s'],
                    ['%d']
                );
                return;
            }

            $wpdb->insert(
                $table,
                [
                    'post_id' => $postId,
                    'last_indexed_at' => gmdate('Y-m-d H:i:s'),
                    'content_hash' => $contentHash,
                    'status' => $status,
                ],
                ['%d', '%s', '%s', '%s']
            );
            return;
        }

        $fallback = get_option(self::OPTION_FALLBACK_INDEX_STATUS, []);
        if (! is_array($fallback)) {
            $fallback = [];
        }
        $fallback[(string) $postId] = [
            'post_id' => $postId,
            'last_indexed_at' => gmdate('c'),
            'content_hash' => $contentHash,
            'status' => $status,
        ];
        update_option(self::OPTION_FALLBACK_INDEX_STATUS, $fallback, false);
    }

    /**
     * Read the stored index status of a post.
     *
     * For database rows `last_indexed_at` is interpreted as UTC and returned in ISO 8601
     * format, or as null when the stored value is missing or cannot be parsed.
     *
     * @return array<string,mixed>|null Null when no status is stored for the post.
     */
    private function readIndexStatus(int $postId): ?array
    {
        if ($this->isDbReady()) {
            global $wpdb;
            $table = $wpdb->prefix . 'ai_index_status';
            $row = $wpdb->get_row(
                $wpdb->prepare('SELECT post_id, last_indexed_at, content_hash, status FROM ' . $table . ' WHERE post_id = %d LIMIT 1', $postId),
                ARRAY_A
            );
            if (! is_array($row)) {
                return null;
            }

            $lastIndexedAt = null;
            if (is_string($row['last_indexed_at'] ?? null)) {
                $timestamp = strtotime($row['last_indexed_at'] . ' UTC');
                // An unparseable date is reported as "unknown" rather than as the Unix epoch.
                $lastIndexedAt = $timestamp === false ? null : gmdate('c', $timestamp);
            }

            return [
                'post_id' => (int) ($row['post_id'] ?? 0),
                'last_indexed_at' => $lastIndexedAt,
                'content_hash' => (string) ($row['content_hash'] ?? ''),
                'status' => (string) ($row['status'] ?? ''),
            ];
        }

        $fallback = get_option(self::OPTION_FALLBACK_INDEX_STATUS, []);
        if (! is_array($fallback)) {
            return null;
        }
        $row = $fallback[(string) $postId] ?? null;
        return is_array($row) ? $row : null;
    }

    /**
     * Prepend a run record to the run history, keeping at most the 200 newest records.
     *
     * @param array<string,mixed> $run
     */
    private function appendRun(array $run): void
    {
        $runs = get_option(self::OPTION_PIPELINE_RUNS, []);
        if (! is_array($runs)) {
            $runs = [];
        }
        array_unshift($runs, $run);
        if (count($runs) > 200) {
            $runs = array_slice($runs, 0, 200);
        }
        update_option(self::OPTION_PIPELINE_RUNS, $runs, false);
    }

    /**
     * Record a failed run in the history, log it, and hand the original error back.
     *
     * @param array<string,mixed> $stepResults Results of the steps that completed before the failure.
     */
    private function recordRunFailure(string $runId, int $postId, string $startedAt, string $failedStep, WP_Error $error, array $stepResults): WP_Error
    {
        $errorCode = (string) $error->get_error_code();
        $run = [
            'run_id' => $runId,
            'post_id' => $postId,
            'status' => 'failed',
            'failed_step' => $failedStep,
            'error' => $error->get_error_message(),
            'error_code' => $errorCode,
            'started_at' => $startedAt,
            'finished_at' => gmdate('c'),
            'steps' => $stepResults,
        ];
        $this->appendRun($run);
        $this->logger->log('index_pipeline_run_failed', 'warning', ['post_id' => $postId, 'run_id' => $runId, 'failed_step' => $failedStep, 'error_code' => $errorCode]);
        return $error;
    }

    /**
     * Whether the global `$wpdb` is an object with a non-empty string prefix and all the
     * query methods this class calls. Does not check that the plugin tables exist.
     */
    private function isDbReady(): bool
    {
        global $wpdb;
        if (! isset($wpdb) || ! is_object($wpdb)) {
            return false;
        }
        if (! isset($wpdb->prefix) || ! is_string($wpdb->prefix) || $wpdb->prefix === '') {
            return false;
        }
        return method_exists($wpdb, 'insert')
            && method_exists($wpdb, 'update')
            && method_exists($wpdb, 'delete')
            && method_exists($wpdb, 'get_row')
            && method_exists($wpdb, 'get_results')
            && method_exists($wpdb, 'get_var')
            && method_exists($wpdb, 'prepare');
    }

    /**
     * Count the chunks that carry a non-empty embedding array.
     *
     * @param array<mixed> $chunks
     */
    private function countEmbeddedChunks(array $chunks): int
    {
        $count = 0;
        foreach ($chunks as $chunk) {
            if (is_array($chunk) && is_array($chunk['embedding'] ?? null) && $chunk['embedding'] !== []) {
                $count++;
            }
        }
        return $count;
    }
}

