<?php

// Raise the memory ceiling and remove the execution time limit for this
// script (presumably because the handlers below scan whole product
// directories within a single request).
ini_set('memory_limit', '1024M');
set_time_limit(0);

// Maximum number of product URLs emitted per paginated product sitemap file.
define('SITEMAP_PRODUCT_PAGE_SIZE', 5000);

/**
 * Escape a value for safe inclusion in XML text or attribute values.
 *
 * The value is cast to string; &, <, > and both quote characters are turned
 * into XML entities. Invalid UTF-8 byte sequences and code points that are
 * not permitted in XML 1.0 are replaced with U+FFFD; without these flags
 * htmlspecialchars() would return an empty string for the whole input as
 * soon as it met a single invalid byte.
 *
 * @param mixed $text Value to escape (cast to string).
 * @return string Escaped UTF-8 text.
 */
function xml_escape($text)
{
    $flags = ENT_XML1 | ENT_QUOTES | ENT_SUBSTITUTE | ENT_DISALLOWED;

    return htmlspecialchars((string)$text, $flags, 'UTF-8');
}

/**
 * Build the site's base URL (scheme + host, without a trailing slash).
 *
 * The host is taken from the client-supplied Host header and ends up in every
 * URL this script emits, so it is validated first: only a hostname / IPv4
 * address or a bracketed IPv6 literal, each with an optional port, is
 * accepted. A missing or malformed header falls back to 127.0.0.1.
 *
 * HTTPS is selected when $_SERVER['HTTPS'] is set to a truthy value other
 * than "off", when REQUEST_SCHEME is "https", or when the host is exactly
 * onlinenorms.com (compared case-insensitively).
 *
 * @return string Base URL such as "https://example.com".
 */
function get_base_url()
{
    $host = isset($_SERVER['HTTP_HOST']) ? (string)$_SERVER['HTTP_HOST'] : '';

    // \z (not $) so a trailing newline cannot slip through the check.
    $validHost = '/^(?:[a-z0-9._-]+|\[[0-9a-f:.]+\])(?::[0-9]{1,5})?\z/i';
    if (!preg_match($validHost, $host)) {
        $host = '127.0.0.1';
    }

    $https = false;

    if (isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] && strtolower((string)$_SERVER['HTTPS']) !== 'off') {
        $https = true;
    }
    if (isset($_SERVER['REQUEST_SCHEME']) && strtolower((string)$_SERVER['REQUEST_SCHEME']) === 'https') {
        $https = true;
    }
    // The production domain is always advertised over HTTPS.
    if (strtolower($host) === 'onlinenorms.com') {
        $https = true;
    }

    return ($https ? 'https' : 'http') . '://' . $host;
}

/**
 * Return the lower-cased path component of the current request URI.
 *
 * Falls back to "/sitemap.xml" when REQUEST_URI is not set, or when
 * parse_url() does not yield a non-empty string path for it.
 *
 * @return string Lower-cased request path.
 */
function get_request_path()
{
    $fallback = '/sitemap.xml';

    if (isset($_SERVER['REQUEST_URI'])) {
        $requestUri = (string)$_SERVER['REQUEST_URI'];
    } else {
        $requestUri = $fallback;
    }

    $component = parse_url($requestUri, PHP_URL_PATH);
    if (is_string($component) && $component !== '') {
        return strtolower($component);
    }

    return $fallback;
}

/**
 * Join a base URL and a path with exactly one slash between them.
 *
 * Trailing slashes are stripped from $base and leading slashes from $path
 * before the two are joined.
 *
 * @param string $base Base URL, e.g. "https://example.com".
 * @param string $path Path to append, with or without a leading slash.
 * @return string Combined URL.
 */
function full_url($base, $path)
{
    $root = rtrim($base, '/');
    $relative = ltrim($path, '/');

    return implode('/', array($root, $relative));
}

/**
 * Send the XML Content-Type header and print the XML declaration line.
 */
function emit_xml_header()
{
    header('Content-Type: application/xml; charset=UTF-8');

    $declaration = '<?xml version="1.0" encoding="UTF-8"?>';
    echo $declaration, "\n";
}

/**
 * Print one <sitemap> child element of a sitemap index.
 *
 * @param string $loc     Absolute URL of the child sitemap (escaped here).
 * @param string $lastmod Last-modified timestamp; the <lastmod> element is
 *                        omitted when this is the empty string.
 */
function emit_sitemap_entry($loc, $lastmod = '')
{
    $xml = "<sitemap>\n"
        . '  <loc>' . xml_escape($loc) . "</loc>\n";

    if ($lastmod !== '') {
        $xml .= '  <lastmod>' . xml_escape($lastmod) . "</lastmod>\n";
    }

    echo $xml . "</sitemap>\n";
}

/**
 * Print one <url> child element of a urlset.
 *
 * @param string $loc     Absolute page URL (escaped here).
 * @param string $lastmod Last-modified timestamp; the <lastmod> element is
 *                        omitted when this is the empty string.
 */
function emit_url_entry($loc, $lastmod = '')
{
    $lines = array(
        '<url>',
        '  <loc>' . xml_escape($loc) . '</loc>',
    );

    if ($lastmod !== '') {
        $lines[] = '  <lastmod>' . xml_escape($lastmod) . '</lastmod>';
    }

    $lines[] = '</url>';

    echo implode("\n", $lines) . "\n";
}

/**
 * List the language codes available in a language directory.
 *
 * Every regular file whose name ends in ".php" (case-insensitive)
 * contributes its lower-cased base name as a language code. Entries that are
 * not regular files, and a bare ".php" file (which would produce an empty
 * code that sorts first and could be picked as the URL language), are
 * skipped. The result is de-duplicated and sorted; when nothing is found the
 * list defaults to array('en').
 *
 * @param string $langDir Directory holding one <code>.php file per language.
 * @return string[] Sorted, non-empty list of language codes.
 */
function get_languages($langDir)
{
    $langs = array();

    $entries = is_dir($langDir) ? @scandir($langDir) : false;
    if (is_array($entries)) {
        foreach ($entries as $fileName) {
            if (substr(strtolower($fileName), -4) !== '.php') {
                continue;
            }

            $code = strtolower((string)substr($fileName, 0, -4));
            if ($code === '' || !is_file($langDir . '/' . $fileName)) {
                continue;
            }

            $langs[] = $code;
        }
    }

    $langs = array_values(array_unique($langs));
    sort($langs);

    if (empty($langs)) {
        $langs[] = 'en';
    }

    return $langs;
}

/**
 * List the publisher sub-directories of the category directory.
 *
 * Returns the lower-cased names of all immediate sub-directories,
 * de-duplicated and sorted. A missing or unreadable directory yields an
 * empty list instead of an uncaught exception, matching the best-effort
 * behaviour of the other directory scans in this file.
 *
 * NOTE(review): names are lower-cased here, yet callers append them to
 * $cateDir to open the directory again. On a case-sensitive filesystem a
 * directory containing upper-case letters would not resolve — confirm that
 * all publisher directories are lower-case on disk.
 *
 * @param string $cateDir Directory containing one sub-directory per publisher.
 * @return string[] Sorted list of lower-cased publisher directory names.
 */
function get_publishers($cateDir)
{
    $publishers = array();
    if (!is_dir($cateDir)) {
        return $publishers;
    }

    try {
        $it = new DirectoryIterator($cateDir);
    } catch (Exception $e) {
        // is_dir() passed but the directory could not be opened
        // (e.g. permissions): treat it as having no publishers.
        return $publishers;
    }

    foreach ($it as $item) {
        if ($item->isDot() || !$item->isDir()) {
            continue;
        }
        $publishers[] = strtolower($item->getFilename());
    }

    $publishers = array_values(array_unique($publishers));
    sort($publishers);

    return $publishers;
}

/**
 * Count the .json entries in each publisher directory, with a file cache.
 *
 * The cache lives at img_cache/sitemap_product_counts.json next to this
 * script. It is reused when its recorded timestamp is between 0 and $ttl
 * seconds old; otherwise every publisher directory is rescanned and the
 * cache is rewritten. The cache is not keyed by $cateDir or $publishers: a
 * fresh cache is returned as stored, whatever arguments were passed.
 *
 * Writing the cache is best-effort and never fails the request. The payload
 * is encoded first and written to a temporary file that is then renamed over
 * the cache, so a failed encode or a half-finished write cannot truncate or
 * corrupt an existing cache file.
 *
 * @param string   $cateDir    Directory containing one sub-directory per publisher.
 * @param string[] $publishers Publisher directory names to count.
 * @param int      $ttl        Maximum cache age in seconds (default 21600, i.e. 6 hours).
 * @return array Map of publisher name => number of entries ending in ".json".
 */
function get_publisher_json_counts($cateDir, array $publishers, $ttl = 21600)
{
    $cacheFile = __DIR__ . '/img_cache/sitemap_product_counts.json';
    $now = time();

    if (is_file($cacheFile)) {
        $raw = @file_get_contents($cacheFile);
        $obj = $raw ? json_decode($raw, true) : null;
        if (is_array($obj) && isset($obj['time']) && isset($obj['counts']) && is_array($obj['counts'])) {
            $age = $now - (int)$obj['time'];
            // A negative age means the stored timestamp is in the future; ignore such a cache.
            if ($age >= 0 && $age <= $ttl) {
                return $obj['counts'];
            }
        }
    }

    $counts = array();
    foreach ($publishers as $publisher) {
        $count = 0;
        $dir = $cateDir . '/' . $publisher;
        if (is_dir($dir)) {
            $files = @scandir($dir);
            if (is_array($files)) {
                foreach ($files as $fileName) {
                    if (substr(strtolower($fileName), -5) === '.json') {
                        $count++;
                    }
                }
            }
        }
        $counts[$publisher] = $count;
    }

    $payload = array('time' => $now, 'counts' => $counts);
    $json = json_encode($payload, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);

    // json_encode() returns false on failure (e.g. invalid UTF-8 in a name);
    // writing that would leave an empty cache file behind.
    if ($json !== false) {
        $tmpFile = $cacheFile . '.' . uniqid('', true) . '.tmp';
        $written = @file_put_contents($tmpFile, $json);
        if ($written !== false) {
            if ($written !== strlen($json) || !@rename($tmpFile, $cacheFile)) {
                @unlink($tmpFile);
            }
        }
    }

    return $counts;
}

/**
 * Print the root sitemap index (top layer of the hierarchy).
 *
 * Lists the categories sitemap first and the products sitemap index second,
 * each stamped with the current UTC time as its lastmod.
 *
 * @param string $base Site base URL.
 */
function output_root_sitemap($base)
{
    $children = array('/sitemap-categories.xml', '/sitemap-products.xml');

    emit_xml_header();
    echo '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";

    foreach ($children as $childPath) {
        emit_sitemap_entry(full_url($base, $childPath), gmdate('c'));
    }

    echo "</sitemapindex>\n";
}

/**
 * Print the categories sitemap.
 *
 * Emits one URL per publisher in the form /{lang}/cate/{publisher}, where
 * {lang} is "en" when available and otherwise the first entry of $langs.
 * lastmod is the publisher directory's modification time, or the current
 * time when that cannot be read.
 *
 * @param string   $base       Site base URL.
 * @param string[] $langs      Available language codes.
 * @param string[] $publishers Publisher directory names.
 * @param string   $cateDir    Directory containing the publisher directories.
 */
function output_categories_sitemap($base, array $langs, array $publishers, $cateDir)
{
    if (in_array('en', $langs, true)) {
        $lang = 'en';
    } else {
        $lang = $langs[0];
    }
    $prefix = '/' . $lang . '/cate/';

    emit_xml_header();
    echo '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";

    foreach ($publishers as $publisher) {
        $modified = @filemtime($cateDir . '/' . $publisher);
        if ($modified) {
            $lastmod = gmdate('c', $modified);
        } else {
            $lastmod = gmdate('c');
        }

        emit_url_entry(full_url($base, $prefix . rawurlencode($publisher)), $lastmod);
    }

    echo "</urlset>\n";
}

/**
 * Print the products sitemap index, which only lists the paginated files
 * (/sitemap-products-1.xml, /sitemap-products-2.xml, ...).
 *
 * The page count is the total product count divided by
 * SITEMAP_PRODUCT_PAGE_SIZE, rounded up, with a minimum of one page.
 *
 * @param string $base   Site base URL.
 * @param array  $counts Map of publisher => product count.
 */
function output_products_index_sitemap($base, array $counts)
{
    $totalProducts = array_sum(array_map('intval', $counts));
    $pages = max(1, (int)ceil($totalProducts / SITEMAP_PRODUCT_PAGE_SIZE));

    emit_xml_header();
    echo '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";

    $pageNo = 1;
    while ($pageNo <= $pages) {
        $childPath = '/sitemap-products-' . $pageNo . '.xml';
        emit_sitemap_entry(full_url($base, $childPath), gmdate('c'));
        $pageNo++;
    }

    echo "</sitemapindex>\n";
}

/**
 * Print one page of the products sitemap (SITEMAP_PRODUCT_PAGE_SIZE URLs).
 *
 * Products are the entries ending in ".json" inside each publisher
 * directory, taken in publisher order and then in scandir() order. URLs have
 * the form /{lang}/{publisher}/{slug}.html, where {lang} is "en" when
 * available and otherwise the first entry of $langs, and {slug} is the file
 * name without its ".json" suffix. lastmod is the file's modification time,
 * or the current time when that cannot be read.
 *
 * @param string   $base       Site base URL.
 * @param string[] $langs      Available language codes.
 * @param string[] $publishers Publisher directory names.
 * @param string   $cateDir    Directory containing the publisher directories.
 * @param int      $page       1-based page number; values below 1 are treated as 1.
 */
function output_products_page_sitemap($base, array $langs, array $publishers, $cateDir, $page)
{
    $lang = in_array('en', $langs, true) ? 'en' : $langs[0];

    // A page below 1 would give a negative offset, which array_slice()
    // interprets as "count from the end" and would emit the wrong entries.
    $page = max(1, (int)$page);

    // $offset: products still to skip before this page starts.
    // $remain: URLs this page may still emit.
    $offset = ($page - 1) * SITEMAP_PRODUCT_PAGE_SIZE;
    $remain = SITEMAP_PRODUCT_PAGE_SIZE;

    emit_xml_header();
    echo '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";

    foreach ($publishers as $publisher) {
        if ($remain <= 0) {
            break;
        }

        $dir = $cateDir . '/' . $publisher;
        if (!is_dir($dir)) {
            continue;
        }

        $files = @scandir($dir);
        if (!is_array($files)) {
            continue;
        }

        $jsonFiles = array();
        foreach ($files as $fileName) {
            if (substr(strtolower($fileName), -5) === '.json') {
                $jsonFiles[] = $fileName;
            }
        }

        $count = count($jsonFiles);
        if ($count <= 0) {
            continue;
        }

        // This publisher lies entirely before the requested page: skip it.
        if ($offset >= $count) {
            $offset -= $count;
            continue;
        }

        $slice = array_slice($jsonFiles, $offset, $remain);
        foreach ($slice as $fileName) {
            $slug = substr($fileName, 0, -5);
            $loc = full_url(
                $base,
                '/' . $lang . '/' . rawurlencode($publisher) . '/' . rawurlencode($slug) . '.html'
            );

            $mtime = @filemtime($dir . '/' . $fileName);
            $lastmod = $mtime ? gmdate('c', $mtime) : gmdate('c');
            emit_url_entry($loc, $lastmod);
        }

        $remain -= count($slice);
        // The page has started, so later publishers are read from their first file.
        $offset = 0;
    }

    echo "</urlset>\n";
}

// ---------------------------------------------------------------------------
// Request routing.
//
// Each route loads only the data it needs, so the root index and unknown
// routes no longer trigger directory scans, and the product-count cache is
// only consulted by the products index.
// ---------------------------------------------------------------------------
$base = get_base_url();
$path = get_request_path();
$cateDir = __DIR__ . '/cate';
$langDir = __DIR__ . '/lang';

// Root index: static list of child sitemaps, no filesystem data required.
if ($path === '/sitemap.xml' || $path === '/sitemap.php') {
    output_root_sitemap($base);
    exit;
}

// Category pages: one URL per publisher.
if ($path === '/sitemap-categories.xml') {
    output_categories_sitemap($base, get_languages($langDir), get_publishers($cateDir), $cateDir);
    exit;
}

// Products index: needs the (cached) per-publisher product counts.
if ($path === '/sitemap-products.xml') {
    $publishers = get_publishers($cateDir);
    output_products_index_sitemap($base, get_publisher_json_counts($cateDir, $publishers));
    exit;
}

// One page of product URLs; $path is already lower-cased by get_request_path().
if (preg_match('#^/sitemap-products-([0-9]+)\.xml$#', $path, $m)) {
    $page = max(1, (int)$m[1]);
    output_products_page_sitemap($base, get_languages($langDir), get_publishers($cateDir), $cateDir, $page);
    exit;
}

// No route matched.
http_response_code(404);
header('Content-Type: text/plain; charset=UTF-8');
echo "sitemap route not found";

