<?php
namespace Filanco\SitemapTool;

use Curl\Curl;

/**
 * Crawls a site starting at a base URL, collects its internal links and
 * exports them as a sitemap (XML) or as JSON.
 *
 * The crawl state ($this->url, $this->links, $this->notFound) and the helpers
 * scan(), findLinks() and getLinks() are not declared in this class; they are
 * presumably provided by the Crawler trait — verify against the trait.
 *
 * @package Filanco\SitemapTool
 */
class Sitemap
{
    use Crawler;

    /** @var string Site root, stored without a trailing slash */
    protected $baseUrl;
    /** @var float Default <priority> value */
    protected $priority = 0.8;
    /** @var array Map of regex pattern body => priority override */
    protected $priorityCustom = [];
    /** @var string ISO 8601 timestamp written to every <lastmod> */
    protected $lastMod;
    /** @var string Default <changefreq> value */
    protected $frequency = 'daily';
    /** @var array Map of regex pattern body => changefreq override */
    protected $frequencyCustom = [];

    /** @var array Links merged into the crawl result before output */
    protected $additionalLinks = [];
    /** @var array Regex pattern bodies; matching hrefs are not collected */
    protected $forbiddenLinks  = [];

    /** @var int Redirect limit handed to scan() */
    protected $redirectLimit = 0;

    /**
     * @param string $baseUrl Site root; trailing slashes are dropped so that
     *                        concatenation with "/path" never yields "//path"
     * @param array  $params  Map of option name => value; each entry is routed
     *                        to the matching set<Name>() method
     * @throws \InvalidArgumentException When $params contains an unknown option
     */
    public function __construct($baseUrl, $params = [])
    {
        $this->baseUrl = rtrim($baseUrl, '/');
        $this->url = $this->baseUrl;
        $this->lastMod = date('c');
        $this->applyParams($params);
    }

    /**
     * Apply parameters set in constructor
     *
     * Empty values (null, '', [], false) are skipped so the defaults stay in
     * place; a numeric zero is a legitimate value and is passed through.
     *
     * @param array $params
     * @throws \InvalidArgumentException When no setter exists for a key
     */
    private function applyParams(array $params)
    {
        foreach ($params as $paramKey => $paramVal) {
            if (empty($paramVal) && !is_numeric($paramVal)) {
                continue;
            }

            $setter = 'set' . ucfirst($paramKey);

            if (!is_callable([$this, $setter])) {
                throw new \InvalidArgumentException('Unknown sitemap parameter: ' . $paramKey);
            }

            $this->{$setter}($paramVal);
        }
    }

    /**
     * Get base url
     * @return string
     */
    public function getBaseUrl()
    {
        return $this->baseUrl;
    }

    /**
     * Set links default priority
     * @param float $priority
     */
    public function setPriority($priority)
    {
        $this->priority = $priority;
    }

    /**
     * Set links custom priority
     * @param array $priorityCustom Map of regex pattern body => priority
     */
    public function setPriorityCustom($priorityCustom = [])
    {
        $this->priorityCustom = $priorityCustom;
    }

    /**
     * Set links default frequency
     * @param string $frequency
     */
    public function setFrequency($frequency)
    {
        $this->frequency = $frequency;
    }

    /**
     * Set links custom frequency
     * @param array $frequencyCustom Map of regex pattern body => frequency
     */
    public function setFrequencyCustom($frequencyCustom = [])
    {
        $this->frequencyCustom = $frequencyCustom;
    }

    /**
     * Add links to sitemap
     * @param array $links
     */
    public function setAdditionalLinks($links = [])
    {
        $this->additionalLinks = $links;
    }

    /**
     * Exclude links from crawler
     * @param array $links List of regex pattern bodies
     */
    public function setForbiddenLinks($links = [])
    {
        $this->forbiddenLinks = $links;
    }

    /**
     * Set limit for redirects (301, 302)
     * @param int $limit
     */
    public function setRedirectLimit($limit = 0)
    {
        $this->redirectLimit = $limit;
    }

    /**
     * Generate regex pattern
     *
     * The body is wrapped in "%" delimiters with the "u" (UTF-8) and "i"
     * (case-insensitive) modifiers; a literal "%" inside the body must be
     * escaped by the caller.
     *
     * @param string $patternBody
     * @return string
     */
    protected function getPattern($patternBody)
    {
        return '%' . $patternBody . '%ui';
    }

    /**
     * Return custom value (from $this->priorityCustom or $this->frequencyCustom) or $default
     *
     * Patterns are tried in array order; the first one matching $url wins.
     *
     * @param  string $url
     * @param  array  $customValues Map of regex pattern body => value
     * @param  mixed  $default
     * @return mixed
     */
    protected function getCustomValue($url, array $customValues, $default)
    {
        foreach ($customValues as $patternBody => $val) {
            if (preg_match($this->getPattern($patternBody), $url) === 1) {
                return $val;
            }
        }

        return $default;
    }

    /**
     * Crawl the site, post-process the result and return the collected links
     *
     * @return array
     * @throws \Exception When no links remain after post-processing
     */
    public function run()
    {
        $this->crawl();
        $this->prepareForOutput();
        return $this->getLinks();
    }

    /**
     * Return the part of $url that follows the base url, or null when $url
     * does not live under the base url.
     *
     * The match is a case-insensitive prefix match that must end on a URL
     * boundary, so "http://site.com.evil.org" is not mistaken for a child of
     * "http://site.com".
     *
     * @param  string $url
     * @return string|null
     */
    private function stripBaseUrl($url)
    {
        $baseLength = strlen($this->baseUrl);

        if ($baseLength === 0 || 0 !== stripos($url, $this->baseUrl)) {
            return null;
        }

        // substr() returns false (not '') on PHP < 8 when nothing is left.
        $path = (string) substr($url, $baseLength);

        if ($path !== '' && false === strpos('/?#', $path[0])) {
            return null;
        }

        return $path;
    }

    /**
     * Crawl links from url
     *
     * Fetches $this->url, collects its internal links and recurses into every
     * link that has not been recorded in $this->links yet.
     *
     * @return bool False when the page could not be fetched or has no usable links
     * @throws \Exception
     */
    private function crawl()
    {
        $resource = new Curl();

        $resource->error(function (Curl $curl) {
            if ($curl->errorCode === 404) {
                // Remember the dead url so remove404Links() can drop it later.
                $this->notFound[] = $this->url;
                throw new \Exception('Not found');
            }
        });

        try {
            $content = $this->scan($resource, $this->redirectLimit);
        } catch (\Exception $e) {
            return false;
        } finally {
            // Release the handle on the failure path as well.
            $resource->close();
        }

        if (!$content) {
            return false;
        }

        $dom = new Parser($content);

        $links = $this->findLinks($dom, function ($links) {
            $hrefs = [];

            // NOTE(review): each $link looks like a regex match where [0] is
            // the whole tag, [2] the href and [3] the anchor text — confirm
            // against findLinks().
            foreach ($links as $link) {
                $href = $link[2];

                if ($href == '/') {
                    continue;
                }

                // Protocol-relative urls ("//host/path") name another host;
                // they are not paths on this site.
                if (0 === strpos($href, '//')) {
                    continue;
                }

                $path = $this->stripBaseUrl($href);

                if ($path !== null) {
                    $href = $path;
                } elseif (0 !== strpos($href, '/')) {
                    // Neither absolute-under-base nor site-relative: skip.
                    continue;
                }

                if (false !== stripos($link[0], "nofollow")) {
                    continue;
                }

                foreach ($this->forbiddenLinks as $patternBody) {
                    if (preg_match($this->getPattern($patternBody), $href) === 1) {
                        continue 2;
                    }
                }

                if (!isset($hrefs[$href])) {
                    $hrefs[$href] = ['url' => $href, 'text' => strip_tags(trim($link[3]))];
                }
            }

            return $hrefs;
        });

        if (empty($links)) {
            return false;
        }

        foreach ($links as $href => $link) {
            if (!isset($this->links[$href])) {
                $this->url = rtrim($this->baseUrl . '/' . ltrim($href, '/'), '/');
                // Record before recursing so the page is not visited twice.
                $this->links[$href] = $link;
                $this->crawl();
            }
        }

        return true;
    }

    /**
     * Append <$name>$value</$name> to $parent.
     *
     * The value goes in through a text node because the $value argument of
     * DOMDocument::createElement() is not escaped: a raw "&" (common in query
     * strings) would raise a warning and corrupt the element content.
     *
     * @param \DOMDocument $dom
     * @param \DOMElement  $parent
     * @param string       $name
     * @param mixed        $value
     * @return void
     */
    private function appendTextElement(\DOMDocument $dom, \DOMElement $parent, $name, $value)
    {
        $element = $dom->createElement($name);
        $element->appendChild($dom->createTextNode((string) $value));
        $parent->appendChild($element);
    }

    /**
     * Generate xml
     *
     * Builds a <urlset> document with one <url> entry (loc, priority,
     * changefreq, lastmod) per collected link.
     *
     * @return string XML
     * @throws \Exception
     */
    public function generateXML()
    {
        $dom = new \DOMDocument('1.0', 'UTF-8');

        $urlSet = $dom->createElement('urlset');
        $urlSet->setAttribute('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9');
        $urlSet->setAttribute('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance');
        $urlSet->setAttribute(
            'xsi:schemaLocation',
            'http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd'
        );

        foreach ($this->getLinks() as $link) {
            $url = $dom->createElement('url');

            $this->appendTextElement($dom, $url, 'loc', $this->baseUrl . $link['url']);
            $this->appendTextElement(
                $dom,
                $url,
                'priority',
                $this->getCustomValue($link['url'], $this->priorityCustom, $this->priority)
            );
            $this->appendTextElement(
                $dom,
                $url,
                'changefreq',
                $this->getCustomValue($link['url'], $this->frequencyCustom, $this->frequency)
            );
            $this->appendTextElement($dom, $url, 'lastmod', $this->lastMod);

            $urlSet->appendChild($url);
        }

        $dom->appendChild($urlSet);
        $dom->formatOutput = true;

        return $dom->saveXML();
    }

    /**
     * Merge the additional links in, drop links that answered 404 and make
     * sure something is left to output.
     *
     * @return void
     * @throws \Exception When the final link list is empty
     */
    private function prepareForOutput()
    {
        $this->mergeWithAdditionalLinks();
        $this->remove404Links();

        if (empty($this->getLinks())) {
            throw new \Exception('Nothing has been found');
        }
    }

    /**
     * Put the additional links in front of the crawled ones; on a string-key
     * collision array_merge() keeps the crawled entry.
     *
     * @return void
     */
    private function mergeWithAdditionalLinks()
    {
        if (!empty($this->additionalLinks)) {
            $this->links = array_merge($this->additionalLinks, $this->links);
        }
    }

    /**
     * Remove every link whose url was recorded as "not found" during the crawl.
     *
     * crawl() trims the trailing slash from the url it requests, so both sides
     * are normalised to "/path" (no trailing slash) before being compared;
     * otherwise a link such as "/foo/" would never match its 404 record.
     *
     * @return void
     */
    private function remove404Links()
    {
        if (empty($this->notFound)) {
            return;
        }

        $missing = [];

        foreach ($this->notFound as $nf) {
            $path = $this->stripBaseUrl($nf);
            $missing['/' . trim($path === null ? $nf : $path, '/')] = true;
        }

        $this->links = array_filter($this->links, function ($link) use ($missing) {
            return !isset($missing['/' . trim($link['url'], '/')]);
        });
    }

    /**
     * Write $content to $filePath.
     *
     * @param  string $filePath
     * @param  string $content
     * @return boolean False when the path or the content is empty, true on success
     * @throws \Exception When the file cannot be written
     */
    private function writeFile($filePath, $content)
    {
        if (!$filePath || !$content) {
            return false;
        }

        // $content is non-empty here, so a falsy result (false or 0 bytes)
        // always means the write did not succeed.
        if (!file_put_contents($filePath, $content)) {
            throw new \Exception('Can\'t write file ' . $filePath);
        }

        return true;
    }

    /**
     * Save the collected links as a sitemap XML file
     *
     * @param  string $filePath
     * @return boolean
     * @throws \Exception
     */
    public function saveAsXML($filePath)
    {
        $xml = $this->generateXML();
        return $this->writeFile($filePath, $xml);
    }

    /**
     * Save the collected links as a JSON file
     *
     * @param  string $filePath
     * @return boolean
     * @throws \Exception
     */
    public function saveAsJSON($filePath)
    {
        $json = json_encode($this->getLinks());
        return $this->writeFile($filePath, $json);
    }

}
