<?php
/**
 * Class AutoDescription_Sitemaps
 *
 * Generates sitemap and outputs it.
 *
 * @since 2.2.9
 */
class AutoDescription_Sitemaps extends AutoDescription_Metaboxes {

	/**
	 * Maximum number of posts that show up in the sitemap.xml page.
	 *
	 * Set in the constructor from the 'the_seo_framework_sitemap_post_limit'
	 * filter, which defaults to 100.
	 *
	 * @since 2.2.9
	 *
	 * @var int Max Posts in Sitemap
	 */
	protected $max_posts;

	/**
	 * Checks for pretty permalinks.
	 *
	 * Set in the constructor; true when the 'permalink_structure' option is
	 * not empty.
	 *
	 * @since 2.2.9
	 *
	 * @var bool true if pretty
	 */
	protected $pretty_permalinks;

	/**
	 * Name of the transient in which the generated sitemap content is cached.
	 *
	 * Built by fetch_sitemap_transient_name() from a fixed prefix and the
	 * current blog ID.
	 *
	 * @since 2.2.9
	 *
	 * @var string The sitemap transient name.
	 */
	protected $sitemap_transient;

	/**
	 * Constructor, load parent constructor and set up caches.
	 *
	 * Initializes the sitemap transient name, the post limit and the permalink
	 * state, then registers every sitemap related action and filter.
	 */
	public function __construct() {
		parent::__construct();

		/**
		 * Set up the sitemap transient name.
		 *
		 * The method assigns $this->sitemap_transient itself. Its return value
		 * is deliberately not assigned here, so the name can never be
		 * overwritten by an empty return value.
		 */
		$this->fetch_sitemap_transient_name();

		//* Applies filters the_seo_framework_sitemap_post_limit : int max posts
		$this->max_posts = (int) apply_filters( 'the_seo_framework_sitemap_post_limit', 100 );

		// I'm not going to initialize my own rewrite engine. Causes too many problems.
		$this->pretty_permalinks = ( get_option( 'permalink_structure' ) != '' );

		//* Delete transient on post publish/delete.
		add_action( 'publish_post', array( $this, 'delete_sitemap_transient_post' ) );
		add_action( 'delete_post', array( $this, 'delete_sitemap_transient_post' ) );
		add_action( 'save_post', array( $this, 'delete_sitemap_transient_post' ) );

		//* Delete sitemap transient on permalink structure change.
		add_action( 'load-options-permalink.php', array( $this, 'delete_sitemap_transient_permalink_updated' ), 20 );

		/**
		 * Add query strings to rewrite
		 */
		add_action( 'init', array( $this, 'rewrite_rule_sitemap' ), 1 );
		add_filter( 'query_vars', array( $this, 'enqueue_sitemap_query_vars' ), 1 );

		/**
		 * Adding a higher priority will cause a trailing slash to be added.
		 * We need to be in front of the queue to prevent this from happening.
		 */
		add_action( 'template_redirect', array( $this, 'maybe_output_sitemap' ), 1 );

		//* Edit the robots.txt file
		add_filter( 'robots_txt', array( $this, 'robots_txt' ), 10, 2 );

		//* Enqueue rewrite flush
		add_action( 'shutdown', array( $this, 'maybe_flush_rewrite' ), 999 );
	}

	/**
	 * Setup var for sitemap transient.
	 *
	 * Builds the transient name from a fixed prefix and the global blog ID,
	 * stores it in $this->sitemap_transient and returns it, so callers may
	 * safely assign the result.
	 *
	 * @since 2.2.9
	 *
	 * @return string The sitemap transient name.
	 */
	protected function fetch_sitemap_transient_name() {
		global $blog_id;

		$this->sitemap_transient = 'the_seo_framework_sitemap_' . (string) $blog_id;

		return $this->sitemap_transient;
	}

	/**
	 * Registers the sitemap.xml rewrite rule with WordPress.
	 *
	 * The rule maps sitemap.xml to the 'the_seo_framework_sitemap' query var.
	 *
	 * @since 2.2.9
	 *
	 * @param bool $run Add the rule anyway, regardless of the sitemap setting.
	 */
	public function rewrite_rule_sitemap( $run = false ) {

		$sitemap_enabled = (bool) $this->get_option( 'sitemaps_output' );

		//* Nothing to do when the sitemap is off and the rule isn't forced.
		if ( ! $sitemap_enabled && ! $run )
			return;

		/**
		 * Don't do anything if a sitemap plugin is active.
		 * On sitemap plugin activation, the sitemap plugin should flush the
		 * rewrite rules. If it doesn't, then this plugin's sitemap will be called.
		 *
		 * @todo expand detection list.
		 */
		if ( $this->has_sitemap_plugin() )
			return;

		//* Adding rewrite rules only has effect when permalink structures are active.
		if ( $this->pretty_permalinks ) {
			add_rewrite_rule( 'sitemap\.xml$', 'index.php?the_seo_framework_sitemap=xml', 'top' );
		}

		$this->wpmudev_domainmap_flush_fix( false );
	}

	/**
	 * Registers the 'the_seo_framework_sitemap' query var with WP_Query.
	 *
	 * The var is only added when the sitemap output option is enabled.
	 *
	 * @since 2.2.9
	 *
	 * @param array $vars The WP_Query vars
	 *
	 * @return array The WP_Query vars.
	 */
	public function enqueue_sitemap_query_vars( $vars ) {

		$sitemap_enabled = (bool) $this->get_option( 'sitemaps_output' );

		//* Leave the vars untouched when the sitemap is disabled.
		if ( ! $sitemap_enabled )
			return $vars;

		$vars[] = 'the_seo_framework_sitemap';

		return $vars;
	}

	/**
	 * Maybe Output sitemap.xml 'file' and header.
	 *
	 * Outputs nothing when the sitemap option is disabled, when the current
	 * blog is marked as spam or deleted, or when the request method is
	 * neither GET nor HEAD.
	 *
	 * @since 2.2.9
	 *
	 * @return void|mixed Sitemap XML file.
	 */
	public function maybe_output_sitemap() {
		global $current_blog;

		if ( ! (bool) $this->get_option( 'sitemaps_output' ) )
			return;

		/**
		 * Don't do anything on a deleted or spam blog.
		 * There's nothing to find anyway.
		 *
		 * @since 2.2.9
		 */
		if ( isset( $current_blog ) && ( $current_blog->spam == 1 || $current_blog->deleted == 1 ) )
			return;

		/**
		 * Don't do anything either if this isn't a GET (or HEAD) request.
		 *
		 * The $_GET superglobal is always set, so the request method has to be
		 * read from the server variables. A missing request method is treated
		 * as GET to stay permissive in non-HTTP contexts.
		 */
		$request_method = isset( $_SERVER['REQUEST_METHOD'] ) ? strtoupper( $_SERVER['REQUEST_METHOD'] ) : 'GET';

		if ( 'GET' !== $request_method && 'HEAD' !== $request_method )
			return;

		return $this->output_sitemap();
	}

	/**
	 * Prints the sitemap.xml 'file' with its header and ends the request.
	 *
	 * Only acts when the 'the_seo_framework_sitemap' query var equals 'xml'.
	 *
	 * @since 2.2.9
	 */
	protected function output_sitemap() {
		global $wp_query;

		$is_sitemap_request = isset( $wp_query->query_vars['the_seo_framework_sitemap'] )
			&& 'xml' === $wp_query->query_vars['the_seo_framework_sitemap'];

		//* Not a sitemap request, let WordPress continue.
		if ( ! $is_sitemap_request )
			return;

		//* Fetch sitemap content.
		$sitemap = $this->get_sitemap_content();

		// Don't let WordPress think this is 404.
		$wp_query->is_404 = false;

		header( 'Content-type: text/xml; charset=utf-8' );

		echo $sitemap, "\r\n";

		// We're done now.
		exit;
	}

	/**
	 * Output sitemap.xml content from transient.
	 *
	 * Wraps the (cached or freshly generated) URL entries in the XML
	 * declaration and urlset element, and appends a comment that tells
	 * whether the entries came from cache.
	 *
	 * @since 2.2.9
	 *
	 * @return string Sitemap XML contents.
	 */
	protected function get_sitemap_content() {

		/**
		 * Strictly compare against false: get_transient() returns false only
		 * when the transient is missing. A cached but empty sitemap must still
		 * count as cached, matching the check in setup_sitemap_transient().
		 */
		if ( false === get_transient( $this->sitemap_transient ) ) {
			$cached_content = "\r\n<!-- Sitemap is generated for this view -->";
		} else {
			$cached_content = "\r\n<!-- Sitemap is served from cache -->";
		}

		$content  = '<?xml version="1.0" encoding="UTF-8"?>' . "\r\n";
		$content .= '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\r\n";
		$content .= $this->setup_sitemap_transient();
		$content .= '</urlset>';

		$content .= $cached_content;

		return $content;
	}

	/**
	 * Returns the sitemap.xml URL entries, creating the transient when needed.
	 *
	 * @since 2.2.9
	 *
	 * @return string The sitemap URL entries.
	 */
	public function setup_sitemap_transient() {

		$cached = get_transient( $this->sitemap_transient );

		//* Transient exists, serve it as is.
		if ( false !== $cached )
			return $cached;

		//* Transient doesn't exist yet.
		$sitemap = $this->generate_sitemap();

		/**
		 * Transient expiration: 1 week.
		 * Keep the sitemap for at most 1 week.
		 *
		 * 7d * 24h * 60m * 60s
		 */
		$one_week = 7 * 24 * 60 * 60;

		set_transient( $this->sitemap_transient, $sitemap, $one_week );

		return $sitemap;
	}

	/**
	 * Generate sitemap.xml content.
	 *
	 * Builds the <url> entries for pages first (oldest first), followed by
	 * posts (newest first). Entries flagged with the '_genesis_noindex' custom
	 * field are skipped, and so is the posts page.
	 *
	 * @since 2.2.9
	 *
	 * @return string The sitemap URL entries, without the urlset wrapper.
	 */
	protected function generate_sitemap() {

		$content = '';

		/**
		 * Maximum pages and posts to fetch.
		 * Both default to $max_posts.
		 *
		 * Applies filters the_seo_framework_sitemap_pages_count : int max pages
		 * Applies filters the_seo_framework_sitemap_posts_count : int max posts
		 */
		$totalpages = (int) apply_filters( 'the_seo_framework_sitemap_pages_count', $this->max_posts );
		$totalposts = (int) apply_filters( 'the_seo_framework_sitemap_posts_count', $this->max_posts );

		// Ascend by the date for normal pages. Older pages get to the top of the list.
		$latest_pages = $totalpages ? get_posts( 'numberposts=' . $totalpages . '&post_type=page&orderby=date&order=ASC' ) : array();

		// Descend by the date for posts. The latest posts get to the top of the list after pages.
		// Posts are gated by their own limit, not by the pages limit.
		$latest_posts = $totalposts ? get_posts( 'numberposts=' . $totalposts . '&post_type=post&orderby=date&order=DESC' ) : array();

		/**
		 * Fetch the page/post modified options
		 *
		 * We can't get specific on the home page, unfortunately.
		 */
		$sitemaps_modified = (bool) $this->get_option( 'sitemaps_modified' );
		$page_lastmod = $sitemaps_modified || (bool) $this->get_option( 'page_modify_time' );
		$post_lastmod = $sitemaps_modified || (bool) $this->get_option( 'post_modify_time' );
		$home_lastmod = $sitemaps_modified || (bool) $this->get_option( 'home_modify_time' );

		$page_on_front = get_option( 'page_on_front' );

		//* The posts page only exists when a static front page is used. Fetched once, outside of the loop.
		$page_for_posts = 'page' == get_option( 'show_on_front' ) ? get_option( 'page_for_posts' ) : false;

		/**
		 * Generation time output
		 *
		 * Applies filters the_seo_framework_sitemap_timestamp : bool
		 */
		$timestamp = (bool) apply_filters( 'the_seo_framework_sitemap_timestamp', true );

		if ( $timestamp )
			$content .= '<!-- Sitemap is generated on ' . current_time( "Y-m-d H:i:s" ) . ' -->' . "\r\n";

		/**
		 * This can be heavy.
		 */
		foreach ( $latest_pages as $page ) {

			$page_id = $page->ID;

			//* Skip pages that are set to noindex.
			if ( $this->get_custom_field( '_genesis_noindex', $page_id ) )
				continue;

			//* Don't add the posts page. @TODO Figure out if removing this would be beneficial, actually.
			if ( $page_for_posts && $page_id == $page_for_posts )
				continue;

			$is_front_page = $page_id == $page_on_front;

			$content .= "\t<url>\r\n";
			$content .= "\t\t<loc>" . $this->the_url_from_cache( '', $page_id, '', false ) . "</loc>\r\n";

			// Keep it consistent. Only parse if page_lastmod is true.
			// The front page additionally requires home_lastmod.
			if ( $page_lastmod && ( ! $is_front_page || $home_lastmod ) )
				$content .= "\t\t<lastmod>" . mysql2date( 'Y-m-d', $page->post_modified_gmt ) . "</lastmod>\r\n";

			// Give higher priority to the home page.
			$priority_page = $is_front_page ? 1 : 0.9;

			$content .= "\t\t<priority>" . number_format( $priority_page, 1 ) . "</priority>\r\n";
			$content .= "\t</url>\r\n";
		}

		/**
		 * Setting up priorities, with pages always being important.
		 *
		 * From there, older posts get a gradually lower priority. Down to 0.
		 * Differentiate with 0.9 / number of posts. With a 1 dot decimal.
		 */
		$priority = 0.9;
		$post_count = count( $latest_posts );

		//* Prevent division by zero when there are no posts.
		$prioritydiff = $post_count ? 0.9 / $post_count : 0;

		/**
		 * This can be heavy.
		 */
		foreach ( $latest_posts as $post ) {

			$post_id = $post->ID;

			//* Skip posts that are set to noindex.
			if ( $this->get_custom_field( '_genesis_noindex', $post_id ) )
				continue;

			$content .= "\t<url>\r\n";
			$content .= "\t\t<loc>" . $this->the_url_from_cache( '', $post_id, '', false ) . "</loc>\r\n";

			// Keep it consistent. Only parse if post_lastmod is true.
			// Use the modified date of the post itself, not of the last iterated page.
			if ( $post_lastmod )
				$content .= "\t\t<lastmod>" . mysql2date( 'Y-m-d', $post->post_modified_gmt ) . "</lastmod>\r\n";

			$content .= "\t\t<priority>" . number_format( $priority, 1 ) . "</priority>\r\n";
			$content .= "\t</url>\r\n";

			// Lower the priority for the next pass.
			$priority = $priority - $prioritydiff;

			// Cast negative numbers. You shall not pass.
			if ( $priority <= 0 )
				$priority = 0;
		}

		return $content;
	}

	/**
	 * Deletes the sitemap transient when a post is saved, published or deleted.
	 *
	 * Does nothing when the sitemap output is disabled or when the given
	 * post is a revision.
	 *
	 * @since 2.2.9
	 *
	 * @param int $post_id The ID of the post that triggered the hook.
	 *
	 * @return void|bool True when the transient is deleted.
	 */
	public function delete_sitemap_transient_post( $post_id ) {

		$sitemap_enabled = (bool) $this->get_option( 'sitemaps_output' );

		// Don't flush when there's no sitemap, nor on revision.
		if ( ! $sitemap_enabled || wp_is_post_revision( $post_id ) )
			return;

		return $this->delete_sitemap_transient();
	}

	/**
	 * Checks whether the permalink structure is being updated, and deletes
	 * the sitemap transient if so.
	 *
	 * @since 2.3.0
	 *
	 * @return bool Whether the sitemap transient is deleted.
	 */
	public function delete_sitemap_transient_permalink_updated() {

		$structure_posted = isset( $_POST['permalink_structure'] );
		$category_base_posted = isset( $_POST['category_base'] );

		//* Neither field is submitted: nothing has been updated.
		if ( ! $structure_posted && ! $category_base_posted )
			return false;

		return $this->delete_sitemap_transient();
	}

	/**
	 * Deletes the sitemap transient and pings the search engines afterwards.
	 *
	 * @since 2.2.9
	 *
	 * @return bool Always true.
	 */
	public function delete_sitemap_transient() {

		$transient_name = $this->sitemap_transient;

		//* Drop the cached sitemap.
		delete_transient( $transient_name );

		//* Notify the search engines.
		$this->ping_searchengines();

		return true;
	}

	/**
	 * Ping search engines on post publish.
	 *
	 * Pings every search engine that is enabled through the options. The
	 * pinging is throttled through a transient, to at most once per hour.
	 *
	 * @since 2.2.9
	 */
	public function ping_searchengines() {

		/**
		 * Build the throttle transient name from the current blog ID.
		 * The ID is only read here; the global $blog_id is never reassigned,
		 * so its type stays intact for the rest of the request.
		 */
		$transient = 'trottle_the_ping_' . (string) get_current_blog_id();

		//* Transient exists, so we've pinged within the last hour.
		if ( false !== get_transient( $transient ) )
			return;

		//* Transient doesn't exist yet.

		if ( $this->get_option( 'ping_google' ) )
			$this->ping_google();

		if ( $this->get_option( 'ping_bing' ) )
			$this->ping_bing();

		if ( $this->get_option( 'ping_yahoo' ) )
			$this->ping_yahoo();

		// Sorry I couldn't help myself.
		$trottle = 'Bert and Ernie are weird.';

		/**
		 * Limit the pinging to a maximum of 1 per hour.
		 * Transient expiration. 1 hour.
		 *
		 * 60s * 60m
		 */
		$expiration = 60 * 60;

		set_transient( $transient, $trottle, $expiration );
	}

	/**
	 * Notifies Google of the sitemap location.
	 *
	 * @since 2.2.9
	 */
	public function ping_google() {

		$sitemap_url = $this->the_url_from_cache( '', '0' ) . '/sitemap.xml';

		wp_remote_get( 'http://www.google.com/webmasters/sitemaps/ping?sitemap=' . urlencode( $sitemap_url ) );
	}

	/**
	 * Notifies Bing of the sitemap location.
	 *
	 * @since 2.2.9
	 */
	public function ping_bing() {

		$sitemap_url = $this->the_url_from_cache( '', '0' ) . '/sitemap.xml';

		wp_remote_get( 'http://www.bing.com/webmaster/ping.aspx?siteMap=' . urlencode( $sitemap_url ) );
	}

	/**
	 * Notifies Yahoo of the sitemap location.
	 *
	 * @since 2.2.9
	 */
	public function ping_yahoo() {

		$sitemap_url = $this->the_url_from_cache( '', '0' ) . '/sitemap.xml';

		wp_remote_get( 'http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=' . urlencode( $sitemap_url ) );
	}

	/**
	 * Edits the robots.txt output
	 *
	 * Appends the default Disallow rules, prefixed with the path of the home
	 * URL, and the sitemap location when both the sitemap output and the
	 * sitemap robots options are enabled. The appended output is stored in
	 * the object cache per blog.
	 *
	 * Requires not to have a robots.txt file in the root directory
	 *
	 * @uses robots_txt filter located at WP core
	 *
	 * @since 2.2.9
	 *
	 * @param string $robots_txt The current robots.txt output.
	 * @param string $public Whether the blog is public, '0' if it isn't.
	 *
	 * @return string The robots.txt output.
	 *
	 * @todo maybe combine with noindex/noarchive/(nofollow) -> only when object caching?
	 */
	public function robots_txt( $robots_txt = '', $public = '' ) {

		/**
		 * Don't do anything if the blog isn't public
		 */
		if ( '0' == $public )
			return $robots_txt;

		$cache_key = 'the_seo_framework_robots_' . (string) get_current_blog_id();

		$output = wp_cache_get( $cache_key, 'the_seo_framework' );
		if ( false === $output ) {
			$output = '';

			$home_url = $this->the_url_from_cache( '', '0', false, false );
			$parse_url = parse_url( $home_url );

			//* Read the path from the parsed home URL, without a trailing slash to prevent double slashes.
			$path = ( !empty( $parse_url['path'] ) ) ? rtrim( $parse_url['path'], '/' ) : '';

			//* Output defaults
			$output .= "Disallow: $path/wp-includes/\r\n";

			//* Prevents query caching
			$output .= "Disallow: $path/*?*\r\n";

			//* Add whitespace
			$output .= "\r\n";

			if ( $this->get_option( 'sitemaps_output' ) && (bool) $this->get_option( 'sitemaps_robots' ) ) {
				//* Add sitemap full url
				//* Becomes relative if host is empty.
				$host = ( !empty( $parse_url['host'] ) ) ? $parse_url['host'] : '';
				$scheme = ( !empty( $parse_url['scheme'] ) && !empty( $host ) ) ? $parse_url['scheme'] . '://' : '';
				$output .= "Sitemap: $scheme$host/sitemap.xml\r\n";
			}

			wp_cache_set( $cache_key, $output, 'the_seo_framework', 86400 ); // 24 hours
		}

		$robots_txt .= $output;

		return $robots_txt;
	}

	/**
	 * Adds the sitemap rewrite rule and flushes the rewrite rules.
	 * Meant for plugin activation.
	 *
	 * @since 2.2.9
	 * Do not return anything. Just be here. Thanks.
	 */
	public static function flush_rewrite_rules_activation() {
		global $wp_rewrite;

		// This function is called statically, so fetch the instance and force the rule.
		the_seo_framework()->rewrite_rule_sitemap( true );

		//* Reinitialize, then hard flush.
		$wp_rewrite->init();
		$wp_rewrite->flush_rules( true );
	}

	/**
	 * Removes the sitemap rewrite rule and flushes the rewrite rules.
	 * Meant for plugin deactivation.
	 *
	 * @since 2.2.9
	 * Do not return anything. Just be here. Thanks.
	 */
	public static function flush_rewrite_rules_deactivation() {
		global $wp_rewrite;

		$sitemap_rule = 'sitemap\.xml$';

		$wp_rewrite->init();

		// @TODO find a better way?
		// For preventing plugin conflicts etc.
		unset( $wp_rewrite->extra_rules_top[ $sitemap_rule ] );

		//* Hard flush.
		$wp_rewrite->flush_rules( true );
	}

	/**
	 * Enqueues or reads the rewrite flush flag for activation.
	 *
	 * The flag is held in a static variable. Once it has been set, every
	 * following call returns it, regardless of the parameter.
	 *
	 * @since 2.3.0
	 *
	 * @param bool $enqueue Whether to enqueue the flush.
	 *
	 * @return bool Whether the flush is enqueued.
	 */
	public function enqueue_rewrite_activate( $enqueue = false ) {

		static $enqueued = null;

		//* Already enqueued on an earlier call.
		if ( null !== $enqueued )
			return (bool) $enqueued;

		if ( ! $enqueue )
			return false;

		$enqueued = true;

		return true;
	}

	/**
	 * Enqueues or reads the rewrite flush flag for deactivation.
	 *
	 * The flag is held in a static variable. Once it has been set, every
	 * following call returns it, regardless of the parameter.
	 *
	 * @since 2.3.0
	 *
	 * @param bool $enqueue Whether to enqueue the flush.
	 *
	 * @return bool Whether the flush is enqueued.
	 */
	public function enqueue_rewrite_deactivate( $enqueue = false ) {

		static $enqueued = null;

		//* Already enqueued on an earlier call.
		if ( null !== $enqueued )
			return (bool) $enqueued;

		if ( ! $enqueue )
			return false;

		$enqueued = true;

		return true;
	}

	/**
	 * Flushes the rewrite rules when a flush has been enqueued for
	 * activation and/or deactivation.
	 *
	 * @since 2.3.0
	 */
	public function maybe_flush_rewrite() {

		$class = 'AutoDescription_Sitemaps';

		//* Activation flush.
		if ( $this->enqueue_rewrite_activate() ) {
			$this->call_function( array( $class, 'flush_rewrite_rules_activation' ) );
		}

		//* Deactivation flush.
		if ( $this->enqueue_rewrite_deactivate() ) {
			$this->call_function( array( $class, 'flush_rewrite_rules_deactivation' ) );
		}
	}

	/**
	 * Adds the WPMUdev Domain Mapping rewrite endpoint again and flushes the
	 * rewrite rules on init. Domain Mapping bugfix.
	 *
	 * A site option keeps track of the flush state per blog:
	 * When it's not set on init, the rules are flushed and the option is set
	 * to true, so no second flush happens.
	 * When $options_saved is true and the option is set, the option is reset
	 * to false, so the rules will be flushed on the next init.
	 *
	 * @since 2.3.0
	 *
	 * @param bool $options_saved If we're in admin and the sanitization function runs.
	 *                            When true, the current action doesn't have to be init.
	 */
	public function wpmudev_domainmap_flush_fix( $options_saved = false ) {

		//* Only applies with pretty permalinks and active domain mapping.
		if ( ! $this->pretty_permalinks || ! $this->is_domainmapping_active() )
			return;

		//* Only run on init, unless the options are being saved.
		if ( current_action() != 'init' && ! $options_saved )
			return;

		//* The Domain Mapping SSO endpoint must be available.
		if ( ! class_exists( 'Domainmap_Module_Cdsso' ) || ! defined( 'Domainmap_Module_Cdsso::SSO_ENDPOINT' ) )
			return;

		add_rewrite_endpoint( Domainmap_Module_Cdsso::SSO_ENDPOINT, EP_ALL );

		//* Force extra flush on init.
		if ( ! class_exists( 'domain_map' ) )
			return;

		$key = 'the_seo_framework_wpmdev_dm' . get_current_blog_id() . '_extra_flush';

		if ( $options_saved ) {
			//* Options are saved: mark for flush on the next init.
			if ( get_site_option( $key ) ) {
				error_log( $key . ' is now marked for flush. On: ' . current_action() );

				//* Prevent flushing multiple times.
				update_site_option( $key, false );
			}

			return;
		}

		//* Already flushed.
		if ( get_site_option( $key ) )
			return;

		error_log( $key . ' is now flushed on init. On: ' . current_action() );

		//* Prevent flushing multiple times.
		update_site_option( $key, true );

		//* Now flush
		flush_rewrite_rules();
	}

}
