Home / Archive / Markdown URLs for LLMs
Duplicate Snippet

Embed Snippet on Your Site

Markdown URLs for LLMs

Make your WordPress content LLM-friendly. Append .md to any post or page URL to get an AI-ready Markdown version of it, so your content is easily consumable by ChatGPT, Claude, and other AI tools.

<10
Code Preview
php
<?php
/**
 * Setup:
 *  1. Once this snippet is active, open Settings > Permalinks and press "Save Changes".
 *  2. List in $enabled_post_types every post type that should answer to .md URLs.
 */
$enabled_post_types = [
	'post',
	'page',
];
// Route every request path ending in ".md" to index.php, passing the part
// before the extension along in the markdown_url query variable.
add_action( 'init', static function () {
	$pattern = '(.+?)\.md$';
	$target  = 'index.php?markdown_url=$matches[1]';

	// 'top' places the rule ahead of the existing rewrite rules.
	add_rewrite_rule( $pattern, $target, 'top' );
} );
// Register markdown_url as a recognised query variable by appending it to
// the list WordPress passes through the query_vars filter.
add_filter( 'query_vars', static function ( $query_vars ) {
	array_push( $query_vars, 'markdown_url' );

	return $query_vars;
} );
// Handle the markdown request
/**
 * Serves a plain-text Markdown rendering of a post when the markdown_url
 * query variable is present.
 *
 * Flow: resolve the post from the captured path, verify the visitor may read
 * it, build a title/metadata header, convert the rendered HTML body to
 * Markdown with a handful of regular expressions, then print it and exit.
 *
 * Responds with HTTP 404 when no post matches, the post type is not listed in
 * $enabled_post_types, the post is not readable by the current visitor, or it
 * is password protected and the password has not been supplied.
 */
add_action( 'template_redirect', function () use ( $enabled_post_types ) {
	$markdown_url = get_query_var( 'markdown_url' );
	if ( ! $markdown_url ) {
		return;
	}

	// Try to find the post by URL
	$post = ( function ( $url ) use ( $enabled_post_types ) {
		// Remove leading slash if present
		$url = ltrim( $url, '/' );

		// Try to get post by URL path
		$post_id = url_to_postid( '/' . $url );
		if ( $post_id ) {
			return get_post( $post_id );
		}

		// If that doesn't work, try by post name/slug (last path segment).
		$slug = basename( $url );
		if ( '' === $slug ) {
			// An empty 'name' would be ignored by the query and match an arbitrary post.
			return null;
		}

		foreach ( $enabled_post_types as $post_type ) {
			$posts = get_posts( [
				'name'        => $slug,
				'post_type'   => $post_type,
				'post_status' => 'publish',
				'numberposts' => 1,
			] );
			if ( ! empty( $posts ) ) {
				return $posts[0];
			}
		}

		return null;
	} )( $markdown_url );

	// Only expose content the current visitor could also read on the normal
	// HTML page: an enabled post type, published (or readable through the
	// visitor's capabilities), and not locked behind a post password.
	$can_serve = $post
		&& in_array( $post->post_type, $enabled_post_types, true )
		&& ( 'publish' === get_post_status( $post ) || current_user_can( 'read_post', $post->ID ) )
		&& ! post_password_required( $post );

	if ( ! $can_serve ) {
		status_header( 404 );
		nocache_headers();
		header( 'Content-Type: text/plain; charset=UTF-8' );
		echo "Post not found or markdown not enabled for this post type.";
		exit;
	}

	// The response is plain text, so tags and HTML entities must not leak
	// into titles, author names or term names.
	$to_plain_text = function ( $text ) {
		return html_entity_decode( wp_strip_all_tags( (string) $text ), ENT_QUOTES, 'UTF-8' );
	};

	// Generate markdown content
	$markdown_content = '';

	// Add title
	$markdown_content .= '# ' . $to_plain_text( get_the_title( $post ) ) . "\n\n";

	// Add metadata
	$markdown_content .= '**Published:** ' . get_the_date( 'F j, Y', $post ) . "\n";
	$markdown_content .= '**Author:** ' . $to_plain_text( get_the_author_meta( 'display_name', $post->post_author ) ) . "\n";

	// Add categories and tags for posts
	if ( $post->post_type === 'post' ) {
		$categories = get_the_category( $post->ID );
		if ( ! empty( $categories ) ) {
			$cat_names = array_map( function ( $cat ) use ( $to_plain_text ) {
				return $to_plain_text( $cat->name );
			}, $categories );
			$markdown_content .= '**Categories:** ' . implode( ', ', $cat_names ) . "\n";
		}

		// get_the_tags() may return false or a WP_Error; only map real lists.
		$tags = get_the_tags( $post->ID );
		if ( ! empty( $tags ) && is_array( $tags ) ) {
			$tag_names = array_map( function ( $tag ) use ( $to_plain_text ) {
				return $to_plain_text( $tag->name );
			}, $tags );
			$markdown_content .= '**Tags:** ' . implode( ', ', $tag_names ) . "\n";
		}
	}

	$markdown_content .= "\n---\n\n";

	// The main query was not run for this post, so the global $post may point
	// at a different post. Make the requested post current before running the
	// content filters so shortcodes and blocks that read the global post see
	// the right one. No reset is needed because the request ends below.
	$GLOBALS['post'] = $post;
	setup_postdata( $post );

	// Convert HTML content to markdown-friendly format
	$post_content = apply_filters( 'the_content', $post->post_content );

	// Basic HTML to Markdown conversion
	$post_content = ( function ( $html ) {
		// Lift <pre> blocks out first and leave a placeholder behind. Doing
		// this before anything else keeps their line breaks intact and keeps
		// decoded sample markup (e.g. "<div>") away from the later strip_tags().
		$code_blocks = [];
		$html        = preg_replace_callback( '/<pre\b[^>]*>(.*?)<\/pre>/is', function ( $matches ) use ( &$code_blocks ) {
			$token                 = "\x02MDCODE" . count( $code_blocks ) . "\x03";
			$code                  = html_entity_decode( strip_tags( $matches[1] ), ENT_QUOTES, 'UTF-8' );
			$code_blocks[ $token ] = "```\n" . trim( $code, "\r\n" ) . "\n```";
			return $token;
		}, $html );

		// Collapse all remaining whitespace so the single-line patterns below work.
		$html = trim( preg_replace( '/\s+/', ' ', $html ) );

		// Headings: <hN> becomes N leading "#" characters.
		$html = preg_replace_callback( '/<h([1-6])\b[^>]*>(.*?)<\/h\1>/i', function ( $matches ) {
			return "\n" . str_repeat( '#', (int) $matches[1] ) . ' ' . $matches[2] . "\n";
		}, $html );

		// The \b after each tag name stops e.g. "<p" from matching <picture>,
		// "<b" from matching <blockquote> and "<i" from matching <img>.
		$html = preg_replace( '/<p\b[^>]*>(.*?)<\/p>/i', "$1\n\n", $html );
		$html = preg_replace( '/<br\b[^>]*>/i', "\n", $html );
		$html = preg_replace( '/<(strong|b)\b[^>]*>(.*?)<\/\1>/i', "**$2**", $html );
		$html = preg_replace( '/<(em|i)\b[^>]*>(.*?)<\/\1>/i', "*$2*", $html );
		$html = preg_replace( '/<a\b[^>]*\bhref=["\']([^"\']*)["\'][^>]*>(.*?)<\/a>/i', "[$2]($1)", $html );

		// Images: src/alt in either order, then images without alt text.
		$html = preg_replace( '/<img[^>]*src=["\']([^"\']*)["\'][^>]*alt=["\']([^"\']*)["\'][^>]*\/?>/i', "![$2]($1)", $html );
		$html = preg_replace( '/<img[^>]*alt=["\']([^"\']*)["\'][^>]*src=["\']([^"\']*)["\'][^>]*\/?>/i', "![$1]($2)", $html );
		$html = preg_replace( '/<img[^>]*src=["\']([^"\']*)["\'][^>]*\/?>/i', "![]($1)", $html );

		// Ordered lists keep their numbering; handle them before the generic
		// bullet conversion consumes their <li> items.
		$html = preg_replace_callback( '/<ol\b[^>]*>(.*?)<\/ol>/is', function ( $matches ) {
			$position = 0;
			$items    = preg_replace_callback( '/<li\b[^>]*>(.*?)<\/li>/is', function ( $item ) use ( &$position ) {
				$position++;
				return $position . '. ' . trim( $item[1] ) . "\n";
			}, $matches[1] );
			return $items . "\n";
		}, $html );

		// Unordered lists (and any stray list tags left over) become bullets.
		$html = preg_replace( '/<ul\b[^>]*>/i', "", $html );
		$html = preg_replace( '/<\/ul>/i', "\n", $html );
		$html = preg_replace( '/<ol\b[^>]*>/i', "", $html );
		$html = preg_replace( '/<\/ol>/i', "\n", $html );
		$html = preg_replace( '/<li\b[^>]*>(.*?)<\/li>/i', "- $1\n", $html );

		// Blockquotes: prefix every line with "> ".
		$html = preg_replace_callback( '/<blockquote\b[^>]*>(.*?)<\/blockquote>/is', function ( $matches ) {
			$lines  = explode( "\n", trim( $matches[1] ) );
			$quoted = array_map( function ( $line ) {
				return '> ' . trim( $line );
			}, $lines );
			return "\n" . implode( "\n", $quoted ) . "\n\n";
		}, $html );

		// Inline code (block-level code was already lifted out above).
		$html = preg_replace( '/<code\b[^>]*>(.*?)<\/code>/i', "`$1`", $html );

		// Drop whatever markup is left and decode entities for plain-text output.
		$html = strip_tags( $html );
		$html = html_entity_decode( $html, ENT_QUOTES, 'UTF-8' );

		// Give each code-block placeholder its own paragraph, then squeeze
		// runs of blank lines down to a single blank line.
		$html = preg_replace( '/\s*(\x02MDCODE\d+\x03)\s*/', "\n\n$1\n\n", $html );
		$html = preg_replace( '/\n\s*\n\s*\n/', "\n\n", $html );
		$html = trim( $html );

		// Restore the fenced code blocks last so nothing above can alter them.
		return str_replace( array_keys( $code_blocks ), array_values( $code_blocks ), $html );
	} )( $post_content );

	$markdown_content .= $post_content;

	// Add permalink at the end
	$markdown_content .= "\n\n---\n\n";
	$markdown_content .= '**Original URL:** ' . get_permalink( $post ) . "\n";

	// Set proper headers
	nocache_headers();
	header( 'Content-Type: text/plain; charset=UTF-8' );
	header( 'Content-Disposition: inline; filename="' . sanitize_file_name( $post->post_name ) . '.md"' );

	// Output the markdown content
	echo $markdown_content;

	// Stop WordPress from processing further
	exit;
} );

Comments

Add a Comment