Converting XHTML To Text-Only Version Using ColdFusion And XSLT

<!---
	Capture the sample XHTML (an order confirmation message) into
	the strHTML variable. This markup is later wrapped in a single
	root element and handed to XmlTransform(), so it must be
	well-formed XML: every tag closed (note the self-closing
	<br /> and <hr />) and every attribute value quoted.
--->
<cfsavecontent variable="strHTML">
 
	<h1>
		Thank you for your purchase!
	</h1>
 
	<p>
		Invoice number: <strong>12345</strong><br />
		Price: <strong>$19.95</strong>
	</p>
 
	<hr />
 
	<h2>
		Purchased Products
	</h2>
 
	<table cellspacing="5" border="1">
	<tr>
		<td>
			Muscle Girls Gone Wild
		</td>
		<td>
			$10.95
		</td>
	</tr>
	<tr>
		<td>
			Female Muscle - The Definitive Guide
		</td>
		<td>
			$9.00
		</td>
	</tr>
	</table>
 
	<hr />
 
	<p>
		If you have any questions about your order please
		contact us at
		<a href="mailto:orders@amazon.com">orders@amazon.com</a>.
	</p>
 
</cfsavecontent>
 
 
<!---
	Define the XSLT that flattens the XHTML into a plain-text
	rendition and capture it in the strXSLT variable.

	NOTE: The comments inside this block use the CFML comment
	syntax on purpose; ColdFusion removes them while capturing
	the content, so they never reach the XSLT processor (where a
	comment ending in three dashes would not be well-formed).

	NOTE: The captured string starts with white space, so it must
	be trimmed before use; nothing may precede the XML declaration.
--->
<cfsavecontent variable="strXSLT">

	<?xml version="1.0" encoding="ISO-8859-1"?>
	<xsl:transform
		version="1.0"
		xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

		<!---
			Serialise the result as plain text. Without this the
			processor defaults to XML output, which prepends an
			XML declaration and escapes characters such as & and
			< as entities in the generated text.
		--->
		<xsl:output method="text" />

		<!--- Store variable for new line. --->
		<xsl:variable
			name="new-line"
			select="'&#10;'"
			/>

		<!--- Store variable for double-new line. --->
		<xsl:variable
			name="new-lines"
			select="concat( $new-line, $new-line )"
			/>


		<!---
			Fallback for any element that is not matched by one
			of the more specific templates below: output nothing
			for the element itself and keep processing its text
			and element children.
		--->
		<xsl:template match="*">
			<xsl:apply-templates select="text()|*" />
		</xsl:template>

		<!---
			For all text nodes, output the value with leading and
			trailing white space removed and inner runs of white
			space collapsed to a single space.
		--->
		<xsl:template match="text()">
			<xsl:value-of select="normalize-space( . )" />
		</xsl:template>

		<!--- Denote primary header with hrule. --->
		<xsl:template match="h1">
			<xsl:apply-templates select="text()|*" />
			<xsl:value-of select="$new-line" />
			<xsl:text>---------------------------------</xsl:text>
			<xsl:value-of select="$new-lines" />
		</xsl:template>

		<!--- Denote secondary headers with hash marks. --->
		<xsl:template match="h2|h3|h4|h5|h6">
			<xsl:text>## </xsl:text>
			<xsl:apply-templates select="text()|*" />
			<xsl:value-of select="$new-lines" />
		</xsl:template>

		<!--- Turn block level elements into text-only. --->
		<xsl:template match="p|blockquote|li">
			<xsl:apply-templates select="text()|*" />
			<xsl:value-of select="$new-lines" />
		</xsl:template>

		<!--- Add new line after table. --->
		<xsl:template match="table">
			<xsl:apply-templates select="*" />
			<xsl:value-of select="$new-line" />
		</xsl:template>

		<!--- Put each table row on its own line. --->
		<xsl:template match="tr">
			<xsl:apply-templates select="*" />
			<xsl:value-of select="$new-line" />
		</xsl:template>

		<!--- Bracket table cell values (data and header cells). --->
		<xsl:template match="td|th">
			<xsl:text>[ </xsl:text>
			<xsl:apply-templates select="text()|*" />
			<xsl:text> ]</xsl:text>
		</xsl:template>

		<!---
			Strip out any inline tags (and start them off with
			an initial space so that nested and sibling tags don't
			get concatenated text). The children are processed
			with apply-templates rather than a value-of on text()
			because value-of only outputs the FIRST text node,
			which would drop nested inline tags and any text that
			follows them.
		--->
		<xsl:template match="strong|em|span|a">
			<xsl:text> </xsl:text>
			<xsl:apply-templates select="text()|*" />
		</xsl:template>

		<!---
			Replace hrule with manual dashes.
			NOTE: template also named for manual execution.
		--->
		<xsl:template match="hr" name="hr">
			<xsl:text>. . . . . . . . . . . . . . . . .</xsl:text>
			<xsl:value-of select="$new-lines" />
		</xsl:template>

		<!--- Replace break tag with new line. --->
		<xsl:template match="br">
			<xsl:value-of select="$new-line" />
		</xsl:template>

	</xsl:transform>

</cfsavecontent>
 
 
<!---
	Convert the HTML to text only. As we are doing this,
	we need to wrap the HTML in a single root node so that the
	XML document we parse is well formed. The XSLT string is
	trimmed because nothing (not even white space) may precede
	its XML declaration.
--->
<cfset strTextOnly = XmlTransform(
	("<data>" & strHTML & "</data>"),
	Trim( strXSLT )
	) />

<!---
	Strip out the leading XML declaration, if the transform
	emitted one. The pattern is anchored to the start of the
	string and only matches an XML declaration so that tag-like
	text further along in the generated output (for example
	"a <b> c") is never removed by accident.
--->
<cfset strTextOnly = Trim(
	REReplace(
		strTextOnly,
		"^\s*<\?xml[^>]*\?>",
		"",
		"one"
		)
	) />


<!--- Output the text-only version. --->
<cfset WriteOutput( strTextOnly ) />

For Cut-and-Paste