String Tokenizer ColdFusion Component That Can Handle Qualified Fields

<cfcomponent
	displayname="StringTokenzier"
	output="false"
	hint="Iterates over the tokens of a given string based on string delimiters and token qualifiers.">
 
	<!---
		Pseudo constructor. Runs when the component is instantiated and
		puts every instance field into a usable default state, so that the
		public methods can be called safely even before Init() has run.
	--->
	<cfscript>

		// Container for all of the private instance data.
		VARIABLES.Instance = StructNew();

		// The original string handed to Init().
		VARIABLES.Instance.OriginalString = "";

		// Default delimiter and qualifier (an empty qualifier means that
		// qualified-field handling is turned off).
		VARIABLES.Instance.Delimiter = ",";
		VARIABLES.Instance.Qualifier = "";

		// Number of tokens handed back to the caller so far.
		VARIABLES.Instance.TokenIndex = 0;

		// The raw (delimiter-split) tokens. These are related to the tokens
		// returned, but not exactly the same thing. This defaults to an
		// empty ARRAY rather than an empty string because
		// HasMoreElements() passes it to ArrayLen(), which would fail on a
		// string if it were called before Init().
		VARIABLES.Instance.RawTokens = ArrayNew( 1 );

		// Index of the last raw token that has been consumed.
		VARIABLES.Instance.RawTokenIndex = 0;

	</cfscript>
 
 
	<cffunction
		name="Init"
		access="public"
		returntype="any"
		output="false"
		hint="Returns an initialized String Tokenizer instance.">

		<!---
			Arguments:
			- String: the text to tokenize.
			- Delimiter: only its first character is used; an empty value
			  falls back to a comma.
			- Qualifier: only its first character is used; an empty value
			  disables qualified-field handling. The default is a single
			  double-quote character.

			Returns THIS so that creation and initialization can be chained.
		--->
		<cfargument
			name="String"
			type="string"
			required="true"
			hint="This is the string that will be broken up into tokens."
			/>

		<cfargument
			name="Delimiter"
			type="string"
			required="false"
			default=","
			hint="This is the delimiter that will separate the tokens."
			/>

		<cfargument
			name="Qualifier"
			type="string"
			required="false"
			default=""""
			hint="This is the qualifier that will wrap around fields that have special characters embeded."
			/>

		<!--- Define the local scope. --->
		<cfset var LOCAL = StructNew() />

		<!---
			Only the first character of the delimiter is used. An empty
			delimiter cannot be used to split anything, so fall back to the
			comma instead of building an invalid split pattern.
		--->
		<cfif Len( ARGUMENTS.Delimiter )>

			<cfset VARIABLES.Instance.Delimiter = Left( ARGUMENTS.Delimiter, 1 ) />

		<cfelse>

			<cfset VARIABLES.Instance.Delimiter = "," />

		</cfif>

		<!---
			Only the first character of the qualifier is used. It is valid
			to have no qualifier at all; in that case store the empty string.
		--->
		<cfif Len( ARGUMENTS.Qualifier )>

			<cfset VARIABLES.Instance.Qualifier = Left( ARGUMENTS.Qualifier, 1 ) />

		<cfelse>

			<cfset VARIABLES.Instance.Qualifier = "" />

		</cfif>

		<!--- Store the original string. --->
		<cfset VARIABLES.Instance.OriginalString = ARGUMENTS.String />

		<!---
			Every raw token is prefixed with one padding character so that
			no token is ever empty (Java's String.split() silently drops
			trailing empty strings). NextElement() removes the first
			character of every raw token again. The padding is normally a
			space; when the delimiter itself is a space, a different
			character must be used or the padding would be split as well.
		--->
		<cfif (Compare( VARIABLES.Instance.Delimiter, " " ) EQ 0)>

			<cfset LOCAL.Padding = "_" />

		<cfelse>

			<cfset LOCAL.Padding = " " />

		</cfif>

		<!---
			Quote the delimiter so that the regular expression engine treats
			it as a literal character. Simply prefixing it with a backslash
			is not enough: for letters and digits that creates an escape
			sequence (or an invalid pattern) instead of a literal.
		--->
		<cfset LOCAL.DelimiterPattern = CreateObject( "java", "java.util.regex.Pattern" ).quote(
			JavaCast( "string", VARIABLES.Instance.Delimiter )
			) />

		<!---
			Break the original string up into raw tokens. Going forward, some
			of these tokens may be merged, but doing it this way will help us
			iterate over them.

			BE CAREFUL! Split() returns a Java array, not a ColdFusion array.
			It can be referenced, but it must not be altered.
		--->
		<cfset VARIABLES.Instance.RawTokens = ToString(
			LOCAL.Padding &
			Replace(
				ARGUMENTS.String,
				VARIABLES.Instance.Delimiter,
				(VARIABLES.Instance.Delimiter & LOCAL.Padding),
				"all"
				)
			).Split( LOCAL.DelimiterPattern )
			/>


		<!--- Reset the iteration state. --->
		<cfset VARIABLES.Instance.TokenIndex = 0 />
		<cfset VARIABLES.Instance.RawTokenIndex = 0 />


		<!--- Return This reference. --->
		<cfreturn THIS />

	</cffunction>
 
 
	<cffunction name="CountTokens" access="public" returntype="numeric" output="false"
		hint="Returns the number over which the tokenizer has iterated.">

		<!---
			The token index is bumped once for every value handed out by
			NextElement(), so it doubles as the count of tokens returned
			so far.
		--->
		<cfset var tokensReturned = VARIABLES.Instance.TokenIndex />

		<cfreturn tokensReturned />
	</cffunction>
 
 
	<cffunction name="HasMoreElements" access="public" returntype="boolean" output="false"
		hint="Checks to see if there are more elemnts to be returned.">

		<!--- Total number of raw tokens available. --->
		<cfset var rawTokenCount = ArrayLen( VARIABLES.Instance.RawTokens ) />

		<!---
			There is more to read as long as the raw token count is still
			greater than the index of the last raw token consumed.
		--->
		<cfreturn (rawTokenCount GT VARIABLES.Instance.RawTokenIndex) />
	</cffunction>
 
 
	<cffunction name="HasMoreTokens" access="public" returntype="boolean" output="false"
		hint="Checks to see if there are more elemnts to be returned (this just wraps around HasMoreElements()).">

		<!--- Alias: hand the question straight to HasMoreElements(). --->
		<cfset var moreAvailable = THIS.HasMoreElements() />

		<cfreturn moreAvailable />
	</cffunction>
 
 
	<cffunction
		name="NextElement"
		access="public"
		returntype="string"
		output="false"
		hint="Returns the next element.">

		<!---
			Returns the next token and advances the tokenizer.

			When a qualifier is configured and the next raw token starts
			with it, the value is treated as a qualified field: the
			surrounding qualifiers are removed, doubled qualifiers are
			collapsed to a single one, and - if the field contains the
			delimiter - the value is rebuilt across as many raw tokens as
			needed. A qualified field that is never closed consumes all
			remaining raw tokens. Any other token is returned as-is.

			This method does not guard against reading past the last raw
			token; callers should check HasMoreElements() first.
		--->

		<!--- Define the local scope. --->
		<cfset var LOCAL = StructNew() />

		<!--- Short-hands for the qualifier and its escaped (doubled) form. --->
		<cfset LOCAL.Qualifier = VARIABLES.Instance.Qualifier />
		<cfset LOCAL.EscapedQualifier = (LOCAL.Qualifier & LOCAL.Qualifier) />

		<!--- Number of raw tokens that are available. --->
		<cfset LOCAL.RawTokenCount = ArrayLen( VARIABLES.Instance.RawTokens ) />

		<!---
			Read the next raw token BEFORE committing the new index, so that
			a read past the end of the raw tokens fails without leaving the
			tokenizer in a half-advanced state.
		--->
		<cfset LOCAL.NextIndex = (VARIABLES.Instance.RawTokenIndex + 1) />
		<cfset LOCAL.Value = VARIABLES.Instance.RawTokens[ LOCAL.NextIndex ] />

		<!--- Remove the single padding character that leads every raw token. --->
		<cfset LOCAL.Value = LOCAL.Value.ReplaceFirst( "^.{1}", "" ) />

		<cfset VARIABLES.Instance.RawTokenIndex = LOCAL.NextIndex />


		<!---
			Check to see if we are dealing with a qualified field. Compare()
			is used so that the check is an exact, case-sensitive match.
		--->
		<cfif (
			Len( LOCAL.Qualifier ) AND
			Len( LOCAL.Value ) AND
			(Compare( Left( LOCAL.Value, 1 ), LOCAL.Qualifier ) EQ 0)
			)>

			<!---
				Strip out the opening qualifier. What remains is the first
				piece of the value; the value itself is rebuilt from scratch.
			--->
			<cfset LOCAL.Piece = LOCAL.Value.ReplaceFirst( "^.{1}", "" ) />
			<cfset LOCAL.Value = "" />

			<cfloop condition="true">

				<!---
					If this piece carries the closing qualifier, drop that
					one character, unescape the rest and stop.
				--->
				<cfif EndsWithClosingQualifier( LOCAL.Piece )>

					<cfset LOCAL.Value = (
						LOCAL.Value &
						Replace(
							LOCAL.Piece.ReplaceFirst( ".{1}$", "" ),
							LOCAL.EscapedQualifier,
							LOCAL.Qualifier,
							"all"
							)
						) />

					<cfbreak />

				</cfif>

				<!--- The value is still open: keep the whole (unescaped) piece. --->
				<cfset LOCAL.Value = (
					LOCAL.Value &
					Replace(
						LOCAL.Piece,
						LOCAL.EscapedQualifier,
						LOCAL.Qualifier,
						"all"
						)
					) />

				<!---
					If there are no more raw tokens, the field was never
					closed. Return what has been collected so far.
				--->
				<cfif (VARIABLES.Instance.RawTokenIndex GTE LOCAL.RawTokenCount)>
					<cfbreak />
				</cfif>

				<!---
					Move on to the next raw token. The delimiter that split
					the two pieces apart is part of the value, so put it back.
				--->
				<cfset VARIABLES.Instance.RawTokenIndex = (VARIABLES.Instance.RawTokenIndex + 1) />
				<cfset LOCAL.Piece = VARIABLES.Instance.RawTokens[ VARIABLES.Instance.RawTokenIndex ] />
				<cfset LOCAL.Piece = LOCAL.Piece.ReplaceFirst( "^.{1}", "" ) />
				<cfset LOCAL.Value = (LOCAL.Value & VARIABLES.Instance.Delimiter) />

			</cfloop>

		</cfif>


		<!---
			ASSERT: At this point, whether we built the value across raw
			tokens or just grabbed a single token, we have a complete value.
			Exactly one token is handed out per call.
		--->
		<cfset VARIABLES.Instance.TokenIndex = (VARIABLES.Instance.TokenIndex + 1) />


		<!--- Return the value. --->
		<cfreturn LOCAL.Value />

	</cffunction>


	<cffunction
		name="EndsWithClosingQualifier"
		access="private"
		returntype="boolean"
		output="false"
		hint="Checks whether the given piece of a qualified value ends with the closing qualifier.">

		<!--- Define arguments. --->
		<cfargument
			name="Text"
			type="string"
			required="true"
			hint="A piece of a qualified value, without the opening qualifier."
			/>

		<!--- Define the local scope. --->
		<cfset var LOCAL = StructNew() />

		<cfset LOCAL.Length = Len( ARGUMENTS.Text ) />
		<cfset LOCAL.RunLength = 0 />

		<!---
			Count the qualifiers at the very end of the text. Escaped
			qualifiers always come in pairs, so an odd count means that the
			last one is the real, closing qualifier.
		--->
		<cfloop condition="(LOCAL.RunLength LT LOCAL.Length) AND (Compare( Mid( ARGUMENTS.Text, (LOCAL.Length - LOCAL.RunLength), 1 ), VARIABLES.Instance.Qualifier ) EQ 0)">

			<cfset LOCAL.RunLength = (LOCAL.RunLength + 1) />

		</cfloop>

		<cfreturn ((LOCAL.RunLength MOD 2) EQ 1) />

	</cffunction>
 
 
	<cffunction name="NextToken" access="public" returntype="string" output="false"
		hint="Returns the next element (this just wraps around NextElement()).">

		<!--- Alias: the actual work is done by NextElement(). --->
		<cfset var nextValue = THIS.NextElement() />

		<cfreturn nextValue />
	</cffunction>
 
</cfcomponent>

For Cut-and-Paste