<cfcomponent
	output="false"
	hint="I help to parse large XML files by matching patterns and then only parsing sub-nodes of the document.">

	<!---
		Overview:
		- Init() compiles a regular expression that matches one complete element
		  (self-closing, or open tag through its closing tag) for any of the
		  requested node names, and opens a buffered input stream on the file.
		- GetNextNode() reads the file chunk by chunk into a text buffer, pulls
		  the first complete matching element out of that buffer, and returns it
		  parsed as an XML node. It returns void when the file is exhausted.
		- Close() closes the input stream.

		Known limitations (inherited from the regex-based approach):
		- An element nested inside another element of the same name is cut at
		  the first closing tag.
		- Text that has not matched yet stays in the buffer, so a file with few
		  or no matching nodes is held in memory almost entirely.
	--->


	<cffunction
		name="Init"
		access="public"
		returntype="any"
		output="false"
		hint="I return an initialized object.">

		<cfargument
			name="Nodes"
			type="string"
			required="true"
			hint="I am the list of node names that will be parsed using regular expressions."
			/>

		<cfargument
			name="XmlFilePath"
			type="string"
			required="true"
			hint="I am the file path for the large XML file to be parsed."
			/>

		<cfargument
			name="BufferSize"
			type="numeric"
			required="false"
			default="#(1024 * 1024 * 5)#"
			hint="I am the size of the buffer which will be used to make reads to the input stream."
			/>

		<cfset var LOCAL = {} />

		<!---
			Turn the comma / space delimited list into a regex alternation
			(ex. "a, b" becomes "a|b"). Empty list items are dropped.
		--->
		<cfset LOCAL.Nodes = ListChangeDelims(
			ARGUMENTS.Nodes,
			"|",
			", "
			) />

		<!--- Without at least one node name the pattern below is meaningless. --->
		<cfif NOT Len( LOCAL.Nodes )>
			<cfthrow
				type="InvalidArgument"
				message="The Nodes argument must contain at least one node name."
				/>
		</cfif>

		<!---
			A zero-length transfer buffer can never make progress on a read,
			so reject it up front with a clear message.
		--->
		<cfif (ARGUMENTS.BufferSize LT 1)>
			<cfthrow
				type="InvalidArgument"
				message="The BufferSize argument must be at least 1."
				/>
		</cfif>

		<!---
			Build the node pattern. It has two alternatives:

			1. A self-closing element: the tag must end in "/>".
			2. An open tag, the shortest possible run of content, and the
			   closing tag for the same name (back-reference to group 2).

			The look-ahead after the name requires whitespace, "/" or ">" so
			that asking for "item" does not also pick up the start tag of
			"item-list", "item.x" or "item:y" (a plain word boundary would).
			The closing tag tolerates white space before its ">", which XML
			allows.
		--->
		<cfset LOCAL.Pattern = (
			"(?i)" &
			"<(#LOCAL.Nodes#)(?=[\s/>])[^>]*(?<=/)>|" &
			"<(#LOCAL.Nodes#)(?=[\s/>])[^>]*>[\w\W]*?</\2\s*>"
			) />

		<!---
			Instance state:
			- Pattern: the compiled java.util.regex.Pattern.
			- DataBuffer: decoded text read so far that has not been consumed.
			- TransferBuffer: Java byte array (BufferSize bytes) handed to the
			  input stream for each read.
			- InputStream: the buffered file stream (set below).
			- IsClosed: whether the input stream has been closed.
		--->
		<cfset VARIABLES.Instance = {
			Pattern = CreateObject(
				"java",
				"java.util.regex.Pattern"
				).Compile(
					JavaCast( "string", LOCAL.Pattern )
					),
			DataBuffer = "",
			TransferBuffer = RepeatString( " ", ARGUMENTS.BufferSize ).GetBytes(),
			InputStream = "",
			IsClosed = false
			} />

		<!--- Open the file last so nothing is left open if validation fails. --->
		<cfset VARIABLES.Instance.InputStream = CreateObject(
			"java",
			"java.io.BufferedInputStream"
			).Init(
				CreateObject(
					"java",
					"java.io.FileInputStream"
					).Init(
						JavaCast(
							"string",
							ARGUMENTS.XmlFilePath
							)
						)
				)
			/>

		<cfreturn THIS />
	</cffunction>


	<cffunction
		name="Close"
		access="public"
		returntype="void"
		output="false"
		hint="This closes the input file stream. It is recommended that you call this if you finish before all nodes have been matched.">

		<cfset VARIABLES.Instance.InputStream.Close() />

		<!--- Remember the stream is closed so GetNextNode() stops reading from it. --->
		<cfset VARIABLES.Instance.IsClosed = true />

		<cfreturn />
	</cffunction>


	<cffunction
		name="GetNextNode"
		access="public"
		returntype="any"
		output="false"
		hint="I return the next node in the XML document. If no node can be found, I return VOID.">

		<cfset var LOCAL = {} />

		<!---
			Keep reading chunks until the buffer contains a complete matching
			node or the file runs out. A loop is used (rather than recursion)
			so the call depth does not grow with the number of reads.
		--->
		<cfloop condition="true">

			<cfset LOCAL.Matcher = VARIABLES.Instance.Pattern.Matcher(
				JavaCast( "string", VARIABLES.Instance.DataBuffer )
				) />

			<cfif LOCAL.Matcher.Find()>

				<cfset LOCAL.XMLData = LOCAL.Matcher.Group() />

				<!---
					Keep only the text after the match. Anything before the
					match is not part of a requested node and is dropped.
				--->
				<cfset LOCAL.CharsToLeave = (
					Len( VARIABLES.Instance.DataBuffer ) -
					LOCAL.Matcher.End()
					) />

				<!--- Right() does not accept a zero count, hence the branch. --->
				<cfif LOCAL.CharsToLeave>
					<cfset VARIABLES.Instance.DataBuffer = Right(
						VARIABLES.Instance.DataBuffer,
						LOCAL.CharsToLeave
						) />
				<cfelse>
					<cfset VARIABLES.Instance.DataBuffer = "" />
				</cfif>

				<cfreturn
					XmlParse( Trim( LOCAL.XMLData ) )
						.XmlRoot
					/>

			</cfif>

			<!---
				Nothing matched in the buffer. If the stream is already closed
				(end of file was reached earlier, or the caller closed it),
				there is nothing more to read - return void instead of reading
				from a closed stream.
			--->
			<cfif VARIABLES.Instance.IsClosed>
				<cfreturn />
			</cfif>

			<cfset LOCAL.BytesRead = VARIABLES.Instance.InputStream.Read(
				VARIABLES.Instance.TransferBuffer,
				JavaCast( "int", 0 ),
				JavaCast( "int", ArrayLen( VARIABLES.Instance.TransferBuffer ) )
				) />

			<!--- A return of -1 signals the end of the stream. --->
			<cfif (LOCAL.BytesRead EQ -1)>

				<cfset THIS.Close() />

				<!--- The leftover text holds no match; release it. --->
				<cfset VARIABLES.Instance.DataBuffer = "" />

				<cfreturn />

			</cfif>

			<!---
				Decode ONLY the bytes that were just read. Decoding the whole
				transfer buffer and then taking BytesRead characters would pull
				in stale bytes from earlier reads whenever the data contains
				multi-byte characters (fewer characters than bytes).

				The bytes are decoded with the JVM default charset.
				NOTE(review): a multi-byte character that straddles two reads
				is still decoded incorrectly; fixing that needs a Reader-based
				stream and is out of scope here.
			--->
			<cfset VARIABLES.Instance.DataBuffer &= CreateObject(
				"java",
				"java.lang.String"
				).Init(
					VARIABLES.Instance.TransferBuffer,
					JavaCast( "int", 0 ),
					JavaCast( "int", LOCAL.BytesRead )
					) />

		</cfloop>
	</cffunction>

</cfcomponent>