<!---
	External merge sort - proof of concept.

	Phase 1: Write ./unsorted.txt containing intTotalRows lines, each a random
	         permutation of the letters A-Z.
	Phase 2: Read ./unsorted.txt one line at a time and split it into sorted
	         sub-files ./sorted_1.txt .. ./sorted_N.txt, each holding at most
	         intFileRowSize lines.
	Phase 3: Merge the sorted sub-files into ./sorted.txt by repeatedly taking
	         the smallest "current" line across all sub-files.

	All files are written with CRLF line endings (strNL). The sub-files are
	left on disk when the script finishes.
--->


<!--- ------------------------------------------------------------------ --->
<!--- Phase 1: create the unsorted input file.                            --->
<!--- ------------------------------------------------------------------ --->

<!---
	Number of lines to generate. This is a proof of concept, so we only need
	enough lines to be split across multiple sub-files.
--->
<cfset intTotalRows = 30 />

<!--- Maximum number of lines stored in each individual sub-sort file. --->
<cfset intFileRowSize = 8 />

<!---
	Characters used to build each line. The array is shuffled in place and
	then joined into a string to produce one randomly ordered line.
--->
<cfset arrChars = ListToArray( "A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z" ) />

<!--- Line terminator (CR + LF) used for every file this script writes. --->
<cfset strNL = (Chr( 13 ) & Chr( 10 )) />

<!--- Java Collections utility; used to shuffle the array by reference. --->
<cfset objCollection = CreateObject( "java", "java.util.Collections" ) />

<!--- Buffer that accumulates the generated lines before they hit disk. --->
<cfset sbUnsorted = CreateObject( "java", "java.lang.StringBuffer" ).Init() />

<!--- Generate one random line per row. --->
<cfloop index="intRow" from="1" to="#intTotalRows#" step="1">

	<!--- Shuffle the character array in place. --->
	<cfset objCollection.Shuffle( arrChars ) />

	<!--- Join the characters into a line and append it, terminated, to the buffer. --->
	<cfset sbUnsorted.Append( JavaCast( "string", (ArrayToList( arrChars, "" ) & strNL) ) ) />

</cfloop>

<!--- Write the generated lines to disk. --->
<cffile
	action="WRITE"
	file="#ExpandPath( './unsorted.txt' )#"
	output="#sbUnsorted.ToString()#"
	addnewline="false"
	/>

<!---
	ASSERT: unsorted.txt now exists and holds the unsorted lines. The goal is
	to create a sorted copy of it, sorted.txt.
--->


<!--- ------------------------------------------------------------------ --->
<!--- Phase 2: split the unsorted file into small, sorted sub-files.      --->
<!--- ------------------------------------------------------------------ --->

<!--- Number of sub-files written so far. --->
<cfset intFileCount = 0 />

<!---
	Buffered reader over the unsorted file so that it can be consumed one line
	at a time instead of being loaded into memory in one piece.
--->
<cfset objLineReader = CreateObject( "java", "java.io.BufferedReader" ).Init(
	CreateObject( "java", "java.io.FileReader" ).Init(
		JavaCast( "string", ExpandPath( "./unsorted.txt" ) )
		)
	) />

<cftry>

	<!--- Lines collected for the sub-file currently being built. --->
	<cfset arrLines = ArrayNew( 1 ) />

	<!--- Read the first line. --->
	<cfset strLine = objLineReader.ReadLine() />

	<!---
		ReadLine() returns a Java NULL at end of file. Assigning NULL to a
		variable removes it from the VARIABLES scope, so "variable no longer
		exists" is the end-of-file test.
	--->
	<cfloop condition="StructKeyExists( VARIABLES, 'strLine' )">

		<!--- Add the current line to the chunk. --->
		<cfset ArrayAppend( arrLines, strLine ) />

		<!--- Read ahead so we know whether the chunk just received its last line. --->
		<cfset strLine = objLineReader.ReadLine() />

		<!---
			Flush the chunk when it is full OR when the input is exhausted.
			Because a line was appended above, the chunk is never empty here,
			so a sub-file is only ever written when it has data.
		--->
		<cfif (
			(ArrayLen( arrLines ) EQ intFileRowSize) OR
			(NOT StructKeyExists( VARIABLES, "strLine" ))
			)>

			<!--- Each individual sub-file must contain sorted values. --->
			<cfset ArraySort( arrLines, "text", "ASC" ) />

			<!--- Increment the file counter. --->
			<cfset intFileCount = (intFileCount + 1) />

			<!--- Write the sorted chunk to its sub-file. --->
			<cffile
				action="WRITE"
				file="#ExpandPath( './sorted_#intFileCount#.txt')#"
				output="#ArrayToList( arrLines, strNL )#"
				addnewline="false"
				/>

			<!--- Start a fresh chunk for the next sub-file. --->
			<cfset arrLines = ArrayNew( 1 ) />

		</cfif>

	</cfloop>

	<!--- Close the file reader. --->
	<cfset objLineReader.Close() />

	<cfcatch type="any">

		<!--- Do not leak the file handle if anything above failed. --->
		<cfset objLineReader.Close() />
		<cfrethrow />

	</cfcatch>
</cftry>

<!---
	ASSERT: the unsorted file has been split into intFileCount smaller files,
	each sorted on its own. Next, they are merged into a single sorted file.
--->


<!--- ------------------------------------------------------------------ --->
<!--- Phase 3: merge the sorted sub-files into sorted.txt.                --->
<!--- ------------------------------------------------------------------ --->

<!---
	A query object is used as the merge work list, one row per sub-file:
	"reader" holds the open Java reader for that sub-file and "value" holds the
	smallest line of that sub-file that has not yet been written out. Storing
	Java objects in a query is unconventional, but it lets a query of queries
	do the ordering.
--->
<cfset qReader = QueryNew( "reader, value" ) />

<cftry>

	<!--- Add one row per sub-file and prime it with that file's first line. --->
	<cfloop index="intFileIndex" from="1" to="#intFileCount#" step="1">

		<!--- Add a row to the query. --->
		<cfset QueryAddRow( qReader ) />

		<!--- Open the sub-file and store the reader in the query as-is (no Java cast). --->
		<cfset qReader[ "reader" ][ intFileIndex ] = CreateObject( "java", "java.io.BufferedReader" ).Init(
			CreateObject( "java", "java.io.FileReader" ).Init(
				JavaCast( "string", ExpandPath( "./sorted_#intFileIndex#.txt" ) )
				)
			) />

		<!---
			Read the first line. A sub-file is only written when it has data,
			so this read always yields a value. Cast it to a string so the
			query of queries can sort on it.
		--->
		<cfset qReader[ "value" ][ intFileIndex ] = JavaCast(
			"string",
			qReader[ "reader" ][ intFileIndex ].ReadLine()
			) />

	</cfloop>

	<!--- Buffered writer for the final sorted file. --->
	<cfset objOutput = CreateObject( "java", "java.io.BufferedWriter" ).Init(
		CreateObject( "java", "java.io.FileWriter" ).Init(
			JavaCast( "string", ExpandPath( "./sorted.txt" ) )
			)
		) />

	<!---
		Keep merging while at least one sub-file still has lines. Checking the
		record count up front means that zero sub-files (empty input) simply
		produces an empty sorted.txt.
	--->
	<cfloop condition="qReader.RecordCount">

		<!---
			Re-sort the work list so that the row holding the smallest current
			value is row 1. The result is stored back under the same name.
		--->
		<cfquery name="qReader" dbtype="query">
			SELECT
				*
			FROM
				qReader
			ORDER BY
				[value] ASC
		</cfquery>

		<!---
			Write the smallest value, terminated with the same CRLF that the
			other files in this script use.
		--->
		<cfset objOutput.Write( JavaCast( "string", qReader.value ) ) />
		<cfset objOutput.Write( JavaCast( "string", strNL ) ) />

		<!---
			Copy the reader out of the query before calling methods on it;
			invoking it directly on the query column runs into a casting
			problem.
		--->
		<cfset objReader = qReader.reader />

		<!---
			Read the next line from that sub-file into a plain variable (not
			straight into the query) so that a NULL return can be detected.
		--->
		<cfset strValue = objReader.ReadLine() />

		<cfif StructKeyExists( VARIABLES, "strValue" )>

			<!--- More data: it becomes this row's current value. --->
			<cfset qReader[ "value" ][ 1 ] = JavaCast( "string", strValue ) />

		<cfelse>

			<!--- NULL was returned: this sub-file is exhausted. Close its reader... --->
			<cfset objReader.Close() />

			<!--- ...and drop its row (zero-based start index 0, count 1). --->
			<cfset qReader.RemoveRows( JavaCast( "int", 0 ), JavaCast( "int", 1 ) ) />

		</cfif>

	</cfloop>

	<!--- Every sub-file has been consumed; flush and close the output. --->
	<cfset objOutput.Close() />

	<cfcatch type="any">

		<!--- Close whichever sub-file readers are still held by the query. --->
		<cfloop index="intFileIndex" from="1" to="#qReader.RecordCount#" step="1">

			<cfset objReader = qReader[ "reader" ][ intFileIndex ] />

			<!--- A row whose reader failed to open holds no object. --->
			<cfif IsObject( objReader )>
				<cfset objReader.Close() />
			</cfif>

		</cfloop>

		<!--- Close the output writer if it was opened before the failure. --->
		<cfif StructKeyExists( VARIABLES, "objOutput" )>
			<cfset objOutput.Close() />
		</cfif>

		<cfrethrow />

	</cfcatch>
</cftry>