// Simple external merge demonstration. // Third in a series of three programs. // This version builds initial runs of the given run size, then combines // the runs with a single multi-way merge. #include #include #include #include #include //#include //#include #define BIGKEY 600000000L // HACK!! #include "book.h" #include "compare.h" const int THRESHOLD=0; template void inssort(Elem A[], int n) { // Insertion Sort for (int i=1; i0) && (Comp::lt(A[j], A[j-1])); j--) swap(A, j, j-1); } template int findpivot(Elem A[], int i, int j) { return (i+j)/2; } template int partition(Elem A[], int l, int r, Elem& pivot) { do { // Move the bounds inward until they meet while (Comp::lt(A[++l], pivot)); // Move l right and while ((r != 0) && Comp::gt(A[--r], pivot)); // r left swap(A, l, r); // Swap out-of-place values } while (l < r); // Stop when they cross swap(A, l, r); // Reverse last, wasted swap return l; // Return first position in right partition } template void qsort(Elem array[], int i, int j) { if ((j-i) <= THRESHOLD) return; // Don't sort short list int pivotindex = findpivot(array, i, j); swap(array, pivotindex, j); // stick pivot at end int k = partition(array, i-1, j, array[j]); swap(array, k, j); // Put pivot value in place qsort(array, i, k-1); qsort(array, k+1, j); } #define RecsPerBlock 1024 // Number of records/block int BlocksPerRun; // Blocks per run. Determined by file size. #define RecsPerRun BlocksPerRun*RecsPerBlock // Number of records/run // This is a simple example of external sorting, with the records being // nothing more than an integer value. typedef int Elem; Elem OutBuff[RecsPerBlock]; int OutPos = 0; // Return type from getnext(). Signifies no more records in file #define EMPTY -2 void sort(Elem* array, int n) { qsort(array, 0, n-1); inssort(array, n); // Cleanup sort } // First pass of merge sort. // Break the file into runs, putting them into the temp file void pass1(fstream& infile, fstream& tempfile, int numruns) { // Declare the space for building the initial runs. Elem *BigArray = new Elem[RecsPerRun]; for (int i=0; i BlocksPerRun) return; // Already exhausted if (Posit[index] == (RecsPerBlock-1)) { // Page Fault if (Count[index] == BlocksPerRun) BlockSpace[index][0] = EMPTY; // Flag as exhausted else { // Read in a new block tempfile.seekg(index*RecsPerRun*sizeof(Elem) + Count[index]*RecsPerBlock*sizeof(Elem)); tempfile.read(BlockSpace[index], sizeof(Elem)*RecsPerBlock); } Count[index]++; Posit[index] = 0; } else Posit[index]++; } // Put a new value to the buffered output file void putout(fstream& outfile, Elem val) { if (OutPos == RecsPerBlock) { // page fault outfile.write(OutBuff, sizeof(Elem)*RecsPerBlock); OutPos = 0; } OutBuff[OutPos++] = val; } // Return true iff all runs are exhausted bool AllEmpty(Elem** BlockSpace, int* Posit, int numruns) { for (int i=0; i .\n"; cout << "Blocksize is " << RecsPerBlock * sizeof(Elem) << " bytes.\n"; exit(-1); } int numblocks = atoi(argv[3]); if (numblocks <= 4096) BlocksPerRun = 64; else if (numblocks <= 65536) BlocksPerRun = 256; else { cout << "File size is too big.\n"; exit(1); } cout << "Allocating " << RecsPerRun*sizeof(Elem) << " bytes of working space\n"; // Start timing from here Settime(); exmergesort(argv[1], argv[2], numblocks/BlocksPerRun); cout << "Time is " << Gettime() << "seconds\n"; return 0; }