// From the software distribution accompanying the textbook // "A Practical Introduction to Data Structures and Algorithm Analysis, // Third Edition (C++)" by Clifford A. Shaffer. // Source code Copyright (C) 2007-2011 by Clifford A. Shaffer. // Simple external merge demonstration. // Second in a series of three programs. // This version implements a standard 2-way mergsort on an external file // like exmrg1, except that it begins by building a large initial run // using quicksort (specifically, qsort2 in this distribution, the version // that does a quicksort on lists of length <= THRESHOLD). // After the initial run has been created (with working memory size // determined by a command line parameter), a series of two-way merge // passes is used to combine the runs together. #include using std::fstream; using std::ifstream; using std::ios; #include "book.h" // Include comparator functions #include "compare.h" const int THRESHOLD=0; // Insertion sort for final cleanup sort pass template void inssort(E A[], int n) { // Insertion Sort for (int i=1; i0) && (Comp::prior(A[j], A[j-1])); j--) swap(A, j, j-1); } // Simple findpivot: Pick middle value in array template inline int findpivot(E A[], int i, int j) { return (i+j)/2; } // Partition the array template inline int partition(E A[], int l, int r, E& pivot) { do { // Move the bounds inward until they meet while (Comp::prior(A[++l], pivot)); // Move l right and while ((l < r) && Comp::prior(pivot, A[--r])); // r left swap(A, l, r); // Swap out-of-place values } while (l < r); // Stop when they cross return l; // Return first position in right partition } // qsort core function: do not qsort lists of length <= THRESHOLD template void qsort(E array[], int i, int j) { if ((j-i) <= THRESHOLD) return; // Don't sort short list int pivotindex = findpivot(array, i, j); swap(array, pivotindex, j); // stick pivot at end int k = partition(array, i-1, j, array[j]); swap(array, k, j); // Put pivot value in place qsort(array, i, k-1); qsort(array, k+1, j); } // Return 2^x int pow2(int x) { int val = 1; for (int i=0; i= RecsPerBlock) { // page fault FS[fpindex].read((char*)&Array[fpindex], sizeof(E)*RecsPerBlock); if (!FS[fpindex]) { cout << "Error reading file " << Name[fpindex] << endl; cout << readcnt << " blocks were read, and " << writecnt << " blocks were written\n"; exit(-1); } readcnt++; Posit[fpindex] = 0; } val = Array[fpindex][Posit[fpindex]++]; } // Write a record (in val) to the output file fpindex. // This supports buffered writing to the output file, one block at a time. void putout(int fpindex, E val) { Array[fpindex][Posit[fpindex]++] = val; if (Posit[fpindex] == RecsPerBlock) { // page fault FS[fpindex].write((char*)Array[fpindex], sizeof(E)*RecsPerBlock); writecnt++; Posit[fpindex] = 0; } } void sort(E* array, int n) { qsort(array, 0, n-1); inssort(array, n); // Cleanup sort } // First pass of merge sort. // Split input into two files. void pass1(int inindex, int outindex1, int outindex2, int numruns, E* BigArray, int memsize) { int memblocks = memsize/(sizeof(E) * RecsPerBlock); for (int i=0; i1; i=i/2) numpass++; // Now do the passes cout << "Now we will do " << numpass << " passes\n"; for(int i=0; i passrecs) { // exhausted first run putout(out1, val2); count2++; if (count2 <= passrecs) getnext(in2, val2); } else if (count2 > passrecs) { // exhausted second run putout(out1, val1); count1++; if (count1 <= passrecs) getnext(in1, val1); } else // Got records from each run still if (val1 < val2) { putout(out1, val1); count1++; if (count1 <= passrecs) getnext(in1, val1); } else { putout(out1, val2); count2++; if (count2 <= passrecs) getnext(in2, val2); } } swap(out1, out2); } // Done this pass, so flush output FS[in1].clear(); FS[in1].close(); FS[in2].clear(); FS[in2].close(); FS[out1].clear(); FS[out1].close(); FS[out2].clear(); FS[out2].close(); numruns = numruns/2; // Ready for next pass } rename(Name[out2], outname); } // Main routine. Get everything ready int main(int argc, char** argv) { if (argc != 4) { cout << "Usage: exmrg2 .\n"; cout << "Numblocks is the size of the file in blocks.\n"; cout << "Blocksize is " << RecsPerBlock * sizeof(E) << " bytes.\n"; cout << "Memsize is the size of working memory, measured in blocks.\n"; exit(-1); } // Check the file size. It must be a multiple of the block size, // and a power of 2. long size; ifstream file (argv[1], ios::in|ios::binary); file.seekg (0, ios::end); size = file.tellg(); file.close(); cout << "size of " << argv[1]; cout << " is " << size << " bytes.\n"; if ((size % (sizeof(E)*RecsPerBlock)) != 0) { cout << "Error. File size must be a multiple of the block size.\n"; exit(-1); } int numrecs = size/sizeof(E); int i; for (i=1; i