I am trying to learn a linear time suffix sorting algorithm who uses suffix array that save a bunch of memory. But I have difficulties understand the its radix sorting, Can you explain to me how it works ?

And I have got a c++ implementation, but I don't even know how to call the interface =,=,

Here is the code,

I have studied it for days, but doesn't make sense with it, I don't want to give up, can you please tell me what is what ?Code:// This programme is proposed to implement a o(n) time complexity DC3 algorithm #include <stdio.h> #include <stdlib.h> void suffixArray ( int* s, int* SA, int n, int K ); int main ( int argv, char *argc[] ) { // I know the author tried to write comprehensive code // but I still don't get it, even to call it =,= return 0; } inline bool leq ( int a1, int a2, int b1, int b2 ) // lexic. order for pairs { return (a1 < b1 || a1 == b1 && a2 <= b2); } // and triples inline bool leq ( int a1, int a2, int a3, int b1, int b2, int b3 ) { return ( a1 < b1 || a1 == b1 && leq ( a2, a3, b2, b3 ) ); } // stably sort a[0..n-1] to b[0..n-1] with keys in 0..K from r static void radixPass ( int* a, int* b, int* r, int n, int K ) { // count occurrences int* c = new int[K + 1]; // counter array for (int i = 0; i <= K; i++) c[i] = 0; // reset counters for (int i = 0; i < n; i++) c[ r[ a[i] ] ]++; // count occurrences // exclusive prefix sums for ( int i = 0, sum = 0; i <= K; i++ ) { int t = c[i]; c[i] = sum; sum += t; } for (int i = 0; i < n; i++) b[ c[ r[ a[i] ] ]++ ] = a[i]; // sort delete [] c; } // find the suffix array SA of s[0..n-1] in {1..K}^n // require s[n] = s[n + 1] = s[n + 2] = 0, n >= 2 void suffixArray ( int* s, int* SA, int n, int K ) { // Here I think n might be the length of orginal suffix int n0 = (n + 2)/3, n1 = (n + 1)/3, n2 = n/3, n02 = n0 + n2; // (2n + 2)/3 can be seen as the number of triples whose // pos mod 3 != 0 // I think s12 might be the array that holds the suffixes whose // pos mod 3 != 0 int* s12 = new int[n02 + 3]; s12[n02] = s12[n02 + 1] = s12[n02 + 2] = 0; // Last 3 would end up with 0 int* SA12 = new int[n02 + 3]; // suffix array for pos mod 3 != 0 SA12[n02] = SA12[n02 + 1] = SA12[n02 + 2] = 0; int* s0 = new int[n0]; // whose pos mod 3 = 0 int* SA0 = new int[n0]; // suffix array for pos mod 3 = 0 // generate positions of mod 1 and mod 2 suffixes // the "+(n0-n1)" adds a dummy mod 1 suffix if n%3 == 1 for ( int i = 0, j = 0; i < n + (n0 - n1); i++ ) if ( i % 3 != 0 ) s12[j++] = i; // lsb radix sort the mod 1 and mod 2 triples radixPass ( s12 , SA12, s + 2, n02, K ); radixPass ( SA12, s12 , s + 1, n02, K ); radixPass ( s12 , SA12, s + 0, n02, K ); // find lexicographic names of triples int name = 0, c0 = -1, c1 = -1, c2 = -1; for ( int i = 0; i < n02; i++ ) { if ( s[ SA12[i] + 0 ] != c0 || s[ SA12[i] + 1 ] != c1 || s[ SA12[i] + 2 ] != c2 ) { name ++; c0 = s[ SA12[i] + 0 ]; c1 = s[ SA12[i] + 1 ]; c2 = s[ SA12[i] + 2 ] ; } if ( SA12[i] % 3 == 1 ) { s12[ SA12[i]/3 ] = name; // left half } else { s12[ SA12[i]/3 + n0 ] = name; // right half } } // recurse if names are not yet unique if ( name < n02 ) { suffixArray ( s12, SA12, n02, name ); // store unique names in s12 using the suffix array for ( int i = 0; i < n02; i++ ) s12[SA12[i]] = i + 1; } else { // generate the suffix array of s12 directly for ( int i = 0; i < n02; i++ ) SA12[s12[i] - 1] = i; } // stably sort the mod 0 suffixes from SA12 by their first character for ( int i=0, j=0; i < n02; i++ ) if ( SA12[i] < n0) s0[j++] = 3*SA12[i]; radixPass ( s0, SA0, s, n0, K ); // merge sorted SA0 suffixes and sorted SA12 suffixes for ( int p = 0, t = n0 - n1, k = 0; k < n; k++ ) { int i = GetI(); // pos of current offset 12 suffix int j = SA0[p]; // pos of current offset 0 suffix if ( SA12[t] < n0 ? leq ( s[i], s12[SA12[t] + n0], s[j], s12[j/3] ) : leq ( s[i], s[i + 1], s12[SA12[t] - n0 + 1], s[j], s[j + 1], s12[j/3 + n0] ) ) { #define GetI() (SA12[t] < n0 ? SA12[t]*3 + 1 : (SA12[t] - n0)*3 + 2) // suffix from SA12 is smaller SA[k] = i; t ++; if ( t == n02 ) // done --- only SA0 suffixes left { for ( k++; p < n0; p++, k++ ) SA[k] = SA0[p]; } } else { SA[k] = j; p ++; if (p == n0) // done --- only SA12 suffixes left { for ( k ++; t < n02; t++, k++ ) SA[k] = GetI(); } } } delete [] s12; delete [] SA12; delete [] SA0; delete [] s0; }

Thank you !

## Bookmarks