| 1 | /* |
|---|
| 2 | Longest common subsequence |
|---|
| 3 | Time complexity : O(N * M) |
|---|
| 4 | Memory complexity : O(N + M) |
|---|
| 5 | Check for details: http://www.ics.uci.edu/~eppstein/161/960229.html |
|---|
| 6 | To compile use: g++ -O2 -o lcs -static |
|---|
| 7 | |
|---|
| 8 | The input is given in UTF-32 format because UTF-8 or UTF-16 has variable |
|---|
| 9 | length and you can't random acces variables. Also UTF-32 is easily converted |
|---|
| 10 | to int. |
|---|
| 11 | The output also is in UTF-32 format because of the same reasons |
|---|
| 12 | */ |
|---|
| 13 | |
|---|
| 14 | #include <cstring> |
|---|
| 15 | #include <iostream> |
|---|
| 16 | #include <string> |
|---|
| 17 | #include <cstdio> |
|---|
| 18 | #include <fstream> |
|---|
| 19 | #include <vector> |
|---|
| 20 | using namespace std; |
|---|
| 21 | |
|---|
| 22 | const int UNICODE_MARKER = 0x00FEFF; |
|---|
| 23 | |
|---|
| 24 | int N, M; |
|---|
| 25 | |
|---|
| 26 | vector <int> A, B, LCS; |
|---|
| 27 | |
|---|
| 28 | // keeps the last 2 lines for the length of the lcs |
|---|
| 29 | int *len_prev; |
|---|
| 30 | int *len_cur; |
|---|
| 31 | // keeps the predecessors for the lcs |
|---|
| 32 | pair<int, int> *pred_prev; |
|---|
| 33 | pair<int, int> *pred_cur; |
|---|
| 34 | |
|---|
| 35 | void lcs(int leftA, int rightA, int leftB, int rightB) |
|---|
| 36 | { |
|---|
| 37 | if (leftA > rightA) return; |
|---|
| 38 | if (leftB > rightB) return; |
|---|
| 39 | |
|---|
| 40 | int i, j, middle = (leftA + rightA) >> 1; // get the middle |
|---|
| 41 | |
|---|
| 42 | // initialize |
|---|
| 43 | for (i = leftB - 1; i <= rightB; i++) { |
|---|
| 44 | len_prev[i] = 0; |
|---|
| 45 | len_cur[i] = 0; |
|---|
| 46 | pred_prev[i] = make_pair(leftA - 1, i); |
|---|
| 47 | } |
|---|
| 48 | |
|---|
| 49 | for (i = leftA; i <= rightA; i++) { |
|---|
| 50 | // another initialization |
|---|
| 51 | pred_cur[leftB - 1] = make_pair(i, leftB - 1); |
|---|
| 52 | |
|---|
| 53 | // DP |
|---|
| 54 | for (j = leftB; j <= rightB; j++) { |
|---|
| 55 | if (len_prev[j] > len_cur[j - 1]) { |
|---|
| 56 | len_cur[j] = len_prev[j]; |
|---|
| 57 | pred_cur[j] = i - 1 <= middle ? make_pair(i - 1, j) : |
|---|
| 58 | pred_prev[j]; // update predecessor (above middle) |
|---|
| 59 | } else { |
|---|
| 60 | len_cur[j] = len_cur[j - 1]; |
|---|
| 61 | pred_cur[j] = i <= middle ? make_pair(i, j - 1) : |
|---|
| 62 | pred_cur[j - 1]; // update predecessor (above middle) |
|---|
| 63 | } |
|---|
| 64 | |
|---|
| 65 | if (A[i - 1] == B[j - 1]) { |
|---|
| 66 | len_cur[j] = len_prev[j - 1] + 1; |
|---|
| 67 | pred_cur[j] = i - 1 <= middle ? make_pair(i - 1, j - 1) : |
|---|
| 68 | pred_prev[j - 1]; // update predecessor (above middle) |
|---|
| 69 | } |
|---|
| 70 | } |
|---|
| 71 | |
|---|
| 72 | // copy current line in previous line |
|---|
| 73 | for (j = leftB - 1; j <= rightB; j++) { |
|---|
| 74 | len_prev[j] = len_cur[j]; |
|---|
| 75 | pred_prev[j] = pred_cur[j]; |
|---|
| 76 | } |
|---|
| 77 | } |
|---|
| 78 | |
|---|
| 79 | // if no lcs get out |
|---|
| 80 | if (len_cur[rightB] == 0) { |
|---|
| 81 | return; |
|---|
| 82 | } |
|---|
| 83 | |
|---|
| 84 | if (leftA == rightA) { // if only one line, insert solution |
|---|
| 85 | LCS.push_back(A[leftA - 1]); |
|---|
| 86 | } else if (leftB == rightB) { // if only one column, insert solution |
|---|
| 87 | LCS.push_back(B[leftB - 1]); |
|---|
| 88 | } else { // solve for the two different parts (divide et impera) |
|---|
| 89 | pair<int, int> p = pred_cur[rightB]; |
|---|
| 90 | |
|---|
| 91 | lcs(leftA, p.first, leftB, p.second); |
|---|
| 92 | lcs(p.first + 1, rightA, p.second + 1, rightB); |
|---|
| 93 | } |
|---|
| 94 | } |
|---|
| 95 | |
|---|
| 96 | int main() |
|---|
| 97 | { |
|---|
| 98 | // read input strings |
|---|
| 99 | |
|---|
| 100 | int chr; |
|---|
| 101 | |
|---|
| 102 | // read the unicode marker |
|---|
| 103 | if (!fread(&chr, 4, 1, stdin)) return 0; |
|---|
| 104 | |
|---|
| 105 | // check for UTF-32 |
|---|
| 106 | if (chr != UNICODE_MARKER) { |
|---|
| 107 | return -1; |
|---|
| 108 | } |
|---|
| 109 | |
|---|
| 110 | // read in UTF-32 (binary, 4 by 4 bytes) |
|---|
| 111 | do { |
|---|
| 112 | if (!fread(&chr, 4, 1, stdin)) return -1; |
|---|
| 113 | A.push_back(chr); |
|---|
| 114 | } while (chr != '\n'); |
|---|
| 115 | |
|---|
| 116 | do { |
|---|
| 117 | if (!fread(&chr, 4, 1, stdin)) return -1; |
|---|
| 118 | B.push_back(chr); |
|---|
| 119 | } while (chr != '\n'); |
|---|
| 120 | |
|---|
| 121 | N = A.size(), M = B.size(); |
|---|
| 122 | |
|---|
| 123 | // allocate memory |
|---|
| 124 | len_prev = new int[M + 3]; |
|---|
| 125 | len_cur = new int[M + 3]; |
|---|
| 126 | pred_prev = new pair<int, int>[M + 3]; |
|---|
| 127 | pred_cur = new pair<int, int>[M + 3]; |
|---|
| 128 | |
|---|
| 129 | // run lcs |
|---|
| 130 | lcs(1, N, 1, M); |
|---|
| 131 | |
|---|
| 132 | // output lcs |
|---|
| 133 | if (!fwrite(&UNICODE_MARKER, 4, 1, stdout)) return -1; |
|---|
| 134 | for (int i = 0; i < (int) LCS.size(); i++) { |
|---|
| 135 | if (!fwrite(&LCS[i], 4, 1, stdout)) return -1; |
|---|
| 136 | } |
|---|
| 137 | |
|---|
| 138 | return 0; |
|---|
| 139 | } |
|---|
| 140 | |
|---|