| 1 | <?php |
|---|
| 2 | require_once(IA_ROOT_DIR."common/log.php"); |
|---|
| 3 | |
|---|
// Writes $string, followed by a newline, into a new temporary file.
// tempnam() is asked to create the file under IA_ROOT_DIR.'attach/' with the
// prefix "ia".
//
// Returns the temporary file name on success, or "" if the file could not be
// created, opened or completely written. On failure the temporary file is
// removed again so that nothing is left behind.
function file_put_string($string) {
    $name = tempnam(IA_ROOT_DIR.'attach/', "ia");
    if ($name === false) {
        return "";
    }

    $fp = fopen($name, "w");
    if (!$fp) {
        // tempnam() has already created the file; do not leak it
        unlink($name);
        return "";
    }

    $string .= "\n";
    $written = fwrite($fp, $string);
    fclose($fp);

    // a failed or short write would make the caller diff truncated data
    if ($written !== strlen($string)) {
        unlink($name);
        return "";
    }
    return $name;
}
|---|
| 17 | |
|---|
// Returns an array describing the differences between two strings, obtained
// by writing both strings to temporary files and running `diff -au` on them.
//
// $string must be an array with exactly two elements. Lines found only in
// $string[0] are reported as 'deleted', lines found only in $string[1] as
// 'added', and unchanged context lines as 'normal'.
//
// Output array format is:
// array(
//     ...
//     block_index => array(            // one entry per "@@" hunk
//         ...
//         op_index => array(           // run of consecutive lines of one type
//             type  => added | deleted | normal
//             lines => array of strings (diff prefix character removed)
//         )
//         ...
//     )
//     ...
// );
//
// Returns null if a temporary file could not be created or deleted.
function diff_string($string) {
    log_assert(count($string) == 2);

    // put string contents into files
    $name = array();
    for ($i = 0; $i < 2; ++$i) {
        $name[$i] = file_put_string($string[$i]);
        if (!$name[$i]) {
            // remove the files created so far instead of leaking them
            for ($j = 0; $j < $i; ++$j) {
                unlink($name[$j]);
            }
            return null;
        }
    }

    // execute diff; the paths are quoted because the directory they live in
    // may contain spaces or shell metacharacters
    $lines = array();
    exec("diff -au ".escapeshellarg($name[0])." ".escapeshellarg($name[1]), $lines);

    // delete temporary files; always try both, even if the first one fails
    $deleted_all = true;
    for ($i = 0; $i < 2; ++$i) {
        if (!unlink($name[$i])) {
            $deleted_all = false;
        }
    }
    if (!$deleted_all) {
        return null;
    }

    // parse diff output
    $result = array();
    $block_cnt = 0;
    $op_cnt = -1;
    $in_hunk = false;
    foreach ($lines as $line) {
        if (strlen($line) == 0) {
            continue;
        }
        if (preg_match("/^(@@)/", $line)) {
            // start a new block, unless the current one is still empty
            if (isset($result[$block_cnt])) {
                ++$block_cnt;
                $op_cnt = -1;
            }
            $in_hunk = true;
            continue;
        }
        if (!$in_hunk) {
            // The "---" / "+++" file headers only occur before the first
            // hunk. Skipping by position rather than by pattern keeps
            // deleted lines starting with "--" and added lines starting
            // with "++" from being mistaken for headers and dropped.
            continue;
        }

        // what type of operation is this?
        if ($line[0] === '+') {
            $type = 'added';
        } elseif ($line[0] === '-') {
            $type = 'deleted';
        } else {
            $type = 'normal';
        }

        // strip the diff prefix character
        $line = substr($line, 1);
        if ($op_cnt >= 0 && $result[$block_cnt][$op_cnt]['type'] == $type) {
            // same type as the previous line: extend the current operation
            $result[$block_cnt][$op_cnt]['lines'][] = $line;
        } else {
            ++$op_cnt;
            $result[$block_cnt][$op_cnt]['type'] = $type;
            $result[$block_cnt][$op_cnt]['lines'] = array($line);
        }
    }

    return $result;
}
|---|
| 88 | |
|---|
// Runs the external common/lcs program on $a and $b and returns its output,
// which is expected to be the longest common subsequence of the two strings.
//
// Both strings are written to the program's standard input, one per line.
// iconv converts the input from UTF-8 to UTF-32 before it reaches the program
// and converts the program's output back to UTF-8.
//
// Anything the pipeline writes to standard error is passed to log_error().
// The returned string has leading and trailing newlines stripped.
function lcs($a, $b) {
    $command = "iconv -f utf8 -t utf32 | " . IA_ROOT_DIR .
               "/common/lcs | iconv -f utf32 -t utf8";

    // stdin is read by the child, stdout and stderr are written by it
    $spec = array(
        0 => array("pipe", "r"),
        1 => array("pipe", "w"),
        2 => array("pipe", "w"),
    );

    $handle = proc_open($command, $spec, $pipes);
    log_assert(is_resource($handle), "Could not create process.");

    $to_child = $pipes[0];
    $from_child = $pipes[1];
    $child_errors = $pipes[2];

    // send both strings, then close stdin so the child sees end of input
    fwrite($to_child, $a."\n");
    fwrite($to_child, $b."\n");
    fclose($to_child);

    $output = stream_get_contents($from_child);
    fclose($from_child);

    // report whatever the pipeline complained about
    $complaints = stream_get_contents($child_errors);
    if ($complaints) {
        log_error($complaints);
    }
    fclose($child_errors);

    proc_close($handle);

    return trim($output, "\n");
}
|---|
| 126 | |
|---|
// Splits $string into segments according to $substring, which is matched
// against $string character by character, left to right (as a subsequence).
//
// Returns a list of array('type' => ..., 'string' => ...) entries:
//   - every matched character becomes its own entry of type 'normal';
//   - every run of characters skipped between two matches (or after the last
//     match) becomes one entry of type $op_name.
//
// Character positions are computed with the mb_* functions, so they follow
// the current mbstring internal encoding.
function split_string($string, $substring, $op_name) {
    // A newline is appended to both strings as a sentinel, so that the text
    // after the last matched character is flushed as a final segment.
    $string .= "\n";
    $substring .= "\n";
    $text_len = mb_strlen($string);
    $pattern_len = mb_strlen($substring);

    $segments = array();
    $pos = -1;
    for ($idx = 0; $idx < $pattern_len; ++$idx) {
        $wanted = mb_substr($substring, $idx, 1);

        // advance to the next occurrence of the wanted character
        $last_match = $pos;
        ++$pos;
        while ($pos < $text_len && mb_substr($string, $pos, 1) != $wanted) {
            ++$pos;
        }

        // characters skipped since the previous match
        $skipped = $pos - $last_match - 1;
        if ($skipped > 0) {
            $segments[] = array(
                'type' => $op_name,
                'string' => mb_substr($string, $last_match + 1, $skipped),
            );
        }

        // the last pattern character is the sentinel and is not reported
        if ($idx < $pattern_len - 1) {
            $segments[] = array(
                'type' => 'normal',
                'string' => mb_substr($string, $pos, 1),
            );
        }
    }

    return $segments;
}
|---|
| 147 | |
|---|
// Does an inline (within-line) diff on top of diff_string().
//
// $string is passed unchanged to diff_string(). In every block of the
// result, each 'deleted' operation that is immediately followed by an 'added'
// operation with the same number of lines is refined line by line: the two
// corresponding lines are compared with lcs() and, if they have something in
// common, each line string is replaced by the segment list produced by
// split_string(). Segments are array('type' => ..., 'string' => ...) where
// type is 'normal' for shared characters, $op_name[0] for characters only in
// the deleted line and $op_name[1] for characters only in the added line
// (by default "del" and "ins", i.e. the names of the matching HTML tags).
//
// Lines that are not refined stay plain strings, so callers must be prepared
// for both forms. When the mbstring extension is not available the result of
// diff_string() is returned unmodified. Returns null if diff_string() fails.
//
// Note: sets the mbstring internal encoding to UTF-8 as a side effect.
function diff_inline($string, $op_name = array("del", "ins")) {
    $diff = diff_string($string);

    // diff_string() reports failure with null; there is nothing to refine
    // and iterating over it would only raise a warning
    if (!is_array($diff)) {
        return $diff;
    }

    // the inline diff works on characters, which needs mbstring
    if (!extension_loaded('mbstring')) {
        return $diff;
    }
    mb_internal_encoding("utf-8");

    foreach ($diff as &$block) {
        $op_total = count($block);
        for ($i = 0; $i + 1 < $op_total; ++$i) {
            // only refine a deletion directly followed by an addition of the
            // same size, so that lines can be paired up one to one
            if ($block[$i]['type'] != 'deleted' || $block[$i+1]['type'] != 'added' ||
                count($block[$i]['lines']) != count($block[$i+1]['lines'])) {
                continue;
            }

            $line_total = count($block[$i]['lines']);
            for ($j = 0; $j < $line_total; ++$j) {
                $old_line = $block[$i]['lines'][$j];
                $new_line = $block[$i+1]['lines'][$j];

                $lcs = lcs($old_line, $new_line);
                if ($lcs == "") {
                    // nothing in common: keep both lines as plain strings
                    continue;
                }

                $block[$i]['lines'][$j] = split_string($old_line, $lcs, $op_name[0]);
                $block[$i+1]['lines'][$j] = split_string($new_line, $lcs, $op_name[1]);
            }
        }
    }
    // break the reference left behind by the by-reference foreach
    unset($block);

    return $diff;
}
|---|
| 184 | |
|---|
| 185 | ?> |
|---|