Tree - source-git/libgit2 - CentOS Git server

source-git / libgit2

Blame src/xdiff/xpatience.c

Blob History Raw

Packit Service	20376f	`/*`
Packit Service	20376f	`* LibXDiff by Davide Libenzi ( File Differential Library )`
Packit Service	20376f	`* Copyright (C) 2003-2009 Davide Libenzi, Johannes E. Schindelin`
Packit Service	20376f	`*`
Packit Service	20376f	`* This library is free software; you can redistribute it and/or`
Packit Service	20376f	`* modify it under the terms of the GNU Lesser General Public`
Packit Service	20376f	`* License as published by the Free Software Foundation; either`
Packit Service	20376f	`* version 2.1 of the License, or (at your option) any later version.`
Packit Service	20376f	`*`
Packit Service	20376f	`* This library is distributed in the hope that it will be useful,`
Packit Service	20376f	`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
Packit Service	20376f	`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
Packit Service	20376f	`* Lesser General Public License for more details.`
Packit Service	20376f	`*`
Packit Service	20376f	`* You should have received a copy of the GNU Lesser General Public`
Packit Service	20376f	`* License along with this library; if not, write to the Free Software`
Packit Service	20376f	`* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA`
Packit Service	20376f	`*`
Packit Service	20376f	`* Davide Libenzi <davidel@xmailserver.org>`
Packit Service	20376f	`*`
Packit Service	20376f	`*/`
Packit Service	20376f	`#include "xinclude.h"`
Packit Service	20376f	`#include "xtypes.h"`
Packit Service	20376f	`#include "xdiff.h"`
Packit Service	20376f
Packit Service	20376f	`/*`
Packit Service	20376f	`* The basic idea of patience diff is to find lines that are unique in`
Packit Service	20376f	`* both files. These are intuitively the ones that we want to see as`
Packit Service	20376f	`* common lines.`
Packit Service	20376f	`*`
Packit Service	20376f	`* The maximal ordered sequence of such line pairs (where ordered means`
Packit Service	20376f	`* that the order in the sequence agrees with the order of the lines in`
Packit Service	20376f	`* both files) naturally defines an initial set of common lines.`
Packit Service	20376f	`*`
Packit Service	20376f	`* Now, the algorithm tries to extend the set of common lines by growing`
Packit Service	20376f	`* the line ranges where the files have identical lines.`
Packit Service	20376f	`*`
Packit Service	20376f	`* Between those common lines, the patience diff algorithm is applied`
Packit Service	20376f	`* recursively, until no unique line pairs can be found; these line ranges`
Packit Service	20376f	`* are handled by the well-known Myers algorithm.`
Packit Service	20376f	`*/`
Packit Service	20376f
Packit Service	20376f	`#define NON_UNIQUE ULONG_MAX`
Packit Service	20376f
Packit Service	20376f	`/*`
Packit Service	20376f	`* This is a hash mapping from line hash to line numbers in the first and`
Packit Service	20376f	`* second file.`
Packit Service	20376f	`*/`
Packit Service	20376f	`struct hashmap {`
Packit Service	20376f	`int nr, alloc;`
Packit Service	20376f	`struct entry {`
Packit Service	20376f	`unsigned long hash;`
Packit Service	20376f	`/*`
Packit Service	20376f	`* 0 = unused entry, 1 = first line, 2 = second, etc.`
Packit Service	20376f	`* line2 is NON_UNIQUE if the line is not unique`
Packit Service	20376f	`* in either the first or the second file.`
Packit Service	20376f	`*/`
Packit Service	20376f	`unsigned long line1, line2;`
Packit Service	20376f	`/*`
Packit Service	20376f	`* "next" & "previous" are used for the longest common`
Packit Service	20376f	`* sequence;`
Packit Service	20376f	`* initially, "next" reflects only the order in file1.`
Packit Service	20376f	`*/`
Packit Service	20376f	`struct entry next, previous;`
Packit Service	20376f	`} entries, first, *last;`
Packit Service	20376f	`/* were common records found? */`
Packit Service	20376f	`unsigned long has_matches;`
Packit Service	20376f	`mmfile_t file1, file2;`
Packit Service	20376f	`xdfenv_t *env;`
Packit Service	20376f	`xpparam_t const *xpp;`
Packit Service	20376f	`};`
Packit Service	20376f
Packit Service	20376f	`/* The argument "pass" is 1 for the first file, 2 for the second. */`
Packit Service	20376f	`static void insert_record(int line, struct hashmap *map, int pass)`
Packit Service	20376f	`{`
Packit Service	20376f	`xrecord_t **records = pass == 1 ?`
Packit Service	20376f	`map->env->xdf1.recs : map->env->xdf2.recs;`
Packit Service	20376f	`xrecord_t record = records[line - 1], other;`
Packit Service	20376f	`/*`
Packit Service	20376f	`* After xdl_prepare_env() (or more precisely, due to`
Packit Service	20376f	`* xdl_classify_record()), the "ha" member of the records (AKA lines)`
Packit Service	20376f	`* is _not_ the hash anymore, but a linearized version of it. In`
Packit Service	20376f	`* other words, the "ha" member is guaranteed to start with 0 and`
Packit Service	20376f	`* the second record's ha can only be 0 or 1, etc.`
Packit Service	20376f	`*`
Packit Service	20376f	`* So we multiply ha by 2 in the hope that the hashing was`
Packit Service	20376f	`* "unique enough".`
Packit Service	20376f	`*/`
Packit Service	20376f	`int index = (int)((record->ha << 1) % map->alloc);`
Packit Service	20376f
Packit Service	20376f	`while (map->entries[index].line1) {`
Packit Service	20376f	`other = map->env->xdf1.recs[map->entries[index].line1 - 1];`
Packit Service	20376f	`if (map->entries[index].hash != record->ha \|\|`
Packit Service	20376f	`!xdl_recmatch(record->ptr, record->size,`
Packit Service	20376f	`other->ptr, other->size,`
Packit Service	20376f	`map->xpp->flags)) {`
Packit Service	20376f	`if (++index >= map->alloc)`
Packit Service	20376f	`index = 0;`
Packit Service	20376f	`continue;`
Packit Service	20376f	`}`
Packit Service	20376f	`if (pass == 2)`
Packit Service	20376f	`map->has_matches = 1;`
Packit Service	20376f	`if (pass == 1 \|\| map->entries[index].line2)`
Packit Service	20376f	`map->entries[index].line2 = NON_UNIQUE;`
Packit Service	20376f	`else`
Packit Service	20376f	`map->entries[index].line2 = line;`
Packit Service	20376f	`return;`
Packit Service	20376f	`}`
Packit Service	20376f	`if (pass == 2)`
Packit Service	20376f	`return;`
Packit Service	20376f	`map->entries[index].line1 = line;`
Packit Service	20376f	`map->entries[index].hash = record->ha;`
Packit Service	20376f	`if (!map->first)`
Packit Service	20376f	`map->first = map->entries + index;`
Packit Service	20376f	`if (map->last) {`
Packit Service	20376f	`map->last->next = map->entries + index;`
Packit Service	20376f	`map->entries[index].previous = map->last;`
Packit Service	20376f	`}`
Packit Service	20376f	`map->last = map->entries + index;`
Packit Service	20376f	`map->nr++;`
Packit Service	20376f	`}`
Packit Service	20376f
Packit Service	20376f	`/*`
Packit Service	20376f	`* This function has to be called for each recursion into the inter-hunk`
Packit Service	20376f	`* parts, as previously non-unique lines can become unique when being`
Packit Service	20376f	`* restricted to a smaller part of the files.`
Packit Service	20376f	`*`
Packit Service	20376f	`* It is assumed that env has been prepared using xdl_prepare().`
Packit Service	20376f	`*/`
Packit Service	20376f	`static int fill_hashmap(mmfile_t file1, mmfile_t file2,`
Packit Service	20376f	`xpparam_t const xpp, xdfenv_t env,`
Packit Service	20376f	`struct hashmap *result,`
Packit Service	20376f	`int line1, int count1, int line2, int count2)`
Packit Service	20376f	`{`
Packit Service	20376f	`result->file1 = file1;`
Packit Service	20376f	`result->file2 = file2;`
Packit Service	20376f	`result->xpp = xpp;`
Packit Service	20376f	`result->env = env;`
Packit Service	20376f
Packit Service	20376f	`/* We know exactly how large we want the hash map */`
Packit Service	20376f	`result->alloc = count1 * 2;`
Packit Service	20376f	`result->entries = (struct entry *)`
Packit Service	20376f	`xdl_malloc(result->alloc * sizeof(struct entry));`
Packit Service	20376f	`if (!result->entries)`
Packit Service	20376f	`return -1;`
Packit Service	20376f	`memset(result->entries, 0, result->alloc * sizeof(struct entry));`
Packit Service	20376f
Packit Service	20376f	`/* First, fill with entries from the first file */`
Packit Service	20376f	`while (count1--)`
Packit Service	20376f	`insert_record(line1++, result, 1);`
Packit Service	20376f
Packit Service	20376f	`/* Then search for matches in the second file */`
Packit Service	20376f	`while (count2--)`
Packit Service	20376f	`insert_record(line2++, result, 2);`
Packit Service	20376f
Packit Service	20376f	`return 0;`
Packit Service	20376f	`}`
Packit Service	20376f
Packit Service	20376f	`/*`
Packit Service	20376f	`* Find the longest sequence with a smaller last element (meaning a smaller`
Packit Service	20376f	`* line2, as we construct the sequence with entries ordered by line1).`
Packit Service	20376f	`*/`
Packit Service	20376f	`static int binary_search(struct entry **sequence, int longest,`
Packit Service	20376f	`struct entry *entry)`
Packit Service	20376f	`{`
Packit Service	20376f	`int left = -1, right = longest;`
Packit Service	20376f
Packit Service	20376f	`while (left + 1 < right) {`
Packit Service	20376f	`int middle = (left + right) / 2;`
Packit Service	20376f	`/* by construction, no two entries can be equal */`
Packit Service	20376f	`if (sequence[middle]->line2 > entry->line2)`
Packit Service	20376f	`right = middle;`
Packit Service	20376f	`else`
Packit Service	20376f	`left = middle;`
Packit Service	20376f	`}`
Packit Service	20376f	`/* return the index in "sequence", _not_ the sequence length */`
Packit Service	20376f	`return left;`
Packit Service	20376f	`}`
Packit Service	20376f
Packit Service	20376f	`/*`
Packit Service	20376f	`* The idea is to start with the list of common unique lines sorted by`
Packit Service	20376f	`* the order in file1. For each of these pairs, the longest (partial)`
Packit Service	20376f	`* sequence whose last element's line2 is smaller is determined.`
Packit Service	20376f	`*`
Packit Service	20376f	`* For efficiency, the sequences are kept in a list containing exactly one`
Packit Service	20376f	`* item per sequence length: the sequence with the smallest last`
Packit Service	20376f	`* element (in terms of line2).`
Packit Service	20376f	`*/`
Packit Service	20376f	`static struct entry find_longest_common_sequence(struct hashmap map)`
Packit Service	20376f	`{`
Packit Service	20376f	`struct entry *sequence = xdl_malloc(map->nr sizeof(struct entry *));`
Packit Service	20376f	`int longest = 0, i;`
Packit Service	20376f	`struct entry *entry;`
Packit Service	20376f
Packit Service	20376f	`for (entry = map->first; entry; entry = entry->next) {`
Packit Service	20376f	`if (!entry->line2 \|\| entry->line2 == NON_UNIQUE)`
Packit Service	20376f	`continue;`
Packit Service	20376f	`i = binary_search(sequence, longest, entry);`
Packit Service	20376f	`entry->previous = i < 0 ? NULL : sequence[i];`
Packit Service	20376f	`sequence[++i] = entry;`
Packit Service	20376f	`if (i == longest)`
Packit Service	20376f	`longest++;`
Packit Service	20376f	`}`
Packit Service	20376f
Packit Service	20376f	`/* No common unique lines were found */`
Packit Service	20376f	`if (!longest) {`
Packit Service	20376f	`xdl_free(sequence);`
Packit Service	20376f	`return NULL;`
Packit Service	20376f	`}`
Packit Service	20376f
Packit Service	20376f	`/* Iterate starting at the last element, adjusting the "next" members */`
Packit Service	20376f	`entry = sequence[longest - 1];`
Packit Service	20376f	`entry->next = NULL;`
Packit Service	20376f	`while (entry->previous) {`
Packit Service	20376f	`entry->previous->next = entry;`
Packit Service	20376f	`entry = entry->previous;`
Packit Service	20376f	`}`
Packit Service	20376f	`xdl_free(sequence);`
Packit Service	20376f	`return entry;`
Packit Service	20376f	`}`
Packit Service	20376f
Packit Service	20376f	`static int match(struct hashmap *map, int line1, int line2)`
Packit Service	20376f	`{`
Packit Service	20376f	`xrecord_t *record1 = map->env->xdf1.recs[line1 - 1];`
Packit Service	20376f	`xrecord_t *record2 = map->env->xdf2.recs[line2 - 1];`
Packit Service	20376f	`return xdl_recmatch(record1->ptr, record1->size,`
Packit Service	20376f	`record2->ptr, record2->size, map->xpp->flags);`
Packit Service	20376f	`}`
Packit Service	20376f
Packit Service	20376f	`static int patience_diff(mmfile_t file1, mmfile_t file2,`
Packit Service	20376f	`xpparam_t const xpp, xdfenv_t env,`
Packit Service	20376f	`int line1, int count1, int line2, int count2);`
Packit Service	20376f
Packit Service	20376f	`static int walk_common_sequence(struct hashmap map, struct entry first,`
Packit Service	20376f	`int line1, int count1, int line2, int count2)`
Packit Service	20376f	`{`
Packit Service	20376f	`int end1 = line1 + count1, end2 = line2 + count2;`
Packit Service	20376f	`int next1, next2;`
Packit Service	20376f
Packit Service	20376f	`for (;;) {`
Packit Service	20376f	`/* Try to grow the line ranges of common lines */`
Packit Service	20376f	`if (first) {`
Packit Service	20376f	`next1 = first->line1;`
Packit Service	20376f	`next2 = first->line2;`
Packit Service	20376f	`while (next1 > line1 && next2 > line2 &&`
Packit Service	20376f	`match(map, next1 - 1, next2 - 1)) {`
Packit Service	20376f	`next1--;`
Packit Service	20376f	`next2--;`
Packit Service	20376f	`}`
Packit Service	20376f	`} else {`
Packit Service	20376f	`next1 = end1;`
Packit Service	20376f	`next2 = end2;`
Packit Service	20376f	`}`
Packit Service	20376f	`while (line1 < next1 && line2 < next2 &&`
Packit Service	20376f	`match(map, line1, line2)) {`
Packit Service	20376f	`line1++;`
Packit Service	20376f	`line2++;`
Packit Service	20376f	`}`
Packit Service	20376f
Packit Service	20376f	`/* Recurse */`
Packit Service	20376f	`if (next1 > line1 \|\| next2 > line2) {`
Packit Service	20376f	`struct hashmap submap;`
Packit Service	20376f
Packit Service	20376f	`memset(&submap, 0, sizeof(submap));`
Packit Service	20376f	`if (patience_diff(map->file1, map->file2,`
Packit Service	20376f	`map->xpp, map->env,`
Packit Service	20376f	`line1, next1 - line1,`
Packit Service	20376f	`line2, next2 - line2))`
Packit Service	20376f	`return -1;`
Packit Service	20376f	`}`
Packit Service	20376f
Packit Service	20376f	`if (!first)`
Packit Service	20376f	`return 0;`
Packit Service	20376f
Packit Service	20376f	`while (first->next &&`
Packit Service	20376f	`first->next->line1 == first->line1 + 1 &&`
Packit Service	20376f	`first->next->line2 == first->line2 + 1)`
Packit Service	20376f	`first = first->next;`
Packit Service	20376f
Packit Service	20376f	`line1 = first->line1 + 1;`
Packit Service	20376f	`line2 = first->line2 + 1;`
Packit Service	20376f
Packit Service	20376f	`first = first->next;`
Packit Service	20376f	`}`
Packit Service	20376f	`}`
Packit Service	20376f
Packit Service	20376f	`static int fall_back_to_classic_diff(struct hashmap *map,`
Packit Service	20376f	`int line1, int count1, int line2, int count2)`
Packit Service	20376f	`{`
Packit Service	20376f	`xpparam_t xpp;`
Packit Service	20376f	`xpp.flags = map->xpp->flags & ~XDF_DIFF_ALGORITHM_MASK;`
Packit Service	20376f
Packit Service	20376f	`return xdl_fall_back_diff(map->env, &xpp,`
Packit Service	20376f	`line1, count1, line2, count2);`
Packit Service	20376f	`}`
Packit Service	20376f
Packit Service	20376f	`/*`
Packit Service	20376f	`* Recursively find the longest common sequence of unique lines,`
Packit Service	20376f	`* and if none was found, ask xdl_do_diff() to do the job.`
Packit Service	20376f	`*`
Packit Service	20376f	`* This function assumes that env was prepared with xdl_prepare_env().`
Packit Service	20376f	`*/`
Packit Service	20376f	`static int patience_diff(mmfile_t file1, mmfile_t file2,`
Packit Service	20376f	`xpparam_t const xpp, xdfenv_t env,`
Packit Service	20376f	`int line1, int count1, int line2, int count2)`
Packit Service	20376f	`{`
Packit Service	20376f	`struct hashmap map;`
Packit Service	20376f	`struct entry *first;`
Packit Service	20376f	`int result = 0;`
Packit Service	20376f
Packit Service	20376f	`/* trivial case: one side is empty */`
Packit Service	20376f	`if (!count1) {`
Packit Service	20376f	`while(count2--)`
Packit Service	20376f	`env->xdf2.rchg[line2++ - 1] = 1;`
Packit Service	20376f	`return 0;`
Packit Service	20376f	`} else if (!count2) {`
Packit Service	20376f	`while(count1--)`
Packit Service	20376f	`env->xdf1.rchg[line1++ - 1] = 1;`
Packit Service	20376f	`return 0;`
Packit Service	20376f	`}`
Packit Service	20376f
Packit Service	20376f	`memset(&map, 0, sizeof(map));`
Packit Service	20376f	`if (fill_hashmap(file1, file2, xpp, env, &map,`
Packit Service	20376f	`line1, count1, line2, count2))`
Packit Service	20376f	`return -1;`
Packit Service	20376f
Packit Service	20376f	`/* are there any matching lines at all? */`
Packit Service	20376f	`if (!map.has_matches) {`
Packit Service	20376f	`while(count1--)`
Packit Service	20376f	`env->xdf1.rchg[line1++ - 1] = 1;`
Packit Service	20376f	`while(count2--)`
Packit Service	20376f	`env->xdf2.rchg[line2++ - 1] = 1;`
Packit Service	20376f	`xdl_free(map.entries);`
Packit Service	20376f	`return 0;`
Packit Service	20376f	`}`
Packit Service	20376f
Packit Service	20376f	`first = find_longest_common_sequence(&map);`
Packit Service	20376f	`if (first)`
Packit Service	20376f	`result = walk_common_sequence(&map, first,`
Packit Service	20376f	`line1, count1, line2, count2);`
Packit Service	20376f	`else`
Packit Service	20376f	`result = fall_back_to_classic_diff(&map,`
Packit Service	20376f	`line1, count1, line2, count2);`
Packit Service	20376f
Packit Service	20376f	`xdl_free(map.entries);`
Packit Service	20376f	`return result;`
Packit Service	20376f	`}`
Packit Service	20376f
Packit Service	20376f	`int xdl_do_patience_diff(mmfile_t file1, mmfile_t file2,`
Packit Service	20376f	`xpparam_t const xpp, xdfenv_t env)`
Packit Service	20376f	`{`
Packit Service	20376f	`if (xdl_prepare_env(file1, file2, xpp, env) < 0)`
Packit Service	20376f	`return -1;`
Packit Service	20376f
Packit Service	20376f	`/* environment is cleaned up in xdl_diff() */`
Packit Service	20376f	`return patience_diff(file1, file2, xpp, env,`
Packit Service	20376f	`1, env->xdf1.nrec, 1, env->xdf2.nrec);`
Packit Service	20376f	`}`

source-git / libgit2

Source Code

Blame src/xdiff/xpatience.c