/**
* Crush the diff into an encoded string which describes the operations
* required to transform text1 into text2.
* E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'.
* Operations are tab-separated. Inserted text is escaped using %xx notation.
* @param diffs List of Diff objects.
* @return Delta text.
*/
public String diff_toDelta(List<Diff> diffs) {
StringBuilder text = new StringBuilder();
for (Diff aDiff : diffs) {
switch (aDiff.operation) {
case INSERT:
try {
text.append("+").append(URLEncoder.encode(aDiff.text, "UTF-8")
.replace('+', ' ')).append("\t");
} catch (UnsupportedEncodingException e) {
// Not likely on modern system.
throw new Error("This system does not support UTF-8.", e);
}
break;
case DELETE:
text.append("-").append(aDiff.text.length()).append("\t");
break;
case EQUAL:
text.append("=").append(aDiff.text.length()).append("\t");
break;
}
}
String delta = text.toString();
if (delta.length() != 0) {
// Strip off trailing tab character.
delta = delta.substring(0, delta.length() - 1);
delta = unescapeForEncodeUriCompatability(delta);
}
return delta;
}
/**
* Given the original text1, and an encoded string which describes the
* operations required to transform text1 into text2, compute the full diff.
* @param text1 Source string for the diff.
* @param delta Delta text.
* @return Array of Diff objects or null if invalid.
* @throws IllegalArgumentException If invalid input.
*/
public LinkedList<Diff> diff_fromDelta(String text1, String delta)
throws IllegalArgumentException {
LinkedList<Diff> diffs = new LinkedList<Diff>();
int pointer = 0; // Cursor in text1
String[] tokens = delta.split("\t");
for (String token : tokens) {
if (token.length() == 0) {
// Blank tokens are ok (from a trailing \t).
continue;
}
// Each token begins with a one character parameter which specifies the
// operation of this token (delete, insert, equality).
String param = token.substring(1);
switch (token.charAt(0)) {
case '+':
// decode would change all "+" to " "
param = param.replace("+", "%2B");
try {
param = URLDecoder.decode(param, "UTF-8");
} catch (UnsupportedEncodingException e) {
// Not likely on modern system.
throw new Error("This system does not support UTF-8.", e);
} catch (IllegalArgumentException e) {
// Malformed URI sequence.
throw new IllegalArgumentException(
"Illegal escape in diff_fromDelta: " + param, e);
}
diffs.add(new Diff(Operation.INSERT, param));
break;
case '-':
// Fall through.
case '=':
int n;
try {
n = Integer.parseInt(param);
} catch (NumberFormatException e) {
throw new IllegalArgumentException(
"Invalid number in diff_fromDelta: " + param, e);
}
if (n < 0) {
throw new IllegalArgumentException(
"Negative number in diff_fromDelta: " + param);
}
String text;
try {
text = text1.substring(pointer, pointer += n);
} catch (StringIndexOutOfBoundsException e) {
throw new IllegalArgumentException("Delta length (" + pointer
+ ") larger than source text length (" + text1.length()
+ ").", e);
}
if (token.charAt(0) == '=') {
diffs.add(new Diff(Operation.EQUAL, text));
} else {
diffs.add(new Diff(Operation.DELETE, text));
}
break;
default:
// Anything else is an error.
throw new IllegalArgumentException(
"Invalid diff operation in diff_fromDelta: " + token.charAt(0));
}
}
if (pointer != text1.length()) {
throw new IllegalArgumentException("Delta length (" + pointer
+ ") smaller than source text length (" + text1.length() + ").");
}
return diffs;
}
// MATCH FUNCTIONS
/**
* Locate the best instance of 'pattern' in 'text' near 'loc'.
* Returns -1 if no match found.
* @param text The text to search.
* @param pattern The pattern to search for.
* @param loc The location to search around.
* @return Best match index or -1.
*/
public int match_main(String text, String pattern, int loc) {
// Check for null inputs.
if (text == null || pattern == null) {
throw new IllegalArgumentException("Null inputs. (match_main)");
}
loc = Math.max(0, Math.min(loc, text.length()));
if (text.equals(pattern)) {
// Shortcut (potentially not guaranteed by the algorithm)
return 0;
} else if (text.length() == 0) {
// Nothing to match.
return -1;
} else if (loc + pattern.length() <= text.length()
&& text.substring(loc, loc + pattern.length()).equals(pattern)) {
// Perfect match at the perfect spot! (Includes case of null pattern)
return loc;
} else {
// Do a fuzzy compare.
return match_bitap(text, pattern, loc);
}
}