Skip to content

Commit

Permalink
Implement new List Diffing algorithm for matching (#49)
Browse files Browse the repository at this point in the history
* Implement new List Diffing algorithm for matching

* Add MatchStrategy interface and classes instead of comparator function

* Start updating the tests

* Update tests
  • Loading branch information
jschroed91 authored and adamCaxy committed May 19, 2016
1 parent 755472d commit 23808d0
Show file tree
Hide file tree
Showing 21 changed files with 651 additions and 16 deletions.
3 changes: 2 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
},
"require": {
"php": ">=5.3.3",
"ezyang/htmlpurifier": "^4.7"
"ezyang/htmlpurifier": "^4.7",
"sunra/php-simple-html-dom-parser": "^1.5"
},
"require-dev": {
"phpunit/phpunit": "~4.8",
Expand Down
2 changes: 1 addition & 1 deletion demo/index.php
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ function addDebugOutput($value, $key = 'general')
}
$diff->setUseTableDiffing($useTableDiffing);
$diffOutput = $diff->build();
$diffOutput = mb_convert_encoding($diffOutput, 'UTF-8');
$diffOutput = iconv('UTF-8', 'UTF-8//IGNORE', $diffOutput);

$jsonOutput = json_encode(array('diff' => $diffOutput, 'debug' => $debugOutput));

Expand Down
10 changes: 9 additions & 1 deletion lib/Caxy/HtmlDiff/HtmlDiff.php
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,19 @@ public function build()
return $this->content;
}

// Pre-processing Optimizations

// 1. Equality
if ($this->oldText == $this->newText) {
return $this->newText;
}

$this->splitInputsToWords();
$this->replaceIsolatedDiffTags();
$this->indexNewWords();

$operations = $this->operations();

foreach ($operations as $item) {
$this->performOperation($item);
}
Expand Down Expand Up @@ -368,7 +376,7 @@ protected function diffElements($oldText, $newText, $stripWrappingTags = true)
*/
protected function diffList($oldText, $newText)
{
$diff = ListDiff::create($oldText, $newText, $this->config);
$diff = ListDiffLines::create($oldText, $newText, $this->config);

return $diff->build();
}
Expand Down
4 changes: 2 additions & 2 deletions lib/Caxy/HtmlDiff/HtmlDiffConfig.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ class HtmlDiffConfig
'dl' => '[[REPLACE_DEFINITION_LIST]]',
'table' => '[[REPLACE_TABLE]]',
'strong' => '[[REPLACE_STRONG]]',
'b' => '[[REPLACE_B]]',
'b' => '[[REPLACE_STRONG]]',
'em' => '[[REPLACE_EM]]',
'i' => '[[REPLACE_I]]',
'i' => '[[REPLACE_EM]]',
'a' => '[[REPLACE_A]]',
'img' => '[[REPLACE_IMG]]',
);
Expand Down
88 changes: 88 additions & 0 deletions lib/Caxy/HtmlDiff/LcsService.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
<?php

namespace Caxy\HtmlDiff;

use Caxy\HtmlDiff\Strategy\EqualMatchStrategy;
use Caxy\HtmlDiff\Strategy\MatchStrategyInterface;

/**
 * Computes a longest common subsequence (LCS) between two lists, using a
 * pluggable match strategy to decide whether two elements are "the same".
 */
class LcsService
{
    /**
     * Strategy used to decide whether an element of the first list matches
     * an element of the second list.
     *
     * @var MatchStrategyInterface
     */
    protected $matchStrategy;

    /**
     * LcsService constructor.
     *
     * @param MatchStrategyInterface $matchStrategy Strategy to compare elements with;
     *                                              an EqualMatchStrategy is used when omitted.
     */
    public function __construct(MatchStrategyInterface $matchStrategy = null)
    {
        if (null === $matchStrategy) {
            $matchStrategy = new EqualMatchStrategy();
        }

        $this->matchStrategy = $matchStrategy;
    }

    /**
     * Finds a longest common subsequence of $a and $b.
     *
     * Both inputs are read by position ($a[0] .. $a[count($a) - 1]), so they
     * are expected to be zero-indexed lists.
     *
     * @param array $a
     * @param array $b
     *
     * @return array List of count($a) + 1 integers. Entry $i (1-based position in $a)
     *               holds the 1-based position in $b that it was matched with, or 0
     *               when the element is not part of the subsequence. Entry 0 is
     *               always 0.
     */
    public function longestCommonSubsequence(array $a, array $b)
    {
        $m = count($a);
        $n = count($b);

        // $c[$i][$j] = LCS length of the first $i items of $a and first $j items of $b.
        $c = array();

        for ($i = 0; $i <= $m; $i++) {
            $c[$i][0] = 0;
        }

        for ($j = 0; $j <= $n; $j++) {
            $c[0][$j] = 0;
        }

        // Row 0 and column 0 are initialised above and cells are filled in
        // row-major order, so every neighbour read below already exists.
        for ($i = 1; $i <= $m; $i++) {
            for ($j = 1; $j <= $n; $j++) {
                if ($this->matchStrategy->isMatch($a[$i - 1], $b[$j - 1])) {
                    $c[$i][$j] = 1 + $c[$i - 1][$j - 1];
                } else {
                    $c[$i][$j] = max($c[$i][$j - 1], $c[$i - 1][$j]);
                }
            }
        }

        // array() rather than [] keeps the file parseable on PHP 5.3, the
        // minimum version declared in composer.json.
        $lcs = array_pad(array(), $m + 1, 0);
        $this->compileMatches($c, $a, $b, $m, $n, $lcs);

        return $lcs;
    }

    /**
     * Walks the LCS length table back from cell ($i, $j) to the origin and
     * records every matched pair as $matches[positionInA] = positionInB
     * (both 1-based).
     *
     * Implemented as a loop instead of recursion so that long inputs cannot
     * exhaust the call stack / hit a debugger nesting limit (the walk takes up
     * to $i + $j steps).
     *
     * @param array $c       LCS length table built by longestCommonSubsequence()
     * @param array $a       First list
     * @param array $b       Second list
     * @param int   $i       Row to start from (number of items of $a considered)
     * @param int   $j       Column to start from (number of items of $b considered)
     * @param array $matches Receives the matched pairs (passed by reference)
     */
    protected function compileMatches($c, $a, $b, $i, $j, &$matches)
    {
        $pairs = array();

        while ($i > 0 || $j > 0) {
            if ($i > 0 && $j > 0 && $this->matchStrategy->isMatch($a[$i - 1], $b[$j - 1])) {
                // Matching cell: take the diagonal.
                $pairs[] = array($i, $j);
                $i--;
                $j--;
            } elseif ($j > 0 && ($i === 0 || $c[$i][$j - 1] >= $c[$i - 1][$j])) {
                // Ties prefer moving left (dropping an item of $b).
                $j--;
            } elseif ($i > 0 && ($j === 0 || $c[$i][$j - 1] < $c[$i - 1][$j])) {
                $i--;
            } else {
                // No applicable move; stop, as the recursive form would simply return.
                break;
            }
        }

        // Pairs were collected from the end backwards; write them in ascending
        // order so $matches is populated in the same order as before.
        foreach (array_reverse($pairs) as $pair) {
            $matches[$pair[0]] = $pair[1];
        }
    }
}
Loading

0 comments on commit 23808d0

Please sign in to comment.