diff --git a/pkgdiff.pl b/pkgdiff.pl
index 58cf54c..76b4c77 100644
--- a/pkgdiff.pl
+++ b/pkgdiff.pl
@@ -75,7 +75,7 @@
$CustomTmpDir, $HideUnchanged, $TargetName, $TargetTitle, %TargetVersion,
$CompareDirs, $ListAddedRemoved, $SkipSubArchives, $LinksTarget,
$SkipPattern, $AllText, $CheckByteCode, $FullMethodDiffs, $TrackUnchanged,
-$MoveStyles);
+$MoveStyles, $EnhancedMatching);
my $CmdName = getFilename($0);
@@ -150,7 +150,8 @@
"check-byte-code!" => \$CheckByteCode,
"full-method-diffs!" => \$FullMethodDiffs,
"track-unchanged!" => \$TrackUnchanged,
- "move-styles=s" => \$MoveStyles
+ "move-styles=s" => \$MoveStyles,
+ "enhanced-matching!" => \$EnhancedMatching
) or errMsg();
my $TMP_DIR = undef;
@@ -208,7 +209,7 @@ ()
DESCRIPTION:
Package Changes Analyzer (PkgDiff) is a tool for visualizing
changes in Linux software packages (RPM, DEB, TAR.GZ, etc).
-
+
The tool can compare directories as well (with the help of
the -d option).
@@ -230,7 +231,7 @@ ()
ARGUMENTS:
PKG1
Path to the old version of a package (RPM, DEB, TAR.GZ, etc).
-
+
If you need to analyze a group of packages then you can
pass an XML-descriptor of this group (VERSION.xml file):
@@ -241,7 +242,7 @@ ()
/* Group name */
-
+
/path1/to/package(s)
/path2/to/package(s)
@@ -295,7 +296,7 @@ ()
-minimal
Try to find a smaller set of changes.
-
+
-no-wdiff
Do not use GNU Wdiff for analysis of changes.
This may be two times faster, but produces lower
@@ -313,48 +314,48 @@ ()
-template
Create XML-descriptor template ./VERSION.xml
-
+
-extra-info DIR
Dump extra info to DIR.
-
+
-tmp-dir DIR
Use custom temp directory.
-
+
-c|-hide-unchanged
Don't show unchanged files in the report.
-debug
Show debug info.
-
+
-name NAME
Set name of the package to NAME.
-
+
-title TITLE
Set name of the package in the title of the report to TITLE.
-
+
-vnum1 NUM
Set version number of the old package to NUM.
-
+
-vnum2 NUM
Set version number of the new package to NUM.
-
+
-links-target TARGET
Set target attribute for links in the report:
_self (default)
_blank
-
+
-list-added-removed
Show content of added and removed text files.
-
+
-skip-subarchives
Skip checking of archives inside the input packages.
-
+
-skip-pattern REGEX
Don't check files matching REGEX.
-
+
-d|-directories
Compare directories instead of packages.
-
+
-all-text
Treat all files in the archive as text files.
@@ -367,6 +368,11 @@ ()
-track-unchanged
Track unchanged files in extra info.
+ -enhanced-matching
+ Enable enhanced file matching that detects renames/moves even when
+ version numbers change in paths (e.g., lib-1.2/foo.h -> lib-1.3/foo.h).
+ Uses path normalization and content-based fallback matching.
+
REPORT:
Report will be generated to:
pkgdiff_reports//_to_/changes_report.html
@@ -409,7 +415,7 @@ ()
";
# Settings
-my $RENAME_FILE_MATCH = 0.25; # 0.55
+my $RENAME_FILE_MATCH = 0.15; # 0.25 was too strict, 0.15 is more permissive
my $RENAME_CONTENT_MATCH = 0.85;
my $MOVE_CONTENT_MATCH = 0.90;
my $MOVE_DEPTH = 4;
@@ -564,9 +570,9 @@ ($)
sub readSymbols($)
{
my $Path = $_[0];
-
+
my %Symbols = ();
-
+
open(LIB, "readelf -WhlSsdA \"$Path\" 2>\"$TMP_DIR/null\" |");
my $symtab = undef; # indicates that we are processing 'symtab' section of 'readelf' output
while()
@@ -593,7 +599,7 @@ ($)
}
}
close(LIB);
-
+
return %Symbols;
}
@@ -642,12 +648,12 @@ ($)
sub compareSymbols($$)
{
my ($P1, $P2) = @_;
-
+
my %Symbols1 = readSymbols($P1);
my %Symbols2 = readSymbols($P2);
-
+
my $Changed = 0;
-
+
foreach my $Symbol (keys(%Symbols1))
{
if(not defined $Symbols2{$Symbol})
@@ -663,7 +669,7 @@ ($$)
}
}
}
-
+
foreach my $Symbol (keys(%Symbols2))
{
if(not defined $Symbols1{$Symbol})
@@ -679,7 +685,7 @@ ($$)
}
}
}
-
+
return $Changed;
}
@@ -722,7 +728,7 @@ ($$$$)
}
}
my ($Changed, $DLink, $RLink, $Rate, $Adv) = (0, "", "", 0, {});
-
+
if(not $ShowDetails)
{
if($Format eq "SHARED_OBJECT"
@@ -735,7 +741,7 @@ ($$$$)
}
}
}
-
+
if(defined $FormatInfo{$Format}{"Format"}
and $FormatInfo{$Format}{"Format"} eq "Text") {
($DLink, $Rate) = diffFiles($P1, $P2, getRPath("diffs", $N1));
@@ -750,13 +756,13 @@ ($$$$)
my $Page1 = showFile($P1, "ARCHIVE", 1);
my $Page2 = showFile($P2, "ARCHIVE", 2);
($DLink, $Rate) = diffFiles($Page1, $Page2, getRPath("diffs", $N1));
-
+
# clean space
unlink($Page1);
unlink($Page2);
}
else
- {
+ {
($DLink, $Rate) = diffFiles($P1, $P2, getRPath("diffs", $N1));
}
}
@@ -783,12 +789,12 @@ ($$$$)
# clean space
unlink($Page1);
unlink($Page2);
-
+
return (0, "", "", 0, {});
}
}
($DLink, $Rate) = diffFiles($Page1, $Page2, getRPath("diffs", $N1));
-
+
# clean space
unlink($Page1);
unlink($Page2);
@@ -798,7 +804,7 @@ ($$$$)
$Changed = 1;
$Rate = checkDiff($P1, $P2);
}
-
+
if($DLink or $Changed)
{
if($ShowDetails)
@@ -817,7 +823,7 @@ ($$$$)
$RLink=~s/\A\Q$REPORT_DIR\E\///;
return (1, $DLink, $RLink, $Rate, $Adv);
}
-
+
return (0, "", "", 0, {});
}
@@ -882,9 +888,9 @@ ($$$)
{
my ($Path, $Format, $Version) = @_;
my ($Dir, $Name) = sepPath($Path);
-
+
my $Cmd = undef;
-
+
if($Format eq "MANPAGE")
{
$Name=~s/\.(gz|bz2|xz)\Z//;
@@ -963,17 +969,17 @@ ($$$)
{ # error
return undef;
}
-
+
my $SPath = $TMP_DIR."/fmt/".$Format."/".$Version."/".$Name;
mkpath(getDirname($SPath));
-
+
my $TmpFile = $TMP_DIR."/null";
qx/$Cmd >"$SPath" 2>$TmpFile/;
-
+
if($Format eq "JAVA_CLASS") {
chdir($ORIG_DIR);
}
-
+
if($Format eq "SHARED_OBJECT"
or $Format eq "KERNEL_MODULE"
or $Format eq "DEBUG_INFO"
@@ -999,7 +1005,7 @@ ($$$)
$Content=~s/\s+Build ID: \w+\s+//g;
writeFile($SPath, uniqStr($Content));
}
-
+
return $SPath;
}
@@ -1030,37 +1036,37 @@ ($$)
sub compareABIs($$$$$)
{
my ($P1, $P2, $N1, $N2, $Path) = @_;
-
+
my $Sect = `readelf -S \"$P1\" 2>\"$TMP_DIR/error\"`;
my $Name = getFilename($P1);
-
+
if($Sect!~/\.debug_info/)
{ # No DWARF info
printMsg("WARNING", "No debug info in ".$Name);
return ("", {});
}
-
+
mkpath(getDirname($Path));
my $Adv = {};
-
+
$Name=~s/\.debug\Z//;
printMsg("INFO", "Compare ABIs of ".$Name." (".showNumber(getSize($P1)/1048576)."M) ...");
-
+
$N1=~s/\A\///;
$N2=~s/\A\///;
-
+
my $Cmd = undef;
my $Ret = undef;
-
+
my $D1 = $REPORT_DIR."/abi_dumps/".$Group{"V1"}."/".$N1."-ABI.dump";
my $D2 = $REPORT_DIR."/abi_dumps/".$Group{"V2"}."/".$N2."-ABI.dump";
-
+
$Adv->{"ABIDump"}{1} = $D1;
$Adv->{"ABIDump"}{2} = $D2;
-
+
$Adv->{"ABIDump"}{1}=~s/\A\Q$REPORT_DIR\E\///;
$Adv->{"ABIDump"}{2}=~s/\A\Q$REPORT_DIR\E\///;
-
+
$Cmd = $ABI_DUMPER." \"$P1\" -lver \"".$Group{"V1"}."\" -o \"$D1\" -sort";
if($Debug)
{
@@ -1074,17 +1080,17 @@ ($$$$$)
printMsg("ERROR", "Failed to run ABI Dumper ($Ret)");
return ("", {});
}
-
+
if($Debug)
{
my $DP = $REPORT_DIR."/dwarf_dumps/".$Group{"V1"}."/".$N1."-DWARF.dump";
mkpath(getDirname($DP));
move("$TMP_DIR/extra-info/debug_info", $DP);
-
+
$Adv->{"DWARFDump"}{1} = $DP;
$Adv->{"DWARFDump"}{1}=~s/\A\Q$REPORT_DIR\E\///;
}
-
+
$Cmd = $ABI_DUMPER." \"$P2\" -lver \"".$Group{"V2"}."\" -o \"$D2\" -sort";
if($Debug)
{
@@ -1098,27 +1104,27 @@ ($$$$$)
printMsg("ERROR", "Failed to run ABI Dumper ($Ret)");
return ("", {});
}
-
+
if($Debug)
{
my $DP = $REPORT_DIR."/dwarf_dumps/".$Group{"V2"}."/".$N2."-DWARF.dump";
mkpath(getDirname($DP));
move("$TMP_DIR/extra-info/debug_info", $DP);
-
+
$Adv->{"DWARFDump"}{2} = $DP;
$Adv->{"DWARFDump"}{2}=~s/\A\Q$REPORT_DIR\E\///;
}
-
+
# clean space
rmtree("$TMP_DIR/extra-info");
-
+
$Cmd = $ACC." -d1 \"$D1\" -d2 \"$D2\"";
-
+
$Cmd .= " -l \"".$Name."\"";
-
+
$Cmd .= " --report-path=\"$Path\"";
$Cmd .= " -quiet";
-
+
if($Debug) {
printMsg("INFO", "Running $Cmd");
}
@@ -1129,7 +1135,7 @@ ($$$$$)
printMsg("ERROR", "Failed to run ABI Compliance Checker ($Ret)");
return ("", {});
}
-
+
my ($Bin, $Src) = (0, 0);
if(my $Meta = readFilePart($Path, 2))
{
@@ -1141,11 +1147,11 @@ ($$$$$)
$Src = $1;
}
}
-
+
$ABI_Change{"Bin"} += $Bin;
$ABI_Change{"Src"} += $Src;
$ABI_Change{"Total"} += 1;
-
+
return ($Path, $Adv);
}
@@ -1158,7 +1164,7 @@ ($)
return 1;
}
}
-
+
return 0;
}
@@ -1181,19 +1187,19 @@ ($)
sub diffFiles($$$)
{
my ($P1, $P2, $Path) = @_;
-
+
if(not $P1 or not $P2) {
return ();
}
-
+
mkpath(getDirname($Path));
-
+
my $TmpPath = $TMP_DIR."/diff";
unlink($TmpPath);
-
+
my $Cmd = "sh $DIFF --width $DiffWidth --stdout";
$Cmd .= " --tmpdiff \"$TmpPath\" --prelines $DiffLines";
-
+
if($IgnoreSpaceChange) {
$Cmd .= " --ignore-space-change";
}
@@ -1210,12 +1216,12 @@ ($$$)
if($NoWdiff) {
$Cmd .= " --nowdiff";
}
-
+
$Cmd .= " \"".$P1."\" \"".$P2."\" >\"".$Path."\" 2>$TMP_DIR/null";
$Cmd=~s/\$/\\\$/g;
-
+
qx/$Cmd/;
-
+
if(getSize($Path)<3500)
{ # may be identical
if(readFilePart($Path, 2)=~/The files are identical/i)
@@ -1224,7 +1230,7 @@ ($$$)
return ();
}
}
-
+
if(getSize($Path)<3100)
{ # may be identical or non-text
if(index(readFile($Path), "No changes")!=-1)
@@ -1233,26 +1239,26 @@ ($$$)
return ();
}
}
-
+
my $Rate = getRate($P1, $P2, $TmpPath);
-
+
# clean space
unlink($TmpPath);
-
+
return ($Path, $Rate);
}
sub getRate($$$)
{
my ($P1, $P2, $PatchPath) = @_;
-
+
my $Size1 = getSize($P1);
if(not $Size1) {
return 1;
}
-
+
my $Size2 = getSize($P2);
-
+
my $Rate = 1;
# count removed/changed bytes
my $Patch = readFile($PatchPath);
@@ -1272,34 +1278,280 @@ ($$$)
sub readFilePart($$)
{
my ($Path, $Num) = @_;
-
+
open (FILE, $Path);
my $Lines = "";
foreach (1 ... $Num) {
$Lines .= ;
}
close(FILE);
-
+
return $Lines;
}
sub getType($)
{
my $Path = $_[0];
-
+
if($Cache{"getType"}{$Path}) {
return $Cache{"getType"}{$Path};
}
-
+
if($USE_LIBMAGIC)
{
my $Magic = File::LibMagic->new();
return ($Cache{"getType"}{$Path} = $Magic->describe_filename($Path));
}
-
+
return ($Cache{"getType"}{$Path} = qx/file -b \"$Path\"/);
}
+# Normalize path by removing version patterns
+sub normalizePathForComparison($)
+{
+ my $Path = $_[0];
+
+ # Remove common version patterns from directory components
+ # Examples: -1.2.3, _1.2, /v1.2/, -v1.2.3, .1.2.3
+ $Path =~ s/[\-\_]v?\d+\.\d+(?:\.\d+)*(?:[\-\_]\w+)?//g; # -1.2.3, _v1.2, -1.2.3-rc1
+ $Path =~ s/\/v?\d+\.\d+(?:\.\d+)*(?:[\-\_]\w+)?(?=\/)/\//g; # /1.2.3/, /v1.2/
+ $Path =~ s/\.\d+\.\d+(?:\.\d+)*(?:[\-\_]\w+)?//g; # .1.2.3, .1.2-alpha
+
+ # Remove date-like patterns (YYYYMMDD, YYYYMMDD-X, etc.)
+ $Path =~ s/[\-\_]?20\d{6}[\-\_]?\d*//g; # 20250618, 20250618-1, -20250618, _20250618
+ $Path =~ s/\/20\d{6}[\-\_]?\d*(?=\/)/\//g; # /20250618/, /20250618-1/
+ $Path =~ s/\.20\d{6}[\-\_]?\d*//g; # .20250618, .20250618-1
+
+ # Remove build numbers and commit hashes (common patterns)
+ $Path =~ s/[\-\_][a-f0-9]{6,8}[\-\_]?\d*//g; # -7b90b8, -f9cd74, -abc123def
+ $Path =~ s/\/[a-f0-9]{6,8}[\-\_]?\d*(?=\/)/\//g; # /7b90b8/, /f9cd74/
+ $Path =~ s/\.[a-f0-9]{6,8}[\-\_]?\d*//g; # .7b90b8, .f9cd74
+
+ # Remove sequential numbers at the end of path components
+ $Path =~ s/[\-\_]\d+(?=\/|$)//g; # -1, -2, _123
+ $Path =~ s/\/\d+(?=\/)/\//g; # /1/, /123/
+ $Path =~ s/\.\d+(?=\/|$)//g; # .1, .123
+
+ # Clean up any double slashes or trailing/leading slashes
+ $Path =~ s/\/+/\//g;
+ $Path =~ s/^\/+|\/+$//g;
+
+ return $Path;
+}
+
+# Enhanced isRenamed function with version-aware path normalization
+sub isRenamedEnhanced($$$)
+{
+ my ($P1, $P2, $Match) = @_;
+ my ($D1, $N1) = sepPath($P1);
+ my ($D2, $N2) = sepPath($P2);
+
+ # If paths are identical, no rename
+ if($P1 eq $P2) {
+ return 0;
+ }
+
+ # First try the original strict matching
+ if($D1 eq $D2 && $N1 ne $N2) {
+ my $L1 = length($N1);
+ my $L2 = length($N2);
+ if($L1<=8)
+ { # too short names
+ if($N1=~/\.(\w+)\Z/)
+ { # with equal extensions
+ my $E = $1;
+ if($N2=~s/\.\Q$E\E\Z//g)
+ { # compare without extensions
+ $N1=~s/\.\Q$E\E\Z//g;
+ }
+ }
+ }
+ my $HL = ($L1+$L2)*$RENAME_FILE_MATCH/$Match;
+ if(getBaseLen($N1, $N2)>=$HL) {
+ return 1;
+ }
+ }
+
+ # Try version-normalized path matching
+ my $ND1 = normalizePathForComparison($D1);
+ my $ND2 = normalizePathForComparison($D2);
+
+ if($ND1 eq $ND2) {
+ # Normalize filenames too (they might have version numbers)
+ my $NN1 = normalizePathForComparison($N1);
+ my $NN2 = normalizePathForComparison($N2);
+
+ if($NN1 eq $NN2 && $NN1 ne "" && $P1 ne $P2) {
+ # Same normalized filename in same normalized directory = renamed
+ return 1;
+ }
+
+ # Check if normalized filenames are similar enough
+ if($NN1 ne $NN2) {
+ my $L1 = length($NN1);
+ my $L2 = length($NN2);
+ if($L1 > 0 && $L2 > 0) {
+ if($L1<=8)
+ { # too short names
+ if($NN1=~/\.(\w+)\Z/)
+ { # with equal extensions
+ my $E = $1;
+ if($NN2=~s/\.\Q$E\E\Z//g)
+ { # compare without extensions
+ $NN1=~s/\.\Q$E\E\Z//g;
+ }
+ }
+ }
+ my $HL = ($L1+$L2)*$RENAME_FILE_MATCH/$Match;
+ if(getBaseLen($NN1, $NN2)>=$HL) {
+ return 1;
+ }
+ }
+ }
+ }
+
+ # Special case: Aggressive matching for shared libraries and similar files
+ # If the normalized directories are very similar, be more permissive
+ if($ND1 ne $ND2) {
+ my $DirSimilarity = getBaseLen($ND1, $ND2);
+ my $DirAvgLen = (length($ND1) + length($ND2)) / 2;
+
+ if($DirAvgLen > 0 && $DirSimilarity / $DirAvgLen > 0.7) {
+ # Directories are 70% similar, check if filenames match after normalization
+ my $NN1 = normalizePathForComparison($N1);
+ my $NN2 = normalizePathForComparison($N2);
+
+ if($NN1 eq $NN2 && $NN1 ne "") {
+ # Same normalized filename in similar normalized directories = likely renamed
+ return 1;
+ }
+
+ # Even if filenames don't match exactly, if they're very similar, consider it a rename
+ if($NN1 ne $NN2 && length($NN1) > 0 && length($NN2) > 0) {
+ my $FilenameSimilarity = getBaseLen($NN1, $NN2);
+ my $FilenameAvgLen = (length($NN1) + length($NN2)) / 2;
+
+ if($FilenameAvgLen > 0 && $FilenameSimilarity / $FilenameAvgLen > 0.8) {
+ # 80% similar normalized filenames in similar directories = renamed
+ return 1;
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+# Enhanced isMoved function with version-aware path normalization
+sub isMovedEnhanced($$)
+{
+ my ($P1, $P2) = @_;
+ my ($D1, $N1) = sepPath($P1);
+ my ($D2, $N2) = sepPath($P2);
+
+ # First try the original strict matching
+ if($N1 eq $N2 && $D1 ne $D2) {
+ return 1;
+ }
+
+ # Try version-normalized matching
+ my $NN1 = normalizePathForComparison($N1);
+ my $NN2 = normalizePathForComparison($N2);
+ my $ND1 = normalizePathForComparison($D1);
+ my $ND2 = normalizePathForComparison($D2);
+
+ # Same normalized filename, different normalized directory = moved
+ if($NN1 eq $NN2 && $NN1 ne "" && $ND1 ne $ND2) {
+ return 1;
+ }
+
+ # More aggressive matching: if filenames are very similar after normalization
+ if($NN1 ne $NN2 && length($NN1) > 0 && length($NN2) > 0 && $ND1 ne $ND2) {
+ my $FilenameSimilarity = getBaseLen($NN1, $NN2);
+ my $FilenameAvgLen = (length($NN1) + length($NN2)) / 2;
+
+ if($FilenameAvgLen > 0 && $FilenameSimilarity / $FilenameAvgLen > 0.9) {
+ # 90% similar normalized filenames in different directories = moved
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+# Content-based file matching for files that couldn't be matched by path
+sub matchByContent($$)
+{
+ my ($P1, $P2) = @_;
+
+ my $Size1 = getSize($P1);
+ my $Size2 = getSize($P2);
+
+ # Quick size check - if sizes are very different, probably not the same file
+ if($Size1 > 0 && $Size2 > 0) {
+ my $SizeRatio = $Size1 > $Size2 ? $Size2/$Size1 : $Size1/$Size2;
+ if($SizeRatio < 0.8) { # More than 20% size difference
+ return 0;
+ }
+ }
+
+ # For small files, do exact content comparison
+ if($Size1 < 1024 && $Size2 < 1024) {
+ return (readFile($P1) eq readFile($P2));
+ }
+
+ # For larger files, use a quick checksum comparison
+ # This is a simple implementation - for production, consider using Digest::SHA
+ my $checksum1 = quickChecksum($P1);
+ my $checksum2 = quickChecksum($P2);
+
+ return ($checksum1 eq $checksum2);
+}
+
+# Simple checksum function (replace with proper hash if needed)
+sub quickChecksum($)
+{
+ my $Path = $_[0];
+
+ if(not -f $Path) {
+ return "";
+ }
+
+ # Read first 1KB, middle 1KB, and last 1KB for a quick fingerprint
+ my $Size = getSize($Path);
+ my $Content = "";
+
+ open(FILE, "<", $Path) or return "";
+
+ # First 1KB
+ read(FILE, my $First, 1024);
+ $Content .= $First;
+
+ # Middle 1KB (if file is large enough)
+ if($Size > 2048) {
+ seek(FILE, int($Size/2) - 512, 0);
+ read(FILE, my $Middle, 1024);
+ $Content .= $Middle;
+ }
+
+ # Last 1KB (if file is large enough)
+ if($Size > 1024) {
+ seek(FILE, -1024, 2);
+ read(FILE, my $Last, 1024);
+ $Content .= $Last;
+ }
+
+ close(FILE);
+
+ # Simple hash of the content
+ my $Hash = 0;
+ for my $Char (split //, $Content) {
+ $Hash = ($Hash * 31 + ord($Char)) % 1000000007;
+ }
+
+ return $Hash;
+}
+
sub isRenamed($$$)
{
my ($P1, $P2, $Match) = @_;
@@ -1333,7 +1585,7 @@ ($$)
if($_[0]<$_[1]) {
return $_[0];
}
-
+
return $_[1];
}
@@ -1344,20 +1596,20 @@ ($)
sub getBaseLen($$)
{
my ($Str1, $Str2) = @_;
-
+
if(defined $Cache{"getBaseLen"}{$Str1}{$Str2}) {
return $Cache{"getBaseLen"}{$Str1}{$Str2};
}
-
+
if($Str1 eq $Str2) {
return length($Str1);
}
-
+
my $BLen = 0;
my $Len1 = length($Str1);
my $Len2 = length($Str2);
my $Min = minNum($Len1, $Len2) - 1;
-
+
foreach my $Pos (0 .. $Min)
{
my $S1 = substr($Str1, $Pos, 1);
@@ -1369,7 +1621,7 @@ ($$)
last;
}
}
-
+
foreach my $Pos (0 .. $Min)
{
my $S1 = substr($Str1, $Len1-$Pos-1, 1);
@@ -1381,7 +1633,7 @@ ($$)
last;
}
}
-
+
return ($Cache{"getBaseLen"}{$Str1}{$Str2}=$BLen);
}
@@ -1400,9 +1652,9 @@ ($$)
sub writeExtraInfo()
{
my $FILES = "";
-
+
$FILES .= "\n ".$RESULT{"affected"}."\n\n\n";
-
+
if(my @Added = sort {lc($a) cmp lc($b)} keys(%AddedFiles)) {
$FILES .= "\n ".join("\n ", @Added)."\n\n\n";
}
@@ -1430,7 +1682,7 @@ ()
foreach (0 .. $#Changed) {
$Changed[$_] .= ";".showNumber($ChangeRate{$Changed[$_]}*100);
}
-
+
$FILES .= "\n ".join("\n ", @Changed)."\n\n\n";
}
if ($TrackUnchanged) {
@@ -1440,7 +1692,7 @@ ()
}
}
writeFile($ExtraInfo."/files.xml", $FILES);
-
+
my $SYMBOLS = "";
if(my @AddedSymbols = sort {lc($a) cmp lc($b)} keys(%AddedSymbols)) {
$SYMBOLS .= "\n ".join("\n ", @AddedSymbols)."\n\n\n";
@@ -1454,7 +1706,7 @@ ()
sub skipFile($)
{
my $Name = $_[0];
-
+
if(defined $SkipPattern)
{
if($Name=~/($SkipPattern)/)
@@ -1463,7 +1715,7 @@ ($)
return 1;
}
}
-
+
return 0;
}
@@ -1476,7 +1728,7 @@ ()
if($ShowDetails) {
mkpath($REPORT_DIR."/details");
}
-
+
foreach my $Format (keys(%FormatInfo))
{
%{$FileChanges{$Format}} = (
@@ -1484,14 +1736,16 @@ ()
"Added"=>0,
"Removed"=>0,
"Changed"=>0,
+ "Moved"=>0,
+ "Renamed"=>0,
"Size"=>0,
"SizeDelta"=>0
);
}
-
+
my (%AddedByDir, %RemovedByDir, %AddedByName,
%RemovedByName, %AddedByPrefix, %RemovedByPrefix) = ();
-
+
foreach my $Name (sort keys(%{$PackageFiles{1}}))
{ # checking old files
my $Format = getFormat($PackageFiles{1}{$Name});
@@ -1508,7 +1762,7 @@ ()
$StableFiles{$Name} = 1;
}
}
-
+
foreach my $Name (keys(%{$PackageFiles{2}}))
{ # checking new files
my $Format = getFormat($PackageFiles{2}{$Name});
@@ -1522,7 +1776,7 @@ ()
}
}
}
-
+
foreach my $Name (sort keys(%RemovedFiles))
{ # checking removed files
my $Path = $PackageFiles{1}{$Name};
@@ -1536,18 +1790,18 @@ ()
}
$FileChanges{$Format}{"Details"}{$Name}{"Status"} = "removed";
}
-
+
foreach my $Name (sort {getDepth($b)<=>getDepth($a)} sort keys(%RemovedFiles))
{ # checking moved files
my $Format = getFormat($PackageFiles{1}{$Name});
-
+
my $FileName = getFilename($Name);
my @Removed = keys(%{$RemovedByName{$FileName}});
my @Added = keys(%{$AddedByName{$FileName}});
-
+
my @Removed = grep {not defined $MovedFiles{$_}} @Removed;
my @Added = grep {not defined $MovedFiles_R{$_}} @Added;
-
+
if($#Added!=0 or $#Removed!=0)
{
my $Found = 0;
@@ -1555,22 +1809,22 @@ ()
{
my @RemovedPrefix = keys(%{$RemovedByPrefix{$Prefix}});
my @AddedPrefix = keys(%{$AddedByPrefix{$Prefix}});
-
+
my @RemovedPrefix = grep {not defined $MovedFiles{$_}} @RemovedPrefix;
my @AddedPrefix = grep {not defined $MovedFiles_R{$_}} @AddedPrefix;
-
+
if($#AddedPrefix==0 and $#RemovedPrefix==0)
{
@Added = @AddedPrefix;
$Found = 1;
}
-
+
}
if(not $Found) {
next;
}
}
-
+
foreach my $File (@Added)
{
if($Format ne getFormat($PackageFiles{2}{$File}))
@@ -1580,7 +1834,7 @@ ()
if(defined $MovedFiles_R{$File}) {
next;
}
- if(isMoved($Name, $File))
+ if(isMoved($Name, $File) || ($EnhancedMatching && isMovedEnhanced($Name, $File)))
{
$MovedFiles{$Name} = $File;
$MovedFiles_R{$File} = $Name;
@@ -1588,7 +1842,7 @@ ()
}
}
}
-
+
foreach my $Name (sort keys(%RemovedFiles))
{ # checking renamed files
if(defined $MovedFiles{$Name})
@@ -1616,15 +1870,46 @@ ()
{ # renamed or moved
next;
}
- if(isRenamed($Name, $File, $Match))
+ if(isRenamed($Name, $File, $Match) || ($EnhancedMatching && isRenamedEnhanced($Name, $File, $Match)))
{
$RenamedFiles{$Name} = $File;
$RenamedFiles_R{$File} = $Name;
last;
+ }
+ }
+ }
+
+ # Content-based fallback matching for remaining unmatched files (if enhanced matching is enabled)
+ if($EnhancedMatching) {
+ my @RemainingRemoved = grep { !defined $RenamedFiles{$_} && !defined $MovedFiles{$_} } keys(%RemovedFiles);
+ my @RemainingAdded = grep { !defined $RenamedFiles_R{$_} && !defined $MovedFiles_R{$_} } keys(%AddedFiles);
+
+ foreach my $RemovedFile (@RemainingRemoved) {
+ next if defined $RenamedFiles{$RemovedFile} || defined $MovedFiles{$RemovedFile};
+
+ my $RemovedPath = $PackageFiles{1}{$RemovedFile};
+ my $RemovedFormat = getFormat($RemovedPath);
+
+ foreach my $AddedFile (@RemainingAdded) {
+ next if defined $RenamedFiles_R{$AddedFile} || defined $MovedFiles_R{$AddedFile};
+
+ my $AddedPath = $PackageFiles{2}{$AddedFile};
+ my $AddedFormat = getFormat($AddedPath);
+
+ # Only match files of the same format
+ if($RemovedFormat eq $AddedFormat) {
+ if(matchByContent($RemovedPath, $AddedPath)) {
+ $RenamedFiles{$RemovedFile} = $AddedFile;
+ $RenamedFiles_R{$AddedFile} = $RemovedFile;
+
+ printMsg("INFO", "Content-based match: $RemovedFile -> $AddedFile") if $Debug;
+ last;
+ }
+ }
}
}
}
-
+
foreach my $Name (sort (keys(%StableFiles), keys(%RenamedFiles), keys(%MovedFiles)))
{ # checking files
my $Path = $PackageFiles{1}{$Name};
@@ -1647,10 +1932,10 @@ ()
{ # moved files
$NewPath = $PackageFiles{2}{$NewName};
}
-
+
my ($Changed, $DLink, $RLink, $Rate, $Adv) = compareFiles($Path, $NewPath, $Name, $NewName);
my %Details = %{$Adv};
-
+
if($Changed==1 or $Changed==3)
{
if($NewName eq $Name)
@@ -1668,7 +1953,7 @@ ()
$Details{"Report"} = $RLink;
$ChangeRate{$Name} = $Rate;
}
-
+
$ChangedFiles{$Name} = 1;
}
elsif($Changed==2)
@@ -1693,6 +1978,7 @@ ()
{ # renamed files
if($Rate<$RENAME_CONTENT_MATCH) {
$Details{"Status"} = "renamed";
+ $FileChanges{$Format}{"Renamed"} += 1;
}
else
{
@@ -1709,6 +1995,7 @@ ()
{ # moved files
if($Rate<$MOVE_CONTENT_MATCH) {
$Details{"Status"} = "moved";
+ $FileChanges{$Format}{"Moved"} += 1;
}
else
{
@@ -1723,7 +2010,7 @@ ()
}
%{$FileChanges{$Format}{"Details"}{$Name}} = %Details;
}
-
+
foreach my $Name (keys(%AddedFiles))
{ # checking added files
my $Path = $PackageFiles{2}{$Name};
@@ -1738,6 +2025,45 @@ ()
$FileChanges{$Format}{"Details"}{$Name}{"Status"} = "added";
}
+ # Fix double counting by adjusting added/removed counts for moved/renamed files
+ foreach my $Format (keys(%FormatInfo)) {
+ my $MovedCount = 0;
+ my $RenamedCount = 0;
+
+ # Count moved/renamed files by format
+ foreach my $Name (keys(%MovedFiles)) {
+ if(getFormat($PackageFiles{1}{$Name}) eq $Format) {
+ $MovedCount++;
+ }
+ }
+
+ foreach my $Name (keys(%RenamedFiles)) {
+ if(getFormat($PackageFiles{1}{$Name}) eq $Format) {
+ $RenamedCount++;
+ }
+ }
+
+ # Subtract moved/renamed files from added/removed counts
+ if($MovedCount > 0) {
+ $FileChanges{$Format}{"Added"} -= $MovedCount;
+ $FileChanges{$Format}{"Removed"} -= $MovedCount;
+ }
+
+ if($RenamedCount > 0) {
+ $FileChanges{$Format}{"Added"} -= $RenamedCount;
+ $FileChanges{$Format}{"Removed"} -= $RenamedCount;
+ }
+
+ # Make sure we don't have negative counts
+ if($FileChanges{$Format}{"Added"} < 0) {
+ $FileChanges{$Format}{"Added"} = 0;
+ }
+
+ if($FileChanges{$Format}{"Removed"} < 0) {
+ $FileChanges{$Format}{"Removed"} = 0;
+ }
+ }
+
# Deps
foreach my $Kind (keys(%{$PackageDeps{1}}))
{ # removed/changed deps
@@ -1749,13 +2075,13 @@ ()
"Size"=>0,
"SizeDelta"=>0
);
-
+
foreach my $Name (keys(%{$PackageDeps{1}{$Kind}}))
{
my $Size = length($Name);
$DepChanges{$Kind}{"Total"} += 1;
$DepChanges{$Kind}{"Size"} += $Size;
-
+
if(not defined($PackageDeps{2}{$Kind})
or not defined($PackageDeps{2}{$Kind}{$Name}))
{ # removed deps
@@ -1764,7 +2090,7 @@ ()
$DepChanges{$Kind}{"SizeDelta"} += $Size;
next;
}
-
+
my %Info1 = %{$PackageDeps{1}{$Kind}{$Name}};
my %Info2 = %{$PackageDeps{2}{$Kind}{$Name}};
if($Info1{"Op"} and $Info1{"V"}
@@ -1779,7 +2105,7 @@ ()
}
}
}
-
+
foreach my $Kind (keys(%{$PackageDeps{2}}))
{ # added deps
foreach my $Name (keys(%{$PackageDeps{2}{$Kind}}))
@@ -1798,7 +2124,7 @@ ()
}
}
}
-
+
# Info
%InfoChanges = (
"Added"=>0,
@@ -1808,10 +2134,10 @@ ()
"Size"=>0,
"SizeDelta"=>0
);
-
+
my $OldPkgs = keys(%{$TargetPackages{1}});
my $NewPkgs = keys(%{$TargetPackages{2}});
-
+
if(keys(%PackageInfo)==2
and $OldPkgs==1
and $NewPkgs==1)
@@ -1819,7 +2145,7 @@ ()
my @Names = keys(%PackageInfo);
my $N1 = $Names[0];
my $N2 = $Names[1];
-
+
if(defined $PackageInfo{$N1}{"V2"})
{
$PackageInfo{$N2}{"V2"} = $PackageInfo{$N1}{"V2"};
@@ -1831,15 +2157,15 @@ ()
delete($PackageInfo{$N2});
}
}
-
+
foreach my $Package (sort keys(%PackageInfo))
{
my $Old = $PackageInfo{$Package}{"V1"};
my $New = $PackageInfo{$Package}{"V2"};
-
+
my $OldSize = length($Old);
my $NewSize = length($New);
-
+
$InfoChanges{"Total"} += 1;
if($Old and not $New)
{
@@ -1859,23 +2185,23 @@ ()
{
my $P1 = $TMP_DIR."/1/".$Package."-info";
my $P2 = $TMP_DIR."/2/".$Package."-info";
-
+
writeFile($P1, $Old);
writeFile($P2, $New);
-
+
my ($DLink, $Rate) = diffFiles($P1, $P2, getRPath("info-diffs", $Package."-info"));
-
+
# clean space
rmtree($TMP_DIR."/1/");
rmtree($TMP_DIR."/2/");
-
+
$DLink =~s/\A\Q$REPORT_DIR\E\///;
-
+
my %Details = ();
$Details{"Status"} = "changed";
$Details{"Rate"} = $Rate;
$Details{"Diff"} = $DLink;
-
+
%{$InfoChanges{"Details"}{$Package}} = %Details;
$InfoChanges{"Changed"} += 1;
$InfoChanges{"Rate"} += $Rate;
@@ -1889,7 +2215,7 @@ ()
$InfoChanges{"SizeDelta"} += $OldSize;
}
}
-
+
$STAT_LINE .= "added:".keys(%AddedFiles).";";
$STAT_LINE .= "removed:".keys(%RemovedFiles).";";
$STAT_LINE .= "moved:".keys(%MovedFiles).";";
@@ -1914,12 +2240,12 @@ ()
if(not keys(%PackageUsage)) {
return "";
}
-
+
my $Report = "\n";
$Report .= "Usage Analysis
\n";
$Report .= "\n";
$Report .= "| Package | Status | Used By |
\n";
-
+
foreach my $Package (sort keys(%PackageUsage))
{
my $Num = keys(%{$PackageUsage{$Package}{"UsedBy"}});
@@ -1940,11 +2266,11 @@ ()
$Report .= "unused | \n";
$Report .= " | \n";
}
-
+
$Report .= "\n";
}
$Report .= "
\n";
-
+
return $Report;
}
@@ -1953,12 +2279,12 @@ ()
if(not keys(%PackageInfo)) {
return "";
}
-
+
my $Report = "\n";
$Report .= "Changes In Package Info
\n";
$Report .= "\n";
$Report .= "| Package | Status | Delta | Visual Diff |
\n";
-
+
my %Details = %{$InfoChanges{"Details"}};
foreach my $Package (sort keys(%Details))
{
@@ -1992,7 +2318,7 @@ ()
$Report .= "\n";
}
$Report .= "
\n";
-
+
return $Report;
}
@@ -2075,38 +2401,38 @@ ($)
sub createFileView($$$)
{
my ($File, $V, $Dir) = @_;
-
+
my $Path = $PackageFiles{$V}{$File};
-
+
if(not -T $Path)
{
return undef;
}
-
+
my $Name = getFilename($File);
my $Content = readFile($Path);
my $CssStyles = readModule("Styles", "View.css");
-
+
$Content = htmlSpecChars($Content);
-
+
if($Name=~/\.patch\Z/)
{
while($Content=~s&(\A|\n)(\+.*?)(\n|\Z)&$1$2$3&mg){};
while($Content=~s&(\A|\n)(\-.*?)(\n|\Z)&$1$2$3&mg){};
}
-
+
$Content = "".$Content."
\n";
-
+
$Content = "\n\n| plain | \n
\n\n| \n".$Content." | \n
\n
\n";
$Content = composeHTMLHead($Name, "", "View file ".$File, "", "", $CssStyles)."\n\n".$Content;
$Content .= "