2015-08-20 00:16:38 +02:00
|
|
|
#!/usr/bin/perl
|
|
|
|
|
|
|
|
# usage:
#   git clone --bare git@git.zulip.net:eng/zulip.git
#   cd zulip.git
#   git fast-export --export-marks=../zulip.em --progress=1000 --all > ../zulip.fe
#   git init --bare ../zulip-zanitized.git
#   cd ../zulip-zanitized.git
#   zanitizer ../zulip.fe ../zulip.em | git fast-import --quiet
|
2015-08-20 00:16:38 +02:00
|
|
|
|
|
|
|
use strict;
|
|
|
|
use warnings;
|
|
|
|
|
|
|
|
use Digest::SHA qw(sha1_hex);
|
|
|
|
use FindBin;
|
|
|
|
|
|
|
|
use lib $FindBin::Bin;
|
|
|
|
use zanitizer_config;
|
|
|
|
|
|
|
|
# Compare two trees, each given as a hash reference.
#
# Returns true when both hashes have exactly the same set of keys and the
# values stored under every key are string-equal; returns false otherwise.
sub eq_tree {
    my ( $left, $right ) = @_;

    # Every entry on the left must exist on the right with the same value.
    for my $path ( keys %$left ) {
        return !1 unless exists $right->{$path};
        return !1 if $left->{$path} ne $right->{$path};
    }

    # The right side must not carry any entry the left side lacks.
    for my $path ( keys %$right ) {
        return !1 unless exists $left->{$path};
    }

    return !!1;
}
|
|
|
|
|
2021-03-24 18:32:23 +01:00
|
|
|
# Command line: the fast-export dump to rewrite and, optionally, a marks
# file whose lines each hold two whitespace-separated fields.
my ( $fast_export_file, $export_marks_file ) = @ARGV;
defined $fast_export_file
  or die "usage: $0 FAST_EXPORT_FILE [EXPORT_MARKS_FILE]\n";

# First field of each marks-file line => second field of that line.
# Stays empty when no marks file was given.
my %export_marks = ();
if ( defined $export_marks_file ) {
    open my $marks_fh, '<', $export_marks_file
      or die "cannot open $export_marks_file: $!";
    while ( my $line = <$marks_fh> ) {
        my ( $mark, $value, @extra ) = split ' ', $line;

        # Blank lines contribute nothing.
        next if !defined $mark;

        # A line with one field or more than two would silently shift every
        # later key/value pair if the fields were just flattened into a hash.
        die "malformed line $. in $export_marks_file: $line"
          if !defined $value || @extra;

        $export_marks{$mark} = $value;
    }
    close $marks_fh;
}
|
|
|
|
|
2021-03-24 18:32:23 +01:00
|
|
|
# Bookkeeping filled in while the fast-export stream is rewritten.

# Mark seen in the input => mark to use in its place in the output.
my %mark_map;

# SHA-1 hex digest of blob content => mark of the blob emitted with it.
my %blob_mark;

# Ref name => mark recorded for that ref by reset and commit commands.
my %ref_commit;

# Mark of an emitted commit => hash ref of file name => "mode mark".
my %commit_tree;

# Mark of every blob whose content was changed by scrub_text.
my %scrubbed_blob;

# Output file name of every file modified with a scrubbed blob.
my %scrubbed_file;

# Input file name of every modification dropped by scrub_filename.
my %deleted_file;

# Input file name => the different name scrub_filename replaced it with.
my %renamed_file;
|
2015-08-20 00:16:38 +02:00
|
|
|
|
2021-03-24 18:32:23 +01:00
|
|
|
# Rewrite the fast-export stream command by command onto STDOUT.
#
# Blobs are passed through scrub_text and de-duplicated by content hash.
# File names in commits are passed through scrub_filename.  Commits whose
# tree ends up identical to their parent's (and which keep no merge parent)
# are dropped, and their mark is mapped onto the parent instead.
open my $in, '<', $fast_export_file
  or die "cannot open $fast_export_file: $!";

# Read one line that must be present; dies at end of input so that a
# truncated record is reported instead of being matched as undef.
my $require_line = sub {
    my ($context) = @_;
    my $line = <$in>;
    defined $line or die "unexpected end of input in $context";
    return $line;
};

# $_ always holds the next unprocessed line of the stream (undef at EOF).
$_ = <$in>;
while ( defined $_ ) {
    if ( $_ eq "blob\n" ) {
        my ($mark) = $require_line->('blob') =~ /^mark (\S*)\n$/s
          or die "malformed blob: expected mark";
        my ($len) = $require_line->("blob $mark") =~ /^data (\d+)\n$/s
          or die "malformed blob $mark: expected data";
        ( read( $in, my $data, $len ) // -1 ) == $len
          or die "short read in blob $mark";

        # scrub_text works on $_; remember whether it changed anything.
        $_ = $data;
        scrub_text;
        if ( $_ ne $data ) {
            $scrubbed_blob{$mark} = 1;
            $data = $_;
        }
        $require_line->("blob $mark") eq "\n"
          or die "malformed blob $mark: expected LF after data";

        # Emit each distinct content only once; later blobs with the same
        # content are mapped onto the first mark.
        my $hash = sha1_hex($data);
        if ( exists $blob_mark{$hash} ) {
            $mark_map{$mark} = $blob_mark{$hash};
        }
        else {
            $blob_mark{$hash} = $mark_map{$mark} = $mark;
            print "blob\nmark $mark\ndata ", length $data, "\n", $data, "\n";
        }
    }
    elsif (/^reset (?'ref'.*)\n$/s) {
        my $ref = $+{ref};
        $_ = <$in>;
        my $from = undef;
        while ( defined $_ ) {
            if ( $_ eq "\n" ) {
                $_ = <$in>;
                last;
            }
            elsif (/^from (?'from'.*)\n$/s) {
                $from = $+{from};
            }
            else {
                # The trailing LF on reset is optional
                last;
            }
            $_ = <$in>;
        }

        # Record where the ref now points: the mapped 'from' mark, or
        # nothing when the reset has no (known) starting point.
        my $target = defined $from ? $mark_map{$from} : undef;
        $ref_commit{$ref} = $target;
        print "reset $ref\n";
        print "from $target\n" if defined $target;
        print "\n";

        # $_ already holds the next command, so skip the read below.
        next;
    }
    elsif (/^commit (?'ref'.*)\n$/s) {
        my $ref = $+{ref};
        my ($mark) = $require_line->('commit') =~ /^mark (\S*)\n$/s
          or die "malformed commit: expected mark";
        my ($author) =
          $require_line->("commit $mark") =~ /^author (.*)\n$/s
          or die "malformed commit $mark: expected author";
        my ($committer) =
          $require_line->("commit $mark") =~ /^committer (.*)\n$/s
          or die "malformed commit $mark: expected committer";
        my ($len) = $require_line->("commit $mark") =~ /^data (\d+)\n$/s
          or die "malformed commit $mark: expected data";
        ( read( $in, my $data, $len ) // -1 ) == $len
          or die "short read in message of commit $mark";

        $_ = $require_line->("commit $mark");
        my $from = undef;
        if (/^from (?'from'.*)\n$/s) {
            $from = $+{from};
            $_    = $require_line->("commit $mark");
        }

        # Without an explicit 'from', the parent is whatever this ref was
        # last recorded as pointing to.
        my $base = defined $from ? $mark_map{$from} : $ref_commit{$ref};

        my @merge = ();
        while (/^merge (?'mark'\S*)\n$/s) {
            die "unimplemented case" if !defined $from;
            push @merge, $+{mark};
            $_ = $require_line->("commit $mark");
        }

        # git fast-export incorrectly writes M before D when replacing
        # a symlink with a directory.  We move every D before every M
        # to work around this bug.
        my @delete = ();
        my @modify = ();
        while (1) {
            if ( $_ eq "\n" ) {
                last;
            }
            elsif (/^D (?'file'.*)\n$/s) {

                # Copy the captures before $_ is reused for scrub_filename.
                my %change = %+;
                $_ = $change{file};
                scrub_filename;
                push @delete, { %change, file => $_ } if defined $_;
            }
            elsif (/^M (?'mode'\d+) (?'mark'\S+) (?'file'.*)\n$/s) {
                my %change = %+;
                $_ = $change{file};
                scrub_filename;
                if ( defined $_ ) {
                    $renamed_file{ $change{file} } = $_
                      if $_ ne $change{file};
                    $scrubbed_file{$_} = 1
                      if exists $scrubbed_blob{ $change{mark} };
                    push @modify, { %change, file => $_ };
                }
                else {
                    # scrub_filename left $_ undefined: drop the file.
                    $deleted_file{ $change{file} } = 1;
                }
            }
            else {
                die "unhandled command in commit: $_";
            }
            $_ = $require_line->("commit $mark");
        }

        # Build this commit's tree from its parent's tree plus the changes.
        my $base_tree = ( defined $base ? $commit_tree{$base} : undef ) // {};
        my %tree = %$base_tree;
        delete $tree{ $$_{file} } for @delete;
        $tree{ $$_{file} } = "$$_{mode} $mark_map{$$_{mark}}" for @modify;

        if ( eq_tree( \%tree, $base_tree )
            && !( grep { defined $mark_map{$_} } @merge ) )
        {
            # Nothing left in this commit: fold it into its parent.
            $ref_commit{$ref} = $mark_map{$mark} = $base;
        }
        else {
            $ref_commit{$ref} = $mark_map{$mark} = $mark;
            $commit_tree{$mark} = \%tree;

            # Scrub the commit message and, when the marks file knows this
            # mark, append a trailer after a blank line.
            $_ = $data;
            scrub_text;
            if ( exists $export_marks{$mark} ) {
                $_ .= "\n" until /\n\n$/;
                $_ .= "(imported from commit $export_marks{$mark})\n";
            }
            print
"commit $ref\nmark $mark\nauthor $author\ncommitter $committer\ndata ",
              length $_, "\n", $_;
            if ( defined $from ) {
                die "unimplemented case" if !defined $mark_map{$from};
                print "from $mark_map{$from}\n";
            }
            for my $parent (@merge) {
                print "merge $mark_map{$parent}\n"
                  if defined $mark_map{$parent};
            }
            print "D $$_{file}\n" for @delete;
            print "M $$_{mode} $mark_map{$$_{mark}} $$_{file}\n" for @modify;
            print "\n";
        }
    }
    elsif (/^progress /) {
        print $_;
    }
    else {
        die "unhandled command: $_";
    }
    $_ = <$in>;
}
close $in;
|
|
|
|
|
|
|
|
# Summarize on STDERR what was dropped, renamed and scrubbed, each list
# sorted by file name.
print STDERR "Deleted files:\n";
for my $path ( sort keys %deleted_file ) {
    print STDERR " $path\n";
}

print STDERR "Renamed files:\n";
for my $path ( sort keys %renamed_file ) {
    print STDERR " $path => $renamed_file{$path}\n";
}

print STDERR "Scrubbed files:\n";
for my $path ( sort keys %scrubbed_file ) {
    print STDERR " $path\n";
}
|