From 2dda741279ba3c56340f3bd88745604bb5a394a6 Mon Sep 17 00:00:00 2001 From: "Mazo, Andrey" Date: Mon, 1 Apr 2019 18:02:17 +0000 Subject: git-p4: detect/prevent infinite loop in gitCommitByP4Change() Under certain circumstances, gitCommitByP4Change() can enter an infinite loop resulting in `git p4 sync` hanging forever. The problem is that `git rev-list --bisect ^` can return ``, which would result in reinspecting and potentially an infinite loop. This can happen when importing just a subset of P4 repository and/or with explicit "--changesfile" option. A real-life example: """ looking in ref refs/remotes/p4/mybranch for change 26894 using bisect... Reading pipe: git rev-parse refs/remotes/p4/mybranch trying: earliest latest 4daff81c520a82678e1ef347f2b5e97258101ae1 Reading pipe: git rev-list --bisect 4daff81c520a82678e1ef347f2b5e97258101ae1 Reading pipe: git cat-file commit 147f5d3292af2e1cc4a56a7b96db845144c68486 current change 25339 trying: earliest ^147f5d3292af2e1cc4a56a7b96db845144c68486 latest 4daff81c520a82678e1ef347f2b5e97258101ae1 Reading pipe: git rev-list --bisect 4daff81c520a82678e1ef347f2b5e97258101ae1 ^147f5d3292af2e1cc4a56a7b96db845144c68486 Reading pipe: git cat-file commit 51db83df9d588010d0bd995641c85aa0408a5bb9 current change 25420 trying: earliest ^51db83df9d588010d0bd995641c85aa0408a5bb9 latest 4daff81c520a82678e1ef347f2b5e97258101ae1 Reading pipe: git rev-list --bisect 4daff81c520a82678e1ef347f2b5e97258101ae1 ^51db83df9d588010d0bd995641c85aa0408a5bb9 Reading pipe: git cat-file commit e8f83909ceb570f5a7e48c2853f3c5d8207cea52 current change 25448 trying: earliest ^e8f83909ceb570f5a7e48c2853f3c5d8207cea52 latest 4daff81c520a82678e1ef347f2b5e97258101ae1 Reading pipe: git rev-list --bisect 4daff81c520a82678e1ef347f2b5e97258101ae1 ^e8f83909ceb570f5a7e48c2853f3c5d8207cea52 Reading pipe: git cat-file commit 09a48eb7acd594dce52e06681be9c366e1844d66 current change 25521 trying: earliest ^09a48eb7acd594dce52e06681be9c366e1844d66 latest 4daff81c520a82678e1ef347f2b5e97258101ae1 Reading pipe: git rev-list --bisect 4daff81c520a82678e1ef347f2b5e97258101ae1 ^09a48eb7acd594dce52e06681be9c366e1844d66 Reading pipe: git cat-file commit 4daff81c520a82678e1ef347f2b5e97258101ae1 current change 26907 trying: earliest ^09a48eb7acd594dce52e06681be9c366e1844d66 latest 4daff81c520a82678e1ef347f2b5e97258101ae1 Reading pipe: git rev-list --bisect 4daff81c520a82678e1ef347f2b5e97258101ae1 ^09a48eb7acd594dce52e06681be9c366e1844d66 Reading pipe: git cat-file commit 4daff81c520a82678e1ef347f2b5e97258101ae1 current change 26907 trying: earliest ^09a48eb7acd594dce52e06681be9c366e1844d66 latest 4daff81c520a82678e1ef347f2b5e97258101ae1 Reading pipe: git rev-list --bisect 4daff81c520a82678e1ef347f2b5e97258101ae1 ^09a48eb7acd594dce52e06681be9c366e1844d66 Reading pipe: git cat-file commit 4daff81c520a82678e1ef347f2b5e97258101ae1 current change 26907 ... """ The fix is two-fold: * detect an infinite loop and die right away instead of looping forever; * make sure, `git rev-list --bisect` can't return "latestCommit" again by excluding it from the rev-list range explicitly. Signed-off-by: Andrey Mazo Signed-off-by: Junio C Hamano --- git-p4.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'git-p4.py') diff --git a/git-p4.py b/git-p4.py index 5b79920f46..c0a3068b6f 100755 --- a/git-p4.py +++ b/git-p4.py @@ -3325,7 +3325,9 @@ def gitCommitByP4Change(self, ref, change): if currentChange < change: earliestCommit = "^%s" % next else: - latestCommit = "%s" % next + if next == latestCommit: + die("Infinite loop while looking in ref %s for change %s. Check your branch mappings" % (ref, change)) + latestCommit = "%s^@" % next return "" -- cgit 1.2.3-korg From f2768cb343cb0320f79692625dea7f50af643759 Mon Sep 17 00:00:00 2001 From: "Mazo, Andrey" Date: Mon, 1 Apr 2019 18:02:24 +0000 Subject: git-p4: match branches case insensitively if configured git-p4 knows how to handle case insensitivity in file paths if core.ignorecase is set. However, when determining a branch for a file, it still does a case-sensitive prefix match. This may result in some file changes to be lost on import. For example, given the following commits 1. add //depot/main/file1 2. add //depot/DirA/file2 3. add //depot/dira/file3 4. add //depot/DirA/file4 and "branchList = main:DirA" branch mapping, commit 3 will be lost. So, do branch search case insensitively if running with core.ignorecase set. Teach splitFilesIntoBranches() to use the p4PathStartsWith() function for path prefix matches instead of always case-sensitive match. Signed-off-by: Andrey Mazo Signed-off-by: Junio C Hamano --- git-p4.py | 4 ++-- t/t9801-git-p4-branch.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'git-p4.py') diff --git a/git-p4.py b/git-p4.py index c0a3068b6f..f3e5ccb7af 100755 --- a/git-p4.py +++ b/git-p4.py @@ -2668,7 +2668,7 @@ def stripRepoPath(self, path, prefixes): path = self.clientSpecDirs.map_in_client(path) if self.detectBranches: for b in self.knownBranches: - if path.startswith(b + "/"): + if p4PathStartsWith(path, b + "/"): path = path[len(b)+1:] elif self.keepRepoPath: @@ -2723,7 +2723,7 @@ def splitFilesIntoBranches(self, commit): for branch in self.knownBranches.keys(): # add a trailing slash so that a commit into qt/4.2foo # doesn't end up in qt/4.2, e.g. - if relPath.startswith(branch + "/"): + if p4PathStartsWith(relPath, branch + "/"): if branch not in branches: branches[branch] = [] branches[branch].append(file) diff --git a/t/t9801-git-p4-branch.sh b/t/t9801-git-p4-branch.sh index c48532e12b..4779448b4c 100755 --- a/t/t9801-git-p4-branch.sh +++ b/t/t9801-git-p4-branch.sh @@ -650,7 +650,7 @@ test_expect_success !CASE_INSENSITIVE_FS 'basic p4 branches for case folding' ' ' # Check that files are properly split across branches when ignorecase is set -test_expect_failure !CASE_INSENSITIVE_FS 'git p4 clone, branchList branch definition, ignorecase' ' +test_expect_success !CASE_INSENSITIVE_FS 'git p4 clone, branchList branch definition, ignorecase' ' test_when_finished cleanup_git && test_create_repo "$git" && ( @@ -676,7 +676,7 @@ test_expect_failure !CASE_INSENSITIVE_FS 'git p4 clone, branchList branch defini ' # Check that files are properly split across branches when ignorecase is set, use-client-spec case -test_expect_failure !CASE_INSENSITIVE_FS 'git p4 clone with client-spec, branchList branch definition, ignorecase' ' +test_expect_success !CASE_INSENSITIVE_FS 'git p4 clone with client-spec, branchList branch definition, ignorecase' ' client_view "//depot/... //client/..." && test_when_finished cleanup_git && test_create_repo "$git" && -- cgit 1.2.3-korg From ff8c50ed0c98d9bc6aaf5fc2e93bd976c3076750 Mon Sep 17 00:00:00 2001 From: "Mazo, Andrey" Date: Mon, 1 Apr 2019 18:02:26 +0000 Subject: git-p4: don't groom exclude path list on every commit Currently, `cloneExclude` array is being groomed (by removing trailing "...") on every changeset. (since `extractFilesFromCommit()` is called on every imported changeset) As a micro-optimization, do it once while parsing arguments. Also, prepend "/" and remove trailing "..." at the same time. Signed-off-by: Andrey Mazo Signed-off-by: Junio C Hamano --- git-p4.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'git-p4.py') diff --git a/git-p4.py b/git-p4.py index f3e5ccb7af..7edcbad055 100755 --- a/git-p4.py +++ b/git-p4.py @@ -1316,7 +1316,7 @@ def __init__(self): self.needsGit = True self.verbose = False - # This is required for the "append" cloneExclude action + # This is required for the "append" update_shelve action def ensure_value(self, attr, value): if not hasattr(self, attr) or getattr(self, attr) is None: setattr(self, attr, value) @@ -2530,6 +2530,11 @@ def map_in_client(self, depot_path): die( "Error: %s is not found in client spec path" % depot_path ) return "" +def cloneExcludeCallback(option, opt_str, value, parser): + # prepend "/" because the first "/" was consumed as part of the option itself. + # ("-//depot/A/..." becomes "/depot/A/..." after option parsing) + parser.values.cloneExclude += ["/" + re.sub(r"\.\.\.$", "", value)] + class P4Sync(Command, P4UserMap): def __init__(self): @@ -2553,7 +2558,7 @@ def __init__(self): optparse.make_option("--use-client-spec", dest="useClientSpec", action='store_true', help="Only sync files that are included in the Perforce Client Spec"), optparse.make_option("-/", dest="cloneExclude", - action="append", type="string", + action="callback", callback=cloneExcludeCallback, type="string", help="exclude depot path"), ] self.description = """Imports from Perforce into a git repository.\n @@ -2619,8 +2624,6 @@ def checkpoint(self): print("checkpoint finished: " + out) def extractFilesFromCommit(self, commit, shelved=False, shelved_cl = 0): - self.cloneExclude = [re.sub(r"\.\.\.$", "", path) - for path in self.cloneExclude] files = [] fnum = 0 while "depotFile%s" % fnum in commit: @@ -3890,7 +3893,6 @@ def run(self, args): self.cloneDestination = depotPaths[-1] depotPaths = depotPaths[:-1] - self.cloneExclude = ["/"+p for p in self.cloneExclude] for p in depotPaths: if not p.startswith("//"): sys.stderr.write('Depot paths must start with "//": %s\n' % p) -- cgit 1.2.3-korg From a2bee10ad9ed4dc45c1f7da76f01388925b80212 Mon Sep 17 00:00:00 2001 From: "Mazo, Andrey" Date: Mon, 1 Apr 2019 18:02:32 +0000 Subject: git-p4: don't exclude other files with same prefix Make sure not to exclude files unintentionally if exclude paths are specified without a trailing /. I.e., don't exclude "//depot/file_dont_exclude" if run with "-//depot/file". Do this by ensuring that paths without a trailing "/" are only matched completely. Also, abort path search on the first match as a micro-optimization. Signed-off-by: Andrey Mazo Signed-off-by: Junio C Hamano --- git-p4.py | 21 ++++++++++++++------- t/t9817-git-p4-exclude.sh | 4 ++-- 2 files changed, 16 insertions(+), 9 deletions(-) (limited to 'git-p4.py') diff --git a/git-p4.py b/git-p4.py index 7edcbad055..c47bd8c4d8 100755 --- a/git-p4.py +++ b/git-p4.py @@ -2623,18 +2623,25 @@ def checkpoint(self): if self.verbose: print("checkpoint finished: " + out) + def isPathWanted(self, path): + for p in self.cloneExclude: + if p.endswith("/"): + if p4PathStartsWith(path, p): + return False + # "-//depot/file1" without a trailing "/" should only exclude "file1", but not "file111" or "file1_dir/file2" + elif path.lower() == p.lower(): + return False + for p in self.depotPaths: + if p4PathStartsWith(path, p): + return True + return False + def extractFilesFromCommit(self, commit, shelved=False, shelved_cl = 0): files = [] fnum = 0 while "depotFile%s" % fnum in commit: path = commit["depotFile%s" % fnum] - - if [p for p in self.cloneExclude - if p4PathStartsWith(path, p)]: - found = False - else: - found = [p for p in self.depotPaths - if p4PathStartsWith(path, p)] + found = self.isPathWanted(path) if not found: fnum = fnum + 1 continue diff --git a/t/t9817-git-p4-exclude.sh b/t/t9817-git-p4-exclude.sh index 1c22570797..275dd30425 100755 --- a/t/t9817-git-p4-exclude.sh +++ b/t/t9817-git-p4-exclude.sh @@ -53,7 +53,7 @@ test_expect_success 'clone, excluding part of repo' ' ) ' -test_expect_failure 'clone, excluding single file, no trailing /' ' +test_expect_success 'clone, excluding single file, no trailing /' ' test_when_finished cleanup_git && git p4 clone -//depot/discard_file --dest="$git" //depot/...@all && ( @@ -85,7 +85,7 @@ test_expect_success 'clone, then sync with exclude' ' ) ' -test_expect_failure 'clone, then sync with exclude, no trailing /' ' +test_expect_success 'clone, then sync with exclude, no trailing /' ' test_when_finished cleanup_git && git p4 clone -//depot/discard/... -//depot/discard_file --dest="$git" //depot/...@all && ( -- cgit 1.2.3-korg From d15068a6501bec72fddec9f79372a5e3ce5e1379 Mon Sep 17 00:00:00 2001 From: "Mazo, Andrey" Date: Mon, 1 Apr 2019 18:02:38 +0000 Subject: git-p4: respect excluded paths when detecting branches Currently, excluded paths are only handled in the following cases: * no branch detection; * branch detection with using clientspec. However, excluded paths are not respected in case of branch detection without using clientspec. Fix this by consulting the list of excluded paths when splitting files across branches. Signed-off-by: Andrey Mazo Signed-off-by: Junio C Hamano --- git-p4.py | 3 +-- t/t9801-git-p4-branch.sh | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'git-p4.py') diff --git a/git-p4.py b/git-p4.py index c47bd8c4d8..96c4b78dc7 100755 --- a/git-p4.py +++ b/git-p4.py @@ -2710,8 +2710,7 @@ def splitFilesIntoBranches(self, commit): fnum = 0 while "depotFile%s" % fnum in commit: path = commit["depotFile%s" % fnum] - found = [p for p in self.depotPaths - if p4PathStartsWith(path, p)] + found = self.isPathWanted(path) if not found: fnum = fnum + 1 continue diff --git a/t/t9801-git-p4-branch.sh b/t/t9801-git-p4-branch.sh index 7530d22de2..9654362052 100755 --- a/t/t9801-git-p4-branch.sh +++ b/t/t9801-git-p4-branch.sh @@ -412,7 +412,7 @@ test_expect_failure 'git p4 clone file subset branch' ' ' # Check that excluded files are omitted during import -test_expect_failure 'git p4 clone complex branches with excluded files' ' +test_expect_success 'git p4 clone complex branches with excluded files' ' test_when_finished cleanup_git && test_create_repo "$git" && ( -- cgit 1.2.3-korg