Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve submodule operation performance #2655

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
118 changes: 8 additions & 110 deletions Source/CarthageKit/Git.swift
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ public func contentsOfFileInRepository(_ repositoryFileURL: URL, _ path: String,
public func checkoutRepositoryToDirectory(
_ repositoryFileURL: URL,
_ workingDirectoryURL: URL,
force: Bool,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why was this added/needed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The original checkoutRepositoryToDirectory function was a forced checkout, and in checkoutSubmodule there was a non-forced checkout. So to reuse the checkoutRepositoryToDirectory function, I added the force parameter.

revision: String = "HEAD"
) -> SignalProducer<(), CarthageError> {
return SignalProducer { () -> Result<[String: String], CarthageError> in
Expand All @@ -170,66 +171,16 @@ public func checkoutRepositoryToDirectory(
)
}
}
.flatMap(.concat) { environment in
return launchGitTask([ "checkout", "--quiet", "--force", revision ], repositoryFileURL: repositoryFileURL, environment: environment)
}
.then(SignalProducer<(), CarthageError>.empty)
}

/// Clones the given submodule into the working directory of its parent
/// repository, but without any Git metadata.
public func cloneSubmoduleInWorkingDirectory(_ submodule: Submodule, _ workingDirectoryURL: URL) -> SignalProducer<(), CarthageError> {
let submoduleDirectoryURL = workingDirectoryURL.appendingPathComponent(submodule.path, isDirectory: true)

func repositoryCheck<T>(_ description: String, attempt closure: () throws -> T) -> Result<T, CarthageError> {
do {
return .success(try closure())
} catch let error as NSError {
let reason = "could not \(description)"
return .failure(
.repositoryCheckoutFailed(workingDirectoryURL: submoduleDirectoryURL, reason: reason, underlyingError: error)
)
.flatMap(.concat) { (environment: [String: String]) -> SignalProducer<String, CarthageError> in
var arguments = [ "checkout", "--quiet" ]
if force {
arguments.append("--force")
}
}
arguments.append(revision)

let purgeGitDirectories = FileManager.default.reactive
.enumerator(at: submoduleDirectoryURL, includingPropertiesForKeys: [ .isDirectoryKey, .nameKey ], catchErrors: true)
.attemptMap { enumerator, url -> Result<(), CarthageError> in
return repositoryCheck("enumerate name of descendant at \(url.path)", attempt: {
try url.resourceValues(forKeys: [ .nameKey ]).name
})
.flatMap { (name: String?) in
guard name == ".git" else { return .success(()) }

return repositoryCheck("determine whether \(url.path) is a directory", attempt: {
try url.resourceValues(forKeys: [ .isDirectoryKey ]).isDirectory!
})
.flatMap { (isDirectory: Bool) in
if isDirectory { enumerator.skipDescendants() }

return repositoryCheck("remove \(url.path)") {
try FileManager.default.removeItem(at: url)
}
}
}
}

return SignalProducer<(), CarthageError> { () -> Result<(), CarthageError> in
repositoryCheck("remove submodule checkout") {
try FileManager.default.removeItem(at: submoduleDirectoryURL)
}
return launchGitTask(arguments, repositoryFileURL: repositoryFileURL, environment: environment)
}
.then(cloneRepository(submodule.url, workingDirectoryURL.appendingPathComponent(submodule.path), isBare: false))
.then(checkoutSubmodule(submodule, submoduleDirectoryURL))
.then(purgeGitDirectories)
}

/// Recursively checks out the given submodule's revision, in its working
/// directory.
private func checkoutSubmodule(_ submodule: Submodule, _ submoduleWorkingDirectoryURL: URL) -> SignalProducer<(), CarthageError> {
return launchGitTask([ "checkout", "--quiet", submodule.sha ], repositoryFileURL: submoduleWorkingDirectoryURL)
.then(launchGitTask([ "submodule", "--quiet", "update", "--init", "--recursive" ], repositoryFileURL: submoduleWorkingDirectoryURL))
.then(SignalProducer<(), CarthageError>.empty)
.then(SignalProducer<(), CarthageError>.empty)
}

/// Parses each key/value entry from the given config file contents, optionally
Expand Down Expand Up @@ -472,56 +423,3 @@ public func isGitRepository(_ directoryURL: URL) -> SignalProducer<Bool, NoError
}
.flatMapError { _ in SignalProducer(value: false) }
}

/// Adds the given submodule to the given repository, cloning from `fetchURL` if
/// the desired revision does not exist or the submodule needs to be cloned.
public func addSubmoduleToRepository(_ repositoryFileURL: URL, _ submodule: Submodule, _ fetchURL: GitURL) -> SignalProducer<(), CarthageError> {
let submoduleDirectoryURL = repositoryFileURL.appendingPathComponent(submodule.path, isDirectory: true)

return isGitRepository(submoduleDirectoryURL)
.map { isRepository in
// Check if the submodule is initialized/updated already.
return isRepository && FileManager.default.fileExists(atPath: submoduleDirectoryURL.appendingPathComponent(".git").path)
}
.flatMap(.merge) { submoduleExists -> SignalProducer<(), CarthageError> in
if submoduleExists {
// Just check out and stage the correct revision.
return fetchRepository(submoduleDirectoryURL, remoteURL: fetchURL, refspec: "+refs/heads/*:refs/remotes/origin/*")
.then(
launchGitTask(
["config", "--file", ".gitmodules", "submodule.\(submodule.name).url", submodule.url.urlString],
repositoryFileURL: repositoryFileURL
)
)
.then(launchGitTask([ "submodule", "--quiet", "sync", "--recursive", submoduleDirectoryURL.path ], repositoryFileURL: repositoryFileURL))
.then(checkoutSubmodule(submodule, submoduleDirectoryURL))
.then(launchGitTask([ "add", "--force", submodule.path ], repositoryFileURL: repositoryFileURL))
.then(SignalProducer<(), CarthageError>.empty)
} else {
// `git clone` will fail if there's an existing file at that path, so try to remove
// anything that's currently there. If this fails, then we're no worse off.
// This can happen if you first do `carthage checkout` and then try it again with
// `--use-submodules`, e.g.
if FileManager.default.fileExists(atPath: submoduleDirectoryURL.path) {
try? FileManager.default.removeItem(at: submoduleDirectoryURL)
}

let addSubmodule = launchGitTask(
["submodule", "--quiet", "add", "--force", "--name", submodule.name, "--", submodule.url.urlString, submodule.path],
repositoryFileURL: repositoryFileURL
)
// A .failure to add usually means the folder was already added
// to the index. That's okay.
.flatMapError { _ in SignalProducer<String, CarthageError>.empty }

// If it doesn't exist, clone and initialize a submodule from our
// local bare repository.
return cloneRepository(fetchURL, submoduleDirectoryURL, isBare: false)
.then(launchGitTask([ "remote", "set-url", "origin", submodule.url.urlString ], repositoryFileURL: submoduleDirectoryURL))
.then(checkoutSubmodule(submodule, submoduleDirectoryURL))
.then(addSubmodule)
.then(launchGitTask([ "submodule", "--quiet", "init", "--", submodule.path ], repositoryFileURL: repositoryFileURL))
.then(SignalProducer<(), CarthageError>.empty)
}
}
}
214 changes: 211 additions & 3 deletions Source/CarthageKit/Project.swift
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,24 @@ extension ProjectEvent: Equatable {
}
}

/// Use these queues to ensure there is only one write operation in a specific local
/// repository directory at a time.
internal struct RepositoryOperationQueues {
private static var queues: Atomic<[URL: SerialProducerQueue]> = Atomic([:])

internal static func queue(forLocalRepositoryURL url: URL) -> SerialProducerQueue {
return queues.modify { queues -> SerialProducerQueue in
if let queue = queues[url] {
return queue
} else {
let newQueue = SerialProducerQueue(name: "org.carthage.Constants.Project.cloneOrFetchQueue-\(url.absoluteString)")
queues[url] = newQueue
return newQueue
}
}
}
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This global state makes me very uncomfortable. I don't think it's a good idea to introduce a static var with this. It leads to code that's hard to follow and modify.

We should use another method to enforce exclusivity. I think it makes sense to do something like this:

  1. Fetch all the submodules from the repository cache, in parallel but with limited concurrency.
  2. Perform the actual checkouts, passing in a queue or doing them one at a time

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see there are already a lot of queues, but they are instance properties of a Project. So I just need to move the global variable to the Project class?


/// Represents a project that is using Carthage.
public final class Project { // swiftlint:disable:this type_body_length
/// File URL to the root directory of the project.
Expand Down Expand Up @@ -793,17 +811,17 @@ public final class Project { // swiftlint:disable:this type_body_length
if let submodule = submodule {
// In the presence of `submodule` for `dependency` — before symlinking, (not after) — add submodule and its submodules:
// `dependency`, subdependencies that are submodules, and non-Carthage-housed submodules.
return addSubmoduleToRepository(self.directoryURL, submodule, GitURL(repositoryURL.path))
return self.addSubmoduleToRepository(self.directoryURL, submodule, GitURL(repositoryURL.path))
.startOnQueue(self.gitOperationQueue)
.then(symlinkCheckoutPaths)
} else {
return checkoutRepositoryToDirectory(repositoryURL, workingDirectoryURL, revision: revision)
return checkoutRepositoryToDirectoryOnQueue(repositoryURL, workingDirectoryURL, force: true, revision: revision)
// For checkouts of “ideally bare” repositories of `dependency`, we add its submodules by cloning ourselves, after symlinking.
.then(symlinkCheckoutPaths)
.then(
submodulesInRepository(repositoryURL, revision: revision)
.flatMap(.merge) {
cloneSubmoduleInWorkingDirectory($0, workingDirectoryURL)
self.cloneSubmoduleInWorkingDirectory($0, workingDirectoryURL)
}
)
}
Expand Down Expand Up @@ -1165,6 +1183,173 @@ public final class Project { // swiftlint:disable:this type_body_length
return incompatibilities.isEmpty ? .init(value: ()) : .init(error: .invalidResolvedCartfile(incompatibilities))
}
}

/// Clones the given submodule into the working directory of its parent
/// repository, but without any Git metadata.
public func cloneSubmoduleInWorkingDirectory(_ submodule: Submodule, _ workingDirectoryURL: URL) -> SignalProducer<(), CarthageError> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think these functions should move here:

  1. These are generic functions that shouldn't really be tied to the Project
  2. Because these were moved, the diff for this PR is really large and almost impossible to review

Can you move them back to the Git file?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These functions were moved because the new implementation depends on repositoryFileURL which is a private function in Project.swift.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then that should be passed in as a parameter

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about cloneOrFetchDependency? These functions actually rely on cloneOrFetchDependency which uses cloneOrFetchQueue and _projectEventsObserver which are Project properties. I think some kind of queue is definitely needed. So also pass the queue in as a parameter?

let submoduleDirectoryURL = workingDirectoryURL.appendingPathComponent(submodule.path, isDirectory: true)

func repositoryCheck<T>(_ description: String, attempt closure: () throws -> T) -> Result<T, CarthageError> {
do {
return .success(try closure())
} catch let error as NSError {
let reason = "could not \(description)"
return .failure(
.repositoryCheckoutFailed(workingDirectoryURL: submoduleDirectoryURL, reason: reason, underlyingError: error)
)
}
}

let purgeGitDirectories = FileManager.default.reactive
.enumerator(at: submoduleDirectoryURL,
includingPropertiesForKeys: [ .isDirectoryKey, .nameKey ],
options: [ .skipsSubdirectoryDescendants ],
catchErrors: true)
.attemptMap { enumerator, url -> Result<(), CarthageError> in
return repositoryCheck("enumerate name of descendant at \(url.path)", attempt: {
try url.resourceValues(forKeys: [ .nameKey ]).name
})
.flatMap { (name: String?) in
guard name == ".git" else { return .success(()) }

return repositoryCheck("determine whether \(url.path) is a directory", attempt: {
try url.resourceValues(forKeys: [ .isDirectoryKey ]).isDirectory!
})
.flatMap { (isDirectory: Bool) in
if isDirectory { enumerator.skipDescendants() }

return repositoryCheck("remove \(url.path)") {
try FileManager.default.removeItem(at: url)
}
}
}
}

let dependencyForSubmodule = Dependency.git(submodule.url)

return cloneOrFetchDependency(dependencyForSubmodule, commitish: submodule.sha)
.flatMap(.merge) { repositoryURL -> SignalProducer<String, CarthageError> in
return SignalProducer<(), CarthageError> { () -> Result<(), CarthageError> in
repositoryCheck("remove submodule checkout") {
try FileManager.default.removeItem(at: submoduleDirectoryURL)
}
}
.then(cloneRepository(GitURL(repositoryURL.path), submoduleDirectoryURL, isBare: false))
.startOnQueue(RepositoryOperationQueues.queue(forLocalRepositoryURL: submoduleDirectoryURL))
}
.then(checkoutRepositoryToDirectoryOnQueue(submoduleDirectoryURL, submoduleDirectoryURL, force: false, revision: submodule.sha))
.then(submodulesInRepository(submoduleDirectoryURL)
.flatMap(.merge) { submodule -> SignalProducer<(), CarthageError> in
return self.cloneSubmoduleInWorkingDirectory(submodule, submoduleDirectoryURL)
}
)
.then(purgeGitDirectories)
}

/// Adds the given submodule to the given repository, cloning from `fetchURL` if
/// the desired revision does not exist or the submodule needs to be cloned.
public func addSubmoduleToRepository(_ repositoryFileURL: URL, _ submodule: Submodule, _ fetchURL: GitURL) -> SignalProducer<(), CarthageError> {
let submoduleDirectoryURL = repositoryFileURL.appendingPathComponent(submodule.path, isDirectory: true)

return isGitRepository(submoduleDirectoryURL)
.map { isRepository in
// Check if the submodule is initialized/updated already.
return isRepository && FileManager.default.fileExists(atPath: submoduleDirectoryURL.appendingPathComponent(".git").path)
}
.flatMap(.merge) { submoduleExists -> SignalProducer<(), CarthageError> in
if submoduleExists {
// Just check out and stage the correct revision.
return fetchRepository(submoduleDirectoryURL, remoteURL: fetchURL, refspec: "+refs/heads/*:refs/remotes/origin/*")
.then(
launchGitTask(
["config", "--file", ".gitmodules", "submodule.\(submodule.name).url", submodule.url.urlString],
repositoryFileURL: repositoryFileURL
)
)
.then(launchGitTask([ "submodule", "--quiet", "sync", "--recursive", submoduleDirectoryURL.path ], repositoryFileURL: repositoryFileURL))
.then(checkoutRepositoryToDirectory(submoduleDirectoryURL, submoduleDirectoryURL, force: false))
.then(launchGitTask([ "add", "--force", submodule.path ], repositoryFileURL: repositoryFileURL))
.startOnQueue(RepositoryOperationQueues.queue(forLocalRepositoryURL: submoduleDirectoryURL))
.then(self.updateSubmodules(submoduleDirectoryURL))
} else {
// `git clone` will fail if there's an existing file at that path, so try to remove
// anything that's currently there. If this fails, then we're no worse off.
// This can happen if you first do `carthage checkout` and then try it again with
// `--use-submodules`, e.g.
if FileManager.default.fileExists(atPath: submoduleDirectoryURL.path) {
try? FileManager.default.removeItem(at: submoduleDirectoryURL)
}

let addSubmodule = launchGitTask(
["submodule", "--quiet", "add", "--force", "--name", submodule.name, "--", submodule.url.urlString, submodule.path],
repositoryFileURL: repositoryFileURL
)
// A .failure to add usually means the folder was already added
// to the index. That's okay.
.flatMapError { _ in SignalProducer<String, CarthageError>.empty }

// If it doesn't exist, clone and initialize a submodule from our
// local bare repository.
return cloneRepository(fetchURL, submoduleDirectoryURL, isBare: false)
.then(launchGitTask([ "remote", "set-url", "origin", submodule.url.urlString ], repositoryFileURL: submoduleDirectoryURL))
.then(checkoutRepositoryToDirectory(submoduleDirectoryURL, submoduleDirectoryURL, force: false))
.then(addSubmodule)
.then(launchGitTask([ "submodule", "--quiet", "init", "--", submodule.path ], repositoryFileURL: repositoryFileURL))
.startOnQueue(RepositoryOperationQueues.queue(forLocalRepositoryURL: submoduleDirectoryURL))
.then(self.updateSubmodules(submoduleDirectoryURL))
}
}
}

public func updateSubmodules(_ superprojectDirectoryURL: URL) -> SignalProducer<(), CarthageError> {
func changeGitmodulesUrls(toCacheURL: Bool) -> SignalProducer<(), CarthageError> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is necessary. The original code in addSubmoduleToRepository already handled doing the equivalent of git submodule update --init from the repository cache.

If your intent is to make that logic recursive, I think you can do that following the existing method without modifying .gitmodules.

return submodulesInRepository(superprojectDirectoryURL)
.flatMap(.concat) { submodule -> SignalProducer<String, CarthageError> in
let url: String
if toCacheURL {
let dependencyForSubmodule = Dependency.git(submodule.url)
let submoduleCacheRepository = repositoryFileURL(for: dependencyForSubmodule)
url = submoduleCacheRepository.absoluteString
} else {
url = submodule.url.urlString
}

return launchGitTask(
[ "config", "--file", ".gitmodules", "submodule.\(submodule.name).url", url ],
repositoryFileURL: superprojectDirectoryURL
)
}
.then(launchGitTask([ "submodule", "--quiet", "sync" ], repositoryFileURL: superprojectDirectoryURL))
.then(SignalProducer<(), CarthageError>.empty)
}

return submodulesInRepository(superprojectDirectoryURL)
.collect()
.flatMap(.concat) { submodules -> SignalProducer<(), CarthageError> in
guard !submodules.isEmpty else {
return SignalProducer.empty
}

return SignalProducer(submodules)
.flatMap(.merge) { submodule -> SignalProducer<URL, CarthageError> in
let dependencyForSubmodule = Dependency.git(submodule.url)
return self.cloneOrFetchDependency(dependencyForSubmodule, commitish: submodule.sha)
}
.then(
changeGitmodulesUrls(toCacheURL: true)
.then(launchGitTask([ "submodule", "--quiet", "update", "--init" ], repositoryFileURL: superprojectDirectoryURL))
.then(changeGitmodulesUrls(toCacheURL: false))
.startOnQueue(RepositoryOperationQueues.queue(forLocalRepositoryURL: superprojectDirectoryURL))
)
.then(
SignalProducer(submodules)
.flatMap(.merge) { submodule -> SignalProducer<(), CarthageError> in
let submoduleDirectoryURL = superprojectDirectoryURL.appendingPathComponent(submodule.path, isDirectory: true)
return self.updateSubmodules(submoduleDirectoryURL)
}
)
}
}
}

/// Creates symlink between the dependency build folder and the root build folder
Expand Down Expand Up @@ -1448,6 +1633,28 @@ internal func relativeLinkDestination(for dependency: Dependency, subdirectory:
return linkDestinationPath
}

/// Checks out the working tree of the given (ideally bare) repository, at the
/// specified revision, to the given folder, in a queue to prevent any race condition.
/// If the folder does not exist, it will be created.
internal func checkoutRepositoryToDirectoryOnQueue(
_ repositoryFileURL: URL,
_ workingDirectoryURL: URL,
force: Bool,
revision: String = "HEAD"
) -> SignalProducer<(), CarthageError> {
if repositoryFileURL != workingDirectoryURL {
// Git will try to lock the repositoryFileURL by creating a index.lock file.
// If the file already exists, the git operation will fail, so this queue
// is necessary.
return checkoutRepositoryToDirectory(repositoryFileURL, workingDirectoryURL, force: force, revision: revision)
.startOnQueue(RepositoryOperationQueues.queue(forLocalRepositoryURL: workingDirectoryURL))
.startOnQueue(RepositoryOperationQueues.queue(forLocalRepositoryURL: repositoryFileURL))
} else {
return checkoutRepositoryToDirectory(repositoryFileURL, workingDirectoryURL, force: force, revision: revision)
.startOnQueue(RepositoryOperationQueues.queue(forLocalRepositoryURL: workingDirectoryURL))
}
}

/// Clones the given project to the given destination URL (defaults to the global
/// repositories folder), or fetches inside it if it has already been cloned.
/// Optionally takes a commitish to check for prior to fetching.
Expand Down Expand Up @@ -1516,4 +1723,5 @@ public func cloneOrFetch(
}
}
}
.startOnQueue(RepositoryOperationQueues.queue(forLocalRepositoryURL: repositoryURL))
}