Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,23 @@ public struct PackageCollectionGeneratorInput: Equatable, Codable {
/// The author of this package collection.
public let author: PackageCollectionModel.V1.Collection.Author?

/// A mapping of the hosts to the corresponding metadata-provider (GitHub, GitLab, ...).
public let metadataProviders: [String: String]?

public init(
name: String,
overview: String? = nil,
keywords: [String]? = nil,
packages: [Package],
author: PackageCollectionModel.V1.Collection.Author? = nil
author: PackageCollectionModel.V1.Collection.Author? = nil,
metadataProviders: [String: String]? = nil
) {
self.name = name
self.overview = overview
self.keywords = keywords
self.packages = packages
self.author = author
self.metadataProviders = metadataProviders
}
}

Expand All @@ -56,7 +61,8 @@ extension PackageCollectionGeneratorInput: CustomStringConvertible {
overview=\(self.overview ?? "nil"),
keywords=\(self.keywords.map { "\($0)" } ?? "nil"),
packages=\(self.packages),
author=\(self.author.map { "\($0)" } ?? "nil")
author=\(self.author.map { "\($0)" } ?? "nil"),
metadataProviders=\(self.metadataProviders.map { "\($0)" } ?? "nil")
}
"""
}
Expand Down
31 changes: 28 additions & 3 deletions Sources/PackageCollectionGenerator/PackageCollectionGenerate.swift
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public struct PackageCollectionGenerate: ParsableCommand {
@Option(parsing: .upToNextOption, help:
"""
Auth tokens each in the format of type:host:token for retrieving additional package metadata via source
hosting platform APIs. Currently only GitHub APIs are supported. An example token would be github:github.com:<TOKEN>.
hosting platform APIs. Currently GitHub and GitLab APIs are supported. An example token would be github:github.com:<TOKEN>.
""")
private var authToken: [String] = []

Expand Down Expand Up @@ -89,12 +89,37 @@ public struct PackageCollectionGenerate: ParsableCommand {
let input = try jsonDecoder.decode(PackageCollectionGeneratorInput.self, from: Data(contentsOf: URL(fileURLWithPath: self.inputPath)))
print("\(input)", verbose: self.verbose)

let githubPackageMetadataProvider = GitHubPackageMetadataProvider(authTokens: authTokens)
// Build the host-to-metadata-provider-mapping
var hostToProviderMapping: [String: PackageMetadataProvider] = [
"github.com": GitHubPackageMetadataProvider(authTokens: authTokens),
"gitlab.com": GitLabPackageMetadataProvider(authTokens: authTokens)
]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should introduce PackageMetadataProviderType enum:

enum PackageMetadataProviderType {
    case github
    case gitlab
}

such that:

  • PackageCollectionGeneratorInput.metadataProviderMapping would be of type [String: PackageMetadataProviderType] instead.
  • There would be a dictionary metadataProviderByType: [PackageMetadataProviderType: PackageMetadataProvider] so GitHubPackageMetadataProvider/GitLabPackageMetadataProvider only needs to be initialized once.
  • hostToProviderMapping: [String: PackageMetadataProvider] wouldn't be necessary I think, because we can do
let metadataProviderType = input.metadataProviderMapping[host]
let metadataProvider = metadataProviderByType[metadataProviderType]

Also, we should default metadataProvider to GitHubPackageMetadataProvider to keep the original behavior--i.e., the generator always tries fetching metadata through GitHub APIs even if user doesn't provide any tokens and/or configuration.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my first try I implemented it this way, but afterwards I changed it to [String: String]. The main reason is the usability for the user. Currently, if the input.json contains a provider that is not implemented, the error-message will be "Provider \(provider) for host \(host) is not implemented". If the provider is an enum, the decoding of the JSON will fail with a message like "Cannot initialize PackageMetadataProviderType from invalid String value bitbucket". This wouldn't guide the user what is really going wrong. Implementing the init(from decoder: Decoder) throws-method and throw a more descriptive error, like this

enum PackageMetadataProviderType: String {
	case github
	case gitlab
}

extension PackageMetadataProviderType: Codable {
	enum Errors: Error, Equatable, CustomDebugStringConvertible {
		case providerNotImplemented(String)
		
		var debugDescription: String {
			switch self {
				case let .providerNotImplemented(provider):
				return "Provider '\(provider)' is not implemented"
			}
		}
	}
	
	init(from decoder: Decoder) throws {
		let container = try decoder.singleValueContainer()
		let provider = try container.decode(String.self)
		switch provider {
			case "github":
				self = .github
			case "gitlab":
				self = .gitlab
			default:
				throw Errors.providerNotImplemented(provider)
		}
	}
}

improves the situation, as the user will at least get a message like "Provider 'bitbucket' is not implemented". This would be acceptable, but from my point of view it is not as good as the current message. And it doesn't simplify the implementation. Would you still prefer this implementation, or do you see another possibility that I didn't think of?

Another point where I wasn't sure about: In my current implementation, there is one instance of a *MetaDataProvider-class per Host, each one with it's own instance of HTTPClient. I'm not sure if this may cause any problems, if one instance would handle multiple hosts (I don't see any state in it on a first glance that could cause problems, but I haven't studied it enough to rule it out). Like, if there are three different hosts that make use of the gitlab-API - may it cause a problem, if all of them use the same HTTPClient-instance?

Regarding making GitHubPackageMetadataProvider as the default: I prefer to not make an internal assumption of a default-provider. It would lead to confusing and inconsistent errors, if it tries to use the GitHub-API for other providers. The current error-message "Missing provider for host \(package.url.host ?? "invalid host"). Please specify it in the input-json in metadataProviders." does not only give an error what is missing, but also offers a guidance where to add it. As the providers are already setup for github.com, it would not change the current behaviour.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my first try I implemented it this way, but afterwards I changed it to [String: String]. The main reason is the usability for the user.

I see. What if we keep metadataProviders [String: String] in PackageCollectionGeneratorInput but introduce an intermediate, internal method that converts it to [String: PackageMetadataProviderType]? The method can throw error with user-friendly message when it comes across a provider that it doesn't recognize.

Another point where I wasn't sure about: In my current implementation, there is one instance of a *MetaDataProvider-class per Host, each one with it's own instance of HTTPClient.

Yes, I don't think it's necessary to have one provider instance per host and that's why I suggest having metadataProviderByType: [PackageMetadataProviderType: PackageMetadataProvider] instead. I don't see a problem sharing the same provider instance across multiple hosts (HTTPClient is not tied to a host), but then I also don't think there would be so many different hosts that we would end up getting into trouble with one instance per host.

I do prefer singleton by provider type though.

I prefer to not make an internal assumption of a default-provider.

How about we add an option (e.g., --default-metadata-provider) that defaults to nil (none), but user can set it to one of PackageMetadataProviderType if they choose to?

if let mappingFromInput = input.metadataProviders {
for (host, provider) in mappingFromInput {
switch provider {
case "github":
hostToProviderMapping[host] = GitHubPackageMetadataProvider(authTokens: authTokens)
case "gitlab":
hostToProviderMapping[host] = GitLabPackageMetadataProvider(authTokens: authTokens)
default:
printError("Provider \(provider) for host \(host) is not implemented")
return
}
}
}

// Generate metadata for each package
let packages: [Model.Collection.Package] = input.packages.compactMap { package in
do {
let packageMetadata = try self.generateMetadata(for: package, metadataProvider: githubPackageMetadataProvider, jsonDecoder: jsonDecoder)
guard let host = package.url.host,
let metadataProvider = hostToProviderMapping[host] else {
printError("Missing provider for host \(package.url.host ?? "invalid host"). Please specify it in the input-json in metadataProviders.")
return nil
}

let packageMetadata = try self.generateMetadata(for: package,
metadataProvider: metadataProvider,
jsonDecoder: jsonDecoder)
print("\(packageMetadata)", verbose: self.verbose)

guard !packageMetadata.versions.isEmpty else {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift Package Collection Generator open source project
//
// Copyright (c) 2021 Apple Inc. and the Swift Package Collection Generator project authors
// Licensed under Apache License v2.0
//
// See LICENSE.txt for license information
// See CONTRIBUTORS.txt for the list of Swift Package Collection Generator project authors
//
// SPDX-License-Identifier: Apache-2.0
//
//===----------------------------------------------------------------------===//

import Dispatch
import Foundation

import TSCBasic
import Basics
import PackageCollectionsModel
import Utilities

struct GitLabPackageMetadataProvider: PackageMetadataProvider {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add GitLabPackageMetadataProviderTests (see GitHubPackageMetadataProviderTests for example) that tests with real sample response payloads.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I will add them.

private static let apiHostURLPathPostfix = "api/v4"

private let authTokens: [AuthTokenType: String]
private let httpClient: HTTPClient
private let decoder: JSONDecoder
private enum ResultKeys {
case metadata
}

init(authTokens: [AuthTokenType: String] = [:], httpClient: HTTPClient? = nil) {
self.authTokens = authTokens
self.httpClient = httpClient ?? Self.makeDefaultHTTPClient()
self.decoder = JSONDecoder.makeWithDefaults()
}

func get(_ packageURL: URL, callback: @escaping (Result<PackageBasicMetadata, Error>) -> Void) {
guard let baseURL: URL = self.apiURL(packageURL) else {
return callback(.failure(Errors.invalidGitURL(packageURL)))
}

// get the main data
let metadataHeaders = HTTPClientHeaders()
let metadataOptions = self.makeRequestOptions(validResponseCodes: [200, 401, 403, 404])
let hasAuthorization = metadataOptions.authorizationProvider?(baseURL) != nil
var result: Result<HTTPClient.Response, Error> = tsc_await { callback in self.httpClient.get(baseURL, headers: metadataHeaders, options: metadataOptions, completion: callback) }

if case .success(let response) = result {
let apiLimit = response.headers.get("RateLimit-Limit").first.flatMap(Int.init) ?? -1
let apiRemaining = response.headers.get("RateLimit-Remaining").first.flatMap(Int.init) ?? -1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

switch (response.statusCode, hasAuthorization, apiRemaining) {
case (_, _, 0):
result = .failure(Errors.apiLimitsExceeded(baseURL, apiLimit, apiRemaining))
case (401, true, _):
result = .failure(Errors.invalidAuthToken(baseURL))
case (401, false, _):
result = .failure(Errors.permissionDenied(baseURL))
case (403, _, _):
result = .failure(Errors.permissionDenied(baseURL))
case (404, _, _):
result = .failure(Errors.notFound(baseURL))
case (200, _, _):
guard let metadata = try? response.decodeBody(GetProjectResponse.self, using: self.decoder) else {
callback(.failure(Errors.invalidResponse(baseURL, "Invalid body")))
return
}

let license = metadata.license
let packageLicense: PackageCollectionModel.V1.License?
if let licenseURL = metadata.licenseURL.flatMap({ URL(string: $0) }) {
packageLicense = .init(name: license?.key, url: licenseURL)
} else {
packageLicense = nil
}

let model = PackageBasicMetadata(
summary: metadata.description,
keywords: metadata.topics,
readmeURL: metadata.readmeURL.flatMap { URL(string: $0) },
license: packageLicense
)

callback(.success(model))
default:
callback(.failure(Errors.invalidResponse(baseURL, "Invalid status code: \(response.statusCode)")))
}
}
}

internal func apiURL(_ url: URL) -> Foundation.URL? {
guard let baseURL = URL(string: url.absoluteString.spm_dropGitSuffix()),
let urlComponents = URLComponents(url: baseURL, resolvingAgainstBaseURL: false),
let scheme = urlComponents.scheme,
let host = urlComponents.host else {
return nil
}
let projectPath = urlComponents.path.dropFirst().replacingOccurrences(of: "/", with: "%2F")
let apiPrefix = GitLabPackageMetadataProvider.apiHostURLPathPostfix
let metadataURL = URL(string: "\(scheme)://\(host)/\(apiPrefix)/projects/\(projectPath)")!
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to be safe, we can guard instead of using !?

return metadataURL
}

private func makeRequestOptions(validResponseCodes: [Int]) -> HTTPClientRequest.Options {
var options = HTTPClientRequest.Options()
options.addUserAgent = true
options.validResponseCodes = validResponseCodes
options.authorizationProvider = { url in
url.host.flatMap { host in
return self.authTokens[.gitlab(host)].flatMap { token in "Bearer \(token)" }
}
}
return options
}

private static func makeDefaultHTTPClient() -> HTTPClient {
var client = HTTPClient()
client.configuration.requestTimeout = .seconds(2)
client.configuration.retryStrategy = .exponentialBackoff(maxAttempts: 3, baseDelay: .milliseconds(50))
client.configuration.circuitBreakerStrategy = .hostErrors(maxErrors: 50, age: .seconds(30))
return client
}

enum Errors: Error, Equatable {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps refactor this into PackageMetadataProviderError so it can be shared across all providers.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I only realise now that there is also code inside SwiftPM to fetch metadata. And it seems like that the usage of the GitHubPackageMetadataProvider is hard-coded in it. This seems like that there should be a bigger consolidation overall. I agree with you that the error-codes should be shared, but I would either put them inside a new file inside this repository or defer it for a greater consolodation inside SwiftPM.
Is there anything planned to consolidate the metadata-provider-logic of SwiftPM and swift-package-collection-generator?

Copy link
Contributor

@yim-lee yim-lee Jan 8, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there anything planned to consolidate the metadata-provider-logic of SwiftPM and swift-package-collection-generator?

There is plan to move swift-package-collection-generator into SwiftPM actually. This repo started out separate from SwiftPM because we needed to be able to iterate on this more quickly, but now that things have stabilized, we would like to make this part of SwiftPM, which is more fitting. And when we do that, we will consolidate the code as you point out.

Let's just leave this code as-is for now.

case invalidGitURL(URL)
case invalidResponse(URL, String)
case permissionDenied(URL)
case invalidAuthToken(URL)
case apiLimitsExceeded(URL, Int, Int)
case notFound(URL)
}
}

extension GitLabPackageMetadataProvider {
fileprivate struct GetProjectResponse: Codable {
let name: String
let fullName: String
let description: String?
let topics: [String]?
let licenseURL: String?
let readmeURL: String?
let license: License?

private enum CodingKeys: String, CodingKey {
case name
case fullName = "name_with_namespace"
case description
case topics
case licenseURL = "license_url"
case readmeURL = "readme_url"
case license
}
}
}

extension GitLabPackageMetadataProvider {
fileprivate struct License: Codable {
let htmlURL: Foundation.URL
let sourceURL: Foundation.URL
let key: String?

private enum CodingKeys: String, CodingKey {
case htmlURL = "html_url"
case sourceURL = "source_url"
case key
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,23 @@ protocol PackageMetadataProvider {

enum AuthTokenType: Hashable, CustomStringConvertible {
case github(_ host: String)
case gitlab(_ host: String)

var description: String {
switch self {
case .github(let host):
return "github(\(host))"
case .gitlab(let host):
return "gitlab(\(host))"
}
}

static func from(type: String, host: String) -> AuthTokenType? {
switch type {
case "github":
return .github(host)
case "gitlab":
return .gitlab(host)
default:
return nil
}
Expand Down
4 changes: 4 additions & 0 deletions Sources/PackageCollectionGenerator/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ Collection metadata:
* `keywords`: An array of keywords that the collection is associated with. **Optional.**
* `author`: The author of this package collection. **Optional.**
* `name`: The author name.
* `metadataProviders`: A dictionary describing a mapping from hosts to metadata-provider (github / gitlab). **Optional.**
* `packages`: An array of package objects.

Each item in the `packages` array is a package object with the following fields:
Expand Down Expand Up @@ -83,6 +84,9 @@ Each item in the `packages` array is a package object with the following fields:
],
"author": {
"name": "Jane Doe"
},
"metadataProviders": {
"www.example.com": "gitlab"
}
}
```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ final class PackageCollectionGenerateTests: XCTestCase {
summary: "Package Baz",
versions: ["1.0.0"]
),
]
],
metadataProviders: ["package-collection-tests.com": "github"]
)
let jsonEncoder = JSONEncoder.makeWithDefaults()
let inputData = try jsonEncoder.encode(input)
Expand Down Expand Up @@ -235,7 +236,8 @@ final class PackageCollectionGenerateTests: XCTestCase {
summary: "Package Foo & Bar",
excludedVersions: ["0.1.0"]
),
]
],
metadataProviders: ["package-collection-tests.com": "github"]
)
let jsonEncoder = JSONEncoder.makeWithDefaults()
let inputData = try jsonEncoder.encode(input)
Expand Down
6 changes: 6 additions & 0 deletions Tests/PackageCollectionGeneratorTests/Inputs/test-input.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,13 @@
"url": "https://package-collection-tests.com/repos/foobaz.git"
}
],
"metadataProviders": {
"package-collection-tests.com": "github"
},
"author": {
"name": "Jane Doe"
},
"metadataProviders": {
"package-collection-tests.com": "github"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ class PackageCollectionGeneratorInputTests: XCTestCase {
url: URL(string: "https://package-collection-tests.com/repos/foobaz.git")!
),
],
author: .init(name: "Jane Doe")
author: .init(name: "Jane Doe"),
metadataProviders: ["package-collection-tests.com": "github"]
)

let inputFilePath = AbsolutePath(#file).parentDirectory.appending(components: "Inputs", "test-input.json")
Expand Down Expand Up @@ -68,7 +69,8 @@ class PackageCollectionGeneratorInputTests: XCTestCase {
readmeURL: URL(string: "https://package-collection-tests.com/repos/foobar/README")!
),
],
author: .init(name: "Jane Doe")
author: .init(name: "Jane Doe"),
metadataProviders: ["package-collection-tests.com": "github"]
)

let data = try JSONEncoder().encode(input)
Expand Down