|
| 1 | +/// Enables checking a package's license from pub.dev. |
| 2 | +/// |
| 3 | +/// This library is intended to be used by Very Good CLI to help extract |
| 4 | +/// license information. The existence of this library is likely to be |
| 5 | +/// ephemeral. It may be obsolete once [pub.dev](https://pub.dev/) exposes |
| 6 | +/// stable license information in their official API; you may track the |
| 7 | +/// progress [here](https://github.com/dart-lang/pub-dev/issues/4717). |
| 8 | +library pub_license; |
| 9 | + |
| 10 | +import 'package:html/dom.dart' as html_dom; |
| 11 | +import 'package:html/parser.dart' as html_parser; |
| 12 | +import 'package:http/http.dart' as http; |
| 13 | +import 'package:meta/meta.dart'; |
| 14 | + |
| 15 | +/// Builds the pub.dev [Uri] of the license page for [packageName]. |
| 16 | +Uri _pubPackageLicenseUri(String packageName) { |
| 17 | +  final licensePageUrl = 'https://pub.dev/packages/$packageName/license'; |
| 18 | +  return Uri.parse(licensePageUrl); |
| 19 | +} |
| 18 | + |
| 19 | +/// {@template pub_license_exception} |
| 20 | +/// An exception thrown by [PubLicense]. |
| 21 | +/// {@endtemplate} |
| 22 | +class PubLicenseException implements Exception { |
| 23 | +  /// {@macro pub_license_exception} |
| 24 | +  /// |
| 25 | +  /// The given [message] is stored prefixed with `[pub_license] `. |
| 26 | +  const PubLicenseException(String message) |
| 27 | +      : message = '[pub_license] $message'; |
| 28 | + |
| 29 | +  /// The exception message, including the `[pub_license] ` prefix. |
| 30 | +  final String message; |
| 31 | + |
| 32 | +  /// Returns [message], so printing the exception shows the failure reason |
| 33 | +  /// rather than the default `Instance of 'PubLicenseException'`. |
| 34 | +  @override |
| 35 | +  String toString() => message; |
| 36 | +} |
| 30 | + |
| 31 | +/// Signature of a function that parses HTML input into a [html_dom.Document]. |
| 32 | +@visibleForTesting |
| 33 | +typedef HtmlDocumentParse = html_dom.Document Function( |
| 34 | + dynamic input, { |
| 35 | + String? encoding, |
| 36 | + bool generateSpans, |
| 37 | + String? sourceUrl, |
| 38 | +}); |
| 39 | + |
| 40 | +/// {@template pub_license} |
| 41 | +/// Looks up the license of packages hosted on pub.dev. |
| 42 | +/// {@endtemplate} |
| 43 | +class PubLicense { |
| 44 | +  /// {@macro pub_license} |
| 45 | +  PubLicense({ |
| 46 | +    @visibleForTesting http.Client? client, |
| 47 | +    @visibleForTesting HtmlDocumentParse? parse, |
| 48 | +  })  : _client = client ?? http.Client(), |
| 49 | +        _parse = parse ?? html_parser.parse; |
| 50 | + |
| 51 | +  /// The HTTP client used to request the license page. |
| 52 | +  final http.Client _client; |
| 53 | + |
| 54 | +  /// The function used to turn the response body into a document. |
| 55 | +  final HtmlDocumentParse _parse; |
| 56 | + |
| 57 | +  /// Fetches the license page of [packageName] and scrapes its licenses. |
| 58 | +  /// |
| 59 | +  /// A [Set] is returned because some packages have multiple licenses. |
| 60 | +  /// |
| 61 | +  /// Throws a [PubLicenseException] when: |
| 62 | +  /// * pub.dev responds with a status code other than 200. |
| 63 | +  /// * The response body cannot be parsed. |
| 64 | +  Future<Set<String>> getLicense(String packageName) async { |
| 65 | +    final licenseUri = _pubPackageLicenseUri(packageName); |
| 66 | +    final response = await _client.get(licenseUri); |
| 67 | +    if (response.statusCode != 200) { |
| 68 | +      throw PubLicenseException( |
| 69 | +        '''Failed to retrieve the license of the package, received status code: ${response.statusCode}''', |
| 70 | +      ); |
| 71 | +    } |
| 72 | + |
| 73 | +    return _scrapeLicense(_parseBody(response.body)); |
| 74 | +  } |
| 75 | + |
| 76 | +  /// Parses [body] into a document, rethrowing any failure as a |
| 77 | +  /// [PubLicenseException]. |
| 78 | +  html_dom.Document _parseBody(String body) { |
| 79 | +    try { |
| 80 | +      return _parse(body); |
| 81 | +    } on html_parser.ParseError catch (error) { |
| 82 | +      throw PubLicenseException( |
| 83 | +        'Failed to parse the response body, received error: $error', |
| 84 | +      ); |
| 85 | +    } catch (error) { |
| 86 | +      throw PubLicenseException( |
| 87 | +        '''An unknown error occurred when trying to parse the response body, received error: $error''', |
| 88 | +      ); |
| 89 | +    } |
| 90 | +  } |
| 91 | +} |
| 92 | + |
| 93 | +/// Scrapes the license from the pub.dev's package license page. |
| 94 | +/// |
| 95 | +/// The expected HTML structure is: |
| 96 | +/// ```html |
| 97 | +/// <aside class="detail-info-box"> |
| 98 | +/// <h3> ... </h3> |
| 99 | +/// <p> ... </p> |
| 100 | +/// <h3 class="title">License</h3> |
| 101 | +/// <p> |
| 102 | +/// <img/> |
| 103 | +/// MIT (<a href="/packages/very_good_cli/license">LICENSE</a>) |
| 104 | +/// </p> |
| 105 | +/// </aside> |
| 106 | +/// ``` |
| 107 | +/// |
| 108 | +/// Returns the comma-separated license names found before the first `(` of |
| 109 | +/// the element that follows the license header, each trimmed. |
| 110 | +/// |
| 111 | +/// It may throw a [PubLicenseException] if: |
| 112 | +/// * The detail info box is not found. |
| 113 | +/// * The license header is not found. |
| 114 | +/// * No element follows the license header. |
| 115 | +Set<String> _scrapeLicense(html_dom.Document document) { |
| 116 | +  final detailInfoBox = document.querySelector('.detail-info-box'); |
| 117 | +  if (detailInfoBox == null) { |
| 118 | +    throw const PubLicenseException( |
| 119 | +      '''Failed to scrape license because `.detail-info-box` was not found.''', |
| 120 | +    ); |
| 121 | +  } |
| 122 | + |
| 123 | +  final children = detailInfoBox.children; |
| 124 | +  final headerIndex = children.indexWhere( |
| 125 | +    (child) => child.text.trim().toLowerCase() == 'license', |
| 126 | +  ); |
| 127 | +  if (headerIndex == -1) { |
| 128 | +    throw const PubLicenseException( |
| 129 | +      '''Failed to scrape license because the license header was not found.''', |
| 130 | +    ); |
| 131 | +  } |
| 132 | + |
| 133 | +  // The license text lives in the element right after the header. Guard |
| 134 | +  // against the header being the last child, which would otherwise surface |
| 135 | +  // as a RangeError instead of a PubLicenseException. |
| 136 | +  final licenseIndex = headerIndex + 1; |
| 137 | +  if (licenseIndex >= children.length) { |
| 138 | +    throw const PubLicenseException( |
| 139 | +      '''Failed to scrape license because no element follows the license header.''', |
| 140 | +    ); |
| 141 | +  } |
| 142 | +  final rawLicenseText = children[licenseIndex].text.trim(); |
| 143 | + |
| 144 | +  // Drop everything from the first "(" onwards (the "(LICENSE)" link text), |
| 145 | +  // then split the remaining comma-separated license names. |
| 146 | +  final licenseText = rawLicenseText.split('(').first.trim(); |
| 147 | +  return licenseText.split(',').map((e) => e.trim()).toSet(); |
| 148 | +} |
0 commit comments