From 92f790c07125c046a0ce9c10bde5343b36fe3c23 Mon Sep 17 00:00:00 2001 From: leomattic Date: Wed, 28 Aug 2024 15:50:49 +0200 Subject: [PATCH 1/3] Add new stream for pr-files --- setup.py | 2 +- tap_github/schemas/pr_files.json | 66 ++++++++++++++++++++++++++++++++ tap_github/streams.py | 26 ++++++++++++- 3 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 tap_github/schemas/pr_files.json diff --git a/setup.py b/setup.py index 2271e6ea..3be29e12 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup, find_packages setup(name='tap-github', - version='2.0.6', + version='2.0.7', description='Singer.io tap for extracting data from the GitHub API', author='Stitch', url='http://singer.io', diff --git a/tap_github/schemas/pr_files.json b/tap_github/schemas/pr_files.json new file mode 100644 index 00000000..f32b40c1 --- /dev/null +++ b/tap_github/schemas/pr_files.json @@ -0,0 +1,66 @@ +{ + "properties": { + "_sdc_repository": { + "type": ["string"] + }, + "sha": { + "type": ["null", "string"] + }, + "filename": { + "type": ["null", "string"] + }, + "status": { + "type": ["null", "string"], + "enum": [ + "added", + "removed", + "modified", + "renamed", + "copied", + "changed", + "unchanged" + ] + }, + "additions": { + "type": ["null", "integer"] + }, + "deletions": { + "type": ["null", "integer"] + }, + "changes": { + "type": ["null", "integer"] + }, + "blob_url": { + "type": ["null", "string"], + "format": "uri" + }, + "raw_url": { + "type": ["null", "string"], + "format": "uri" + }, + "contents_url": { + "type": ["null", "string"], + "format": "uri" + }, + "patch": { + "type": ["null", "string"] + }, + "previous_filename": { + "type": ["null", "string"] + }, + "pr_number": { + "type": ["null", "integer"] + }, + "pr_id": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "string"] + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + } + } + } + \ No newline at end of file diff --git a/tap_github/streams.py b/tap_github/streams.py index 3a75a003..a371c180 100644 --- a/tap_github/streams.py +++ b/tap_github/streams.py @@ -505,6 +505,29 @@ def add_fields_at_1st_level(self, record, parent_record = None): record['pr_id'] = parent_record.get('id') record['id'] = '{}-{}'.format(parent_record.get('id'), record.get('sha')) +class PRFiles(IncrementalStream): + ''' + https://docs.github.com/en/rest/pulls/pulls#list-pull-requests-files + ''' + tap_stream_id = "pr_files" + replication_method = "INCREMENTAL" + replication_keys = "updated_at" + key_properties = ["id"] + path = "pulls/{}/files" + use_repository = True + id_keys = ['number'] + parent = 'pull_requests' + + def add_fields_at_1st_level(self, record, parent_record = None): + """ + Add fields in the record explicitly at the 1st level of JSON. + """ + record['updated_at'] = record['commit']['committer']['date'] + + record['pr_number'] = parent_record.get('number') + record['pr_id'] = parent_record.get('id') + record['id'] = '{}-{}'.format(parent_record.get('id'), record.get('sha')) + class PullRequests(IncrementalOrderedStream): ''' https://developer.github.com/v3/pulls/#list-pull-requests @@ -514,7 +537,7 @@ class PullRequests(IncrementalOrderedStream): replication_keys = "updated_at" key_properties = ["id"] path = "pulls?state=all&sort=updated&direction=desc" - children = ['reviews', 'review_comments', 'pr_commits'] + children = ['reviews', 'review_comments', 'pr_commits', 'pr_files'] pk_child_fields = ["number"] class ProjectCards(IncrementalStream): @@ -773,6 +796,7 @@ def add_fields_at_1st_level(self, record, parent_record = None): "reviews": Reviews, "review_comments": ReviewComments, "pr_commits": PRCommits, + "pr_files": PRFiles, "teams": Teams, "team_members": TeamMembers, "team_memberships": TeamMemberships, From a3ae238f1dd46bf8bbad92f33b26ecb0b7708b6b Mon Sep 17 00:00:00 2001 From: leomattic Date: Wed, 28 Aug 2024 16:58:38 +0200 Subject: [PATCH 2/3] fix end of line --- tap_github/schemas/pr_files.json | 1 - 1 file changed, 1 deletion(-) diff --git a/tap_github/schemas/pr_files.json b/tap_github/schemas/pr_files.json index f32b40c1..9ce73c48 100644 --- a/tap_github/schemas/pr_files.json +++ b/tap_github/schemas/pr_files.json @@ -63,4 +63,3 @@ } } } - \ No newline at end of file From cbeb3360d7dc1ed264cb7d598e8d9f2ff8a5bd73 Mon Sep 17 00:00:00 2001 From: Joao Amaral <7281460+joaopamaral@users.noreply.github.com> Date: Fri, 30 Aug 2024 10:59:50 -0300 Subject: [PATCH 3/3] schema fix --- tap_github/schemas/pr_files.json | 2 +- tap_github/streams.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/tap_github/schemas/pr_files.json b/tap_github/schemas/pr_files.json index 9ce73c48..1340f311 100644 --- a/tap_github/schemas/pr_files.json +++ b/tap_github/schemas/pr_files.json @@ -57,7 +57,7 @@ "id": { "type": ["null", "string"] }, - "updated_at": { + "pr_updated_at": { "type": ["null", "string"], "format": "date-time" } diff --git a/tap_github/streams.py b/tap_github/streams.py index a371c180..84e8cdc1 100644 --- a/tap_github/streams.py +++ b/tap_github/streams.py @@ -511,7 +511,7 @@ class PRFiles(IncrementalStream): ''' tap_stream_id = "pr_files" replication_method = "INCREMENTAL" - replication_keys = "updated_at" + replication_keys = "pr_updated_at" key_properties = ["id"] path = "pulls/{}/files" use_repository = True @@ -522,10 +522,9 @@ def add_fields_at_1st_level(self, record, parent_record = None): """ Add fields in the record explicitly at the 1st level of JSON. """ - record['updated_at'] = record['commit']['committer']['date'] - + record['pr_updated_at'] = parent_record['updated_at'] record['pr_number'] = parent_record.get('number') - record['pr_id'] = parent_record.get('id') + record['pr_id'] = str(parent_record.get('id')) record['id'] = '{}-{}'.format(parent_record.get('id'), record.get('sha')) class PullRequests(IncrementalOrderedStream):