From 6be3a2a51755709fe5d37dc5920412de13fdbbdf Mon Sep 17 00:00:00 2001 From: joaoamaral <7281460+joaopamaral@users.noreply.github.com> Date: Thu, 12 Jan 2023 13:51:16 -0300 Subject: [PATCH 01/12] Stopping requests on last page and increasing max per page --- tap_github/client.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/tap_github/client.py b/tap_github/client.py index e8a11d4c..2120230e 100644 --- a/tap_github/client.py +++ b/tap_github/client.py @@ -1,5 +1,6 @@ import time import requests +from requests.models import PreparedRequest import backoff from simplejson import JSONDecodeError import singer @@ -9,6 +10,7 @@ LOGGER = singer.get_logger() DEFAULT_SLEEP_SECONDS = 600 DEFAULT_MIN_REMAIN_RATE_LIMIT = 0 +DEFAULT_MAX_PER_PAGE = 100 DEFAULT_DOMAIN = "https://api.github.com" # Set default timeout of 300 seconds @@ -97,7 +99,7 @@ class TooManyRequests(GithubException): } } -def raise_for_error(resp, source, stream, client, should_skip_404): +def raise_for_error(resp, source, stream, client, should_skip_404, should_skip_422): """ Retrieve the error code and the error message from the response and return custom exceptions accordingly. """ @@ -118,6 +120,13 @@ def raise_for_error(resp, source, stream, client, should_skip_404): # Don't raise a NotFoundException return None + if error_code == 422 and should_skip_422: + message = ("HTTP-error-code: 404, Error: {}. Please refer \'{}\' for more details. " + "The next pages will be skipped due to Github API limit of 40k records.").format( + response_json.get('message'), response_json.get("documentation_url")) + LOGGER.warning(message) + return None + message = "HTTP-error-code: {}, Error: {}".format( error_code, ERROR_CODE_EXCEPTION_MAPPING.get(error_code, {}).get("message", "Unknown Error") if response_json == {} else response_json) @@ -165,6 +174,7 @@ def __init__(self, config): self.min_remain_rate_limit = self.config.get('min_remain_rate_limit', DEFAULT_MIN_REMAIN_RATE_LIMIT) self.set_auth_in_session() self.not_accessible_repos = set() + self.max_per_page = self.config.get('max_per_page', DEFAULT_MAX_PER_PAGE) def get_request_timeout(self): """ @@ -211,12 +221,16 @@ def authed_get_all_pages(self, source, url, headers={}, stream="", should_skip_4 """ Fetch all pages of records and return them. """ + prepared_request = PreparedRequest() + prepared_request.prepare_url(url, {'per_page': self.max_per_page}) + url = prepared_request.url + LOGGER.info(url) while True: r = self.authed_get(source, url, headers, stream, should_skip_404) yield r # Fetch the next page if next found in the response. - if 'next' in r.links: + if 'next' in r.links and r.links['last'] != url: url = r.links['next']['url'] else: # Break the loop if all pages are fetched. From ac14f7894fe597d1ef5343af734eaa93a1f7f37b Mon Sep 17 00:00:00 2001 From: joaoamaral <7281460+joaopamaral@users.noreply.github.com> Date: Thu, 12 Jan 2023 14:40:38 -0300 Subject: [PATCH 02/12] Extracting prepare_url to method --- README.md | 6 +++++- tap_github/client.py | 23 +++++++++++------------ tests/unittests/test_custom_domain.py | 15 +++++++++++++++ 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index e8c4df01..aa55f97c 100644 --- a/README.md +++ b/README.md @@ -63,9 +63,13 @@ This tap: "repository": "singer-io/tap-github singer-io/getting-started", "start_date": "2021-01-01T00:00:00Z", "request_timeout": 300, - "base_url": "https://api.github.com" + "base_url": "https://api.github.com", } ``` + +> Note: The max results per page is configurable with the parameter `max_per_page`, +> as default it will return 100 (that is the max of most of the endpoints) + 4. Run the tap in discovery mode to get properties.json file ```bash diff --git a/tap_github/client.py b/tap_github/client.py index 2120230e..12b203e5 100644 --- a/tap_github/client.py +++ b/tap_github/client.py @@ -99,7 +99,7 @@ class TooManyRequests(GithubException): } } -def raise_for_error(resp, source, stream, client, should_skip_404, should_skip_422): +def raise_for_error(resp, source, stream, client, should_skip_404): """ Retrieve the error code and the error message from the response and return custom exceptions accordingly. """ @@ -120,13 +120,6 @@ def raise_for_error(resp, source, stream, client, should_skip_404, should_skip_4 # Don't raise a NotFoundException return None - if error_code == 422 and should_skip_422: - message = ("HTTP-error-code: 404, Error: {}. Please refer \'{}\' for more details. " - "The next pages will be skipped due to Github API limit of 40k records.").format( - response_json.get('message'), response_json.get("documentation_url")) - LOGGER.warning(message) - return None - message = "HTTP-error-code: {}, Error: {}".format( error_code, ERROR_CODE_EXCEPTION_MAPPING.get(error_code, {}).get("message", "Unknown Error") if response_json == {} else response_json) @@ -221,10 +214,7 @@ def authed_get_all_pages(self, source, url, headers={}, stream="", should_skip_4 """ Fetch all pages of records and return them. """ - prepared_request = PreparedRequest() - prepared_request.prepare_url(url, {'per_page': self.max_per_page}) - url = prepared_request.url - LOGGER.info(url) + url = self.prepare_url(url) while True: r = self.authed_get(source, url, headers, stream, should_skip_404) yield r @@ -236,6 +226,15 @@ def authed_get_all_pages(self, source, url, headers={}, stream="", should_skip_4 # Break the loop if all pages are fetched. break + def prepare_url(self, url): + """ + Prepare the URL with some additional parameters + """ + prepared_request = PreparedRequest() + # Including max per page param + prepared_request.prepare_url(url, {'per_page': self.max_per_page}) + return prepared_request.url + def verify_repo_access(self, url_for_repo, repo): """ Call rest API to verify that the user has sufficient permissions to access this repository. diff --git a/tests/unittests/test_custom_domain.py b/tests/unittests/test_custom_domain.py index 139b2426..d996da18 100644 --- a/tests/unittests/test_custom_domain.py +++ b/tests/unittests/test_custom_domain.py @@ -27,3 +27,18 @@ def test_config_with_domain(self, mock_verify_access): # Verify domain in client is from config self.assertEqual(test_client.base_url, mock_config["base_url"]) + + def test_prepare_url(self, mock_verify_access): + """ + Test if the correct params are added to url + """ + mock_config = {'repository': 'singer-io/test-repo', "base_url": "http://CUSTOM-git.com", "access_token": "", "max_per_page": 35} + test_client = GithubClient(mock_config) + + # Verify domain in client is from config + self.assertEqual(test_client.prepare_url(test_client.base_url), f"{mock_config['base_url'].lower()}/?per_page=35") + self.assertEqual(test_client.prepare_url('http://CUSTOM-git.com/?q=query'), 'http://custom-git.com/?q=query&per_page=35') + + del mock_config["max_per_page"] + test_client2 = GithubClient(mock_config) + self.assertEqual(test_client2.prepare_url(test_client2.base_url), f"{mock_config['base_url'].lower()}/?per_page=100") \ No newline at end of file From 7c9fdae152b56cb2e71004a79dba6c18691e486e Mon Sep 17 00:00:00 2001 From: joaoamaral <7281460+joaopamaral@users.noreply.github.com> Date: Thu, 12 Jan 2023 14:52:27 -0300 Subject: [PATCH 03/12] nit --- README.md | 2 +- tests/unittests/test_custom_domain.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index aa55f97c..7b8e8845 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ This tap: "repository": "singer-io/tap-github singer-io/getting-started", "start_date": "2021-01-01T00:00:00Z", "request_timeout": 300, - "base_url": "https://api.github.com", + "base_url": "https://api.github.com" } ``` diff --git a/tests/unittests/test_custom_domain.py b/tests/unittests/test_custom_domain.py index d996da18..c719d45c 100644 --- a/tests/unittests/test_custom_domain.py +++ b/tests/unittests/test_custom_domain.py @@ -41,4 +41,4 @@ def test_prepare_url(self, mock_verify_access): del mock_config["max_per_page"] test_client2 = GithubClient(mock_config) - self.assertEqual(test_client2.prepare_url(test_client2.base_url), f"{mock_config['base_url'].lower()}/?per_page=100") \ No newline at end of file + self.assertEqual(test_client2.prepare_url(test_client2.base_url), f"{mock_config['base_url'].lower()}/?per_page=100") From 2d361d341d5f511094dff71ed8171eb752761f3e Mon Sep 17 00:00:00 2001 From: joaoamaral <7281460+joaopamaral@users.noreply.github.com> Date: Thu, 12 Jan 2023 14:54:01 -0300 Subject: [PATCH 04/12] nit --- tests/unittests/test_custom_domain.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unittests/test_custom_domain.py b/tests/unittests/test_custom_domain.py index c719d45c..218c618f 100644 --- a/tests/unittests/test_custom_domain.py +++ b/tests/unittests/test_custom_domain.py @@ -35,10 +35,11 @@ def test_prepare_url(self, mock_verify_access): mock_config = {'repository': 'singer-io/test-repo', "base_url": "http://CUSTOM-git.com", "access_token": "", "max_per_page": 35} test_client = GithubClient(mock_config) - # Verify domain in client is from config + # Verify if per_page param was added as expected self.assertEqual(test_client.prepare_url(test_client.base_url), f"{mock_config['base_url'].lower()}/?per_page=35") self.assertEqual(test_client.prepare_url('http://CUSTOM-git.com/?q=query'), 'http://custom-git.com/?q=query&per_page=35') + # Verify if per_page param was added with default value del mock_config["max_per_page"] test_client2 = GithubClient(mock_config) self.assertEqual(test_client2.prepare_url(test_client2.base_url), f"{mock_config['base_url'].lower()}/?per_page=100") From 8c2d2812dcdc8ae60c4680ac64ae6489a647f41d Mon Sep 17 00:00:00 2001 From: Joao Amaral <7281460+joaopamaral@users.noreply.github.com> Date: Thu, 19 Jan 2023 08:32:08 -0300 Subject: [PATCH 05/12] Returning when seconds to sleep is 0 --- tap_github/client.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tap_github/client.py b/tap_github/client.py index 12b203e5..7372aae0 100644 --- a/tap_github/client.py +++ b/tap_github/client.py @@ -143,6 +143,8 @@ def rate_throttling(response, max_sleep_seconds, min_remain_rate_limit): if 'X-RateLimit-Remaining' in response.headers: if int(response.headers['X-RateLimit-Remaining']) <= min_remain_rate_limit: seconds_to_sleep = calculate_seconds(int(response.headers['X-RateLimit-Reset'])) + if seconds_to_sleep <= 0: + return if seconds_to_sleep > max_sleep_seconds: message = "API rate limit exceeded, please try after {} seconds.".format(seconds_to_sleep) From a67f7887abf27c206e609c302ee3dc9dba3136c4 Mon Sep 17 00:00:00 2001 From: joaoamaral <7281460+joaopamaral@users.noreply.github.com> Date: Thu, 19 Jan 2023 11:17:31 -0300 Subject: [PATCH 06/12] Refactoring tests --- tests/unittests/test_custom_domain.py | 28 +++++++++++++-------------- tests/unittests/test_rate_limit.py | 2 -- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/tests/unittests/test_custom_domain.py b/tests/unittests/test_custom_domain.py index 218c618f..3a517786 100644 --- a/tests/unittests/test_custom_domain.py +++ b/tests/unittests/test_custom_domain.py @@ -12,8 +12,8 @@ def test_config_without_domain(self, mock_verify_access): """ Test if the domain is not given in the config """ - mock_config = {'repository': 'singer-io/test-repo', "access_token": ""} - test_client = GithubClient(mock_config) + config = {'repository': 'singer-io/test-repo', "access_token": ""} + test_client = GithubClient(config) # Verify domain in client is default self.assertEqual(test_client.base_url, DEFAULT_DOMAIN) @@ -22,24 +22,24 @@ def test_config_with_domain(self, mock_verify_access): """ Test if the domain is given in the config """ - mock_config = {'repository': 'singer-io/test-repo', "base_url": "http://CUSTOM-git.com", "access_token": ""} - test_client = GithubClient(mock_config) + config = {'repository': 'singer-io/test-repo', "base_url": "http://CUSTOM-git.com", "access_token": ""} + test_client = GithubClient(config) # Verify domain in client is from config - self.assertEqual(test_client.base_url, mock_config["base_url"]) + self.assertEqual(test_client.base_url, config["base_url"]) def test_prepare_url(self, mock_verify_access): """ Test if the correct params are added to url """ - mock_config = {'repository': 'singer-io/test-repo', "base_url": "http://CUSTOM-git.com", "access_token": "", "max_per_page": 35} - test_client = GithubClient(mock_config) - - # Verify if per_page param was added as expected - self.assertEqual(test_client.prepare_url(test_client.base_url), f"{mock_config['base_url'].lower()}/?per_page=35") - self.assertEqual(test_client.prepare_url('http://CUSTOM-git.com/?q=query'), 'http://custom-git.com/?q=query&per_page=35') + config = {'repository': 'singer-io/test-repo', "base_url": "http://CUSTOM-git.com", "access_token": ""} + test_client = GithubClient(config) # Verify if per_page param was added with default value - del mock_config["max_per_page"] - test_client2 = GithubClient(mock_config) - self.assertEqual(test_client2.prepare_url(test_client2.base_url), f"{mock_config['base_url'].lower()}/?per_page=100") + self.assertEqual(test_client.prepare_url(test_client.base_url), "http://custom-git.com/?per_page=100") + self.assertEqual(test_client.prepare_url('http://CUSTOM-git.com/?q=query'), 'http://custom-git.com/?q=query&per_page=100') + + # Verify if per_page param was added as expected + config["max_per_page"] = 35 + test_client2 = GithubClient(config) + self.assertEqual(test_client2.prepare_url(test_client2.base_url), "http://custom-git.com/?per_page=35") diff --git a/tests/unittests/test_rate_limit.py b/tests/unittests/test_rate_limit.py index 4de4ef98..770acd29 100644 --- a/tests/unittests/test_rate_limit.py +++ b/tests/unittests/test_rate_limit.py @@ -36,7 +36,6 @@ def test_rate_limt_wait(self, mocked_sleep): mocked_sleep.assert_called_with(121) self.assertTrue(mocked_sleep.called) - def test_rate_limit_exception(self, mocked_sleep): """ Test `rate_throttling` for 'sleep_time' greater than `MAX_SLEEP_SECONDS` @@ -53,7 +52,6 @@ def test_rate_limit_exception(self, mocked_sleep): rate_throttling(resp, DEFAULT_SLEEP_SECONDS, DEFAULT_MIN_REMAIN_RATE_LIMIT) self.assertEqual(str(e.exception), "API rate limit exceeded, please try after 602 seconds.") - def test_rate_limit_not_exceeded(self, mocked_sleep): """ Test `rate_throttling` if sleep time does not exceed limit From 077f8f4247d0e61eff4c9f1d06aeeb39a52994ff Mon Sep 17 00:00:00 2001 From: joaoamaral <7281460+joaopamaral@users.noreply.github.com> Date: Thu, 19 Jan 2023 11:51:27 -0300 Subject: [PATCH 07/12] Fixing TestAuthedGetAllPages tests ti include per_page argument --- tests/unittests/test_get_all_repos.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/unittests/test_get_all_repos.py b/tests/unittests/test_get_all_repos.py index 9235acad..5db16fef 100644 --- a/tests/unittests/test_get_all_repos.py +++ b/tests/unittests/test_get_all_repos.py @@ -95,7 +95,7 @@ class TestAuthedGetAllPages(unittest.TestCase): """ Test `authed_get_all_pages` method from client. """ - config = {"access_token": "", "repository": "test-org/repo1"} + config = {"access_token": "", "repository": "test-org/repo1", "max_per_page": 100} def test_for_one_page(self, mock_auth_get, mock_verify_access): @@ -104,7 +104,7 @@ def test_for_one_page(self, mock_auth_get, mock_verify_access): test_client = GithubClient(self.config) mock_auth_get.return_value = MockResponse({}) - list(test_client.authed_get_all_pages("", "mock_url", {})) + list(test_client.authed_get_all_pages("", "http://mock_url", {})) # Verify `auth_get` call count self.assertEqual(mock_auth_get.call_count, 1) @@ -114,14 +114,15 @@ def test_for_multiple_pages(self, mock_auth_get, mock_verify_access): """Verify `authed_get` is called equal number times as pages available.""" test_client = GithubClient(self.config) - mock_auth_get.side_effect = [MockResponse({"next": {"url": "mock_url_2"}}),MockResponse({"next": {"url": "mock_url_3"}}),MockResponse({})] + mock_auth_get.side_effect = [MockResponse({"next": {"url": "http://mock_url_2/?per_page=100"}}), + MockResponse({"next": {"url": "http://mock_url_3/?per_page=100"}}),MockResponse({})] - list(test_client.authed_get_all_pages("", "mock_url_1", {})) + list(test_client.authed_get_all_pages("", "http://mock_url_1", {})) # Verify `auth_get` call count self.assertEqual(mock_auth_get.call_count, 3) # Verify `auth_get` calls with expected url - self.assertEqual(mock_auth_get.mock_calls[0], mock.call("", "mock_url_1", {}, '', True)) - self.assertEqual(mock_auth_get.mock_calls[1], mock.call("", "mock_url_2", {}, '', True)) - self.assertEqual(mock_auth_get.mock_calls[2], mock.call("", "mock_url_3", {}, '', True)) + self.assertEqual(mock_auth_get.mock_calls[0], mock.call("", "http://mock_url_1/?per_page=100", {}, '', True)) + self.assertEqual(mock_auth_get.mock_calls[1], mock.call("", "http://mock_url_2/?per_page=100", {}, '', True)) + self.assertEqual(mock_auth_get.mock_calls[2], mock.call("", "http://mock_url_3/?per_page=100", {}, '', True)) From 0ad5a283019b31de6e87412c193efafe48be23bd Mon Sep 17 00:00:00 2001 From: joaoamaral <7281460+joaopamaral@users.noreply.github.com> Date: Thu, 19 Jan 2023 12:12:47 -0300 Subject: [PATCH 08/12] Fixing pagination exceed limit issue --- tap_github/client.py | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/tap_github/client.py b/tap_github/client.py index 7372aae0..160ca1ec 100644 --- a/tap_github/client.py +++ b/tap_github/client.py @@ -16,6 +16,8 @@ # Set default timeout of 300 seconds REQUEST_TIMEOUT = 300 +PAGINATION_EXCEED_MSG = 'In order to keep the API fast for everyone, pagination is limited for this resource.' + class GithubException(Exception): pass @@ -120,6 +122,14 @@ def raise_for_error(resp, source, stream, client, should_skip_404): # Don't raise a NotFoundException return None + if error_code == 422 and PAGINATION_EXCEED_MSG in response_json.get('message', ''): + message = f"HTTP-error-code: 422, Error: {response_json.get('message', '')}. " \ + f"Please refer '{response_json.get('documentation_url')}' for more details." \ + "This is a known issue when the results exceed 40k and the last page is not full" \ + " (it will trim the results to get only the available by the API)." + LOGGER.warning(message) + return None + message = "HTTP-error-code: {}, Error: {}".format( error_code, ERROR_CODE_EXCEPTION_MAPPING.get(error_code, {}).get("message", "Unknown Error") if response_json == {} else response_json) @@ -134,7 +144,7 @@ def calculate_seconds(epoch): Calculate the seconds to sleep before making a new request. """ current = time.time() - return int(ceil(epoch - current)) + return max(0, int(ceil(epoch - current))) def rate_throttling(response, max_sleep_seconds, min_remain_rate_limit): """ @@ -143,8 +153,6 @@ def rate_throttling(response, max_sleep_seconds, min_remain_rate_limit): if 'X-RateLimit-Remaining' in response.headers: if int(response.headers['X-RateLimit-Remaining']) <= min_remain_rate_limit: seconds_to_sleep = calculate_seconds(int(response.headers['X-RateLimit-Reset'])) - if seconds_to_sleep <= 0: - return if seconds_to_sleep > max_sleep_seconds: message = "API rate limit exceeded, please try after {} seconds.".format(seconds_to_sleep) @@ -207,9 +215,9 @@ def authed_get(self, source, url, headers={}, stream="", should_skip_404 = True) raise_for_error(resp, source, stream, self, should_skip_404) timer.tags[metrics.Tag.http_status_code] = resp.status_code rate_throttling(resp, self.max_sleep_seconds, self.min_remain_rate_limit) - if resp.status_code == 404: + if resp.status_code == 404 or resp.status_code == 422: # Return an empty response body since we're not raising a NotFoundException - resp._content = b'{}' # pylint: disable=protected-access + resp._content = b'{}' # pylint: disable=protected-access return resp def authed_get_all_pages(self, source, url, headers={}, stream="", should_skip_404 = True): @@ -217,16 +225,17 @@ def authed_get_all_pages(self, source, url, headers={}, stream="", should_skip_4 Fetch all pages of records and return them. """ url = self.prepare_url(url) - while True: - r = self.authed_get(source, url, headers, stream, should_skip_404) - yield r + next_page = True + while next_page: + response = self.authed_get(source, url, headers, stream, should_skip_404) + yield response # Fetch the next page if next found in the response. - if 'next' in r.links and r.links['last'] != url: - url = r.links['next']['url'] + if 'next' in response.links: + url = response.links['next']['url'] else: - # Break the loop if all pages are fetched. - break + # Break the loop if all pages are fetched. + next_page = False def prepare_url(self, url): """ From 786438925aec2f21df9ae2da8d8c9089311b4f5e Mon Sep 17 00:00:00 2001 From: joaoamaral <7281460+joaopamaral@users.noreply.github.com> Date: Thu, 19 Jan 2023 12:17:16 -0300 Subject: [PATCH 09/12] Simplifying the pagination loop --- tap_github/client.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/tap_github/client.py b/tap_github/client.py index 160ca1ec..cd6aca70 100644 --- a/tap_github/client.py +++ b/tap_github/client.py @@ -224,18 +224,12 @@ def authed_get_all_pages(self, source, url, headers={}, stream="", should_skip_4 """ Fetch all pages of records and return them. """ - url = self.prepare_url(url) - next_page = True - while next_page: + next_url = self.prepare_url(url) + while next_url: response = self.authed_get(source, url, headers, stream, should_skip_404) yield response - # Fetch the next page if next found in the response. - if 'next' in response.links: - url = response.links['next']['url'] - else: - # Break the loop if all pages are fetched. - next_page = False + next_url = response.links.get('next', None) def prepare_url(self, url): """ From d780571ebf792c88e91e8aa4f2ed5bbb1a1a1f4b Mon Sep 17 00:00:00 2001 From: joaoamaral <7281460+joaopamaral@users.noreply.github.com> Date: Thu, 19 Jan 2023 13:10:21 -0300 Subject: [PATCH 10/12] Logging url instead of source --- tap_github/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_github/client.py b/tap_github/client.py index cd6aca70..b7058367 100644 --- a/tap_github/client.py +++ b/tap_github/client.py @@ -208,7 +208,7 @@ def authed_get(self, source, url, headers={}, stream="", should_skip_404 = True) """ Call rest API and return the response in case of status code 200. """ - with metrics.http_request_timer(source) as timer: + with metrics.http_request_timer(url) as timer: self.session.headers.update(headers) resp = self.session.request(method='get', url=url, timeout=self.get_request_timeout()) if resp.status_code != 200: From 3d46ebc8406687bf4a66f50005325cad541e4ddf Mon Sep 17 00:00:00 2001 From: joaoamaral <7281460+joaopamaral@users.noreply.github.com> Date: Thu, 19 Jan 2023 13:18:11 -0300 Subject: [PATCH 11/12] Fixing next url --- tap_github/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_github/client.py b/tap_github/client.py index b7058367..d85d5cca 100644 --- a/tap_github/client.py +++ b/tap_github/client.py @@ -229,7 +229,7 @@ def authed_get_all_pages(self, source, url, headers={}, stream="", should_skip_4 response = self.authed_get(source, url, headers, stream, should_skip_404) yield response - next_url = response.links.get('next', None) + next_url = response.links.get('next', {}).get('url', None) def prepare_url(self, url): """ From 5b53c97058cee25aae5b433f2d1964463ecdefdc Mon Sep 17 00:00:00 2001 From: joaoamaral <7281460+joaopamaral@users.noreply.github.com> Date: Thu, 19 Jan 2023 13:22:11 -0300 Subject: [PATCH 12/12] Fixing next url --- tap_github/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_github/client.py b/tap_github/client.py index d85d5cca..b38286a1 100644 --- a/tap_github/client.py +++ b/tap_github/client.py @@ -226,7 +226,7 @@ def authed_get_all_pages(self, source, url, headers={}, stream="", should_skip_4 """ next_url = self.prepare_url(url) while next_url: - response = self.authed_get(source, url, headers, stream, should_skip_404) + response = self.authed_get(source, next_url, headers, stream, should_skip_404) yield response next_url = response.links.get('next', {}).get('url', None)