diff --git a/.travis.yml b/.travis.yml
index e8941654..ca82befa 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -15,7 +15,7 @@ matrix:
     language: generic
     env:
     - TOX_ENV=py27
-    - PYTHON_VERSION='2.7'
+    - PYTHON_VERSION='2.7.9'
   - os: osx
     language: generic
     env:
@@ -40,7 +40,8 @@ branches:
 before_install: |
   if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then
     # From https://pythonhosted.org/CodeChat/.travis.yml.html
-    brew install pyenv-virtualenv
+    # Homebrew currently fails after updating. See also: https://discuss.circleci.com/t/brew-install-fails-while-updating/32992/4
+    HOMEBREW_NO_AUTO_UPDATE=1 brew install pyenv-virtualenv
     eval "$(pyenv init -)"
     eval "$(pyenv virtualenv-init -)"
     # See https://github.com/travis-ci/travis-ci/issues/4834, but
diff --git a/requirements.in b/requirements.in
index 3ac4583b..22932550 100644
--- a/requirements.in
+++ b/requirements.in
@@ -1,13 +1,15 @@
 click
 docker-py
-PyYAML
-requests
+#PyYAML
+#requests
 retrying
 six
 tqdm
 scrapinghub>=2.0.3
 
+pip<19.3
+
 # address known vulnerabilities
 requests>=2.20.0  # CVE-2018-18074
 pyyaml>=4.2b1  # CVE-2017-18342
 
diff --git a/requirements.txt b/requirements.txt
index a2bba22e..68744217 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -20,3 +20,6 @@ six==1.10.0
 tqdm==4.11.2
 urllib3==1.25.3           # via requests
 websocket-client==0.37.0  # via docker-py
+
+# The following packages are considered to be unsafe in a requirements file:
+# pip==19.2.3
diff --git a/setup.py b/setup.py
index 01abbe29..b11d9fff 100644
--- a/setup.py
+++ b/setup.py
@@ -31,7 +31,7 @@
     install_requires=[
         'click',
         'docker-py',
-        'pip',
+        'pip<19.3',
         'PyYAML',
         'retrying',
         'requests',
diff --git a/shub/items.py b/shub/items.py
index ad656b56..ac854aac 100644
--- a/shub/items.py
+++ b/shub/items.py
@@ -30,6 +30,10 @@
 by providing the -f flag:
 
     shub items -f 2/15
+
+Additional filters may be applied to the query:
+
+    shub items 12345/2/15 --filter '["foo","exists",[]]'
 """
 
 SHORT_HELP = "Fetch items from Scrapy Cloud"
@@ -40,8 +44,13 @@
 @click.option('-f', '--follow', help='output new items as they are scraped',
               is_flag=True)
 @click.option('-n', '--tail', help='output last N items only', type=int)
-def cli(job_id, follow, tail):
+@click.option('--filter', 'filter_', help='filter to be applied to the query')
+@click.option('--filter_type', default='filter',
+              type=click.Choice(['filter', 'filterall', 'filterany']),
+              help='type of filter to be applied')
+def cli(job_id, follow, tail, filter_, filter_type):
     job = get_job(job_id)
     for item in job_resource_iter(job, job.items, output_json=True,
-                                  follow=follow, tail=tail):
+                                  follow=follow, tail=tail, filter_=filter_,
+                                  filter_type=filter_type):
         click.echo(item)
diff --git a/shub/log.py b/shub/log.py
index 569458c8..ceb51801 100644
--- a/shub/log.py
+++ b/shub/log.py
@@ -32,6 +32,10 @@
 providing the -f flag:
 
     shub log -f 2/15
+
+Additional filters may be applied to the query:
+
+    shub log 12345/2/15 --filter '["level",">=",["20"]]'  # loglevel>=INFO
 """
 
 SHORT_HELP = "Fetch log from Scrapy Cloud"
@@ -43,9 +47,11 @@
                                      'produced', is_flag=True)
 @click.option('-n', '--tail', help='output last N log entries only', type=int)
 @click.option('--json', 'json_', help='output log entries in JSON', is_flag=True, default=False)
-def cli(job_id, follow, tail, json_):
+@click.option('--filter', 'filter_', help='filter to be applied to the query')
+def cli(job_id, follow, tail, json_, filter_):
     job = get_job(job_id)
-    for item in job_resource_iter(job, job.logs, follow=follow, tail=tail, output_json=json_):
+    for item in job_resource_iter(job, job.logs, follow=follow, tail=tail, output_json=json_,
+                                  filter_=filter_):
         if json_:
             click.echo(item)
         else:
diff --git a/shub/requests.py b/shub/requests.py
index 45425f45..5ac5dc37 100644
--- a/shub/requests.py
+++ b/shub/requests.py
@@ -30,6 +30,10 @@
 by providing the -f flag:
 
     shub requests -f 2/15
+
+Additional filters may be applied to the query:
+
+    shub requests 12345/2/15 --filter '["url","icontains",["foo"]]'
 """
 
 SHORT_HELP = "Fetch requests from Scrapy Cloud"
@@ -40,8 +44,9 @@
 @click.option('-f', '--follow', help='output new requests as they are made',
               is_flag=True)
 @click.option('-n', '--tail', help='output last N requests only', type=int)
-def cli(job_id, follow, tail):
+@click.option('--filter', 'filter_', help='filter to be applied to the query')
+def cli(job_id, follow, tail, filter_):
     job = get_job(job_id)
     for item in job_resource_iter(job, job.requests, output_json=True,
-                                  follow=follow, tail=tail):
+                                  follow=follow, tail=tail, filter_=filter_):
         click.echo(item)
diff --git a/shub/utils.py b/shub/utils.py
index 6a988ed1..db75eda2 100644
--- a/shub/utils.py
+++ b/shub/utils.py
@@ -531,7 +531,7 @@ def job_live(job, refresh_meta_after=60):
 
 
 def job_resource_iter(job, resource, output_json=False, follow=True,
-                      tail=None):
+                      tail=None, filter_=None, filter_type=None):
     """
     Given a python-hubstorage job and resource (e.g. job.items), return a
     generator that periodically checks the job resource and yields its items.
@@ -549,17 +549,24 @@ def job_resource_iter(job, resource, output_json=False, follow=True,
     last_item_key = '{}/{}'.format(job.key, last_item)
     if not job_live(job):
         follow = False
+    # XXX: Some simple validations for the filter value?
+    api_params = {
+        # It's okay to have null-values included here since the underlying
+        # package would have it removed
+        'startafter': last_item_key,
+        filter_type or 'filter': filter_,
+    }
     resource_iter = resource.iter_json if output_json else resource.iter_values
     if not follow:
-        for item in resource_iter(startafter=last_item_key):
+        for item in resource_iter(**api_params):
             yield item
         return
     while True:
         # XXX: Always use iter_json until Kumo team fixes iter_values to also
         # return '_key'
-        for json_line in resource.iter_json(startafter=last_item_key):
+        for json_line in resource.iter_json(**api_params):
             item = json.loads(json_line)
-            last_item_key = item['_key']
+            api_params['startafter'] = item['_key']
             yield json_line if output_json else item
             if not job_live(job):
                 break
@@ -637,6 +644,8 @@ def download_from_pypi(dest, pkg=None, reqfile=None, extra_args=None):
         no_wheel = ['--no-binary=:all:']
     if pip_version >= LooseVersion('8'):
         cmd = 'download'
+    if pip_version >= LooseVersion('19.3'):
+        raise NotImplementedError('Expecting pip<19.3')
     with patch_sys_executable():
         pip_main([cmd, '-d', dest, '--no-deps'] + no_wheel + extra_args +
                  target)
diff --git a/tests/test_jobresource.py b/tests/test_jobresource.py
index dc951dee..bcdda5ed 100644
--- a/tests/test_jobresource.py
+++ b/tests/test_jobresource.py
@@ -36,13 +36,27 @@ def _test_forwards_follow(self, cmd_mod):
         self.runner.invoke(cmd_mod.cli, ('1/2/3', '-f'))
         self.assertTrue(mock_jri.call_args[1]['follow'])
 
+    def _test_resource_filter(self, cmd_mod, test_filter_type=False):
+        with mock.patch.object(cmd_mod, 'get_job'), \
+                mock.patch.object(cmd_mod, 'job_resource_iter', autospec=True) \
+                as mock_jri:
+            self.runner.invoke(cmd_mod.cli, ('1/2/3',))
+            self.assertFalse(mock_jri.call_args[1]['filter_'])
+            self.runner.invoke(cmd_mod.cli, ('1/2/3', '--filter', '["foo"]'))
+            self.assertEqual(mock_jri.call_args[1]['filter_'], '["foo"]')
+            if test_filter_type:
+                self.runner.invoke(cmd_mod.cli, ('1/2/3', '--filter', '["foo"]', '--filter_type', 'filterall'))
+                self.assertEqual(mock_jri.call_args[1]['filter_type'], 'filterall')
+
     def test_items(self):
         self._test_prints_objects(items, 'items')
         self._test_forwards_follow(items)
+        self._test_resource_filter(items, test_filter_type=True)
 
     def test_requests(self):
         self._test_prints_objects(requests, 'requests')
         self._test_forwards_follow(requests)
+        self._test_resource_filter(requests)
 
     def test_log(self):
         objects = [
@@ -65,6 +79,7 @@ def test_log(self):
         for idx, line in enumerate(result.output.splitlines()):
             self.assertEqual(json.loads(line), objects[idx])
         self._test_forwards_follow(log)
+        self._test_resource_filter(log)
 
     def test_log_unicode(self):
         objects = [
diff --git a/tests/test_utils.py b/tests/test_utils.py
index cbdc3f49..594379ed 100755
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -243,23 +243,26 @@ def magic_iter(*args, **kwargs):
             self.assertEqual(kwargs['startafter'], 'jobkey/996')
             return iter([])
 
-        def jri_result(follow, tail=None):
+        def jri_result(follow, tail=None, filter_=None, filter_type=None):
             return list(utils.job_resource_iter(
                 job,
                 job.resource,
                 follow=follow,
                 tail=tail,
+                filter_=filter_,
+                filter_type=filter_type,
                 output_json=True,
             ))
 
-        job.resource.iter_json = magic_iter
+        job.resource.iter_json = Mock(wraps=magic_iter)
 
         magic_iter.stage = 0
         self.assertEqual(jri_result(False), make_items([1, 2, 3]))
         self.assertFalse(mock_sleep.called)
 
         magic_iter.stage = 0
-        self.assertEqual(jri_result(True), make_items([1, 2, 3, 4, 5, 6]))
+        self.assertEqual(jri_result(True, filter_='["foo"]'), make_items([1, 2, 3, 4, 5, 6]))
+        self.assertEqual(job.resource.iter_json.call_args[1]['filter'], '["foo"]')
         self.assertTrue(mock_sleep.called)
 
         magic_iter.stage = 0
@@ -268,7 +271,8 @@ def jri_result(follow, tail=None):
 
         magic_iter.stage = 2
         job.resource.stats.return_value = {'totals': {'input_values': 1000}}
-        self.assertEqual(jri_result(True, tail=3), [])
+        self.assertEqual(jri_result(True, tail=3, filter_='["foo"]', filter_type='filterall'), [])
+        self.assertEqual(job.resource.iter_json.call_args[1]['filterall'], '["foo"]')
 
     @patch('shub.utils.requests.get', autospec=True)
     def test_latest_github_release(self, mock_get):
@@ -371,6 +375,10 @@ def _call(*args, **kwargs):
         pipargs = _call('tmpdir', reqfile='req.txt')
         self.assertEqual(pipargs.index('-r') + 1, pipargs.index('req.txt'))
 
+        # pip>=19.3 shall be unsupported for now
+        mock_pip.__version__ = '19.3'
+        self.assertRaises(NotImplementedError, _call, ['tmpdir'], {'pkg': 'shub'})
+
         # Replace deprecated commands in newer versions
         mock_pip.__version__ = '7.1.2.dev0'
         pipargs = _call('tmpdir', pkg='shub')
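The new --filter flag passes a raw Scrapy Cloud filter expression (a JSON-encoded [field, operator, args] triple, as in the docstring examples above) through job_resource_iter into the storage API query, under the key chosen by --filter_type. A minimal sketch of calling the patched helper directly from Python, assuming shub is already configured with a valid API key (e.g. via shub login); the job ID and filter expression are simply the illustrative values from the docstrings, not real data:

    # Sketch only: exercises the patched job_resource_iter() signature directly.
    # '12345/2/15' and the filter expression below are illustrative values taken
    # from the docstring examples; any configured project/job would do.
    from shub.utils import get_job, job_resource_iter

    job = get_job('12345/2/15')
    for item in job_resource_iter(job, job.items, output_json=True, follow=False,
                                  filter_='["foo","exists",[]]',
                                  filter_type='filter'):
        print(item)

With filter_type='filterall' or 'filterany' the same expression is sent under that key instead, mirroring how the patched helper builds api_params.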