From 3d55bff2d370d3f1070d165f81c7c4149b0e625d Mon Sep 17 00:00:00 2001
From: mirceachira
Date: Mon, 16 Sep 2019 16:03:40 +0300
Subject: [PATCH 1/2] Added search command: find job IDs by part of a URL

---
 shub/search.py | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++
 shub/tool.py   |  1 +
 2 files changed, 59 insertions(+)
 create mode 100755 shub/search.py

diff --git a/shub/search.py b/shub/search.py
new file mode 100755
index 00000000..09d3814c
--- /dev/null
+++ b/shub/search.py
@@ -0,0 +1,58 @@
+from datetime import datetime
+
+import click
+from dateparser import parse
+from scrapinghub import ScrapinghubClient
+
+
+HELP = """
+Given a project key and part of a URL, fetch job IDs from Scrapy Cloud.
+
+This is useful when you want to find the job that crawled a given URL
+without scanning every job by hand.
+
+Pass the project key and a URL (or any part of one). The matching is
+case-sensitive!
+
+    shub search 123456 "B07F3NG1234"
+
+You can narrow down the search significantly by also passing the spider
+name, the date interval to search within, or both. By default only the
+last 6 months are searched.
+
+    shub search 123456 "B07F3NG1234" --spider="amazon"
+
+    shub search 123456 "B07F3NG1234" --start_date="last week" --end_date="2 days ago"
+"""
+
+SHORT_HELP = "Fetch job IDs from Scrapy Cloud based on URLs"
+
+
+@click.command(help=HELP, short_help=SHORT_HELP)
+@click.argument('project_key')
+@click.argument('url_content')
+@click.option(
+    '--start_date',
+    default='6 months ago',
+    help='date to start searching from, defaults to 6 months ago'
+)
+@click.option('--end_date', default='now', help='date to end the search at')
+@click.option('-s', '--spider', help='only search jobs of this spider')
+def cli(project_key, url_content, start_date, end_date, spider):
+    def date_string_to_millis(date):
+        # Scrapy Cloud expects UNIX epoch timestamps in milliseconds.
+        return int((parse(date) - datetime(1970, 1, 1)).total_seconds() * 1000)
+
+    start_time = date_string_to_millis(start_date)
+    end_time = date_string_to_millis(end_date)
+
+    project = ScrapinghubClient().get_project(project_key)
+
+    jobs = project.jobs.iter(startts=start_time, endts=end_time, spider=spider)
+    for job_dict in jobs:
+        job = project.jobs.get(job_dict['key'])
+        # Echo each matching job key once: stop at the first matching request.
+        for req in job.requests.iter(filter=[('url', 'contains', [url_content])]):
+            click.echo(job_dict['key'])
+            break
+
diff --git a/shub/tool.py b/shub/tool.py
index 14b2f80e..28db7c20 100644
--- a/shub/tool.py
+++ b/shub/tool.py
@@ -51,6 +51,7 @@ def cli():
     "migrate_eggs",
     "image",
     "cancel",
+    "search",
 ]
 
 for command in commands:

From bcc772a14b5f9c2f6ca86e73bbe5f9817d3e5f66 Mon Sep 17 00:00:00 2001
From: mirceachira
Date: Mon, 16 Sep 2019 16:04:02 +0300
Subject: [PATCH 2/2] Added dateparser dependency

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index 922d9f03..91cf666a 100644
--- a/setup.py
+++ b/setup.py
@@ -39,6 +39,7 @@
         'six>=1.7.0',
         'tqdm',
         'toml',
+        'dateparser',
     ],
     classifiers=[
         'Development Status :: 5 - Production/Stable',
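
A note on the timestamp helper: the `startts`/`endts` filters take UNIX epoch
timestamps in milliseconds, which is why `date_string_to_millis` scales
`total_seconds()` by 1000. A minimal standalone sketch of the same conversion,
using only the imports the patch already depends on:

    from datetime import datetime

    from dateparser import parse

    def date_string_to_millis(date):
        # dateparser turns human-readable strings ('6 months ago', 'now')
        # into datetimes; subtracting the epoch gives a naive UTC delta,
        # scaled here to milliseconds.
        return int((parse(date) - datetime(1970, 1, 1)).total_seconds() * 1000)

    assert date_string_to_millis('6 months ago') < date_string_to_millis('now')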
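
The same lookup can also be done from a script with python-scrapinghub
directly; the command boils down to this loop. A sketch reusing the
placeholder project key, spider, and URL fragment from the help text, and
assuming an API key is already configured (via `shub login` or `SH_APIKEY`);
the date window is omitted here to keep it short:

    from scrapinghub import ScrapinghubClient

    project = ScrapinghubClient().get_project('123456')

    for job_dict in project.jobs.iter(spider='amazon'):
        job = project.jobs.get(job_dict['key'])
        # 'contains' matches any request whose URL includes the fragment.
        matches = job.requests.iter(filter=[('url', 'contains', ['B07F3NG1234'])])
        if any(True for _ in matches):
            print(job_dict['key'])

Using `any()` over the iterator stops at the first matching request, the same
early-exit the command gets from its `break`.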
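
On the shub/tool.py change: the `commands` list feeds the `for command in
commands:` loop visible in the hunk's trailing context, which is why
shub/search.py only needs to expose a click command named `cli`. A rough
sketch of that registration pattern (an assumption about tool.py's internals,
not code from this patch; the real list and loop body may differ):

    import importlib

    import click

    @click.group()
    def cli():
        pass

    for command in ['search']:  # shub's real list names every subcommand
        # Import shub.<command> and register its `cli` entry point on the
        # top-level group under the same name.
        module = importlib.import_module('shub.' + command)
        cli.add_command(module.cli, command)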