diff --git a/howlongtobeatpy/howlongtobeatpy/HTMLRequests.py b/howlongtobeatpy/howlongtobeatpy/HTMLRequests.py
index 961b182..f8ed086 100644
--- a/howlongtobeatpy/howlongtobeatpy/HTMLRequests.py
+++ b/howlongtobeatpy/howlongtobeatpy/HTMLRequests.py
@@ -24,11 +24,70 @@ class SearchModifiers(Enum):
     HIDE_DLC = "hide_dlc"
 
 
+class SearchInformations:
+    search_url = None
+    api_key = None
+
+    def __init__(self, script_content: str):
+        self.api_key = self.__extract_api_from_script(script_content)
+        self.search_url = self.__extract_search_url_script(script_content)
+        if HTMLRequests.BASE_URL.endswith("/") and self.search_url is not None:
+            self.search_url = self.search_url.lstrip("/")
+
+    def __extract_api_from_script(self, script_content: str):
+        """
+        Function that extracts the hltb code to use in the request from the given script
+        @return: the string of the api key found
+        """
+        # Try multiple find one after the other as hltb keep changing format
+        # Test 1 - The API Key is in the user id in the request json
+        user_id_api_key_pattern = r'users\s*:\s*{\s*id\s*:\s*"([^"]+)"'
+        matches = re.findall(user_id_api_key_pattern, script_content)
+        if matches:
+            key = ''.join(matches)
+            return key
+        # Test 2 - The API Key is in format fetch("/api/[word here]/".concat("X").concat("Y")...
+        concat_api_key_pattern = r'\/api\/\w+\/"(?:\.concat\("[^"]*"\))*'
+        matches = re.findall(concat_api_key_pattern, script_content)
+        if matches:
+            matches = str(matches).split('.concat')
+            matches = [re.sub(r'["\(\)\[\]\']', '', match) for match in matches[1:]]
+            key = ''.join(matches)
+            return key
+        # Unable to find :(
+        return None
+
+    def __extract_search_url_script(self, script_content: str):
+        """
+        Function that extracts the hltb search url to append from the script as /api/search
+        @return: the search url to append
+        """
+        pattern = re.compile(
+            r'fetch\(\s*["\'](\/api\/[^"\']*)["\']'  # Matches the endpoint
+            r'((?:\s*\.concat\(\s*["\']([^"\']*)["\']\s*\))+)'  # Captures concatenated strings
+            r'\s*,',  # Matches up to the comma
+            re.DOTALL
+        )
+        matches = pattern.finditer(script_content)
+        for match in matches:
+            endpoint = match.group(1)
+            concat_calls = match.group(2)
+            # Extract all concatenated strings
+            concat_strings = re.findall(r'\.concat\(\s*["\']([^"\']*)["\']\s*\)', concat_calls)
+            concatenated_str = ''.join(concat_strings)
+            # Check if the concatenated string matches the known string
+            if concatenated_str == self.api_key:
+                return endpoint
+        # Unable to find :(
+        return None
+
+
 class HTMLRequests:
     BASE_URL = 'https://howlongtobeat.com/'
     REFERER_HEADER = BASE_URL
-    SEARCH_URL = BASE_URL + "api/s" # should update this to some kind of regex for api/[any alphanumeric characters here] to be more future proof since this keeps changing
     GAME_URL = BASE_URL + "game"
+    # Static search url to use in case it can't be extracted from JS code
+    SEARCH_URL = BASE_URL + "api/s/"
 
     @staticmethod
     def get_search_request_headers():
@@ -46,7 +105,7 @@ def get_search_request_headers():
         return headers
 
     @staticmethod
-    def get_search_request_data(game_name: str, search_modifiers: SearchModifiers, page: int, api_key: str):
+    def get_search_request_data(game_name: str, search_modifiers: SearchModifiers, page: int, search_info: SearchInformations):
         """
         Generate the data payload for the search request
         @param game_name: The name of the game to search
@@ -96,8 +155,8 @@ def get_search_request_data(game_name: str, search_modifiers: SearchModifiers, p
         }
 
         # If api_key is passed add it to the dict
-        if api_key is not None:
-            payload['searchOptions']['users']['id'] = api_key
+        if search_info is not None and search_info.api_key is not None:
+            payload['searchOptions']['users']['id'] = search_info.api_key
 
         return json.dumps(payload)
 
@@ -112,19 +171,24 @@ def send_web_request(game_name: str, search_modifiers: SearchModifiers = SearchM
         @return: The HTML code of the research if the request returned 200(OK), None otherwise
         """
         headers = HTMLRequests.get_search_request_headers()
-        api_key_result = HTMLRequests.send_website_request_getcode(False)
-        if api_key_result is None:
-            api_key_result = HTMLRequests.send_website_request_getcode(True)
+        search_info_data = HTMLRequests.send_website_request_getcode(False)
+        if search_info_data is None or search_info_data.api_key is None:
+            search_info_data = HTMLRequests.send_website_request_getcode(True)
+        if search_info_data is None or search_info_data.api_key is None:
+            # Without the api key the request url below cannot be built
+            return None
         # Make the request
+        if search_info_data.search_url is not None:
+            HTMLRequests.SEARCH_URL = HTMLRequests.BASE_URL + search_info_data.search_url
         # The main method currently is the call to the API search URL
-        search_url_with_key = HTMLRequests.SEARCH_URL + "/" + api_key_result
+        search_url_with_key = HTMLRequests.SEARCH_URL + search_info_data.api_key
         payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page, None)
         resp = requests.post(search_url_with_key, headers=headers, data=payload, timeout=60)
         if resp.status_code == 200:
             return resp.text
         # Try to call with the standard url adding the api key to the user
         search_url = HTMLRequests.SEARCH_URL
-        payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page, api_key_result)
+        payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page, search_info_data)
         resp = requests.post(search_url, headers=headers, data=payload, timeout=60)
         if resp.status_code == 200:
             return resp.text
@@ -141,12 +205,17 @@ async def send_async_web_request(game_name: str, search_modifiers: SearchModifie
         @return: The HTML code of the research if the request returned 200(OK), None otherwise
         """
         headers = HTMLRequests.get_search_request_headers()
-        api_key_result = await HTMLRequests.async_send_website_request_getcode(False)
-        if api_key_result is None:
-            api_key_result = await HTMLRequests.async_send_website_request_getcode(True)
+        search_info_data = await HTMLRequests.async_send_website_request_getcode(False)
+        if search_info_data is None or search_info_data.api_key is None:
+            search_info_data = await HTMLRequests.async_send_website_request_getcode(True)
+        if search_info_data is None or search_info_data.api_key is None:
+            # Without the api key the request url below cannot be built
+            return None
         # Make the request
+        if search_info_data.search_url is not None:
+            HTMLRequests.SEARCH_URL = HTMLRequests.BASE_URL + search_info_data.search_url
         # The main method currently is the call to the API search URL
-        search_url_with_key = HTMLRequests.SEARCH_URL + "/" + api_key_result
+        search_url_with_key = HTMLRequests.SEARCH_URL + search_info_data.api_key
         payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page, None)
         async with aiohttp.ClientSession() as session:
             async with session.post(search_url_with_key, headers=headers, data=payload) as resp_with_key:
@@ -154,7 +223,7 @@ async def send_async_web_request(game_name: str, search_modifiers: SearchModifie
                     return await resp_with_key.text()
                 else:
                     search_url = HTMLRequests.SEARCH_URL
-                    payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page, api_key_result)
+                    payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page, search_info_data)
                     async with session.post(search_url, headers=headers, data=payload) as resp_user_id:
                         if resp_user_id is not None and resp_user_id.status == 200:
                             return await resp_user_id.text()
@@ -240,30 +309,6 @@ async def async_get_game_title(game_id: int):
                     return HTMLRequests.__cut_game_title(text)
         return None
 
-    @staticmethod
-    def extract_api_from_script(script_content: str):
-        """
-        Function that extract the htlb code to use in the request from the given script
-        @return: the string of the api key found
-        """
-        # Try multiple find one after the other as hltb keep changing format
-        # Test 1 - The API Key is in the user id in the request json
-        user_id_api_key_pattern = r'users\s*:\s*{\s*id\s*:\s*"([^"]+)"'
-        matches = re.findall(user_id_api_key_pattern, script_content)
-        if matches:
-            key = ''.join(matches)
-            return key
-        # Test 2 - The API Key is in format fetch("/api/[word here]/".concat("X").concat("Y")...
-        concat_api_key_pattern = r'\/api\/\w+\/"(?:\.concat\("[^"]*"\))*'
-        matches = re.findall(concat_api_key_pattern, script_content)
-        if matches:
-            matches = str(matches).split('.concat')
-            matches = [re.sub(r'["\(\)\[\]\']', '', match) for match in matches[1:]]
-            key = ''.join(matches)
-            return key
-        # Unable to find :(
-        return None
-
     @staticmethod
     def send_website_request_getcode(parse_all_scripts: bool):
         """
@@ -286,9 +331,10 @@ def send_website_request_getcode(parse_all_scripts: bool):
                 script_url = HTMLRequests.BASE_URL + script_url
                 script_resp = requests.get(script_url, headers=headers, timeout=60)
                 if script_resp.status_code == 200 and script_resp.text is not None:
-                    api_key_result = HTMLRequests.extract_api_from_script(script_resp.text)
-                    if api_key_result is not None:
-                        return api_key_result
+                    search_info = SearchInformations(script_resp.text)
+                    if search_info.api_key is not None:
+                        # The api key is necessary
+                        return search_info
         return None
 
     @staticmethod
@@ -317,9 +363,10 @@ async def async_send_website_request_getcode(parse_all_scripts: bool):
                         async with session.get(script_url, headers=headers) as script_resp:
                             if script_resp is not None and resp.status == 200:
                                 script_resp_text = await script_resp.text()
-                                api_key_result = HTMLRequests.extract_api_from_script(script_resp_text)
-                                if api_key_result is not None:
-                                    return api_key_result
+                                search_info = SearchInformations(script_resp_text)
+                                if search_info.api_key is not None:
+                                    # The api key is necessary
+                                    return search_info
                             else:
                                 return None
                 else:
diff --git a/howlongtobeatpy/setup.py b/howlongtobeatpy/setup.py
index 267f0f7..4878f49 100644
--- a/howlongtobeatpy/setup.py
+++ b/howlongtobeatpy/setup.py
@@ -4,7 +4,7 @@
     long_description = fh.read()
 
 setup(name='howlongtobeatpy',
-      version='1.0.16',
+      version='1.0.17',
       packages=find_packages(exclude=['tests']),
       description='A Python API for How Long to Beat',
       long_description=long_description,
diff --git a/sonar-project.properties b/sonar-project.properties
index 61e2cff..6ce5e3b 100644
--- a/sonar-project.properties
+++ b/sonar-project.properties
@@ -2,7 +2,7 @@
 sonar.organization=scrappycocco-github
 sonar.projectKey=ScrappyCocco_HowLongToBeat-PythonAPI
 sonar.projectName=HowLongToBeat-PythonAPI
-sonar.projectVersion=1.0.16
+sonar.projectVersion=1.0.17
 sonar.python.version=3.9
 
 # Path is relative to the sonar-project.properties file. Replace "\" by "/" on Windows.