diff --git a/howlongtobeatpy/howlongtobeatpy/HTMLRequests.py b/howlongtobeatpy/howlongtobeatpy/HTMLRequests.py
index 961b182..f8ed086 100644
--- a/howlongtobeatpy/howlongtobeatpy/HTMLRequests.py
+++ b/howlongtobeatpy/howlongtobeatpy/HTMLRequests.py
@@ -24,11 +24,70 @@ class SearchModifiers(Enum):
HIDE_DLC = "hide_dlc"
+class SearchInformations:
+ search_url = None
+ api_key = None
+
+ def __init__(self, script_content: str):
+ self.api_key = self.__extract_api_from_script(script_content)
+ self.search_url = self.__extract_search_url_script(script_content)
+ if HTMLRequests.BASE_URL.endswith("/") and self.search_url is not None:
+ self.search_url = self.search_url.lstrip("/")
+
+ def __extract_api_from_script(self, script_content: str):
+ """
+ Function that extract the htlb code to use in the request from the given script
+ @return: the string of the api key found
+ """
+ # Try multiple find one after the other as hltb keep changing format
+ # Test 1 - The API Key is in the user id in the request json
+ user_id_api_key_pattern = r'users\s*:\s*{\s*id\s*:\s*"([^"]+)"'
+ matches = re.findall(user_id_api_key_pattern, script_content)
+ if matches:
+ key = ''.join(matches)
+ return key
+ # Test 2 - The API Key is in format fetch("/api/[word here]/".concat("X").concat("Y")...
+ concat_api_key_pattern = r'\/api\/\w+\/"(?:\.concat\("[^"]*"\))*'
+ matches = re.findall(concat_api_key_pattern, script_content)
+ if matches:
+ matches = str(matches).split('.concat')
+ matches = [re.sub(r'["\(\)\[\]\']', '', match) for match in matches[1:]]
+ key = ''.join(matches)
+ return key
+ # Unable to find :(
+ return None
+
+ def __extract_search_url_script(self, script_content: str):
+ """
+ Function that extract the htlb search url to append from the script as /api/search
+ @return: the search url to append
+ """
+ pattern = re.compile(
+ r'fetch\(\s*["\'](\/api\/[^"\']*)["\']' # Matches the endpoint
+ r'((?:\s*\.concat\(\s*["\']([^"\']*)["\']\s*\))+)' # Captures concatenated strings
+ r'\s*,', # Matches up to the comma
+ re.DOTALL
+ )
+ matches = pattern.finditer(script_content)
+ for match in matches:
+ endpoint = match.group(1)
+ concat_calls = match.group(2)
+ # Extract all concatenated strings
+ concat_strings = re.findall(r'\.concat\(\s*["\']([^"\']*)["\']\s*\)', concat_calls)
+ concatenated_str = ''.join(concat_strings)
+ # Check if the concatenated string matches the known string
+ if concatenated_str == self.api_key:
+ return endpoint
+ # Unable to find :(
+ return None
+
+
class HTMLRequests:
BASE_URL = 'https://howlongtobeat.com/'
REFERER_HEADER = BASE_URL
- SEARCH_URL = BASE_URL + "api/s" # should update this to some kind of regex for api/[any alphanumeric characters here] to be more future proof since this keeps changing
GAME_URL = BASE_URL + "game"
+ # Static search url to use in case it can't be extracted from JS code
+ SEARCH_URL = BASE_URL + "api/s/"
@staticmethod
def get_search_request_headers():
@@ -46,7 +105,7 @@ def get_search_request_headers():
return headers
@staticmethod
- def get_search_request_data(game_name: str, search_modifiers: SearchModifiers, page: int, api_key: str):
+ def get_search_request_data(game_name: str, search_modifiers: SearchModifiers, page: int, search_info: SearchInformations):
"""
Generate the data payload for the search request
@param game_name: The name of the game to search
@@ -96,8 +155,8 @@ def get_search_request_data(game_name: str, search_modifiers: SearchModifiers, p
}
# If api_key is passed add it to the dict
- if api_key is not None:
- payload['searchOptions']['users']['id'] = api_key
+ if search_info is not None and search_info.api_key is not None:
+ payload['searchOptions']['users']['id'] = search_info.api_key
return json.dumps(payload)
@@ -112,19 +171,21 @@ def send_web_request(game_name: str, search_modifiers: SearchModifiers = SearchM
@return: The HTML code of the research if the request returned 200(OK), None otherwise
"""
headers = HTMLRequests.get_search_request_headers()
- api_key_result = HTMLRequests.send_website_request_getcode(False)
- if api_key_result is None:
- api_key_result = HTMLRequests.send_website_request_getcode(True)
+ search_info_data = HTMLRequests.send_website_request_getcode(False)
+ if search_info_data is None or search_info_data.api_key is None:
+ search_info_data = HTMLRequests.send_website_request_getcode(True)
# Make the request
+ if search_info_data.search_url is not None:
+ HTMLRequests.SEARCH_URL = HTMLRequests.BASE_URL + search_info_data.search_url
# The main method currently is the call to the API search URL
- search_url_with_key = HTMLRequests.SEARCH_URL + "/" + api_key_result
+ search_url_with_key = HTMLRequests.SEARCH_URL + search_info_data.api_key
payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page, None)
resp = requests.post(search_url_with_key, headers=headers, data=payload, timeout=60)
if resp.status_code == 200:
return resp.text
# Try to call with the standard url adding the api key to the user
search_url = HTMLRequests.SEARCH_URL
- payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page, api_key_result)
+ payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page, search_info_data)
resp = requests.post(search_url, headers=headers, data=payload, timeout=60)
if resp.status_code == 200:
return resp.text
@@ -141,12 +202,14 @@ async def send_async_web_request(game_name: str, search_modifiers: SearchModifie
@return: The HTML code of the research if the request returned 200(OK), None otherwise
"""
headers = HTMLRequests.get_search_request_headers()
- api_key_result = await HTMLRequests.async_send_website_request_getcode(False)
- if api_key_result is None:
- api_key_result = await HTMLRequests.async_send_website_request_getcode(True)
+ search_info_data = HTMLRequests.send_website_request_getcode(False)
+ if search_info_data is None or search_info_data.api_key is None:
+ search_info_data = HTMLRequests.send_website_request_getcode(True)
# Make the request
+ if search_info_data.search_url is not None:
+ HTMLRequests.SEARCH_URL = HTMLRequests.BASE_URL + search_info_data.search_url
# The main method currently is the call to the API search URL
- search_url_with_key = HTMLRequests.SEARCH_URL + "/" + api_key_result
+ search_url_with_key = HTMLRequests.SEARCH_URL + search_info_data.api_key
payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page, None)
async with aiohttp.ClientSession() as session:
async with session.post(search_url_with_key, headers=headers, data=payload) as resp_with_key:
@@ -154,7 +217,7 @@ async def send_async_web_request(game_name: str, search_modifiers: SearchModifie
return await resp_with_key.text()
else:
search_url = HTMLRequests.SEARCH_URL
- payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page, api_key_result)
+ payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page, search_info_data)
async with session.post(search_url, headers=headers, data=payload) as resp_user_id:
if resp_user_id is not None and resp_user_id.status == 200:
return await resp_user_id.text()
@@ -240,30 +303,6 @@ async def async_get_game_title(game_id: int):
return HTMLRequests.__cut_game_title(text)
return None
- @staticmethod
- def extract_api_from_script(script_content: str):
- """
- Function that extract the htlb code to use in the request from the given script
- @return: the string of the api key found
- """
- # Try multiple find one after the other as hltb keep changing format
- # Test 1 - The API Key is in the user id in the request json
- user_id_api_key_pattern = r'users\s*:\s*{\s*id\s*:\s*"([^"]+)"'
- matches = re.findall(user_id_api_key_pattern, script_content)
- if matches:
- key = ''.join(matches)
- return key
- # Test 2 - The API Key is in format fetch("/api/[word here]/".concat("X").concat("Y")...
- concat_api_key_pattern = r'\/api\/\w+\/"(?:\.concat\("[^"]*"\))*'
- matches = re.findall(concat_api_key_pattern, script_content)
- if matches:
- matches = str(matches).split('.concat')
- matches = [re.sub(r'["\(\)\[\]\']', '', match) for match in matches[1:]]
- key = ''.join(matches)
- return key
- # Unable to find :(
- return None
-
@staticmethod
def send_website_request_getcode(parse_all_scripts: bool):
"""
@@ -286,9 +325,10 @@ def send_website_request_getcode(parse_all_scripts: bool):
script_url = HTMLRequests.BASE_URL + script_url
script_resp = requests.get(script_url, headers=headers, timeout=60)
if script_resp.status_code == 200 and script_resp.text is not None:
- api_key_result = HTMLRequests.extract_api_from_script(script_resp.text)
- if api_key_result is not None:
- return api_key_result
+ search_info = SearchInformations(script_resp.text)
+ if search_info.api_key is not None:
+ # The api key is necessary
+ return search_info
return None
@staticmethod
@@ -317,9 +357,10 @@ async def async_send_website_request_getcode(parse_all_scripts: bool):
async with session.get(script_url, headers=headers) as script_resp:
if script_resp is not None and resp.status == 200:
script_resp_text = await script_resp.text()
- api_key_result = HTMLRequests.extract_api_from_script(script_resp_text)
- if api_key_result is not None:
- return api_key_result
+ search_info = SearchInformations(script_resp_text)
+ if search_info.api_key is not None:
+ # The api key is necessary
+ return search_info
else:
return None
else:
diff --git a/howlongtobeatpy/setup.py b/howlongtobeatpy/setup.py
index 267f0f7..4878f49 100644
--- a/howlongtobeatpy/setup.py
+++ b/howlongtobeatpy/setup.py
@@ -4,7 +4,7 @@
long_description = fh.read()
setup(name='howlongtobeatpy',
- version='1.0.16',
+ version='1.0.17',
packages=find_packages(exclude=['tests']),
description='A Python API for How Long to Beat',
long_description=long_description,
diff --git a/sonar-project.properties b/sonar-project.properties
index 61e2cff..6ce5e3b 100644
--- a/sonar-project.properties
+++ b/sonar-project.properties
@@ -2,7 +2,7 @@ sonar.organization=scrappycocco-github
sonar.projectKey=ScrappyCocco_HowLongToBeat-PythonAPI
sonar.projectName=HowLongToBeat-PythonAPI
-sonar.projectVersion=1.0.16
+sonar.projectVersion=1.0.17
sonar.python.version=3.9
# Path is relative to the sonar-project.properties file. Replace "\" by "/" on Windows.