Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 85 additions & 44 deletions howlongtobeatpy/howlongtobeatpy/HTMLRequests.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,70 @@ class SearchModifiers(Enum):
HIDE_DLC = "hide_dlc"


class SearchInformations:
search_url = None
api_key = None

def __init__(self, script_content: str):
self.api_key = self.__extract_api_from_script(script_content)
self.search_url = self.__extract_search_url_script(script_content)
if HTMLRequests.BASE_URL.endswith("/") and self.search_url is not None:
self.search_url = self.search_url.lstrip("/")

def __extract_api_from_script(self, script_content: str):
"""
Function that extract the htlb code to use in the request from the given script
@return: the string of the api key found
"""
# Try multiple find one after the other as hltb keep changing format
# Test 1 - The API Key is in the user id in the request json
user_id_api_key_pattern = r'users\s*:\s*{\s*id\s*:\s*"([^"]+)"'
matches = re.findall(user_id_api_key_pattern, script_content)
if matches:
key = ''.join(matches)
return key
# Test 2 - The API Key is in format fetch("/api/[word here]/".concat("X").concat("Y")...
concat_api_key_pattern = r'\/api\/\w+\/"(?:\.concat\("[^"]*"\))*'
matches = re.findall(concat_api_key_pattern, script_content)
if matches:
matches = str(matches).split('.concat')
matches = [re.sub(r'["\(\)\[\]\']', '', match) for match in matches[1:]]
key = ''.join(matches)
return key
# Unable to find :(
return None

def __extract_search_url_script(self, script_content: str):
"""
Function that extract the htlb search url to append from the script as /api/search
@return: the search url to append
"""
pattern = re.compile(
r'fetch\(\s*["\'](\/api\/[^"\']*)["\']' # Matches the endpoint
r'((?:\s*\.concat\(\s*["\']([^"\']*)["\']\s*\))+)' # Captures concatenated strings
r'\s*,', # Matches up to the comma
re.DOTALL
)
matches = pattern.finditer(script_content)
for match in matches:
endpoint = match.group(1)
concat_calls = match.group(2)
# Extract all concatenated strings
concat_strings = re.findall(r'\.concat\(\s*["\']([^"\']*)["\']\s*\)', concat_calls)
concatenated_str = ''.join(concat_strings)
# Check if the concatenated string matches the known string
if concatenated_str == self.api_key:
return endpoint
# Unable to find :(
return None


class HTMLRequests:
BASE_URL = 'https://howlongtobeat.com/'
REFERER_HEADER = BASE_URL
SEARCH_URL = BASE_URL + "api/s" # should update this to some kind of regex for api/[any alphanumeric characters here] to be more future proof since this keeps changing
GAME_URL = BASE_URL + "game"
# Static search url to use in case it can't be extracted from JS code
SEARCH_URL = BASE_URL + "api/s/"

@staticmethod
def get_search_request_headers():
Expand All @@ -46,7 +105,7 @@ def get_search_request_headers():
return headers

@staticmethod
def get_search_request_data(game_name: str, search_modifiers: SearchModifiers, page: int, api_key: str):
def get_search_request_data(game_name: str, search_modifiers: SearchModifiers, page: int, search_info: SearchInformations):
"""
Generate the data payload for the search request
@param game_name: The name of the game to search
Expand Down Expand Up @@ -96,8 +155,8 @@ def get_search_request_data(game_name: str, search_modifiers: SearchModifiers, p
}

# If api_key is passed add it to the dict
if api_key is not None:
payload['searchOptions']['users']['id'] = api_key
if search_info is not None and search_info.api_key is not None:
payload['searchOptions']['users']['id'] = search_info.api_key

return json.dumps(payload)

Expand All @@ -112,19 +171,21 @@ def send_web_request(game_name: str, search_modifiers: SearchModifiers = SearchM
@return: The HTML code of the research if the request returned 200(OK), None otherwise
"""
headers = HTMLRequests.get_search_request_headers()
api_key_result = HTMLRequests.send_website_request_getcode(False)
if api_key_result is None:
api_key_result = HTMLRequests.send_website_request_getcode(True)
search_info_data = HTMLRequests.send_website_request_getcode(False)
if search_info_data is None or search_info_data.api_key is None:
search_info_data = HTMLRequests.send_website_request_getcode(True)
# Make the request
if search_info_data.search_url is not None:
HTMLRequests.SEARCH_URL = HTMLRequests.BASE_URL + search_info_data.search_url
# The main method currently is the call to the API search URL
search_url_with_key = HTMLRequests.SEARCH_URL + "/" + api_key_result
search_url_with_key = HTMLRequests.SEARCH_URL + search_info_data.api_key
payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page, None)
resp = requests.post(search_url_with_key, headers=headers, data=payload, timeout=60)
if resp.status_code == 200:
return resp.text
# Try to call with the standard url adding the api key to the user
search_url = HTMLRequests.SEARCH_URL
payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page, api_key_result)
payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page, search_info_data)
resp = requests.post(search_url, headers=headers, data=payload, timeout=60)
if resp.status_code == 200:
return resp.text
Expand All @@ -141,20 +202,22 @@ async def send_async_web_request(game_name: str, search_modifiers: SearchModifie
@return: The HTML code of the research if the request returned 200(OK), None otherwise
"""
headers = HTMLRequests.get_search_request_headers()
api_key_result = await HTMLRequests.async_send_website_request_getcode(False)
if api_key_result is None:
api_key_result = await HTMLRequests.async_send_website_request_getcode(True)
search_info_data = HTMLRequests.send_website_request_getcode(False)
if search_info_data is None or search_info_data.api_key is None:
search_info_data = HTMLRequests.send_website_request_getcode(True)
# Make the request
if search_info_data.search_url is not None:
HTMLRequests.SEARCH_URL = HTMLRequests.BASE_URL + search_info_data.search_url
# The main method currently is the call to the API search URL
search_url_with_key = HTMLRequests.SEARCH_URL + "/" + api_key_result
search_url_with_key = HTMLRequests.SEARCH_URL + search_info_data.api_key
payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page, None)
async with aiohttp.ClientSession() as session:
async with session.post(search_url_with_key, headers=headers, data=payload) as resp_with_key:
if resp_with_key is not None and resp_with_key.status == 200:
return await resp_with_key.text()
else:
search_url = HTMLRequests.SEARCH_URL
payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page, api_key_result)
payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page, search_info_data)
async with session.post(search_url, headers=headers, data=payload) as resp_user_id:
if resp_user_id is not None and resp_user_id.status == 200:
return await resp_user_id.text()
Expand Down Expand Up @@ -240,30 +303,6 @@ async def async_get_game_title(game_id: int):
return HTMLRequests.__cut_game_title(text)
return None

@staticmethod
def extract_api_from_script(script_content: str):
"""
Function that extract the htlb code to use in the request from the given script
@return: the string of the api key found
"""
# Try multiple find one after the other as hltb keep changing format
# Test 1 - The API Key is in the user id in the request json
user_id_api_key_pattern = r'users\s*:\s*{\s*id\s*:\s*"([^"]+)"'
matches = re.findall(user_id_api_key_pattern, script_content)
if matches:
key = ''.join(matches)
return key
# Test 2 - The API Key is in format fetch("/api/[word here]/".concat("X").concat("Y")...
concat_api_key_pattern = r'\/api\/\w+\/"(?:\.concat\("[^"]*"\))*'
matches = re.findall(concat_api_key_pattern, script_content)
if matches:
matches = str(matches).split('.concat')
matches = [re.sub(r'["\(\)\[\]\']', '', match) for match in matches[1:]]
key = ''.join(matches)
return key
# Unable to find :(
return None

@staticmethod
def send_website_request_getcode(parse_all_scripts: bool):
"""
Expand All @@ -286,9 +325,10 @@ def send_website_request_getcode(parse_all_scripts: bool):
script_url = HTMLRequests.BASE_URL + script_url
script_resp = requests.get(script_url, headers=headers, timeout=60)
if script_resp.status_code == 200 and script_resp.text is not None:
api_key_result = HTMLRequests.extract_api_from_script(script_resp.text)
if api_key_result is not None:
return api_key_result
search_info = SearchInformations(script_resp.text)
if search_info.api_key is not None:
# The api key is necessary
return search_info
return None

@staticmethod
Expand Down Expand Up @@ -317,9 +357,10 @@ async def async_send_website_request_getcode(parse_all_scripts: bool):
async with session.get(script_url, headers=headers) as script_resp:
if script_resp is not None and resp.status == 200:
script_resp_text = await script_resp.text()
api_key_result = HTMLRequests.extract_api_from_script(script_resp_text)
if api_key_result is not None:
return api_key_result
search_info = SearchInformations(script_resp_text)
if search_info.api_key is not None:
# The api key is necessary
return search_info
else:
return None
else:
Expand Down
2 changes: 1 addition & 1 deletion howlongtobeatpy/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
long_description = fh.read()

setup(name='howlongtobeatpy',
version='1.0.16',
version='1.0.17',
packages=find_packages(exclude=['tests']),
description='A Python API for How Long to Beat',
long_description=long_description,
Expand Down
2 changes: 1 addition & 1 deletion sonar-project.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ sonar.organization=scrappycocco-github
sonar.projectKey=ScrappyCocco_HowLongToBeat-PythonAPI

sonar.projectName=HowLongToBeat-PythonAPI
sonar.projectVersion=1.0.16
sonar.projectVersion=1.0.17
sonar.python.version=3.9

# Path is relative to the sonar-project.properties file. Replace "\" by "/" on Windows.
Expand Down
Loading