@@ -24,11 +24,70 @@ class SearchModifiers(Enum):
2424 HIDE_DLC = "hide_dlc"
2525
2626
class SearchInformations:
    """
    Search parameters scraped from one of the HowLongToBeat JavaScript files.

    Attributes set by the constructor:
    - api_key: the key found in the script, None if no known format matched
    - search_url: the "/api/..." endpoint whose fetch() call is built with that key,
      None if no such call was found
    """
    search_url = None
    api_key = None

    # Format 1 - the key is the user id of the request json: users:{id:"KEY"
    _USER_ID_KEY_PATTERN = re.compile(r'users\s*:\s*\{\s*id\s*:\s*"([^"]+)"')
    # Format 2 - fetch("/api/[word here]/".concat("X").concat("Y")...
    # At least one .concat() is required, a bare "/api/word/" carries no key
    _CONCAT_KEY_PATTERN = re.compile(r'\/api\/\w+\/"((?:\.concat\("[^"]*"\))+)')
    _CONCAT_KEY_PART_PATTERN = re.compile(r'\.concat\("([^"]*)"\)')
    # fetch("/api/...".concat(...)..., <options>) - group 1 is the endpoint, group 2 the concat chain
    _SEARCH_URL_PATTERN = re.compile(
        r'fetch\(\s*["\'](\/api\/[^"\']*)["\']'  # Matches the endpoint
        r'((?:\s*\.concat\(\s*["\'][^"\']*["\']\s*\))+)'  # Captures concatenated strings
        r'\s*,'  # Matches up to the comma
    )
    _SEARCH_URL_PART_PATTERN = re.compile(r'\.concat\(\s*["\']([^"\']*)["\']\s*\)')

    def __init__(self, script_content: str):
        """
        Extract the api key and the search url from the given script
        @param script_content: the text of a JS script downloaded from the website
        """
        # The api key must be extracted first, the search url lookup compares against it
        self.api_key = self.__extract_api_from_script(script_content)
        self.search_url = self.__extract_search_url_script(script_content)
        # BASE_URL already ends with "/", drop the leading one to avoid "//" when joining
        if HTMLRequests.BASE_URL.endswith("/") and self.search_url is not None:
            self.search_url = self.search_url.lstrip("/")

    def __extract_api_from_script(self, script_content: str):
        """
        Function that extract the hltb code to use in the request from the given script
        @param script_content: the text of the JS script to inspect
        @return: the string of the api key found, None if no known format matched
        """
        # Try multiple find one after the other as hltb keep changing format
        # Test 1 - The API Key is in the user id in the request json
        user_id_match = self._USER_ID_KEY_PATTERN.search(script_content)
        if user_id_match:
            # Only the first occurrence is used, joining repeated ids would build an invalid key
            return user_id_match.group(1)
        # Test 2 - The API Key is in format fetch("/api/[word here]/".concat("X").concat("Y")...
        for concat_match in self._CONCAT_KEY_PATTERN.finditer(script_content):
            # Each fetch call is handled on its own so fragments of different calls never mix
            key = ''.join(self._CONCAT_KEY_PART_PATTERN.findall(concat_match.group(1)))
            if key:
                return key
        # Unable to find :(
        return None

    def __extract_search_url_script(self, script_content: str):
        """
        Function that extract the hltb search url to append from the script as /api/search
        Only a fetch() call whose concatenated strings equal self.api_key is accepted
        @param script_content: the text of the JS script to inspect
        @return: the search url to append, None if no fetch call matches the api key
        """
        if self.api_key is None:
            # Without a key there is nothing to compare the concatenated strings against
            return None
        for match in self._SEARCH_URL_PATTERN.finditer(script_content):
            endpoint = match.group(1)
            # Extract all concatenated strings
            concatenated_str = ''.join(self._SEARCH_URL_PART_PATTERN.findall(match.group(2)))
            # Check if the concatenated string matches the known string
            if concatenated_str == self.api_key:
                return endpoint
        # Unable to find :(
        return None
83+
84+
2785class HTMLRequests :
2886 BASE_URL = 'https://howlongtobeat.com/'
2987 REFERER_HEADER = BASE_URL
30- SEARCH_URL = BASE_URL + "api/s" # should update this to some kind of regex for api/[any alphanumeric characters here] to be more future proof since this keeps changing
3188 GAME_URL = BASE_URL + "game"
89+ # Static search url to use in case it can't be extracted from JS code
90+ SEARCH_URL = BASE_URL + "api/s/"
3291
3392 @staticmethod
3493 def get_search_request_headers ():
@@ -46,7 +105,7 @@ def get_search_request_headers():
46105 return headers
47106
48107 @staticmethod
49- def get_search_request_data (game_name : str , search_modifiers : SearchModifiers , page : int , api_key : str ):
108+ def get_search_request_data (game_name : str , search_modifiers : SearchModifiers , page : int , search_info : SearchInformations ):
50109 """
51110 Generate the data payload for the search request
52111 @param game_name: The name of the game to search
@@ -96,8 +155,8 @@ def get_search_request_data(game_name: str, search_modifiers: SearchModifiers, p
96155 }
97156
98157 # If api_key is passed add it to the dict
99- if api_key is not None :
100- payload ['searchOptions' ]['users' ]['id' ] = api_key
158+ if search_info is not None and search_info . api_key is not None :
159+ payload ['searchOptions' ]['users' ]['id' ] = search_info . api_key
101160
102161 return json .dumps (payload )
103162
@@ -112,19 +171,21 @@ def send_web_request(game_name: str, search_modifiers: SearchModifiers = SearchM
112171 @return: The HTML code of the research if the request returned 200(OK), None otherwise
113172 """
114173 headers = HTMLRequests .get_search_request_headers ()
115- api_key_result = HTMLRequests .send_website_request_getcode (False )
116- if api_key_result is None :
117- api_key_result = HTMLRequests .send_website_request_getcode (True )
174+ search_info_data = HTMLRequests .send_website_request_getcode (False )
175+ if search_info_data is None or search_info_data . api_key is None :
176+ search_info_data = HTMLRequests .send_website_request_getcode (True )
118177 # Make the request
178+ if search_info_data .search_url is not None :
179+ HTMLRequests .SEARCH_URL = HTMLRequests .BASE_URL + search_info_data .search_url
119180 # The main method currently is the call to the API search URL
120- search_url_with_key = HTMLRequests .SEARCH_URL + "/" + api_key_result
181+ search_url_with_key = HTMLRequests .SEARCH_URL + search_info_data . api_key
121182 payload = HTMLRequests .get_search_request_data (game_name , search_modifiers , page , None )
122183 resp = requests .post (search_url_with_key , headers = headers , data = payload , timeout = 60 )
123184 if resp .status_code == 200 :
124185 return resp .text
125186 # Try to call with the standard url adding the api key to the user
126187 search_url = HTMLRequests .SEARCH_URL
127- payload = HTMLRequests .get_search_request_data (game_name , search_modifiers , page , api_key_result )
188+ payload = HTMLRequests .get_search_request_data (game_name , search_modifiers , page , search_info_data )
128189 resp = requests .post (search_url , headers = headers , data = payload , timeout = 60 )
129190 if resp .status_code == 200 :
130191 return resp .text
@@ -141,20 +202,22 @@ async def send_async_web_request(game_name: str, search_modifiers: SearchModifie
141202 @return: The HTML code of the research if the request returned 200(OK), None otherwise
142203 """
143204 headers = HTMLRequests .get_search_request_headers ()
144- api_key_result = await HTMLRequests .async_send_website_request_getcode (False )
145- if api_key_result is None :
146- api_key_result = await HTMLRequests .async_send_website_request_getcode (True )
205+ search_info_data = HTMLRequests .send_website_request_getcode (False )
206+ if search_info_data is None or search_info_data . api_key is None :
207+ search_info_data = HTMLRequests .send_website_request_getcode (True )
147208 # Make the request
209+ if search_info_data .search_url is not None :
210+ HTMLRequests .SEARCH_URL = HTMLRequests .BASE_URL + search_info_data .search_url
148211 # The main method currently is the call to the API search URL
149- search_url_with_key = HTMLRequests .SEARCH_URL + "/" + api_key_result
212+ search_url_with_key = HTMLRequests .SEARCH_URL + search_info_data . api_key
150213 payload = HTMLRequests .get_search_request_data (game_name , search_modifiers , page , None )
151214 async with aiohttp .ClientSession () as session :
152215 async with session .post (search_url_with_key , headers = headers , data = payload ) as resp_with_key :
153216 if resp_with_key is not None and resp_with_key .status == 200 :
154217 return await resp_with_key .text ()
155218 else :
156219 search_url = HTMLRequests .SEARCH_URL
157- payload = HTMLRequests .get_search_request_data (game_name , search_modifiers , page , api_key_result )
220+ payload = HTMLRequests .get_search_request_data (game_name , search_modifiers , page , search_info_data )
158221 async with session .post (search_url , headers = headers , data = payload ) as resp_user_id :
159222 if resp_user_id is not None and resp_user_id .status == 200 :
160223 return await resp_user_id .text ()
@@ -240,30 +303,6 @@ async def async_get_game_title(game_id: int):
240303 return HTMLRequests .__cut_game_title (text )
241304 return None
242305
243- @staticmethod
244- def extract_api_from_script (script_content : str ):
245- """
246- Function that extract the htlb code to use in the request from the given script
247- @return: the string of the api key found
248- """
249- # Try multiple find one after the other as hltb keep changing format
250- # Test 1 - The API Key is in the user id in the request json
251- user_id_api_key_pattern = r'users\s*:\s*{\s*id\s*:\s*"([^"]+)"'
252- matches = re .findall (user_id_api_key_pattern , script_content )
253- if matches :
254- key = '' .join (matches )
255- return key
256- # Test 2 - The API Key is in format fetch("/api/[word here]/".concat("X").concat("Y")...
257- concat_api_key_pattern = r'\/api\/\w+\/"(?:\.concat\("[^"]*"\))*'
258- matches = re .findall (concat_api_key_pattern , script_content )
259- if matches :
260- matches = str (matches ).split ('.concat' )
261- matches = [re .sub (r'["\(\)\[\]\']' , '' , match ) for match in matches [1 :]]
262- key = '' .join (matches )
263- return key
264- # Unable to find :(
265- return None
266-
267306 @staticmethod
268307 def send_website_request_getcode (parse_all_scripts : bool ):
269308 """
@@ -286,9 +325,10 @@ def send_website_request_getcode(parse_all_scripts: bool):
286325 script_url = HTMLRequests .BASE_URL + script_url
287326 script_resp = requests .get (script_url , headers = headers , timeout = 60 )
288327 if script_resp .status_code == 200 and script_resp .text is not None :
289- api_key_result = HTMLRequests .extract_api_from_script (script_resp .text )
290- if api_key_result is not None :
291- return api_key_result
328+ search_info = SearchInformations (script_resp .text )
329+ if search_info .api_key is not None :
330+ # The api key is necessary
331+ return search_info
292332 return None
293333
294334 @staticmethod
@@ -317,9 +357,10 @@ async def async_send_website_request_getcode(parse_all_scripts: bool):
317357 async with session .get (script_url , headers = headers ) as script_resp :
318358 if script_resp is not None and resp .status == 200 :
319359 script_resp_text = await script_resp .text ()
320- api_key_result = HTMLRequests .extract_api_from_script (script_resp_text )
321- if api_key_result is not None :
322- return api_key_result
360+ search_info = SearchInformations (script_resp_text )
361+ if search_info .api_key is not None :
362+ # The api key is necessary
363+ return search_info
323364 else :
324365 return None
325366 else :
0 commit comments