def scrape(
    storage,
    is_follow_limit_reached,
    username,
    interaction,
    device,
    session_state,
    current_job,
    target,
    on_interaction,
):
    """Run *interaction* on *username* without following and record a scrape.

    Args:
        storage: object exposing ``add_interacted_user``; receives the record.
        is_follow_limit_reached: unused here (following is always disabled);
            kept so the signature stays unchanged for callers.
        username: account that is visited and recorded.
        interaction: callable invoked as
            ``interaction(device, username=username, can_follow=False)``.
        device: passed through to *interaction* untouched.
        session_state: only ``session_state.id`` is read, as the session id.
        current_job: stored as ``job_name`` on the record.
        target: stored as ``target`` on the record.
        on_interaction: callback invoked with the fixed outcome
            ``succeed=True, followed=False, scraped=True``.

    Returns:
        Whatever *on_interaction* returns.
    """
    # The interaction is executed for its side effects only: whatever it
    # reports back, the user is stored with the fixed "scraped" values below,
    # so its return value is deliberately discarded.
    interaction(device, username=username, can_follow=False)

    storage.add_interacted_user(
        username,
        session_id=session_state.id,
        job_name=current_job,
        target=target,
        followed=False,
        is_requested=False,
        scraped=True,
        liked=0,
        watched=0,
        commented=0,
        pm_sent=False,
    )
    return on_interaction(
        succeed=True,
        followed=False,
        scraped=True,
    )
def iterate_and_scrape_over_followers(
    self,
    device,
    interaction,
    is_follow_limit_reached,
    storage,
    on_interaction,
    is_myself,
    scroll_end_detector,
    session_state,
    current_job,
    target,
):
    """Walk the visible followers list of *target* and record each username.

    For every row on screen the username is read and, unless it is
    blacklisted or was already handled for this *target* (and may not be
    re-interacted with yet), it is appended through ``append_to_file`` under
    the name ``f"{target}_followers"``, stored via
    ``storage.add_interacted_user`` and counted on *session_state*. The list
    is then scrolled and the process repeats until one of the stop conditions
    below is hit.

    Args:
        self: plugin instance; ``self.ResourceID`` and ``self.args`` are read.
        device: device facade used to find views and press back.
        interaction: unused here; kept for signature compatibility.
        is_follow_limit_reached: unused here; kept for signature compatibility.
        storage: provides the blacklist / previous-interaction checks and
            receives the scraped record.
        on_interaction: unused here; kept for signature compatibility.
        is_myself: when true the list is scrolled upwards and the loop stops
            once the search row is visible.
        scroll_end_detector: tracks iterated usernames and skip/fling limits.
        session_state: checked against the TOTAL limit and updated per user.
        current_job: stored as ``job_name`` on the scraped record.
        target: account whose followers are being listed.

    Returns:
        None. The function returns when the TOTAL limit is reached, the end
        of the list is detected, the skipped limit is reached, the own list
        is scrolled to the top, or no follower was iterated on a screen.
    """
    device.find(
        resourceId=self.ResourceID.FOLLOW_LIST_CONTAINER,
        className=ClassName.LINEAR_LAYOUT,
    ).wait(Timeout.LONG)

    def scrolled_to_top():
        row_search = device.find(
            resourceId=self.ResourceID.ROW_SEARCH_EDIT_TEXT,
            className=ClassName.EDIT_TEXT,
        )
        return row_search.exists()

    while True:
        logger.info("Iterate over visible followers.")
        screen_iterated_followers = []
        screen_skipped_followers_count = 0
        scroll_end_detector.notify_new_page()
        user_list = device.find(
            resourceIdMatches=self.ResourceID.USER_LIST_CONTAINER,
        )
        # Only the row height is needed; the user count is not used here.
        row_height, _ = inspect_current_view(user_list)
        try:
            for item in user_list:
                # Rows shorter than the reference height are skipped.
                if item.get_height() < row_height:
                    continue
                user_info_view = item.child(index=1)
                user_name_view = user_info_view.child(index=0).child()
                if not user_name_view.exists():
                    logger.info(
                        "Next item not found: probably reached end of the screen.",
                        extra={"color": f"{Fore.GREEN}"},
                    )
                    break

                username = user_name_view.get_text()
                screen_iterated_followers.append(username)
                scroll_end_detector.notify_username_iterated(username)

                if storage.is_user_in_blacklist(username):
                    logger.info(f"@{username} is in blacklist. Skip.")
                    continue

                (
                    interacted,
                    interacted_when,
                ) = storage.check_user_was_interacted_with_target(username, target)
                if interacted:
                    can_reinteract = storage.can_be_reinteract(
                        interacted_when,
                        get_value(self.args.can_reinteract_after, None, 0),
                    )
                    logger.info(
                        f"@{username}: already interacted with @{target} on {interacted_when:%Y/%m/%d %H:%M:%S}. {'Interacting again now' if can_reinteract else 'Skip'}."
                    )
                    if not can_reinteract:
                        # Only "already handled" users count towards the
                        # all-skipped detection; blacklisted ones do not.
                        screen_skipped_followers_count += 1
                        continue

                # Stop the whole job as soon as the session TOTAL limit is hit.
                if session_state.check_limit(
                    limit_type=session_state.Limit.TOTAL, output=True
                ):
                    return

                logger.info(
                    f"@{username}: interact", extra={"color": f"{Fore.YELLOW}"}
                )
                # Scrape the username to file
                append_to_file(f"{target}_followers", username)
                # Persist the record as scraped (with its job name) so the
                # stored state agrees with what is reported to the session
                # state on the next line.
                storage.add_interacted_user(
                    username,
                    session_state.id,
                    scraped=True,
                    job_name=current_job,
                    target=target,
                )
                session_state.add_interaction(
                    source=username, succeed=True, followed=False, scraped=True
                )

        except IndexError:
            logger.info(
                "Cannot get next item: probably reached end of the screen.",
                extra={"color": f"{Fore.GREEN}"},
            )

        if is_myself and scrolled_to_top():
            logger.info("Scrolled to top, finish.", extra={"color": f"{Fore.GREEN}"})
            return

        if not screen_iterated_followers:
            logger.info(
                "No followers were iterated, finish.",
                extra={"color": f"{Fore.GREEN}"},
            )
            return

        load_more_button = device.find(
            resourceId=self.ResourceID.ROW_LOAD_MORE_BUTTON
        )
        load_more_button_exists = load_more_button.exists()

        if scroll_end_detector.is_the_end():
            return

        # True when every username seen on this screen was skipped as
        # "already handled".
        need_swipe = screen_skipped_followers_count == len(
            screen_iterated_followers
        )
        list_view = device.find(
            resourceId=self.ResourceID.LIST, className=ClassName.LIST_VIEW
        )
        if not list_view.exists():
            logger.error(
                "Cannot find the list of followers. Trying to press back again."
            )
            device.back()
            list_view = device.find(
                resourceId=self.ResourceID.LIST,
                className=ClassName.LIST_VIEW,
            )

        if is_myself:
            logger.info("Need to scroll now", extra={"color": f"{Fore.GREEN}"})
            list_view.scroll(Direction.UP)
            continue

        pressed_retry = False
        if load_more_button_exists:
            retry_button = load_more_button.child(
                className=ClassName.IMAGE_VIEW,
                descriptionMatches=case_insensitive_re("Retry"),
            )
            if retry_button.exists():
                random_sleep()
                # The button exists but can disappear without being pressed,
                # so check again after the pause.
                if retry_button.exists():
                    logger.info('Press "Load" button and wait few seconds.')
                    retry_button.click_retry()
                    random_sleep(5, 10, modulable=False)
                    pressed_retry = True

        if need_swipe and not pressed_retry:
            scroll_end_detector.notify_skipped_all()
            if scroll_end_detector.is_skipped_limit_reached():
                return
            if scroll_end_detector.is_fling_limit_reached():
                logger.info(
                    "Limit of all followers skipped reached, let's fling.",
                    extra={"color": f"{Fore.GREEN}"},
                )
                list_view.fling(Direction.DOWN)
            else:
                logger.info(
                    "All followers skipped, let's scroll.",
                    extra={"color": f"{Fore.GREEN}"},
                )
                list_view.scroll(Direction.DOWN)
        else:
            logger.info("Need to scroll now", extra={"color": f"{Fore.GREEN}"})
            list_view.scroll(Direction.DOWN)
False, None + + logger.debug(f"self.interacted_users.get({user}): {user}") + + for i, interaction in enumerate(user): + user_target = interaction.get('target') + if user_target == target: + interacted = True + index = i + + if not interacted: + logger.info(f"@{username} has not interacted with target: {target}, allowing to interact") + return False, None + + last_interaction = datetime.strptime( + user[index][USER_LAST_INTERACTION], "%Y-%m-%d %H:%M:%S.%f" + ) + return True, last_interaction + def get_following_status(self, username): user = self.interacted_users.get(username) if user is None: @@ -160,62 +185,87 @@ def add_interacted_user( job_name=None, target=None, ): - user = self.interacted_users.get(username, {}) - user[USER_LAST_INTERACTION] = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f") + user_interactions = self.interacted_users.get(username, {}) + interaction = {} + index = 0 + updated = False + + if len(user_interactions) > 0: + logger.debug(f"Have interacted with @{username} before, checking target") + for i, inter in enumerate(user_interactions): + logger.debug(f"Assessing interaction: {inter}") + index = i + if target == inter.get('target'): + logger.debug(f"{username} interaction with @{target} found... 
updating") + interaction = inter + updated = True + break + + interaction[USER_LAST_INTERACTION] = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f") if followed: if is_requested: - user[USER_FOLLOWING_STATUS] = FollowingStatus.REQUESTED.name.casefold() + interaction[USER_FOLLOWING_STATUS] = FollowingStatus.REQUESTED.name.casefold() else: - user[USER_FOLLOWING_STATUS] = FollowingStatus.FOLLOWED.name.casefold() + interaction[USER_FOLLOWING_STATUS] = FollowingStatus.FOLLOWED.name.casefold() elif unfollowed: - user[USER_FOLLOWING_STATUS] = FollowingStatus.UNFOLLOWED.name.casefold() + interaction[USER_FOLLOWING_STATUS] = FollowingStatus.UNFOLLOWED.name.casefold() elif scraped: - user[USER_FOLLOWING_STATUS] = FollowingStatus.SCRAPED.name.casefold() + interaction[USER_FOLLOWING_STATUS] = FollowingStatus.SCRAPED.name.casefold() else: - user[USER_FOLLOWING_STATUS] = FollowingStatus.NONE.name.casefold() + interaction[USER_FOLLOWING_STATUS] = FollowingStatus.NONE.name.casefold() # Save only the last session_id - user["session_id"] = session_id + interaction["session_id"] = session_id # Save only the last job_name and target - if not user.get("job_name"): - user["job_name"] = job_name - if not user.get("target"): - user["target"] = target + if not interaction.get("job_name"): + interaction["job_name"] = job_name + if not interaction.get("target"): + interaction["target"] = target # Increase the value of liked, watched or commented if we have already a value - user["liked"] = liked if "liked" not in user else (user["liked"] + liked) - user["watched"] = ( - watched if "watched" not in user else (user["watched"] + watched) + interaction["liked"] = liked if "liked" not in interaction else (interaction["liked"] + liked) + interaction["watched"] = ( + watched if "watched" not in interaction else (interaction["watched"] + watched) ) - user["commented"] = ( - commented if "commented" not in user else (user["commented"] + commented) + interaction["commented"] = ( + commented if 
"commented" not in interaction else (interaction["commented"] + commented) ) # Update the followed or unfollowed boolean only if we have a real update - user["followed"] = ( + interaction["followed"] = ( followed - if "followed" not in user or user["followed"] != followed - else user["followed"] + if "followed" not in interaction or interaction["followed"] != followed + else interaction["followed"] ) - user["unfollowed"] = ( + interaction["unfollowed"] = ( unfollowed - if "unfollowed" not in user or user["unfollowed"] != unfollowed - else user["unfollowed"] + if "unfollowed" not in interaction or interaction["unfollowed"] != unfollowed + else interaction["unfollowed"] ) - user["scraped"] = ( + interaction["scraped"] = ( scraped - if "scraped" not in user or user["scraped"] != scraped - else user["scraped"] + if "scraped" not in interaction or interaction["scraped"] != scraped + else interaction["scraped"] ) # Save the boolean if we sent a PM - user["pm_sent"] = ( + interaction["pm_sent"] = ( pm_sent - if "pm_sent" not in user or user["pm_sent"] != pm_sent - else user["pm_sent"] + if "pm_sent" not in interaction or interaction["pm_sent"] != pm_sent + else interaction["pm_sent"] ) - self.interacted_users[username] = user + + logger.debug(f"Adding interaction with @{target} to @{username}") + + if updated: + self.interacted_users[username][index] = interaction + else: + if len(user_interactions) == 0: + self.interacted_users[username] = [interaction] + else: + self.interacted_users[username].append(interaction) + self._update_file() def is_user_in_whitelist(self, username): diff --git a/GramAddict/plugins/scrape_blogger_followers.py b/GramAddict/plugins/scrape_blogger_followers.py new file mode 100644 index 00000000..dbad1924 --- /dev/null +++ b/GramAddict/plugins/scrape_blogger_followers.py @@ -0,0 +1,262 @@ +import logging +from functools import partial +from random import seed + +from colorama import Fore, Style + +from GramAddict.core.decorators import run_safely 
class ScrapeBloggerFollowers_Following(Plugin):
    """Handles the functionality of scraping usernames of a bloggers followers/following"""

    def __init__(self):
        super().__init__()
        self.description = (
            "Handles the functionality of scraping usernames of a bloggers followers/following"
        )
        self.arguments = [
            {
                "arg": "--scrape-blogger-followers",
                "nargs": "+",
                "help": "list of usernames whose followers you want to scrape",
                "metavar": ("username1", "username2"),
                "default": None,
                "operation": True,
            },
            {
                "arg": "--scrape-blogger-following",
                "nargs": "+",
                "help": "list of usernames whose following you want to scrape",
                "metavar": ("username1", "username2"),
                "default": None,
                "operation": True,
            },
        ]

    def run(self, device, configs, storage, sessions, profile_filter, plugin):
        """Run one scrape job per sampled source until a session limit is hit.

        Args:
            device: device facade handed to the job and to ``run_safely``.
            configs: configuration object; ``configs.args`` holds the options.
            storage: interaction storage passed down to the scraper.
            sessions: list of session states; the last one is the current one.
            profile_filter: passed through to the interaction callable.
            plugin: name of the running job; stored as ``current_mode`` and
                passed on as ``current_job``.
        """

        class State:
            def __init__(self):
                pass

            is_job_completed = False

        self.device_id = configs.args.device
        self.state = None
        self.sessions = sessions
        self.session_state = sessions[-1]
        self.args = configs.args
        self.ResourceID = resources(self.args.app_id)
        self.current_mode = plugin

        # IMPORTANT: in each job we assume being on the top of the Profile tab already
        raw_sources = self.args.scrape_blogger_followers
        if raw_sources is None:
            # NOTE(review): sources given via --scrape-blogger-following are
            # currently processed by the followers scraper as well; no
            # following-specific scraper is visible from this class - confirm.
            raw_sources = self.args.scrape_blogger_following
        # Tolerate both options being unset instead of iterating over None.
        sources = [s for s in (raw_sources or []) if s.strip()]

        # Start
        for source in sample_sources(sources, self.args.truncate_sources):
            (
                active_limits_reached,
                _,
                actions_limit_reached,
            ) = self.session_state.check_limit(limit_type=self.session_state.Limit.ALL)
            limit_reached = active_limits_reached or actions_limit_reached

            self.state = State()
            # NOTE(review): the first character of the source is dropped before
            # comparing - presumably a leading "@"; verify against the callers.
            is_myself = source[1:] == self.session_state.my_username
            its_you = " (it's you)" if is_myself else ""
            logger.info(
                f"Handle {source} {its_you}", extra={"color": f"{Style.BRIGHT}"}
            )

            # Init common things
            (
                on_interaction,
                stories_percentage,
                likes_percentage,
                follow_percentage,
                comment_percentage,
                pm_percentage,
                interact_percentage,
            ) = init_on_things(source, self.args, self.sessions, self.session_state)

            @run_safely(
                device=device,
                device_id=self.device_id,
                sessions=self.sessions,
                session_state=self.session_state,
                screen_record=self.args.screen_record,
                configs=configs,
            )
            def job():
                self.scrape_blogger(
                    device,
                    source,
                    plugin,
                    storage,
                    profile_filter,
                    on_interaction,
                    stories_percentage,
                    likes_percentage,
                    follow_percentage,
                    comment_percentage,
                    pm_percentage,
                    interact_percentage,
                )
                self.state.is_job_completed = True

            # The job is called again until it completes without being
            # interrupted, unless a limit was already reached for this source.
            while not self.state.is_job_completed and not limit_reached:
                job()

            if limit_reached:
                logger.info("Ending session.")
                self.session_state.check_limit(
                    limit_type=self.session_state.Limit.ALL, output=True
                )
                break

    def _build_job_helpers(
        self,
        username,
        profile_filter,
        stories_percentage,
        likes_percentage,
        follow_percentage,
        comment_percentage,
        pm_percentage,
    ):
        """Build the interaction callable, follow-limit check and end detector.

        Shared setup for ``scrape_blogger`` and ``handle_blogger``.

        Returns:
            Tuple ``(interaction, is_follow_limit_reached, posts_end_detector)``.
        """
        interaction = partial(
            interact_with_user,
            my_username=self.session_state.my_username,
            likes_count=self.args.likes_count,
            likes_percentage=likes_percentage,
            stories_percentage=stories_percentage,
            follow_percentage=follow_percentage,
            comment_percentage=comment_percentage,
            pm_percentage=pm_percentage,
            profile_filter=profile_filter,
            args=self.args,
            session_state=self.session_state,
            scraping_file=self.args.scrape_to_file,
            current_mode=self.current_mode,
        )

        source_follow_limit = (
            get_value(self.args.follow_limit, None, 15)
            if self.args.follow_limit is not None
            else None
        )

        is_follow_limit_reached = partial(
            is_follow_limit_reached_for_source,
            session_state=self.session_state,
            follow_limit=source_follow_limit,
            source=username,
        )

        skipped_list_limit = get_value(self.args.skipped_list_limit, None, 15)
        skipped_fling_limit = get_value(self.args.fling_when_skipped, None, 0)

        posts_end_detector = ScrollEndDetector(
            repeats_to_end=2,
            skipped_list_limit=skipped_list_limit,
            skipped_fling_limit=skipped_fling_limit,
        )
        return interaction, is_follow_limit_reached, posts_end_detector

    def scrape_blogger(
        self,
        device,
        username,
        current_job,
        storage,
        profile_filter,
        on_interaction,
        stories_percentage,
        likes_percentage,
        follow_percentage,
        comment_percentage,
        pm_percentage,
        interact_percentage,
    ):
        """Scrape the followers list of *username* via ``scrape_followers``.

        ``interact_percentage`` is accepted but not used.
        """
        (
            interaction,
            is_follow_limit_reached,
            posts_end_detector,
        ) = self._build_job_helpers(
            username,
            profile_filter,
            stories_percentage,
            likes_percentage,
            follow_percentage,
            comment_percentage,
            pm_percentage,
        )

        scrape_followers(
            self,
            device,
            self.session_state,
            username,
            current_job,
            storage,
            on_interaction,
            interaction,
            is_follow_limit_reached,
            posts_end_detector,
        )

    def handle_blogger(
        self,
        device,
        username,
        current_job,
        storage,
        profile_filter,
        on_interaction,
        stories_percentage,
        likes_percentage,
        follow_percentage,
        comment_percentage,
        pm_percentage,
        interact_percentage,
    ):
        """Process the followers of *username* via ``handle_followers``.

        Not called from ``run`` in this class; kept so the public interface
        stays unchanged. ``interact_percentage`` is accepted but not used.
        """
        (
            interaction,
            is_follow_limit_reached,
            posts_end_detector,
        ) = self._build_job_helpers(
            username,
            profile_filter,
            stories_percentage,
            likes_percentage,
            follow_percentage,
            comment_percentage,
            pm_percentage,
        )

        handle_followers(
            self,
            device,
            self.session_state,
            username,
            current_job,
            storage,
            on_interaction,
            interaction,
            is_follow_limit_reached,
            posts_end_detector,
        )
+hashtag-posts-recent: [hashtag1, hashtag2] +place-posts-top: [place1, place2] +place-posts-recent: [place1, place2] +place-likers-top: [place1, place2] +place-likers-recent: [place1, place2] interact-from-file: [usernames1.txt 10-15, usernames2.txt 3] posts-from-file: posts.txt feed: 2-5 # is the number of likes you will give in feed