最受欢迎主题： Multilogin X, Multilogin 6,

自动收集Cookie

由 Bruno Moraes 编写

最近一次更新时间：November 25th, 2024

目录：开始之前 · 准备环境 · 准备文件（cookie_robot.py、multilogin.py、.env）· 运行脚本 · Video guide

我们将在本文中分享为浏览器配置文件自动收集 cookie 的脚本。

开始之前

准备环境

首先，确保您的设备安装了Python 3 或更新版本。

运行脚本之前必须安装以下第三方 Python 库（脚本同时用到的 os、hashlib、time、random 属于 Python 标准库，无需单独安装）。在此处查找安装 Python 库的说明。

python-dotenv（导入名为 dotenv）
requests
selenium

准备文件

将如下.py 脚本保存在您设备的某个文件夹中。

cookie_robot.py

import os
import dotenv
import time
import random
from multilogin import Mlx
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains as Ac
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


class CookieRobot:
    """Browse a list of websites inside a Multilogin profile to collect cookies.

    The robot signs in through ``Mlx`` (unless a token is supplied), starts the
    browser profile, attaches a Selenium driver to it, sets the cookie
    preferences on the extension's "Welcome to superagent!" page and then
    clicks random same-site links on every configured website.
    """

    # Number of successful link clicks performed on each website.
    CLICKS_PER_WEBSITE = 15
    # Upper bound on click attempts per website, so a page whose links can
    # never be clicked cannot keep the loop running forever.
    MAX_ATTEMPTS_PER_WEBSITE = 60
    # Number of "Accept" headings expected on the extension's preferences page.
    EXPECTED_COOKIE_OPTIONS = 8

    def __init__(
        self,
        email_address: str,
        password: str,
        websites: list,
        profile_id=None,
        folder_id=None,
        token=None,
        browser_type=None,
    ):
        """Store the configuration and create the ``Mlx`` API client.

        Args:
            email_address: Account e-mail passed to ``Mlx``.
            password: Account password passed to ``Mlx``.
            websites: URLs to visit, one after another.
            profile_id: Identifier of the browser profile to start.
            folder_id: Identifier of the folder that contains the profile.
            token: Existing API token; when ``None``, ``run`` signs in first.
            browser_type: Browser type forwarded to ``Mlx.instantiate_driver``.
        """
        self.folder_id = folder_id
        self.websites = websites
        self.profile_id = profile_id
        self.token = token
        self.mlx = Mlx(email_address, password, token)
        self.browser_type = browser_type
        # Filled in by start_profile() and run(); defined here so a failed
        # start can be detected instead of surfacing as an AttributeError.
        self.profile_port = None
        self.driver = None

    @staticmethod
    def _site_keyword(website):
        """Return the site name used to recognise same-site links.

        The name is the first label of the host after a leading ``www.`` has
        been removed, e.g. ``"stackoverflow"`` for
        ``"https://stackoverflow.com/"``.

        Raises:
            ValueError: If no host can be extracted from ``website``.
        """
        from urllib.parse import urlparse

        # urlparse only recognises the host when the "//" marker is present.
        target = website if "//" in website else f"//{website}"
        host = urlparse(target).hostname or ""
        if host.startswith("www."):
            host = host[len("www."):]
        keyword = host.split(".")[0]
        if not keyword:
            raise ValueError(f"Cannot determine the site name of {website!r}")
        return keyword

    def allow_cookies(self):
        """Set the cookie preferences on the extension page, then close it.

        Raises:
            AssertionError: If the page does not list the expected number of
                "Accept" headings.
        """
        driver = self.driver
        wait = WebDriverWait(driver, 20)
        # Fixed pause before looking for the extension's window.
        time.sleep(10)
        # Stop on the extension's welcome window; if it is not found, the
        # driver stays on the last window it switched to.
        for handle in driver.window_handles:
            driver.switch_to.window(handle)
            if driver.title == "Welcome to superagent!":
                break
        wait.until(
            EC.presence_of_element_located(
                (By.XPATH, "//h6[contains(., 'Cookie Preferences')]")
            )
        ).click()
        wait.until(EC.presence_of_element_located((By.NAME, "advertising"))).click()
        wait.until(EC.presence_of_element_located((By.NAME, "other"))).click()
        cookie_options_number = len(
            driver.find_elements(By.XPATH, "//h6[contains(., 'Accept')]")
        )
        # Raised explicitly (instead of using ``assert``) so the check is not
        # stripped when Python runs with -O.
        if cookie_options_number != self.EXPECTED_COOKIE_OPTIONS:
            raise AssertionError(
                f"Expected {self.EXPECTED_COOKIE_OPTIONS} cookie options, "
                f"found {cookie_options_number}"
            )
        driver.close()

    def automation(self):
        """Run the whole browsing session and always release the browser.

        Errors raised while browsing the websites are printed and end the
        session; errors raised while preparing the extension propagate to the
        caller. In both cases the driver is quit and the profile is stopped.
        """
        try:
            self.allow_cookies()
            main_handle = self.driver.window_handles[0]
            self.driver.switch_to.window(main_handle)
            try:
                for website in self.websites:
                    self._browse_website(website)
            except Exception as e:
                print(f"Something happened: {e}")
        finally:
            # Close browser profile and quit driver, even if quitting fails.
            try:
                self.driver.quit()
            finally:
                self.mlx.stop_profile(self.profile_id)

    def _browse_website(self, website):
        """Open ``website`` and click random same-site links on it."""
        keyword = self._site_keyword(website)
        self.driver.get(website)
        clicks = 0
        attempts = 0
        while (
            clicks < self.CLICKS_PER_WEBSITE
            and attempts < self.MAX_ATTEMPTS_PER_WEBSITE
        ):
            attempts += 1
            current_page = self.driver.current_url
            # Watch Youtube videos
            if "watch?" in current_page:
                time.sleep(random.randrange(120, 240))
            # Watch "Shorts" videos on Youtube
            elif "shorts" in current_page:
                time.sleep(random.randrange(60, 90))
            self.scroll_randomly(random.randint(1, 5))
            candidates = self._links_containing(keyword)
            if not candidates:
                # Nothing to click here; go on with the next website instead
                # of failing the whole run.
                print(f"No links containing '{keyword}' on {current_page}")
                return
            if self._click(random.choice(candidates)):
                clicks += 1
            time.sleep(random.randint(3, 5))
        if clicks < self.CLICKS_PER_WEBSITE:
            print(f"Stopped on {website} after {attempts} click attempts")

    def _links_containing(self, keyword):
        """Return the ``<a>`` elements whose ``href`` contains ``keyword``."""
        matching = []
        for element in self.driver.find_elements(By.TAG_NAME, "a"):
            href = element.get_attribute("href")
            if href is not None and keyword in href:
                matching.append(element)
        return matching

    def _click(self, link):
        """Click ``link``; return ``True`` on success, ``False`` otherwise.

        A simulated mouse click is tried first and a JavaScript click is the
        fallback.
        """
        try:
            Ac(self.driver).move_to_element(link).pause(5).click().perform()
            return True
        except Exception:
            try:
                self.driver.execute_script(
                    "arguments[0].scrollIntoView(true); arguments[0].click();",
                    link,
                )
                return True
            except Exception:
                return False

    def scroll_randomly(self, times):
        """Scroll to ``times`` random vertical positions, pausing after each."""
        for _ in range(times):
            total_height = self.driver.execute_script(
                "return document.body.scrollHeight"
            )
            random_position = random.randint(0, total_height)
            self.driver.execute_script(f"window.scrollTo(0, {random_position});")
            time.sleep(random.randint(1, 5))

    def start_profile(self):
        """Start the profile, retrying every 60 seconds until it reports success.

        Exceptions raised by the API client are printed and end the attempts;
        ``self.profile_port`` then stays unset and ``run`` reports the failure.
        """
        try:
            while True:
                (
                    self.profile_id,
                    self.profile_port,
                    profile_started,
                    message,
                ) = self.mlx.start_normal_profile(self.profile_id, self.folder_id)
                if profile_started:
                    return
                print(
                    f"Profile couldn't be started. Probably downloading core. Will wait for 60 seconds and try again. Here is the message: {message}"
                )
                time.sleep(60)
        except Exception as e:
            print(f"Problem with starting profile: {e}")

    def run(self):
        """Sign in if needed, start the profile and run the automation.

        Raises:
            RuntimeError: If signing in yields no token or the profile could
                not be started.
        """
        if self.token is None:
            self.token = self.mlx.signin()
            if self.token is None:
                raise RuntimeError(
                    "Sign-in failed; check the e-mail address and password."
                )
        self.start_profile()
        if not self.profile_port:
            raise RuntimeError(
                "Profile could not be started; no automation port is available."
            )
        self.driver = self.mlx.instantiate_driver(self.profile_port, self.browser_type)
        self.automation()


if __name__ == "__main__":
    # Read the .env file so the settings below are available via os.getenv.
    dotenv.load_dotenv()

    # Add as many websites as you want for cookie collection
    WEBSITES = [
        "https://stackoverflow.com/",
        "https://medium.com/",
    ]

    # Every setting comes from the environment; an unset variable is None.
    EMAIL, PASSWORD = os.getenv("MLX_EMAIL"), os.getenv("MLX_PASSWORD")
    EXTENSION = os.getenv("EXTENSION_PATH")  # read here, not used below
    BROWSER, PROFILE_ID, FOLDER_ID = (
        os.getenv(variable)
        for variable in ("BROWSER_TYPE", "PROFILE_ID", "FOLDER_ID")
    )

    # Build the robot from the collected settings and start the session.
    settings = {
        "email_address": EMAIL,
        "password": PASSWORD,
        "websites": WEBSITES,
        "browser_type": BROWSER,
        "profile_id": PROFILE_ID,
        "folder_id": FOLDER_ID,
    }
    bot = CookieRobot(**settings)
    bot.run()

multilogin.py

import os
import requests
import hashlib
from selenium import webdriver
from selenium.webdriver.chromium.options import ChromiumOptions
from selenium.webdriver.firefox.options import Options


class Mlx:
    """Small client for the Multilogin cloud API and the local launcher API.

    It keeps the account credentials, the current API token and the HTTP
    headers that are sent with every request.
    """

    # Extension location (relative to the working directory) per browser type.
    _EXTENSION_PATHS = {
        "stealthfox": "./extensions/superagent.xpi",
        "mimic": "./extensions/superagent",
    }

    def __init__(self, email: str, password: str, token=None):
        """Store the credentials and prepare the request headers.

        Args:
            email: Account e-mail used by ``signin``.
            password: Account password used by ``signin``.
            token: Existing API token; when given, it is sent as a bearer
                token right away so no sign-in is required.
        """
        self.email = email
        self.password = password
        self.token = token
        self.headers = {
            "Content-Type": "application/json",
            "Accept": "application/json",
        }
        # Without this, a caller-supplied token was stored but never sent.
        if token is not None:
            self.headers["Authorization"] = f"Bearer {token}"

    def signin(self) -> str:
        """Sign in and remember the token.

        Returns:
            The API token, or ``None`` when the server does not answer with
            status 200.
        """
        url = "https://api.multilogin.com/user/signin"
        payload = {
            "email": self.email,
            # The password is sent as its MD5 hex digest.
            "password": hashlib.md5(self.password.encode()).hexdigest(),
        }
        r = requests.post(url=url, headers=self.headers, json=payload)
        if r.status_code != 200:
            print("Wrong credentials")
            return None
        self.token = r.json()["data"]["token"]
        self.headers.update({"Authorization": f"Bearer {self.token}"})
        return self.token

    def start_quick_profile(self, browser_type):
        """Start a quick profile with the extension loaded.

        Args:
            browser_type: ``"mimic"`` or ``"stealthfox"``.

        Returns:
            A tuple ``(profile_id, port, started, message)``. On any failure
            the first two items are ``None`` and ``started`` is ``False``.
        """
        relative_path = self._EXTENSION_PATHS.get(browser_type)
        if relative_path is None:
            # Previously this fell through and failed with an AttributeError
            # on self.extension_path.
            return None, None, False, f"Unsupported browser type: {browser_type!r}"
        self.extension_path = os.path.abspath(relative_path)

        payload = {
            "browser_type": browser_type,
            "os_type": "linux",
            "automation": "selenium",
            "parameters": {
                "fingerprint": {
                    "cmd_params": {
                        "params": [
                            {"flag": "load-extension", "value": self.extension_path}
                        ]
                    }
                },
                "flags": {
                    "audio_masking": "mask",
                    "fonts_masking": "mask",
                    "geolocation_masking": "mask",
                    "geolocation_popup": "prompt",
                    "graphics_masking": "mask",
                    "graphics_noise": "mask",
                    "localization_masking": "mask",
                    "media_devices_masking": "mask",
                    "navigator_masking": "mask",
                    "ports_masking": "mask",
                    "proxy_masking": "disabled",
                    "screen_masking": "mask",
                    "timezone_masking": "mask",
                    "webrtc_masking": "mask",
                },
            },
        }

        try:
            response = requests.post(
                url="https://launcher.mlx.yt:45001/api/v2/profile/quick",
                headers=self.headers,
                json=payload,
            )
            data = response.json()
            message = data["status"]["message"]
            if data["status"]["http_code"] != 200:
                return None, None, False, message
            return data["data"]["id"], data["data"]["port"], True, message
        except Exception as e:
            return None, None, False, str(e)

    def start_normal_profile(self, profile_id: str, folder_id: str):
        """Start an existing profile through the local launcher.

        Returns:
            A tuple ``(profile_id, port, started, message)``. When the launcher
            does not answer with status 200, ``port`` and ``started`` are
            ``False``.
        """
        url = f"https://launcher.mlx.yt:45001/api/v2/profile/f/{folder_id}/p/{profile_id}/start?automation_type=selenium&headless_mode=false"
        response = requests.get(url=url, headers=self.headers)
        if response.status_code != 200:
            try:
                message = response.json()["status"]["message"]
            except (ValueError, KeyError, TypeError):
                # The error body is not the expected JSON document.
                message = f"HTTP {response.status_code}: {response.text}"
            print(f"Error at starting profile: {message}")
            return profile_id, False, False, message
        data = response.json()
        return profile_id, data["data"]["port"], True, data["status"]["message"]

    def stop_profile(self, profile_id: str):
        """Ask the local launcher to stop the profile and print the outcome."""
        url = f"https://launcher.mlx.yt:45001/api/v1/profile/stop/p/{profile_id}"
        r = requests.get(url=url, headers=self.headers)
        if r.status_code != 200:
            print("Can't stop profile")
        else:
            print("Profile stopped")

    def instantiate_driver(
        self, profile_port: str, browser_type="mimic"
    ) -> "webdriver.Remote":
        """Attach a remote Selenium driver to a started profile.

        Args:
            profile_port: Local port reported when the profile was started.
            browser_type: ``"mimic"`` or ``"stealthfox"``; ``None`` is treated
                as ``"mimic"``.

        Raises:
            ValueError: If ``browser_type`` is not supported.
        """
        # Callers may pass None explicitly (e.g. an unset environment
        # variable), which bypasses the parameter default.
        if browser_type is None:
            browser_type = "mimic"
        if browser_type == "mimic":
            options = ChromiumOptions()
        elif browser_type == "stealthfox":
            options = Options()
        else:
            raise ValueError(f"Unsupported browser type: {browser_type!r}")
        return webdriver.Remote(
            command_executor=f"http://127.0.0.1:{profile_port}", options=options
        )

    def get_proxy_details(self, proxy_settings, token=None) -> dict:
        """Request a sticky SOCKS5 proxy session.

        Args:
            proxy_settings: Mapping with the keys ``country_code``, ``region``
                and ``city``.
            token: API token to use; when ``None``, the stored token is used
                and a sign-in is performed if there is none.

        Returns:
            A dict with ``host``, ``port``, ``username`` and ``password``, or
            ``None`` when the server does not answer with status 201.
        """
        if token is not None:
            # An explicitly passed token used to be ignored.
            self.token = token
        elif self.token is None:
            self.token = self.signin()

        self.headers.update({"Authorization": f"Bearer {self.token}"})
        url = "https://profile-proxy.multilogin.com/v1/proxy/connection_url"
        payload = {
            "country": proxy_settings["country_code"],
            "region": proxy_settings["region"],
            "city": proxy_settings["city"],
            "protocol": "socks5",
            "sessionType": "sticky",
            "IPTTL": 0,
        }
        response = requests.post(url=url, headers=self.headers, json=payload)
        if response.status_code != 201:
            print(f"Could not get proxy session: {response.status_code}")
            return None
        # The response data is a colon-separated string whose first four
        # fields are read as host, port, username and password.
        session = response.json()["data"].split(":")
        return {
            "host": session[0],
            "port": session[1],
            "username": session[2],
            "password": session[3],
        }

    def create_profile(self, proxy_details, profile_details, FOLDER_ID):
        """Create a "mimic" profile that uses the given proxy.

        Args:
            proxy_details: Dict with ``host``, ``port``, ``username`` and
                ``password``.
            profile_details: Dict with ``first_name`` and ``last_name``, used
                to build the profile name.
            FOLDER_ID: Identifier of the folder that receives the profile.

        Returns:
            ``(profile_id, FOLDER_ID, True)`` on success, or
            ``(None, None, None)`` when the server does not answer with
            status 201.
        """
        if self.token is None:
            self.token = self.signin()

        self.headers.update({"Authorization": f"Bearer {self.token}"})

        payload = {
            "name": f"{profile_details['first_name']} {profile_details['last_name']}",
            "folder_id": FOLDER_ID,
            "browser_type": "mimic",
            "os_type": "linux",
            "is_headless": False,
            "proxy": {
                "host": proxy_details["host"],
                "type": "socks5",
                "port": proxy_details["port"],
                "username": proxy_details["username"],
                "password": proxy_details["password"],
            },
            "parameters": {
                "fingerprint": {
                    "cmd_params": {
                        "params": [{"flag": "disable-notifications", "value": "true"}]
                    }
                },
                "flags": {
                    "audio_masking": "natural",
                    "fonts_masking": "mask",
                    "geolocation_masking": "mask",
                    "geolocation_popup": "allow",
                    "graphics_masking": "natural",
                    "graphics_noise": "natural",
                    "localization_masking": "mask",
                    "media_devices_masking": "natural",
                    "navigator_masking": "mask",
                    "ports_masking": "natural",
                    "proxy_masking": "custom",
                    "screen_masking": "natural",
                    "timezone_masking": "mask",
                    "webrtc_masking": "mask",
                },
                "storage": {"is_local": False, "save_service_worker": False},
            },
        }
        url = "https://api.multilogin.com/profile/create"
        response = requests.post(url=url, headers=self.headers, json=payload)
        if response.status_code != 201:
            print(f"Could not create profile: Error {response.status_code}")
            return None, None, None
        profile_id = response.json()["data"]["ids"][0]
        return profile_id, FOLDER_ID, True

通过 “Folder” 方式安装此浏览器扩展程序。此扩展程序将自动接受目标网站的 cookie 政策。
在相同文件夹下保存如下.env文件，并自行为文件中的变量赋值。

.env

MLX_EMAIL=
MLX_PASSWORD=
BROWSER_TYPE=
PROFILE_ID=
FOLDER_ID=

编辑cookie_robot.py文件

# 输入您需要收集cookie的网站，您还可以通过其他方式，比如表格的形式批量读取网站链接。
        WEBSITES = [
            "https://stackoverflow.com/",
            "https://medium.com/"
        ]

运行脚本

确保Agent已连接，否则配置文件无法开启。
运行cookie_robot.py脚本。

联系客户支持