Skip to content

Commit 1360eb4

Browse files
committed
Update CDP Mode
1 parent 63bfd92 commit 1360eb4

5 files changed

Lines changed: 38 additions & 13 deletions

File tree

examples/cdp_mode/ReadMe.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -471,6 +471,7 @@ sb.cdp.get_current_url()
471471
sb.cdp.get_origin()
472472
sb.cdp.get_html(include_shadow_dom=True)
473473
sb.cdp.get_page_source(include_shadow_dom=True)
474+
sb.cdp.get_beautiful_soup(source=None)
474475
sb.cdp.get_user_agent()
475476
sb.cdp.get_cookie_string()
476477
sb.cdp.get_locale_code()

help_docs/cdp_mode_methods.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -99,6 +99,7 @@ sb.cdp.get_current_url()
9999
sb.cdp.get_origin()
100100
sb.cdp.get_html(include_shadow_dom=True)
101101
sb.cdp.get_page_source(include_shadow_dom=True)
102+
sb.cdp.get_beautiful_soup(source=None)
102103
sb.cdp.get_user_agent()
103104
sb.cdp.get_cookie_string()
104105
sb.cdp.get_locale_code()

seleniumbase/core/browser_launcher.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -872,6 +872,7 @@ def uc_open_with_cdp_mode(driver, url=None, **kwargs):
872872
cdp.get_gui_element_center = CDPM.get_gui_element_center
873873
cdp.get_html = CDPM.get_html
874874
cdp.get_page_source = CDPM.get_page_source
875+
cdp.get_beautiful_soup = CDPM.get_beautiful_soup
875876
cdp.get_user_agent = CDPM.get_user_agent
876877
cdp.get_cookie_string = CDPM.get_cookie_string
877878
cdp.get_locale_code = CDPM.get_locale_code

seleniumbase/core/sb_cdp.py

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -117,10 +117,10 @@ def get(self, url, **kwargs):
117117
if hasattr(driver, "cdp_base"):
118118
driver = driver.cdp_base
119119
load_timeout = 60.0
120-
wait_timeout = 30.0
120+
wait_timeout = 50.0
121121
if hasattr(sb_config, "_cdp_proxy") and sb_config._cdp_proxy:
122122
load_timeout = 90.0
123-
wait_timeout = 45.0
123+
wait_timeout = 75.0
124124
try:
125125
task = self.page.get(url, **kwargs)
126126
self.loop.run_until_complete(
@@ -1399,6 +1399,20 @@ def get_page_source(self, include_shadow_dom=True):
13991399
)
14001400
return source
14011401

1402+
def get_beautiful_soup(self, source=None):
    """Return a BeautifulSoup object for the given (or current) HTML.

    BeautifulSoup is a toolkit for dissecting an HTML document
    and extracting what you need. It's great for screen-scraping!
    See: https://www.crummy.com/software/BeautifulSoup/bs4/doc/

    If ``source`` is falsy, the method first makes a best-effort wait
    for the "body" element to be visible (any exception from the wait
    is suppressed), then parses the result of ``get_page_source()``.
    The markup is always parsed with the "html.parser" backend.
    """
    from bs4 import BeautifulSoup

    if source:
        # Caller supplied the markup: parse it as-is.
        return BeautifulSoup(source, "html.parser")

    # No markup supplied: fall back to the current page's source.
    with suppress(Exception):
        self.wait_for_element_visible("body", timeout=settings.MINI_TIMEOUT)
    page_html = self.get_page_source()
    return BeautifulSoup(page_html, "html.parser")
1415+
14021416
def get_user_agent(self):
14031417
return self.loop.run_until_complete(
14041418
self.page.evaluate("navigator.userAgent")

seleniumbase/undetected/cdp_driver/cdp_util.py

Lines changed: 19 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
PROXY_DIR_LOCK = proxy_helper.PROXY_DIR_LOCK
3232
EXTENSIONS_DIR = os.path.dirname(os.path.realpath(extensions.__file__))
3333
AD_BLOCK_ZIP_PATH = os.path.join(EXTENSIONS_DIR, "ad_block.zip")
34+
DISABLE_CSP_ZIP_PATH = os.path.join(EXTENSIONS_DIR, "disable_csp.zip")
3435
T = typing.TypeVar("T")
3536

3637

@@ -598,16 +599,6 @@ async def start(
598599
proxy_pass,
599600
proxy_scheme,
600601
)
601-
if ad_block:
602-
sb_config.ad_block_on = True
603-
incognito = False
604-
guest = False
605-
ad_block_zip = AD_BLOCK_ZIP_PATH
606-
ad_block_dir = os.path.join(DOWNLOADS_FOLDER, "ad_block")
607-
__unzip_to_new_folder(ad_block_zip, ad_block_dir)
608-
extension_dir = __add_chrome_ext_dir(extension_dir, ad_block_dir)
609-
if disable_csp:
610-
sb_config.disable_csp = True
611602
if "binary_location" in kwargs and not browser_executable_path:
612603
browser_executable_path = kwargs["binary_location"]
613604
if not user_data_dir and "--user-data-dir" in arg_join:
@@ -624,8 +615,8 @@ async def start(
624615
user_data_dir = udd_string
625616
if user_data_dir:
626617
user_data_dir = os.path.abspath(user_data_dir)
618+
browser = None
627619
if not browser_executable_path:
628-
browser = None
629620
if "browser" in kwargs:
630621
browser = kwargs["browser"]
631622
if not browser and "--browser" in arg_join:
@@ -675,6 +666,23 @@ async def start(
675666
sb_config._cdp_browser = "atlas"
676667
else:
677668
sb_config._cdp_browser = "chrome"
669+
if ad_block:
670+
sb_config.ad_block_on = True
671+
incognito = False
672+
guest = False
673+
ad_block_zip = AD_BLOCK_ZIP_PATH
674+
ad_block_dir = os.path.join(DOWNLOADS_FOLDER, "ad_block")
675+
__unzip_to_new_folder(ad_block_zip, ad_block_dir)
676+
extension_dir = __add_chrome_ext_dir(extension_dir, ad_block_dir)
677+
if disable_csp:
678+
sb_config.disable_csp = True
679+
if not incognito and not guest:
680+
disable_csp_zip = DISABLE_CSP_ZIP_PATH
681+
disable_csp_dir = os.path.join(DOWNLOADS_FOLDER, "disable_csp")
682+
__unzip_to_new_folder(disable_csp_zip, disable_csp_dir)
683+
extension_dir = __add_chrome_ext_dir(
684+
extension_dir, disable_csp_dir
685+
)
678686
sb_config.incognito = incognito
679687
sb_config.guest_mode = guest
680688
if not config:

0 commit comments

Comments
 (0)