"""Scrape result rows from the rrk.ir open-data page and save each detail page.

The script opens the search page in a visible Firefox window, fills two date
inputs, clicks the button, then walks every row of the result grid.  For each
row it opens the last link of the row in a new tab and writes the inner HTML
of the detail container to ``htmldocs/<row index>.html``.
"""
from pathlib import Path
from time import sleep
from urllib.parse import urljoin

from playwright.sync_api import sync_playwright

BASE_URL = "https://rrk.ir"
SEARCH_URL = (
    "https://rrk.ir/ords/r/rrs/rrs-front/%D8%AF%D8%A7%D8%AF%D9%87-%D8%A8%D8%A7%D8%B2"
)
OUTPUT_DIR = Path("htmldocs")
DETAIL_SELECTOR = (
    "#R41756901674822518 > div.t-Region-bodyWrap > div.t-Region-body > div.container "
)

# Create the output directory up front so a missing folder does not abort the
# run only after the browser work has already been done.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# The context manager stops the Playwright driver even when scraping fails.
with sync_playwright() as pw:
    firefox = pw.firefox.launch(headless=False)
    try:
        context = firefox.new_context(ignore_https_errors=True)
        page = context.new_page()
        page.goto(SEARCH_URL)

        # NOTE(review): the values look like Jalali calendar dates and the
        # button presumably submits the search form -- confirm against the site.
        page.locator("#P199_SABTNODATE_AZ").fill("1404/09/20")
        page.locator("#P199_NEWSPAPERDATE_AZ").fill("1404/10/01")
        page.locator("#B912476867105247978").click()
        print("reached")

        # Fixed delay kept from the original script; presumably it gives the
        # grid time to refresh so stale rows are not read -- TODO confirm and
        # replace with an explicit wait condition.
        sleep(4)

        # The second element matching `.a-GV-table` is the one that is read.
        table = page.locator(".a-GV-table").nth(1)
        print("table is found")
        rows = table.locator("tbody tr")
        print("rows found")
        rows.first.wait_for()

        for i in range(rows.count()):
            row = rows.nth(i)
            print(row.inner_html())

            # get_attribute returns None when the attribute is absent; skip
            # such rows instead of crashing on string concatenation.
            href = row.locator("td a").last.get_attribute("href")
            if not href:
                print(f"row {i} has no link, skipping")
                continue
            # urljoin also copes with hrefs that are already absolute.
            link = urljoin(BASE_URL, href)

            detail_page = page.context.new_page()
            try:
                detail_page.goto(link)
                detail_html = detail_page.locator(DETAIL_SELECTOR).inner_html()
                (OUTPUT_DIR / f"{i}.html").write_text(detail_html, encoding="utf-8")
            finally:
                # Always release the tab, even if navigation or extraction fails.
                detail_page.close()

            # Pause between detail requests (delay kept from the original).
            sleep(5)
    finally:
        firefox.close()