Refactor getting the data from the table: wait for the UI loader to finish loading, and parse one day at a time because the loaded data is limited to 999 instances.

This commit is contained in:
2025-12-23 09:34:57 +03:30
parent 1b3b0f4bd8
commit 78421703ec

View File

@@ -1,34 +1,79 @@
from time import sleep from time import sleep
from playwright.sync_api import sync_playwright from playwright.sync_api import sync_playwright
import jdatetime
from datetime import timedelta
import sys
# Switch stdout to UTF-8 before anything is printed.
# NOTE(review): presumably needed so non-ASCII (Persian) page text can be
# printed on consoles whose default encoding is not UTF-8 — confirm.
sys.stdout.reconfigure(encoding="utf-8") # type: ignore
def GetDataFromTheTable(filenamenumber):
    """Save the detail page behind every result row as an HTML file.

    For each row of the module-level ``rows`` locator, the href of the last
    link in the row is opened in a new tab of ``page``'s browser context and
    the inner HTML of the detail container is written to
    ``htmldocs/<number>.html``.

    Args:
        filenamenumber: Number used for the first file written; it is
            increased by one for every file that is saved.

    Returns:
        The next unused file number, so a later call can continue the
        numbering instead of overwriting files from an earlier call.
    """
    for i in range(rows.count()):
        href = rows.nth(i).locator("td a").last.get_attribute("href")
        if href is None:
            # get_attribute() yields None when the attribute is absent;
            # concatenating that would request "https://rrk.irNone".
            continue

        newPage = page.context.new_page()
        try:
            newPage.goto("https://rrk.ir" + href)
            detailedData = newPage.locator(
                "#R41756901674822518 > div.t-Region-bodyWrap > div.t-Region-body > div.container "
            )
            # Read the HTML before opening the output file so a failed
            # lookup does not leave an empty, truncated file behind.
            html = detailedData.inner_html()
        finally:
            # Always release the tab, even when navigation or lookup fails.
            newPage.close()

        with open(
            "htmldocs/" + str(filenamenumber) + ".html", "w+", encoding="utf-8"
        ) as file:
            file.write(html)
        filenamenumber += 1
    return filenamenumber
pw = sync_playwright().start() pw = sync_playwright().start()
firefox = pw.firefox.launch(headless=False) firefox = pw.firefox.launch(headless=False)
context = firefox.new_context(ignore_https_errors=True) context = firefox.new_context(ignore_https_errors=True)
page = context.new_page() page = context.new_page()
# Requested date range as "year/month/day" strings; these are split on "/"
# by SplitTime() and turned into jdatetime.date objects.
inputindate = "1404/9/22"
inputoutdate = "1404/10/2"
def SplitTime(time):
    """Parse a "year/month/day" date string into three integers.

    The parsed values are stored in the module-level ``year``, ``month`` and
    ``day`` variables (callers in this file read them from there) and are
    also returned, so new code does not have to go through the globals.

    Args:
        time: Date text made of three "/"-separated integer fields,
            e.g. ``"1404/9/22"``.

    Returns:
        Tuple ``(year, month, day)`` of ints.

    Raises:
        ValueError: If ``time`` does not have exactly three fields, or a
            field is not an integer.
    """
    global year, month, day
    parts = time.split("/")
    if len(parts) != 3:
        raise ValueError(f"expected a 'year/month/day' date, got {time!r}")
    # Convert every field before touching the globals, so a malformed field
    # cannot leave them half-updated.
    parsed = tuple(int(part) for part in parts)
    year, month, day = parsed
    return parsed
# Convert the requested range into jdatetime dates. SplitTime() hands its
# result over through the globals year/month/day, so each call has to come
# immediately before the jdatetime.date(...) that reads them.
SplitTime(inputindate)
start = jdatetime.date(year, month, day)
SplitTime(inputoutdate)
end = jdatetime.date(year, month, day)
current = start
# Disabled day-by-day walk from start to end (one step per day):
# while current <= end:
# print(current)
# current += timedelta(days=1)
# Date range typed into the search form's "from"/"to" fields.
# NOTE(review): these literals are independent of inputindate/inputoutdate,
# and start/end/current are not read anywhere in the visible code — confirm
# whether the form should be driven from the computed dates instead.
# NOTE(review): the previous revision filled zero-padded values such as
# "1404/09/20"; confirm the form accepts the unpadded month here.
datefrom = "1404/9/20"
dateto = "1404/9/24"
page.goto( page.goto(
"https://rrk.ir/ords/r/rrs/rrs-front/%D8%AF%D8%A7%D8%AF%D9%87-%D8%A8%D8%A7%D8%B2" "https://rrk.ir/ords/r/rrs/rrs-front/%D8%AF%D8%A7%D8%AF%D9%87-%D8%A8%D8%A7%D8%B2"
) )
page.locator("#P199_SABTNODATE_AZ").fill("1404/09/20") page.locator("#P199_NEWSPAPERDATE_AZ").fill(datefrom)
page.locator("#P199_NEWSPAPERDATE_AZ").fill("1404/10/01") page.locator("#P199_SABTNODATE_AZ").fill(datefrom)
page.locator("#P199_NEWSPAPER_TA").fill(dateto)
page.locator("#P199_SABTNODATE_TA").fill(dateto)
page.locator("#B912476867105247978").click() page.locator("#B912476867105247978").click()
print("reached") page.locator(".u-Processing").wait_for(state="attached")
sleep(4) page.locator(".u-Processing").wait_for(state="detached")
table = page.locator(".a-GV-table").nth(1)
print("table is found") if page.locator(".a-GV-pageRange").inner_text():
rows = table.locator("tbody tr") table = page.locator(".a-GV-table").nth(1)
print("rows found") rows = table.locator("tbody tr")
rows.first.wait_for() rows.first.wait_for()
for i in range(rows.count()): filenamenumber = 0
print(rows.nth(i).inner_html()) GetDataFromTheTable(filenamenumber)
link = "https://rrk.ir" + rows.nth(i).locator("td a").last.get_attribute("href") else:
newPage = page.context.new_page() print("no data ")
newPage.goto(link)
detailedData = newPage.locator(
"#R41756901674822518 > div.t-Region-bodyWrap > div.t-Region-body > div.container " def NextPage():
) pass
with open("htmldocs/" + str(i) + ".html", "w+", encoding="utf-8") as file:
file.write(detailedData.inner_html())
newPage.close()
sleep(5)