Refactor getting the data from the table: wait for the UI loader to finish loading, and parse one day at a time because of the limit that prevents more than 999 instances of data from being loaded.
This commit is contained in:
@@ -1,34 +1,79 @@
|
|||||||
from time import sleep
|
from time import sleep
|
||||||
from playwright.sync_api import sync_playwright
|
from playwright.sync_api import sync_playwright
|
||||||
|
import jdatetime
|
||||||
|
from datetime import timedelta
|
||||||
|
import sys
|
||||||
|
|
||||||
pw = sync_playwright().start()
|
sys.stdout.reconfigure(encoding="utf-8") # type: ignore
|
||||||
firefox = pw.firefox.launch(headless=False)
|
|
||||||
context = firefox.new_context(ignore_https_errors=True)
|
|
||||||
page = context.new_page()
|
|
||||||
|
|
||||||
|
|
||||||
def GetDataFromTheTable(filenamenumber):
    """Save the detail page of every row in the result grid as an HTML file.

    Relies on two module-level names that must be set before the call:
    ``page`` (the Playwright page showing the search results) and ``rows``
    (the locator for the grid's ``tbody tr`` elements).

    Args:
        filenamenumber: Number used for the first output file; it is
            incremented by one for every file written.

    Returns:
        The next unused file number, so a later call can continue the
        numbering instead of overwriting earlier files.
    """
    import os  # local import: only needed to guarantee the output directory

    # open() below fails with FileNotFoundError if the directory is missing.
    os.makedirs("htmldocs", exist_ok=True)

    for i in range(rows.count()):
        href = rows.nth(i).locator("td a").last.get_attribute("href")
        if href is None:
            # Without this guard the link would become "https://rrk.irNone".
            continue
        link = "https://rrk.ir" + href

        detail_page = page.context.new_page()
        try:
            detail_page.goto(link)
            detailed_data = detail_page.locator(
                "#R41756901674822518 > div.t-Region-bodyWrap > div.t-Region-body > div.container "
            )
            html = detailed_data.inner_html()
        finally:
            # Always release the tab, even when navigation or extraction fails.
            detail_page.close()

        # Write only after the HTML was read, so a failure above never leaves
        # an empty, truncated file behind.
        with open(
            "htmldocs/" + str(filenamenumber) + ".html", "w+", encoding="utf-8"
        ) as file:
            file.write(html)
        filenamenumber += 1

    return filenamenumber
|
||||||
|
|
||||||
|
|
||||||
|
# Browser session shared by the rest of the script: Firefox is launched with
# a visible window (headless=False) and its context ignores HTTPS errors.
pw = sync_playwright().start()
firefox = pw.firefox.launch(headless=False)
context = firefox.new_context(ignore_https_errors=True)
page = context.new_page()

# Bounds of the date range, written as "year/month/day" text.
inputindate, inputoutdate = "1404/9/22", "1404/10/2"
|
||||||
|
|
||||||
|
|
||||||
|
def SplitTime(time):
    """Parse a ``"year/month/day"`` date string into integers.

    The parsed values are stored in the module-level ``year``, ``month`` and
    ``day`` variables (existing callers read them from there) and are also
    returned, so new code does not have to go through globals.

    Args:
        time: Date text whose first three ``/``-separated fields are
            integers, e.g. ``"1404/9/22"``.

    Returns:
        A ``(year, month, day)`` tuple of ints.

    Raises:
        IndexError: If the text has fewer than three ``/``-separated fields.
        ValueError: If one of the first three fields is not an integer.
    """
    global year, month, day
    parts = time.split("/")
    # Parse everything first so a bad field cannot leave the globals
    # half-updated (e.g. new year combined with the previous month/day).
    parsed = (int(parts[0]), int(parts[1]), int(parts[2]))
    year, month, day = parsed
    return parsed
|
||||||
|
|
||||||
|
|
||||||
|
# Turn the two input strings into jdatetime dates. SplitTime() publishes its
# result through the module-level year/month/day variables, so they are read
# right after each call, before the next call overwrites them.
SplitTime(inputindate)
current = start = jdatetime.date(year, month, day)

SplitTime(inputoutdate)
end = jdatetime.date(year, month, day)

# Disabled day-by-day walk over the range, kept for reference:
# while current <= end:
#     print(current)
#     current += timedelta(days=1)
|
||||||
|
|
||||||
|
|
||||||
|
# Date window typed into the search form.
# NOTE(review): these values are hard-coded and independent of
# inputindate / inputoutdate — confirm which pair should drive the query.
datefrom, dateto = "1404/9/20", "1404/9/24"

page.goto(
    "https://rrk.ir/ords/r/rrs/rrs-front/%D8%AF%D8%A7%D8%AF%D9%87-%D8%A8%D8%A7%D8%B2"
)

# The "_AZ" inputs receive the start date and the "_TA" inputs the end date.
# NOTE(review): "#P199_NEWSPAPER_TA" lacks the "DATE" part present in
# "#P199_NEWSPAPERDATE_AZ" — verify this selector against the page.
page.locator("#P199_NEWSPAPERDATE_AZ").fill(datefrom)
page.locator("#P199_SABTNODATE_AZ").fill(datefrom)
page.locator("#P199_NEWSPAPER_TA").fill(dateto)
page.locator("#P199_SABTNODATE_TA").fill(dateto)

# Click the button (presumably the search trigger — confirm), then wait for
# the ".u-Processing" element to be attached and afterwards detached again.
page.locator("#B912476867105247978").click()
page.locator(".u-Processing").wait_for(state="attached")
page.locator(".u-Processing").wait_for(state="detached")

# An empty page-range text is treated as "the query returned nothing".
if not page.locator(".a-GV-pageRange").inner_text():
    print("no data ")
else:
    # GetDataFromTheTable() reads the module-level `rows`, so it has to be
    # assigned (and its first row present) before the call.
    table = page.locator(".a-GV-table").nth(1)
    rows = table.locator("tbody tr")
    rows.first.wait_for()
    filenamenumber = 0
    GetDataFromTheTable(filenamenumber)
|
||||||
|
|
||||||
|
|
||||||
|
def NextPage():
    """Placeholder with no behavior yet; always returns ``None``.

    NOTE(review): presumably intended to advance the result grid to its next
    page — confirm before implementing.
    """
    return None
|
||||||
|
|||||||
Reference in New Issue
Block a user