adding locator for the table, looping over its rows, and extracting data from each link
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -14,6 +14,7 @@ htmlcov/
|
|||||||
.DS_Store
|
.DS_Store
|
||||||
.vscode/
|
.vscode/
|
||||||
.eggs
|
.eggs
|
||||||
|
log
|
||||||
_repo_version.py
|
_repo_version.py
|
||||||
coverage.xml
|
coverage.xml
|
||||||
junit/
|
junit/
|
||||||
|
|||||||
@@ -13,5 +13,22 @@ page.goto(
|
|||||||
page.locator("#P199_SABTNODATE_AZ").fill("1404/09/20")
|
page.locator("#P199_SABTNODATE_AZ").fill("1404/09/20")
|
||||||
page.locator("#P199_NEWSPAPERDATE_AZ").fill("1404/10/01")
|
page.locator("#P199_NEWSPAPERDATE_AZ").fill("1404/10/01")
|
||||||
page.locator("#B912476867105247978").click()
|
page.locator("#B912476867105247978").click()
|
||||||
valueList = page.locator(".a-GV-table ngh(1) tbody tr")
|
print("reached")
|
||||||
sleep(10)
|
sleep(4)
|
||||||
|
table = page.locator(".a-GV-table").nth(1)
|
||||||
|
print("table is found")
|
||||||
|
rows = table.locator("tbody tr")
|
||||||
|
print("rows found")
|
||||||
|
rows.first.wait_for()
|
||||||
|
for i in range(rows.count()):
|
||||||
|
print(rows.nth(i).inner_html())
|
||||||
|
link = "https://rrk.ir" + rows.nth(i).locator("td a").last.get_attribute("href")
|
||||||
|
newPage = page.context.new_page()
|
||||||
|
newPage.goto(link)
|
||||||
|
detailedData = newPage.locator(
|
||||||
|
"#R41756901674822518 > div.t-Region-bodyWrap > div.t-Region-body > div.container "
|
||||||
|
)
|
||||||
|
with open("htmldocs/" + str(i) + ".html", "w+", encoding="utf-8") as file:
|
||||||
|
file.write(detailedData.inner_html())
|
||||||
|
newPage.close()
|
||||||
|
sleep(5)
|
||||||
|
|||||||
Reference in New Issue
Block a user