From 1b3b0f4bd848efc6aa63ba932a278c7dd7723b0b Mon Sep 17 00:00:00 2001
From: = <=>
Date: Mon, 22 Dec 2025 23:36:29 +0330
Subject: [PATCH] adding locator for the table and looping and extracting data
 on each link

---
 .gitignore     |  1 +
 crawlingrrk.py | 21 +++++++++++++++++++--
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5924335..1c6d7e1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,7 @@ htmlcov/
 .DS_Store
 .vscode/
 .eggs
+log
 _repo_version.py
 coverage.xml
 junit/
diff --git a/crawlingrrk.py b/crawlingrrk.py
index 1177ca7..1cf3372 100644
--- a/crawlingrrk.py
+++ b/crawlingrrk.py
@@ -13,5 +13,22 @@ page.goto(
 page.locator("#P199_SABTNODATE_AZ").fill("1404/09/20")
 page.locator("#P199_NEWSPAPERDATE_AZ").fill("1404/10/01")
 page.locator("#B912476867105247978").click()
-valueList = page.locator(".a-GV-table ngh(1) tbody tr")
-sleep(10)
+print("reached")
+sleep(4)
+table = page.locator(".a-GV-table").nth(1)
+print("table is found")
+rows = table.locator("tbody tr")
+print("rows found")
+rows.first.wait_for()
+for i in range(rows.count()):
+    print(rows.nth(i).inner_html())
+    link = "https://rrk.ir" + rows.nth(i).locator("td a").last.get_attribute("href")
+    newPage = page.context.new_page()
+    newPage.goto(link)
+    detailedData = newPage.locator(
+        "#R41756901674822518 > div.t-Region-bodyWrap > div.t-Region-body > div.container "
+    )
+    with open("htmldocs/" + str(i) + ".html", "w+", encoding="utf-8") as file:
+        file.write(detailedData.inner_html())
+    newPage.close()
+sleep(5)
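
For reference, below is a minimal standalone sketch of the crawling flow this patch adds to crawlingrrk.py, using Playwright's sync API. It is not the script itself: the browser launch, headless mode, and the SEARCH_URL constant are assumptions, since the diff only shows crawlingrrk.py from line 13 onward and the actual page.goto() URL is truncated in the hunk header.

# Hypothetical sketch; browser setup and SEARCH_URL are assumed, as the diff
# does not include the first 12 lines of crawlingrrk.py.
from playwright.sync_api import sync_playwright

SEARCH_URL = "https://rrk.ir"  # placeholder; the real search-form URL is in the un-diffed lines

with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)  # assumed launch options
    page = browser.new_page()
    page.goto(SEARCH_URL)

    # Fill the date fields and submit the search, as in the patch
    page.locator("#P199_SABTNODATE_AZ").fill("1404/09/20")
    page.locator("#P199_NEWSPAPERDATE_AZ").fill("1404/10/01")
    page.locator("#B912476867105247978").click()

    # The second .a-GV-table on the results page is the data grid
    table = page.locator(".a-GV-table").nth(1)
    rows = table.locator("tbody tr")
    rows.first.wait_for()  # wait until at least one result row is rendered

    for i in range(rows.count()):
        # The last <a> in each row links to that record's detail page
        href = rows.nth(i).locator("td a").last.get_attribute("href")
        detail_page = page.context.new_page()
        detail_page.goto("https://rrk.ir" + href)
        detail = detail_page.locator(
            "#R41756901674822518 > div.t-Region-bodyWrap > div.t-Region-body > div.container"
        )
        # htmldocs/ must already exist, as in the patched script
        with open("htmldocs/" + str(i) + ".html", "w+", encoding="utf-8") as file:
            file.write(detail.inner_html())
        detail_page.close()

    browser.close()

Replacing the patch's fixed sleep() calls with rows.first.wait_for() here is a design choice of the sketch, not something the patch itself does.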