Selenium: Iterating through groups of elements
Use find_elements_by_class_name() to get all the person blocks and find_element_by_xpath() to get the title and company within each one:
persons = []
for person in driver.find_elements_by_class_name('person'):
    title = person.find_element_by_xpath('.//div[@class="title"]/a').text
    company = person.find_element_by_xpath('.//div[@class="company"]/a').text
    persons.append({'title': title, 'company': company})
In Python, how to loop through all clickable elements of a given Xpath group when length of list of elements changes
Give this a try. It looks like you can get the data directly through POST requests.
import requests
import pandas as pd
from bs4 import BeautifulSoup
import re
import time

s = requests.Session()
s.get('https://www.seethroughny.net/')
cookies = s.cookies.get_dict()
cookieStr = ''
for k, v in cookies.items():
    cookieStr += f'{k}={v};'

url = 'https://www.seethroughny.net/tools/required/reports/payroll'
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36',
    'cookie': cookieStr,
    'accept': 'application/json, text/javascript, */*; q=0.01',
    'referer': 'https://www.seethroughny.net/payrolls'}
payload = {'action': 'get'}

def get_data(s, formData, payload):
    retry = 0
    try:
        jsonData = s.post(url, headers=headers, data=formData, params=payload, timeout=10).json()
        success = True
    except Exception:
        retry += 1
        print('Retry: %s' % retry)
        jsonData = None
        success = False
        time.sleep(10)
    return jsonData, success

def parse_data(jsonData):
    rows = []
    html = jsonData['html']
    soup = BeautifulSoup(html, 'html.parser')
    trs = soup.find_all('tr', {'id': re.compile("^resultRow")})
    for tr in trs:
        tds = tr.find_all('td')
        row = {
            'Name': tds[1].text,
            'Employer/Agency': tds[2].text,
            'Total Pay': tds[3].text,
            'Subagency/Type': tds[4].text
        }
        expandedRow = tr.find_next('tr', {'id': re.compile("^expandRow")})
        td = expandedRow.find_all('td')[1]
        divs = td.find_all('div', {'class': re.compile("^row")})
        for each in divs:
            header = each.find_all('div')[0].text
            value = each.find_all('div')[1].text
            row.update({header: value})
        rows.append(row)
    return rows

rows = []
for year in [2021]:
    completedYear = False
    # Get total pages
    formData = {
        'PayYear[]': f'{year}',
        'SortBy': 'YTDPay DESC',
        'current_page': '0',
        'result_id': '0',
        'url': '/tools/required/reports/payroll?action=get',
        'nav_request': '0'}
    jsonData, success = get_data(s, formData, payload)
    total_pages = jsonData['total_pages']
    print(f'Year: {year}\tPage: 0 of {total_pages}')
    rows += parse_data(jsonData)

    page = 1
    while completedYear == False:
        success = False
        while success == False:
            formData.update({'current_page': '%s' % page})
            jsonData, success = get_data(s, formData, payload)
        rows += parse_data(jsonData)
        print(f'Year: {year}\tPage: {page} of {total_pages}')
        page += 1
        if page > total_pages:
            completedYear = True

df = pd.DataFrame(rows)
Output:
print(df.head(5).to_string())
Name Employer/Agency Total Pay Subagency/Type SubAgency/Type Title Rate of Pay Pay Year Pay Basis Branch/Major Category
0 Johnson, Candace Roswell Park Cancer Institute Corporation $1,622,807 Roswell Park Cancer Institute Corporation Roswell Park Cancer Institute Corporation President & Ceo $0 2021 Annual Public Authorities
1 Kuettel, Michael Roswell Park Cancer Institute Corporation $958,373 Roswell Park Cancer Institute Corporation Roswell Park Cancer Institute Corporation Chair $0 2021 Annual Public Authorities
2 Odunsi, Adekunle Roswell Park Cancer Institute Corporation $918,079 Roswell Park Cancer Institute Corporation Roswell Park Cancer Institute Corporation Deputy Director $0 2021 Annual Public Authorities
3 Fenstermaker, Robert Roswell Park Cancer Institute Corporation $914,887 Roswell Park Cancer Institute Corporation Roswell Park Cancer Institute Corporation Chair $0 2021 Annual Public Authorities
4 Guru, Khurshid Roswell Park Cancer Institute Corporation $856,979 Roswell Park Cancer Institute Corporation Roswell Park Cancer Institute Corporation Chair $0 2021 Annual Public Authorities
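The answer above retries failed requests by looping on the success flag that get_data returns. That pattern can be factored into a small, site-independent helper; a minimal sketch (fetch_with_retries and the flaky stand-in are illustrative names, not part of the original code):

```python
import time

def fetch_with_retries(fetch, max_retries=3, delay=0):
    """Call fetch() until it succeeds or retries run out."""
    for attempt in range(1, max_retries + 1):
        try:
            return fetch(), True
        except Exception as exc:
            print(f'Retry {attempt}: {exc}')
            time.sleep(delay)
    return None, False

# Stand-in for a request that fails twice, then succeeds.
calls = {'n': 0}
def flaky():
    calls['n'] += 1
    if calls['n'] < 3:
        raise ValueError('transient error')
    return {'total_pages': 5}

data, ok = fetch_with_retries(flaky)
print(data, ok)  # {'total_pages': 5} True
```

Keeping the retry loop inside the helper, rather than around every call site, avoids the nested while success == False loops in the scraper above.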
Selenium - Iterating through groups of elements - Python
Try this. You can build the array of buttons directly rather than an array of li elements, then click every button whose text is Follow:
browser.maximize_window()
users = browser.find_elements_by_xpath("//button[text()='Follow']")
print(len(users))  # check: it should be 12
for user in users:
    browser.execute_script("arguments[0].click()", user)
    # user.click() would also work to click each button
How do I iterate through elements in Selenium and Python?
You are not using the loop variable inside the loop: meci.find_element_by_class_name should be items.find_element_by_class_name. Your loop should be
meci = driver.find_elements_by_class_name('KambiBC-event-result__match')
for items in meci:
    scor = items.find_element_by_class_name('KambiBC-event-result__points')
    print(scor.text)
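The underlying mistake is calling an element method on the whole collection instead of on one item of it. Plain Python data shows the same distinction (the match data below is made up for illustration):

```python
# A find_elements_* call returns a list; only its items carry the data.
matches = [
    {'points': '2 - 1'},
    {'points': '0 - 0'},
]

# matches['points'] would raise TypeError: the list itself has no 'points'.
# Use the loop variable, which is one element of the list:
scores = []
for match in matches:
    scores.append(match['points'])
print(scores)  # ['2 - 1', '0 - 0']
```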
Iterating through elements get repeating result on Selenium on Python
Your XPath for the second search is incorrect: it must begin with a dot (.). Otherwise the search starts from the top of the document, which is why it always finds the first item. See my example below.
results = driver.find_elements_by_xpath('//*[@id="results"]/li')
for result in results:
    name = result.find_element_by_xpath('.//*[@class="name"]').text
    print(name)
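The same scoping rule applies outside Selenium. A minimal sketch with the standard library's xml.etree, where a path starting with .// restricts the search to the current element's descendants (the markup and class names are made up for illustration):

```python
import xml.etree.ElementTree as ET

html = """<ul id="results">
  <li><span class='name'>Alice</span></li>
  <li><span class='name'>Bob</span></li>
</ul>"""
root = ET.fromstring(html)

names = []
for li in root.findall('li'):
    # './/' keeps the search scoped to this <li>, mirroring the Selenium fix
    span = li.find(".//span[@class='name']")
    names.append(span.text)
print(names)  # ['Alice', 'Bob']
```

Without the scoping, each iteration would resolve the same first match, producing the repeated results described in the question.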
Python Selenium iterate table of links clicking each link
To perform what you want here, you first need to close the cookies banner at the bottom of the page.
Then you can iterate over the links in the table.
Since clicking each link opens a new page, after scraping the data there you have to go back to the main page to get the next link. You cannot simply collect all the links into a list up front and iterate over that list: navigating to another page makes all the elements Selenium grabbed on the initial page stale.
Your code can be something like this:
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time

driver = webdriver.Chrome(executable_path=r'C:\Users\my_path\chromedriver_96.exe')
driver.get(r"https://www.fidelity.co.uk/shares/ftse-350/")
actions = ActionChains(driver)

# close the cookies banner
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.ID, "ensCloseBanner"))).click()
# wait for the first link in the table
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//table//tbody/tr/td/a")))
# extra wait to make sure all the links are loaded
time.sleep(1)
# get the total number of links
links = driver.find_elements_by_xpath('//table//tbody/tr/td/a')
for index, val in enumerate(links):
    # get the links again after coming back to the initial page in the loop
    links = driver.find_elements_by_xpath('//table//tbody/tr/td/a')
    # scroll to the n-th link; it may be out of the initially visible area
    actions.move_to_element(links[index]).perform()
    links[index].click()
    # scrape the data on the new page, then go back with the following command
    driver.execute_script("window.history.go(-1)")  # alternatively: driver.back()
    WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//table//tbody/tr/td/a")))
    time.sleep(1)