How to get elements from bs4.ResultSet?

I have a list of books and I want to parse genres of books from famous online stores in our country.

def add_plus(title):
    """Return *title* with every space replaced by ``+`` for a search URL."""
    return "+".join(title.split(" "))

def get_category(title):
    """Return the ``<span>`` tags labelled "Сериясы" on the book page for *title*.

    Searches kitapal.kz for the title, follows the link wrapped around the
    first ``item-books`` result, and collects every ``<span>`` on the book
    page whose text contains "Сериясы".

    Returns:
        The ``bs4.ResultSet`` produced by ``find_all`` (not yet reduced to
        genre names).
    """
    url_search = "https://www.kitapal.kz/search?search=" + add_plus(title)
    request_page_name = requests.get(url_search)
    soup = BeautifulSoup(request_page_name.content, "html.parser")
    # The result <div> sits inside the link to the book page.
    url_book = soup.find("div", class_="item-books").parent.get('href')
    url_book = "https://www.kitapal.kz" + url_book
    request_book_page = requests.get(url_book)
    soup_book = BeautifulSoup(request_book_page.content, "html.parser")
    soup_category = soup_book.find_all(lambda tag: tag.name == "span" and "Сериясы" in tag.text)
    return soup_category

When I call the function get_category("Как я выиграл жизнь"), I get this result:

 [<span class="d-block">Сериясы:
 <span class="text-green">
 <a href="/catalog/5">Тұлғалық даму,</a>
 </span>
 <span class="text-green">
 <a href="/catalog/15">Мотивация,</a>
 </span>
 <span class="text-green">
 <a href="/catalog/6">Әр түрлі</a>
 </span>
 </span>]

But I want to get the genres inside the <a> tags, like ['Тұлғалық даму', 'Мотивация', 'Әр түрлі']. I could not get any further. I would really appreciate any help.

Topic parsing python

Category Data Science


You can approach the problem in the following two ways:

  1. Convert the bs4.ResultSet to a list object (if you are more familiar with Python lists), then call the list's pop method and store the returned value in a new variable.

OR

  2. Simply call the pop method of the bs4.ResultSet itself and store the returned value in a new variable.

Either way, once you call pop, you obtain a value of type bs4.Tag. You can then use its built-in text property, combined with Python string manipulation, to obtain your desired result.

Here is the modified version of your code:

1. Without Lambda Function

def add_plus(title):
    """Return *title* with each space character replaced by ``+``."""
    return title.replace(" ", "+")

def get_category(title):
    """Return the genre names listed for the first search hit of *title*.

    Searches kitapal.kz for the title, follows the link wrapped around the
    first ``item-books`` result to the book page, and parses the text of the
    ``<span>`` that carries the "Сериясы" label.

    Args:
        title: Book title to search for.

    Returns:
        A list of genre names with surrounding whitespace removed. The list
        is empty when the search result, its link, or the label span cannot
        be found.
    """
    url_search = "https://www.kitapal.kz/search?search=" + add_plus(title)
    request_page_name = requests.get(url_search)
    soup = BeautifulSoup(request_page_name.content, "html.parser")

    first_hit = soup.find("div", class_="item-books")
    if first_hit is None:
        return []
    # The result <div> is expected to sit inside the link to the book page.
    href = first_hit.parent.get("href")
    if not href:
        return []

    request_book_page = requests.get("https://www.kitapal.kz" + href)
    soup_book = BeautifulSoup(request_book_page.content, "html.parser")
    soup_category = soup_book.find_all(
        lambda tag: tag.name == "span" and "Сериясы" in tag.text
    )
    if not soup_category:
        return []

    # pop() takes the last matching span from the result set.
    label_span = soup_category.pop()
    # Collapse newlines and runs of spaces so names carry no padding.
    flat_text = " ".join(label_span.text.split())
    # Cut at the label itself: str.strip("Сериясы:") treats its argument as a
    # set of characters and would also trim those letters off genre names.
    _, _, listing = flat_text.partition("Сериясы")
    names = (name.strip() for name in listing.lstrip(":").split(","))
    return [name for name in names if name]

2. With Lambda Functions

def add_plus(title):
    """Join the space-separated pieces of *title* with ``+`` signs."""
    pieces = title.split(" ")
    return "+".join(pieces)

def get_category(title):
    """Return the genre names linked for the first search hit of *title*.

    Searches kitapal.kz for the title, follows the link wrapped around the
    first ``item-books`` result to the book page, and reads the text of each
    ``<a>`` inside the ``<span>`` that carries the "Сериясы" label.

    Args:
        title: Book title to search for.

    Returns:
        A list of genre names without surrounding whitespace or the trailing
        comma that separates them on the page. The list is empty when the
        search result, its link, or the label span cannot be found.
    """
    url_search = "https://www.kitapal.kz/search?search=" + add_plus(title)
    request_page_name = requests.get(url_search)
    soup = BeautifulSoup(request_page_name.content, "html.parser")

    first_hit = soup.find("div", class_="item-books")
    if first_hit is None:
        return []
    # The result <div> is expected to sit inside the link to the book page.
    href = first_hit.parent.get("href")
    if not href:
        return []

    request_book_page = requests.get("https://www.kitapal.kz" + href)
    soup_book = BeautifulSoup(request_book_page.content, "html.parser")
    soup_category = soup_book.find_all(
        lambda tag: tag.name == "span" and "Сериясы" in tag.text
    )
    if not soup_category:
        return []

    # pop() takes the last matching span from the result set.
    label_span = soup_category.pop()
    # The link text itself ends with the separating comma ("Мотивация,"),
    # so trim it along with any whitespace around the name.
    return list(
        map(
            lambda link: link.text.strip().rstrip(",").strip(),
            label_span.find_all("a"),
        )
    )

About

Geeks Mental is a community that publishes articles and tutorials about Web, Android, Data Science, new techniques and Linux security.