Source code for app.facilities.wolfsburg

import re
import logging

from bs4 import BeautifulSoup
from datetime import date, datetime
from selenium.webdriver.common.keys import Keys
from urllib.parse import urljoin

from app.browser import create_browser
from app.utils import next_page


# Module-level logger registered under the name 'app'.
logger = logging.getLogger('app')

# Static description of this facility: display name, city and the URL of the
# web OPAC start page (the same address login() opens first).
metadata = {
    'name': 'Stadtbibliothek Wolfsburg',
    'city': 'Wolfsburg',
    'url': 'http://webopac.stadt.wolfsburg.de/webopac/index.asp?DB=web_biblio'
}





def parse_search_title_overview(page_source, base_url):
    """Parses search page results.

    Collects the link of every result row inside the ``resulttab`` table.
    A row counts as a result when the first entry of its ``class`` attribute
    contains ``result_treffer``. Rows with an empty class list, without an
    anchor or without a ``href`` are skipped instead of raising or producing
    the bare base URL.

    :param page_source: HTML page source.
    :param base_url: URL of the base that gets joined together.
    :type page_source: str
    :type base_url: str
    :return: List of item urls.
    :rtype: list
    """
    soup = BeautifulSoup(page_source, 'lxml')
    table = soup.find('table', 'resulttab')
    if not table:
        return []

    urls = []
    for row in table.find_all('tr'):
        classes = row.get('class') or []
        if not classes or 'result_treffer' not in classes[0]:
            continue

        link = row.a
        href = link.get('href') if link is not None else None
        if not href:
            # urljoin(base_url, None) would return base_url itself, which is
            # not an item url
            continue

        urls.append(urljoin(base_url, href))
    return urls
def parse_string_for_year(line):
    """Parses string-line for year.

    Scans the line for runs of four digits and returns the first one that is
    a valid year, as the first of January of that year.

    :param line: Line string.
    :type line: str
    :return: Year, or None if the line holds no valid four-digit year.
    :rtype: datetime.date or None
    """
    for digits in re.findall(r'\d{4}', line):
        try:
            return date(int(digits), 1, 1)
        except ValueError:
            # '0000' is four digits but not a year datetime.date accepts
            continue
    return None
def parse_due_date(line):
    """Parses due date from line.

    The line is expected in the form ``DD.MM.YYYY``. Surrounding whitespace
    (as found in raw table cell text) is ignored.

    :param line: Line string.
    :type line: str
    :return: datetime.date if successful, None otherwise.
    :rtype: datetime.date or None
    """
    if not isinstance(line, str):
        return None
    try:
        # strptime does not tolerate padding around the date, so strip first
        return datetime.strptime(line.strip(), '%d.%m.%Y').date()
    except ValueError:
        return None
def parse_copy_availability(line):
    """Derives the availability of a copy from its 'Status' line.

    A copy is available only when the line mentions 'Verfügbar' and does not
    mention 'Entliehen'; every other status counts as not available.

    :param line: Line string.
    :type line: str
    :return: Availability.
    :rtype: bool
    """
    # the lent-out marker always wins over the availability marker
    lent_out = 'Entliehen' in line
    return not lent_out and 'Verfügbar' in line
# (label searched for in the row's HTML, key written to the title dict)
_TITLE_DETAIL_FIELDS = (
    ('Beschreibung:', 'annotation'),
    ('Verfasserangaben:', 'author'),
    ('ISBN:', 'isbn'),
    ('Impressum:', 'year'),
)


def _parse_cover(soup, base_url):
    """Returns the cover URL of a details page, or None if there is none."""
    container = soup.find('div', class_='detail_cover')
    image = container.img if container is not None else None
    source = image.get('src') if image is not None else None
    if not source:
        return None
    if 'amazon' in source:
        return source
    if source.startswith('/read'):
        # site-relative cover path, make it absolute
        return urljoin(base_url, source)
    return None


def _parse_title_details(soup):
    """Returns annotation, author, isbn and year found in the table rows."""
    details = {}
    for row in soup.find_all('tr'):
        markup = str(row)
        keys = [key for label, key in _TITLE_DETAIL_FIELDS if label in markup]
        if not keys:
            continue

        cells = row.find_all('td')
        if len(cells) < 2:
            # label without a value cell, nothing to read
            continue

        value = cells[1].text
        for key in keys:
            if key == 'year':
                details[key] = parse_string_for_year(value)
            else:
                details[key] = value
    return details


def _parse_copies(soup):
    """Returns one dict per complete 'tabExemplar' row of a details page."""
    copies = []
    for row in soup.find_all('tr', class_='tabExemplar'):
        cells = [cell.text for cell in row.find_all('td')]
        if len(cells) < 6:
            # row without the six expected cells (id, position, type, status,
            # branch, due date) cannot describe a copy
            continue
        copies.append({
            'id': cells[0],
            'position': cells[1],
            'type': cells[2],
            'available': parse_copy_availability(cells[3]),
            'branch': cells[4],
            'due_date': parse_due_date(cells[5]),
        })
    return copies


def parse_title(page_source, base_url):
    """Parses title details.

    The resulting dict always holds ``title`` and ``copies``. ``cover``,
    ``annotation``, ``author``, ``isbn`` and ``year`` are only present when
    the page provides them. A missing cover block, a label row without a
    value cell or an incomplete copy row is skipped instead of raising.

    :param page_source: HTML page source of title details.
    :param base_url: URL of the base that gets joined together.
    :type page_source: str
    :type base_url: str
    :return: Title dict.
    :rtype: dict
    :raises IndexError: If the page has no ``detail_titel`` block.
    """
    soup = BeautifulSoup(page_source, 'lxml')
    title = {}

    # title (a page without it is not a details page, so this may raise)
    title['title'] = soup.find_all('div', class_='detail_titel')[0].text

    # cover
    cover = _parse_cover(soup, base_url)
    if cover:
        title['cover'] = cover

    # annotation, author, isbn, year
    title.update(_parse_title_details(soup))

    # copies
    title['copies'] = _parse_copies(soup)

    return title
def parse_page_numbers(page_source):
    """Reads the page numbers offered by the result navigation.

    The numbers are taken from the ``u`` elements first and the ``a``
    elements second of the ``result_nav`` block, in that order.

    :param page_source: HTML page source.
    :type page_source: str
    :return: List of pages, empty if there is no navigation.
    :rtype: list[int]
    """
    navigation = BeautifulSoup(page_source, 'lxml').find(
        'div', class_='result_nav')
    if not navigation:
        return []

    labels = navigation.find_all('u') + navigation.find_all('a')
    return [int(label.text) for label in labels]
def login(browser, cardnumber, password):
    """Logs into the library account.

    Opens the OPAC start page, follows the account link, fills in the login
    form and submits it. Success is detected by the account headline in the
    resulting page.

    :param browser: Browser to use.
    :param cardnumber: Library Cardnumber.
    :param password: Library Accountpassword
    :type browser: selenium.webdriver.phantomjs.webdriver.WebDriver
    :type cardnumber: str
    :type password: str
    :return: True if the login worked, False otherwise.
    :rtype: bool
    """
    # open the start page and go to the login form
    browser.get('http://webopac.stadt.wolfsburg.de'
                '/webopac/index.asp?DB=web_biblio')
    account_link = browser.find_element_by_name('link_konto')
    account_link.click()

    # enter the credentials, one field after the other, then submit
    card_field = browser.find_element_by_id('inputAUSWEIS')
    card_field.send_keys(cardnumber)
    password_field = browser.find_element_by_name('PWD')
    password_field.send_keys(password)
    submit_button = browser.find_element_by_name('B1')
    submit_button.click()

    logged_in = "Konto für Ausweis" in browser.page_source
    logger.debug('login sucessful' if logged_in else 'login failed')
    return logged_in
def lent_list(cardnumber, password):
    """Gets lent list.

    Logs in, reads the page shown after the login and parses it for lent
    items and the saldo. The browser is always shut down afterwards, even
    if the login raises.

    :param cardnumber: Library Cardnumber.
    :param password: Library Accountpassword
    :type cardnumber: str
    :type password: str
    :return: Dictionary with the keys ``items`` and ``saldo``.
    :rtype: dict
    """
    browser = create_browser()
    try:
        login(browser, cardnumber, password)
        page_source = browser.page_source
    finally:
        # without this every call leaves a browser process behind
        browser.quit()

    return_value = {
        'items': parse_lent_list(page_source),
        'saldo': parse_saldo(page_source),
    }
    logger.debug(return_value)
    return return_value
def parse_lent_list(page_source):
    """Extracts the lent items from the account page.

    Every ``tabKonto`` row yields one dict. The keys ``author``, ``title``
    and ``due_date`` are only set when the matching cell is not empty. Rows
    whose first cell contains 'Anzahl' are ignored.

    :param page_source: HTML page source
    :type page_source: str
    :return: List of results.
    :rtype: list
    """
    soup = BeautifulSoup(page_source, 'lxml')
    lent = []
    for row in soup.find_all('tr', class_='tabKonto'):
        cells = [cell.text.strip() for cell in row.find_all('td')]

        # not an item row, checked before the other cells are touched
        if 'Anzahl' in cells[0]:
            continue

        author, title, due = cells[0], cells[1], cells[2]
        entry = {}
        if author:
            entry['author'] = author
        if title:
            entry['title'] = title
        if due:
            entry['due_date'] = parse_due_date(due)
        lent.append(entry)
    return lent
def parse_saldo(page_source):
    """Parses saldo.

    Looks at the first ``kontozeile_center`` block and returns the first
    amount in it that is written with a decimal comma and two decimals,
    including a leading minus sign if present.

    :param page_source: HTML page source
    :type page_source: str
    :return: Saldo or None
    :rtype: str or None
    """
    soup = BeautifulSoup(page_source, 'lxml')
    line = soup.find('div', class_='kontozeile_center')
    if line is None:
        return None

    # raw string: '\d' in a normal string literal is an invalid escape
    match = re.search(r'-?\d+,\d{2}', line.text)
    return match.group(0) if match else None