Source code for app.facilities.wolfsburg

import re
import logging

from bs4 import BeautifulSoup
from datetime import date, datetime
from selenium.webdriver.common.keys import Keys
from urllib.parse import urljoin

from app.browser import create_browser
from app.utils import next_page


# Module logger, registered under the name 'app'.
logger = logging.getLogger('app')

# Static description of this facility: display name, city and OPAC URL.
metadata = dict(
    name='Stadtbibliothek Wolfsburg',
    city='Wolfsburg',
    url=('http://webopac.stadt.wolfsburg.de'
         '/webopac/index.asp?DB=web_biblio'),
)


def search(term, page=1):
    """Searches the catalogue for a term and scrapes one result page.

    Opens the OPAC entry page, submits ``term`` through the ``stichwort``
    field, switches to the requested pagination page if it is not the first
    one and then visits every result link to parse its title details.
    Detail pages containing the word "Bände" are skipped.

    :param term: A term to search for.
    :param page: Pagination page.
    :type term: str
    :type page: int
    :return: Dictionary with results and next_page integer. If the wanted
        page is not offered by the result navigation, the results list is
        empty and next_page is None.
    :rtype: dict
    """
    entry_url = ('http://webopac.stadt.wolfsburg.de'
                 '/webopac/index.asp?DB=web_biblio')

    # list to store results
    results = []

    # set up browser
    browser = create_browser()

    try:
        # fill out the search field and submit it with the return key
        browser.get(entry_url)
        search_field = browser.find_element_by_name('stichwort')
        search_field.send_keys(term)
        search_field.send_keys(Keys.RETURN)

        # return empty list if wanted page is not there
        page_numbers = parse_page_numbers(browser.page_source)
        if page not in page_numbers:
            return {'results': [], 'next_page': None}

        if page != 1:
            browser.find_element_by_link_text(str(page)).click()

        links = parse_search_title_overview(browser.page_source, entry_url)

        for link in links:
            browser.get(link)
            detail_source = browser.page_source

            # if the word "Bände" is in the source the link is skipped
            if 'Bände' in detail_source:
                continue

            # parse the source for the title and append it to the results
            results.append(parse_title(detail_source, entry_url))
    finally:
        # Release the browser on every exit path (early return, exception,
        # normal completion) so no driver process is left behind.
        # NOTE(review): assumes create_browser() returns a driver owned by
        # this call and not a shared instance -- confirm in app.browser.
        browser.quit()

    next_page_number = next_page(page_numbers, page)

    return_value = {'results': results, 'next_page': next_page_number}

    logger.debug(return_value)
    return return_value


def parse_search_title_overview(page_source, base_url):
    """Parses search page results.

    Looks for the table with class ``resulttab`` and collects the link of
    every row whose first CSS class contains ``result_treffer``.

    :param page_source: HTML page source.
    :param base_url: URL of the base that gets joined together.
    :type page_source: str
    :type base_url: str
    :return: List of item urls. Empty if there is no result table.
    :rtype: list
    """
    soup = BeautifulSoup(page_source, 'lxml')

    table = soup.find('table', 'resulttab')
    if table is None:
        return []

    links = []
    for row in table.find_all('tr'):
        # only rows whose first class marks them as a hit are of interest;
        # an absent or empty class attribute means "not a hit"
        classes = row.get('class') or []
        if not classes or 'result_treffer' not in classes[0]:
            continue

        # a hit row without an anchor or href has nothing to follow;
        # joining a missing href would just yield base_url again
        anchor = row.a
        href = anchor.get('href') if anchor is not None else None
        if not href:
            continue

        links.append(urljoin(base_url, href))

    return links


def parse_string_for_year(line):
    """Parses string-line for year.

    The first group of four consecutive digits that forms a valid year is
    returned as the first of January of that year.

    :param line: Line string.
    :type line: str
    :return: Year.
    :rtype: datetime.date or None
    """
    for digits in re.findall(r'\d\d\d\d', line):
        try:
            return date(int(digits), 1, 1)
        except ValueError:
            # '0000' is four digits but outside the range date() accepts
            continue

    return None


def parse_due_date(line):
    """Parses due date from line.

    The line is expected to hold a date in the form ``DD.MM.YYYY``.
    Surrounding whitespace, as it comes out of table cell text, is ignored.

    :param line: Line string.
    :type line: str
    :return: datetime.date if successful, None otherwise.
    :rtype: datetime.date or None
    """
    # nothing to parse for None or an empty cell
    if not line:
        return None

    try:
        return datetime.strptime(line.strip(), '%d.%m.%Y').date()
    except ValueError:
        return None


def parse_copy_availability(line):
    """Parses 'Status-'line for availability.

    A copy counts as available only if the line contains 'Verfügbar' and
    does not contain 'Entliehen'; every other line means not available.

    :param line: Line string.
    :type line: str
    :return: Availability.
    :rtype: bool
    """
    return 'Verfügbar' in line and 'Entliehen' not in line


def parse_title(page_source, base_url):
    """Parses title details.

    Reads the title text, an optional cover URL, the labelled detail rows
    (annotation, author, isbn, year) and the table of copies from a title
    detail page.

    :param page_source: HTML page source of title details.
    :param base_url: URL of the base that gets joined together.
    :type page_source: str
    :type base_url: str
    :return: Title dict. Always holds 'title' and 'copies'; 'cover',
        'annotation', 'author', 'isbn' and 'year' only if found.
    :rtype: dict
    :raises IndexError: If the page has no ``detail_titel`` div.
    """
    # row label -> key in the title dict for fields taken over as plain text
    text_fields = (
        ('Beschreibung:', 'annotation'),
        ('Verfasserangaben:', 'author'),
        ('ISBN:', 'isbn'),
    )

    soup = BeautifulSoup(page_source, 'lxml')

    # title
    title = {'title': soup.find_all('div', class_='detail_titel')[0].text}

    # cover is optional: tolerate a missing div, image or src attribute
    cover_div = soup.find('div', class_='detail_cover')
    cover_img = cover_div.img if cover_div is not None else None
    cover = cover_img.get('src') if cover_img is not None else None
    if cover:
        if 'amazon' in cover:
            title['cover'] = cover
        elif cover.startswith('/read'):
            # relative cover path, make it absolute
            title['cover'] = urljoin(base_url, cover)

    for tr in soup.find_all('tr'):
        cells = tr.find_all('td')

        # the value is read from the second cell, skip rows without one
        if len(cells) < 2:
            continue

        line = str(tr)
        value = cells[1].text

        # annotation, author, isbn
        for label, key in text_fields:
            if label in line:
                title[key] = value

        # year
        if 'Impressum:' in line:
            title['year'] = parse_string_for_year(value)

    # parse copies
    copies = []
    for tr in soup.find_all('tr', class_='tabExemplar'):
        tds = tr.find_all('td')

        # six cells are read per copy, skip rows that hold fewer
        if len(tds) < 6:
            continue

        copies.append({
            'id': tds[0].text,
            'position': tds[1].text,
            'type': tds[2].text,
            'available': parse_copy_availability(tds[3].text),
            'branch': tds[4].text,
            'due_date': parse_due_date(tds[5].text),
        })

    # add copies to title dict
    title['copies'] = copies

    return title


def parse_page_numbers(page_source):
    """Parses possible pages for pagination.

    Collects the numbers shown in the ``result_nav`` div: first the
    ``<u>`` elements, then the ``<a>`` elements. Entries whose text is not
    a number are ignored.

    :param page_source: HTML page source.
    :type page_source: str
    :return: List of pages. Empty if there is no navigation.
    :rtype: list[int]
    """
    soup = BeautifulSoup(page_source, 'lxml')

    nav = soup.find('div', class_='result_nav')
    if nav is None:
        return []

    pages = []
    for tag in nav.find_all('u') + nav.find_all('a'):
        try:
            pages.append(int(tag.text))
        except ValueError:
            # navigation entry without a page number, nothing to collect
            continue

    return pages


def login(browser, cardnumber, password):
    """Logs into account.

    Browses to the login form, fills in the credentials and submits them.
    The login counts as successful if the resulting page contains the text
    "Konto für Ausweis".

    :param browser: Browser to use.
    :param cardnumber: Library Cardnumber.
    :param password: Library Accountpassword
    :type browser: selenium.webdriver.phantomjs.webdriver.WebDriver
    :type cardnumber: str
    :type password: str
    :return: True if could login or False if it couldnt.
    :rtype: bool
    """
    entry_url = ('http://webopac.stadt.wolfsburg.de'
                 '/webopac/index.asp?DB=web_biblio')

    # browse to the login form
    browser.get(entry_url)
    browser.find_element_by_name('link_konto').click()

    # fills data and submits
    browser.find_element_by_id('inputAUSWEIS').send_keys(cardnumber)
    browser.find_element_by_name('PWD').send_keys(password)
    browser.find_element_by_name('B1').click()

    logged_in = "Konto für Ausweis" in browser.page_source
    logger.debug('login sucessful' if logged_in else 'login failed')

    return logged_in


def lent_list(cardnumber, password):
    """Gets lent list.

    Logs in and parses the page the browser shows afterwards for lent
    items and the saldo.

    :param cardnumber: Library Cardnumber.
    :param password: Library Accountpassword
    :type cardnumber: str
    :type password: str
    :return: Dictionary with lent items ('items') and saldo ('saldo').
    :rtype: dict
    """
    browser = create_browser()

    try:
        # The result of login() is not evaluated here: the page is parsed
        # either way and login() itself logs whether it succeeded.
        login(browser, cardnumber, password)
        account_source = browser.page_source
    finally:
        # Release the browser even if the login raises.
        # NOTE(review): assumes create_browser() returns a driver owned by
        # this call and not a shared instance -- confirm in app.browser.
        browser.quit()

    return_value = {
        'items': parse_lent_list(account_source),
        'saldo': parse_saldo(account_source),
    }

    logger.debug(return_value)
    return return_value


def parse_lent_list(page_source):
    """Parses lent list.

    Every table row with class ``tabKonto`` becomes one item built from its
    first three cells (author, title, due date). Empty cells are left out
    of the item. Rows whose first cell contains 'Anzahl' are skipped.

    :param page_source: HTML page source
    :type page_source: str
    :return: List of results.
    :rtype: list
    """
    soup = BeautifulSoup(page_source, 'lxml')

    items = []

    for row in soup.find_all('tr', class_='tabKonto'):
        cols = [td.text.strip() for td in row.find_all('td')]

        # three cells are read per item, skip rows that hold fewer
        if len(cols) < 3:
            continue

        if 'Anzahl' in cols[0]:
            continue

        author, item_title, due_date = cols[:3]

        item = {}

        if author:
            item['author'] = author

        if item_title:
            item['title'] = item_title

        if due_date:
            item['due_date'] = parse_due_date(due_date)

        items.append(item)

    return items


def parse_saldo(page_source):
    """Parses saldo.

    Searches the first div with class ``kontozeile_center`` for an amount
    written with a decimal comma and two decimals, optionally negative.

    :param page_source: HTML page source
    :type page_source: str
    :return: Saldo or None
    :rtype: str or None
    """
    soup = BeautifulSoup(page_source, 'lxml')

    lines = soup.find_all('div', class_='kontozeile_center')
    if not lines:
        return None

    # only the first matching div is searched
    amounts = re.findall(r'-?\d+,\d{2}', lines[0].text)

    return amounts[0] if amounts else None