txtv

Swiss text tv in the terminal
git clone https://git.in0rdr.ch/txtv.git
Log | Files | Refs | Pull requests | Archive | README | LICENSE

commit 88cf01cbc68460f12d17c4752f669f57d29e8882
parent 6af26dbdd29c7bd7ad4305265635f450f8688e03
Author: Isak Lindhé <isak.e.lindhe@gmail.com>
Date:   Thu, 17 Jan 2019 19:39:23 +0100

fixed listing bug and refactored config stuff

Diffstat:
M src/listing.py | 59 +++++++++++++++++++++++++++++++++++------------------------
M src/txtv.py | 42 +++++++-----------------------------------
2 files changed, 42 insertions(+), 59 deletions(-)

diff --git a/src/listing.py b/src/listing.py
@@ -9,36 +9,47 @@ def is_content_entry(tag: bs4.element.Tag):
     #     c for c in tag.children
     #     if not (isinstance(c, str) and re.match(r' +', c))
     # ]
-    children = list(tag.children)
+    # children = list(tag.children)
+    # return (
+    #     tag.name == 'span'
+    #     and 'W' in tag.attrs.['class']
+    #     and len(children) >= 2
+    #     and isinstance(children[-1], bs4.element.Tag)
+    #     and all(isinstance(elem, str) for elem in children[:-1])
+    #     and children[-1].name == 'a'
+    # )
+    pass
     return (
-        tag.name == 'span'
-        and len(children) >= 2
-        and isinstance(children[-1], bs4.element.Tag)
-        and all(isinstance(elem, str) for elem in children[:-1])
-        and children[-1].name == 'a'
+        isinstance(tag, bs4.element.Tag)
+        and tag.name == 'span'
+        and all(not cls.startswith('bg') for cls in tag.attrs['class'])
+        and any((c in tag.attrs['class']) for c in ['W', 'C'])
+        and not re.fullmatch(' *', tag.get_text())
     )
 
 
-def parse_content_entry(tag: bs4.element.Tag) -> tuple:
-    # children = [
-    #     c for c in tag.children
-    #     if not (isinstance(c, str) and re.match(r' +', c))
-    # ]
-    children = list(tag.children)
-    if is_content_entry(tag):
-        title = re.search(r'^(.+[^.])\.*$', ''.join(children[:-1])).group(1).strip()
-        num = children[-1].get_text()
-        return title, num
-    else:
-        return None, None
+def parse_content_listing(page: bs4.element.Tag) -> list:
+    raw = ''
+    for n in page.children:
+        if isinstance(n, str):
+            raw += n
+            pass
+        elif isinstance(n, bs4.element.Tag):
+            if all((x not in n.attrs['class']) for x in ['bgB', 'bgY', 'Y']):
+                raw += n.get_text()
+    entries = raw.splitlines()
+    entries = [e for e in entries if not re.fullmatch(' *', e)]
+    entries = [parse_content_entry(e) for e in entries]
+    return entries
 
+def parse_content_entry(line: str) -> tuple:
+    m = re.fullmatch(r'(\* )?(.+[^.]).*[^0-9]([0-9]{3})[-f]?', line)
 
-def parse_content_listing(page: bs4.element.Tag) -> list:
-    return [
-        parse_content_entry(span)
-        for span in page.find_all('span')
-        if is_content_entry(span)
-    ]
+    if m:
+        return (m.group(2).strip(), m.group(3))
+    else:
+        # raise RuntimeError(f'LINE DIDNT MATCH! {line}')
+        return None
 
 def test_content_listing():
     from pprint import pprint
diff --git a/src/txtv.py b/src/txtv.py
@@ -4,13 +4,12 @@ import bs4
 import requests as rq
 import sys
 import re
-import configparser
 import colorama
 from colorama import Fore, Back, Style
 from util import err
 from pathlib import Path
+from config import get_or_gen_config, apply_aliases
 
-CONFIG_DIR = Path.home() / '.config' / 'svtxtv'
 
 def validate_page_nbr(arg: str) -> int:
     """
@@ -49,6 +48,7 @@ def get_page_loop(start_num: int, pattern):
             pages.append(get_page(int(match.group(1)))[0])
     return pages
 
+
 def test_page_loop():
     pages = get_page_loop(101)
     print(f'number of pages = {len(pages)}')
@@ -80,42 +80,14 @@ def show_page(page: bs4.element.Tag):
                 style = Fore.BLUE
             print(style + node.get_text() + Style.RESET_ALL, end='')
 
+
 def show_headers():
     from listing import list_all_articles
     articles = list_all_articles()
-    for title, page_nbr in articles:
-        print(title.ljust(38, '.'), Fore.BLUE + str(page_nbr) + Fore.RESET)
-
-
-def get_or_gen_config(config_path=CONFIG_DIR / 'svtxtv.conf'):
-    cfg = configparser.ConfigParser()
-    if config_path.exists():
-        cfg.read_file(open(config_path, 'r'))
-    else:
-        cfg['color'] = {
-            'header' : 'yellow',
-            'frame' : 'blue',
-        }
-        cfg['alias'] = {
-            '__DEFAULT__' : '100', # magic alias, will be used when given no arguments.
-            'inrikes':'101',
-            'in':'101',
-            'utrikes':'104',
-            'ut':'104',
-            'innehÄll':'700',
-        }
-        if not CONFIG_DIR.exists():
-            CONFIG_DIR.mkdir()
-        cfg.write(open(config_path, 'w'))
-    return cfg
-
-
-def apply_aliases(txt: str, cfg: configparser.ConfigParser) -> str:
-    txt = txt.strip()
-    if 'alias' in cfg and txt in cfg['alias']:
-        return cfg['alias'][txt]
-    else:
-        return txt
+    for art in articles:
+        if art:
+            title, page_nbr = art
+            print(title.ljust(38, '.'), Fore.BLUE + str(page_nbr) + Fore.RESET)
 
 
 if __name__ == '__main__':