txtv

Swiss text TV in the terminal
git clone https://git.in0rdr.ch/txtv.git
Log | Files | Refs | Pull requests | Archive | README | LICENSE

commit 6dcb0806a076c0fe4f491eb3714cc4210406f876
parent 1e34b5558b029c9b392aacc321ca20e961edccd2
Author: Isak Lindhé <isak.e.lindhe@gmail.com>
Date:   Sun, 10 Feb 2019 22:13:53 +0100

removed old shit

Diffstat:
Msrc/listing.py | 35++++++++++++++---------------------
Msrc/txtv.py | 152++++++++++++++++++++++++-------------------------------------------------------
Msrc/util.py | 5+++--
3 files changed, 62 insertions(+), 130 deletions(-)

diff --git a/src/listing.py b/src/listing.py @@ -1,9 +1,19 @@ import bs4 import re -from txtv import get_page_loop, get_page +from txtv import Page from pprint import pprint +def get_page_loop(start_num: int, pattern): + pages = [Page(start_num)] + while True: + match = re.search(pattern, pages[-1].subpages[0].get_text()) + if not match or match.group(1) == str(start_num): + break + pages.append(Page(int(match.group(1)))) + return pages + + def is_content_entry(tag: bs4.element.Tag): # children = [ # c for c in tag.children @@ -27,9 +37,9 @@ def is_content_entry(tag: bs4.element.Tag): ) -def parse_content_listing(page: bs4.element.Tag) -> list: +def parse_content_listing(page: Page) -> list: raw = '' - for n in page.children: + for n in page.subpages[0].children: if isinstance(n, str): raw += n pass @@ -52,24 +62,6 @@ def parse_content_entry(line: str) -> tuple: return None -def test_content_listing(): - from pprint import pprint - page = get_page(102)[0] - content = parse_content_listing(page) - pprint(content) - assert False - - -def content_list() -> list: - import re - itempattern = r'(\w+)\.*(\d\d\d)' - page = get_page(700)[0] - spans = page.find_all('span') - spans = [s for s in spans if len(list(s.children)) >= 2 and s.find('a')] - return spans - # return [re.findall(itempattern, node.get_text()) for node in page] - - def list_all_articles(): full_listing = [] for nbr in [101, 104]: @@ -77,3 +69,4 @@ def list_all_articles(): for p in pages: full_listing += parse_content_listing(p) return full_listing + diff --git a/src/txtv.py b/src/txtv.py @@ -48,51 +48,17 @@ class Page: def validate_page_nbr(arg: str) -> int: """ - Validates a page number, returns as int. Complains to user if bad. + Validates a page number, returns as int. Raises ValueError if bad. 
""" try: num = int(arg) except ValueError: - err('txtv <PAGE>\nexample: txtv 130') + raise ValueError('txtv <PAGE>\nexample: txtv 130') if num < 100 or num > 999: - err('Text tv pages range from 100 to 999') + raise ValueError('Text tv pages range from 100 to 999') return num -def get_page_loop(start_num: int, pattern): - pages = [get_page(start_num)[0]] - while True: - match = re.search(pattern, pages[-1].get_text()) - if not match or match.group(1) == str(start_num): - break - pages.append(get_page(int(match.group(1)))[0]) - return pages - - -def show_page(page: bs4.element.Tag): - # def nodetext(node, parent_style=''): - # if isinstance(node, str): - # return node - # elif isinstance(node, bs4.element.Tag): - # if node.name == 'a': - # return Fore.RED + node.get_text() + Fore.RESET + parent_style - # else: - # return ''.join([nodetext(child) for child in node.children]) - """Prints the page contained by the specified tag in color.""" - for node in page: - if isinstance(node, str): - print(node, end='') - continue - style = '' - if 'DH' in node.attrs['class']: - style = Fore.YELLOW + Style.BRIGHT - elif 'Y' in node.attrs['class']: - style = Style.DIM - elif 'bgB' in node.attrs['class']: - style = Fore.BLUE - print(style + node.get_text() + Style.RESET_ALL, end='') - - def match_command(arg: str, interactive=False): for cmd in commands: if interactive or 'interactive_only' not in cmd or not cmd['interactive_only']: @@ -109,38 +75,18 @@ def interactive(start_page: Page): try: raw = input('> ').strip().lower() cmd, m = match_command(raw, interactive=True) - cmd['func'](state=state, match=m) + if cmd: + cmd['func'](state=state, match=m) + else: + err("That's not a command, kompis. 
'help' gives you a list of commands.", fatal=False) except EOFError: exit(0) - # while running: - # try: - # cmd = input('> ').strip().lower() - # if cmd == '': - # pass - # elif cmd == 'help': - # print('here will be a helptext later') # TODO - # elif cmd in ['quit', 'q', 'exit']: - # running = False - # elif cmd in ['next', 'n', 'j', '>']: - # page = Page(page.next) - # page.show() - # elif cmd in ['previous', 'prev', 'p', 'k', '<']: - # page = Page(page.prev) - # page.show() - # elif re.fullmatch('[1-9][0-9][0-9]',cmd): - # nbr = int(cmd) - # page = Page(int(cmd)) - # page.show() - # else: - # print("That's not a command, type help for help, or quit to quit.") - # except EOFError: - # running = False - - -##################### -# COMMAND FUNCTIONS # -##################### + + ##################### + # COMMAND FUNCTIONS # + ##################### + def cmd_help(**kwargs): print('commands:') @@ -174,60 +120,52 @@ def cmd_list(**kwargs): def cmd_page(match, state=None, **kwargs): - num = validate_page_nbr(match.group(0)) - if state: - state['page'] = Page(num) - state['page'].show() - else: - Page(num).show() - + try: + num = validate_page_nbr(match.group(0)) + if state: + state['page'] = Page(num) + state['page'].show() + else: + Page(num).show() + except ValueError as e: + err(str(e), fatal=(state is None)) -def get_page(num: int) -> list: - """ - Returns a list of the tags containing - the page and potential subpages (type: bs4.element.Tag) - on the specified page number. - For most pages this will be a list of one element. 
- """ - res = rq.get(f'https://www.svt.se/svttext/web/pages/{num}.html') - if res.status_code != 200: - err(f'Got HTTP status code {res.status_code}.') - soup = bs4.BeautifulSoup(res.content, 'html.parser') - subpages = soup.find_all('pre', class_='root') - return subpages commands = [ { - 'pattern' : 'h|\?|help', - 'func': cmd_help, - 'help':'show this help text.', + 'helpname': 'help | h | ?', + 'pattern': 'h|\?|help', + 'func': cmd_help, + 'help': 'show this help text.', }, { - 'pattern' : 'q|quit|exit', - 'func': lambda **kwargs: sys.exit(0), - 'help':'quit the program (duh)', - 'interactive_only' : True, + 'pattern': 'quit|q|exit', + 'func': lambda **kwargs: sys.exit(0), + 'help': 'quit the program (duh)', + 'interactive_only': True, }, { - 'pattern' : 'l|ls|list', - 'func': cmd_list, - 'help':'list all articles', + 'pattern': 'list|ls|l', + 'func': cmd_list, + 'help': 'list all articles', }, { - 'pattern':'n|next|>', - 'func': cmd_next, - 'interactive_only' : True, + 'pattern': 'next|n|>', + 'func': cmd_next, + 'help': 'show next available page.', + 'interactive_only': True, }, { - 'pattern':'previous|prev|p|<', - 'func': cmd_prev, - 'interactive_only' : True, + 'pattern': 'previous|prev|p|<', + 'func': cmd_prev, + 'help': 'show previous available page.', + 'interactive_only': True, }, { - 'helpname' : '<PAGE NUMBER>', - 'pattern':'[0-9]{3}', - 'func': cmd_page, - 'help':'show the specified page', + 'helpname': '<PAGE NUMBER>', + 'pattern': '[0-9]{3}', + 'func': cmd_page, + 'help': 'show the specified page', }, ] @@ -247,5 +185,5 @@ if __name__ == '__main__': cmd['func'](match=m, cfg=cfg) sys.exit(0) else: - err("That's not a command, kompis. 'help' gives you a list of commands.") + err("That's not a command, kompis. 
'txtv.py help' gives you a list of commands.") colorama.deinit() diff --git a/src/util.py b/src/util.py @@ -1,7 +1,8 @@ from colorama import Fore, Back, Style import sys -def err(txt: str): +def err(txt: str, fatal=True): """Prints a red error message and quits with exit code 1.""" print(Fore.RED + txt + Fore.RESET, file=sys.stderr) - sys.exit(1) + if fatal: + sys.exit(1)