commit 6dcb0806a076c0fe4f491eb3714cc4210406f876
parent 1e34b5558b029c9b392aacc321ca20e961edccd2
Author: Isak Lindhé <isak.e.lindhe@gmail.com>
Date: Sun, 10 Feb 2019 22:13:53 +0100
removed old shit
Diffstat:
3 files changed, 62 insertions(+), 130 deletions(-)
diff --git a/src/listing.py b/src/listing.py
@@ -1,9 +1,19 @@
import bs4
import re
-from txtv import get_page_loop, get_page
+from txtv import Page
from pprint import pprint
+def get_page_loop(start_num: int, pattern):
+    """
+    Collects the chain of pages that starts at start_num.
+
+    The text of the first subpage of the most recently fetched page is
+    searched with pattern; capture group 1 of the match is read as the
+    number of the next page. The walk stops when nothing matches or when
+    the next number has already been visited. (Checking only for a return
+    to start_num would loop forever on a cycle that skips the start page.)
+
+    Returns the visited Page objects in the order they were fetched.
+    """
+    pages = [Page(start_num)]
+    visited = {start_num}
+    while True:
+        match = re.search(pattern, pages[-1].subpages[0].get_text())
+        if not match:
+            break
+        next_num = int(match.group(1))
+        if next_num in visited:
+            break
+        visited.add(next_num)
+        pages.append(Page(next_num))
+    return pages
+
+
def is_content_entry(tag: bs4.element.Tag):
# children = [
# c for c in tag.children
@@ -27,9 +37,9 @@ def is_content_entry(tag: bs4.element.Tag):
)
-def parse_content_listing(page: bs4.element.Tag) -> list:
+def parse_content_listing(page: Page) -> list:
raw = ''
- for n in page.children:
+ for n in page.subpages[0].children:
if isinstance(n, str):
raw += n
pass
@@ -52,24 +62,6 @@ def parse_content_entry(line: str) -> tuple:
return None
-def test_content_listing():
- from pprint import pprint
- page = get_page(102)[0]
- content = parse_content_listing(page)
- pprint(content)
- assert False
-
-
-def content_list() -> list:
- import re
- itempattern = r'(\w+)\.*(\d\d\d)'
- page = get_page(700)[0]
- spans = page.find_all('span')
- spans = [s for s in spans if len(list(s.children)) >= 2 and s.find('a')]
- return spans
- # return [re.findall(itempattern, node.get_text()) for node in page]
-
-
def list_all_articles():
full_listing = []
for nbr in [101, 104]:
@@ -77,3 +69,4 @@ def list_all_articles():
for p in pages:
full_listing += parse_content_listing(p)
return full_listing
+
diff --git a/src/txtv.py b/src/txtv.py
@@ -48,51 +48,17 @@ class Page:
def validate_page_nbr(arg: str) -> int:
"""
- Validates a page number, returns as int. Complains to user if bad.
+ Validates a page number, returns as int. Raises ValueError if bad.
+
+ A valid page number parses with int() and lies between 100 and 999
+ (inclusive). The ValueError message is the text to show to the user.
"""
try:
num = int(arg)
except ValueError:
- err('txtv <PAGE>\nexample: txtv 130')
+ # "from None" drops int()'s own error from the chain, so an uncaught
+ # failure reports only the usage text.
+ raise ValueError('txtv <PAGE>\nexample: txtv 130') from None
if num < 100 or num > 999:
- err('Text tv pages range from 100 to 999')
+ raise ValueError('Text tv pages range from 100 to 999')
return num
-def get_page_loop(start_num: int, pattern):
- pages = [get_page(start_num)[0]]
- while True:
- match = re.search(pattern, pages[-1].get_text())
- if not match or match.group(1) == str(start_num):
- break
- pages.append(get_page(int(match.group(1)))[0])
- return pages
-
-
-def show_page(page: bs4.element.Tag):
- # def nodetext(node, parent_style=''):
- # if isinstance(node, str):
- # return node
- # elif isinstance(node, bs4.element.Tag):
- # if node.name == 'a':
- # return Fore.RED + node.get_text() + Fore.RESET + parent_style
- # else:
- # return ''.join([nodetext(child) for child in node.children])
- """Prints the page contained by the specified tag in color."""
- for node in page:
- if isinstance(node, str):
- print(node, end='')
- continue
- style = ''
- if 'DH' in node.attrs['class']:
- style = Fore.YELLOW + Style.BRIGHT
- elif 'Y' in node.attrs['class']:
- style = Style.DIM
- elif 'bgB' in node.attrs['class']:
- style = Fore.BLUE
- print(style + node.get_text() + Style.RESET_ALL, end='')
-
-
def match_command(arg: str, interactive=False):
for cmd in commands:
if interactive or 'interactive_only' not in cmd or not cmd['interactive_only']:
@@ -109,38 +75,18 @@ def interactive(start_page: Page):
try:
raw = input('> ').strip().lower()
cmd, m = match_command(raw, interactive=True)
- cmd['func'](state=state, match=m)
+ if cmd:
+ cmd['func'](state=state, match=m)
+ else:
+ err("That's not a command, kompis. 'help' gives you a list of commands.", fatal=False)
except EOFError:
exit(0)
- # while running:
- # try:
- # cmd = input('> ').strip().lower()
- # if cmd == '':
- # pass
- # elif cmd == 'help':
- # print('here will be a helptext later') # TODO
- # elif cmd in ['quit', 'q', 'exit']:
- # running = False
- # elif cmd in ['next', 'n', 'j', '>']:
- # page = Page(page.next)
- # page.show()
- # elif cmd in ['previous', 'prev', 'p', 'k', '<']:
- # page = Page(page.prev)
- # page.show()
- # elif re.fullmatch('[1-9][0-9][0-9]',cmd):
- # nbr = int(cmd)
- # page = Page(int(cmd))
- # page.show()
- # else:
- # print("That's not a command, type help for help, or quit to quit.")
- # except EOFError:
- # running = False
-
-
-#####################
-# COMMAND FUNCTIONS #
-#####################
+
+ #####################
+ # COMMAND FUNCTIONS #
+ #####################
+
def cmd_help(**kwargs):
print('commands:')
@@ -174,60 +120,52 @@ def cmd_list(**kwargs):
def cmd_page(match, state=None, **kwargs):
- num = validate_page_nbr(match.group(0))
- if state:
- state['page'] = Page(num)
- state['page'].show()
- else:
- Page(num).show()
-
+    """
+    Shows the page whose number is the text matched by match.
+
+    When a state is given, the page is also stored in state['page'].
+    A ValueError is reported through err(); the report is fatal only
+    when no state was passed.
+    """
+    try:
+        page = Page(validate_page_nbr(match.group(0)))
+        if state:
+            state['page'] = page
+        page.show()
+    except ValueError as e:
+        err(str(e), fatal=(state is None))
-def get_page(num: int) -> list:
- """
- Returns a list of the tags containing
- the page and potential subpages (type: bs4.element.Tag)
- on the specified page number.
- For most pages this will be a list of one element.
- """
- res = rq.get(f'https://www.svt.se/svttext/web/pages/{num}.html')
- if res.status_code != 200:
- err(f'Got HTTP status code {res.status_code}.')
- soup = bs4.BeautifulSoup(res.content, 'html.parser')
- subpages = soup.find_all('pre', class_='root')
- return subpages
+# Command table iterated by match_command(). Each entry carries a 'pattern'
+# (presumably a regex for the command word), the handler in 'func', and
+# optional 'help' / 'helpname' texts. Entries flagged 'interactive_only'
+# are skipped unless match_command() is called with interactive=True.
commands = [
{
- 'pattern' : 'h|\?|help',
- 'func': cmd_help,
- 'help':'show this help text.',
+ 'helpname': 'help | h | ?',
+ # Raw string: '\?' is not a valid escape in a plain str literal.
+ 'pattern': r'h|\?|help',
+ 'func': cmd_help,
+ 'help': 'show this help text.',
},
{
- 'pattern' : 'q|quit|exit',
- 'func': lambda **kwargs: sys.exit(0),
- 'help':'quit the program (duh)',
- 'interactive_only' : True,
+ 'pattern': 'quit|q|exit',
+ 'func': lambda **kwargs: sys.exit(0),
+ 'help': 'quit the program (duh)',
+ 'interactive_only': True,
},
{
- 'pattern' : 'l|ls|list',
- 'func': cmd_list,
- 'help':'list all articles',
+ 'pattern': 'list|ls|l',
+ 'func': cmd_list,
+ 'help': 'list all articles',
},
{
- 'pattern':'n|next|>',
- 'func': cmd_next,
- 'interactive_only' : True,
+ 'pattern': 'next|n|>',
+ 'func': cmd_next,
+ 'help': 'show next available page.',
+ 'interactive_only': True,
},
{
- 'pattern':'previous|prev|p|<',
- 'func': cmd_prev,
- 'interactive_only' : True,
+ 'pattern': 'previous|prev|p|<',
+ 'func': cmd_prev,
+ 'help': 'show previous available page.',
+ 'interactive_only': True,
},
{
- 'helpname' : '<PAGE NUMBER>',
- 'pattern':'[0-9]{3}',
- 'func': cmd_page,
- 'help':'show the specified page',
+ 'helpname': '<PAGE NUMBER>',
+ 'pattern': '[0-9]{3}',
+ 'func': cmd_page,
+ 'help': 'show the specified page',
},
]
@@ -247,5 +185,5 @@ if __name__ == '__main__':
cmd['func'](match=m, cfg=cfg)
sys.exit(0)
else:
- err("That's not a command, kompis. 'help' gives you a list of commands.")
+ err("That's not a command, kompis. 'txtv.py help' gives you a list of commands.")
colorama.deinit()
diff --git a/src/util.py b/src/util.py
@@ -1,7 +1,8 @@
from colorama import Fore, Back, Style
import sys
-def err(txt: str):
+def err(txt: str, fatal=True):
- """Prints a red error message and quits with exit code 1."""
+ """
+ Prints a red error message to stderr.
+
+ Quits with exit code 1 afterwards when fatal is true (the default);
+ otherwise returns to the caller after printing.
+ """
print(Fore.RED + txt + Fore.RESET, file=sys.stderr)
- sys.exit(1)
+ if fatal:
+ sys.exit(1)