commit 88cf01cbc68460f12d17c4752f669f57d29e8882
parent 6af26dbdd29c7bd7ad4305265635f450f8688e03
Author: Isak Lindhé <isak.e.lindhe@gmail.com>
Date: Thu, 17 Jan 2019 19:39:23 +0100
fixed listing bug and refactored config stuff
Diffstat:
M src/listing.py | 59 +++++++++++++++++++++++++++++++++++------------------------
M src/txtv.py    | 42 +++++++-----------------------------------
2 files changed, 42 insertions(+), 59 deletions(-)
diff --git a/src/listing.py b/src/listing.py
@@ -9,36 +9,47 @@ def is_content_entry(tag: bs4.element.Tag):
# c for c in tag.children
# if not (isinstance(c, str) and re.match(r' +', c))
# ]
- children = list(tag.children)
+ # children = list(tag.children)
+ # return (
+ # tag.name == 'span'
+ # and 'W' in tag.attrs.['class']
+ # and len(children) >= 2
+ # and isinstance(children[-1], bs4.element.Tag)
+ # and all(isinstance(elem, str) for elem in children[:-1])
+ # and children[-1].name == 'a'
+ # )
+ pass
return (
- tag.name == 'span'
- and len(children) >= 2
- and isinstance(children[-1], bs4.element.Tag)
- and all(isinstance(elem, str) for elem in children[:-1])
- and children[-1].name == 'a'
+ isinstance(tag, bs4.element.Tag)
+ and tag.name == 'span'
+ and all(not cls.startswith('bg') for cls in tag.attrs['class'])
+ and any((c in tag.attrs['class']) for c in ['W', 'C'])
+ and not re.fullmatch(' *', tag.get_text())
)
-def parse_content_entry(tag: bs4.element.Tag) -> tuple:
- # children = [
- # c for c in tag.children
- # if not (isinstance(c, str) and re.match(r' +', c))
- # ]
- children = list(tag.children)
- if is_content_entry(tag):
- title = re.search(r'^(.+[^.])\.*$', ''.join(children[:-1])).group(1).strip()
- num = children[-1].get_text()
- return title, num
- else:
- return None, None
+def parse_content_listing(page: bs4.element.Tag) -> list:
+ raw = ''
+ for n in page.children:
+ if isinstance(n, str):
+ raw += n
+ pass
+ elif isinstance(n, bs4.element.Tag):
+ if all((x not in n.attrs['class']) for x in ['bgB', 'bgY', 'Y']):
+ raw += n.get_text()
+ entries = raw.splitlines()
+ entries = [e for e in entries if not re.fullmatch(' *', e)]
+ entries = [parse_content_entry(e) for e in entries]
+ return entries
+def parse_content_entry(line: str) -> tuple:
+ m = re.fullmatch(r'(\* )?(.+[^.]).*[^0-9]([0-9]{3})[-f]?', line)
-def parse_content_listing(page: bs4.element.Tag) -> list:
- return [
- parse_content_entry(span)
- for span in page.find_all('span')
- if is_content_entry(span)
- ]
+ if m:
+ return (m.group(2).strip(), m.group(3))
+ else:
+ # raise RuntimeError(f'LINE DIDNT MATCH! {line}')
+ return None
def test_content_listing():
from pprint import pprint
diff --git a/src/txtv.py b/src/txtv.py
@@ -4,13 +4,12 @@ import bs4
import requests as rq
import sys
import re
-import configparser
import colorama
from colorama import Fore, Back, Style
from util import err
from pathlib import Path
+from config import get_or_gen_config, apply_aliases
-CONFIG_DIR = Path.home() / '.config' / 'svtxtv'
def validate_page_nbr(arg: str) -> int:
"""
@@ -49,6 +48,7 @@ def get_page_loop(start_num: int, pattern):
pages.append(get_page(int(match.group(1)))[0])
return pages
+
def test_page_loop():
pages = get_page_loop(101)
print(f'number of pages = {len(pages)}')
@@ -80,42 +80,14 @@ def show_page(page: bs4.element.Tag):
style = Fore.BLUE
print(style + node.get_text() + Style.RESET_ALL, end='')
+
def show_headers():
from listing import list_all_articles
articles = list_all_articles()
- for title, page_nbr in articles:
- print(title.ljust(38, '.'), Fore.BLUE + str(page_nbr) + Fore.RESET)
-
-
-def get_or_gen_config(config_path=CONFIG_DIR / 'svtxtv.conf'):
- cfg = configparser.ConfigParser()
- if config_path.exists():
- cfg.read_file(open(config_path, 'r'))
- else:
- cfg['color'] = {
- 'header' : 'yellow',
- 'frame' : 'blue',
- }
- cfg['alias'] = {
- '__DEFAULT__' : '100', # magic alias, will be used when given no arguments.
- 'inrikes':'101',
- 'in':'101',
- 'utrikes':'104',
- 'ut':'104',
- 'innehÄll':'700',
- }
- if not CONFIG_DIR.exists():
- CONFIG_DIR.mkdir()
- cfg.write(open(config_path, 'w'))
- return cfg
-
-
-def apply_aliases(txt: str, cfg: configparser.ConfigParser) -> str:
- txt = txt.strip()
- if 'alias' in cfg and txt in cfg['alias']:
- return cfg['alias'][txt]
- else:
- return txt
+ for art in articles:
+ if art:
+ title, page_nbr = art
+ print(title.ljust(38, '.'), Fore.BLUE + str(page_nbr) + Fore.RESET)
if __name__ == '__main__':