fixed listing bug and refactored config stuff - txtv

commit 88cf01cbc68460f12d17c4752f669f57d29e8882
parent 6af26dbdd29c7bd7ad4305265635f450f8688e03
Author: Isak Lindhé <isak.e.lindhe@gmail.com>
Date:   Thu, 17 Jan 2019 19:39:23 +0100

fixed listing bug and refactored config stuff

Diffstat:
M src/listing.py  | 59 +++++++++++++++++++++++++++++++++++------------------------
M src/txtv.py  | 42 +++++++-----------------------------------

2 files changed, 42 insertions(+), 59 deletions(-)
diff --git a/src/listing.py b/src/listing.py
@@ -9,36 +9,47 @@ def is_content_entry(tag: bs4.element.Tag):
     #         c for c in tag.children
     #         if not (isinstance(c, str) and re.match(r' +', c))
     #         ]
-    children = list(tag.children)
+    # children = list(tag.children)
+    # return (
+    #         tag.name == 'span'
+    #         and 'W' in tag.attrs.['class']
+    #         and len(children) >= 2
+    #         and isinstance(children[-1], bs4.element.Tag)
+    #         and all(isinstance(elem, str) for elem in children[:-1])
+    #         and children[-1].name == 'a'
+    #         )
+    pass
     return (
-            tag.name == 'span'
-            and len(children) >= 2
-            and isinstance(children[-1], bs4.element.Tag)
-            and all(isinstance(elem, str) for elem in children[:-1])
-            and children[-1].name == 'a'
+            isinstance(tag, bs4.element.Tag)
+            and tag.name == 'span'
+            and all(not cls.startswith('bg') for cls in tag.attrs['class'])
+            and any((c in tag.attrs['class']) for c in ['W', 'C'])
+            and not re.fullmatch(' *', tag.get_text())
             )
 
 
-def parse_content_entry(tag: bs4.element.Tag) -> tuple:
-    # children = [
-    #         c for c in tag.children
-    #         if not (isinstance(c, str) and re.match(r' +', c))
-    #         ]
-    children = list(tag.children)
-    if is_content_entry(tag):
-        title = re.search(r'^(.+[^.])\.*$', ''.join(children[:-1])).group(1).strip()
-        num = children[-1].get_text()
-        return title, num
-    else:
-        return None, None
+def parse_content_listing(page: bs4.element.Tag) -> list:
+    raw = ''
+    for n in page.children:
+        if isinstance(n, str):
+            raw += n
+            pass
+        elif isinstance(n, bs4.element.Tag):
+            if all((x not in n.attrs['class']) for x in ['bgB', 'bgY', 'Y']):
+                raw += n.get_text()
+    entries = raw.splitlines()
+    entries = [e for e in entries if not re.fullmatch(' *', e)]
+    entries = [parse_content_entry(e) for e in entries]
+    return entries
 
+def parse_content_entry(line: str) -> tuple:
+    m = re.fullmatch(r'(\* )?(.+[^.]).*[^0-9]([0-9]{3})[-f]?', line)
 
-def parse_content_listing(page: bs4.element.Tag) -> list:
-    return [
-            parse_content_entry(span)
-            for span in page.find_all('span')
-            if is_content_entry(span)
-            ]
+    if m:
+        return (m.group(2).strip(), m.group(3))
+    else:
+        # raise RuntimeError(f'LINE DIDNT MATCH! {line}')
+        return None
 
 def test_content_listing():
     from pprint import pprint
diff --git a/src/txtv.py b/src/txtv.py
@@ -4,13 +4,12 @@ import bs4
 import requests as rq
 import sys
 import re
-import configparser
 import colorama
 from colorama import Fore, Back, Style
 from util import err
 from pathlib import Path
+from config import get_or_gen_config, apply_aliases
 
-CONFIG_DIR = Path.home() / '.config' / 'svtxtv'
 
 def validate_page_nbr(arg: str) -> int:
     """
@@ -49,6 +48,7 @@ def get_page_loop(start_num: int, pattern):
         pages.append(get_page(int(match.group(1)))[0])
     return pages
 
+
 def test_page_loop():
     pages = get_page_loop(101)
     print(f'number of pages = {len(pages)}')
@@ -80,42 +80,14 @@ def show_page(page: bs4.element.Tag):
             style = Fore.BLUE
         print(style + node.get_text() + Style.RESET_ALL, end='')
 
+
 def show_headers():
     from listing import list_all_articles
     articles = list_all_articles()
-    for title, page_nbr in articles:
-        print(title.ljust(38, '.'), Fore.BLUE + str(page_nbr) + Fore.RESET)
-
-
-def get_or_gen_config(config_path=CONFIG_DIR / 'svtxtv.conf'):
-    cfg = configparser.ConfigParser()
-    if config_path.exists():
-        cfg.read_file(open(config_path, 'r'))
-    else:
-        cfg['color'] = {
-                'header' : 'yellow',
-                'frame' : 'blue',
-                }
-        cfg['alias'] = {
-                '__DEFAULT__' : '100',  # magic alias, will be used when given no arguments.
-                'inrikes':'101',
-                'in':'101',
-                'utrikes':'104',
-                'ut':'104',
-                'innehåll':'700',
-                }
-        if not CONFIG_DIR.exists():
-            CONFIG_DIR.mkdir()
-        cfg.write(open(config_path, 'w'))
-    return cfg
-
-
-def apply_aliases(txt: str, cfg: configparser.ConfigParser) -> str:
-    txt = txt.strip()
-    if 'alias' in cfg and txt in cfg['alias']:
-        return cfg['alias'][txt]
-    else:
-        return txt
+    for art in articles:
+        if art:
+            title, page_nbr = art
+            print(title.ljust(38, '.'), Fore.BLUE + str(page_nbr) + Fore.RESET)
 
 
 if __name__ == '__main__':

	txtv Swiss text tv in the terminal
	git clone https://git.in0rdr.ch/txtv.git
	Log \| Files \| Refs \| Pull requests \| Archive \| README \| LICENSE

M	src/listing.py	\|	59	+++++++++++++++++++++++++++++++++++------------------------
M	src/txtv.py	\|	42	+++++++-----------------------------------