commit 6cdda6bfd09e6328582ef1b87cbae0f928c9e71e
parent 88cf01cbc68460f12d17c4752f669f57d29e8882
Author: Isak Lindhé <isak.e.lindhe@gmail.com>
Date: Sun, 20 Jan 2019 16:24:32 +0100
experimenting with content listing
Diffstat:
1 file changed, 129 insertions(+), 0 deletions(-)
diff --git a/notebooks/listing_test.ipynb b/notebooks/listing_test.ipynb
@@ -0,0 +1,129 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import txtv\n",
+ "import listing as ls\n",
+ "import bs4\n",
+ "import re"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "inr = txtv.get_page(101)[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " 101 SVT Text Torsdag 17 jan 2019\n",
+ " \u001b[34m \u001b[0m\u001b[34m \u001b[0m\n",
+ " \u001b[34m \u001b[0m\u001b[34m \u001b[0m\u001b[34m \u001b[0m\n",
+ " \u001b[34m \u001b[0m\u001b[34m \u001b[0m\u001b[34m \u001b[0m\u001b[34mSVT Text \u001b[0m\n",
+ " \n",
+ " \u001b[34m \u001b[0m\u001b[34m \u001b[0m\u001b[34m* = efter kl 12 Nyhetsrullen 188 \u001b[0m\n",
+ " \u001b[0m\n",
+ "* S och SD har ökat sedan valet......106\u001b[0m\n",
+ "* Opinionen enligt Novus mätning....160-\u001b[0m\n",
+ " \u001b[0m\n",
+ "* Novus: Förtroenderas för politiker 107\u001b[0m\n",
+ "* SD- och M-väljare mest kritiska....108\u001b[0m\n",
+ " \u001b[0m\n",
+ "* Fler MP-profiler hoppar av.........109\u001b[0m\n",
+ " \u001b[0m\n",
+ "* Migrationsministern S EU-kandidat..110\u001b[0m\n",
+ " \u001b[0m\n",
+ "* Höjt skadestånd efter terrordådet..111\u001b[0m\n",
+ " \u001b[0m\n",
+ "* Städade utan tillstånd hos polisen 112\u001b[0m\n",
+ " \u001b[0m\n",
+ " \u001b[0m\n",
+ " \u001b[2m \u001b[0m\u001b[2m \u001b[0mFler rubriker 102 \u001b[0m \u001b[0m \u001b[0m\n",
+ " \u001b[34m \u001b[0m\u001b[34m \u001b[0m\u001b[34mUtrikes 104 Sport 300 Innehåll 700 \u001b[0m\n"
+ ]
+ }
+ ],
+ "source": [
+ "txtv.show_page(inr)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['S och SD har ökat sedan valet......106',\n",
+ " 'Opinionen enligt Novus mätning....160-',\n",
+ " 'Novus: Förtroenderas för politiker 107',\n",
+ " 'SD- och M-väljare mest kritiska....108',\n",
+ " 'Fler MP-profiler hoppar av.........109',\n",
+ " 'Migrationsministern S EU-kandidat..110',\n",
+ " 'Höjt skadestånd efter terrordådet..111',\n",
+ " 'Städade utan tillstånd hos polisen 112']"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Collect the text of each direct child tag whose class list has neither 'bgB' nor 'bgY'.\n",
+ "# Tag.get() avoids the KeyError that attrs['class'] raises on tags without a class attribute.\n",
+ "entries = [\n",
+ "    n.get_text()\n",
+ "    for n in inr.children\n",
+ "    if isinstance(n, bs4.element.Tag)  # bare strings between tags are skipped\n",
+ "    and not any(c in n.get('class', []) for c in ('bgB', 'bgY'))\n",
+ "]\n",
+ "entries = [e for e in entries if not re.fullmatch(' *', e)]  # drop empty / all-space rows\n",
+ "entries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}