Browse Source

Initial commit

master
valerio 5 years ago
commit
b94920e6e5
  1. 6
      .gitignore
  2. 71
      TestScraper.py
  3. 10
      fetch.sh
  4. 2
      rickandmorty.conf
  5. 108
      scraper.py
  6. 12
      status.sh

6
.gitignore

@ -0,0 +1,6 @@
*mp4
*mp4*
*.swp
.idea
*.pyc

71
TestScraper.py

@ -0,0 +1,71 @@
#!/usr/bin/env python
# TestCalculatorFunctions.py
import unittest
from scraper import Scraper
from pprint import pprint
class KnownValues(unittest.TestCase):
    """Unit tests for the pure parsing helpers of Scraper.

    NOTE(review): instantiating Scraper at class-definition time runs its
    __init__, which (per scraper.py) calls load_page() and performs live
    network I/O plus a file download. The parse_* tests below only need
    the parsing methods — consider a lighter fixture.
    """

    # Shared fixture, built once when the class is defined.
    # (Stray trailing semicolon removed.)
    scraper = Scraper()

    def test_instance(self):
        """The shared fixture really is a Scraper instance."""
        self.assertIsInstance(self.scraper, Scraper)

    def test_get_stations(self):
        """The config file exists and has content.

        The original body asserted a literal True, which could never
        fail; assert something observable about the file instead.
        """
        fname = 'rickandmorty.conf'
        with open(fname) as fopen:
            self.assertTrue(fopen.read().strip())

    def test_parse_stations(self):
        """parse_stations splits 'NAME|CODE' lines into dicts, in order."""
        source = '''BARI CENTRALE|S11119
BARI TORRE QUETTA|S11004
BOLOGNA C.LE|S05043'''
        stations = self.scraper.parse_stations(source)
        self.assertListEqual(stations, [
            {'name': 'BARI CENTRALE', 'code': 'S11119'},
            {'name': 'BARI TORRE QUETTA', 'code': 'S11004'},
            {'name': 'BOLOGNA C.LE', 'code': 'S05043'},
        ])
        # Every parsed entry exposes both expected keys.
        for station in stations:
            self.assertTrue('name' in station)
            self.assertTrue('code' in station)

    def test_parse_station(self):
        """A single 'NAME|CODE' line becomes a {'name', 'code'} dict."""
        station = 'SAN LEONARDO DI CUTRO|S11827'
        expected = {'name': 'SAN LEONARDO DI CUTRO', 'code': 'S11827'}
        self.assertDictEqual(self.scraper.parse_station(station), expected)
# Run the test suite when this module is executed directly.
if __name__ == '__main__':
    unittest.main()

10
fetch.sh

@ -0,0 +1,10 @@
#!/bin/bash
# Download Rick and Morty seasons 1-3, episodes 1-11 each, by scraping the
# episode page for its embedded estream player and extracting the .mp4 URL.
# Fixes: shebang was '#/bin/bash' (missing '!'); wget used lowercase -o,
# which writes the *log* to sNeM.mp4 and discards the video — -O (capital)
# saves the download itself to that filename.
for season in $(seq 1 3); do
    for episode in $(seq 1 11); do
        # Pipeline that digs the direct mp4 link out of the player iframe.
        as='curl -m 0 -s http://watchseries.do/series/rick-and-morty/season/'$season'/episode/'$episode' | grep player | grep estream | cut -d "\"" --fields=4 | xargs -t curl | grep mp4 | cut -d \" -f 2'
        # as='https://it.wikipedia.org/wiki/'$season''$episode
        wget $(eval $as) -O "s"$season"e"$episode".mp4"
    done
done

2
rickandmorty.conf

@ -0,0 +1,2 @@
# Configuration read by scraper.py (ConfigParser, section 'Main').
# 'url' is a template with two %s placeholders: season number, episode number.
[Main]
url=http://watchseries.do/series/rick-and-morty/season/%s/episode/%s

108
scraper.py

@ -0,0 +1,108 @@
import requests, json, ConfigParser, os
from bs4 import BeautifulSoup
from pprint import pprint
class Scraper():
    """Downloads a Rick and Morty episode from watchseries.do.

    Also carries station-parsing helpers (``NAME|CODE`` lines) that appear
    to be left over from a different scraper — NOTE(review): they are
    unrelated to the video-download path; confirm before removing.
    """

    # Example endpoint kept for reference (unused):
    # url = 'http://www.viaggiatreno.it/viaggiatrenonew/resteasy/viaggiatreno/partenze/S01480/Tue%20Oct%2011%202017%2008:30:00%20GMT+0200%20(CEST)'

    def __init__(self):
        """Read the episode URL template from rickandmorty.conf and
        immediately download an episode.

        WARNING: calling load_page() here means merely constructing a
        Scraper performs live HTTP requests and writes s01e01.mp4.
        """
        config = ConfigParser.RawConfigParser()
        config.read('rickandmorty.conf')
        # getfloat() raises an exception if the value is not a float
        # getint() and getboolean() also do this for their respective types
        self.url = config.get('Main', 'url')
        pprint(self.url)
        # config.read(['site.cfg', os.path.expanduser('~/.rickandmorty.conf')])
        self.load_page()
        pass

    def calc_res(self, resolution):
        """Return the pixel count of a 'WIDTHxHEIGHT' string.

        Returns 0 when resolution is None or not in the WxH form.
        NOTE(review): non-numeric parts (e.g. '720p') would raise
        ValueError in int() — confirm upstream values are plain digits.
        """
        if resolution is None:
            return 0
        vals = resolution.split('x')
        if (len(vals) < 2):
            return 0
        pprint(vals)
        _ret = int(vals[0]) * int(vals[1])
        return _ret

    def load_page(self):
        """Fetch episode s01e01, pick the highest-resolution source, and
        stream it to s01e01.mp4.

        Steps: (1) GET the episode page, (2) follow the #player iframe's
        data-src, (3) collect <source src/res> pairs, (4) keep the source
        whose res (via calc_res) is largest, (5) stream-download it.
        """
        # The template has two %s slots: season, episode — hard-coded to 1,1.
        url = self.url % (1, 1)
        r = requests.get(url)
        # pprint(content)
        pprint(url)
        soup = BeautifulSoup(r.text, 'html.parser')
        player = soup.find(id="player")
        frameUrl = player.get('data-src').strip()
        r = requests.get(frameUrl)
        # pprint(content)
        pprint(url)
        soup = BeautifulSoup(r.text, 'html.parser')
        # Parallel lists: the i-th src goes with the i-th res attribute.
        videoResolutions = [x.get('res') for x in soup.find_all(name="source")]
        videoSources = [x.get('src') for x in soup.find_all(name="source")]
        pprint(videoSources)
        pprint(videoResolutions)
        videoUrls = zip(videoSources, videoResolutions)
        # Linear scan for the (src, res) pair with the most pixels;
        # default to the first source if nothing beats res 0.
        topRes = 0
        curTop = videoUrls[0][0]
        for video in videoUrls:
            if (self.calc_res(video[1]) > topRes):
                topRes = self.calc_res(video[1])
                curTop = video[0]
        url = curTop
        # frameUrl = player.get('data-src').strip()
        print "downloading with requests"
        local_filename = "s01e01.mp4"
        # NOTE the stream=True parameter
        r = requests.get(url, stream=True)
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    print('writing chunk...\n')
                    f.write(chunk)
                    # f.flush() commented by recommendation from J.F.Sebastian
        # r = requests.get(url)
        # with open("s01e01.mp4", "wb") as code:
        #     code.write(r.content)
        # pprint(url)
        # pprint(frameUrl)
        # iframe#player

    def parse_stations(self, stations):
        """Parse a newline-separated block of 'NAME|CODE' lines into a
        list of {'name', 'code'} dicts, skipping empty lines."""
        _ret = []
        for station in stations.split('\n'):
            if len(station) > 0:
                _ret.append(self.parse_station(station))
        return _ret

    def parse_station(self, station):
        """Split one 'NAME|CODE' line into a {'name', 'code'} dict,
        trimming surrounding whitespace from both fields."""
        stat = station.split('|')
        return {
            'name': stat[0].strip(),
            'code': stat[1].strip()
        }

    def find_stations(self, station_name, stations):
        """Return the stations whose 'name' contains station_name,
        case-insensitively (substring match, not prefix)."""
        _ret = []
        for station in stations:
            if station_name.lower() in station['name'].lower():
                _ret.append(station)
        return _ret
if __name__ == '__main__':
    # Bug fix: Scraper has no get_stations() method, so the original call
    # `scraper.get_stations('elenco_stazioni.txt')` always raised
    # AttributeError. Read the station file here and feed its text to the
    # existing parse_stations() helper instead.
    scraper = Scraper()
    with open('elenco_stazioni.txt') as stations_file:
        stations = scraper.parse_stations(stations_file.read())
    pprint(stations)

12
status.sh

@ -0,0 +1,12 @@
#!/bin/sh
# Show size and name of each downloaded episode (sNeM.mp4) once per second.
# Fixes: shebang was '#/bin/sh' (missing '!'); the script looped by
# re-exec'ing itself with `exec ./status.sh`, which breaks when invoked
# from any other directory — the commented-out while loop was the right
# idea, so use it.
while true; do
    # Fields 6 and 10 of `ls -lah`: size and filename (cut emits them in
    # field order, i.e. size first).
    files="$(ls -lah | grep 's[1-9]*e[1-9]*\.mp4' | cut -d' ' -f10,6)"
    #files="$(ls -lah | awk '/s[1-9]\*e[1-9]\*\.mp4/{print $10}')"
    printf "%s\n\n" "$files"
    sleep 1
done
Loading…
Cancel
Save