
commit b94920e6e5
6 changed files with 209 additions and 0 deletions
@@ -0,0 +1,6 @@
*mp4
*mp4*
*.swp

.idea
*.pyc
@@ -0,0 +1,71 @@
#!/usr/bin/env python
# TestCalculatorFunctions.py

import unittest
from scraper import Scraper
from pprint import pprint


class KnownValues(unittest.TestCase):
    scraper = Scraper()

    def test_instance(self):
        self.assertIsInstance(self.scraper, Scraper)

    def test_get_stations(self):
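        # A smoke test: it passes as long as the config file opens without raising.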
        fname = 'rickandmorty.conf'
        with open(fname) as fopen:
            self.assertTrue(True)

    def test_parse_stations(self):
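        # The fixture uses the same 'NAME|CODE' line format the scraper parses.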
        source = '''BARI CENTRALE|S11119
BARI TORRE QUETTA|S11004
BOLOGNA C.LE|S05043'''
        stations = self.scraper.parse_stations(source)
        self.assertListEqual(stations, [
            {'name': 'BARI CENTRALE', 'code': 'S11119'},
            {'name': 'BARI TORRE QUETTA', 'code': 'S11004'},
            {'name': 'BOLOGNA C.LE', 'code': 'S05043'},
        ])
        for station in stations:
            self.assertTrue('name' in station)
            self.assertTrue('code' in station)

    def test_parse_station(self):
        station = 'SAN LEONARDO DI CUTRO|S11827'
        expected = {'name': 'SAN LEONARDO DI CUTRO', 'code': 'S11827'}
        self.assertDictEqual(self.scraper.parse_station(station), expected)

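    # The disabled tests below appear to target an earlier hackerspaces.org
    # scraper; they call methods this Scraper does not define.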
    # def test_can_connect(self):
    #     scraper = Scraper()
    #     self.assertEqual(scraper.touch('http://ddg.gg'), 200)
    #
    # def test_get_page(self):
    #     scraper = Scraper()
    #     self.assertEqual(scraper.get_page().status_code, 200)
    #
    # def test_format_hackerspace(self):
    #     scraper = Scraper()
    #     hackerspace = {'name': 'pippo'}
    #     formatted = scraper.format_hackerspace(hackerspace)
    #     self.assertTrue('url' in formatted)
    #
    #
    # def test_get_hackerspaces(self):
    #     scraper = Scraper()
    #     hackerspaces = scraper.get_hackerspaces()
    #     self.assertGreater(len(hackerspaces), 0)
    #
    #     for hackerspace in hackerspaces:
    #         self.assertTrue('url' in hackerspace)
    #
    # def test_convert_text_field_to_hs_url(self):
    #     scraper = Scraper()
    #     textfield = '<b><a href="/Freaknet" title="Freaknet">Freaknet</a></b>'
    #     self.assertEqual(scraper.convert_text_field_to_hs_url(textfield), 'https://wiki.hackerspaces.org/Freaknet')


if __name__ == '__main__':
    unittest.main()
@@ -0,0 +1,10 @@
#!/bin/bash

for season in $(seq 1 3); do for episode in $(seq 1 11)
do

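    # Grab the episode page, pull the estream player URL out of it, fetch
    # that page, and extract the direct .mp4 link (assumes the markup
    # watchseries.do served at the time of this commit).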
    as='curl -m 0 -s http://watchseries.do/series/rick-and-morty/season/'$season'/episode/'$episode' | grep player | grep estream | cut -d "\"" --fields=4 | xargs -t curl | grep mp4 | cut -d \" -f 2'
    # as='https://it.wikipedia.org/wiki/'$season''$episode
    wget $(eval $as) -O "s"$season"e"$episode".mp4"

done;done
@@ -0,0 +1,2 @@
[Main]
url=http://watchseries.do/series/rick-and-morty/season/%s/episode/%s
@@ -0,0 +1,108 @@
import requests, json, configparser, os
from bs4 import BeautifulSoup
from pprint import pprint


class Scraper:
    # url = 'http://www.viaggiatreno.it/viaggiatrenonew/resteasy/viaggiatreno/partenze/S01480/Tue%20Oct%2011%202017%2008:30:00%20GMT+0200%20(CEST)'

    def __init__(self):
        config = configparser.RawConfigParser()
        config.read('rickandmorty.conf')

        # getfloat() raises an exception if the value is not a float
        # getint() and getboolean() also do this for their respective types
        self.url = config.get('Main', 'url')

        pprint(self.url)

        # config.read(['site.cfg', os.path.expanduser('~/.rickandmorty.conf')])
        self.load_page()

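    # Parse a 'WIDTHxHEIGHT' string (e.g. '1280x720') into a pixel count so
    # candidate streams can be ranked by resolution; 0 means "unknown".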
    def calc_res(self, resolution):
        if resolution is None:
            return 0
        vals = resolution.split('x')
        if len(vals) < 2:
            return 0
        pprint(vals)
        _ret = int(vals[0]) * int(vals[1])
        return _ret

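    # Fetch the episode page, follow the embedded player iframe, pick the
    # highest-resolution <source> it offers, and stream that file to disk.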
    def load_page(self):
        url = self.url % (1, 1)
        r = requests.get(url)
        # pprint(content)
        pprint(url)
        soup = BeautifulSoup(r.text, 'html.parser')
        player = soup.find(id="player")
        frameUrl = player.get('data-src').strip()

        r = requests.get(frameUrl)
        # pprint(content)
        pprint(frameUrl)
        soup = BeautifulSoup(r.text, 'html.parser')
        videoResolutions = [x.get('res') for x in soup.find_all(name="source")]
        videoSources = [x.get('src') for x in soup.find_all(name="source")]
        pprint(videoSources)
        pprint(videoResolutions)

        videoUrls = list(zip(videoSources, videoResolutions))
        topRes = 0
        curTop = videoUrls[0][0]
        for video in videoUrls:
            if self.calc_res(video[1]) > topRes:
                topRes = self.calc_res(video[1])
                curTop = video[0]
        url = curTop

        # frameUrl = player.get('data-src').strip()

        print("downloading with requests")
        local_filename = "s01e01.mp4"
        # NOTE the stream=True parameter
        r = requests.get(url, stream=True)
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    print('writing chunk...\n')
                    f.write(chunk)
                    # f.flush() commented by recommendation from J.F.Sebastian
        # r = requests.get(url)
        # with open("s01e01.mp4", "wb") as code:
        #     code.write(r.content)
        # pprint(url)
        # pprint(frameUrl)
        # iframe#player

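    # Station data comes as one 'NAME|CODE' pair per line; turn each line
    # into a {'name': ..., 'code': ...} dict.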
    def parse_stations(self, stations):
        _ret = []
        for station in stations.split('\n'):
            if len(station) > 0:
                _ret.append(self.parse_station(station))
        return _ret

    def parse_station(self, station):
        stat = station.split('|')
        return {
            'name': stat[0].strip(),
            'code': stat[1].strip()
        }

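    # Case-insensitive substring match over a list of parsed station dicts.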
    def find_stations(self, station_name, stations):
        _ret = []
        for station in stations:
            if station_name.lower() in station['name'].lower():
                _ret.append(station)
        return _ret


if __name__ == '__main__':
    scraper = Scraper()

    # Scraper defines no get_stations(); read the station dump and parse it.
    with open('elenco_stazioni.txt') as f:
        stations = scraper.parse_stations(f.read())
    pprint(stations)
@@ -0,0 +1,12 @@
#!/bin/sh

#while true; do


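# List size and name of the downloaded episodes (field numbers assume the
# column layout of GNU 'ls -lah').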
files="$(ls -lah | grep 's[1-9]*e[1-9]*\.mp4' | cut -d' ' -f10,6)"
#files="$(ls -lah | awk '/s[1-9]\*e[1-9]\*\.mp4/{print $10}')"
printf "%s\n\n" "$files"
sleep 1
#done
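# Re-exec this script rather than looping: a crude watch(1)-style refresh.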
exec ./status.sh