Initial commit
commit b94920e6e5
6 .gitignore vendored Normal file
@@ -0,0 +1,6 @@
*mp4
*mp4*
*.swp

.idea
*.pyc
71 TestScraper.py Executable file
@@ -0,0 +1,71 @@
#!/usr/bin/env python
# TestScraper.py

import unittest
from scraper import Scraper
from pprint import pprint


class KnownValues(unittest.TestCase):
    scraper = Scraper()

    def test_instance(self):
        self.assertIsInstance(self.scraper, Scraper)

    def test_get_stations(self):
        # placeholder: only verifies that the config file can be opened
        fname = 'rickandmorty.conf'
        with open(fname):
            self.assertTrue(True)

    def test_parse_stations(self):
        source = '''BARI CENTRALE|S11119
BARI TORRE QUETTA|S11004
BOLOGNA C.LE|S05043'''
        stations = self.scraper.parse_stations(source)
        self.assertListEqual(stations, [
            {'name': 'BARI CENTRALE', 'code': 'S11119'},
            {'name': 'BARI TORRE QUETTA', 'code': 'S11004'},
            {'name': 'BOLOGNA C.LE', 'code': 'S05043'},
        ])
        for station in stations:
            self.assertTrue('name' in station)
            self.assertTrue('code' in station)

    def test_parse_station(self):
        station = 'SAN LEONARDO DI CUTRO|S11827'
        expected = {'name': 'SAN LEONARDO DI CUTRO', 'code': 'S11827'}
        self.assertDictEqual(self.scraper.parse_station(station), expected)

    # def test_can_connect(self):
    #     scraper = Scraper()
    #     self.assertEqual(scraper.touch('http://ddg.gg'), 200)
    #
    # def test_get_page(self):
    #     scraper = Scraper()
    #     self.assertEqual(scraper.get_page().status_code, 200)
    #
    # def test_format_hackerspace(self):
    #     scraper = Scraper()
    #     hackerspace = {'name': 'pippo'}
    #     formatted = scraper.format_hackerspace(hackerspace)
    #     self.assertTrue('url' in formatted)
    #
    # def test_get_hackerspaces(self):
    #     scraper = Scraper()
    #     hackerspaces = scraper.get_hackerspaces()
    #     self.assertGreater(len(hackerspaces), 0)
    #
    #     for hackerspace in hackerspaces:
    #         self.assertTrue('url' in hackerspace)
    #
    # def test_convert_text_field_to_hs_url(self):
    #     scraper = Scraper()
    #     textfield = '<b><a href="/Freaknet" title="Freaknet">Freaknet</a></b>'
    #     self.assertEqual(scraper.convert_text_field_to_hs_url(textfield), 'https://wiki.hackerspaces.org/Freaknet')


if __name__ == '__main__':
    unittest.main()
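test_get_stations above is a placeholder; a fuller version, assuming the hypothetical Scraper.get_stations(fname) helper sketched after scraper.py below and an elenco_stazioni.txt list of NAME|CODE pairs, might look like:

    # hypothetical: depends on Scraper.get_stations(), which this commit does not define
    def test_get_stations(self):
        stations = self.scraper.get_stations('elenco_stazioni.txt')
        self.assertGreater(len(stations), 0)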
10 fetch.sh Executable file
@@ -0,0 +1,10 @@
#!/bin/bash

for season in $(seq 1 3); do for episode in $(seq 1 11)
do

# scrape the episode page for the estream player URL, fetch that embed, and grep the mp4 link out of it
as='curl -m 0 -s http://watchseries.do/series/rick-and-morty/season/'$season'/episode/'$episode' | grep player | grep estream | cut -d "\"" --fields=4 | xargs -t curl | grep mp4 | cut -d \" -f 2'
# as='https://it.wikipedia.org/wiki/'$season''$episode
wget $(eval $as) -O "s"$season"e"$episode".mp4"  # -O saves the download itself; lowercase -o would only redirect wget's log

done;done
2 rickandmorty.conf Normal file
@@ -0,0 +1,2 @@
[Main]
url=http://watchseries.do/series/rick-and-morty/season/%s/episode/%s
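scraper.py fills the two %s placeholders with season and episode numbers via old-style string formatting; a minimal sketch of that lookup, mirroring Scraper.__init__ and load_page below:

    import ConfigParser
    config = ConfigParser.RawConfigParser()
    config.read('rickandmorty.conf')
    url = config.get('Main', 'url') % (1, 1)
    # -> http://watchseries.do/series/rick-and-morty/season/1/episode/1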
108 scraper.py Normal file
@@ -0,0 +1,108 @@
import requests, json, ConfigParser, os
from bs4 import BeautifulSoup
from pprint import pprint


class Scraper():
    # url = 'http://www.viaggiatreno.it/viaggiatrenonew/resteasy/viaggiatreno/partenze/S01480/Tue%20Oct%2011%202017%2008:30:00%20GMT+0200%20(CEST)'

    def __init__(self):
        config = ConfigParser.RawConfigParser()
        config.read('rickandmorty.conf')

        # getfloat() raises an exception if the value is not a float
        # getint() and getboolean() also do this for their respective types
        self.url = config.get('Main', 'url')

        pprint(self.url)

        # config.read(['site.cfg', os.path.expanduser('~/.rickandmorty.conf')])
        self.load_page()

    def calc_res(self, resolution):
        # score a 'WIDTHxHEIGHT' string by its pixel area; None or malformed input scores 0
        if resolution is None:
            return 0
        vals = resolution.split('x')
        if len(vals) < 2:
            return 0
        pprint(vals)
        return int(vals[0]) * int(vals[1])

    def load_page(self):
        url = self.url % (1, 1)
        r = requests.get(url)
        # pprint(content)
        pprint(url)
        soup = BeautifulSoup(r.text, 'html.parser')
        player = soup.find(id="player")
        frameUrl = player.get('data-src').strip()

        r = requests.get(frameUrl)
        # pprint(content)
        pprint(frameUrl)
        soup = BeautifulSoup(r.text, 'html.parser')
        videoResolutions = [x.get('res') for x in soup.find_all(name="source")]
        videoSources = [x.get('src') for x in soup.find_all(name="source")]
        pprint(videoSources)
        pprint(videoResolutions)

        # keep the source with the largest pixel area, defaulting to the first one
        videoUrls = zip(videoSources, videoResolutions)
        topRes = 0
        curTop = videoUrls[0][0]
        for video in videoUrls:
            if self.calc_res(video[1]) > topRes:
                topRes = self.calc_res(video[1])
                curTop = video[0]
        url = curTop
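        # equivalent, more compact selection (a sketch, not in the original commit):
        # url = max(videoUrls, key=lambda v: self.calc_res(v[1]))[0]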

        # frameUrl = player.get('data-src').strip()

        print "downloading with requests"
        local_filename = "s01e01.mp4"
        # NOTE the stream=True parameter
        r = requests.get(url, stream=True)
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    print('writing chunk...\n')
                    f.write(chunk)
                    # f.flush() commented by recommendation from J.F.Sebastian
        # r = requests.get(url)
        # with open("s01e01.mp4", "wb") as code:
        #     code.write(r.content)
        # pprint(url)
        # pprint(frameUrl)
        # iframe#player

    def parse_stations(self, stations):
        # split a newline-separated list of NAME|CODE pairs into dicts
        _ret = []
        for station in stations.split('\n'):
            if len(station) > 0:
                _ret.append(self.parse_station(station))
        return _ret

    def parse_station(self, station):
        stat = station.split('|')
        return {
            'name': stat[0].strip(),
            'code': stat[1].strip()
        }

    def find_stations(self, station_name, stations):
        # case-insensitive substring match over parsed station dicts
        _ret = []
        for station in stations:
            if station_name.lower() in station['name'].lower():
                _ret.append(station)
        return _ret


if __name__ == '__main__':
    scraper = Scraper()

    # NOTE: get_stations is not defined on Scraper in this commit (see sketch below)
    stations = scraper.get_stations('elenco_stazioni.txt')
    pprint(stations)
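get_stations is never defined on Scraper in this commit (it appears to be left over from an earlier train-timetable scraper, along with the viaggiatreno URL and the station helpers), so the __main__ block raises AttributeError as written. A minimal sketch of the missing method, assuming elenco_stazioni.txt holds one NAME|CODE pair per line:

    # hypothetical helper, not part of this commit: read the station list
    # and delegate to parse_stations()
    def get_stations(self, fname):
        with open(fname) as f:
            return self.parse_stations(f.read())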