Script pour mangastream

Je ne savais pas trop quoi programmer et, comme j’ai remarqué qu’ils avaient enlevé Bleach, One Piece… de onemanga.com, j’ai écrit un petit script pour télécharger les mangas à partir du site MangaStream.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""A small script that downloads mangas from MangaStream
    Licenced under the GPL >=v2 Licence
    Copyright TANGUY Arnaud <arn.tanguy@gmail.com>"""

import os
import socket
import subprocess
import sys
import urllib

from BeautifulSoup import BeautifulSoup

def helper():
    """Print the command-line usage and terminate the process.

    The script name shown in the usage line is taken from ``sys.argv[0]``.

    Raises:
        SystemExit: always, with exit status 0.
    """
    print("Usage:\n        - python " + sys.argv[0] + " manga chapter\n        ")
    # sys.exit instead of the bare `exit` builtin: `exit` is injected by the
    # `site` module and is missing when the interpreter runs with -S.
    sys.exit(0)

# Config
# Site root; page paths such as "/manga" are appended to it.
MS = "http://www.mangastream.com"
# An <img> is kept as a manga page only if its src contains this prefix.
IMG_BASE_URL = "http://static3.mangastream.com/manga"
# Directory under which one folder per manga is created.
DL_DIR = "/media/data/Images/Mangas"
# Local file used to store fetched HTML pages (also used with an "_img" suffix).
TMP_DOWN = "/tmp/mangastream"
# Attempt limit consulted by retrieve_url.
MAXTRIES = 5

def retrieve_url(url, save_location):
    """Download ``url`` to ``save_location`` with wget, retrying on failure.

    wget is started up to ``MAXTRIES`` times. Its arguments are passed as a
    list, so neither the URL nor the destination path goes through a shell
    (no escaping of spaces, ``&`` or ``?`` is needed).

    Args:
        url: Address of the resource to fetch.
        save_location: Path of the file wget writes to.

    Returns:
        ``save_location`` as soon as wget exits with status 0.

    Raises:
        SystemExit: with status 1 when every attempt failed.
        OSError: if the wget executable cannot be started.
    """
    # wget reports progress on stderr; discard it to keep the output readable.
    with open(os.devnull, "w") as devnull:
        for _ in range(MAXTRIES):
            print("Retrieving file "+url)
            status = subprocess.call(
                ["wget", url, "-O", save_location], stderr=devnull)
            # Any non-zero status is a failure, not only wget's generic code 1.
            if status == 0:
                return save_location
            print("\033[31;1mFailed download, retrying…\033[0m")
    # Only give up once all attempts have been used.
    print("\033[31;1mThe url "+url+" is unreachable...\033[0m")
    sys.exit(1)

def manga_name(manga):
    """Return ``manga`` lower-cased with spaces, dots and dashes turned into '_'."""
    slug = manga.lower()
    for separator in (' ', '.', '-'):
        slug = slug.replace(separator, '_')
    return slug

def make_pretty(name):
    """Return a display form of ``name``.

    The value is converted to a string and capitalized (first character
    upper-cased, the rest lower-cased); slashes are then removed and
    underscores become spaces.
    """
    capitalized = str(name).capitalize()
    without_slashes = "".join(capitalized.split("/"))
    return " ".join(without_slashes.split("_"))

def find_chapter_url(manga, chapter):
    """Find the url of a chapter in the site's manga listing.

    The listing page (``MS + "/manga"``) is saved to ``TMP_DOWN`` and every
    ``<td>`` holding a link is inspected. A link matches when its ``href``
    contains ``manga`` and its text contains ``str(chapter)``.

    Args:
        manga: Substring expected in the chapter link's href.
        chapter: Chapter identifier expected in the link text.

    Returns:
        The ``href`` value of the first matching link.

    Raises:
        SystemExit: with status 1 when no link matches.
    """
    retrieve_url(MS+"/manga", TMP_DOWN)
    # TMP_DOWN is a local file; `with` guarantees the handle is closed.
    with open(TMP_DOWN) as listing:
        soup = BeautifulSoup(listing.read())
    wanted = str(chapter)
    for cell in soup.findAll('td'):
        link = cell.a
        if link is None:
            # Cell without any anchor.
            continue
        manga_url = link.get('href')
        label = link.string
        if manga_url is None or label is None:
            # Anchor without href, or whose content is not a single string.
            continue
        # NOTE(review): substring match, so chapter "38" also matches "389".
        if manga in manga_url and wanted in label:
            print("\033[32mFound chapter url : "+manga_url+"\033[0m")
            return manga_url
    print("\033[31;1mError : manga or chapter not found\033[0m")
    sys.exit(1)

def find_img_page_list(chapter_url):
    """Return the page links listed on a chapter page.

    The chapter page is saved to ``TMP_DOWN + "_img"``; the ``value`` of every
    ``<option>`` of the second ``<select>`` element on that page is collected.

    Args:
        chapter_url: Full url of the chapter page.

    Returns:
        A list with the ``value`` attribute of each option, in page order.

    Raises:
        SystemExit: with status 1 when the page has fewer than two
            ``<select>`` elements.
    """
    print("Retrieving page list...")
    page_file = retrieve_url(chapter_url, TMP_DOWN+"_img")
    with open(page_file) as page:
        soup = BeautifulSoup(page.read())
    selects = soup.findAll('select')
    if len(selects) < 2:
        # The code relies on the page selector being the second <select>.
        print("\033[31;1mError : page list not found\033[0m")
        sys.exit(1)
    # Search the tag directly instead of re-parsing its string form.
    return [option['value'] for option in selects[1].findAll('option')]

def find_img_urls(img_page_list):
    """Collect the image urls found on each page of a chapter.

    Every entry of ``img_page_list`` is appended to ``MS``, fetched into
    ``TMP_DOWN + "_img"`` and scanned for ``<img>`` tags whose ``src``
    contains ``IMG_BASE_URL``.

    Args:
        img_page_list: Site-relative page paths, as returned by
            ``find_img_page_list``.

    Returns:
        A list of matching ``src`` values, in the order they were found.
    """
    print("Retrieving images urls..")
    image_url_list = []
    page_file = TMP_DOWN+"_img"
    for img_page in img_page_list:
        retrieve_url(MS+img_page, page_file)
        # `with` closes the file even if parsing raises.
        with open(page_file) as page:
            soup = BeautifulSoup(page.read())
        for img in soup.findAll('img'):
            # .get avoids a KeyError on <img> tags that carry no src.
            src = img.get("src")
            if src and IMG_BASE_URL in src:
                print("\033[32mFound image url : " + src+"\033[0m")
                image_url_list.append(src)
    return image_url_list

def create_folder(path, chapter):
    """Create ``path`` and its ``chapter`` sub-folder if they do not exist.

    Missing parent directories of ``path`` are created as well, so a fresh
    download root does not have to be prepared by hand.

    Args:
        path: Folder of the manga.
        chapter: Name of the chapter sub-folder created inside ``path``.

    Raises:
        OSError: if a directory cannot be created.
    """
    chapter_dir = path+'/'+chapter
    if not os.path.isdir(chapter_dir):
        # makedirs also creates `path` itself and any missing ancestor.
        os.makedirs(chapter_dir)


def download_chapter(manga, chapter):
    """Download every image of a chapter, then terminate the process.

    Images are stored in ``DL_DIR/<pretty manga name>/<chapter>/`` and named
    ``<pretty manga name>_<n><extension>``, where ``n`` starts at 1 and only
    advances after a successful download.

    Args:
        manga: Manga title as typed by the user.
        chapter: Chapter identifier, as a string.

    Raises:
        SystemExit: with status 0 once the chapter has been processed
            (helpers exit with status 1 on lookup or download failure).
    """
    print("Downloading manga : "+manga+" chapter "+chapter)

    # Hack kept from the original script: this title is looked up as "hsdk"
    # rather than through its generated name. The misspelled form is still
    # accepted because the original code only matched that one.
    if manga in ("History's Strongest Disciple Kenichi",
                 "Hitory's Strongest Disciple Kenichi"):
        manga = "hsdk"

    manga_n = manga_name(manga)
    pretty = make_pretty(manga)
    path = DL_DIR + '/' + pretty
    create_folder(path, chapter)
    chapter_url = MS+find_chapter_url(manga_n, chapter)
    img_urls = find_img_urls(find_img_page_list(chapter_url))
    print("Downloading images...")
    index = 1
    for url in img_urls:
        extension = os.path.splitext(url)[1]
        target = path+"/"+chapter+"/"+pretty+"_"+str(index)+extension
        try:
            retrieve_url(url, target)
            index += 1
        # `socket` must be imported at file level: this tuple is evaluated for
        # every exception passing through, including SystemExit.
        except (socket.error, IOError):
            print("\033[31;1mError downloading image "+url+"\033[0m")
    print("\n\033[32mChapter downloaded.\033[0m")
    sys.exit(0)

# Entry point: run only when executed as a script, so importing the module
# does not start a download or exit the interpreter.
if __name__ == "__main__":
    # Exactly two arguments are expected after the script name:
    # the manga title and the chapter.
    if len(sys.argv) == 3:
        download_chapter(sys.argv[1], sys.argv[2])
    else:
        helper()

Pour faire fonctionner ce script, il faut installer le module Python BeautifulSoup (le script utilise aussi wget pour les téléchargements).
Sous Ubuntu, il suffit de faire :

sudo apt-get install python-beautifulsoup

Pour l’utiliser, il suffit de faire :

mangastream.py manga chapitre

Exemple :

mangastream.py "History's Strongest Disciple Kenichi" 389

Laisser un commentaire

Entrez vos coordonnées ci-dessous ou cliquez sur une icône pour vous connecter:

Logo WordPress.com

Vous commentez à l'aide de votre compte WordPress.com. Déconnexion / Changer )

Image Twitter

Vous commentez à l'aide de votre compte Twitter. Déconnexion / Changer )

Photo Facebook

Vous commentez à l'aide de votre compte Facebook. Déconnexion / Changer )

Photo Google+

Vous commentez à l'aide de votre compte Google+. Déconnexion / Changer )

Connexion à %s

%d blogueurs aiment cette page :