mirror of
https://github.com/captn3m0/Scripts.git
synced 2024-09-27 22:22:53 +00:00
215 lines
6.3 KiB
Python
215 lines
6.3 KiB
Python
|
#!/usr/bin/env python3
|
||
|
# encoding: utf-8
|
||
|
|
||
|
|
||
|
"""
|
||
|
imguralbum.py - Download a whole imgur album in one go.
|
||
|
|
||
|
Provides both a class and a command line utility in a single script
|
||
|
to download Imgur albums.
|
||
|
|
||
|
MIT License
|
||
|
Copyright Alex Gisby <alex@solution10.com>
|
||
|
"""
|
||
|
|
||
|
|
||
|
import sys
|
||
|
import re
|
||
|
import urllib.request, urllib.parse, urllib.error
|
||
|
import os
|
||
|
import math
|
||
|
from collections import Counter
|
||
|
|
||
|
|
||
|
help_message = """
|
||
|
Quickly and easily download an album from Imgur.
|
||
|
|
||
|
Format:
|
||
|
$ python imguralbum.py [album URL] [destination folder]
|
||
|
|
||
|
Example:
|
||
|
$ python imguralbum.py http://imgur.com/a/uOOju#6 /Users/alex/images
|
||
|
|
||
|
If you omit the dest folder name, the utility will create one with the same name
|
||
|
as the album
|
||
|
(for example for http://imgur.com/a/uOOju it'll create uOOju/ in the cwd)
|
||
|
"""
|
||
|
|
||
|
|
||
|
class ImgurAlbumException(Exception):
    """Error raised by the downloader for invalid URLs or unreadable albums.

    The message is available as the ``msg`` attribute and, when one was
    given, through ``str(exc)`` / ``exc.args`` as well.
    """

    def __init__(self, msg=False):
        """Store ``msg`` (``False`` when no message was supplied)."""
        # Forward a real message to Exception so str(exc), exc.args and
        # tracebacks are populated. The ``False`` placeholder is not
        # forwarded, otherwise str(exc) would read "False".
        if msg:
            super().__init__(msg)
        else:
            super().__init__()
        self.msg = msg
|
||
|
|
||
|
|
||
|
class ImgurAlbumDownloader:
    """Download all images of one Imgur album.

    Constructing an instance validates the album URL, fetches the album's
    "blog layout" page and extracts the (hash, extension) pair of every
    image found in it. ``save_images`` then downloads the images one by one.

    Attributes set by the constructor:
        album_url: the URL passed in by the caller.
        protocol: "http" or "https", as captured from ``album_url``.
        album_key: the alphanumeric album identifier from the URL.
        imageIDs: list of ``(hash, ext)`` tuples, ``ext`` including the dot.
        cnt: ``collections.Counter`` mapping extension -> number of images.
        response: the (already read and closed) HTTP response object.
        image_callbacks / complete_callbacks: registered callback lists.
    """

    # http(s)://[www.][m.]imgur.com/(a|gallery)/<key>[#<n>]
    # Group 1 is the protocol, group 4 the album key.
    _ALBUM_URL_RE = re.compile(
        r"(https?)://(www\.)?(?:m\.)?imgur\.com/(a|gallery)/([a-zA-Z0-9]+)(#[0-9]+)?"
    )

    # One image record inside the JSON embedded in the album page.
    _IMAGE_RE = re.compile(
        r'\{"hash":"([a-zA-Z0-9]+)".*?"ext":"(\.[a-zA-Z0-9]+)"'
    )

    def __init__(self, album_url):
        """
        Constructor. Pass in the album_url that you want to download.

        Raises ImgurAlbumException if the URL is not an Imgur album URL or
        if the album page cannot be read.
        """
        self.album_url = album_url

        # Callback members:
        self.image_callbacks = []
        self.complete_callbacks = []

        # Check the URL is actually imgur:
        match = self._ALBUM_URL_RE.match(album_url)
        if not match:
            raise ImgurAlbumException("URL must be a valid Imgur Album")

        self.protocol = match.group(1)
        self._album_key = match.group(4)

        # Read the no-script version of the page for all the images:
        fullListURL = "http://imgur.com/a/" + self._album_key + "/layout/blog"
        html = self._fetch_album_page(fullListURL)

        # Read in the images now so we can get stats and stuff:
        self.imageIDs = self._extract_images(html)
        self.cnt = Counter(ext for _, ext in self.imageIDs)

    def _fetch_album_page(self, url):
        """Return the decoded body of ``url`` or raise ImgurAlbumException."""
        try:
            response = urllib.request.urlopen(url=url)
        except urllib.error.HTTPError as e:
            raise ImgurAlbumException(
                "Error reading Imgur: Error Code %d" % e.code
            ) from e
        except (urllib.error.URLError, OSError) as e:
            # Connection-level failures (DNS, refused, timeout) carry no
            # HTTP status code, so report the underlying reason instead.
            raise ImgurAlbumException("Error reading Imgur: %s" % e) from e

        # Kept as an attribute for callers that inspect it; the body has
        # been consumed and the connection closed once this method returns.
        self.response = response
        with response:
            response_code = response.getcode()
            if response_code != 200:
                raise ImgurAlbumException(
                    "Error reading Imgur: Error Code %d" % response_code
                )
            return response.read().decode('utf-8')

    @classmethod
    def _extract_images(cls, html):
        """Return the ``(hash, ext)`` tuples of all images found in ``html``."""
        return cls._IMAGE_RE.findall(html)

    def num_images(self):
        """
        Returns the number of images that are present in this album.
        """
        return len(self.imageIDs)

    def list_extensions(self):
        """
        Returns list with occurrences of extensions in descending order.

        Each entry is an ``(extension, count)`` tuple.
        """
        return self.cnt.most_common()

    @property
    def album_key(self):
        """
        The key of this album. Helpful if you plan on generating your own
        folder names.

        This used to be declared as a method, but the constructor assigned
        an instance attribute of the same name which shadowed it, so only
        attribute access ever worked. It is therefore exposed as a property.
        """
        return self._album_key

    @album_key.setter
    def album_key(self, value):
        self._album_key = value

    def on_image_download(self, callback):
        """
        Allows you to bind a function that will be called just before an image is
        about to be downloaded. You'll be given the 1-indexed position of the image, its URL
        and its destination file in the callback like so:
            my_awesome_callback(1, "http://i.imgur.com/fGWX0.jpg", "~/Downloads/1-fGWX0.jpg")
        """
        self.image_callbacks.append(callback)

    def on_complete(self, callback):
        """
        Allows you to bind onto the end of the process, displaying any lovely messages
        to your users, or carrying on with the rest of the program. Whichever.
        The callback is invoked without arguments.
        """
        self.complete_callbacks.append(callback)

    def save_images(self, foldername=False):
        """
        Saves the images from the album into a folder given by foldername.
        If no foldername is given, it'll use the cwd and the album key.
        And if the folder doesn't exist, it'll try and create it.

        Files are named "<position>-<hash><ext>", the position being
        zero-padded to the number of digits of the image count. Files that
        already exist are skipped; a failed download is reported on stdout
        and does not stop the remaining downloads.
        """
        # Try and create the album folder:
        albumFolder = foldername if foldername else self.album_key
        os.makedirs(albumFolder, exist_ok=True)

        # Exact digit count of the total; avoids float rounding of math.log.
        width = len(str(len(self.imageIDs)))

        # And finally loop through and save the images:
        for counter, (image_hash, ext) in enumerate(self.imageIDs, start=1):
            filename = image_hash + ext
            image_url = "http://i.imgur.com/" + filename
            path = os.path.join(
                albumFolder, "%0*d-%s" % (width, counter, filename)
            )

            # Run the callbacks:
            for fn in self.image_callbacks:
                fn(counter, image_url, path)

            # Actually download the thing
            if os.path.isfile(path):
                print("Skipping, already exists.")
                continue

            try:
                urllib.request.urlretrieve(image_url, path)
            except (urllib.error.URLError, OSError):
                print("Download failed.")
                # Drop a partially written file, if one was created at all.
                try:
                    os.remove(path)
                except FileNotFoundError:
                    pass

        # Run the complete callbacks:
        for fn in self.complete_callbacks:
            fn()
|
||
|
|
||
|
|
||
|
def main(argv=None):
    """Command line entry point.

    ``argv`` defaults to ``sys.argv``: element 1 is the album URL and the
    optional element 2 is the destination folder. With no album URL the
    help text is printed. The function always terminates the process via
    ``sys.exit`` (status 1 when an ImgurAlbumException was raised).
    """
    args = sys.argv if argv is None else argv

    if len(args) < 2:
        # Print out the help message and exit:
        print(help_message)
        sys.exit()

    try:
        # Fire up the class:
        downloader = ImgurAlbumDownloader(args[1])

        print("Found {0} images in album".format(downloader.num_images()))

        for ext, count in downloader.list_extensions():
            print("Found {0} files with {1} extension".format(count, ext))

        # Called when an image is about to download:
        def print_image_progress(index, url, dest):
            print("Downloading Image %d" % index)
            print(" %s >> %s" % (url, dest))
        downloader.on_image_download(print_image_progress)

        # Called when the downloads are all done.
        def all_done():
            print("")
            print("Done!")
        downloader.on_complete(all_done)

        # Work out if we have a foldername or not. Extra trailing arguments
        # no longer cause the destination folder to be ignored.
        albumFolder = args[2] if len(args) >= 3 else False

        # Enough talk, let's save!
        downloader.save_images(albumFolder)
        sys.exit()

    except ImgurAlbumException as e:
        print("Error: " + e.msg)
        print("")
        print("How to use")
        print("=============")
        print(help_message)
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist (e.g. under ``python -S``).
        sys.exit(1)


if __name__ == '__main__':
    main()
|