web-scraper
unknown
python
3 years ago
7.0 kB
10
Indexable
"""Digiboek magazine downloader.

Scrapes page spreads from oauth.digiboek.be, pastes the answer overlay
(resolved via the project-local ``answer`` module) on top of each page,
and bundles all pages into ``<magazine_number>.pdf``.  Attachments
referenced by a spread (photos, videos, sounds) can be fetched with
``get_files()``.

NOTE(review): the script is Windows-oriented (``os.system('cls')``,
console-only); run it from a Windows terminal or adapt the clear-screen
calls.
"""
import datetime
import json
import os
from io import BytesIO

import requests
from PIL import Image, ImageOps
from progress.bar import IncrementalBar
import readchar

import answer  # project-local helper that resolves answer-overlay URLs

base_url = "https://oauth.digiboek.be"


def get_files(book_number, page_number, pages):
    """Download the photo/video/sound attachments of one spread.

    book_number -- magazine id on the site
    page_number -- spread index within the magazine
    pages       -- name of the directory the attachments are saved into
                   (created on demand)
    """
    url = base_url + \
        f"/userfiles/static/output/html5/issues/{book_number}/{page_number}/spread.js"
    response = requests.get(url=url)
    data = response.json()
    for child in data['structure']['children']:
        try:
            for item in child['link']:
                if item in ["photoviewer", "video", "sound"]:
                    # Create the target directory lazily; exist_ok replaces
                    # the old try/mkdir/except-OSError-pass pattern.
                    os.makedirs(os.path.join(os.getcwd(), pages), exist_ok=True)
                    link = child['link'][0][0]['src']
                    name = child['link'][2].strip()
                    # Drop characters that are illegal in Windows file names.
                    for forbidden in "?\\/:*\"<>|,":
                        name = name.replace(forbidden, "")
                    extension = link[link.rfind("."):]
                    output = os.path.join(pages, name + extension)
                    if extension == ".jpg":
                        get_photo(link, output)
                    elif extension == ".mp4":
                        get_video(link, output)
                    elif extension == ".mp3":
                        get_sound(link, output)
        except KeyError:
            # Child has no (usable) 'link' entry -- nothing to download.
            pass


def get_photo(link, output):
    """Fetch an image attachment and save it to *output*."""
    response = requests.get(base_url + link)
    image = Image.open(BytesIO(response.content))
    # BUGFIX: *output* already ends in ".jpg" (get_files appends the
    # extension); the old code added a second ".jpg", writing
    # "name.jpg.jpg" files.
    image.save(output)


def get_video(link, output):
    """Stream a video attachment to *output* without buffering it in RAM."""
    # stream=True so iter_content() actually streams; without it the whole
    # body is downloaded up front and the chunk loop is pointless.  Chunk
    # size raised from 255 bytes to a sane 8 KiB.
    response = requests.get(base_url + link, stream=True)
    with open(output, 'wb') as file:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:
                file.write(chunk)


def get_sound(link, output):
    """Fetch a sound attachment and save it to *output*."""
    response = requests.get(base_url + link)
    with open(output, 'wb') as file:
        file.write(response.content)


# "DOWNLOADER" banner (figlet "rectangles" style).
downloadText = """
 ____  _____ _ _ _ _____ __    _____ _____ ____  _____ _____
|    \\|     | | | |   | |  |  |     |  _  |    \\|   __| __  |
|  |  |  |  | | | | | | |  |__|  |  |     |  |  |   __|    -|
|____/|_____|_____|_|___|_____|_____|__|__|____/|_____|__|__|
"""

os.system('cls')
print(downloadText)

COOKIE = input("What is your cookie? ")
BEGIN_NUMBER = int(input("What is the begin number of the page? "))
END_NUMBER = int(input("What is the end number of the page? "))
MAGAZINE_NUMBER = int(input('What is the magazine number? \n'))

WEBSITE_URL = "https://oauth.digiboek.be"

# All output goes into a per-magazine subdirectory of the start directory.
os.makedirs(os.path.join(os.getcwd(), str(MAGAZINE_NUMBER)), exist_ok=True)
print("Current working directory: {0}".format(os.getcwd()))
# Change the current working directory
os.chdir(os.path.join(os.getcwd(), str(MAGAZINE_NUMBER)))
print("Current working directory: {0}".format(os.getcwd()))

session = requests.Session()

# Probe the first spread to learn whether spreads hold one or two hi-res
# background images; that decides the page-number step of the main loop.
res = session.get(
    WEBSITE_URL
    + f"/userfiles/static/output/html5/issues/{MAGAZINE_NUMBER}/{BEGIN_NUMBER}/spread.js")
jsondata = json.loads(res.text)
amountPages = len(jsondata['structure']['spread']['bg']['hires'])
stepSize = (1 + amountPages % 2)
print(f"Stepsize: {stepSize}")

basePath = os.getcwd()
start = datetime.datetime.now()

os.system('cls')
print(downloadText)
bar = IncrementalBar(
    'Pages downloaded',
    max=((END_NUMBER - BEGIN_NUMBER) * (1 + stepSize % 2) + 1),
    suffix='%(percent)d%% - %(eta)ds')

image_list = []
for currentPage in range(BEGIN_NUMBER, END_NUMBER + 1, stepSize):
    currentPageNumber = (currentPage - BEGIN_NUMBER) * (1 + stepSize % 2)
    res = session.get(
        WEBSITE_URL
        + f"/userfiles/static/output/html5/issues/{MAGAZINE_NUMBER}/{currentPage}/spread.js")
    jsondata = json.loads(res.text)
    amountPages = len(jsondata['structure']['spread']['bg']['hires'])

    if amountPages == 1:
        # One hi-res image covers the whole spread: paste the answer
        # overlay on it, then split it into a left and a right page.
        fullPage = jsondata['structure']['spread']['bg']['hires'][0]
        imgDataFull = requests.get(WEBSITE_URL + fullPage).content
        im = Image.open(BytesIO(imgDataFull))
        answer_link = answer.get_answer_link(
            COOKIE, MAGAZINE_NUMBER, currentPage)
        im_answ = Image.open(
            BytesIO(session.get(WEBSITE_URL + answer_link).content))
        edited_im = ImageOps.fit(im_answ, im.size)
        # PIL idiom: the second argument doubles as the paste mask, so
        # only the overlay's opaque pixels land on the page.
        im.paste(edited_im, edited_im)
        imgwidth, imgheight = im.size
        imageLeft = im.crop((0, 0, int(imgwidth / 2), imgheight))
        imageRight = im.crop((int(imgwidth / 2), 0, imgwidth, imgheight))
        if currentPageNumber != 0:
            # The very first spread contributes only its right half
            # (cover layout) -- the left half is skipped.
            image_list.append(imageLeft.convert('RGB'))
        image_list.append(imageRight.convert('RGB'))
        del im

    if amountPages == 2:
        # Two separate hi-res images; the answer overlay spans the whole
        # spread, so split it and paste each half on its own page.
        answer_link = answer.get_answer_link(
            COOKIE, MAGAZINE_NUMBER, currentPage)
        im_answ = Image.open(
            BytesIO(session.get(WEBSITE_URL + answer_link).content))
        imgwidth, imgheight = im_answ.size
        im_answ_r = im_answ.crop((int(imgwidth / 2), 0, imgwidth, imgheight))
        im_answ_l = im_answ.crop((0, 0, int(imgwidth / 2), imgheight))

        leftPage = jsondata['structure']['spread']['bg']['hires'][0]
        rightPage = jsondata['structure']['spread']['bg']['hires'][1]
        if currentPageNumber != 0:
            # First spread: left page skipped (cover layout).
            imgDataLeft = requests.get(WEBSITE_URL + leftPage).content
        imgDataRight = requests.get(WEBSITE_URL + rightPage).content

        if currentPageNumber != 0:
            imageLeft = Image.open(BytesIO(imgDataLeft))
            edited_im = ImageOps.fit(im_answ_l, imageLeft.size)
            imageLeft.paste(edited_im, edited_im)
            image_list.append(imageLeft.convert('RGB'))

        imageRight = Image.open(BytesIO(imgDataRight))
        edited_im = ImageOps.fit(im_answ_r, imageRight.size)
        imageRight.paste(edited_im, edited_im)
        image_list.append(imageRight.convert('RGB'))

    # NOTE(review): progress advances two page-numbers per spread;
    # original indentation was ambiguous -- confirm placement.
    bar.next(2)

# The first collected image becomes the PDF's first page; the remaining
# pages are appended behind it.
firstImg = image_list[0]
image_list.pop(0)
firstImg.save(os.path.join(basePath, f"{MAGAZINE_NUMBER}.pdf"),
              save_all=True, append_images=image_list)
bar.finish()

print(f"Time elapsed: {datetime.datetime.now() - start}")
print("Press Any Key To Exit")
k = readchar.readchar()
Editor is loading...