# Main reference: https://www.devdungeon.com/content/grab-image-clipboard-python-pillow
from time import sleep

import numpy as np
import pyautogui
import pyperclip
from selenium import webdriver
# Download step
# Drives a real Chrome window: each page image is opened directly and saved
# through Chrome's right-click menu using simulated mouse/keyboard input, so
# the browser window must stay in the foreground while this runs.
chrome_options = webdriver.ChromeOptions()
prefs = {'download.default_directory': '/user/defined/path'}
chrome_options.add_experimental_option('prefs', prefs)
# NOTE(review): `executable_path=`, `chrome_options=` and `find_element_by_id`
# are the Selenium 3 spellings; they are kept so the script keeps working with
# the Selenium version it was written for.
driver = webdriver.Chrome(executable_path="./geckodriver/chromedriver", chrome_options=chrome_options)
try:
    # Preview page of the thesis; the note at the end of this file explains
    # how to find this URL.
    driver.get("https://thesis.lib.pku.edu.cn/onlinePDF?dbid=72&objid=53_57_54_50_56_49&flag=online")

    # Resolve the image base URL once, while the preview page is still loaded.
    # Inside the loop the browser has already navigated to a bare JPEG, where
    # the 'ViewContainer_BG_0' element no longer exists.
    # The slice drops the last 10 characters of the first page's `src`
    # (presumably its `_NNNNN.jpg` suffix -- confirm against the live page).
    img_url = driver.find_element_by_id('ViewContainer_BG_0').get_attribute('src')[:-10]

    # Pages 1 and 2 only; raise the upper bound to fetch more pages.
    for t in range(1, 3):
        driver.get("{}_{:05d}.jpg".format(img_url, t))
        # Move to the specified location and right-click to open the context menu
        pyautogui.rightClick(x=600, y=500)
        # Press 'V' in the context menu, then send Ctrl+V
        pyautogui.typewrite(['V'])
        pyautogui.hotkey('ctrlleft', 'V')
        sleep(0.8)
        pyautogui.press('enter')
        sleep(0.8)
        pyautogui.press('enter')
finally:
    # Always end the session, even if a step above raised, so neither the
    # browser window nor the chromedriver process is left behind.
    driver.quit()
# Image adjustment step
# Some downloaded pages are landscape; rotate those 90 degrees
# counter-clockwise so every page ends up portrait.
import os

import natsort
from fpdf import FPDF
import cv2

# Natural sort keeps page order for numbered file names.
# `files` stays the full sorted listing; the PDF step below reuses it.
files = os.listdir('/DOWNLOAD_PATH/')
files = natsort.natsorted(files, reverse=False)

for file in files:
    if not file.endswith('jpg'):
        continue
    path = os.path.join('/DOWNLOAD_PATH/', file)
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals an unreadable or corrupt file by returning None
        # rather than raising; skip it instead of crashing on `.shape`.
        print('skipping unreadable image: ' + path)
        continue
    h, w = img.shape[:2]
    if h < w:
        # Wider than tall: rotate and overwrite the file in place.
        imgrot = cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE)
        cv2.imwrite(path, imgrot)
# Assemble the PDF
# `files` is the sorted directory listing built in the image adjustment step
# above, and FPDF is imported there as well.
# 210 x 297 matches FPDF's default page (A4, millimetres), so each image is
# stretched to cover one full page.
page_width, page_height = 210, 297

jpg_names = [name for name in files if name.endswith('jpg')]

pdf = FPDF()
for name in jpg_names:
    pdf.add_page()
    # Anchor the image at the top-left corner of the new page.
    pdf.image('/DOWNLOAD_PATH/' + name, 0, 0, page_width, page_height)

# "F" writes the document to a local file with the given name.
pdf.output("FILENAME.pdf", "F")
在北大图书馆检索后,点击目标文献,在该页面打开 Chrome 开发者工具,找到下列标记字段,右键选择“copy link element”,得到的链接即为预览页 URL。
Kommentare