支持Mac和Windows系统,支持腾讯企业邮箱。https://zhuanlan.zhihu.com/p/51543237
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
brew install --cask helium
brew install --cask chromedriver
brew install --cask prettytable
pip install selenium
pip install helium
pip install prettytable
pip install helium -i http://pypi.douban.com/simple --trusted-host pypi.douban.com
QQNUMBER="132465798"
PASSWORD="132465798"
只需设置 ROOTPATH,剩下两个会在这个目录下创建文件夹。
ROOTPATH = "D:\\Downloads\\2020"
DOWNLOAD_FOLDER = os.path.join(ROOTPATH,"download")    # 不需要修改。附件实际下载目录 即:"D:\\Downloads\\2020\\download"
USERDATA_FOLDER = os.path.join(ROOTPATH,"selenium")    # 不需要修改。浏览器的缓存数据 即:"D:\\Downloads\\2020\\selenium"
https://mail.qq.com/cgi-bin/frame_html?t=frame_html&sid={x}&url=/cgi-bin/mail_list?folderid={ A }%26page={x}
Title_Task = { 'start':1, 'step':0, 'end': 0, 'relay': 0 }
Pages_Task = { 'start':1, 'step':0, 'end':0, 'autoNext': 1, 'reverse': 0 }
TITLE_WHITELIST_KEYS = ['反馈','2020']        # 只处理标题同时包含这两个关键词的邮件
TITLE_BACKLIST_KEYS = ['发信方已撤回邮件']    # 不处理标题包含该关键词的邮件
attach_blacklist_filetype = ['']              # 黑名单:不下载列表中的文件类型
attach_whitelist_filetype = ['psd', 'ai']     # 白名单:只下载 psd 或 ai 类型的文件(若要排除这些类型,请改填到 attach_blacklist_filetype)
# 浏览器禁用显示图片,(浏览器首次运行前生效)
can_disabled_images = 0

# 是否倒序读取(从最后一页开始往前)
can_reverse_list = 0

#··········· 下载 ···········#

# 是否需要重命名附件
can_rename_file = 0

# 是否需要每封邮件创建文件夹
can_move_folder = 1

# 在下载前,检查本地文件是否已存在附件(根据文件名+文件大小)
# 如果存在则跳过本次下载。
# 可选值:'skip' 或 'continue'
ready_download_but_file_exists = 'skip'

#··········· 星标 / 标签 ···········#

# 没有附件的邮件设为星标
can_star_nofile = 1

# 过期附件的邮件设为星标
can_star_timeoutfile = 0

# 没有附件添加标签
can_tag_nofile = 0
str_tag_nofile = '没有附件'

# 过期附件添加标签
can_tag_timeoutfile = 0
str_tag_timeoutfile = '过期附件'

#··········· 功能 ···········#

# 是否需要下载附件
can_download_file = 1

# 是否需要进入邮件正文
can_load_email = 1

# 是否需要获取邮件标题列表
can_load_title = 1

# 下载等待时长(单位:秒)。超过时长后则放弃后续操作,如移动文件夹或重命名。
downloading_timeout = 300

#··········· 控制台 ···········#

# 是否需要 PrettyTable 来打印表格
# 如果有人装不上 PrettyTable,可以将它禁用。
can_print_prettytable = 1

# 是否在控制台打印邮件信息
can_print_title = 1
can_print_attch = 1

# 是否在控制台打印统计表格
can_print_folder_table = 1
can_print_title_table = 1

#··········· 统计 ···········#

# 是否将数据导出为CSV文件
can_export_titledata_to_csv = 1
can_export_attchdata_to_csv = 1

#··········· 高级选项 ···········#

# 是否需要设置 desired_capabilities 参数
can_set_capabilities = 1
config_timeout_pageLoad = 10000
config_timeout_script = 1500
rule =
#-------------------------------------------------------------------------------
# 重命名模板
#-------------------------------------------------------------------------------
# filename1         附件文件名(不包含扩展名)                   例: 简历
# filename2         附件文件名(包含扩展名)                     例: 简历.pdf
#···············································································
# extension1        附件扩展名(包含.)                          例: .jpg .txt .pdf
# extension2        附件扩展名(不包含.)                        例: jpg txt pdf
#···············································································
# attchindex        计数:目前是第几个附件(不包含过期附件)       例: 0001
# titleindex        计数:目前是第几封邮件 (从1开始计数)         例: 0001
# pageindex         计数:目前是第几页 (从1开始计数)             例: 001
# attchtitleindex   计数:在当前邮件中的多个附件的顺序 (从1开始计数)  例: 01
#···············································································
# titlecount        总数:本次下载计划的邮件数量                  例: 0
# attchcount        总数:本次下载计划的附件数量(包含过期附件)   例: 0
#···············································································
# folderid          文件夹:folder_id                             例: 129
# foldername        文件夹:名称                                  例: 我的文件夹
# foldertitle       文件夹:邮件数量                              例: 500
# folderpage        文件夹:总页数                                例: 20
#···············································································
# titlename         邮件标题                                      例: 小明_简历_2021
#···············································································
# nameid            发信方的邮箱昵称                              例: 小明
# address           发信方的邮箱地址                              例: 123456@qq.com, xiaomin233@vip.qq.com
# emailid           发信方的邮箱账号,通常是QQ号                  例: 123456, xiaomin233
#···············································································
# year              发送时间:年 %Y                               例: 2020
# month             发送时间:月 %m                               例: 12
# day               发送时间:日 %d                               例: 07
#···············································································
# week              发送时间:周 %a                               例: Mon
# ampm              发送时间:上/下午 %p                          例: AM
#···············································································
# hours             发送时间:时 %H                               例: 14
# minutes           发送时间:分 %M                               例: 30
# seconds           发送时间:秒 %S                               例: 59
#···············································································
# time1             发送时间:格式化 %H%M                         例: 1430
# time2             发送时间:格式化 %H-%M-%S                     例: 14-30-59
# time3             发送时间:格式化 %H'%M'%S                     例: 14'30'59
#···············································································
# date1             发送时间:格式化 %m%d                         例: 1207
# date2             发送时间:格式化 %Y%m%d                       例: 20201207
# date3             发送时间:格式化 %Y-%m-%d                     例: 2020-12-07
#···············································································
# fulldate1         发送时间:格式化 %Y-%m-%d_%H-%M-%S            例: 2020-12-07_14-30-59
# fulldata2         发送时间:格式化 %Y%m%d_%H'%M'%S              例: 20201207_14'30'59
#···············································································
# 需要放到花括号里。例如 {attchindex}_{filename2} => 0001_作品.pdf

# 附件重命名规则。
# 案例:{attchindex}_{filename2} => 0001_作品.pdf
rule_rename = "{attchindex}_{filename2}"

# 文件夹名称。
# 案例:{titleindex}_{address}_{fulldate1} => 0001_123456@qq.com_2020-12-07_14-30-59
rule_folder = "{titleindex}_{address}_{fulldate1}"