批量下载人民教育网高中数学电子课本(jpg)

There's more than one way to do it!
https://metacpan.org http://perlmonks.org
回复
头像
523066680
Administrator
Administrator
帖子: 573
注册时间: 2016年07月19日 12:14
联系:

批量下载人民教育网高中数学电子课本(jpg)

帖子 523066680 »

相关页面:高中数学>>教师中心>>同步教学资源>>课程标准实验教材>>必修1>>电子课本

脚本应保存为utf8编码格式,坦白说,pep人教网上面扫描的图片像素太低了……

书的列表在这个数组中,从必修1到选修4-9
my @booklist = qw/
bx1 bx2 bx3 bx4 bx5 xx11 xx12 xx21
xx22 xx23 xx31 xx33 xx34 xx41 xx42 xx44
xx45 xx46 xx47 xx49 /;

下载文件所在的目录设置
$WORKDIR = "D:\\Book\\Math\\" . $book;
DownMathBook.pl
#    Code: 523066680
#    Date: 2016-06
#
# Batch-download the scanned page images (JPG) of the PEP high-school
# mathematics e-textbooks.  For every book code in @booklist the script
# reads the book's index page, works out the range of numeric page ids,
# then walks the per-page HTML files and stores the image each one embeds.
#
# Save this file as UTF-8: the regexes below contain Chinese literals.

use v5.16;
use warnings;
use utf8;
use Encode;
use LWP::UserAgent;
use LWP::Simple qw/getstore get is_success/;
use IO::Handle;
STDOUT->autoflush(1);

our $website = "http://www.pep.com.cn/gzsx/jszx_1/czsxtbjxzy/xkbsyjc/dzkb/";
our $bookpage;     # URL of the index page of the book being processed
our $WORKDIR;      # local directory the current book is saved into
our $page1maps;    # numeric page id of the book's first chapter page
my  ($path, $begin, $end);

# Book codes as used in the site URLs.
my @booklist = qw/
    bx1  bx2  bx3  bx4  bx5  xx11 xx12 xx21
    xx22 xx23 xx31 xx33 xx34 xx41 xx42 xx44
    xx45 xx46 xx47 xx49
/;

# NOTE(review): only the books from index 10 onwards are processed; this
# looks like a leftover from resuming an interrupted run -- confirm before
# changing the slice to the whole list.
for my $book ( @booklist[10 .. $#booklist] )
{
    print "Now is downloading: $book\n";
    $bookpage = $website . $book . "/";
    $WORKDIR  = "D:\\Book\\Math\\" . $book;
    CreatePath($WORKDIR);

    ($path, $begin, $end) = get_pgnum_range( $bookpage );
    print "Path: $path, $begin to $end\n";

    # Page id that corresponds to page 1 of the book.
    $page1maps = get_who_map_page1( $bookpage );
    get_picture( $bookpage, $path, $begin, $end );
}

system("pause");

# get_pgnum_range($bookpage)
#
# Fetches the index page and collects every link of the form
# href="./<prefix>_<digits>.htm".
#
# Returns a three-element list: the <prefix> of the last matching link
# (including the trailing underscore), the smallest page id and the
# largest page id.
#
# Dies if the index page cannot be fetched or contains no such link.
sub get_pgnum_range
{
    my $bookpage = shift;

    my $all = get($bookpage);
    die "cannot fetch index page: $bookpage\n" unless defined $all;

    # If the page mentions the cover, drop everything up to and including it.
    $all =~ s/.*封面//s;

    my @pglist;
    my $path;

    # Links look like: ./201102/t20110217_1021412.htm
    for my $e ( split /\r?\n/, $all )
    {
        if ( $e =~ /href="\.\/([^"]*_)(\d+)\.htm"/ )
        {
            $path = $1;
            push @pglist, $2;
        }
    }
    die "no page links found on: $bookpage\n" unless @pglist;

    # Page ids are numbers: sort numerically, otherwise ids with a
    # different number of digits would end up in the wrong order.
    @pglist = sort { $a <=> $b } @pglist;
    return $path, @pglist[ 0, $#pglist ];
}

# get_who_map_page1($bookpage)
#
# Fetches the index page and returns the numeric page id of the link that
# is followed by the text "第一章" or "第一讲" (first chapter / first
# lecture).
#
# Dies if the page cannot be fetched or no such link is found.
sub get_who_map_page1
{
    my $bookpage = shift;

    my $all = get($bookpage);
    die "cannot fetch index page: $bookpage\n" unless defined $all;

    # Join all lines so the link and its caption can be matched together.
    $all =~ s/\r?\n//g;

    if ( $all =~ /\d+_(\d+)\.htm[^.]+第一(?:章|讲)/ )
    {
        return $1;
    }

    die "first page code not found! ";
}

# get_picture($bookpage, $path, $begin, $end)
#
# Walks the page ids from $end down to $begin, fetches
# "$bookpage$path$n.htm" for each, and stores the JPG referenced by its
# IMG tag under $WORKDIR.
#
# Pages whose id is <= $page1maps are saved under a running counter
# (001.jpg, 002.jpg, ...); the others keep the image's original file name.
# Pages that cannot be fetched or contain no image are skipped; a failed
# image download is reported with a warning and the loop continues.
sub get_picture
{
    my ($bookpage, $path, $begin, $end) = @_;
    my $count = 0;    # running page counter for the numbered files

    # The site numbers pages in reverse: book page +1 means page id -1.
    for ( my $n = $end; $n >= $begin; $n-- )
    {
        my $subpage = $bookpage . $path . $n . ".htm";
        my $all = get( $subpage ) or next;

        # The quotes around the src value are not always present.
        next unless $all =~ /IMG src="?\.\/([^".]*\.jpg)"?/i;
        my $pic = $1;

        # The image lives next to the HTML page: swap the last URL segment.
        ( my $pic_url = $subpage ) =~ s/[^\/]+$/$pic/;

        my $fname;
        if ( $n <= $page1maps )
        {
            $count++;
            $fname = sprintf("%03d.jpg", $count);
        }
        else
        {
            $fname = $pic;
        }

        # getstore() returns the HTTP status code, which is always a true
        # value, so it has to be tested with is_success().
        my $status = getstore( $pic_url, $WORKDIR . "\\" . $fname );
        if ( is_success($status) )
        {
            print "$pic\n";
        }
        else
        {
            warn "failed to download $pic_url (HTTP status $status)\n";
        }
    }
}

# CreatePath($path)
#
# Creates every missing directory along $path.  Both "\" and "/" are
# accepted as separators.  The first component is never created: it is
# taken to be the drive specifier (e.g. "D:").
#
# Dies if a directory cannot be created.
sub CreatePath
{
    my $path = shift;
    my @arr  = split /[\\\/]/, $path;
    my $main = shift @arr;    # starts with the drive specifier

    for my $s (@arr)
    {
        $main .= "/" . $s;
        next if -d $main;
        mkdir( $main ) or die "cannot create directory $main: $!\n";
    }
}
回复

在线用户

正浏览此版面之用户: 没有注册用户 和 2 访客