用网页来看网络学堂的公告很费时间,就打算像 rss2email
那样,把消息制作成邮件。 用 Ruby
的 mechanize
来和网站交互,读取“课程公告”和“课程文件”中的消息,对每条链接计算 SHA-1 摘要,据此判断是否已经生成过提示邮件;没有则跟踪链接,用 w3m 输出成纯文本,生成的邮件用 sendmail 投递。
# net_lesson: turns new "course announcement" and "course file" entries of
# learn.tsinghua.edu.cn into mails delivered through sendmail.
#
# Configuration lives in ~/.net_lesson:
#   passwd    - one line, "<userid> <passwd>"
#   feeds.dat - concatenated raw 20-byte SHA-1 digests of the links that have
#               already been mailed (created on first successful run)
require 'base64'
require 'date'
require 'digest/sha1'
require 'etc'
require 'mechanize'
require 'set'

agent = Mechanize.new
agent.max_history = 1

conf_dir = File.expand_path('~/.net_lesson')
passwd_path = File.join(conf_dir, 'passwd')
feeds_path = File.join(conf_dir, 'feeds.dat')

unless File.file?(passwd_path)
  STDERR.puts 'echo [userid] [passwd] > ~/.net_lesson/passwd'
  exit 1
end
userid, passwd = File.open(passwd_path, 'r') { |f| f.gets.split }

# Load the digests of links that were mailed before. feeds.dat does not exist
# on the very first run, so a missing file simply means "nothing seen yet".
feeds = Set.new
if File.file?(feeds_path)
  File.open(feeds_path, 'r:binary') do |f|
    # IO#read(n) returns nil at EOF, which ends the loop.
    while (digest = f.read(20))
      feeds.add digest
    end
  end
end
new_feeds = []
puts "loaded #{feeds.size} feeds" unless feeds.empty?

page = agent.get('http://learn.tsinghua.edu.cn/')
form = page.form('form1')
form.field_with(:name => 'userid').value = userid
form.field_with(:name => 'userpass').value = passwd
agent.submit(form)
puts 'login'

begin
  page = agent.get('http://learn.tsinghua.edu.cn/MultiLanguage/lesson/student/MyCourse.jsp?language=cn')
  page.links_with(:href => /course_locate.jsp/).each do |lesson|
    lesson_name = lesson.text.gsub(/\s/, '').sub(/\(.*/, '')
    lesson_page = lesson.click
    puts "checking #{lesson_name}"

    ['getnoteid_student.jsp', 'download.jsp'].each do |uri|
      download = uri == 'download.jsp'
      section_link = lesson_page.link_with(:href => /#{uri}/)
      next unless section_link # this course has no such section

      page2 = section_link.click
      page2.links_with(:href => /note_reply|filePath/).each do |note|
        h = Digest::SHA1.digest note.href
        next if feeds.member? h

        puts "  found #{note.text.strip}"

        # The last two cells of the row are read as the author and the date.
        cells = note.node.xpath('../following-sibling::td')
        author = (download ? 'file ' : '') + cells[-2].text
        time = Date.parse(cells[-1].text).strftime '%a, %d %b %Y 00:00:00 +0800'
        bar = download ? "[#{lesson_name}]" : "*#{lesson_name}*"

        # Build the whole body before spawning sendmail, so that a failing
        # fetch or w3m run cannot deliver a truncated mail.
        body =
          if download
            note.text
          else
            IO.popen(%w[w3m -dump -T text/html], 'r+') do |w3m|
              w3m.puts(note.click.body)
              w3m.close_write
              w3m.read
            end
          end

        # Percent-encode CJK characters only (what URI.escape with this
        # pattern did; URI.escape itself was removed in Ruby 3.0).
        escaped_href = note.href.gsub(/[\u4E00-\u9FFF]/) do |ch|
          ch.bytes.map { |b| format('%%%02X', b) }.join
        end

        IO.popen(['/usr/sbin/sendmail', Etc.getlogin], 'w') do |f|
          f.puts "From: #{author} <#{Etc.getlogin}>",
                 "Subject: =?utf-8?B?#{Base64.strict_encode64("#{bar} #{note.text.strip}")}?=",
                 "Date: #{time}",
                 'User-Agent: net_lesson',
                 'Content-Type: text/plain; charset="utf-8"',
                 'Content-Transfer-Encoding: binary',
                 '' # blank line terminates the header section
          f.puts body
          f.puts "\nURI: #{page2.uri.merge escaped_href}"
        end
        new_feeds << h
      end
    end
  end
ensure
  # Persist digests even when the run aborts part-way, so mails that were
  # already delivered are not sent again on the next run.
  unless new_feeds.empty?
    puts "appending to feeds.dat"
    File.open(feeds_path, 'a:binary') { |f| new_feeds.each { |a| f.write(a) } }
  end
end
2014年11月30日更新
这个方案已废弃,现在改用 newsbeuter 阅读 RSS,并使用一个 Ruby 脚本抓取通知。
#!/usr/bin/ruby
# Scrapes two Tsinghua notice pages and writes each one out as an Atom feed
# under /tmp, for consumption by a local feed reader.
require 'open-uri'
require 'nokogiri'
require 'rss'
require 'time' # Time.parse

# Builds an Atom feed with the given channel title, yields the maker so the
# caller can add items, and writes the result to +path+.
def write_feed(path, title)
  feed = RSS::Maker.make("atom") do |maker|
    maker.channel.author = ''
    maker.channel.about = ''
    maker.channel.updated = Time.now.to_s
    maker.channel.title = title
    yield maker
  end
  File.write path, feed.to_s
end

# The entry page only contains a JavaScript redirect; pull the real listing
# URL out of the location.replace("...") call.
# URI.open is used because Kernel#open no longer opens URLs on Ruby >= 3.0.
s = URI.open('http://oars.tsinghua.edu.cn/zzh/30630.nsf/infobytime?openview').read
script = Nokogiri.parse(s).xpath('//script/text()')[0]
m = script && script.text.match(/location\.replace\("([^"]*)"/)
abort 'oars.tsinghua.edu.cn: redirect target not found' unless m

s = URI.open("http://oars.tsinghua.edu.cn#{m[1]}").read
s = s.force_encoding('gbk').encode 'utf-8'
listing = Nokogiri.parse(s)

write_feed('/tmp/教务通知.rss', '教务通知') do |maker|
  listing.xpath('//tr[contains(@valign, "top")]').each do |tr|
    tds = tr.search('td')
    next if tds.size != 4

    anchor = tds[2].search('a')[1]
    next unless anchor # row without the expected second link

    maker.items.new_item do |item|
      item.link = (URI('http://oars.tsinghua.edu.cn') + anchor.attr('href')).to_s
      item.title = tds[2].text
      item.updated = Time.parse tds[3].text
    end
  end
end

write_feed('/tmp/重要通知.rss', '重要通知') do |maker|
  # This page's entries are all stamped with the fetch time.
  t = Time.now
  notices = Nokogiri::HTML URI.open('http://info.tsinghua.edu.cn/html/view/notice_beforelogin.htm')
  notices.xpath('//td').each do |td|
    next if td.children.size != 2

    a = td.search('a')[0]
    next unless a # cell with two children but no link

    maker.items.new_item do |item|
      # NOTE(review): links are resolved against oars.tsinghua.edu.cn although
      # the page is fetched from info.tsinghua.edu.cn; harmless if the hrefs
      # are absolute, otherwise probably a copy-paste slip. Confirm.
      item.link = (URI('http://oars.tsinghua.edu.cn') + a.attr('href')).to_s
      item.title = a.text
      item.updated = t
    end
  end
end
然后用 fcron 定期执行上面的脚本,产生 /tmp/教务通知.rss 和 /tmp/重要通知.rss:
% fcrontab -l
@ 1h /home/ray/bin/教务通知.rb 2>> /tmp/stderr
在 ~/.config/newsbeuter/urls 里添加下面两行:
file:///tmp/教务通知.rss
file:///tmp/重要通知.rss