Wednesday, June 13, 2007

利用 Mechanize 获取 Gmail


require 'rubygems'
require 'mechanize'

# Scrape a Gmail inbox listing: log in through the first form on the landing
# page, follow the redirect, reload the page with a rewritten query string,
# then print sender/subject/link for each matching row and fetch that link.
agent = WWW::Mechanize.new

# Fetch the landing page and fill in the first form found on it.
# NOTE(review): the credentials are hard-coded placeholders and the initial
# request uses plain http -- confirm before pointing this at a real account.
page = agent.get 'http://www.gmail.com'
form = page.forms.first
form.Email = 'gmailacc'
form.Passwd = 'gmailpasswd'
page = agent.submit form

# Request the URL held in the 'href' attribute of the first <meta> element,
# with any single quotes stripped out of it.
# NOTE(review): presumably this is a meta-refresh redirect whose target the
# library exposes as 'href' -- confirm against the Mechanize version in use.
page = agent.get page.search("//meta").first.attributes['href'].gsub(/'/,'')
# Replace the current URL's query string with "?ui=html&zy=n" and re-request.
# NOTE(review): presumably selects Gmail's basic HTML interface -- TODO confirm.
page = agent.get page.uri.to_s.sub(/\?.*$/, "?ui=html&zy=n")
# Pass the <tr> elements whose bgcolor is #ffffff to the block as `row`.
# NOTE(review): looks like these are the message rows of interest (possibly
# unread mail) -- confirm against the actual page markup.
page.search("//tr[@bgcolor='#ffffff']") do |row|
# The first two text nodes found under <b> elements become sender and subject.
from, subject = *row.search("//b/text()")
# Build the message URL: everything from "ui" to the end of the current page
# URL is replaced by the href of the row's first <a> element.
url = page.uri.to_s.sub(/ui.*$/, row.search("//a").first.attributes["href"])
puts "From: #{from}\nSubject: #{subject}\nLink: #{url}\n\n"

# Fetch the message page; `email` is not used further here -- the "# .."
# below marks where processing of the message would go.
email = agent.get url
# ..
end


http://schf.uc.org/articles/2007/02/14/scraping-gmail-with-mechanize-and-hpricot

No comments :